diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..844462e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM golang:1.11.1-alpine + +COPY . /mstdn-ebooks/ +RUN cd /mstdn-ebooks/ \ + && apk add --no-cache git \ + && CGO_ENABLED=0 go build -o /usr/local/bin/mstdn-ebooks \ + && apk del git + +VOLUME /mstdn-ebooks/data +WORKDIR /mstdn-ebooks/data + +CMD ["mstdn-ebooks", "-server", "https://botsin.space"] diff --git a/README.md b/README.md index a14bab7..9ba6506 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,22 @@ # mstdn-ebooks + **Lynnear Edition** -This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks), such as: +This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks) and [the other original](https://github.com/Lynnesbian/mstdn-ebooks/tree/3d059d0b9b66fd31378574104f1a56f2be5a319c), such as: + - Unicode support - Non-Markov stuff -- Stores toots in a sqlite database rather than a text file - - Doesn't unecessarily redownload all toots every time - -## Install/usage guide -An installation and usage guide is available for unix-based platforms (linux, macOS...) [here](https://cloud.lynnesbian.space/s/Qxxm2sYdMZaqWat). +- Doesn't unnecessarily redownload all toots every time +- Uses an API called "webfinger" to allow downloading toots not known to your bot's instance +- Self-contained executable handles scheduling +- Docker support +- Written in Go -## Original README -hey look it's an ebooks bot +## Installation -python3 - -install the requirements with `sudo pip3 install -r requirements` - -make a bot (probably on bots in space) and follow the target accounts - -run `python3 main.py` to login and scrape - -run `python3 gen.py` to make a toot - -cron is an okay choice to make it toot regularly +1. Build mstdn-ebooks the same way you would build any Go program (`go get`, etc.) 
Alternatively, if you don't want to build it yourself, download a [precompiled release version](https://github.com/Lynnesbian/mstdn-ebooks/releases/latest). +2. If you haven't already, create an account on [botsin.space](https://botsin.space) or another instance. +3. Make sure the bot account is ONLY following you. Remove any default follows. +4. Run the `mstdn-ebooks` command. If your instance is not botsin.space, run the command as `mstdn-ebooks -server https://[your instance]`. +5. Copy the URL it generates into a browser logged into your bot account, and copy the code that Mastodon generates back to the program. +6. Congratulations! Your ebooks bot is now running. To restart it, you only need to redo step 4. diff --git a/auth.go b/auth.go new file mode 100644 index 0000000..23c44ec --- /dev/null +++ b/auth.go @@ -0,0 +1,115 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package main + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/url" + "os" + + "github.com/mattn/go-mastodon" +) + +func ensureApp(ctx context.Context, cfg *mastodon.Config) { + if b, err := ioutil.ReadFile(*flagApp); os.IsNotExist(err) { + log.Println("No clientcred.secret, registering application") + + app, err := mastodon.RegisterApp(ctx, &mastodon.AppConfig{ + Server: *flagServer, + ClientName: "lynnesbian_mastodon_ebooks", + Website: "https://github.com/Lynnesbian/mstdn-ebooks", + Scopes: scopes, + RedirectURIs: noRedirect, + }) + checkError(err, "Could not register app") + + cfg.ClientID = app.ClientID + cfg.ClientSecret = app.ClientSecret + + checkError(ioutil.WriteFile(*flagApp, []byte(app.ClientID+"\n"+app.ClientSecret+"\n"), 0644), "Could not save app credentials") + + // If the app credentials were just generated, the user access + // token cannot possibly be valid. 
+ _ = os.Remove(*flagUser) + } else { + checkError(err, "Could not read app credentials") + + lines := bytes.Split(b, []byte{'\n'}) + + // consider final newline to be optional + if len(lines) == 3 && len(lines[2]) == 0 { + lines = lines[:2] + } + + if len(lines) != 2 { + log.Fatalf("App credentials (%q) malformed. Cannot proceed.", *flagApp) + } + + cfg.ClientID = string(lines[0]) + cfg.ClientSecret = string(lines[1]) + } +} + +func ensureUser(ctx context.Context, cfg *mastodon.Config) { + if b, err := ioutil.ReadFile(*flagUser); os.IsNotExist(err) { + log.Println("No usercred.secret, registering application") + authURL, err := url.Parse(*flagServer) + checkError(err, "Could not parse instance root URL") + authURL.Path = "/oauth/authorize" + authURL.RawQuery = url.Values{ + "scope": {scopes}, + "response_type": {"code"}, + "redirect_uri": {noRedirect}, + "client_id": {cfg.ClientID}, + }.Encode() + log.Println("Visit this url:", authURL) + fmt.Print("Secret: ") + var authCode string + _, err = fmt.Scanln(&authCode) + checkError(err, "Failed to read authorization code") + + authURL.Path = "/oauth/token" + authURL.RawQuery = "" + + resp, err := http.PostForm(authURL.String(), url.Values{ + "client_id": {cfg.ClientID}, + "client_secret": {cfg.ClientSecret}, + "grant_type": {"authorization_code"}, + "code": {authCode}, + "redirect_uri": {noRedirect}, + }) + checkError(err, "Failed to request access token") + + defer func() { + checkError(resp.Body.Close(), "Error closing response body") + }() + + if resp.StatusCode == http.StatusOK { + var payload struct { + AccessToken string `json:"access_token"` + } + checkError(json.NewDecoder(resp.Body).Decode(&payload), "Error decoding authentication response") + + cfg.AccessToken = payload.AccessToken + + checkError(ioutil.WriteFile(*flagUser, []byte(payload.AccessToken+"\n"), 0644), "Error saving access token") + } else { + body, err := ioutil.ReadAll(resp.Body) + checkError(err, "Network error reading authentication error") + 
+ log.Fatalln("Authentication failed:", string(body)) + } + } else { + checkError(err, "Could not read user access token") + + cfg.AccessToken = string(bytes.TrimSuffix(b, []byte{'\n'})) + } +} diff --git a/config.json b/config.json deleted file mode 100644 index 6b9b611..0000000 --- a/config.json +++ /dev/null @@ -1 +0,0 @@ -{"site":"https://botsin.space"} diff --git a/create.py b/create.py deleted file mode 100755 index 08ec488..0000000 --- a/create.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python3 -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -import markovify -import json -import re, random, multiprocessing, time, sqlite3, shutil, os - -def make_sentence(output): - class nlt_fixed(markovify.NewlineText): - def test_sentence_input(self, sentence): - return True #all sentences are valid <3 - - # with open("corpus.txt", encoding="utf-8") as fp: - # model = nlt_fixed(fp.read()) - - shutil.copyfile("toots.db", "toots-copy.db") - db = sqlite3.connect("toots-copy.db") - db.text_factory=str - c = db.cursor() - toots = c.execute("SELECT content FROM `toots`").fetchall() - toots_str = "" - for toot in toots: - toots_str += "\n{}".format(toot[0]) - model = nlt_fixed(toots_str) - toots_str = None - db.close() - os.remove("toots-copy.db") - - sentence = None - while sentence is None: - sentence = model.make_short_sentence(500, tries=100000) - sentence = sentence.replace("\0", "\n") - output.send(sentence) - -def make_toot(force_markov = False, args = None): - return make_toot_markov() - -def make_toot_markov(): - tries = 0 - toot = None - while toot == None and tries < 10: - pin, pout = multiprocessing.Pipe(False) - p = multiprocessing.Process(target = make_sentence, args = [pout]) - p.start() - p.join(10) - if p.is_alive(): - p.terminate() - p.join() - toot = None - tries = tries + 1 - else: - toot = 
pin.recv() - return { - "toot":toot, - "media":None - } diff --git a/gen.go b/gen.go new file mode 100644 index 0000000..0a9d8a8 --- /dev/null +++ b/gen.go @@ -0,0 +1,70 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package main + +import ( + "context" + "math/rand" + "strings" + "sync" + "time" + + "github.com/mattn/go-mastodon" +) + +func genToot(ctx context.Context, me *mastodon.Account, replyTo *mastodon.Status) *mastodon.Toot { + var replyToID mastodon.ID + visibility := "public" + var body []byte + var seed string + + if replyTo != nil { + replyToID = replyTo.ID + visibility = replyTo.Visibility + seed = cleanContent(replyTo.Content) + + body = append(body, '@') + body = append(body, replyTo.Account.Acct...) + body = append(body, ' ') + + for _, m := range replyTo.Mentions { + if m.ID != me.ID && m.ID != replyTo.Account.ID { + body = append(body, '@') + body = append(body, m.Acct...) + body = append(body, ' ') + } + } + } + + body = append(body, generateMessage(ctx, seed)...) + + return &mastodon.Toot{ + Status: string(body), + Visibility: visibility, + InReplyToID: replyToID, + } +} + +var rngPool = sync.Pool{ + New: func() interface{} { + return rand.New(rand.NewSource(time.Now().UnixNano())) + }, +} + +func generateMessage(ctx context.Context, seed string) string { + r := rngPool.Get().(*rand.Rand) + defer rngPool.Put(r) + + words := strings.Fields(seed) + if len(words) != 0 { + for i := 0; i < 10; i++ { + if line := genMarkov(r, words[r.Intn(len(words))]); line != nil { + return strings.Join(line, " ") + } + } + } + + return strings.Join(genMarkov(r, ""), " ") +} diff --git a/gen.py b/gen.py deleted file mode 100755 index c34dac5..0000000 --- a/gen.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 
2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -from mastodon import Mastodon -import argparse, sys, traceback, json -import create - -parser = argparse.ArgumentParser(description='Generate and post a toot.') -parser.add_argument('reply', metavar='reply', type=str, nargs='?', - help='ID of the status to reply to') -parser.add_argument('-s', '--simulate', dest='simulate', action='store_true', - help="Print the toot to stdout without posting it") - -args = parser.parse_args() - -cfg = json.load(open('config.json', 'r')) - -client = Mastodon( - client_id="clientcred.secret", - access_token="usercred.secret", - api_base_url=cfg['site']) - -toot = create.make_toot() -if not args.simulate: - try: - if toot['media'] != None: - mediaID = client.media_post(toot['media'], description = toot['toot']) - client.status_post(toot['toot'].replace("\n", " "), - media_ids = [mediaID], visibility = "unlisted") - else: - client.status_post(toot['toot'], visibility = 'unlisted') - except Exception as err: - toot = { - "toot": - "Mistress @lynnesbian@deadinsi.de, something has gone terribly" \ - + " wrong! 
While attempting to post a toot, I received the following" \ - + " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2])) - } - client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!") -print(toot['toot']) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..4cd84c5 --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module github.com/lynnesbian/mstdn-ebooks + +require ( + github.com/gorilla/websocket v1.4.0 // indirect + github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a + github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 // indirect + golang.org/x/net v0.0.0-20181017193950-04a2e542c03f +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..d56bf02 --- /dev/null +++ b/go.sum @@ -0,0 +1,8 @@ +github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q= +github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a h1:poZfGmljz6MxMXvFcYQvTx7TJQ2J9Gb+B2lgjOIPQnA= +github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a/go.mod h1:/OSOSDJyV0OUlBuDV0Qrllizt3BJNj4Ir5xhckYRVmg= +github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y= +github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80/go.mod h1:iFyPdL66DjUD96XmzVL3ZntbzcflLnznH0fr99w5VqE= +golang.org/x/net v0.0.0-20181017193950-04a2e542c03f h1:4pRM7zYwpBjCnfA1jRmhItLxYJkaEnsmuAcRtA347DA= +golang.org/x/net v0.0.0-20181017193950-04a2e542c03f/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= diff --git a/main.go b/main.go new file mode 100644 index 0000000..85c05a2 --- /dev/null +++ b/main.go @@ -0,0 +1,129 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +package main + +import ( + "context" + "flag" + "log" + "time" + + "github.com/mattn/go-mastodon" +) + +var flagServer = flag.String("server", "https://botsin.space", "base URL of Mastodon server") +var flagApp = flag.String("app", "clientcred.secret", "location of Mastodon app credentials") +var flagUser = flag.String("user", "usercred.secret", "location of Mastodon user access token") +var flagData = flag.String("data", "ebooks.dat", "location of bot cache") + +const ( + scopes = "read:statuses read:accounts read:follows write:statuses" + noRedirect = "urn:ietf:wg:oauth:2.0:oob" +) + +func main() { + log.SetFlags(0) + + flag.Parse() + + ctx := context.Background() + + cfg := &mastodon.Config{ + Server: *flagServer, + } + + ensureApp(ctx, cfg) + ensureUser(ctx, cfg) + + client := mastodon.NewClient(cfg) + + instance, err := client.GetInstance(ctx) + checkError(err, "Could not get instance metadata") + me, err := client.GetAccountCurrentUser(ctx) + checkError(err, "Could not get current user") + + log.Println("Logged in as", me.Acct+"@"+instance.URI) + + feed, err := client.NewWSClient().StreamingWSUser(ctx) + checkError(err, "Could not connect to user feed") + + var following []*mastodon.Account + var pg mastodon.Pagination + isFollowing := make(map[mastodon.ID]*mastodon.Account) + for { + fs, err := client.GetAccountFollowing(ctx, me.ID, &pg) + checkError(err, "Failed to get followed accounts") + + following = append(following, fs...) 
// checkError does nothing when err is nil. Otherwise it panics through
// log.Panicf with message formatted using arguments, followed by ": " and
// err.
func checkError(err error, message string, arguments ...interface{}) {
	if err == nil {
		return
	}

	// Build a fresh slice instead of appending to arguments: when a caller
	// spreads a slice with spare capacity ("args..."), append would store err
	// in that caller's backing array.
	args := make([]interface{}, 0, len(arguments)+1)
	args = append(args, arguments...)
	args = append(args, err)

	log.Panicf(message+": %v", args...)
}
to be - for p in soup.select("p"): - p.insert_after("\n") - p.unwrap() - - # keep hashtags in the toots - for ht in soup.select("a.hashtag"): - ht.unwrap() - - # unwrap all links (i like the bots posting links) - for link in soup.select("a"): - link.insert_after(link["href"]) - link.decompose() - - text = map(lambda a: a.strip(), soup.get_text().strip().split("\n")) - - # next up: store this and patch markovify to take it - # return {"text": text, "mentions": mentions, "links": links} - # it's 4am though so we're not doing that now, but i still want the parser updates - #todo: we split above and join now, which is dumb, but i don't wanna mess with the map code bc i don't understand it uwu - text = "\n".join(list(text)) - text = text.replace("'", "'") - return text - -def get_toots(client, id, since_id): - i = 0 - toots = client.account_statuses(id, since_id = since_id) - while toots is not None and len(toots) > 0: - for toot in toots: - t = parse_toot(toot) - if t != None: - yield { - "content": t, - "id": toot.id - } - try: - toots = client.fetch_next(toots) - except TimeoutError: - print("Operation timed out, committing to database and exiting.") - db.commit() - db.close() - sys.exit(1) - i += 1 - if i%10 == 0: - print(i) - -client = Mastodon( - client_id="clientcred.secret", - access_token="usercred.secret", - api_base_url=cfg['site']) - -me = client.account_verify_credentials() -following = client.account_following(me.id) - -db = sqlite3.connect("toots.db") -db.text_factory=str -c = db.cursor() -c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID") -db.commit() - -def handleCtrlC(signal, frame): - print("\nPREMATURE EVACUATION - Saving chunks") - db.commit() - sys.exit(1) - -signal.signal(signal.SIGINT, handleCtrlC) - -for f in following: - last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? 
// probableString is a weighted collection of strings from which one can be
// drawn at random.
//
// Order lists each distinct string once, in the order it was first inserted.
// Count holds how many times each string was inserted, and Total is the sum
// of all counts.
type probableString struct {
	Order []string
	Count map[string]int
	Total int
}

// insert records one more occurrence of s and returns the updated value.
// The receiver is a value, so callers must store the result. It is safe to
// call on the zero value; the Count map is allocated on first use.
func (ps probableString) insert(s string) probableString {
	if ps.Count == nil {
		ps.Count = make(map[string]int)
	}

	// Only strings seen for the first time are added to Order.
	if _, seen := ps.Count[s]; !seen {
		ps.Order = append(ps.Order, s)
	}

	ps.Count[s]++
	ps.Total++

	return ps
}

// rand draws one of the inserted strings, each with probability proportional
// to its count, using r as the source of randomness.
//
// For a value with nothing inserted (Total <= 0) it returns "" instead of
// letting r.Intn panic on a non-positive bound.
func (ps probableString) rand(r *rand.Rand) string {
	if ps.Total <= 0 {
		return ""
	}

	n := r.Intn(ps.Total)

	// Walk the strings in insertion order, consuming each one's share of
	// [0, Total) until n falls inside the current share.
	for _, s := range ps.Order {
		n -= ps.Count[s]
		if n < 0 {
			return s
		}
	}

	panic("unreachable")
}
staging file") + checkError(gob.NewEncoder(f).Encode(&markov), "Could not save data") + checkError(f.Close(), "Could not close data staging file") + checkError(os.Rename(*flagData+".tmp", *flagData), "Could not commit data update") +} + +var markovLock sync.Mutex +var markov = struct { + Accounts map[mastodon.ID]accountCache + Next map[[markovPrefixLength]string]probableString + Prev map[[markovPrefixLength]string]probableString +}{ + Accounts: make(map[mastodon.ID]accountCache), + Next: make(map[[markovPrefixLength]string]probableString), + Prev: make(map[[markovPrefixLength]string]probableString), +} + +func insertStatus(ctx context.Context, account mastodon.ID, id, content string) { + content = cleanContent(content) + + paragraphs := strings.Split(content, "\n\n") + + markovLock.Lock() + defer markovLock.Unlock() + + cache := markov.Accounts[account] + if n, err := strconv.ParseUint(path.Base(id), 10, 64); err == nil && n > cache.LatestRemoteTootID { + cache.LatestRemoteTootID = n + markov.Accounts[account] = cache + } + + for _, p := range paragraphs { + updateMarkov(strings.Fields(p)) + } + + select { + case markovDirty <- struct{}{}: + default: + } +} + +func updateMarkov(words []string) { + const last = markovPrefixLength - 1 + var prefix [markovPrefixLength]string + + for _, word := range words { + markov.Next[prefix] = markov.Next[prefix].insert(word) + copy(prefix[:], prefix[1:]) + prefix[last] = normalizeWord(word) + } + + markov.Next[prefix] = markov.Next[prefix].insert("") + + for i := 1; i < len(words); i++ { + for l := 1; l <= markovPrefixLength; l++ { + var suffix [markovPrefixLength]string + copy(suffix[:l], words[i:]) + for j := range suffix { + suffix[j] = normalizeWord(suffix[j]) + } + markov.Prev[suffix] = markov.Prev[suffix].insert(words[i-1]) + } + } + for l := 1; l <= markovPrefixLength; l++ { + var suffix [markovPrefixLength]string + copy(suffix[:l], words) + for j := range suffix { + suffix[j] = normalizeWord(suffix[j]) + } + 
// normalizeWord returns s lower-cased with every punctuation rune (as
// reported by unicode.IsPunct) removed.
func normalizeWord(s string) string {
	dropPunct := func(r rune) rune {
		lowered := unicode.ToLower(r)
		if unicode.IsPunct(lowered) {
			// A negative result makes strings.Map drop the rune.
			return -1
		}
		return lowered
	}

	return strings.Map(dropPunct, s)
}
- -import mastodon -import os, random, re, json -import create -from bs4 import BeautifulSoup - -cfg = json.load(open('config.json', 'r')) - -api_base_url = "https://knzk.me" -client = mastodon.Mastodon( - client_id="clientcred.secret", - access_token="usercred.secret", - api_base_url=cfg['site']) - -def extract_toot(toot): - #copied from main.py, see there for comments - soup = BeautifulSoup(toot, "html.parser") - for lb in soup.select("br"): - lb.insert_after("\n") - lb.decompose() - for p in soup.select("p"): - p.insert_after("\n") - p.unwrap() - for ht in soup.select("a.hashtag"): - ht.unwrap() - for link in soup.select("a"): - link.insert_after(link["href"]) - link.decompose() - text = map(lambda a: a.strip(), soup.get_text().strip().split("\n")) - text = "\n".join(list(text)) - text = re.sub("https?://([^/]+)/(@[^ ]+)", r"\2@\1", text) #put mentions back in - text = re.sub("^@[^@]+@[^ ]+ *", r"", text) #...but remove the initial one - text = text.lower() #for easier matching - return text - -class ReplyListener(mastodon.StreamListener): - def on_notification(self, notification): - if notification['type'] == 'mention': - acct = "@" + notification['account']['acct'] - post_id = notification['status']['id'] - mention = extract_toot(notification['status']['content']) - toot = create.make_toot(True)['toot'] - toot = acct + " " + toot - print(acct + " says " + mention) - client.status_post(toot, post_id, visibility=notification['status']['visibility']) - print("replied with " + toot) - -rl = ReplyListener() -client.stream_user(rl) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 20a079f..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -Mastodon.py==1.3.1 -markovify==0.7.1 -beautifulsoup4==4.6.0 diff --git a/status.go b/status.go new file mode 100644 index 0000000..8c9213e --- /dev/null +++ b/status.go @@ -0,0 +1,199 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package main + +import ( + "context" + "encoding/json" + "encoding/xml" + "fmt" + "log" + "net/http" + "net/url" + "strings" + "sync" + + "github.com/mattn/go-mastodon" + "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +type accountCache struct { + LatestRemoteTootID uint64 +} + +func downloadToots(ctx context.Context, instance *mastodon.Instance, following []*mastodon.Account) { + loadData() + + var wg sync.WaitGroup + wg.Add(len(following)) + + markovLock.Lock() + for _, f := range following { + go func(account *mastodon.Account, start uint64) { + defer wg.Done() + + log.Printf("Downloading toots for user %s, starting from %d", account.Acct, start) + + acct := account.Acct + if !strings.Contains(acct, "@") { + acct += "@" + instance.URI + } + + loadAllToots(ctx, acct, account.URL, start, func(id, content string) { + insertStatus(ctx, account.ID, id, content) + }) + }(f, markov.Accounts[f.ID].LatestRemoteTootID) + } + markovLock.Unlock() + + wg.Wait() +} + +func cleanContent(s string) string { + paragraphs, err := html.ParseFragment(strings.NewReader(s), &html.Node{ + Type: html.ElementNode, + Data: "div", + DataAtom: atom.Div, + }) + checkError(err, "Failed to parse HTML %q", s) + + var body []byte + var walk func(*html.Node) + walk = func(n *html.Node) { + for n != nil { + if n.Type == html.TextNode { + body = append(body, n.Data...) + } else if n.Type == html.ElementNode { + var isMention bool + if n.DataAtom == atom.A { + for _, a := range n.Attr { + if a.Key == "class" { + for _, c := range strings.Fields(a.Val) { + if c == "mention" { + isMention = true + break + } + } + break + } + } + } else if n.DataAtom == atom.Img { + for _, a := range n.Attr { + if a.Key == "alt" { + body = append(body, a.Val...) 
+ break + } + } + } + + if !isMention { + walk(n.FirstChild) + } + } + n = n.NextSibling + } + } + + for i, p := range paragraphs { + if i != 0 { + body = append(body, "\n\n"...) + } + walk(p.FirstChild) + } + + return string(body) +} + +func getJSON(ctx context.Context, uri string, v interface{}) { + resp, err := http.Get(uri) + checkError(err, "Could not download %q", uri) + defer func() { + checkError(resp.Body.Close(), "Error when closing %q", uri) + }() + + if resp.StatusCode != http.StatusOK { + log.Panicf("Error downloading %q: %v", uri, resp.Status) + } + + checkError(json.NewDecoder(resp.Body).Decode(v), "Error decoding %q", uri) +} + +func loadAllToots(ctx context.Context, acct, userURL string, start uint64, foundStatus func(id, content string)) { + webFingerURL := getWebFingerURL(ctx, acct, userURL) + outbox := webFingerUserActivity(ctx, webFingerURL) + "/outbox" + prev := fmt.Sprintf("%s?min_id=%d&page=true", outbox, start) + for prev != "" { + var page struct { + OrderedItems []struct { + Type string `json:"type"` + Object json.RawMessage `json:"object"` + } `json:"orderedItems"` + Prev string `json:"prev"` + } + getJSON(ctx, prev, &page) + for _, i := range page.OrderedItems { + if i.Type == "Create" { + var object struct { + ID string `json:"id"` + Sensitive bool `json:"sensitive"` + Content string `json:"content"` + } + checkError(json.Unmarshal(i.Object, &object), "Failed to decode toot JSON in %q", prev) + if !object.Sensitive { + foundStatus(object.ID, object.Content) + } + } + } + prev = page.Prev + } +} + +func getWebFingerURL(ctx context.Context, acct, userURL string) string { + acct = url.QueryEscape("acct:" + acct) + + u, err := url.Parse(userURL) + checkError(err, "Failed to parse user URL") + u.Path = "/.well-known/host-meta" + u.RawQuery = "" + + resp, err := http.Get(u.String()) + checkError(err, "Could not retrieve host-meta") + defer func() { + checkError(resp.Body.Close(), "Error closing host-meta request") + }() + if resp.StatusCode 
!= http.StatusOK { + log.Panicf("Failed to load %q: %s", u, resp.Status) + } + var meta struct { + Link struct { + Template string `xml:"template,attr"` + } `xml:"Link"` + } + checkError(xml.NewDecoder(resp.Body).Decode(&meta), "Could not find webfinger URL") + + return strings.Replace(meta.Link.Template, "{uri}", acct, -1) +} + +func webFingerUserActivity(ctx context.Context, uri string) string { + var body struct { + Links []struct { + Href string `json:"href"` + Rel string `json:"rel"` + Type string `json:"type"` + } `json:"links"` + } + + getJSON(ctx, uri, &body) + + for _, l := range body.Links { + if l.Rel == "self" && l.Type == "application/activity+json" { + return l.Href + } + } + + log.Panicf("Could not find ActivityPub URL in web finger response: %q", uri) + return "" +}