From db7b9d6e102016b8821f4d14a25c72d701edcb7f Mon Sep 17 00:00:00 2001 From: Ben Lubar Date: Sat, 20 Oct 2018 14:30:52 -0500 Subject: [PATCH] Rewrite in Go. --- Dockerfile | 12 +++ README.md | 34 ++++---- auth.go | 115 +++++++++++++++++++++++++ config.json | 1 - create.py | 58 ------------- gen.go | 70 ++++++++++++++++ gen.py | 42 ---------- go.mod | 8 ++ go.sum | 8 ++ main.go | 129 ++++++++++++++++++++++++++++ main.py | 134 ----------------------------- markov.go | 214 +++++++++++++++++++++++++++++++++++++++++++++++ reply.py | 53 ------------ requirements.txt | 3 - status.go | 199 +++++++++++++++++++++++++++++++++++++++++++ 15 files changed, 770 insertions(+), 310 deletions(-) create mode 100644 Dockerfile create mode 100644 auth.go delete mode 100644 config.json delete mode 100755 create.py create mode 100644 gen.go delete mode 100755 gen.py create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go delete mode 100755 main.py create mode 100644 markov.go delete mode 100755 reply.py delete mode 100644 requirements.txt create mode 100644 status.go diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..844462e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM golang:1.11.1-alpine + +COPY . 
/mstdn-ebooks/ +RUN cd /mstdn-ebooks/ \ + && apk add --no-cache git \ + && CGO_ENABLED=0 go build -o /usr/local/bin/mstdn-ebooks \ + && apk del git + +VOLUME /mstdn-ebooks/data +WORKDIR /mstdn-ebooks/data + +CMD ["mstdn-ebooks", "-server", "https://botsin.space"] diff --git a/README.md b/README.md index a14bab7..9ba6506 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,22 @@ # mstdn-ebooks + **Lynnear Edition** -This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks), such as: +This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks) and [the other original](https://github.com/Lynnesbian/mstdn-ebooks/tree/3d059d0b9b66fd31378574104f1a56f2be5a319c), such as: + - Unicode support - Non-Markov stuff -- Stores toots in a sqlite database rather than a text file - - Doesn't unecessarily redownload all toots every time - -## Install/usage guide -An installation and usage guide is available for unix-based platforms (linux, macOS...) [here](https://cloud.lynnesbian.space/s/Qxxm2sYdMZaqWat). +- Doesn't unnecessarily redownload all toots every time +- Uses an API called "webfinger" to allow downloading toots not known to your bot's instance +- Self-contained executable handles scheduling +- Docker support +- Written in Go -## Original README -hey look it's an ebooks bot +## Installation -python3 - -install the requirements with `sudo pip3 install -r requirements` - -make a bot (probably on bots in space) and follow the target accounts - -run `python3 main.py` to login and scrape - -run `python3 gen.py` to make a toot - -cron is an okay choice to make it toot regularly +1. Build mstdn-ebooks the same way you would build any Go program (`go get`, etc.). Alternatively, if you don't want to build it yourself, download a [precompiled release version](https://github.com/Lynnesbian/mstdn-ebooks/releases/latest). +2. 
If you haven't already, create an account on [botsin.space](https://botsin.space) or another instance. +3. Make sure the bot account is ONLY following you. Remove any default follows. +4. Run the `mstdn-ebooks` command. If your instance is not botsin.space, run the command as `mstdn-ebooks -server https://[your instance]`. +5. Copy the URL it generates into a browser logged into your bot account, and copy the code that Mastodon generates back to the program. +6. Congratulations! Your ebooks bot is now running. To restart it, you only need to redo step 4. diff --git a/auth.go b/auth.go new file mode 100644 index 0000000..23c44ec --- /dev/null +++ b/auth.go @@ -0,0 +1,115 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package main + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/url" + "os" + + "github.com/mattn/go-mastodon" +) + +func ensureApp(ctx context.Context, cfg *mastodon.Config) { + if b, err := ioutil.ReadFile(*flagApp); os.IsNotExist(err) { + log.Println("No clientcred.secret, registering application") + + app, err := mastodon.RegisterApp(ctx, &mastodon.AppConfig{ + Server: *flagServer, + ClientName: "lynnesbian_mastodon_ebooks", + Website: "https://github.com/Lynnesbian/mstdn-ebooks", + Scopes: scopes, + RedirectURIs: noRedirect, + }) + checkError(err, "Could not register app") + + cfg.ClientID = app.ClientID + cfg.ClientSecret = app.ClientSecret + + checkError(ioutil.WriteFile(*flagApp, []byte(app.ClientID+"\n"+app.ClientSecret+"\n"), 0644), "Could not save app credentials") + + // If the app credentials were just generated, the user access + // token cannot possibly be valid. 
+ _ = os.Remove(*flagUser) + } else { + checkError(err, "Could not read app credentials") + + lines := bytes.Split(b, []byte{'\n'}) + + // consider final newline to be optional + if len(lines) == 3 && len(lines[2]) == 0 { + lines = lines[:2] + } + + if len(lines) != 2 { + log.Fatalf("App credentials (%q) malformed. Cannot proceed.", *flagApp) + } + + cfg.ClientID = string(lines[0]) + cfg.ClientSecret = string(lines[1]) + } +} + +func ensureUser(ctx context.Context, cfg *mastodon.Config) { + if b, err := ioutil.ReadFile(*flagUser); os.IsNotExist(err) { + log.Println("No usercred.secret, registering application") + authURL, err := url.Parse(*flagServer) + checkError(err, "Could not parse instance root URL") + authURL.Path = "/oauth/authorize" + authURL.RawQuery = url.Values{ + "scope": {scopes}, + "response_type": {"code"}, + "redirect_uri": {noRedirect}, + "client_id": {cfg.ClientID}, + }.Encode() + log.Println("Visit this url:", authURL) + fmt.Print("Secret: ") + var authCode string + _, err = fmt.Scanln(&authCode) + checkError(err, "Failed to read authorization code") + + authURL.Path = "/oauth/token" + authURL.RawQuery = "" + + resp, err := http.PostForm(authURL.String(), url.Values{ + "client_id": {cfg.ClientID}, + "client_secret": {cfg.ClientSecret}, + "grant_type": {"authorization_code"}, + "code": {authCode}, + "redirect_uri": {noRedirect}, + }) + checkError(err, "Failed to request access token") + + defer func() { + checkError(resp.Body.Close(), "Error closing response body") + }() + + if resp.StatusCode == http.StatusOK { + var payload struct { + AccessToken string `json:"access_token"` + } + checkError(json.NewDecoder(resp.Body).Decode(&payload), "Error decoding authentication response") + + cfg.AccessToken = payload.AccessToken + + checkError(ioutil.WriteFile(*flagUser, []byte(payload.AccessToken+"\n"), 0644), "Error saving access token") + } else { + body, err := ioutil.ReadAll(resp.Body) + checkError(err, "Network error reading authentication error") + 
+ log.Fatalln("Authentication failed:", string(body)) + } + } else { + checkError(err, "Could not read user access token") + + cfg.AccessToken = string(bytes.TrimSuffix(b, []byte{'\n'})) + } +} diff --git a/config.json b/config.json deleted file mode 100644 index 6b9b611..0000000 --- a/config.json +++ /dev/null @@ -1 +0,0 @@ -{"site":"https://botsin.space"} diff --git a/create.py b/create.py deleted file mode 100755 index 08ec488..0000000 --- a/create.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python3 -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -import markovify -import json -import re, random, multiprocessing, time, sqlite3, shutil, os - -def make_sentence(output): - class nlt_fixed(markovify.NewlineText): - def test_sentence_input(self, sentence): - return True #all sentences are valid <3 - - # with open("corpus.txt", encoding="utf-8") as fp: - # model = nlt_fixed(fp.read()) - - shutil.copyfile("toots.db", "toots-copy.db") - db = sqlite3.connect("toots-copy.db") - db.text_factory=str - c = db.cursor() - toots = c.execute("SELECT content FROM `toots`").fetchall() - toots_str = "" - for toot in toots: - toots_str += "\n{}".format(toot[0]) - model = nlt_fixed(toots_str) - toots_str = None - db.close() - os.remove("toots-copy.db") - - sentence = None - while sentence is None: - sentence = model.make_short_sentence(500, tries=100000) - sentence = sentence.replace("\0", "\n") - output.send(sentence) - -def make_toot(force_markov = False, args = None): - return make_toot_markov() - -def make_toot_markov(): - tries = 0 - toot = None - while toot == None and tries < 10: - pin, pout = multiprocessing.Pipe(False) - p = multiprocessing.Process(target = make_sentence, args = [pout]) - p.start() - p.join(10) - if p.is_alive(): - p.terminate() - p.join() - toot = None - tries = tries + 1 - else: - toot = 
pin.recv() - return { - "toot":toot, - "media":None - } diff --git a/gen.go b/gen.go new file mode 100644 index 0000000..0a9d8a8 --- /dev/null +++ b/gen.go @@ -0,0 +1,70 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package main + +import ( + "context" + "math/rand" + "strings" + "sync" + "time" + + "github.com/mattn/go-mastodon" +) + +func genToot(ctx context.Context, me *mastodon.Account, replyTo *mastodon.Status) *mastodon.Toot { + var replyToID mastodon.ID + visibility := "public" + var body []byte + var seed string + + if replyTo != nil { + replyToID = replyTo.ID + visibility = replyTo.Visibility + seed = cleanContent(replyTo.Content) + + body = append(body, '@') + body = append(body, replyTo.Account.Acct...) + body = append(body, ' ') + + for _, m := range replyTo.Mentions { + if m.ID != me.ID && m.ID != replyTo.Account.ID { + body = append(body, '@') + body = append(body, m.Acct...) + body = append(body, ' ') + } + } + } + + body = append(body, generateMessage(ctx, seed)...) + + return &mastodon.Toot{ + Status: string(body), + Visibility: visibility, + InReplyToID: replyToID, + } +} + +var rngPool = sync.Pool{ + New: func() interface{} { + return rand.New(rand.NewSource(time.Now().UnixNano())) + }, +} + +func generateMessage(ctx context.Context, seed string) string { + r := rngPool.Get().(*rand.Rand) + defer rngPool.Put(r) + + words := strings.Fields(seed) + if len(words) != 0 { + for i := 0; i < 10; i++ { + if line := genMarkov(r, words[r.Intn(len(words))]); line != nil { + return strings.Join(line, " ") + } + } + } + + return strings.Join(genMarkov(r, ""), " ") +} diff --git a/gen.py b/gen.py deleted file mode 100755 index c34dac5..0000000 --- a/gen.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 
2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -from mastodon import Mastodon -import argparse, sys, traceback, json -import create - -parser = argparse.ArgumentParser(description='Generate and post a toot.') -parser.add_argument('reply', metavar='reply', type=str, nargs='?', - help='ID of the status to reply to') -parser.add_argument('-s', '--simulate', dest='simulate', action='store_true', - help="Print the toot to stdout without posting it") - -args = parser.parse_args() - -cfg = json.load(open('config.json', 'r')) - -client = Mastodon( - client_id="clientcred.secret", - access_token="usercred.secret", - api_base_url=cfg['site']) - -toot = create.make_toot() -if not args.simulate: - try: - if toot['media'] != None: - mediaID = client.media_post(toot['media'], description = toot['toot']) - client.status_post(toot['toot'].replace("\n", " "), - media_ids = [mediaID], visibility = "unlisted") - else: - client.status_post(toot['toot'], visibility = 'unlisted') - except Exception as err: - toot = { - "toot": - "Mistress @lynnesbian@deadinsi.de, something has gone terribly" \ - + " wrong! 
While attempting to post a toot, I received the following" \ - + " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2])) - } - client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!") -print(toot['toot']) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..4cd84c5 --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module github.com/lynnesbian/mstdn-ebooks + +require ( + github.com/gorilla/websocket v1.4.0 // indirect + github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a + github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 // indirect + golang.org/x/net v0.0.0-20181017193950-04a2e542c03f +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..d56bf02 --- /dev/null +++ b/go.sum @@ -0,0 +1,8 @@ +github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q= +github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a h1:poZfGmljz6MxMXvFcYQvTx7TJQ2J9Gb+B2lgjOIPQnA= +github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a/go.mod h1:/OSOSDJyV0OUlBuDV0Qrllizt3BJNj4Ir5xhckYRVmg= +github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y= +github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80/go.mod h1:iFyPdL66DjUD96XmzVL3ZntbzcflLnznH0fr99w5VqE= +golang.org/x/net v0.0.0-20181017193950-04a2e542c03f h1:4pRM7zYwpBjCnfA1jRmhItLxYJkaEnsmuAcRtA347DA= +golang.org/x/net v0.0.0-20181017193950-04a2e542c03f/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= diff --git a/main.go b/main.go new file mode 100644 index 0000000..85c05a2 --- /dev/null +++ b/main.go @@ -0,0 +1,129 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +package main + +import ( + "context" + "flag" + "log" + "time" + + "github.com/mattn/go-mastodon" +) + +var flagServer = flag.String("server", "https://botsin.space", "base URL of Mastodon server") +var flagApp = flag.String("app", "clientcred.secret", "location of Mastodon app credentials") +var flagUser = flag.String("user", "usercred.secret", "location of Mastodon user access token") +var flagData = flag.String("data", "ebooks.dat", "location of bot cache") + +const ( + scopes = "read:statuses read:accounts read:follows write:statuses" + noRedirect = "urn:ietf:wg:oauth:2.0:oob" +) + +func main() { + log.SetFlags(0) + + flag.Parse() + + ctx := context.Background() + + cfg := &mastodon.Config{ + Server: *flagServer, + } + + ensureApp(ctx, cfg) + ensureUser(ctx, cfg) + + client := mastodon.NewClient(cfg) + + instance, err := client.GetInstance(ctx) + checkError(err, "Could not get instance metadata") + me, err := client.GetAccountCurrentUser(ctx) + checkError(err, "Could not get current user") + + log.Println("Logged in as", me.Acct+"@"+instance.URI) + + feed, err := client.NewWSClient().StreamingWSUser(ctx) + checkError(err, "Could not connect to user feed") + + var following []*mastodon.Account + var pg mastodon.Pagination + isFollowing := make(map[mastodon.ID]*mastodon.Account) + for { + fs, err := client.GetAccountFollowing(ctx, me.ID, &pg) + checkError(err, "Failed to get followed accounts") + + following = append(following, fs...) 
+ for _, f := range fs { + isFollowing[f.ID] = f + } + + if pg.MaxID == "" { + break + } + } + + downloadToots(ctx, instance, following) + log.Println("Initial history downloaded.") + + go func() { + for range markovDirty { + saveMarkov() + } + }() + + // Synchronize to the next half hour interval + halfHourSync := time.After(time.Hour/2 - time.Since(time.Now().Truncate(time.Hour/2))) + var halfHour <-chan time.Time + + for { + select { + case event := <-feed: + switch e := event.(type) { + case *mastodon.ErrorEvent: + log.Println("Mastodon error:", e) + case *mastodon.DeleteEvent: + // Ignore (for now) + case *mastodon.NotificationEvent: + if e.Notification.Type != "mention" { + log.Printf("Ignoring notification of type %q", e.Notification.Type) + continue + } + _, err := client.PostStatus(ctx, genToot(ctx, me, e.Notification.Status)) + checkError(err, "Error replying to mention %q", e.Notification.Status.URL) + case *mastodon.UpdateEvent: + if _, ok := isFollowing[e.Status.Account.ID]; !ok { + continue + } + if e.Status.Visibility != "unlisted" && e.Status.Visibility != "public" { + continue + } + if e.Status.Sensitive { + continue + } + insertStatus(ctx, e.Status.Account.ID, e.Status.URI, e.Status.Content) + default: + log.Printf("Unexpected event type: %T", e) + } + case <-halfHourSync: + halfHourSync = nil + halfHour = time.Tick(time.Hour / 2) + _, err := client.PostStatus(ctx, genToot(ctx, me, nil)) + checkError(err, "Error posting status") + case <-halfHour: + _, err := client.PostStatus(ctx, genToot(ctx, me, nil)) + checkError(err, "Error posting status") + } + } +} + +func checkError(err error, message string, arguments ...interface{}) { + if err == nil { + return + } + + log.Panicf(message+": %v", append(arguments, err)...) 
+} diff --git a/main.py b/main.py deleted file mode 100755 index 5f6872d..0000000 --- a/main.py +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python3 -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -from mastodon import Mastodon -from os import path -from bs4 import BeautifulSoup -import shutil, os, sqlite3, signal, sys, json -# import re - -scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses"] -cfg = json.load(open('config.json', 'r')) - -if not path.exists("clientcred.secret"): - - print("No clientcred.secret, registering application") - Mastodon.create_app("lynnesbian_mastodon_ebooks", api_base_url=cfg['site'], to_file="clientcred.secret", scopes=scopes, website="https://github.com/Lynnesbian/mastodon-ebooks") - -if not path.exists("usercred.secret"): - print("No usercred.secret, registering application") - client = Mastodon(client_id="clientcred.secret", api_base_url=cfg['site']) - print("Visit this url:") - print(client.auth_request_url(scopes=scopes)) - client.log_in(code=input("Secret: "), to_file="usercred.secret", scopes=scopes) - -def parse_toot(toot): - if toot.spoiler_text != "": return - if toot.reblog is not None: return - if toot.visibility not in ["public", "unlisted"]: return - - soup = BeautifulSoup(toot.content, "html.parser") - - # pull the mentions out - # for mention in soup.select("span.h-card"): - # mention.unwrap() - - # for mention in soup.select("a.u-url.mention"): - # mention.unwrap() - - # this is the code that removes all mentions - # TODO: make it so that it removes the @ and instance but keeps the name - for mention in soup.select("span.h-card"): - mention.decompose() - - # make all linebreaks actual linebreaks - for lb in soup.select("br"): - lb.insert_after("\n") - lb.decompose() - - # make each p element its own line because sometimes they decide not 
to be - for p in soup.select("p"): - p.insert_after("\n") - p.unwrap() - - # keep hashtags in the toots - for ht in soup.select("a.hashtag"): - ht.unwrap() - - # unwrap all links (i like the bots posting links) - for link in soup.select("a"): - link.insert_after(link["href"]) - link.decompose() - - text = map(lambda a: a.strip(), soup.get_text().strip().split("\n")) - - # next up: store this and patch markovify to take it - # return {"text": text, "mentions": mentions, "links": links} - # it's 4am though so we're not doing that now, but i still want the parser updates - #todo: we split above and join now, which is dumb, but i don't wanna mess with the map code bc i don't understand it uwu - text = "\n".join(list(text)) - text = text.replace("'", "'") - return text - -def get_toots(client, id, since_id): - i = 0 - toots = client.account_statuses(id, since_id = since_id) - while toots is not None and len(toots) > 0: - for toot in toots: - t = parse_toot(toot) - if t != None: - yield { - "content": t, - "id": toot.id - } - try: - toots = client.fetch_next(toots) - except TimeoutError: - print("Operation timed out, committing to database and exiting.") - db.commit() - db.close() - sys.exit(1) - i += 1 - if i%10 == 0: - print(i) - -client = Mastodon( - client_id="clientcred.secret", - access_token="usercred.secret", - api_base_url=cfg['site']) - -me = client.account_verify_credentials() -following = client.account_following(me.id) - -db = sqlite3.connect("toots.db") -db.text_factory=str -c = db.cursor() -c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID") -db.commit() - -def handleCtrlC(signal, frame): - print("\nPREMATURE EVACUATION - Saving chunks") - db.commit() - sys.exit(1) - -signal.signal(signal.SIGINT, handleCtrlC) - -for f in following: - last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? 
ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() - if last_toot != None: - last_toot = last_toot[0] - else: - last_toot = 0 - print("Downloading toots for user @{}, starting from {}".format(f.username, last_toot)) - for t in get_toots(client, f.id, last_toot): - # try: - c.execute("REPLACE INTO toots (id, userid, content) VALUES (?, ?, ?)", (t['id'], f.id, t['content'])) - # except: - # pass #ignore toots that can't be encoded properly - -db.commit() -db.execute("VACUUM") #compact db -db.commit() -db.close() \ No newline at end of file diff --git a/markov.go b/markov.go new file mode 100644 index 0000000..5fd3544 --- /dev/null +++ b/markov.go @@ -0,0 +1,214 @@ +package main + +import ( + "context" + "encoding/gob" + "log" + "math/rand" + "os" + "path" + "strconv" + "strings" + "sync" + "unicode" + + mastodon "github.com/mattn/go-mastodon" +) + +const markovPrefixLength = 2 + +type probableString struct { + Order []string + Count map[string]int + Total int +} + +func (ps probableString) insert(s string) probableString { + if n, ok := ps.Count[s]; ok { + ps.Count[s] = n + 1 + ps.Total++ + return ps + } + + if ps.Count == nil { + ps.Count = make(map[string]int) + } + + ps.Count[s] = 1 + ps.Order = append(ps.Order, s) + ps.Total++ + + return ps +} + +func (ps probableString) rand(r *rand.Rand) string { + n := r.Intn(ps.Total) + + for _, s := range ps.Order { + n -= ps.Count[s] + if n < 0 { + return s + } + } + + panic("unreachable") +} + +var markovDirty = make(chan struct{}, 1) + +func loadData() { + markovLock.Lock() + defer markovLock.Unlock() + + f, err := os.Open(*flagData) + if os.IsNotExist(err) { + return + } + checkError(err, "Could not open data cache") + defer func() { + checkError(f.Close(), "Could not close data cache") + }() + + checkError(gob.NewDecoder(f).Decode(&markov), "Could not read data") +} + +func saveMarkov() { + markovLock.Lock() + defer markovLock.Unlock() + + f, err := os.Create(*flagData + ".tmp") + checkError(err, "Could not create data 
staging file") + checkError(gob.NewEncoder(f).Encode(&markov), "Could not save data") + checkError(f.Close(), "Could not close data staging file") + checkError(os.Rename(*flagData+".tmp", *flagData), "Could not commit data update") +} + +var markovLock sync.Mutex +var markov = struct { + Accounts map[mastodon.ID]accountCache + Next map[[markovPrefixLength]string]probableString + Prev map[[markovPrefixLength]string]probableString +}{ + Accounts: make(map[mastodon.ID]accountCache), + Next: make(map[[markovPrefixLength]string]probableString), + Prev: make(map[[markovPrefixLength]string]probableString), +} + +func insertStatus(ctx context.Context, account mastodon.ID, id, content string) { + content = cleanContent(content) + + paragraphs := strings.Split(content, "\n\n") + + markovLock.Lock() + defer markovLock.Unlock() + + cache := markov.Accounts[account] + if n, err := strconv.ParseUint(path.Base(id), 10, 64); err == nil && n > cache.LatestRemoteTootID { + cache.LatestRemoteTootID = n + markov.Accounts[account] = cache + } + + for _, p := range paragraphs { + updateMarkov(strings.Fields(p)) + } + + select { + case markovDirty <- struct{}{}: + default: + } +} + +func updateMarkov(words []string) { + const last = markovPrefixLength - 1 + var prefix [markovPrefixLength]string + + for _, word := range words { + markov.Next[prefix] = markov.Next[prefix].insert(word) + copy(prefix[:], prefix[1:]) + prefix[last] = normalizeWord(word) + } + + markov.Next[prefix] = markov.Next[prefix].insert("") + + for i := 1; i < len(words); i++ { + for l := 1; l <= markovPrefixLength; l++ { + var suffix [markovPrefixLength]string + copy(suffix[:l], words[i:]) + for j := range suffix { + suffix[j] = normalizeWord(suffix[j]) + } + markov.Prev[suffix] = markov.Prev[suffix].insert(words[i-1]) + } + } + for l := 1; l <= markovPrefixLength; l++ { + var suffix [markovPrefixLength]string + copy(suffix[:l], words) + for j := range suffix { + suffix[j] = normalizeWord(suffix[j]) + } + 
markov.Prev[suffix] = markov.Prev[suffix].insert("") + } +} + +func genMarkov(r *rand.Rand, seed string) []string { + const last = markovPrefixLength - 1 + var prefix [markovPrefixLength]string + var line []string + + markovLock.Lock() + defer markovLock.Unlock() + + if seed != "" { + var suffix [markovPrefixLength]string + suffix[0] = normalizeWord(seed) + ps, ok := markov.Prev[suffix] + if !ok { + return nil + } + + line = append(line, seed) + + for len(line) < 1000 { + s := ps.rand(r) + if s == "" { + break + } + line = append(line, s) + copy(suffix[1:], suffix[:]) + suffix[0] = normalizeWord(s) + ps = markov.Prev[suffix] + } + + for i, j := 0, len(line)-1; i < j; i, j = i+1, j-1 { + line[i], line[j] = line[j], line[i] + } + + for i, j := len(line)-1, last; i >= 0 && j >= 0; i, j = i-1, j-1 { + prefix[j] = normalizeWord(line[i]) + } + } + + if _, ok := markov.Next[prefix]; !ok { + log.Panicln("No markov data available for prefix", prefix, line) + } + + for len(line) < 1000 { + ps := markov.Next[prefix] + s := ps.rand(r) + + if s == "" { + return line + } + + line = append(line, s) + copy(prefix[:], prefix[1:]) + prefix[last] = normalizeWord(s) + } + + // probably an infinite loop + return line +} + +func normalizeWord(s string) string { + return strings.Join(strings.FieldsFunc(strings.ToLower(s), unicode.IsPunct), "") +} diff --git a/reply.py b/reply.py deleted file mode 100755 index a3f0f2b..0000000 --- a/reply.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -import mastodon -import os, random, re, json -import create -from bs4 import BeautifulSoup - -cfg = json.load(open('config.json', 'r')) - -api_base_url = "https://knzk.me" -client = mastodon.Mastodon( - client_id="clientcred.secret", - access_token="usercred.secret", - api_base_url=cfg['site']) - -def extract_toot(toot): - #copied from main.py, see there for comments - soup = BeautifulSoup(toot, "html.parser") - for lb in soup.select("br"): - lb.insert_after("\n") - lb.decompose() - for p in soup.select("p"): - p.insert_after("\n") - p.unwrap() - for ht in soup.select("a.hashtag"): - ht.unwrap() - for link in soup.select("a"): - link.insert_after(link["href"]) - link.decompose() - text = map(lambda a: a.strip(), soup.get_text().strip().split("\n")) - text = "\n".join(list(text)) - text = re.sub("https?://([^/]+)/(@[^ ]+)", r"\2@\1", text) #put mentions back in - text = re.sub("^@[^@]+@[^ ]+ *", r"", text) #...but remove the initial one - text = text.lower() #for easier matching - return text - -class ReplyListener(mastodon.StreamListener): - def on_notification(self, notification): - if notification['type'] == 'mention': - acct = "@" + notification['account']['acct'] - post_id = notification['status']['id'] - mention = extract_toot(notification['status']['content']) - toot = create.make_toot(True)['toot'] - toot = acct + " " + toot - print(acct + " says " + mention) - client.status_post(toot, post_id, visibility=notification['status']['visibility']) - print("replied with " + toot) - -rl = ReplyListener() -client.stream_user(rl) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 20a079f..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -Mastodon.py==1.3.1 -markovify==0.7.1 -beautifulsoup4==4.6.0 diff --git a/status.go b/status.go new file mode 100644 index 0000000..8c9213e --- /dev/null +++ b/status.go @@ -0,0 +1,199 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package main + +import ( + "context" + "encoding/json" + "encoding/xml" + "fmt" + "log" + "net/http" + "net/url" + "strings" + "sync" + + "github.com/mattn/go-mastodon" + "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +type accountCache struct { + LatestRemoteTootID uint64 +} + +func downloadToots(ctx context.Context, instance *mastodon.Instance, following []*mastodon.Account) { + loadData() + + var wg sync.WaitGroup + wg.Add(len(following)) + + markovLock.Lock() + for _, f := range following { + go func(account *mastodon.Account, start uint64) { + defer wg.Done() + + log.Printf("Downloading toots for user %s, starting from %d", account.Acct, start) + + acct := account.Acct + if !strings.Contains(acct, "@") { + acct += "@" + instance.URI + } + + loadAllToots(ctx, acct, account.URL, start, func(id, content string) { + insertStatus(ctx, account.ID, id, content) + }) + }(f, markov.Accounts[f.ID].LatestRemoteTootID) + } + markovLock.Unlock() + + wg.Wait() +} + +func cleanContent(s string) string { + paragraphs, err := html.ParseFragment(strings.NewReader(s), &html.Node{ + Type: html.ElementNode, + Data: "div", + DataAtom: atom.Div, + }) + checkError(err, "Failed to parse HTML %q", s) + + var body []byte + var walk func(*html.Node) + walk = func(n *html.Node) { + for n != nil { + if n.Type == html.TextNode { + body = append(body, n.Data...) + } else if n.Type == html.ElementNode { + var isMention bool + if n.DataAtom == atom.A { + for _, a := range n.Attr { + if a.Key == "class" { + for _, c := range strings.Fields(a.Val) { + if c == "mention" { + isMention = true + break + } + } + break + } + } + } else if n.DataAtom == atom.Img { + for _, a := range n.Attr { + if a.Key == "alt" { + body = append(body, a.Val...) 
+ break + } + } + } + + if !isMention { + walk(n.FirstChild) + } + } + n = n.NextSibling + } + } + + for i, p := range paragraphs { + if i != 0 { + body = append(body, "\n\n"...) + } + walk(p.FirstChild) + } + + return string(body) +} + +func getJSON(ctx context.Context, uri string, v interface{}) { + resp, err := http.Get(uri) + checkError(err, "Could not download %q", uri) + defer func() { + checkError(resp.Body.Close(), "Error when closing %q", uri) + }() + + if resp.StatusCode != http.StatusOK { + log.Panicf("Error downloading %q: %v", uri, resp.Status) + } + + checkError(json.NewDecoder(resp.Body).Decode(v), "Error decoding %q", uri) +} + +func loadAllToots(ctx context.Context, acct, userURL string, start uint64, foundStatus func(id, content string)) { + webFingerURL := getWebFingerURL(ctx, acct, userURL) + outbox := webFingerUserActivity(ctx, webFingerURL) + "/outbox" + prev := fmt.Sprintf("%s?min_id=%d&page=true", outbox, start) + for prev != "" { + var page struct { + OrderedItems []struct { + Type string `json:"type"` + Object json.RawMessage `json:"object"` + } `json:"orderedItems"` + Prev string `json:"prev"` + } + getJSON(ctx, prev, &page) + for _, i := range page.OrderedItems { + if i.Type == "Create" { + var object struct { + ID string `json:"id"` + Sensitive bool `json:"sensitive"` + Content string `json:"content"` + } + checkError(json.Unmarshal(i.Object, &object), "Failed to decode toot JSON in %q", prev) + if !object.Sensitive { + foundStatus(object.ID, object.Content) + } + } + } + prev = page.Prev + } +} + +func getWebFingerURL(ctx context.Context, acct, userURL string) string { + acct = url.QueryEscape("acct:" + acct) + + u, err := url.Parse(userURL) + checkError(err, "Failed to parse user URL") + u.Path = "/.well-known/host-meta" + u.RawQuery = "" + + resp, err := http.Get(u.String()) + checkError(err, "Could not retrieve host-meta") + defer func() { + checkError(resp.Body.Close(), "Error closing host-meta request") + }() + if resp.StatusCode 
!= http.StatusOK { + log.Panicf("Failed to load %q: %s", u, resp.Status) + } + var meta struct { + Link struct { + Template string `xml:"template,attr"` + } `xml:"Link"` + } + checkError(xml.NewDecoder(resp.Body).Decode(&meta), "Could not find webfinger URL") + + return strings.Replace(meta.Link.Template, "{uri}", acct, -1) +} + +func webFingerUserActivity(ctx context.Context, uri string) string { + var body struct { + Links []struct { + Href string `json:"href"` + Rel string `json:"rel"` + Type string `json:"type"` + } `json:"links"` + } + + getJSON(ctx, uri, &body) + + for _, l := range body.Links { + if l.Rel == "self" && l.Type == "application/activity+json" { + return l.Href + } + } + + log.Panicf("Could not find ActivityPub URL in web finger response: %q", uri) + return "" +}