Rewrite in Go.

This commit is contained in:
Ben Lubar 2018-10-20 14:30:52 -05:00
parent 3d059d0b9b
commit db7b9d6e10
No known key found for this signature in database
GPG Key ID: 92939677AB59EDA4
15 changed files with 770 additions and 310 deletions

12
Dockerfile Normal file
View File

@ -0,0 +1,12 @@
# Image that builds mstdn-ebooks from the build context and runs it.
FROM golang:1.11.1-alpine
# The whole build context is copied in; the binary is built from it below.
COPY . /mstdn-ebooks/
# git is added and removed within this single RUN step (presumably needed by
# `go build` to fetch dependencies -- confirm). CGO_ENABLED=0 builds with cgo
# disabled.
RUN cd /mstdn-ebooks/ \
&& apk add --no-cache git \
&& CGO_ENABLED=0 go build -o /usr/local/bin/mstdn-ebooks \
&& apk del git
# The program's default -app, -user and -data paths are relative file names,
# so running from this directory keeps them on the declared volume.
VOLUME /mstdn-ebooks/data
WORKDIR /mstdn-ebooks/data
CMD ["mstdn-ebooks", "-server", "https://botsin.space"]

View File

@ -1,26 +1,22 @@
# mstdn-ebooks
**Lynnear Edition**
This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks), such as:
This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks) and [the other original](https://github.com/Lynnesbian/mstdn-ebooks/tree/3d059d0b9b66fd31378574104f1a56f2be5a319c), such as:
- Unicode support
- Non-Markov stuff
- Stores toots in a sqlite database rather than a text file
- Doesn't unnecessarily redownload all toots every time
## Install/usage guide
An installation and usage guide is available for unix-based platforms (linux, macOS...) [here](https://cloud.lynnesbian.space/s/Qxxm2sYdMZaqWat).
- Doesn't unnecessarily redownload all toots every time
- Uses an API called "webfinger" to allow downloading toots not known to your bot's instance
- Self-contained executable handles scheduling
- Docker support
- Written in Go
## Original README
hey look it's an ebooks bot
## Installation
python3
install the requirements with `sudo pip3 install -r requirements`
make a bot (probably on bots in space) and follow the target accounts
run `python3 main.py` to login and scrape
run `python3 gen.py` to make a toot
cron is an okay choice to make it toot regularly
1. Build mstdn-ebooks the same way you would build any Go program (`go get`, etc.). Alternatively, if you don't want to build it yourself, download a [precompiled release version](https://github.com/Lynnesbian/mstdn-ebooks/releases/latest).
2. If you haven't already, create an account on [botsin.space](https://botsin.space) or another instance.
3. Make sure the bot account is ONLY following you. Remove any default follows.
4. Run the `mstdn-ebooks` command. If your instance is not botsin.space, run the command as `mstdn-ebooks -server https://[your instance]`.
5. Copy the URL it generates into a browser logged into your bot account, and copy the code that Mastodon generates back to the program.
6. Congratulations! Your ebooks bot is now running. To restart it, you only need to redo step 4.

115
auth.go Normal file
View File

@ -0,0 +1,115 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"github.com/mattn/go-mastodon"
)
// ensureApp fills cfg.ClientID and cfg.ClientSecret.
//
// The credentials are read from the file named by -app, which holds the
// client ID on the first line and the client secret on the second (the final
// newline is optional). When that file does not exist, a new application is
// registered on the server named by -server, its credentials are saved, and
// the stored user token is removed. Any other failure panics through
// checkError; a malformed credentials file terminates the process.
func ensureApp(ctx context.Context, cfg *mastodon.Config) {
	b, err := ioutil.ReadFile(*flagApp)
	if os.IsNotExist(err) {
		log.Println("No clientcred.secret, registering application")
		app, err := mastodon.RegisterApp(ctx, &mastodon.AppConfig{
			Server:       *flagServer,
			ClientName:   "lynnesbian_mastodon_ebooks",
			Website:      "https://github.com/Lynnesbian/mstdn-ebooks",
			Scopes:       scopes,
			RedirectURIs: noRedirect,
		})
		checkError(err, "Could not register app")

		cfg.ClientID = app.ClientID
		cfg.ClientSecret = app.ClientSecret

		// The file contains the client secret, so it is created readable by
		// its owner only.
		credentials := []byte(app.ClientID + "\n" + app.ClientSecret + "\n")
		checkError(ioutil.WriteFile(*flagApp, credentials, 0600), "Could not save app credentials")

		// If the app credentials were just generated, the user access
		// token cannot possibly be valid. Removal is best-effort: the token
		// file may simply not exist yet.
		_ = os.Remove(*flagUser)
		return
	}
	checkError(err, "Could not read app credentials")

	lines := bytes.Split(b, []byte{'\n'})
	// consider final newline to be optional
	if len(lines) == 3 && len(lines[2]) == 0 {
		lines = lines[:2]
	}
	if len(lines) != 2 {
		log.Fatalf("App credentials (%q) malformed. Cannot proceed.", *flagApp)
	}

	cfg.ClientID = string(lines[0])
	cfg.ClientSecret = string(lines[1])
}
// ensureUser fills cfg.AccessToken.
//
// The token is read from the file named by -user. When that file does not
// exist, the operator is shown an authorization URL, the code they type back
// is exchanged for an access token at /oauth/token, and the token is saved.
// Any I/O or decoding failure panics through checkError; a non-200 token
// response terminates the process with the response body as the message.
func ensureUser(ctx context.Context, cfg *mastodon.Config) {
	b, err := ioutil.ReadFile(*flagUser)
	if !os.IsNotExist(err) {
		checkError(err, "Could not read user access token")
		// Trim all surrounding whitespace so a file saved with CRLF line
		// endings does not leave a stray "\r" in the token.
		cfg.AccessToken = string(bytes.TrimSpace(b))
		return
	}

	log.Println("No usercred.secret, requesting user authorization")

	authURL, err := url.Parse(*flagServer)
	checkError(err, "Could not parse instance root URL")

	authURL.Path = "/oauth/authorize"
	authURL.RawQuery = url.Values{
		"scope":         {scopes},
		"response_type": {"code"},
		"redirect_uri":  {noRedirect},
		"client_id":     {cfg.ClientID},
	}.Encode()

	log.Println("Visit this url:", authURL)
	fmt.Print("Secret: ")
	var authCode string
	_, err = fmt.Scanln(&authCode)
	checkError(err, "Failed to read authorization code")

	// Reuse the parsed server URL for the token endpoint.
	authURL.Path = "/oauth/token"
	authURL.RawQuery = ""

	resp, err := http.PostForm(authURL.String(), url.Values{
		"client_id":     {cfg.ClientID},
		"client_secret": {cfg.ClientSecret},
		"grant_type":    {"authorization_code"},
		"code":          {authCode},
		"redirect_uri":  {noRedirect},
	})
	checkError(err, "Failed to request access token")
	defer func() {
		checkError(resp.Body.Close(), "Error closing response body")
	}()

	if resp.StatusCode != http.StatusOK {
		body, err := ioutil.ReadAll(resp.Body)
		checkError(err, "Network error reading authentication error")
		log.Fatalln("Authentication failed:", string(body))
	}

	var payload struct {
		AccessToken string `json:"access_token"`
	}
	checkError(json.NewDecoder(resp.Body).Decode(&payload), "Error decoding authentication response")

	cfg.AccessToken = payload.AccessToken
	// The token grants access to the bot account, so the file is created
	// readable by its owner only.
	checkError(ioutil.WriteFile(*flagUser, []byte(payload.AccessToken+"\n"), 0600), "Error saving access token")
}

View File

@ -1 +0,0 @@
{"site":"https://botsin.space"}

View File

@ -1,58 +0,0 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import markovify
import json
import re, random, multiprocessing, time, sqlite3, shutil, os
# Generates one sentence from the stored toots and sends it through `output`.
# make_toot_markov runs this in a separate process and passes one end of a
# multiprocessing pipe as `output`.
# NOTE(review): the original indentation is missing from this listing; the
# statements are left exactly as found.
def make_sentence(output):
# NewlineText subclass whose sentence filter accepts every input.
class nlt_fixed(markovify.NewlineText):
def test_sentence_input(self, sentence):
return True #all sentences are valid <3
# with open("corpus.txt", encoding="utf-8") as fp:
# model = nlt_fixed(fp.read())
# Read from a copy of toots.db rather than from toots.db itself.
shutil.copyfile("toots.db", "toots-copy.db")
db = sqlite3.connect("toots-copy.db")
db.text_factory=str
c = db.cursor()
toots = c.execute("SELECT content FROM `toots`").fetchall()
# Concatenate every stored toot, each preceded by a newline.
toots_str = ""
for toot in toots:
toots_str += "\n{}".format(toot[0])
model = nlt_fixed(toots_str)
toots_str = None
db.close()
os.remove("toots-copy.db")
# Ask the model for a sentence of at most 500 characters until one is returned.
sentence = None
while sentence is None:
sentence = model.make_short_sentence(500, tries=100000)
# NUL characters in the generated text become line breaks.
sentence = sentence.replace("\0", "\n")
output.send(sentence)
# Public entry point for generating a toot. Both parameters are accepted but
# unused: the result always comes from make_toot_markov().
def make_toot(force_markov = False, args = None):
return make_toot_markov()
# Runs make_sentence in a child process, waiting up to 10 seconds per attempt,
# and returns a dict with the generated text under "toot" and None under
# "media". A child still running after the wait is terminated and counted as
# a failed try; the loop condition stops after 10 tries.
# NOTE(review): the original indentation is missing from this listing; the
# statements are left exactly as found.
def make_toot_markov():
tries = 0
toot = None
while toot == None and tries < 10:
# One-way pipe: the child sends on pout, the parent receives on pin.
pin, pout = multiprocessing.Pipe(False)
p = multiprocessing.Process(target = make_sentence, args = [pout])
p.start()
p.join(10)
if p.is_alive():
p.terminate()
p.join()
toot = None
tries = tries + 1
else:
toot = pin.recv()
return {
"toot":toot,
"media":None
}

70
gen.go Normal file
View File

@ -0,0 +1,70 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package main
import (
"context"
"math/rand"
"strings"
"sync"
"time"
"github.com/mattn/go-mastodon"
)
// genToot builds the toot the bot is about to post.
//
// With a nil replyTo the result is a public toot containing only generated
// text. Otherwise the toot replies to replyTo, copies its visibility, starts
// by mentioning its author followed by every other mentioned account (the bot
// itself and the author are not repeated), and uses the cleaned text of
// replyTo as the seed for generation.
func genToot(ctx context.Context, me *mastodon.Account, replyTo *mastodon.Status) *mastodon.Toot {
	toot := &mastodon.Toot{Visibility: "public"}

	var (
		text strings.Builder
		seed string
	)

	if replyTo != nil {
		toot.InReplyToID = replyTo.ID
		toot.Visibility = replyTo.Visibility
		seed = cleanContent(replyTo.Content)

		// Every mention is written as "@acct " ahead of the generated text.
		mention := func(acct string) {
			text.WriteByte('@')
			text.WriteString(acct)
			text.WriteByte(' ')
		}

		mention(replyTo.Account.Acct)
		for _, m := range replyTo.Mentions {
			if m.ID == me.ID || m.ID == replyTo.Account.ID {
				continue
			}
			mention(m.Acct)
		}
	}

	text.WriteString(generateMessage(ctx, seed))
	toot.Status = text.String()
	return toot
}
// rngPool hands out *rand.Rand values so callers do not share one generator.
// A generator created by the pool is seeded from the current time.
var rngPool = sync.Pool{New: newPooledRand}

// newPooledRand is the constructor used by rngPool.
func newPooledRand() interface{} {
	seed := time.Now().UnixNano()
	return rand.New(rand.NewSource(seed))
}
// generateMessage returns generated text as a single space-joined string.
//
// When seed contains words, up to ten randomly chosen seed words are tried as
// the anchor for genMarkov, and the first non-nil result wins. If seed is
// empty or no attempt succeeds, an unseeded line is generated instead.
func generateMessage(ctx context.Context, seed string) string {
	rng := rngPool.Get().(*rand.Rand)
	defer rngPool.Put(rng)

	if candidates := strings.Fields(seed); len(candidates) > 0 {
		for attempt := 0; attempt < 10; attempt++ {
			anchor := candidates[rng.Intn(len(candidates))]
			line := genMarkov(rng, anchor)
			if line == nil {
				continue
			}
			return strings.Join(line, " ")
		}
	}

	unseeded := genMarkov(rng, "")
	return strings.Join(unseeded, " ")
}

42
gen.py
View File

@ -1,42 +0,0 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
import argparse, sys, traceback, json
import create
# Command-line script: generates a toot with create.make_toot() and, unless
# --simulate is given, posts it as unlisted.
# NOTE(review): the original indentation is missing from this listing; the
# statements are left exactly as found.
parser = argparse.ArgumentParser(description='Generate and post a toot.')
# The positional `reply` argument is parsed but not read anywhere below.
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
help="Print the toot to stdout without posting it")
args = parser.parse_args()
# config.json supplies the instance base URL under the "site" key.
cfg = json.load(open('config.json', 'r'))
client = Mastodon(
client_id="clientcred.secret",
access_token="usercred.secret",
api_base_url=cfg['site'])
toot = create.make_toot()
if not args.simulate:
try:
# With media attached, the text doubles as the media description and is
# posted with newlines replaced by spaces.
if toot['media'] != None:
mediaID = client.media_post(toot['media'], description = toot['toot'])
client.status_post(toot['toot'].replace("\n", " "),
media_ids = [mediaID], visibility = "unlisted")
else:
client.status_post(toot['toot'], visibility = 'unlisted')
except Exception as err:
# On failure the toot is replaced by an error report containing the
# formatted traceback, which is posted behind an "Error!" content warning.
toot = {
"toot":
"Mistress @lynnesbian@deadinsi.de, something has gone terribly" \
+ " wrong! While attempting to post a toot, I received the following" \
+ " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2]))
}
client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")
print(toot['toot'])

8
go.mod Normal file
View File

@ -0,0 +1,8 @@
module github.com/lynnesbian/mstdn-ebooks
require (
github.com/gorilla/websocket v1.4.0 // indirect
github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 // indirect
golang.org/x/net v0.0.0-20181017193950-04a2e542c03f
)

8
go.sum Normal file
View File

@ -0,0 +1,8 @@
github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q=
github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a h1:poZfGmljz6MxMXvFcYQvTx7TJQ2J9Gb+B2lgjOIPQnA=
github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a/go.mod h1:/OSOSDJyV0OUlBuDV0Qrllizt3BJNj4Ir5xhckYRVmg=
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y=
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80/go.mod h1:iFyPdL66DjUD96XmzVL3ZntbzcflLnznH0fr99w5VqE=
golang.org/x/net v0.0.0-20181017193950-04a2e542c03f h1:4pRM7zYwpBjCnfA1jRmhItLxYJkaEnsmuAcRtA347DA=
golang.org/x/net v0.0.0-20181017193950-04a2e542c03f/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=

129
main.go Normal file
View File

@ -0,0 +1,129 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package main
import (
"context"
"flag"
"log"
"time"
"github.com/mattn/go-mastodon"
)
// Command-line flags. The file locations default to relative names, so they
// resolve against the working directory.
var (
	flagServer = flag.String("server", "https://botsin.space", "base URL of Mastodon server")
	flagApp    = flag.String("app", "clientcred.secret", "location of Mastodon app credentials")
	flagUser   = flag.String("user", "usercred.secret", "location of Mastodon user access token")
	flagData   = flag.String("data", "ebooks.dat", "location of bot cache")
)

const (
	// scopes is the space-separated OAuth scope list used both when
	// registering the app and when requesting user authorization.
	scopes = "read:statuses read:accounts read:follows write:statuses"

	// noRedirect is the redirect URI sent with the OAuth requests; the
	// operator copies the authorization code by hand.
	noRedirect = "urn:ietf:wg:oauth:2.0:oob"
)
// main logs in, downloads the history of every followed account, and then
// runs forever: it answers mentions, learns from new statuses of followed
// accounts, and posts a generated toot on every half-hour boundary.
func main() {
	log.SetFlags(0)
	flag.Parse()

	ctx := context.Background()

	cfg := &mastodon.Config{
		Server: *flagServer,
	}
	ensureApp(ctx, cfg)
	ensureUser(ctx, cfg)

	client := mastodon.NewClient(cfg)

	instance, err := client.GetInstance(ctx)
	checkError(err, "Could not get instance metadata")

	me, err := client.GetAccountCurrentUser(ctx)
	checkError(err, "Could not get current user")

	log.Println("Logged in as", me.Acct+"@"+instance.URI)

	feed, err := client.NewWSClient().StreamingWSUser(ctx)
	checkError(err, "Could not connect to user feed")

	// Collect every followed account, one page at a time.
	var following []*mastodon.Account
	var pg mastodon.Pagination
	isFollowing := make(map[mastodon.ID]*mastodon.Account)
	for {
		fs, err := client.GetAccountFollowing(ctx, me.ID, &pg)
		checkError(err, "Failed to get followed accounts")
		following = append(following, fs...)
		for _, f := range fs {
			isFollowing[f.ID] = f
		}
		if pg.MaxID == "" {
			break
		}
	}

	downloadToots(ctx, instance, following)
	log.Println("Initial history downloaded.")

	// Persist the model whenever insertStatus marks it dirty.
	go func() {
		for range markovDirty {
			saveMarkov()
		}
	}()

	// Synchronize to the next half hour interval
	halfHourSync := time.After(time.Hour/2 - time.Since(time.Now().Truncate(time.Hour/2)))
	var halfHour <-chan time.Time

	for {
		select {
		case event := <-feed:
			switch e := event.(type) {
			case *mastodon.ErrorEvent:
				log.Println("Mastodon error:", e)
			case *mastodon.DeleteEvent:
				// Ignore (for now)
			case *mastodon.NotificationEvent:
				if e.Notification.Type != "mention" {
					log.Printf("Ignoring notification of type %q", e.Notification.Type)
					continue
				}
				// Without a status there is nothing to reply to, and
				// genToot would dereference a nil pointer.
				if e.Notification.Status == nil {
					log.Println("Ignoring mention notification without a status")
					continue
				}
				_, err := client.PostStatus(ctx, genToot(ctx, me, e.Notification.Status))
				checkError(err, "Error replying to mention %q", e.Notification.Status.URL)
			case *mastodon.UpdateEvent:
				if _, ok := isFollowing[e.Status.Account.ID]; !ok {
					continue
				}
				// Boosts are skipped so the stream learns from the same
				// material as loadAllToots, which only ingests "Create"
				// activities.
				if e.Status.Reblog != nil {
					continue
				}
				if e.Status.Visibility != "unlisted" && e.Status.Visibility != "public" {
					continue
				}
				if e.Status.Sensitive {
					continue
				}
				insertStatus(ctx, e.Status.Account.ID, e.Status.URI, e.Status.Content)
			default:
				log.Printf("Unexpected event type: %T", e)
			}
		case <-halfHourSync:
			// First boundary reached: disable this case (a nil channel never
			// becomes ready) and switch to the regular ticker.
			halfHourSync = nil
			halfHour = time.Tick(time.Hour / 2)
			_, err := client.PostStatus(ctx, genToot(ctx, me, nil))
			checkError(err, "Error posting status")
		case <-halfHour:
			_, err := client.PostStatus(ctx, genToot(ctx, me, nil))
			checkError(err, "Error posting status")
		}
	}
}
// checkError does nothing when err is nil. Otherwise it logs and panics with
// message formatted using arguments, followed by ": " and the error.
//
// message is a fmt format string; arguments supplies its operands.
func checkError(err error, message string, arguments ...interface{}) {
	if err == nil {
		return
	}

	// Build the operand list in a fresh slice: appending to arguments directly
	// could overwrite the caller's backing array when a slice with spare
	// capacity is passed as args... .
	operands := make([]interface{}, 0, len(arguments)+1)
	operands = append(operands, arguments...)
	operands = append(operands, err)

	log.Panicf(message+": %v", operands...)
}

134
main.py
View File

@ -1,134 +0,0 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
from os import path
from bs4 import BeautifulSoup
import shutil, os, sqlite3, signal, sys, json
# import re
# OAuth scopes requested for the bot application and user login.
scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses"]
# config.json supplies the instance base URL under the "site" key.
cfg = json.load(open('config.json', 'r'))
# Register the application when no client credentials file exists yet.
# NOTE(review): the original indentation is missing from this listing; the
# statements are left exactly as found.
if not path.exists("clientcred.secret"):
print("No clientcred.secret, registering application")
Mastodon.create_app("lynnesbian_mastodon_ebooks", api_base_url=cfg['site'], to_file="clientcred.secret", scopes=scopes, website="https://github.com/Lynnesbian/mastodon-ebooks")
# Log the user in interactively when no user credentials file exists yet:
# print the authorization URL and exchange the pasted code for a token.
if not path.exists("usercred.secret"):
print("No usercred.secret, registering application")
client = Mastodon(client_id="clientcred.secret", api_base_url=cfg['site'])
print("Visit this url:")
print(client.auth_request_url(scopes=scopes))
client.log_in(code=input("Secret: "), to_file="usercred.secret", scopes=scopes)
# Converts a toot's HTML content to plain text.
# Returns None (bare return) for toots with a content warning, boosts, and
# toots whose visibility is neither "public" nor "unlisted".
# NOTE(review): the original indentation is missing from this listing; the
# statements are left exactly as found.
def parse_toot(toot):
if toot.spoiler_text != "": return
if toot.reblog is not None: return
if toot.visibility not in ["public", "unlisted"]: return
soup = BeautifulSoup(toot.content, "html.parser")
# pull the mentions out
# for mention in soup.select("span.h-card"):
# mention.unwrap()
# for mention in soup.select("a.u-url.mention"):
# mention.unwrap()
# this is the code that removes all mentions
# TODO: make it so that it removes the @ and instance but keeps the name
for mention in soup.select("span.h-card"):
mention.decompose()
# make all linebreaks actual linebreaks
for lb in soup.select("br"):
lb.insert_after("\n")
lb.decompose()
# make each p element its own line because sometimes they decide not to be
for p in soup.select("p"):
p.insert_after("\n")
p.unwrap()
# keep hashtags in the toots
for ht in soup.select("a.hashtag"):
ht.unwrap()
# unwrap all links (i like the bots posting links)
# Each remaining link is replaced by its href value.
for link in soup.select("a"):
link.insert_after(link["href"])
link.decompose()
# Strip surrounding whitespace from the text as a whole and from each line.
text = map(lambda a: a.strip(), soup.get_text().strip().split("\n"))
# next up: store this and patch markovify to take it
# return {"text": text, "mentions": mentions, "links": links}
# it's 4am though so we're not doing that now, but i still want the parser updates
#todo: we split above and join now, which is dumb, but i don't wanna mess with the map code bc i don't understand it uwu
text = "\n".join(list(text))
text = text.replace("&apos;", "'")
return text
# Generator over an account's statuses newer than since_id. Yields a dict with
# the parsed text under "content" and the toot id under "id" for every toot
# that parse_toot does not reject, fetching further pages with
# client.fetch_next.
# NOTE(review): the original indentation is missing from this listing; the
# statements are left exactly as found.
def get_toots(client, id, since_id):
i = 0
toots = client.account_statuses(id, since_id = since_id)
while toots is not None and len(toots) > 0:
for toot in toots:
t = parse_toot(toot)
# parse_toot returns None for toots that must be skipped.
if t != None:
yield {
"content": t,
"id": toot.id
}
try:
toots = client.fetch_next(toots)
except TimeoutError:
# Uses the module-level `db` connection, which is created after this
# definition.
print("Operation timed out, committing to database and exiting.")
db.commit()
db.close()
sys.exit(1)
# Progress output: prints the counter whenever it is a multiple of 10.
i += 1
if i%10 == 0:
print(i)
# Authenticated client built from the two credential files.
client = Mastodon(
client_id="clientcred.secret",
access_token="usercred.secret",
api_base_url=cfg['site'])
# The bot's own account and the accounts it follows.
me = client.account_verify_credentials()
following = client.account_following(me.id)
# Local toot store; the table is created on first use.
db = sqlite3.connect("toots.db")
db.text_factory=str
c = db.cursor()
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
db.commit()
# SIGINT handler: commits what has been written so far and exits with status 1.
def handleCtrlC(signal, frame):
print("\nPREMATURE EVACUATION - Saving chunks")
db.commit()
sys.exit(1)
# Install the handler for Ctrl-C.
signal.signal(signal.SIGINT, handleCtrlC)
# For every followed account: look up the highest stored toot id (0 when none
# is stored), download the toots after it, and store them.
# NOTE(review): the original indentation is missing from this listing; the
# statements are left exactly as found.
for f in following:
last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
if last_toot != None:
last_toot = last_toot[0]
else:
last_toot = 0
print("Downloading toots for user @{}, starting from {}".format(f.username, last_toot))
for t in get_toots(client, f.id, last_toot):
# try:
# REPLACE INTO overwrites an existing row with the same id.
c.execute("REPLACE INTO toots (id, userid, content) VALUES (?, ?, ?)", (t['id'], f.id, t['content']))
# except:
# pass #ignore toots that can't be encoded properly
db.commit()
db.execute("VACUUM") #compact db
db.commit()
db.close()

214
markov.go Normal file
View File

@ -0,0 +1,214 @@
package main
import (
"context"
"encoding/gob"
"log"
"math/rand"
"os"
"path"
"strconv"
"strings"
"sync"
"unicode"
mastodon "github.com/mattn/go-mastodon"
)
// markovPrefixLength is the number of normalized words that form a key of the
// markov.Next and markov.Prev tables.
const markovPrefixLength = 2
// probableString is a weighted set of strings: each distinct string carries
// the number of times it was inserted. The fields are exported because the
// value is stored with encoding/gob.
type probableString struct {
	// Order lists the distinct strings in first-insertion order, which makes
	// rand deterministic for a given random source.
	Order []string
	// Count maps each string to the number of times it was inserted.
	Count map[string]int
	// Total is the sum of all counts.
	Total int
}

// insert records one more occurrence of s and returns the updated value. The
// receiver is a value, so callers must store the result.
func (ps probableString) insert(s string) probableString {
	if ps.Count == nil {
		ps.Count = make(map[string]int)
	}
	if _, seen := ps.Count[s]; !seen {
		ps.Order = append(ps.Order, s)
	}
	ps.Count[s]++
	ps.Total++
	return ps
}

// rand picks a string with probability proportional to its count.
//
// An empty distribution (for example the zero value returned by a map lookup
// that found nothing) yields "", the marker callers already treat as "stop",
// instead of panicking inside r.Intn(0).
func (ps probableString) rand(r *rand.Rand) string {
	if ps.Total <= 0 {
		return ""
	}

	n := r.Intn(ps.Total)
	for _, s := range ps.Order {
		n -= ps.Count[s]
		if n < 0 {
			return s
		}
	}

	// Only reachable when Total exceeds the sum of the counts.
	panic("unreachable")
}
// markovDirty is signalled by insertStatus after the model changes; main
// saves the model once per received signal. The buffer of one, combined with
// insertStatus's non-blocking send, means at most one save is pending.
var markovDirty = make(chan struct{}, 1)
// loadData replaces the in-memory model with the contents of the -data file.
// A missing file is not an error and leaves the model untouched; every other
// failure panics through checkError.
func loadData() {
	markovLock.Lock()
	defer markovLock.Unlock()

	cache, err := os.Open(*flagData)
	switch {
	case os.IsNotExist(err):
		// Nothing has been saved yet.
		return
	default:
		checkError(err, "Could not open data cache")
	}
	defer func() {
		checkError(cache.Close(), "Could not close data cache")
	}()

	err = gob.NewDecoder(cache).Decode(&markov)
	checkError(err, "Could not read data")
}
// saveMarkov writes the in-memory model to the -data file. The data is first
// written to "<data>.tmp" and then renamed over the real file, so the real
// file is only replaced after encoding and closing succeeded. Any failure
// panics through checkError.
func saveMarkov() {
	markovLock.Lock()
	defer markovLock.Unlock()

	target := *flagData
	staging := target + ".tmp"

	out, err := os.Create(staging)
	checkError(err, "Could not create data staging file")

	err = gob.NewEncoder(out).Encode(&markov)
	checkError(err, "Could not save data")

	err = out.Close()
	checkError(err, "Could not close data staging file")

	err = os.Rename(staging, target)
	checkError(err, "Could not commit data update")
}
// markovLock guards markov. loadData, saveMarkov, insertStatus and genMarkov
// take it themselves; updateMarkov is called with it already held.
var markovLock sync.Mutex
// markov is the whole persisted state of the bot; loadData and saveMarkov
// pass it to encoding/gob as is.
var markov = struct {
// Accounts holds per-account download progress.
Accounts map[mastodon.ID]accountCache
// Next maps a prefix of normalized words to the words seen after it.
Next map[[markovPrefixLength]string]probableString
// Prev maps a suffix of normalized words to the words seen before it.
Prev map[[markovPrefixLength]string]probableString
}{
Accounts: make(map[mastodon.ID]accountCache),
Next: make(map[[markovPrefixLength]string]probableString),
Prev: make(map[[markovPrefixLength]string]probableString),
}
// insertStatus adds one status to the model.
//
// content is the status HTML; it is reduced to plain text and learned one
// paragraph at a time. id is the status URI: when its last path element is a
// decimal number larger than the one remembered for account, that number
// becomes the account's new LatestRemoteTootID. Finally the model is marked
// dirty without blocking.
func insertStatus(ctx context.Context, account mastodon.ID, id, content string) {
	text := cleanContent(content)

	markovLock.Lock()
	defer markovLock.Unlock()

	if n, err := strconv.ParseUint(path.Base(id), 10, 64); err == nil {
		if entry := markov.Accounts[account]; n > entry.LatestRemoteTootID {
			entry.LatestRemoteTootID = n
			markov.Accounts[account] = entry
		}
	}

	for _, paragraph := range strings.Split(text, "\n\n") {
		updateMarkov(strings.Fields(paragraph))
	}

	// Non-blocking send: if a save is already pending, one signal is enough.
	select {
	case markovDirty <- struct{}{}:
	default:
	}
}
// updateMarkov records one sentence, given as its words, in markov.Next and
// markov.Prev. Table keys use normalized words; the stored values keep the
// words as written. The empty string marks the end of a sentence in Next and
// the start of a sentence in Prev.
//
// The caller must hold markovLock.
func updateMarkov(words []string) {
	// A paragraph without words (for example a status whose cleaned content
	// is empty) must not be learned: it would record the end marker directly
	// after the empty start prefix and allow empty messages to be generated.
	if len(words) == 0 {
		return
	}

	const last = markovPrefixLength - 1

	// Forward table: slide a window of the preceding words over the sentence.
	var prefix [markovPrefixLength]string
	for _, word := range words {
		markov.Next[prefix] = markov.Next[prefix].insert(word)
		copy(prefix[:], prefix[1:])
		prefix[last] = normalizeWord(word)
	}
	markov.Next[prefix] = markov.Next[prefix].insert("")

	// Backward table: for every word after the first, record its predecessor
	// under each suffix length from 1 to markovPrefixLength. Unused key slots
	// stay empty.
	for i := 1; i < len(words); i++ {
		for l := 1; l <= markovPrefixLength; l++ {
			var suffix [markovPrefixLength]string
			copy(suffix[:l], words[i:])
			for j := range suffix {
				suffix[j] = normalizeWord(suffix[j])
			}
			markov.Prev[suffix] = markov.Prev[suffix].insert(words[i-1])
		}
	}

	// The first word has no predecessor: record the start marker for it.
	for l := 1; l <= markovPrefixLength; l++ {
		var suffix [markovPrefixLength]string
		copy(suffix[:l], words)
		for j := range suffix {
			suffix[j] = normalizeWord(suffix[j])
		}
		markov.Prev[suffix] = markov.Prev[suffix].insert("")
	}
}
// genMarkov generates one line of words from the model.
//
// With an empty seed the line is generated forwards from the empty start
// prefix. With a non-empty seed the line is first grown backwards from the
// seed until a start marker is drawn, then reversed and continued forwards.
// It returns nil when the seed is unknown to the backward table, and panics
// when the forward table has no entry for the starting prefix. Both
// directions stop once the line holds 1000 words.
func genMarkov(r *rand.Rand, seed string) []string {
	const (
		tail     = markovPrefixLength - 1
		maxWords = 1000
	)

	var (
		window [markovPrefixLength]string
		words  []string
	)

	markovLock.Lock()
	defer markovLock.Unlock()

	if seed != "" {
		var key [markovPrefixLength]string
		key[0] = normalizeWord(seed)

		choices, known := markov.Prev[key]
		if !known {
			return nil
		}

		// Walk backwards; words is built in reverse order.
		words = append(words, seed)
		for len(words) < maxWords {
			previous := choices.rand(r)
			if previous == "" {
				break
			}
			words = append(words, previous)

			copy(key[1:], key[:])
			key[0] = normalizeWord(previous)
			choices = markov.Prev[key]
		}

		// Put the words into reading order.
		for lo, hi := 0, len(words)-1; lo < hi; lo, hi = lo+1, hi-1 {
			words[lo], words[hi] = words[hi], words[lo]
		}

		// Seed the forward window with the last words of the line.
		for w, k := len(words)-1, tail; w >= 0 && k >= 0; w, k = w-1, k-1 {
			window[k] = normalizeWord(words[w])
		}
	}

	if _, known := markov.Next[window]; !known {
		log.Panicln("No markov data available for prefix", window, words)
	}

	for len(words) < maxWords {
		following := markov.Next[window].rand(r)
		if following == "" {
			return words
		}
		words = append(words, following)

		copy(window[:], window[1:])
		window[tail] = normalizeWord(following)
	}

	// probably an infinite loop
	return words
}
// normalizeWord returns s in lower case with every punctuation character (as
// defined by unicode.IsPunct) removed.
func normalizeWord(s string) string {
	dropPunct := func(r rune) rune {
		if unicode.IsPunct(r) {
			return -1
		}
		return r
	}
	return strings.Map(dropPunct, strings.ToLower(s))
}

View File

@ -1,53 +0,0 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import mastodon
import os, random, re, json
import create
from bs4 import BeautifulSoup
# config.json supplies the instance base URL under the "site" key.
cfg = json.load(open('config.json', 'r'))
# NOTE(review): api_base_url is assigned here but the client below is built
# from cfg['site']; this variable is not read in the visible code.
api_base_url = "https://knzk.me"
client = mastodon.Mastodon(
client_id="clientcred.secret",
access_token="usercred.secret",
api_base_url=cfg['site'])
# Converts the HTML of a mention into lower-case plain text. Links are
# replaced by their href, profile URLs are rewritten to @user@host form, and
# the leading mention is removed.
# NOTE(review): the original indentation is missing from this listing; the
# statements are left exactly as found.
def extract_toot(toot):
#copied from main.py, see there for comments
soup = BeautifulSoup(toot, "html.parser")
for lb in soup.select("br"):
lb.insert_after("\n")
lb.decompose()
for p in soup.select("p"):
p.insert_after("\n")
p.unwrap()
for ht in soup.select("a.hashtag"):
ht.unwrap()
for link in soup.select("a"):
link.insert_after(link["href"])
link.decompose()
text = map(lambda a: a.strip(), soup.get_text().strip().split("\n"))
text = "\n".join(list(text))
text = re.sub("https?://([^/]+)/(@[^ ]+)", r"\2@\1", text) #put mentions back in
text = re.sub("^@[^@]+@[^ ]+ *", r"", text) #...but remove the initial one
text = text.lower() #for easier matching
return text
# Stream listener that answers every mention with a generated toot addressed
# to the sender, using the visibility of the mentioning status.
# NOTE(review): the original indentation is missing from this listing; the
# statements are left exactly as found.
class ReplyListener(mastodon.StreamListener):
def on_notification(self, notification):
# Notifications of any other type are ignored.
if notification['type'] == 'mention':
acct = "@" + notification['account']['acct']
post_id = notification['status']['id']
# The extracted text is only printed; it does not influence the reply.
mention = extract_toot(notification['status']['content'])
toot = create.make_toot(True)['toot']
toot = acct + " " + toot
print(acct + " says " + mention)
client.status_post(toot, post_id, visibility=notification['status']['visibility'])
print("replied with " + toot)
# Start listening on the user stream with the listener above.
rl = ReplyListener()
client.stream_user(rl)

View File

@ -1,3 +0,0 @@
Mastodon.py==1.3.1
markovify==0.7.1
beautifulsoup4==4.6.0

199
status.go Normal file
View File

@ -0,0 +1,199 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package main
import (
"context"
"encoding/json"
"encoding/xml"
"fmt"
"log"
"net/http"
"net/url"
"strings"
"sync"
"github.com/mattn/go-mastodon"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// accountCache is the per-account state kept in markov.Accounts.
type accountCache struct {
// LatestRemoteTootID is the largest numeric status ID seen for the account;
// downloadToots passes it to loadAllToots as the starting point.
LatestRemoteTootID uint64
}
// downloadToots loads the saved model and then fetches, for every account in
// following, the statuses newer than the account's remembered
// LatestRemoteTootID. Accounts are downloaded concurrently and the function
// returns when all of them are done.
//
// A failure while downloading one account is logged and that account is
// skipped. An unrecovered panic in a goroutine would otherwise terminate the
// whole process, so a single unreachable remote server would keep the bot
// from starting. Statuses inserted before the failure are kept.
func downloadToots(ctx context.Context, instance *mastodon.Instance, following []*mastodon.Account) {
	loadData()

	var wg sync.WaitGroup
	wg.Add(len(following))

	// The lock is held while the goroutines are started so that every start
	// ID is read from a consistent snapshot; the goroutines cannot insert
	// anything until it is released.
	markovLock.Lock()
	for _, f := range following {
		go func(account *mastodon.Account, start uint64) {
			defer wg.Done()
			defer func() {
				if r := recover(); r != nil {
					log.Printf("Giving up on toots for user %s: %v", account.Acct, r)
				}
			}()

			log.Printf("Downloading toots for user %s, starting from %d", account.Acct, start)

			// Accounts without a host part get the bot's own instance appended.
			acct := account.Acct
			if !strings.Contains(acct, "@") {
				acct += "@" + instance.URI
			}

			loadAllToots(ctx, acct, account.URL, start, func(id, content string) {
				insertStatus(ctx, account.ID, id, content)
			})
		}(f, markov.Accounts[f.ID].LatestRemoteTootID)
	}
	markovLock.Unlock()

	wg.Wait()
}
// cleanContent converts the HTML of a status into plain text.
//
// Top-level nodes of the fragment are joined with a blank line ("\n\n").
// Text nodes are copied, <img> elements contribute their alt text, <br>
// becomes a newline, and <a> elements whose class list contains "mention"
// are dropped together with their content. A parse failure panics through
// checkError.
func cleanContent(s string) string {
	paragraphs, err := html.ParseFragment(strings.NewReader(s), &html.Node{
		Type:     html.ElementNode,
		Data:     "div",
		DataAtom: atom.Div,
	})
	checkError(err, "Failed to parse HTML %q", s)

	// isMention reports whether the first class attribute of n contains the
	// class "mention".
	// NOTE(review): links that carry further classes next to "mention" are
	// dropped as well -- confirm this is intended for hashtag links.
	isMention := func(n *html.Node) bool {
		for _, a := range n.Attr {
			if a.Key != "class" {
				continue
			}
			for _, c := range strings.Fields(a.Val) {
				if c == "mention" {
					return true
				}
			}
			return false
		}
		return false
	}

	var body []byte

	var walk func(*html.Node)
	walk = func(n *html.Node) {
		for ; n != nil; n = n.NextSibling {
			switch n.Type {
			case html.TextNode:
				body = append(body, n.Data...)
			case html.ElementNode:
				switch n.DataAtom {
				case atom.A:
					if isMention(n) {
						continue
					}
				case atom.Br:
					// A line break separates words; without it the text
					// before and after <br> would be joined into one word.
					body = append(body, '\n')
				case atom.Img:
					for _, a := range n.Attr {
						if a.Key == "alt" {
							body = append(body, a.Val...)
							break
						}
					}
				}
				walk(n.FirstChild)
			}
		}
	}

	for i, p := range paragraphs {
		if i != 0 {
			body = append(body, "\n\n"...)
		}
		walk(p.FirstChild)
	}

	return string(body)
}
// getJSON downloads uri with a GET request and decodes the JSON response body
// into v, which must be a pointer. The request is bound to ctx, so cancelling
// ctx aborts it. A transport error, a status other than 200 OK, or a decoding
// error panics.
func getJSON(ctx context.Context, uri string, v interface{}) {
	req, err := http.NewRequest(http.MethodGet, uri, nil)
	checkError(err, "Could not download %q", uri)

	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	checkError(err, "Could not download %q", uri)
	defer func() {
		checkError(resp.Body.Close(), "Error when closing %q", uri)
	}()

	if resp.StatusCode != http.StatusOK {
		log.Panicf("Error downloading %q: %v", uri, resp.Status)
	}

	checkError(json.NewDecoder(resp.Body).Decode(v), "Error decoding %q", uri)
}
// loadAllToots walks the outbox of the account acct, starting after the
// status ID start, and calls foundStatus with the ID and HTML content of
// every "Create" activity whose object is not marked sensitive.
//
// The outbox is located through the webfinger endpoint of the host in
// userURL. Pages are followed through their "prev" links until a page has
// none. Any download or decoding failure panics.
func loadAllToots(ctx context.Context, acct, userURL string, start uint64, foundStatus func(id, content string)) {
	type activity struct {
		Type   string          `json:"type"`
		Object json.RawMessage `json:"object"`
	}
	type outboxPage struct {
		OrderedItems []activity `json:"orderedItems"`
		Prev         string     `json:"prev"`
	}
	type note struct {
		ID        string `json:"id"`
		Sensitive bool   `json:"sensitive"`
		Content   string `json:"content"`
	}

	finger := getWebFingerURL(ctx, acct, userURL)
	outbox := webFingerUserActivity(ctx, finger) + "/outbox"

	pageURL := fmt.Sprintf("%s?min_id=%d&page=true", outbox, start)
	for pageURL != "" {
		// A fresh value per page: a page without "prev" must end the loop.
		var page outboxPage
		getJSON(ctx, pageURL, &page)

		for _, item := range page.OrderedItems {
			if item.Type != "Create" {
				continue
			}

			var toot note
			checkError(json.Unmarshal(item.Object, &toot), "Failed to decode toot JSON in %q", pageURL)

			if toot.Sensitive {
				continue
			}
			foundStatus(toot.ID, toot.Content)
		}

		pageURL = page.Prev
	}
}
// getWebFingerURL returns the webfinger lookup URL for the account acct.
//
// It downloads /.well-known/host-meta from the host of userURL, takes the
// template attribute of the Link element, and substitutes the query-escaped
// "acct:<acct>" for "{uri}". The request is bound to ctx. A transport error,
// a status other than 200 OK, an undecodable document, or a document without
// a Link template panics.
func getWebFingerURL(ctx context.Context, acct, userURL string) string {
	acct = url.QueryEscape("acct:" + acct)

	u, err := url.Parse(userURL)
	checkError(err, "Failed to parse user URL")
	u.Path = "/.well-known/host-meta"
	u.RawQuery = ""

	req, err := http.NewRequest(http.MethodGet, u.String(), nil)
	checkError(err, "Could not retrieve host-meta")

	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	checkError(err, "Could not retrieve host-meta")
	defer func() {
		checkError(resp.Body.Close(), "Error closing host-meta request")
	}()

	if resp.StatusCode != http.StatusOK {
		log.Panicf("Failed to load %q: %s", u, resp.Status)
	}

	var meta struct {
		Link struct {
			Template string `xml:"template,attr"`
		} `xml:"Link"`
	}
	checkError(xml.NewDecoder(resp.Body).Decode(&meta), "Could not find webfinger URL")

	// Without a template the result would be an empty URL that only fails
	// later with an unrelated message; report the real cause here.
	if meta.Link.Template == "" {
		log.Panicf("Could not find webfinger URL in %q", u)
	}

	return strings.Replace(meta.Link.Template, "{uri}", acct, -1)
}
// webFingerUserActivity downloads the webfinger document at uri and returns
// the href of its link with rel "self" and type "application/activity+json".
// It panics when the document contains no such link.
func webFingerUserActivity(ctx context.Context, uri string) string {
	type link struct {
		Href string `json:"href"`
		Rel  string `json:"rel"`
		Type string `json:"type"`
	}
	var doc struct {
		Links []link `json:"links"`
	}

	getJSON(ctx, uri, &doc)

	for i := range doc.Links {
		candidate := doc.Links[i]
		if candidate.Rel != "self" || candidate.Type != "application/activity+json" {
			continue
		}
		return candidate.Href
	}

	log.Panicf("Could not find ActivityPub URL in web finger response: %q", uri)
	return ""
}