Rewrite in Go.

This commit is contained in:
Ben Lubar 2018-10-20 14:30:52 -05:00
parent 3d059d0b9b
commit db7b9d6e10
No known key found for this signature in database
GPG Key ID: 92939677AB59EDA4
15 changed files with 770 additions and 310 deletions

12
Dockerfile Normal file
View File

@ -0,0 +1,12 @@
# Image for the mstdn-ebooks bot, built on the Alpine variant of the Go 1.11.1 toolchain image.
FROM golang:1.11.1-alpine
# Copy the whole build context into the image.
COPY . /mstdn-ebooks/
# Build a CGO-free binary into /usr/local/bin. git is installed only for the
# duration of this step and removed again in the same layer.
RUN cd /mstdn-ebooks/ \
&& apk add --no-cache git \
&& CGO_ENABLED=0 go build -o /usr/local/bin/mstdn-ebooks \
&& apk del git
# The bot runs inside the data volume, so files it opens by relative path
# (its default credential and cache file names) are created there.
VOLUME /mstdn-ebooks/data
WORKDIR /mstdn-ebooks/data
CMD ["mstdn-ebooks", "-server", "https://botsin.space"]

View File

@ -1,26 +1,22 @@
# mstdn-ebooks # mstdn-ebooks
**Lynnear Edition** **Lynnear Edition**
This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks), such as: This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks) and [the other original](https://github.com/Lynnesbian/mstdn-ebooks/tree/3d059d0b9b66fd31378574104f1a56f2be5a319c), such as:
- Unicode support - Unicode support
- Non-Markov stuff - Non-Markov stuff
- Stores toots in a sqlite database rather than a text file - Doesn't unnecessarily redownload all toots every time
- Doesn't unnecessarily redownload all toots every time - Uses an API called "webfinger" to allow downloading toots not known to your bot's instance
- Self-contained executable handles scheduling
- Docker support
- Written in Go
## Install/usage guide ## Installation
An installation and usage guide is available for unix-based platforms (linux, macOS...) [here](https://cloud.lynnesbian.space/s/Qxxm2sYdMZaqWat).
## Original README 1. Build mstdn-ebooks the same way you would build any Go program (`go get`, etc.). Alternatively, if you don't want to build it yourself, download a [precompiled release version](https://github.com/Lynnesbian/mstdn-ebooks/releases/latest).
hey look it's an ebooks bot 2. If you haven't already, create an account on [botsin.space](https://botsin.space) or another instance.
3. Make sure the bot account is ONLY following you. Remove any default follows.
python3 4. Run the `mstdn-ebooks` command. If your instance is not botsin.space, run the command as `mstdn-ebooks -server https://[your instance]`.
5. Copy the URL it generates into a browser logged into your bot account, and copy the code that Mastodon generates back to the program.
install the requirements with `sudo pip3 install -r requirements` 6. Congratulations! Your ebooks bot is now running. To restart it, you only need to redo step 4.
make a bot (probably on bots in space) and follow the target accounts
run `python3 main.py` to login and scrape
run `python3 gen.py` to make a toot
cron is an okay choice to make it toot regularly

115
auth.go Normal file
View File

@ -0,0 +1,115 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"github.com/mattn/go-mastodon"
)
// ensureApp fills cfg.ClientID and cfg.ClientSecret from the credentials file
// named by the -app flag. When that file does not exist, a new application is
// registered with the server and the file is created.
//
// The file stores the client ID and the client secret on two lines; the final
// newline is optional. I/O and registration errors panic through checkError,
// and a malformed file terminates the program through log.Fatalf.
func ensureApp(ctx context.Context, cfg *mastodon.Config) {
	b, err := ioutil.ReadFile(*flagApp)
	if os.IsNotExist(err) {
		log.Println("No clientcred.secret, registering application")
		app, err := mastodon.RegisterApp(ctx, &mastodon.AppConfig{
			Server:       *flagServer,
			ClientName:   "lynnesbian_mastodon_ebooks",
			Website:      "https://github.com/Lynnesbian/mstdn-ebooks",
			Scopes:       scopes,
			RedirectURIs: noRedirect,
		})
		checkError(err, "Could not register app")

		cfg.ClientID = app.ClientID
		cfg.ClientSecret = app.ClientSecret

		// The file contains the client secret, so it is created readable by
		// its owner only (0600) instead of world-readable.
		checkError(ioutil.WriteFile(*flagApp, []byte(app.ClientID+"\n"+app.ClientSecret+"\n"), 0600), "Could not save app credentials")

		// If the app credentials were just generated, the user access
		// token cannot possibly be valid.
		_ = os.Remove(*flagUser)
		return
	}
	checkError(err, "Could not read app credentials")

	lines := bytes.Split(b, []byte{'\n'})
	// consider final newline to be optional
	if len(lines) == 3 && len(lines[2]) == 0 {
		lines = lines[:2]
	}
	if len(lines) != 2 {
		log.Fatalf("App credentials (%q) malformed. Cannot proceed.", *flagApp)
	}

	cfg.ClientID = string(lines[0])
	cfg.ClientSecret = string(lines[1])
}
// ensureUser fills cfg.AccessToken from the file named by the -user flag.
// When that file does not exist it walks the operator through the OAuth
// authorization-code flow: it prints the authorization URL, reads the code
// from standard input, exchanges it for an access token and stores the token.
//
// Errors panic through checkError; a non-200 answer from the token endpoint
// terminates the program through log.Fatalln.
func ensureUser(ctx context.Context, cfg *mastodon.Config) {
	b, err := ioutil.ReadFile(*flagUser)
	if !os.IsNotExist(err) {
		checkError(err, "Could not read user access token")
		// Trim all surrounding whitespace so that a token file saved with
		// CRLF line endings or extra blank lines still yields a usable token.
		cfg.AccessToken = string(bytes.TrimSpace(b))
		return
	}

	log.Println("No usercred.secret, registering application")

	authURL, err := url.Parse(*flagServer)
	checkError(err, "Could not parse instance root URL")
	authURL.Path = "/oauth/authorize"
	authURL.RawQuery = url.Values{
		"scope":         {scopes},
		"response_type": {"code"},
		"redirect_uri":  {noRedirect},
		"client_id":     {cfg.ClientID},
	}.Encode()
	log.Println("Visit this url:", authURL)

	fmt.Print("Secret: ")
	var authCode string
	_, err = fmt.Scanln(&authCode)
	checkError(err, "Failed to read authorization code")

	// Reuse the parsed instance URL for the token endpoint.
	authURL.Path = "/oauth/token"
	authURL.RawQuery = ""
	resp, err := http.PostForm(authURL.String(), url.Values{
		"client_id":     {cfg.ClientID},
		"client_secret": {cfg.ClientSecret},
		"grant_type":    {"authorization_code"},
		"code":          {authCode},
		"redirect_uri":  {noRedirect},
	})
	checkError(err, "Failed to request access token")
	defer func() {
		checkError(resp.Body.Close(), "Error closing response body")
	}()

	if resp.StatusCode != http.StatusOK {
		body, err := ioutil.ReadAll(resp.Body)
		checkError(err, "Network error reading authentication error")
		log.Fatalln("Authentication failed:", string(body))
	}

	var payload struct {
		AccessToken string `json:"access_token"`
	}
	checkError(json.NewDecoder(resp.Body).Decode(&payload), "Error decoding authentication response")
	cfg.AccessToken = payload.AccessToken

	// The access token grants control of the bot account, so the file is
	// created readable by its owner only (0600) instead of world-readable.
	checkError(ioutil.WriteFile(*flagUser, []byte(payload.AccessToken+"\n"), 0600), "Error saving access token")
}

View File

@ -1 +0,0 @@
{"site":"https://botsin.space"}

View File

@ -1,58 +0,0 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import markovify
import json
import re, random, multiprocessing, time, sqlite3, shutil, os
def make_sentence(output):
    """Generate one sentence from the stored toots and send it on `output`.

    The toot database is copied to "toots-copy.db", the copy is read and then
    deleted. Every stored toot becomes one line of the markovify corpus.
    NUL characters in the generated sentence are turned into newlines before
    the sentence is passed to `output.send`.
    """
    class nlt_fixed(markovify.NewlineText):
        def test_sentence_input(self, sentence):
            # Every sentence is accepted as model input.
            return True

    # with open("corpus.txt", encoding="utf-8") as fp:
    #     model = nlt_fixed(fp.read())

    shutil.copyfile("toots.db", "toots-copy.db")
    db = sqlite3.connect("toots-copy.db")
    db.text_factory = str
    cursor = db.cursor()
    rows = cursor.execute("SELECT content FROM `toots`").fetchall()
    corpus = "".join("\n{}".format(row[0]) for row in rows)
    model = nlt_fixed(corpus)
    corpus = None
    db.close()
    os.remove("toots-copy.db")

    # Keep asking until the model manages to produce a sentence.
    sentence = model.make_short_sentence(500, tries=100000)
    while sentence is None:
        sentence = model.make_short_sentence(500, tries=100000)
    output.send(sentence.replace("\0", "\n"))
def make_toot(force_markov = False, args = None):
    """Return a generated toot; neither parameter is read by this function."""
    generated = make_toot_markov()
    return generated
# Generate a toot by running make_sentence in a child process, giving each
# attempt a time limit, and return it as a dict with "toot" and "media" keys.
# NOTE(review): indentation was lost in this view, so the nesting below is
# inferred; in particular confirm whether the final `return` sits after the
# retry loop or inside its `else` branch.
def make_toot_markov():
tries = 0
toot = None
# Retry until a sentence arrives or ten attempts have timed out.
while toot == None and tries < 10:
# One-way pipe: the child sends on `pout`, this process receives on `pin`.
pin, pout = multiprocessing.Pipe(False)
p = multiprocessing.Process(target = make_sentence, args = [pout])
p.start()
# Wait at most 10 seconds for the child to finish.
p.join(10)
if p.is_alive():
# Still running after the timeout: kill it and count a failed attempt.
p.terminate()
p.join()
toot = None
tries = tries + 1
else:
toot = pin.recv()
return {
"toot":toot,
"media":None
}

70
gen.go Normal file
View File

@ -0,0 +1,70 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package main
import (
"context"
"math/rand"
"strings"
"sync"
"time"
"github.com/mattn/go-mastodon"
)
// genToot composes a toot whose text comes from generateMessage.
//
// With a nil replyTo the toot is a public top-level post. Otherwise it
// replies to replyTo with the same visibility, is seeded with the cleaned
// text of replyTo, and starts by mentioning the author of replyTo followed by
// everyone replyTo mentioned, except the bot itself (me) and the author.
func genToot(ctx context.Context, me *mastodon.Account, replyTo *mastodon.Status) *mastodon.Toot {
	toot := &mastodon.Toot{Visibility: "public"}

	var (
		text strings.Builder
		seed string
	)
	if replyTo != nil {
		toot.InReplyToID = replyTo.ID
		toot.Visibility = replyTo.Visibility
		seed = cleanContent(replyTo.Content)

		text.WriteString("@")
		text.WriteString(replyTo.Account.Acct)
		text.WriteString(" ")
		for _, mention := range replyTo.Mentions {
			if mention.ID == me.ID || mention.ID == replyTo.Account.ID {
				continue
			}
			text.WriteString("@")
			text.WriteString(mention.Acct)
			text.WriteString(" ")
		}
	}

	text.WriteString(generateMessage(ctx, seed))
	toot.Status = text.String()
	return toot
}
// rngPool hands out *rand.Rand values. When the pool has none to give, New
// creates one seeded from the current time in nanoseconds.
var rngPool = sync.Pool{
	New: func() interface{} {
		return rand.New(rand.NewSource(time.Now().UnixNano()))
	},
}
// generateMessage returns one generated message as a space-joined string.
//
// When seed contains words, up to ten randomly picked seed words are tried as
// the anchor for genMarkov, and the first attempt that yields a line wins.
// If seed is empty or every attempt fails, an unseeded line is generated.
func generateMessage(ctx context.Context, seed string) string {
	rng := rngPool.Get().(*rand.Rand)
	defer rngPool.Put(rng)

	if candidates := strings.Fields(seed); len(candidates) > 0 {
		for attempt := 0; attempt < 10; attempt++ {
			anchor := candidates[rng.Intn(len(candidates))]
			line := genMarkov(rng, anchor)
			if line == nil {
				continue
			}
			return strings.Join(line, " ")
		}
	}

	return strings.Join(genMarkov(rng, ""), " ")
}

42
gen.py
View File

@ -1,42 +0,0 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
import argparse, sys, traceback, json
import create
# Command line: an optional status ID to reply to and a dry-run switch.
parser = argparse.ArgumentParser(description='Generate and post a toot.')
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
    help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
    help="Print the toot to stdout without posting it")
args = parser.parse_args()

cfg = json.load(open('config.json', 'r'))

client = Mastodon(
    client_id="clientcred.secret",
    access_token="usercred.secret",
    api_base_url=cfg['site'])

toot = create.make_toot()

if not args.simulate:
    try:
        if toot['media'] == None:
            client.status_post(toot['toot'], visibility = 'unlisted')
        else:
            # With media attached, the toot text doubles as the description
            # and is posted on a single line.
            mediaID = client.media_post(toot['media'], description = toot['toot'])
            client.status_post(toot['toot'].replace("\n", " "),
                media_ids = [mediaID], visibility = "unlisted")
    except Exception as err:
        # Posting failed: replace the toot with an error report and post that
        # behind a content warning instead.
        toot = {
            "toot":
            "Mistress @lynnesbian@deadinsi.de, something has gone terribly" \
            + " wrong! While attempting to post a toot, I received the following" \
            + " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2]))
        }
        client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")

# Printed in every mode, including --simulate.
print(toot['toot'])

8
go.mod Normal file
View File

@ -0,0 +1,8 @@
module github.com/lynnesbian/mstdn-ebooks
require (
github.com/gorilla/websocket v1.4.0 // indirect
github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 // indirect
golang.org/x/net v0.0.0-20181017193950-04a2e542c03f
)

8
go.sum Normal file
View File

@ -0,0 +1,8 @@
github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q=
github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a h1:poZfGmljz6MxMXvFcYQvTx7TJQ2J9Gb+B2lgjOIPQnA=
github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a/go.mod h1:/OSOSDJyV0OUlBuDV0Qrllizt3BJNj4Ir5xhckYRVmg=
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y=
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80/go.mod h1:iFyPdL66DjUD96XmzVL3ZntbzcflLnznH0fr99w5VqE=
golang.org/x/net v0.0.0-20181017193950-04a2e542c03f h1:4pRM7zYwpBjCnfA1jRmhItLxYJkaEnsmuAcRtA347DA=
golang.org/x/net v0.0.0-20181017193950-04a2e542c03f/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=

129
main.go Normal file
View File

@ -0,0 +1,129 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package main
import (
"context"
"flag"
"log"
"time"
"github.com/mattn/go-mastodon"
)
// Command-line flags. The three file locations default to names relative to
// the working directory.
var (
	flagServer = flag.String("server", "https://botsin.space", "base URL of Mastodon server")
	flagApp    = flag.String("app", "clientcred.secret", "location of Mastodon app credentials")
	flagUser   = flag.String("user", "usercred.secret", "location of Mastodon user access token")
	flagData   = flag.String("data", "ebooks.dat", "location of bot cache")
)

const (
	// scopes lists the OAuth scopes requested for both the application and
	// the user authorization.
	scopes = "read:statuses read:accounts read:follows write:statuses"
	// noRedirect is the out-of-band redirect URI: the authorization code is
	// shown to the user instead of being delivered by a redirect.
	noRedirect = "urn:ietf:wg:oauth:2.0:oob"
)
// main logs the bot in, downloads the history of every followed account into
// the Markov model, and then loops forever: it replies to mentions, learns
// from new toots by followed accounts, and posts a generated toot on every
// half-hour boundary. Any error panics through checkError.
func main() {
	log.SetFlags(0)
	flag.Parse()

	ctx := context.Background()
	cfg := &mastodon.Config{
		Server: *flagServer,
	}
	ensureApp(ctx, cfg)
	ensureUser(ctx, cfg)

	client := mastodon.NewClient(cfg)
	instance, err := client.GetInstance(ctx)
	checkError(err, "Could not get instance metadata")
	me, err := client.GetAccountCurrentUser(ctx)
	checkError(err, "Could not get current user")
	log.Println("Logged in as", me.Acct+"@"+instance.URI)

	// The stream is opened before the history download starts.
	feed, err := client.NewWSClient().StreamingWSUser(ctx)
	checkError(err, "Could not connect to user feed")

	// Collect every followed account, page by page. An empty MaxID after a
	// request is treated as "no further page".
	var following []*mastodon.Account
	var pg mastodon.Pagination
	isFollowing := make(map[mastodon.ID]*mastodon.Account)
	for {
		fs, err := client.GetAccountFollowing(ctx, me.ID, &pg)
		checkError(err, "Failed to get followed accounts")
		following = append(following, fs...)
		for _, f := range fs {
			isFollowing[f.ID] = f
		}
		if pg.MaxID == "" {
			break
		}
	}

	downloadToots(ctx, instance, following)
	log.Println("Initial history downloaded.")

	// Persist the model in the background whenever it is marked dirty.
	go func() {
		for range markovDirty {
			saveMarkov()
		}
	}()

	// Synchronize to the next half hour interval
	halfHourSync := time.After(time.Hour/2 - time.Since(time.Now().Truncate(time.Hour/2)))
	var halfHour <-chan time.Time

	for {
		select {
		case event := <-feed:
			switch e := event.(type) {
			case *mastodon.ErrorEvent:
				log.Println("Mastodon error:", e)
			case *mastodon.DeleteEvent:
				// Ignore (for now)
			case *mastodon.NotificationEvent:
				if e.Notification.Type != "mention" {
					log.Printf("Ignoring notification of type %q", e.Notification.Type)
					continue
				}
				_, err := client.PostStatus(ctx, genToot(ctx, me, e.Notification.Status))
				checkError(err, "Error replying to mention %q", e.Notification.Status.URL)
			case *mastodon.UpdateEvent:
				if _, ok := isFollowing[e.Status.Account.ID]; !ok {
					continue
				}
				// Boosts carry somebody else's words. The history download
				// only ingests "Create" activities, so skip reblogs here as
				// well to keep the two ingestion paths consistent.
				if e.Status.Reblog != nil {
					continue
				}
				if e.Status.Visibility != "unlisted" && e.Status.Visibility != "public" {
					continue
				}
				if e.Status.Sensitive {
					continue
				}
				insertStatus(ctx, e.Status.Account.ID, e.Status.URI, e.Status.Content)
			default:
				log.Printf("Unexpected event type: %T", e)
			}
		case <-halfHourSync:
			// First boundary reached: disable the one-shot timer (a nil
			// channel never fires) and switch to the periodic ticker.
			halfHourSync = nil
			halfHour = time.Tick(time.Hour / 2)
			_, err := client.PostStatus(ctx, genToot(ctx, me, nil))
			checkError(err, "Error posting status")
		case <-halfHour:
			_, err := client.PostStatus(ctx, genToot(ctx, me, nil))
			checkError(err, "Error posting status")
		}
	}
}
// checkError does nothing when err is nil. Otherwise it logs and panics with
// message formatted with arguments, followed by ": " and the error.
func checkError(err error, message string, arguments ...interface{}) {
	if err != nil {
		withErr := append(arguments, err)
		log.Panicf(message+": %v", withErr...)
	}
}

134
main.py
View File

@ -1,134 +0,0 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
from os import path
from bs4 import BeautifulSoup
import shutil, os, sqlite3, signal, sys, json
# import re
# OAuth scopes requested for both the application and the user login.
scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses"]
cfg = json.load(open('config.json', 'r'))

# Register the application once; Mastodon.py writes the credentials file.
if not path.exists("clientcred.secret"):
    print("No clientcred.secret, registering application")
    Mastodon.create_app(
        "lynnesbian_mastodon_ebooks",
        api_base_url=cfg['site'],
        to_file="clientcred.secret",
        scopes=scopes,
        website="https://github.com/Lynnesbian/mastodon-ebooks")

# Log the user in once: show the authorization URL, then exchange the code
# typed by the user for an access token stored in usercred.secret.
if not path.exists("usercred.secret"):
    print("No usercred.secret, registering application")
    client = Mastodon(client_id="clientcred.secret", api_base_url=cfg['site'])
    print("Visit this url:")
    auth_url = client.auth_request_url(scopes=scopes)
    print(auth_url)
    auth_code = input("Secret: ")
    client.log_in(code=auth_code, to_file="usercred.secret", scopes=scopes)
def parse_toot(toot):
    """Turn a toot's HTML into plain text, or return None if it is skipped.

    Toots with a content warning, boosts, and toots that are neither public
    nor unlisted are skipped. Mentions are removed, <br> and <p> become line
    breaks, hashtags keep their text, and every remaining link is replaced by
    its href.
    """
    skipped = (
        toot.spoiler_text != ""
        or toot.reblog is not None
        or toot.visibility not in ["public", "unlisted"]
    )
    if skipped:
        return

    soup = BeautifulSoup(toot.content, "html.parser")

    # Remove mentions entirely.
    # TODO: make it so that it removes the @ and instance but keeps the name
    for mention in soup.select("span.h-card"):
        mention.decompose()

    # Turn every <br> into a real line break.
    for br in soup.select("br"):
        br.insert_after("\n")
        br.decompose()

    # Put each paragraph on its own line.
    for para in soup.select("p"):
        para.insert_after("\n")
        para.unwrap()

    # Hashtags keep their visible text.
    for tag in soup.select("a.hashtag"):
        tag.unwrap()

    # Every remaining link is replaced by its target URL.
    for anchor in soup.select("a"):
        anchor.insert_after(anchor["href"])
        anchor.decompose()

    lines = [line.strip() for line in soup.get_text().strip().split("\n")]
    return "\n".join(lines).replace("&apos;", "'")
def get_toots(client, id, since_id):
    """Yield {"content", "id"} dicts for the usable toots of account `id`.

    Pages are fetched starting from `since_id` until an empty page comes
    back. A running page count is printed after every tenth page. On a
    TimeoutError the module-level database is committed and closed and the
    process exits with status 1.
    """
    pages_done = 0
    page = client.account_statuses(id, since_id = since_id)
    while page is not None and len(page) > 0:
        for status in page:
            text = parse_toot(status)
            if text == None:
                continue
            yield {
                "content": text,
                "id": status.id
            }
        try:
            page = client.fetch_next(page)
        except TimeoutError:
            print("Operation timed out, committing to database and exiting.")
            db.commit()
            db.close()
            sys.exit(1)
        pages_done += 1
        if pages_done % 10 == 0:
            print(pages_done)
# Main download script: log in, open the toot database, and store every new
# toot of each followed account.
# NOTE(review): indentation was lost in this view; the nesting of the
# statements below (notably the db.commit() after the inner loop) is not
# recoverable from here and should be confirmed against the repository.
client = Mastodon(
client_id="clientcred.secret",
access_token="usercred.secret",
api_base_url=cfg['site'])
me = client.account_verify_credentials()
following = client.account_following(me.id)
db = sqlite3.connect("toots.db")
db.text_factory=str
c = db.cursor()
# One row per toot, keyed by the toot ID.
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
db.commit()
# On Ctrl-C, commit what has been stored so far before exiting with status 1.
def handleCtrlC(signal, frame):
print("\nPREMATURE EVACUATION - Saving chunks")
db.commit()
sys.exit(1)
signal.signal(signal.SIGINT, handleCtrlC)
for f in following:
# Resume from the newest toot already stored for this account (0 if none).
last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
if last_toot != None:
last_toot = last_toot[0]
else:
last_toot = 0
print("Downloading toots for user @{}, starting from {}".format(f.username, last_toot))
for t in get_toots(client, f.id, last_toot):
# try:
c.execute("REPLACE INTO toots (id, userid, content) VALUES (?, ?, ?)", (t['id'], f.id, t['content']))
# except:
# pass #ignore toots that can't be encoded properly
db.commit()
db.execute("VACUUM") #compact db
db.commit()
db.close()

214
markov.go Normal file
View File

@ -0,0 +1,214 @@
package main
import (
"context"
"encoding/gob"
"log"
"math/rand"
"os"
"path"
"strconv"
"strings"
"sync"
"unicode"
mastodon "github.com/mattn/go-mastodon"
)
// markovPrefixLength is the number of words in one key of the markov.Next
// and markov.Prev tables.
const markovPrefixLength = 2
// probableString is a multiset of strings that can be sampled in proportion
// to how often each string was inserted.
type probableString struct {
	// Order lists the distinct strings in first-insertion order.
	Order []string
	// Count holds the number of insertions per string.
	Count map[string]int
	// Total is the sum of all counts.
	Total int
}

// insert records one more occurrence of s and returns the updated value.
// The receiver is a copy, so callers must store the result.
func (ps probableString) insert(s string) probableString {
	if ps.Count == nil {
		ps.Count = make(map[string]int)
	}
	if _, seen := ps.Count[s]; !seen {
		ps.Order = append(ps.Order, s)
	}
	ps.Count[s]++
	ps.Total++
	return ps
}

// rand picks one of the inserted strings, weighted by its count, using a
// single draw from r. It panics when nothing has been inserted, because
// r.Intn is then called with zero.
func (ps probableString) rand(r *rand.Rand) string {
	remaining := r.Intn(ps.Total)
	for i := 0; i < len(ps.Order); i++ {
		candidate := ps.Order[i]
		remaining -= ps.Count[candidate]
		if remaining < 0 {
			return candidate
		}
	}
	panic("unreachable")
}
// markovDirty carries "the model changed" signals. It has a buffer of one.
var markovDirty = make(chan struct{}, 1)
// loadData replaces the in-memory model with the gob-encoded cache file named
// by the -data flag. A missing file is not an error and leaves the model
// untouched; any other failure panics through checkError. The model lock is
// held for the whole call.
func loadData() {
	markovLock.Lock()
	defer markovLock.Unlock()

	cacheFile, err := os.Open(*flagData)
	if err != nil && os.IsNotExist(err) {
		return
	}
	checkError(err, "Could not open data cache")
	defer func() {
		checkError(cacheFile.Close(), "Could not close data cache")
	}()

	decoder := gob.NewDecoder(cacheFile)
	checkError(decoder.Decode(&markov), "Could not read data")
}
// saveMarkov writes the in-memory model to the cache file named by the -data
// flag. The data is first encoded into "<data>.tmp" and then renamed over the
// real file, so readers never see a partially written cache. Any failure
// panics through checkError. The model lock is held for the whole call.
func saveMarkov() {
	markovLock.Lock()
	defer markovLock.Unlock()

	staging := *flagData + ".tmp"
	f, err := os.Create(staging)
	checkError(err, "Could not create data staging file")
	checkError(gob.NewEncoder(f).Encode(&markov), "Could not save data")
	// Flush the staging file to stable storage before it replaces the old
	// cache. Without this, a crash shortly after the rename can leave a
	// truncated or empty cache file behind.
	checkError(f.Sync(), "Could not flush data staging file")
	checkError(f.Close(), "Could not close data staging file")
	checkError(os.Rename(staging, *flagData), "Could not commit data update")
}
// markovLock guards markov.
var markovLock sync.Mutex

// markov is the whole persisted state of the bot.
var markov = struct {
	// Accounts holds per-account download progress.
	Accounts map[mastodon.ID]accountCache
	// Next maps the normalized words preceding a position to the words seen
	// at that position; "" stands for the end of a paragraph.
	Next map[[markovPrefixLength]string]probableString
	// Prev maps the normalized words at and after a position to the words
	// seen just before it; "" stands for the start of a paragraph.
	Prev map[[markovPrefixLength]string]probableString
}{
	Accounts: make(map[mastodon.ID]accountCache),
	Next:     make(map[[markovPrefixLength]string]probableString),
	Prev:     make(map[[markovPrefixLength]string]probableString),
}
// insertStatus learns from one status of the given account.
//
// The last path element of id is parsed as a number and, when it is larger
// than the account's recorded LatestRemoteTootID, stored as the new value.
// The HTML content is cleaned and split on blank lines, and every paragraph
// is fed to updateMarkov. Finally the model is flagged dirty without blocking
// if a flag is already pending.
func insertStatus(ctx context.Context, account mastodon.ID, id, content string) {
	text := cleanContent(content)

	markovLock.Lock()
	defer markovLock.Unlock()

	if tootID, err := strconv.ParseUint(path.Base(id), 10, 64); err == nil {
		if entry := markov.Accounts[account]; tootID > entry.LatestRemoteTootID {
			entry.LatestRemoteTootID = tootID
			markov.Accounts[account] = entry
		}
	}

	for _, paragraph := range strings.Split(text, "\n\n") {
		updateMarkov(strings.Fields(paragraph))
	}

	select {
	case markovDirty <- struct{}{}:
	default:
	}
}
// updateMarkov adds one paragraph, given as its words in order, to the
// forward (markov.Next) and backward (markov.Prev) tables. The caller must
// hold markovLock.
//
// Keys are built from normalized words; the stored values are the words as
// written. The empty string is stored as the end marker in Next and as the
// start marker in Prev.
func updateMarkov(words []string) {
	// A paragraph without words (for example a status that consisted only of
	// mentions or media) carries nothing to learn. Recording it would store
	// "end of message" for the empty prefix, which lets genMarkov produce an
	// empty message.
	if len(words) == 0 {
		return
	}

	const last = markovPrefixLength - 1

	// Forward table: slide a window of the previous words over the paragraph.
	var prefix [markovPrefixLength]string
	for _, word := range words {
		markov.Next[prefix] = markov.Next[prefix].insert(word)
		copy(prefix[:], prefix[1:])
		prefix[last] = normalizeWord(word)
	}
	markov.Next[prefix] = markov.Next[prefix].insert("")

	// Backward table: for every word after the first, record its predecessor
	// under keys made of the word itself and of the word plus its followers,
	// padded with "" up to markovPrefixLength.
	for i := 1; i < len(words); i++ {
		for l := 1; l <= markovPrefixLength; l++ {
			var suffix [markovPrefixLength]string
			copy(suffix[:l], words[i:])
			for j := range suffix {
				suffix[j] = normalizeWord(suffix[j])
			}
			markov.Prev[suffix] = markov.Prev[suffix].insert(words[i-1])
		}
	}

	// The first word(s) of the paragraph have no predecessor: record the
	// start marker for them.
	for l := 1; l <= markovPrefixLength; l++ {
		var suffix [markovPrefixLength]string
		copy(suffix[:l], words)
		for j := range suffix {
			suffix[j] = normalizeWord(suffix[j])
		}
		markov.Prev[suffix] = markov.Prev[suffix].insert("")
	}
}
// genMarkov generates one line of words from the model, holding markovLock
// for the whole call.
//
// With a non-empty seed the line is built around that word: first backwards
// through markov.Prev until the start marker "" is drawn, then forwards
// through markov.Next until the end marker "" is drawn. It returns nil when
// the seed has no entry in markov.Prev. With an empty seed, generation starts
// from the empty prefix. Each direction stops once the line holds 1000 words.
// It panics when markov.Next has no entry for the starting prefix.
func genMarkov(r *rand.Rand, seed string) []string {
	const last = markovPrefixLength - 1
	var prefix [markovPrefixLength]string
	var line []string
	markovLock.Lock()
	defer markovLock.Unlock()
	if seed != "" {
		// Look the seed up on its own: the remaining key slots stay "".
		var suffix [markovPrefixLength]string
		suffix[0] = normalizeWord(seed)
		ps, ok := markov.Prev[suffix]
		if !ok {
			return nil
		}
		// Walk backwards; line is collected in reverse order for now.
		line = append(line, seed)
		for len(line) < 1000 {
			s := ps.rand(r)
			if s == "" {
				break
			}
			line = append(line, s)
			// Shift the key one slot to the right and put the word just
			// drawn in front.
			copy(suffix[1:], suffix[:])
			suffix[0] = normalizeWord(s)
			ps = markov.Prev[suffix]
		}
		// Reverse in place so the line reads front to back, ending in seed.
		for i, j := 0, len(line)-1; i < j; i, j = i+1, j-1 {
			line[i], line[j] = line[j], line[i]
		}
		// Fill the forward prefix, right-aligned, with the normalized last
		// words of the line.
		for i, j := len(line)-1, last; i >= 0 && j >= 0; i, j = i-1, j-1 {
			prefix[j] = normalizeWord(line[i])
		}
	}
	if _, ok := markov.Next[prefix]; !ok {
		log.Panicln("No markov data available for prefix", prefix, line)
	}
	// Walk forwards until the end marker is drawn.
	for len(line) < 1000 {
		ps := markov.Next[prefix]
		s := ps.rand(r)
		if s == "" {
			return line
		}
		line = append(line, s)
		copy(prefix[:], prefix[1:])
		prefix[last] = normalizeWord(s)
	}
	// probably an infinite loop
	return line
}
// normalizeWord returns s in lower case with every punctuation rune (as
// classified by unicode.IsPunct) removed.
func normalizeWord(s string) string {
	var kept strings.Builder
	for _, r := range strings.ToLower(s) {
		if unicode.IsPunct(r) {
			continue
		}
		kept.WriteRune(r)
	}
	return kept.String()
}

View File

@ -1,53 +0,0 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import mastodon
import os, random, re, json
import create
from bs4 import BeautifulSoup
# Load the instance configuration and create the API client used for replies.
cfg = json.load(open('config.json', 'r'))
# NOTE(review): api_base_url is not referenced by the rest of this script;
# the client below takes its base URL from cfg['site'].
api_base_url = "https://knzk.me"
client = mastodon.Mastodon(
client_id="clientcred.secret",
access_token="usercred.secret",
api_base_url=cfg['site'])
def extract_toot(toot):
    """Convert a toot's HTML to lower-case plain text.

    <br> and <p> become line breaks, hashtags keep their text and other links
    are replaced by their href. Profile URLs are then rewritten to the
    @user@host form, and a mention at the very start of the text is dropped.
    """
    soup = BeautifulSoup(toot, "html.parser")

    for br in soup.select("br"):
        br.insert_after("\n")
        br.decompose()

    for para in soup.select("p"):
        para.insert_after("\n")
        para.unwrap()

    for tag in soup.select("a.hashtag"):
        tag.unwrap()

    for anchor in soup.select("a"):
        anchor.insert_after(anchor["href"])
        anchor.decompose()

    lines = [line.strip() for line in soup.get_text().strip().split("\n")]
    text = "\n".join(lines)
    # Put mentions back in ...
    text = re.sub("https?://([^/]+)/(@[^ ]+)", r"\2@\1", text)
    # ... but remove the initial one.
    text = re.sub("^@[^@]+@[^ ]+ *", r"", text)
    # Lower case for easier matching.
    return text.lower()
class ReplyListener(mastodon.StreamListener):
    """Stream listener that answers every mention with a generated toot."""

    def on_notification(self, notification):
        """Reply to `notification` if it is a mention; ignore anything else."""
        if notification['type'] != 'mention':
            return

        status = notification['status']
        acct = "@" + notification['account']['acct']
        post_id = status['id']
        mention = extract_toot(status['content'])

        # The reply addresses the author of the mention.
        toot = acct + " " + create.make_toot(True)['toot']

        print(acct + " says " + mention)
        client.status_post(toot, post_id, visibility=status['visibility'])
        print("replied with " + toot)
# Create the listener and hand it to the client's user stream.
rl = ReplyListener()
client.stream_user(rl)

View File

@ -1,3 +0,0 @@
Mastodon.py==1.3.1
markovify==0.7.1
beautifulsoup4==4.6.0

199
status.go Normal file
View File

@ -0,0 +1,199 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package main
import (
"context"
"encoding/json"
"encoding/xml"
"fmt"
"log"
"net/http"
"net/url"
"strings"
"sync"
"github.com/mattn/go-mastodon"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// accountCache is the per-account state kept in markov.Accounts.
type accountCache struct {
	// LatestRemoteTootID is the largest numeric status ID seen for the
	// account; history downloads start from it.
	LatestRemoteTootID uint64
}
// downloadToots loads the cached model and then fetches, for every followed
// account concurrently, the toots published since the account's recorded
// LatestRemoteTootID, feeding each one to insertStatus. It returns once all
// downloads have finished.
//
// A failure while downloading one account is logged and that account is
// skipped; it no longer takes the whole program down, so one unreachable
// remote instance cannot prevent the bot from starting.
func downloadToots(ctx context.Context, instance *mastodon.Instance, following []*mastodon.Account) {
	loadData()

	var wg sync.WaitGroup
	wg.Add(len(following))

	// The starting IDs are read while holding the lock. The goroutines
	// themselves only touch the model through insertStatus, which takes the
	// lock on its own.
	markovLock.Lock()
	for _, f := range following {
		go func(account *mastodon.Account, start uint64) {
			defer wg.Done()
			// The helpers below report errors by panicking (checkError).
			// Contain that to this account.
			defer func() {
				if r := recover(); r != nil {
					log.Printf("Skipping history download for user %s: %v", account.Acct, r)
				}
			}()

			log.Printf("Downloading toots for user %s, starting from %d", account.Acct, start)

			// Accounts without a host part get the instance's own host.
			acct := account.Acct
			if !strings.Contains(acct, "@") {
				acct += "@" + instance.URI
			}

			loadAllToots(ctx, acct, account.URL, start, func(id, content string) {
				insertStatus(ctx, account.ID, id, content)
			})
		}(f, markov.Accounts[f.ID].LatestRemoteTootID)
	}
	markovLock.Unlock()

	wg.Wait()
}
// cleanContent converts the HTML content of a status into plain text.
//
// Top-level nodes of the fragment are separated by a blank line. Text nodes
// are copied, images contribute their alt text, line breaks (<br>) become a
// newline so that the words around them stay separate, and links whose class
// list contains "mention" are dropped together with their contents.
// A parse failure panics through checkError.
func cleanContent(s string) string {
	paragraphs, err := html.ParseFragment(strings.NewReader(s), &html.Node{
		Type:     html.ElementNode,
		Data:     "div",
		DataAtom: atom.Div,
	})
	checkError(err, "Failed to parse HTML %q", s)

	// hasMentionClass reports whether the first class attribute lists the
	// class "mention".
	// NOTE(review): Mastodon appears to mark hashtag links as
	// "mention hashtag" too, which would drop hashtags here - confirm that
	// this is intended.
	hasMentionClass := func(attrs []html.Attribute) bool {
		for _, a := range attrs {
			if a.Key != "class" {
				continue
			}
			for _, c := range strings.Fields(a.Val) {
				if c == "mention" {
					return true
				}
			}
			return false
		}
		return false
	}

	var body []byte
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		for ; n != nil; n = n.NextSibling {
			switch n.Type {
			case html.TextNode:
				body = append(body, n.Data...)
			case html.ElementNode:
				skipChildren := false
				switch n.DataAtom {
				case atom.A:
					skipChildren = hasMentionClass(n.Attr)
				case atom.Img:
					for _, a := range n.Attr {
						if a.Key == "alt" {
							body = append(body, a.Val...)
							break
						}
					}
				case atom.Br:
					// Without this the text before and after the break
					// would be glued into a single word.
					body = append(body, '\n')
				}
				if !skipChildren {
					walk(n.FirstChild)
				}
			}
		}
	}

	for i, p := range paragraphs {
		if i != 0 {
			body = append(body, "\n\n"...)
		}
		walk(p.FirstChild)
	}
	return string(body)
}
// getJSON performs a GET request for uri and decodes the JSON response body
// into v. The request is bound to ctx, so cancelling ctx aborts it. Transport
// errors, any status other than 200 OK, and decoding errors all panic.
func getJSON(ctx context.Context, uri string, v interface{}) {
	req, err := http.NewRequest(http.MethodGet, uri, nil)
	checkError(err, "Could not download %q", uri)

	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	checkError(err, "Could not download %q", uri)
	defer func() {
		checkError(resp.Body.Close(), "Error when closing %q", uri)
	}()

	if resp.StatusCode != http.StatusOK {
		log.Panicf("Error downloading %q: %v", uri, resp.Status)
	}

	checkError(json.NewDecoder(resp.Body).Decode(v), "Error decoding %q", uri)
}
func loadAllToots(ctx context.Context, acct, userURL string, start uint64, foundStatus func(id, content string)) {
webFingerURL := getWebFingerURL(ctx, acct, userURL)
outbox := webFingerUserActivity(ctx, webFingerURL) + "/outbox"
prev := fmt.Sprintf("%s?min_id=%d&page=true", outbox, start)
for prev != "" {
var page struct {
OrderedItems []struct {
Type string `json:"type"`
Object json.RawMessage `json:"object"`
} `json:"orderedItems"`
Prev string `json:"prev"`
}
getJSON(ctx, prev, &page)
for _, i := range page.OrderedItems {
if i.Type == "Create" {
var object struct {
ID string `json:"id"`
Sensitive bool `json:"sensitive"`
Content string `json:"content"`
}
checkError(json.Unmarshal(i.Object, &object), "Failed to decode toot JSON in %q", prev)
if !object.Sensitive {
foundStatus(object.ID, object.Content)
}
}
}
prev = page.Prev
}
}
// getWebFingerURL returns the WebFinger lookup URL for acct.
//
// It downloads /.well-known/host-meta from the host of userURL, reads the
// template attribute of the Link element, and substitutes the query-escaped
// "acct:" URI for "{uri}". The request is bound to ctx. Request failures, a
// non-200 status, undecodable XML, and a missing template all panic.
func getWebFingerURL(ctx context.Context, acct, userURL string) string {
	acct = url.QueryEscape("acct:" + acct)

	u, err := url.Parse(userURL)
	checkError(err, "Failed to parse user URL")
	u.Path = "/.well-known/host-meta"
	u.RawQuery = ""

	req, err := http.NewRequest(http.MethodGet, u.String(), nil)
	checkError(err, "Could not retrieve host-meta")
	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	checkError(err, "Could not retrieve host-meta")
	defer func() {
		checkError(resp.Body.Close(), "Error closing host-meta request")
	}()

	if resp.StatusCode != http.StatusOK {
		log.Panicf("Failed to load %q: %s", u, resp.Status)
	}

	var meta struct {
		Link struct {
			Template string `xml:"template,attr"`
		} `xml:"Link"`
	}
	checkError(xml.NewDecoder(resp.Body).Decode(&meta), "Could not find webfinger URL")

	// Fail here with a clear message rather than later with an empty URL.
	if meta.Link.Template == "" {
		log.Panicf("Could not find webfinger URL template in %q", u)
	}

	return strings.Replace(meta.Link.Template, "{uri}", acct, -1)
}
// webFingerUserActivity fetches the WebFinger document at uri and returns the
// href of its first link with rel "self" and type
// "application/activity+json". It panics when no such link exists.
func webFingerUserActivity(ctx context.Context, uri string) string {
	type link struct {
		Href string `json:"href"`
		Rel  string `json:"rel"`
		Type string `json:"type"`
	}
	var doc struct {
		Links []link `json:"links"`
	}
	getJSON(ctx, uri, &doc)

	for i := range doc.Links {
		candidate := doc.Links[i]
		if candidate.Rel != "self" || candidate.Type != "application/activity+json" {
			continue
		}
		return candidate.Href
	}

	log.Panicf("Could not find ActivityPub URL in web finger response: %q", uri)
	return ""
}