Rewrite in Go.
This commit is contained in:
parent
3d059d0b9b
commit
db7b9d6e10
|
@ -0,0 +1,12 @@
|
|||
# Image for the mstdn-ebooks bot, built on the Go 1.11.1 Alpine base image.
FROM golang:1.11.1-alpine

# Copy the whole build context into the image.
COPY . /mstdn-ebooks/

# Build a static binary (CGO disabled) into /usr/local/bin. git is installed
# only for the build and removed again in the same layer - presumably so that
# `go build` can fetch the module dependencies.
RUN cd /mstdn-ebooks/ \
 && apk add --no-cache git \
 && CGO_ENABLED=0 go build -o /usr/local/bin/mstdn-ebooks \
 && apk del git

# The bot runs from the data volume, so files it opens by relative path
# land there.
VOLUME /mstdn-ebooks/data
WORKDIR /mstdn-ebooks/data

CMD ["mstdn-ebooks", "-server", "https://botsin.space"]
|
34
README.md
34
README.md
|
@ -1,26 +1,22 @@
|
|||
# mstdn-ebooks
|
||||
|
||||
**Lynnear Edition**
|
||||
|
||||
This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks), such as:
|
||||
This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks) and [the other original](https://github.com/Lynnesbian/mstdn-ebooks/tree/3d059d0b9b66fd31378574104f1a56f2be5a319c), such as:
|
||||
|
||||
- Unicode support
|
||||
- Non-Markov stuff
|
||||
- Stores toots in a sqlite database rather than a text file
|
||||
- Doesn't unnecessarily redownload all toots every time
|
||||
|
||||
## Install/usage guide
|
||||
An installation and usage guide is available for unix-based platforms (linux, macOS...) [here](https://cloud.lynnesbian.space/s/Qxxm2sYdMZaqWat).
|
||||
- Doesn't unnecessarily redownload all toots every time
|
||||
- Uses an API called "webfinger" to allow downloading toots not known to your bot's instance
|
||||
- Self-contained executable handles scheduling
|
||||
- Docker support
|
||||
- Written in Go
|
||||
|
||||
## Original README
|
||||
hey look it's an ebooks bot
|
||||
## Installation
|
||||
|
||||
python3
|
||||
|
||||
install the requirements with `sudo pip3 install -r requirements`
|
||||
|
||||
make a bot (probably on bots in space) and follow the target accounts
|
||||
|
||||
run `python3 main.py` to login and scrape
|
||||
|
||||
run `python3 gen.py` to make a toot
|
||||
|
||||
cron is an okay choice to make it toot regularly
|
||||
1. Build mstdn-ebooks the same way you would build any Go program (`go get`, etc.). Alternatively, if you don't want to build it yourself, download a [precompiled release version](https://github.com/Lynnesbian/mstdn-ebooks/releases/latest).
|
||||
2. If you haven't already, create an account on [botsin.space](https://botsin.space) or another instance.
|
||||
3. Make sure the bot account is ONLY following you. Remove any default follows.
|
||||
4. Run the `mstdn-ebooks` command. If your instance is not botsin.space, run the command as `mstdn-ebooks -server https://[your instance]`.
|
||||
5. Copy the URL it generates into a browser logged into your bot account, and copy the code that Mastodon generates back to the program.
|
||||
6. Congratulations! Your ebooks bot is now running. To restart it, you only need to redo step 4.
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
|
||||
"github.com/mattn/go-mastodon"
|
||||
)
|
||||
|
||||
func ensureApp(ctx context.Context, cfg *mastodon.Config) {
|
||||
if b, err := ioutil.ReadFile(*flagApp); os.IsNotExist(err) {
|
||||
log.Println("No clientcred.secret, registering application")
|
||||
|
||||
app, err := mastodon.RegisterApp(ctx, &mastodon.AppConfig{
|
||||
Server: *flagServer,
|
||||
ClientName: "lynnesbian_mastodon_ebooks",
|
||||
Website: "https://github.com/Lynnesbian/mstdn-ebooks",
|
||||
Scopes: scopes,
|
||||
RedirectURIs: noRedirect,
|
||||
})
|
||||
checkError(err, "Could not register app")
|
||||
|
||||
cfg.ClientID = app.ClientID
|
||||
cfg.ClientSecret = app.ClientSecret
|
||||
|
||||
checkError(ioutil.WriteFile(*flagApp, []byte(app.ClientID+"\n"+app.ClientSecret+"\n"), 0644), "Could not save app credentials")
|
||||
|
||||
// If the app credentials were just generated, the user access
|
||||
// token cannot possibly be valid.
|
||||
_ = os.Remove(*flagUser)
|
||||
} else {
|
||||
checkError(err, "Could not read app credentials")
|
||||
|
||||
lines := bytes.Split(b, []byte{'\n'})
|
||||
|
||||
// consider final newline to be optional
|
||||
if len(lines) == 3 && len(lines[2]) == 0 {
|
||||
lines = lines[:2]
|
||||
}
|
||||
|
||||
if len(lines) != 2 {
|
||||
log.Fatalf("App credentials (%q) malformed. Cannot proceed.", *flagApp)
|
||||
}
|
||||
|
||||
cfg.ClientID = string(lines[0])
|
||||
cfg.ClientSecret = string(lines[1])
|
||||
}
|
||||
}
|
||||
|
||||
func ensureUser(ctx context.Context, cfg *mastodon.Config) {
|
||||
if b, err := ioutil.ReadFile(*flagUser); os.IsNotExist(err) {
|
||||
log.Println("No usercred.secret, registering application")
|
||||
authURL, err := url.Parse(*flagServer)
|
||||
checkError(err, "Could not parse instance root URL")
|
||||
authURL.Path = "/oauth/authorize"
|
||||
authURL.RawQuery = url.Values{
|
||||
"scope": {scopes},
|
||||
"response_type": {"code"},
|
||||
"redirect_uri": {noRedirect},
|
||||
"client_id": {cfg.ClientID},
|
||||
}.Encode()
|
||||
log.Println("Visit this url:", authURL)
|
||||
fmt.Print("Secret: ")
|
||||
var authCode string
|
||||
_, err = fmt.Scanln(&authCode)
|
||||
checkError(err, "Failed to read authorization code")
|
||||
|
||||
authURL.Path = "/oauth/token"
|
||||
authURL.RawQuery = ""
|
||||
|
||||
resp, err := http.PostForm(authURL.String(), url.Values{
|
||||
"client_id": {cfg.ClientID},
|
||||
"client_secret": {cfg.ClientSecret},
|
||||
"grant_type": {"authorization_code"},
|
||||
"code": {authCode},
|
||||
"redirect_uri": {noRedirect},
|
||||
})
|
||||
checkError(err, "Failed to request access token")
|
||||
|
||||
defer func() {
|
||||
checkError(resp.Body.Close(), "Error closing response body")
|
||||
}()
|
||||
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
var payload struct {
|
||||
AccessToken string `json:"access_token"`
|
||||
}
|
||||
checkError(json.NewDecoder(resp.Body).Decode(&payload), "Error decoding authentication response")
|
||||
|
||||
cfg.AccessToken = payload.AccessToken
|
||||
|
||||
checkError(ioutil.WriteFile(*flagUser, []byte(payload.AccessToken+"\n"), 0644), "Error saving access token")
|
||||
} else {
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
checkError(err, "Network error reading authentication error")
|
||||
|
||||
log.Fatalln("Authentication failed:", string(body))
|
||||
}
|
||||
} else {
|
||||
checkError(err, "Could not read user access token")
|
||||
|
||||
cfg.AccessToken = string(bytes.TrimSuffix(b, []byte{'\n'}))
|
||||
}
|
||||
}
|
|
@ -1 +0,0 @@
|
|||
{"site":"https://botsin.space"}
|
58
create.py
58
create.py
|
@ -1,58 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
import markovify
|
||||
import json
|
||||
import re, random, multiprocessing, time, sqlite3, shutil, os
|
||||
|
||||
def make_sentence(output):
	"""Generate one sentence from the stored toots and send it through `output`.

	Works on a temporary copy of toots.db (toots-copy.db), builds a markovify
	model from every row's content, and keeps generating until a sentence of
	at most 500 characters is produced. NUL characters in the result are
	turned into newlines before sending.
	"""
	class nlt_fixed(markovify.NewlineText):
		def test_sentence_input(self, sentence):
			return True #all sentences are valid <3

	shutil.copyfile("toots.db", "toots-copy.db")
	db = sqlite3.connect("toots-copy.db")
	db.text_factory=str
	rows = db.cursor().execute("SELECT content FROM `toots`").fetchall()

	# every toot goes on its own line, each preceded by a newline
	corpus = "".join("\n{}".format(row[0]) for row in rows)
	model = nlt_fixed(corpus)
	corpus = None
	db.close()
	os.remove("toots-copy.db")

	while True:
		sentence = model.make_short_sentence(500, tries=100000)
		if sentence is not None:
			break

	output.send(sentence.replace("\0", "\n"))
|
||||
|
||||
def make_toot(force_markov = False, args = None):
	"""Return a generated toot by delegating to make_toot_markov().

	Both parameters are accepted but not used by this function.
	"""
	return make_toot_markov()
|
||||
|
||||
def make_toot_markov():
	"""Generate a toot in a worker process, giving each attempt 10 seconds.

	Up to 10 attempts are made. An attempt fails when the worker is still
	running after 10 seconds (it is then terminated) or when it exits without
	sending a sentence.

	Returns a dict with the generated text under "toot" (None if every
	attempt failed) and None under "media".
	"""
	tries = 0
	toot = None
	while toot is None and tries < 10:
		pin, pout = multiprocessing.Pipe(False)
		p = multiprocessing.Process(target = make_sentence, args = [pout])
		p.start()
		p.join(10)
		if p.is_alive():
			p.terminate()
			p.join()
			tries = tries + 1
		elif pin.poll():
			toot = pin.recv()
		else:
			# The worker exited without sending anything (e.g. it crashed).
			# recv() would block forever here because this process still
			# holds the write end of the pipe.
			tries = tries + 1
		pin.close()
		pout.close()
	return {
			"toot":toot,
			"media":None
		}
|
|
@ -0,0 +1,70 @@
|
|||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mattn/go-mastodon"
|
||||
)
|
||||
|
||||
func genToot(ctx context.Context, me *mastodon.Account, replyTo *mastodon.Status) *mastodon.Toot {
|
||||
var replyToID mastodon.ID
|
||||
visibility := "public"
|
||||
var body []byte
|
||||
var seed string
|
||||
|
||||
if replyTo != nil {
|
||||
replyToID = replyTo.ID
|
||||
visibility = replyTo.Visibility
|
||||
seed = cleanContent(replyTo.Content)
|
||||
|
||||
body = append(body, '@')
|
||||
body = append(body, replyTo.Account.Acct...)
|
||||
body = append(body, ' ')
|
||||
|
||||
for _, m := range replyTo.Mentions {
|
||||
if m.ID != me.ID && m.ID != replyTo.Account.ID {
|
||||
body = append(body, '@')
|
||||
body = append(body, m.Acct...)
|
||||
body = append(body, ' ')
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
body = append(body, generateMessage(ctx, seed)...)
|
||||
|
||||
return &mastodon.Toot{
|
||||
Status: string(body),
|
||||
Visibility: visibility,
|
||||
InReplyToID: replyToID,
|
||||
}
|
||||
}
|
||||
|
||||
// rngPool hands out *rand.Rand values. Each new generator is seeded from the
// wall clock at the moment it is created.
var rngPool = sync.Pool{
	New: func() interface{} {
		seed := time.Now().UnixNano()
		return rand.New(rand.NewSource(seed))
	},
}
|
||||
|
||||
func generateMessage(ctx context.Context, seed string) string {
|
||||
r := rngPool.Get().(*rand.Rand)
|
||||
defer rngPool.Put(r)
|
||||
|
||||
words := strings.Fields(seed)
|
||||
if len(words) != 0 {
|
||||
for i := 0; i < 10; i++ {
|
||||
if line := genMarkov(r, words[r.Intn(len(words))]); line != nil {
|
||||
return strings.Join(line, " ")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(genMarkov(r, ""), " ")
|
||||
}
|
42
gen.py
42
gen.py
|
@ -1,42 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from mastodon import Mastodon
|
||||
import argparse, sys, traceback, json
|
||||
import create
|
||||
|
||||
# Command-line interface: an optional status ID to reply to, and a flag that
# prints the toot instead of posting it.
parser = argparse.ArgumentParser(description='Generate and post a toot.')
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
	help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
	help="Print the toot to stdout without posting it")

args = parser.parse_args()

# Read the configuration and close the file straight away.
with open('config.json', 'r') as config_file:
	cfg = json.load(config_file)

client = Mastodon(
	client_id="clientcred.secret",
	access_token="usercred.secret",
	api_base_url=cfg['site'])

toot = create.make_toot()
if not args.simulate:
	try:
		if toot['media'] is not None:
			mediaID = client.media_post(toot['media'], description = toot['toot'])
			client.status_post(toot['toot'].replace("\n", " "),
				media_ids = [mediaID], visibility = "unlisted")
		else:
			client.status_post(toot['toot'], visibility = 'unlisted')
	except Exception as err:
		# Posting failed: replace the toot with an error report (including
		# the traceback) and post that behind a content warning instead.
		toot = {
			"toot":
			"Mistress @lynnesbian@deadinsi.de, something has gone terribly" \
			+ " wrong! While attempting to post a toot, I received the following" \
			+ " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2]))
		}
		client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")
print(toot['toot'])
|
|
@ -0,0 +1,8 @@
|
|||
module github.com/lynnesbian/mstdn-ebooks
|
||||
|
||||
require (
|
||||
github.com/gorilla/websocket v1.4.0 // indirect
|
||||
github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a
|
||||
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 // indirect
|
||||
golang.org/x/net v0.0.0-20181017193950-04a2e542c03f
|
||||
)
|
|
@ -0,0 +1,8 @@
|
|||
github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q=
|
||||
github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
|
||||
github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a h1:poZfGmljz6MxMXvFcYQvTx7TJQ2J9Gb+B2lgjOIPQnA=
|
||||
github.com/mattn/go-mastodon v0.0.3-0.20180129050910-2ccbcfe14d7a/go.mod h1:/OSOSDJyV0OUlBuDV0Qrllizt3BJNj4Ir5xhckYRVmg=
|
||||
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y=
|
||||
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80/go.mod h1:iFyPdL66DjUD96XmzVL3ZntbzcflLnznH0fr99w5VqE=
|
||||
golang.org/x/net v0.0.0-20181017193950-04a2e542c03f h1:4pRM7zYwpBjCnfA1jRmhItLxYJkaEnsmuAcRtA347DA=
|
||||
golang.org/x/net v0.0.0-20181017193950-04a2e542c03f/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
|
@ -0,0 +1,129 @@
|
|||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/mattn/go-mastodon"
|
||||
)
|
||||
|
||||
// Command-line flags. The values are only meaningful after flag.Parse has
// been called.
var flagServer = flag.String("server", "https://botsin.space", "base URL of Mastodon server")
var flagApp = flag.String("app", "clientcred.secret", "location of Mastodon app credentials")
var flagUser = flag.String("user", "usercred.secret", "location of Mastodon user access token")
var flagData = flag.String("data", "ebooks.dat", "location of bot cache")

const (
	// scopes is the space-separated list of OAuth scopes requested both when
	// registering the application and when authorizing the user.
	scopes = "read:statuses read:accounts read:follows write:statuses"
	// noRedirect is the redirect URI passed to the server during
	// registration and authorization; the authorization code is entered by
	// hand rather than delivered through a redirect.
	noRedirect = "urn:ietf:wg:oauth:2.0:oob"
)
|
||||
|
||||
func main() {
|
||||
log.SetFlags(0)
|
||||
|
||||
flag.Parse()
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
cfg := &mastodon.Config{
|
||||
Server: *flagServer,
|
||||
}
|
||||
|
||||
ensureApp(ctx, cfg)
|
||||
ensureUser(ctx, cfg)
|
||||
|
||||
client := mastodon.NewClient(cfg)
|
||||
|
||||
instance, err := client.GetInstance(ctx)
|
||||
checkError(err, "Could not get instance metadata")
|
||||
me, err := client.GetAccountCurrentUser(ctx)
|
||||
checkError(err, "Could not get current user")
|
||||
|
||||
log.Println("Logged in as", me.Acct+"@"+instance.URI)
|
||||
|
||||
feed, err := client.NewWSClient().StreamingWSUser(ctx)
|
||||
checkError(err, "Could not connect to user feed")
|
||||
|
||||
var following []*mastodon.Account
|
||||
var pg mastodon.Pagination
|
||||
isFollowing := make(map[mastodon.ID]*mastodon.Account)
|
||||
for {
|
||||
fs, err := client.GetAccountFollowing(ctx, me.ID, &pg)
|
||||
checkError(err, "Failed to get followed accounts")
|
||||
|
||||
following = append(following, fs...)
|
||||
for _, f := range fs {
|
||||
isFollowing[f.ID] = f
|
||||
}
|
||||
|
||||
if pg.MaxID == "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
downloadToots(ctx, instance, following)
|
||||
log.Println("Initial history downloaded.")
|
||||
|
||||
go func() {
|
||||
for range markovDirty {
|
||||
saveMarkov()
|
||||
}
|
||||
}()
|
||||
|
||||
// Synchronize to the next half hour interval
|
||||
halfHourSync := time.After(time.Hour/2 - time.Since(time.Now().Truncate(time.Hour/2)))
|
||||
var halfHour <-chan time.Time
|
||||
|
||||
for {
|
||||
select {
|
||||
case event := <-feed:
|
||||
switch e := event.(type) {
|
||||
case *mastodon.ErrorEvent:
|
||||
log.Println("Mastodon error:", e)
|
||||
case *mastodon.DeleteEvent:
|
||||
// Ignore (for now)
|
||||
case *mastodon.NotificationEvent:
|
||||
if e.Notification.Type != "mention" {
|
||||
log.Printf("Ignoring notification of type %q", e.Notification.Type)
|
||||
continue
|
||||
}
|
||||
_, err := client.PostStatus(ctx, genToot(ctx, me, e.Notification.Status))
|
||||
checkError(err, "Error replying to mention %q", e.Notification.Status.URL)
|
||||
case *mastodon.UpdateEvent:
|
||||
if _, ok := isFollowing[e.Status.Account.ID]; !ok {
|
||||
continue
|
||||
}
|
||||
if e.Status.Visibility != "unlisted" && e.Status.Visibility != "public" {
|
||||
continue
|
||||
}
|
||||
if e.Status.Sensitive {
|
||||
continue
|
||||
}
|
||||
insertStatus(ctx, e.Status.Account.ID, e.Status.URI, e.Status.Content)
|
||||
default:
|
||||
log.Printf("Unexpected event type: %T", e)
|
||||
}
|
||||
case <-halfHourSync:
|
||||
halfHourSync = nil
|
||||
halfHour = time.Tick(time.Hour / 2)
|
||||
_, err := client.PostStatus(ctx, genToot(ctx, me, nil))
|
||||
checkError(err, "Error posting status")
|
||||
case <-halfHour:
|
||||
_, err := client.PostStatus(ctx, genToot(ctx, me, nil))
|
||||
checkError(err, "Error posting status")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// checkError does nothing when err is nil. Otherwise it logs and panics with
// message, formatted with arguments, followed by ": " and the error.
func checkError(err error, message string, arguments ...interface{}) {
	if err != nil {
		log.Panicf(message+": %v", append(arguments, err)...)
	}
}
|
134
main.py
134
main.py
|
@ -1,134 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from mastodon import Mastodon
|
||||
from os import path
|
||||
from bs4 import BeautifulSoup
|
||||
import shutil, os, sqlite3, signal, sys, json
|
||||
# import re
|
||||
|
||||
# OAuth scopes requested for the application and for the user login.
scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses"]

# Read the configuration and close the file straight away.
with open('config.json', 'r') as config_file:
	cfg = json.load(config_file)

# Register the application once; the credentials are kept in clientcred.secret.
if not path.exists("clientcred.secret"):

	print("No clientcred.secret, registering application")
	Mastodon.create_app("lynnesbian_mastodon_ebooks", api_base_url=cfg['site'], to_file="clientcred.secret", scopes=scopes, website="https://github.com/Lynnesbian/mastodon-ebooks")

# Log the user in once; the token is kept in usercred.secret.
if not path.exists("usercred.secret"):
	print("No usercred.secret, registering application")
	client = Mastodon(client_id="clientcred.secret", api_base_url=cfg['site'])
	print("Visit this url:")
	print(client.auth_request_url(scopes=scopes))
	client.log_in(code=input("Secret: "), to_file="usercred.secret", scopes=scopes)
|
||||
|
||||
def parse_toot(toot):
	"""Turn a toot into plain text, or return None if it must be skipped.

	Toots with a content warning, boosts, and toots that are neither public
	nor unlisted are skipped. For the rest: mentions are removed, <br> and <p>
	become line breaks, hashtags keep their text, every other link is replaced
	by its target URL, and each resulting line is stripped of surrounding
	whitespace.
	"""
	if toot.spoiler_text != "": return
	if toot.reblog is not None: return
	if toot.visibility not in ["public", "unlisted"]: return

	soup = BeautifulSoup(toot.content, "html.parser")

	# this is the code that removes all mentions
	# TODO: make it so that it removes the @ and instance but keeps the name
	for mention in soup.select("span.h-card"):
		mention.decompose()

	# make all linebreaks actual linebreaks
	for lb in soup.select("br"):
		lb.insert_after("\n")
		lb.decompose()

	# make each p element its own line because sometimes they decide not to be
	for p in soup.select("p"):
		p.insert_after("\n")
		p.unwrap()

	# keep hashtags in the toots
	for ht in soup.select("a.hashtag"):
		ht.unwrap()

	# unwrap all links (i like the bots posting links)
	for link in soup.select("a"):
		# an anchor without an href has no URL to keep; just drop it
		href = link.get("href")
		if href:
			link.insert_after(href)
		link.decompose()

	text = "\n".join(line.strip() for line in soup.get_text().strip().split("\n"))

	# NOTE(review): this replace read '"\'" -> "\'"' (a no-op) in the copy under
	# review; the entity was presumably decoded when the page was rendered.
	# Restored as &apos; - confirm against the repository.
	text = text.replace("&apos;", "'")
	return text
|
||||
|
||||
def get_toots(client, id, since_id):
	"""Yield {"content", "id"} dicts for the usable toots of account `id`.

	Statuses are fetched page by page starting after `since_id`; toots that
	parse_toot rejects are left out. The page counter is printed every tenth
	page. If fetching the next page times out, the module-level database is
	committed and closed and the program exits with status 1.
	"""
	pages_done = 0
	page = client.account_statuses(id, since_id = since_id)
	while page is not None and len(page) > 0:
		for status in page:
			parsed = parse_toot(status)
			if parsed is None:
				continue
			yield {
				"content": parsed,
				"id": status.id
			}
		try:
			page = client.fetch_next(page)
		except TimeoutError:
			print("Operation timed out, committing to database and exiting.")
			db.commit()
			db.close()
			sys.exit(1)
		pages_done += 1
		if pages_done % 10 == 0:
			print(pages_done)
|
||||
|
||||
# Client authenticated with the stored application and user credentials.
client = Mastodon(
	client_id="clientcred.secret",
	access_token="usercred.secret",
	api_base_url=cfg['site'])

# The accounts followed by the logged-in account are the ones to download.
me = client.account_verify_credentials()
following = client.account_following(me.id)

# Open the toot database and create the table on first use.
db = sqlite3.connect("toots.db")
db.text_factory=str
c = db.cursor()
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
db.commit()
|
||||
|
||||
def handleCtrlC(signal, frame):
	"""Signal handler: commit what has been stored so far and exit with status 1.

	Both arguments are supplied by the signal machinery and are not used.
	"""
	print("\nPREMATURE EVACUATION - Saving chunks")
	db.commit()
	sys.exit(1)

# Run the handler on SIGINT (Ctrl+C).
signal.signal(signal.SIGINT, handleCtrlC)
|
||||
|
||||
# Download, for every followed account, the toots newer than the newest one
# already stored for that account.
for f in following:
	# userid is an integer column, so compare with "=" rather than the
	# text-pattern operator LIKE.
	last_toot = c.execute("SELECT id FROM `toots` WHERE userid = ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
	if last_toot is not None:
		last_toot = last_toot[0]
	else:
		last_toot = 0
	print("Downloading toots for user @{}, starting from {}".format(f.username, last_toot))
	for t in get_toots(client, f.id, last_toot):
		c.execute("REPLACE INTO toots (id, userid, content) VALUES (?, ?, ?)", (t['id'], f.id, t['content']))

db.commit()
db.execute("VACUUM") #compact db
db.commit()
db.close()
|
|
@ -0,0 +1,214 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/gob"
|
||||
"log"
|
||||
"math/rand"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode"
|
||||
|
||||
mastodon "github.com/mattn/go-mastodon"
|
||||
)
|
||||
|
||||
// markovPrefixLength is the number of words that make up one key of the
// Markov chain tables.
const markovPrefixLength = 2
|
||||
|
||||
// probableString is a multiset of strings from which a member can be drawn
// at random, weighted by how often it was inserted.
type probableString struct {
	// Order lists the distinct strings in the order they were first inserted.
	Order []string
	// Count holds how many times each string was inserted.
	Count map[string]int
	// Total is the sum of all counts.
	Total int
}

// insert records one more occurrence of s and returns the updated value.
// The zero value is ready to use. The result must be kept by the caller:
// Order and Total change on the returned copy only.
func (ps probableString) insert(s string) probableString {
	if _, seen := ps.Count[s]; !seen {
		if ps.Count == nil {
			ps.Count = make(map[string]int)
		}
		ps.Order = append(ps.Order, s)
	}

	ps.Count[s]++
	ps.Total++

	return ps
}

// rand draws one string, each with probability Count/Total. It panics when
// nothing has been inserted (Total is zero).
func (ps probableString) rand(r *rand.Rand) string {
	remaining := r.Intn(ps.Total)

	// Walk the strings in insertion order, treating each count as a
	// consecutive slice of the range [0, Total).
	for i := 0; i < len(ps.Order); i++ {
		candidate := ps.Order[i]
		weight := ps.Count[candidate]
		if remaining < weight {
			return candidate
		}
		remaining -= weight
	}

	panic("unreachable")
}
|
||||
|
||||
// markovDirty signals that the in-memory model has changed. The buffer of
// one lets a signal be left pending without blocking the sender.
var markovDirty = make(chan struct{}, 1)
|
||||
|
||||
func loadData() {
|
||||
markovLock.Lock()
|
||||
defer markovLock.Unlock()
|
||||
|
||||
f, err := os.Open(*flagData)
|
||||
if os.IsNotExist(err) {
|
||||
return
|
||||
}
|
||||
checkError(err, "Could not open data cache")
|
||||
defer func() {
|
||||
checkError(f.Close(), "Could not close data cache")
|
||||
}()
|
||||
|
||||
checkError(gob.NewDecoder(f).Decode(&markov), "Could not read data")
|
||||
}
|
||||
|
||||
func saveMarkov() {
|
||||
markovLock.Lock()
|
||||
defer markovLock.Unlock()
|
||||
|
||||
f, err := os.Create(*flagData + ".tmp")
|
||||
checkError(err, "Could not create data staging file")
|
||||
checkError(gob.NewEncoder(f).Encode(&markov), "Could not save data")
|
||||
checkError(f.Close(), "Could not close data staging file")
|
||||
checkError(os.Rename(*flagData+".tmp", *flagData), "Could not commit data update")
|
||||
}
|
||||
|
||||
// markovLock guards markov.
var markovLock sync.Mutex

// markov is the bot's whole persistent state; it is what loadData and
// saveMarkov read and write.
var markov = struct {
	// Accounts holds per-account download bookkeeping, keyed by account ID.
	Accounts map[mastodon.ID]accountCache
	// Next maps the normalized words preceding a position to the words seen
	// at that position; an empty string marks the end of a paragraph.
	Next map[[markovPrefixLength]string]probableString
	// Prev maps the normalized words starting at a position to the words seen
	// just before it; an empty string marks the start of a paragraph.
	Prev map[[markovPrefixLength]string]probableString
}{
	Accounts: make(map[mastodon.ID]accountCache),
	Next:     make(map[[markovPrefixLength]string]probableString),
	Prev:     make(map[[markovPrefixLength]string]probableString),
}
|
||||
|
||||
func insertStatus(ctx context.Context, account mastodon.ID, id, content string) {
|
||||
content = cleanContent(content)
|
||||
|
||||
paragraphs := strings.Split(content, "\n\n")
|
||||
|
||||
markovLock.Lock()
|
||||
defer markovLock.Unlock()
|
||||
|
||||
cache := markov.Accounts[account]
|
||||
if n, err := strconv.ParseUint(path.Base(id), 10, 64); err == nil && n > cache.LatestRemoteTootID {
|
||||
cache.LatestRemoteTootID = n
|
||||
markov.Accounts[account] = cache
|
||||
}
|
||||
|
||||
for _, p := range paragraphs {
|
||||
updateMarkov(strings.Fields(p))
|
||||
}
|
||||
|
||||
select {
|
||||
case markovDirty <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func updateMarkov(words []string) {
|
||||
const last = markovPrefixLength - 1
|
||||
var prefix [markovPrefixLength]string
|
||||
|
||||
for _, word := range words {
|
||||
markov.Next[prefix] = markov.Next[prefix].insert(word)
|
||||
copy(prefix[:], prefix[1:])
|
||||
prefix[last] = normalizeWord(word)
|
||||
}
|
||||
|
||||
markov.Next[prefix] = markov.Next[prefix].insert("")
|
||||
|
||||
for i := 1; i < len(words); i++ {
|
||||
for l := 1; l <= markovPrefixLength; l++ {
|
||||
var suffix [markovPrefixLength]string
|
||||
copy(suffix[:l], words[i:])
|
||||
for j := range suffix {
|
||||
suffix[j] = normalizeWord(suffix[j])
|
||||
}
|
||||
markov.Prev[suffix] = markov.Prev[suffix].insert(words[i-1])
|
||||
}
|
||||
}
|
||||
for l := 1; l <= markovPrefixLength; l++ {
|
||||
var suffix [markovPrefixLength]string
|
||||
copy(suffix[:l], words)
|
||||
for j := range suffix {
|
||||
suffix[j] = normalizeWord(suffix[j])
|
||||
}
|
||||
markov.Prev[suffix] = markov.Prev[suffix].insert("")
|
||||
}
|
||||
}
|
||||
|
||||
func genMarkov(r *rand.Rand, seed string) []string {
|
||||
const last = markovPrefixLength - 1
|
||||
var prefix [markovPrefixLength]string
|
||||
var line []string
|
||||
|
||||
markovLock.Lock()
|
||||
defer markovLock.Unlock()
|
||||
|
||||
if seed != "" {
|
||||
var suffix [markovPrefixLength]string
|
||||
suffix[0] = normalizeWord(seed)
|
||||
ps, ok := markov.Prev[suffix]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
line = append(line, seed)
|
||||
|
||||
for len(line) < 1000 {
|
||||
s := ps.rand(r)
|
||||
if s == "" {
|
||||
break
|
||||
}
|
||||
line = append(line, s)
|
||||
copy(suffix[1:], suffix[:])
|
||||
suffix[0] = normalizeWord(s)
|
||||
ps = markov.Prev[suffix]
|
||||
}
|
||||
|
||||
for i, j := 0, len(line)-1; i < j; i, j = i+1, j-1 {
|
||||
line[i], line[j] = line[j], line[i]
|
||||
}
|
||||
|
||||
for i, j := len(line)-1, last; i >= 0 && j >= 0; i, j = i-1, j-1 {
|
||||
prefix[j] = normalizeWord(line[i])
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := markov.Next[prefix]; !ok {
|
||||
log.Panicln("No markov data available for prefix", prefix, line)
|
||||
}
|
||||
|
||||
for len(line) < 1000 {
|
||||
ps := markov.Next[prefix]
|
||||
s := ps.rand(r)
|
||||
|
||||
if s == "" {
|
||||
return line
|
||||
}
|
||||
|
||||
line = append(line, s)
|
||||
copy(prefix[:], prefix[1:])
|
||||
prefix[last] = normalizeWord(s)
|
||||
}
|
||||
|
||||
// probably an infinite loop
|
||||
return line
|
||||
}
|
||||
|
||||
// normalizeWord lower-cases s and removes every punctuation character from
// it.
func normalizeWord(s string) string {
	dropPunct := func(r rune) rune {
		if unicode.IsPunct(r) {
			return -1 // a negative rune tells strings.Map to drop it
		}
		return r
	}
	return strings.Map(dropPunct, strings.ToLower(s))
}
|
53
reply.py
53
reply.py
|
@ -1,53 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
import mastodon
|
||||
import os, random, re, json
|
||||
import create
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Site configuration; cfg['site'] is the base URL the client talks to.
cfg = json.load(open('config.json', 'r'))

# NOTE(review): api_base_url is not referenced by any of the code visible
# here; the client below uses cfg['site'] instead. Confirm before removing.
api_base_url = "https://knzk.me"
# Client authenticated with the stored application and user credentials.
client = mastodon.Mastodon(
	client_id="clientcred.secret",
	access_token="usercred.secret",
	api_base_url=cfg['site'])
|
||||
|
||||
def extract_toot(toot):
	"""Turn the HTML content of a mention into lower-case plain text.

	<br> and <p> become line breaks, hashtags keep their text, every other
	link is replaced by its target URL, and each line is stripped. Profile
	URLs of the form https://host/@user are then rewritten as @user@host,
	and a mention at the very start of the text is removed.
	"""
	#copied from main.py, see there for comments
	soup = BeautifulSoup(toot, "html.parser")
	for lb in soup.select("br"):
		lb.insert_after("\n")
		lb.decompose()
	for p in soup.select("p"):
		p.insert_after("\n")
		p.unwrap()
	for ht in soup.select("a.hashtag"):
		ht.unwrap()
	for link in soup.select("a"):
		# an anchor without an href has no URL to keep; just drop it
		href = link.get("href")
		if href:
			link.insert_after(href)
		link.decompose()
	text = "\n".join(line.strip() for line in soup.get_text().strip().split("\n"))
	text = re.sub("https?://([^/]+)/(@[^ ]+)", r"\2@\1", text) #put mentions back in
	text = re.sub("^@[^@]+@[^ ]+ *", r"", text) #...but remove the initial one
	text = text.lower() #for easier matching
	return text
|
||||
|
||||
class ReplyListener(mastodon.StreamListener):
	"""Stream listener that answers every mention with a generated toot."""

	def on_notification(self, notification):
		"""Reply to `notification` if it is a mention; ignore anything else.

		The reply is addressed to the mentioning account, is attached to the
		mentioning status, and uses that status' visibility.
		"""
		if notification['type'] != 'mention':
			return

		status = notification['status']
		acct = "@" + notification['account']['acct']
		mention = extract_toot(status['content'])
		toot = acct + " " + create.make_toot(True)['toot']

		print(acct + " says " + mention)
		client.status_post(toot, status['id'], visibility=status['visibility'])
		print("replied with " + toot)

# Listen on the user stream.
rl = ReplyListener()
client.stream_user(rl)
|
|
@ -1,3 +0,0 @@
|
|||
Mastodon.py==1.3.1
|
||||
markovify==0.7.1
|
||||
beautifulsoup4==4.6.0
|
|
@ -0,0 +1,199 @@
|
|||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/mattn/go-mastodon"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// accountCache is the per-account bookkeeping stored in the data cache.
type accountCache struct {
	// LatestRemoteTootID is the largest numeric status ID learned for the
	// account so far; downloads start from it.
	LatestRemoteTootID uint64
}
|
||||
|
||||
func downloadToots(ctx context.Context, instance *mastodon.Instance, following []*mastodon.Account) {
|
||||
loadData()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(following))
|
||||
|
||||
markovLock.Lock()
|
||||
for _, f := range following {
|
||||
go func(account *mastodon.Account, start uint64) {
|
||||
defer wg.Done()
|
||||
|
||||
log.Printf("Downloading toots for user %s, starting from %d", account.Acct, start)
|
||||
|
||||
acct := account.Acct
|
||||
if !strings.Contains(acct, "@") {
|
||||
acct += "@" + instance.URI
|
||||
}
|
||||
|
||||
loadAllToots(ctx, acct, account.URL, start, func(id, content string) {
|
||||
insertStatus(ctx, account.ID, id, content)
|
||||
})
|
||||
}(f, markov.Accounts[f.ID].LatestRemoteTootID)
|
||||
}
|
||||
markovLock.Unlock()
|
||||
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func cleanContent(s string) string {
|
||||
paragraphs, err := html.ParseFragment(strings.NewReader(s), &html.Node{
|
||||
Type: html.ElementNode,
|
||||
Data: "div",
|
||||
DataAtom: atom.Div,
|
||||
})
|
||||
checkError(err, "Failed to parse HTML %q", s)
|
||||
|
||||
var body []byte
|
||||
var walk func(*html.Node)
|
||||
walk = func(n *html.Node) {
|
||||
for n != nil {
|
||||
if n.Type == html.TextNode {
|
||||
body = append(body, n.Data...)
|
||||
} else if n.Type == html.ElementNode {
|
||||
var isMention bool
|
||||
if n.DataAtom == atom.A {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "class" {
|
||||
for _, c := range strings.Fields(a.Val) {
|
||||
if c == "mention" {
|
||||
isMention = true
|
||||
break
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if n.DataAtom == atom.Img {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "alt" {
|
||||
body = append(body, a.Val...)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !isMention {
|
||||
walk(n.FirstChild)
|
||||
}
|
||||
}
|
||||
n = n.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
for i, p := range paragraphs {
|
||||
if i != 0 {
|
||||
body = append(body, "\n\n"...)
|
||||
}
|
||||
walk(p.FirstChild)
|
||||
}
|
||||
|
||||
return string(body)
|
||||
}
|
||||
|
||||
func getJSON(ctx context.Context, uri string, v interface{}) {
|
||||
resp, err := http.Get(uri)
|
||||
checkError(err, "Could not download %q", uri)
|
||||
defer func() {
|
||||
checkError(resp.Body.Close(), "Error when closing %q", uri)
|
||||
}()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
log.Panicf("Error downloading %q: %v", uri, resp.Status)
|
||||
}
|
||||
|
||||
checkError(json.NewDecoder(resp.Body).Decode(v), "Error decoding %q", uri)
|
||||
}
|
||||
|
||||
func loadAllToots(ctx context.Context, acct, userURL string, start uint64, foundStatus func(id, content string)) {
|
||||
webFingerURL := getWebFingerURL(ctx, acct, userURL)
|
||||
outbox := webFingerUserActivity(ctx, webFingerURL) + "/outbox"
|
||||
prev := fmt.Sprintf("%s?min_id=%d&page=true", outbox, start)
|
||||
for prev != "" {
|
||||
var page struct {
|
||||
OrderedItems []struct {
|
||||
Type string `json:"type"`
|
||||
Object json.RawMessage `json:"object"`
|
||||
} `json:"orderedItems"`
|
||||
Prev string `json:"prev"`
|
||||
}
|
||||
getJSON(ctx, prev, &page)
|
||||
for _, i := range page.OrderedItems {
|
||||
if i.Type == "Create" {
|
||||
var object struct {
|
||||
ID string `json:"id"`
|
||||
Sensitive bool `json:"sensitive"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
checkError(json.Unmarshal(i.Object, &object), "Failed to decode toot JSON in %q", prev)
|
||||
if !object.Sensitive {
|
||||
foundStatus(object.ID, object.Content)
|
||||
}
|
||||
}
|
||||
}
|
||||
prev = page.Prev
|
||||
}
|
||||
}
|
||||
|
||||
func getWebFingerURL(ctx context.Context, acct, userURL string) string {
|
||||
acct = url.QueryEscape("acct:" + acct)
|
||||
|
||||
u, err := url.Parse(userURL)
|
||||
checkError(err, "Failed to parse user URL")
|
||||
u.Path = "/.well-known/host-meta"
|
||||
u.RawQuery = ""
|
||||
|
||||
resp, err := http.Get(u.String())
|
||||
checkError(err, "Could not retrieve host-meta")
|
||||
defer func() {
|
||||
checkError(resp.Body.Close(), "Error closing host-meta request")
|
||||
}()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
log.Panicf("Failed to load %q: %s", u, resp.Status)
|
||||
}
|
||||
var meta struct {
|
||||
Link struct {
|
||||
Template string `xml:"template,attr"`
|
||||
} `xml:"Link"`
|
||||
}
|
||||
checkError(xml.NewDecoder(resp.Body).Decode(&meta), "Could not find webfinger URL")
|
||||
|
||||
return strings.Replace(meta.Link.Template, "{uri}", acct, -1)
|
||||
}
|
||||
|
||||
func webFingerUserActivity(ctx context.Context, uri string) string {
|
||||
var body struct {
|
||||
Links []struct {
|
||||
Href string `json:"href"`
|
||||
Rel string `json:"rel"`
|
||||
Type string `json:"type"`
|
||||
} `json:"links"`
|
||||
}
|
||||
|
||||
getJSON(ctx, uri, &body)
|
||||
|
||||
for _, l := range body.Links {
|
||||
if l.Rel == "self" && l.Type == "application/activity+json" {
|
||||
return l.Href
|
||||
}
|
||||
}
|
||||
|
||||
log.Panicf("Could not find ActivityPub URL in web finger response: %q", uri)
|
||||
return ""
|
||||
}
|
Reference in New Issue