diff --git a/.gitignore b/.gitignore
index d23632f..4c3ed36 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ toots.db
toots.db-journal
toots.db-wal
__pycache__/*
+public
\ No newline at end of file
diff --git a/Procfile b/Procfile
new file mode 100644
index 0000000..dc056c1
--- /dev/null
+++ b/Procfile
@@ -0,0 +1,3 @@
+generate: python gen.py
+train: python main.py
+web: mkdir -p public && cd public && python -m http.server $PORT
\ No newline at end of file
diff --git a/app.json b/app.json
index 6f11d0e..68441ef 100644
--- a/app.json
+++ b/app.json
@@ -1,8 +1,13 @@
{
- "name": "mstdn-ebooks",
- "description": "An ebooks bot for Mastodon (and compatible) users",
- "repository": "https://github.com/Lynnesbian/mstdn-ebooks",
- "keywords": ["python", "mastodon"],
- "website":"https://fedi.lynnesbian.space/@lynnesbian",
- "image":"heroku/heroku"
+ "name": "mstdn-ebooks",
+ "description": "An ebooks bot for Mastodon (and compatible) users",
+ "repository": "https://github.com/Lynnesbian/mstdn-ebooks",
+ "keywords": ["python", "mastodon"],
+ "website": "https://fedi.lynnesbian.space/@lynnesbian",
+ "image": "heroku/heroku",
+ "env": {
+ "MASTODON_API_TOKEN": "",
+ "MASTODON_BASE_URL": ""
+ },
+ "addons": ["scheduler"]
}
diff --git a/create.py b/create.py
index caa62a3..050c16a 100755
--- a/create.py
+++ b/create.py
@@ -4,59 +4,67 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import markovify
-import json
-import re, random, multiprocessing, time, sqlite3, shutil, os
+import re
+import random
+import multiprocessing
+import time
+import sqlite3
+import shutil
+import os
+
def make_sentence(output):
- class nlt_fixed(markovify.NewlineText):
- def test_sentence_input(self, sentence):
- return True #all sentences are valid <3
+ class nlt_fixed(markovify.NewlineText):
+ def test_sentence_input(self, sentence):
+ return True # all sentences are valid <3
- # with open("corpus.txt", encoding="utf-8") as fp:
- # model = nlt_fixed(fp.read())
+ # with open("corpus.txt", encoding="utf-8") as fp:
+ # model = nlt_fixed(fp.read())
- shutil.copyfile("toots.db", "toots-copy.db")
- db = sqlite3.connect("toots-copy.db")
- db.text_factory=str
- c = db.cursor()
- toots = c.execute("SELECT content FROM `toots`").fetchall()
- toots_str = ""
- for toot in toots:
- toots_str += "\n{}".format(toot[0])
- model = nlt_fixed(toots_str)
- toots_str = None
- db.close()
- os.remove("toots-copy.db")
+ shutil.copyfile("toots.db", "toots-copy.db")
+ db = sqlite3.connect("toots-copy.db")
+ db.text_factory = str
+ c = db.cursor()
+ toots = c.execute("SELECT content FROM `toots`").fetchall()
+ toots_str = ""
+ for toot in toots:
+ toots_str += "\n{}".format(toot[0])
+ model = nlt_fixed(toots_str)
+ toots_str = None
+ db.close()
+ os.remove("toots-copy.db")
- sentence = None
- tries = 0
- while sentence is None and tries < 10:
- sentence = model.make_short_sentence(500, tries=10000)
- tries = tries + 1
- sentence = re.sub("^@\u202B[^ ]* ", "", sentence)
- output.send(sentence)
+ sentence = None
+ tries = 0
+ while sentence is None and tries < 10:
+ sentence = model.make_short_sentence(500, tries=10000)
+ tries = tries + 1
+ sentence = re.sub("^@\u202B[^ ]* ", "", sentence)
+ output.send(sentence)
-def make_toot(force_markov = False, args = None):
- return make_toot_markov()
-def make_toot_markov(query = None):
- tries = 0
- toot = None
- while toot == None and tries < 25:
- pin, pout = multiprocessing.Pipe(False)
- p = multiprocessing.Process(target = make_sentence, args = [pout])
- p.start()
- p.join(10)
- if p.is_alive():
- p.terminate()
- p.join()
- toot = None
- tries = tries + 1
- else:
- toot = pin.recv()
- if toot == None:
- toot = "Toot generation failed! Contact Lynne for assistance."
- return {
- "toot":toot,
- "media":None
- }
+def make_toot(force_markov=False, args=None):
+    """Return a generated toot dict; both arguments are currently unused."""
+    return make_toot_markov()
+
+
+def make_toot_markov(query=None):
+    """Return {"toot": text, "media": None}; up to 25 generation attempts."""
+    toot = None
+    for _ in range(25):
+        pin, pout = multiprocessing.Pipe(False)
+        p = multiprocessing.Process(target=make_sentence, args=[pout])
+        p.start()
+        p.join(10)
+        if p.is_alive():  # generation hung; kill the worker and retry
+            p.terminate()
+            p.join()
+        elif pin.poll():  # a crashed worker sends nothing; recv() would block
+            toot = pin.recv()
+        pin.close()
+        pout.close()
+        if toot is not None:
+            break
+    if toot is None:
+        toot = "Toot generation failed! Contact Lynne for assistance."
+    return {"toot": toot, "media": None}
diff --git a/gen.py b/gen.py
index a07dcce..8c7e72c 100755
--- a/gen.py
+++ b/gen.py
@@ -4,41 +4,47 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
-import argparse, sys, traceback, json
+import argparse
+import sys
+import traceback
+import json
import create
+from util import get_config
parser = argparse.ArgumentParser(description='Generate and post a toot.')
-parser.add_argument('reply', metavar='reply', type=str, nargs='?',
- help='ID of the status to reply to')
+parser.add_argument('reply', metavar='reply', type=str, nargs='?',
+ help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
- help="Print the toot to stdout without posting it")
+ help="Print the toot to stdout without posting it")
args = parser.parse_args()
-cfg = json.load(open('config.json'))
+cfg = get_config()
client = Mastodon(
- client_id=cfg['client']['id'],
- client_secret=cfg['client']['secret'],
- access_token=cfg['secret'],
- api_base_url=cfg['site'])
+ access_token=cfg['secret'],
+ api_base_url=cfg['site'])
if __name__ == '__main__':
- toot = create.make_toot()
- if not args.simulate:
- try:
- if toot['media'] != None:
- mediaID = client.media_post(toot['media'], description = toot['toot'])
- client.status_post(toot['toot'].replace("\n", " "),
- media_ids = [mediaID], visibility = "unlisted", spoiler_text = cfg['cw'])
- else:
- client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw'])
- except Exception as err:
- toot = {
- "toot":
- "Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly" \
- + " wrong! While attempting to post a toot, I received the following" \
- + " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2]))
- }
- client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")
- print(toot['toot'])
+ toot = create.make_toot()
+ if not args.simulate:
+ try:
+ if toot['media'] != None:
+ mediaID = client.media_post(
+ toot['media'], description=toot['toot'])
+ client.status_post(toot['toot'].replace("\n", " "),
+ media_ids=[mediaID], visibility="unlisted", spoiler_text=cfg['cw'])
+ else:
+ client.status_post(
+ toot['toot'], visibility='unlisted', spoiler_text=cfg['cw'])
+ except Exception as err:
+ toot = {
+ "toot":
+ "Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly"
+ + " wrong! While attempting to post a toot, I received the following"
+ + " error:\n" +
+ "\n".join(traceback.format_tb(sys.exc_info()[2]))
+ }
+ client.status_post(
+ toot['toot'], visibility='unlisted', spoiler_text="Error!")
+ print(toot['toot'])
diff --git a/main.py b/main.py
index b167a8e..00b14af 100755
--- a/main.py
+++ b/main.py
@@ -6,223 +6,165 @@
from mastodon import Mastodon
from os import path
-from bs4 import BeautifulSoup
-import os, sqlite3, signal, sys, json, re
+import os
+import sqlite3
+import signal
+import sys
+import json
+import re
import requests
+from util import get_config, extract_toot
-scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"]
-cfg = json.load(open('config.json', 'r'))
-
-if os.path.exists("clientcred.secret"):
- print("Upgrading to new storage method")
- cc = open("clientcred.secret").read().split("\n")
- cfg['client'] = {
- "id": cc[0],
- "secret": cc[1]
- }
- cfg['secret'] = open("usercred.secret").read().rstrip("\n")
- os.remove("clientcred.secret")
- os.remove("usercred.secret")
-
-
-if "client" not in cfg:
- print("No client credentials, registering application")
- client_id, client_secret = Mastodon.create_app("mstdn-ebooks",
- api_base_url=cfg['site'],
- scopes=scopes,
- website="https://github.com/Lynnesbian/mstdn-ebooks")
-
- cfg['client'] = {
- "id": client_id,
- "secret": client_secret
- }
-
-if "secret" not in cfg:
- print("No user credentials, logging in")
- client = Mastodon(client_id = cfg['client']['id'],
- client_secret = cfg['client']['secret'],
- api_base_url=cfg['site'])
-
- print("Open this URL: {}".format(client.auth_request_url(scopes=scopes)))
- cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes)
-
-json.dump(cfg, open("config.json", "w+"))
-
-def extract_toot(toot):
- toot = toot.replace("&apos;", "'")
- toot = toot.replace("&quot;", '"')
- soup = BeautifulSoup(toot, "html.parser")
-
- # this is the code that removes all mentions
- # TODO: make it so that it removes the @ and instance but keeps the name
- for mention in soup.select("span.h-card"):
- mention.a.unwrap()
- mention.span.unwrap()
-
- # replace <br> with linebreak
- for lb in soup.select("br"):
- lb.insert_after("\n")
- lb.decompose()
-
- # replace <p> with linebreak
- for p in soup.select("p"):
- p.insert_after("\n")
- p.unwrap()
-
- # fix hashtags
- for ht in soup.select("a.hashtag"):
- ht.unwrap()
-
- # fix links
- for link in soup.select("a"):
- link.insert_after(link["href"])
- link.decompose()
-
- toot = soup.get_text()
- toot = toot.rstrip("\n") #remove trailing newline
- toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning
- return(toot)
+cfg = get_config()
client = Mastodon(
- client_id=cfg['client']['id'],
- client_secret = cfg['client']['secret'],
- access_token=cfg['secret'],
- api_base_url=cfg['site'])
+ access_token=cfg['secret'],
+ api_base_url=cfg['site'])
me = client.account_verify_credentials()
following = client.account_following(me.id)
db = sqlite3.connect("toots.db")
-db.text_factory=str
+db.text_factory = str
c = db.cursor()
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
db.commit()
+
def handleCtrlC(signal, frame):
- print("\nPREMATURE EVACUATION - Saving chunks")
- db.commit()
- sys.exit(1)
+ print("\nPREMATURE EVACUATION - Saving chunks")
+ db.commit()
+ sys.exit(1)
+
signal.signal(signal.SIGINT, handleCtrlC)
+
def get_toots_legacy(client, id):
- i = 0
- toots = client.account_statuses(id)
- while toots is not None and len(toots) > 0:
- for toot in toots:
- if toot.spoiler_text != "": continue
- if toot.reblog is not None: continue
- if toot.visibility not in ["public", "unlisted"]: continue
- t = extract_toot(toot.content)
- if t != None:
- yield {
- "toot": t,
- "id": toot.id,
- "uri": toot.uri
- }
- toots = client.fetch_next(toots)
- i += 1
- if i%20 == 0:
- print('.', end='', flush=True)
+ i = 0
+ toots = client.account_statuses(id)
+ while toots is not None and len(toots) > 0:
+ for toot in toots:
+ if toot.spoiler_text != "":
+ continue
+ if toot.reblog is not None:
+ continue
+ if toot.visibility not in ["public", "unlisted"]:
+ continue
+ t = extract_toot(toot.content)
+ if t != None:
+ yield {
+ "toot": t,
+ "id": toot.id,
+ "uri": toot.uri
+ }
+ toots = client.fetch_next(toots)
+ i += 1
+ if i % 20 == 0:
+ print('.', end='', flush=True)
+
for f in following:
- last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
- if last_toot != None:
- last_toot = last_toot[0]
- else:
- last_toot = 0
- print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
+ last_toot = c.execute(
+ "SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
+ if last_toot != None:
+ last_toot = last_toot[0]
+ else:
+ last_toot = 0
+ print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
- #find the user's activitypub outbox
- print("WebFingering...")
- instance = re.search(r"^.*@(.+)", f.acct)
- if instance == None:
- instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
- else:
- instance = instance.group(1)
+ # find the user's activitypub outbox
+ print("WebFingering...")
+ instance = re.search(r"^.*@(.+)", f.acct)
+ if instance == None:
+ instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
+ else:
+ instance = instance.group(1)
- if instance == "bofa.lol":
- print("rest in piece bofa, skipping")
- continue
-
- # print("{} is on {}".format(f.acct, instance))
- try:
- r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10)
- uri = re.search(r'template="([^"]+)"', r.text).group(1)
- uri = uri.format(uri = "{}@{}".format(f.username, instance))
- r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10)
- j = r.json()
- if len(j['aliases']) == 1: #TODO: this is a hack on top of a hack, fix it
- uri = j['aliases'][0]
- else:
- uri = j['aliases'][1]
- uri = "{}/outbox?page=true".format(uri)
- r = requests.get(uri, timeout=10)
- j = r.json()
- except Exception:
- print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
- sys.exit(1)
+ if instance == "bofa.lol":
+ print("rest in piece bofa, skipping")
+ continue
- pleroma = False
- if 'first' in j and type(j['first']) != str:
- print("Pleroma instance detected")
- pleroma = True
- j = j['first']
- else:
- print("Mastodon instance detected")
- uri = "{}&min_id={}".format(uri, last_toot)
- r = requests.get(uri)
- j = r.json()
+ # print("{} is on {}".format(f.acct, instance))
+ try:
+ r = requests.get(
+ "https://{}/.well-known/host-meta".format(instance), timeout=10)
+ uri = re.search(r'template="([^"]+)"', r.text).group(1)
+ uri = uri.format(uri="{}@{}".format(f.username, instance))
+ r = requests.get(
+ uri, headers={"Accept": "application/json"}, timeout=10)
+ j = r.json()
+ if len(j['aliases']) == 1: # TODO: this is a hack on top of a hack, fix it
+ uri = j['aliases'][0]
+ else:
+ uri = j['aliases'][1]
+ uri = "{}/outbox?page=true".format(uri)
+ r = requests.get(uri, timeout=10)
+ j = r.json()
+ except Exception:
+ print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
+ sys.exit(1)
- print("Downloading and parsing toots", end='', flush=True)
- done = False
- try:
- while not done and len(j['orderedItems']) > 0:
- for oi in j['orderedItems']:
- if oi['type'] != "Create":
- continue #not a toost. fuck outta here
-
- # its a toost baby
- content = oi['object']['content']
- if oi['object']['summary'] != None:
- #don't download CW'd toots
- continue
- toot = extract_toot(content)
- # print(toot)
- try:
- if pleroma:
- if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0:
- #we've caught up to the notices we've already downloaded, so we can stop now
- done = True
- break
- pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
- c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
- (pid,
- f.id,
- oi['object']['id'],
- toot
- )
- )
- pass
- except:
- pass #ignore any toots that don't successfully go into the DB
- # sys.exit(0)
- if not pleroma:
- r = requests.get(j['prev'], timeout=15)
- else:
- r = requests.get(j['next'], timeout=15)
- j = r.json()
- print('.', end='', flush=True)
- print(" Done!")
- db.commit()
- except:
- print("Encountered an error! Saving toots to database and continuing.")
- db.commit()
- # db.close()
+ pleroma = False
+ if 'first' in j and type(j['first']) != str:
+ print("Pleroma instance detected")
+ pleroma = True
+ j = j['first']
+ else:
+ print("Mastodon instance detected")
+ uri = "{}&min_id={}".format(uri, last_toot)
+ r = requests.get(uri)
+ j = r.json()
+
+ print("Downloading and parsing toots", end='', flush=True)
+ done = False
+ try:
+ while not done and len(j['orderedItems']) > 0:
+ for oi in j['orderedItems']:
+ if oi['type'] != "Create":
+ continue # not a toost. fuck outta here
+
+ # its a toost baby
+ content = oi['object']['content']
+ if oi['object']['summary'] != None:
+ # don't download CW'd toots
+ continue
+ toot = extract_toot(content)
+ # print(toot)
+ try:
+ if pleroma:
+ if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0:
+ # we've caught up to the notices we've already downloaded, so we can stop now
+ done = True
+ break
+ pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
+ c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
+ (pid,
+ f.id,
+ oi['object']['id'],
+ toot
+ )
+ )
+ pass
+ except:
+ pass # ignore any toots that don't successfully go into the DB
+ # sys.exit(0)
+ if not pleroma:
+ r = requests.get(j['prev'], timeout=15)
+ else:
+ r = requests.get(j['next'], timeout=15)
+ j = r.json()
+ print('.', end='', flush=True)
+ print(" Done!")
+ db.commit()
+ except:
+ print("Encountered an error! Saving toots to database and continuing.")
+ db.commit()
+ # db.close()
print("Done!")
db.commit()
-db.execute("VACUUM") #compact db
+db.execute("VACUUM") # compact db
db.commit()
-db.close()
\ No newline at end of file
+db.close()
diff --git a/util.py b/util.py
new file mode 100644
index 0000000..8ebcbfc
--- /dev/null
+++ b/util.py
@@ -0,0 +1,97 @@
+"""
+Various utility tools
+"""
+
+import json
+import os
+from mastodon import Mastodon
+from bs4 import BeautifulSoup
+
+
+def get_config():
+    """Load the bot configuration.
+
+    Returns a dict with "secret" (access token) and "site" (API base URL).
+    """
+    token = os.getenv("MASTODON_API_TOKEN")
+    # app.json declares MASTODON_BASE_URL; accept either spelling
+    site = os.getenv("MASTODON_API_BASE_URL") or os.getenv("MASTODON_BASE_URL")
+    if token and site:  # Heroku mode; use preset token, no files touched
+        # gen.py reads cfg['cw'] when posting, so the key must exist
+        return {"secret": token, "site": site, "cw": None, "is_heroku": True}
+
+    # Local mode; do OAuth login dance and persist the result
+    scopes = ["read:statuses", "read:accounts",
+              "read:follows", "write:statuses", "read:notifications"]
+    with open("config.json", "r") as fp:
+        cfg = json.load(fp)
+
+    if os.path.exists("clientcred.secret"):
+        print("Upgrading to new storage method")
+        with open("clientcred.secret") as fp:
+            cc = fp.read().split("\n")
+        cfg['client'] = {"id": cc[0], "secret": cc[1]}
+        with open("usercred.secret") as fp:
+            cfg['secret'] = fp.read().rstrip("\n")
+        os.remove("clientcred.secret")
+        os.remove("usercred.secret")
+
+    if "client" not in cfg:
+        print("No client credentials, registering application")
+        client_id, client_secret = Mastodon.create_app(
+            "mstdn-ebooks",
+            api_base_url=cfg['site'],
+            scopes=scopes,
+            website="https://github.com/Lynnesbian/mstdn-ebooks")
+        cfg['client'] = {"id": client_id, "secret": client_secret}
+
+    if "secret" not in cfg:
+        print("No user credentials, logging in")
+        client = Mastodon(client_id=cfg['client']['id'],
+                          client_secret=cfg['client']['secret'],
+                          api_base_url=cfg['site'])
+        print("Open this URL: {}".format(
+            client.auth_request_url(scopes=scopes)))
+        cfg['secret'] = client.log_in(
+            code=input("Secret: "), scopes=scopes)
+
+    with open("config.json", "w") as fp:
+        json.dump(cfg, fp)
+    return cfg
+
+
+def extract_toot(toot):
+    """Convert a status' HTML content into plain text for the corpus."""
+    toot = toot.replace("&apos;", "'")
+    toot = toot.replace("&quot;", '"')
+    soup = BeautifulSoup(toot, "html.parser")
+
+    # this is the code that removes all mentions
+    # TODO: make it so that it removes the @ and instance but keeps the name
+    for mention in soup.select("span.h-card"):
+        mention.a.unwrap()
+        mention.span.unwrap()
+
+    # replace <br> with linebreak
+    for lb in soup.select("br"):
+        lb.insert_after("\n")
+        lb.decompose()
+
+    # replace <p> with linebreak
+    for p in soup.select("p"):
+        p.insert_after("\n")
+        p.unwrap()
+
+    # fix hashtags
+    for ht in soup.select("a.hashtag"):
+        ht.unwrap()
+
+    # fix links
+    for link in soup.select("a"):
+        link.insert_after(link["href"])
+        link.decompose()
+
+    toot = soup.get_text()
+    toot = toot.rstrip("\n")  # remove trailing newline
+    toot = toot.replace("@", "@\u200B")  # zws after @ avoids mentioning
+    return toot