First pass at attempting to pull login values from env vars

Andi N. Fiziks 2018-12-05 04:15:19 +00:00
parent 5d7fe7a80c
commit 767911757b
7 changed files with 335 additions and 273 deletions
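
In short: logging in is now driven by a new util.get_config() helper, which prefers the MASTODON_API_TOKEN and MASTODON_API_BASE_URL environment variables (the Heroku case) and falls back to the existing config.json OAuth flow when they are unset. A minimal sketch of that shape, not the exact implementation (which is in util.py below):

import os


def env_login():
    """Prefer credentials from the environment, as this commit does."""
    token = os.getenv("MASTODON_API_TOKEN")
    base_url = os.getenv("MASTODON_API_BASE_URL")
    if token and base_url:
        # Heroku-style deploy: credentials come straight from config vars
        return {"secret": token, "site": base_url, "is_heroku": True}
    return None  # caller falls back to the interactive config.json flow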

.gitignore (1 changed line)

@@ -7,3 +7,4 @@ toots.db
 toots.db-journal
 toots.db-wal
 __pycache__/*
+public

Procfile (new file, 3 lines)

@@ -0,0 +1,3 @@
+generate: python gen.py
+train: python main.py
+web: mkdir public && cd public && python -m http.server $PORT

app.json

@@ -1,8 +1,13 @@
 {
   "name": "mstdn-ebooks",
   "description": "An ebooks bot for Mastodon (and compatible) users",
   "repository": "https://github.com/Lynnesbian/mstdn-ebooks",
   "keywords": ["python", "mastodon"],
-  "website":"https://fedi.lynnesbian.space/@lynnesbian",
-  "image":"heroku/heroku"
+  "website": "https://fedi.lynnesbian.space/@lynnesbian",
+  "image": "heroku/heroku",
+  "env": {
+    "MASTODON_API_TOKEN": "",
+    "MASTODON_BASE_URL": ""
+  },
+  "addons": ["scheduler"]
 }

create.py (106 changed lines)
The grouped imports are split one per line (the json import is dropped) and the module is reformatted; the file after this change:

@@ -4,59 +4,67 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import markovify
import re
import random
import multiprocessing
import time
import sqlite3
import shutil
import os


def make_sentence(output):
    class nlt_fixed(markovify.NewlineText):
        def test_sentence_input(self, sentence):
            return True  # all sentences are valid <3
    # with open("corpus.txt", encoding="utf-8") as fp:
    #     model = nlt_fixed(fp.read())
    shutil.copyfile("toots.db", "toots-copy.db")
    db = sqlite3.connect("toots-copy.db")
    db.text_factory = str
    c = db.cursor()
    toots = c.execute("SELECT content FROM `toots`").fetchall()
    toots_str = ""
    for toot in toots:
        toots_str += "\n{}".format(toot[0])
    model = nlt_fixed(toots_str)
    toots_str = None
    db.close()
    os.remove("toots-copy.db")

    sentence = None
    tries = 0
    while sentence is None and tries < 10:
        sentence = model.make_short_sentence(500, tries=10000)
        tries = tries + 1
    sentence = re.sub("^@\u202B[^ ]* ", "", sentence)
    output.send(sentence)


def make_toot(force_markov=False, args=None):
    return make_toot_markov()


def make_toot_markov(query=None):
    tries = 0
    toot = None
    while toot == None and tries < 25:
        pin, pout = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=make_sentence, args=[pout])
        p.start()
        p.join(10)
        if p.is_alive():
            p.terminate()
            p.join()
            toot = None
            tries = tries + 1
        else:
            toot = pin.recv()
    if toot == None:
        toot = "Toot generation failed! Contact Lynne for assistance."
    return {
        "toot": toot,
        "media": None
    }

gen.py (60 changed lines)
Configuration now comes from util.get_config() instead of reading config.json directly, and the Mastodon client is built from the access token and base URL alone (the client id/secret arguments are gone); the file after this change:

@@ -4,41 +4,47 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
import argparse
import sys
import traceback
import json
import create
from util import get_config

parser = argparse.ArgumentParser(description='Generate and post a toot.')
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
                    help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
                    help="Print the toot to stdout without posting it")
args = parser.parse_args()

cfg = get_config()

client = Mastodon(
    access_token=cfg['secret'],
    api_base_url=cfg['site'])

if __name__ == '__main__':
    toot = create.make_toot()
    if not args.simulate:
        try:
            if toot['media'] != None:
                mediaID = client.media_post(
                    toot['media'], description=toot['toot'])
                client.status_post(toot['toot'].replace("\n", " "),
                                   media_ids=[mediaID], visibility="unlisted", spoiler_text=cfg['cw'])
            else:
                client.status_post(
                    toot['toot'], visibility='unlisted', spoiler_text=cfg['cw'])
        except Exception as err:
            toot = {
                "toot":
                    "Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly"
                    + " wrong! While attempting to post a toot, I received the following"
                    + " error:\n" +
                    "\n".join(traceback.format_tb(sys.exc_info()[2]))
            }
            client.status_post(
                toot['toot'], visibility='unlisted', spoiler_text="Error!")
    print(toot['toot'])

main.py (324 changed lines)
The OAuth scopes, config.json handling, credential-upgrade logic, and the BeautifulSoup-based extract_toot() move out of main.py into the new util.py, and the Mastodon client is likewise built from the access token and base URL alone; the file after this change:

@@ -6,223 +6,165 @@
from mastodon import Mastodon
from os import path
import os
import sqlite3
import signal
import sys
import json
import re
import requests
from util import get_config, extract_toot

cfg = get_config()

client = Mastodon(
    access_token=cfg['secret'],
    api_base_url=cfg['site'])

me = client.account_verify_credentials()
following = client.account_following(me.id)

db = sqlite3.connect("toots.db")
db.text_factory = str
c = db.cursor()
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
db.commit()


def handleCtrlC(signal, frame):
    print("\nPREMATURE EVACUATION - Saving chunks")
    db.commit()
    sys.exit(1)


signal.signal(signal.SIGINT, handleCtrlC)


def get_toots_legacy(client, id):
    i = 0
    toots = client.account_statuses(id)
    while toots is not None and len(toots) > 0:
        for toot in toots:
            if toot.spoiler_text != "":
                continue
            if toot.reblog is not None:
                continue
            if toot.visibility not in ["public", "unlisted"]:
                continue
            t = extract_toot(toot.content)
            if t != None:
                yield {
                    "toot": t,
                    "id": toot.id,
                    "uri": toot.uri
                }
        toots = client.fetch_next(toots)
        i += 1
        if i % 20 == 0:
            print('.', end='', flush=True)


for f in following:
    last_toot = c.execute(
        "SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
    if last_toot != None:
        last_toot = last_toot[0]
    else:
        last_toot = 0
    print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))

    # find the user's activitypub outbox
    print("WebFingering...")
    instance = re.search(r"^.*@(.+)", f.acct)
    if instance == None:
        instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
    else:
        instance = instance.group(1)

    if instance == "bofa.lol":
        print("rest in piece bofa, skipping")
        continue

    # print("{} is on {}".format(f.acct, instance))
    try:
        r = requests.get(
            "https://{}/.well-known/host-meta".format(instance), timeout=10)
        uri = re.search(r'template="([^"]+)"', r.text).group(1)
        uri = uri.format(uri="{}@{}".format(f.username, instance))
        r = requests.get(
            uri, headers={"Accept": "application/json"}, timeout=10)
        j = r.json()
        if len(j['aliases']) == 1:  # TODO: this is a hack on top of a hack, fix it
            uri = j['aliases'][0]
        else:
            uri = j['aliases'][1]
        uri = "{}/outbox?page=true".format(uri)
        r = requests.get(uri, timeout=10)
        j = r.json()
    except Exception:
        print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
        sys.exit(1)

    pleroma = False
    if 'first' in j and type(j['first']) != str:
        print("Pleroma instance detected")
        pleroma = True
        j = j['first']
    else:
        print("Mastodon instance detected")
        uri = "{}&min_id={}".format(uri, last_toot)
        r = requests.get(uri)
        j = r.json()

    print("Downloading and parsing toots", end='', flush=True)
    done = False
    try:
        while not done and len(j['orderedItems']) > 0:
            for oi in j['orderedItems']:
                if oi['type'] != "Create":
                    continue  # not a toost. fuck outta here

                # its a toost baby
                content = oi['object']['content']
                if oi['object']['summary'] != None:
                    # don't download CW'd toots
                    continue
                toot = extract_toot(content)
                # print(toot)
                try:
                    if pleroma:
                        if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0:
                            # we've caught up to the notices we've already downloaded, so we can stop now
                            done = True
                            break
                    pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
                    c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
                              (pid,
                               f.id,
                               oi['object']['id'],
                               toot
                               )
                              )
                    pass
                except:
                    pass  # ignore any toots that don't successfully go into the DB
                    # sys.exit(0)
            if not pleroma:
                r = requests.get(j['prev'], timeout=15)
            else:
                r = requests.get(j['next'], timeout=15)
            j = r.json()
            print('.', end='', flush=True)
        print(" Done!")
        db.commit()
    except:
        print("Encountered an error! Saving toots to database and continuing.")
        db.commit()
    # db.close()

print("Done!")
db.commit()
db.execute("VACUUM")  # compact db
db.commit()
db.close()

util.py (new file, 97 lines)

@@ -0,0 +1,97 @@
"""
Various utility tools
"""
import json
import os

from mastodon import Mastodon
from bs4 import BeautifulSoup


def get_config():
    access_token = os.getenv("MASTODON_API_TOKEN")
    api_base_url = os.getenv("MASTODON_API_BASE_URL")
    if (access_token and api_base_url):  # Heroku mode; use preset token
        return {
            "secret": access_token,
            "site": api_base_url,
            "is_heroku": True
        }
    else:  # Local mode; do OAuth login dance
        scopes = ["read:statuses", "read:accounts",
                  "read:follows", "write:statuses", "read:notifications"]
        cfg = json.load(open('config.json', 'r'))

        if os.path.exists("clientcred.secret"):
            print("Upgrading to new storage method")
            cc = open("clientcred.secret").read().split("\n")
            cfg['client'] = {
                "id": cc[0],
                "secret": cc[1]
            }
            cfg['secret'] = open("usercred.secret").read().rstrip("\n")
            os.remove("clientcred.secret")
            os.remove("usercred.secret")

        if "client" not in cfg:
            print("No client credentials, registering application")
            client_id, client_secret = Mastodon.create_app("mstdn-ebooks",
                                                           api_base_url=cfg['site'],
                                                           scopes=scopes,
                                                           website="https://github.com/Lynnesbian/mstdn-ebooks")
            cfg['client'] = {
                "id": client_id,
                "secret": client_secret
            }

        if "secret" not in cfg:
            print("No user credentials, logging in")
            client = Mastodon(client_id=cfg['client']['id'],
                              client_secret=cfg['client']['secret'],
                              api_base_url=cfg['site'])

            print("Open this URL: {}".format(
                client.auth_request_url(scopes=scopes)))
            cfg['secret'] = client.log_in(
                code=input("Secret: "), scopes=scopes)

        json.dump(cfg, open("config.json", "w+"))


def extract_toot(toot):
    toot = toot.replace("&apos;", "'")
    toot = toot.replace("&quot;", '"')
    soup = BeautifulSoup(toot, "html.parser")

    # this is the code that removes all mentions
    # TODO: make it so that it removes the @ and instance but keeps the name
    for mention in soup.select("span.h-card"):
        mention.a.unwrap()
        mention.span.unwrap()

    # replace <br> with linebreak
    for lb in soup.select("br"):
        lb.insert_after("\n")
        lb.decompose()

    # replace <p> with linebreak
    for p in soup.select("p"):
        p.insert_after("\n")
        p.unwrap()

    # fix hashtags
    for ht in soup.select("a.hashtag"):
        ht.unwrap()

    # fix links
    for link in soup.select("a"):
        link.insert_after(link["href"])
        link.decompose()

    toot = soup.get_text()
    toot = toot.rstrip("\n")  # remove trailing newline
    # put a zws between @ and username to avoid mentioning
    toot = toot.replace("@", "@\u200B")
    return(toot)
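
A quick way to exercise the new Heroku-mode path locally is to set the two variables get_config() reads before calling it; the values below are placeholders. Note that util.py reads MASTODON_API_BASE_URL while app.json declares MASTODON_BASE_URL, so the two names need to agree for the Heroku config vars to be picked up:

import os

# Placeholder credentials for a smoke test; substitute a real token and instance URL.
os.environ["MASTODON_API_TOKEN"] = "example-token"
os.environ["MASTODON_API_BASE_URL"] = "https://example.social"

from util import get_config

cfg = get_config()
print(cfg["site"], cfg["is_heroku"])  # expected: https://example.social True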