First pass at attempting to pull login values from env vars

Andi N. Fiziks 2018-12-05 04:15:19 +00:00
parent 5d7fe7a80c
commit 767911757b
7 changed files with 335 additions and 273 deletions

.gitignore (vendored, 1 line changed)

@@ -7,3 +7,4 @@ toots.db
toots.db-journal
toots.db-wal
__pycache__/*
public

Procfile (new file, 3 lines changed)

@@ -0,0 +1,3 @@
generate: python gen.py
train: python main.py
web: mkdir public && cd public && python -m http.server $PORT
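All three process types assume the Mastodon credentials are supplied as environment variables; a hypothetical pre-flight check (not part of this commit, using the variable names declared in app.json below) could look like:

# Hypothetical pre-flight check, not part of the commit: verify the config vars
# declared in app.json are present before any of the Procfile processes start.
import os
import sys

REQUIRED_VARS = ("MASTODON_API_TOKEN", "MASTODON_BASE_URL")

missing = [name for name in REQUIRED_VARS if not os.getenv(name)]
if missing:
    sys.exit("Missing environment variables: {}".format(", ".join(missing)))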

app.json

@@ -1,8 +1,13 @@
{
"name": "mstdn-ebooks",
"description": "An ebooks bot for Mastodon (and compatible) users",
"repository": "https://github.com/Lynnesbian/mstdn-ebooks",
"keywords": ["python", "mastodon"],
"website":"https://fedi.lynnesbian.space/@lynnesbian",
"image":"heroku/heroku"
"name": "mstdn-ebooks",
"description": "An ebooks bot for Mastodon (and compatible) users",
"repository": "https://github.com/Lynnesbian/mstdn-ebooks",
"keywords": ["python", "mastodon"],
"website": "https://fedi.lynnesbian.space/@lynnesbian",
"image": "heroku/heroku",
"env": {
"MASTODON_API_TOKEN": "",
"MASTODON_BASE_URL": ""
},
"addons": ["scheduler"]
}

create.py (106 lines changed)

@@ -4,59 +4,67 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import markovify
import json
import re, random, multiprocessing, time, sqlite3, shutil, os
import re
import random
import multiprocessing
import time
import sqlite3
import shutil
import os
def make_sentence(output):
class nlt_fixed(markovify.NewlineText):
def test_sentence_input(self, sentence):
return True #all sentences are valid <3
class nlt_fixed(markovify.NewlineText):
def test_sentence_input(self, sentence):
return True # all sentences are valid <3
# with open("corpus.txt", encoding="utf-8") as fp:
# model = nlt_fixed(fp.read())
# with open("corpus.txt", encoding="utf-8") as fp:
# model = nlt_fixed(fp.read())
shutil.copyfile("toots.db", "toots-copy.db")
db = sqlite3.connect("toots-copy.db")
db.text_factory=str
c = db.cursor()
toots = c.execute("SELECT content FROM `toots`").fetchall()
toots_str = ""
for toot in toots:
toots_str += "\n{}".format(toot[0])
model = nlt_fixed(toots_str)
toots_str = None
db.close()
os.remove("toots-copy.db")
shutil.copyfile("toots.db", "toots-copy.db")
db = sqlite3.connect("toots-copy.db")
db.text_factory = str
c = db.cursor()
toots = c.execute("SELECT content FROM `toots`").fetchall()
toots_str = ""
for toot in toots:
toots_str += "\n{}".format(toot[0])
model = nlt_fixed(toots_str)
toots_str = None
db.close()
os.remove("toots-copy.db")
sentence = None
tries = 0
while sentence is None and tries < 10:
sentence = model.make_short_sentence(500, tries=10000)
tries = tries + 1
sentence = re.sub("^@\u202B[^ ]* ", "", sentence)
output.send(sentence)
sentence = None
tries = 0
while sentence is None and tries < 10:
sentence = model.make_short_sentence(500, tries=10000)
tries = tries + 1
sentence = re.sub("^@\u202B[^ ]* ", "", sentence)
output.send(sentence)
def make_toot(force_markov = False, args = None):
return make_toot_markov()
def make_toot_markov(query = None):
tries = 0
toot = None
while toot == None and tries < 25:
pin, pout = multiprocessing.Pipe(False)
p = multiprocessing.Process(target = make_sentence, args = [pout])
p.start()
p.join(10)
if p.is_alive():
p.terminate()
p.join()
toot = None
tries = tries + 1
else:
toot = pin.recv()
if toot == None:
toot = "Toot generation failed! Contact Lynne for assistance."
return {
"toot":toot,
"media":None
}
def make_toot(force_markov=False, args=None):
return make_toot_markov()
def make_toot_markov(query=None):
tries = 0
toot = None
while toot == None and tries < 25:
pin, pout = multiprocessing.Pipe(False)
p = multiprocessing.Process(target=make_sentence, args=[pout])
p.start()
p.join(10)
if p.is_alive():
p.terminate()
p.join()
toot = None
tries = tries + 1
else:
toot = pin.recv()
if toot == None:
toot = "Toot generation failed! Contact Lynne for assistance."
return {
"toot": toot,
"media": None
}
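For context, a minimal usage sketch of the module above, assuming toots.db already exists and markovify is installed (this mirrors what gen.py does with --simulate):

# Hypothetical local test, not part of the commit: generate one toot and print it.
import create

result = create.make_toot()  # returns {"toot": <generated text>, "media": None}
print(result["toot"])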

gen.py (60 lines changed)

@@ -4,41 +4,47 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
import argparse, sys, traceback, json
import argparse
import sys
import traceback
import json
import create
from util import get_config
parser = argparse.ArgumentParser(description='Generate and post a toot.')
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
help='ID of the status to reply to')
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
help="Print the toot to stdout without posting it")
help="Print the toot to stdout without posting it")
args = parser.parse_args()
cfg = json.load(open('config.json'))
cfg = get_config()
client = Mastodon(
client_id=cfg['client']['id'],
client_secret=cfg['client']['secret'],
access_token=cfg['secret'],
api_base_url=cfg['site'])
access_token=cfg['secret'],
api_base_url=cfg['site'])
if __name__ == '__main__':
toot = create.make_toot()
if not args.simulate:
try:
if toot['media'] != None:
mediaID = client.media_post(toot['media'], description = toot['toot'])
client.status_post(toot['toot'].replace("\n", " "),
media_ids = [mediaID], visibility = "unlisted", spoiler_text = cfg['cw'])
else:
client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw'])
except Exception as err:
toot = {
"toot":
"Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly" \
+ " wrong! While attempting to post a toot, I received the following" \
+ " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2]))
}
client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")
print(toot['toot'])
toot = create.make_toot()
if not args.simulate:
try:
if toot['media'] != None:
mediaID = client.media_post(
toot['media'], description=toot['toot'])
client.status_post(toot['toot'].replace("\n", " "),
media_ids=[mediaID], visibility="unlisted", spoiler_text=cfg['cw'])
else:
client.status_post(
toot['toot'], visibility='unlisted', spoiler_text=cfg['cw'])
except Exception as err:
toot = {
"toot":
"Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly"
+ " wrong! While attempting to post a toot, I received the following"
+ " error:\n" +
"\n".join(traceback.format_tb(sys.exc_info()[2]))
}
client.status_post(
toot['toot'], visibility='unlisted', spoiler_text="Error!")
print(toot['toot'])

main.py (324 lines changed)

@@ -6,223 +6,165 @@
from mastodon import Mastodon
from os import path
from bs4 import BeautifulSoup
import os, sqlite3, signal, sys, json, re
import os
import sqlite3
import signal
import sys
import json
import re
import requests
from util import get_config, extract_toot
scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"]
cfg = json.load(open('config.json', 'r'))
if os.path.exists("clientcred.secret"):
print("Upgrading to new storage method")
cc = open("clientcred.secret").read().split("\n")
cfg['client'] = {
"id": cc[0],
"secret": cc[1]
}
cfg['secret'] = open("usercred.secret").read().rstrip("\n")
os.remove("clientcred.secret")
os.remove("usercred.secret")
if "client" not in cfg:
print("No client credentials, registering application")
client_id, client_secret = Mastodon.create_app("mstdn-ebooks",
api_base_url=cfg['site'],
scopes=scopes,
website="https://github.com/Lynnesbian/mstdn-ebooks")
cfg['client'] = {
"id": client_id,
"secret": client_secret
}
if "secret" not in cfg:
print("No user credentials, logging in")
client = Mastodon(client_id = cfg['client']['id'],
client_secret = cfg['client']['secret'],
api_base_url=cfg['site'])
print("Open this URL: {}".format(client.auth_request_url(scopes=scopes)))
cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes)
json.dump(cfg, open("config.json", "w+"))
def extract_toot(toot):
toot = toot.replace("&apos;", "'")
toot = toot.replace("&quot;", '"')
soup = BeautifulSoup(toot, "html.parser")
# this is the code that removes all mentions
# TODO: make it so that it removes the @ and instance but keeps the name
for mention in soup.select("span.h-card"):
mention.a.unwrap()
mention.span.unwrap()
# replace <br> with linebreak
for lb in soup.select("br"):
lb.insert_after("\n")
lb.decompose()
# replace <p> with linebreak
for p in soup.select("p"):
p.insert_after("\n")
p.unwrap()
# fix hashtags
for ht in soup.select("a.hashtag"):
ht.unwrap()
# fix links
for link in soup.select("a"):
link.insert_after(link["href"])
link.decompose()
toot = soup.get_text()
toot = toot.rstrip("\n") #remove trailing newline
toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning
return(toot)
cfg = get_config()
client = Mastodon(
client_id=cfg['client']['id'],
client_secret = cfg['client']['secret'],
access_token=cfg['secret'],
api_base_url=cfg['site'])
access_token=cfg['secret'],
api_base_url=cfg['site'])
me = client.account_verify_credentials()
following = client.account_following(me.id)
db = sqlite3.connect("toots.db")
db.text_factory=str
db.text_factory = str
c = db.cursor()
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
db.commit()
def handleCtrlC(signal, frame):
print("\nPREMATURE EVACUATION - Saving chunks")
db.commit()
sys.exit(1)
print("\nPREMATURE EVACUATION - Saving chunks")
db.commit()
sys.exit(1)
signal.signal(signal.SIGINT, handleCtrlC)
def get_toots_legacy(client, id):
i = 0
toots = client.account_statuses(id)
while toots is not None and len(toots) > 0:
for toot in toots:
if toot.spoiler_text != "": continue
if toot.reblog is not None: continue
if toot.visibility not in ["public", "unlisted"]: continue
t = extract_toot(toot.content)
if t != None:
yield {
"toot": t,
"id": toot.id,
"uri": toot.uri
}
toots = client.fetch_next(toots)
i += 1
if i%20 == 0:
print('.', end='', flush=True)
i = 0
toots = client.account_statuses(id)
while toots is not None and len(toots) > 0:
for toot in toots:
if toot.spoiler_text != "":
continue
if toot.reblog is not None:
continue
if toot.visibility not in ["public", "unlisted"]:
continue
t = extract_toot(toot.content)
if t != None:
yield {
"toot": t,
"id": toot.id,
"uri": toot.uri
}
toots = client.fetch_next(toots)
i += 1
if i % 20 == 0:
print('.', end='', flush=True)
for f in following:
last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
if last_toot != None:
last_toot = last_toot[0]
else:
last_toot = 0
print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
last_toot = c.execute(
"SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
if last_toot != None:
last_toot = last_toot[0]
else:
last_toot = 0
print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
#find the user's activitypub outbox
print("WebFingering...")
instance = re.search(r"^.*@(.+)", f.acct)
if instance == None:
instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
else:
instance = instance.group(1)
# find the user's activitypub outbox
print("WebFingering...")
instance = re.search(r"^.*@(.+)", f.acct)
if instance == None:
instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
else:
instance = instance.group(1)
if instance == "bofa.lol":
print("rest in piece bofa, skipping")
continue
# print("{} is on {}".format(f.acct, instance))
try:
r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10)
uri = re.search(r'template="([^"]+)"', r.text).group(1)
uri = uri.format(uri = "{}@{}".format(f.username, instance))
r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10)
j = r.json()
if len(j['aliases']) == 1: #TODO: this is a hack on top of a hack, fix it
uri = j['aliases'][0]
else:
uri = j['aliases'][1]
uri = "{}/outbox?page=true".format(uri)
r = requests.get(uri, timeout=10)
j = r.json()
except Exception:
print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
sys.exit(1)
if instance == "bofa.lol":
print("rest in piece bofa, skipping")
continue
pleroma = False
if 'first' in j and type(j['first']) != str:
print("Pleroma instance detected")
pleroma = True
j = j['first']
else:
print("Mastodon instance detected")
uri = "{}&min_id={}".format(uri, last_toot)
r = requests.get(uri)
j = r.json()
# print("{} is on {}".format(f.acct, instance))
try:
r = requests.get(
"https://{}/.well-known/host-meta".format(instance), timeout=10)
uri = re.search(r'template="([^"]+)"', r.text).group(1)
uri = uri.format(uri="{}@{}".format(f.username, instance))
r = requests.get(
uri, headers={"Accept": "application/json"}, timeout=10)
j = r.json()
if len(j['aliases']) == 1: # TODO: this is a hack on top of a hack, fix it
uri = j['aliases'][0]
else:
uri = j['aliases'][1]
uri = "{}/outbox?page=true".format(uri)
r = requests.get(uri, timeout=10)
j = r.json()
except Exception:
print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
sys.exit(1)
print("Downloading and parsing toots", end='', flush=True)
done = False
try:
while not done and len(j['orderedItems']) > 0:
for oi in j['orderedItems']:
if oi['type'] != "Create":
continue #not a toost. fuck outta here
# its a toost baby
content = oi['object']['content']
if oi['object']['summary'] != None:
#don't download CW'd toots
continue
toot = extract_toot(content)
# print(toot)
try:
if pleroma:
if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0:
#we've caught up to the notices we've already downloaded, so we can stop now
done = True
break
pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
(pid,
f.id,
oi['object']['id'],
toot
)
)
pass
except:
pass #ignore any toots that don't successfully go into the DB
# sys.exit(0)
if not pleroma:
r = requests.get(j['prev'], timeout=15)
else:
r = requests.get(j['next'], timeout=15)
j = r.json()
print('.', end='', flush=True)
print(" Done!")
db.commit()
except:
print("Encountered an error! Saving toots to database and continuing.")
db.commit()
# db.close()
pleroma = False
if 'first' in j and type(j['first']) != str:
print("Pleroma instance detected")
pleroma = True
j = j['first']
else:
print("Mastodon instance detected")
uri = "{}&min_id={}".format(uri, last_toot)
r = requests.get(uri)
j = r.json()
print("Downloading and parsing toots", end='', flush=True)
done = False
try:
while not done and len(j['orderedItems']) > 0:
for oi in j['orderedItems']:
if oi['type'] != "Create":
continue # not a toost. fuck outta here
# its a toost baby
content = oi['object']['content']
if oi['object']['summary'] != None:
# don't download CW'd toots
continue
toot = extract_toot(content)
# print(toot)
try:
if pleroma:
if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0:
# we've caught up to the notices we've already downloaded, so we can stop now
done = True
break
pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
(pid,
f.id,
oi['object']['id'],
toot
)
)
pass
except:
pass # ignore any toots that don't successfully go into the DB
# sys.exit(0)
if not pleroma:
r = requests.get(j['prev'], timeout=15)
else:
r = requests.get(j['next'], timeout=15)
j = r.json()
print('.', end='', flush=True)
print(" Done!")
db.commit()
except:
print("Encountered an error! Saving toots to database and continuing.")
db.commit()
# db.close()
print("Done!")
db.commit()
db.execute("VACUUM") #compact db
db.execute("VACUUM") # compact db
db.commit()
db.close()
db.close()
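A quick way to confirm the harvest step worked, assuming main.py has already run and left toots.db in the working directory (hypothetical check, not part of this commit):

# Hypothetical check, not part of the commit: count the rows main.py stored
# in the `toots` table it creates above.
import sqlite3

db = sqlite3.connect("toots.db")
count = db.execute("SELECT COUNT(*) FROM toots").fetchone()[0]
print("{} toots stored".format(count))
db.close()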

util.py (new file, 97 lines changed)

@@ -0,0 +1,97 @@
"""
Various utility tools
"""
import json
import os
from mastodon import Mastodon
from bs4 import BeautifulSoup
def get_config():
access_token = os.getenv("MASTODON_API_TOKEN")
api_base_url = os.getenv("MASTODON_API_BASE_URL")
if (access_token and api_base_url): #  Heroku mode; use preset token
return {
"secret": access_token,
"site": api_base_url,
"is_heroku": True
}
else: #  Local mode; do OAuth login dance
scopes = ["read:statuses", "read:accounts",
"read:follows", "write:statuses", "read:notifications"]
cfg = json.load(open('config.json', 'r'))
if os.path.exists("clientcred.secret"):
print("Upgrading to new storage method")
cc = open("clientcred.secret").read().split("\n")
cfg['client'] = {
"id": cc[0],
"secret": cc[1]
}
cfg['secret'] = open("usercred.secret").read().rstrip("\n")
os.remove("clientcred.secret")
os.remove("usercred.secret")
if "client" not in cfg:
print("No client credentials, registering application")
client_id, client_secret = Mastodon.create_app("mstdn-ebooks",
api_base_url=cfg['site'],
scopes=scopes,
website="https://github.com/Lynnesbian/mstdn-ebooks")
cfg['client'] = {
"id": client_id,
"secret": client_secret
}
if "secret" not in cfg:
print("No user credentials, logging in")
client = Mastodon(client_id=cfg['client']['id'],
client_secret=cfg['client']['secret'],
api_base_url=cfg['site'])
print("Open this URL: {}".format(
client.auth_request_url(scopes=scopes)))
cfg['secret'] = client.log_in(
code=input("Secret: "), scopes=scopes)
json.dump(cfg, open("config.json", "w+"))
def extract_toot(toot):
toot = toot.replace("&apos;", "'")
toot = toot.replace("&quot;", '"')
soup = BeautifulSoup(toot, "html.parser")
# this is the code that removes all mentions
# TODO: make it so that it removes the @ and instance but keeps the name
for mention in soup.select("span.h-card"):
mention.a.unwrap()
mention.span.unwrap()
# replace <br> with linebreak
for lb in soup.select("br"):
lb.insert_after("\n")
lb.decompose()
# replace <p> with linebreak
for p in soup.select("p"):
p.insert_after("\n")
p.unwrap()
# fix hashtags
for ht in soup.select("a.hashtag"):
ht.unwrap()
# fix links
for link in soup.select("a"):
link.insert_after(link["href"])
link.decompose()
toot = soup.get_text()
toot = toot.rstrip("\n") # remove trailing newline
# put a zws between @ and username to avoid mentioning
toot = toot.replace("@", "@\u200B")
return(toot)
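A minimal smoke test for the Heroku branch of get_config(), assuming both variables it reads are set; the values below are placeholders (hypothetical, not part of this commit):

# Hypothetical smoke test, not part of the commit: with both variables set,
# get_config() should take the Heroku branch and return the preset token.
import os
from util import get_config

os.environ["MASTODON_API_TOKEN"] = "placeholder-token"
os.environ["MASTODON_API_BASE_URL"] = "https://mastodon.example"

cfg = get_config()
assert cfg["is_heroku"] is True
assert cfg["secret"] == "placeholder-token"
assert cfg["site"] == "https://mastodon.example"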