From 767911757bcbd8ef2d0f895356fe7c7c82300e63 Mon Sep 17 00:00:00 2001 From: "Andi N. Fiziks" Date: Wed, 5 Dec 2018 04:15:19 +0000 Subject: [PATCH] First pass at attempting to pull login values from env vars --- .gitignore | 1 + Procfile | 3 + app.json | 17 ++- create.py | 106 ++++++++++-------- gen.py | 60 +++++----- main.py | 324 ++++++++++++++++++++++------------------------------- util.py | 97 ++++++++++++++++ 7 files changed, 335 insertions(+), 273 deletions(-) create mode 100644 Procfile create mode 100644 util.py diff --git a/.gitignore b/.gitignore index d23632f..4c3ed36 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ toots.db toots.db-journal toots.db-wal __pycache__/* +public \ No newline at end of file diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..dc056c1 --- /dev/null +++ b/Procfile @@ -0,0 +1,3 @@ +generate: python gen.py +train: python main.py +web: mkdir public && cd public && python -m http.server $PORT \ No newline at end of file diff --git a/app.json b/app.json index 6f11d0e..68441ef 100644 --- a/app.json +++ b/app.json @@ -1,8 +1,13 @@ { - "name": "mstdn-ebooks", - "description": "An ebooks bot for Mastodon (and compatible) users", - "repository": "https://github.com/Lynnesbian/mstdn-ebooks", - "keywords": ["python", "mastodon"], - "website":"https://fedi.lynnesbian.space/@lynnesbian", - "image":"heroku/heroku" + "name": "mstdn-ebooks", + "description": "An ebooks bot for Mastodon (and compatible) users", + "repository": "https://github.com/Lynnesbian/mstdn-ebooks", + "keywords": ["python", "mastodon"], + "website": "https://fedi.lynnesbian.space/@lynnesbian", + "image": "heroku/heroku", + "env": { + "MASTODON_API_TOKEN": "", + "MASTODON_BASE_URL": "" + }, + "addons": ["scheduler"] } diff --git a/create.py b/create.py index caa62a3..050c16a 100755 --- a/create.py +++ b/create.py @@ -4,59 +4,67 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
import markovify -import json -import re, random, multiprocessing, time, sqlite3, shutil, os +import re +import random +import multiprocessing +import time +import sqlite3 +import shutil +import os + def make_sentence(output): - class nlt_fixed(markovify.NewlineText): - def test_sentence_input(self, sentence): - return True #all sentences are valid <3 + class nlt_fixed(markovify.NewlineText): + def test_sentence_input(self, sentence): + return True # all sentences are valid <3 - # with open("corpus.txt", encoding="utf-8") as fp: - # model = nlt_fixed(fp.read()) + # with open("corpus.txt", encoding="utf-8") as fp: + # model = nlt_fixed(fp.read()) - shutil.copyfile("toots.db", "toots-copy.db") - db = sqlite3.connect("toots-copy.db") - db.text_factory=str - c = db.cursor() - toots = c.execute("SELECT content FROM `toots`").fetchall() - toots_str = "" - for toot in toots: - toots_str += "\n{}".format(toot[0]) - model = nlt_fixed(toots_str) - toots_str = None - db.close() - os.remove("toots-copy.db") + shutil.copyfile("toots.db", "toots-copy.db") + db = sqlite3.connect("toots-copy.db") + db.text_factory = str + c = db.cursor() + toots = c.execute("SELECT content FROM `toots`").fetchall() + toots_str = "" + for toot in toots: + toots_str += "\n{}".format(toot[0]) + model = nlt_fixed(toots_str) + toots_str = None + db.close() + os.remove("toots-copy.db") - sentence = None - tries = 0 - while sentence is None and tries < 10: - sentence = model.make_short_sentence(500, tries=10000) - tries = tries + 1 - sentence = re.sub("^@\u202B[^ ]* ", "", sentence) - output.send(sentence) + sentence = None + tries = 0 + while sentence is None and tries < 10: + sentence = model.make_short_sentence(500, tries=10000) + tries = tries + 1 + sentence = re.sub("^@\u202B[^ ]* ", "", sentence) + output.send(sentence) -def make_toot(force_markov = False, args = None): - return make_toot_markov() -def make_toot_markov(query = None): - tries = 0 - toot = None - while toot == None and tries < 25: - pin, pout = multiprocessing.Pipe(False) - p = multiprocessing.Process(target = make_sentence, args = [pout]) - p.start() - p.join(10) - if p.is_alive(): - p.terminate() - p.join() - toot = None - tries = tries + 1 - else: - toot = pin.recv() - if toot == None: - toot = "Toot generation failed! Contact Lynne for assistance." - return { - "toot":toot, - "media":None - } +def make_toot(force_markov=False, args=None): + return make_toot_markov() + + +def make_toot_markov(query=None): + tries = 0 + toot = None + while toot == None and tries < 25: + pin, pout = multiprocessing.Pipe(False) + p = multiprocessing.Process(target=make_sentence, args=[pout]) + p.start() + p.join(10) + if p.is_alive(): + p.terminate() + p.join() + toot = None + tries = tries + 1 + else: + toot = pin.recv() + if toot == None: + toot = "Toot generation failed! Contact Lynne for assistance." + return { + "toot": toot, + "media": None + } diff --git a/gen.py b/gen.py index a07dcce..8c7e72c 100755 --- a/gen.py +++ b/gen.py @@ -4,41 +4,47 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
from mastodon import Mastodon -import argparse, sys, traceback, json +import argparse +import sys +import traceback +import json import create +from util import get_config parser = argparse.ArgumentParser(description='Generate and post a toot.') -parser.add_argument('reply', metavar='reply', type=str, nargs='?', - help='ID of the status to reply to') +parser.add_argument('reply', metavar='reply', type=str, nargs='?', + help='ID of the status to reply to') parser.add_argument('-s', '--simulate', dest='simulate', action='store_true', - help="Print the toot to stdout without posting it") + help="Print the toot to stdout without posting it") args = parser.parse_args() -cfg = json.load(open('config.json')) +cfg = get_config() client = Mastodon( - client_id=cfg['client']['id'], - client_secret=cfg['client']['secret'], - access_token=cfg['secret'], - api_base_url=cfg['site']) + access_token=cfg['secret'], + api_base_url=cfg['site']) if __name__ == '__main__': - toot = create.make_toot() - if not args.simulate: - try: - if toot['media'] != None: - mediaID = client.media_post(toot['media'], description = toot['toot']) - client.status_post(toot['toot'].replace("\n", " "), - media_ids = [mediaID], visibility = "unlisted", spoiler_text = cfg['cw']) - else: - client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw']) - except Exception as err: - toot = { - "toot": - "Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly" \ - + " wrong! While attempting to post a toot, I received the following" \ - + " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2])) - } - client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!") - print(toot['toot']) + toot = create.make_toot() + if not args.simulate: + try: + if toot['media'] != None: + mediaID = client.media_post( + toot['media'], description=toot['toot']) + client.status_post(toot['toot'].replace("\n", " "), + media_ids=[mediaID], visibility="unlisted", spoiler_text=cfg['cw']) + else: + client.status_post( + toot['toot'], visibility='unlisted', spoiler_text=cfg['cw']) + except Exception as err: + toot = { + "toot": + "Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly" + + " wrong! 
While attempting to post a toot, I received the following" + + " error:\n" + + "\n".join(traceback.format_tb(sys.exc_info()[2])) + } + client.status_post( + toot['toot'], visibility='unlisted', spoiler_text="Error!") + print(toot['toot']) diff --git a/main.py b/main.py index b167a8e..00b14af 100755 --- a/main.py +++ b/main.py @@ -6,223 +6,165 @@ from mastodon import Mastodon from os import path -from bs4 import BeautifulSoup -import os, sqlite3, signal, sys, json, re +import os +import sqlite3 +import signal +import sys +import json +import re import requests +from util import get_config, extract_toot -scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"] -cfg = json.load(open('config.json', 'r')) - -if os.path.exists("clientcred.secret"): - print("Upgrading to new storage method") - cc = open("clientcred.secret").read().split("\n") - cfg['client'] = { - "id": cc[0], - "secret": cc[1] - } - cfg['secret'] = open("usercred.secret").read().rstrip("\n") - os.remove("clientcred.secret") - os.remove("usercred.secret") - - -if "client" not in cfg: - print("No client credentials, registering application") - client_id, client_secret = Mastodon.create_app("mstdn-ebooks", - api_base_url=cfg['site'], - scopes=scopes, - website="https://github.com/Lynnesbian/mstdn-ebooks") - - cfg['client'] = { - "id": client_id, - "secret": client_secret - } - -if "secret" not in cfg: - print("No user credentials, logging in") - client = Mastodon(client_id = cfg['client']['id'], - client_secret = cfg['client']['secret'], - api_base_url=cfg['site']) - - print("Open this URL: {}".format(client.auth_request_url(scopes=scopes))) - cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes) - -json.dump(cfg, open("config.json", "w+")) - -def extract_toot(toot): - toot = toot.replace("'", "'") - toot = toot.replace(""", '"') - soup = BeautifulSoup(toot, "html.parser") - - # this is the code that removes all mentions - # TODO: make it so that it removes the @ and instance but keeps the name - for mention in soup.select("span.h-card"): - mention.a.unwrap() - mention.span.unwrap() - - # replace
<br> with linebreak
-    for lb in soup.select("br"):
-        lb.insert_after("\n")
-        lb.decompose()
-
-    # replace <p> </p>
with linebreak - for p in soup.select("p"): - p.insert_after("\n") - p.unwrap() - - # fix hashtags - for ht in soup.select("a.hashtag"): - ht.unwrap() - - # fix links - for link in soup.select("a"): - link.insert_after(link["href"]) - link.decompose() - - toot = soup.get_text() - toot = toot.rstrip("\n") #remove trailing newline - toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning - return(toot) +cfg = get_config() client = Mastodon( - client_id=cfg['client']['id'], - client_secret = cfg['client']['secret'], - access_token=cfg['secret'], - api_base_url=cfg['site']) + access_token=cfg['secret'], + api_base_url=cfg['site']) me = client.account_verify_credentials() following = client.account_following(me.id) db = sqlite3.connect("toots.db") -db.text_factory=str +db.text_factory = str c = db.cursor() c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID") db.commit() + def handleCtrlC(signal, frame): - print("\nPREMATURE EVACUATION - Saving chunks") - db.commit() - sys.exit(1) + print("\nPREMATURE EVACUATION - Saving chunks") + db.commit() + sys.exit(1) + signal.signal(signal.SIGINT, handleCtrlC) + def get_toots_legacy(client, id): - i = 0 - toots = client.account_statuses(id) - while toots is not None and len(toots) > 0: - for toot in toots: - if toot.spoiler_text != "": continue - if toot.reblog is not None: continue - if toot.visibility not in ["public", "unlisted"]: continue - t = extract_toot(toot.content) - if t != None: - yield { - "toot": t, - "id": toot.id, - "uri": toot.uri - } - toots = client.fetch_next(toots) - i += 1 - if i%20 == 0: - print('.', end='', flush=True) + i = 0 + toots = client.account_statuses(id) + while toots is not None and len(toots) > 0: + for toot in toots: + if toot.spoiler_text != "": + continue + if toot.reblog is not None: + continue + if toot.visibility not in ["public", "unlisted"]: + continue + t = extract_toot(toot.content) + if t != None: + yield { + "toot": t, + "id": toot.id, + "uri": toot.uri + } + toots = client.fetch_next(toots) + i += 1 + if i % 20 == 0: + print('.', end='', flush=True) + for f in following: - last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() - if last_toot != None: - last_toot = last_toot[0] - else: - last_toot = 0 - print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) + last_toot = c.execute( + "SELECT id FROM `toots` WHERE userid LIKE ? 
ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() + if last_toot != None: + last_toot = last_toot[0] + else: + last_toot = 0 + print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) - #find the user's activitypub outbox - print("WebFingering...") - instance = re.search(r"^.*@(.+)", f.acct) - if instance == None: - instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1) - else: - instance = instance.group(1) + # find the user's activitypub outbox + print("WebFingering...") + instance = re.search(r"^.*@(.+)", f.acct) + if instance == None: + instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1) + else: + instance = instance.group(1) - if instance == "bofa.lol": - print("rest in piece bofa, skipping") - continue - - # print("{} is on {}".format(f.acct, instance)) - try: - r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10) - uri = re.search(r'template="([^"]+)"', r.text).group(1) - uri = uri.format(uri = "{}@{}".format(f.username, instance)) - r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10) - j = r.json() - if len(j['aliases']) == 1: #TODO: this is a hack on top of a hack, fix it - uri = j['aliases'][0] - else: - uri = j['aliases'][1] - uri = "{}/outbox?page=true".format(uri) - r = requests.get(uri, timeout=10) - j = r.json() - except Exception: - print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") - sys.exit(1) + if instance == "bofa.lol": + print("rest in piece bofa, skipping") + continue - pleroma = False - if 'first' in j and type(j['first']) != str: - print("Pleroma instance detected") - pleroma = True - j = j['first'] - else: - print("Mastodon instance detected") - uri = "{}&min_id={}".format(uri, last_toot) - r = requests.get(uri) - j = r.json() + # print("{} is on {}".format(f.acct, instance)) + try: + r = requests.get( + "https://{}/.well-known/host-meta".format(instance), timeout=10) + uri = re.search(r'template="([^"]+)"', r.text).group(1) + uri = uri.format(uri="{}@{}".format(f.username, instance)) + r = requests.get( + uri, headers={"Accept": "application/json"}, timeout=10) + j = r.json() + if len(j['aliases']) == 1: # TODO: this is a hack on top of a hack, fix it + uri = j['aliases'][0] + else: + uri = j['aliases'][1] + uri = "{}/outbox?page=true".format(uri) + r = requests.get(uri, timeout=10) + j = r.json() + except Exception: + print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") + sys.exit(1) - print("Downloading and parsing toots", end='', flush=True) - done = False - try: - while not done and len(j['orderedItems']) > 0: - for oi in j['orderedItems']: - if oi['type'] != "Create": - continue #not a toost. 
fuck outta here - - # its a toost baby - content = oi['object']['content'] - if oi['object']['summary'] != None: - #don't download CW'd toots - continue - toot = extract_toot(content) - # print(toot) - try: - if pleroma: - if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0: - #we've caught up to the notices we've already downloaded, so we can stop now - done = True - break - pid = re.search(r"[^\/]+$", oi['object']['id']).group(0) - c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", - (pid, - f.id, - oi['object']['id'], - toot - ) - ) - pass - except: - pass #ignore any toots that don't successfully go into the DB - # sys.exit(0) - if not pleroma: - r = requests.get(j['prev'], timeout=15) - else: - r = requests.get(j['next'], timeout=15) - j = r.json() - print('.', end='', flush=True) - print(" Done!") - db.commit() - except: - print("Encountered an error! Saving toots to database and continuing.") - db.commit() - # db.close() + pleroma = False + if 'first' in j and type(j['first']) != str: + print("Pleroma instance detected") + pleroma = True + j = j['first'] + else: + print("Mastodon instance detected") + uri = "{}&min_id={}".format(uri, last_toot) + r = requests.get(uri) + j = r.json() + + print("Downloading and parsing toots", end='', flush=True) + done = False + try: + while not done and len(j['orderedItems']) > 0: + for oi in j['orderedItems']: + if oi['type'] != "Create": + continue # not a toost. fuck outta here + + # its a toost baby + content = oi['object']['content'] + if oi['object']['summary'] != None: + # don't download CW'd toots + continue + toot = extract_toot(content) + # print(toot) + try: + if pleroma: + if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0: + # we've caught up to the notices we've already downloaded, so we can stop now + done = True + break + pid = re.search(r"[^\/]+$", oi['object']['id']).group(0) + c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", + (pid, + f.id, + oi['object']['id'], + toot + ) + ) + pass + except: + pass # ignore any toots that don't successfully go into the DB + # sys.exit(0) + if not pleroma: + r = requests.get(j['prev'], timeout=15) + else: + r = requests.get(j['next'], timeout=15) + j = r.json() + print('.', end='', flush=True) + print(" Done!") + db.commit() + except: + print("Encountered an error! 
Saving toots to database and continuing.")
+        db.commit()
+        # db.close()
 
 print("Done!")
 db.commit()
-db.execute("VACUUM") #compact db
+db.execute("VACUUM")  # compact db
 db.commit()
-db.close()
\ No newline at end of file
+db.close()
diff --git a/util.py b/util.py
new file mode 100644
index 0000000..8ebcbfc
--- /dev/null
+++ b/util.py
@@ -0,0 +1,98 @@
+"""
+Various utility tools
+"""
+
+import json
+import os
+from mastodon import Mastodon
+from bs4 import BeautifulSoup
+
+
+def get_config():
+    access_token = os.getenv("MASTODON_API_TOKEN")
+    api_base_url = os.getenv("MASTODON_API_BASE_URL")
+
+    if (access_token and api_base_url):  # Heroku mode; use preset token
+        return {
+            "secret": access_token,
+            "site": api_base_url,
+            "is_heroku": True
+        }
+    else:  # Local mode; do OAuth login dance
+        scopes = ["read:statuses", "read:accounts",
+                  "read:follows", "write:statuses", "read:notifications"]
+        cfg = json.load(open('config.json', 'r'))
+
+        if os.path.exists("clientcred.secret"):
+            print("Upgrading to new storage method")
+            cc = open("clientcred.secret").read().split("\n")
+            cfg['client'] = {
+                "id": cc[0],
+                "secret": cc[1]
+            }
+            cfg['secret'] = open("usercred.secret").read().rstrip("\n")
+            os.remove("clientcred.secret")
+            os.remove("usercred.secret")
+
+        if "client" not in cfg:
+            print("No client credentials, registering application")
+            client_id, client_secret = Mastodon.create_app("mstdn-ebooks",
+                                                           api_base_url=cfg['site'],
+                                                           scopes=scopes,
+                                                           website="https://github.com/Lynnesbian/mstdn-ebooks")
+
+            cfg['client'] = {
+                "id": client_id,
+                "secret": client_secret
+            }
+
+        if "secret" not in cfg:
+            print("No user credentials, logging in")
+            client = Mastodon(client_id=cfg['client']['id'],
+                              client_secret=cfg['client']['secret'],
+                              api_base_url=cfg['site'])
+
+            print("Open this URL: {}".format(
+                client.auth_request_url(scopes=scopes)))
+            cfg['secret'] = client.log_in(
+                code=input("Secret: "), scopes=scopes)
+
+        json.dump(cfg, open("config.json", "w+"))
+        return cfg  # local mode must also hand the config back to callers
+
+
+def extract_toot(toot):
+    toot = toot.replace("&apos;", "'")
+    toot = toot.replace("&quot;", '"')
+    soup = BeautifulSoup(toot, "html.parser")
+
+    # this is the code that removes all mentions
+    # TODO: make it so that it removes the @ and instance but keeps the name
+    for mention in soup.select("span.h-card"):
+        mention.a.unwrap()
+        mention.span.unwrap()
+
+    # replace <br> with linebreak
+    for lb in soup.select("br"):
+        lb.insert_after("\n")
+        lb.decompose()
+
+    # replace <p> </p>
with linebreak + for p in soup.select("p"): + p.insert_after("\n") + p.unwrap() + + # fix hashtags + for ht in soup.select("a.hashtag"): + ht.unwrap() + + # fix links + for link in soup.select("a"): + link.insert_after(link["href"]) + link.decompose() + + toot = soup.get_text() + toot = toot.rstrip("\n") # remove trailing newline + # put a zws between @ and username to avoid mentioning + toot = toot.replace("@", "@\u200B") + return(toot)
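
For reference, a minimal sketch of how the new env-var path in util.get_config() is expected to behave, assuming the variable names the code reads (MASTODON_API_TOKEN and MASTODON_API_BASE_URL; app.json currently declares MASTODON_BASE_URL instead, so the names may still need reconciling). The values here are placeholders, not a recommended setup:

import os

# Hypothetical smoke test for the env-var branch of get_config().
# With both variables set, get_config() should return the preset credentials
# instead of starting the interactive OAuth login dance.
os.environ["MASTODON_API_TOKEN"] = "example-access-token"       # placeholder
os.environ["MASTODON_API_BASE_URL"] = "https://example.social"  # placeholder

from util import get_config  # needs Mastodon.py and beautifulsoup4 installed

cfg = get_config()
assert cfg["is_heroku"] is True
print(cfg["site"])  # -> https://example.social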