First pass at attempting to pull login values from env vars

Andi N. Fiziks 2018-12-05 04:15:19 +00:00
parent 5d7fe7a80c
commit 767911757b
7 changed files with 335 additions and 273 deletions
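
In short: logging in is now driven by a new util.get_config() helper, which prefers the MASTODON_API_TOKEN and MASTODON_API_BASE_URL environment variables (the Heroku case) and falls back to the existing config.json OAuth flow when they are unset. A minimal sketch of that shape, not the exact implementation (which is in util.py below):

import os


def env_login():
    """Prefer credentials from the environment, as this commit does."""
    token = os.getenv("MASTODON_API_TOKEN")
    base_url = os.getenv("MASTODON_API_BASE_URL")
    if token and base_url:
        # Heroku-style deploy: credentials come straight from config vars
        return {"secret": token, "site": base_url, "is_heroku": True}
    return None  # caller falls back to the interactive config.json flow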

.gitignore (1 changed line)

@@ -7,3 +7,4 @@ toots.db
 toots.db-journal
 toots.db-wal
 __pycache__/*
+public

Procfile (new file, 3 lines)

@@ -0,0 +1,3 @@
+generate: python gen.py
+train: python main.py
+web: mkdir public && cd public && python -m http.server $PORT

app.json

@@ -1,8 +1,13 @@
 {
   "name": "mstdn-ebooks",
   "description": "An ebooks bot for Mastodon (and compatible) users",
   "repository": "https://github.com/Lynnesbian/mstdn-ebooks",
   "keywords": ["python", "mastodon"],
-  "website":"https://fedi.lynnesbian.space/@lynnesbian",
-  "image":"heroku/heroku"
+  "website": "https://fedi.lynnesbian.space/@lynnesbian",
+  "image": "heroku/heroku",
+  "env": {
+    "MASTODON_API_TOKEN": "",
+    "MASTODON_BASE_URL": ""
+  },
+  "addons": ["scheduler"]
 }

create.py (106 changed lines)
The grouped imports are split one per line (the json import is dropped) and the module is reformatted; the file after this change:

@@ -4,59 +4,67 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import markovify
import re
import random
import multiprocessing
import time
import sqlite3
import shutil
import os


def make_sentence(output):
    class nlt_fixed(markovify.NewlineText):
        def test_sentence_input(self, sentence):
            return True  # all sentences are valid <3
    # with open("corpus.txt", encoding="utf-8") as fp:
    #     model = nlt_fixed(fp.read())
    shutil.copyfile("toots.db", "toots-copy.db")
    db = sqlite3.connect("toots-copy.db")
    db.text_factory = str
    c = db.cursor()
    toots = c.execute("SELECT content FROM `toots`").fetchall()
    toots_str = ""
    for toot in toots:
        toots_str += "\n{}".format(toot[0])
    model = nlt_fixed(toots_str)
    toots_str = None
    db.close()
    os.remove("toots-copy.db")

    sentence = None
    tries = 0
    while sentence is None and tries < 10:
        sentence = model.make_short_sentence(500, tries=10000)
        tries = tries + 1
    sentence = re.sub("^@\u202B[^ ]* ", "", sentence)
    output.send(sentence)


def make_toot(force_markov=False, args=None):
    return make_toot_markov()


def make_toot_markov(query=None):
    tries = 0
    toot = None
    while toot == None and tries < 25:
        pin, pout = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=make_sentence, args=[pout])
        p.start()
        p.join(10)
        if p.is_alive():
            p.terminate()
            p.join()
            toot = None
            tries = tries + 1
        else:
            toot = pin.recv()
    if toot == None:
        toot = "Toot generation failed! Contact Lynne for assistance."
    return {
        "toot": toot,
        "media": None
    }

gen.py (60 changed lines)
Configuration now comes from util.get_config() instead of reading config.json directly, and the Mastodon client is built from the access token and base URL alone (the client id/secret arguments are gone); the file after this change:

@@ -4,41 +4,47 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
import argparse
import sys
import traceback
import json
import create
from util import get_config

parser = argparse.ArgumentParser(description='Generate and post a toot.')
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
                    help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
                    help="Print the toot to stdout without posting it")
args = parser.parse_args()

cfg = get_config()

client = Mastodon(
    access_token=cfg['secret'],
    api_base_url=cfg['site'])

if __name__ == '__main__':
    toot = create.make_toot()
    if not args.simulate:
        try:
            if toot['media'] != None:
                mediaID = client.media_post(
                    toot['media'], description=toot['toot'])
                client.status_post(toot['toot'].replace("\n", " "),
                                   media_ids=[mediaID], visibility="unlisted", spoiler_text=cfg['cw'])
            else:
                client.status_post(
                    toot['toot'], visibility='unlisted', spoiler_text=cfg['cw'])
        except Exception as err:
            toot = {
                "toot":
                    "Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly"
                    + " wrong! While attempting to post a toot, I received the following"
                    + " error:\n" +
                    "\n".join(traceback.format_tb(sys.exc_info()[2]))
            }
            client.status_post(
                toot['toot'], visibility='unlisted', spoiler_text="Error!")
    print(toot['toot'])

main.py (324 changed lines)
The OAuth scopes, config.json handling, credential-upgrade logic, and the BeautifulSoup-based extract_toot() move out of main.py into the new util.py, and the Mastodon client is likewise built from the access token and base URL alone; the file after this change:

@@ -6,223 +6,165 @@
from mastodon import Mastodon
from os import path
import os
import sqlite3
import signal
import sys
import json
import re
import requests
from util import get_config, extract_toot

cfg = get_config()

client = Mastodon(
    access_token=cfg['secret'],
    api_base_url=cfg['site'])

me = client.account_verify_credentials()
following = client.account_following(me.id)

db = sqlite3.connect("toots.db")
db.text_factory = str
c = db.cursor()
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
db.commit()


def handleCtrlC(signal, frame):
    print("\nPREMATURE EVACUATION - Saving chunks")
    db.commit()
    sys.exit(1)


signal.signal(signal.SIGINT, handleCtrlC)


def get_toots_legacy(client, id):
    i = 0
    toots = client.account_statuses(id)
    while toots is not None and len(toots) > 0:
        for toot in toots:
            if toot.spoiler_text != "":
                continue
            if toot.reblog is not None:
                continue
            if toot.visibility not in ["public", "unlisted"]:
                continue
            t = extract_toot(toot.content)
            if t != None:
                yield {
                    "toot": t,
                    "id": toot.id,
                    "uri": toot.uri
                }
        toots = client.fetch_next(toots)
        i += 1
        if i % 20 == 0:
            print('.', end='', flush=True)


for f in following:
    last_toot = c.execute(
        "SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
    if last_toot != None:
        last_toot = last_toot[0]
    else:
        last_toot = 0
    print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))

    # find the user's activitypub outbox
    print("WebFingering...")
    instance = re.search(r"^.*@(.+)", f.acct)
    if instance == None:
        instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
    else:
        instance = instance.group(1)

    if instance == "bofa.lol":
        print("rest in piece bofa, skipping")
        continue

    # print("{} is on {}".format(f.acct, instance))
    try:
        r = requests.get(
            "https://{}/.well-known/host-meta".format(instance), timeout=10)
        uri = re.search(r'template="([^"]+)"', r.text).group(1)
        uri = uri.format(uri="{}@{}".format(f.username, instance))
        r = requests.get(
            uri, headers={"Accept": "application/json"}, timeout=10)
        j = r.json()
        if len(j['aliases']) == 1:  # TODO: this is a hack on top of a hack, fix it
            uri = j['aliases'][0]
        else:
            uri = j['aliases'][1]
        uri = "{}/outbox?page=true".format(uri)
        r = requests.get(uri, timeout=10)
        j = r.json()
    except Exception:
        print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
        sys.exit(1)

    pleroma = False
    if 'first' in j and type(j['first']) != str:
        print("Pleroma instance detected")
        pleroma = True
        j = j['first']
    else:
        print("Mastodon instance detected")
        uri = "{}&min_id={}".format(uri, last_toot)
        r = requests.get(uri)
        j = r.json()

    print("Downloading and parsing toots", end='', flush=True)
    done = False
    try:
        while not done and len(j['orderedItems']) > 0:
            for oi in j['orderedItems']:
                if oi['type'] != "Create":
                    continue  # not a toost. fuck outta here

                # its a toost baby
                content = oi['object']['content']
                if oi['object']['summary'] != None:
                    # don't download CW'd toots
                    continue
                toot = extract_toot(content)
                # print(toot)
                try:
                    if pleroma:
                        if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0:
                            # we've caught up to the notices we've already downloaded, so we can stop now
                            done = True
                            break
                    pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
                    c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
                              (pid,
                               f.id,
                               oi['object']['id'],
                               toot
                               )
                              )
                    pass
                except:
                    pass  # ignore any toots that don't successfully go into the DB
                    # sys.exit(0)
            if not pleroma:
                r = requests.get(j['prev'], timeout=15)
            else:
                r = requests.get(j['next'], timeout=15)
            j = r.json()
            print('.', end='', flush=True)
        print(" Done!")
        db.commit()
    except:
        print("Encountered an error! Saving toots to database and continuing.")
        db.commit()
    # db.close()

print("Done!")
db.commit()
db.execute("VACUUM")  # compact db
db.commit()
db.close()

util.py (new file, 97 lines)

@@ -0,0 +1,97 @@
"""
Various utility tools
"""
import json
import os

from mastodon import Mastodon
from bs4 import BeautifulSoup


def get_config():
    access_token = os.getenv("MASTODON_API_TOKEN")
    api_base_url = os.getenv("MASTODON_API_BASE_URL")
    if (access_token and api_base_url):  # Heroku mode; use preset token
        return {
            "secret": access_token,
            "site": api_base_url,
            "is_heroku": True
        }
    else:  # Local mode; do OAuth login dance
        scopes = ["read:statuses", "read:accounts",
                  "read:follows", "write:statuses", "read:notifications"]
        cfg = json.load(open('config.json', 'r'))

        if os.path.exists("clientcred.secret"):
            print("Upgrading to new storage method")
            cc = open("clientcred.secret").read().split("\n")
            cfg['client'] = {
                "id": cc[0],
                "secret": cc[1]
            }
            cfg['secret'] = open("usercred.secret").read().rstrip("\n")
            os.remove("clientcred.secret")
            os.remove("usercred.secret")

        if "client" not in cfg:
            print("No client credentials, registering application")
            client_id, client_secret = Mastodon.create_app("mstdn-ebooks",
                                                           api_base_url=cfg['site'],
                                                           scopes=scopes,
                                                           website="https://github.com/Lynnesbian/mstdn-ebooks")
            cfg['client'] = {
                "id": client_id,
                "secret": client_secret
            }

        if "secret" not in cfg:
            print("No user credentials, logging in")
            client = Mastodon(client_id=cfg['client']['id'],
                              client_secret=cfg['client']['secret'],
                              api_base_url=cfg['site'])

            print("Open this URL: {}".format(
                client.auth_request_url(scopes=scopes)))
            cfg['secret'] = client.log_in(
                code=input("Secret: "), scopes=scopes)

        json.dump(cfg, open("config.json", "w+"))


def extract_toot(toot):
    toot = toot.replace("&apos;", "'")
    toot = toot.replace("&quot;", '"')
    soup = BeautifulSoup(toot, "html.parser")

    # this is the code that removes all mentions
    # TODO: make it so that it removes the @ and instance but keeps the name
    for mention in soup.select("span.h-card"):
        mention.a.unwrap()
        mention.span.unwrap()

    # replace <br> with linebreak
    for lb in soup.select("br"):
        lb.insert_after("\n")
        lb.decompose()

    # replace <p> with linebreak
    for p in soup.select("p"):
        p.insert_after("\n")
        p.unwrap()

    # fix hashtags
    for ht in soup.select("a.hashtag"):
        ht.unwrap()

    # fix links
    for link in soup.select("a"):
        link.insert_after(link["href"])
        link.decompose()

    toot = soup.get_text()
    toot = toot.rstrip("\n")  # remove trailing newline
    # put a zws between @ and username to avoid mentioning
    toot = toot.replace("@", "@\u200B")
    return(toot)
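
A quick way to exercise the new Heroku-mode path locally is to set the two variables get_config() reads before calling it; the values below are placeholders. Note that util.py reads MASTODON_API_BASE_URL while app.json declares MASTODON_BASE_URL, so the two names need to agree for the Heroku config vars to be picked up:

import os

# Placeholder credentials for a smoke test; substitute a real token and instance URL.
os.environ["MASTODON_API_TOKEN"] = "example-token"
os.environ["MASTODON_API_BASE_URL"] = "https://example.social"

from util import get_config

cfg = get_config()
print(cfg["site"], cfg["is_heroku"])  # expected: https://example.social True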