code cleanup, fixes #23

2019-05-19 23:06:31 +10:00 · 2019-05-19 23:06:31 +10:00 · 354ea46dcb
parent 5fcefd12b0
commit 354ea46dcb
4 changed files with 55 additions and 41 deletions
--- a/functions.py
+++ b/functions.py
@@ -5,7 +5,7 @@
 import markovify
 from bs4 import BeautifulSoup
-import re, multiprocessing, sqlite3, shutil, os, json
+import re, multiprocessing, sqlite3, shutil, os, json, html
 cfg = json.load(open('config.json'))
@@ -48,21 +48,18 @@ def make_toot(force_markov = False, args = None):
 	return make_toot_markov()
 def make_toot_markov(query = None):
 	tries = 0
 	toot = None
 	while toot == None and tries < 10: #try to make a toot 10 times
 	pin, pout = multiprocessing.Pipe(False)
 	p = multiprocessing.Process(target = make_sentence, args = [pout])
 	p.start()
-		p.join(10) #wait 10 seconds to get something
+	p.join(5) #wait 5 seconds to get something
-		if p.is_alive(): #if it's still trying to make a toot after 10 seconds
+	if p.is_alive(): #if it's still trying to make a toot after 5 seconds
 		p.terminate()
 		p.join()
 			toot = None
 			tries = tries + 1 #give up, and increment tries by one
 	else:
 		toot = pin.recv()
-	if toot == None: #if we've tried and failed ten times, just give up
+
 	if toot == None:
 		toot = "Toot generation failed! Contact Lynne (lynnesbian@fedi.lynnesbian.space) for assistance."
 	return {
 			"toot": toot,
@@ -70,8 +67,7 @@ def make_toot_markov(query = None):
 		}
 def extract_toot(toot):
-	toot = toot.replace("&apos;", "'") #convert HTML stuff to normal stuff
+	toot = html.unescape(toot) #convert HTML escape codes to text
 	toot = toot.replace("&quot;", '"') #ditto
 	soup = BeautifulSoup(toot, "html.parser")
 	for lb in soup.select("br"): #replace <br> with linebreak
 		lb.insert_after("\n")
--- a/gen.py
+++ b/gen.py
@@ -36,7 +36,7 @@ if __name__ == '__main__':
 				client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw'])
 		except Exception as err:
 			toot = {
-			"toot": "An unknown error that should never happen occurred. Maybe it's because of the spoiler text, which is {}. If not, I have no idea what went wrong. This is an error message -- contact lynnesbian@fedi.lynnesbian.space for assistance.".format(cfg['cw'])
+			"toot": "An error occurred while submitting the generated post. Contact lynnesbian@fedi.lynnesbian.space for assistance."
 			}
 			client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")
 	try:
--- a/main.py
+++ b/main.py
@@ -107,7 +107,7 @@ for f in following:
 		last_toot = last_toot[0]
 	else:
 		last_toot = 0
-	print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
+	print("Downloading posts for user @{}, starting from {}".format(f.acct, last_toot))
 	#find the user's activitypub outbox
 	print("WebFingering...")
@@ -122,34 +122,43 @@ for f in following:
 		continue
 	try:
 		# 1. download host-meta to find webfing URL
 		r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10)
 		# 2. use webfinger to find user's info page
 		uri = patterns["uri"].search(r.text).group(1)
 		uri = uri.format(uri = "{}@{}".format(f.username, instance))
 		r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10)
 		j = r.json()
 		found = False
 		for link in j['links']:
 			if link['rel'] == 'self':
 				#this is a link formatted like "https://instan.ce/users/username", which is what we need
 				uri = link['href']
 				found = True
 				break
 		if not found:
 			print("Couldn't find a valid ActivityPub outbox URL.")
 		# 3. download first page of outbox
 		uri = "{}/outbox?page=true".format(uri)
-		r = requests.get(uri, timeout=10)
+		r = requests.get(uri, timeout=15)
 		j = r.json()
-	except Exception:
+	except:
 		print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
 		sys.exit(1)
 	pleroma = False
-	if 'first' in j and type(j['first']) != str:
+	if 'next' not in j:
-		print("Pleroma instance detected")
+		print("Using Pleroma compatibility mode")
 		pleroma = True
 		j = j['first']
 	else:
-		print("Mastodon/Misskey instance detected")
+		print("Using standard mode")
 		uri = "{}&min_id={}".format(uri, last_toot)
 		r = requests.get(uri)
 		j = r.json()
-	print("Downloading and saving toots", end='', flush=True)
+	print("Downloading and saving posts", end='', flush=True)
 	done = False
 	try:
 		while not done and len(j['orderedItems']) > 0:
@@ -179,10 +188,18 @@ for f in following:
 					pass
 				except:
 					pass #ignore any toots that don't successfully go into the DB
 			# get the next/previous page
 			try:
 				if not pleroma:
 					r = requests.get(j['prev'], timeout=15)
 				else:
 					r = requests.get(j['next'], timeout=15)
 			except requests.Timeout:
 				print("HTTP timeout, site did not respond within 15 seconds")
 			except:
 				print("An error occurred while trying to obtain more posts.")
 			j = r.json()
 			print('.', end='', flush=True)
 		print(" Done!")
@@ -193,10 +210,10 @@ for f in following:
 			db.commit()
 		else:
 			# TODO: remove duplicate code
-			print("Encountered an error! Saving toots to database and moving to next followed account.")
+			print("Encountered an error! Saving posts to database and moving to next followed account.")
 			db.commit()
 	except:
-		print("Encountered an error! Saving toots to database and moving to next followed account.")
+		print("Encountered an error! Saving posts to database and moving to next followed account.")
 		db.commit()
 print("Done!")
--- a/reply.py
+++ b/reply.py
@@ -4,7 +4,7 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 import mastodon
-import os, random, re, json
+import random, re, json
 import functions
 from bs4 import BeautifulSoup
@@ -28,6 +28,7 @@ class ReplyListener(mastodon.StreamListener):
 		if notification['type'] == 'mention': #if we're mentioned:
 			acct = "@" + notification['account']['acct'] #get the account's @
 			post_id = notification['status']['id']
 			# check if we've already been participating in this thread
 			try:
 				context = client.status_context(post_id)