diff --git a/functions.py b/functions.py
index 50029b9..ca75c3f 100755
--- a/functions.py
+++ b/functions.py
@@ -5,7 +5,7 @@
 
 import markovify
 from bs4 import BeautifulSoup
-import re, multiprocessing, sqlite3, shutil, os, json
+import re, multiprocessing, sqlite3, shutil, os, json, html
 
 cfg = json.load(open('config.json'))
 
@@ -48,21 +48,18 @@ def make_toot(force_markov = False, args = None):
 	return make_toot_markov()
 
 def make_toot_markov(query = None):
-	tries = 0
 	toot = None
-	while toot == None and tries < 10: #try to make a toot 10 times
-		pin, pout = multiprocessing.Pipe(False)
-		p = multiprocessing.Process(target = make_sentence, args = [pout])
-		p.start()
-		p.join(10) #wait 10 seconds to get something
-		if p.is_alive(): #if it's still trying to make a toot after 10 seconds
-			p.terminate()
-			p.join()
-			toot = None
-			tries = tries + 1 #give up, and increment tries by one
-		else:
-			toot = pin.recv()
-	if toot == None: #if we've tried and failed ten times, just give up
+	pin, pout = multiprocessing.Pipe(False)
+	p = multiprocessing.Process(target = make_sentence, args = [pout])
+	p.start()
+	p.join(5) #wait 5 seconds to get something
+	if p.is_alive(): #if it's still trying to make a toot after 5 seconds
+		p.terminate()
+		p.join()
+	else:
+		toot = pin.recv()
+
+	if toot == None:
 		toot = "Toot generation failed! Contact Lynne (lynnesbian@fedi.lynnesbian.space) for assistance."
 	return {
 		"toot": toot,
@@ -70,8 +67,7 @@
 	}
 
 def extract_toot(toot):
-	toot = toot.replace("&apos;", "'") #convert HTML stuff to normal stuff
-	toot = toot.replace("&quot;", '"') #ditto
+	toot = html.unescape(toot) #convert HTML escape codes to text
 	soup = BeautifulSoup(toot, "html.parser")
 	for lb in soup.select("br"): #replace <br> with linebreak
 		lb.insert_after("\n")
diff --git a/gen.py b/gen.py
index 208b2f8..442254f 100755
--- a/gen.py
+++ b/gen.py
@@ -20,8 +20,8 @@ client = None
 if not args.simulate:
 	client = Mastodon(
 		client_id=cfg['client']['id'],
-		client_secret=cfg['client']['secret'],
-		access_token=cfg['secret'],
+		client_secret=cfg['client']['secret'],
+		access_token=cfg['secret'],
 		api_base_url=cfg['site'])
 
 if __name__ == '__main__':
@@ -36,7 +36,7 @@ if __name__ == '__main__':
 			client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw'])
 		except Exception as err:
 			toot = {
-				"toot": "An unknown error that should never happen occurred. Maybe it's because of the spoiler text, which is {}. If not, I have no idea what went wrong. This is an error message -- contact lynnesbian@fedi.lynnesbian.space for assistance.".format(cfg['cw'])
+				"toot": "An error occurred while submitting the generated post. Contact lynnesbian@fedi.lynnesbian.space for assistance."
 			}
 			client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")
 	try:
diff --git a/main.py b/main.py
index 0411921..9393244 100755
--- a/main.py
+++ b/main.py
@@ -107,7 +107,7 @@ for f in following:
 		last_toot = last_toot[0]
 	else:
 		last_toot = 0
-	print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
+	print("Downloading posts for user @{}, starting from {}".format(f.acct, last_toot))
 
 	#find the user's activitypub outbox
 	print("WebFingering...")
@@ -122,34 +122,43 @@ for f in following:
 		continue
 
 	try:
+		# 1. download host-meta to find webfinger URL
 		r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10)
+		# 2. use webfinger to find user's info page
 		uri = patterns["uri"].search(r.text).group(1)
 		uri = uri.format(uri = "{}@{}".format(f.username, instance))
 		r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10)
 		j = r.json()
+		found = False
 		for link in j['links']:
 			if link['rel'] == 'self':
 				#this is a link formatted like "https://instan.ce/users/username", which is what we need
 				uri = link['href']
+				found = True
+				break
+		if not found:
+			print("Couldn't find a valid ActivityPub outbox URL.")
+
+		# 3. download first page of outbox
 		uri = "{}/outbox?page=true".format(uri)
-		r = requests.get(uri, timeout=10)
+		r = requests.get(uri, timeout=15)
 		j = r.json()
-	except Exception:
+	except:
 		print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
 		sys.exit(1)
 
 	pleroma = False
-	if 'first' in j and type(j['first']) != str:
-		print("Pleroma instance detected")
+	if 'next' not in j:
+		print("Using Pleroma compatibility mode")
 		pleroma = True
 		j = j['first']
 	else:
-		print("Mastodon/Misskey instance detected")
+		print("Using standard mode")
 		uri = "{}&min_id={}".format(uri, last_toot)
 		r = requests.get(uri)
 		j = r.json()
 
-	print("Downloading and saving toots", end='', flush=True)
+	print("Downloading and saving posts", end='', flush=True)
 	done = False
 	try:
 		while not done and len(j['orderedItems']) > 0:
@@ -169,7 +178,7 @@ for f in following:
 							done = True
 						if cfg['lang']:
 							try:
-								if oi['object']['contentMap'][cfg['lang']]: # filter for language
+								if oi['object']['contentMap'][cfg['lang']]: # filter for language
 									insert_toot(oi, f, toot, c)
 							except KeyError:
 								#JSON doesn't have contentMap, just insert the toot irregardlessly
@@ -179,10 +188,18 @@ for f in following:
 						pass
 					except:
 						pass #ignore any toots that don't successfully go into the DB
-			if not pleroma:
-				r = requests.get(j['prev'], timeout=15)
-			else:
-				r = requests.get(j['next'], timeout=15)
+
+			# get the next/previous page
+			try:
+				if not pleroma:
+					r = requests.get(j['prev'], timeout=15)
+				else:
+					r = requests.get(j['next'], timeout=15)
+			except requests.Timeout:
+				print("HTTP timeout, site did not respond within 15 seconds")
+			except:
+				print("An error occurred while trying to obtain more posts.")
+
 			j = r.json()
 			print('.', end='', flush=True)
 		print(" Done!")
@@ -193,10 +210,10 @@ for f in following:
 			db.commit()
 		else:
 			# TODO: remove duplicate code
-			print("Encountered an error! Saving toots to database and moving to next followed account.")
+			print("Encountered an error! Saving posts to database and moving to next followed account.")
 			db.commit()
 	except:
-		print("Encountered an error! Saving toots to database and moving to next followed account.")
+		print("Encountered an error! Saving posts to database and moving to next followed account.")
 		db.commit()
 
 print("Done!")
diff --git a/reply.py b/reply.py
index 060a673..837977b 100755
--- a/reply.py
+++ b/reply.py
@@ -4,7 +4,7 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import mastodon
-import os, random, re, json
+import random, re, json
 import functions
 
 from bs4 import BeautifulSoup
@@ -13,8 +13,8 @@ threads = {}
 
 client = mastodon.Mastodon(
 	client_id=cfg['client']['id'],
-	client_secret=cfg['client']['secret'],
-	access_token=cfg['secret'],
+	client_secret=cfg['client']['secret'],
+	access_token=cfg['secret'],
 	api_base_url=cfg['site'])
 
 def extract_toot(toot):
@@ -28,6 +28,7 @@ class ReplyListener(mastodon.StreamListener):
 		if notification['type'] == 'mention': #if we're mentioned:
 			acct = "@" + notification['account']['acct'] #get the account's @
 			post_id = notification['status']['id']
+			# check if we've already been participating in this thread
 
 			try:
 				context = client.status_context(post_id)
@@ -39,10 +40,10 @@
 			for post in context['ancestors']:
 				if post['account']['id'] == me:
 					posts += 1
-					if posts >= cfg['max_thread_length']:
-						# stop replying
-						print("didn't reply (max_thread_length exceeded)")
-						return
+					if posts >= cfg['max_thread_length']:
+						# stop replying
+						print("didn't reply (max_thread_length exceeded)")
+						return
 
 			mention = extract_toot(notification['status']['content'])
 			toot = functions.make_toot(True)['toot'] #generate a toot
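
Some notes on the changes above, with illustrative sketches. None of the example code below is part of the patch.

The functions.py change drops the ten-attempt retry loop in make_toot_markov and keeps a single generation attempt that is abandoned after five seconds. A minimal sketch of that pattern, with a stand-in worker in place of the real markovify-based make_sentence (the worker body here is a placeholder):

import multiprocessing

def make_sentence(pipe):
	# placeholder for the real generator, which builds a markovify model and sends a sentence
	pipe.send("example sentence")

def generate_with_timeout(timeout = 5):
	pin, pout = multiprocessing.Pipe(False)  # one-way pipe: child sends, parent receives
	p = multiprocessing.Process(target = make_sentence, args = [pout])
	p.start()
	p.join(timeout)    # wait up to `timeout` seconds for the child to finish
	if p.is_alive():   # still running: give up and clean up the child
		p.terminate()
		p.join()
		return None
	return pin.recv()  # child finished: read the sentence it sent

if __name__ == '__main__':
	print(generate_with_timeout())

If the child never finishes in time, the caller now gets None after a single attempt and falls back to the "Toot generation failed!" message instead of looping.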
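
The extract_toot change replaces two hand-written entity substitutions with html.unescape from the standard library, which handles every named and numeric HTML entity rather than just &apos; and &quot;. For example:

import html

print(html.unescape("That&apos;s a &quot;toot&quot; &amp; more"))  # That's a "toot" & more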
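
The numbered comments added to main.py label the discovery steps: fetch /.well-known/host-meta to get the instance's webfinger URL template, resolve the account through webfinger and take its rel="self" link to find the ActivityPub actor, then request the first page of that actor's outbox. A condensed sketch of the same flow, assuming the host-meta template uses the usual {uri} placeholder (find_outbox and its lack of error handling are illustrative, not code from the patch):

import re, requests

def find_outbox(username, instance):
	# 1. host-meta advertises the webfinger URL template for this instance
	r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10)
	webfinger = re.search(r'template="([^"]+)"', r.text).group(1)

	# 2. webfinger resolves the account to its ActivityPub actor document
	url = webfinger.replace("{uri}", "{}@{}".format(username, instance))
	r = requests.get(url, headers={"Accept": "application/json"}, timeout=10)
	actor = next(link['href'] for link in r.json()['links'] if link['rel'] == 'self')

	# 3. first page of the actor's outbox, as in the patch
	return requests.get("{}/outbox?page=true".format(actor), timeout=15).json()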
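
The new pleroma check keys off whether the first outbox response contains a 'next' member: when it does not, the patch treats the response as a Pleroma-style collection whose 'first' member embeds the first page and whose pages link onward via 'next'; otherwise it re-requests the page with min_id and follows 'prev', as it does for Mastodon and Misskey. A sketch of walking the outbox under those assumptions (fetch_outbox_items is a hypothetical helper, not code from the patch):

import requests

def fetch_outbox_items(outbox_url, min_id = 0):
	j = requests.get(outbox_url, timeout=15).json()  # outbox_url already ends in ?page=true

	pleroma = 'next' not in j  # the patch's heuristic for Pleroma-style outboxes
	if pleroma:
		j = j['first']         # Pleroma embeds the first page in the collection
	else:
		j = requests.get("{}&min_id={}".format(outbox_url, min_id), timeout=15).json()

	while j.get('orderedItems'):
		for item in j['orderedItems']:
			yield item
		page = j.get('next') if pleroma else j.get('prev')  # follow the appropriate pagination link
		if not page:
			break
		j = requests.get(page, timeout=15).json()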
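
The reply.py hunk at line 39 touches the block that enforces cfg['max_thread_length']: it counts the bot's own statuses among the ancestors of the post that mentioned it and bails out before generating a reply once the limit is reached. Roughly, as a sketch using Mastodon.py's status_context (me is the bot's own account id, as in the surrounding code):

def should_reply(client, status_id, me, max_thread_length):
	context = client.status_context(status_id)  # ancestors and descendants of the mentioning post
	own_posts = sum(1 for post in context['ancestors'] if post['account']['id'] == me)
	return own_posts < max_thread_length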