code cleanup, fixes #23

This commit is contained in:
Lynne 2019-05-19 23:06:31 +10:00
parent 5fcefd12b0
commit 354ea46dcb
No known key found for this signature in database
GPG Key ID: FB7B970303ACE499
4 changed files with 55 additions and 41 deletions

View File

@ -5,7 +5,7 @@
import markovify
from bs4 import BeautifulSoup
import re, multiprocessing, sqlite3, shutil, os, json
import re, multiprocessing, sqlite3, shutil, os, json, html
cfg = json.load(open('config.json'))
@ -48,21 +48,18 @@ def make_toot(force_markov = False, args = None):
return make_toot_markov()
def make_toot_markov(query = None):
tries = 0
toot = None
while toot == None and tries < 10: #try to make a toot 10 times
pin, pout = multiprocessing.Pipe(False)
p = multiprocessing.Process(target = make_sentence, args = [pout])
p.start()
p.join(10) #wait 10 seconds to get something
if p.is_alive(): #if it's still trying to make a toot after 10 seconds
p.join(5) #wait 5 seconds to get something
if p.is_alive(): #if it's still trying to make a toot after 5 seconds
p.terminate()
p.join()
toot = None
tries = tries + 1 #give up, and increment tries by one
else:
toot = pin.recv()
if toot == None: #if we've tried and failed ten times, just give up
if toot == None:
toot = "Toot generation failed! Contact Lynne (lynnesbian@fedi.lynnesbian.space) for assistance."
return {
"toot": toot,
@ -70,8 +67,7 @@ def make_toot_markov(query = None):
}
def extract_toot(toot):
toot = toot.replace("&apos;", "'") #convert HTML stuff to normal stuff
toot = toot.replace("&quot;", '"') #ditto
toot = html.unescape(toot) #convert HTML escape codes to text
soup = BeautifulSoup(toot, "html.parser")
for lb in soup.select("br"): #replace <br> with linebreak
lb.insert_after("\n")

2
gen.py
View File

@ -36,7 +36,7 @@ if __name__ == '__main__':
client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw'])
except Exception as err:
toot = {
"toot": "An unknown error that should never happen occurred. Maybe it's because of the spoiler text, which is {}. If not, I have no idea what went wrong. This is an error message -- contact lynnesbian@fedi.lynnesbian.space for assistance.".format(cfg['cw'])
"toot": "An error occurred while submitting the generated post. Contact lynnesbian@fedi.lynnesbian.space for assistance."
}
client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")
try:

35
main.py
View File

@ -107,7 +107,7 @@ for f in following:
last_toot = last_toot[0]
else:
last_toot = 0
print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
print("Downloading posts for user @{}, starting from {}".format(f.acct, last_toot))
#find the user's activitypub outbox
print("WebFingering...")
@ -122,34 +122,43 @@ for f in following:
continue
try:
# 1. download host-meta to find the WebFinger URL template
r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10)
# 2. use webfinger to find user's info page
uri = patterns["uri"].search(r.text).group(1)
uri = uri.format(uri = "{}@{}".format(f.username, instance))
r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10)
j = r.json()
found = False
for link in j['links']:
if link['rel'] == 'self':
#this is a link formatted like "https://instan.ce/users/username", which is what we need
uri = link['href']
found = True
break
if not found:
print("Couldn't find a valid ActivityPub outbox URL.")
# 3. download first page of outbox
uri = "{}/outbox?page=true".format(uri)
r = requests.get(uri, timeout=10)
r = requests.get(uri, timeout=15)
j = r.json()
except Exception:
except:
print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
sys.exit(1)
pleroma = False
if 'first' in j and type(j['first']) != str:
print("Pleroma instance detected")
if 'next' not in j:
print("Using Pleroma compatibility mode")
pleroma = True
j = j['first']
else:
print("Mastodon/Misskey instance detected")
print("Using standard mode")
uri = "{}&min_id={}".format(uri, last_toot)
r = requests.get(uri)
j = r.json()
print("Downloading and saving toots", end='', flush=True)
print("Downloading and saving posts", end='', flush=True)
done = False
try:
while not done and len(j['orderedItems']) > 0:
@ -179,10 +188,18 @@ for f in following:
pass
except:
pass #ignore any toots that don't successfully go into the DB
# get the next/previous page
try:
if not pleroma:
r = requests.get(j['prev'], timeout=15)
else:
r = requests.get(j['next'], timeout=15)
except requests.Timeout:
print("HTTP timeout, site did not respond within 15 seconds")
except:
print("An error occurred while trying to obtain more posts.")
j = r.json()
print('.', end='', flush=True)
print(" Done!")
@ -193,10 +210,10 @@ for f in following:
db.commit()
else:
# TODO: remove duplicate code
print("Encountered an error! Saving toots to database and moving to next followed account.")
print("Encountered an error! Saving posts to database and moving to next followed account.")
db.commit()
except:
print("Encountered an error! Saving toots to database and moving to next followed account.")
print("Encountered an error! Saving posts to database and moving to next followed account.")
db.commit()
print("Done!")

View File

@ -4,7 +4,7 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import mastodon
import os, random, re, json
import random, re, json
import functions
from bs4 import BeautifulSoup
@ -28,6 +28,7 @@ class ReplyListener(mastodon.StreamListener):
if notification['type'] == 'mention': #if we're mentioned:
acct = "@" + notification['account']['acct'] #get the account's @
post_id = notification['status']['id']
# check if we've already been participating in this thread
try:
context = client.status_context(post_id)