First pass at attempting to pull login values from env vars
This commit is contained in:
parent
5d7fe7a80c
commit
767911757b
|
@ -7,3 +7,4 @@ toots.db
|
||||||
toots.db-journal
|
toots.db-journal
|
||||||
toots.db-wal
|
toots.db-wal
|
||||||
__pycache__/*
|
__pycache__/*
|
||||||
|
public
|
|
@ -0,0 +1,3 @@
|
||||||
|
generate: python gen.py
|
||||||
|
train: python main.py
|
||||||
|
web: mkdir public && cd public && python -m http.server $PORT
|
17
app.json
17
app.json
|
@ -1,8 +1,13 @@
|
||||||
{
|
{
|
||||||
"name": "mstdn-ebooks",
|
"name": "mstdn-ebooks",
|
||||||
"description": "An ebooks bot for Mastodon (and compatible) users",
|
"description": "An ebooks bot for Mastodon (and compatible) users",
|
||||||
"repository": "https://github.com/Lynnesbian/mstdn-ebooks",
|
"repository": "https://github.com/Lynnesbian/mstdn-ebooks",
|
||||||
"keywords": ["python", "mastodon"],
|
"keywords": ["python", "mastodon"],
|
||||||
"website":"https://fedi.lynnesbian.space/@lynnesbian",
|
"website": "https://fedi.lynnesbian.space/@lynnesbian",
|
||||||
"image":"heroku/heroku"
|
"image": "heroku/heroku",
|
||||||
|
"env": {
|
||||||
|
"MASTODON_API_TOKEN": "",
|
||||||
|
"MASTODON_BASE_URL": ""
|
||||||
|
},
|
||||||
|
"addons": ["scheduler"]
|
||||||
}
|
}
|
||||||
|
|
106
create.py
106
create.py
|
@ -4,59 +4,67 @@
|
||||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
import markovify
|
import markovify
|
||||||
import json
|
import re
|
||||||
import re, random, multiprocessing, time, sqlite3, shutil, os
|
import random
|
||||||
|
import multiprocessing
|
||||||
|
import time
|
||||||
|
import sqlite3
|
||||||
|
import shutil
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
def make_sentence(output):
    """Generate one Markov-chain sentence and send it through ``output``.

    The corpus is every ``content`` value of the ``toots`` table, read from
    a throwaway copy of ``toots.db``.  The model is asked for a sentence of
    at most 500 characters, a leading de-fanged mention is stripped, and the
    result is sent on ``output``.

    Args:
        output: Object with a ``send`` method (the writable end of a
            ``multiprocessing.Pipe`` when called from ``make_toot_markov``).
            Receives the generated sentence, or ``None`` when every attempt
            failed.
    """
    class nlt_fixed(markovify.NewlineText):
        def test_sentence_input(self, sentence):
            return True  # all sentences are valid <3

    # Work on a copy of the database; always remove the copy, even when
    # reading it fails.
    shutil.copyfile("toots.db", "toots-copy.db")
    try:
        db = sqlite3.connect("toots-copy.db")
        try:
            db.text_factory = str
            rows = db.cursor().execute(
                "SELECT content FROM `toots`").fetchall()
        finally:
            db.close()
    finally:
        os.remove("toots-copy.db")

    # Same corpus text as before (each toot preceded by a newline), built
    # with a single join instead of repeated string concatenation.
    model = nlt_fixed("".join("\n{}".format(row[0]) for row in rows))
    del rows  # the raw rows are no longer needed once the model exists

    sentence = None
    for _ in range(10):
        sentence = model.make_short_sentence(500, tries=10000)
        if sentence is not None:
            break

    if sentence is not None:
        # Strip a leading mention.  extract_toot() separates "@" from the
        # username with U+200B, so match that as well as the U+202B the
        # pattern originally looked for.
        sentence = re.sub("^@[\u200B\u202B][^ ]* ", "", sentence)
    output.send(sentence)
|
||||||
|
|
||||||
def make_toot(force_markov=False, args=None):
    """Return a freshly generated toot.

    Both parameters are accepted for interface compatibility and are not
    used; generation is always delegated to ``make_toot_markov``.
    """
    return make_toot_markov()


def make_toot_markov(query=None):
    """Generate a toot by running ``make_sentence`` in a child process.

    Each attempt gets 10 seconds; at most 25 attempts are made in total.

    Args:
        query: Unused.

    Returns:
        dict: ``{"toot": <text>, "media": None}``.  ``toot`` is a fixed
        failure message when no attempt produced a sentence.
    """
    toot = None
    for _ in range(25):
        pin, pout = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=make_sentence, args=[pout])
        p.start()
        # Drop the parent's handle on the write end.  Otherwise a child that
        # dies without sending leaves recv() blocked forever, because the
        # pipe never reaches end-of-file.
        pout.close()
        try:
            # poll() returns once data (or end-of-file) is available, or
            # after the 10 second budget has elapsed.
            if pin.poll(10):
                try:
                    toot = pin.recv()
                except EOFError:
                    toot = None  # child exited without sending anything
                p.join(1)  # let a child that has already sent exit cleanly
        finally:
            if p.is_alive():
                p.terminate()
            p.join()
            pin.close()
        if toot is not None:
            break

    if toot is None:
        toot = "Toot generation failed! Contact Lynne for assistance."
    return {
        "toot": toot,
        "media": None
    }
|
||||||
|
|
58
gen.py
58
gen.py
|
@ -4,41 +4,47 @@
|
||||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
import argparse, sys, traceback, json
|
import argparse
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
import json
|
||||||
import create
|
import create
|
||||||
|
from util import get_config
|
||||||
|
|
||||||
# Command line: an optional status ID to reply to (parsed but not used in
# this script body) and a --simulate switch that prints instead of posting.
parser = argparse.ArgumentParser(description='Generate and post a toot.')
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
                    help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
                    help="Print the toot to stdout without posting it")

args = parser.parse_args()

cfg = get_config()

client = Mastodon(
    access_token=cfg['secret'],
    api_base_url=cfg['site'])

if __name__ == '__main__':
    toot = create.make_toot()
    if not args.simulate:
        # The environment-variable configuration carries no 'cw' entry, so
        # look it up leniently instead of raising KeyError.
        cw = cfg.get('cw')
        try:
            if toot['media'] is not None:
                mediaID = client.media_post(
                    toot['media'], description=toot['toot'])
                client.status_post(toot['toot'].replace("\n", " "),
                                   media_ids=[mediaID], visibility="unlisted",
                                   spoiler_text=cw)
            else:
                client.status_post(
                    toot['toot'], visibility='unlisted', spoiler_text=cw)
        except Exception as err:
            # Posting failed: report the traceback as a toot instead.
            toot = {
                "toot":
                "Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly"
                + " wrong! While attempting to post a toot, I received the following"
                + " error:\n"
                + "\n".join(traceback.format_tb(err.__traceback__))
            }
            client.status_post(
                toot['toot'], visibility='unlisted', spoiler_text="Error!")
    # Printed in both modes; with --simulate this is the only output.
    print(toot['toot'])
|
||||||
|
|
318
main.py
318
main.py
|
@ -6,223 +6,165 @@
|
||||||
|
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
from os import path
|
from os import path
|
||||||
from bs4 import BeautifulSoup
|
import os
|
||||||
import os, sqlite3, signal, sys, json, re
|
import sqlite3
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import re
|
||||||
import requests
|
import requests
|
||||||
|
from util import get_config, extract_toot
|
||||||
|
|
||||||
scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"]
|
cfg = get_config()
|
||||||
cfg = json.load(open('config.json', 'r'))
|
|
||||||
|
|
||||||
if os.path.exists("clientcred.secret"):
|
|
||||||
print("Upgrading to new storage method")
|
|
||||||
cc = open("clientcred.secret").read().split("\n")
|
|
||||||
cfg['client'] = {
|
|
||||||
"id": cc[0],
|
|
||||||
"secret": cc[1]
|
|
||||||
}
|
|
||||||
cfg['secret'] = open("usercred.secret").read().rstrip("\n")
|
|
||||||
os.remove("clientcred.secret")
|
|
||||||
os.remove("usercred.secret")
|
|
||||||
|
|
||||||
|
|
||||||
if "client" not in cfg:
|
|
||||||
print("No client credentials, registering application")
|
|
||||||
client_id, client_secret = Mastodon.create_app("mstdn-ebooks",
|
|
||||||
api_base_url=cfg['site'],
|
|
||||||
scopes=scopes,
|
|
||||||
website="https://github.com/Lynnesbian/mstdn-ebooks")
|
|
||||||
|
|
||||||
cfg['client'] = {
|
|
||||||
"id": client_id,
|
|
||||||
"secret": client_secret
|
|
||||||
}
|
|
||||||
|
|
||||||
if "secret" not in cfg:
|
|
||||||
print("No user credentials, logging in")
|
|
||||||
client = Mastodon(client_id = cfg['client']['id'],
|
|
||||||
client_secret = cfg['client']['secret'],
|
|
||||||
api_base_url=cfg['site'])
|
|
||||||
|
|
||||||
print("Open this URL: {}".format(client.auth_request_url(scopes=scopes)))
|
|
||||||
cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes)
|
|
||||||
|
|
||||||
json.dump(cfg, open("config.json", "w+"))
|
|
||||||
|
|
||||||
def extract_toot(toot):
|
|
||||||
toot = toot.replace("'", "'")
|
|
||||||
toot = toot.replace(""", '"')
|
|
||||||
soup = BeautifulSoup(toot, "html.parser")
|
|
||||||
|
|
||||||
# this is the code that removes all mentions
|
|
||||||
# TODO: make it so that it removes the @ and instance but keeps the name
|
|
||||||
for mention in soup.select("span.h-card"):
|
|
||||||
mention.a.unwrap()
|
|
||||||
mention.span.unwrap()
|
|
||||||
|
|
||||||
# replace <br> with linebreak
|
|
||||||
for lb in soup.select("br"):
|
|
||||||
lb.insert_after("\n")
|
|
||||||
lb.decompose()
|
|
||||||
|
|
||||||
# replace <p> with linebreak
|
|
||||||
for p in soup.select("p"):
|
|
||||||
p.insert_after("\n")
|
|
||||||
p.unwrap()
|
|
||||||
|
|
||||||
# fix hashtags
|
|
||||||
for ht in soup.select("a.hashtag"):
|
|
||||||
ht.unwrap()
|
|
||||||
|
|
||||||
# fix links
|
|
||||||
for link in soup.select("a"):
|
|
||||||
link.insert_after(link["href"])
|
|
||||||
link.decompose()
|
|
||||||
|
|
||||||
toot = soup.get_text()
|
|
||||||
toot = toot.rstrip("\n") #remove trailing newline
|
|
||||||
toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning
|
|
||||||
return(toot)
|
|
||||||
|
|
||||||
# API client authenticated with the stored access token.
client = Mastodon(
    access_token=cfg['secret'],
    api_base_url=cfg['site'])

me = client.account_verify_credentials()
# account_following() returns a single page of results; fetch_remaining()
# walks the pagination so every followed account is harvested.
following = client.fetch_remaining(client.account_following(me.id))

# Local store for harvested toots; the table is created on first run.
db = sqlite3.connect("toots.db")
db.text_factory = str
c = db.cursor()
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
db.commit()
|
||||||
|
|
||||||
|
|
||||||
def handleCtrlC(signal, frame):
    """SIGINT handler: commit pending rows, then exit with status 1.

    Args:
        signal: Signal number passed by the interpreter (unused).
        frame: Interrupted stack frame (unused).
    """
    print("\nPREMATURE EVACUATION - Saving chunks")
    # Persist whatever has been inserted so far before bailing out.
    db.commit()
    raise SystemExit(1)
|
||||||
|
|
||||||
|
|
||||||
signal.signal(signal.SIGINT, handleCtrlC)
|
signal.signal(signal.SIGINT, handleCtrlC)
|
||||||
|
|
||||||
|
|
||||||
def get_toots_legacy(client, id):
    """Yield cleaned-up statuses of one account via the client API.

    Statuses that have a spoiler text, are reblogs, or are neither public
    nor unlisted are skipped.  A dot is printed after every 20th page as a
    progress indicator.

    Args:
        client: API client providing ``account_statuses`` and ``fetch_next``.
        id: Account ID whose statuses are fetched.

    Yields:
        dict: ``{"toot": <extracted text>, "id": <status id>,
        "uri": <status uri>}`` for every accepted status.
    """
    pages_seen = 0
    page = client.account_statuses(id)
    while page is not None and len(page) > 0:
        for status in page:
            rejected = (
                status.spoiler_text != ""
                or status.reblog is not None
                or status.visibility not in ["public", "unlisted"]
            )
            if rejected:
                continue
            text = extract_toot(status.content)
            if text is not None:
                yield {
                    "toot": text,
                    "id": status.id,
                    "uri": status.uri
                }
        page = client.fetch_next(page)
        pages_seen += 1
        if pages_seen % 20 == 0:
            print('.', end='', flush=True)
|
||||||
|
|
||||||
|
|
||||||
# Harvest new toots for every followed account by reading its ActivityPub
# outbox directly, then compact and close the database.
for f in following:
    row = c.execute(
        "SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1",
        (f.id,)).fetchone()
    last_toot = row[0] if row is not None else 0
    print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))

    # find the user's activitypub outbox
    print("WebFingering...")
    instance = re.search(r"^.*@(.+)", f.acct)
    if instance is None:
        # No "@host" part in the account name: the account lives on the
        # instance named in the configuration.
        instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
    else:
        instance = instance.group(1)

    if instance == "bofa.lol":
        print("rest in piece bofa, skipping")
        continue

    try:
        r = requests.get(
            "https://{}/.well-known/host-meta".format(instance), timeout=10)
        uri = re.search(r'template="([^"]+)"', r.text).group(1)
        uri = uri.format(uri="{}@{}".format(f.username, instance))
        r = requests.get(
            uri, headers={"Accept": "application/json"}, timeout=10)
        j = r.json()
        if len(j['aliases']) == 1:  # TODO: this is a hack on top of a hack, fix it
            uri = j['aliases'][0]
        else:
            uri = j['aliases'][1]
        uri = "{}/outbox?page=true".format(uri)
        r = requests.get(uri, timeout=10)
        j = r.json()
    except Exception:
        print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
        sys.exit(1)

    pleroma = False
    if 'first' in j and not isinstance(j['first'], str):
        print("Pleroma instance detected")
        pleroma = True
        j = j['first']
    else:
        print("Mastodon instance detected")
        uri = "{}&min_id={}".format(uri, last_toot)
        # Bounded like every other request here, so a stalled server cannot
        # hang the run.
        r = requests.get(uri, timeout=15)
        j = r.json()

    print("Downloading and parsing toots", end='', flush=True)
    done = False
    try:
        while not done and len(j['orderedItems']) > 0:
            for oi in j['orderedItems']:
                if oi['type'] != "Create":
                    continue  # not a toot, skip it

                content = oi['object']['content']
                # Don't download CW'd toots.  An empty-string summary is
                # treated as "no CW", matching the spoiler_text != "" test
                # in get_toots_legacy.
                if oi['object'].get('summary'):
                    continue
                toot = extract_toot(content)
                try:
                    if pleroma:
                        # The full object URI is stored in the `uri` column
                        # (the `id` column only holds its last path segment),
                        # so that is the column to test for already-seen posts.
                        seen = c.execute(
                            "SELECT COUNT(*) FROM toots WHERE uri = ?",
                            (oi['object']['id'],)).fetchone()[0]
                        if seen > 0:
                            # we've caught up to the notices we've already
                            # downloaded, so we can stop now
                            done = True
                            break
                    pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
                    c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
                              (pid,
                               f.id,
                               oi['object']['id'],
                               toot
                               )
                              )
                except Exception:
                    # Best effort: ignore any toots that don't successfully
                    # go into the DB.
                    pass
            if done:
                break  # caught up; no need to fetch another page
            next_uri = j.get('next' if pleroma else 'prev')
            if not next_uri:
                break  # no further page advertised
            r = requests.get(next_uri, timeout=15)
            j = r.json()
            print('.', end='', flush=True)
        print(" Done!")
        db.commit()
    except Exception:
        # Deliberately not a bare except: the SystemExit raised by the
        # SIGINT handler must be allowed to end the program.
        print("Encountered an error! Saving toots to database and continuing.")
        db.commit()

print("Done!")

db.commit()
db.execute("VACUUM")  # compact db
db.commit()
db.close()
|
|
@ -0,0 +1,97 @@
|
||||||
|
"""
|
||||||
|
Various utility tools
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from mastodon import Mastodon
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
def get_config():
    """Return the bot configuration as a dict.

    Two modes are supported:

    * Environment mode: when ``MASTODON_API_TOKEN`` and a base URL
      (``MASTODON_BASE_URL``, or the older ``MASTODON_API_BASE_URL``) are
      set, the configuration is built from them and no file is touched.
    * Local mode: ``config.json`` is loaded, legacy ``*.secret`` files are
      migrated, missing client or user credentials are obtained
      interactively, and the result is written back to ``config.json``.

    Returns:
        dict: Always contains ``"secret"`` (access token) and ``"site"``
        (API base URL).  Environment mode adds ``"cw": None`` and
        ``"is_heroku": True``; local mode returns the contents of
        ``config.json`` plus ``"client"``.
    """
    access_token = os.getenv("MASTODON_API_TOKEN")
    # app.json declares MASTODON_BASE_URL; MASTODON_API_BASE_URL is still
    # honoured so existing deployments keep working.
    api_base_url = (os.getenv("MASTODON_BASE_URL")
                    or os.getenv("MASTODON_API_BASE_URL"))

    if access_token and api_base_url:  # Heroku mode; use preset token
        return {
            "secret": access_token,
            "site": api_base_url,
            # gen.py reads cfg['cw']; provide it so that lookup cannot fail.
            "cw": None,
            "is_heroku": True
        }

    # Local mode; do OAuth login dance
    scopes = ["read:statuses", "read:accounts",
              "read:follows", "write:statuses", "read:notifications"]
    with open('config.json', 'r') as fp:
        cfg = json.load(fp)

    legacy_files = os.path.exists("clientcred.secret")
    if legacy_files:
        print("Upgrading to new storage method")
        with open("clientcred.secret") as fp:
            cc = fp.read().split("\n")
        cfg['client'] = {
            "id": cc[0],
            "secret": cc[1]
        }
        with open("usercred.secret") as fp:
            cfg['secret'] = fp.read().rstrip("\n")

    if "client" not in cfg:
        print("No client credentials, registering application")
        client_id, client_secret = Mastodon.create_app("mstdn-ebooks",
                                                       api_base_url=cfg['site'],
                                                       scopes=scopes,
                                                       website="https://github.com/Lynnesbian/mstdn-ebooks")

        cfg['client'] = {
            "id": client_id,
            "secret": client_secret
        }

    if "secret" not in cfg:
        print("No user credentials, logging in")
        client = Mastodon(client_id=cfg['client']['id'],
                          client_secret=cfg['client']['secret'],
                          api_base_url=cfg['site'])

        print("Open this URL: {}".format(
            client.auth_request_url(scopes=scopes)))
        cfg['secret'] = client.log_in(
            code=input("Secret: "), scopes=scopes)

    with open("config.json", "w") as fp:
        json.dump(cfg, fp)

    # Only delete the legacy files once their contents are safely stored.
    if legacy_files:
        os.remove("clientcred.secret")
        os.remove("usercred.secret")

    return cfg
|
||||||
|
|
||||||
|
|
||||||
|
def extract_toot(toot):
    """Convert a status's HTML content into plain text.

    Mentions and hashtags are reduced to their text, ``<br>`` and ``<p>``
    become newlines, links are replaced by their target URL, and a
    zero-width space is inserted after every ``@`` so the resulting text
    cannot mention anyone.

    Args:
        toot: HTML content of a status, as a string.

    Returns:
        str: The plain-text rendering of the status.
    """
    # Decode the two quote entities up front.
    toot = toot.replace("&apos;", "'")
    toot = toot.replace("&quot;", '"')
    soup = BeautifulSoup(toot, "html.parser")

    # Flatten mentions: drop the link and inner span markup, keep the text.
    # TODO: make it so that it removes the @ and instance but keeps the name
    for mention in soup.select("span.h-card"):
        if mention.a is not None:
            mention.a.unwrap()
        if mention.span is not None:
            mention.span.unwrap()

    # replace <br> with linebreak
    for lb in soup.select("br"):
        lb.insert_after("\n")
        lb.decompose()

    # replace <p> with linebreak
    for p in soup.select("p"):
        p.insert_after("\n")
        p.unwrap()

    # fix hashtags
    for ht in soup.select("a.hashtag"):
        ht.unwrap()

    # fix links: show the target URL instead of the anchor text
    for link in soup.select("a"):
        href = link.get("href")
        if href:
            link.insert_after(href)
            link.decompose()
        else:
            # No target to show; keep the anchor's text rather than failing.
            link.unwrap()

    toot = soup.get_text()
    toot = toot.rstrip("\n")  # remove trailing newline
    # put a zws between @ and username to avoid mentioning
    toot = toot.replace("@", "@\u200B")
    return toot
|
Reference in New Issue