main.py now uses functions.py's extract_toot func

This commit is contained in:
Lynne 2019-01-11 22:58:17 +10:00
parent 3e3f905839
commit 9bbd659bf5
No known key found for this signature in database
GPG Key ID: FB7B970303ACE499
1 changed file with 2 additions and 30 deletions

32
main.py
View File

@@ -9,6 +9,7 @@ from os import path
from bs4 import BeautifulSoup
import os, sqlite3, signal, sys, json, re
import requests
import functions
scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"]
cfg = json.load(open('config.json', 'r'))
@@ -56,36 +57,7 @@ if "secret" not in cfg:
json.dump(cfg, open("config.json", "w+"))
def extract_toot(toot):
toot = toot.replace("&apos;", "'")
toot = toot.replace("&quot;", '"')
soup = BeautifulSoup(toot, "html.parser")
# this is the code that removes all mentions
for mention in soup.select("span.h-card"):
mention.a.unwrap()
mention.span.unwrap()
# replace <br> with linebreak
for lb in soup.select("br"):
lb.insert_after("\n")
lb.decompose()
# replace <p> with linebreak
for p in soup.select("p"):
p.insert_after("\n")
p.unwrap()
# fix hashtags
for ht in soup.select("a.hashtag"):
ht.unwrap()
# fix links
for link in soup.select("a"):
link.insert_after(link["href"])
link.decompose()
toot = soup.get_text()
toot = toot.rstrip("\n") #remove trailing newline
toot = functions.extract_toot(toot)
toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning
return(toot)