Partially fixed Misskey scraping. Stolen from Grumbulon's fedi-books fork, commit e58c24bf0a

This commit is contained in:
Amber 2022-03-21 22:18:06 -05:00
parent 64676f77a1
commit 7fcec206b0
1 changed file with 3 additions and 3 deletions

View File

@@ -37,7 +37,7 @@ def make_sentence(output, cfg):
sentence = None sentence = None
tries = 0 tries = 0
while sentence is None and tries < 10: while sentence is None and tries < 10:
sentence = model.make_short_sentence(500, tries=10000) sentence = model.make_short_sentence(500, 200, tries=10000)
tries = tries + 1 tries = tries + 1
# optionally remove mentions # optionally remove mentions
@@ -68,10 +68,10 @@ def extract_toot(toot):
toot = html.unescape(toot) # convert HTML escape codes to text toot = html.unescape(toot) # convert HTML escape codes to text
soup = BeautifulSoup(toot, "html.parser") soup = BeautifulSoup(toot, "html.parser")
for lb in soup.select("br"): # replace <br> with linebreak for lb in soup.select("br"): # replace <br> with linebreak
lb.replace_with("\n") lb.name = "\n"
for p in soup.select("p"): # ditto for <p> for p in soup.select("p"): # ditto for <p>
p.replace_with("\n") lb.name = "\n"
for ht in soup.select("a.hashtag"): # convert hashtags from links to text for ht in soup.select("a.hashtag"): # convert hashtags from links to text
ht.unwrap() ht.unwrap()