From de3449ae562b5a3f60190ec63842b0d2fc1dd890 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:55:31 +1000 Subject: [PATCH] added extract_toot function to functions.py --- functions.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/functions.py b/functions.py index 80ec6fe..31caf36 100755 --- a/functions.py +++ b/functions.py @@ -60,3 +60,26 @@ def make_toot_markov(query = None): "toot": toot, "media": None } + +def extract_toot(toot): + soup = BeautifulSoup(toot, "html.parser") + for lb in soup.select("br"): #replace <br> with linebreak + lb.insert_after("\n") + lb.decompose() + + for p in soup.select("p"): #ditto for <p> + p.insert_after("\n") + p.unwrap() + + for ht in soup.select("a.hashtag"): #make hashtags no longer links, just text + ht.unwrap() + + for link in soup.select("a"): #convert