# Provenance: commit 33dd6c98c75568e789d1b23b90003551e2b423a6 by blankie,
# "Try to handle empty titles", applied to redditbot.py.  In that commit
# _actual_start_post() calls title_or_shortlink(random_post) both for the
# post's own text and for the " (crosspost of ...)" suffix.

# Characters stripped from both ends of a title before deciding whether it is
# empty.  Each code point is listed exactly once; the string is built once at
# import time instead of on every call.
_BLANK_TITLE_CHARS = ''.join((
    # https://en.wikipedia.org/wiki/Unicode_control_characters#Category_%22Cc%22_control_codes_(C0_and_C1)
    '\u0000',  # NULL
    '\u0009',  # HORIZONTAL TABULATION (HT)
    '\u000A',  # LINE FEED (LF)
    '\u000B',  # LINE TABULATION (VT)
    '\u000C',  # FORM FEED (FF)
    '\u000D',  # CARRIAGE RETURN (CR)
    '\u001C',  # FILE SEPARATOR
    '\u001D',  # GROUP SEPARATOR
    '\u001E',  # RECORD SEPARATOR
    '\u001F',  # UNIT SEPARATOR
    '\u0085',  # NEXT LINE (NEL)
    # https://en.wikipedia.org/wiki/Unicode_control_characters#Unicode_introduced_separators
    '\u2028',  # LINE SEPARATOR
    '\u2029',  # PARAGRAPH SEPARATOR
    # https://en.wikipedia.org/wiki/Unicode_control_characters#Bidirectional_text_control
    '\u061C',  # ARABIC LETTER MARK
    '\u200E',  # LEFT-TO-RIGHT MARK
    '\u200F',  # RIGHT-TO-LEFT MARK
    '\u202A',  # LEFT-TO-RIGHT EMBEDDING
    '\u202B',  # RIGHT-TO-LEFT EMBEDDING
    '\u202C',  # POP DIRECTIONAL FORMATTING
    '\u202D',  # LEFT-TO-RIGHT OVERRIDE
    '\u202E',  # RIGHT-TO-LEFT OVERRIDE
    '\u2066',  # LEFT-TO-RIGHT ISOLATE
    '\u2067',  # RIGHT-TO-LEFT ISOLATE
    '\u2068',  # FIRST STRONG ISOLATE
    '\u2069',  # POP DIRECTIONAL ISOLATE
    # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
    '\u0020',  # space
    '\u00A0',  # no-break space
    '\u1680',  # ogham space mark
    '\u2000',  # en quad
    '\u2001',  # em quad
    '\u2002',  # en space
    '\u2003',  # em space
    '\u2004',  # three-per-em space
    '\u2005',  # four-per-em space
    '\u2006',  # six-per-em space
    '\u2007',  # figure space
    '\u2008',  # punctuation space
    '\u2009',  # thin space
    '\u200A',  # hair space
    '\u202F',  # narrow no-break space
    '\u205F',  # medium mathematical space
    '\u3000',  # ideographic space
    # Zero-width characters: not White_Space in Unicode, but a title made
    # only of these still renders as nothing, so treat them as blank too.
    '\u200B',  # zero width space
    '\u200C',  # zero width non-joiner
    '\u200D',  # zero width joiner
    '\u2060',  # word joiner
    '\uFEFF',  # zero width no-break space (BOM)
))


def title_or_shortlink(submission):
    """Return a displayable label for *submission*.

    The title has blank/invisible characters (see ``_BLANK_TITLE_CHARS``)
    removed from both ends; characters in the middle are left alone.

    Args:
        submission: Object exposing ``title`` and ``shortlink`` attributes.

    Returns:
        The stripped title, HTML-escaped, when anything is left of it;
        otherwise ``submission.shortlink`` unchanged (not escaped).
    """
    # A missing (None) title is handled the same way as an empty one.
    title = (submission.title or '').strip(_BLANK_TITLE_CHARS)
    return html.escape(title) if title else submission.shortlink
