Try to handle empty titles

This commit is contained in:
blankie 2022-09-14 15:47:44 +07:00
parent e5962cfdc3
commit 33dd6c98c7
Signed by: blankie
GPG Key ID: CC15FC822C7F61F5
1 changed file with 55 additions and 2 deletions

View File

@ -51,6 +51,59 @@ if isinstance(_send_to_chats, list):
send_to_chats[i] = j send_to_chats[i] = j
bot_admins = config_data['config']['bot_admins'] bot_admins = config_data['config']['bot_admins']
# Characters stripped from both ends of a submission title before deciding
# whether the title is effectively empty.  Built once at import time instead
# of on every call; each code point is listed exactly once.
_TITLE_STRIP_CHARS = ''.join((
    # https://en.wikipedia.org/wiki/Unicode_control_characters#Category_%22Cc%22_control_codes_(C0_and_C1)
    '\u0000',  # NULL (used in null-terminated strings)
    '\u0009',  # HORIZONTAL TABULATION (HT) (inserted by the tab key)
    '\u000A',  # LINE FEED (LF) (used as a line break)
    '\u000B',  # LINE TABULATION (VT)
    '\u000C',  # FORM FEED (FF) (denotes a page break in a plain text file)
    '\u000D',  # CARRIAGE RETURN (CR) (used in some line-breaking conventions)
    '\u001C',  # INFORMATION SEPARATOR FOUR (FS)
    '\u001D',  # INFORMATION SEPARATOR THREE (GS)
    '\u001E',  # INFORMATION SEPARATOR TWO (RS)
    '\u001F',  # INFORMATION SEPARATOR ONE (US)
    '\u0085',  # NEXT LINE (NEL) (sometimes used as a line break in text transcoded from EBCDIC)
    # https://en.wikipedia.org/wiki/Unicode_control_characters#Unicode_introduced_separators
    '\u2028',  # LINE SEPARATOR (abbreviated LS or LSEP)
    '\u2029',  # PARAGRAPH SEPARATOR (abbreviated PS or PSEP)
    # https://en.wikipedia.org/wiki/Unicode_control_characters#Bidirectional_text_control
    '\u061C',  # ARABIC LETTER MARK
    '\u200E',  # LEFT-TO-RIGHT MARK
    '\u200F',  # RIGHT-TO-LEFT MARK
    '\u202A',  # LEFT-TO-RIGHT EMBEDDING
    '\u202B',  # RIGHT-TO-LEFT EMBEDDING
    '\u202C',  # POP DIRECTIONAL FORMATTING
    '\u202D',  # LEFT-TO-RIGHT OVERRIDE
    '\u202E',  # RIGHT-TO-LEFT OVERRIDE
    '\u2066',  # LEFT-TO-RIGHT ISOLATE
    '\u2067',  # RIGHT-TO-LEFT ISOLATE
    '\u2068',  # FIRST STRONG ISOLATE
    '\u2069',  # POP DIRECTIONAL ISOLATE
    # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
    '\u0020',  # space
    '\u00A0',  # no-break space
    '\u1680',  # ogham space mark
    '\u2000',  # en quad
    '\u2001',  # em quad
    '\u2002',  # en space
    '\u2003',  # em space
    '\u2004',  # three-per-em space
    '\u2005',  # four-per-em space
    '\u2006',  # six-per-em space
    '\u2007',  # figure space
    '\u2008',  # punctuation space
    '\u2009',  # thin space
    '\u200A',  # hair space
    '\u202F',  # narrow no-break space
    '\u205F',  # medium mathematical space
    '\u3000',  # ideographic space
    # https://en.wikipedia.org/wiki/Whitespace_character#Related_Unicode_characters
    # Invisible characters that are not classified as whitespace; a title
    # made only of these would otherwise render as a link with no visible text.
    '\u180E',  # mongolian vowel separator
    '\u200B',  # zero width space
    '\u200C',  # zero width non-joiner
    '\u200D',  # zero width joiner
    '\u2060',  # word joiner
    '\uFEFF',  # zero width no-break space (byte order mark)
))


def title_or_shortlink(submission):
    """Return an HTML link labelled with the submission's title, or its bare shortlink.

    The title is stripped of leading and trailing whitespace, control,
    bidirectional-formatting and zero-width characters.  If anything remains,
    it is HTML-escaped and used as the text of an ``<a>`` element pointing at
    ``submission.shortlink``.  If nothing remains, the shortlink itself is
    returned so the resulting text is never an anchor without visible text.

    Args:
        submission: Any object exposing a ``title`` string and a ``shortlink``
            attribute.

    Returns:
        Either ``'<a href="SHORTLINK">ESCAPED TITLE</a>'`` or
        ``submission.shortlink`` unchanged.
    """
    title = submission.title.strip(_TITLE_STRIP_CHARS)
    if not title:
        return submission.shortlink
    return f'<a href="{submission.shortlink}">{html.escape(title)}</a>'
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
async def main(): async def main():
_added_chats = [] _added_chats = []
@ -299,7 +352,7 @@ async def main():
return ext return ext
async def _actual_start_post(random_post, chats, cpp=None, snw=None, ssw=None): async def _actual_start_post(random_post, chats, cpp=None, snw=None, ssw=None):
text = f'<a href="{random_post.shortlink}">{html.escape(random_post.title)}</a>' text = title_or_shortlink(random_post)
nsfw = random_post.over_18 nsfw = random_post.over_18
spoilers = random_post.spoiler spoilers = random_post.spoiler
cpid = getattr(random_post, 'crosspost_parent', None) cpid = getattr(random_post, 'crosspost_parent', None)
@ -311,7 +364,7 @@ async def main():
nsfw = random_post.over_18 nsfw = random_post.over_18
if ssw and not spoilers: if ssw and not spoilers:
spoilers = random_post.spoiler spoilers = random_post.spoiler
text += f' (crosspost of <a href="{random_post.shortlink}">{html.escape(random_post.title)}</a>)' text += f' (crosspost of {title_or_shortlink(random_post)})'
if spoilers and ssw: if spoilers and ssw:
text = '🙈🙈🙈 SPOILERS 🙈🙈🙈\n' + text text = '🙈🙈🙈 SPOILERS 🙈🙈🙈\n' + text
if nsfw and snw: if nsfw and snw: