Support links in reddit galleries

In the wild: https://redd.it/10vnkmy
Tests:
- A sane, ordinary test: https://redd.it/10vwtow
- 3*4096 occurrences of <&> in captions and URLs: https://redd.it/10vxerk
- 3*200 occurrences of <&> in captions and l&r in URLs: https://redd.it/10vxn7n
This commit is contained in:
blankie 2023-02-07 16:25:36 +07:00
parent 94f50613ef
commit 246ce4bd7d
Signed by: blankie
GPG Key ID: CC15FC822C7F61F5
1 changed files with 19 additions and 4 deletions

View File

@ -52,8 +52,8 @@ if isinstance(_send_to_chats, list):
send_to_chats[i] = j send_to_chats[i] = j
bot_admins = config_data['config']['bot_admins'] bot_admins = config_data['config']['bot_admins']
def title_or_shortlink(submission): def superstrip(text):
title = submission.title.strip(''.join(( return text.strip(''.join((
# https://en.wikipedia.org/wiki/Unicode_control_characters#Category_%22Cc%22_control_codes_(C0_and_C1) # https://en.wikipedia.org/wiki/Unicode_control_characters#Category_%22Cc%22_control_codes_(C0_and_C1)
'\u0000', # NULL (used in null-terminated strings) '\u0000', # NULL (used in null-terminated strings)
'\u0009', # HORIZONTAL TABULATION (HT) (inserted by the tab key) '\u0009', # HORIZONTAL TABULATION (HT) (inserted by the tab key)
@ -109,6 +109,9 @@ def title_or_shortlink(submission):
'\u2060', # word joiner '\u2060', # word joiner
'\uFEFF', # zero width non-breaking space '\uFEFF', # zero width non-breaking space
))) )))
def title_or_shortlink(submission):
title = superstrip(submission.title)
return f'<a href="{submission.shortlink}">{html.escape(title)}</a>' if title else submission.shortlink return f'<a href="{submission.shortlink}">{html.escape(title)}</a>' if title else submission.shortlink
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -424,9 +427,21 @@ async def main():
await _download_file(filename, media_metadata_item['s'][media_type]) await _download_file(filename, media_metadata_item['s'][media_type])
break break
caption = f'{text}\n#{count}' caption = f'{text}\n#{count}'
real_caption = (gallery_data_item.get('caption') or '').strip() real_caption = superstrip(gallery_data_item.get('caption') or '')
if real_caption: outbound_url = gallery_data_item.get('outbound_url')
# tested longest characters with https://redd.it/10vxerk, got 510 chars max
# probably not a worry unless they really raise both limits
if real_caption and outbound_url:
caption += f': <a href="{html.escape(outbound_url)}">{html.escape(real_caption)}</a>'
elif real_caption:
caption += f': {html.escape(real_caption)}' caption += f': {html.escape(real_caption)}'
elif outbound_url:
# outbound url is unlimited (for some reason)
# a nice arbitrary 150 characters should be ok
if len(outbound_url) <= 150:
caption += f': {html.escape(outbound_url)}'
else:
caption += f': <a href="{html.escape(outbound_url)}">{html.escape(outbound_url[:150])}…</a>'
captions.append(caption) captions.append(caption)
files.append(filename) files.append(filename)
count += 1 count += 1