From 246ce4bd7dfbee93302abb01594fbe158f58695f Mon Sep 17 00:00:00 2001 From: blankie Date: Tue, 7 Feb 2023 16:25:36 +0700 Subject: [PATCH] Support links in reddit galleries In the wild: https://redd.it/10vnkmy Tests: - A sane, ordinary test: https://redd.it/10vwtow - 3*4096 occurrences of <&> in captions and URLs: https://redd.it/10vxerk - 3*200 occurrences of <&> in captions and l&r in URLs: https://redd.it/10vxn7n --- redditbot.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/redditbot.py b/redditbot.py index 1afdefd..69d1876 100644 --- a/redditbot.py +++ b/redditbot.py @@ -52,8 +52,8 @@ if isinstance(_send_to_chats, list): send_to_chats[i] = j bot_admins = config_data['config']['bot_admins'] -def title_or_shortlink(submission): - title = submission.title.strip(''.join(( +def superstrip(text): + return text.strip(''.join(( # https://en.wikipedia.org/wiki/Unicode_control_characters#Category_%22Cc%22_control_codes_(C0_and_C1) '\u0000', # NULL (used in null-terminated strings) '\u0009', # HORIZONTAL TABULATION (HT) (inserted by the tab key) @@ -109,6 +109,9 @@ def title_or_shortlink(submission): '\u2060', # word joiner '\uFEFF', # zero width non-breaking space ))) + +def title_or_shortlink(submission): + title = superstrip(submission.title) return f'{html.escape(title)}' if title else submission.shortlink logging.basicConfig(level=logging.INFO) @@ -424,9 +427,21 @@ async def main(): await _download_file(filename, media_metadata_item['s'][media_type]) break caption = f'{text}\n#{count}' - real_caption = (gallery_data_item.get('caption') or '').strip() - if real_caption: + real_caption = superstrip(gallery_data_item.get('caption') or '') + outbound_url = gallery_data_item.get('outbound_url') + # tested longest characters with https://redd.it/10vxerk, got 510 chars max + # probably not a worry unless if they really up both limits + if real_caption and outbound_url: + caption += f': {html.escape(real_caption)}' + elif
real_caption: caption += f': {html.escape(real_caption)}' + elif outbound_url: + # outbound url is unlimited (for some reason) + # a nice arbitrary 150 characters should be ok + if len(outbound_url) <= 150: + caption += f': {html.escape(outbound_url)}' + else: + caption += f': {html.escape(outbound_url[:150])}…' captions.append(caption) files.append(filename) count += 1