Support links in reddit galleries
In the wild: https://redd.it/10vnkmy Tests: - A sane, ordinary test: https://redd.it/10vwtow - 3*4096 occurrences of <&> in captions and URLs: https://redd.it/10vxerk - 3*200 occurrences of <&> in captions and l&r in URLs: https://redd.it/10vxn7n
This commit is contained in:
parent
94f50613ef
commit
246ce4bd7d
23
redditbot.py
23
redditbot.py
|
@ -52,8 +52,8 @@ if isinstance(_send_to_chats, list):
|
||||||
send_to_chats[i] = j
|
send_to_chats[i] = j
|
||||||
bot_admins = config_data['config']['bot_admins']
|
bot_admins = config_data['config']['bot_admins']
|
||||||
|
|
||||||
def title_or_shortlink(submission):
|
def superstrip(text):
|
||||||
title = submission.title.strip(''.join((
|
return text.strip(''.join((
|
||||||
# https://en.wikipedia.org/wiki/Unicode_control_characters#Category_%22Cc%22_control_codes_(C0_and_C1)
|
# https://en.wikipedia.org/wiki/Unicode_control_characters#Category_%22Cc%22_control_codes_(C0_and_C1)
|
||||||
'\u0000', # NULL (used in null-terminated strings)
|
'\u0000', # NULL (used in null-terminated strings)
|
||||||
'\u0009', # HORIZONTAL TABULATION (HT) (inserted by the tab key)
|
'\u0009', # HORIZONTAL TABULATION (HT) (inserted by the tab key)
|
||||||
|
@ -109,6 +109,9 @@ def title_or_shortlink(submission):
|
||||||
'\u2060', # word joiner
|
'\u2060', # word joiner
|
||||||
'\uFEFF', # zero width non-breaking space
|
'\uFEFF', # zero width non-breaking space
|
||||||
)))
|
)))
|
||||||
|
|
||||||
|
def title_or_shortlink(submission):
|
||||||
|
title = superstrip(submission.title)
|
||||||
return f'<a href="{submission.shortlink}">{html.escape(title)}</a>' if title else submission.shortlink
|
return f'<a href="{submission.shortlink}">{html.escape(title)}</a>' if title else submission.shortlink
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
@ -424,9 +427,21 @@ async def main():
|
||||||
await _download_file(filename, media_metadata_item['s'][media_type])
|
await _download_file(filename, media_metadata_item['s'][media_type])
|
||||||
break
|
break
|
||||||
caption = f'{text}\n#{count}'
|
caption = f'{text}\n#{count}'
|
||||||
real_caption = (gallery_data_item.get('caption') or '').strip()
|
real_caption = superstrip(gallery_data_item.get('caption') or '')
|
||||||
if real_caption:
|
outbound_url = gallery_data_item.get('outbound_url')
|
||||||
|
# tested longest characters with https://redd.it/10vxerk, got 510 chars max
|
||||||
|
# probably not a worry unless they significantly raise both limits
|
||||||
|
if real_caption and outbound_url:
|
||||||
|
caption += f': <a href="{html.escape(outbound_url)}">{html.escape(real_caption)}</a>'
|
||||||
|
elif real_caption:
|
||||||
caption += f': {html.escape(real_caption)}'
|
caption += f': {html.escape(real_caption)}'
|
||||||
|
elif outbound_url:
|
||||||
|
# outbound url is unlimited (for some reason)
|
||||||
|
# a nice arbitrary 150 characters should be ok
|
||||||
|
if len(outbound_url) <= 150:
|
||||||
|
caption += f': {html.escape(outbound_url)}'
|
||||||
|
else:
|
||||||
|
caption += f': <a href="{html.escape(outbound_url)}">{html.escape(outbound_url[:150])}…</a>'
|
||||||
captions.append(caption)
|
captions.append(caption)
|
||||||
files.append(filename)
|
files.append(filename)
|
||||||
count += 1
|
count += 1
|
||||||
|
|
Loading…
Reference in New Issue