diff --git a/redditbot.py b/redditbot.py
index b08bb64..425d3df 100644
--- a/redditbot.py
+++ b/redditbot.py
@@ -15,6 +15,7 @@ import yaml
 import praw
 import aiohttp
 import aiocron
+from bs4 import BeautifulSoup
 from telethon import TelegramClient, events
 from telethon.utils import chunks
 
@@ -189,7 +190,8 @@ async def main():
             splitted = os.path.splitext(parsed[2])
             domain = getattr(random_post, 'domain', parsed[1])
             preview = getattr(random_post, 'preview', None)
-            if domain.endswith('imgur.com'):
+            if domain.endswith(
+                    'imgur.com'):
                 parsed[1] = 'i.imgur.com'
                 if parsed[2].startswith('/a/'):
                     albumid = os.path.split(parsed[2])[1]
@@ -204,11 +206,16 @@ async def main():
                     files = []
                     captions = []
                     for a, i in enumerate(apidata['images']):
-                        to_append = f'{text}\n#{a + 1}'
+                        to_append = f'#{a + 1}'
                         desc = i['description']
                         if desc:
-                            to_append += ': ' + html.escape(desc)
-                        captions.append(to_append)
+                            to_append += ': ' + desc.strip()
+                        caplength = 2047 - len(client.parse_mode.parse(text)[0])
+                        captext = to_append[:caplength]
+                        if len(captext) >= caplength:
+                            captext = captext[:-1]
+                            captext += '…'
+                        captions.append(text + '\n' + html.escape(captext))
                         filename = os.path.join(tempdir, str(time.time()))
                         await _download_file(filename, f'https://i.imgur.com/{i["hash"]}{i["ext"]}')
                         files.append(filename)
@@ -243,8 +250,36 @@ async def main():
             await _download_file(filename, url)
             ext = await _get_file_ext(filename)
             if ext.startswith('.htm'):
+                with open(filename) as file:
+                    soup = BeautifulSoup(file.read())
+                ptitle = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:title' and tag.attrs.get('content')) or soup.find('title')
+                if ptitle:
+                    ptitle = ptitle.attrs.get('content', ptitle.text).strip()
+                pdesc = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:description' and tag.attrs.get('content')) or soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('name') == 'description' and tag.attrs.get('content'))
+                if pdesc:
+                    pdesc = pdesc.attrs.get('content', pdesc.text).strip()
+                pimg = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:image' and tag.attrs.get('content'))
+                if pimg:
+                    pimg = pimg.attrs.get('content', '').strip()
+                tat = f'{text}\n\nURL: '
+                if ptitle:
+                    tat += f'{html.escape(ptitle)}'
+                else:
+                    tat += url
                 files = []
-                captions = [f'{zws}{text}\n\nURL: {url}']
+                if pimg:
+                    await _download_file(filename, pimg)
+                    files.append(filename)
+                    tat = f'{zws}{tat}'
+                if pdesc:
+                    caplength = 2047 if pimg else 4095
+                    caplength -= len(client.parse_mode.parse(tat)[0])
+                    captext = pdesc[:caplength]
+                    if len(captext) >= caplength:
+                        captext = captext[:-1]
+                        captext += '…'
+                    tat += '\n' + captext
+                captions = [tat]
             for a, i in enumerate(files):
                 ext = await _get_file_ext(i)
                 os.rename(i, i + ext)
@@ -254,7 +289,7 @@ async def main():
         if getattr(random_post, 'selftext', None):
             caplength = 4094 - len(client.parse_mode.parse(text)[0])
             text += '\n\n'
-            captext = random_post.selftext[:caplength]
+            captext = random_post.selftext.strip()[:caplength]
             if len(captext) >= caplength:
                 captext = captext[:-1]
                 captext += '…'
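
For reference, a standalone sketch of the link-preview logic the `.htm` branch above adds: pull `og:title`, `og:description`, and `og:image` out of the downloaded page with BeautifulSoup, then truncate the description to the caption limit with a trailing '…'. The helper names (`og_meta`, `build_caption`) and the plain `len()` length check are illustrative assumptions, not part of the patch; the patch itself measures the rendered caption via `client.parse_mode.parse(...)` and falls back to the `<title>` tag and `name="description"` meta, which this sketch omits.

    import html
    from bs4 import BeautifulSoup

    def og_meta(soup, prop):
        # First <meta property=...> tag that actually carries content.
        tag = soup.find(lambda t: t.name == 'meta'
                        and t.attrs.get('property') == prop
                        and t.attrs.get('content'))
        return tag.attrs['content'].strip() if tag else None

    def build_caption(raw_html, text, url, with_image):
        soup = BeautifulSoup(raw_html, 'html.parser')
        title = og_meta(soup, 'og:title')
        desc = og_meta(soup, 'og:description')
        # Post text, then the page title (escaped) or the bare URL.
        caption = f'{text}\n\nURL: ' + (html.escape(title) if title else url)
        if desc:
            # Same limits as the patch: 2047 when an image is attached,
            # 4095 for a text-only message, minus what the prefix uses.
            caplength = (2047 if with_image else 4095) - len(caption)
            captext = desc[:caplength]
            if len(captext) >= caplength:
                captext = captext[:-1] + '…'
            caption += '\n' + captext
        return caption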