diff --git a/redditbot.py b/redditbot.py
index b08bb64..425d3df 100644
--- a/redditbot.py
+++ b/redditbot.py
@@ -15,6 +15,7 @@ import yaml
import praw
import aiohttp
import aiocron
+from bs4 import BeautifulSoup
from telethon import TelegramClient, events
from telethon.utils import chunks
@@ -189,7 +190,8 @@ async def main():
splitted = os.path.splitext(parsed[2])
domain = getattr(random_post, 'domain', parsed[1])
preview = getattr(random_post, 'preview', None)
- if domain.endswith('imgur.com'):
+ if domain.endswith(
+ 'imgur.com'):
parsed[1] = 'i.imgur.com'
if parsed[2].startswith('/a/'):
albumid = os.path.split(parsed[2])[1]
@@ -204,11 +206,16 @@ async def main():
files = []
captions = []
for a, i in enumerate(apidata['images']):
- to_append = f'{text}\n#{a + 1}'
+ to_append = f'#{a + 1}'
desc = i['description']
if desc:
- to_append += ': ' + html.escape(desc)
- captions.append(to_append)
+ to_append += ': ' + desc.strip()
+ caplength = 2047 - len(client.parse_mode.parse(text)[0])
+ captext = to_append[:caplength]
+ if len(captext) >= caplength:
+ captext = captext[:-1]
+ captext += '…'
+ captions.append(text + '\n' + html.escape(captext))
filename = os.path.join(tempdir, str(time.time()))
await _download_file(filename, f'https://i.imgur.com/{i["hash"]}{i["ext"]}')
files.append(filename)
@@ -243,8 +250,36 @@ async def main():
await _download_file(filename, url)
ext = await _get_file_ext(filename)
if ext.startswith('.htm'):
+ with open(filename) as file:
+ soup = BeautifulSoup(file.read())
+ ptitle = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:title' and tag.attrs.get('content')) or soup.find('title')
+ if ptitle:
+ ptitle = ptitle.attrs.get('content', ptitle.text).strip()
+ pdesc = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:description' and tag.attrs.get('content')) or soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('name') == 'description' and tag.attrs.get('content'))
+ if pdesc:
+ pdesc = pdesc.attrs.get('content', pdesc.text).strip()
+ pimg = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:image' and tag.attrs.get('content'))
+ if pimg:
+ pimg = pimg.attrs.get('content', '').strip()
+ tat = f'{text}\n\nURL: '
+ if ptitle:
+ tat += f'{html.escape(ptitle)}'
+ else:
+ tat += url
files = []
- captions = [f'{zws}{text}\n\nURL: {url}']
+ if pimg:
+ await _download_file(filename, pimg)
+ files.append(filename)
+ tat = f'{zws}{tat}'
+ if pdesc:
+ caplength = 2047 if pimg else 4095
+ caplength -= len(client.parse_mode.parse(tat)[0])
+ captext = pdesc[:caplength]
+ if len(captext) >= caplength:
+ captext = captext[:-1]
+ captext += '…'
+ tat += '\n' + captext
+ captions = [tat]
for a, i in enumerate(files):
ext = await _get_file_ext(i)
os.rename(i, i + ext)
@@ -254,7 +289,7 @@ async def main():
if getattr(random_post, 'selftext', None):
caplength = 4094 - len(client.parse_mode.parse(text)[0])
text += '\n\n'
- captext = random_post.selftext[:caplength]
+ captext = random_post.selftext.strip()[:caplength]
if len(captext) >= caplength:
captext = captext[:-1]
captext += '…'