Make HTML documents sexier
This commit is contained in:
parent
b3ee8bcece
commit
5fe3809c4e
47
redditbot.py
47
redditbot.py
|
@ -15,6 +15,7 @@ import yaml
|
|||
import praw
|
||||
import aiohttp
|
||||
import aiocron
|
||||
from bs4 import BeautifulSoup
|
||||
from telethon import TelegramClient, events
|
||||
from telethon.utils import chunks
|
||||
|
||||
|
@ -189,7 +190,8 @@ async def main():
|
|||
splitted = os.path.splitext(parsed[2])
|
||||
domain = getattr(random_post, 'domain', parsed[1])
|
||||
preview = getattr(random_post, 'preview', None)
|
||||
if domain.endswith('imgur.com'):
|
||||
if domain.endswith(
|
||||
'imgur.com'):
|
||||
parsed[1] = 'i.imgur.com'
|
||||
if parsed[2].startswith('/a/'):
|
||||
albumid = os.path.split(parsed[2])[1]
|
||||
|
@ -204,11 +206,16 @@ async def main():
|
|||
files = []
|
||||
captions = []
|
||||
for a, i in enumerate(apidata['images']):
|
||||
to_append = f'{text}\n#{a + 1}'
|
||||
to_append = f'#{a + 1}'
|
||||
desc = i['description']
|
||||
if desc:
|
||||
to_append += ': ' + html.escape(desc)
|
||||
captions.append(to_append)
|
||||
to_append += ': ' + desc.strip()
|
||||
caplength = 2047 - len(client.parse_mode.parse(text)[0])
|
||||
captext = to_append[:caplength]
|
||||
if len(captext) >= caplength:
|
||||
captext = captext[:-1]
|
||||
captext += '…'
|
||||
captions.append(text + '\n' + html.escape(captext))
|
||||
filename = os.path.join(tempdir, str(time.time()))
|
||||
await _download_file(filename, f'https://i.imgur.com/{i["hash"]}{i["ext"]}')
|
||||
files.append(filename)
|
||||
|
@ -243,8 +250,36 @@ async def main():
|
|||
await _download_file(filename, url)
|
||||
ext = await _get_file_ext(filename)
|
||||
if ext.startswith('.htm'):
|
||||
with open(filename) as file:
|
||||
soup = BeautifulSoup(file.read())
|
||||
ptitle = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:title' and tag.attrs.get('content')) or soup.find('title')
|
||||
if ptitle:
|
||||
ptitle = ptitle.attrs.get('content', ptitle.text).strip()
|
||||
pdesc = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:description' and tag.attrs.get('content')) or soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('name') == 'description' and tag.attrs.get('content'))
|
||||
if pdesc:
|
||||
pdesc = pdesc.attrs.get('content', pdesc.text).strip()
|
||||
pimg = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:image' and tag.attrs.get('content'))
|
||||
if pimg:
|
||||
pimg = pimg.attrs.get('content', '').strip()
|
||||
tat = f'{text}\n\nURL: '
|
||||
if ptitle:
|
||||
tat += f'<a href="{url}">{html.escape(ptitle)}</a>'
|
||||
else:
|
||||
tat += url
|
||||
files = []
|
||||
captions = [f'<a href="{url}">{zws}</a>{text}\n\nURL: {url}']
|
||||
if pimg:
|
||||
await _download_file(filename, pimg)
|
||||
files.append(filename)
|
||||
tat = f'<a href="{url}">{zws}</a>{tat}'
|
||||
if pdesc:
|
||||
caplength = 2047 if pimg else 4095
|
||||
caplength -= len(client.parse_mode.parse(tat)[0])
|
||||
captext = pdesc[:caplength]
|
||||
if len(captext) >= caplength:
|
||||
captext = captext[:-1]
|
||||
captext += '…'
|
||||
tat += '\n' + captext
|
||||
captions = [tat]
|
||||
for a, i in enumerate(files):
|
||||
ext = await _get_file_ext(i)
|
||||
os.rename(i, i + ext)
|
||||
|
@ -254,7 +289,7 @@ async def main():
|
|||
if getattr(random_post, 'selftext', None):
|
||||
caplength = 4094 - len(client.parse_mode.parse(text)[0])
|
||||
text += '\n\n'
|
||||
captext = random_post.selftext[:caplength]
|
||||
captext = random_post.selftext.strip()[:caplength]
|
||||
if len(captext) >= caplength:
|
||||
captext = captext[:-1]
|
||||
captext += '…'
|
||||
|
|
Loading…
Reference in New Issue