Make HTML documents sexier

This commit is contained in:
blank X 2020-09-04 17:20:09 +07:00
parent b3ee8bcece
commit 5fe3809c4e
1 changed file with 41 additions and 6 deletions

View File

@@ -15,6 +15,7 @@ import yaml
import praw import praw
import aiohttp import aiohttp
import aiocron import aiocron
from bs4 import BeautifulSoup
from telethon import TelegramClient, events from telethon import TelegramClient, events
from telethon.utils import chunks from telethon.utils import chunks
@@ -189,7 +190,8 @@ async def main():
splitted = os.path.splitext(parsed[2]) splitted = os.path.splitext(parsed[2])
domain = getattr(random_post, 'domain', parsed[1]) domain = getattr(random_post, 'domain', parsed[1])
preview = getattr(random_post, 'preview', None) preview = getattr(random_post, 'preview', None)
if domain.endswith('imgur.com'): if domain.endswith(
'imgur.com'):
parsed[1] = 'i.imgur.com' parsed[1] = 'i.imgur.com'
if parsed[2].startswith('/a/'): if parsed[2].startswith('/a/'):
albumid = os.path.split(parsed[2])[1] albumid = os.path.split(parsed[2])[1]
@@ -204,11 +206,16 @@ async def main():
files = [] files = []
captions = [] captions = []
for a, i in enumerate(apidata['images']): for a, i in enumerate(apidata['images']):
to_append = f'{text}\n#{a + 1}' to_append = f'#{a + 1}'
desc = i['description'] desc = i['description']
if desc: if desc:
to_append += ': ' + html.escape(desc) to_append += ': ' + desc.strip()
captions.append(to_append) caplength = 2047 - len(client.parse_mode.parse(text)[0])
captext = to_append[:caplength]
if len(captext) >= caplength:
captext = captext[:-1]
captext += ''
captions.append(text + '\n' + html.escape(captext))
filename = os.path.join(tempdir, str(time.time())) filename = os.path.join(tempdir, str(time.time()))
await _download_file(filename, f'https://i.imgur.com/{i["hash"]}{i["ext"]}') await _download_file(filename, f'https://i.imgur.com/{i["hash"]}{i["ext"]}')
files.append(filename) files.append(filename)
@@ -243,8 +250,36 @@ async def main():
await _download_file(filename, url) await _download_file(filename, url)
ext = await _get_file_ext(filename) ext = await _get_file_ext(filename)
if ext.startswith('.htm'): if ext.startswith('.htm'):
with open(filename) as file:
soup = BeautifulSoup(file.read())
ptitle = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:title' and tag.attrs.get('content')) or soup.find('title')
if ptitle:
ptitle = ptitle.attrs.get('content', ptitle.text).strip()
pdesc = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:description' and tag.attrs.get('content')) or soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('name') == 'description' and tag.attrs.get('content'))
if pdesc:
pdesc = pdesc.attrs.get('content', pdesc.text).strip()
pimg = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:image' and tag.attrs.get('content'))
if pimg:
pimg = pimg.attrs.get('content', '').strip()
tat = f'{text}\n\nURL: '
if ptitle:
tat += f'<a href="{url}">{html.escape(ptitle)}</a>'
else:
tat += url
files = [] files = []
captions = [f'<a href="{url}">{zws}</a>{text}\n\nURL: {url}'] if pimg:
await _download_file(filename, pimg)
files.append(filename)
tat = f'<a href="{url}">{zws}</a>{tat}'
if pdesc:
caplength = 2047 if pimg else 4095
caplength -= len(client.parse_mode.parse(tat)[0])
captext = pdesc[:caplength]
if len(captext) >= caplength:
captext = captext[:-1]
captext += ''
tat += '\n' + captext
captions = [tat]
for a, i in enumerate(files): for a, i in enumerate(files):
ext = await _get_file_ext(i) ext = await _get_file_ext(i)
os.rename(i, i + ext) os.rename(i, i + ext)
@@ -254,7 +289,7 @@ async def main():
if getattr(random_post, 'selftext', None): if getattr(random_post, 'selftext', None):
caplength = 4094 - len(client.parse_mode.parse(text)[0]) caplength = 4094 - len(client.parse_mode.parse(text)[0])
text += '\n\n' text += '\n\n'
captext = random_post.selftext[:caplength] captext = random_post.selftext.strip()[:caplength]
if len(captext) >= caplength: if len(captext) >= caplength:
captext = captext[:-1] captext = captext[:-1]
captext += '' captext += ''