Make HTML documents sexier
This commit is contained in:
parent
b3ee8bcece
commit
5fe3809c4e
47
redditbot.py
47
redditbot.py
|
@ -15,6 +15,7 @@ import yaml
|
||||||
import praw
|
import praw
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import aiocron
|
import aiocron
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
from telethon import TelegramClient, events
|
from telethon import TelegramClient, events
|
||||||
from telethon.utils import chunks
|
from telethon.utils import chunks
|
||||||
|
|
||||||
|
@ -189,7 +190,8 @@ async def main():
|
||||||
splitted = os.path.splitext(parsed[2])
|
splitted = os.path.splitext(parsed[2])
|
||||||
domain = getattr(random_post, 'domain', parsed[1])
|
domain = getattr(random_post, 'domain', parsed[1])
|
||||||
preview = getattr(random_post, 'preview', None)
|
preview = getattr(random_post, 'preview', None)
|
||||||
if domain.endswith('imgur.com'):
|
if domain.endswith(
|
||||||
|
'imgur.com'):
|
||||||
parsed[1] = 'i.imgur.com'
|
parsed[1] = 'i.imgur.com'
|
||||||
if parsed[2].startswith('/a/'):
|
if parsed[2].startswith('/a/'):
|
||||||
albumid = os.path.split(parsed[2])[1]
|
albumid = os.path.split(parsed[2])[1]
|
||||||
|
@ -204,11 +206,16 @@ async def main():
|
||||||
files = []
|
files = []
|
||||||
captions = []
|
captions = []
|
||||||
for a, i in enumerate(apidata['images']):
|
for a, i in enumerate(apidata['images']):
|
||||||
to_append = f'{text}\n#{a + 1}'
|
to_append = f'#{a + 1}'
|
||||||
desc = i['description']
|
desc = i['description']
|
||||||
if desc:
|
if desc:
|
||||||
to_append += ': ' + html.escape(desc)
|
to_append += ': ' + desc.strip()
|
||||||
captions.append(to_append)
|
caplength = 2047 - len(client.parse_mode.parse(text)[0])
|
||||||
|
captext = to_append[:caplength]
|
||||||
|
if len(captext) >= caplength:
|
||||||
|
captext = captext[:-1]
|
||||||
|
captext += '…'
|
||||||
|
captions.append(text + '\n' + html.escape(captext))
|
||||||
filename = os.path.join(tempdir, str(time.time()))
|
filename = os.path.join(tempdir, str(time.time()))
|
||||||
await _download_file(filename, f'https://i.imgur.com/{i["hash"]}{i["ext"]}')
|
await _download_file(filename, f'https://i.imgur.com/{i["hash"]}{i["ext"]}')
|
||||||
files.append(filename)
|
files.append(filename)
|
||||||
|
@ -243,8 +250,36 @@ async def main():
|
||||||
await _download_file(filename, url)
|
await _download_file(filename, url)
|
||||||
ext = await _get_file_ext(filename)
|
ext = await _get_file_ext(filename)
|
||||||
if ext.startswith('.htm'):
|
if ext.startswith('.htm'):
|
||||||
|
with open(filename) as file:
|
||||||
|
soup = BeautifulSoup(file.read())
|
||||||
|
ptitle = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:title' and tag.attrs.get('content')) or soup.find('title')
|
||||||
|
if ptitle:
|
||||||
|
ptitle = ptitle.attrs.get('content', ptitle.text).strip()
|
||||||
|
pdesc = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:description' and tag.attrs.get('content')) or soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('name') == 'description' and tag.attrs.get('content'))
|
||||||
|
if pdesc:
|
||||||
|
pdesc = pdesc.attrs.get('content', pdesc.text).strip()
|
||||||
|
pimg = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:image' and tag.attrs.get('content'))
|
||||||
|
if pimg:
|
||||||
|
pimg = pimg.attrs.get('content', '').strip()
|
||||||
|
tat = f'{text}\n\nURL: '
|
||||||
|
if ptitle:
|
||||||
|
tat += f'<a href="{url}">{html.escape(ptitle)}</a>'
|
||||||
|
else:
|
||||||
|
tat += url
|
||||||
files = []
|
files = []
|
||||||
captions = [f'<a href="{url}">{zws}</a>{text}\n\nURL: {url}']
|
if pimg:
|
||||||
|
await _download_file(filename, pimg)
|
||||||
|
files.append(filename)
|
||||||
|
tat = f'<a href="{url}">{zws}</a>{tat}'
|
||||||
|
if pdesc:
|
||||||
|
caplength = 2047 if pimg else 4095
|
||||||
|
caplength -= len(client.parse_mode.parse(tat)[0])
|
||||||
|
captext = pdesc[:caplength]
|
||||||
|
if len(captext) >= caplength:
|
||||||
|
captext = captext[:-1]
|
||||||
|
captext += '…'
|
||||||
|
tat += '\n' + captext
|
||||||
|
captions = [tat]
|
||||||
for a, i in enumerate(files):
|
for a, i in enumerate(files):
|
||||||
ext = await _get_file_ext(i)
|
ext = await _get_file_ext(i)
|
||||||
os.rename(i, i + ext)
|
os.rename(i, i + ext)
|
||||||
|
@ -254,7 +289,7 @@ async def main():
|
||||||
if getattr(random_post, 'selftext', None):
|
if getattr(random_post, 'selftext', None):
|
||||||
caplength = 4094 - len(client.parse_mode.parse(text)[0])
|
caplength = 4094 - len(client.parse_mode.parse(text)[0])
|
||||||
text += '\n\n'
|
text += '\n\n'
|
||||||
captext = random_post.selftext[:caplength]
|
captext = random_post.selftext.strip()[:caplength]
|
||||||
if len(captext) >= caplength:
|
if len(captext) >= caplength:
|
||||||
captext = captext[:-1]
|
captext = captext[:-1]
|
||||||
captext += '…'
|
captext += '…'
|
||||||
|
|
Loading…
Reference in New Issue