Deal with HTML documents properly

2020-09-03 11:14:28 +07:00 · 2020-09-03 11:14:28 +07:00 · b66da5612b
parent 8dd917a5fa
commit b66da5612b
1 changed file with 5 additions and 0 deletions
--- a/redditbot.py
+++ b/redditbot.py
@@ -37,6 +37,7 @@ cron_duration = config_data['config']['cron_duration']

 logging.basicConfig(level=logging.INFO)
 async def main():
+    zws = '\u200b'
    client = await TelegramClient('redditbot', tg_api_id, tg_api_hash).start(bot_token=bot_token)
    client.parse_mode = 'html'
    session = aiohttp.ClientSession()
@@ -228,6 +229,10 @@ async def main():
                        url = preview['images'][0]['source']['url']
                if url:
                    await _download_file(filename, url)
+                    ext = await _get_file_ext(filename)
+                    if ext.startswith('.htm'):
+                        files = []
+                        captions = [f'<a href="{url}">{zws}</a>{text}\n\nURL: {url}']
                for a, i in enumerate(files):
                    ext = await _get_file_ext(i)
                    os.rename(i, i + ext)