From b66da5612b61ac836c91ee2ca57d4fc8422917ad Mon Sep 17 00:00:00 2001 From: blank X Date: Thu, 3 Sep 2020 11:14:28 +0700 Subject: [PATCH] Deal with HTML documents properly --- redditbot.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/redditbot.py b/redditbot.py index 2e7c5f3..7bcda5e 100644 --- a/redditbot.py +++ b/redditbot.py @@ -37,6 +37,7 @@ cron_duration = config_data['config']['cron_duration'] logging.basicConfig(level=logging.INFO) async def main(): + zws = '\u200b' client = await TelegramClient('redditbot', tg_api_id, tg_api_hash).start(bot_token=bot_token) client.parse_mode = 'html' session = aiohttp.ClientSession() @@ -228,6 +229,10 @@ async def main(): url = preview['images'][0]['source']['url'] if url: await _download_file(filename, url) + ext = await _get_file_ext(filename) + if ext.startswith('.htm'): + files = [] + captions = [f'{zws}{text}\n\nURL: {url}'] for a, i in enumerate(files): ext = await _get_file_ext(i) os.rename(i, i + ext)