async def get_video_list(session, channel_id):
    """Return the IDs of the latest videos of a YouTube channel.

    Every URL listed under ``config['config']['invidious_instances']`` is
    tried in order; the first instance that answers with HTTP 200 and a
    usable JSON body wins. If the key is absent, or every instance fails,
    the channel's YouTube RSS feed is used as a fallback.

    Args:
        session: HTTP client whose ``get(url, headers=...)`` is usable as an
            async context manager yielding a response with ``status``,
            ``json()`` and ``text()`` (presumably an
            ``aiohttp.ClientSession`` -- confirm against the caller).
        channel_id: YouTube channel ID; it is percent-quoted before being
            placed in the request URLs.

    Returns:
        A list of video ID strings in the order the upstream source listed
        them. An empty list is returned when the RSS fallback answers with
        a non-200 status.

    Raises:
        Whatever the RSS fallback request or feed lookup raises. Failures
        of individual Invidious instances are logged and skipped instead.
    """
    # Function-scope import so the block does not depend on the file's
    # import section being changed.
    from urllib.parse import quote

    quoted_channel_id = quote(channel_id, safe='')
    # NOTE(review): the header plus the changing ``a`` query parameter look
    # like cache busting -- confirm that is the intent.
    headers = {'Cache-Control': 'no-store, max-age=0'}

    for instance in config['config'].get('invidious_instances', []):
        url = f'{instance}/api/v1/channels/{quoted_channel_id}/latest?fields=videoId&a={time.time()}'
        try:
            async with session.get(url, headers=headers) as resp:
                if resp.status != 200:
                    logging.error('Invidious instance %s returned %s', instance, str(resp.status))
                    continue
                return [video['videoId'] for video in await resp.json()]
        except Exception:
            # Deliberately not BaseException: task cancellation and
            # interpreter shutdown must propagate instead of being treated
            # as "this instance is broken, try the next one".
            logging.exception('Invidious instance %s raised exception', instance)

    url = f'https://www.youtube.com/feeds/videos.xml?channel_id={quoted_channel_id}&a={time.time()}'
    async with session.get(url, headers=headers) as resp:
        if resp.status != 200:
            # An error page is not a feed; report it rather than parsing it
            # and silently yielding nothing.
            logging.error('YouTube feed for channel %s returned %s', channel_id, resp.status)
            return []
        feed = feedparser.parse(await resp.text())
    return [entry['yt_videoid'] for entry in feed['entries']]
async def check_video(video_id):
    """Run ``_check_video`` for *video_id*, retrying up to five times.

    When an attempt raises, the exception is logged and its traceback is
    sent as a text file to the chat ``config['config']['storage_chat_id']``
    through ``client.send_message``. A failure to send that report is only
    logged, so it never stops the remaining retries.

    Args:
        video_id: YouTube video ID to check.

    Returns:
        The result of the first ``_check_video`` call that succeeds, or
        ``None`` if all five attempts fail.
    """
    for _ in range(5):
        try:
            return await _check_video(video_id)
        except Exception:
            # Deliberately not BaseException: a cancelled task (or an
            # interpreter shutdown) must propagate instead of being retried
            # and reported as a video error.
            logging.exception('Exception encountered with checking video %s', video_id)
            try:
                # format_exc() has to run inside the handler, while the
                # exception being reported is still the active one.
                with BytesIO(traceback.format_exc().encode()) as file:
                    file.name = f'check-videos-error-{video_id}.txt'
                    file.seek(0)
                    await client.send_message(config['config']['storage_chat_id'], f'Exception encountered with checking video {video_id}', file=file, parse_mode=None)
            except Exception:
                logging.exception('Exception encountered when sending message to Telegram about checking video %s exception', video_id)