diff --git a/autoytarchive/utils.py b/autoytarchive/utils.py index df95d6d..94a1453 100644 --- a/autoytarchive/utils.py +++ b/autoytarchive/utils.py @@ -1,11 +1,29 @@ import os -import sys import json import shlex import asyncio from io import BytesIO +from youtube_dl.extractor import youtube from . import config, client, seen_videos +youtube._try_get = _try_get = youtube.try_get +def traverse_dict(src): + for (key, value) in src.items(): + if key == 'scheduledStartTime': + return value + if isinstance(value, dict): + if value := traverse_dict(value): + return value + return None + +def try_get(src, getter, expected_type=None): + if reason := src.get('reason'): + if isinstance(reason, str) and (reason.startswith('This live event will begin in ') or reason.startswith('Premieres in ')): + if t := _try_get(src, traverse_dict, str): + src['reason'] = f'autoytarchive:{t} {reason}' + return _try_get(src, getter, expected_type) +youtube.try_get = try_get + async def split_files(filename, destination_dir): args = [ 'split', @@ -25,11 +43,3 @@ async def update_seen_videos(): file.name = 'autoytarchive.json' file.seek(0) await client.edit_message(config['config']['storage_chat_id'], config['config']['storage_message_id'], file=file) - -async def get_video(video_id): - proc = await asyncio.create_subprocess_exec(sys.executable, 'youtube-dl-injected.py', '--dump-single-json', '--', video_id, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE) - stdout, stderr = await proc.communicate() - if proc.returncode: - error_message = next(i.strip() for i in stderr.decode().split('\n') if i.startswith('ERROR: ')) - return error_message or (proc.returncode, (stderr + stdout).decode()) - return json.loads(stdout) diff --git a/autoytarchive/workers.py b/autoytarchive/workers.py index 5df89ee..b3dd7ab 100644 --- a/autoytarchive/workers.py +++ b/autoytarchive/workers.py @@ -1,3 +1,4 @@ +import re import os import json import time @@ -9,10 +10,15 @@ import feedparser from io import BytesIO from decimal import Decimal from urllib.parse import quote as urlencode, urlparse +from youtube_dl import YoutubeDL from . import session, config, client, seen_videos -from .utils import split_files, update_seen_videos, get_video +from .utils import split_files, update_seen_videos tmp_handled = [] +live_regex = re.compile(r'error: (?:autoytarchive:([0-9]+) )?(?:this live event will begin|premieres) in .+', re.I) +strip_date = re.compile(r' \d{4}-\d{2}-\d{2} \d{2}:\d{2}$') +ytdl = YoutubeDL({'skip_download': True, 'no_color': True}) +ytdl.add_default_info_extractors() async def check_channels(nodl): if nodl: @@ -65,30 +71,28 @@ async def check_video(video): async def _check_video(video): logging.info('Checking video %s', video['yt_videoid']) first_try_live = waited = False - tmr_attempts = 1 + too_many_requests_count = 1 while True: - video_json = await get_video(video['link']) - if isinstance(video_json, dict): + try: + video_json = await client.loop.run_in_executor(None, ytdl.extract_info, video['link']) + except BaseException as e: + wait_time = 30 + message = str(e) + if '429' in message or 'too many' in message.lower(): + wait_time = too_many_requests_count * 60 * 60 + too_many_requests_count += 1 + elif match := live_regex.match(message.rstrip('.')): + end_schedule_time = match.group(1) or 0 + if end_schedule_time := int(end_schedule_time): + tmp_wait_time = end_schedule_time - time.time() + if tmp_wait_time > wait_time: + wait_time = tmp_wait_time + await asyncio.sleep(wait_time) + waited = True + else: if not waited: first_try_live = True break - wait_time = 30 - if isinstance(video_json, str): - error_message = video_json[7:] - if 'too many request' in error_message.lower(): - wait_time = tmr_attempts * 60 * 60 - tmr_attempts += 1 - elif error_message.startswith('AUTOYTARCHIVE:'): - tmp = error_message.split(':', 1)[1].split(' ', 1)[0] - if tmp.isnumeric(): - new_wait_time = int(tmp) - int(time.time()) - if new_wait_time > 0: - wait_time = new_wait_time - logging.error('Error on video %s: %s', video['yt_videoid'], error_message) - else: - logging.error('Video %s returned status code %s\n%s', video['yt_videoid'], *video_json) - await asyncio.sleep(wait_time) - waited = True if not video_json.get('is_live'): first_try_live = False video_queue.put_nowait((video_json, time.time(), first_try_live)) @@ -119,17 +123,16 @@ async def _video_worker(): tempdir = tempdir_obj.name if late_to_queue: for i in range(5): - tmp = await get_video(video_json['id']) - if isinstance(tmp, dict): + wait_time = 30 + try: + tmp = await client.loop.run_in_executor(None, ytdl.extract_info, video_json['id']) + except BaseException as e: + e = str(e) + if '429' in e or 'too many request' in e.lower(): + wait_time = (i + 1) * 60 * 60 + else: video_json = tmp break - wait_time = 30 - if isinstance(tmp, str): - if 'too many request' in tmp.lower(): - wait_time = (i + 1) * 60 * 60 - logging.error('Error on video %s: %s', video_json['id'], tmp) - else: - logging.error('Video %s returned status code %s\n%s', video_json['id'], *video_json) await asyncio.sleep(wait_time) if video_json.get('requested_formats'): for i in video_json['requested_formats']: @@ -180,16 +183,15 @@ async def _video_worker(): except BaseException: logging.exception('Exception encountered when sending message to Telegram about download failure exception') for i in range(5): - tmp = await get_video(video_json['id']) - if isinstance(tmp, dict): + try: + tmp = await client.loop.run_in_executor(None, ytdl.extract_info, video_json['id']) + except BaseException as e: + e = str(e) + if '429' in e or 'too many request' in e.lower(): + wait_time = (i + 1) * 60 * 60 + else: video_json = tmp break - if isinstance(tmp, str): - if 'too many request' in tmp.lower(): - wait_time = (i + 1) * 60 * 60 - logging.error('Error on video %s: %s', video_json['id'], tmp) - else: - logging.error('Video %s returned status code %s\n%s', video_json['id'], *tmp) await asyncio.sleep(wait_time) except BaseException: tempdir_obj.cleanup() diff --git a/youtube-dl-injected.py b/youtube-dl-injected.py deleted file mode 100644 index 631b879..0000000 --- a/youtube-dl-injected.py +++ /dev/null @@ -1,12 +0,0 @@ -from youtube_dl.extractor import youtube -from youtube_dl import main - -youtube._try_get = _try_get = youtube.try_get -def try_get(src, getter, expected_type=None): - res = _try_get(src, getter, expected_type) - if isinstance(res, str) and res.startswith('This live event will begin in '): - t = _try_get(src, lambda x: x['playabilityStatus']['liveStreamability']['liveStreamabilityRenderer']['offlineSlate']['liveStreamOfflineSlateRenderer']['scheduledStartTime'], str) - res = f'AUTOYTARCHIVE:{t} {res}' - return res -youtube.try_get = try_get -main()