"""Workers that poll YouTube channel feeds, download new videos with ffmpeg
and upload the results to a Telegram chat.

NOTE(review): this file was recovered from a whitespace-collapsed source, so
block nesting was reconstructed from syntax. Places where more than one
nesting was plausible carry their own NOTE(review) comment.
"""
import re
import os
import json
import time
import asyncio
import logging
import tempfile
import traceback
import feedparser
from io import BytesIO
from decimal import Decimal
from urllib.parse import quote as urlencode, urlparse
from youtube_dl import YoutubeDL
from . import session, config, client, seen_videos
from .utils import update_seen_videos
from .cappedio import bopen

# Video ids already handed to check_video() during this run.
tmp_handled = []
# Files larger than this many bytes are uploaded in several parts.
size_limit = 2000 * 1024 * 1024
# Matches extractor errors for events that have not started yet; group 1 is an
# optional number that _check_video() compares against time.time().
live_regex = re.compile(r'error: (?:autoytarchive:([0-9]+) )?(?:this live event will begin|premieres) in .+', re.I)
# Trailing " YYYY-MM-DD HH:MM" suffix removed from titles before posting.
strip_date = re.compile(r' \d{4}-\d{2}-\d{2} \d{2}:\d{2}$')
ytdl = YoutubeDL({'skip_download': True, 'no_color': True})
ytdl.add_default_info_extractors()


async def check_channels(nodl):
    """Poll the configured channels, once or forever.

    When *nodl* is truthy a single pass is made that only records the
    currently listed videos as seen. Otherwise the channels are polled in an
    endless loop, sleeping ``config['config']['wait_seconds']`` between
    passes; failures are logged and reported to the storage chat.
    """
    if nodl:
        await _check_channels(True)
        return
    while True:
        try:
            await _check_channels(False)
        except BaseException:
            logging.exception('Exception encountered with check channels')
            try:
                with BytesIO(traceback.format_exc().encode()) as file:
                    file.name = 'check-channels-error.txt'
                    file.seek(0)
                    await client.send_message(
                        config['config']['storage_chat_id'],
                        'Exception encountered with check channels',
                        file=file,
                    )
            except BaseException:
                logging.exception('Exception encountered when sending message to Telegram about check channels exception')
        await asyncio.sleep(config['config']['wait_seconds'])


check_channels_lock = asyncio.Lock()


async def _check_channels(nodl):
    """Fetch every configured channel feed and dispatch unseen videos.

    With *nodl* truthy, unseen ids are appended to ``seen_videos`` and nothing
    else happens. Otherwise a ``check_video`` task is created per unseen entry
    and its id is remembered in ``tmp_handled``.
    """
    async with check_channels_lock:
        for channel_id in config['config']['channels']:
            logging.info('Checking channel %s', channel_id)
            feed_url = f'https://youtube.com/feeds/videos.xml?channel_id={urlencode(channel_id)}&a={time.time()}'
            async with session.get(feed_url) as resp:
                data = feedparser.parse(await resp.text())
            for entry in data['items']:
                video_id = entry['yt_videoid']
                # Two membership tests instead of concatenating both lists
                # for every feed entry.
                if video_id in seen_videos or video_id in tmp_handled:
                    continue
                if nodl:
                    seen_videos.append(video_id)
                    continue
                asyncio.create_task(check_video(entry))
                tmp_handled.append(video_id)


async def check_video(video):
    """Run ``_check_video`` for a feed entry, trying up to five times.

    Each failure is logged and reported to the storage chat with the
    traceback attached. Returns whatever ``_check_video`` returns on the
    first success, or ``None`` when all five attempts fail.
    """
    for _ in range(5):
        try:
            return await _check_video(video)
        except BaseException:
            logging.exception('Exception encountered with checking video %s', video.get('yt_videoid'))
            try:
                with BytesIO(traceback.format_exc().encode()) as file:
                    file.name = f'check-videos-error-{video.get("yt_videoid")}.txt'
                    file.seek(0)
                    await client.send_message(
                        config['config']['storage_chat_id'],
                        f'Exception encountered with checking video {video.get("yt_videoid")}',
                        file=file,
                        parse_mode=None,
                    )
            except BaseException:
                logging.exception('Exception encountered when sending message to Telegram about checking video %s exception', video.get('yt_videoid'))


async def _check_video(video):
    """Wait until metadata for *video* can be extracted, then queue it.

    Extraction runs in the default executor and is retried forever. The delay
    between tries is 30 seconds by default, a growing number of hours when the
    error text mentions 429 / "too many", or the time remaining until the
    timestamp captured by ``live_regex`` when that is longer than 30 seconds.

    The queued tuple is ``(video_json, time.time(), first_try_live)`` where
    ``first_try_live`` is true only if the very first extraction succeeded
    and the result reports ``is_live``.
    """
    logging.info('Checking video %s', video['yt_videoid'])
    first_try_live = waited = False
    too_many_requests_count = 1
    while True:
        try:
            video_json = await client.loop.run_in_executor(None, ytdl.extract_info, video['link'])
        except BaseException as e:
            wait_time = 30
            message = str(e)
            if '429' in message or 'too many' in message.lower():
                wait_time = too_many_requests_count * 60 * 60
                too_many_requests_count += 1
            elif match := live_regex.match(message.rstrip('.')):
                end_schedule_time = match.group(1) or 0
                if end_schedule_time := int(end_schedule_time):
                    tmp_wait_time = end_schedule_time - time.time()
                    if tmp_wait_time > wait_time:
                        wait_time = tmp_wait_time
            await asyncio.sleep(wait_time)
            waited = True
        else:
            if not waited:
                first_try_live = True
            break
    if not video_json.get('is_live'):
        first_try_live = False
    video_queue.put_nowait((video_json, time.time(), first_try_live))


video_queue = asyncio.Queue()


async def video_worker():
    """Run ``_video_worker`` forever, restarting it after any exception.

    Each exception is logged and reported to the storage chat with the
    traceback attached.
    """
    while True:
        try:
            await _video_worker()
        except BaseException:
            logging.exception('Exception encountered with video worker')
            try:
                with BytesIO(traceback.format_exc().encode()) as file:
                    file.name = 'video-worker-error.txt'
                    file.seek(0)
                    await client.send_message(
                        config['config']['storage_chat_id'],
                        'Exception encountered with video worker',
                        file=file,
                    )
            except BaseException:
                logging.exception('Exception encountered when sending message to Telegram about video worker exception')


def _build_ffmpeg_command(video_json, video_filename):
    """Return the ffmpeg argv that stream-copies *video_json*'s inputs into *video_filename*."""
    command = ['ffmpeg', '-y']
    if video_json.get('requested_formats'):
        for fmt in video_json['requested_formats']:
            command.extend(('-i', fmt['url']))
    else:
        command.extend(('-i', video_json['url']))
    command.extend(('-c', 'copy', video_filename))
    return command


def _uses_manifest_url(video_json):
    """Return True if any input URL of *video_json* is served from manifest.googlevideo.com."""
    if video_json.get('url') and urlparse(video_json['url']).netloc == 'manifest.googlevideo.com':
        return True
    for fmt in video_json.get('requested_formats') or ():
        if urlparse(fmt['url']).netloc == 'manifest.googlevideo.com':
            return True
    return False


async def _video_worker():
    """Consume ``video_queue``: download each video and hand it to ``upload_queue``.

    For every queued item this posts the metadata JSON and, if present, the
    thumbnail to the storage chat while ffmpeg downloads into a fresh
    temporary directory under the current directory. A failed download is
    retried (up to 50 ffmpeg runs in total) with refreshed metadata.

    On any exception the running ffmpeg process is killed, the temporary
    directory is removed and the exception propagates to the caller.
    """
    max_download_attempts = 50
    while True:
        video_json, start_time, first_try_live = await video_queue.get()
        late_to_queue = (Decimal(time.time()) - Decimal(start_time)) > 5
        is_late = first_try_live or late_to_queue
        tempdir_obj = tempfile.TemporaryDirectory(dir='.')
        proc = None
        try:
            tempdir = tempdir_obj.name
            if late_to_queue:
                # The item waited in the queue for a while, so refresh the
                # metadata before using its URLs.
                for i in range(5):
                    wait_time = 30
                    try:
                        tmp = await client.loop.run_in_executor(None, ytdl.extract_info, video_json['id'])
                    except BaseException as e:
                        message = str(e)
                        if '429' in message or 'too many request' in message.lower():
                            wait_time = (i + 1) * 60 * 60
                    else:
                        video_json = tmp
                        break
                    await asyncio.sleep(wait_time)
            video_filename = os.path.join(tempdir, video_json['id'] + '.mkv')
            # Start the download first so it runs while the metadata and
            # thumbnail are being posted.
            proc = await asyncio.create_subprocess_exec(*_build_ffmpeg_command(video_json, video_filename))
            text = 'New video'
            if is_late:
                text += ' (is late)'
            video_json['title'] = strip_date.sub('', video_json['title']).strip()
            text += f': {video_json["title"]}\nhttps://youtube.com/watch?v={video_json["id"]}'
            with BytesIO(json.dumps(video_json, indent=4).encode()) as file:
                file.name = video_json['id'] + '.json'
                file.seek(0)
                await client.send_file(config['config']['storage_chat_id'], file, caption=text, parse_mode=None)
            if video_json.get('thumbnail'):
                thumbnail_ext = os.path.splitext(urlparse(video_json['thumbnail']).path)[1]
                thumbnail_filename = os.path.join(tempdir, video_json['id'] + thumbnail_ext)
                async with session.get(video_json['thumbnail']) as resp:
                    with open(thumbnail_filename, 'wb') as file:
                        while True:
                            chunk = await resp.content.read(4096)
                            if not chunk:
                                break
                            file.write(chunk)
                await client.send_file(config['config']['storage_chat_id'], thumbnail_filename, caption=text, parse_mode=None)
                os.remove(thumbnail_filename)
            for attempt in range(max_download_attempts):
                await proc.communicate()
                if not proc.returncode:
                    break
                wait_time = 30
                if video_json.get('duration') and _uses_manifest_url(video_json):
                    wait_time += video_json['duration']
                try:
                    await client.send_message(
                        config['config']['storage_chat_id'],
                        f'Failed to download video {video_json["id"]}, please check logs',
                        parse_mode=None,
                    )
                except BaseException:
                    logging.exception('Exception encountered when sending message to Telegram about download failure exception')
                # NOTE(review): the sleep is placed inside this retry loop
                # (mirroring the late-to-queue refresh above); the original
                # nesting could not be recovered with certainty.
                for i in range(5):
                    try:
                        tmp = await client.loop.run_in_executor(None, ytdl.extract_info, video_json['id'])
                    except BaseException as e:
                        message = str(e)
                        if '429' in message or 'too many request' in message.lower():
                            wait_time = (i + 1) * 60 * 60
                    else:
                        video_json = tmp
                        break
                    await asyncio.sleep(wait_time)
                # Re-run ffmpeg with the (possibly refreshed) URLs. Without
                # this the next iteration would only re-read the exit status
                # of the process that already failed.
                if attempt + 1 < max_download_attempts:
                    proc = await asyncio.create_subprocess_exec(*_build_ffmpeg_command(video_json, video_filename))
        except BaseException:
            # Do not leave ffmpeg running once its output directory is gone.
            if proc is not None and proc.returncode is None:
                try:
                    proc.kill()
                except ProcessLookupError:
                    pass
            tempdir_obj.cleanup()
            raise
        upload_queue.put_nowait((tempdir_obj, video_json))
        video_queue.task_done()


upload_queue = asyncio.Queue()


async def upload_worker():
    """Run ``_upload_worker`` forever, restarting it after any exception.

    Each exception is logged and reported to the storage chat with the
    traceback attached.
    """
    while True:
        try:
            await _upload_worker()
        except BaseException:
            logging.exception('Exception encountered with upload worker')
            try:
                with BytesIO(traceback.format_exc().encode()) as file:
                    file.name = 'upload-worker-error.txt'
                    file.seek(0)
                    await client.send_message(
                        config['config']['storage_chat_id'],
                        'Exception encountered with upload worker',
                        file=file,
                    )
            except BaseException:
                logging.exception('Exception encountered when sending message to Telegram about upload worker exception')


async def _upload_worker():
    """Consume ``upload_queue``: upload each downloaded ``.mkv`` to the storage chat.

    Files larger than ``size_limit`` are sent as consecutive parts named
    ``<id>.mkv.partNN``. The temporary directory is always removed afterwards;
    on success the video id is appended to ``seen_videos`` and persisted via
    ``update_seen_videos``.
    """
    while True:
        tempdir_obj, video_json = await upload_queue.get()
        try:
            tempdir = tempdir_obj.name
            base_filename = video_json['id'] + '.mkv'
            video_filename = os.path.join(tempdir, base_filename)
            total_size = os.path.getsize(video_filename)
            is_big = total_size > size_limit
            files_sent = size_sent = 0
            messages = []
            # NOTE(review): bopen objects are never explicitly closed here;
            # confirm whether cappedio expects that.
            file = bopen(video_filename, size_limit, None)
            while total_size > 0:
                file.fake_start = size_sent
                if is_big:
                    file.name = f'{base_filename}.part{str(files_sent).rjust(2, "0")}'
                    # NOTE(review): whether this status message belongs inside
                    # `if is_big` could not be recovered from the flattened
                    # source; confirm against the intended behaviour.
                    messages.append((await client.send_message(
                        config['config']['storage_chat_id'],
                        f'Uploading {file.name}...',
                        parse_mode=None,
                    )).id)
                await client.send_file(config['config']['storage_chat_id'], file, caption=file.name, parse_mode=None)
                total_size -= size_limit
                if total_size > 0:
                    size_sent += size_limit
                    files_sent += 1
                    file = bopen(video_filename, size_limit, None)
            if messages:
                await client.delete_messages(config['config']['storage_chat_id'], messages)
        finally:
            tempdir_obj.cleanup()
        seen_videos.append(video_json['id'])
        await update_seen_videos()
        upload_queue.task_done()