2020-08-31 06:51:07 +00:00
|
|
|
import os
|
|
|
|
import time
|
|
|
|
import html
|
|
|
|
import json
|
|
|
|
import random
|
|
|
|
import logging
|
|
|
|
import asyncio
|
|
|
|
import tempfile
|
|
|
|
import functools
|
|
|
|
import mimetypes
|
|
|
|
import traceback
|
2021-10-09 13:06:59 +00:00
|
|
|
from decimal import Decimal
|
2020-08-31 06:51:07 +00:00
|
|
|
from itertools import zip_longest
|
2021-05-07 07:46:48 +00:00
|
|
|
from urllib.parse import urlparse, urlunparse, urljoin
|
2020-08-31 06:51:07 +00:00
|
|
|
import yaml
|
2021-04-04 06:10:56 +00:00
|
|
|
import asyncpraw
|
2020-08-31 06:51:07 +00:00
|
|
|
import aiohttp
|
|
|
|
import aiocron
|
2020-09-04 10:20:09 +00:00
|
|
|
from bs4 import BeautifulSoup
|
2020-08-31 06:51:07 +00:00
|
|
|
from telethon import TelegramClient, events
|
2020-10-30 06:17:39 +00:00
|
|
|
from telethon.utils import chunks, is_list_like
|
2021-10-09 13:06:59 +00:00
|
|
|
from telethon.tl.types import DocumentAttributeVideo
|
2020-08-31 06:51:07 +00:00
|
|
|
|
2021-04-29 10:39:02 +00:00
|
|
|
# Initialise the MIME type tables, additionally reading the local
# ``mime.types`` file.
mimetypes.init(['mime.types'])

# The whole bot configuration is read once, at import time.
with open('config.yaml') as file:
    config_data = yaml.safe_load(file)

tg_api_id = config_data['telegram']['api_id']
tg_api_hash = config_data['telegram']['api_hash']
bot_token = config_data['telegram'].get('bot_token')

reddit_client_id = config_data['reddit']['client_id']
reddit_client_secret = config_data['reddit']['client_secret']

storage_chat = config_data['config'].get('storage_chat')
storage_msg_id = config_data['config'].get('storage_message_id')
_bkup_subreddits = config_data['config'].get('subreddits')
_send_to_chats = send_to_chats = config_data['config']['send_to_chats']
if isinstance(_send_to_chats, list):
    # Normalise the list form into ``{chat: settings}``.  Each entry is
    # either a bare chat, or a single-item mapping ``{chat: value}`` whose
    # value is a subreddit list, a full settings dict, or empty.
    send_to_chats = {}
    for entry in _send_to_chats:
        chat_key, settings = entry, None
        if isinstance(entry, dict):
            chat_key, settings = next(iter(entry.items()))
        if isinstance(settings, list) or not settings:
            # Only a subreddit list (or nothing) was given: wrap it in a
            # settings dict carrying the permissive defaults.
            settings = {
                'subreddits': settings,
                'cron_duration': config_data['config']['cron_duration'],
                'allow_selfposts': True,
                'allow_nsfw': True,
                'allow_spoilers': True,
                'show_nsfw_warning': True,
                'show_spoilers_warning': True,
                'timeout': None,
            }
        send_to_chats[chat_key] = settings
bot_admins = config_data['config']['bot_admins']
|
|
|
|
|
2022-09-14 08:47:44 +00:00
|
|
|
# Characters removed from both ends of a submission title.  Built once at
# import time instead of on every call; each code point is listed once.
_TITLE_STRIP_CHARS = ''.join((
    # https://en.wikipedia.org/wiki/Unicode_control_characters#Category_%22Cc%22_control_codes_(C0_and_C1)
    '\u0000',  # NULL (used in null-terminated strings)
    '\u0009',  # HORIZONTAL TABULATION (HT) (inserted by the tab key)
    '\u000A',  # LINE FEED (LF) (used as a line break)
    '\u000B',  # LINE TABULATION (VT)
    '\u000C',  # FORM FEED (FF) (denotes a page break in a plain text file)
    '\u000D',  # CARRIAGE RETURN (CR) (used in some line-breaking conventions)
    '\u001C\u001D\u001E\u001F',  # information separators (FS, GS, RS, US)
    '\u0085',  # NEXT LINE (NEL) (sometimes used as a line break in text transcoded from EBCDIC)
    # https://en.wikipedia.org/wiki/Unicode_control_characters#Unicode_introduced_separators
    '\u2028',  # LINE SEPARATOR (abbreviated LS or LSEP)
    '\u2029',  # PARAGRAPH SEPARATOR (abbreviated PS or PSEP)
    # https://en.wikipedia.org/wiki/Unicode_control_characters#Bidirectional_text_control
    '\u061C',  # ARABIC LETTER MARK
    '\u200E',  # LEFT-TO-RIGHT MARK
    '\u200F',  # RIGHT-TO-LEFT MARK
    '\u202A',  # LEFT-TO-RIGHT EMBEDDING
    '\u202B',  # RIGHT-TO-LEFT EMBEDDING
    '\u202C',  # POP DIRECTIONAL FORMATTING
    '\u202D',  # LEFT-TO-RIGHT OVERRIDE
    '\u202E',  # RIGHT-TO-LEFT OVERRIDE
    '\u2066',  # LEFT-TO-RIGHT ISOLATE
    '\u2067',  # RIGHT-TO-LEFT ISOLATE
    '\u2068',  # FIRST STRONG ISOLATE
    '\u2069',  # POP DIRECTIONAL ISOLATE
    # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
    '\u0020',  # space
    '\u00A0',  # no-break space
    '\u1680',  # ogham space mark
    '\u2000',  # en quad
    '\u2001',  # em quad
    '\u2002',  # en space
    '\u2003',  # em space
    '\u2004',  # three-per-em space
    '\u2005',  # four-per-em space
    '\u2006',  # six-per-em space
    '\u2007',  # figure space
    '\u2008',  # punctuation space
    '\u2009',  # thin space
    '\u200A',  # hair space
    '\u202F',  # narrow no-break space
    '\u205F',  # medium mathematical space
    '\u3000',  # ideographic space
    '\u180E',  # mongolian vowel separator
    '\u200B',  # zero width space
    '\u200C',  # zero width non-joiner
    '\u200D',  # zero width joiner
    '\u2060',  # word joiner
    '\uFEFF',  # zero width non-breaking space
))


def title_or_shortlink(submission):
    """Return an HTML link labelled with the submission's title.

    Whitespace, control and invisible formatting characters are stripped
    from both ends of ``submission.title`` (characters inside the title are
    kept).  If nothing is left, the bare ``submission.shortlink`` is returned
    instead, so the result is never an anchor with an empty label.

    :param submission: object exposing ``title`` and ``shortlink`` strings.
    :return: ``<a href="shortlink">escaped title</a>`` or the shortlink.
    """
    title = submission.title.strip(_TITLE_STRIP_CHARS)
    if not title:
        return submission.shortlink
    return f'<a href="{submission.shortlink}">{html.escape(title)}</a>'
|
|
|
|
|
2020-08-31 06:51:07 +00:00
|
|
|
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)
|
|
|
|
async def main():
|
2020-09-05 20:23:38 +00:00
|
|
|
_added_chats = []
|
2020-08-31 06:51:07 +00:00
|
|
|
client = await TelegramClient('redditbot', tg_api_id, tg_api_hash).start(bot_token=bot_token)
|
|
|
|
client.parse_mode = 'html'
|
|
|
|
session = aiohttp.ClientSession()
|
2021-04-04 06:10:56 +00:00
|
|
|
reddit = asyncpraw.Reddit(client_id=reddit_client_id, client_secret=reddit_client_secret, user_agent='linux:redditbot:v1.0.0 (by /u/the_blank_x)')
|
2020-08-31 06:51:07 +00:00
|
|
|
|
|
|
|
try:
|
|
|
|
if storage_chat and storage_msg_id:
|
|
|
|
await (await client.get_messages(storage_chat, ids=storage_msg_id)).download_media('redditbot.json')
|
|
|
|
with open('redditbot.json') as file:
|
|
|
|
seen_posts = json.load(file)
|
2020-09-04 14:11:02 +00:00
|
|
|
if isinstance(seen_posts, list):
|
|
|
|
seen_posts = {'version': 0, 'chats': {'global': seen_posts}}
|
2020-12-11 12:00:51 +00:00
|
|
|
except BaseException:
|
2020-09-04 14:11:02 +00:00
|
|
|
logging.exception('Loading JSON')
|
|
|
|
seen_posts = {'version': 0, 'chats': {'global': []}}
|
|
|
|
# chat dict: {chatid: [array of submission ids]}
|
2020-08-31 06:51:07 +00:00
|
|
|
|
2020-09-05 20:23:38 +00:00
|
|
|
uploading_lock = asyncio.Lock()
|
2020-08-31 06:51:07 +00:00
|
|
|
async def write_seen_posts():
    """Persist ``seen_posts`` locally and, if configured, to Telegram.

    The record is always dumped to ``redditbot.json``.  When both
    ``storage_chat`` and ``storage_msg_id`` are set, the storage message is
    then edited to carry the new file; ``uploading_lock`` ensures only one
    such edit is in flight at a time.
    """
    with open('redditbot.json', 'w') as file:
        json.dump(seen_posts, file)
    if storage_chat and storage_msg_id:
        async with uploading_lock:
            await client.edit_message(storage_chat, storage_msg_id, file='redditbot.json')
|
2020-08-31 06:51:07 +00:00
|
|
|
|
2020-09-05 20:23:38 +00:00
|
|
|
async def add_chat(chat, chat_data):
    """Register the cron job that posts random submissions to one chat.

    :param chat: chat reference from the configuration.  It is resolved to a
        peer id only when the chat has its own subreddit list.
    :param chat_data: settings dict with the keys ``subreddits``,
        ``cron_duration``, ``allow_selfposts``, ``allow_nsfw``,
        ``allow_spoilers``, ``show_nsfw_warning``, ``show_spoilers_warning``
        and optionally ``timeout``.

    The decorated job is appended to ``_added_chats``.
    """
    global_sp = chat_sp = seen_posts['chats']['global']
    subreddits = chat_data['subreddits']
    if subreddits:
        # A chat with its own subreddits gets its own seen-list, keyed by
        # the stringified peer id (JSON object keys must be strings).
        chat = await client.get_peer_id(chat)
        chat_sp = seen_posts['chats'].setdefault(str(chat), [])
    else:
        subreddits = _bkup_subreddits.copy()
    cron_duration = chat_data['cron_duration']
    allow_selfposts = chat_data['allow_selfposts']
    allow_nsfw = chat_data['allow_nsfw']
    allow_spoilers = chat_data['allow_spoilers']
    show_nsfw_warning = chat_data['show_nsfw_warning']
    show_spoilers_warning = chat_data['show_spoilers_warning']
    timeout = chat_data.get('timeout')

    # Content filters only need evaluating when at least one kind is banned.
    filtering = not (allow_selfposts and allow_nsfw and allow_spoilers)

    # unique_ids of searches that were abandoned (timeout or error); a
    # search that finds its id here stops at its next check.
    give_ups = set()

    def _already_seen(post_id):
        """Return True if ``post_id`` is in this chat's or the global list."""
        return post_id in chat_sp or post_id in global_sp

    def _crosspost_parent_id(post):
        """Return the bare id of ``post``'s crosspost parent, or None.

        ``crosspost_parent`` is a fullname (``t3_<id>``).  It is only used
        when ``crosspost_parent_list`` is non-empty, matching
        ``_actual_start_post``; otherwise the post is treated as a plain
        submission rather than carrying an un-stripped fullname around.
        """
        fullname = getattr(post, 'crosspost_parent', None)
        if fullname and getattr(post, 'crosspost_parent_list', None):
            return fullname[3:]
        return None

    def _is_allowed(post, parent):
        """Apply the self-post / NSFW / spoiler filters to one candidate.

        When ``parent`` (the crosspost parent) is given, its flags replace
        the post's own in the same cases the original inline code did.
        """
        is_self = post.is_self
        nsfw = post.over_18
        spoilers = post.spoiler
        if parent is not None:
            if not allow_selfposts:
                is_self = parent.is_self
            if not (nsfw and allow_nsfw):
                nsfw = parent.over_18
            if not (spoilers and allow_spoilers):
                spoilers = parent.spoiler
        if is_self and not allow_selfposts:
            return False
        if nsfw and not allow_nsfw:
            return False
        if spoilers and not allow_spoilers:
            return False
        return True

    async def _vet(post):
        """Check one candidate; return ``(parent_id, parent, accepted)``.

        ``parent`` is fetched fresh for every candidate, so a parent loaded
        for a rejected candidate can never be attached to a later one.
        """
        parent_id = _crosspost_parent_id(post)
        if _already_seen(post.id) or _already_seen(parent_id):
            return parent_id, None, False
        parent = None
        if filtering:
            if parent_id:
                parent = await reddit.submission(parent_id)
            if not _is_allowed(post, parent):
                return parent_id, parent, False
        return parent_id, parent, True

    async def _get_submission(unique_id):
        """Find an unseen, allowed submission from a random subreddit.

        Returns ``(submission, crosspost_parent_or_None)`` and records the
        chosen id in the chat's seen-list, or returns None when the search
        was given up or a hot listing held no acceptable post.
        """
        while unique_id not in give_ups:
            subreddit = await reddit.subreddit(random.choice(subreddits))
            random_post = await subreddit.random()
            if random_post is not None:
                cpid, cpp, accepted = await _vet(random_post)
                if not accepted:
                    continue
            else:
                # No random post was returned: walk the hot listing instead.
                async for submission in subreddit.hot(limit=None):
                    if unique_id in give_ups:
                        return None
                    cpid, cpp, accepted = await _vet(submission)
                    if accepted:
                        random_post = submission
                        break
                if random_post is None:
                    return None
            chat_sp.append(cpid or random_post.id)
            print(random_post.id, random_post.shortlink)
            return random_post, cpp
        return None

    @aiocron.crontab(cron_duration)
    async def start_post():
        """Cron job: pick a submission and post it, retrying on failure."""
        while True:
            unique_id = time.time()
            try:
                result = await asyncio.wait_for(_get_submission(unique_id), timeout)
            except asyncio.TimeoutError:
                give_ups.add(unique_id)
                logging.error('%s timed out', chat)
                return
            except Exception:
                give_ups.add(unique_id)
                logging.exception(chat)
                for admin in bot_admins:
                    # parse_mode=None: a traceback is plain text and must not
                    # go through the client's default HTML parser.
                    await client.send_message(admin, f'{chat}\n{traceback.format_exc()}', parse_mode=None)
            else:
                if not result:
                    return
                random_post, cpp = result
                try:
                    await _actual_start_post(random_post, [chat], cpp, show_nsfw_warning, show_spoilers_warning)
                except Exception:
                    logging.exception(random_post.id)
                    for admin in bot_admins:
                        await client.send_message(admin, f'{random_post.id}\n{traceback.format_exc()}', parse_mode=None)
                else:
                    break
        await write_seen_posts()

    _added_chats.append(start_post)
|
|
|
|
|
|
|
|
for chat in send_to_chats:
|
|
|
|
print(chat, send_to_chats[chat])
|
|
|
|
await add_chat(chat, send_to_chats[chat])
|
2020-08-31 06:51:07 +00:00
|
|
|
|
|
|
|
async def _start_broadcast(text, file, chats):
    """Send captions and their files to every chat, ten items per message.

    :param text: list of caption strings.
    :param file: list of file paths paired positionally with ``text``, or
        None/empty for a text-only post.
    :param chats: iterable of chats to send to.

    Captions and files are zipped (padding the shorter with None) and sent
    in groups of at most 10.  For a single video file, duration, dimensions
    and a thumbnail are attached when they can be determined; failures
    there are logged and the message is sent without them.
    """
    for chat in chats:
        for group in chunks(zip_longest(text, file or []), 10):
            captions, media = zip(*group)
            if not any(media):
                media = None
            if not media and len(captions) == 1:
                captions = captions[0]
            if is_list_like(captions) and is_list_like(media):
                if len(captions) == 1 and len(media) == 1:
                    # A single item is sent as a plain message, not an album.
                    captions = captions[0]
                    media = media[0]
            attributes = []
            try:
                mimetype = (await _get_file_mimetype(media)) if media else ''
            except TypeError:
                # (for now) telethon doesn't easily support attributes for grouped media
                mimetype = ''
            thumb = None
            if mimetype.startswith('video/'):
                try:
                    data = await _get_video_data(media)
                    duration = int(Decimal(data['format']['duration']))
                    width = height = None
                    for stream in data['streams']:
                        if stream['codec_type'] != 'video':
                            continue
                        width = stream['width']
                        height = stream['height']
                        break
                except Exception:
                    logging.exception('Exception when getting video data')
                else:
                    # Without a video stream there are no dimensions, and the
                    # attribute cannot be built from None values.
                    if width is not None and height is not None:
                        attributes.append(DocumentAttributeVideo(duration, width, height, supports_streaming=mimetype == 'video/mp4' or None))
                directory, _ = os.path.split(media)
                try:
                    nthumb = os.path.join(directory, f'{time.time()}.jpg')
                    if await _make_thumbnail(nthumb, media):
                        thumb = nthumb
                except Exception:
                    logging.exception('Exception while making thumbnail')
            await client.send_message(chat, captions, file=media, link_preview=False, attributes=attributes, thumb=thumb)
|
2021-10-09 13:06:59 +00:00
|
|
|
|
|
|
|
async def _get_video_data(filename, _allow_remux=True):
    """Return ffprobe's JSON description (format and streams) of a file.

    If ffprobe reports a ``format`` section without a ``duration``, the
    file is stream-copied (video only) into a temporary Matroska file and
    that copy is probed for a duration, which is then filled in.

    :param filename: path of the media file to probe.
    :param _allow_remux: internal guard; False for the probe of the
        remuxed copy so a copy that still lacks a duration cannot trigger
        another remux.
    :return: the parsed ffprobe output as a dict.
    :raises json.JSONDecodeError: if ffprobe printed no valid JSON.
    """
    proc = await asyncio.create_subprocess_exec('ffprobe', '-show_format', '-show_streams', '-print_format', 'json', filename, stdout=asyncio.subprocess.PIPE)
    stdout, _ = await proc.communicate()
    data = json.loads(stdout)
    if _allow_remux and data.get('format') and 'duration' not in data['format']:
        with tempfile.NamedTemporaryFile() as tempf:
            # The temporary file already exists, so ffmpeg must be told to
            # overwrite it (-y) and must never wait on stdin for a
            # confirmation (-nostdin), like the file's other ffmpeg calls.
            proc = await asyncio.create_subprocess_exec('ffmpeg', '-nostdin', '-y', '-an', '-sn', '-i', filename, '-c', 'copy', '-f', 'matroska', tempf.name)
            await proc.communicate()
            ndata = await _get_video_data(tempf.name, _allow_remux=False)
        if ndata.get('format') and 'duration' in ndata['format']:
            data['format']['duration'] = ndata['format']['duration']
    return data
|
2020-08-31 06:51:07 +00:00
|
|
|
|
2021-10-23 04:34:55 +00:00
|
|
|
async def _make_thumbnail(filename, video):
    """Extract one frame of ``video`` into ``filename``.

    Frames are tried at 0, 5, 10 and 15 seconds, in that order; the first
    ffmpeg run that exits successfully wins.  Offsets at or beyond the end
    of the video are skipped.

    :param filename: output image path.
    :param video: input video path.
    :return: True if a frame was written, False if the duration is unknown
        or every attempt failed.
    :raises ValueError: if the reported duration is not numeric.
    """
    data = await _get_video_data(video)
    fmt = data.get('format')
    if not fmt or fmt.get('duration') is None:
        return False
    # ffprobe's JSON carries the duration as a string; convert it before
    # comparing (comparing str with int raises TypeError).
    duration = float(fmt['duration'])
    for offset in (0, 5, 10, 15):
        if offset and offset >= duration:
            # Seeking past the end cannot produce a frame.
            continue
        proc = await asyncio.create_subprocess_exec('ffmpeg', '-nostdin', '-y', '-an', '-sn', '-ss', str(offset), '-i', video, '-frames:v', '1', filename)
        await proc.communicate()
        if not proc.returncode:
            return True
    return False
|
|
|
|
|
2020-08-31 06:51:07 +00:00
|
|
|
async def _download_file(filename, url):
    """Stream the response body of ``url`` into ``filename``.

    The HTTP status is not checked: whatever body the server returns is
    written, and callers inspect the resulting file's type afterwards.

    :param filename: destination path; overwritten if it exists.
    :param url: URL fetched with the shared aiohttp session.
    """
    print(url)
    async with session.get(url) as resp:
        with open(filename, 'wb') as file:
            while True:
                # 64 KiB per read; the previous 10-byte reads cost one
                # loop iteration and one write call per ten bytes.
                chunk = await resp.content.read(65536)
                if not chunk:
                    break
                file.write(chunk)
|
|
|
|
|
2020-09-03 04:53:54 +00:00
|
|
|
async def _get_file_mimetype(filename):
|
2021-04-29 09:52:08 +00:00
|
|
|
mimetype = mimetypes.guess_type(filename, strict=False)[0]
|
2020-09-03 04:53:54 +00:00
|
|
|
if not mimetype:
|
|
|
|
proc = await asyncio.create_subprocess_exec('file', '--brief', '--mime-type', filename, stdout=asyncio.subprocess.PIPE)
|
|
|
|
stdout, _ = await proc.communicate()
|
|
|
|
mimetype = stdout.decode().strip()
|
|
|
|
return mimetype or ''
|
|
|
|
|
2020-08-31 06:51:07 +00:00
|
|
|
async def _get_file_ext(filename):
    """Return a dotted file extension suited to the content of ``filename``.

    ``file --brief --extension`` is asked first and the first of its
    slash-separated suggestions is used.  If it offers nothing (empty or
    ``???``), the extension is derived from the MIME type, falling back to
    ``.bin``.

    :param filename: path of the file to inspect.
    :return: extension including the leading dot.
    """
    proc = await asyncio.create_subprocess_exec('file', '--brief', '--extension', filename, stdout=asyncio.subprocess.PIPE)
    stdout, _ = await proc.communicate()
    ext = stdout.decode().strip().split('/', maxsplit=1)[0]
    if not ext or ext == '???':
        mimetype = await _get_file_mimetype(filename)
        ext = mimetypes.guess_extension(mimetype, strict=False) or '.bin'
    if not ext.startswith('.'):
        ext = '.' + ext
    return ext
|
|
|
|
|
2020-09-05 20:23:38 +00:00
|
|
|
async def _actual_start_post(random_post, chats, cpp=None, snw=None, ssw=None):
    """Build the message(s) for one submission and broadcast them.

    :param random_post: the submission to post.
    :param chats: chats to send to (passed on to ``_start_broadcast``).
    :param cpp: already-fetched crosspost parent, if any.  When omitted and
        the submission is a crosspost, the parent is fetched here.
    :param snw: truthy to prepend the NSFW warning line when applicable.
    :param ssw: truthy to prepend the spoiler warning line when applicable.

    For a crosspost the parent's content is what gets posted; the caption
    names both.  Link posts are downloaded into a temporary directory
    (reddit video, reddit gallery, imgur, gfycat and reddit-hosted previews
    get special handling; an HTML page becomes a link card built from its
    title, description and OpenGraph media).  Self posts are sent as text.
    """
    text = title_or_shortlink(random_post)
    nsfw = random_post.over_18
    spoilers = random_post.spoiler
    cpid = getattr(random_post, 'crosspost_parent', None)
    if cpid and getattr(random_post, 'crosspost_parent_list', None) and not cpp:
        # crosspost_parent is a fullname ('t3_<id>'); drop the prefix.
        cpp = await reddit.submission(cpid[3:])
    if cpp:
        random_post = cpp
        if snw and not nsfw:
            nsfw = random_post.over_18
        if ssw and not spoilers:
            spoilers = random_post.spoiler
        text += f' (crosspost of {title_or_shortlink(random_post)})'
    if spoilers and ssw:
        text = '🙈🙈🙈 SPOILERS 🙈🙈🙈\n' + text
    if nsfw and snw:
        text = '🔞🔞🔞 18+ / NSFW 🔞🔞🔞\n' + text
    if not random_post.is_self:
        with tempfile.TemporaryDirectory() as tempdir:
            url = random_post.url
            filename = os.path.join(tempdir, str(time.time()))
            files = [filename]
            captions = [text]
            if random_post.is_video:
                ffmpeg_exists = any(os.path.exists(os.path.join(directory, 'ffmpeg')) for directory in os.environ.get('PATH', '').split(':'))
                reddit_video = random_post.secure_media['reddit_video']
                for key in ('hls_url', 'dash_url'):
                    if not ffmpeg_exists:
                        continue
                    if not reddit_video.get(key):
                        continue
                    url = reddit_video[key]
                    print(url)
                    proc = await asyncio.create_subprocess_exec('ffmpeg', '-nostdin', '-y', '-i', url, '-c', 'copy', '-f', 'mp4', filename)
                    await proc.communicate()
                    if not proc.returncode:
                        # ffmpeg already produced the file; nothing to fetch.
                        url = None
                        break
                else:
                    # No stream could be muxed: download the fallback file.
                    if 'fallback_url' in reddit_video:
                        url = reddit_video['fallback_url']
            elif getattr(random_post, 'is_gallery', None):
                files = []
                captions = []
                gallery_data = sorted(random_post.gallery_data['items'], key=lambda item: item['id'])
                # no enumerate() because we want to update the count only if the media's status is valid
                count = 1
                for gallery_data_item in gallery_data:
                    media_metadata_item = random_post.media_metadata[gallery_data_item['media_id']]
                    if media_metadata_item['status'] != 'valid':
                        continue
                    filename = os.path.join(tempdir, str(time.time()))
                    for media_type in ('u', 'mp4', 'gif'):
                        if media_type not in media_metadata_item['s']:
                            continue
                        await _download_file(filename, media_metadata_item['s'][media_type])
                        break
                    else:
                        # Nothing was downloaded for this item; queueing a
                        # path that does not exist would fail the whole post.
                        continue
                    caption = f'{text}\n#{count}'
                    real_caption = (gallery_data_item.get('caption') or '').strip()
                    if real_caption:
                        caption += f': {html.escape(real_caption)}'
                    captions.append(caption)
                    files.append(filename)
                    count += 1
                url = None
            if url:
                parsed = list(urlparse(url))
                splitted = os.path.splitext(parsed[2])
                domain = getattr(random_post, 'domain', parsed[1])
                preview = getattr(random_post, 'preview', None)
                if domain == 'imgur.com' or domain.endswith('.imgur.com'):
                    parsed[1] = 'i.imgur.com'
                    if parsed[2].startswith('/a/') or parsed[2].startswith('/gallery/'):
                        albumid = os.path.splitext(os.path.split(parsed[2].rstrip('/'))[1])[0]
                        async with session.get(f'https://imgur.com/ajaxalbums/getimages/{albumid}/hit.json?all=true') as resp:
                            apidata = (await resp.json())['data']
                        if apidata['count'] == 1:
                            # Single-image album: rewrite the URL to the image.
                            parsed[2] = apidata['images'][0]['hash'] + apidata['images'][0]['ext']
                            desc = apidata['images'][0]['description']
                            if desc:
                                captions[0] += '\n' + html.escape(desc)
                        else:
                            files = []
                            captions = []
                            for index, image in enumerate(apidata['images']):
                                to_append = f'#{index + 1}'
                                desc = image['description']
                                if desc:
                                    to_append += ': ' + desc.strip()
                                # Space left for the per-image text after the
                                # parsed (markup-free) length of the shared text.
                                caplength = 1023 - len(client.parse_mode.parse(text)[0])
                                captext = to_append[:caplength]
                                if len(captext) >= caplength:
                                    captext = captext[:-1]
                                    captext += '…'
                                captions.append(text + '\n' + html.escape(captext))
                                filename = os.path.join(tempdir, str(time.time()))
                                await _download_file(filename, f'https://i.imgur.com/{image["hash"]}{image["ext"]}')
                                files.append(filename)
                            url = None
                    if splitted[1] == '.gifv':
                        parsed[2] = splitted[0] + '.mp4'
                    if url:
                        url = urlunparse(parsed)
                elif domain == 'gfycat.com':
                    async with session.get(f'https://api.gfycat.com/v1/gfycats/{splitted[0]}') as resp:
                        apidata = await resp.json()
                    gfyitem = apidata.get('gfyItem')
                    if gfyitem:
                        url = gfyitem.get('mp4Url', url)
                elif random_post.is_reddit_media_domain and preview:
                    ppreview = preview['images'][0]
                    if splitted[1] == '.gif':
                        for variant in ('mp4', 'gif'):
                            if variant in ppreview['variants']:
                                url = ppreview['variants'][variant]['source']['url']
                                break
                    elif random_post.is_video:
                        url = ppreview['source']['url']
            if url:
                url = urlunparse(urlparse(url, 'https'))
                await _download_file(filename, url)
                mimetype = await _get_file_mimetype(filename)
                if mimetype.startswith('image') and preview and preview.get('enabled'):
                    # While the image is 10 MB or larger, re-download the
                    # preview renditions in the reverse of their listed
                    # order, starting with the source rendition.
                    preview = preview['images'][0]
                    urls = [resolution['url'] for resolution in preview['resolutions']]
                    urls.append(preview['source']['url'])
                    urls.reverse()
                    for url in urls:
                        if os.path.getsize(filename) < 10000000:
                            break
                        url = urlunparse(urlparse(url, 'https'))
                        await _download_file(filename, url)
                ext = await _get_file_ext(filename)
                if ext in ('.htm', '.html'):
                    # The link is a web page: post a link card instead.
                    with open(filename) as file:
                        soup = BeautifulSoup(file.read())
                    ptitle = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:title' and tag.attrs.get('content')) or soup.find('title')
                    if ptitle:
                        ptitle = ptitle.attrs.get('content', ptitle.text).strip()
                    pdesc = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:description' and tag.attrs.get('content')) or soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('name') == 'description' and tag.attrs.get('content'))
                    if pdesc:
                        pdesc = pdesc.attrs.get('content', pdesc.text).strip()
                    pmedia = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:video' and tag.attrs.get('content')) or soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:image' and tag.attrs.get('content'))
                    if pmedia:
                        pmedia = pmedia.attrs.get('content', '').strip()
                    tat = f'{text}\n\nURL: '
                    # The message is parsed as HTML, so the URL must be
                    # escaped both inside the attribute and as plain text.
                    safe_url = html.escape(url)
                    if ptitle:
                        tat += f'<a href="{safe_url}">{html.escape(ptitle)}</a>'
                    else:
                        tat += safe_url
                    files = []
                    if pmedia:
                        pmedia = urljoin(url, pmedia)
                        await _download_file(filename, pmedia)
                        if await _get_file_mimetype(filename) == 'video/x-m4v':
                            # Re-wrap the m4v stream in an mp4 container.
                            ofilename = filename + '.oc'
                            os.rename(filename, ofilename)
                            proc = await asyncio.create_subprocess_exec('ffmpeg', '-nostdin', '-y', '-i', ofilename, '-c', 'copy', '-f', 'mp4', filename)
                            await proc.communicate()
                            if not proc.returncode:
                                os.remove(ofilename)
                            else:
                                # Remux failed: restore the original download
                                # over any partial output.  The file must
                                # still exist, as it is queued just below.
                                os.replace(ofilename, filename)
                        files.append(filename)
                    if pdesc:
                        caplength = 1023 if pmedia else 4095
                        caplength -= len(client.parse_mode.parse(tat)[0])
                        captext = pdesc[:caplength]
                        if len(captext) >= caplength:
                            captext = captext[:-1]
                            captext += '…'
                        # Page descriptions are untrusted text; escape them
                        # like the title and the other captions.
                        tat += '\n' + html.escape(captext)
                    captions = [tat]
            # Give every downloaded file an extension matching its content.
            for index, path in enumerate(files):
                ext = await _get_file_ext(path)
                os.rename(path, path + ext)
                files[index] = path + ext
            await _start_broadcast(captions, files, chats)
    else:
        if getattr(random_post, 'selftext', None):
            caplength = 4094 - len(client.parse_mode.parse(text)[0])
            text += '\n\n'
            captext = random_post.selftext.strip()[:caplength]
            if len(captext) >= caplength:
                captext = captext[:-1]
                captext += '…'
            text += html.escape(captext)
        await _start_broadcast([text], None, chats)
|
|
|
|
|
|
|
|
def register(pattern):
    """Decorator factory: register an admin-only command handler.

    The decorated coroutine is subscribed to new messages that come from
    ``bot_admins`` and match ``pattern``.  If the handler raises, the
    traceback is sent as a plain-text reply and the exception is re-raised.

    :param pattern: pattern passed to ``events.NewMessage``.
    :return: a decorator that returns the registered wrapper.
    """
    def wrapper(func):
        @functools.wraps(func)
        @client.on(events.NewMessage(chats=bot_admins, pattern=pattern))
        async def awrapper(e):
            try:
                await func(e)
            except BaseException:
                # Report, then propagate unchanged (nothing is swallowed).
                await e.reply(traceback.format_exc(), parse_mode=None)
                raise
        return awrapper
    return wrapper
|
|
|
|
|
|
|
|
@register('/(start|help)')
async def start_or_help(e):
    """Reply with the list of admin commands as plain text."""
    await e.reply(('/start - /help\n'
                   '/help - /start\n'
                   '/poweroff - shuts down bot\n'
                   '/test <submission id> [ns] - tests sending submission'), parse_mode=None)
|
2020-08-31 06:51:07 +00:00
|
|
|
|
|
|
|
@register('/poweroff')
async def poweroff(e):
    """Acknowledge the command, then disconnect the Telegram client."""
    await e.reply('ok')
    await e.client.disconnect()
|
|
|
|
|
2020-09-05 20:23:38 +00:00
|
|
|
@register(r'/test (\S+)(?: ([ns]+))?')
async def test_post(e):
    """Post a given submission to the chat the command was sent in.

    The first group is the submission id.  The optional second group is a
    run of flags: ``n`` enables the NSFW warning, ``s`` the spoiler warning.
    """
    await e.reply('ok')
    post = await reddit.submission(e.pattern_match.group(1))
    flags = e.pattern_match.group(2) or ''
    snw = 'n' in flags
    ssw = 's' in flags
    await _actual_start_post(post, [e.chat_id], None, snw, ssw)
|
2020-08-31 06:51:07 +00:00
|
|
|
|
2020-09-05 20:23:38 +00:00
|
|
|
# await asyncio.gather(*[i.func() for i in _added_chats])
|
2020-08-31 06:51:07 +00:00
|
|
|
try:
|
|
|
|
await client.run_until_disconnected()
|
|
|
|
finally:
|
|
|
|
await session.close()
|
|
|
|
|
|
|
|
# Run the bot only when executed as a script, not when imported.
if __name__ == '__main__':
    asyncio.run(main())
|