# redditbot/redditbot.py

import os
import time
import html
import json
import random
import logging
import asyncio
import tempfile
import functools
import mimetypes
import traceback
from itertools import zip_longest
from urllib.parse import urlparse, urlunparse
import yaml
import asyncpraw
import aiohttp
import aiocron
from bs4 import BeautifulSoup
from telethon import TelegramClient, events
from telethon.utils import chunks, is_list_like

mimetypes.init(['mime.types'])
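
# Expected config.yaml shape, inferred from the keys read below (all values are
# illustrative placeholders, not real credentials or chats):
#
#   telegram:
#     api_id: 123456
#     api_hash: '0123456789abcdef0123456789abcdef'
#     bot_token: '123456:ABC-DEF'         # optional; omit to log in as a user
#   reddit:
#     client_id: 'client-id'
#     client_secret: 'client-secret'
#   config:
#     storage_chat: -1001234567890        # optional: chat holding the seen-posts file
#     storage_message_id: 12345           # optional: message edited with redditbot.json
#     subreddits: [pics, aww]             # fallback list for chats without their own
#     cron_duration: '*/30 * * * *'       # default cron schedule for posting
#     send_to_chats: ...                  # mapping or list; see the normalization below
#     bot_admins: [123456789]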
with open('config.yaml') as file:
    config_data = yaml.safe_load(file)

tg_api_id = config_data['telegram']['api_id']
tg_api_hash = config_data['telegram']['api_hash']
bot_token = config_data['telegram'].get('bot_token')
reddit_client_id = config_data['reddit']['client_id']
reddit_client_secret = config_data['reddit']['client_secret']
storage_chat = config_data['config'].get('storage_chat')
storage_msg_id = config_data['config'].get('storage_message_id')
_bkup_subreddits = config_data['config'].get('subreddits')
_send_to_chats = send_to_chats = config_data['config']['send_to_chats']
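# send_to_chats accepts either a mapping of chat -> settings or a plain list; the
# loop below normalizes the list form into the mapping form. A sketch of the list
# form (chat names are illustrative):
#
#   send_to_chats:
#     - '@somechannel'            # bare chat: default settings + fallback subreddits
#     - '@otherchat': [pics, aww] # chat with its own subreddit list
#
# Either way, each entry ends up with the full per-chat settings dict built below.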
if isinstance(_send_to_chats, list):
    send_to_chats = dict()
    for i in _send_to_chats:
        j = None
        if isinstance(i, dict):
            j = tuple(i.values())[0]
            i = tuple(i.keys())[0]
        if isinstance(j, list) or not j:
            j = {'subreddits': j, 'cron_duration': config_data['config']['cron_duration'],
                 'allow_selfposts': True, 'allow_nsfw': True,
                 'allow_spoilers': True, 'show_nsfw_warning': True,
                 'show_spoilers_warning': True, 'timeout': None}
        send_to_chats[i] = j
bot_admins = config_data['config']['bot_admins']
logging.basicConfig(level=logging.INFO)


async def main():
    _added_chats = []
    client = await TelegramClient('redditbot', tg_api_id, tg_api_hash).start(bot_token=bot_token)
    client.parse_mode = 'html'
    session = aiohttp.ClientSession()
    reddit = asyncpraw.Reddit(client_id=reddit_client_id, client_secret=reddit_client_secret,
                              user_agent='linux:redditbot:v1.0.0 (by /u/the_blank_x)')

    try:
        if storage_chat and storage_msg_id:
            await (await client.get_messages(storage_chat, ids=storage_msg_id)).download_media('redditbot.json')
        with open('redditbot.json') as file:
            seen_posts = json.load(file)
        if isinstance(seen_posts, list):
            seen_posts = {'version': 0, 'chats': {'global': seen_posts}}
    except BaseException:
        logging.exception('Loading JSON')
        seen_posts = {'version': 0, 'chats': {'global': []}}
    # chat dict: {chatid: [array of submission ids]}
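
    # uploading_lock serializes edits to the storage message: several per-chat cron
    # jobs can finish at once, and concurrent edit_message calls could clobber each other.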
    uploading_lock = asyncio.Lock()

    async def write_seen_posts():
        with open('redditbot.json', 'w') as file:
            json.dump(seen_posts, file)
        if storage_chat and storage_msg_id:
            async with uploading_lock:
                await client.edit_message(storage_chat, storage_msg_id, file='redditbot.json')

    async def add_chat(chat, chat_data):
        global_sp = chat_sp = seen_posts['chats']['global']
        subreddits = chat_data['subreddits']
        if subreddits:
            chat = await client.get_peer_id(chat)
            if str(chat) not in seen_posts['chats']:
                seen_posts['chats'][str(chat)] = []
            chat_sp = seen_posts['chats'][str(chat)]
        else:
            subreddits = _bkup_subreddits.copy()
        cron_duration = chat_data['cron_duration']
        allow_selfposts = chat_data['allow_selfposts']
        allow_nsfw = chat_data['allow_nsfw']
        allow_spoilers = chat_data['allow_spoilers']
        show_nsfw_warning = chat_data['show_nsfw_warning']
        show_spoilers_warning = chat_data['show_spoilers_warning']
        timeout = chat_data.get('timeout')
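        # unique ids of abandoned searches: start_post adds its id here on timeout or
        # error, so a _get_submission loop still running for that attempt exits early
        # instead of racing the next one.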
        give_ups = set()

        async def _get_submission(unique_id):
            while unique_id not in give_ups:
                subreddit = await reddit.subreddit(random.choice(subreddits))
                random_post = await subreddit.random()
                cpid = cpp = None
                if random_post is None:
                    # subreddit has random posts disabled: fall back to scanning /hot
                    async for submission in subreddit.hot():
                        if unique_id in give_ups:
                            return
                        cpid = getattr(submission, 'crosspost_parent', None)
                        if cpid and getattr(submission, 'crosspost_parent_list', None):
                            cpid = cpid[3:]
                        if submission.id in chat_sp + global_sp or cpid in chat_sp + global_sp:
                            continue
                        if not (allow_selfposts and allow_nsfw and allow_spoilers):
                            is_self = submission.is_self
                            nsfw = submission.over_18
                            spoilers = submission.spoiler
                            if cpid:
                                cpp = await reddit.submission(cpid)
                                if not allow_selfposts:
                                    is_self = cpp.is_self
                                if not (nsfw and allow_nsfw):
                                    nsfw = cpp.over_18
                                if not (spoilers and allow_spoilers):
                                    spoilers = cpp.spoiler
                            if is_self and not allow_selfposts:
                                continue
                            if nsfw and not allow_nsfw:
                                continue
                            if spoilers and not allow_spoilers:
                                continue
                        random_post = submission
                        break
                cpid = getattr(random_post, 'crosspost_parent', None)
                if cpid and getattr(random_post, 'crosspost_parent_list', None):
                    cpid = cpid[3:]
                if random_post.id in chat_sp + global_sp or cpid in chat_sp + global_sp:
                    continue
                if not (allow_selfposts and allow_nsfw and allow_spoilers):
                    is_self = random_post.is_self
                    nsfw = random_post.over_18
                    spoilers = random_post.spoiler
                    if cpid and not cpp:
                        cpp = await reddit.submission(cpid)
                    if cpid:
                        if not allow_selfposts:
                            is_self = cpp.is_self
                        if not (nsfw and allow_nsfw):
                            nsfw = cpp.over_18
                        if not (spoilers and allow_spoilers):
                            spoilers = cpp.spoiler
                    if is_self and not allow_selfposts:
                        continue
                    if nsfw and not allow_nsfw:
                        continue
                    if spoilers and not allow_spoilers:
                        continue
                chat_sp.append(cpid or random_post.id)
                print(random_post.id, random_post.shortlink)
                return random_post, cpp
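
        # aiocron fires start_post on the chat's cron schedule; each run retries
        # until one submission is successfully posted, or the search times out.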
        @aiocron.crontab(cron_duration)
        async def start_post():
            while True:
                unique_id = time.time()
                try:
                    random_post, cpp = await asyncio.wait_for(_get_submission(unique_id), timeout)
                except asyncio.TimeoutError:
                    give_ups.add(unique_id)
                    logging.error('%s timed out', chat)
                    for i in bot_admins:
                        await client.send_message(i, f'{chat} timed out')
                    break
                except BaseException:
                    give_ups.add(unique_id)
                    logging.exception(chat)
                    for i in bot_admins:
                        await client.send_message(i, f'{chat}\n{traceback.format_exc()}')
                else:
                    try:
                        await _actual_start_post(random_post, [chat], cpp, show_nsfw_warning, show_spoilers_warning)
                    except BaseException:
                        logging.exception(random_post.id)
                        for i in bot_admins:
                            await client.send_message(i, f'{random_post.id}\n{traceback.format_exc()}', parse_mode=None)
                    else:
                        break
            await write_seen_posts()
        _added_chats.append(start_post)

    for chat in send_to_chats:
        print(chat, send_to_chats[chat])
        await add_chat(chat, send_to_chats[chat])
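
    # Telegram albums cap at 10 items, hence chunks(..., 10): captions and uploaded
    # files are zipped so each chunk goes out as one album (or a lone text message).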
    async def _start_broadcast(text, file, chats):
        uploaded_files = []
        for i in file or []:
            uploaded_files.append(await client.upload_file(i))
        for chat in chats:
            for i in chunks(zip_longest(text, uploaded_files), 10):
                j, k = zip(*i)
                if not any(k):
                    k = None
                if not k and len(j) == 1:
                    j = j[0]
                if is_list_like(j) and is_list_like(k):
                    if len(j) == 1 and len(k) == 1:
                        j = j[0]
                        k = k[0]
                await client.send_message(chat, j, file=k, link_preview=False)

    async def _download_file(filename, url):
        print(url)
        async with session.get(url) as resp:
            with open(filename, 'wb') as file:
                while True:
                    # stream the response to disk in 64 KiB chunks
                    chunk = await resp.content.read(65536)
                    if not chunk:
                        break
                    file.write(chunk)
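
    # Mimetype and extension detection combine Python's mimetypes tables with
    # file(1), which sniffs the actual bytes.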
    async def _get_file_mimetype(filename):
        mimetype = mimetypes.guess_type(filename, strict=False)[0]
        if not mimetype:
            proc = await asyncio.create_subprocess_exec('file', '--brief', '--mime-type', filename, stdout=asyncio.subprocess.PIPE)
            stdout, _ = await proc.communicate()
            mimetype = stdout.decode().strip()
        return mimetype or ''

    async def _get_file_ext(filename):
        proc = await asyncio.create_subprocess_exec('file', '--brief', '--extension', filename, stdout=asyncio.subprocess.PIPE)
        stdout, _ = await proc.communicate()
        ext = stdout.decode().strip().split('/', maxsplit=1)[0]
        if not ext or ext == '???':
            mimetype = await _get_file_mimetype(filename)
            ext = mimetypes.guess_extension(mimetype, strict=False) or '.bin'
        if not ext.startswith('.'):
            ext = '.' + ext
        return ext
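
    # The posting pipeline: resolve crossposts, prepend NSFW/spoiler banners, pick a
    # media source (reddit video/gallery, imgur, gfycat, previews, or a plain web
    # page), download it, then broadcast.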
    async def _actual_start_post(random_post, chats, cpp=None, snw=None, ssw=None):
        text = f'<a href="{random_post.shortlink}">{html.escape(random_post.title)}</a>'
        nsfw = random_post.over_18
        spoilers = random_post.spoiler
        cpid = getattr(random_post, 'crosspost_parent', None)
        if cpid and getattr(random_post, 'crosspost_parent_list', None) and not cpp:
            cpp = await reddit.submission(cpid[3:])
        if cpp:
            random_post = cpp
            if snw and not nsfw:
                nsfw = random_post.over_18
            if ssw and not spoilers:
                spoilers = random_post.spoiler
            text += f' (crosspost of <a href="{random_post.shortlink}">{html.escape(random_post.title)}</a>)'
        if spoilers and ssw:
            text = '🙈🙈🙈 SPOILERS 🙈🙈🙈\n' + text
        if nsfw and snw:
            text = '🔞🔞🔞 18+ / NSFW 🔞🔞🔞\n' + text
        if not random_post.is_self:
            with tempfile.TemporaryDirectory() as tempdir:
                url = random_post.url
                filename = os.path.join(tempdir, str(time.time()))
                files = [filename]
                captions = [text]
                if random_post.is_video:
                    # prefer remuxing the HLS/DASH stream with ffmpeg; the for/else
                    # falls back to the plain fallback_url when that isn't possible
                    ffmpeg_exists = any(True for i in os.environ.get('PATH', '').split(':') if os.path.exists(os.path.join(i, 'ffmpeg')))
                    reddit_video = random_post.secure_media['reddit_video']
                    for i in ('hls_url', 'dash_url'):
                        if not ffmpeg_exists:
                            continue
                        if not reddit_video.get(i):
                            continue
                        url = reddit_video[i]
                        print(url)
                        proc = await asyncio.create_subprocess_exec('ffmpeg', '-nostdin', '-y', '-i', url, '-c', 'copy', '-f', 'mp4', filename)
                        await proc.communicate()
                        if not proc.returncode:
                            url = None
                            break
                    else:
                        if 'fallback_url' in reddit_video:
                            url = reddit_video['fallback_url']
                elif getattr(random_post, 'is_gallery', None):
                    files = []
                    captions = []
                    if getattr(random_post, 'gallery_data', None):
                        # gallery_data carries the display order; sort by item id
                        gallery_keys = map(lambda i: i[1], sorted(map(lambda i: (i['id'], i['media_id']), random_post.gallery_data['items']), key=lambda i: i[0]))
                    else:
                        gallery_keys = random_post.media_metadata.keys()
                    for a, i in enumerate(gallery_keys):
                        i = random_post.media_metadata[i]
                        if i['status'] == 'valid':
                            filename = os.path.join(tempdir, str(time.time()))
                            for j in ('u', 'mp4', 'gif'):
                                if j in i['s']:
                                    await _download_file(filename, i['s'][j])
                                    break
                            captions.append(f'{text}\n#{a + 1}')
                            files.append(filename)
                    url = None
                if url:
                    parsed = list(urlparse(url))
                    splitted = os.path.splitext(parsed[2])
                    domain = getattr(random_post, 'domain', parsed[1])
                    preview = getattr(random_post, 'preview', None)
                    if domain.endswith('imgur.com'):
                        parsed[1] = 'i.imgur.com'
                        if parsed[2].startswith('/a/'):
                            albumid = os.path.split(parsed[2].rstrip('/'))[1]
                            async with session.get(f'https://imgur.com/ajaxalbums/getimages/{albumid}/hit.json?all=true') as resp:
                                apidata = (await resp.json())['data']
                            if apidata['count'] == 1:
                                parsed[2] = apidata['images'][0]['hash'] + apidata['images'][0]['ext']
                                desc = apidata['images'][0]['description']
                                if desc:
                                    captions[0] += '\n' + html.escape(desc)
                            else:
                                files = []
                                captions = []
                                for a, i in enumerate(apidata['images']):
                                    to_append = f'#{a + 1}'
                                    desc = i['description']
                                    if desc:
                                        to_append += ': ' + desc.strip()
                                    caplength = 1023 - len(client.parse_mode.parse(text)[0])
                                    captext = to_append[:caplength]
                                    if len(captext) >= caplength:
                                        captext = captext[:-1]
                                        captext += '…'
                                    captions.append(text + '\n' + html.escape(captext))
                                    filename = os.path.join(tempdir, str(time.time()))
                                    await _download_file(filename, f'https://i.imgur.com/{i["hash"]}{i["ext"]}')
                                    files.append(filename)
                                url = None
                        if splitted[1] == '.gifv':
                            parsed[2] = splitted[0] + '.mp4'
                        if url:
                            url = urlunparse(parsed)
                    elif domain == 'gfycat.com':
                        async with session.get(f'https://api.gfycat.com/v1/gfycats/{parsed[2]}') as resp:
                            apidata = await resp.json()
                        gfyitem = apidata.get('gfyItem')
                        if gfyitem:
                            url = gfyitem.get('mp4Url', url)
                    elif random_post.is_reddit_media_domain and preview:
                        ppreview = preview['images'][0]
                        if splitted[1] == '.gif':
                            for i in ('mp4', 'gif'):
                                if i in ppreview['variants']:
                                    url = ppreview['variants'][i]['source']['url']
                                    break
                        elif random_post.is_video:
                            url = ppreview['source']['url']
                if url:
                    url = urlunparse(urlparse(url, 'https'))
                    await _download_file(filename, url)
                    mimetype = await _get_file_mimetype(filename)
                    if mimetype.startswith('image') and preview and preview.get('enabled'):
                        # if the image is too big, walk down the preview resolutions
                        # (largest first) until it fits under ~10 MB
                        preview = preview['images'][0]
                        urls = [i['url'] for i in preview['resolutions']]
                        urls.append(preview['source']['url'])
                        urls.reverse()
                        for url in urls:
                            if os.path.getsize(filename) < 10000000:
                                break
                            url = urlunparse(urlparse(url, 'https'))
                            await _download_file(filename, url)
                    ext = await _get_file_ext(filename)
                    if ext in ('.htm', '.html'):
                        # not media after all: scrape the page's OpenGraph tags instead
                        with open(filename) as file:
                            soup = BeautifulSoup(file.read(), 'html.parser')
                        ptitle = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:title' and tag.attrs.get('content')) or soup.find('title')
                        if ptitle:
                            ptitle = ptitle.attrs.get('content', ptitle.text).strip()
                        pdesc = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:description' and tag.attrs.get('content')) or soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('name') == 'description' and tag.attrs.get('content'))
                        if pdesc:
                            pdesc = pdesc.attrs.get('content', pdesc.text).strip()
                        pmedia = soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:video' and tag.attrs.get('content')) or soup.find(lambda tag: tag.name == 'meta' and tag.attrs.get('property') == 'og:image' and tag.attrs.get('content'))
                        if pmedia:
                            pmedia = pmedia.attrs.get('content', '').strip()
                        tat = f'{text}\n\nURL: '
                        if ptitle:
                            tat += f'<a href="{url}">{html.escape(ptitle)}</a>'
                        else:
                            tat += url
                        files = []
                        if pmedia:
                            pmedia = urlunparse(urlparse(pmedia, 'https'))
                            await _download_file(filename, pmedia)
                            files.append(filename)
                        if pdesc:
                            caplength = 1023 if pmedia else 4095
                            caplength -= len(client.parse_mode.parse(tat)[0])
                            captext = pdesc[:caplength]
                            if len(captext) >= caplength:
                                captext = captext[:-1]
                                captext += '…'
                            tat += '\n' + captext
                        captions = [tat]
                for a, i in enumerate(files):
                    ext = await _get_file_ext(i)
                    os.rename(i, i + ext)
                    files[a] = i + ext
                await _start_broadcast(captions, files, chats)
        else:
            if getattr(random_post, 'selftext', None):
                caplength = 4094 - len(client.parse_mode.parse(text)[0])
                text += '\n\n'
                captext = random_post.selftext.strip()[:caplength]
                if len(captext) >= caplength:
                    captext = captext[:-1]
                    captext += '…'
                text += html.escape(captext)
            await _start_broadcast([text], None, chats)
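
    # register wraps an admin command handler so any exception is reported back in
    # the chat before being re-raised; handlers fire only for bot_admins.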
    def register(pattern):
        def wrapper(func):
            @functools.wraps(func)
            @client.on(events.NewMessage(chats=bot_admins, pattern=pattern))
            async def awrapper(e):
                try:
                    await func(e)
                except BaseException:
                    await e.reply(traceback.format_exc(), parse_mode=None)
                    raise
            return awrapper
        return wrapper

    @register('/(start|help)')
    async def start_or_help(e):
        await e.reply(('/start - /help\n'
                       '/help - /start\n'
                       '/poweroff - shuts down bot\n'
                       '/test <submission id> [ns] - tests sending submission'), parse_mode=None)

    @register('/poweroff')
    async def poweroff(e):
        await e.reply('ok')
        await e.client.disconnect()

    @register(r'/test (\S+)(?: ([ns]+))?')
    async def test_post(e):
        await e.reply('ok')
        post = await reddit.submission(e.pattern_match.group(1))
        flags = e.pattern_match.group(2) or ''
        snw = 'n' in flags
        ssw = 's' in flags
        await _actual_start_post(post, [e.chat_id], None, snw, ssw)

    # await asyncio.gather(*[i.func() for i in _added_chats])
    try:
        await client.run_until_disconnected()
    finally:
        await session.close()


if __name__ == '__main__':
    asyncio.run(main())