// autoytarchivers/src/utils.rs
//
// 286 lines
// 10 KiB
// Rust

use crate::config::{INVIDIOUS_INSTANCES, STORAGE_MESSAGE_ID};
use crate::structs::{Error, InvidiousVideo, Result, VideoData, YoutubeDLError};
use grammers_client::{types::Chat, InputMessage};
use quick_xml::{events::Event, Reader};
use rand::{thread_rng, Rng};
use reqwest::Client;
use std::io::Cursor;
use std::process::Stdio;
use tokio::io::AsyncWriteExt;
use tokio::process::Command;
use tokio::time::{sleep, Duration};
extern crate grammers_client;
extern crate serde_json;
extern crate tokio;
/// Python script that `get_video` pipes to `python3 -` on stdin.
///
/// The script takes the video ID as `sys.argv[1]`, prefers `yt_dlp` and falls
/// back to `youtube_dl`, and prints the extracted info as JSON on stdout. On
/// any exception it exits through `sys.exit(str(e))`, so the error text ends
/// up on stderr with a non-zero exit status.
///
/// It also wraps the extractor's `try_get`: when a dict carries a `reason`
/// starting with "This live event will begin in " or "Premieres in ", the
/// reason is rewritten to `autoytarchivers:<scheduledStartTime> <reason>` so
/// the scheduled start time survives in the error message.
///
/// The script uses the walrus operator, so it needs Python 3.8 or newer.
/// Indentation inside the literal is significant: it is Python source.
const PYTHON_INPUT: &[u8] = br#"import sys
import json
try:
    import yt_dlp as youtube_dl
except ImportError:
    import youtube_dl
_try_get = youtube_dl.extractor.youtube.try_get
def traverse_dict(src):
    for (key, value) in src.items():
        if key == 'scheduledStartTime':
            return value
        if isinstance(value, dict):
            if value := traverse_dict(value):
                return value
    return None
def try_get(src, getter, expected_type=None):
    if isinstance(src, dict):
        if reason := src.get('reason'):
            if isinstance(reason, str) and (reason.startswith('This live event will begin in ') or reason.startswith('Premieres in ')):
                if t := _try_get(src, traverse_dict, str):
                    src['reason'] = f'autoytarchivers:{t} {reason}'
    return _try_get(src, getter, expected_type)
youtube_dl.extractor.youtube.try_get = try_get
ytdl = youtube_dl.YoutubeDL({"skip_download": True, "no_color": True, "quiet": True})
try:
    print(json.dumps(ytdl.extract_info("https://www.youtube.com/watch?v=" + sys.argv[1]), indent=4))
except Exception as e:
    sys.exit(str(e))"#;
/// Fetches the IDs of the most recent videos of a YouTube channel.
///
/// If `INVIDIOUS_INSTANCES` is configured, each instance is queried in order
/// through `/api/v1/channels/<channel_id>/latest`. The first instance that
/// answers with a non-empty list wins, and at most 15 IDs are returned. An
/// instance that cannot be reached, answers with a non-200 status, or returns
/// unparsable data is logged to stderr and skipped.
///
/// When no instance produced any ID, the function falls back to YouTube's own
/// feed at `https://www.youtube.com/feeds/videos.xml` and collects the text of
/// up to 15 `yt:videoId` elements.
///
/// # Errors
///
/// Only the feed fallback can fail: the request itself, a non-success HTTP
/// status, or malformed XML are propagated to the caller.
pub async fn get_videos(client: &Client, channel_id: &str) -> Result<Vec<String>> {
    /// Upper bound on the number of IDs returned from either source.
    const MAX_VIDEOS: usize = 15;

    let mut video_ids = vec![];
    if let Some(invidious_instances) = INVIDIOUS_INSTANCES {
        for instance in &invidious_instances {
            let resp = match client
                .get(&format!(
                    "{}/api/v1/channels/{}/latest",
                    instance, channel_id
                ))
                .query(&[("fields", "videoId")])
                .header("Cache-Control", "no-store, max-age=0")
                .send()
                .await
            {
                Ok(resp) => resp,
                Err(err) => {
                    eprintln!("Failed to connect to {}: {:?}", instance, err);
                    continue;
                }
            };
            if resp.status() != 200 {
                eprintln!("Got {} from {}", resp.status(), instance);
                continue;
            }
            let body = match resp.bytes().await {
                Ok(body) => body,
                Err(err) => {
                    eprintln!("Failed to get data from {}: {:?}", instance, err);
                    continue;
                }
            };
            let videos: Vec<InvidiousVideo> = match serde_json::from_slice(&body) {
                Ok(videos) => videos,
                Err(err) => {
                    eprintln!("Failed to parse data from {}: {:?}", instance, err);
                    continue;
                }
            };
            video_ids.extend(videos.into_iter().take(MAX_VIDEOS).map(|video| video.video_id));
            // An empty answer is treated like a failed instance: try the next one.
            if !video_ids.is_empty() {
                return Ok(video_ids);
            }
        }
    }

    let feed = client
        .get("https://www.youtube.com/feeds/videos.xml")
        .query(&[("channel_id", channel_id)])
        .header("Cache-Control", "no-store, max-age=0")
        .send()
        .await?
        .error_for_status()?
        .text()
        .await?;
    let mut reader = Reader::from_str(&feed);
    let mut buf = vec![];
    // True only while the reader is positioned inside a `yt:videoId` element.
    let mut inside = false;
    loop {
        match reader.read_event(&mut buf) {
            Ok(Event::Start(ref e)) if e.name() == b"yt:videoId" => inside = true,
            // Clear the flag on the closing tag as well, so an element without
            // any text cannot cause a later, unrelated text node to be
            // collected as a video ID.
            Ok(Event::End(ref e)) if e.name() == b"yt:videoId" => inside = false,
            Ok(Event::Text(e)) if inside => {
                video_ids.push(e.unescape_and_decode(&reader)?);
                if video_ids.len() >= MAX_VIDEOS {
                    break;
                }
                inside = false;
            }
            Ok(Event::Eof) => break,
            Err(err) => return Err(err.into()),
            _ => (),
        }
        buf.clear();
    }
    Ok(video_ids)
}
pub async fn get_video(video_id: &str) -> Result<Option<VideoData>> {
let mut command = Command::new("python3");
let mut process = command
.args(&["-", video_id])
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?;
let mut stdin = process.stdin.take().unwrap();
tokio::spawn(async move {
if let Err(err) = stdin.write_all(PYTHON_INPUT).await {
eprintln!("Failed to write PYTHON_INPUT: {:?}", err);
}
drop(stdin)
});
let output = process.wait_with_output().await?;
if output.status.success() {
let mut data: VideoData = serde_json::from_slice(&output.stdout)?;
data.json = String::from_utf8(output.stdout)?;
Ok(Some(data))
} else {
let stderr = String::from_utf8(output.stderr)?;
let stderr_lowercase = stderr.to_lowercase();
if stderr_lowercase.contains("private video")
|| stderr_lowercase.contains("unavailable")
|| stderr_lowercase.contains("not available")
{
Ok(None)
} else {
Err(Error::YoutubeDL(YoutubeDLError {
status: output.status,
stdout: String::from_utf8(output.stdout)?,
stderr,
}))
}
}
}
pub async fn get_video_retry(
tclient: &mut grammers_client::Client,
chat: &Chat,
video_data: VideoData,
) -> Option<VideoData> {
for i in 1..=5 {
match get_video(&video_data.id).await {
Ok(i) => return i,
Err(err) => {
eprintln!("Failed to get video data: {:?}", err);
if let Error::YoutubeDL(ref err) = err {
let output = err.stderr.to_lowercase();
if output.contains("429")
|| output.contains("too many requests")
|| output.contains("technical difficult")
{
sleep(Duration::from_secs(i * 60 * 60)).await;
continue;
}
}
let text = format!("{:#?}", err);
let size = text.len();
let mut stream = Cursor::new(text.into_bytes());
match tclient
.upload_stream(&mut stream, size, "failed-get-video-data.log".to_string())
.await
{
Ok(uploaded) => {
let message = InputMessage::text("Failed to get video data")
.mime_type("text/plain")
.file(uploaded);
if let Err(err) = tclient.send_message(&chat, message).await {
eprintln!(
"Failed to send message about failing to get video data: {:?}",
err
);
if let Err(err) = tclient.send_message(&chat, InputMessage::text("Failed to send message about failing to get video data, see logs")).await {
eprintln!("Failed to send message about failing to send message about failing to get video data: {:?}", err);
}
}
}
Err(err) => {
eprintln!(
"Failed to upload logs about failing to get video data: {:?}",
err
);
if let Err(err) = tclient.send_message(&chat, InputMessage::text("Failed to upload logs about failing to get video data, see logs")).await {
eprintln!("Failed to send message about failing to upload logs about failing to get video data: {:?}", err);
}
}
};
}
};
let tmp = thread_rng().gen_range(30..=10 * 60);
sleep(Duration::from_secs(tmp)).await;
}
Some(video_data)
}
/// Reports whether `video_data` points at `manifest.googlevideo.com`.
///
/// When `requested_formats` is non-empty, the result is `true` if any of the
/// formats has a URL on that domain. Otherwise the top-level `url` is checked.
/// A missing top-level `url` yields `false` rather than a panic.
pub fn is_manifest(video_data: &VideoData) -> bool {
    const MANIFEST_DOMAIN: &str = "manifest.googlevideo.com";

    if video_data.requested_formats.is_empty() {
        video_data
            .url
            .as_ref()
            .map_or(false, |url| url.domain() == Some(MANIFEST_DOMAIN))
    } else {
        video_data
            .requested_formats
            .iter()
            .any(|format| format.url.domain() == Some(MANIFEST_DOMAIN))
    }
}
/// Returns everything after the first `.` of `text`, ignoring leading dots.
///
/// Leading `.` characters are stripped first, so a dot-file name such as
/// `.hidden` has no extension. Multi-part extensions are kept whole
/// (`archive.tar.gz` gives `tar.gz`). `None` means no `.` remains after the
/// leading dots were removed.
pub fn extension(text: &str) -> Option<&str> {
    let stem = text.trim_start_matches('.');
    let dot = stem.find('.')?;
    Some(&stem[dot + 1..])
}
/// Stores the list of seen video IDs in the Telegram storage message.
///
/// `seen_videos` is serialized to JSON, uploaded as `autoytarchivers.json`,
/// and the message `STORAGE_MESSAGE_ID` in `chat` is edited to carry the
/// uploaded file.
///
/// Returns `true` only when both the upload and the edit succeed. On either
/// failure the error is logged to stderr, a short notice is sent to `chat`
/// (a failure to send that notice is logged too), and `false` is returned.
pub async fn update_seen_videos(
    tclient: &mut grammers_client::Client,
    chat: &Chat,
    seen_videos: Vec<String>,
) -> bool {
    let payload = serde_json::to_vec(&serde_json::json!(seen_videos)).unwrap();
    let payload_len = payload.len();
    let mut stream = Cursor::new(payload);

    let upload = tclient
        .upload_stream(&mut stream, payload_len, "autoytarchivers.json".to_string())
        .await;
    let uploaded = match upload {
        Ok(uploaded) => uploaded,
        Err(err) => {
            eprintln!("Failed to upload seen videos: {:?}", err);
            let notice = InputMessage::text("Failed to upload seen videos, see logs");
            if let Err(err) = tclient.send_message(&chat, notice).await {
                eprintln!(
                    "Failed to send message about failing to upload seen videos: {:?}",
                    err
                );
            }
            return false;
        }
    };

    let message = InputMessage::text("")
        .mime_type("application/json")
        .file(uploaded);
    match tclient
        .edit_message(&chat, STORAGE_MESSAGE_ID, message)
        .await
    {
        Ok(_) => true,
        Err(err) => {
            eprintln!("Failed to edit seen videos: {:?}", err);
            let notice = InputMessage::text("Failed to edit seen videos, see logs");
            if let Err(err) = tclient.send_message(&chat, notice).await {
                eprintln!(
                    "Failed to send message about failing to edit seen videos: {:?}",
                    err
                );
            }
            false
        }
    }
}