hentaihavenrs/src/utils.rs

338 lines
12 KiB
Rust

use crate::structs;
use quick_xml::events::Event;
use quick_xml::Reader;
extern crate reqwest;
extern crate serde_json;
pub async fn search(
client: reqwest::Client,
query: &str,
) -> Result<Vec<structs::SearchResult>, structs::Error> {
let text = &client
.get("https://hentaihaven.xxx/wp-json/wp/v2/wp-manga")
.query(&[("search", &query)])
.send()
.await?
.text()
.await?;
let text = text.trim_start_matches("\u{feff}");
Ok(serde_json::from_str(&text)?)
}
pub async fn get_hentai(
client: reqwest::Client,
id: &str,
) -> Result<Option<structs::HentaiInfo>, structs::Error> {
let url = match id.contains(|c: char| !c.is_digit(10)) {
true => format!("https://hentaihaven.xxx/watch/{}", &id),
false => format!("https://hentaihaven.xxx/?p={}", &id),
};
let resp = client.get(&url).send().await?;
if resp.status() != 200 {
return Ok(None);
}
let mut id = String::new();
let slug = resp
.url()
.path()
.trim_end_matches('/')
.rsplitn(2, '/')
.nth(0)
.unwrap()
.to_string();
let text = resp.text().await?.replace("&nbsp;", " ");
let mut reader = Reader::from_str(&text);
reader.check_end_names(false);
let mut buf = Vec::new();
let mut is_inside_a = false;
let mut is_inside_summary = false;
let mut is_inside_nav_links = false;
let mut is_inside_post_title = false;
let mut is_inside_chapter_list = false;
let mut is_inside_summary_heading = false;
let mut is_inside_summary_content = false;
let mut to_read_rank = false;
let mut to_read_genres = false;
let mut rank = 0;
let mut title = String::new();
let mut genres = Vec::new();
let mut censored = true;
let mut episode_urls = Vec::new();
let mut summary = String::new();
loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => {
if e.name() == b"div" {
let class = e.attributes().find(|i| match i.as_ref() {
Ok(i) => i.key == b"class",
Err(_) => false,
});
if let Some(class) = class {
match class.unwrap().unescape_and_decode_value(&reader) {
Ok(class_name) => {
match class_name.as_str() {
"summary-heading" => is_inside_summary_heading = true,
"summary-content" => is_inside_summary_content = true,
"post-title" => is_inside_post_title = true,
"nav-links" => is_inside_nav_links = true,
"listing-chapters_wrap" => is_inside_chapter_list = true,
"summary__content show-more" => is_inside_summary = true,
_ => (),
};
}
Err(_) => (),
};
}
} else if e.name() == b"a" {
is_inside_a = true;
if is_inside_nav_links {
let class = e.attributes().find(|i| match i.as_ref() {
Ok(i) => i.key == b"class",
Err(_) => false,
});
if let Some(class) = class {
match class.unwrap().unescape_and_decode_value(&reader) {
Ok(class_name) => {
if class_name
.to_lowercase()
.split_whitespace()
.any(|i| i == "uncensored")
{
censored = false;
is_inside_nav_links = false;
}
}
Err(_) => (),
};
}
} else if is_inside_chapter_list {
let href = e.attributes().find(|i| match i.as_ref() {
Ok(i) => i.key == b"href",
Err(_) => false,
});
if let Some(href) = href {
match href.unwrap().unescape_and_decode_value(&reader) {
Ok(href) => episode_urls.push(href),
Err(_) => (),
};
}
} else if id.is_empty() {
let data_post = e.attributes().find(|i| match i.as_ref() {
Ok(i) => i.key == b"data-post",
Err(_) => false,
});
if let Some(data_post) = data_post {
match data_post.unwrap().unescape_and_decode_value(&reader) {
Ok(data_post) => id = data_post,
Err(_) => (),
};
}
}
}
}
Ok(Event::Text(e)) => {
let text = match e.unescape_and_decode(&reader) {
Ok(text) => text,
Err(_) => continue,
};
if is_inside_summary_heading {
match text.trim() {
"Rank" => to_read_rank = true,
"Genre(s)" => to_read_genres = true,
_ => (),
};
} else if is_inside_summary_content {
if to_read_rank {
match text.trim().splitn(2, " ").nth(0).unwrap().parse::<usize>() {
Ok(i) => rank = i,
Err(_) => (),
};
to_read_rank = false;
} else if to_read_genres && is_inside_a {
genres.push(text.to_string());
}
} else if is_inside_post_title {
title.push_str(&text);
} else if is_inside_summary {
summary.push_str(&text);
}
}
Ok(Event::End(ref e)) => {
if e.name() == b"div" {
if is_inside_summary_heading {
is_inside_summary_heading = false;
} else if is_inside_summary_content {
is_inside_summary_content = false;
to_read_genres = false;
} else if is_inside_post_title {
is_inside_post_title = false;
title = title.trim().to_string();
} else if is_inside_nav_links {
is_inside_nav_links = false;
} else if is_inside_chapter_list {
is_inside_chapter_list = false;
} else if is_inside_summary {
break;
}
} else if e.name() == b"a" {
is_inside_a = false;
}
}
Err(err) => panic!("Error at position {}: {:?}", reader.buffer_position(), err),
Ok(Event::Eof) => break,
_ => (),
};
buf.clear();
}
if id.is_empty() {
return Ok(None);
}
episode_urls.reverse();
summary = summary.trim().to_string();
Ok(Some(structs::HentaiInfo {
id: id,
slug: slug,
title: title,
views: rank,
genres: genres,
censored: censored,
episode_urls: episode_urls,
summary: summary,
}))
}
/// Resolves an episode page `url` to its video information.
///
/// Downloads the page, looks for the first `<iframe>` start tag that carries a
/// decodable `src` attribute, and hands that URL to `parse_iframe`.
///
/// # Returns
///
/// `Ok(None)` when the response status is not 200, when the markup cannot be
/// read by the parser, or when no such `<iframe>` is found; otherwise whatever
/// `parse_iframe` returns for the iframe URL.
///
/// # Errors
///
/// Failures while sending the request or reading the body are propagated
/// through `?` as a [`structs::Error`], as are errors from `parse_iframe`.
pub async fn get_url(
    client: reqwest::Client,
    url: &str,
) -> Result<Option<structs::HentaiVideo>, structs::Error> {
    let resp = client.get(url).send().await?;
    if resp.status() != 200 {
        return Ok(None);
    }
    let text = resp.text().await?;
    let mut reader = Reader::from_str(&text);
    // The input is HTML, so mismatched closing tags must not be treated as errors.
    reader.check_end_names(false);
    let mut buf = Vec::new();
    let mut iframe_url = None;
    loop {
        match reader.read_event(&mut buf) {
            Ok(Event::Start(ref e)) if e.name() == b"iframe" => {
                let src = e
                    .attributes()
                    .filter_map(Result::ok)
                    .find(|attr| attr.key == b"src")
                    .and_then(|attr| attr.unescape_and_decode_value(&reader).ok());
                // An iframe without a usable `src` is skipped; keep scanning.
                if src.is_some() {
                    iframe_url = src;
                    break;
                }
            }
            Ok(Event::Eof) => break,
            // Markup the parser cannot read comes from the remote site and must not
            // abort the caller; treat it like a page without an iframe.
            Err(_) => return Ok(None),
            _ => (),
        }
        buf.clear();
    }
    match iframe_url {
        Some(iframe_url) => parse_iframe(client, &iframe_url).await,
        None => Ok(None),
    }
}
/// Extracts the video and caption URLs reachable from a player iframe at `url`.
///
/// The iframe page is scanned for text lines of the form
/// `data.append('<key>', '<value>');`. Each pair found is added to a multipart
/// form, which is then POSTed to the site's `admin-ajax.php` endpoint. The
/// JSON response is deserialized into a [`structs::RawHentaiVideo`]; the first
/// entry of its `sources` supplies the video URL, and its captions URL is kept
/// only if a GET request to it answers with status 200.
///
/// # Returns
///
/// `Ok(None)` when the iframe response status is not 200, when the markup
/// cannot be read by the parser, when no `data.append` line is found, or when
/// the response lists no video source.
///
/// # Errors
///
/// Failures of any of the HTTP requests, of reading a body, or of the JSON
/// deserialization are propagated through `?` as a [`structs::Error`].
async fn parse_iframe(
    client: reqwest::Client,
    url: &str,
) -> Result<Option<structs::HentaiVideo>, structs::Error> {
    // Parses one `data.append('<key>', '<value>');` line into `(key, value)`.
    // Returns `None` for any line that does not start with that call or that
    // lacks the `', '` separator between the two arguments.
    fn parse_append_call(line: &str) -> Option<(&str, &str)> {
        // Strip the wrapper exactly once so that a value which itself ends in
        // `');` (or a key starting with the prefix) is left intact.
        let args = line.trim().strip_prefix("data.append('")?;
        let args = args.strip_suffix("');").unwrap_or(args);
        let mut parts = args.splitn(2, "', '");
        match (parts.next(), parts.next()) {
            (Some(key), Some(value)) => Some((key, value)),
            _ => None,
        }
    }

    let resp = client.get(url).send().await?;
    if resp.status() != 200 {
        return Ok(None);
    }
    let text = resp.text().await?;
    let mut reader = Reader::from_str(&text);
    // The input is HTML, so mismatched closing tags must not be treated as errors.
    reader.check_end_names(false);
    let mut buf = Vec::new();
    let mut form = reqwest::multipart::Form::new();
    let mut form_modified = false;
    loop {
        match reader.read_event(&mut buf) {
            Ok(Event::Text(e)) => {
                // Decode the raw (still escaped) bytes; undecodable text is skipped.
                if let Ok(script) = reader.decode(e.escaped()) {
                    for line in script.split('\n') {
                        if let Some((key, value)) = parse_append_call(line) {
                            form = form.text(key.to_string(), value.to_string());
                            form_modified = true;
                        }
                    }
                }
            }
            Ok(Event::Eof) => break,
            // Markup the parser cannot read comes from the remote site and must not
            // abort the caller; treat it like a page without usable form data.
            Err(_) => return Ok(None),
            _ => (),
        }
        buf.clear();
    }
    if !form_modified {
        return Ok(None);
    }
    let body = client
        .post("https://hentaihaven.xxx/wp-admin/admin-ajax.php")
        .multipart(form)
        .send()
        .await?
        .text()
        .await?;
    // Drop any leading byte-order mark before handing the text to the JSON parser.
    let json = body.trim_start_matches('\u{feff}');
    let raw_data: structs::RawHentaiVideo = serde_json::from_str(json)?;
    let raw_data = raw_data.data;
    // Without a video source there is nothing to return, so check this before
    // spending a request on the captions probe.
    let video_url = match raw_data.sources.get(0) {
        Some(source) => source.src.clone(),
        None => return Ok(None),
    };
    // Only report captions whose URL actually answers with 200.
    let captions_status = client
        .get(&raw_data.captions.src)
        .send()
        .await?
        .status()
        .as_u16();
    let captions = if captions_status == 200 {
        Some(raw_data.captions.src)
    } else {
        None
    };
    Ok(Some(structs::HentaiVideo {
        captions,
        video: video_url,
    }))
}
/// Builds a `reqwest::Client` that uses rustls for TLS, sends title-cased
/// HTTP/1 header names, and identifies itself with a desktop Firefox
/// user-agent string.
///
/// # Panics
///
/// Panics if the client builder fails to construct the client.
pub fn create_client() -> reqwest::Client {
    const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0";
    // NOTE(review): the original comment indicates these settings exist to get
    // requests past Cloudflare's checks; confirm before changing any of them.
    let builder = reqwest::Client::builder()
        .use_rustls_tls()
        .http1_title_case_headers()
        .user_agent(USER_AGENT);
    builder.build().unwrap()
}