// 746 lines, 32 KiB, Rust
use crate::structs;
|
|
|
|
use std::io::Write;
|
|
use std::path::PathBuf;
|
|
use std::fs::{create_dir, File};
|
|
use url::Url;
|
|
use quick_xml::Reader;
|
|
use quick_xml::events::Event;
|
|
extern crate reqwest;
|
|
extern crate serde_json;
|
|
|
|
/// Turns free-form text into a lowercase, underscore-separated slug.
///
/// The text is lower-cased, Vietnamese accented vowels and `đ` are folded to their
/// plain ASCII letters, and the listed punctuation characters (including space and `-`)
/// become `_`. Runs of underscores are collapsed to a single one and leading/trailing
/// underscores are removed. Any other character is kept unchanged.
fn generate_slug(text: &str) -> String {
    let mut slug = String::with_capacity(text.len());
    for ch in text.to_lowercase().chars() {
        let mapped = match ch {
            'à' | 'á' | 'ạ' | 'ả' | 'ã' | 'â' | 'ầ' | 'ấ' | 'ậ' | 'ẩ' | 'ẫ' | 'ă' | 'ằ' | 'ắ'
            | 'ặ' | 'ẳ' | 'ẵ' => 'a',
            'è' | 'é' | 'ẹ' | 'ẻ' | 'ẽ' | 'ê' | 'ề' | 'ế' | 'ệ' | 'ể' | 'ễ' => 'e',
            'ì' | 'í' | 'ị' | 'ỉ' | 'ĩ' => 'i',
            'ò' | 'ó' | 'ọ' | 'ỏ' | 'õ' | 'ô' | 'ồ' | 'ố' | 'ộ' | 'ổ' | 'ỗ' | 'ơ' | 'ờ' | 'ớ'
            | 'ợ' | 'ở' | 'ỡ' => 'o',
            'ù' | 'ú' | 'ụ' | 'ủ' | 'ũ' | 'ư' | 'ừ' | 'ứ' | 'ự' | 'ử' | 'ữ' => 'u',
            'ỳ' | 'ý' | 'ỵ' | 'ỷ' | 'ỹ' => 'y',
            'đ' => 'd',
            '!' | '@' | '%' | '^' | '*' | '(' | ')' | '+' | '=' | '<' | '>' | '?' | '/' | ','
            | '.' | ':' | ';' | '\'' | ' ' | '"' | '&' | '#' | '[' | ']' | '~' | '-' => '_',
            other => other,
        };
        // Collapse runs of underscores as they are produced; this also covers
        // underscores that were already present in the input.
        if mapped == '_' && slug.ends_with('_') {
            continue;
        }
        slug.push(mapped);
    }
    slug.trim_matches('_').to_string()
}
|
|
|
|
fn remove_html(text: &str) -> String {
|
|
let mut removed = String::new();
|
|
let mut reader = Reader::from_str(&text);
|
|
let mut buf = Vec::new();
|
|
loop {
|
|
match reader.read_event(&mut buf) {
|
|
Ok(Event::Text(e)) => removed.push_str(&e.unescape_and_decode(&reader).unwrap()),
|
|
Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
|
|
Ok(Event::Eof) => break,
|
|
_ => ()
|
|
};
|
|
buf.clear();
|
|
}
|
|
removed
|
|
}
|
|
|
|
pub async fn search(client: reqwest::Client, query: &str) -> Result<Vec<structs::SearchResult>, structs::Error> {
|
|
let text = client.post("https://mangakakalot.com/home_json_search")
|
|
.form(&[("searchword", &generate_slug(&query))])
|
|
.send()
|
|
.await?
|
|
.text()
|
|
.await?;
|
|
if text.is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
let mut results: Vec<structs::SearchResult> = serde_json::from_str(&text)?;
|
|
for i in 0..results.len() {
|
|
let old_result = &results[i];
|
|
results[i] = structs::SearchResult {
|
|
id: old_result.id.clone(),
|
|
name: remove_html(&old_result.name),
|
|
name_unsigned: old_result.name_unsigned.clone(),
|
|
last_chapter: remove_html(&old_result.last_chapter),
|
|
image: old_result.image.clone(),
|
|
author: remove_html(&old_result.author),
|
|
story_link: old_result.story_link.clone()
|
|
};
|
|
}
|
|
Ok(results)
|
|
}
|
|
|
|
pub async fn get_manga(client: reqwest::Client, manga_id: &str) -> Result<structs::MangaOption, structs::Error> {
|
|
let text = client.get(&format!("https://mangakakalot.com/manga/{}", &manga_id))
|
|
.send()
|
|
.await?
|
|
.text()
|
|
.await?;
|
|
let resp = parse_mangakakalot_manga(&text, &manga_id);
|
|
Ok(match resp {
|
|
structs::MangaOption::Manga(_) => resp,
|
|
structs::MangaOption::Redirect(redirect) => {
|
|
let text = client.get(&redirect.url)
|
|
.send()
|
|
.await?
|
|
.text()
|
|
.await?;
|
|
let resp = match Url::parse(&redirect.url)?.host_str().unwrap() {
|
|
"mangakakalot.com" => parse_mangakakalot_manga(&text, &manga_id),
|
|
"manganelo.com" => parse_manganelo_manga(&text, &manga_id),
|
|
_ => panic!("Unknown URL: {}", &redirect.url)
|
|
};
|
|
match resp {
|
|
structs::MangaOption::Manga(_) => resp,
|
|
structs::MangaOption::Redirect(_) => panic!("Nested redirect"),
|
|
structs::MangaOption::DoesNotExist => structs::MangaOption::DoesNotExist
|
|
}
|
|
},
|
|
structs::MangaOption::DoesNotExist => structs::MangaOption::DoesNotExist
|
|
})
|
|
}
|
|
|
|
fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption {
|
|
let mut split: Vec<&str> = text.splitn(2, "\n").collect();
|
|
let screaming_doctype = split[0].to_uppercase();
|
|
split[0] = &screaming_doctype;
|
|
let text = split.join("\n");
|
|
let mut name: Option<String> = None;
|
|
let mut status: Option<String> = None;
|
|
let mut last_updated: Option<String> = None;
|
|
let mut summary: Option<String> = None;
|
|
let mut authors = Vec::new();
|
|
let mut genres = Vec::new();
|
|
let mut chapters = Vec::new();
|
|
let mut is_inside_h1 = false;
|
|
let mut is_inside_a = false;
|
|
let mut is_inside_td = false;
|
|
let mut is_inside_authors = false;
|
|
let mut is_inside_genres = false;
|
|
let mut is_inside_status = false;
|
|
let mut is_inside_stre_value = false;
|
|
let mut is_inside_h3 = false;
|
|
let mut is_inside_ul = false;
|
|
let mut is_inside_description = false;
|
|
let mut tmp_chapter_link: Option<String> = None;
|
|
let mut reader = Reader::from_str(&text);
|
|
reader.check_end_names(false);
|
|
let mut buf = Vec::new();
|
|
loop {
|
|
match reader.read_event(&mut buf) {
|
|
Ok(Event::Start(ref e)) => {
|
|
match e.name() {
|
|
b"a" => {
|
|
is_inside_a = true;
|
|
if is_inside_ul {
|
|
let href = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => attribute.key == b"href",
|
|
Err(_) => false
|
|
}
|
|
});
|
|
if href.is_some() {
|
|
match href.unwrap().unwrap().unescape_and_decode_value(&reader) {
|
|
Ok(text) => tmp_chapter_link = Some(text),
|
|
Err(_) => ()
|
|
};
|
|
}
|
|
}
|
|
},
|
|
b"ul" => is_inside_ul = true,
|
|
b"h1" => is_inside_h1 = true,
|
|
b"h3" => is_inside_h3 = true,
|
|
b"td" => {
|
|
let is_table_value = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => {
|
|
attribute.key == b"class" &&
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(text) => text.as_str() == "table-value",
|
|
Err(_) => false
|
|
}
|
|
},
|
|
Err(_) => false
|
|
}
|
|
}).is_some();
|
|
if is_table_value {
|
|
is_inside_td = true;
|
|
}
|
|
},
|
|
b"i" => {
|
|
let class = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => attribute.key == b"class",
|
|
Err(_) => false
|
|
}
|
|
});
|
|
if class.is_some() {
|
|
match class.unwrap().unwrap().unescape_and_decode_value(&reader) {
|
|
Ok(class_name) => {
|
|
match class_name.as_str() {
|
|
"info-author" => is_inside_authors = true,
|
|
"info-status" => is_inside_status = true,
|
|
"info-genres" => is_inside_genres = true,
|
|
_ => ()
|
|
};
|
|
},
|
|
Err(_) => ()
|
|
};
|
|
}
|
|
},
|
|
b"span" => {
|
|
let is_stre_value = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => {
|
|
attribute.key == b"class" &&
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(text) => text.as_str() == "stre-value",
|
|
Err(_) => false
|
|
}
|
|
},
|
|
Err(_) => false
|
|
}
|
|
}).is_some();
|
|
if is_stre_value {
|
|
is_inside_stre_value = true;
|
|
}
|
|
},
|
|
b"div" => {
|
|
let is_description = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => {
|
|
attribute.key == b"class" &&
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(text) => text.as_str() == "panel-story-info-description",
|
|
Err(_) => false
|
|
}
|
|
},
|
|
Err(_) => false
|
|
}
|
|
}).is_some();
|
|
if is_description {
|
|
is_inside_description = true;
|
|
}
|
|
},
|
|
_ => ()
|
|
};
|
|
},
|
|
Ok(Event::Text(e)) => {
|
|
let text = match e.unescape_and_decode(&reader) {
|
|
Ok(text) => text,
|
|
Err(_) => {
|
|
buf.clear();
|
|
continue;
|
|
}
|
|
};
|
|
let text = text.trim();
|
|
if name.is_none() && is_inside_h1 {
|
|
name = Some(text.to_string());
|
|
} else if is_inside_authors && is_inside_td && is_inside_a {
|
|
authors.push(text.to_string());
|
|
} else if is_inside_status && is_inside_td {
|
|
status = Some(text.to_string());
|
|
} else if is_inside_genres && is_inside_td && is_inside_a {
|
|
genres.push(text.to_string());
|
|
} else if last_updated.is_none() && is_inside_stre_value {
|
|
last_updated = Some(text.to_string());
|
|
} else if is_inside_description && !is_inside_h3 && !text.is_empty() {
|
|
if summary.is_some() {
|
|
summary.as_mut().unwrap().push_str(text);
|
|
} else {
|
|
summary = Some(text.to_string());
|
|
}
|
|
} else if is_inside_ul && is_inside_a && tmp_chapter_link.is_some() {
|
|
let chapter_name = match text.splitn(2, &[':', '-'][..]).nth(1) {
|
|
Some(text) => Some(text.trim().to_string()),
|
|
None => None
|
|
};
|
|
match tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0) {
|
|
Some(chapter_number) => {
|
|
chapters.push(structs::Chapter {
|
|
chapter_number: chapter_number.to_string(),
|
|
chapter_name: chapter_name,
|
|
domain: "manganelo.com".to_string()
|
|
});
|
|
},
|
|
None => ()
|
|
};
|
|
tmp_chapter_link = None;
|
|
} else if text.starts_with("REDIRECT : ") {
|
|
return structs::MangaOption::Redirect(structs::Redirect { url: text.splitn(2, ':').nth(1).unwrap().trim().to_string() });
|
|
}
|
|
},
|
|
Ok(Event::End(e)) => {
|
|
match e.name() {
|
|
b"a" => is_inside_a = false,
|
|
b"h1" => is_inside_h1 = false,
|
|
b"h3" => is_inside_h3 = false,
|
|
b"td" => {
|
|
if is_inside_td {
|
|
is_inside_td = false;
|
|
is_inside_authors = false;
|
|
is_inside_genres = false;
|
|
is_inside_status = false;
|
|
}
|
|
},
|
|
b"div" => is_inside_description = false,
|
|
b"ul" => break,
|
|
_ => ()
|
|
};
|
|
},
|
|
Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
|
|
Ok(Event::Eof) => break,
|
|
_ => ()
|
|
};
|
|
buf.clear();
|
|
}
|
|
chapters.reverse();
|
|
structs::MangaOption::Manga(structs::Manga {
|
|
id: manga_id.to_string(),
|
|
name: name.unwrap(),
|
|
authors: authors,
|
|
status: status,
|
|
last_updated: last_updated,
|
|
genres: genres,
|
|
summary: summary,
|
|
chapters: chapters
|
|
}
|
|
)
|
|
}
|
|
|
|
fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption {
|
|
let mut split: Vec<&str> = text.splitn(2, "\n").collect();
|
|
let screaming_doctype = split[0].to_uppercase();
|
|
split[0] = &screaming_doctype;
|
|
let text = split.join("\n");
|
|
let mut is_inside_title = false;
|
|
let mut is_title_real = false;
|
|
let mut is_inside_chapter_list = false;
|
|
let mut is_inside_manga_info = false;
|
|
let mut is_inside_authors = false;
|
|
let mut is_inside_genres = false;
|
|
let mut is_inside_a = false;
|
|
let mut is_inside_row = false;
|
|
let mut name: Option<String> = None;
|
|
let mut status: Option<String> = None;
|
|
let mut last_updated: Option<String> = None;
|
|
let mut summary: Option<String> = None;
|
|
let mut is_inside_noidungm = false;
|
|
let mut is_inside_h1 = false;
|
|
let mut is_inside_h2 = false;
|
|
let mut authors = Vec::new();
|
|
let mut genres = Vec::new();
|
|
let mut chapters = Vec::new();
|
|
let mut tmp_chapter_link: Option<String> = None;
|
|
let mut reader = Reader::from_str(&text);
|
|
reader.check_end_names(false);
|
|
let mut buf = Vec::new();
|
|
loop {
|
|
match reader.read_event(&mut buf) {
|
|
Ok(Event::Start(ref e)) => {
|
|
match e.name() {
|
|
b"ul" => {
|
|
let is_manga_info_text = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => {
|
|
attribute.key == b"class" &&
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(text) => text.as_str() == "manga-info-text",
|
|
Err(_) => false
|
|
}
|
|
},
|
|
Err(_) => false
|
|
}
|
|
}).is_some();
|
|
if is_manga_info_text {
|
|
is_inside_manga_info = true;
|
|
}
|
|
},
|
|
b"div" => {
|
|
let class = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => attribute.key == b"class",
|
|
Err(_) => false
|
|
}
|
|
});
|
|
if class.is_some() {
|
|
match class.unwrap().unwrap().unescape_and_decode_value(&reader) {
|
|
Ok(class_name) => {
|
|
match class_name.as_str() {
|
|
"chapter-list" => is_inside_chapter_list = true,
|
|
"row" => is_inside_row = true,
|
|
_ => ()
|
|
};
|
|
},
|
|
Err(_) => ()
|
|
};
|
|
}
|
|
let inside_noidungm = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => {
|
|
attribute.key == b"id" &&
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(text) => text.as_str() == "noidungm",
|
|
Err(_) => false
|
|
}
|
|
},
|
|
Err(_) => false
|
|
}
|
|
}).is_some();
|
|
if inside_noidungm {
|
|
is_inside_noidungm = true;
|
|
}
|
|
},
|
|
b"h1" => is_inside_h1 = true,
|
|
b"h2" => is_inside_h2 = true,
|
|
b"a" => {
|
|
is_inside_a = true;
|
|
if is_inside_chapter_list {
|
|
let href = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => attribute.key == b"href",
|
|
Err(_) => false
|
|
}
|
|
});
|
|
if href.is_some() {
|
|
match href.unwrap().unwrap().unescape_and_decode_value(&reader) {
|
|
Ok(text) => tmp_chapter_link = Some(text),
|
|
Err(_) => ()
|
|
};
|
|
}
|
|
}
|
|
},
|
|
b"title" => is_inside_title = true,
|
|
_ => ()
|
|
};
|
|
},
|
|
Ok(Event::Text(e)) => {
|
|
let text = match e.unescape_and_decode(&reader) {
|
|
Ok(text) => text,
|
|
Err(_) => {
|
|
buf.clear();
|
|
continue;
|
|
}
|
|
};
|
|
let text = text.trim();
|
|
if is_inside_manga_info {
|
|
if is_inside_h1 {
|
|
name = Some(text.to_string());
|
|
} else if is_inside_authors && is_inside_a {
|
|
authors.push(text.to_string());
|
|
} else if is_inside_genres && is_inside_a {
|
|
genres.push(text.to_string());
|
|
} else {
|
|
match text.splitn(2, ' ').nth(0).unwrap() {
|
|
"Author(s)" => is_inside_authors = true,
|
|
"Status" => {
|
|
match text.splitn(3, ' ').nth(2) {
|
|
Some(text) => status = Some(text.to_string()),
|
|
None => ()
|
|
};
|
|
},
|
|
"Last" => {
|
|
if text.starts_with("Last updated : ") {
|
|
match text.splitn(4, ' ').nth(3) {
|
|
Some(text) => last_updated = Some(text.to_string()),
|
|
None => ()
|
|
};
|
|
}
|
|
},
|
|
"Genres" => is_inside_genres = true,
|
|
_ => ()
|
|
}
|
|
}
|
|
} else if is_inside_noidungm && !is_inside_h2 && !text.is_empty(){
|
|
if summary.is_some() {
|
|
summary.as_mut().unwrap().push_str(text);
|
|
} else {
|
|
summary = Some(text.to_string());
|
|
}
|
|
} else if is_inside_chapter_list && is_inside_a && tmp_chapter_link.is_some() {
|
|
let chapter_name = match text.splitn(2, &[':', '-'][..]).nth(1) {
|
|
Some(text) => Some(text.trim().to_string()),
|
|
None => None
|
|
};
|
|
match tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0) {
|
|
Some(chapter_number) => {
|
|
chapters.push(structs::Chapter {
|
|
chapter_number: chapter_number.to_string(),
|
|
chapter_name: chapter_name,
|
|
domain: "mangakakalot.com".to_string()
|
|
});
|
|
},
|
|
None => ()
|
|
};
|
|
tmp_chapter_link = None;
|
|
} else if is_inside_title {
|
|
is_title_real = !text.is_empty();
|
|
} else if text.trim().starts_with("REDIRECT : ") {
|
|
return structs::MangaOption::Redirect(structs::Redirect { url: text.splitn(2, ':').nth(1).unwrap().trim().to_string() });
|
|
}
|
|
},
|
|
Ok(Event::Empty(ref e)) => {
|
|
if is_inside_noidungm && e.name() == b"br" && summary.is_some() {
|
|
summary.as_mut().unwrap().push('\n');
|
|
}
|
|
},
|
|
Ok(Event::End(e)) => {
|
|
match e.name() {
|
|
b"ul" => is_inside_manga_info = false,
|
|
b"li" => {
|
|
is_inside_authors = false;
|
|
is_inside_genres = false;
|
|
},
|
|
b"div" => {
|
|
if is_inside_noidungm {
|
|
is_inside_noidungm = false;
|
|
} else if is_inside_row {
|
|
is_inside_row = false;
|
|
} else if is_inside_chapter_list {
|
|
break;
|
|
}
|
|
},
|
|
b"h1" => is_inside_h1 = false,
|
|
b"h2" => is_inside_h2 = false,
|
|
b"a" => is_inside_a = false,
|
|
b"title" => {
|
|
if !is_title_real {
|
|
return structs::MangaOption::DoesNotExist;
|
|
}
|
|
},
|
|
_ => ()
|
|
};
|
|
},
|
|
Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
|
|
Ok(Event::Eof) => break,
|
|
_ => ()
|
|
};
|
|
buf.clear();
|
|
}
|
|
chapters.reverse();
|
|
structs::MangaOption::Manga(structs::Manga {
|
|
id: manga_id.to_string(),
|
|
name: name.unwrap(),
|
|
authors: authors,
|
|
status: status,
|
|
last_updated: last_updated,
|
|
genres: genres,
|
|
summary: summary,
|
|
chapters: chapters
|
|
})
|
|
}
|
|
|
|
pub async fn get_pages(client: reqwest::Client, chapter: &structs::Chapter, manga_id: &str) -> Result<Vec<String>, reqwest::Error> {
|
|
let text = client.get(&format!("https://{}/chapter/{}/chapter_{}", &chapter.domain, &manga_id, &chapter.chapter_number))
|
|
.send()
|
|
.await?
|
|
.text()
|
|
.await?;
|
|
Ok(match chapter.domain.as_str() {
|
|
"mangakakalot.com" => parse_mangakakalot_pages(&text),
|
|
"manganelo.com" => parse_manganelo_pages(&text),
|
|
_ => panic!("Unknown domain: {}", &chapter.domain)
|
|
})
|
|
}
|
|
|
|
fn parse_mangakakalot_pages(text: &str) -> Vec<String> {
|
|
let mut split: Vec<&str> = text.splitn(2, "\n").collect();
|
|
let screaming_doctype = split[0].to_uppercase();
|
|
split[0] = &screaming_doctype;
|
|
let text = split.join("\n");
|
|
let mut is_inside_pages = false;
|
|
let mut is_inside_ads = false;
|
|
let mut pages = Vec::new();
|
|
let mut reader = Reader::from_str(&text);
|
|
reader.check_end_names(false);
|
|
let mut buf = Vec::new();
|
|
loop {
|
|
match reader.read_event(&mut buf) {
|
|
Ok(Event::Start(ref e)) => {
|
|
if e.name() == b"div" {
|
|
if is_inside_pages {
|
|
is_inside_ads = true;
|
|
} else {
|
|
let inside_pages = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => {
|
|
attribute.key == b"id" &&
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(text) => text.as_str() == "vungdoc",
|
|
Err(_) => false
|
|
}
|
|
},
|
|
Err(_) => false
|
|
}
|
|
}).is_some();
|
|
if inside_pages {
|
|
is_inside_pages = true;
|
|
}
|
|
}
|
|
}
|
|
},
|
|
Ok(Event::Empty(ref e)) => {
|
|
if e.name() == b"img" {
|
|
let mut src: Option<String> = None;
|
|
let mut alt: Option<String> = None;
|
|
for attribute in e.attributes() {
|
|
match attribute {
|
|
Ok(attribute) => {
|
|
match attribute.key {
|
|
b"src" => {
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(src_text) => src = Some(src_text),
|
|
Err(_) => ()
|
|
};
|
|
},
|
|
b"alt" => {
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(alt_text) => alt = Some(alt_text),
|
|
Err(_) => ()
|
|
};
|
|
},
|
|
_ => ()
|
|
};
|
|
},
|
|
Err(_) => ()
|
|
};
|
|
}
|
|
if src.is_some() && alt.is_some() {
|
|
pages.push(src.unwrap());
|
|
}
|
|
}
|
|
},
|
|
Ok(Event::End(e)) => {
|
|
if e.name() == b"div" {
|
|
if is_inside_ads {
|
|
is_inside_ads = false;
|
|
} else if is_inside_pages {
|
|
is_inside_pages = false;
|
|
}
|
|
}
|
|
},
|
|
Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
|
|
Ok(Event::Eof) => break,
|
|
_ => ()
|
|
}
|
|
buf.clear();
|
|
}
|
|
pages
|
|
}
|
|
|
|
fn parse_manganelo_pages(text: &str) -> Vec<String> {
|
|
let mut split: Vec<&str> = text.splitn(2, "\n").collect();
|
|
let screaming_doctype = split[0].to_uppercase();
|
|
split[0] = &screaming_doctype;
|
|
let text = split.join("\n");
|
|
let mut is_inside_pages = false;
|
|
let mut is_inside_ads = false;
|
|
let mut pages = Vec::new();
|
|
let mut reader = Reader::from_str(&text);
|
|
reader.check_end_names(false);
|
|
let mut buf = Vec::new();
|
|
loop {
|
|
match reader.read_event(&mut buf) {
|
|
Ok(Event::Start(ref e)) => {
|
|
if e.name() == b"div" {
|
|
if is_inside_pages {
|
|
is_inside_ads = true;
|
|
} else {
|
|
let inside_pages = e.attributes()
|
|
.find(|attribute| {
|
|
match attribute.as_ref() {
|
|
Ok(attribute) => {
|
|
attribute.key == b"class" &&
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(text) => text.as_str() == "container-chapter-reader",
|
|
Err(_) => false
|
|
}
|
|
},
|
|
Err(_) => false
|
|
}
|
|
}).is_some();
|
|
if inside_pages {
|
|
is_inside_pages = true;
|
|
}
|
|
}
|
|
}
|
|
},
|
|
Ok(Event::Empty(ref e)) => {
|
|
if is_inside_pages && !is_inside_ads && e.name() == b"img" {
|
|
let mut src: Option<String> = None;
|
|
let mut alt: Option<String> = None;
|
|
for attribute in e.attributes() {
|
|
match attribute {
|
|
Ok(attribute) => {
|
|
match attribute.key {
|
|
b"src" => {
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(src_text) => src = Some(src_text),
|
|
Err(_) => ()
|
|
};
|
|
},
|
|
b"alt" => {
|
|
match attribute.unescape_and_decode_value(&reader) {
|
|
Ok(alt_text) => alt = Some(alt_text),
|
|
Err(_) => ()
|
|
};
|
|
},
|
|
_ => ()
|
|
};
|
|
},
|
|
Err(_) => ()
|
|
};
|
|
}
|
|
if src.is_some() && alt.is_some() {
|
|
pages.push(src.unwrap());
|
|
}
|
|
}
|
|
},
|
|
Ok(Event::End(e)) => {
|
|
if e.name() == b"div" {
|
|
if is_inside_ads {
|
|
is_inside_ads = false;
|
|
} else if is_inside_pages {
|
|
is_inside_pages = false;
|
|
}
|
|
}
|
|
},
|
|
Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
|
|
Ok(Event::Eof) => break,
|
|
_ => ()
|
|
}
|
|
buf.clear();
|
|
}
|
|
pages
|
|
}
|
|
|
|
pub async fn download_file(client: reqwest::Client, url: &str, file_name: &PathBuf, referer: &str) -> Result<bool, structs::Error> {
|
|
let resp = client.get(url)
|
|
.header("Referer", referer)
|
|
.send()
|
|
.await?;
|
|
match resp.headers().get("Content-Type") {
|
|
Some(header_value) => {
|
|
if header_value.to_str().unwrap_or_default().starts_with("image/") {
|
|
let bytes = resp.bytes().await?;
|
|
if !file_name.parent().unwrap().is_dir() {
|
|
create_dir(file_name.parent().unwrap())?;
|
|
}
|
|
let mut file = File::create(&file_name)?;
|
|
file.write_all(&bytes)?;
|
|
return Ok(true);
|
|
}
|
|
return Ok(false);
|
|
},
|
|
None => Ok(false)
|
|
}
|
|
}
|