Die less and other stuff

This commit is contained in:
blank X 2021-01-11 22:38:08 +07:00
parent 9495d93fff
commit e00fb4b652
7 changed files with 345 additions and 122 deletions

3
Cargo.lock generated
View File

@ -330,13 +330,14 @@ dependencies = [
[[package]] [[package]]
name = "mangafetchi" name = "mangafetchi"
version = "0.1.6" version = "0.1.7"
dependencies = [ dependencies = [
"quick-xml", "quick-xml",
"reqwest", "reqwest",
"serde", "serde",
"serde_json", "serde_json",
"tokio", "tokio",
"url",
] ]
[[package]] [[package]]

View File

@ -1,6 +1,6 @@
[package] [package]
name = "mangafetchi" name = "mangafetchi"
version = "0.1.6" version = "0.1.7"
authors = ["blank X <theblankx@protonmail.com>"] authors = ["blank X <theblankx@protonmail.com>"]
edition = "2018" edition = "2018"
@ -12,6 +12,7 @@ lto = true
[dependencies] [dependencies]
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
url = "2.2"
reqwest = "0.11" reqwest = "0.11"
quick-xml = "0.20" quick-xml = "0.20"
tokio = { version = "1.0", features = ["rt-multi-thread", "sync", "time"] } tokio = { version = "1.0", features = ["rt-multi-thread", "sync", "time"] }

View File

@ -5,6 +5,7 @@ use std::env;
use std::sync::Arc; use std::sync::Arc;
use std::process::exit; use std::process::exit;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use url::Url;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
use tokio::time::{sleep, Duration}; use tokio::time::{sleep, Duration};
@ -14,6 +15,7 @@ extern crate reqwest;
const DOWNLOAD_WORKERS: usize = 5; const DOWNLOAD_WORKERS: usize = 5;
const NON_IMAGE_WAIT_TIME: u64 = 5000; const NON_IMAGE_WAIT_TIME: u64 = 5000;
const NO_ITEM_WAIT_TIME: u64 = 1000; const NO_ITEM_WAIT_TIME: u64 = 1000;
const GET_MANGA_FAIL_WAIT_TIME: u64 = 30000;
pub async fn run(mut args: env::Args) { pub async fn run(mut args: env::Args) {
let manga_id = match args.next() { let manga_id = match args.next() {
@ -23,7 +25,7 @@ pub async fn run(mut args: env::Args) {
exit(1); exit(1);
} }
}; };
let mut chapter_numbers: Vec<_> = args.collect(); let mut chapter_numbers: Vec<_> = args.map(|i| i.trim().to_string()).collect();
chapter_numbers.sort(); chapter_numbers.sort();
chapter_numbers.dedup(); chapter_numbers.dedup();
let mut chapters: Vec<structs::Chapter> = Vec::new(); let mut chapters: Vec<structs::Chapter> = Vec::new();
@ -34,7 +36,7 @@ pub async fn run(mut args: env::Args) {
chapters = manga_info.chapters; chapters = manga_info.chapters;
} else { } else {
for chapter_number in chapter_numbers { for chapter_number in chapter_numbers {
let tmp = manga_info.chapters.iter().enumerate().find(|(_, chapter)| chapter_number.trim() == chapter.chapter_number.as_str()); let tmp = manga_info.chapters.iter().enumerate().find(|(_, chapter)| chapter_number == chapter.chapter_number);
if tmp.is_some() { if tmp.is_some() {
let (i, _) = tmp.unwrap(); let (i, _) = tmp.unwrap();
chapters.push(manga_info.chapters.remove(i)); chapters.push(manga_info.chapters.remove(i));
@ -46,7 +48,7 @@ pub async fn run(mut args: env::Args) {
} }
}, },
structs::MangaOption::Redirect(_) => panic!("Nested redirect"), structs::MangaOption::Redirect(_) => panic!("Nested redirect"),
structs::MangaOption::None => { structs::MangaOption::DoesNotExist => {
eprintln!("ID: {}\nError: does not exist", &manga_id); eprintln!("ID: {}\nError: does not exist", &manga_id);
exit(1); exit(1);
} }
@ -54,18 +56,28 @@ pub async fn run(mut args: env::Args) {
let mutex = Arc::new(Mutex::new(DownloadData { data: Vec::new(), is_done: false })); let mutex = Arc::new(Mutex::new(DownloadData { data: Vec::new(), is_done: false }));
let handles: Vec<JoinHandle<()>> = summon_handles(client.clone(), Arc::clone(&mutex)).await; let handles: Vec<JoinHandle<()>> = summon_handles(client.clone(), Arc::clone(&mutex)).await;
for chapter in chapters { for chapter in chapters {
let cloned_mutex = Arc::clone(&mutex); loop {
let chapter_pages = utils::get_pages(client.clone(), &chapter, &manga_id).await.unwrap(); match utils::get_pages(client.clone(), &chapter, &manga_id).await {
let mut to_extend: Vec<(String, PathBuf, String)> = Vec::new(); Ok(chapter_pages) => {
for url in chapter_pages { if chapter_pages.is_empty() {
let mut file_name = PathBuf::from(&chapter.chapter_number); sleep(Duration::from_millis(GET_MANGA_FAIL_WAIT_TIME)).await;
file_name.push(Path::new(reqwest::Url::parse(&url).unwrap().path()).file_name().unwrap()); continue;
if !file_name.exists() { }
to_extend.push((url, file_name, chapter.domain.clone())); let mut to_extend: Vec<(String, PathBuf, String)> = Vec::new();
} for url in chapter_pages {
} let mut file_name = PathBuf::from(&chapter.chapter_number);
if !to_extend.is_empty() { file_name.push(Path::new(Url::parse(&url).unwrap().path()).file_name().unwrap());
cloned_mutex.lock().await.data.extend(to_extend); if !file_name.exists() {
to_extend.push((url, file_name, chapter.domain.clone()));
}
}
if !to_extend.is_empty() {
Arc::clone(&mutex).lock().await.data.extend(to_extend);
}
break;
},
Err(_) => sleep(Duration::from_millis(GET_MANGA_FAIL_WAIT_TIME)).await
};
} }
} }
{ {

View File

@ -23,7 +23,7 @@ pub async fn run(mut args: env::Args) {
let mut manga_info = match utils::get_manga(reqwest::Client::new(), &manga_id).await.unwrap() { let mut manga_info = match utils::get_manga(reqwest::Client::new(), &manga_id).await.unwrap() {
structs::MangaOption::Manga(manga_info) => manga_info, structs::MangaOption::Manga(manga_info) => manga_info,
structs::MangaOption::Redirect(_) => panic!("Nested redirect"), structs::MangaOption::Redirect(_) => panic!("Nested redirect"),
structs::MangaOption::None => { structs::MangaOption::DoesNotExist => {
eprintln!("ID: {}\nError: does not exist", &manga_id); eprintln!("ID: {}\nError: does not exist", &manga_id);
exit(1); exit(1);
} }

View File

@ -33,7 +33,7 @@ pub async fn run(args: env::Args) {
println!("{}", &manga_info); println!("{}", &manga_info);
}, },
structs::MangaOption::Redirect(_) => panic!("Nested redirect"), structs::MangaOption::Redirect(_) => panic!("Nested redirect"),
structs::MangaOption::None => { structs::MangaOption::DoesNotExist => {
if one_done { if one_done {
eprintln!(""); eprintln!("");
} }

View File

@ -1,6 +1,8 @@
use std::fmt; use std::fmt;
use serde::Deserialize; use serde::Deserialize;
extern crate url;
extern crate serde; extern crate serde;
extern crate serde_json;
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
pub struct SearchResult { pub struct SearchResult {
@ -36,8 +38,8 @@ pub struct Manga {
pub id: String, pub id: String,
pub name: String, pub name: String,
pub authors: Vec<String>, pub authors: Vec<String>,
pub status: String, pub status: Option<String>,
pub last_updated: String, pub last_updated: Option<String>,
pub genres: Vec<String>, pub genres: Vec<String>,
pub summary: Option<String>, pub summary: Option<String>,
pub chapters: Vec<Chapter> pub chapters: Vec<Chapter>
@ -45,13 +47,21 @@ pub struct Manga {
impl fmt::Display for Manga { impl fmt::Display for Manga {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut text = format!("ID: {}\nName: {}\nStatus: {}\nLast Updated: {}\nGenres: {}\nAuthors: {}", let mut text = format!("ID: {}\nName: {}",
self.id, self.id,
self.name, self.name);
self.status, if self.status.is_some() {
self.last_updated, text.push_str(&format!("\nStatus: {}", self.status.as_ref().unwrap()));
self.genres.join(", "), }
self.authors.join(", ")); if self.last_updated.is_some() {
text.push_str(&format!("\nLast Updated: {}", self.last_updated.as_ref().unwrap()));
}
if !self.genres.is_empty() {
text.push_str(&format!("\nGenres: {}", self.genres.join(", ")));
}
if !self.authors.is_empty() {
text.push_str(&format!("\nAuthors: {}", self.authors.join(", ")));
}
if self.summary.is_some() { if self.summary.is_some() {
text.push_str(&format!("\nSummary:\n{}", self.summary.as_ref().unwrap())); text.push_str(&format!("\nSummary:\n{}", self.summary.as_ref().unwrap()));
} }
@ -70,5 +80,33 @@ pub struct Redirect {
pub enum MangaOption { pub enum MangaOption {
Manga(Manga), Manga(Manga),
Redirect(Redirect), Redirect(Redirect),
None DoesNotExist
}
#[derive(Debug)]
pub enum Error {
Reqwest(reqwest::Error),
URL(url::ParseError),
SerdeJSON(serde_json::Error),
}
impl From<reqwest::Error> for Error {
#[inline]
fn from(error: reqwest::Error) -> Error {
Error::Reqwest(error)
}
}
/// Wraps a `url::ParseError` in `Error::URL`, which lets `?` convert it in
/// functions that return this crate's `Error`.
impl From<url::ParseError> for Error {
    #[inline]
    fn from(source: url::ParseError) -> Self {
        Self::URL(source)
    }
}
impl From<serde_json::Error> for Error {
#[inline]
fn from(error: serde_json::Error) -> Error {
Error::SerdeJSON(error)
}
} }

View File

@ -3,6 +3,7 @@ use crate::structs;
use std::io::Write; use std::io::Write;
use std::path::PathBuf; use std::path::PathBuf;
use std::fs::{create_dir, File}; use std::fs::{create_dir, File};
use url::Url;
use quick_xml::Reader; use quick_xml::Reader;
use quick_xml::events::Event; use quick_xml::events::Event;
extern crate reqwest; extern crate reqwest;
@ -41,7 +42,7 @@ fn remove_html(text: &str) -> String {
removed removed
} }
pub async fn search(client: reqwest::Client, query: &str) -> Result<Vec<structs::SearchResult>, reqwest::Error> { pub async fn search(client: reqwest::Client, query: &str) -> Result<Vec<structs::SearchResult>, structs::Error> {
let text = client.post("https://mangakakalot.com/home_json_search") let text = client.post("https://mangakakalot.com/home_json_search")
.form(&[("searchword", &generate_slug(&query))]) .form(&[("searchword", &generate_slug(&query))])
.send() .send()
@ -51,7 +52,7 @@ pub async fn search(client: reqwest::Client, query: &str) -> Result<Vec<structs:
if text.is_empty() { if text.is_empty() {
return Ok(Vec::new()); return Ok(Vec::new());
} }
let mut results: Vec<structs::SearchResult> = serde_json::from_str(&text).unwrap(); let mut results: Vec<structs::SearchResult> = serde_json::from_str(&text)?;
for i in 0..results.len() { for i in 0..results.len() {
let old_result = &results[i]; let old_result = &results[i];
results[i] = structs::SearchResult { results[i] = structs::SearchResult {
@ -67,7 +68,7 @@ pub async fn search(client: reqwest::Client, query: &str) -> Result<Vec<structs:
Ok(results) Ok(results)
} }
pub async fn get_manga(client: reqwest::Client, manga_id: &str) -> Result<structs::MangaOption, reqwest::Error> { pub async fn get_manga(client: reqwest::Client, manga_id: &str) -> Result<structs::MangaOption, structs::Error> {
let text = client.get(&format!("https://mangakakalot.com/manga/{}", &manga_id)) let text = client.get(&format!("https://mangakakalot.com/manga/{}", &manga_id))
.send() .send()
.await? .await?
@ -82,7 +83,7 @@ pub async fn get_manga(client: reqwest::Client, manga_id: &str) -> Result<struct
.await? .await?
.text() .text()
.await?; .await?;
let resp = match reqwest::Url::parse(&redirect.url).unwrap().host_str().unwrap() { let resp = match Url::parse(&redirect.url)?.host_str().unwrap() {
"mangakakalot.com" => parse_mangakakalot_manga(&text, &manga_id), "mangakakalot.com" => parse_mangakakalot_manga(&text, &manga_id),
"manganelo.com" => parse_manganelo_manga(&text, &manga_id), "manganelo.com" => parse_manganelo_manga(&text, &manga_id),
_ => panic!("Unknown URL: {}", &redirect.url) _ => panic!("Unknown URL: {}", &redirect.url)
@ -90,10 +91,10 @@ pub async fn get_manga(client: reqwest::Client, manga_id: &str) -> Result<struct
match resp { match resp {
structs::MangaOption::Manga(_) => resp, structs::MangaOption::Manga(_) => resp,
structs::MangaOption::Redirect(_) => panic!("Nested redirect"), structs::MangaOption::Redirect(_) => panic!("Nested redirect"),
structs::MangaOption::None => structs::MangaOption::None structs::MangaOption::DoesNotExist => structs::MangaOption::DoesNotExist
} }
}, },
structs::MangaOption::None => structs::MangaOption::None structs::MangaOption::DoesNotExist => structs::MangaOption::DoesNotExist
}) })
} }
@ -130,12 +131,19 @@ fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption {
b"a" => { b"a" => {
is_inside_a = true; is_inside_a = true;
if is_inside_ul { if is_inside_ul {
tmp_chapter_link = Some(e.attributes() let href = e.attributes()
.find(|attribute| attribute.as_ref().unwrap().key == b"href") .find(|attribute| {
.unwrap() match attribute.as_ref() {
.unwrap() Ok(attribute) => attribute.key == b"href",
.unescape_and_decode_value(&reader) Err(_) => false
.unwrap()); }
});
if href.is_some() {
match href.unwrap().unwrap().unescape_and_decode_value(&reader) {
Ok(text) => tmp_chapter_link = Some(text),
Err(_) => ()
};
}
} }
}, },
b"ul" => is_inside_ul = true, b"ul" => is_inside_ul = true,
@ -144,9 +152,16 @@ fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption {
b"td" => { b"td" => {
let is_table_value = e.attributes() let is_table_value = e.attributes()
.find(|attribute| { .find(|attribute| {
let attribute = attribute.as_ref().unwrap(); match attribute.as_ref() {
attribute.key == b"class" && Ok(attribute) => {
attribute.unescape_and_decode_value(&reader).unwrap().as_str() == "table-value" attribute.key == b"class" &&
match attribute.unescape_and_decode_value(&reader) {
Ok(text) => text.as_str() == "table-value",
Err(_) => false
}
},
Err(_) => false
}
}).is_some(); }).is_some();
if is_table_value { if is_table_value {
is_inside_td = true; is_inside_td = true;
@ -154,26 +169,39 @@ fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption {
}, },
b"i" => { b"i" => {
let class = e.attributes() let class = e.attributes()
.find(|attribute| attribute.as_ref().unwrap().key == b"class"); .find(|attribute| {
match attribute.as_ref() {
Ok(attribute) => attribute.key == b"class",
Err(_) => false
}
});
if class.is_some() { if class.is_some() {
let class_name = class.unwrap() match class.unwrap().unwrap().unescape_and_decode_value(&reader) {
.unwrap() Ok(class_name) => {
.unescape_and_decode_value(&reader) match class_name.as_str() {
.unwrap(); "info-author" => is_inside_authors = true,
match class_name.as_str() { "info-status" => is_inside_status = true,
"info-author" => is_inside_authors = true, "info-genres" => is_inside_genres = true,
"info-status" => is_inside_status = true, _ => ()
"info-genres" => is_inside_genres = true, };
_ => () },
} Err(_) => ()
};
} }
}, },
b"span" => { b"span" => {
let is_stre_value = e.attributes() let is_stre_value = e.attributes()
.find(|attribute| { .find(|attribute| {
let attribute = attribute.as_ref().unwrap(); match attribute.as_ref() {
attribute.key == b"class" && Ok(attribute) => {
attribute.unescape_and_decode_value(&reader).unwrap().as_str() == "stre-value" attribute.key == b"class" &&
match attribute.unescape_and_decode_value(&reader) {
Ok(text) => text.as_str() == "stre-value",
Err(_) => false
}
},
Err(_) => false
}
}).is_some(); }).is_some();
if is_stre_value { if is_stre_value {
is_inside_stre_value = true; is_inside_stre_value = true;
@ -182,9 +210,16 @@ fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption {
b"div" => { b"div" => {
let is_description = e.attributes() let is_description = e.attributes()
.find(|attribute| { .find(|attribute| {
let attribute = attribute.as_ref().unwrap(); match attribute.as_ref() {
attribute.key == b"class" && Ok(attribute) => {
attribute.unescape_and_decode_value(&reader).unwrap().as_str() == "panel-story-info-description" attribute.key == b"class" &&
match attribute.unescape_and_decode_value(&reader) {
Ok(text) => text.as_str() == "panel-story-info-description",
Err(_) => false
}
},
Err(_) => false
}
}).is_some(); }).is_some();
if is_description { if is_description {
is_inside_description = true; is_inside_description = true;
@ -223,11 +258,16 @@ fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption {
Some(text) => Some(text.trim().to_string()), Some(text) => Some(text.trim().to_string()),
None => None None => None
}; };
chapters.push(structs::Chapter { match tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0) {
chapter_number: tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0).unwrap().to_string(), Some(chapter_number) => {
chapter_name: chapter_name, chapters.push(structs::Chapter {
domain: "manganelo.com".to_string() chapter_number: chapter_number.to_string(),
}); chapter_name: chapter_name,
domain: "manganelo.com".to_string()
});
},
None => ()
};
tmp_chapter_link = None; tmp_chapter_link = None;
} else if text.starts_with("REDIRECT : ") { } else if text.starts_with("REDIRECT : ") {
return structs::MangaOption::Redirect(structs::Redirect { url: text.splitn(2, ':').nth(1).unwrap().trim().to_string() }); return structs::MangaOption::Redirect(structs::Redirect { url: text.splitn(2, ':').nth(1).unwrap().trim().to_string() });
@ -254,7 +294,7 @@ fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption {
Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err), Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
Ok(Event::Eof) => break, Ok(Event::Eof) => break,
_ => () _ => ()
} };
buf.clear(); buf.clear();
} }
chapters.reverse(); chapters.reverse();
@ -262,8 +302,8 @@ fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption {
id: manga_id.to_string(), id: manga_id.to_string(),
name: name.unwrap(), name: name.unwrap(),
authors: authors, authors: authors,
status: status.unwrap(), status: status,
last_updated: last_updated.unwrap(), last_updated: last_updated,
genres: genres, genres: genres,
summary: summary, summary: summary,
chapters: chapters chapters: chapters
@ -305,9 +345,16 @@ fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption
b"ul" => { b"ul" => {
let is_manga_info_text = e.attributes() let is_manga_info_text = e.attributes()
.find(|attribute| { .find(|attribute| {
let attribute = attribute.as_ref().unwrap(); match attribute.as_ref() {
attribute.key == b"class" && Ok(attribute) => {
attribute.unescape_and_decode_value(&reader).unwrap().as_str() == "manga-info-text" attribute.key == b"class" &&
match attribute.unescape_and_decode_value(&reader) {
Ok(text) => text.as_str() == "manga-info-text",
Err(_) => false
}
},
Err(_) => false
}
}).is_some(); }).is_some();
if is_manga_info_text { if is_manga_info_text {
is_inside_manga_info = true; is_inside_manga_info = true;
@ -315,29 +362,39 @@ fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption
}, },
b"div" => { b"div" => {
let class = e.attributes() let class = e.attributes()
.find(|attribute| attribute.as_ref().unwrap().key == b"class");
if class.is_some() {
let class_name = class.unwrap()
.unwrap()
.unescape_and_decode_value(&reader)
.unwrap();
match class_name.as_str() {
"chapter-list" => is_inside_chapter_list = true,
"row" => is_inside_row = true,
_ => ()
};
}
let id = e.attributes()
.find(|attribute| { .find(|attribute| {
match attribute.as_ref() { match attribute.as_ref() {
Ok(attribute) => attribute.key == b"id", Ok(attribute) => attribute.key == b"class",
Err(_) => false Err(_) => false
} }
}); });
if id.is_some() { if class.is_some() {
if id.unwrap().unwrap().unescape_and_decode_value(&reader).unwrap().as_str() == "noidungm" { match class.unwrap().unwrap().unescape_and_decode_value(&reader) {
is_inside_noidungm = true; Ok(class_name) => {
} match class_name.as_str() {
"chapter-list" => is_inside_chapter_list = true,
"row" => is_inside_row = true,
_ => ()
};
},
Err(_) => ()
};
}
let inside_noidungm = e.attributes()
.find(|attribute| {
match attribute.as_ref() {
Ok(attribute) => {
attribute.key == b"id" &&
match attribute.unescape_and_decode_value(&reader) {
Ok(text) => text.as_str() == "noidungm",
Err(_) => false
}
},
Err(_) => false
}
}).is_some();
if inside_noidungm {
is_inside_noidungm = true;
} }
}, },
b"h1" => is_inside_h1 = true, b"h1" => is_inside_h1 = true,
@ -345,12 +402,19 @@ fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption
b"a" => { b"a" => {
is_inside_a = true; is_inside_a = true;
if is_inside_chapter_list { if is_inside_chapter_list {
tmp_chapter_link = Some(e.attributes() let href = e.attributes()
.find(|attribute| attribute.as_ref().unwrap().key == b"href") .find(|attribute| {
.unwrap() match attribute.as_ref() {
.unwrap() Ok(attribute) => attribute.key == b"href",
.unescape_and_decode_value(&reader) Err(_) => false
.unwrap()); }
});
if href.is_some() {
match href.unwrap().unwrap().unescape_and_decode_value(&reader) {
Ok(text) => tmp_chapter_link = Some(text),
Err(_) => ()
};
}
} }
}, },
b"title" => is_inside_title = true, b"title" => is_inside_title = true,
@ -376,10 +440,18 @@ fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption
} else { } else {
match text.splitn(2, ' ').nth(0).unwrap() { match text.splitn(2, ' ').nth(0).unwrap() {
"Author(s)" => is_inside_authors = true, "Author(s)" => is_inside_authors = true,
"Status" => status = Some(text.splitn(3, ' ').nth(2).unwrap().to_string()), "Status" => {
match text.splitn(3, ' ').nth(2) {
Some(text) => status = Some(text.to_string()),
None => ()
};
},
"Last" => { "Last" => {
if text.starts_with("Last updated : ") { if text.starts_with("Last updated : ") {
last_updated = Some(text.splitn(4, ' ').nth(3).unwrap().to_string()); match text.splitn(4, ' ').nth(3) {
Some(text) => last_updated = Some(text.to_string()),
None => ()
};
} }
}, },
"Genres" => is_inside_genres = true, "Genres" => is_inside_genres = true,
@ -397,11 +469,16 @@ fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption
Some(text) => Some(text.trim().to_string()), Some(text) => Some(text.trim().to_string()),
None => None None => None
}; };
chapters.push(structs::Chapter { match tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0) {
chapter_number: tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0).unwrap().to_string(), Some(chapter_number) => {
chapter_name: chapter_name, chapters.push(structs::Chapter {
domain: "mangakakalot.com".to_string() chapter_number: chapter_number.to_string(),
}); chapter_name: chapter_name,
domain: "mangakakalot.com".to_string()
});
},
None => ()
};
tmp_chapter_link = None; tmp_chapter_link = None;
} else if is_inside_title { } else if is_inside_title {
is_title_real = !text.is_empty(); is_title_real = !text.is_empty();
@ -435,7 +512,7 @@ fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption
b"a" => is_inside_a = false, b"a" => is_inside_a = false,
b"title" => { b"title" => {
if !is_title_real { if !is_title_real {
return structs::MangaOption::None; return structs::MangaOption::DoesNotExist;
} }
}, },
_ => () _ => ()
@ -452,8 +529,8 @@ fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption
id: manga_id.to_string(), id: manga_id.to_string(),
name: name.unwrap(), name: name.unwrap(),
authors: authors, authors: authors,
status: status.unwrap(), status: status,
last_updated: last_updated.unwrap(), last_updated: last_updated,
genres: genres, genres: genres,
summary: summary, summary: summary,
chapters: chapters chapters: chapters
@ -478,28 +555,75 @@ fn parse_mangakakalot_pages(text: &str) -> Vec<String> {
let screaming_doctype = split[0].to_uppercase(); let screaming_doctype = split[0].to_uppercase();
split[0] = &screaming_doctype; split[0] = &screaming_doctype;
let text = split.join("\n"); let text = split.join("\n");
let mut is_inside_pages = false;
let mut is_inside_ads = false;
let mut pages = Vec::new(); let mut pages = Vec::new();
let mut reader = Reader::from_str(&text); let mut reader = Reader::from_str(&text);
reader.check_end_names(false); reader.check_end_names(false);
let mut buf = Vec::new(); let mut buf = Vec::new();
loop { loop {
match reader.read_event(&mut buf) { match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => {
if e.name() == b"div" {
if is_inside_pages {
is_inside_ads = true;
} else {
let inside_pages = e.attributes()
.find(|attribute| {
match attribute.as_ref() {
Ok(attribute) => {
attribute.key == b"id" &&
match attribute.unescape_and_decode_value(&reader) {
Ok(text) => text.as_str() == "vungdoc",
Err(_) => false
}
},
Err(_) => false
}
}).is_some();
if inside_pages {
is_inside_pages = true;
}
}
}
},
Ok(Event::Empty(ref e)) => { Ok(Event::Empty(ref e)) => {
if e.name() == b"img" { if e.name() == b"img" {
let mut src: Option<String> = None; let mut src: Option<String> = None;
let mut alt: Option<String> = None; let mut alt: Option<String> = None;
for attribute in e.attributes() { for attribute in e.attributes() {
let attribute = attribute.unwrap(); match attribute {
match attribute.key { Ok(attribute) => {
b"src" => src = Some(attribute.unescape_and_decode_value(&reader).unwrap()), match attribute.key {
b"alt" => alt = Some(attribute.unescape_and_decode_value(&reader).unwrap()), b"src" => {
_ => () match attribute.unescape_and_decode_value(&reader) {
Ok(src_text) => src = Some(src_text),
Err(_) => ()
};
},
b"alt" => {
match attribute.unescape_and_decode_value(&reader) {
Ok(alt_text) => alt = Some(alt_text),
Err(_) => ()
};
},
_ => ()
};
},
Err(_) => ()
}; };
} }
if src.is_some() && alt.is_some() { if src.is_some() && alt.is_some() {
if alt.unwrap().ends_with(" - Mangakakalot.com") { pages.push(src.unwrap());
pages.push(src.unwrap()); }
} }
},
Ok(Event::End(e)) => {
if e.name() == b"div" {
if is_inside_ads {
is_inside_ads = false;
} else if is_inside_pages {
is_inside_pages = false;
} }
} }
}, },
@ -517,28 +641,75 @@ fn parse_manganelo_pages(text: &str) -> Vec<String> {
let screaming_doctype = split[0].to_uppercase(); let screaming_doctype = split[0].to_uppercase();
split[0] = &screaming_doctype; split[0] = &screaming_doctype;
let text = split.join("\n"); let text = split.join("\n");
let mut is_inside_pages = false;
let mut is_inside_ads = false;
let mut pages = Vec::new(); let mut pages = Vec::new();
let mut reader = Reader::from_str(&text); let mut reader = Reader::from_str(&text);
reader.check_end_names(false); reader.check_end_names(false);
let mut buf = Vec::new(); let mut buf = Vec::new();
loop { loop {
match reader.read_event(&mut buf) { match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => {
if e.name() == b"div" {
if is_inside_pages {
is_inside_ads = true;
} else {
let inside_pages = e.attributes()
.find(|attribute| {
match attribute.as_ref() {
Ok(attribute) => {
attribute.key == b"class" &&
match attribute.unescape_and_decode_value(&reader) {
Ok(text) => text.as_str() == "container-chapter-reader",
Err(_) => false
}
},
Err(_) => false
}
}).is_some();
if inside_pages {
is_inside_pages = true;
}
}
}
},
Ok(Event::Empty(ref e)) => { Ok(Event::Empty(ref e)) => {
if e.name() == b"img" { if is_inside_pages && !is_inside_ads && e.name() == b"img" {
let mut src: Option<String> = None; let mut src: Option<String> = None;
let mut alt: Option<String> = None; let mut alt: Option<String> = None;
for attribute in e.attributes() { for attribute in e.attributes() {
let attribute = attribute.unwrap(); match attribute {
match attribute.key { Ok(attribute) => {
b"src" => src = Some(attribute.unescape_and_decode_value(&reader).unwrap()), match attribute.key {
b"alt" => alt = Some(attribute.unescape_and_decode_value(&reader).unwrap()), b"src" => {
_ => () match attribute.unescape_and_decode_value(&reader) {
Ok(src_text) => src = Some(src_text),
Err(_) => ()
};
},
b"alt" => {
match attribute.unescape_and_decode_value(&reader) {
Ok(alt_text) => alt = Some(alt_text),
Err(_) => ()
};
},
_ => ()
};
},
Err(_) => ()
}; };
} }
if src.is_some() && alt.is_some() { if src.is_some() && alt.is_some() {
if alt.unwrap().ends_with(" - MangaNelo.com") { pages.push(src.unwrap());
pages.push(src.unwrap()); }
} }
},
Ok(Event::End(e)) => {
if e.name() == b"div" {
if is_inside_ads {
is_inside_ads = false;
} else if is_inside_pages {
is_inside_pages = false;
} }
} }
}, },