Initial release

This commit is contained in:
blank X 2021-01-09 00:29:45 +07:00
commit f6ec76baf2
12 changed files with 2041 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

1037
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

17
Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
[package]
name = "mangafetchi"
version = "0.1.0"
authors = ["blank X <theblankx@protonmail.com>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[profile.release]
lto = true
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
reqwest = "0.11"
quick-xml = "0.20"
tokio = { version = "1.0", features = ["rt-multi-thread", "sync"] }

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 blank X
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

35
src/commands.rs Normal file
View File

@ -0,0 +1,35 @@
mod view;
mod feed;
mod search;
mod download;
use std::env;
use std::path::Path;
use std::process::exit;
/// Command-line entry point.
///
/// Takes the first process argument as the binary path (only its file stem is
/// shown in hints), the second as the operation name, and forwards the
/// remaining arguments to the matching subcommand. A missing or unknown
/// operation prints a hint to stderr and exits with status 1.
pub async fn run() {
    let mut args = env::args();
    let binary = args.next().expect("Cannot get binary path");
    let binary = Path::new(&binary).file_stem().unwrap().to_str().unwrap();
    let operation = args.next().unwrap_or_else(|| {
        eprintln!("Missing operation, run `{} help`", binary);
        exit(1)
    });
    match operation.as_str() {
        "search" => search::run(args).await,
        "info" | "view" | "show" => view::run(args).await,
        "feed" | "rss" => feed::run(args).await,
        "download" | "dl" => download::run(args).await,
        // The usage text is a raw string; its continuation lines are part of the output.
        "help" => println!(r#"Usage: {} search <query>
or {} info/view/show <manga id> [manga id]...
or {} feed/rss <manga id>
or {} download/dl <manga id> [chapters]..."#, binary, binary, binary, binary),
        _ => {
            eprintln!("Unknown operation, run `{} help`", binary);
            exit(1)
        }
    }
}

100
src/commands/download.rs Normal file
View File

@ -0,0 +1,100 @@
use crate::utils;
use crate::structs;
use std::env;
use std::sync::Arc;
use std::process::exit;
use std::path::{Path, PathBuf};
use tokio::sync::Mutex;
use tokio::task::JoinHandle;
extern crate tokio;
extern crate reqwest;
const DOWNLOAD_WORKERS: usize = 5;
pub async fn run(mut args: env::Args) {
let manga_id = match args.next() {
Some(manga_id) => manga_id,
None => {
eprintln!("Missing manga id");
exit(1);
}
};
let mut chapter_numbers: Vec<_> = args.collect();
chapter_numbers.sort();
chapter_numbers.dedup();
let mut chapters: Vec<structs::Chapter> = Vec::new();
let client = reqwest::Client::new();
match utils::get_manga(client.clone(), &manga_id).await.unwrap() {
structs::MangaOption::Manga(mut manga_info) => {
if chapter_numbers.is_empty() {
chapters = manga_info.chapters;
} else {
for chapter_number in chapter_numbers {
let tmp = manga_info.chapters.iter().enumerate().find(|(_, chapter)| chapter_number.trim() == chapter.chapter_number.as_str());
if tmp.is_some() {
let (i, _) = tmp.unwrap();
chapters.push(manga_info.chapters.remove(i));
} else {
eprintln!("Chapter {} does not exist", &chapter_number);
exit(1);
}
}
}
},
structs::MangaOption::Redirect(_) => panic!("Nested redirect"),
structs::MangaOption::None => {
eprintln!("ID: {}\nError: does not exist", &manga_id);
exit(1);
}
};
let mutex: Arc<Mutex<Vec<(String, PathBuf, String)>>> = Arc::new(Mutex::new(Vec::new()));
let mut handles: Option<Vec<JoinHandle<()>>> = None;
for chapter in chapters {
let cloned_mutex = Arc::clone(&mutex);
let chapter_pages = utils::get_pages(client.clone(), &chapter, &manga_id).await.unwrap();
let mut to_extend: Vec<(String, PathBuf, String)> = Vec::new();
for url in chapter_pages {
let mut file_name = PathBuf::from(&chapter.chapter_number);
file_name.push(Path::new(reqwest::Url::parse(&url).unwrap().path()).file_name().unwrap());
if !file_name.exists() {
to_extend.push((url, file_name, chapter.domain.clone()));
}
}
if !to_extend.is_empty() {
cloned_mutex.lock().await.extend(to_extend);
}
if handles.is_none() {
handles = Some(summon_handles(client.clone(), cloned_mutex).await);
}
}
for handle in handles.unwrap() {
handle.await.unwrap();
}
}
/// Spawns `DOWNLOAD_WORKERS` tasks that drain the shared download queue.
///
/// Each queue entry is `(url, target path, referer)`. A worker repeatedly takes
/// the first entry and downloads it, and stops the first time it finds the
/// queue empty. Progress is logged to stderr with a `[DW<n>]` prefix.
///
/// Returns the join handles of the spawned workers. A worker panics if a
/// download fails.
async fn summon_handles(client: reqwest::Client, mutex: Arc<Mutex<Vec<(String, PathBuf, String)>>>) -> Vec<JoinHandle<()>> {
    (0..DOWNLOAD_WORKERS)
        .map(|worker_id| {
            let queue = Arc::clone(&mutex);
            let worker_client = client.clone();
            tokio::spawn(async move {
                eprintln!("[DW{}] Up!", worker_id);
                loop {
                    // Hold the lock only long enough to take one job.
                    let mut pending = queue.lock().await;
                    let job = if pending.is_empty() {
                        None
                    } else {
                        Some(pending.remove(0))
                    };
                    drop(pending);
                    let (url, file_name, referer) = match job {
                        Some(job) => job,
                        None => break,
                    };
                    eprintln!("[DW{}] Downloading {} to {}", worker_id, &url, file_name.display());
                    utils::download_file(worker_client.clone(), &url, &file_name, &referer).await.unwrap();
                    eprintln!("[DW{}] Downloaded {} to {}", worker_id, &url, file_name.display());
                }
                eprintln!("[DW{}] Down!", worker_id);
            })
        })
        .collect()
}

113
src/commands/feed.rs Normal file
View File

@ -0,0 +1,113 @@
use crate::utils;
use crate::structs;
use std::env;
use std::io::Cursor;
use std::process::exit;
use quick_xml::Writer;
use quick_xml::events::{Event, BytesStart, BytesText, BytesEnd};
extern crate reqwest;
pub async fn run(mut args: env::Args) {
let manga_id = match args.next() {
Some(manga_id) => manga_id,
None => {
eprintln!("Missing manga id");
exit(1);
}
};
if args.next().is_some() {
eprintln!("Specify only one manga id");
exit(1);
}
let mut manga_info = match utils::get_manga(reqwest::Client::new(), &manga_id).await.unwrap() {
structs::MangaOption::Manga(manga_info) => manga_info,
structs::MangaOption::Redirect(_) => panic!("Nested redirect"),
structs::MangaOption::None => {
eprintln!("ID: {}\nError: does not exist", &manga_id);
exit(1);
}
};
manga_info.chapters.reverse();
let mut writer = Writer::new(Cursor::new(Vec::new()));
{
let mut elem = BytesStart::owned(b"rss".to_vec(), 3);
elem.push_attribute(("version", "2.0"));
writer.write_event(Event::Start(elem)).unwrap();
let elem = BytesStart::owned(b"channel".to_vec(), 7);
writer.write_event(Event::Start(elem)).unwrap();
let elem = BytesStart::owned(b"title".to_vec(), 5);
writer.write_event(Event::Start(elem)).unwrap();
let elem = BytesText::from_plain_str(&manga_info.name).into_owned();
writer.write_event(Event::Text(elem)).unwrap();
let elem = BytesEnd::owned(b"title".to_vec());
writer.write_event(Event::End(elem)).unwrap();
let elem = BytesStart::owned(b"link".to_vec(), 4);
writer.write_event(Event::Start(elem)).unwrap();
let elem = BytesText::from_plain_str(&format!("https://mangakakalot.com/manga/{}", &manga_id)).into_owned();
writer.write_event(Event::Text(elem)).unwrap();
let elem = BytesEnd::owned(b"link".to_vec());
writer.write_event(Event::End(elem)).unwrap();
let elem = BytesStart::owned(b"description".to_vec(), 11);
writer.write_event(Event::Start(elem)).unwrap();
let elem = BytesText::from_plain_str(&manga_info.summary).into_owned();
writer.write_event(Event::Text(elem)).unwrap();
let elem = BytesEnd::owned(b"description".to_vec());
writer.write_event(Event::End(elem)).unwrap();
}
for chapter in manga_info.chapters {
let link = format!("https://mangakakalot.com/chapter/{}/chapter_{}", &manga_id, chapter.chapter_number);
let elem = BytesStart::owned(b"item".to_vec(), 4);
writer.write_event(Event::Start(elem)).unwrap();
let elem = BytesStart::owned(b"title".to_vec(), 5);
writer.write_event(Event::Start(elem)).unwrap();
let elem = BytesText::from_plain_str(&format!("{}", &chapter)).into_owned();
writer.write_event(Event::Text(elem)).unwrap();
let elem = BytesEnd::owned(b"title".to_vec());
writer.write_event(Event::End(elem)).unwrap();
let elem = BytesStart::owned(b"link".to_vec(), 4);
writer.write_event(Event::Start(elem)).unwrap();
let elem = BytesText::from_plain_str(&link).into_owned();
writer.write_event(Event::Text(elem)).unwrap();
let elem = BytesEnd::owned(b"link".to_vec());
writer.write_event(Event::End(elem)).unwrap();
let mut elem = BytesStart::owned(b"guid".to_vec(), 4);
elem.push_attribute(("isPermaLink", "true"));
writer.write_event(Event::Start(elem)).unwrap();
let elem = BytesText::from_plain_str(&link).into_owned();
writer.write_event(Event::Text(elem)).unwrap();
let elem = BytesEnd::owned(b"guid".to_vec());
writer.write_event(Event::End(elem)).unwrap();
let elem = BytesEnd::owned(b"item".to_vec());
writer.write_event(Event::End(elem)).unwrap();
}
let elem = BytesEnd::owned(b"channel".to_vec());
writer.write_event(Event::End(elem)).unwrap();
let elem = BytesEnd::owned(b"rss".to_vec());
writer.write_event(Event::End(elem)).unwrap();
println!("{}", String::from_utf8(writer.into_inner().into_inner()).unwrap());
}

25
src/commands/search.rs Normal file
View File

@ -0,0 +1,25 @@
use crate::utils;
use std::env;
use std::process::exit;
extern crate reqwest;
/// Searches for manga and prints one `name_unsigned: name` line per result.
///
/// All of `args` are joined with single spaces to form the query. Exits with
/// status 1 when the query is empty or nothing is found. Panics if the search
/// request fails.
pub async fn run(args: env::Args) {
    let joined = args.collect::<Vec<String>>().join(" ");
    let query = joined.trim();
    if query.is_empty() {
        eprintln!("Missing search query");
        exit(1);
    }
    let results = utils::search(reqwest::Client::new(), query).await.unwrap();
    if results.is_empty() {
        eprintln!("No results found");
        exit(1);
    }
    results
        .iter()
        .for_each(|result| println!("{}: {}", result.name_unsigned, result.name));
}

49
src/commands/view.rs Normal file
View File

@ -0,0 +1,49 @@
use crate::utils;
use crate::structs;
use std::env;
use std::process::exit;
use tokio::task::JoinHandle;
extern crate tokio;
extern crate reqwest;
/// Fetches every manga id in `args` concurrently and prints the results in
/// argument order, separated by blank lines.
///
/// Found manga go to stdout; ids that do not exist are reported on stderr and
/// make the process exit with status 1 after all results are printed. Exits
/// with status 1 immediately when no id is given. Panics if a fetch fails.
pub async fn run(args: env::Args) {
    let ids: Vec<String> = args.collect();
    if ids.is_empty() {
        eprintln!("Missing manga id(s)");
        exit(1);
    }
    let client = reqwest::Client::new();
    // Start every lookup before awaiting any of them.
    let handles: Vec<JoinHandle<(structs::MangaOption, String)>> = ids
        .into_iter()
        .map(|id| {
            let cloned_client = client.clone();
            tokio::spawn(async move {
                let manga = utils::get_manga(cloned_client, &id).await.unwrap();
                (manga, id)
            })
        })
        .collect();
    let mut fail = false;
    for (index, handle) in handles.into_iter().enumerate() {
        let needs_separator = index > 0;
        let (manga_info, id) = handle.await.unwrap();
        match manga_info {
            structs::MangaOption::Manga(manga_info) => {
                if needs_separator {
                    println!();
                }
                println!("{}", &manga_info);
            },
            structs::MangaOption::Redirect(_) => panic!("Nested redirect"),
            structs::MangaOption::None => {
                if needs_separator {
                    eprintln!();
                }
                eprintln!("ID: {}\nError: does not exist", id);
                fail = true;
            }
        }
    }
    if fail {
        exit(1);
    }
}

13
src/main.rs Normal file
View File

@ -0,0 +1,13 @@
mod utils;
mod structs;
mod commands;
extern crate tokio;
/// Builds a multi-threaded tokio runtime with all drivers enabled and runs the
/// command dispatcher on it until it finishes.
fn main() {
    let runtime = tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .unwrap();
    runtime.block_on(commands::run());
}

71
src/structs.rs Normal file
View File

@ -0,0 +1,71 @@
use std::fmt;
use serde::Deserialize;
extern crate serde;
/// One entry of the JSON array returned by the search endpoint.
///
/// `utils::search` deserializes these and then replaces `name`,
/// `last_chapter` and `author` with versions stripped of markup.
#[derive(Deserialize, Debug)]
pub struct SearchResult {
pub id: String,
pub name: String,
// Read from the JSON key `nameunsigned`; the search command prints it before the name.
#[serde(rename(deserialize = "nameunsigned"))]
pub name_unsigned: String,
// Read from the JSON key `lastchapter`.
#[serde(rename(deserialize = "lastchapter"))]
pub last_chapter: String,
pub image: String,
pub author: String,
pub story_link: String
}
/// A single chapter of a manga.
#[derive(Debug)]
pub struct Chapter {
    /// Chapter identifier as text, e.g. the part after `chapter_` in chapter URLs.
    pub chapter_number: String,
    /// Optional chapter title.
    pub chapter_name: Option<String>,
    /// Host the chapter is served from.
    pub domain: String
}
/// Renders as `Chapter <number>`, followed by `: <name>` when a name is present.
impl fmt::Display for Chapter {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(formatter, "Chapter {}", self.chapter_number)?;
        if let Some(name) = &self.chapter_name {
            write!(formatter, ": {}", name)?;
        }
        Ok(())
    }
}
/// Everything the parsers extract from a manga's info page.
pub struct Manga {
// The id the manga was requested with (the parsers copy their `manga_id` argument here).
pub id: String,
pub name: String,
pub authors: Vec<String>,
pub status: String,
pub last_updated: String,
pub genres: Vec<String>,
// Plain-text summary, trimmed by the parsers.
pub summary: String,
// Chapter list; the parsers reverse the order in which chapters appear on the page.
pub chapters: Vec<Chapter>
}
/// Renders a multi-line description: the labelled fields, the summary, and
/// then one `- <chapter>` line per chapter under a `Chapters:` heading.
impl fmt::Display for Manga {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let genres = self.genres.join(", ");
        let authors = self.authors.join(", ");
        write!(
            formatter,
            "ID: {}\nName: {}\nStatus: {}\nLast Updated: {}\nGenres: {}\nAuthors: {}\nSummary:\n{}\nChapters:",
            self.id,
            self.name,
            self.status,
            self.last_updated,
            genres,
            authors,
            self.summary
        )?;
        for chapter in self.chapters.iter() {
            write!(formatter, "\n- {}", chapter)?;
        }
        Ok(())
    }
}
/// Target of a `REDIRECT : <url>` marker found by the manga page parsers.
pub struct Redirect {
// The text after the first ':' of the marker, trimmed.
pub url: String
}
/// Outcome of parsing a manga info page.
pub enum MangaOption {
// The page described a manga.
Manga(Manga),
// The page pointed at another URL; `utils::get_manga` follows it once.
Redirect(Redirect),
// No manga was found; the commands report this as "does not exist".
None
}

559
src/utils.rs Normal file
View File

@ -0,0 +1,559 @@
use crate::structs;
use std::io::Write;
use std::path::PathBuf;
use std::fs::{create_dir, File};
use quick_xml::Reader;
use quick_xml::events::Event;
extern crate reqwest;
extern crate serde_json;
/// Turns free text into the slug form sent as the search term.
///
/// The text is lower-cased, the listed accented Vietnamese letters are folded
/// to their base ASCII letter, and the listed punctuation and spaces become
/// `_`. Runs of `_` collapse to a single one, and leading/trailing `_` are
/// removed.
fn generate_slug(text: &str) -> String {
    let mut slug = String::with_capacity(text.len());
    for ch in text.to_lowercase().chars() {
        let mapped = match ch {
            'à' | 'á' | 'ạ' | 'ả' | 'ã' | 'â' | 'ầ' | 'ấ' | 'ậ' | 'ẩ' | 'ẫ' | 'ă' | 'ằ' | 'ắ' | 'ặ' | 'ẳ' | 'ẵ' => 'a',
            'è' | 'é' | 'ẹ' | 'ẻ' | 'ẽ' | 'ê' | 'ề' | 'ế' | 'ệ' | 'ể' | 'ễ' => 'e',
            'ì' | 'í' | 'ị' | 'ỉ' | 'ĩ' => 'i',
            'ò' | 'ó' | 'ọ' | 'ỏ' | 'õ' | 'ô' | 'ồ' | 'ố' | 'ộ' | 'ổ' | 'ỗ' | 'ơ' | 'ờ' | 'ớ' | 'ợ' | 'ở' | 'ỡ' => 'o',
            'ù' | 'ú' | 'ụ' | 'ủ' | 'ũ' | 'ư' | 'ừ' | 'ứ' | 'ự' | 'ử' | 'ữ' => 'u',
            'ỳ' | 'ý' | 'ỵ' | 'ỷ' | 'ỹ' => 'y',
            'đ' => 'd',
            '!' | '@' | '%' | '^' | '*' | '(' | ')' | '+' | '=' | '<' | '>' | '?' | '/' | ',' | '.' | ':' | ';'
            | '\'' | ' ' | '"' | '&' | '#' | '[' | ']' | '~' | '-' => '_',
            other => other,
        };
        // Never emit two underscores in a row, whether mapped or already in the input.
        if mapped == '_' && slug.ends_with('_') {
            continue;
        }
        slug.push(mapped);
    }
    slug.trim_matches('_').to_string()
}
/// Returns the concatenated, unescaped text nodes of `text`, dropping all markup.
///
/// # Panics
/// Panics if the reader reports an error or a text node cannot be unescaped
/// and decoded.
fn remove_html(text: &str) -> String {
    let mut reader = Reader::from_str(text);
    let mut scratch = Vec::new();
    let mut plain = String::new();
    loop {
        match reader.read_event(&mut scratch) {
            Ok(Event::Eof) => break,
            Ok(Event::Text(fragment)) => {
                let decoded = fragment.unescape_and_decode(&reader).unwrap();
                plain.push_str(&decoded);
            }
            Ok(_) => {}
            Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
        }
        scratch.clear();
    }
    plain
}
/// Posts the slug form of `query` to the search endpoint and returns the results.
///
/// An empty response body yields an empty list. The `name`, `last_chapter` and
/// `author` fields of every result are stripped of markup before returning.
///
/// # Errors
/// Returns the `reqwest::Error` of a failed request or body read.
///
/// # Panics
/// Panics if the body is not the expected JSON or a field's markup cannot be parsed.
pub async fn search(client: reqwest::Client, query: &str) -> Result<Vec<structs::SearchResult>, reqwest::Error> {
    let slug = generate_slug(query);
    let response = client.post("https://mangakakalot.com/home_json_search")
        .form(&[("searchword", &slug)])
        .send()
        .await?;
    let body = response.text().await?;
    if body.is_empty() {
        return Ok(Vec::new());
    }
    let mut results: Vec<structs::SearchResult> = serde_json::from_str(&body).unwrap();
    for result in results.iter_mut() {
        result.name = remove_html(&result.name);
        result.last_chapter = remove_html(&result.last_chapter);
        result.author = remove_html(&result.author);
    }
    Ok(results)
}
/// Fetches and parses the info page of `manga_id`, following at most one redirect.
///
/// The mangakakalot.com page is tried first. If it parses to a redirect, the
/// target is fetched and parsed with the parser matching its host.
///
/// # Errors
/// Returns the `reqwest::Error` of a failed request or body read.
///
/// # Panics
/// Panics if the redirect URL is invalid, points at an unknown host, or the
/// redirect target itself parses to a redirect.
pub async fn get_manga(client: reqwest::Client, manga_id: &str) -> Result<structs::MangaOption, reqwest::Error> {
    let first_url = format!("https://mangakakalot.com/manga/{}", &manga_id);
    let body = client.get(&first_url)
        .send()
        .await?
        .text()
        .await?;
    // Anything other than a redirect is already the final answer.
    let redirect = match parse_mangakakalot_manga(&body, manga_id) {
        structs::MangaOption::Redirect(redirect) => redirect,
        structs::MangaOption::Manga(manga) => return Ok(structs::MangaOption::Manga(manga)),
        structs::MangaOption::None => return Ok(structs::MangaOption::None),
    };
    let body = client.get(&redirect.url)
        .send()
        .await?
        .text()
        .await?;
    let target = reqwest::Url::parse(&redirect.url).unwrap();
    let followed = match target.host_str().unwrap() {
        "mangakakalot.com" => parse_mangakakalot_manga(&body, manga_id),
        "manganelo.com" => parse_manganelo_manga(&body, manga_id),
        _ => panic!("Unknown URL: {}", &redirect.url)
    };
    match followed {
        structs::MangaOption::Redirect(_) => panic!("Nested redirect"),
        structs::MangaOption::Manga(manga) => Ok(structs::MangaOption::Manga(manga)),
        structs::MangaOption::None => Ok(structs::MangaOption::None),
    }
}
/// Extracts a `structs::Manga` from the HTML of a manganelo.com manga page.
///
/// `text` is the page source; `manga_id` is copied into the result's `id`.
/// The page is read as a stream of XML events, and a set of boolean flags
/// records which elements the reader is currently inside. Each text node is
/// assigned to the first field rule whose flags match.
///
/// Returns `MangaOption::Redirect` when a text node that matches no field rule
/// starts with `REDIRECT : `; otherwise returns `MangaOption::Manga`. This
/// function never returns `MangaOption::None`.
///
/// # Panics
/// Panics if the reader reports an error, if an inspected attribute is
/// malformed, if a link inside a `<ul>` has no `href`, or if no name, status
/// or last-updated value was found.
fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption {
// Upper-case the first line of the page (named as the doctype line here).
// NOTE(review): presumably needed so the reader accepts the doctype — confirm.
let mut split: Vec<&str> = text.splitn(2, "\n").collect();
let screaming_doctype = split[0].to_uppercase();
split[0] = &screaming_doctype;
let text = split.join("\n");
let mut name: Option<String> = None;
let mut status: Option<String> = None;
let mut last_updated: Option<String> = None;
let mut summary = String::new();
let mut authors = Vec::new();
let mut genres = Vec::new();
let mut chapters = Vec::new();
// "Currently inside element X" flags, set on start tags and cleared on end tags.
let mut is_inside_h1 = false;
let mut is_inside_a = false;
let mut is_inside_td = false;
let mut is_inside_authors = false;
let mut is_inside_genres = false;
let mut is_inside_status = false;
let mut is_inside_stre_value = false;
let mut is_inside_h3 = false;
let mut is_inside_ul = false;
let mut is_inside_description = false;
// href of the most recent link seen inside a <ul>, waiting for its link text.
let mut tmp_chapter_link: Option<String> = None;
let mut reader = Reader::from_str(&text);
reader.check_end_names(false);
let mut buf = Vec::new();
loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name() {
b"a" => {
is_inside_a = true;
// Links inside a <ul> are treated as chapter links; remember the target.
if is_inside_ul {
tmp_chapter_link = Some(e.attributes()
.find(|attribute| attribute.as_ref().unwrap().key == b"href")
.unwrap()
.unwrap()
.unescape_and_decode_value(&reader)
.unwrap());
}
},
b"ul" => is_inside_ul = true,
b"h1" => is_inside_h1 = true,
b"h3" => is_inside_h3 = true,
b"td" => {
// Only <td class="table-value"> cells carry field values.
let is_table_value = e.attributes()
.find(|attribute| {
let attribute = attribute.as_ref().unwrap();
attribute.key == b"class" &&
attribute.unescape_and_decode_value(&reader).unwrap().as_str() == "table-value"
}).is_some();
if is_table_value {
is_inside_td = true;
}
},
b"i" => {
// The class of an <i> element selects which field the following value cell belongs to.
let class = e.attributes()
.find(|attribute| attribute.as_ref().unwrap().key == b"class");
if class.is_some() {
let class_name = class.unwrap()
.unwrap()
.unescape_and_decode_value(&reader)
.unwrap();
match class_name.as_str() {
"info-author" => is_inside_authors = true,
"info-status" => is_inside_status = true,
"info-genres" => is_inside_genres = true,
_ => ()
}
}
},
b"span" => {
// The first <span class="stre-value"> text becomes `last_updated`.
let is_stre_value = e.attributes()
.find(|attribute| {
let attribute = attribute.as_ref().unwrap();
attribute.key == b"class" &&
attribute.unescape_and_decode_value(&reader).unwrap().as_str() == "stre-value"
}).is_some();
if is_stre_value {
is_inside_stre_value = true;
}
},
b"div" => {
// <div class="panel-story-info-description"> holds the summary.
let is_description = e.attributes()
.find(|attribute| {
let attribute = attribute.as_ref().unwrap();
attribute.key == b"class" &&
attribute.unescape_and_decode_value(&reader).unwrap().as_str() == "panel-story-info-description"
}).is_some();
if is_description {
is_inside_description = true;
}
},
_ => ()
};
},
Ok(Event::Text(e)) => {
// Text that cannot be unescaped/decoded is skipped rather than treated as fatal.
let text = match e.unescape_and_decode(&reader) {
Ok(text) => text,
Err(_) => {
buf.clear();
continue;
}
};
// The order of these rules matters: the first matching rule consumes the text.
if name.is_none() && is_inside_h1 {
name = Some(text);
} else if is_inside_authors && is_inside_td && is_inside_a {
authors.push(text);
} else if is_inside_status && is_inside_td {
status = Some(text);
} else if is_inside_genres && is_inside_td && is_inside_a {
genres.push(text);
} else if last_updated.is_none() && is_inside_stre_value {
last_updated = Some(text);
} else if is_inside_description && !is_inside_h3 {
summary.push_str(text.trim());
} else if is_inside_ul && is_inside_a && tmp_chapter_link.is_some() {
// Chapter name: whatever follows the first ':' or '-' in the link text, if any.
let chapter_name = match text.splitn(2, &[':', '-'][..]).nth(1) {
Some(text) => Some(text.trim().to_string()),
None => None
};
chapters.push(structs::Chapter {
// Chapter number: the part of the href after its last '_'.
chapter_number: tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0).unwrap().to_string(),
chapter_name: chapter_name,
domain: "manganelo.com".to_string()
});
tmp_chapter_link = None;
} else if text.trim().starts_with("REDIRECT : ") {
return structs::MangaOption::Redirect(structs::Redirect { url: text.splitn(2, ':').nth(1).unwrap().trim().to_string() });
}
},
Ok(Event::End(e)) => {
match e.name() {
b"a" => is_inside_a = false,
b"h1" => is_inside_h1 = false,
b"h3" => is_inside_h3 = false,
b"td" => {
// Leaving a value cell also ends whichever field it belonged to.
if is_inside_td {
is_inside_td = false;
is_inside_authors = false;
is_inside_genres = false;
is_inside_status = false;
}
},
b"div" => is_inside_description = false,
// Parsing stops at the first closing </ul>.
b"ul" => break,
_ => ()
};
},
Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
Ok(Event::Eof) => break,
_ => ()
}
buf.clear();
}
// Chapters are returned in the reverse of the order they appear on the page.
chapters.reverse();
structs::MangaOption::Manga(structs::Manga {
id: manga_id.to_string(),
name: name.unwrap(),
authors: authors,
status: status.unwrap(),
last_updated: last_updated.unwrap(),
genres: genres,
summary: summary.trim().to_string(),
chapters: chapters
}
)
}
/// Extracts a `structs::Manga` from the HTML of a mangakakalot.com manga page.
///
/// `text` is the page source; `manga_id` is copied into the result's `id`.
/// The page is read as a stream of XML events, and a set of boolean flags
/// records which elements the reader is currently inside. Each text node is
/// assigned to the first rule whose flags match.
///
/// Returns:
/// - `MangaOption::Redirect` when a text node that matches no earlier rule
///   starts with `REDIRECT : `;
/// - `MangaOption::None` when `</title>` is reached and the last text recorded
///   for the title was blank, or none was recorded;
/// - `MangaOption::Manga` otherwise.
///
/// # Panics
/// Panics if the reader reports an error, if an inspected `class`/`href`
/// attribute is malformed, if a chapter link has no `href`, if a `Status` line
/// has fewer than three space-separated parts, or if no name, status or
/// last-updated value was found.
fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption {
// Upper-case the first line of the page (named as the doctype line here).
// NOTE(review): presumably needed so the reader accepts the doctype — confirm.
let mut split: Vec<&str> = text.splitn(2, "\n").collect();
let screaming_doctype = split[0].to_uppercase();
split[0] = &screaming_doctype;
let text = split.join("\n");
// "Currently inside element X" flags, set on start tags and cleared on end tags.
// NOTE(review): `is_inside_title` is set on <title> but never cleared, so once a
// title has been opened the later `REDIRECT : ` rule can no longer match — confirm intent.
let mut is_inside_title = false;
let mut is_title_real = false;
let mut is_inside_chapter_list = false;
let mut is_inside_manga_info = false;
let mut is_inside_authors = false;
let mut is_inside_genres = false;
let mut is_inside_a = false;
let mut is_inside_row = false;
let mut name: Option<String> = None;
let mut status: Option<String> = None;
let mut last_updated: Option<String> = None;
let mut summary = String::new();
let mut is_inside_noidungm = false;
let mut is_inside_h1 = false;
let mut is_inside_h2 = false;
let mut authors = Vec::new();
let mut genres = Vec::new();
let mut chapters = Vec::new();
// href of the most recent link seen inside the chapter list, waiting for its link text.
let mut tmp_chapter_link: Option<String> = None;
let mut reader = Reader::from_str(&text);
reader.check_end_names(false);
let mut buf = Vec::new();
loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name() {
b"ul" => {
// <ul class="manga-info-text"> opens the block holding name, authors, status, etc.
let is_manga_info_text = e.attributes()
.find(|attribute| {
let attribute = attribute.as_ref().unwrap();
attribute.key == b"class" &&
attribute.unescape_and_decode_value(&reader).unwrap().as_str() == "manga-info-text"
}).is_some();
if is_manga_info_text {
is_inside_manga_info = true;
}
},
b"div" => {
// A div's class marks the chapter list or one of its rows...
let class = e.attributes()
.find(|attribute| attribute.as_ref().unwrap().key == b"class");
if class.is_some() {
let class_name = class.unwrap()
.unwrap()
.unescape_and_decode_value(&reader)
.unwrap();
match class_name.as_str() {
"chapter-list" => is_inside_chapter_list = true,
"row" => is_inside_row = true,
_ => ()
};
}
// ...and the id "noidungm" marks the summary container. Malformed
// attributes are skipped here instead of panicking.
let id = e.attributes()
.find(|attribute| {
match attribute.as_ref() {
Ok(attribute) => attribute.key == b"id",
Err(_) => false
}
});
if id.is_some() {
if id.unwrap().unwrap().unescape_and_decode_value(&reader).unwrap().as_str() == "noidungm" {
is_inside_noidungm = true;
}
}
},
b"h1" => is_inside_h1 = true,
b"h2" => is_inside_h2 = true,
b"a" => {
is_inside_a = true;
// Links inside the chapter list are chapter links; remember the target.
if is_inside_chapter_list {
tmp_chapter_link = Some(e.attributes()
.find(|attribute| attribute.as_ref().unwrap().key == b"href")
.unwrap()
.unwrap()
.unescape_and_decode_value(&reader)
.unwrap());
}
},
b"title" => is_inside_title = true,
_ => ()
};
},
Ok(Event::Text(e)) => {
// Text that cannot be unescaped/decoded is skipped rather than treated as fatal.
let text = match e.unescape_and_decode(&reader) {
Ok(text) => text,
Err(_) => {
buf.clear();
continue;
}
};
// The order of these rules matters: the first matching rule consumes the text.
if is_inside_manga_info {
if is_inside_h1 {
name = Some(text);
} else if is_inside_authors && is_inside_a {
authors.push(text);
} else if is_inside_genres && is_inside_a {
genres.push(text);
} else {
// Label lines are recognised by their first space-separated word.
match text.splitn(2, ' ').nth(0).unwrap() {
"Author(s)" => is_inside_authors = true,
// Keeps everything after the second space of the line.
"Status" => status = Some(text.splitn(3, ' ').nth(2).unwrap().to_string()),
"Last" => {
// Keeps everything after the "Last updated : " prefix.
if text.starts_with("Last updated : ") {
last_updated = Some(text.splitn(4, ' ').nth(3).unwrap().to_string());
}
},
"Genres" => is_inside_genres = true,
_ => ()
}
}
} else if is_inside_noidungm && !is_inside_h2 {
summary.push_str(&text.trim());
} else if is_inside_chapter_list && is_inside_a && tmp_chapter_link.is_some() {
// Chapter name: whatever follows the first ':' or '-' in the link text, if any.
let chapter_name = match text.splitn(2, &[':', '-'][..]).nth(1) {
Some(text) => Some(text.trim().to_string()),
None => None
};
chapters.push(structs::Chapter {
// Chapter number: the part of the href after its last '_'.
chapter_number: tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0).unwrap().to_string(),
chapter_name: chapter_name,
domain: "mangakakalot.com".to_string()
});
tmp_chapter_link = None;
} else if is_inside_title {
// A blank title is how a missing manga is detected at </title>.
is_title_real = !text.trim().is_empty();
} else if text.trim().starts_with("REDIRECT : ") {
return structs::MangaOption::Redirect(structs::Redirect { url: text.splitn(2, ':').nth(1).unwrap().trim().to_string() });
}
},
Ok(Event::Empty(ref e)) => {
// A self-closing <br> inside the summary becomes a newline.
if is_inside_noidungm {
if e.name() == b"br" {
summary.push_str("\n");
}
}
},
Ok(Event::End(e)) => {
match e.name() {
b"ul" => is_inside_manga_info = false,
b"li" => {
is_inside_authors = false;
is_inside_genres = false;
},
b"div" => {
// A closing </div> is attributed to the summary first, then to a row;
// only a closing div that is neither ends the chapter list and the parse.
if is_inside_noidungm {
is_inside_noidungm = false;
} else if is_inside_row {
is_inside_row = false;
} else if is_inside_chapter_list {
break;
}
},
b"h1" => is_inside_h1 = false,
b"h2" => is_inside_h2 = false,
b"a" => is_inside_a = false,
b"title" => {
if !is_title_real {
return structs::MangaOption::None;
}
},
_ => ()
};
},
Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
Ok(Event::Eof) => break,
_ => ()
};
buf.clear();
}
// Chapters are returned in the reverse of the order they appear on the page.
chapters.reverse();
structs::MangaOption::Manga(structs::Manga {
id: manga_id.to_string(),
name: name.unwrap(),
authors: authors,
status: status.unwrap(),
last_updated: last_updated.unwrap(),
genres: genres,
summary: summary.trim().to_string(),
chapters: chapters
})
}
/// Fetches a chapter's reader page and returns the image URLs found on it.
///
/// The URL is built from the chapter's domain, `manga_id` and the chapter
/// number; the parser is chosen by the chapter's domain.
///
/// # Errors
/// Returns the `reqwest::Error` of a failed request or body read.
///
/// # Panics
/// Panics (after the page has been fetched) if the chapter's domain is not one
/// of the two known hosts, or if the page cannot be parsed.
pub async fn get_pages(client: reqwest::Client, chapter: &structs::Chapter, manga_id: &str) -> Result<Vec<String>, reqwest::Error> {
    let chapter_url = format!("https://{}/chapter/{}/chapter_{}", &chapter.domain, &manga_id, &chapter.chapter_number);
    let response = client.get(&chapter_url).send().await?;
    let body = response.text().await?;
    let pages = match chapter.domain.as_str() {
        "mangakakalot.com" => parse_mangakakalot_pages(&body),
        "manganelo.com" => parse_manganelo_pages(&body),
        _ => panic!("Unknown domain: {}", &chapter.domain)
    };
    Ok(pages)
}
/// Collects the page image URLs from a mangakakalot.com chapter page.
///
/// A self-closing `<img>` counts as a page when it has both `src` and `alt`
/// and its `alt` ends with ` - Mangakakalot.com`; the `src` values are
/// returned in document order.
///
/// # Panics
/// Panics if the reader reports an error or an `<img>` attribute is malformed.
fn parse_mangakakalot_pages(text: &str) -> Vec<String> {
    // Upper-case the first line only (treated as the doctype line); the rest is untouched.
    let (first_line, rest) = match text.find('\n') {
        Some(index) => (&text[..index], &text[index..]),
        None => (text, ""),
    };
    let normalized = format!("{}{}", first_line.to_uppercase(), rest);
    let mut reader = Reader::from_str(&normalized);
    reader.check_end_names(false);
    let mut buf = Vec::new();
    let mut pages = Vec::new();
    loop {
        match reader.read_event(&mut buf) {
            Ok(Event::Eof) => break,
            Ok(Event::Empty(ref tag)) if tag.name() == b"img" => {
                let mut src: Option<String> = None;
                let mut alt: Option<String> = None;
                for attribute in tag.attributes() {
                    let attribute = attribute.unwrap();
                    match attribute.key {
                        b"src" => src = Some(attribute.unescape_and_decode_value(&reader).unwrap()),
                        b"alt" => alt = Some(attribute.unescape_and_decode_value(&reader).unwrap()),
                        _ => ()
                    }
                }
                if let (Some(src), Some(alt)) = (src, alt) {
                    if alt.ends_with(" - Mangakakalot.com") {
                        pages.push(src);
                    }
                }
            }
            Ok(_) => {}
            Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
        }
        buf.clear();
    }
    pages
}
/// Collects the page image URLs from a manganelo.com chapter page.
///
/// A self-closing `<img>` counts as a page when it has both `src` and `alt`
/// and its `alt` ends with ` - MangaNelo.com`; the `src` values are returned
/// in document order.
///
/// # Panics
/// Panics if the reader reports an error or an `<img>` attribute is malformed.
fn parse_manganelo_pages(text: &str) -> Vec<String> {
    // Upper-case the first line only (treated as the doctype line); the rest is untouched.
    let (first_line, rest) = match text.find('\n') {
        Some(index) => (&text[..index], &text[index..]),
        None => (text, ""),
    };
    let normalized = format!("{}{}", first_line.to_uppercase(), rest);
    let mut reader = Reader::from_str(&normalized);
    reader.check_end_names(false);
    let mut buf = Vec::new();
    let mut pages = Vec::new();
    loop {
        match reader.read_event(&mut buf) {
            Ok(Event::Eof) => break,
            Ok(Event::Empty(ref tag)) if tag.name() == b"img" => {
                let mut src: Option<String> = None;
                let mut alt: Option<String> = None;
                for attribute in tag.attributes() {
                    let attribute = attribute.unwrap();
                    match attribute.key {
                        b"src" => src = Some(attribute.unescape_and_decode_value(&reader).unwrap()),
                        b"alt" => alt = Some(attribute.unescape_and_decode_value(&reader).unwrap()),
                        _ => ()
                    }
                }
                if let (Some(src), Some(alt)) = (src, alt) {
                    if alt.ends_with(" - MangaNelo.com") {
                        pages.push(src);
                    }
                }
            }
            Ok(_) => {}
            Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err),
        }
        buf.clear();
    }
    pages
}
pub async fn download_file(client: reqwest::Client, url: &str, file_name: &PathBuf, referer: &str) -> Result<(), reqwest::Error> {
let bytes = client.get(url)
.header("Referer", referer)
.send()
.await?
.bytes()
.await?;
if !file_name.parent().unwrap().is_dir() {
create_dir(file_name.parent().unwrap()).unwrap();
}
let mut file = File::create(&file_name).unwrap();
file.write_all(&bytes).unwrap();
Ok(())
}