use crate::structs; use std::io::Write; use std::path::PathBuf; use std::fs::{create_dir, File}; use url::Url; use quick_xml::Reader; use quick_xml::events::Event; extern crate reqwest; extern crate serde_json; fn generate_slug(text: &str) -> String { let mut text = text.to_string() .to_lowercase() .replace(&['à', 'á', 'ạ', 'ả', 'ã', 'â', 'ầ', 'ấ', 'ậ', 'ẩ', 'ẫ', 'ă', 'ằ', 'ắ', 'ặ', 'ẳ', 'ẵ'][..], "a") .replace(&['è', 'é', 'ẹ', 'ẻ', 'ẽ', 'ê', 'ề', 'ế', 'ệ', 'ể', 'ễ'][..], "e") .replace(&['ì', 'í', 'ị', 'ỉ', 'ĩ'][..], "i") .replace(&['ò', 'ó', 'ọ', 'ỏ', 'õ', 'ô', 'ồ', 'ố', 'ộ', 'ổ', 'ỗ', 'ơ', 'ờ', 'ớ', 'ợ', 'ở', 'ỡ'][..], "o") .replace(&['ù', 'ú', 'ụ', 'ủ', 'ũ', 'ư', 'ừ', 'ứ', 'ự', 'ử', 'ữ'][..], "u") .replace(&['ỳ', 'ý', 'ỵ', 'ỷ', 'ỹ'][..], "y") .replace('đ', "d") .replace(&['!', '@', '%', '^', '*', '(', ')', '+', '=', '<', '>', '?', '/', ',', '.', ':', ';', '\'', ' ', '"', '&', '#', '[', ']', '~', '-'][..], "_"); while text.contains("__") { text = text.replace("__", "_"); } text.trim_matches('_').to_string() } fn remove_html(text: &str) -> String { let mut removed = String::new(); let mut reader = Reader::from_str(&text); let mut buf = Vec::new(); loop { match reader.read_event(&mut buf) { Ok(Event::Text(e)) => removed.push_str(&e.unescape_and_decode(&reader).unwrap()), Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err), Ok(Event::Eof) => break, _ => () }; buf.clear(); } removed } pub async fn search(client: reqwest::Client, query: &str) -> Result, structs::Error> { let text = client.post("https://mangakakalot.com/home_json_search") .form(&[("searchword", &generate_slug(&query))]) .send() .await? .text() .await?; if text.is_empty() { return Ok(Vec::new()); } let mut results: Vec = serde_json::from_str(&text)?; for i in 0..results.len() { let old_result = &results[i]; results[i] = structs::SearchResult { id: old_result.id.clone(), name: remove_html(&old_result.name), name_unsigned: old_result.name_unsigned.clone(), last_chapter: remove_html(&old_result.last_chapter), image: old_result.image.clone(), author: remove_html(&old_result.author), story_link: old_result.story_link.clone() }; } Ok(results) } pub async fn get_manga(client: reqwest::Client, manga_id: &str) -> Result { let text = client.get(&format!("https://mangakakalot.com/manga/{}", &manga_id)) .send() .await? .text() .await?; let resp = parse_mangakakalot_manga(&text, &manga_id); Ok(match resp { structs::MangaOption::Manga(_) => resp, structs::MangaOption::Redirect(redirect) => { let text = client.get(&redirect.url) .send() .await? .text() .await?; let resp = match Url::parse(&redirect.url)?.host_str().unwrap() { "mangakakalot.com" => parse_mangakakalot_manga(&text, &manga_id), "manganelo.com" => parse_manganelo_manga(&text, &manga_id), _ => panic!("Unknown URL: {}", &redirect.url) }; match resp { structs::MangaOption::Manga(_) => resp, structs::MangaOption::Redirect(_) => panic!("Nested redirect"), structs::MangaOption::DoesNotExist => structs::MangaOption::DoesNotExist } }, structs::MangaOption::DoesNotExist => structs::MangaOption::DoesNotExist }) } fn parse_manganelo_manga(text: &str, manga_id: &str) -> structs::MangaOption { let mut split: Vec<&str> = text.splitn(2, "\n").collect(); let screaming_doctype = split[0].to_uppercase(); split[0] = &screaming_doctype; let text = split.join("\n"); let mut name: Option = None; let mut status: Option = None; let mut last_updated: Option = None; let mut summary: Option = None; let mut authors = Vec::new(); let mut genres = Vec::new(); let mut chapters = Vec::new(); let mut is_inside_h1 = false; let mut is_inside_a = false; let mut is_inside_td = false; let mut is_inside_authors = false; let mut is_inside_genres = false; let mut is_inside_status = false; let mut is_inside_stre_value = false; let mut is_inside_h3 = false; let mut is_inside_ul = false; let mut is_inside_description = false; let mut tmp_chapter_link: Option = None; let mut reader = Reader::from_str(&text); reader.check_end_names(false); let mut buf = Vec::new(); loop { match reader.read_event(&mut buf) { Ok(Event::Start(ref e)) => { match e.name() { b"a" => { is_inside_a = true; if is_inside_ul { let href = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => attribute.key == b"href", Err(_) => false } }); if href.is_some() { match href.unwrap().unwrap().unescape_and_decode_value(&reader) { Ok(text) => tmp_chapter_link = Some(text), Err(_) => () }; } } }, b"ul" => is_inside_ul = true, b"h1" => is_inside_h1 = true, b"h3" => is_inside_h3 = true, b"td" => { let is_table_value = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => { attribute.key == b"class" && match attribute.unescape_and_decode_value(&reader) { Ok(text) => text.as_str() == "table-value", Err(_) => false } }, Err(_) => false } }).is_some(); if is_table_value { is_inside_td = true; } }, b"i" => { let class = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => attribute.key == b"class", Err(_) => false } }); if class.is_some() { match class.unwrap().unwrap().unescape_and_decode_value(&reader) { Ok(class_name) => { match class_name.as_str() { "info-author" => is_inside_authors = true, "info-status" => is_inside_status = true, "info-genres" => is_inside_genres = true, _ => () }; }, Err(_) => () }; } }, b"span" => { let is_stre_value = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => { attribute.key == b"class" && match attribute.unescape_and_decode_value(&reader) { Ok(text) => text.as_str() == "stre-value", Err(_) => false } }, Err(_) => false } }).is_some(); if is_stre_value { is_inside_stre_value = true; } }, b"div" => { let is_description = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => { attribute.key == b"class" && match attribute.unescape_and_decode_value(&reader) { Ok(text) => text.as_str() == "panel-story-info-description", Err(_) => false } }, Err(_) => false } }).is_some(); if is_description { is_inside_description = true; } }, _ => () }; }, Ok(Event::Text(e)) => { let text = match e.unescape_and_decode(&reader) { Ok(text) => text, Err(_) => { buf.clear(); continue; } }; let text = text.trim(); if name.is_none() && is_inside_h1 { name = Some(text.to_string()); } else if is_inside_authors && is_inside_td && is_inside_a { authors.push(text.to_string()); } else if is_inside_status && is_inside_td { status = Some(text.to_string()); } else if is_inside_genres && is_inside_td && is_inside_a { genres.push(text.to_string()); } else if last_updated.is_none() && is_inside_stre_value { last_updated = Some(text.to_string()); } else if is_inside_description && !is_inside_h3 && !text.is_empty() { if summary.is_some() { summary.as_mut().unwrap().push_str(text); } else { summary = Some(text.to_string()); } } else if is_inside_ul && is_inside_a && tmp_chapter_link.is_some() { let chapter_name = match text.splitn(2, &[':', '-'][..]).nth(1) { Some(text) => Some(text.trim().to_string()), None => None }; match tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0) { Some(chapter_number) => { chapters.push(structs::Chapter { chapter_number: chapter_number.to_string(), chapter_name: chapter_name, domain: "manganelo.com".to_string() }); }, None => () }; tmp_chapter_link = None; } else if text.starts_with("REDIRECT : ") { return structs::MangaOption::Redirect(structs::Redirect { url: text.splitn(2, ':').nth(1).unwrap().trim().to_string() }); } }, Ok(Event::End(e)) => { match e.name() { b"a" => is_inside_a = false, b"h1" => is_inside_h1 = false, b"h3" => is_inside_h3 = false, b"td" => { if is_inside_td { is_inside_td = false; is_inside_authors = false; is_inside_genres = false; is_inside_status = false; } }, b"div" => is_inside_description = false, b"ul" => break, _ => () }; }, Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err), Ok(Event::Eof) => break, _ => () }; buf.clear(); } chapters.reverse(); structs::MangaOption::Manga(structs::Manga { id: manga_id.to_string(), name: name.unwrap(), authors: authors, status: status, last_updated: last_updated, genres: genres, summary: summary, chapters: chapters } ) } fn parse_mangakakalot_manga(text: &str, manga_id: &str) -> structs::MangaOption { let mut split: Vec<&str> = text.splitn(2, "\n").collect(); let screaming_doctype = split[0].to_uppercase(); split[0] = &screaming_doctype; let text = split.join("\n"); let mut is_inside_title = false; let mut is_title_real = false; let mut is_inside_chapter_list = false; let mut is_inside_manga_info = false; let mut is_inside_authors = false; let mut is_inside_genres = false; let mut is_inside_a = false; let mut is_inside_row = false; let mut name: Option = None; let mut status: Option = None; let mut last_updated: Option = None; let mut summary: Option = None; let mut is_inside_noidungm = false; let mut is_inside_h1 = false; let mut is_inside_h2 = false; let mut authors = Vec::new(); let mut genres = Vec::new(); let mut chapters = Vec::new(); let mut tmp_chapter_link: Option = None; let mut reader = Reader::from_str(&text); reader.check_end_names(false); let mut buf = Vec::new(); loop { match reader.read_event(&mut buf) { Ok(Event::Start(ref e)) => { match e.name() { b"ul" => { let is_manga_info_text = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => { attribute.key == b"class" && match attribute.unescape_and_decode_value(&reader) { Ok(text) => text.as_str() == "manga-info-text", Err(_) => false } }, Err(_) => false } }).is_some(); if is_manga_info_text { is_inside_manga_info = true; } }, b"div" => { let class = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => attribute.key == b"class", Err(_) => false } }); if class.is_some() { match class.unwrap().unwrap().unescape_and_decode_value(&reader) { Ok(class_name) => { match class_name.as_str() { "chapter-list" => is_inside_chapter_list = true, "row" => is_inside_row = true, _ => () }; }, Err(_) => () }; } let inside_noidungm = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => { attribute.key == b"id" && match attribute.unescape_and_decode_value(&reader) { Ok(text) => text.as_str() == "noidungm", Err(_) => false } }, Err(_) => false } }).is_some(); if inside_noidungm { is_inside_noidungm = true; } }, b"h1" => is_inside_h1 = true, b"h2" => is_inside_h2 = true, b"a" => { is_inside_a = true; if is_inside_chapter_list { let href = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => attribute.key == b"href", Err(_) => false } }); if href.is_some() { match href.unwrap().unwrap().unescape_and_decode_value(&reader) { Ok(text) => tmp_chapter_link = Some(text), Err(_) => () }; } } }, b"title" => is_inside_title = true, _ => () }; }, Ok(Event::Text(e)) => { let text = match e.unescape_and_decode(&reader) { Ok(text) => text, Err(_) => { buf.clear(); continue; } }; let text = text.trim(); if is_inside_manga_info { if is_inside_h1 { name = Some(text.to_string()); } else if is_inside_authors && is_inside_a { authors.push(text.to_string()); } else if is_inside_genres && is_inside_a { genres.push(text.to_string()); } else { match text.splitn(2, ' ').nth(0).unwrap() { "Author(s)" => is_inside_authors = true, "Status" => { match text.splitn(3, ' ').nth(2) { Some(text) => status = Some(text.to_string()), None => () }; }, "Last" => { if text.starts_with("Last updated : ") { match text.splitn(4, ' ').nth(3) { Some(text) => last_updated = Some(text.to_string()), None => () }; } }, "Genres" => is_inside_genres = true, _ => () } } } else if is_inside_noidungm && !is_inside_h2 && !text.is_empty(){ if summary.is_some() { summary.as_mut().unwrap().push_str(text); } else { summary = Some(text.to_string()); } } else if is_inside_chapter_list && is_inside_a && tmp_chapter_link.is_some() { let chapter_name = match text.splitn(2, &[':', '-'][..]).nth(1) { Some(text) => Some(text.trim().to_string()), None => None }; match tmp_chapter_link.unwrap().rsplitn(2, '_').nth(0) { Some(chapter_number) => { chapters.push(structs::Chapter { chapter_number: chapter_number.to_string(), chapter_name: chapter_name, domain: "mangakakalot.com".to_string() }); }, None => () }; tmp_chapter_link = None; } else if is_inside_title { is_title_real = !text.is_empty(); } else if text.trim().starts_with("REDIRECT : ") { return structs::MangaOption::Redirect(structs::Redirect { url: text.splitn(2, ':').nth(1).unwrap().trim().to_string() }); } }, Ok(Event::Empty(ref e)) => { if is_inside_noidungm && e.name() == b"br" && summary.is_some() { summary.as_mut().unwrap().push('\n'); } }, Ok(Event::End(e)) => { match e.name() { b"ul" => is_inside_manga_info = false, b"li" => { is_inside_authors = false; is_inside_genres = false; }, b"div" => { if is_inside_noidungm { is_inside_noidungm = false; } else if is_inside_row { is_inside_row = false; } else if is_inside_chapter_list { break; } }, b"h1" => is_inside_h1 = false, b"h2" => is_inside_h2 = false, b"a" => is_inside_a = false, b"title" => { if !is_title_real { return structs::MangaOption::DoesNotExist; } }, _ => () }; }, Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err), Ok(Event::Eof) => break, _ => () }; buf.clear(); } chapters.reverse(); structs::MangaOption::Manga(structs::Manga { id: manga_id.to_string(), name: name.unwrap(), authors: authors, status: status, last_updated: last_updated, genres: genres, summary: summary, chapters: chapters }) } pub async fn get_pages(client: reqwest::Client, chapter: &structs::Chapter, manga_id: &str) -> Result, reqwest::Error> { let text = client.get(&format!("https://{}/chapter/{}/chapter_{}", &chapter.domain, &manga_id, &chapter.chapter_number)) .send() .await? .text() .await?; Ok(match chapter.domain.as_str() { "mangakakalot.com" => parse_mangakakalot_pages(&text), "manganelo.com" => parse_manganelo_pages(&text), _ => panic!("Unknown domain: {}", &chapter.domain) }) } fn parse_mangakakalot_pages(text: &str) -> Vec { let mut split: Vec<&str> = text.splitn(2, "\n").collect(); let screaming_doctype = split[0].to_uppercase(); split[0] = &screaming_doctype; let text = split.join("\n"); let mut is_inside_pages = false; let mut is_inside_ads = false; let mut pages = Vec::new(); let mut reader = Reader::from_str(&text); reader.check_end_names(false); let mut buf = Vec::new(); loop { match reader.read_event(&mut buf) { Ok(Event::Start(ref e)) => { if e.name() == b"div" { if is_inside_pages { is_inside_ads = true; } else { let inside_pages = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => { attribute.key == b"id" && match attribute.unescape_and_decode_value(&reader) { Ok(text) => text.as_str() == "vungdoc", Err(_) => false } }, Err(_) => false } }).is_some(); if inside_pages { is_inside_pages = true; } } } }, Ok(Event::Empty(ref e)) => { if e.name() == b"img" { let mut src: Option = None; let mut alt: Option = None; for attribute in e.attributes() { match attribute { Ok(attribute) => { match attribute.key { b"src" => { match attribute.unescape_and_decode_value(&reader) { Ok(src_text) => src = Some(src_text), Err(_) => () }; }, b"alt" => { match attribute.unescape_and_decode_value(&reader) { Ok(alt_text) => alt = Some(alt_text), Err(_) => () }; }, _ => () }; }, Err(_) => () }; } if src.is_some() && alt.is_some() { pages.push(src.unwrap()); } } }, Ok(Event::End(e)) => { if e.name() == b"div" { if is_inside_ads { is_inside_ads = false; } else if is_inside_pages { is_inside_pages = false; } } }, Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err), Ok(Event::Eof) => break, _ => () } buf.clear(); } pages } fn parse_manganelo_pages(text: &str) -> Vec { let mut split: Vec<&str> = text.splitn(2, "\n").collect(); let screaming_doctype = split[0].to_uppercase(); split[0] = &screaming_doctype; let text = split.join("\n"); let mut is_inside_pages = false; let mut is_inside_ads = false; let mut pages = Vec::new(); let mut reader = Reader::from_str(&text); reader.check_end_names(false); let mut buf = Vec::new(); loop { match reader.read_event(&mut buf) { Ok(Event::Start(ref e)) => { if e.name() == b"div" { if is_inside_pages { is_inside_ads = true; } else { let inside_pages = e.attributes() .find(|attribute| { match attribute.as_ref() { Ok(attribute) => { attribute.key == b"class" && match attribute.unescape_and_decode_value(&reader) { Ok(text) => text.as_str() == "container-chapter-reader", Err(_) => false } }, Err(_) => false } }).is_some(); if inside_pages { is_inside_pages = true; } } } }, Ok(Event::Empty(ref e)) => { if is_inside_pages && !is_inside_ads && e.name() == b"img" { let mut src: Option = None; let mut alt: Option = None; for attribute in e.attributes() { match attribute { Ok(attribute) => { match attribute.key { b"src" => { match attribute.unescape_and_decode_value(&reader) { Ok(src_text) => src = Some(src_text), Err(_) => () }; }, b"alt" => { match attribute.unescape_and_decode_value(&reader) { Ok(alt_text) => alt = Some(alt_text), Err(_) => () }; }, _ => () }; }, Err(_) => () }; } if src.is_some() && alt.is_some() { pages.push(src.unwrap()); } } }, Ok(Event::End(e)) => { if e.name() == b"div" { if is_inside_ads { is_inside_ads = false; } else if is_inside_pages { is_inside_pages = false; } } }, Err(err) => panic!("Error at position {}: {}", reader.buffer_position(), err), Ok(Event::Eof) => break, _ => () } buf.clear(); } pages } pub async fn download_file(client: reqwest::Client, url: &str, file_name: &PathBuf, referer: &str) -> Result { let resp = client.get(url) .header("Referer", referer) .send() .await?; match resp.headers().get("Content-Type") { Some(header_value) => { if header_value.to_str().unwrap_or_default().starts_with("image/") { let bytes = resp.bytes().await?; if !file_name.parent().unwrap().is_dir() { create_dir(file_name.parent().unwrap())?; } let mut file = File::create(&file_name)?; file.write_all(&bytes)?; return Ok(true); } return Ok(false); }, None => Ok(false) } }