Scrape results page instead of using the API
For some reason, some search queries, such as "seven young goats", fail with a "does not exist" error when sent to the API. There actually was a doujin with "seven young goats" in the title (254818), but it seems to have been deleted. Perhaps the web frontend skips deleted doujins, whereas the API does not and therefore fails?
This commit is contained in:
parent
841d740475
commit
13c365c3a7
|
@ -420,6 +420,7 @@ dependencies = [
|
|||
name = "nhentairs"
|
||||
version = "0.5.6"
|
||||
dependencies = [
|
||||
"quick-xml",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
@ -528,6 +529,15 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.33"
|
||||
|
|
|
@ -14,3 +14,4 @@ serde = { version = "1.0", features = ["derive"] }
|
|||
serde_json = "1.0"
|
||||
reqwest = "0.11"
|
||||
tokio = { version = "1.33", features = ["rt-multi-thread", "sync", "time"] }
|
||||
quick-xml = "0.31"
|
||||
|
|
63
src/api.rs
63
src/api.rs
|
@ -1,5 +1,7 @@
|
|||
use crate::structs;
|
||||
|
||||
use quick_xml::events::Event;
|
||||
use quick_xml::Reader;
|
||||
use std::env;
|
||||
use std::process::exit;
|
||||
|
||||
|
@ -38,12 +40,63 @@ pub async fn get_sauce_info(
|
|||
pub async fn get_search_info(
|
||||
client: reqwest::Client,
|
||||
search_query: &str,
|
||||
) -> Result<structs::SearchInfo, structs::Error> {
|
||||
let uri = "https://nhentai.net/api/galleries/search";
|
||||
) -> Result<Vec<structs::MiniGalleryInfo>, structs::Error> {
|
||||
let resp = client
|
||||
.get(uri)
|
||||
.query(&[("query", search_query)])
|
||||
.get("https://nhentai.net/search/")
|
||||
.query(&[("q", search_query)])
|
||||
.send()
|
||||
.await?;
|
||||
Ok(serde_json::from_str(&resp.text().await?)?)
|
||||
let text = resp.text().await?;
|
||||
let mut results = Vec::new();
|
||||
let mut gallery_info = structs::MiniGalleryInfo {
|
||||
id: 0,
|
||||
title: "".to_string(),
|
||||
};
|
||||
let mut reading_gallery = false;
|
||||
|
||||
let mut reader = Reader::from_str(&text);
|
||||
reader.trim_text(true).check_end_names(false);
|
||||
loop {
|
||||
match reader.read_event() {
|
||||
Ok(Event::Start(e)) if e.local_name().as_ref() == "a".as_bytes() => {
|
||||
let class_attribute = match e.try_get_attribute("class")? {
|
||||
Some(a) => a,
|
||||
None => continue,
|
||||
};
|
||||
if class_attribute.decode_and_unescape_value(&reader)? != "cover" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let href_attribute = match e.try_get_attribute("href")? {
|
||||
Some(a) => a,
|
||||
None => return Err(structs::Error::Unknown("failed to find href in <a>")),
|
||||
};
|
||||
let href = href_attribute.decode_and_unescape_value(&reader)?;
|
||||
let id_str = match href.split('/').nth(2) {
|
||||
Some(i) => i,
|
||||
None => return Err(structs::Error::Unknown("failed to find id in <a href>")),
|
||||
};
|
||||
|
||||
reading_gallery = true;
|
||||
gallery_info.id = id_str.parse()?;
|
||||
}
|
||||
Ok(Event::Text(e)) if reading_gallery => {
|
||||
gallery_info.title.push_str(&e.unescape()?);
|
||||
}
|
||||
Ok(Event::End(e)) if reading_gallery && e.local_name().as_ref() == "a".as_bytes() => {
|
||||
results.push(gallery_info);
|
||||
reading_gallery = false;
|
||||
gallery_info = structs::MiniGalleryInfo {
|
||||
id: 0,
|
||||
title: "".to_string(),
|
||||
};
|
||||
}
|
||||
Ok(Event::Eof) => break,
|
||||
// wrap explicitly: the From conversion is only applied by `?` or `.into()`, not by a plain `return Err(err)`
|
||||
Err(err) => return Err(structs::Error::QuickXML(err)),
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
|
|
@ -17,15 +17,11 @@ pub async fn run(args: env::Args) {
|
|||
let search_info = api::get_search_info(api::get_client(), &query)
|
||||
.await
|
||||
.unwrap();
|
||||
if search_info.num_pages < 1 {
|
||||
if search_info.len() < 1 {
|
||||
eprintln!("No results found");
|
||||
exit(1);
|
||||
}
|
||||
for result in search_info.result {
|
||||
let mut title = &result.title.english.unwrap_or_default();
|
||||
if title == "" {
|
||||
title = &result.title.japanese.as_ref().unwrap();
|
||||
}
|
||||
println!("{}: {}", result.id, &title);
|
||||
for result in search_info {
|
||||
println!("{}: {}", result.id, result.title);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ use serde::{Deserialize, Deserializer};
|
|||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::marker::PhantomData;
|
||||
use std::num::ParseIntError;
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct GalleryTitleInfo {
|
||||
|
@ -60,11 +61,10 @@ pub enum GalleryInfo {
|
|||
Error(GalleryInfoError),
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct SearchInfo {
|
||||
pub result: Vec<GalleryInfoSuccess>,
|
||||
pub num_pages: i32,
|
||||
pub per_page: i32,
|
||||
#[derive(Debug)]
|
||||
pub struct MiniGalleryInfo {
|
||||
pub id: i32,
|
||||
pub title: String,
|
||||
}
|
||||
|
||||
impl fmt::Display for GalleryInfoSuccess {
|
||||
|
@ -112,6 +112,9 @@ impl fmt::Display for GalleryInfoSuccess {
|
|||
pub enum Error {
|
||||
Reqwest(reqwest::Error),
|
||||
SerdeJSON(serde_json::Error),
|
||||
QuickXML(quick_xml::Error),
|
||||
ParseInt(ParseIntError),
|
||||
Unknown(&'static str),
|
||||
}
|
||||
|
||||
impl From<reqwest::Error> for Error {
|
||||
|
@ -128,11 +131,28 @@ impl From<serde_json::Error> for Error {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<quick_xml::Error> for Error {
|
||||
#[inline]
|
||||
fn from(error: quick_xml::Error) -> Error {
|
||||
Error::QuickXML(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParseIntError> for Error {
|
||||
#[inline]
|
||||
fn from(error: ParseIntError) -> Error {
|
||||
Error::ParseInt(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let str = match self {
|
||||
Error::Reqwest(err) => format!("reqwest error: {}", err),
|
||||
Error::SerdeJSON(err) => format!("serde_json error: {}", err),
|
||||
Error::QuickXML(err) => format!("quick_xml error: {}", err),
|
||||
Error::ParseInt(err) => format!("parse int error: {}", err),
|
||||
Error::Unknown(err) => err.to_string(),
|
||||
};
|
||||
formatter.write_str(&str)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue