Scrape results page instead of using the API
For some reason, some search queries (for example, "seven young goats") fail with a "does not exist" error. There actually was a doujin with "seven young goats" in the title (254818), but it appears to have been deleted. Perhaps the web frontend ignores deleted doujins, whereas the API does not and therefore fails?
This commit is contained in:
parent
841d740475
commit
13c365c3a7
|
@ -420,6 +420,7 @@ dependencies = [
|
||||||
name = "nhentairs"
|
name = "nhentairs"
|
||||||
version = "0.5.6"
|
version = "0.5.6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"quick-xml",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
@ -528,6 +529,15 @@ dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quick-xml"
|
||||||
|
version = "0.31.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quote"
|
name = "quote"
|
||||||
version = "1.0.33"
|
version = "1.0.33"
|
||||||
|
|
|
@ -14,3 +14,4 @@ serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
reqwest = "0.11"
|
reqwest = "0.11"
|
||||||
tokio = { version = "1.33", features = ["rt-multi-thread", "sync", "time"] }
|
tokio = { version = "1.33", features = ["rt-multi-thread", "sync", "time"] }
|
||||||
|
quick-xml = "0.31"
|
||||||
|
|
63
src/api.rs
63
src/api.rs
|
@ -1,5 +1,7 @@
|
||||||
use crate::structs;
|
use crate::structs;
|
||||||
|
|
||||||
|
use quick_xml::events::Event;
|
||||||
|
use quick_xml::Reader;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::process::exit;
|
use std::process::exit;
|
||||||
|
|
||||||
|
@ -38,12 +40,63 @@ pub async fn get_sauce_info(
|
||||||
pub async fn get_search_info(
|
pub async fn get_search_info(
|
||||||
client: reqwest::Client,
|
client: reqwest::Client,
|
||||||
search_query: &str,
|
search_query: &str,
|
||||||
) -> Result<structs::SearchInfo, structs::Error> {
|
) -> Result<Vec<structs::MiniGalleryInfo>, structs::Error> {
|
||||||
let uri = "https://nhentai.net/api/galleries/search";
|
|
||||||
let resp = client
|
let resp = client
|
||||||
.get(uri)
|
.get("https://nhentai.net/search/")
|
||||||
.query(&[("query", search_query)])
|
.query(&[("q", search_query)])
|
||||||
.send()
|
.send()
|
||||||
.await?;
|
.await?;
|
||||||
Ok(serde_json::from_str(&resp.text().await?)?)
|
let text = resp.text().await?;
|
||||||
|
let mut results = Vec::new();
|
||||||
|
let mut gallery_info = structs::MiniGalleryInfo {
|
||||||
|
id: 0,
|
||||||
|
title: "".to_string(),
|
||||||
|
};
|
||||||
|
let mut reading_gallery = false;
|
||||||
|
|
||||||
|
let mut reader = Reader::from_str(&text);
|
||||||
|
reader.trim_text(true).check_end_names(false);
|
||||||
|
loop {
|
||||||
|
match reader.read_event() {
|
||||||
|
Ok(Event::Start(e)) if e.local_name().as_ref() == "a".as_bytes() => {
|
||||||
|
let class_attribute = match e.try_get_attribute("class")? {
|
||||||
|
Some(a) => a,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
if class_attribute.decode_and_unescape_value(&reader)? != "cover" {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let href_attribute = match e.try_get_attribute("href")? {
|
||||||
|
Some(a) => a,
|
||||||
|
None => return Err(structs::Error::Unknown("failed to find href in <a>")),
|
||||||
|
};
|
||||||
|
let href = href_attribute.decode_and_unescape_value(&reader)?;
|
||||||
|
let id_str = match href.split('/').nth(2) {
|
||||||
|
Some(i) => i,
|
||||||
|
None => return Err(structs::Error::Unknown("failed to find id in <a href>")),
|
||||||
|
};
|
||||||
|
|
||||||
|
reading_gallery = true;
|
||||||
|
gallery_info.id = id_str.parse()?;
|
||||||
|
}
|
||||||
|
Ok(Event::Text(e)) if reading_gallery => {
|
||||||
|
gallery_info.title.push_str(&e.unescape()?);
|
||||||
|
}
|
||||||
|
Ok(Event::End(e)) if reading_gallery && e.local_name().as_ref() == "a".as_bytes() => {
|
||||||
|
results.push(gallery_info);
|
||||||
|
reading_gallery = false;
|
||||||
|
gallery_info = structs::MiniGalleryInfo {
|
||||||
|
id: 0,
|
||||||
|
title: "".to_string(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Ok(Event::Eof) => break,
|
||||||
|
// why cast? i have no idea, the compiler just doesn't see the From
|
||||||
|
Err(err) => return Err(structs::Error::QuickXML(err)),
|
||||||
|
_ => {}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(results)
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,15 +17,11 @@ pub async fn run(args: env::Args) {
|
||||||
let search_info = api::get_search_info(api::get_client(), &query)
|
let search_info = api::get_search_info(api::get_client(), &query)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
if search_info.num_pages < 1 {
|
if search_info.len() < 1 {
|
||||||
eprintln!("No results found");
|
eprintln!("No results found");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
for result in search_info.result {
|
for result in search_info {
|
||||||
let mut title = &result.title.english.unwrap_or_default();
|
println!("{}: {}", result.id, result.title);
|
||||||
if title == "" {
|
|
||||||
title = &result.title.japanese.as_ref().unwrap();
|
|
||||||
}
|
|
||||||
println!("{}: {}", result.id, &title);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ use serde::{Deserialize, Deserializer};
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
use std::num::ParseIntError;
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
#[derive(Deserialize, Debug)]
|
||||||
pub struct GalleryTitleInfo {
|
pub struct GalleryTitleInfo {
|
||||||
|
@ -60,11 +61,10 @@ pub enum GalleryInfo {
|
||||||
Error(GalleryInfoError),
|
Error(GalleryInfoError),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
#[derive(Debug)]
|
||||||
pub struct SearchInfo {
|
pub struct MiniGalleryInfo {
|
||||||
pub result: Vec<GalleryInfoSuccess>,
|
pub id: i32,
|
||||||
pub num_pages: i32,
|
pub title: String,
|
||||||
pub per_page: i32,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for GalleryInfoSuccess {
|
impl fmt::Display for GalleryInfoSuccess {
|
||||||
|
@ -112,6 +112,9 @@ impl fmt::Display for GalleryInfoSuccess {
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
Reqwest(reqwest::Error),
|
Reqwest(reqwest::Error),
|
||||||
SerdeJSON(serde_json::Error),
|
SerdeJSON(serde_json::Error),
|
||||||
|
QuickXML(quick_xml::Error),
|
||||||
|
ParseInt(ParseIntError),
|
||||||
|
Unknown(&'static str),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<reqwest::Error> for Error {
|
impl From<reqwest::Error> for Error {
|
||||||
|
@ -128,11 +131,28 @@ impl From<serde_json::Error> for Error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<quick_xml::Error> for Error {
|
||||||
|
#[inline]
|
||||||
|
fn from(error: quick_xml::Error) -> Error {
|
||||||
|
Error::QuickXML(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ParseIntError> for Error {
|
||||||
|
#[inline]
|
||||||
|
fn from(error: ParseIntError) -> Error {
|
||||||
|
Error::ParseInt(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl fmt::Display for Error {
|
impl fmt::Display for Error {
|
||||||
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
let str = match self {
|
let str = match self {
|
||||||
Error::Reqwest(err) => format!("reqwest error: {}", err),
|
Error::Reqwest(err) => format!("reqwest error: {}", err),
|
||||||
Error::SerdeJSON(err) => format!("serde_json error: {}", err),
|
Error::SerdeJSON(err) => format!("serde_json error: {}", err),
|
||||||
|
Error::QuickXML(err) => format!("quick_xml error: {}", err),
|
||||||
|
Error::ParseInt(err) => format!("parse int error: {}", err),
|
||||||
|
Error::Unknown(err) => err.to_string(),
|
||||||
};
|
};
|
||||||
formatter.write_str(&str)
|
formatter.write_str(&str)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue