Compare commits

...

10 Commits

Author SHA1 Message Date
blankie acea5d26df
Bump version to 0.6.0 2023-10-28 13:10:28 +11:00
blankie 8f75ac0b45
Add related 2023-10-28 13:09:17 +11:00
blankie 13c365c3a7
Scrape results page instead of using the API
For some reason, some search queries fail with a "does not exist" error,
like "seven young goats". There actually was a doujin with "seven young
goats" in the title (254818), but it seems like it was deleted. Perhaps
the web frontend ignores deleted doujins, but the API doesn't and fails?
2023-10-28 12:51:18 +11:00
blankie 841d740475
Move more sauce common code into get_arg_sauces 2023-10-28 11:49:55 +11:00
blankie 1d8210967c
Unify errors into one struct 2023-10-28 11:49:35 +11:00
blankie dd62cc2c19
Add NHENTAIRS_INSECURE 2023-10-28 11:25:53 +11:00
blankie c2ea9b87b0
cargo fmt 2023-10-28 11:11:16 +11:00
blankie 9ca4dda9d9
Move utils::download_file into commands/download.rs 2023-10-28 11:09:48 +11:00
blankie bb80105b4a
Split requests into their own module 2023-10-28 11:08:10 +11:00
blankie 4769baf875
Update dependencies 2023-10-28 11:04:40 +11:00
11 changed files with 781 additions and 383 deletions

648
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
[package]
name = "nhentairs"
version = "0.5.6"
version = "0.6.0"
authors = ["blank X <theblankx@protonmail.com>"]
edition = "2018"
@ -13,4 +13,5 @@ lto = true
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
reqwest = "0.11"
tokio = { version = "1.0", features = ["rt-multi-thread", "sync", "time"] }
tokio = { version = "1.33", features = ["rt-multi-thread", "sync", "time"] }
quick-xml = "0.31"

113
src/api.rs Normal file
View File

@ -0,0 +1,113 @@
use crate::structs;
use quick_xml::events::Event;
use quick_xml::Reader;
use std::env;
use std::process::exit;
extern crate serde_json;
/// Builds the HTTP client used for requests to nhentai.
///
/// If the `NHENTAIRS_INSECURE` environment variable is exactly `true`, `yes`
/// or `1`, TLS certificate validation is disabled. Any other value leaves
/// validation on. A value that cannot be read (for example, not valid
/// Unicode) is reported on stderr and treated as unset.
///
/// Exits the process with status 1 if the client cannot be built.
pub fn get_client() -> reqwest::Client {
    let insecure = match env::var("NHENTAIRS_INSECURE") {
        Ok(value) => matches!(value.as_str(), "true" | "yes" | "1"),
        Err(env::VarError::NotPresent) => false,
        Err(err) => {
            eprintln!("failed to parse NHENTAIRS_INSECURE: {err}");
            false
        }
    };

    let mut builder = reqwest::Client::builder();
    if insecure {
        builder = builder.danger_accept_invalid_certs(true);
    }

    builder.build().unwrap_or_else(|err| {
        eprintln!("Failed to create reqwest client: {err}");
        exit(1)
    })
}
/// Fetches the metadata of gallery `sauce` from the nhentai JSON API.
///
/// The response body is deserialized into [`structs::GalleryInfo`]. The HTTP
/// status code is not inspected here; only the body is parsed.
///
/// # Errors
///
/// Returns an error if the request fails, the body cannot be read, or the
/// body cannot be deserialized as JSON.
pub async fn get_sauce_info(
    client: reqwest::Client,
    sauce: i32,
) -> Result<structs::GalleryInfo, structs::Error> {
    let url = format!("https://nhentai.net/api/gallery/{}", sauce);
    let body = client.get(&url).send().await?.text().await?;
    let info = serde_json::from_str(&body)?;
    Ok(info)
}
/// Fetches the galleries related to gallery `sauce` from the nhentai JSON API.
///
/// The response body is deserialized into [`structs::RelatedGalleries`]. The
/// HTTP status code is not inspected here; only the body is parsed.
///
/// # Errors
///
/// Returns an error if the request fails, the body cannot be read, or the
/// body cannot be deserialized as JSON.
pub async fn get_related_galleries(
    client: &reqwest::Client,
    sauce: i32,
) -> Result<structs::RelatedGalleries, structs::Error> {
    let url = format!("https://nhentai.net/api/gallery/{}/related", sauce);
    let body = client.get(&url).send().await?.text().await?;
    let related = serde_json::from_str(&body)?;
    Ok(related)
}
/// Searches nhentai for `search_query` by scraping the web search page.
///
/// The page is fetched from `https://nhentai.net/search/` with the query in
/// the `q` parameter and walked event by event with quick-xml. Every
/// `<a class="cover">` element yields one [`structs::MiniGalleryInfo`]: the id
/// comes from the anchor's `href`, and the title is the concatenation of all
/// text found inside the anchor.
///
/// # Errors
///
/// Returns an error if the request fails, the markup cannot be parsed, a
/// cover anchor has no `href`, the `href` has no third `/`-separated segment,
/// or that segment is not a valid integer.
pub async fn get_search_info(
client: reqwest::Client,
search_query: &str,
) -> Result<Vec<structs::MiniGalleryInfo>, structs::Error> {
let resp = client
.get("https://nhentai.net/search/")
.query(&[("q", search_query)])
.send()
.await?;
let text = resp.text().await?;
let mut results = Vec::new();
// Accumulator for the gallery currently being read; reset after each push.
let mut gallery_info = structs::MiniGalleryInfo {
id: 0,
title: "".to_string(),
};
// True while the reader is positioned inside an <a class="cover"> element.
let mut reading_gallery = false;
let mut reader = Reader::from_str(&text);
// Trim whitespace around text nodes and do not require end tags to match
// their start tags (the input is an HTML page, not strict XML).
reader.trim_text(true).check_end_names(false);
loop {
match reader.read_event() {
// Opening <a>: only anchors whose class is exactly "cover" start a gallery.
Ok(Event::Start(e)) if e.local_name().as_ref() == "a".as_bytes() => {
let class_attribute = match e.try_get_attribute("class")? {
Some(a) => a,
None => continue,
};
if class_attribute.decode_and_unescape_value(&reader)? != "cover" {
continue;
}
let href_attribute = match e.try_get_attribute("href")? {
Some(a) => a,
None => return Err(structs::Error::Unknown("failed to find href in <a>")),
};
let href = href_attribute.decode_and_unescape_value(&reader)?;
// The id is the third `/`-separated segment of the href
// (presumably the href looks like `/g/<id>/` -- confirm against the live page).
let id_str = match href.split('/').nth(2) {
Some(i) => i,
None => return Err(structs::Error::Unknown("failed to find id in <a href>")),
};
reading_gallery = true;
gallery_info.id = id_str.parse()?;
}
// Any text inside the cover anchor is appended to the title.
Ok(Event::Text(e)) if reading_gallery => {
gallery_info.title.push_str(&e.unescape()?);
}
// Closing </a> of a cover anchor: store the gallery and start a fresh accumulator.
Ok(Event::End(e)) if reading_gallery && e.local_name().as_ref() == "a".as_bytes() => {
results.push(gallery_info);
reading_gallery = false;
gallery_info = structs::MiniGalleryInfo {
id: 0,
title: "".to_string(),
};
}
Ok(Event::Eof) => break,
// Wrapped explicitly: the `From` conversion is applied by the `?` operator,
// not by a plain `return Err(..)`, so the variant has to be named here.
Err(err) => return Err(structs::Error::QuickXML(err)),
_ => {}
};
}
Ok(results)
}

View File

@ -1,6 +1,7 @@
mod view;
mod search;
mod download;
mod related;
mod search;
mod view;
use std::env;
use std::path::Path;
@ -11,19 +12,24 @@ pub async fn run() {
let path = args.next().expect("Cannot get binary path");
let path = Path::new(&path).file_stem().unwrap().to_str().unwrap();
let operation = match args.next() {
Some(operation) => operation,
None => {
eprintln!("Missing operation, run `{} help`", path);
exit(1);
}
Some(operation) => operation,
None => {
eprintln!("Missing operation, run `{} help`", path);
exit(1);
}
};
match operation.as_str() {
"search" => search::run(args).await,
"view" | "show" | "info" => view::run(args).await,
"related" => related::run(args).await,
"download" | "dl" => download::run(args).await,
"help" => println!(r#"Usage: {} search QUERY
or {} info/view/show SAUCE [SAUCE]...
or {} download/dl SAUCE [SAUCE]..."#, path, path, path),
"help" => println!(
r#"Usage: {} search <query>
or {} info/view/show <sauce>...
or {} related <sauce>...
or {} download/dl <sauce>..."#,
path, path, path, path
),
_ => {
eprintln!("Unknown operation, run `{} help`", path);
exit(1)

View File

@ -1,37 +1,38 @@
use crate::utils;
use crate::api;
use crate::structs;
use crate::utils;
use std::env;
use std::sync::Arc;
use std::fs::File;
use std::fs::{create_dir, rename, write};
use std::io::Write;
use std::path::Path;
use std::process::exit;
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::task::JoinHandle;
use tokio::time::{sleep, Duration};
use std::fs::{rename, create_dir, write};
extern crate tokio;
extern crate reqwest;
extern crate tokio;
const DOWNLOAD_WORKERS: usize = 5;
const FAIL_DOWNLOAD_WAIT_TIME: u64 = 5000;
pub async fn run(args: env::Args) {
let sauces = utils::get_arg_sauces(args).unwrap();
if sauces.len() < 1 {
eprintln!("Missing sauce(s)");
exit(1);
}
let client = reqwest::Client::new();
let sauces = utils::get_arg_sauces(args);
let client = api::get_client();
let mut pages_vec: Vec<(String, String)> = Vec::new();
{
let mut handles: Vec<JoinHandle<structs::GalleryInfoSuccess>> = Vec::with_capacity(sauces.len());
let mut handles: Vec<JoinHandle<structs::GalleryInfoSuccess>> =
Vec::with_capacity(sauces.len());
let mut sauce_info_vec: Vec<structs::GalleryInfoSuccess> = Vec::with_capacity(sauces.len());
for sauce in sauces {
let cloned_client = client.clone();
handles.push(tokio::spawn(async move {
match utils::get_sauce_info(cloned_client, sauce).await.unwrap() {
match api::get_sauce_info(cloned_client, sauce).await.unwrap() {
structs::GalleryInfo::Info(sauce_info) => sauce_info,
structs::GalleryInfo::Error(sauce_error) => panic!("{} returned: {}", sauce, sauce_error.error)
structs::GalleryInfo::Error(sauce_error) => {
panic!("{} returned: {}", sauce, sauce_error.error)
}
}
}));
}
@ -45,8 +46,8 @@ pub async fn run(args: env::Args) {
Ok(()) => write(base_path.join("info.txt"), format!("{}\n", &sauce_info)).unwrap(),
Err(err) => match err.kind() {
std::io::ErrorKind::AlreadyExists => (),
_ => panic!("Got a weird error while creating dir: {}", err)
}
_ => panic!("Got a weird error while creating dir: {}", err),
},
};
let mut page_num: i32 = 1;
for page in sauce_info.images.pages {
@ -54,7 +55,7 @@ pub async fn run(args: env::Args) {
"j" => ".jpg",
"p" => ".png",
"g" => ".gif",
_ => panic!("Unknown extension type: {}", page.t)
_ => panic!("Unknown extension type: {}", page.t),
};
let mut file_name = page_num.to_string();
file_name.push_str(file_ext);
@ -62,9 +63,10 @@ pub async fn run(args: env::Args) {
if !file_path.exists() {
pages_vec.push((
String::from(file_path.to_str().unwrap()),
format!("https://i.nhentai.net/galleries/{}/{}",
sauce_info.media_id,
file_name)
format!(
"https://i.nhentai.net/galleries/{}/{}",
sauce_info.media_id, file_name
),
));
}
page_num += 1;
@ -93,13 +95,16 @@ pub async fn run(args: env::Args) {
let mut tmp_file_path = file_path.clone();
tmp_file_path.push_str(".tmp");
loop {
match utils::download_file(cloned_client.clone(), &url, &tmp_file_path).await {
match download_file(cloned_client.clone(), &url, &tmp_file_path).await {
Ok(success) => {
if success {
break;
}
},
Err(err) => eprintln!("[DW{}] Failed to download {} due to {}, sleeping for {}ms", worker_id, file_path, err, FAIL_DOWNLOAD_WAIT_TIME)
}
Err(err) => eprintln!(
"[DW{}] Failed to download {} due to {}, sleeping for {}ms",
worker_id, file_path, err, FAIL_DOWNLOAD_WAIT_TIME
),
};
sleep(Duration::from_millis(FAIL_DOWNLOAD_WAIT_TIME)).await;
}
@ -112,3 +117,20 @@ pub async fn run(args: env::Args) {
handle.await.unwrap();
}
}
/// Downloads `url` and stores the response body at `file_name`.
///
/// Returns `Ok(true)` when the response carried a `Content-Type` starting
/// with `image/` and the body was written. Returns `Ok(false)` when the
/// header is missing, unreadable as text, or names another content type; in
/// that case nothing is written.
///
/// # Errors
///
/// Returns the `reqwest::Error` raised while sending the request or reading
/// the body.
///
/// # Panics
///
/// Panics if the file cannot be created or written.
async fn download_file(
    client: reqwest::Client,
    url: &str,
    file_name: &str,
) -> Result<bool, reqwest::Error> {
    let resp = client.get(url).send().await?;

    let is_image = resp
        .headers()
        .get("Content-Type")
        .map_or(false, |value| {
            value.to_str().unwrap_or_default().starts_with("image/")
        });
    if !is_image {
        return Ok(false);
    }

    let bytes = resp.bytes().await?;
    let mut file = File::create(file_name).unwrap();
    file.write_all(&bytes).unwrap();
    Ok(true)
}

74
src/commands/related.rs Normal file
View File

@ -0,0 +1,74 @@
use crate::api;
use crate::structs;
use crate::utils;
use std::env;
use std::process::exit;
extern crate reqwest;
extern crate tokio;
pub async fn run(args: env::Args) {
let sauces = utils::get_arg_sauces(args);
let is_multi = sauces.len() > 1;
let client = api::get_client();
let mut failures = 0;
let mut one_done = false;
for sauce in sauces {
let sauce_info = api::get_related_galleries(&client, sauce).await;
match sauce_info {
Ok(structs::RelatedGalleries::Galleries(related_galleries)) => {
show_related_galleries(sauce, &related_galleries, one_done, is_multi)
}
Ok(structs::RelatedGalleries::Error(err)) => {
show_error(sauce, &err.error, one_done, true);
failures += 1;
}
Err(err) => {
show_error(sauce, &err, one_done, is_multi);
failures += 1;
}
}
one_done = true;
}
exit(failures);
}
/// Prints the galleries related to `sauce` on stdout, one `id: title` line each.
///
/// * `prepend_newline` - print an empty line first, to separate this listing
///   from earlier output.
/// * `is_multi` - print a `sauce:` header and prefix every entry with `- `.
///
/// The English title is preferred, then the Japanese one; if neither is
/// present, `<unknown title>` is shown.
fn show_related_galleries(
    sauce: i32,
    related_galleries: &structs::RelatedGalleriesSuccess,
    prepend_newline: bool,
    is_multi: bool,
) {
    if prepend_newline {
        println!();
    }

    let prefix = if is_multi {
        println!("{}:", sauce);
        "- "
    } else {
        ""
    };

    for gallery in &related_galleries.result {
        let title = gallery
            .title
            .english
            .as_deref()
            .or(gallery.title.japanese.as_deref())
            .unwrap_or("<unknown title>");
        println!("{}{}: {}", prefix, gallery.id, title);
    }
}
/// Prints `error` on stderr.
///
/// * `prepend_newline` - print an empty line first, to separate this message
///   from earlier output.
/// * `prepend_sauce` - print the message as `sauce: error` instead of the
///   bare error.
fn show_error<T: std::fmt::Display>(
    sauce: i32,
    error: &T,
    prepend_newline: bool,
    prepend_sauce: bool,
) {
    if prepend_newline {
        eprintln!();
    }

    if prepend_sauce {
        eprintln!("{}: {}", sauce, error);
    } else {
        eprintln!("{}", error);
    }
}

View File

@ -1,4 +1,4 @@
use crate::utils;
use crate::api;
use std::env;
use std::process::exit;
@ -14,16 +14,14 @@ pub async fn run(args: env::Args) {
eprintln!("Missing search query");
exit(1);
}
let search_info = utils::get_search_info(reqwest::Client::new(), &query).await.unwrap();
if search_info.num_pages < 1 {
let search_info = api::get_search_info(api::get_client(), &query)
.await
.unwrap();
if search_info.len() < 1 {
eprintln!("No results found");
exit(1);
}
for result in search_info.result {
let mut title = &result.title.english.unwrap_or_default();
if title == "" {
title = &result.title.japanese.as_ref().unwrap();
}
println!("{}: {}", result.id, &title);
for result in search_info {
println!("{}: {}", result.id, result.title);
}
}

View File

@ -1,24 +1,25 @@
use crate::utils;
use crate::api;
use crate::structs;
use crate::utils;
use std::env;
use std::process::exit;
use tokio::task::JoinHandle;
extern crate tokio;
extern crate reqwest;
extern crate tokio;
pub async fn run(args: env::Args) {
let sauces = utils::get_arg_sauces(args).unwrap();
if sauces.len() < 1 {
eprintln!("Missing sauce(s)");
exit(1);
}
let client = reqwest::Client::new();
let mut handles: Vec<JoinHandle<(structs::GalleryInfo, i32)>> = Vec::with_capacity(sauces.len());
let sauces = utils::get_arg_sauces(args);
let client = api::get_client();
let mut handles: Vec<JoinHandle<(structs::GalleryInfo, i32)>> =
Vec::with_capacity(sauces.len());
for sauce in sauces {
let cloned_client = client.clone();
handles.push(tokio::spawn(async move {
(utils::get_sauce_info(cloned_client, sauce).await.unwrap(), sauce)
(
api::get_sauce_info(cloned_client, sauce).await.unwrap(),
sauce,
)
}));
}
let mut fail = false;
@ -31,7 +32,7 @@ pub async fn run(args: env::Args) {
println!("");
}
println!("{}", &sauce_info);
},
}
structs::GalleryInfo::Error(sauce_error) => {
if one_done {
eprintln!("");

View File

@ -1,6 +1,7 @@
mod utils;
mod structs;
mod api;
mod commands;
mod structs;
mod utils;
extern crate tokio;
@ -8,6 +9,6 @@ fn main() {
tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.unwrap()
.expect("failed to build tokio runtime")
.block_on(commands::run());
}

View File

@ -1,8 +1,14 @@
use std::fmt;
use std::marker::PhantomData;
use std::collections::BTreeMap;
use serde::de::{self, Visitor};
use serde::{Deserialize, Deserializer};
use std::collections::BTreeMap;
use std::fmt;
use std::marker::PhantomData;
use std::num::ParseIntError;
/// Error payload deserialized from an API response.
///
/// Used as the `Error` variant of the untagged `GalleryInfo` and
/// `RelatedGalleries` enums.
#[derive(Deserialize, Debug)]
pub struct APIError {
// Message taken from the `error` field of the response.
pub error: String,
}
#[derive(Deserialize, Debug)]
pub struct GalleryTitleInfo {
@ -15,14 +21,14 @@ pub struct GalleryTitleInfo {
pub struct GalleryImageInfo {
pub t: String,
pub w: i32,
pub h: i32
pub h: i32,
}
#[derive(Deserialize, Debug)]
pub struct GalleryImagesInfo {
pub pages: Vec<GalleryImageInfo>,
pub cover: GalleryImageInfo,
pub thumbnail: GalleryImageInfo
pub thumbnail: GalleryImageInfo,
}
#[derive(Deserialize, Debug)]
@ -31,7 +37,7 @@ pub struct GalleryTagInfo {
pub r#type: String,
pub name: String,
pub url: String,
pub count: i32
pub count: i32,
}
#[derive(Deserialize, Debug)]
@ -45,26 +51,32 @@ pub struct GalleryInfoSuccess {
pub upload_date: i32,
pub tags: Vec<GalleryTagInfo>,
pub num_pages: i32,
pub num_favorites: i32
}
#[derive(Deserialize, Debug)]
pub struct GalleryInfoError {
pub error: String
pub num_favorites: i32,
}
#[derive(Deserialize, Debug)]
#[serde(untagged)]
pub enum GalleryInfo {
Info(GalleryInfoSuccess),
Error(GalleryInfoError)
Error(APIError),
}
#[derive(Deserialize, Debug)]
pub struct SearchInfo {
pub struct RelatedGalleriesSuccess {
pub result: Vec<GalleryInfoSuccess>,
pub num_pages: i32,
pub per_page: i32
}
/// Response of a related-galleries lookup: either the galleries or an API error.
///
/// The enum is untagged, so serde tries the variants in declaration order and
/// keeps the first one that matches the response.
#[derive(Deserialize, Debug)]
#[serde(untagged)]
pub enum RelatedGalleries {
// Successful response carrying the related galleries.
Galleries(RelatedGalleriesSuccess),
// Response that only carries an error message.
Error(APIError),
}
/// Minimal gallery description: just the id and a title.
///
/// It does not derive `Deserialize`; values are constructed by hand.
#[derive(Debug)]
pub struct MiniGalleryInfo {
// Numeric gallery id.
pub id: i32,
// Title text of the gallery.
pub title: String,
}
impl fmt::Display for GalleryInfoSuccess {
@ -96,23 +108,75 @@ impl fmt::Display for GalleryInfoSuccess {
"group" => "Groups",
"language" => "Languages",
"category" => "Categories",
_ => tag_key
_ => tag_key,
};
text.push_str(&format!("\n{}: {}", tag_key, tag_value.join(", ")));
}
text.push_str(&format!("\nPages: {}\nFavorites: {}", self.num_pages, self.num_favorites));
text.push_str(&format!(
"\nPages: {}\nFavorites: {}",
self.num_pages, self.num_favorites
));
formatter.write_str(&text)
}
}
/// Unified error type wrapping every failure source used by this crate.
#[derive(Debug)]
pub enum Error {
// Failure reported by reqwest.
Reqwest(reqwest::Error),
// Failure reported by serde_json.
SerdeJSON(serde_json::Error),
// Failure reported by quick_xml.
QuickXML(quick_xml::Error),
// A string could not be parsed as an integer.
ParseInt(ParseIntError),
// Any other failure, described by a static message.
Unknown(&'static str),
}
impl From<reqwest::Error> for Error {
#[inline]
fn from(error: reqwest::Error) -> Error {
Error::Reqwest(error)
}
}
impl From<serde_json::Error> for Error {
#[inline]
fn from(error: serde_json::Error) -> Error {
Error::SerdeJSON(error)
}
}
impl From<quick_xml::Error> for Error {
#[inline]
fn from(error: quick_xml::Error) -> Error {
Error::QuickXML(error)
}
}
impl From<ParseIntError> for Error {
#[inline]
fn from(error: ParseIntError) -> Error {
Error::ParseInt(error)
}
}
impl fmt::Display for Error {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
let str = match self {
Error::Reqwest(err) => format!("reqwest error: {}", err),
Error::SerdeJSON(err) => format!("serde_json error: {}", err),
Error::QuickXML(err) => format!("quick_xml error: {}", err),
Error::ParseInt(err) => format!("parse int error: {}", err),
Error::Unknown(err) => err.to_string(),
};
formatter.write_str(&str)
}
}
fn convert_to_i32<'de, D>(deserializer: D) -> Result<i32, D::Error>
where
D: Deserializer<'de>
D: Deserializer<'de>,
{
struct ConvertToI32<T>(PhantomData<fn() -> T>);
impl<'de> Visitor<'de> for ConvertToI32<i32>
{
impl<'de> Visitor<'de> for ConvertToI32<i32> {
type Value = i32;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
@ -121,28 +185,28 @@ where
fn visit_i8<E>(self, value: i8) -> Result<Self::Value, E>
where
E: de::Error
E: de::Error,
{
Ok(i32::from(value))
}
fn visit_i16<E>(self, value: i16) -> Result<Self::Value, E>
where
E: de::Error
E: de::Error,
{
Ok(i32::from(value))
}
fn visit_i32<E>(self, value: i32) -> Result<Self::Value, E>
where
E: de::Error
E: de::Error,
{
Ok(value)
}
fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
where
E: de::Error
E: de::Error,
{
use std::i32;
if value >= i64::from(i32::MIN) && value <= i64::from(i32::MAX) {
@ -154,21 +218,21 @@ where
fn visit_u8<E>(self, value: u8) -> Result<Self::Value, E>
where
E: de::Error
E: de::Error,
{
Ok(i32::from(value))
}
fn visit_u16<E>(self, value: u16) -> Result<Self::Value, E>
where
E: de::Error
E: de::Error,
{
Ok(i32::from(value))
}
fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
where
E: de::Error
E: de::Error,
{
use std::{i32, u32};
if value <= i32::MAX as u32 {
@ -180,7 +244,7 @@ where
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: de::Error
E: de::Error,
{
use std::{i32, u64};
if value <= i32::MAX as u64 {
@ -192,9 +256,9 @@ where
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: de::Error
E: de::Error,
{
// https://brokenco.de/2020/08/03/serde-deserialize-with-string.html
// https://brokenco.de/2020/08/03/serde-deserialize-with-string.html
value.parse::<i32>().map_err(serde::de::Error::custom)
}
}

View File

@ -1,56 +1,26 @@
use crate::structs;
use std::env;
use std::fs::File;
use std::io::Write;
extern crate serde_json;
extern crate reqwest;
pub async fn get_sauce_info(client: reqwest::Client, sauce: i32) -> Result<structs::GalleryInfo, reqwest::Error> {
let mut uri = String::from("https://nhentai.net/api/gallery/");
uri.push_str(&sauce.to_string());
let resp = client.get(&uri)
.send()
.await?;
Ok(serde_json::from_str(&resp.text().await?).unwrap())
}
use std::process::exit;
pub async fn get_search_info(client: reqwest::Client, search_query: &str) -> Result<structs::SearchInfo, reqwest::Error> {
let uri = "https://nhentai.net/api/galleries/search";
let resp = client.get(uri)
.query(&[("query", search_query)])
.send()
.await?;
Ok(serde_json::from_str(&resp.text().await?).unwrap())
}
pub async fn download_file(client: reqwest::Client, url: &str, file_name: &str) -> Result<bool, reqwest::Error> {
let resp = client.get(url)
.send()
.await?;
Ok(match resp.headers().get("Content-Type") {
Some(header) if header.to_str().unwrap_or_default().starts_with("image/") => {
let bytes = resp.bytes().await?;
let mut file = File::create(&file_name).unwrap();
file.write_all(&bytes).unwrap();
true
},
_ => false
})
}
pub fn get_arg_sauces(args: env::Args) -> Result<Vec<i32>, String> {
pub fn get_arg_sauces(args: env::Args) -> Vec<i32> {
let mut sauces: Vec<i32> = Vec::new();
for sauce in args {
let sauce: i32 = match sauce.parse() {
Ok(sauce) => sauce,
Err(_) => {
return Err(format!("{} is not a number/sauce", sauce));
}
Ok(sauce) => sauce,
Err(_) => {
eprintln!("{} is not a number/sauce", sauce);
exit(1);
}
};
if !sauces.contains(&sauce) {
sauces.push(sauce);
}
}
Ok(sauces)
if sauces.len() < 1 {
eprintln!("Missing sauce(s)");
exit(1);
}
sauces
}