Use a different HTML unescaper for descriptions

This commit is contained in:
blank X 2021-03-26 01:23:21 +07:00
parent 71727a7b6f
commit c706dce677
Signed by: blankie
GPG Key ID: CC15FC822C7F61F5
5 changed files with 1571 additions and 12 deletions

53
Cargo.lock generated
View File

@@ -1,5 +1,14 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
dependencies = [
"memchr",
]
[[package]]
name = "async-stream"
version = "0.3.0"
@@ -202,10 +211,11 @@ dependencies = [
[[package]]
name = "hanimers"
version = "0.1.7"
version = "0.1.8"
dependencies = [
"clap",
"quick-xml",
"regex",
"reqwest",
"serde",
"serde_json",
@@ -419,6 +429,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "once_cell"
version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3"
[[package]]
name = "openssl"
version = "0.10.32"
@@ -533,9 +549,9 @@ dependencies = [
[[package]]
name = "quick-xml"
version = "0.20.0"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26aab6b48e2590e4a64d1ed808749ba06257882b461d01ca71baeb747074a6dd"
checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b"
dependencies = [
"memchr",
]
@@ -598,6 +614,24 @@ dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"thread_local",
]
[[package]]
name = "regex-syntax"
version = "0.6.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581"
[[package]]
name = "remove_dir_all"
version = "0.5.3"
@@ -774,6 +808,15 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "thread_local"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
dependencies = [
"once_cell",
]
[[package]]
name = "tinyvec"
version = "1.1.1"
@@ -791,9 +834,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokio"
version = "1.2.0"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8190d04c665ea9e6b6a0dc45523ade572c088d2e6566244c1122671dbf4ae3a"
checksum = "134af885d758d645f0f0505c9a8b3f9bf8a348fd822e112ab5248138348f1722"
dependencies = [
"autocfg",
"bytes",

View File

@@ -1,6 +1,6 @@
[package]
name = "hanimers"
version = "0.1.7"
version = "0.1.8"
authors = ["blank X <theblankx@protonmail.com>"]
edition = "2018"
@@ -10,9 +10,10 @@ edition = "2018"
lto = true
[dependencies]
tokio = { version = "1.2", features = ["rt"] }
tokio = { version = "1.4", features = ["rt"] }
reqwest = "0.11"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
quick-xml = "0.20"
quick-xml = "0.22"
clap = { version = "2.33", default-features = false }
regex = "1.4"

View File

@@ -1,4 +1,5 @@
mod commands;
mod unescape;
mod structs;
mod utils;
use clap::{App, AppSettings, Arg, SubCommand};

View File

@@ -5,6 +5,7 @@ use serde::de::{self, Visitor};
use serde::{Deserialize, Deserializer};
use quick_xml::Reader;
use quick_xml::events::Event;
use crate::unescape::unescape;
extern crate reqwest;
extern crate quick_xml;
extern crate serde_json;
@@ -193,20 +194,20 @@ where
E: de::Error
{
// https://brokenco.de/2020/08/03/serde-deserialize-with-string.html
let mut to_return = String::new();
let mut reader = Reader::from_str(&value);
let mut text = String::with_capacity(value.len());
let mut reader = Reader::from_str(value);
reader.check_end_names(false);
let mut buf = Vec::new();
loop {
match reader.read_event(&mut buf) {
Ok(Event::Text(e)) => to_return.push_str(&e.unescape_and_decode(&reader).map_err(de::Error::custom)?),
Ok(Event::Text(e)) => text.push_str(&unescape(reader.decode(e.escaped()).map_err(serde::de::Error::custom)?)),
Ok(Event::Eof) => break,
Err(err) => panic!("Error at position {}: {:?}", reader.buffer_position(), err),
_ => ()
};
buf.clear();
}
Ok(to_return)
Ok(text)
}
}

1513
src/unescape.rs Normal file

File diff suppressed because it is too large Load Diff