Use a different html unescaper for descriptions
This commit is contained in:
parent
71727a7b6f
commit
c706dce677
|
@ -1,5 +1,14 @@
|
||||||
# This file is automatically @generated by Cargo.
|
# This file is automatically @generated by Cargo.
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "0.7.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-stream"
|
name = "async-stream"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
|
@ -202,10 +211,11 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hanimers"
|
name = "hanimers"
|
||||||
version = "0.1.7"
|
version = "0.1.8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"quick-xml",
|
"quick-xml",
|
||||||
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
@ -419,6 +429,12 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl"
|
name = "openssl"
|
||||||
version = "0.10.32"
|
version = "0.10.32"
|
||||||
|
@ -533,9 +549,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quick-xml"
|
name = "quick-xml"
|
||||||
version = "0.20.0"
|
version = "0.22.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "26aab6b48e2590e4a64d1ed808749ba06257882b461d01ca71baeb747074a6dd"
|
checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
@ -598,6 +614,24 @@ dependencies = [
|
||||||
"bitflags",
|
"bitflags",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
"thread_local",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.6.22"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "remove_dir_all"
|
name = "remove_dir_all"
|
||||||
version = "0.5.3"
|
version = "0.5.3"
|
||||||
|
@ -774,6 +808,15 @@ dependencies = [
|
||||||
"unicode-width",
|
"unicode-width",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thread_local"
|
||||||
|
version = "1.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
|
||||||
|
dependencies = [
|
||||||
|
"once_cell",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tinyvec"
|
name = "tinyvec"
|
||||||
version = "1.1.1"
|
version = "1.1.1"
|
||||||
|
@ -791,9 +834,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio"
|
name = "tokio"
|
||||||
version = "1.2.0"
|
version = "1.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e8190d04c665ea9e6b6a0dc45523ade572c088d2e6566244c1122671dbf4ae3a"
|
checksum = "134af885d758d645f0f0505c9a8b3f9bf8a348fd822e112ab5248138348f1722"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
"bytes",
|
"bytes",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "hanimers"
|
name = "hanimers"
|
||||||
version = "0.1.7"
|
version = "0.1.8"
|
||||||
authors = ["blank X <theblankx@protonmail.com>"]
|
authors = ["blank X <theblankx@protonmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
|
@ -10,9 +10,10 @@ edition = "2018"
|
||||||
lto = true
|
lto = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
tokio = { version = "1.2", features = ["rt"] }
|
tokio = { version = "1.4", features = ["rt"] }
|
||||||
reqwest = "0.11"
|
reqwest = "0.11"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
quick-xml = "0.20"
|
quick-xml = "0.22"
|
||||||
clap = { version = "2.33", default-features = false }
|
clap = { version = "2.33", default-features = false }
|
||||||
|
regex = "1.4"
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
mod commands;
|
mod commands;
|
||||||
|
mod unescape;
|
||||||
mod structs;
|
mod structs;
|
||||||
mod utils;
|
mod utils;
|
||||||
use clap::{App, AppSettings, Arg, SubCommand};
|
use clap::{App, AppSettings, Arg, SubCommand};
|
||||||
|
|
|
@ -5,6 +5,7 @@ use serde::de::{self, Visitor};
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{Deserialize, Deserializer};
|
||||||
use quick_xml::Reader;
|
use quick_xml::Reader;
|
||||||
use quick_xml::events::Event;
|
use quick_xml::events::Event;
|
||||||
|
use crate::unescape::unescape;
|
||||||
extern crate reqwest;
|
extern crate reqwest;
|
||||||
extern crate quick_xml;
|
extern crate quick_xml;
|
||||||
extern crate serde_json;
|
extern crate serde_json;
|
||||||
|
@ -193,20 +194,20 @@ where
|
||||||
E: de::Error
|
E: de::Error
|
||||||
{
|
{
|
||||||
// https://brokenco.de/2020/08/03/serde-deserialize-with-string.html
|
// https://brokenco.de/2020/08/03/serde-deserialize-with-string.html
|
||||||
let mut to_return = String::new();
|
let mut text = String::with_capacity(value.len());
|
||||||
let mut reader = Reader::from_str(&value);
|
let mut reader = Reader::from_str(value);
|
||||||
reader.check_end_names(false);
|
reader.check_end_names(false);
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
loop {
|
loop {
|
||||||
match reader.read_event(&mut buf) {
|
match reader.read_event(&mut buf) {
|
||||||
Ok(Event::Text(e)) => to_return.push_str(&e.unescape_and_decode(&reader).map_err(de::Error::custom)?),
|
Ok(Event::Text(e)) => text.push_str(&unescape(reader.decode(e.escaped()).map_err(serde::de::Error::custom)?)),
|
||||||
Ok(Event::Eof) => break,
|
Ok(Event::Eof) => break,
|
||||||
Err(err) => panic!("Error at position {}: {:?}", reader.buffer_position(), err),
|
Err(err) => panic!("Error at position {}: {:?}", reader.buffer_position(), err),
|
||||||
_ => ()
|
_ => ()
|
||||||
};
|
};
|
||||||
buf.clear();
|
buf.clear();
|
||||||
}
|
}
|
||||||
Ok(to_return)
|
Ok(text)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue