From 79bae2c0130dbcdfa71074a4fc37a7f2cc74fdb2 Mon Sep 17 00:00:00 2001 From: blankie Date: Mon, 10 Apr 2023 00:34:13 +0700 Subject: [PATCH] Rewrite pixiv URLs to Pixwhile --- blankie/murl.cpp | 15 ++++++++++++++- blankie/murl.h | 3 +++ routes/artworks.cpp | 21 +++++++++++++-------- servehelper.cpp | 28 +++++++++++++++------------- servehelper.h | 1 + 5 files changed, 46 insertions(+), 22 deletions(-) diff --git a/blankie/murl.cpp b/blankie/murl.cpp index 7852b25..9d8ebc8 100644 --- a/blankie/murl.cpp +++ b/blankie/murl.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include "murl.h" @@ -14,7 +15,7 @@ // A lot looser than RFC 3986, but things will go very wrong very quickly if I comply // Hostname does not allow for stuff like "//The", but it is not important here #define HOSTCHAR "[\\w\\d\\-.]" -#define HOST "(\\[[\\da-f:.]+\\]|" HOSTCHAR "+(?:\\." HOSTCHAR "+)+)" +#define HOST "(\\[[\\da-f:.]+\\]|localhost|" HOSTCHAR "+(?:\\." HOSTCHAR "+)+)" #define PORT "(\\d*)" #define AUTHORITY "(?:" USERINFO "@)?" HOST "(?::" PORT ")?" @@ -38,6 +39,7 @@ "(?:#" FRAGMENT ")?" static void handle_segment(std::vector& segments, const std::string& str, size_t offset, size_t length); +static std::string tolower(std::string str); namespace blankie { namespace murl { @@ -87,6 +89,10 @@ std::string Url::to_string() const { return str; } +bool Url::is_host_equal(std::string other) const { + return tolower(this->hostname) == tolower(std::move(other)); +} + std::string normalize_path(const std::string& str) { std::vector segments; std::string res; @@ -143,3 +149,10 @@ static void handle_segment(std::vector& segments, const std::string segments.push_back(str.substr(offset, length)); } } + +static std::string tolower(std::string str) { + for (size_t i = 0; i < str.size(); i++) { + str[i] = static_cast(tolower(str[i])); + } + return str; +} diff --git a/blankie/murl.h b/blankie/murl.h index 341aa65..902c3b6 100644 --- a/blankie/murl.h +++ b/blankie/murl.h @@ -32,6 +32,9 @@ struct Url { return res; } + // NOT SECURE! + bool is_host_equal(std::string other) const; + std::string to_string() const; }; diff --git a/routes/artworks.cpp b/routes/artworks.cpp index f888b05..23a220a 100644 --- a/routes/artworks.cpp +++ b/routes/artworks.cpp @@ -9,8 +9,8 @@ static inline Element generate_user_link(const httplib::Request& req, const Config& config, const Illust& illust); static inline Element generate_images(const httplib::Request& req, const Config& config, const Illust& illust); static inline Element generate_preview_images(const httplib::Request& req, const Config& config, const Illust& illust); -static inline std::vector parse_description_line(std::string str); -static inline Element generate_description(const std::string& description); +static inline std::vector parse_description_line(const httplib::Request& req, const Config& config, std::string str); +static inline Element generate_description(const httplib::Request& req, const Config& config, const std::string& description); static inline Element generate_illust_tags(const Illust& illust); static inline bool is_true(const std::string& str); @@ -45,7 +45,7 @@ void artworks_route(const httplib::Request& req, httplib::Response& res, const C Element("br") }); if (illust.comment) { - body.nodes.push_back(generate_description(*illust.comment)); + body.nodes.push_back(generate_description(req, config, *illust.comment)); } body.nodes.push_back(generate_illust_tags(illust)); body.nodes.push_back(Element("p", {time_to_string(illust.upload_time)})); @@ -112,8 +112,7 @@ static inline Element generate_preview_images(const httplib::Request& req, const return div; } -// TODO proxy urls -static inline std::vector parse_description_line(std::string str) { +static inline std::vector parse_description_line(const httplib::Request& req, const Config& config, std::string str) { std::vector nodes; std::smatch sm; @@ -121,7 +120,13 @@ static inline std::vector parse_description_line(std::strin if (sm.prefix().length()) { nodes.push_back(sm.prefix()); } - nodes.push_back(Element("a", {{"href", sm.str(0)}}, {sm.str(0)})); + + blankie::murl::Url url(sm.str(0)); + std::string url_str = url.is_host_equal("pixiv.net") || url.is_host_equal("www.pixiv.net") + ? proxy_url(get_origin(req, config), std::move(url)) + : url.to_string(); + nodes.push_back(Element("a", {{"href", url_str}}, {url_str})); + str = sm.suffix(); } if (!str.empty()) { @@ -131,7 +136,7 @@ static inline std::vector parse_description_line(std::strin return nodes; } -static inline Element generate_description(const std::string& description) { +static inline Element generate_description(const httplib::Request& req, const Config& config, const std::string& description) { Element p("p"); size_t pos = 0; size_t last_pos = 0; @@ -139,7 +144,7 @@ static inline Element generate_description(const std::string& description) { if (!p.nodes.empty()) { p.nodes.push_back(Element("br")); } - std::vector nodes = parse_description_line(std::move(str)); + std::vector nodes = parse_description_line(req, config, std::move(str)); p.nodes.insert(p.nodes.end(), nodes.begin(), nodes.end()); }; diff --git a/servehelper.cpp b/servehelper.cpp index 0ed2151..ccb5022 100644 --- a/servehelper.cpp +++ b/servehelper.cpp @@ -72,20 +72,22 @@ std::string get_origin(const httplib::Request& req, const Config& config) { return origin; } +std::string proxy_url(blankie::murl::Url base, blankie::murl::Url url) { + if (!url.path.empty() && url.path[0] != '/') { + base.path += '/'; + } + base.path += blankie::murl::normalize_path(std::move(url.path)); + if (!base.query.empty() && !url.query.empty()) { + base.query += '&'; + } + base.query += std::move(url.query); + base.fragment = std::move(url.fragment); + return base.to_string(); +} + std::string proxy_image_url(const Config& config, blankie::murl::Url url) { - if (url.hostname == "s.pximg.net") { + if (url.is_host_equal("s.pximg.net")) { return url.to_string(); } - - blankie::murl::Url new_url = config.image_proxy_url; - if (!url.path.empty() && url.path[0] != '/') { - new_url.path += '/'; - } - new_url.path += blankie::murl::normalize_path(std::move(url.path)); - if (!new_url.query.empty() && !url.query.empty()) { - new_url.query += '&'; - new_url.query += std::move(url.query); - } - new_url.fragment = std::move(url.fragment); - return new_url.to_string(); + return proxy_url(config.image_proxy_url, std::move(url)); } diff --git a/servehelper.h b/servehelper.h index 105d709..efc7a0d 100644 --- a/servehelper.h +++ b/servehelper.h @@ -14,4 +14,5 @@ void serve_error(const httplib::Request& req, httplib::Response& res, const Conf std::string title, std::optional subtitle = std::nullopt, std::optional info = std::nullopt); void serve_redirect(const httplib::Request& req, httplib::Response& res, const Config& config, std::string url); std::string get_origin(const httplib::Request& req, const Config& config); +std::string proxy_url(blankie::murl::Url base, blankie::murl::Url url); std::string proxy_image_url(const Config& config, blankie::murl::Url url);