diff --git a/CMakeLists.txt b/CMakeLists.txt
index eaf8ae8..3caef15 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,7 +23,7 @@ list(APPEND FLAGS -Werror -Wall -Wextra -Wshadow -Wpedantic -Wno-gnu-anonymous-s
 
 add_link_options(${FLAGS})
 
-add_executable(${PROJECT_NAME} main.cpp misc.cpp config.cpp servehelper.cpp pixivclient.cpp blankie/serializer.cpp blankie/escape.cpp
+add_executable(${PROJECT_NAME} main.cpp misc.cpp config.cpp servehelper.cpp pixivclient.cpp blankie/serializer.cpp blankie/escape.cpp blankie/murl.cpp
     routes/home.cpp routes/css.cpp routes/users/common.cpp routes/users/users.cpp)
 set_target_properties(${PROJECT_NAME}
     PROPERTIES
diff --git a/blankie/murl.cpp b/blankie/murl.cpp
new file mode 100644
index 0000000..0e41a11
--- /dev/null
+++ b/blankie/murl.cpp
@@ -0,0 +1,171 @@
+#include <cerrno>
+#include <climits>
+#include <cstdlib>
+#include <regex>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "murl.h"
+
+#define UNRESERVED "[\\w\\d\\-._~]"
+#define PCT_ENCODED "%[\\da-f]{2}"
+// A space is added to sub-delims to make it work with URLs that have a space
+#define SUB_DELIMS "[!$&'()*+,;= ]"
+
+#define USERINFO "((?:" UNRESERVED "|" PCT_ENCODED "|" SUB_DELIMS "|:)*)"
+// A lot looser than RFC 3986, but things will go very wrong very quickly if I comply
+#define HOST "([^/?#]+?)"
+#define PORT "(\\d*)"
+#define AUTHORITY "(?:" USERINFO "@)?" HOST "(?::" PORT ")?"
+
+#define PCHAR "(?:" UNRESERVED "|" PCT_ENCODED "|" SUB_DELIMS "|[:@])"
+#define SEGMENT PCHAR "*"
+#define PATH_ABEMPTY "((?:/" SEGMENT ")*)"
+
+#define QUERY "((?:" PCHAR "|[/?])*)"
+// Looser than RFC 3986, but fragments might as well own everything
+#define FRAGMENT "(.*)"
+
+// Capture groups: 1 scheme, 2 userinfo, 3 host, 4 port, 5 path, 6 query, 7 fragment
+#define HTTP_HTTPS_URL \
+    "(?:(https?)?:)?(?://" AUTHORITY ")?" \
+    PATH_ABEMPTY \
+    "(?:\\?" QUERY ")?" \
+    "(?:#" FRAGMENT ")?"
+
+static std::regex url_regex(HTTP_HTTPS_URL, std::regex::icase);
+static inline int to_int(const std::string& str);
+static void handle_segment(std::vector<std::string>& segments, const std::string& str, size_t offset, size_t length);
+
+namespace blankie {
+namespace murl {
+
+// Splits an http(s) URL, a scheme-less "//host/path" URL or a bare "/path" into components
+// Throws std::invalid_argument if str does not match HTTP_HTTPS_URL in its entirety,
+// and std::overflow_error if the port does not fit in an int
+Url::Url(const std::string& str) {
+    std::smatch sm;
+    if (!std::regex_match(str, sm, url_regex)) {
+        throw std::invalid_argument(str + " is not a URL");
+    }
+    this->scheme = sm.str(1);
+    this->userinfo = sm.str(2);
+    this->hostname = sm.str(3);
+    this->port = sm[4].length() > 0 ? to_int(sm.str(4)) : -1;
+    this->path = sm.str(5);
+    this->query = sm.str(6);
+    this->fragment = sm.str(7);
+}
+
+// Reassembles the URL; scheme, userinfo and port are only written when there is a
+// hostname, and an empty query or fragment is left out together with its delimiter
+std::string Url::to_string() const {
+    std::string str;
+    if (!this->hostname.empty()) {
+        if (!this->scheme.empty()) {
+            str += this->scheme + ':';
+        }
+        str += "//";
+        if (!this->userinfo.empty()) {
+            str += this->userinfo + '@';
+        }
+        str += this->hostname;
+        if (this->port != -1) {
+            str += ':';
+            str += std::to_string(this->port);
+        }
+    }
+    str += this->path;
+    if (!this->query.empty()) {
+        str += '?';
+        str += this->query;
+    }
+    if (!this->fragment.empty()) {
+        str += '#';
+        str += this->fragment;
+    }
+    return str;
+}
+
+// Drops empty and "." segments and lets each ".." remove the segment before it (a ".."
+// with nothing left to remove is ignored). A leading slash is kept; a trailing slash
+// is kept only if at least one segment survives
+std::string normalize_path(const std::string& str) {
+    std::vector<std::string> segments;
+    std::string res;
+    size_t pos = 0;
+    size_t last_pos = 0;
+    bool starts_with_slash = false;
+    bool ends_with_slash = false;
+
+    if (str.size() > 0 && str[0] == '/') {
+        starts_with_slash = true;
+        last_pos = pos = 1;
+    }
+
+    while ((pos = str.find('/', pos)) != std::string::npos) {
+        handle_segment(segments, str, last_pos, pos - last_pos);
+        last_pos = ++pos;
+    }
+    if (str.size() > last_pos) {
+        handle_segment(segments, str, last_pos, str.size() - last_pos);
+    }
+
+    if (str.size() > 1 && str.back() == '/' && !segments.empty()) {
+        ends_with_slash = true;
+    }
+
+    if (starts_with_slash) {
+        res += '/';
+    }
+    for (size_t i = 0; i < segments.size(); i++) {
+        if (i != 0) {
+            res += '/';
+        }
+        res += std::move(segments[i]);
+    }
+    if (ends_with_slash) {
+        res += '/';
+    }
+
+    return res;
+}
+
+}; // namespace murl
+}; // namespace blankie
+
+// Parses a base-10 integer. Throws std::overflow_error or std::underflow_error if the
+// value does not fit in an int and std::invalid_argument if text follows the number
+static inline int to_int(const std::string& str) {
+    char* endptr;
+
+    // strtol() signals out-of-range input only by clamping to LONG_MAX/LONG_MIN and setting
+    // errno to ERANGE, which is the sole indication wherever long is no wider than int
+    errno = 0;
+    long res = strtol(str.c_str(), &endptr, 10);
+    if (res > INT_MAX || (errno == ERANGE && res == LONG_MAX)) {
+        throw std::overflow_error(str + " is too big");
+    } else if (res < INT_MIN || (errno == ERANGE && res == LONG_MIN)) {
+        throw std::underflow_error(str + " is too small");
+    } else if (endptr[0] != '\0') {
+        throw std::invalid_argument(str + " has trailing text");
+    }
+
+    return static_cast<int>(res);
+}
+
+// Applies the segment str[offset, offset + length) to segments: ".." pops the last
+// segment (if any), "." and empty segments are skipped, anything else is appended
+static void handle_segment(std::vector<std::string>& segments, const std::string& str, size_t offset, size_t length) {
+    if (length == 2 && str[offset] == '.' && str[offset + 1] == '.') {
+        if (segments.empty()) {
+            return;
+        }
+        segments.pop_back();
+    } else if (length == 1 && str[offset] == '.') {
+        // do nothing
+    } else if (length != 0) {
+        segments.push_back(str.substr(offset, length));
+    }
+}
diff --git a/blankie/murl.h b/blankie/murl.h
new file mode 100644
index 0000000..8f37c23
--- /dev/null
+++ b/blankie/murl.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <string>
+
+namespace blankie {
+namespace murl {
+
+// The components of an http(s) URL; a component that is missing from the parsed
+// string is left empty (port: -1)
+struct Url {
+    std::string scheme;
+    std::string userinfo;
+    std::string hostname;
+    int port = -1; // -1 if unspecified
+    std::string path;
+    std::string query;
+    std::string fragment;
+
+    // Deliberately not explicit: callers pass and assign plain std::strings
+    // Throws std::invalid_argument if str cannot be parsed
+    Url(const std::string& str);
+
+    // Returns "scheme://hostname[:port]", or "hostname[:port]" if there is no scheme
+    // Not constexpr: std::to_string() is not usable in constant expressions and Url
+    // has no constexpr constructor, so the specifier could never be honored
+    std::string get_origin() const {
+        std::string res;
+        if (!this->scheme.empty()) {
+            res = this->scheme + "://";
+        }
+        res += this->hostname;
+        if (this->port != -1) {
+            res += ':';
+            res += std::to_string(this->port);
+        }
+        return res;
+    }
+
+    // Serializes the URL back into a string
+    std::string to_string() const;
+};
+
+// Resolves "." and ".." segments and collapses repeated slashes in a path
+std::string normalize_path(const std::string& str);
+
+}; // namespace murl
+}; // namespace blankie
diff --git a/config.cpp b/config.cpp
index 1346091..adc0838 100644
--- a/config.cpp
+++ b/config.cpp
@@ -16,5 +16,5 @@ void from_json(const nlohmann::json& j, Config& config) {
     if (config.bind_port < 0) {
         throw std::invalid_argument("Invalid port to bind to: "s + std::to_string(config.bind_port));
     }
-    j.at("image_proxy_url").get_to(config.image_proxy_url);
+    config.image_proxy_url = j.at("image_proxy_url").get<std::string>();
 }
diff --git a/config.h b/config.h
index cd67f4a..25b8789 100644
--- a/config.h
+++ b/config.h
@@ -2,11 +2,12 @@
 
 #include
 #include
+#include "blankie/murl.h"
 
 struct Config {
     std::string bind_host = "127.0.0.1";
     int bind_port = 8080;
-    std::string image_proxy_url = "https://i.pixiv.cat";
+    blankie::murl::Url image_proxy_url{"https://i.pixiv.cat"};
 };
 
 Config load_config(const char* path);
diff --git a/pixivclient.cpp b/pixivclient.cpp
index 1dade6a..c2b2053 100644
--- a/pixivclient.cpp
+++ b/pixivclient.cpp
@@ -1,10 +1,11 @@
 #include
 
+#include "blankie/murl.h"
 #include "pixivclient.h"
 
-static inline std::optional<std::string> get_1920x960_cover_image(const std::string& thumbnail);
-static inline std::optional<std::string> get_original_cover_image(const std::string& thumbnail);
-static inline std::optional<std::string> get_original_profile_picture(const std::string& thumbnail);
+static inline std::optional<std::string> get_1920x960_cover_image(blankie::murl::Url url);
+static inline std::optional<std::string> get_original_cover_image(blankie::murl::Url url);
+static inline std::optional<std::string> get_original_profile_picture(blankie::murl::Url url);
 static inline uint64_t to_ull(const std::string& str);
 
 PixivClient::PixivClient() {
@@ -81,43 +82,44 @@ void from_json(const nlohmann::json& j, User& user) {
     add_social_as_needed("pawoo", "Pawoo");
 }
 
-static std::regex c1920x960_cover_image_thumbnail_regex(
-    "((?:https?://)?(?:i\\.pximg\\.net)?)" // optional scheme and host
-    "/c/(\\d+x\\d+)(.+)"
-);
-static inline std::optional<std::string> get_1920x960_cover_image(const std::string& thumbnail) {
+// Swaps the resolution in a "/c/<W>x<H>..." path for 1920x960; std::nullopt if the path
+// has no such prefix or already asks for 1920x960
+static std::regex resolution_path_regex("/c/(\\d+x\\d+)(.+)");
+static inline std::optional<std::string> get_1920x960_cover_image(blankie::murl::Url url) {
     std::smatch sm;
-    if (!std::regex_match(thumbnail, sm, c1920x960_cover_image_thumbnail_regex)) {
+    if (!std::regex_match(url.path, sm, resolution_path_regex)) {
         return std::nullopt;
     }
-    if (sm[2] == "1920x960") {
+    if (sm[1] == "1920x960") {
         return std::nullopt;
     }
-    return sm[1].str() + "/c/1920x960" + sm[3].str();
+    url.path = "/c/1920x960" + sm.str(2);
+    return url.to_string();
 }
 
-static std::regex original_cover_image_thumbnail_regex(
-    "((?:https?://)?(?:i\\.pximg\\.net)?)" // optional scheme and host
-    "/c/[0-9a-z_-]+(/.+)_master\\d+(\\.\\w{3,4})"
-);
-static inline std::optional<std::string> get_original_cover_image(const std::string& thumbnail) {
+// Strips the "/c/<variant>" prefix and the "_master<N>" suffix from the path; std::nullopt
+// if the path does not have that shape
+// The slash after the variant belongs to the capture so that the rebuilt path stays absolute
+static std::regex thumbnail_path_regex("/c/[^/]+(/.+)_master\\d+(\\.\\w{3,4})?");
+static inline std::optional<std::string> get_original_cover_image(blankie::murl::Url url) {
     std::smatch sm;
-    if (!std::regex_match(thumbnail, sm, original_cover_image_thumbnail_regex)) {
+    if (!std::regex_match(url.path, sm, thumbnail_path_regex)) {
         return std::nullopt;
     }
-    return sm[1].str() + sm[2].str() + sm[3].str();
+    url.path = sm.str(1) + sm.str(2);
+    return url.to_string();
 }
 
-static std::regex profile_picture_thumbnail_regex(
-    "((?:https?://)?(?:i\\.pximg\\.net)?)" // optional scheme and host
-    "(/.+)_\\d+(\\.\\w{3,4})"
-);
-static inline std::optional<std::string> get_original_profile_picture(const std::string& thumbnail) {
+// Strips the "_<digits>" suffix (two or more digits) that precedes the extension at the end
+// of the path; std::nullopt if there is none
+static std::regex profile_picture_thumbnail_path_regex("(/.+)_\\d{2,}(\\.\\w{3,4})");
+static inline std::optional<std::string> get_original_profile_picture(blankie::murl::Url url) {
     std::smatch sm;
-    if (!std::regex_match(thumbnail, sm, profile_picture_thumbnail_regex)) {
+    if (!std::regex_match(url.path, sm, profile_picture_thumbnail_path_regex)) {
         return std::nullopt;
     }
-    return sm[1].str() + sm[2].str() + sm[3].str();
+    url.path = sm.str(1) + sm.str(2);
+    return url.to_string();
 }
 
 static inline uint64_t to_ull(const std::string& str) {
diff --git a/routes/users/common.cpp b/routes/users/common.cpp
index 540bdf8..450c317 100644
--- a/routes/users/common.cpp
+++ b/routes/users/common.cpp
@@ -8,20 +8,20 @@ static std::string thumbnail_or_original(const Images& images);
 static std::string original_or_thumbnail(const Images& images);
 
 Element generate_user_header(const User& user, const Config& config) {
-    auto proxy_url = [&](std::string url) {
-        return config.image_proxy_url + remove_origin(std::move(url));
-    };
-
     Element header("header");
     if (user.cover_images) {
-        header.nodes.push_back(Element("a", {{"href", proxy_url(original_or_thumbnail(*user.cover_images))}}, {
-            Element("img", {{"class", "cover"}, {"src", proxy_url(thumbnail_or_original(*user.cover_images))}}, {})
+        std::string cover_original = proxy_image_url(config, original_or_thumbnail(*user.cover_images));
+        std::string cover_thumbnail = proxy_image_url(config, thumbnail_or_original(*user.cover_images));
+        header.nodes.push_back(Element("a", {{"href", std::move(cover_original)}}, {
+            Element("img", {{"class", "cover"}, {"src", std::move(cover_thumbnail)}}, {})
         }));
     }
 
+    std::string profile_picture_original = proxy_image_url(config, original_or_thumbnail(user.profile_pictures));
+    std::string profile_picture_thumbnail = proxy_image_url(config, thumbnail_or_original(user.profile_pictures));
     header.nodes.push_back(Element("div", {{"class", "usermetadata"}}, {
-        Element("a", {{"href", proxy_url(original_or_thumbnail(user.profile_pictures))}}, {
-            Element("img", {{"class", "profilepicture"}, {"src", proxy_url(thumbnail_or_original(user.profile_pictures))}}, {})
+        Element("a", {{"href", std::move(profile_picture_original)}}, {
+            Element("img", {{"class", "profilepicture"}, {"src", std::move(profile_picture_thumbnail)}}, {})
         }),
         Element("div", {
             Element("p", {Element("b", {user.display_name}), " (@", user.username, ")"}),
diff --git a/servehelper.cpp b/servehelper.cpp
index bc0c8e0..c16f1d1 100644
--- a/servehelper.cpp
+++ b/servehelper.cpp
@@ -3,15 +3,12 @@
 #include "config.h"
 #include "servehelper.h"
 
-static inline std::string get_image_proxy_origin(const std::string& url);
-
 void serve(const httplib::Request& req, httplib::Response& res, const Config& config, std::string title, Element element) {
     using namespace std::string_literals;
 
-    std::string origin = get_origin(req, config);
-    std::string css_url = origin + "/style.css";
+    std::string css_url = get_origin(req, config) + "/style.css";
     res.set_header("Content-Security-Policy", "default-src 'none'; style-src "s + css_url
-        + "; img-src " + get_image_proxy_origin(config.image_proxy_url));
+        + "; img-src " + config.image_proxy_url.get_origin());
 
     Element html("html", {
         Element("head", {
@@ -74,29 +71,21 @@ std::string get_origin(const httplib::Request& req, const Config& config) {
     return origin;
 }
 
-static std::regex remove_origin_regex(
-    "(?:https?://)?" // optional schema
-    "(?:.+?@)?" // optional username and pass
-    "(?:[^/]+[.:][^/]+(?:\\d+)?)" // host
-    "(/.*)");
-std::string remove_origin(const std::string& url) {
-    std::smatch sm;
-    if (!std::regex_match(url, sm, remove_origin_regex)) {
-        return url;
+// Rewrites url so that it is fetched through config.image_proxy_url: the proxy supplies
+// the scheme, userinfo, host and port, url's path and query are appended to the proxy's
+// own path and query, and url's fragment replaces the proxy's
+std::string proxy_image_url(const Config& config, blankie::murl::Url url) {
+    blankie::murl::Url new_url = config.image_proxy_url;
+    if (!url.path.empty() && url.path[0] != '/') {
+        new_url.path += '/';
     }
-    return sm[1].str();
-}
-
-
-static std::regex image_proxy_regex(
-    "(https?://)?" // optional scheme
-    "(?:.+?@)?" // optional username and pass
-    "([^/]+(?::\\d+)?)" // host
-    "(?:/.*)?$");
-static inline std::string get_image_proxy_origin(const std::string& url) {
-    std::smatch sm;
-    if (!std::regex_match(url, sm, image_proxy_regex)) {
-        return url;
+    new_url.path += std::move(url.path);
+    // Only the '&' separator depends on both queries being present; the image's query
+    // must be kept even when the proxy URL has none
+    if (!new_url.query.empty() && !url.query.empty()) {
+        new_url.query += '&';
     }
-    return sm[1].str() + sm[2].str();
+    new_url.query += std::move(url.query);
+    new_url.fragment = std::move(url.fragment);
+    return new_url.to_string();
 }
diff --git a/servehelper.h b/servehelper.h
index 26b7e9f..105d709 100644
--- a/servehelper.h
+++ b/servehelper.h
@@ -3,6 +3,7 @@
 #include
 #include
 
+#include "blankie/murl.h"
 #include "blankie/serializer.h"
 
 struct Config; // forward declaration from config.h
@@ -13,4 +14,4 @@ void serve_error(const httplib::Request& req, httplib::Response& res, const Conf
     std::string title, std::optional subtitle = std::nullopt, std::optional info = std::nullopt);
 void serve_redirect(const httplib::Request& req, httplib::Response& res, const Config& config, std::string url);
 std::string get_origin(const httplib::Request& req, const Config& config);
-std::string remove_origin(const std::string& url);
+std::string proxy_image_url(const Config& config, blankie::murl::Url url);