Add URL utility
This commit is contained in:
parent
7c32826b66
commit
9526a0df1b
|
@ -23,7 +23,7 @@ list(APPEND FLAGS -Werror -Wall -Wextra -Wshadow -Wpedantic -Wno-gnu-anonymous-s
|
|||
add_link_options(${FLAGS})
|
||||
|
||||
|
||||
add_executable(${PROJECT_NAME} main.cpp misc.cpp config.cpp servehelper.cpp pixivclient.cpp blankie/serializer.cpp blankie/escape.cpp
|
||||
add_executable(${PROJECT_NAME} main.cpp misc.cpp config.cpp servehelper.cpp pixivclient.cpp blankie/serializer.cpp blankie/escape.cpp blankie/murl.cpp
|
||||
routes/home.cpp routes/css.cpp routes/users/common.cpp routes/users/users.cpp)
|
||||
set_target_properties(${PROJECT_NAME}
|
||||
PROPERTIES
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>

#include "murl.h"
|
||||
|
||||
// Regex fragments for a deliberately lenient take on the RFC 3986 URL grammar.
// Each fragment is a string literal so that larger rules can be assembled by
// plain literal concatenation.
#define UNRESERVED "[\\w\\d\\-._~]"
#define PCT_ENCODED "%[\\da-f]{2}"
// A space is added to sub-delims to make it work with URLs that have a space
#define SUB_DELIMS "[!$&'()*+,;= ]"

#define USERINFO "((?:" UNRESERVED "|" PCT_ENCODED "|" SUB_DELIMS "|:)*)"
// A lot looser than RFC 3986, but things will go very wrong very quickly if I comply
#define HOST "([^/?#]+?)"
#define PORT "(\\d*)"
#define AUTHORITY "(?:" USERINFO "@)?" HOST "(?::" PORT ")?"

#define PCHAR "(?:" UNRESERVED "|" PCT_ENCODED "|" SUB_DELIMS "|[:@])"
#define SEGMENT PCHAR "*"
#define PATH_ABEMPTY "((?:/" SEGMENT ")*)"

#define QUERY "((?:" PCHAR "|[/?])*)"
// Looser than RFC 3986, but fragments might as well own everything
#define FRAGMENT "(.*)"

// Capture groups, in order:
//   1 scheme, 2 userinfo, 3 host, 4 port, 5 path, 6 query, 7 fragment
// The scheme and the whole authority are optional, so scheme-less
// ("//host/path") and path-only ("/path") inputs match as well.
#define HTTP_HTTPS_URL \
    "(?:(https?)?:)?(?://" AUTHORITY ")?" \
    PATH_ABEMPTY \
    "(?:\\?" QUERY ")?" \
    "(?:#" FRAGMENT ")?"

// Compiled once and never modified afterwards, hence const. Matching is
// case-insensitive so that the scheme and percent-encoded hex digits are
// accepted in either case.
static const std::regex url_regex(HTTP_HTTPS_URL, std::regex::icase);
static inline int to_int(const std::string& str);
static void handle_segment(std::vector<std::string>& segments, const std::string& str, size_t offset, size_t length);
|
||||
|
||||
namespace blankie {
|
||||
namespace murl {
|
||||
|
||||
// Splits `str` into its URL components.
//
// Throws std::invalid_argument when `str` does not match url_regex as a
// whole. Exceptions raised by to_int() while converting the port propagate
// to the caller.
Url::Url(const std::string& str) {
    std::smatch match;
    const bool is_url = std::regex_match(str, match, url_regex);
    if (!is_url) {
        throw std::invalid_argument(str + " is not a URL");
    }

    // Capture groups: 1 scheme, 2 userinfo, 3 host, 4 port, 5 path, 6 query, 7 fragment
    this->scheme = match[1].str();
    this->userinfo = match[2].str();
    this->hostname = match[3].str();
    if (match[4].length() > 0) {
        this->port = to_int(match[4].str());
    } else {
        // No port digits were captured
        this->port = -1;
    }
    this->path = match[5].str();
    this->query = match[6].str();
    this->fragment = match[7].str();
}
|
||||
|
||||
// Reassembles the URL from its components.
//
// The scheme, userinfo and port are only emitted when a hostname is present;
// an empty query or fragment is omitted together with its '?' / '#' prefix.
std::string Url::to_string() const {
    std::string out;

    const bool has_authority = !this->hostname.empty();
    if (has_authority) {
        if (!this->scheme.empty()) {
            out.append(this->scheme);
            out.push_back(':');
        }
        out.append("//");
        if (!this->userinfo.empty()) {
            out.append(this->userinfo);
            out.push_back('@');
        }
        out.append(this->hostname);
        // -1 marks an unspecified port
        if (this->port != -1) {
            out.push_back(':');
            out.append(std::to_string(this->port));
        }
    }

    out.append(this->path);

    if (!this->query.empty()) {
        out.push_back('?');
        out.append(this->query);
    }
    if (!this->fragment.empty()) {
        out.push_back('#');
        out.append(this->fragment);
    }

    return out;
}
|
||||
|
||||
std::string normalize_path(const std::string& str) {
|
||||
std::vector<std::string> segments;
|
||||
std::string res;
|
||||
size_t pos = 0;
|
||||
size_t last_pos = 0;
|
||||
bool starts_with_slash = false;
|
||||
bool ends_with_slash = false;
|
||||
|
||||
if (str.size() > 0 && str[0] == '/') {
|
||||
starts_with_slash = true;
|
||||
last_pos = pos = 1;
|
||||
}
|
||||
|
||||
while ((pos = str.find('/', pos)) != std::string::npos) {
|
||||
handle_segment(segments, str, last_pos, pos - last_pos);
|
||||
last_pos = ++pos;
|
||||
}
|
||||
if (str.size() > last_pos) {
|
||||
handle_segment(segments, str, last_pos, str.size() - last_pos);
|
||||
}
|
||||
|
||||
if (str.size() > 1 && str.back() == '/' && !segments.empty()) {
|
||||
ends_with_slash = true;
|
||||
}
|
||||
|
||||
if (starts_with_slash) {
|
||||
res += '/';
|
||||
}
|
||||
for (size_t i = 0; i < segments.size(); i++) {
|
||||
if (i != 0) {
|
||||
res += '/';
|
||||
}
|
||||
res += std::move(segments[i]);
|
||||
}
|
||||
if (ends_with_slash) {
|
||||
res += '/';
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
}; // namespace murl
|
||||
}; // namespace blankie
|
||||
|
||||
// Parses `str` as a base-10 integer and returns it as an int.
//
// Throws std::overflow_error if the value is greater than INT_MAX,
// std::underflow_error if it is less than INT_MIN, and std::invalid_argument
// if `str` contains no number at all or has text after the number.
static inline int to_int(const std::string& str) {
    char* endptr = nullptr;

    // strtol() reports out-of-range input by clamping to LONG_MAX/LONG_MIN and
    // setting errno to ERANGE. Where long is no wider than int the clamped
    // value still fits in an int, so errno is the only way to notice it.
    errno = 0;
    const long res = std::strtol(str.c_str(), &endptr, 10);
    const bool out_of_range = errno == ERANGE;

    if (res > INT_MAX || (out_of_range && res == LONG_MAX)) {
        throw std::overflow_error(str + " is too big");
    } else if (res < INT_MIN || (out_of_range && res == LONG_MIN)) {
        throw std::underflow_error(str + " is too small");
    } else if (endptr == str.c_str()) {
        // Nothing was consumed, e.g. an empty string
        throw std::invalid_argument(str + " is not a number");
    } else if (endptr[0] != '\0') {
        throw std::invalid_argument(str + " has trailing text");
    }

    return static_cast<int>(res);
}
|
||||
|
||||
// Applies one path segment -- the `length` characters of `str` starting at
// `offset` -- to the list of segments collected so far:
//   ""   and "." leave the list untouched,
//   ".." removes the most recently collected segment, if there is one,
//   anything else is appended.
static void handle_segment(std::vector<std::string>& segments, const std::string& str, size_t offset, size_t length) {
    if (length == 0) {
        return;
    }

    const bool is_current_dir = length == 1 && str[offset] == '.';
    if (is_current_dir) {
        return;
    }

    const bool is_parent_dir = length == 2 && str.compare(offset, length, "..") == 0;
    if (is_parent_dir) {
        if (!segments.empty()) {
            segments.pop_back();
        }
        return;
    }

    segments.push_back(str.substr(offset, length));
}
|
|
@ -0,0 +1,38 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace blankie {
namespace murl {

// A URL split into its components. Components that were absent from the
// parsed string are left empty (or -1 for the port).
struct Url {
    std::string scheme;
    std::string userinfo;
    std::string hostname;
    int port; // -1 if unspecified
    std::string path;
    std::string query;
    std::string fragment;

    // Parses `str`; throws std::invalid_argument if it is not a URL.
    // Deliberately not explicit, so a std::string can be used where a Url is
    // expected.
    Url(const std::string& str);

    // Returns "scheme://hostname", followed by ":port" when a port is set.
    // The "scheme://" prefix is left out when the scheme is empty.
    //
    // Not constexpr: std::to_string() is not usable in constant expressions
    // and std::string is not a literal type before C++20. Defined in-class,
    // so it is still implicitly inline.
    std::string get_origin() const {
        std::string res;
        if (!this->scheme.empty()) {
            res = this->scheme + "://";
        }
        res += this->hostname;
        if (this->port != -1) {
            res += ':';
            res += std::to_string(this->port);
        }
        return res;
    }

    // Reassembles the full URL string from the components.
    std::string to_string() const;
};

// Collapses ".", ".." and empty segments of a slash-separated path.
std::string normalize_path(const std::string& str);

}; // namespace murl
}; // namespace blankie
|
|
@ -16,5 +16,5 @@ void from_json(const nlohmann::json& j, Config& config) {
|
|||
if (config.bind_port < 0) {
|
||||
throw std::invalid_argument("Invalid port to bind to: "s + std::to_string(config.bind_port));
|
||||
}
|
||||
j.at("image_proxy_url").get_to(config.image_proxy_url);
|
||||
config.image_proxy_url = j.at("image_proxy_url").get<std::string>();
|
||||
}
|
||||
|
|
3
config.h
3
config.h
|
@ -2,11 +2,12 @@
|
|||
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "blankie/murl.h"
|
||||
|
||||
struct Config {
|
||||
std::string bind_host = "127.0.0.1";
|
||||
int bind_port = 8080;
|
||||
std::string image_proxy_url = "https://i.pixiv.cat";
|
||||
blankie::murl::Url image_proxy_url{"https://i.pixiv.cat"};
|
||||
};
|
||||
|
||||
Config load_config(const char* path);
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
#include <regex>
|
||||
|
||||
#include "blankie/murl.h"
|
||||
#include "pixivclient.h"
|
||||
|
||||
static inline std::optional<std::string> get_1920x960_cover_image(const std::string& thumbnail);
|
||||
static inline std::optional<std::string> get_original_cover_image(const std::string& thumbnail);
|
||||
static inline std::optional<std::string> get_original_profile_picture(const std::string& thumbnail);
|
||||
static inline std::optional<std::string> get_1920x960_cover_image(blankie::murl::Url url);
|
||||
static inline std::optional<std::string> get_original_cover_image(blankie::murl::Url url);
|
||||
static inline std::optional<std::string> get_original_profile_picture(blankie::murl::Url url);
|
||||
static inline uint64_t to_ull(const std::string& str);
|
||||
|
||||
PixivClient::PixivClient() {
|
||||
|
@ -81,43 +82,37 @@ void from_json(const nlohmann::json& j, User& user) {
|
|||
add_social_as_needed("pawoo", "Pawoo");
|
||||
}
|
||||
|
||||
static std::regex c1920x960_cover_image_thumbnail_regex(
|
||||
"((?:https?://)?(?:i\\.pximg\\.net)?)" // optional scheme and host
|
||||
"/c/(\\d+x\\d+)(.+)"
|
||||
);
|
||||
static inline std::optional<std::string> get_1920x960_cover_image(const std::string& thumbnail) {
|
||||
static std::regex resolution_path_regex("/c/(\\d+x\\d+)(.+)");
|
||||
static inline std::optional<std::string> get_1920x960_cover_image(blankie::murl::Url url) {
|
||||
std::smatch sm;
|
||||
if (!std::regex_match(thumbnail, sm, c1920x960_cover_image_thumbnail_regex)) {
|
||||
if (!std::regex_match(url.path, sm, resolution_path_regex)) {
|
||||
return std::nullopt;
|
||||
}
|
||||
if (sm[2] == "1920x960") {
|
||||
if (sm[1] == "1920x960") {
|
||||
return std::nullopt;
|
||||
}
|
||||
return sm[1].str() + "/c/1920x960" + sm[3].str();
|
||||
url.path = "/c/1920x960" + sm.str(2);
|
||||
return url.to_string();
|
||||
}
|
||||
|
||||
static std::regex original_cover_image_thumbnail_regex(
|
||||
"((?:https?://)?(?:i\\.pximg\\.net)?)" // optional scheme and host
|
||||
"/c/[0-9a-z_-]+(/.+)_master\\d+(\\.\\w{3,4})"
|
||||
);
|
||||
static inline std::optional<std::string> get_original_cover_image(const std::string& thumbnail) {
|
||||
static std::regex thumbnail_path_regex("/c/[^/]+/(.+)_master\\d+(\\.\\w{3,4})?");
|
||||
static inline std::optional<std::string> get_original_cover_image(blankie::murl::Url url) {
|
||||
std::smatch sm;
|
||||
if (!std::regex_match(thumbnail, sm, original_cover_image_thumbnail_regex)) {
|
||||
if (!std::regex_match(url.path, sm, thumbnail_path_regex)) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return sm[1].str() + sm[2].str() + sm[3].str();
|
||||
url.path = sm.str(1) + sm.str(2);
|
||||
return url.to_string();
|
||||
}
|
||||
|
||||
static std::regex profile_picture_thumbnail_regex(
|
||||
"((?:https?://)?(?:i\\.pximg\\.net)?)" // optional scheme and host
|
||||
"(/.+)_\\d+(\\.\\w{3,4})"
|
||||
);
|
||||
static inline std::optional<std::string> get_original_profile_picture(const std::string& thumbnail) {
|
||||
static std::regex profile_picture_thumbnail_path_regex("(/.+)_\\d{2,}(\\.\\w{3,4})");
|
||||
static inline std::optional<std::string> get_original_profile_picture(blankie::murl::Url url) {
|
||||
std::smatch sm;
|
||||
if (!std::regex_match(thumbnail, sm, profile_picture_thumbnail_regex)) {
|
||||
if (!std::regex_match(url.path, sm, profile_picture_thumbnail_path_regex)) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return sm[1].str() + sm[2].str() + sm[3].str();
|
||||
url.path = sm.str(1) + sm.str(2);
|
||||
return url.to_string();
|
||||
}
|
||||
|
||||
static inline uint64_t to_ull(const std::string& str) {
|
||||
|
|
|
@ -8,20 +8,20 @@ static std::string thumbnail_or_original(const Images& images);
|
|||
static std::string original_or_thumbnail(const Images& images);
|
||||
|
||||
Element generate_user_header(const User& user, const Config& config) {
|
||||
auto proxy_url = [&](std::string url) {
|
||||
return config.image_proxy_url + remove_origin(std::move(url));
|
||||
};
|
||||
|
||||
Element header("header");
|
||||
if (user.cover_images) {
|
||||
header.nodes.push_back(Element("a", {{"href", proxy_url(original_or_thumbnail(*user.cover_images))}}, {
|
||||
Element("img", {{"class", "cover"}, {"src", proxy_url(thumbnail_or_original(*user.cover_images))}}, {})
|
||||
std::string cover_original = proxy_image_url(config, original_or_thumbnail(*user.cover_images));
|
||||
std::string cover_thumbnail = proxy_image_url(config, thumbnail_or_original(*user.cover_images));
|
||||
header.nodes.push_back(Element("a", {{"href", std::move(cover_original)}}, {
|
||||
Element("img", {{"class", "cover"}, {"src", std::move(cover_thumbnail)}}, {})
|
||||
}));
|
||||
}
|
||||
|
||||
std::string profile_picture_original = proxy_image_url(config, original_or_thumbnail(user.profile_pictures));
|
||||
std::string profile_picture_thumbnail = proxy_image_url(config, thumbnail_or_original(user.profile_pictures));
|
||||
header.nodes.push_back(Element("div", {{"class", "usermetadata"}}, {
|
||||
Element("a", {{"href", proxy_url(original_or_thumbnail(user.profile_pictures))}}, {
|
||||
Element("img", {{"class", "profilepicture"}, {"src", proxy_url(thumbnail_or_original(user.profile_pictures))}}, {})
|
||||
Element("a", {{"href", std::move(profile_picture_original)}}, {
|
||||
Element("img", {{"class", "profilepicture"}, {"src", std::move(profile_picture_thumbnail)}}, {})
|
||||
}),
|
||||
Element("div", {
|
||||
Element("p", {Element("b", {user.display_name}), " (@", user.username, ")"}),
|
||||
|
|
|
@ -3,15 +3,12 @@
|
|||
#include "config.h"
|
||||
#include "servehelper.h"
|
||||
|
||||
static inline std::string get_image_proxy_origin(const std::string& url);
|
||||
|
||||
void serve(const httplib::Request& req, httplib::Response& res, const Config& config, std::string title, Element element) {
|
||||
using namespace std::string_literals;
|
||||
|
||||
std::string origin = get_origin(req, config);
|
||||
std::string css_url = origin + "/style.css";
|
||||
std::string css_url = get_origin(req, config) + "/style.css";
|
||||
res.set_header("Content-Security-Policy", "default-src 'none'; style-src "s + css_url
|
||||
+ "; img-src " + get_image_proxy_origin(config.image_proxy_url));
|
||||
+ "; img-src " + config.image_proxy_url.get_origin());
|
||||
|
||||
Element html("html", {
|
||||
Element("head", {
|
||||
|
@ -74,29 +71,16 @@ std::string get_origin(const httplib::Request& req, const Config& config) {
|
|||
return origin;
|
||||
}
|
||||
|
||||
static std::regex remove_origin_regex(
|
||||
"(?:https?://)?" // optional schema
|
||||
"(?:.+?@)?" // optional username and pass
|
||||
"(?:[^/]+[.:][^/]+(?:\\d+)?)" // host
|
||||
"(/.*)");
|
||||
std::string remove_origin(const std::string& url) {
|
||||
std::smatch sm;
|
||||
if (!std::regex_match(url, sm, remove_origin_regex)) {
|
||||
return url;
|
||||
std::string proxy_image_url(const Config& config, blankie::murl::Url url) {
|
||||
blankie::murl::Url new_url = config.image_proxy_url;
|
||||
if (!url.path.empty() && url.path[0] != '/') {
|
||||
new_url.path += '/';
|
||||
}
|
||||
return sm[1].str();
|
||||
}
|
||||
|
||||
|
||||
static std::regex image_proxy_regex(
|
||||
"(https?://)?" // optional scheme
|
||||
"(?:.+?@)?" // optional username and pass
|
||||
"([^/]+(?::\\d+)?)" // host
|
||||
"(?:/.*)?$");
|
||||
static inline std::string get_image_proxy_origin(const std::string& url) {
|
||||
std::smatch sm;
|
||||
if (!std::regex_match(url, sm, image_proxy_regex)) {
|
||||
return url;
|
||||
new_url.path += std::move(url.path);
|
||||
if (!new_url.query.empty() && !url.query.empty()) {
|
||||
new_url.query += '&';
|
||||
new_url.query += std::move(url.query);
|
||||
}
|
||||
return sm[1].str() + sm[2].str();
|
||||
new_url.fragment = std::move(url.fragment);
|
||||
return new_url.to_string();
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <optional>
|
||||
|
||||
#include <httplib/httplib.h>
|
||||
#include "blankie/murl.h"
|
||||
#include "blankie/serializer.h"
|
||||
|
||||
struct Config; // forward declaration from config.h
|
||||
|
@ -13,4 +14,4 @@ void serve_error(const httplib::Request& req, httplib::Response& res, const Conf
|
|||
std::string title, std::optional<std::string> subtitle = std::nullopt, std::optional<std::string> info = std::nullopt);
|
||||
void serve_redirect(const httplib::Request& req, httplib::Response& res, const Config& config, std::string url);
|
||||
std::string get_origin(const httplib::Request& req, const Config& config);
|
||||
std::string remove_origin(const std::string& url);
|
||||
std::string proxy_image_url(const Config& config, blankie::murl::Url url);
|
||||
|
|
Loading…
Reference in New Issue