Proxy Mastodon links to Coyote
This commit is contained in:
parent
f16f03059c
commit
462dbb1b10
|
@ -1,3 +1,6 @@
|
||||||
[submodule "thirdparty/httplib"]
|
[submodule "thirdparty/httplib"]
|
||||||
path = thirdparty/httplib
|
path = thirdparty/httplib
|
||||||
url = https://github.com/yhirose/cpp-httplib.git
|
url = https://github.com/yhirose/cpp-httplib.git
|
||||||
|
[submodule "thirdparty/lexbor"]
|
||||||
|
path = thirdparty/lexbor
|
||||||
|
url = https://github.com/lexbor/lexbor.git
|
||||||
|
|
|
@ -7,6 +7,8 @@ find_package(nlohmann_json REQUIRED)
|
||||||
find_package(CURL REQUIRED)
|
find_package(CURL REQUIRED)
|
||||||
set(HTTPLIB_REQUIRE_OPENSSL ON)
|
set(HTTPLIB_REQUIRE_OPENSSL ON)
|
||||||
add_subdirectory(thirdparty/httplib)
|
add_subdirectory(thirdparty/httplib)
|
||||||
|
set(LEXBOR_BUILD_SHARED OFF)
|
||||||
|
add_subdirectory(thirdparty/lexbor)
|
||||||
#find_package(PkgConfig REQUIRED)
|
#find_package(PkgConfig REQUIRED)
|
||||||
#pkg_check_modules(HIREDIS REQUIRED hiredis)
|
#pkg_check_modules(HIREDIS REQUIRED hiredis)
|
||||||
|
|
||||||
|
@ -36,5 +38,5 @@ set_target_properties(${PROJECT_NAME}
|
||||||
CXX_EXTENSIONS NO
|
CXX_EXTENSIONS NO
|
||||||
)
|
)
|
||||||
target_include_directories(${PROJECT_NAME} PRIVATE thirdparty ${HIREDIS_INCLUDE_DIRS})
|
target_include_directories(${PROJECT_NAME} PRIVATE thirdparty ${HIREDIS_INCLUDE_DIRS})
|
||||||
target_link_libraries(${PROJECT_NAME} PRIVATE nlohmann_json::nlohmann_json httplib::httplib CURL::libcurl ${HIREDIS_LINK_LIBRARIES})
|
target_link_libraries(${PROJECT_NAME} PRIVATE nlohmann_json::nlohmann_json CURL::libcurl httplib::httplib lexbor_static ${HIREDIS_LINK_LIBRARIES})
|
||||||
target_compile_options(${PROJECT_NAME} PRIVATE ${FLAGS})
|
target_compile_options(${PROJECT_NAME} PRIVATE ${FLAGS})
|
||||||
|
|
|
@ -0,0 +1,93 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <string>
|
||||||
|
#include <exception>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wconversion"
|
||||||
|
#include <lexbor/html/html.h>
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
namespace LXB {
|
||||||
|
|
||||||
|
class Exception : public std::exception {
|
||||||
|
public:
|
||||||
|
Exception(lxb_status_t status) {
|
||||||
|
using namespace std::string_literals;
|
||||||
|
this->_msg = "LXB Exception: "s + std::to_string(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* what() const noexcept {
|
||||||
|
return this->_msg.c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string _msg;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace HTML {
|
||||||
|
|
||||||
|
class Document {
|
||||||
|
public:
|
||||||
|
Document(const Document&&) = delete;
|
||||||
|
Document&& operator=(const Document&&) = delete;
|
||||||
|
|
||||||
|
Document(const std::string& str) {
|
||||||
|
this->_document = lxb_html_document_create();
|
||||||
|
if (!this->_document) {
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
|
lxb_status_t status = lxb_html_document_parse(this->_document, reinterpret_cast<const lxb_char_t*>(str.data()), str.size());
|
||||||
|
if (status != LXB_STATUS_OK) {
|
||||||
|
lxb_html_document_destroy(this->_document);
|
||||||
|
throw Exception(status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
~Document() {
|
||||||
|
lxb_html_document_destroy(this->_document);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr lxb_dom_node_t* body() const noexcept {
|
||||||
|
lxb_dom_node_t* node = lxb_dom_interface_node(this->_document);
|
||||||
|
lxb_dom_node_t* html = lxb_dom_node_first_child(node);
|
||||||
|
lxb_dom_node_t* body = lxb_dom_node_last_child(html);
|
||||||
|
return body;
|
||||||
|
}
|
||||||
|
constexpr lxb_dom_element_t* body_element() const noexcept {
|
||||||
|
lxb_dom_node_t* body = this->body();
|
||||||
|
assert(body->type == LXB_DOM_NODE_TYPE_ELEMENT);
|
||||||
|
return lxb_dom_interface_element(body);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string serialize() const {
|
||||||
|
std::string res;
|
||||||
|
lxb_dom_node_t* body = this->body();
|
||||||
|
|
||||||
|
lxb_dom_node_t* child = lxb_dom_node_first_child(body);
|
||||||
|
while (child) {
|
||||||
|
lxb_status_t status = lxb_html_serialize_tree_cb(child, Document::_serialize_cb, &res);
|
||||||
|
if (status != LXB_STATUS_OK) {
|
||||||
|
throw Exception(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
child = lxb_dom_node_next(child);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static lxb_status_t _serialize_cb(const lxb_char_t* data, size_t len, void* ctx) {
|
||||||
|
std::string* str = reinterpret_cast<std::string*>(ctx);
|
||||||
|
str->append(reinterpret_cast<const char*>(data), len);
|
||||||
|
return LXB_STATUS_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
lxb_html_document_t* _document;
|
||||||
|
};
|
||||||
|
|
||||||
|
}; // namespace HTML
|
||||||
|
|
||||||
|
}; // namespace LXB
|
|
@ -8,7 +8,7 @@ static const char* sorting_method_suffixes[3] = {"", "/with_replies", "/media"};
|
||||||
static inline PostSortingMethod get_sorting_method(const std::string& method);
|
static inline PostSortingMethod get_sorting_method(const std::string& method);
|
||||||
|
|
||||||
static inline Element user_header(const httplib::Request& req, const Account& account, PostSortingMethod sorting_method);
|
static inline Element user_header(const httplib::Request& req, const Account& account, PostSortingMethod sorting_method);
|
||||||
static inline Element user_link_field(const AccountField& field);
|
static inline Element user_link_field(const httplib::Request& req, const std::string& domain_name, const AccountField& field);
|
||||||
static inline Element sorting_method_link(const httplib::Request& req, const Account& account, PostSortingMethod current_method, PostSortingMethod new_method);
|
static inline Element sorting_method_link(const httplib::Request& req, const Account& account, PostSortingMethod current_method, PostSortingMethod new_method);
|
||||||
|
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ static inline Element user_header(const httplib::Request& req, const Account& ac
|
||||||
Element user_links("table", {{"class", "user_page-user_links"}}, {});
|
Element user_links("table", {{"class", "user_page-user_links"}}, {});
|
||||||
user_links.nodes.reserve(account.fields.size());
|
user_links.nodes.reserve(account.fields.size());
|
||||||
for (const AccountField& i : account.fields) {
|
for (const AccountField& i : account.fields) {
|
||||||
user_links.nodes.push_back(user_link_field(i));
|
user_links.nodes.push_back(user_link_field(req, account.domain_name, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
Element header("header", {
|
Element header("header", {
|
||||||
|
@ -82,7 +82,7 @@ static inline Element user_header(const httplib::Request& req, const Account& ac
|
||||||
}),
|
}),
|
||||||
|
|
||||||
Element("div", {{"class", "user_page-user_description"}}, {
|
Element("div", {{"class", "user_page-user_description"}}, {
|
||||||
Element("div", {{"class", "user_page-user_bio"}}, {account.note_html}),
|
Element("div", {{"class", "user_page-user_bio"}}, {preprocess_html(req, account.domain_name, account.note_html)}),
|
||||||
|
|
||||||
std::move(user_links),
|
std::move(user_links),
|
||||||
}),
|
}),
|
||||||
|
@ -96,12 +96,12 @@ static inline Element user_header(const httplib::Request& req, const Account& ac
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline Element user_link_field(const AccountField& field) {
|
static inline Element user_link_field(const httplib::Request& req, const std::string& domain_name, const AccountField& field) {
|
||||||
using namespace std::string_literals;
|
using namespace std::string_literals;
|
||||||
|
|
||||||
Element tr("tr", {
|
Element tr("tr", {
|
||||||
Element("th", {field.name}),
|
Element("th", {field.name}),
|
||||||
Element("td", {field.value_html}),
|
Element("td", {preprocess_html(req, domain_name, field.value_html)}),
|
||||||
});
|
});
|
||||||
if (field.verified_at >= 0) {
|
if (field.verified_at >= 0) {
|
||||||
struct tm verified_at;
|
struct tm verified_at;
|
||||||
|
|
104
servehelper.cpp
104
servehelper.cpp
|
@ -6,8 +6,13 @@
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "servehelper.h"
|
#include "servehelper.h"
|
||||||
|
#include "lxb_wrapper.h"
|
||||||
#include "routes/routes.h"
|
#include "routes/routes.h"
|
||||||
|
|
||||||
|
// Visits `element` and, recursively, its descendant elements, handing every "A" element to preprocess_link().
static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element);
|
||||||
|
// Rewrites the href of `element` through proxy_mastodon_url() when it points at https://<domain_name>, and replaces the link text with the href when should_fix_link() returns true.
static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element);
|
||||||
|
// Returns whether the children of `element` are exactly three SPAN elements whose class attributes are "invisible", then "ellipsis" or "", then "invisible".
static inline bool should_fix_link(lxb_dom_element_t* element);
|
||||||
|
|
||||||
class CurlUrlException : public std::exception {
|
class CurlUrlException : public std::exception {
|
||||||
public:
|
public:
|
||||||
CurlUrlException(CURLUcode code_) : code(code_) {}
|
CurlUrlException(CURLUcode code_) : code(code_) {}
|
||||||
|
@ -119,18 +124,18 @@ std::string proxy_mastodon_url(const httplib::Request& req, const std::string& u
|
||||||
throw CurlUrlException(code);
|
throw CurlUrlException(code);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto get_part = [&](CURLUPart part) {
|
auto get_part = [&](CURLUPart part, CURLUcode ignore = CURLUE_OK) {
|
||||||
char* content;
|
char* content = nullptr;
|
||||||
CURLUcode code = curl_url_get(url.get(), part, &content, 0);
|
CURLUcode code = curl_url_get(url.get(), part, &content, 0);
|
||||||
if (code) {
|
if (code && code != ignore) {
|
||||||
throw CurlUrlException(code);
|
throw CurlUrlException(code);
|
||||||
}
|
}
|
||||||
return CurlStr(content, curl_free);
|
return CurlStr(content, curl_free);
|
||||||
};
|
};
|
||||||
CurlStr host = get_part(CURLUPART_HOST);
|
CurlStr host = get_part(CURLUPART_HOST);
|
||||||
CurlStr path = get_part(CURLUPART_PATH);
|
CurlStr path = get_part(CURLUPART_PATH);
|
||||||
CurlStr query = get_part(CURLUPART_QUERY);
|
CurlStr query = get_part(CURLUPART_QUERY, CURLUE_NO_QUERY);
|
||||||
CurlStr fragment = get_part(CURLUPART_FRAGMENT);
|
CurlStr fragment = get_part(CURLUPART_FRAGMENT, CURLUE_NO_FRAGMENT);
|
||||||
|
|
||||||
std::string new_url = get_origin(req) + '/' + host.get() + path.get();
|
std::string new_url = get_origin(req) + '/' + host.get() + path.get();
|
||||||
if (query) {
|
if (query) {
|
||||||
|
@ -153,3 +158,92 @@ bool should_send_304(const httplib::Request& req, uint64_t hash) {
|
||||||
size_t pos = header.find(std::string(1, '"') + std::to_string(hash) + '"');
|
size_t pos = header.find(std::string(1, '"') + std::to_string(hash) + '"');
|
||||||
return pos != std::string::npos && (pos == 0 || header[pos - 1] != '/');
|
return pos != std::string::npos && (pos == 0 || header[pos - 1] != '/');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses `str` as HTML, runs the element-level preprocess_html() overload on
// the parsed body element, and returns the re-serialized result.
blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const blankie::html::HTMLString& str) {
    LXB::HTML::Document parsed(str.str);

    // Rewrite the tree in place, starting from the body element
    lxb_dom_element_t* root = parsed.body_element();
    preprocess_html(req, domain_name, root);

    return blankie::html::HTMLString(parsed.serialize());
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Visits `element` and, recursively, all of its descendant elements. Every
// element whose tag name is exactly "A" is passed to preprocess_link().
static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element) {
    size_t tag_name_len = 0;
    const lxb_char_t* tag_name = lxb_dom_element_tag_name(element, &tag_name_len);

    // Check the pointer before reading it: a null tag name is simply treated
    // as "not a link" instead of being handed to a string comparison.
    if (tag_name && tag_name_len == 1 && tag_name[0] == 'A') {
        // Preprocess links
        preprocess_link(req, domain_name, element);
    }

    // Walk through the element's children; only element nodes are recursed into
    for (lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element)); child; child = lxb_dom_node_next(child)) {
        if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            preprocess_html(req, domain_name, lxb_dom_interface_element(child));
        }
    }
}
|
||||||
|
|
||||||
|
// Preprocesses a single link element.
//
// - Does nothing if the element has no href value.
// - If the href is "https://<domain_name>" or starts with
//   "https://<domain_name>/", it is replaced by proxy_mastodon_url()'s result.
// - If should_fix_link() accepts the element, its text content is replaced
//   with the (possibly rewritten) href.
//
// Throws LXB::Exception if lexbor fails to update the attribute or the text
// content; exceptions from proxy_mastodon_url() propagate unchanged.
static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element) {
    using namespace std::string_literals;

    size_t href_c_len = 0;
    const lxb_char_t* href_c = lxb_dom_element_get_attribute(element, reinterpret_cast<const lxb_char_t*>("href"), 4, &href_c_len);
    if (!href_c) {
        return;
    }
    // Copy the value: the attribute may be replaced below
    std::string href(reinterpret_cast<const char*>(href_c), href_c_len);

    const std::string instance_url_base = "https://"s + domain_name;
    if (href == instance_url_base || href.starts_with(instance_url_base + '/')) {
        // Proxy this instance's URLs to Coyote
        href = proxy_mastodon_url(req, href);

        // A null result means the attribute was not set; report it instead of
        // silently keeping the old href
        if (!lxb_dom_element_set_attribute(element, reinterpret_cast<const lxb_char_t*>("href"), 4, reinterpret_cast<const lxb_char_t*>(href.data()), href.size())) {
            throw LXB::Exception(LXB_STATUS_ERROR);
        }
    }

    if (should_fix_link(element)) {
        // Set the content of each <a> to its href
        lxb_status_t status = lxb_dom_node_text_content_set(lxb_dom_interface_node(element), reinterpret_cast<const lxb_char_t*>(href.data()), href.size());
        if (status != LXB_STATUS_OK) {
            throw LXB::Exception(status);
        }
    }
}
|
||||||
|
|
||||||
|
// Returns true if the children of `element` are exactly three SPAN elements
// whose class attributes are, in order: "invisible", then "ellipsis" or "",
// then "invisible". Any other shape, including fewer or more children,
// returns false.
//
// NOTE(review): this looks like the markup used for shortened link text
// (hidden prefix, visible middle, hidden suffix) - confirm against the
// HTML this is fed.
static inline bool should_fix_link(lxb_dom_element_t* element) {
    // Checks that `node` is a SPAN element whose class attribute equals
    // `expected_cls`. A null node (no such child) is never a match.
    auto expected_element = [](lxb_dom_node_t* node, const char* expected_cls) {
        if (!node || node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
            return false;
        }
        lxb_dom_element_t* span = lxb_dom_interface_element(node);

        const char* tag_name = reinterpret_cast<const char*>(lxb_dom_element_tag_name(span, nullptr));
        if (!tag_name || strncmp(tag_name, "SPAN", 5) != 0) {
            return false;
        }

        const lxb_char_t* cls = lxb_dom_element_get_attribute(span, reinterpret_cast<const lxb_char_t*>("class"), 5, nullptr);
        return cls && strcmp(reinterpret_cast<const char*>(cls), expected_cls) == 0;
    };

    // Each successful check proves `child` is non-null, so advancing to its
    // next sibling afterwards is safe.
    lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element));
    if (!expected_element(child, "invisible")) {
        return false;
    }

    child = lxb_dom_node_next(child);
    if (!expected_element(child, "ellipsis") && !expected_element(child, "")) {
        return false;
    }

    child = lxb_dom_node_next(child);
    if (!expected_element(child, "invisible")) {
        return false;
    }

    // There must be no fourth child
    return lxb_dom_node_next(child) == nullptr;
}
|
||||||
|
|
|
@ -17,3 +17,5 @@ void serve_redirect(const httplib::Request& req, httplib::Response& res, std::st
|
||||||
std::string get_origin(const httplib::Request& req);
|
std::string get_origin(const httplib::Request& req);
|
||||||
std::string proxy_mastodon_url(const httplib::Request& req, const std::string& url_str);
|
std::string proxy_mastodon_url(const httplib::Request& req, const std::string& url_str);
|
||||||
bool should_send_304(const httplib::Request& req, uint64_t hash);
|
bool should_send_304(const httplib::Request& req, uint64_t hash);
|
||||||
|
|
||||||
|
blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const blankie::html::HTMLString& str);
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 14166847cfa85d80c9041f5387014961f70f3831
|
Loading…
Reference in New Issue