coyote/servehelper.cpp

307 lines
12 KiB
C++

#include <memory>
#include <exception>
#include <stdexcept>
#include <FastHash.h>
#include <curl/curl.h>
#include "config.h"
#include "models.h"
#include "servehelper.h"
#include "lxb_wrapper.h"
#include "routes/routes.h"
// Forward declarations of the file-local helpers defined further down.
// The element overload of preprocess_html() walks a DOM subtree, preprocess_link()
// and should_fix_link() handle <a> elements, and the two emojify() overloads
// replace :shortcode: occurrences in text with <img> elements.
static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector<Emoji>& emojis, lxb_dom_element_t* element);
static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element);
static inline bool should_fix_link(lxb_dom_element_t* element);
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis);
static inline std::vector<lxb_dom_node*> emojify(lxb_dom_document_t* document, std::string str, const std::vector<Emoji>& emojis);
/// Exception carrying a libcurl URL-API error code (CURLUcode).
/// what() returns the message libcurl associates with that code.
class CurlUrlException : public std::exception {
public:
    /// @param code_ the CURLUcode reported by a curl_url_* call
    explicit CurlUrlException(CURLUcode code_) : code(code_) {}

    /// Human-readable description of the stored code, as given by curl_url_strerror().
    const char* what() const noexcept override {
        return curl_url_strerror(this->code);
    }

    /// The raw error code, kept public so handlers can inspect it.
    CURLUcode code;
};
/// Renders a complete HTML document and writes it to the response.
///
/// The document consists of a generated <head> (charset, title, stylesheet
/// link, viewport, followed by `extra_head`) and the caller-supplied `element`.
/// An ETag derived from a FastHash of the serialized HTML is always set; when
/// should_send_304() accepts the request, a 304 is sent instead of the body.
///
/// @param req        incoming request (used for the origin and conditional headers)
/// @param res        response to populate
/// @param title      text of the <title> element
/// @param element    element placed after <head> inside <html>
/// @param extra_head additional nodes appended to <head>
void serve(const httplib::Request& req, httplib::Response& res, std::string title, Element element, Nodes extra_head) {
    using namespace std::string_literals;

    std::string css_url = get_origin(req) + "/style.css";
    // CSP directive names must not end in a colon: "media-src:" is not a
    // recognised directive, which would leave media governed by default-src 'none'.
    res.set_header("Content-Security-Policy", "default-src 'none'; img-src https:; media-src https:; style-src "s + css_url);

    Element head("head", {
        Element("meta", {{"charset", "utf-8"}}, {}),
        Element("title", {std::move(title)}),
        // The stylesheet hash is appended as a query parameter (cache busting).
        Element("link", {{"rel", "stylesheet"}, {"href", std::move(css_url) + "?v=" + std::to_string(css_hash)}}, {}),
        Element("meta", {{"name", "viewport"}, {"content", "width=device-width,initial-scale=1"}}, {})
    });
    head.nodes.reserve(head.nodes.size() + extra_head.size());
    head.nodes.insert(head.nodes.end(), extra_head.begin(), extra_head.end());

    std::string html = "<!DOCTYPE html>"s + Element("html", {
        std::move(head),
        std::move(element)
    }).serialize();

    // The ETag is the quoted decimal hash of the full page.
    uint64_t hash = FastHash(html.data(), html.size(), 0);
    res.set_header("ETag", std::string(1, '"') + std::to_string(hash) + '"');

    if (should_send_304(req, hash)) {
        // Not modified: send headers describing the page, but no body.
        res.status = 304;
        res.set_header("Content-Length", std::to_string(html.size()));
        res.set_header("Content-Type", "text/html");
    } else {
        res.set_content(std::move(html), "text/html");
    }
}
/// Serves an error page.
///
/// The page body is a <div class="error"> holding the title as an <h2>,
/// optionally followed by a <p> with `subtitle` and a <pre><code> with `info`.
/// `title` is also used as the document title.
void serve_error(const httplib::Request& req, httplib::Response& res,
        std::string title, std::optional<std::string> subtitle, std::optional<std::string> info) {
    // The heading takes a copy; the original string is handed to serve() below.
    Element container("div", {{"class", "error"}}, {
        Element("h2", {title})
    });

    if (subtitle.has_value()) {
        container.nodes.push_back(Element("p", {
            std::move(*subtitle)
        }));
    }

    if (info.has_value()) {
        container.nodes.push_back(Element("pre", {
            Element("code", {std::move(*info)})
        }));
    }

    Element page_body("body", {std::move(container)});
    serve(req, res, std::move(title), std::move(page_body));
}
/// Responds with a redirect to `url` and a small HTML page linking to it.
///
/// @param permanent selects status 301 when true, 302 otherwise
void serve_redirect(const httplib::Request& req, httplib::Response& res, std::string url, bool permanent) {
    using namespace std::string_literals;

    const int status_code = permanent ? 301 : 302;

    // Build the body first: it needs copies of the URL before the URL is moved
    // into the page title below.
    Element page_body("body", {
        "Redirecting to ",
        Element("a", {{"href", url}}, {url}),
        ""
    });

    res.set_redirect(url, status_code);

    std::string page_title = "Redirecting to "s + std::move(url) + "";
    serve(req, res, std::move(page_title), std::move(page_body));
}
/// Determines the origin (scheme + host[:port]) used to build absolute URLs.
///
/// Sources are consulted in this order:
///   1. the request's X-Canonical-Origin header,
///   2. config.canonical_origin, if set,
///   3. "http://" followed by the request's Host header,
///   4. "http://" followed by the configured bind host, with the bind port
///      appended unless it is 80.
std::string get_origin(const httplib::Request& req) {
    if (req.has_header("X-Canonical-Origin")) {
        return req.get_header_value("X-Canonical-Origin");
    }

    if (config.canonical_origin) {
        return *config.canonical_origin;
    }

    std::string result = "http://";

    if (req.has_header("Host")) {
        result += req.get_header_value("Host");
        return result;
    }

    // No Host header: fall back to the address the server is bound to.
    result += config.bind_host;
    if (config.bind_port != 80) {
        result += ':' + std::to_string(config.bind_port);
    }
    return result;
}
/// Rewrites an absolute URL so that it points at this server.
///
/// The result is `<origin>/<host><path>[?query][#fragment]`, where the
/// components are taken from `url_str` as parsed by libcurl's URL API.
///
/// @throws std::bad_alloc     if the URL handle cannot be allocated
/// @throws CurlUrlException   if the URL cannot be parsed or a required part
///                            (host, path) cannot be extracted
std::string proxy_mastodon_url(const httplib::Request& req, const std::string& url_str) {
    using UrlHandle = std::unique_ptr<CURLU, decltype(&curl_url_cleanup)>;
    using CurlStr = std::unique_ptr<char, decltype(&curl_free)>;

    UrlHandle handle(curl_url(), curl_url_cleanup);
    if (handle == nullptr) {
        throw std::bad_alloc();
    }

    if (CURLUcode rc = curl_url_set(handle.get(), CURLUPART_URL, url_str.c_str(), 0); rc != CURLUE_OK) {
        throw CurlUrlException(rc);
    }

    // Extracts one URL component. `tolerated` names an error code that means
    // "component absent"; in that case the returned pointer is null.
    const auto extract = [&handle](CURLUPart which, CURLUcode tolerated = CURLUE_OK) -> CurlStr {
        char* raw = nullptr;
        const CURLUcode rc = curl_url_get(handle.get(), which, &raw, 0);
        if (rc != CURLUE_OK && rc != tolerated) {
            throw CurlUrlException(rc);
        }
        return CurlStr(raw, curl_free);
    };

    const CurlStr host = extract(CURLUPART_HOST);
    const CurlStr path = extract(CURLUPART_PATH);
    const CurlStr query = extract(CURLUPART_QUERY, CURLUE_NO_QUERY);
    const CurlStr fragment = extract(CURLUPART_FRAGMENT, CURLUE_NO_FRAGMENT);

    std::string proxied = get_origin(req);
    proxied += '/';
    proxied += host.get();
    proxied += path.get();

    if (query != nullptr) {
        proxied += '?';
        proxied += query.get();
    }
    if (fragment != nullptr) {
        proxied += '#';
        proxied += fragment.get();
    }

    return proxied;
}
/// Decides whether the request's If-None-Match header matches `hash`.
///
/// Returns true when the header is exactly "*", or when it contains the quoted
/// decimal hash and that occurrence is not directly preceded by '/'.
/// Only the first occurrence of the quoted hash is examined.
bool should_send_304(const httplib::Request& req, uint64_t hash) {
    const std::string if_none_match = req.get_header_value("If-None-Match");
    if (if_none_match == "*") {
        return true;
    }

    std::string quoted_etag(1, '"');
    quoted_etag += std::to_string(hash);
    quoted_etag += '"';

    const size_t at = if_none_match.find(quoted_etag);
    if (at == std::string::npos) {
        return false;
    }
    if (at == 0) {
        return true;
    }
    return if_none_match[at - 1] != '/';
}
/// Parses an HTML fragment, preprocesses its body subtree (link rewriting and
/// emoji substitution via the element overload), and returns the
/// re-serialized document as an HTMLString.
blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector<Emoji>& emojis, const blankie::html::HTMLString& str) {
    LXB::HTML::Document parsed(str.str);

    // Mutates the parsed tree in place, starting at <body>.
    preprocess_html(req, domain_name, emojis, parsed.body_element());

    auto serialized = parsed.serialize();
    return blankie::html::HTMLString(std::move(serialized));
}
/// Recursively preprocesses a DOM subtree in place.
///
/// <a> elements are passed to preprocess_link(); child elements are visited
/// recursively; child text nodes are run through emojify().
static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector<Emoji>& emojis, lxb_dom_element_t* element) {
    const lxb_char_t* raw_tag = lxb_dom_element_tag_name(element, nullptr);
    const bool is_anchor = strncmp(reinterpret_cast<const char*>(raw_tag), "A", 2) == 0;
    if (is_anchor) {
        // Rewrite the link before descending, since that may replace its children.
        preprocess_link(req, domain_name, element);
    }

    for (lxb_dom_node_t* cur = lxb_dom_node_first_child(lxb_dom_interface_node(element));
            cur != nullptr;
            cur = lxb_dom_node_next(cur)) {
        switch (cur->type) {
            case LXB_DOM_NODE_TYPE_ELEMENT:
                preprocess_html(req, domain_name, emojis, lxb_dom_interface_element(cur));
                break;
            case LXB_DOM_NODE_TYPE_TEXT:
                // emojify() hands back the node to continue iterating from.
                cur = emojify(cur, emojis);
                break;
            default:
                break;
        }
    }
}
/// Preprocesses a single <a> element in place.
///
/// If its href is `https://<domain_name>` or starts with `https://<domain_name>/`,
/// the href is replaced by the result of proxy_mastodon_url(). If
/// should_fix_link() accepts the element, its text content is replaced by the
/// (possibly rewritten) href. Elements without an href are left untouched.
///
/// @throws LXB::Exception if setting the text content fails
static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element) {
    using namespace std::string_literals;

    const lxb_char_t* const href_name = reinterpret_cast<const lxb_char_t*>("href");

    size_t raw_len = 0;
    const lxb_char_t* raw = lxb_dom_element_get_attribute(element, href_name, 4, &raw_len);
    if (raw == nullptr) {
        return;
    }
    std::string href(reinterpret_cast<const char*>(raw), raw_len);

    const std::string instance_base = "https://"s + domain_name;
    const bool points_at_instance = href == instance_base || href.starts_with(instance_base + '/');
    if (points_at_instance) {
        // Route links to this instance through the proxy.
        href = proxy_mastodon_url(req, href);
        lxb_dom_element_set_attribute(element, href_name, 4, reinterpret_cast<const lxb_char_t*>(href.data()), href.size());
    }

    if (!should_fix_link(element)) {
        return;
    }

    // Show the full href as the link text.
    const lxb_status_t status = lxb_dom_node_text_content_set(lxb_dom_interface_node(element), reinterpret_cast<const lxb_char_t*>(href.data()), href.size());
    if (status != LXB_STATUS_OK) {
        throw LXB::Exception(status);
    }
}
/// Reports whether an <a> element has exactly this child structure:
///
///   <span class="invisible">…</span>
///   <span class="ellipsis">…</span>   (or a <span> with an empty class attribute)
///   <span class="invisible">…</span>
///
/// and nothing else. Any other shape, including an element with fewer than
/// three children, yields false.
/// NOTE(review): presumably this is the markup Mastodon emits for shortened
/// URLs; confirm against upstream output.
static inline bool should_fix_link(lxb_dom_element_t* element) {
    // True if `node` is a <span> whose class attribute equals `expected_cls`.
    // A null node (missing child) simply does not match.
    auto expected_element = [](lxb_dom_node_t* node, const char* expected_cls) {
        if (!node || node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
            return false;
        }

        lxb_dom_element_t* span = lxb_dom_interface_element(node);
        const char* tag_name = reinterpret_cast<const char*>(lxb_dom_element_tag_name(span, nullptr));
        if (strncmp(tag_name, "SPAN", 5) != 0) {
            return false;
        }

        const lxb_char_t* cls = lxb_dom_element_get_attribute(span, reinterpret_cast<const lxb_char_t*>("class"), 5, nullptr);
        return cls && strcmp(reinterpret_cast<const char*>(cls), expected_cls) == 0;
    };

    lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element));
    if (!expected_element(child, "invisible")) {
        return false;
    }

    // `child` is known to be non-null after each successful match, so
    // advancing is safe; the next match attempt handles a null sibling.
    child = lxb_dom_node_next(child);
    if (!expected_element(child, "ellipsis") && !expected_element(child, "")) {
        return false;
    }

    child = lxb_dom_node_next(child);
    if (!expected_element(child, "invisible")) {
        return false;
    }

    // There must be no fourth child.
    child = lxb_dom_node_next(child);
    return child == nullptr;
}
/// Replaces a text node with the node sequence produced by the string
/// overload of emojify() (text fragments interleaved with emoji <img>s).
///
/// @param child  the text node to replace
/// @param emojis known custom emojis
/// @return the last node of the replacement sequence, so the caller can keep
///         iterating with lxb_dom_node_next(); `child` itself is returned
///         untouched when there is nothing to replace it with.
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis) {
    size_t text_content_len = 0;
    const char* text_content = reinterpret_cast<const char*>(lxb_dom_node_text_content(child, &text_content_len));
    if (!text_content) {
        // Nothing to read; constructing a std::string from a null pointer is not allowed.
        return child;
    }

    std::vector<lxb_dom_node_t*> nodes = emojify(child->owner_document, std::string(text_content, text_content_len), emojis);
    if (nodes.empty()) {
        return child;
    }

    // Chain every replacement after the previous one, starting at the original
    // node. Only the original node is destroyed, and only once; destroying the
    // insertion anchor inside the loop would discard every fragment except the last.
    lxb_dom_node_t* last = child;
    for (lxb_dom_node_t* node : nodes) {
        lxb_dom_node_insert_after(last, node);
        last = node;
    }
    lxb_dom_node_destroy(child);

    return last;
}
// Matches a colon-delimited shortcode such as ":blobcat:". Capture group 1 is
// the name between the colons (one or more of a-z, A-Z, 0-9, underscore).
static std::regex shortcode_re(":([a-zA-Z0-9_]+):");
/// Splits `str` into DOM nodes, turning every `:shortcode:` that names a known
/// emoji into an <img class="custom_emoji"> and keeping everything else as text.
///
/// For each recognised shortcode, the text accumulated so far is emitted as a
/// text node (even when empty) followed by the <img>, whose alt and title are
/// the full `:shortcode:` and whose src is the emoji's URL. Unrecognised
/// shortcodes stay in the text. Any trailing text is emitted as a final text
/// node if non-empty.
///
/// @param document document that will own the created nodes
/// @param str      text to scan
/// @param emojis   known custom emojis
/// @return the created nodes, in document order (not yet attached to the tree)
static inline std::vector<lxb_dom_node_t*> emojify(lxb_dom_document_t* document, std::string str, const std::vector<Emoji>& emojis) {
    std::string buf;
    std::smatch sm;
    std::vector<lxb_dom_node_t*> res;

    // Scan in place with iterators instead of re-copying the remaining text
    // after every match.
    std::string::const_iterator cursor = str.cbegin();
    const std::string::const_iterator end = str.cend();

    while (std::regex_search(cursor, end, sm, shortcode_re)) {
        buf.append(sm.prefix().first, sm.prefix().second);

        const std::string group_0 = sm.str(0);
        // Extract the name once rather than once per emoji compared.
        const std::string shortcode = sm.str(1);
        auto emoji = std::find_if(emojis.begin(), emojis.end(), [&](const Emoji& i) { return i.shortcode == shortcode; });

        if (emoji != emojis.end()) {
            // Flush the text preceding the emoji, then emit the image.
            res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast<const lxb_char_t*>(buf.data()), buf.size())));
            buf.clear();

            lxb_dom_element_t* img = lxb_dom_element_create(document, reinterpret_cast<const lxb_char_t*>("img"), 3, nullptr, 0, nullptr, 0, nullptr, 0, false);
            lxb_dom_element_set_attribute(img, reinterpret_cast<const lxb_char_t*>("class"), 5, reinterpret_cast<const lxb_char_t*>("custom_emoji"), 12);
            lxb_dom_element_set_attribute(img, reinterpret_cast<const lxb_char_t*>("alt"), 3, reinterpret_cast<const lxb_char_t*>(group_0.data()), group_0.size());
            lxb_dom_element_set_attribute(img, reinterpret_cast<const lxb_char_t*>("title"), 5, reinterpret_cast<const lxb_char_t*>(group_0.data()), group_0.size());
            lxb_dom_element_set_attribute(img, reinterpret_cast<const lxb_char_t*>("src"), 3, reinterpret_cast<const lxb_char_t*>(emoji->url.data()), emoji->url.size());
            res.push_back(lxb_dom_interface_node(img));
        } else {
            // Unknown shortcode: keep it verbatim as text.
            buf += group_0;
        }

        // Continue right after the current match.
        cursor = sm.suffix().first;
    }

    // Whatever follows the last match is plain text.
    buf.append(cursor, end);
    if (!buf.empty()) {
        res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast<const lxb_char_t*>(buf.data()), buf.size())));
    }

    return res;
}