#include #include #include #include #include #include "config.h" #include "models.h" #include "servehelper.h" #include "lxb_wrapper.h" #include "routes/routes.h" #include "blankie/escape.h" static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, lxb_dom_element_t* element); static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element); static inline bool should_fix_link(lxb_dom_element_t* element); static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis); static inline std::vector emojify(lxb_dom_document_t* document, std::string str, const std::vector& emojis); class CurlUrlException : public std::exception { public: CurlUrlException(CURLUcode code_) : code(code_) {} const char* what() const noexcept { return curl_url_strerror(this->code); } CURLUcode code; }; void serve(const httplib::Request& req, httplib::Response& res, std::string title, Element element, Nodes extra_head) { using namespace std::string_literals; std::string css_url = get_origin(req) + "/style.css"; res.set_header("Content-Security-Policy", "default-src 'none'; img-src https:; media-src: https:; style-src "s + css_url); Element head("head", { Element("meta", {{"charset", "utf-8"}}, {}), Element("title", {std::move(title)}), Element("link", {{"rel", "stylesheet"}, {"href", std::move(css_url) + "?v=" + std::to_string(css_hash)}}, {}), Element("meta", {{"name", "viewport"}, {"content", "width=device-width,initial-scale=1"}}, {}) }); head.nodes.reserve(head.nodes.size() + extra_head.size()); head.nodes.insert(head.nodes.end(), extra_head.begin(), extra_head.end()); std::string html = ""s + Element("html", { std::move(head), std::move(element) }).serialize(); uint64_t hash = FastHash(html.data(), html.size(), 0); res.set_header("ETag", std::string(1, '"') + std::to_string(hash) + '"'); if (should_send_304(req, hash)) { res.status = 304; res.set_header("Content-Length", std::to_string(html.size())); res.set_header("Content-Type", "text/html"); } else { res.set_content(std::move(html), "text/html"); } } void serve_error(const httplib::Request& req, httplib::Response& res, std::string title, std::optional subtitle, std::optional info) { Element error_div("div", {{"class", "error"}}, { Element("h2", {title}) }); if (subtitle) { error_div.nodes.push_back(Element("p", { std::move(*subtitle) })); } if (info) { error_div.nodes.push_back(Element("pre", { Element("code", {std::move(*info)}) })); } Element body("body", {std::move(error_div)}); serve(req, res, std::move(title), std::move(body)); } void serve_redirect(const httplib::Request& req, httplib::Response& res, std::string url, bool permanent) { using namespace std::string_literals; Element body("body", { "Redirecting to ", Element("a", {{"href", url}}, {url}), "…" }); res.set_redirect(url, permanent ? 301 : 302); serve(req, res, "Redirecting to "s + std::move(url) + "…", std::move(body)); } std::string get_origin(const httplib::Request& req) { if (req.has_header("X-Canonical-Origin")) { return req.get_header_value("X-Canonical-Origin"); } if (config.canonical_origin) { return *config.canonical_origin; } std::string origin = "http://"; if (req.has_header("Host")) { origin += req.get_header_value("Host"); } else { origin += config.bind_host; if (config.bind_port != 80) { origin += ':' + std::to_string(config.bind_port); } } return origin; } std::string proxy_mastodon_url(const httplib::Request& req, const std::string& url_str) { using CurlStr = std::unique_ptr; std::unique_ptr url(curl_url(), curl_url_cleanup); if (!url) { throw std::bad_alloc(); } CURLUcode code = curl_url_set(url.get(), CURLUPART_URL, url_str.c_str(), 0); if (code) { throw CurlUrlException(code); } auto get_part = [&](CURLUPart part, CURLUcode ignore = CURLUE_OK) { char* content = nullptr; CURLUcode code = curl_url_get(url.get(), part, &content, 0); if (code && code != ignore) { throw CurlUrlException(code); } return CurlStr(content, curl_free); }; CurlStr host = get_part(CURLUPART_HOST); CurlStr path = get_part(CURLUPART_PATH); CurlStr query = get_part(CURLUPART_QUERY, CURLUE_NO_QUERY); CurlStr fragment = get_part(CURLUPART_FRAGMENT, CURLUE_NO_FRAGMENT); std::string new_url = get_origin(req) + '/' + host.get() + path.get(); if (query) { new_url += '?'; new_url += query.get(); } if (fragment) { new_url += '#'; new_url += fragment.get(); } return new_url; } bool should_send_304(const httplib::Request& req, uint64_t hash) { std::string header = req.get_header_value("If-None-Match"); if (header == "*") { return true; } size_t pos = header.find(std::string(1, '"') + std::to_string(hash) + '"'); return pos != std::string::npos && (pos == 0 || header[pos - 1] != '/'); } blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, const blankie::html::HTMLString& str) { LXB::HTML::Document document(str.str); preprocess_html(req, domain_name, emojis, document.body_element()); return blankie::html::HTMLString(document.serialize()); } blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::vector& emojis, const std::string& str) { return preprocess_html(req, "", emojis, blankie::html::HTMLString(blankie::html::escape(str))); } static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, lxb_dom_element_t* element) { const char* tag_name = reinterpret_cast(lxb_dom_element_tag_name(element, nullptr)); if (strncmp(tag_name, "A", 2) == 0) { // Proprocess links preprocess_link(req, domain_name, element); } // Walk through the element's children lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element)); while (child) { if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) { preprocess_html(req, domain_name, emojis, lxb_dom_interface_element(child)); } else if (child->type == LXB_DOM_NODE_TYPE_TEXT) { child = emojify(child, emojis); } child = lxb_dom_node_next(child); } } static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element) { using namespace std::string_literals; size_t href_c_len; const lxb_char_t* href_c = lxb_dom_element_get_attribute(element, reinterpret_cast("href"), 4, &href_c_len); if (!href_c) { return; } std::string href(reinterpret_cast(href_c), href_c_len); std::string instance_url_base = "https://"s + domain_name; if (href.starts_with(instance_url_base + '/') || href == instance_url_base) { // Proxy this instance's URLs to Coyote href = proxy_mastodon_url(req, std::move(href)); lxb_dom_element_set_attribute(element, reinterpret_cast("href"), 4, reinterpret_cast(href.data()), href.size()); } if (should_fix_link(element)) { // Set the content of each to its href lxb_status_t status = lxb_dom_node_text_content_set(lxb_dom_interface_node(element), reinterpret_cast(href.data()), href.size()); if (status != LXB_STATUS_OK) { throw LXB::Exception(status); } } } static inline bool should_fix_link(lxb_dom_element_t* element) { auto expected_element = [](lxb_dom_node_t* node, const char* expected_cls) { if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) { return false; } lxb_dom_element_t* span = lxb_dom_interface_element(node); const char* tag_name = reinterpret_cast(lxb_dom_element_tag_name(span, nullptr)); if (strncmp(tag_name, "SPAN", 5) != 0) { return false; } const lxb_char_t* cls = lxb_dom_element_get_attribute(span, reinterpret_cast("class"), 5, nullptr); return cls && strcmp(reinterpret_cast(cls), expected_cls) == 0; }; lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element)); if (!expected_element(child, "invisible")) { return false; } child = lxb_dom_node_next(child); if (!expected_element(child, "ellipsis") && !expected_element(child, "")) { return false; } child = lxb_dom_node_next(child); if (!expected_element(child, "invisible")) { return false; } child = lxb_dom_node_next(child); return child == nullptr; } static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis) { size_t text_content_len; const char* text_content = reinterpret_cast(lxb_dom_node_text_content(child, &text_content_len)); std::vector nodes = emojify(child->owner_document, std::string(text_content, text_content_len), emojis); lxb_dom_node_insert_after(child, nodes[0]); lxb_dom_node_destroy(child); child = nodes[0]; for (size_t i = 1; i < nodes.size(); i++) { lxb_dom_node_insert_after(child, nodes[i]); child = nodes[i]; } return child; } static std::regex shortcode_re(":([a-zA-Z0-9_]+):"); static inline std::vector emojify(lxb_dom_document_t* document, std::string str, const std::vector& emojis) { std::string buf; std::smatch sm; std::vector res; while (std::regex_search(str, sm, shortcode_re)) { buf += sm.prefix(); std::string group_0 = sm.str(0); auto emoji = std::find_if(emojis.begin(), emojis.end(), [&](const Emoji& i) { return i.shortcode == sm.str(1); }); if (emoji != emojis.end()) { res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast(buf.data()), buf.size()))); buf.clear(); lxb_dom_element_t* img = lxb_dom_element_create(document, reinterpret_cast("img"), 3, nullptr, 0, nullptr, 0, nullptr, 0, false); lxb_dom_element_set_attribute(img, reinterpret_cast("class"), 5, reinterpret_cast("custom_emoji"), 12); lxb_dom_element_set_attribute(img, reinterpret_cast("alt"), 3, reinterpret_cast(group_0.data()), group_0.size()); lxb_dom_element_set_attribute(img, reinterpret_cast("title"), 5, reinterpret_cast(group_0.data()), group_0.size()); lxb_dom_element_set_attribute(img, reinterpret_cast("src"), 3, reinterpret_cast(emoji->url.data()), emoji->url.size()); res.push_back(lxb_dom_interface_node(img)); } else { buf += group_0; } str = sm.suffix(); } if (!str.empty()) { buf += std::move(str); } if (!buf.empty()) { res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast(buf.data()), buf.size()))); } return res; }