From 973a0eada2228d9ef35de9c5f2cc71dc63d2dc15 Mon Sep 17 00:00:00 2001 From: blankie Date: Fri, 8 Dec 2023 18:38:55 +1100 Subject: [PATCH] Split HTML helpers into their own file --- CMakeLists.txt | 2 +- htmlhelper.cpp | 456 ++++++++++++++++++++++++++++++++++++++++++++++ htmlhelper.h | 14 ++ models.h | 1 + routes/about.cpp | 1 + routes/status.cpp | 1 + routes/tags.cpp | 1 + routes/user.cpp | 1 + servehelper.cpp | 454 --------------------------------------------- servehelper.h | 10 - 10 files changed, 476 insertions(+), 465 deletions(-) create mode 100644 htmlhelper.cpp create mode 100644 htmlhelper.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0acea2b..d4114c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,7 @@ list(APPEND FLAGS -Werror -Wall -Wextra -Wshadow -Wpedantic -Wno-gnu-anonymous-s add_link_options(${FLAGS}) -add_executable(${PROJECT_NAME} main.cpp numberhelper.cpp hex.cpp config.cpp settings.cpp models.cpp client.cpp servehelper.cpp timeutils.cpp hiredis_wrapper.cpp +add_executable(${PROJECT_NAME} main.cpp numberhelper.cpp hex.cpp config.cpp settings.cpp models.cpp client.cpp servehelper.cpp htmlhelper.cpp timeutils.cpp hiredis_wrapper.cpp routes/home.cpp routes/css.cpp routes/user.cpp routes/status.cpp routes/tags.cpp routes/about.cpp routes/user_settings.cpp blankie/serializer.cpp blankie/escape.cpp) set_target_properties(${PROJECT_NAME} diff --git a/htmlhelper.cpp b/htmlhelper.cpp new file mode 100644 index 0000000..f9745e7 --- /dev/null +++ b/htmlhelper.cpp @@ -0,0 +1,456 @@ +#include "models.h" +#include "settings.h" +#include "timeutils.h" +#include "curlu_wrapper.h" +#include "font_awesome.h" +#include "blankie/escape.h" + +#include "htmlhelper.h" + +static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, lxb_dom_element_t* element); +static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element); +static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls); +static inline void get_text_content(lxb_dom_node_t* node, std::string& out); +static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis); +static inline std::vector emojify(lxb_dom_document_t* document, std::string str, const std::vector& emojis); + +struct PostStatus { + const char* icon_html; + Node info_node; +}; +static Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool main_post, const std::optional& post_status, const Post* reblogged = nullptr); +static inline Element serialize_media(const Media& media); +static inline Element serialize_poll(const httplib::Request& req, const Poll& poll); + + +Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool pinned, bool main_post) { + using namespace std::string_literals; + + if (post.reblog) { + PostStatus post_status = { + fa_retweet, + preprocess_html(req, post.account.emojis, post.account.display_name + " boosted"), + }; + return serialize_post(req, server, *post.reblog, main_post, post_status, &post); + } else if (pinned) { + PostStatus post_status = { + fa_thumbtack, + blankie::html::HTMLString("Pinned post"), + }; + return serialize_post(req, server, post, main_post, post_status); + } else if (post.in_reply_to_id && post.in_reply_to_account_id && post.account.id == *post.in_reply_to_account_id) { + PostStatus post_status = { + fa_reply, + preprocess_html(req, post.account.emojis, "Replied to "s + post.account.display_name), + }; + return serialize_post(req, server, post, main_post, post_status); + } else { + return serialize_post(req, server, post, main_post, std::nullopt); + } +} + +std::string get_text_content(lxb_dom_node_t* child) { + std::string out; + get_text_content(child, out); + + if (!out.empty()) { + size_t remove_from = out.size(); + while (remove_from && out[remove_from - 1] == '\n') { + remove_from--; + } + // Don't engulf everything, otherwise it crashes + // https://ruby.social/@CoralineAda/109951421922797743 + if (out.size() > remove_from && remove_from != 0) { + out.erase(remove_from); + } + } + if (!out.empty()) { + size_t remove_to = 0; + while (out.size() > remove_to && out[remove_to] == '\n') { + remove_to++; + } + // Don't engulf everything, otherwise it crashes + // https://ruby.social/@CoralineAda/109951421922797743 + if (out.size() > remove_to) { + out.erase(0, remove_to); + } + } + + return out; +} + +std::string get_text_content(blankie::html::HTMLString str) { + LXB::HTML::Document document(str.str); + return get_text_content(document.body()); +} + +blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, const blankie::html::HTMLString& str) { + LXB::HTML::Document document(str.str); + preprocess_html(req, domain_name, emojis, document.body_element()); + return blankie::html::HTMLString(document.serialize()); +} + +blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::vector& emojis, const std::string& str) { + return preprocess_html(req, "", emojis, blankie::html::HTMLString(blankie::html::escape(str))); +} + + +static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, lxb_dom_element_t* element) { + const char* tag_name = reinterpret_cast(lxb_dom_element_tag_name(element, nullptr)); + + if (strncmp(tag_name, "A", 2) == 0) { + // Proprocess links + preprocess_link(req, domain_name, element); + } + + // Walk through the element's children + lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element)); + while (child) { + if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) { + preprocess_html(req, domain_name, emojis, lxb_dom_interface_element(child)); + } else if (child->type == LXB_DOM_NODE_TYPE_TEXT) { + child = emojify(child, emojis); + } + + child = lxb_dom_node_next(child); + } +} + +static std::regex mention_class_re("\\bmention\\b"); +static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element) { + using namespace std::string_literals; + + // Remove target=... + lxb_status_t status = lxb_dom_element_remove_attribute(element, reinterpret_cast("target"), 6); + if (status != LXB_STATUS_OK) { + throw LXB::Exception(status); + } + + size_t href_c_len; + const lxb_char_t* href_c = lxb_dom_element_get_attribute(element, reinterpret_cast("href"), 4, &href_c_len); + if (!href_c) { + return; + } + std::string href(reinterpret_cast(href_c), href_c_len); + + size_t cls_c_len; + const lxb_char_t* cls_c = lxb_dom_element_class(element, &cls_c_len); + std::string cls = cls_c ? std::string(reinterpret_cast(cls_c), cls_c_len) : ""; + + try { + CurlUrl href_url; + href_url.set(CURLUPART_URL, get_origin(req)); + href_url.set(CURLUPART_PATH, std::string(href_url.get(CURLUPART_PATH).get()) + req.path); + href_url.set(CURLUPART_URL, href); + + CurlUrl instance_url_base; + instance_url_base.set(CURLUPART_SCHEME, "https"); + instance_url_base.set(CURLUPART_HOST, domain_name); + + // .mention is used in note and posts + // Instance base is used for link fields + if (std::regex_search(cls, mention_class_re) || starts_with(href_url, instance_url_base)) { + // Proxy this instance's URLs to Coyote + href = proxy_mastodon_url(req, std::move(href)); + + lxb_dom_element_set_attribute(element, reinterpret_cast("href"), 4, reinterpret_cast(href.data()), href.size()); + } + } catch (const CurlUrlException& e) { + // example: on eldritch.cafe/about + if (e.code != CURLUE_MALFORMED_INPUT) { + throw; + } + } + + if (should_fix_link(element, cls)) { + // Set the content of each to its href + status = lxb_dom_node_text_content_set(lxb_dom_interface_node(element), reinterpret_cast(href.data()), href.size()); + if (status != LXB_STATUS_OK) { + throw LXB::Exception(status); + } + } +} + +static std::regex unhandled_link_re("\\bunhandled-link\\b"); +static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls) { + // https://vt.social/@LucydiaLuminous/111448085044245037 + if (std::regex_search(element_cls, unhandled_link_re)) { + return true; + } + + auto expected_element = [](lxb_dom_node_t* node, const char* expected_cls) { + if (!node || node->type != LXB_DOM_NODE_TYPE_ELEMENT) { + return false; + } + lxb_dom_element_t* span = lxb_dom_interface_element(node); + + const char* tag_name = reinterpret_cast(lxb_dom_element_tag_name(span, nullptr)); + if (strncmp(tag_name, "SPAN", 5) != 0) { + return false; + } + + const lxb_char_t* cls = lxb_dom_element_get_attribute(span, reinterpret_cast("class"), 5, nullptr); + return cls && strcmp(reinterpret_cast(cls), expected_cls) == 0; + }; + + lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element)); + if (!expected_element(child, "invisible")) { + return false; + } + + child = lxb_dom_node_next(child); + if (!expected_element(child, "ellipsis") && !expected_element(child, "")) { + return false; + } + + child = lxb_dom_node_next(child); + if (!expected_element(child, "invisible")) { + return false; + } + + child = lxb_dom_node_next(child); + return child == nullptr; +} + +static inline void get_text_content(lxb_dom_node_t* node, std::string& out) { + bool is_br = false, is_p = false; + + if (node->type == LXB_DOM_NODE_TYPE_TEXT) { + size_t len; + const char* text = reinterpret_cast(lxb_dom_node_text_content(node, &len)); + + out.append(text, len); + } else if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) { + lxb_dom_element_t* element = lxb_dom_interface_element(node); + const char* tag_name = reinterpret_cast(lxb_dom_element_tag_name(element, nullptr)); + + is_p = strncmp(tag_name, "P", 2) == 0; + is_br = strncmp(tag_name, "BR", 3) == 0; + } + + if (is_p || is_br) { + out.push_back('\n'); + } + + lxb_dom_node_t* child = lxb_dom_node_first_child(node); + while (child) { + get_text_content(child, out); + + child = lxb_dom_node_next(child); + } + + if (is_p) { + out.push_back('\n'); + } +} + +static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis) { + std::vector nodes = emojify(child->owner_document, get_text_content(child), emojis); + + lxb_dom_node_insert_after(child, nodes[0]); + lxb_dom_node_destroy(child); + child = nodes[0]; + + for (size_t i = 1; i < nodes.size(); i++) { + lxb_dom_node_insert_after(child, nodes[i]); + child = nodes[i]; + } + + return child; +} + +static std::regex shortcode_re(":([a-zA-Z0-9_]+):"); +static inline std::vector emojify(lxb_dom_document_t* document, std::string str, const std::vector& emojis) { + std::string buf; + std::smatch sm; + std::vector res; + + while (std::regex_search(str, sm, shortcode_re)) { + buf += sm.prefix(); + + std::string group_0 = sm.str(0); + auto emoji = std::find_if(emojis.begin(), emojis.end(), [&](const Emoji& i) { return i.shortcode == sm.str(1); }); + if (emoji != emojis.end()) { + res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast(buf.data()), buf.size()))); + buf.clear(); + + lxb_dom_element_t* img = lxb_dom_element_create(document, reinterpret_cast("IMG"), 3, nullptr, 0, nullptr, 0, nullptr, 0, false); + lxb_dom_element_set_attribute(img, reinterpret_cast("class"), 5, reinterpret_cast("custom_emoji"), 12); + lxb_dom_element_set_attribute(img, reinterpret_cast("alt"), 3, reinterpret_cast(group_0.data()), group_0.size()); + lxb_dom_element_set_attribute(img, reinterpret_cast("title"), 5, reinterpret_cast(group_0.data()), group_0.size()); + lxb_dom_element_set_attribute(img, reinterpret_cast("src"), 3, reinterpret_cast(emoji->url.data()), emoji->url.size()); + res.push_back(lxb_dom_interface_node(img)); + } else { + buf += group_0; + } + + str = sm.suffix(); + } + + if (!str.empty()) { + buf += std::move(str); + } + if (!buf.empty()) { + res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast(buf.data()), buf.size()))); + } + + return res; +} + +static Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool main_post, const std::optional& post_status, const Post* reblogged) { + using namespace std::string_literals; + + bool user_known = !post.account.id.empty(); + bool user_ref_known = !post.account.username.empty() && !post.account.server.empty(); + // `reblogged == nullptr` since a malicious server could take down the frontend + // by sending a post that is not a reblog with no account information + std::string post_url = user_known || reblogged == nullptr + ? get_origin(req) + '/' + server + "/@" + post.account.acct(false) + '/' + post.id + "#m" + : get_origin(req) + '/' + server + "/@" + reblogged->account.acct(false) + '/' + reblogged->id + "#m"; + + std::string time_title = post.edited_at < 0 + ? full_time(post.created_at) + : "Created: "s + full_time(post.created_at) + "\nEdited: " + full_time(post.edited_at); + const char* time_badge = post.edited_at < 0 ? "" : " (edited)"; + + blankie::html::HTMLString preprocessed_html = preprocess_html(req, server, post.emojis, post.content); + // Workaround for https://vt.social/@a1ba@suya.place/110552480243348878#m + if (preprocessed_html.str.find("

") == std::string::npos) { + preprocessed_html.str.reserve(preprocessed_html.str.size() + 3 + 4); + preprocessed_html.str.insert(0, "

"); + preprocessed_html.str.append("

"); + } + Element contents("div", {{"class", "post-contents"}}, {std::move(preprocessed_html)}); + + Element post_attachments("div", {{"class", "post-attachments"}}, {}); + post_attachments.nodes.reserve(post.media_attachments.size()); + for (const Media& media : post.media_attachments) { + post_attachments.nodes.push_back(serialize_media(media)); + } + contents.nodes.push_back(std::move(post_attachments)); + + if (post.poll) { + contents.nodes.push_back(serialize_poll(req, *post.poll)); + } + + if (post.sensitive) { + std::string spoiler_text = !post.spoiler_text.empty() ? post.spoiler_text : "See more"; + contents = Element("details", { + Element("summary", {preprocess_html(req, post.emojis, std::move(spoiler_text))}), + std::move(contents), + }); + if (UserSettings(req).auto_open_cw) { + contents.attributes.push_back({"open", ""}); + } + } + + Element div("div", {{"class", "post"}}, { + Element("div", {{"class", "post-header"}}, { + user_ref_known ? Element("a", {{"href", get_origin(req) + '/' + server + "/@" + post.account.acct(false)}}, { + !post.account.avatar_static.empty() + ? Element("img", {{"class", "post-avatar"}, {"alt", "User profile picture"}, {"loading", "lazy"}, {"src", post.account.avatar_static}}, {}) + : Node(""), + Element("span", { + Element("b", {preprocess_html(req, post.account.emojis, post.account.display_name)}), + Element("br"), "@", post.account.acct(), + }), + }) : Element("b", {"Unknown user"}), + Element("a", {{"class", "post-time_header"}, {"href", std::move(post_url)}, {"title", time_title}}, { + Element("time", {{"datetime", to_rfc3339(post.created_at)}}, {relative_time(post.created_at, current_time()), time_badge}), + }), + }), + + contents, + }); + if (post_status) { + div.nodes.insert(div.nodes.begin(), Element("p", { + blankie::html::HTMLString(post_status->icon_html), " ", post_status->info_node, + })); + } + if (main_post) { + div.attributes = {{"class", "post main_post"}, {"id", "m"}}; + } + + return div; +} + +static inline Element serialize_media(const Media& media) { + Element element = [&]() { + if (media.type == "image") { + return Element("a", {{"href", media.url}}, { + Element("img", {{"loading", "lazy"}, {"src", media.preview_url.value_or(media.url)}}, {}), + }); + } else if (media.type == "video") { + Element video("video", {{"controls", ""}, {"src", media.url}}, {}); + if (media.preview_url) { + video.attributes.push_back({"poster", *media.preview_url}); + } + return video; + } else if (media.type == "audio") { + return Element("audio", {{"controls", ""}, {"src", media.url}}, {}); + } else if (media.type == "gifv") { + // https://hachyderm.io/@Impossible_PhD/111444541628207638 + Element video("video", {{"controls", ""}, {"loop", ""}, {"muted", ""}, {"autoplay", ""}, {"src", media.url}}, {}); + if (media.preview_url) { + video.attributes.push_back({"poster", *media.preview_url}); + } + return video; + } else if (media.type == "unknown") { + if (media.remote_url) { + // https://botsin.space/@lina@vt.social/111053598696451525 + return Element("a", {{"class", "unknown_media"}, {"href", *media.remote_url}}, {"Media is not available from this instance, view externally"}); + } else { + return Element("p", {{"class", "unknown_media"}}, {"Media is not available from this instance"}); + } + } else { + return Element("p", {"Unsupported media type: ", media.type}); + } + }(); + + if (media.description) { + element.attributes.push_back({"alt", *media.description}); + element.attributes.push_back({"title", *media.description}); + } + + return element; +} + +static inline Element serialize_poll(const httplib::Request& req, const Poll& poll) { + using namespace std::string_literals; + + uint64_t voters_count = poll.voters_count >= 0 ? static_cast(poll.voters_count) : poll.votes_count; + Element div("div"); + + auto pick_form = [](uint64_t count, const char* singular, const char* plural) { + return count == 1 ? singular : plural; + }; + + div.nodes.reserve(poll.options.size() + 1); + for (const PollOption& option : poll.options) { + std::string percentage = voters_count + ? std::to_string(option.votes_count * 100 / voters_count) + '%' + : "0%"; + + div.nodes.push_back(Element("div", {{"class", "poll-option"}, {"title", std::to_string(option.votes_count) + pick_form(option.votes_count, " vote", " votes")}}, { + Element("b", {{"class", "poll-percentage"}}, {percentage}), " ", preprocess_html(req, poll.emojis, option.title), + Element("object", {{"class", "poll-bar"}, {"width", percentage}}, {}), + })); + } + + Element p("p", poll.voters_count >= 0 + ? std::vector({std::to_string(voters_count), " ", pick_form(voters_count, "voter", "voters")}) + : std::vector({std::to_string(poll.votes_count), " ", pick_form(poll.votes_count, "vote", "votes")}) + ); + if (poll.expired) { + p.nodes.push_back(" / "); + p.nodes.push_back(Element("time", {{"datetime", to_rfc3339(poll.expires_at)}, {"title", "Expired on "s + full_time(poll.expires_at)}}, {"Expired"})); + } else if (poll.expires_at >= 0) { + p.nodes.push_back(" / "); + p.nodes.push_back(Element("time", {{"datetime", to_rfc3339(poll.expires_at)}, {"title", full_time(poll.expires_at)}}, { + "Expires in ", relative_time(current_time(), poll.expires_at), + })); + } + div.nodes.push_back(std::move(p)); + + return div; +} diff --git a/htmlhelper.h b/htmlhelper.h new file mode 100644 index 0000000..f2e59e8 --- /dev/null +++ b/htmlhelper.h @@ -0,0 +1,14 @@ +#pragma once + +#include "lxb_wrapper.h" + +#include "servehelper.h" +struct Post; // forward declaration from models.h +struct Emoji; // forward declaration from models.h + +Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool pinned = false, bool main_post = false); + +std::string get_text_content(lxb_dom_node_t* child); +std::string get_text_content(blankie::html::HTMLString str); +blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, const blankie::html::HTMLString& str); +blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::vector& emojis, const std::string& str); diff --git a/models.h b/models.h index d62abc2..b484423 100644 --- a/models.h +++ b/models.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "blankie/serializer.h" diff --git a/routes/about.cpp b/routes/about.cpp index ae0d52d..0693183 100644 --- a/routes/about.cpp +++ b/routes/about.cpp @@ -1,5 +1,6 @@ #include "routes.h" #include "../servehelper.h" +#include "../htmlhelper.h" #include "../client.h" #include "../models.h" diff --git a/routes/status.cpp b/routes/status.cpp index e6e6474..f8a6192 100644 --- a/routes/status.cpp +++ b/routes/status.cpp @@ -1,6 +1,7 @@ #include "routes.h" #include "../lxb_wrapper.h" #include "../servehelper.h" +#include "../htmlhelper.h" #include "../client.h" #include "../models.h" diff --git a/routes/tags.cpp b/routes/tags.cpp index 44162ed..4d33641 100644 --- a/routes/tags.cpp +++ b/routes/tags.cpp @@ -1,5 +1,6 @@ #include "routes.h" #include "../servehelper.h" +#include "../htmlhelper.h" #include "../client.h" #include "../models.h" diff --git a/routes/user.cpp b/routes/user.cpp index 5c111e0..a8165cb 100644 --- a/routes/user.cpp +++ b/routes/user.cpp @@ -1,5 +1,6 @@ #include "routes.h" #include "../servehelper.h" +#include "../htmlhelper.h" #include "../client.h" #include "../models.h" #include "../timeutils.h" diff --git a/servehelper.cpp b/servehelper.cpp index 3438686..e2bc89f 100644 --- a/servehelper.cpp +++ b/servehelper.cpp @@ -4,32 +4,10 @@ #include #include -#include "font_awesome.h" #include "config.h" -#include "settings.h" -#include "models.h" -#include "timeutils.h" #include "servehelper.h" -#include "lxb_wrapper.h" #include "curlu_wrapper.h" #include "routes/routes.h" -#include "blankie/escape.h" - -static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, lxb_dom_element_t* element); -static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element); -static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls); -static inline void get_text_content(lxb_dom_node_t* node, std::string& out); -static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis); -static inline std::vector emojify(lxb_dom_document_t* document, std::string str, const std::vector& emojis); - -struct PostStatus { - const char* icon_html; - Node info_node; -}; -static Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool main_post, const std::optional& post_status, const Post* reblogged = nullptr); -static inline Element serialize_media(const Media& media); -static inline Element serialize_poll(const httplib::Request& req, const Poll& poll); - void serve(const httplib::Request& req, httplib::Response& res, std::string title, Element element, Nodes extra_head) { using namespace std::string_literals; @@ -168,435 +146,3 @@ bool should_send_304(const httplib::Request& req, uint64_t hash) { size_t pos = header.find(std::string(1, '"') + std::to_string(hash) + '"'); return pos != std::string::npos && (pos == 0 || header[pos - 1] != '/'); } - -Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool pinned, bool main_post) { - using namespace std::string_literals; - - if (post.reblog) { - PostStatus post_status = { - fa_retweet, - preprocess_html(req, post.account.emojis, post.account.display_name + " boosted"), - }; - return serialize_post(req, server, *post.reblog, main_post, post_status, &post); - } else if (pinned) { - PostStatus post_status = { - fa_thumbtack, - blankie::html::HTMLString("Pinned post"), - }; - return serialize_post(req, server, post, main_post, post_status); - } else if (post.in_reply_to_id && post.in_reply_to_account_id && post.account.id == *post.in_reply_to_account_id) { - PostStatus post_status = { - fa_reply, - preprocess_html(req, post.account.emojis, "Replied to "s + post.account.display_name), - }; - return serialize_post(req, server, post, main_post, post_status); - } else { - return serialize_post(req, server, post, main_post, std::nullopt); - } -} - -std::string get_text_content(lxb_dom_node_t* child) { - std::string out; - get_text_content(child, out); - - if (!out.empty()) { - size_t remove_from = out.size(); - while (remove_from && out[remove_from - 1] == '\n') { - remove_from--; - } - // Don't engulf everything, otherwise it crashes - // https://ruby.social/@CoralineAda/109951421922797743 - if (out.size() > remove_from && remove_from != 0) { - out.erase(remove_from); - } - } - if (!out.empty()) { - size_t remove_to = 0; - while (out.size() > remove_to && out[remove_to] == '\n') { - remove_to++; - } - // Don't engulf everything, otherwise it crashes - // https://ruby.social/@CoralineAda/109951421922797743 - if (out.size() > remove_to) { - out.erase(0, remove_to); - } - } - - return out; -} - -std::string get_text_content(blankie::html::HTMLString str) { - LXB::HTML::Document document(str.str); - return get_text_content(document.body()); -} - -blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, const blankie::html::HTMLString& str) { - LXB::HTML::Document document(str.str); - preprocess_html(req, domain_name, emojis, document.body_element()); - return blankie::html::HTMLString(document.serialize()); -} - -blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::vector& emojis, const std::string& str) { - return preprocess_html(req, "", emojis, blankie::html::HTMLString(blankie::html::escape(str))); -} - - -static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, lxb_dom_element_t* element) { - const char* tag_name = reinterpret_cast(lxb_dom_element_tag_name(element, nullptr)); - - if (strncmp(tag_name, "A", 2) == 0) { - // Proprocess links - preprocess_link(req, domain_name, element); - } - - // Walk through the element's children - lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element)); - while (child) { - if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) { - preprocess_html(req, domain_name, emojis, lxb_dom_interface_element(child)); - } else if (child->type == LXB_DOM_NODE_TYPE_TEXT) { - child = emojify(child, emojis); - } - - child = lxb_dom_node_next(child); - } -} - -static std::regex mention_class_re("\\bmention\\b"); -static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element) { - using namespace std::string_literals; - - // Remove target=... - lxb_status_t status = lxb_dom_element_remove_attribute(element, reinterpret_cast("target"), 6); - if (status != LXB_STATUS_OK) { - throw LXB::Exception(status); - } - - size_t href_c_len; - const lxb_char_t* href_c = lxb_dom_element_get_attribute(element, reinterpret_cast("href"), 4, &href_c_len); - if (!href_c) { - return; - } - std::string href(reinterpret_cast(href_c), href_c_len); - - size_t cls_c_len; - const lxb_char_t* cls_c = lxb_dom_element_class(element, &cls_c_len); - std::string cls = cls_c ? std::string(reinterpret_cast(cls_c), cls_c_len) : ""; - - try { - CurlUrl href_url; - href_url.set(CURLUPART_URL, get_origin(req)); - href_url.set(CURLUPART_PATH, std::string(href_url.get(CURLUPART_PATH).get()) + req.path); - href_url.set(CURLUPART_URL, href); - - CurlUrl instance_url_base; - instance_url_base.set(CURLUPART_SCHEME, "https"); - instance_url_base.set(CURLUPART_HOST, domain_name); - - // .mention is used in note and posts - // Instance base is used for link fields - if (std::regex_search(cls, mention_class_re) || starts_with(href_url, instance_url_base)) { - // Proxy this instance's URLs to Coyote - href = proxy_mastodon_url(req, std::move(href)); - - lxb_dom_element_set_attribute(element, reinterpret_cast("href"), 4, reinterpret_cast(href.data()), href.size()); - } - } catch (const CurlUrlException& e) { - // example:
on eldritch.cafe/about - if (e.code != CURLUE_MALFORMED_INPUT) { - throw; - } - } - - if (should_fix_link(element, cls)) { - // Set the content of each to its href - status = lxb_dom_node_text_content_set(lxb_dom_interface_node(element), reinterpret_cast(href.data()), href.size()); - if (status != LXB_STATUS_OK) { - throw LXB::Exception(status); - } - } -} - -static std::regex unhandled_link_re("\\bunhandled-link\\b"); -static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls) { - // https://vt.social/@LucydiaLuminous/111448085044245037 - if (std::regex_search(element_cls, unhandled_link_re)) { - return true; - } - - auto expected_element = [](lxb_dom_node_t* node, const char* expected_cls) { - if (!node || node->type != LXB_DOM_NODE_TYPE_ELEMENT) { - return false; - } - lxb_dom_element_t* span = lxb_dom_interface_element(node); - - const char* tag_name = reinterpret_cast(lxb_dom_element_tag_name(span, nullptr)); - if (strncmp(tag_name, "SPAN", 5) != 0) { - return false; - } - - const lxb_char_t* cls = lxb_dom_element_get_attribute(span, reinterpret_cast("class"), 5, nullptr); - return cls && strcmp(reinterpret_cast(cls), expected_cls) == 0; - }; - - lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element)); - if (!expected_element(child, "invisible")) { - return false; - } - - child = lxb_dom_node_next(child); - if (!expected_element(child, "ellipsis") && !expected_element(child, "")) { - return false; - } - - child = lxb_dom_node_next(child); - if (!expected_element(child, "invisible")) { - return false; - } - - child = lxb_dom_node_next(child); - return child == nullptr; -} - -static inline void get_text_content(lxb_dom_node_t* node, std::string& out) { - bool is_br = false, is_p = false; - - if (node->type == LXB_DOM_NODE_TYPE_TEXT) { - size_t len; - const char* text = reinterpret_cast(lxb_dom_node_text_content(node, &len)); - - out.append(text, len); - } else if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) { - lxb_dom_element_t* element = lxb_dom_interface_element(node); - const char* tag_name = reinterpret_cast(lxb_dom_element_tag_name(element, nullptr)); - - is_p = strncmp(tag_name, "P", 2) == 0; - is_br = strncmp(tag_name, "BR", 3) == 0; - } - - if (is_p || is_br) { - out.push_back('\n'); - } - - lxb_dom_node_t* child = lxb_dom_node_first_child(node); - while (child) { - get_text_content(child, out); - - child = lxb_dom_node_next(child); - } - - if (is_p) { - out.push_back('\n'); - } -} - -static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis) { - std::vector nodes = emojify(child->owner_document, get_text_content(child), emojis); - - lxb_dom_node_insert_after(child, nodes[0]); - lxb_dom_node_destroy(child); - child = nodes[0]; - - for (size_t i = 1; i < nodes.size(); i++) { - lxb_dom_node_insert_after(child, nodes[i]); - child = nodes[i]; - } - - return child; -} - -static std::regex shortcode_re(":([a-zA-Z0-9_]+):"); -static inline std::vector emojify(lxb_dom_document_t* document, std::string str, const std::vector& emojis) { - std::string buf; - std::smatch sm; - std::vector res; - - while (std::regex_search(str, sm, shortcode_re)) { - buf += sm.prefix(); - - std::string group_0 = sm.str(0); - auto emoji = std::find_if(emojis.begin(), emojis.end(), [&](const Emoji& i) { return i.shortcode == sm.str(1); }); - if (emoji != emojis.end()) { - res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast(buf.data()), buf.size()))); - buf.clear(); - - lxb_dom_element_t* img = lxb_dom_element_create(document, reinterpret_cast("IMG"), 3, nullptr, 0, nullptr, 0, nullptr, 0, false); - lxb_dom_element_set_attribute(img, reinterpret_cast("class"), 5, reinterpret_cast("custom_emoji"), 12); - lxb_dom_element_set_attribute(img, reinterpret_cast("alt"), 3, reinterpret_cast(group_0.data()), group_0.size()); - lxb_dom_element_set_attribute(img, reinterpret_cast("title"), 5, reinterpret_cast(group_0.data()), group_0.size()); - lxb_dom_element_set_attribute(img, reinterpret_cast("src"), 3, reinterpret_cast(emoji->url.data()), emoji->url.size()); - res.push_back(lxb_dom_interface_node(img)); - } else { - buf += group_0; - } - - str = sm.suffix(); - } - - if (!str.empty()) { - buf += std::move(str); - } - if (!buf.empty()) { - res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast(buf.data()), buf.size()))); - } - - return res; -} - -static Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool main_post, const std::optional& post_status, const Post* reblogged) { - using namespace std::string_literals; - - bool user_known = !post.account.id.empty(); - bool user_ref_known = !post.account.username.empty() && !post.account.server.empty(); - // `reblogged == nullptr` since a malicious server could take down the frontend - // by sending a post that is not a reblog with no account information - std::string post_url = user_known || reblogged == nullptr - ? get_origin(req) + '/' + server + "/@" + post.account.acct(false) + '/' + post.id + "#m" - : get_origin(req) + '/' + server + "/@" + reblogged->account.acct(false) + '/' + reblogged->id + "#m"; - - std::string time_title = post.edited_at < 0 - ? full_time(post.created_at) - : "Created: "s + full_time(post.created_at) + "\nEdited: " + full_time(post.edited_at); - const char* time_badge = post.edited_at < 0 ? "" : " (edited)"; - - blankie::html::HTMLString preprocessed_html = preprocess_html(req, server, post.emojis, post.content); - // Workaround for https://vt.social/@a1ba@suya.place/110552480243348878#m - if (preprocessed_html.str.find("

") == std::string::npos) { - preprocessed_html.str.reserve(preprocessed_html.str.size() + 3 + 4); - preprocessed_html.str.insert(0, "

"); - preprocessed_html.str.append("

"); - } - Element contents("div", {{"class", "post-contents"}}, {std::move(preprocessed_html)}); - - Element post_attachments("div", {{"class", "post-attachments"}}, {}); - post_attachments.nodes.reserve(post.media_attachments.size()); - for (const Media& media : post.media_attachments) { - post_attachments.nodes.push_back(serialize_media(media)); - } - contents.nodes.push_back(std::move(post_attachments)); - - if (post.poll) { - contents.nodes.push_back(serialize_poll(req, *post.poll)); - } - - if (post.sensitive) { - std::string spoiler_text = !post.spoiler_text.empty() ? post.spoiler_text : "See more"; - contents = Element("details", { - Element("summary", {preprocess_html(req, post.emojis, std::move(spoiler_text))}), - std::move(contents), - }); - if (UserSettings(req).auto_open_cw) { - contents.attributes.push_back({"open", ""}); - } - } - - Element div("div", {{"class", "post"}}, { - Element("div", {{"class", "post-header"}}, { - user_ref_known ? Element("a", {{"href", get_origin(req) + '/' + server + "/@" + post.account.acct(false)}}, { - !post.account.avatar_static.empty() - ? Element("img", {{"class", "post-avatar"}, {"alt", "User profile picture"}, {"loading", "lazy"}, {"src", post.account.avatar_static}}, {}) - : Node(""), - Element("span", { - Element("b", {preprocess_html(req, post.account.emojis, post.account.display_name)}), - Element("br"), "@", post.account.acct(), - }), - }) : Element("b", {"Unknown user"}), - Element("a", {{"class", "post-time_header"}, {"href", std::move(post_url)}, {"title", time_title}}, { - Element("time", {{"datetime", to_rfc3339(post.created_at)}}, {relative_time(post.created_at, current_time()), time_badge}), - }), - }), - - contents, - }); - if (post_status) { - div.nodes.insert(div.nodes.begin(), Element("p", { - blankie::html::HTMLString(post_status->icon_html), " ", post_status->info_node, - })); - } - if (main_post) { - div.attributes = {{"class", "post main_post"}, {"id", "m"}}; - } - - return div; -} - -static inline Element serialize_media(const Media& media) { - Element element = [&]() { - if (media.type == "image") { - return Element("a", {{"href", media.url}}, { - Element("img", {{"loading", "lazy"}, {"src", media.preview_url.value_or(media.url)}}, {}), - }); - } else if (media.type == "video") { - Element video("video", {{"controls", ""}, {"src", media.url}}, {}); - if (media.preview_url) { - video.attributes.push_back({"poster", *media.preview_url}); - } - return video; - } else if (media.type == "audio") { - return Element("audio", {{"controls", ""}, {"src", media.url}}, {}); - } else if (media.type == "gifv") { - // https://hachyderm.io/@Impossible_PhD/111444541628207638 - Element video("video", {{"controls", ""}, {"loop", ""}, {"muted", ""}, {"autoplay", ""}, {"src", media.url}}, {}); - if (media.preview_url) { - video.attributes.push_back({"poster", *media.preview_url}); - } - return video; - } else if (media.type == "unknown") { - if (media.remote_url) { - // https://botsin.space/@lina@vt.social/111053598696451525 - return Element("a", {{"class", "unknown_media"}, {"href", *media.remote_url}}, {"Media is not available from this instance, view externally"}); - } else { - return Element("p", {{"class", "unknown_media"}}, {"Media is not available from this instance"}); - } - } else { - return Element("p", {"Unsupported media type: ", media.type}); - } - }(); - - if (media.description) { - element.attributes.push_back({"alt", *media.description}); - element.attributes.push_back({"title", *media.description}); - } - - return element; -} - -static inline Element serialize_poll(const httplib::Request& req, const Poll& poll) { - using namespace std::string_literals; - - uint64_t voters_count = poll.voters_count >= 0 ? static_cast(poll.voters_count) : poll.votes_count; - Element div("div"); - - auto pick_form = [](uint64_t count, const char* singular, const char* plural) { - return count == 1 ? singular : plural; - }; - - div.nodes.reserve(poll.options.size() + 1); - for (const PollOption& option : poll.options) { - std::string percentage = voters_count - ? std::to_string(option.votes_count * 100 / voters_count) + '%' - : "0%"; - - div.nodes.push_back(Element("div", {{"class", "poll-option"}, {"title", std::to_string(option.votes_count) + pick_form(option.votes_count, " vote", " votes")}}, { - Element("b", {{"class", "poll-percentage"}}, {percentage}), " ", preprocess_html(req, poll.emojis, option.title), - Element("object", {{"class", "poll-bar"}, {"width", percentage}}, {}), - })); - } - - Element p("p", poll.voters_count >= 0 - ? std::vector({std::to_string(voters_count), " ", pick_form(voters_count, "voter", "voters")}) - : std::vector({std::to_string(poll.votes_count), " ", pick_form(poll.votes_count, "vote", "votes")}) - ); - if (poll.expired) { - p.nodes.push_back(" / "); - p.nodes.push_back(Element("time", {{"datetime", to_rfc3339(poll.expires_at)}, {"title", "Expired on "s + full_time(poll.expires_at)}}, {"Expired"})); - } else if (poll.expires_at >= 0) { - p.nodes.push_back(" / "); - p.nodes.push_back(Element("time", {{"datetime", to_rfc3339(poll.expires_at)}, {"title", full_time(poll.expires_at)}}, { - "Expires in ", relative_time(current_time(), poll.expires_at), - })); - } - div.nodes.push_back(std::move(p)); - - return div; -} diff --git a/servehelper.h b/servehelper.h index a1a1e2e..4cd9774 100644 --- a/servehelper.h +++ b/servehelper.h @@ -4,9 +4,6 @@ #include #include "blankie/serializer.h" -#include "lxb_wrapper.h" -struct Post; // forward declaration from models.h -struct Emoji; // forward declaration from models.h class CurlUrl; // forward declaration from curlu_wrapper.h using Element = blankie::html::Element; @@ -22,10 +19,3 @@ bool starts_with(const CurlUrl& url, const CurlUrl& base); std::string get_origin(const httplib::Request& req); std::string proxy_mastodon_url(const httplib::Request& req, const std::string& url_str); bool should_send_304(const httplib::Request& req, uint64_t hash); - -Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool pinned = false, bool main_post = false); - -std::string get_text_content(lxb_dom_node_t* child); -std::string get_text_content(blankie::html::HTMLString str); -blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, const blankie::html::HTMLString& str); -blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::vector& emojis, const std::string& str);