#include <algorithm>
#include <cstdint>
#include <cstring>
#include <optional>
#include <regex>
#include <string>
#include <utility>
#include <vector>

#include "models.h"
#include "settings.h"
#include "timeutils.h"
#include "curlu_wrapper.h"
#include "font_awesome.h"
#include "blankie/escape.h"
#include "htmlhelper.h"

static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector<Emoji>& emojis, lxb_dom_element_t* element);
static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element);
static inline void preprocess_blockquote(lxb_dom_element_t* element);
static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls);
static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth);
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis);
static inline std::vector<lxb_dom_node_t*> emojify(lxb_dom_document_t* document, std::string str, const std::vector<Emoji>& emojis);

// Banner rendered above a post (boosted / pinned / self-reply).
struct PostStatus {
    const char* icon_html; // raw HTML of the icon, emitted unescaped
    Node info_node;        // text or markup shown next to the icon
};
static Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool main_post, const std::optional<PostStatus>& post_status, const Post* reblogged = nullptr);
static inline Element serialize_media(const Media& media);
static inline Element serialize_poll(const httplib::Request& req, const Poll& poll);


// Serializes `post` into an Element, choosing the status banner to show:
// a boost banner if the post is a reblog, a pin banner if `pinned`, a reply
// banner if the post replies to its own author, and no banner otherwise.
// `main_post` is forwarded to the internal overload, which marks the element
// as the main post of the page.
Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool pinned, bool main_post) {
    using namespace std::string_literals;

    if (post.reblog) {
        PostStatus post_status = {
            fa_retweet,
            preprocess_html(req, post.account.emojis, post.account.display_name + " boosted"),
        };
        // Render the boosted post itself; the boosting post is passed along as
        // a fallback source for the post URL
        return serialize_post(req, server, *post.reblog, main_post, post_status, &post);
    } else if (pinned) {
        // NOTE(review): the HTMLString wrapper suggests this literal may have
        // contained markup originally; confirm against the upstream source
        PostStatus post_status = {
            fa_thumbtack,
            blankie::html::HTMLString("Pinned post"),
        };
        return serialize_post(req, server, post, main_post, post_status);
    } else if (post.in_reply_to_id && post.in_reply_to_account_id && post.account.id == *post.in_reply_to_account_id) {
        PostStatus post_status = {
            fa_reply,
            preprocess_html(req, post.account.emojis, "Replied to "s + post.account.display_name),
        };
        return serialize_post(req, server, post, main_post, post_status);
    } else {
        return serialize_post(req, server, post, main_post, std::nullopt);
    }
}

// Returns the text content of `child` and its descendants, with paragraph,
// line-break and blockquote boundaries turned into '\n' and all leading and
// trailing newlines removed.
std::string get_text_content(lxb_dom_node_t* child) {
    std::string out;
    get_text_content(child, out, 0);

    // Strip trailing newlines; npos + 1 wraps to 0, which clears a string
    // consisting solely of newlines (or an empty one)
    out.erase(out.find_last_not_of('\n') + 1);
    // Strip leading newlines
    out.erase(0, out.find_first_not_of('\n'));

    return out;
}

// Parses `str` as HTML and returns the text content of its body.
std::string get_text_content(blankie::html::HTMLString str) {
    LXB::HTML::Document document(str.str);
    return get_text_content(document.body());
}

// Parses `str` as HTML, rewrites links, marks blockquotes and substitutes
// custom emoji shortcodes, then returns the re-serialized document.
blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector<Emoji>& emojis, const blankie::html::HTMLString& str) {
    LXB::HTML::Document document(str.str);
    preprocess_html(req, domain_name, emojis, document.body_element());
    return blankie::html::HTMLString(document.serialize());
}

// Plain-text variant: escapes `str` first and runs it through the HTML
// overload with an empty domain name.
blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::vector<Emoji>& emojis, const std::string& str) {
    return preprocess_html(req, "", emojis, blankie::html::HTMLString(blankie::html::escape(str)));
}


// Recursively preprocesses `element`: links and blockquotes get their
// dedicated treatment, and every direct text child is emojified.
static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector<Emoji>& emojis, lxb_dom_element_t* element) {
    const char* tag_name = reinterpret_cast<const char*>(lxb_dom_element_tag_name(element, nullptr));

    if (strncmp(tag_name, "A", 2) == 0) {
        // Preprocess links
        preprocess_link(req, domain_name, element);
    } else if (strncmp(tag_name, "BLOCKQUOTE", 11) == 0) {
        // Prepend "> " to every text node inside the blockquote
        preprocess_blockquote(element);
    }

    // Walk through the element's children
    lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element));
    while (child) {
        if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            preprocess_html(req, domain_name, emojis, lxb_dom_interface_element(child));
        } else if (child->type == LXB_DOM_NODE_TYPE_TEXT) {
            // emojify() returns the last node it inserted, so the walk resumes
            // after the replacement nodes
            child = emojify(child, emojis);
        }

        child = lxb_dom_node_next(child);
    }
}

// examples of mention and hashtag: https://anarres.family/@ashten@social.platypus-sandbox.com/111532064387673301
static std::regex proxy_class_re("\\b(mention|hashtag)\\b");

// Removes the link's `target` attribute, rewrites its href to go through the
// proxy when it is a mention/hashtag or points at the instance, and replaces
// the link text with the href when should_fix_link() says so.
// Throws LXB::Exception on lexbor failures and rethrows CurlUrlException for
// anything other than malformed input.
static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element) {
    using namespace std::string_literals;

    // Remove target=...
    lxb_status_t status = lxb_dom_element_remove_attribute(element, reinterpret_cast<const lxb_char_t*>("target"), 6);
    if (status != LXB_STATUS_OK) {
        throw LXB::Exception(status);
    }

    size_t href_c_len;
    const lxb_char_t* href_c = lxb_dom_element_get_attribute(element, reinterpret_cast<const lxb_char_t*>("href"), 4, &href_c_len);
    if (!href_c) {
        return;
    }
    std::string href(reinterpret_cast<const char*>(href_c), href_c_len);

    size_t cls_c_len;
    const lxb_char_t* cls_c = lxb_dom_element_class(element, &cls_c_len);
    std::string cls = cls_c ? std::string(reinterpret_cast<const char*>(cls_c), cls_c_len) : "";

    try {
        // Seed the URL with the current request's location first, then set the
        // href on top of it
        CurlUrl href_url;
        href_url.set(CURLUPART_URL, get_origin(req));
        href_url.set(CURLUPART_PATH, std::string(href_url.get(CURLUPART_PATH).get()) + req.path);
        href_url.set(CURLUPART_URL, href, CURLU_NON_SUPPORT_SCHEME);

        CurlUrl instance_url_base;
        instance_url_base.set(CURLUPART_SCHEME, "https");
        instance_url_base.set(CURLUPART_HOST, domain_name);

        // Instance base is used for link fields
        if (std::regex_search(cls, proxy_class_re) || starts_with(href_url, instance_url_base)) {
            // Proxy this instance's URLs to Coyote
            href = proxy_mastodon_url(req, std::move(href));
            lxb_dom_element_set_attribute(element, reinterpret_cast<const lxb_char_t*>("href"), 4, reinterpret_cast<const lxb_char_t*>(href.data()), href.size());
        }
    } catch (const CurlUrlException& e) {
        // Malformed hrefs are left untouched (example: on eldritch.cafe/about)
        if (e.code != CURLUE_MALFORMED_INPUT) {
            throw;
        }
    }

    if (should_fix_link(element, cls)) {
        // Set the link's text content to its href
        status = lxb_dom_node_text_content_set(lxb_dom_interface_node(element), reinterpret_cast<const lxb_char_t*>(href.data()), href.size());
        if (status != LXB_STATUS_OK) {
            throw LXB::Exception(status);
        }
    }
}

// https://tech.lgbt/@mia@void.rehab/111500676785694526
// Recursively replaces every text node below `element` with a copy prefixed
// by "> ".
static inline void preprocess_blockquote(lxb_dom_element_t* element) {
    using namespace std::string_literals;

    lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element));
    while (child) {
        if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            preprocess_blockquote(lxb_dom_interface_element(child));
        } else if (child->type == LXB_DOM_NODE_TYPE_TEXT) {
            size_t len;
            const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(child, &len));
            std::string new_text = "> "s + std::string(text, len);

            lxb_dom_node_t* new_child = lxb_dom_interface_node(lxb_dom_document_create_text_node(child->owner_document, reinterpret_cast<const lxb_char_t*>(new_text.data()), new_text.size()));
            lxb_dom_node_insert_after(child, new_child);
            lxb_dom_node_remove(child);
            // Continue the walk from the replacement node
            child = new_child;
        }

        child = lxb_dom_node_next(child);
    }
}

static std::regex unhandled_link_re("\\bunhandled-link\\b");

// Returns true if the link's text should be replaced by its href: either the
// link carries the `unhandled-link` class, or its children are exactly three
// spans with classes "invisible", "ellipsis" (or ""), and "invisible".
static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls) {
    // https://vt.social/@LucydiaLuminous/111448085044245037
    if (std::regex_search(element_cls, unhandled_link_re)) {
        return true;
    }

    // True if `node` is a SPAN element whose class attribute equals
    // `expected_cls` exactly
    auto expected_element = [](lxb_dom_node_t* node, const char* expected_cls) {
        if (!node || node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
            return false;
        }
        lxb_dom_element_t* span = lxb_dom_interface_element(node);

        const char* tag_name = reinterpret_cast<const char*>(lxb_dom_element_tag_name(span, nullptr));
        if (strncmp(tag_name, "SPAN", 5) != 0) {
            return false;
        }

        const lxb_char_t* cls = lxb_dom_element_get_attribute(span, reinterpret_cast<const lxb_char_t*>("class"), 5, nullptr);
        return cls && strcmp(reinterpret_cast<const char*>(cls), expected_cls) == 0;
    };

    lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element));
    if (!expected_element(child, "invisible")) {
        return false;
    }

    child = lxb_dom_node_next(child);
    if (!expected_element(child, "ellipsis") && !expected_element(child, "")) {
        return false;
    }

    child = lxb_dom_node_next(child);
    if (!expected_element(child, "invisible")) {
        return false;
    }

    // There must be nothing after the third span
    child = lxb_dom_node_next(child);
    return child == nullptr;
}

// Appends the text content of `node` and its descendants to `out`.
// Text nodes are prefixed with one "> " per enclosing blockquote; P, BR and
// BLOCKQUOTE elements emit a newline before their children, and P and
// BLOCKQUOTE emit another one after them.
static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth) {
    bool is_br = false, is_p = false, is_blockquote = false;

    if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
        size_t len;
        const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(node, &len));

        for (size_t i = 0; i < blockquote_depth; i++) {
            out += "> ";
        }
        out.append(text, len);
    } else if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        lxb_dom_element_t* element = lxb_dom_interface_element(node);
        const char* tag_name = reinterpret_cast<const char*>(lxb_dom_element_tag_name(element, nullptr));

        is_p = strncmp(tag_name, "P", 2) == 0;
        is_br = strncmp(tag_name, "BR", 3) == 0;
        is_blockquote = strncmp(tag_name, "BLOCKQUOTE", 11) == 0;
    }

    if (is_p || is_br || is_blockquote) {
        out.push_back('\n');
    }

    lxb_dom_node_t* child = lxb_dom_node_first_child(node);
    while (child) {
        get_text_content(child, out, blockquote_depth + is_blockquote);
        child = lxb_dom_node_next(child);
    }

    if (is_p || is_blockquote) {
        out.push_back('\n');
    }
}

// Replaces the text node `child` with the nodes produced by splitting its
// text at known custom emoji shortcodes, and returns the last inserted node.
// If the split produces no nodes (empty text), `child` is left in place and
// returned unchanged.
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis) {
    size_t len;
    const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(child, &len));

    std::vector<lxb_dom_node_t*> nodes = emojify(child->owner_document, std::string(text, len), emojis);
    if (nodes.empty()) {
        // Nothing to substitute; indexing nodes[0] below would be out of bounds
        return child;
    }

    lxb_dom_node_insert_after(child, nodes[0]);
    lxb_dom_node_destroy(child);
    child = nodes[0];

    for (size_t i = 1; i < nodes.size(); i++) {
        lxb_dom_node_insert_after(child, nodes[i]);
        child = nodes[i];
    }

    return child;
}

static std::regex shortcode_re(":([a-zA-Z0-9_]+):");

// Splits `str` into DOM nodes owned by `document`: each `:shortcode:` that
// matches an entry of `emojis` becomes an IMG element, and the text between
// them becomes text nodes. Unknown shortcodes stay in the text verbatim.
static inline std::vector<lxb_dom_node_t*> emojify(lxb_dom_document_t* document, std::string str, const std::vector<Emoji>& emojis) {
    std::string buf; // text accumulated since the last emitted node
    std::smatch sm;
    std::vector<lxb_dom_node_t*> res;

    while (std::regex_search(str, sm, shortcode_re)) {
        buf += sm.prefix();

        std::string group_0 = sm.str(0);
        const std::string shortcode = sm.str(1);
        auto emoji = std::find_if(emojis.begin(), emojis.end(), [&](const Emoji& i) {
            return i.shortcode == shortcode;
        });
        if (emoji != emojis.end()) {
            // Flush the pending text, then emit the emoji image
            res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast<const lxb_char_t*>(buf.data()), buf.size())));
            buf.clear();

            lxb_dom_element_t* img = lxb_dom_document_create_element(document, reinterpret_cast<const lxb_char_t*>("IMG"), 3, nullptr);
            lxb_dom_element_set_attribute(img, reinterpret_cast<const lxb_char_t*>("class"), 5, reinterpret_cast<const lxb_char_t*>("custom_emoji"), 12);
            lxb_dom_element_set_attribute(img, reinterpret_cast<const lxb_char_t*>("alt"), 3, reinterpret_cast<const lxb_char_t*>(group_0.data()), group_0.size());
            lxb_dom_element_set_attribute(img, reinterpret_cast<const lxb_char_t*>("title"), 5, reinterpret_cast<const lxb_char_t*>(group_0.data()), group_0.size());
            lxb_dom_element_set_attribute(img, reinterpret_cast<const lxb_char_t*>("src"), 3, reinterpret_cast<const lxb_char_t*>(emoji->url.data()), emoji->url.size());
            res.push_back(lxb_dom_interface_node(img));
        } else {
            buf += group_0;
        }

        str = sm.suffix();
    }
    if (!str.empty()) {
        buf += std::move(str);
    }
    if (!buf.empty()) {
        res.push_back(lxb_dom_interface_node(lxb_dom_document_create_text_node(document, reinterpret_cast<const lxb_char_t*>(buf.data()), buf.size())));
    }

    return res;
}


// Builds the element for a single post: optional status banner, header
// (author link and timestamp) and contents (body, attachments, poll), wrapped
// in a <details> when the post is marked sensitive.
// `reblogged` is the boosting post, if any; it is only used for the post URL
// when `post` has no account id.
static Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool main_post, const std::optional<PostStatus>& post_status, const Post* reblogged) {
    using namespace std::string_literals;

    bool user_known = !post.account.id.empty();
    bool user_ref_known = !post.account.username.empty() && !post.account.server.empty();
    // `reblogged == nullptr` since a malicious server could take down the frontend
    // by sending a post that is not a reblog with no account information
    std::string post_url = user_known || reblogged == nullptr
        ? get_origin(req) + '/' + server + "/@" + post.account.acct(false) + '/' + post.id + "#m"
        : get_origin(req) + '/' + server + "/@" + reblogged->account.acct(false) + '/' + reblogged->id + "#m";

    // A negative edited_at is treated as "never edited"
    std::string time_title = post.edited_at < 0
        ? full_time(post.created_at)
        : "Created: "s + full_time(post.created_at) + "\nEdited: " + full_time(post.edited_at);
    const char* time_badge = post.edited_at < 0 ? "" : " (edited)";

    blankie::html::HTMLString preprocessed_html = preprocess_html(req, server, post.emojis, post.content);
    // Workaround for https://vt.social/@a1ba@suya.place/110552480243348878#m
    // Content without any <p> gets its newlines turned into <br> and is
    // wrapped in a single paragraph
    if (preprocessed_html.str.find("<p>") == std::string::npos) {
        size_t offset = 0;
        while ((offset = preprocessed_html.str.find('\n', offset)) != std::string::npos) {
            preprocessed_html.str.replace(offset, 1, "<br>");
            offset += 4; // length of "<br>"
        }

        // 3 + 4 == length of "<p>" + length of "</p>"
        preprocessed_html.str.reserve(preprocessed_html.str.size() + 3 + 4);
        preprocessed_html.str.insert(0, "<p>");
        preprocessed_html.str.append("</p>");
    }
    Element contents("div", {{"class", "post-contents"}}, {std::move(preprocessed_html)});

    Element post_attachments("div", {{"class", "post-attachments"}}, {});
    post_attachments.nodes.reserve(post.media_attachments.size());
    for (const Media& media : post.media_attachments) {
        post_attachments.nodes.push_back(serialize_media(media));
    }
    contents.nodes.push_back(std::move(post_attachments));

    if (post.poll) {
        contents.nodes.push_back(serialize_poll(req, *post.poll));
    }

    if (post.sensitive) {
        std::string spoiler_text = !post.spoiler_text.empty() ? post.spoiler_text : "See more";
        contents = Element("details", {
            Element("summary", {preprocess_html(req, post.emojis, std::move(spoiler_text))}),
            std::move(contents),
        });
        if (UserSettings(req).auto_open_cw) {
            contents.attributes.push_back({"open", ""});
        }
    }

    Element div("div", {{"class", "post"}}, {
        Element("div", {{"class", "post-header"}}, {
            user_ref_known ? Element("a", {{"href", get_origin(req) + '/' + server + "/@" + post.account.acct(false)}}, {
                !post.account.avatar_static.empty()
                    ? Element("img", {{"class", "post-avatar"}, {"alt", "User profile picture"}, {"loading", "lazy"}, {"src", post.account.avatar_static}}, {})
                    : Node(""),
                Element("span", {
                    Element("b", {preprocess_html(req, post.account.emojis, post.account.display_name)}),
                    Element("br"), "@", post.account.acct(),
                }),
            }) : Element("b", {"Unknown user"}),
            Element("a", {{"class", "post-time_header"}, {"href", std::move(post_url)}, {"title", time_title}}, {
                Element("time", {{"datetime", to_rfc3339(post.created_at)}}, {relative_time(post.created_at, current_time()), time_badge}),
            }),
        }),

        // `contents` is not used again below, so it can be moved from
        std::move(contents),
    });
    if (post_status) {
        // The status banner goes before the header
        div.nodes.insert(div.nodes.begin(), Element("p", {
            blankie::html::HTMLString(post_status->icon_html), " ", post_status->info_node,
        }));
    }
    if (main_post) {
        // id "m" matches the "#m" fragment appended to post URLs above
        div.attributes = {{"class", "post main_post"}, {"id", "m"}};
    }

    return div;
}

// Builds the element for one media attachment according to its type, and adds
// alt/title attributes when the attachment has a description.
static inline Element serialize_media(const Media& media) {
    Element element = [&]() {
        if (media.type == "image") {
            return Element("a", {{"href", media.url}}, {
                Element("img", {{"loading", "lazy"}, {"src", media.preview_url.value_or(media.url)}}, {}),
            });
        } else if (media.type == "video") {
            Element video("video", {{"controls", ""}, {"src", media.url}}, {});
            if (media.preview_url) {
                video.attributes.push_back({"poster", *media.preview_url});
            }
            return video;
        } else if (media.type == "audio") {
            return Element("audio", {{"controls", ""}, {"src", media.url}}, {});
        } else if (media.type == "gifv") {
            // https://hachyderm.io/@Impossible_PhD/111444541628207638
            Element video("video", {{"controls", ""}, {"loop", ""}, {"muted", ""}, {"autoplay", ""}, {"src", media.url}}, {});
            if (media.preview_url) {
                video.attributes.push_back({"poster", *media.preview_url});
            }
            return video;
        } else if (media.type == "unknown") {
            if (media.remote_url) {
                // https://botsin.space/@lina@vt.social/111053598696451525
                return Element("a", {{"class", "unknown_media"}, {"href", *media.remote_url}}, {"Media is not available from this instance, view externally"});
            } else {
                return Element("p", {{"class", "unknown_media"}}, {"Media is not available from this instance"});
            }
        } else {
            return Element("p", {"Unsupported media type: ", media.type});
        }
    }();

    if (media.description) {
        element.attributes.push_back({"alt", *media.description});
        element.attributes.push_back({"title", *media.description});
    }

    return element;
}

// Builds the element for a poll: one row per option with its percentage and
// a bar, followed by a summary line with the voter/vote count and the expiry.
static inline Element serialize_poll(const httplib::Request& req, const Poll& poll) {
    using namespace std::string_literals;

    // A negative voters_count means it is unavailable; fall back to the
    // number of votes as the percentage base
    uint64_t voters_count = poll.voters_count >= 0 ? static_cast<uint64_t>(poll.voters_count) : poll.votes_count;
    Element div("div");

    auto pick_form = [](uint64_t count, const char* singular, const char* plural) {
        return count == 1 ? singular : plural;
    };

    div.nodes.reserve(poll.options.size() + 1);
    for (const PollOption& option : poll.options) {
        // Guard against division by zero when nobody has voted
        std::string percentage = voters_count
            ? std::to_string(option.votes_count * 100 / voters_count) + '%'
            : "0%";

        div.nodes.push_back(Element("div", {{"class", "poll-option"}, {"title", std::to_string(option.votes_count) + pick_form(option.votes_count, " vote", " votes")}}, {
            Element("b", {{"class", "poll-percentage"}}, {percentage}), " ", preprocess_html(req, poll.emojis, option.title),
            Element("object", {{"class", "poll-bar"}, {"width", percentage}}, {}),
        }));
    }

    Element p("p", poll.voters_count >= 0
        ? std::vector<Node>({std::to_string(voters_count), " ", pick_form(voters_count, "voter", "voters")})
        : std::vector<Node>({std::to_string(poll.votes_count), " ", pick_form(poll.votes_count, "vote", "votes")})
    );
    if (poll.expired) {
        p.nodes.push_back(" / ");
        p.nodes.push_back(Element("time", {{"datetime", to_rfc3339(poll.expires_at)}, {"title", "Expired on "s + full_time(poll.expires_at)}}, {"Expired"}));
    } else if (poll.expires_at >= 0) {
        // A negative expires_at means the poll has no expiry to show
        p.nodes.push_back(" / ");
        p.nodes.push_back(Element("time", {{"datetime", to_rfc3339(poll.expires_at)}, {"title", full_time(poll.expires_at)}}, {
            "Expires in ", relative_time(current_time(), poll.expires_at),
        }));
    }
    div.nodes.push_back(std::move(p));

    return div;
}