diff --git a/htmlhelper.cpp b/htmlhelper.cpp index 5dd7ca5..7ac2123 100644 --- a/htmlhelper.cpp +++ b/htmlhelper.cpp @@ -9,8 +9,9 @@ static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, lxb_dom_element_t* element); static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element); +static inline void preprocess_blockquote(lxb_dom_element_t* element); static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls); -static inline void get_text_content(lxb_dom_node_t* node, std::string& out); +static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth); static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis); static inline std::vector emojify(lxb_dom_document_t* document, std::string str, const std::vector& emojis); @@ -51,16 +52,14 @@ Element serialize_post(const httplib::Request& req, const std::string& server, c std::string get_text_content(lxb_dom_node_t* child) { std::string out; - get_text_content(child, out); + get_text_content(child, out, 0); if (!out.empty()) { size_t remove_from = out.size(); while (remove_from && out[remove_from - 1] == '\n') { remove_from--; } - // Don't engulf everything, otherwise it crashes - // https://ruby.social/@CoralineAda/109951421922797743 - if (out.size() > remove_from && remove_from != 0) { + if (out.size() > remove_from) { out.erase(remove_from); } } @@ -69,11 +68,7 @@ std::string get_text_content(lxb_dom_node_t* child) { while (out.size() > remove_to && out[remove_to] == '\n') { remove_to++; } - // Don't engulf everything, otherwise it crashes - // https://ruby.social/@CoralineAda/109951421922797743 - if (out.size() > remove_to) { - out.erase(0, remove_to); - } + out.erase(0, remove_to); } return out; @@ -101,6 +96,9 @@ static inline void preprocess_html(const httplib::Request& req, const 
std::strin if (strncmp(tag_name, "A", 2) == 0) { // Proprocess links preprocess_link(req, domain_name, element); + } else if (strncmp(tag_name, "BLOCKQUOTE", 11) == 0) { + // Prepend "> " to every text node inside the blockquote
+ preprocess_blockquote(element); } // Walk through the element's children @@ -171,6 +169,29 @@ static inline void preprocess_link(const httplib::Request& req, const std::strin } } +// https://tech.lgbt/@mia@void.rehab/111500676785694526 +static inline void preprocess_blockquote(lxb_dom_element_t* element) { + using namespace std::string_literals; + + lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element)); + while (child) { + if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) { + preprocess_blockquote(lxb_dom_interface_element(child)); + } else if (child->type == LXB_DOM_NODE_TYPE_TEXT) { + size_t len; + const char* text = reinterpret_cast(lxb_dom_node_text_content(child, &len)); + std::string new_text = "> "s + std::string(text, len); + + lxb_dom_node_t* new_child = lxb_dom_interface_node(lxb_dom_document_create_text_node(child->owner_document, reinterpret_cast(new_text.data()), new_text.size())); + lxb_dom_node_insert_after(child, new_child); + lxb_dom_node_remove(child); + child = new_child; + } + + child = lxb_dom_node_next(child); + } +} + static std::regex unhandled_link_re("\\bunhandled-link\\b"); static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls) { // https://vt.social/@LucydiaLuminous/111448085044245037 @@ -212,13 +233,16 @@ static inline bool should_fix_link(lxb_dom_element_t* element, const std::string return child == nullptr; } -static inline void get_text_content(lxb_dom_node_t* node, std::string& out) { - bool is_br = false, is_p = false; +static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth) { + bool is_br = false, is_p = false, is_blockquote = false; if (node->type == LXB_DOM_NODE_TYPE_TEXT) { size_t len; const char* text = reinterpret_cast(lxb_dom_node_text_content(node, &len)); + for (size_t i = 0; i < blockquote_depth; i++) { + out += "> "; + } out.append(text, len); } else if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) { 
lxb_dom_element_t* element = lxb_dom_interface_element(node); @@ -226,26 +250,29 @@ static inline void get_text_content(lxb_dom_node_t* node, std::string& out) { is_p = strncmp(tag_name, "P", 2) == 0; is_br = strncmp(tag_name, "BR", 3) == 0; + is_blockquote = strncmp(tag_name, "BLOCKQUOTE", 11) == 0; } - if (is_p || is_br) { + if (is_p || is_br || is_blockquote) { out.push_back('\n'); } lxb_dom_node_t* child = lxb_dom_node_first_child(node); while (child) { - get_text_content(child, out); + get_text_content(child, out, blockquote_depth + is_blockquote); child = lxb_dom_node_next(child); } - if (is_p) { + if (is_p || is_blockquote) { out.push_back('\n'); } } static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis) { - std::vector nodes = emojify(child->owner_document, get_text_content(child), emojis); + size_t len; + const char* text = reinterpret_cast(lxb_dom_node_text_content(child, &len)); + std::vector nodes = emojify(child->owner_document, std::string(text, len), emojis); lxb_dom_node_insert_after(child, nodes[0]); lxb_dom_node_destroy(child);