diff --git a/htmlhelper.cpp b/htmlhelper.cpp
index 5dd7ca5..7ac2123 100644
--- a/htmlhelper.cpp
+++ b/htmlhelper.cpp
@@ -9,8 +9,9 @@
static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, lxb_dom_element_t* element);
static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element);
+static inline void preprocess_blockquote(lxb_dom_element_t* element);
static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls);
-static inline void get_text_content(lxb_dom_node_t* node, std::string& out);
+static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth);
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis);
static inline std::vector emojify(lxb_dom_document_t* document, std::string str, const std::vector& emojis);
@@ -51,16 +52,14 @@ Element serialize_post(const httplib::Request& req, const std::string& server, c
std::string get_text_content(lxb_dom_node_t* child) {
std::string out;
- get_text_content(child, out);
+ get_text_content(child, out, 0);
if (!out.empty()) {
size_t remove_from = out.size();
while (remove_from && out[remove_from - 1] == '\n') {
remove_from--;
}
- // Don't engulf everything, otherwise it crashes
- // https://ruby.social/@CoralineAda/109951421922797743
- if (out.size() > remove_from && remove_from != 0) {
+ if (out.size() > remove_from) {
out.erase(remove_from);
}
}
@@ -69,11 +68,7 @@ std::string get_text_content(lxb_dom_node_t* child) {
while (out.size() > remove_to && out[remove_to] == '\n') {
remove_to++;
}
- // Don't engulf everything, otherwise it crashes
- // https://ruby.social/@CoralineAda/109951421922797743
- if (out.size() > remove_to) {
- out.erase(0, remove_to);
- }
+ out.erase(0, remove_to);
}
return out;
@@ -101,6 +96,9 @@ static inline void preprocess_html(const httplib::Request& req, const std::strin
if (strncmp(tag_name, "A", 2) == 0) {
// Proprocess links
preprocess_link(req, domain_name, element);
+ } else if (strncmp(tag_name, "BLOCKQUOTE", 11) == 0) {
+ // Prepend "> " to every text node inside the blockquote
+ preprocess_blockquote(element);
}
// Walk through the element's children
@@ -171,6 +169,29 @@ static inline void preprocess_link(const httplib::Request& req, const std::strin
}
}
+// https://tech.lgbt/@mia@void.rehab/111500676785694526
+//
+// Prefixes the text found under `element` with "> ".
+//
+// Walks the direct children of `element`: element children are processed
+// recursively, and every text child is replaced by a freshly created text node
+// whose content is "> " followed by the original text. Children of any other
+// node type are left untouched.
+//
+// NOTE(review): the prefix is added once per text node, not once per line, so
+// inline markup inside a quote splits the text into several prefixed pieces.
+// Confirm that this is the intended rendering.
+static inline void preprocess_blockquote(lxb_dom_element_t* element) {
+    using namespace std::string_literals;
+
+    lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element));
+    while (child) {
+        if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) {
+            preprocess_blockquote(lxb_dom_interface_element(child));
+        } else if (child->type == LXB_DOM_NODE_TYPE_TEXT) {
+            size_t len = 0;
+            lxb_char_t* text = lxb_dom_node_text_content(child, &len);
+
+            // The text content can come back as NULL (e.g. allocation failure);
+            // in that case keep the original node instead of dereferencing it.
+            if (text) {
+                std::string new_text = "> "s + std::string(reinterpret_cast<const char*>(text), len);
+
+                // The returned buffer is a copy owned by the document; release it
+                // now rather than letting it pile up until the document is destroyed.
+                lxb_dom_document_destroy_text(child->owner_document, text);
+
+                lxb_dom_node_t* new_child = lxb_dom_interface_node(lxb_dom_document_create_text_node(child->owner_document, reinterpret_cast<const lxb_char_t*>(new_text.data()), new_text.size()));
+
+                // Only swap the nodes when the replacement was actually created.
+                if (new_child) {
+                    lxb_dom_node_insert_after(child, new_child);
+                    // Destroy (detach and free) the old node, matching emojify(),
+                    // instead of merely unlinking it from the tree.
+                    lxb_dom_node_destroy(child);
+                    child = new_child;
+                }
+            }
+        }
+
+        child = lxb_dom_node_next(child);
+    }
+}
+
static std::regex unhandled_link_re("\\bunhandled-link\\b");
static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls) {
// https://vt.social/@LucydiaLuminous/111448085044245037
@@ -212,13 +233,16 @@ static inline bool should_fix_link(lxb_dom_element_t* element, const std::string
return child == nullptr;
}
-static inline void get_text_content(lxb_dom_node_t* node, std::string& out) {
- bool is_br = false, is_p = false;
+static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth) {
+ bool is_br = false, is_p = false, is_blockquote = false;
if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
size_t len;
const char* text = reinterpret_cast(lxb_dom_node_text_content(node, &len));
+ for (size_t i = 0; i < blockquote_depth; i++) {
+ out += "> ";
+ }
out.append(text, len);
} else if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
lxb_dom_element_t* element = lxb_dom_interface_element(node);
@@ -226,26 +250,29 @@ static inline void get_text_content(lxb_dom_node_t* node, std::string& out) {
is_p = strncmp(tag_name, "P", 2) == 0;
is_br = strncmp(tag_name, "BR", 3) == 0;
+ is_blockquote = strncmp(tag_name, "BLOCKQUOTE", 11) == 0;
}
- if (is_p || is_br) {
+ if (is_p || is_br || is_blockquote) {
out.push_back('\n');
}
lxb_dom_node_t* child = lxb_dom_node_first_child(node);
while (child) {
- get_text_content(child, out);
+ get_text_content(child, out, blockquote_depth + is_blockquote);
child = lxb_dom_node_next(child);
}
- if (is_p) {
+ if (is_p || is_blockquote) {
out.push_back('\n');
}
}
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis) {
- std::vector nodes = emojify(child->owner_document, get_text_content(child), emojis);
+ size_t len;
+ const char* text = reinterpret_cast(lxb_dom_node_text_content(child, &len));
+ std::vector nodes = emojify(child->owner_document, std::string(text, len), emojis);
lxb_dom_node_insert_after(child, nodes[0]);
lxb_dom_node_destroy(child);