Prepend "> " to <blockquote> elements

https://tech.lgbt/@mia@void.rehab/111500676785694526
This commit is contained in:
blankie 2023-12-11 11:30:01 +11:00
parent 8088a8aff9
commit 189019d133
Signed by: blankie
GPG Key ID: CC15FC822C7F61F5
1 changed file with 43 additions and 16 deletions

View File

@ -9,8 +9,9 @@
static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector<Emoji>& emojis, lxb_dom_element_t* element); static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector<Emoji>& emojis, lxb_dom_element_t* element);
static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element); static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element);
static inline void preprocess_blockquote(lxb_dom_element_t* element);
static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls); static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls);
static inline void get_text_content(lxb_dom_node_t* node, std::string& out); static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth);
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis); static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis);
static inline std::vector<lxb_dom_node*> emojify(lxb_dom_document_t* document, std::string str, const std::vector<Emoji>& emojis); static inline std::vector<lxb_dom_node*> emojify(lxb_dom_document_t* document, std::string str, const std::vector<Emoji>& emojis);
@ -51,16 +52,14 @@ Element serialize_post(const httplib::Request& req, const std::string& server, c
std::string get_text_content(lxb_dom_node_t* child) { std::string get_text_content(lxb_dom_node_t* child) {
std::string out; std::string out;
get_text_content(child, out); get_text_content(child, out, 0);
if (!out.empty()) { if (!out.empty()) {
size_t remove_from = out.size(); size_t remove_from = out.size();
while (remove_from && out[remove_from - 1] == '\n') { while (remove_from && out[remove_from - 1] == '\n') {
remove_from--; remove_from--;
} }
// Don't engulf everything, otherwise it crashes if (out.size() > remove_from) {
// https://ruby.social/@CoralineAda/109951421922797743
if (out.size() > remove_from && remove_from != 0) {
out.erase(remove_from); out.erase(remove_from);
} }
} }
@ -69,12 +68,8 @@ std::string get_text_content(lxb_dom_node_t* child) {
while (out.size() > remove_to && out[remove_to] == '\n') { while (out.size() > remove_to && out[remove_to] == '\n') {
remove_to++; remove_to++;
} }
// Don't engulf everything, otherwise it crashes
// https://ruby.social/@CoralineAda/109951421922797743
if (out.size() > remove_to) {
out.erase(0, remove_to); out.erase(0, remove_to);
} }
}
return out; return out;
} }
@ -101,6 +96,9 @@ static inline void preprocess_html(const httplib::Request& req, const std::strin
if (strncmp(tag_name, "A", 2) == 0) { if (strncmp(tag_name, "A", 2) == 0) {
// Preprocess links // Preprocess links
preprocess_link(req, domain_name, element); preprocess_link(req, domain_name, element);
} else if (strncmp(tag_name, "BLOCKQUOTE", 11) == 0) {
// Prepend "> " to <blockquote>
preprocess_blockquote(element);
} }
// Walk through the element's children // Walk through the element's children
@ -171,6 +169,29 @@ static inline void preprocess_link(const httplib::Request& req, const std::strin
} }
} }
// https://tech.lgbt/@mia@void.rehab/111500676785694526
// Prepends "> " to every text node found beneath `element`, recursing into
// child elements. Each affected text node is replaced in the tree by a newly
// created text node that carries the prefixed content.
//
// `element` is the element whose descendants are rewritten in place.
static inline void preprocess_blockquote(lxb_dom_element_t* element) {
    lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element));
    while (child) {
        if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            preprocess_blockquote(lxb_dom_interface_element(child));
        } else if (child->type == LXB_DOM_NODE_TYPE_TEXT) {
            size_t len = 0;
            const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(child, &len));
            // Leave the node untouched if its text could not be fetched:
            // building a std::string from a null pointer is undefined behaviour.
            if (text) {
                std::string new_text = "> ";
                new_text.append(text, len);

                lxb_dom_node_t* new_child = lxb_dom_interface_node(lxb_dom_document_create_text_node(child->owner_document, reinterpret_cast<const lxb_char_t*>(new_text.data()), new_text.size()));
                // Only swap nodes when the replacement was actually created,
                // so a failed creation keeps the original text in the tree.
                if (new_child) {
                    lxb_dom_node_insert_after(child, new_child);
                    lxb_dom_node_remove(child);
                    // Continue from the replacement, which now sits where the
                    // old node was.
                    child = new_child;
                }
            }
        }

        child = lxb_dom_node_next(child);
    }
}
static std::regex unhandled_link_re("\\bunhandled-link\\b"); static std::regex unhandled_link_re("\\bunhandled-link\\b");
static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls) { static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls) {
// https://vt.social/@LucydiaLuminous/111448085044245037 // https://vt.social/@LucydiaLuminous/111448085044245037
@ -212,13 +233,16 @@ static inline bool should_fix_link(lxb_dom_element_t* element, const std::string
return child == nullptr; return child == nullptr;
} }
static inline void get_text_content(lxb_dom_node_t* node, std::string& out) { static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth) {
bool is_br = false, is_p = false; bool is_br = false, is_p = false, is_blockquote = false;
if (node->type == LXB_DOM_NODE_TYPE_TEXT) { if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
size_t len; size_t len;
const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(node, &len)); const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(node, &len));
for (size_t i = 0; i < blockquote_depth; i++) {
out += "> ";
}
out.append(text, len); out.append(text, len);
} else if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) { } else if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
lxb_dom_element_t* element = lxb_dom_interface_element(node); lxb_dom_element_t* element = lxb_dom_interface_element(node);
@ -226,26 +250,29 @@ static inline void get_text_content(lxb_dom_node_t* node, std::string& out) {
is_p = strncmp(tag_name, "P", 2) == 0; is_p = strncmp(tag_name, "P", 2) == 0;
is_br = strncmp(tag_name, "BR", 3) == 0; is_br = strncmp(tag_name, "BR", 3) == 0;
is_blockquote = strncmp(tag_name, "BLOCKQUOTE", 11) == 0;
} }
if (is_p || is_br) { if (is_p || is_br || is_blockquote) {
out.push_back('\n'); out.push_back('\n');
} }
lxb_dom_node_t* child = lxb_dom_node_first_child(node); lxb_dom_node_t* child = lxb_dom_node_first_child(node);
while (child) { while (child) {
get_text_content(child, out); get_text_content(child, out, blockquote_depth + is_blockquote);
child = lxb_dom_node_next(child); child = lxb_dom_node_next(child);
} }
if (is_p) { if (is_p || is_blockquote) {
out.push_back('\n'); out.push_back('\n');
} }
} }
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis) { static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis) {
std::vector<lxb_dom_node_t*> nodes = emojify(child->owner_document, get_text_content(child), emojis); size_t len;
const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(child, &len));
std::vector<lxb_dom_node_t*> nodes = emojify(child->owner_document, std::string(text, len), emojis);
lxb_dom_node_insert_after(child, nodes[0]); lxb_dom_node_insert_after(child, nodes[0]);
lxb_dom_node_destroy(child); lxb_dom_node_destroy(child);