Prepend "> " to <blockquote> elements

https://tech.lgbt/@mia@void.rehab/111500676785694526
This commit is contained in:
blankie 2023-12-11 11:30:01 +11:00
parent 8088a8aff9
commit 189019d133
Signed by: blankie
GPG Key ID: CC15FC822C7F61F5
1 changed files with 43 additions and 16 deletions

View File

@ -9,8 +9,9 @@
static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector<Emoji>& emojis, lxb_dom_element_t* element);
static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element);
static inline void preprocess_blockquote(lxb_dom_element_t* element);
static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls);
static inline void get_text_content(lxb_dom_node_t* node, std::string& out);
static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth);
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis);
static inline std::vector<lxb_dom_node*> emojify(lxb_dom_document_t* document, std::string str, const std::vector<Emoji>& emojis);
@ -51,16 +52,14 @@ Element serialize_post(const httplib::Request& req, const std::string& server, c
std::string get_text_content(lxb_dom_node_t* child) {
std::string out;
get_text_content(child, out);
get_text_content(child, out, 0);
if (!out.empty()) {
size_t remove_from = out.size();
while (remove_from && out[remove_from - 1] == '\n') {
remove_from--;
}
// Don't engulf everything, otherwise it crashes
// https://ruby.social/@CoralineAda/109951421922797743
if (out.size() > remove_from && remove_from != 0) {
if (out.size() > remove_from) {
out.erase(remove_from);
}
}
@ -69,11 +68,7 @@ std::string get_text_content(lxb_dom_node_t* child) {
while (out.size() > remove_to && out[remove_to] == '\n') {
remove_to++;
}
// Don't engulf everything, otherwise it crashes
// https://ruby.social/@CoralineAda/109951421922797743
if (out.size() > remove_to) {
out.erase(0, remove_to);
}
out.erase(0, remove_to);
}
return out;
@ -101,6 +96,9 @@ static inline void preprocess_html(const httplib::Request& req, const std::strin
if (strncmp(tag_name, "A", 2) == 0) {
// Preprocess links
preprocess_link(req, domain_name, element);
} else if (strncmp(tag_name, "BLOCKQUOTE", 11) == 0) {
// Prepend "> " to <blockquote>
preprocess_blockquote(element);
}
// Walk through the element's children
@ -171,6 +169,29 @@ static inline void preprocess_link(const httplib::Request& req, const std::strin
}
}
// https://tech.lgbt/@mia@void.rehab/111500676785694526
//
// Prepends "> " to every text node found beneath `element`, recursing into
// all descendant elements. Each affected text node is replaced in the tree by
// a freshly created text node that carries the prefixed content.
//
// NOTE(review): the prefix is added per text node, including text nodes that
// sit inside nested inline elements, so a single rendered line may end up
// with more than one "> " marker -- confirm this is the intended output.
static inline void preprocess_blockquote(lxb_dom_element_t* element) {
    using namespace std::string_literals;

    lxb_dom_node_t* child = lxb_dom_node_first_child(lxb_dom_interface_node(element));
    while (child) {
        if (child->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            // Descend so that text nested in child elements is prefixed too
            preprocess_blockquote(lxb_dom_interface_element(child));
        } else if (child->type == LXB_DOM_NODE_TYPE_TEXT) {
            size_t len = 0;
            const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(child, &len));

            // Only touch the node when its text could actually be read;
            // otherwise leave the original node in place instead of building
            // a string from a null pointer.
            if (text) {
                std::string new_text = "> "s;
                new_text.append(text, len);

                lxb_dom_node_t* new_child = lxb_dom_interface_node(lxb_dom_document_create_text_node(child->owner_document, reinterpret_cast<const lxb_char_t*>(new_text.data()), new_text.size()));

                // Swap the nodes only if the replacement was created, so a
                // failed creation never drops the original text or hands a
                // null node to the insertion routine.
                if (new_child) {
                    lxb_dom_node_insert_after(child, new_child);
                    lxb_dom_node_remove(child);
                    // Continue iteration from the node that is now in the tree
                    child = new_child;
                }
            }
        }

        child = lxb_dom_node_next(child);
    }
}
static std::regex unhandled_link_re("\\bunhandled-link\\b");
static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls) {
// https://vt.social/@LucydiaLuminous/111448085044245037
@ -212,13 +233,16 @@ static inline bool should_fix_link(lxb_dom_element_t* element, const std::string
return child == nullptr;
}
static inline void get_text_content(lxb_dom_node_t* node, std::string& out) {
bool is_br = false, is_p = false;
static inline void get_text_content(lxb_dom_node_t* node, std::string& out, size_t blockquote_depth) {
bool is_br = false, is_p = false, is_blockquote = false;
if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
size_t len;
const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(node, &len));
for (size_t i = 0; i < blockquote_depth; i++) {
out += "> ";
}
out.append(text, len);
} else if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
lxb_dom_element_t* element = lxb_dom_interface_element(node);
@ -226,26 +250,29 @@ static inline void get_text_content(lxb_dom_node_t* node, std::string& out) {
is_p = strncmp(tag_name, "P", 2) == 0;
is_br = strncmp(tag_name, "BR", 3) == 0;
is_blockquote = strncmp(tag_name, "BLOCKQUOTE", 11) == 0;
}
if (is_p || is_br) {
if (is_p || is_br || is_blockquote) {
out.push_back('\n');
}
lxb_dom_node_t* child = lxb_dom_node_first_child(node);
while (child) {
get_text_content(child, out);
get_text_content(child, out, blockquote_depth + is_blockquote);
child = lxb_dom_node_next(child);
}
if (is_p) {
if (is_p || is_blockquote) {
out.push_back('\n');
}
}
static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector<Emoji>& emojis) {
std::vector<lxb_dom_node_t*> nodes = emojify(child->owner_document, get_text_content(child), emojis);
size_t len;
const char* text = reinterpret_cast<const char*>(lxb_dom_node_text_content(child, &len));
std::vector<lxb_dom_node_t*> nodes = emojify(child->owner_document, std::string(text, len), emojis);
lxb_dom_node_insert_after(child, nodes[0]);
lxb_dom_node_destroy(child);