diff --git a/blankie/serializer.cpp b/blankie/serializer.cpp index 5a3d43d..738abe0 100644 --- a/blankie/serializer.cpp +++ b/blankie/serializer.cpp @@ -58,5 +58,6 @@ static inline bool is_autoclosing_tag(const char* tag) { || !strncmp(tag, "meta", 5) || !strncmp(tag, "img", 4) || !strncmp(tag, "br", 3) + || !strncmp(tag, "hr", 3) || !strncmp(tag, "input", 6); } diff --git a/models.cpp b/models.cpp index 6e8dd2f..022e1b9 100644 --- a/models.cpp +++ b/models.cpp @@ -60,6 +60,11 @@ void from_json(const json& j, Account& account) { } } +void from_json(const json& j, Size& size) { + j.at("width").get_to(size.width); + j.at("height").get_to(size.height); +} + void from_json(const json& j, Media& media) { j.at("type").get_to(media.type); j.at("url").get_to(media.url); @@ -69,6 +74,11 @@ void from_json(const json& j, Media& media) { if (!j.at("remote_url").is_null()) { media.remote_url = j["remote_url"].get(); } + if (media.type == "image" || media.type == "video" || media.type == "gifv") { + const json& meta = j.at("meta"); + media.size = meta.at("original").get(); + media.preview_size = meta.at("small").get(); + } if (!j.at("description").is_null()) { media.description = j["description"].get(); } diff --git a/models.h b/models.h index dac0510..d62abc2 100644 --- a/models.h +++ b/models.h @@ -51,11 +51,17 @@ struct Account { } }; +struct Size { + uint64_t width; + uint64_t height; +}; struct Media { std::string type; std::string url; std::optional preview_url; std::optional remote_url; + std::optional size; + std::optional preview_size; std::optional description; }; @@ -108,6 +114,7 @@ struct Instance { void from_json(const nlohmann::json& j, Emoji& emoji); void from_json(const nlohmann::json& j, AccountField& field); void from_json(const nlohmann::json& j, Account& account); +void from_json(const nlohmann::json& j, Size& size); void from_json(const nlohmann::json& j, Media& media); void from_json(const nlohmann::json& j, PollOption& option); void from_json(const nlohmann::json& j, Poll& poll); diff --git a/routes/status.cpp b/routes/status.cpp index 77eeafb..26aae22 100644 --- a/routes/status.cpp +++ b/routes/status.cpp @@ -5,6 +5,8 @@ #include "../models.h" static inline std::string make_title(const Post& post); +static inline Nodes generate_ogp_nodes(const httplib::Request& req, const Post& post, const std::string& server); +static inline void generate_media_ogp_nodes(Nodes& nodes, const Media& media, bool* has_video, bool* has_image); void status_route(const httplib::Request& req, httplib::Response& res) { @@ -48,7 +50,7 @@ void status_route(const httplib::Request& req, httplib::Response& res) { body.nodes.push_back(serialize_post(req, server, i)); } - serve(req, res, make_title(*post), std::move(body)); + serve(req, res, make_title(*post), std::move(body), generate_ogp_nodes(req, *post, server)); } @@ -69,3 +71,64 @@ static inline std::string make_title(const Post& post) { return title; } + +static inline Nodes generate_ogp_nodes(const httplib::Request& req, const Post& post, const std::string& server) { + using namespace std::string_literals; + std::string url = get_origin(req) + '/' + server + "/@" + post.account.acct(false) + '/' + post.id; + bool has_video = false, has_image = false; + + Nodes nodes({ + // left-to-right override--thank https://anarres.family/@alice@mk.nyaa.place + Element("meta", {{"property", "og:title"}, {"content", post.account.display_name + "\u202d (@" + post.account.acct() + ')'}}, {}), + Element("meta", {{"property", "og:site_name"}, {"content", "Coyote"}}, {}), + Element("meta", {{"property", "og:url"}, {"content", std::move(url)}}, {}), + }); + if (!post.sensitive) { + nodes.push_back(Element("meta", {{"property", "og:description"}, {"content", get_text_content(post.content)}}, {})); + + for (const Media& media : post.media_attachments) { + generate_media_ogp_nodes(nodes, media, &has_video, &has_image); + } + } else if (!post.spoiler_text.empty()) { + nodes.push_back(Element("meta", {{"property", "og:description"}, {"content", "CW: "s + post.spoiler_text}}, {})); + } + + const char* type = !post.sensitive && has_video + ? "video" + : !post.sensitive && has_image ? "image" : "article"; + nodes.push_back(Element("meta", {{"property", "og:type"}, {"content", type}}, {})); + + return nodes; +} + +static inline void generate_media_ogp_nodes(Nodes& nodes, const Media& media, bool* has_video, bool* has_image) { + if (media.type == "image") { + *has_image = true; + nodes.push_back(Element("meta", {{"property", "og:image"}, {"content", media.preview_url.value_or(media.url)}}, {})); + + std::optional size = media.preview_size ? media.preview_size : media.size; + if (size) { + nodes.push_back(Element("meta", {{"property", "og:image:width"}, {"content", std::to_string(size->width)}}, {})); + nodes.push_back(Element("meta", {{"property", "og:image:height"}, {"content", std::to_string(size->height)}}, {})); + } + + if (media.description) { + nodes.push_back(Element("meta", {{"property", "og:image:alt"}, {"content", *media.description}}, {})); + } + } else if (media.type == "video" || media.type == "gifv") { + *has_video = true; + nodes.push_back(Element("meta", {{"property", "og:video"}, {"content", media.preview_url.value_or(media.url)}}, {})); + + std::optional size = media.preview_size ? media.preview_size : media.size; + if (size) { + nodes.push_back(Element("meta", {{"property", "og:video:width"}, {"content", std::to_string(size->width)}}, {})); + nodes.push_back(Element("meta", {{"property", "og:video:height"}, {"content", std::to_string(size->height)}}, {})); + } + + if (media.description) { + nodes.push_back(Element("meta", {{"property", "og:video:alt"}, {"content", *media.description}}, {})); + } + } else if (media.type == "audio") { + nodes.push_back(Element("meta", {{"property", "og:audio"}, {"content", media.url}}, {})); + } +} diff --git a/routes/user.cpp b/routes/user.cpp index 1f7e681..5c111e0 100644 --- a/routes/user.cpp +++ b/routes/user.cpp @@ -12,6 +12,8 @@ static inline Element user_header(const httplib::Request& req, const std::string static inline Element user_link_field(const httplib::Request& req, const Account& account, const AccountField& field); static inline Element sorting_method_link(const httplib::Request& req, const std::string& server, const Account& account, PostSortingMethod current_method, PostSortingMethod new_method); +static inline Nodes generate_ogp_nodes(const httplib::Request& req, const Account& account, const std::optional& max_id, PostSortingMethod sorting_method); + void user_route(const httplib::Request& req, httplib::Response& res) { using namespace std::string_literals; @@ -66,7 +68,7 @@ void user_route(const httplib::Request& req, httplib::Response& res) { body.nodes.push_back(Element("p", {{"class", "more_posts"}}, {"There are no more posts"})); } - serve(req, res, account->display_name + " (@" + account->acct() + ')', std::move(body)); + serve(req, res, account->display_name + " (@" + account->acct() + ')', std::move(body), generate_ogp_nodes(req, *account, max_id, sorting_method)); } @@ -161,3 +163,27 @@ static inline Element sorting_method_link(const httplib::Request& req, const std } return a; } + + +static inline Nodes generate_ogp_nodes(const httplib::Request& req, const Account& account, const std::optional& max_id, PostSortingMethod sorting_method) { + std::string url = get_origin(req) + '/' + account.server + "/@" + account.acct(false) + sorting_method_suffixes[sorting_method]; + if (max_id) { + url += "?max_id="; + url += *max_id; + } + + std::string note = get_text_content(account.note_html); + + Nodes nodes({ + // left-to-right override--thank https://anarres.family/@alice@mk.nyaa.place + Element("meta", {{"property", "og:title"}, {"content", account.display_name + "\u202d (@" + account.acct() + ')'}}, {}), + Element("meta", {{"property", "og:type"}, {"content", "website"}}, {}), + Element("meta", {{"property", "og:site_name"}, {"content", "Coyote"}}, {}), + Element("meta", {{"property", "og:url"}, {"content", std::move(url)}}, {}), + Element("meta", {{"property", "og:image"}, {"content", account.avatar}}, {}), + }); + if (!note.empty()) { + nodes.push_back(Element("meta", {{"property", "og:description"}, {"content", std::move(note)}}, {})); + } + return nodes; +} diff --git a/servehelper.cpp b/servehelper.cpp index 66d236d..aa421b3 100644 --- a/servehelper.cpp +++ b/servehelper.cpp @@ -18,6 +18,7 @@ static inline void preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, lxb_dom_element_t* element); static inline void preprocess_link(const httplib::Request& req, const std::string& domain_name, lxb_dom_element_t* element); static inline bool should_fix_link(lxb_dom_element_t* element, const std::string& element_cls); +static inline void get_text_content(lxb_dom_node_t* node, std::string& out); static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis); static inline std::vector emojify(lxb_dom_document_t* document, std::string str, const std::vector& emojis); @@ -194,6 +195,35 @@ Element serialize_post(const httplib::Request& req, const std::string& server, c } } +std::string get_text_content(lxb_dom_node_t* child) { + std::string out; + get_text_content(child, out); + + if (!out.empty()) { + size_t remove_from = out.size(); + while (remove_from && out[remove_from - 1] == '\n') { + remove_from--; + } + if (out.size() > remove_from) { + out.erase(remove_from); + } + } + if (!out.empty()) { + size_t remove_to = 0; + while (out.size() > remove_to && out[remove_to] == '\n') { + remove_to++; + } + out.erase(0, remove_to); + } + + return out; +} + +std::string get_text_content(blankie::html::HTMLString str) { + LXB::HTML::Document document(str.str); + return get_text_content(document.body()); +} + blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, const blankie::html::HTMLString& str) { LXB::HTML::Document document(str.str); preprocess_html(req, domain_name, emojis, document.body_element()); @@ -316,11 +346,40 @@ static inline bool should_fix_link(lxb_dom_element_t* element, const std::string return child == nullptr; } -static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis) { - size_t text_content_len; - const char* text_content = reinterpret_cast(lxb_dom_node_text_content(child, &text_content_len)); +static inline void get_text_content(lxb_dom_node_t* node, std::string& out) { + bool is_br = false, is_p = false; - std::vector nodes = emojify(child->owner_document, std::string(text_content, text_content_len), emojis); + if (node->type == LXB_DOM_NODE_TYPE_TEXT) { + size_t len; + const char* text = reinterpret_cast(lxb_dom_node_text_content(node, &len)); + + out.append(text, len); + } else if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) { + lxb_dom_element_t* element = lxb_dom_interface_element(node); + const char* tag_name = reinterpret_cast(lxb_dom_element_tag_name(element, nullptr)); + + is_p = strncmp(tag_name, "P", 2) == 0; + is_br = strncmp(tag_name, "BR", 3) == 0; + } + + if (is_p || is_br) { + out.push_back('\n'); + } + + lxb_dom_node_t* child = lxb_dom_node_first_child(node); + while (child) { + get_text_content(child, out); + + child = lxb_dom_node_next(child); + } + + if (is_p) { + out.push_back('\n'); + } +} + +static inline lxb_dom_node_t* emojify(lxb_dom_node_t* child, const std::vector& emojis) { + std::vector nodes = emojify(child->owner_document, get_text_content(child), emojis); lxb_dom_node_insert_after(child, nodes[0]); lxb_dom_node_destroy(child); @@ -460,7 +519,7 @@ static inline Element serialize_media(const Media& media) { video.attributes.push_back({"poster", *media.preview_url}); } return video; - } else if (media.type == "unknown" && media.remote_url) { + } else if (media.type == "unknown") { if (media.remote_url) { // https://botsin.space/@lina@vt.social/111053598696451525 return Element("a", {{"href", *media.remote_url}}, {"Media is not available from this instance, view externally"}); diff --git a/servehelper.h b/servehelper.h index 3e5cb48..a1a1e2e 100644 --- a/servehelper.h +++ b/servehelper.h @@ -4,6 +4,7 @@ #include #include "blankie/serializer.h" +#include "lxb_wrapper.h" struct Post; // forward declaration from models.h struct Emoji; // forward declaration from models.h class CurlUrl; // forward declaration from curlu_wrapper.h @@ -24,5 +25,7 @@ bool should_send_304(const httplib::Request& req, uint64_t hash); Element serialize_post(const httplib::Request& req, const std::string& server, const Post& post, bool pinned = false, bool main_post = false); +std::string get_text_content(lxb_dom_node_t* child); +std::string get_text_content(blankie::html::HTMLString str); blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::string& domain_name, const std::vector& emojis, const blankie::html::HTMLString& str); blankie::html::HTMLString preprocess_html(const httplib::Request& req, const std::vector& emojis, const std::string& str);