#pragma once #include #include #include #include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #pragma GCC diagnostic ignored "-Wshadow" #include #pragma GCC diagnostic pop namespace LXB { class Exception : public std::exception { public: Exception(lxb_status_t status) { using namespace std::string_literals; this->_msg = "LXB Exception: "s + std::to_string(status); } const char* what() const noexcept { return this->_msg.c_str(); } private: std::string _msg; }; namespace HTML { class Document { public: Document(const Document&&) = delete; Document&& operator=(const Document&&) = delete; Document(const std::string& str) { this->_document = lxb_html_document_create(); if (!this->_document) { throw std::bad_alloc(); } lxb_status_t status = lxb_html_document_parse(this->_document, reinterpret_cast(str.data()), str.size()); if (status != LXB_STATUS_OK) { lxb_html_document_destroy(this->_document); throw Exception(status); } } ~Document() { lxb_html_document_destroy(this->_document); } inline lxb_dom_node_t* body() const noexcept { lxb_dom_node_t* node = lxb_dom_interface_node(this->_document); lxb_dom_node_t* html = lxb_dom_node_first_child(node); lxb_dom_node_t* body = lxb_dom_node_last_child(html); return body; } inline lxb_dom_element_t* body_element() const noexcept { lxb_dom_node_t* body = this->body(); assert(body->type == LXB_DOM_NODE_TYPE_ELEMENT); return lxb_dom_interface_element(body); } std::string serialize() const { std::string res; lxb_dom_node_t* body = this->body(); lxb_dom_node_t* child = lxb_dom_node_first_child(body); while (child) { lxb_status_t status = lxb_html_serialize_tree_cb(child, Document::_serialize_cb, &res); if (status != LXB_STATUS_OK) { throw Exception(status); } child = lxb_dom_node_next(child); } return res; } private: static lxb_status_t _serialize_cb(const lxb_char_t* data, size_t len, void* ctx) { std::string* str = reinterpret_cast(ctx); str->append(reinterpret_cast(data), len); return LXB_STATUS_OK; } lxb_html_document_t* _document; }; } // namespace HTML } // namespace LXB