coyote/lxb_wrapper.h

94 lines
2.5 KiB
C++

#pragma once
#include <cassert>
#include <string>
#include <exception>
#include <stdexcept>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#include <lexbor/html/html.h>
#pragma GCC diagnostic pop
namespace LXB {
/// Exception type thrown by this wrapper when a lexbor call reports a
/// non-OK status.
///
/// The message returned by `what()` is "LXB Exception: " followed by the
/// status code formatted with `std::to_string`.
class Exception : public std::exception {
public:
    /// Builds the message from the given lexbor status code.
    ///
    /// NOTE(review): intentionally left non-`explicit` so existing callers
    /// that rely on implicit conversion from a status keep compiling.
    ///
    /// @param status Status code to embed in the message.
    Exception(lxb_status_t status)
        : _msg("LXB Exception: " + std::to_string(status)) {}

    /// @return Pointer to the stored message; valid for as long as this
    ///         exception object is alive.
    const char* what() const noexcept override {
        return this->_msg.c_str();
    }

private:
    std::string _msg;
};
namespace HTML {
class Document {
public:
Document(const Document&&) = delete;
Document&& operator=(const Document&&) = delete;
Document(const std::string& str) {
this->_document = lxb_html_document_create();
if (!this->_document) {
throw std::bad_alloc();
}
lxb_status_t status = lxb_html_document_parse(this->_document, reinterpret_cast<const lxb_char_t*>(str.data()), str.size());
if (status != LXB_STATUS_OK) {
lxb_html_document_destroy(this->_document);
throw Exception(status);
}
}
~Document() {
lxb_html_document_destroy(this->_document);
}
constexpr lxb_dom_node_t* body() const noexcept {
lxb_dom_node_t* node = lxb_dom_interface_node(this->_document);
lxb_dom_node_t* html = lxb_dom_node_first_child(node);
lxb_dom_node_t* body = lxb_dom_node_last_child(html);
return body;
}
constexpr lxb_dom_element_t* body_element() const noexcept {
lxb_dom_node_t* body = this->body();
assert(body->type == LXB_DOM_NODE_TYPE_ELEMENT);
return lxb_dom_interface_element(body);
}
std::string serialize() const {
std::string res;
lxb_dom_node_t* body = this->body();
lxb_dom_node_t* child = lxb_dom_node_first_child(body);
while (child) {
lxb_status_t status = lxb_html_serialize_tree_cb(child, Document::_serialize_cb, &res);
if (status != LXB_STATUS_OK) {
throw Exception(status);
}
child = lxb_dom_node_next(child);
}
return res;
}
private:
static lxb_status_t _serialize_cb(const lxb_char_t* data, size_t len, void* ctx) {
std::string* str = reinterpret_cast<std::string*>(ctx);
str->append(reinterpret_cast<const char*>(data), len);
return LXB_STATUS_OK;
}
lxb_html_document_t* _document;
};
}; // namespace HTML
}; // namespace LXB