95 lines
2.5 KiB
C++
95 lines
2.5 KiB
C++
#pragma once
|
|
|
|
#include <cassert>
|
|
#include <string>
|
|
#include <exception>
|
|
#include <stdexcept>
|
|
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wconversion"
|
|
#pragma GCC diagnostic ignored "-Wshadow"
|
|
#include <lexbor/html/html.h>
|
|
#pragma GCC diagnostic pop
|
|
|
|
namespace LXB {
|
|
|
|
class Exception : public std::exception {
|
|
public:
|
|
Exception(lxb_status_t status) {
|
|
using namespace std::string_literals;
|
|
this->_msg = "LXB Exception: "s + std::to_string(status);
|
|
}
|
|
|
|
const char* what() const noexcept {
|
|
return this->_msg.c_str();
|
|
}
|
|
|
|
private:
|
|
std::string _msg;
|
|
};
|
|
|
|
namespace HTML {
|
|
|
|
class Document {
|
|
public:
|
|
Document(const Document&&) = delete;
|
|
Document&& operator=(const Document&&) = delete;
|
|
|
|
Document(const std::string& str) {
|
|
this->_document = lxb_html_document_create();
|
|
if (!this->_document) {
|
|
throw std::bad_alloc();
|
|
}
|
|
lxb_status_t status = lxb_html_document_parse(this->_document, reinterpret_cast<const lxb_char_t*>(str.data()), str.size());
|
|
if (status != LXB_STATUS_OK) {
|
|
lxb_html_document_destroy(this->_document);
|
|
throw Exception(status);
|
|
}
|
|
}
|
|
~Document() {
|
|
lxb_html_document_destroy(this->_document);
|
|
}
|
|
|
|
inline lxb_dom_node_t* body() const noexcept {
|
|
lxb_dom_node_t* node = lxb_dom_interface_node(this->_document);
|
|
lxb_dom_node_t* html = lxb_dom_node_first_child(node);
|
|
lxb_dom_node_t* body = lxb_dom_node_last_child(html);
|
|
return body;
|
|
}
|
|
inline lxb_dom_element_t* body_element() const noexcept {
|
|
lxb_dom_node_t* body = this->body();
|
|
assert(body->type == LXB_DOM_NODE_TYPE_ELEMENT);
|
|
return lxb_dom_interface_element(body);
|
|
}
|
|
|
|
std::string serialize() const {
|
|
std::string res;
|
|
lxb_dom_node_t* body = this->body();
|
|
|
|
lxb_dom_node_t* child = lxb_dom_node_first_child(body);
|
|
while (child) {
|
|
lxb_status_t status = lxb_html_serialize_tree_cb(child, Document::_serialize_cb, &res);
|
|
if (status != LXB_STATUS_OK) {
|
|
throw Exception(status);
|
|
}
|
|
|
|
child = lxb_dom_node_next(child);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
private:
|
|
static lxb_status_t _serialize_cb(const lxb_char_t* data, size_t len, void* ctx) {
|
|
std::string* str = reinterpret_cast<std::string*>(ctx);
|
|
str->append(reinterpret_cast<const char*>(data), len);
|
|
return LXB_STATUS_OK;
|
|
}
|
|
|
|
lxb_html_document_t* _document;
|
|
};
|
|
|
|
} // namespace HTML
|
|
|
|
} // namespace LXB
|