95 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			95 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			C++
		
	
	
	
| #pragma once
 | |
| 
 | |
| #include <cassert>
 | |
| #include <string>
 | |
| #include <exception>
 | |
| #include <stdexcept>
 | |
| 
 | |
| #pragma GCC diagnostic push
 | |
| #pragma GCC diagnostic ignored "-Wconversion"
 | |
| #pragma GCC diagnostic ignored "-Wshadow"
 | |
| #include <lexbor/html/html.h>
 | |
| #pragma GCC diagnostic pop
 | |
| 
 | |
| namespace LXB {
 | |
| 
 | |
| class Exception : public std::exception {
 | |
| public:
 | |
|     Exception(lxb_status_t status) {
 | |
|         using namespace std::string_literals;
 | |
|         this->_msg = "LXB Exception: "s + std::to_string(status);
 | |
|     }
 | |
| 
 | |
|     const char* what() const noexcept {
 | |
|         return this->_msg.c_str();
 | |
|     }
 | |
| 
 | |
| private:
 | |
|     std::string _msg;
 | |
| };
 | |
| 
 | |
| namespace HTML {
 | |
| 
 | |
| class Document {
 | |
| public:
 | |
|     Document(const Document&&) = delete;
 | |
|     Document&& operator=(const Document&&) = delete;
 | |
| 
 | |
|     Document(const std::string& str) {
 | |
|         this->_document = lxb_html_document_create();
 | |
|         if (!this->_document) {
 | |
|             throw std::bad_alloc();
 | |
|         }
 | |
|         lxb_status_t status = lxb_html_document_parse(this->_document, reinterpret_cast<const lxb_char_t*>(str.data()), str.size());
 | |
|         if (status != LXB_STATUS_OK) {
 | |
|             lxb_html_document_destroy(this->_document);
 | |
|             throw Exception(status);
 | |
|         }
 | |
|     }
 | |
|     ~Document() {
 | |
|         lxb_html_document_destroy(this->_document);
 | |
|     }
 | |
| 
 | |
|     inline lxb_dom_node_t* body() const noexcept {
 | |
|         lxb_dom_node_t* node = lxb_dom_interface_node(this->_document);
 | |
|         lxb_dom_node_t* html = lxb_dom_node_first_child(node);
 | |
|         lxb_dom_node_t* body = lxb_dom_node_last_child(html);
 | |
|         return body;
 | |
|     }
 | |
|     inline lxb_dom_element_t* body_element() const noexcept {
 | |
|         lxb_dom_node_t* body = this->body();
 | |
|         assert(body->type == LXB_DOM_NODE_TYPE_ELEMENT);
 | |
|         return lxb_dom_interface_element(body);
 | |
|     }
 | |
| 
 | |
|     std::string serialize() const {
 | |
|         std::string res;
 | |
|         lxb_dom_node_t* body = this->body();
 | |
| 
 | |
|         lxb_dom_node_t* child = lxb_dom_node_first_child(body);
 | |
|         while (child) {
 | |
|             lxb_status_t status = lxb_html_serialize_tree_cb(child, Document::_serialize_cb, &res);
 | |
|             if (status != LXB_STATUS_OK) {
 | |
|                 throw Exception(status);
 | |
|             }
 | |
| 
 | |
|             child = lxb_dom_node_next(child);
 | |
|         }
 | |
| 
 | |
|         return res;
 | |
|     }
 | |
| 
 | |
| private:
 | |
|     static lxb_status_t _serialize_cb(const lxb_char_t* data, size_t len, void* ctx) {
 | |
|         std::string* str = reinterpret_cast<std::string*>(ctx);
 | |
|         str->append(reinterpret_cast<const char*>(data), len);
 | |
|         return LXB_STATUS_OK;
 | |
|     }
 | |
| 
 | |
|     lxb_html_document_t* _document;
 | |
| };
 | |
| 
 | |
| } // namespace HTML
 | |
| 
 | |
| } // namespace LXB
 |