diff --git a/blankie/murl.cpp b/blankie/murl.cpp index c44ab4f..7852b25 100644 --- a/blankie/murl.cpp +++ b/blankie/murl.cpp @@ -12,7 +12,9 @@ #define USERINFO "((?:" UNRESERVED "|" PCT_ENCODED "|" SUB_DELIMS "|:)*)" // A lot looser than RFC 3986, but things will go very wrong very quickly if I comply -#define HOST "([^/?#]+?)" +// Hostnames must contain a dot, so stuff like "//The" is rejected (not important here); HOSTCHAR excludes '.' so dot-separated labels split one way only and matching cannot backtrack exponentially +#define HOSTCHAR "[\\w\\-]" +#define HOST "(\\[[\\da-f:.]+\\]|" HOSTCHAR "+(?:\\." HOSTCHAR "+)+\\.?)" #define PORT "(\\d*)" #define AUTHORITY "(?:" USERINFO "@)?" HOST "(?::" PORT ")?" @@ -22,20 +24,27 @@ #define QUERY "((?:" PCHAR "|[/?])*)" // Looser than RFC 3986, but fragments might as well own everything -#define FRAGMENT "(.*)" +#define FRAGMENT "([^ ]*)" -#define HTTP_HTTPS_URL \ +#define FULL_HTTP_HTTPS_REGEX \ + "(?:(https?)?:)?//" AUTHORITY \ + PATH_ABEMPTY \ + "(?:\\?" QUERY ")?" \ + "(?:#" FRAGMENT ")?" +#define HTTP_HTTPS_REGEX \ "(?:(https?)?:)?(?://" AUTHORITY ")?" \ PATH_ABEMPTY \ "(?:\\?" QUERY ")?" \ "(?:#" FRAGMENT ")?" 
-static std::regex url_regex(HTTP_HTTPS_URL, std::regex::icase); static void handle_segment(std::vector& segments, const std::string& str, size_t offset, size_t length); namespace blankie { namespace murl { +std::regex full_url_regex(FULL_HTTP_HTTPS_REGEX, std::regex::icase); +std::regex url_regex(HTTP_HTTPS_REGEX, std::regex::icase); + Url::Url(const std::string& str) { std::smatch sm; if (!std::regex_match(str, sm, url_regex)) { diff --git a/blankie/murl.h b/blankie/murl.h index 8f37c23..341aa65 100644 --- a/blankie/murl.h +++ b/blankie/murl.h @@ -1,10 +1,13 @@ #pragma once +#include #include namespace blankie { namespace murl { +extern std::regex full_url_regex; + struct Url { std::string scheme; std::string userinfo; diff --git a/routes/artworks.cpp b/routes/artworks.cpp index 13cb5d3..f888b05 100644 --- a/routes/artworks.cpp +++ b/routes/artworks.cpp @@ -1,16 +1,21 @@ +#include + #include "routes.h" +#include "../blankie/murl.h" #include "../servehelper.h" #include "../numberhelper.h" #include "../pixivclient.h" -static inline bool is_true(const std::string& str); -static inline std::string time_to_string(time_t time); static inline Element generate_user_link(const httplib::Request& req, const Config& config, const Illust& illust); static inline Element generate_images(const httplib::Request& req, const Config& config, const Illust& illust); static inline Element generate_preview_images(const httplib::Request& req, const Config& config, const Illust& illust); +static inline std::vector parse_description_line(std::string str); static inline Element generate_description(const std::string& description); static inline Element generate_illust_tags(const Illust& illust); +static inline bool is_true(const std::string& str); +static inline std::string time_to_string(time_t time); + void artworks_route(const httplib::Request& req, httplib::Response& res, const Config& config, PixivClient& pixiv_client) { uint64_t illust_id = to_ull(req.matches.str(1)); bool preview = 
is_true(req.get_param_value("preview")); @@ -107,7 +112,25 @@ static inline Element generate_preview_images(const httplib::Request& req, const return div; } -// TODO auto-detect links +// TODO proxy urls +static inline std::vector parse_description_line(std::string str) { + std::vector nodes; + std::smatch sm; + + while (std::regex_search(str, sm, blankie::murl::full_url_regex)) { + if (sm.prefix().length()) { + nodes.push_back(sm.prefix()); + } + nodes.push_back(Element("a", {{"href", sm.str(0)}}, {sm.str(0)})); + str = sm.suffix(); + } + if (!str.empty()) { + nodes.push_back(std::move(str)); + } + + return nodes; +} + static inline Element generate_description(const std::string& description) { Element p("p"); size_t pos = 0; @@ -116,11 +139,12 @@ static inline Element generate_description(const std::string& description) { if (!p.nodes.empty()) { p.nodes.push_back(Element("br")); } - p.nodes.push_back(std::move(str)); + std::vector nodes = parse_description_line(std::move(str)); + p.nodes.insert(p.nodes.end(), nodes.begin(), nodes.end()); }; while ((pos = description.find('\n', pos)) != std::string::npos) { - add(description.substr(last_pos, pos)); + add(description.substr(last_pos, pos - last_pos)); last_pos = ++pos; } if (description.size() > last_pos) {