Add PixivClient::{search_illusts,get_search_suggestions}

This commit is contained in:
blankie 2023-05-01 11:10:15 +07:00
parent b71cbd039d
commit 81dd9c9973
Signed by: blankie
GPG Key ID: CC15FC822C7F61F5
4 changed files with 136 additions and 4 deletions

View File

@ -1,10 +1,13 @@
#include "blankie/murl.h"
#include "numberhelper.h"
#include "pixivclient.h"
static const constexpr char* touch_user_agent = "Mozilla/5.0 (Android 12; Mobile; rv:97.0) Gecko/97.0 Firefox/97.0";
static const constexpr char* desktop_user_agent = "Mozilla/5.0 (Windows NT 10.0; rv:111.0) Gecko/20100101 Firefox/111.0";
// Configures the www.pixiv.net HTTP client: enables keep-alive and installs
// the default User-Agent and Cookie headers.
PixivClient::PixivClient() {
    this->_www_pixiv_net_client.set_keep_alive(true);
    this->_www_pixiv_net_client.set_default_headers({
        // Reuse the shared constant rather than a second copy of the literal,
        // so the default and the per-request touch User-Agent cannot drift.
        {"User-Agent", touch_user_agent},
        {"Cookie", "webp_available=1"}
    });
}
@ -12,7 +15,7 @@ PixivClient::PixivClient() {
// Fetches the details of the user identified by `user_id` from
// /touch/ajax/user/details and deserializes the "user_details" member of the
// response into a User.
//
// The response is passed through _handle_result(), which throws
// HTTPLibException when the request itself failed; nlohmann's at()/get()
// throw if "user_details" is missing or malformed.
User PixivClient::get_user(uint64_t user_id) {
    httplib::Result res = this->_www_pixiv_net_client.Get("/touch/ajax/user/details", {
        {"lang", "en"}, {"id", std::to_string(user_id)}
    }, {{"User-Agent", touch_user_agent}});
    return this->_handle_result(std::move(res)).at("user_details").get<User>();
}
@ -21,17 +24,54 @@ Illusts PixivClient::get_illusts(uint64_t user_id, size_t page) {
if (page != 0) {
params.insert({"p", std::to_string(page + 1)});
}
httplib::Result res = this->_www_pixiv_net_client.Get("/touch/ajax/user/illusts", std::move(params), httplib::Headers());
httplib::Result res = this->_www_pixiv_net_client.Get("/touch/ajax/user/illusts", std::move(params), {{"User-Agent", touch_user_agent}});
return this->_handle_result(std::move(res)).get<Illusts>();
}
// Fetches the illustration identified by `illust_id` from
// /touch/ajax/illust/details and deserializes the response into an Illust.
//
// The response is passed through _handle_result(), which throws
// HTTPLibException when the request itself failed.
Illust PixivClient::get_illust(uint64_t illust_id) {
    httplib::Result res = this->_www_pixiv_net_client.Get("/touch/ajax/illust/details", {
        {"lang", "en"}, {"illust_id", std::to_string(illust_id)}
    }, {{"User-Agent", touch_user_agent}});
    return this->_handle_result(std::move(res)).get<Illust>();
}
// Searches illustrations for `query` via /ajax/search/illustrations/<query>.
//
// `page` is zero-based; it is sent to pixiv as p = page + 1.
// `order` is forwarded verbatim as the "order" query parameter.
// The request uses the desktop User-Agent, and the response is passed through
// _handle_result() before being deserialized into SearchResults.
SearchResults PixivClient::search_illusts(const std::string& query, size_t page, const std::string& order) {
    // The query appears twice: escaped inside the path and as the "word" parameter.
    std::string path = "/ajax/search/illustrations/";
    path += blankie::murl::escape(query);

    const httplib::Params params = {
        {"lang", "en"},
        {"mode", "all"},
        {"p", std::to_string(page + 1)},
        {"s_mode", "s_tag_full"},
        {"type", "illust_and_ugoira"},
        {"order", order},
        {"word", query}
    };
    const httplib::Headers headers = {{"User-Agent", desktop_user_agent}};

    httplib::Result res = this->_www_pixiv_net_client.Get(path, params, headers);
    return this->_handle_result(std::move(res)).get<SearchResults>();
}
std::vector<SearchSuggestion> PixivClient::get_search_suggestions(const std::string& query) {
httplib::Result res = this->_www_pixiv_net_client.Get("/rpc/cps.php", {
{"lang", "en"}, {"keyword", query}
}, {{"User-Agent", desktop_user_agent}, {"Referer", "https://www.pixiv.net/"}});
if (!res) {
throw HTTPLibException(res.error());
}
nlohmann::json j = nlohmann::json::parse(std::move(res->body)).at("candidates");
std::vector<SearchSuggestion> search_suggestions;
search_suggestions.reserve(j.size());
for (const nlohmann::json& i : j) {
SearchSuggestion search_suggestion = i.get<SearchSuggestion>();
if (search_suggestion.tag != query) {
search_suggestions.push_back(std::move(search_suggestion));
}
}
return search_suggestions;
}
nlohmann::json PixivClient::_handle_result(httplib::Result res) {
if (!res) {
throw HTTPLibException(res.error());

View File

@ -14,6 +14,9 @@ public:
Illusts get_illusts(uint64_t user_id, size_t page);
Illust get_illust(uint64_t illust_id);
// Searches illustrations for `query`. `page` is zero-based (sent as p = page + 1);
// `order` is forwarded as the "order" query parameter.
SearchResults search_illusts(const std::string& query, size_t page, const std::string& order);
// Returns tag suggestions for `query`, excluding any suggestion whose tag equals `query`.
std::vector<SearchSuggestion> get_search_suggestions(const std::string& query);
private:
nlohmann::json _handle_result(httplib::Result res);
httplib::Client _www_pixiv_net_client{"https://www.pixiv.net"};

View File

@ -10,6 +10,7 @@ static inline std::optional<std::string> get_original_cover_image(blankie::murl:
// Forward declarations of file-local (static) helpers used by the from_json
// functions in this file.
static inline std::optional<std::string> get_original_profile_picture(blankie::murl::Url url);
static inline std::optional<std::string> get_360x360_illust_thumbnail(blankie::murl::Url url);
static Images get_profile_pictures(const nlohmann::json& j);
// Overload taking a single profile-picture URL instead of a JSON object.
static Images get_profile_pictures(const std::string& url);
static Images get_illust_image(const nlohmann::json& j);
const std::string& Images::original_or_thumbnail() const {
@ -127,6 +128,75 @@ void from_json(const nlohmann::json& j, Illusts& illusts) {
j.at("lastPage").get_to(illusts.total_pages);
}
// Deserializes a pixiv illustration-search response into `search_results`.
//
// Reads "tagTranslation", "illust"."data" and "illust"."total" from `j`.
// nlohmann's at()/get() throw when a required member is missing or has an
// unexpected type. Illustrations and translations are added to the containers
// already present in `search_results`.
void from_json(const nlohmann::json& j, SearchResults& search_results) {
    const nlohmann::json& tag_translations = j.at("tagTranslation");

    // English text of a single translation entry, if it has an "en" member.
    auto english_of = [](const nlohmann::json& entry) -> std::optional<std::string> {
        auto en = entry.find("en");
        if (en == entry.end()) {
            return std::nullopt;
        }
        return en->get<std::string>();
    };

    // English translation of a Japanese tag, if the response carries one.
    // "tagTranslation" is only consulted when it is a JSON object; presumably
    // pixiv can send a non-object (e.g. an empty array) when there are no
    // translations -- TODO confirm.
    auto get_translated_tag = [&](const std::string& japanese) -> std::optional<std::string> {
        if (!tag_translations.is_object()) {
            return std::nullopt;
        }
        auto entry = tag_translations.find(japanese);
        if (entry == tag_translations.end()) {
            return std::nullopt;
        }
        return english_of(*entry);
    };

    const nlohmann::json& illusts = j.at("illust").at("data");
    search_results.illusts.illusts.reserve(illusts.size());
    for (const nlohmann::json& i : illusts) {
        const nlohmann::json& i_tags = i.at("tags");
        std::vector<Tag> tags;
        tags.reserve(i_tags.size());
        for (const nlohmann::json& tag : i_tags) {
            std::string japanese = tag.get<std::string>();
            // Look the translation up first: `japanese` is moved from below.
            std::optional<std::string> english = get_translated_tag(japanese);
            tags.push_back({std::move(japanese), std::move(english)});
        }

        Illust illust = {
            .username = "",
            .user_display_name = i.at("userName").get<std::string>(),
            .user_id = to_ull(i.at("userId").get<std::string>()),
            .user_profile_pictures = get_profile_pictures(i.at("profileImageUrl").get<std::string>()),
            .illust_id = to_ull(i.at("id").get<std::string>()),
            .title = i.at("title").get<std::string>(),
            .ai_generated = i.at("aiType").get<int>() == 2,
            // pixiv does have a createDate field, but it can't be portably parsed by strptime
            // and i cba to use regex for it, especially when it's not even used in this context
            .upload_time = -1,
            .comment = std::nullopt,
            .tags = std::move(tags),
            .images = {get_illust_image(i)}
        };
        // Move rather than copy: `illust` is not used again in this iteration.
        search_results.illusts.illusts.push_back(std::move(illust));
    }

    // Page count = ceil(total / 60), capped at 10 pages.
    j.at("illust").at("total").get_to(search_results.illusts.total_illusts);
    search_results.illusts.total_pages = search_results.illusts.total_illusts / 60;
    if (search_results.illusts.total_illusts % 60 != 0) {
        search_results.illusts.total_pages++;
    }
    if (search_results.illusts.total_pages > 10) {
        search_results.illusts.total_pages = 10;
    }

    // Keep only the translations that actually have an English entry.
    search_results.tag_translations.reserve(tag_translations.size());
    if (tag_translations.is_object()) {
        for (auto it = tag_translations.begin(); it != tag_translations.end(); ++it) {
            std::optional<std::string> english = english_of(it.value());
            if (english) {
                search_results.tag_translations.emplace(it.key(), std::move(*english));
            }
        }
    }
}
// Deserializes one search-suggestion candidate.
//
// "tag_name" becomes `tag`. `english_tag` is taken from "tag_translation" only
// when "type" equals "tag_translation"; otherwise it is cleared.
// nlohmann's at()/get_ref()/get() throw when a member is missing or mistyped.
void from_json(const nlohmann::json& j, SearchSuggestion& search_suggestion) {
    j.at("tag_name").get_to(search_suggestion.tag);
    if (j.at("type").get_ref<const nlohmann::json::string_t&>() == "tag_translation") {
        search_suggestion.english_tag = j.at("tag_translation").get<std::string>();
    } else {
        // Always assign, so a reused output object (e.g. via get_to) never
        // keeps a stale translation from an earlier parse.
        search_suggestion.english_tag = std::nullopt;
    }
}
// Matches "/c/<digits>x<digits>" followed by at least one more character.
// Group 1 captures the "<digits>x<digits>" part, group 2 the remainder.
static std::regex resolution_path_regex("/c/(\\d+x\\d+)(.+)");
static inline std::optional<std::string> get_1920x960_cover_image(blankie::murl::Url url) {
std::smatch sm;
@ -180,6 +250,13 @@ static Images get_profile_pictures(const nlohmann::json& j) {
return images;
}
// Builds an Images value from a single profile-picture URL: `url` itself is
// the only thumbnail, and `original` is whatever
// get_original_profile_picture() derives from it (possibly nothing).
static Images get_profile_pictures(const std::string& url) {
    std::optional<std::string> original_url = get_original_profile_picture(url);
    return Images{
        .original = std::move(original_url),
        .thumbnails = {url}
    };
}
static std::regex illust_360x360_thumbnail_path_regex(
"(?:/c/[^/]+?/img-master|/img-original)?"
"(/img/.*/\\d+_p\\d+)(?:_master1200|_square1200)?\\.\\w{3,4}"

View File

@ -52,7 +52,19 @@ struct Illusts {
size_t total_pages;
};
// Result of an illustration search.
struct SearchResults {
    // The illustrations found, together with the total and page counts.
    Illusts illusts;
    // Maps a Japanese tag to its English translation; only tags that have an
    // English translation are present.
    std::unordered_map<std::string, std::string> tag_translations;
};
// One search-suggestion candidate.
struct SearchSuggestion {
    // The suggested tag (the candidate's "tag_name").
    std::string tag;
    // The candidate's "tag_translation", set only when its "type" is "tag_translation".
    std::optional<std::string> english_tag;
};
// nlohmann::json deserialization hooks: each overload fills its output
// parameter from `j` and is what json::get<T>() / get_to() call for that type.
void from_json(const nlohmann::json& j, User& user);
void from_json(const nlohmann::json& j, Tag& tag);
void from_json(const nlohmann::json& j, Illust& illust);
void from_json(const nlohmann::json& j, Illusts& illusts);
void from_json(const nlohmann::json& j, SearchResults& search_results);
void from_json(const nlohmann::json& j, SearchSuggestion& search_suggestion);