pixwhile/pixivmodels.cpp

397 lines
16 KiB
C++

#include <regex>
#include "blankie/murl.h"
#include "pixivmodels.h"
#include "numberhelper.h"
// Forward declarations of the file-local helpers defined at the bottom of this file.

// URL rewriting helpers: each one matches the URL's path against a regex and
// returns std::nullopt when the path does not have the expected shape.
static inline std::optional<std::string> get_1920x960_cover_image(blankie::murl::Url url);
static inline std::optional<std::string> get_original_cover_image(blankie::murl::Url url, const nlohmann::json& cover_image);
static inline std::optional<std::string> get_original_profile_picture(blankie::murl::Url url);
static inline std::optional<std::string> get_360x360_illust_thumbnail(blankie::murl::Url url);
static inline std::optional<std::string> get_original_illust_image(blankie::murl::Url url);
// Builders for Images: from a JSON object holding "main" (and optionally "main_s"),
// or from a single profile picture URL.
static Images get_profile_pictures(const nlohmann::json& j);
static Images get_profile_pictures(const std::string& url);
// Derives a thumbnail's (width, height) from its URL and the original's size;
// yields std::nullopt when the original size is not known.
static std::optional<std::pair<uint64_t, uint64_t>> get_thumbnail_size(blankie::murl::Url thumbnail_url, std::optional<std::pair<uint64_t, uint64_t>> original_size);
// Collects every thumbnail URL and the original URL of one illustration page.
static Images get_illust_images(const nlohmann::json& image, std::optional<nlohmann::json> image_metadata);
// Returns the original image when one is set; otherwise falls back to the
// last entry of `thumbnails`.
// Throws std::runtime_error when there is neither an original nor a thumbnail.
const Image& Images::original_or_thumbnail() const {
    if (!this->original) {
        if (this->thumbnails.empty()) {
            throw std::runtime_error("Images does not contain any images");
        }
        return this->thumbnails.back();
    }
    return *this->original;
}
// Returns the thumbnail that sits `back` positions before the last one.
// When fewer than `back + 1` thumbnails exist, the last thumbnail is returned
// instead; when there are no thumbnails at all, the original is returned.
// Throws std::runtime_error when there is neither a thumbnail nor an original.
const Image& Images::thumbnail_or_original(size_t back) const {
    const size_t count = this->thumbnails.size();
    if (count == 0) {
        if (!this->original) {
            throw std::runtime_error("Images does not contain any images");
        }
        return *this->original;
    }
    return back < count
        ? this->thumbnails[count - back - 1]
        : this->thumbnails.back();
}
// Deserializes a user object into `user`.
//
// Required keys (nlohmann's at() throws when they are missing): "user_account",
// "user_name", "user_id" (a numeric string), "profile_img" and
// "meta"."ogp"."image". "cover_image", "user_webpage" and "social" are optional.
void from_json(const nlohmann::json& j, User& user) {
    using namespace std::string_literals;

    j.at("user_account").get_to(user.username);
    j.at("user_name").get_to(user.display_name);
    user.user_id = to_ull(j.at("user_id").get_ref<const nlohmann::json::string_t&>());

    if (j.contains("cover_image") && j["cover_image"].is_object()) {
        // Bind by reference; copying would deep-copy the whole JSON subtree.
        const nlohmann::json& cover_image = j["cover_image"];
        std::string c_720x360 = cover_image.at("profile_cover_image").at("720x360").get<std::string>();
        // The other sizes are derived from the 720x360 URL; either may be unavailable.
        std::optional<std::string> original = get_original_cover_image(c_720x360, cover_image);
        std::optional<std::string> c_1920x960 = get_1920x960_cover_image(c_720x360);

        user.cover_images = {std::move(original), {std::move(c_720x360)}};
        if (c_1920x960) {
            user.cover_images->thumbnails.push_back(std::move(*c_1920x960));
        }
    }

    user.profile_pictures = get_profile_pictures(j.at("profile_img"));

    if (j.contains("user_webpage")) {
        std::string user_webpage = j.at("user_webpage").get<std::string>();
        if (!user_webpage.empty()) {
            user.links.push_back({"Webpage", std::move(user_webpage)});
        }
    }

    if (j.contains("social") && j["social"].is_object()) {
        // Looked up once and shared by reference instead of being copied per call.
        const nlohmann::json& social = j["social"];
        auto add_social_as_needed = [&](const char* key, const char* public_name) {
            if (!social.contains(key)) {
                return;
            }
            std::string url = social[key].at("url").get<std::string>();
            user.links.push_back({public_name, std::move(url)});
        };

        add_social_as_needed("twitter", "Twitter");
        add_social_as_needed("instagram", "Instagram");
        add_social_as_needed("tumblr", "Tumblr");
        add_social_as_needed("facebook", "Facebook");
        add_social_as_needed("circlems", "Circle.ms");
        add_social_as_needed("pawoo", "Pawoo");
    }

    // The OGP image is only set when it is served from embed.pixiv.net; the
    // stored URL is rebuilt from the user id rather than copied from the JSON.
    blankie::murl::Url ogp_image = j.at("meta").at("ogp").at("image").get<std::string>();
    if (ogp_image.is_host_equal("embed.pixiv.net")) {
        user.ogp_image = "https://embed.pixiv.net/user_profile.php?id="s + std::to_string(user.user_id);
    }
}
// Deserializes a tag: "tag" is required and holds the Japanese name, while
// "translation" is optional and, when present, becomes the English name.
void from_json(const nlohmann::json& j, Tag& tag) {
    j.at("tag").get_to(tag.japanese);
    if (!j.contains("translation")) {
        return;
    }
    tag.english = j["translation"].get<std::string>();
}
// Deserializes an illustration.
//
// Two layouts are accepted: a "full" one, where the illustration lives under
// "illust_details", and a flat one, where `j` itself carries the illustration
// fields. Author profile pictures, the HTML comment and the tags are only read
// from the full layout.
void from_json(const nlohmann::json& j, Illust& illust) {
    const bool full_data = j.contains("illust_details");
    const nlohmann::json& author_details = j.at("author_details");
    const nlohmann::json& illust_details = full_data ? j.at("illust_details") : j;

    // Per-page metadata (image dimensions), when the response carries it.
    std::optional<nlohmann::json> images_metadata;
    if (illust_details.contains("illust_images")) {
        images_metadata.emplace(illust_details["illust_images"]);
    }
    auto metadata_at = [&images_metadata](size_t index) -> std::optional<nlohmann::json> {
        if (!images_metadata) {
            return std::nullopt;
        }
        return std::optional(images_metadata->at(index));
    };
    // Numeric fields arrive as strings and are parsed with to_ull().
    auto parse_number = [](const nlohmann::json& object, const char* field) {
        return to_ull(object.at(field).get_ref<const nlohmann::json::string_t&>());
    };

    author_details.at("user_account").get_to(illust.username);
    author_details.at("user_name").get_to(illust.user_display_name);
    illust.user_id = parse_number(author_details, "user_id");
    if (full_data) {
        illust.user_profile_pictures = get_profile_pictures(author_details.at("profile_img"));
    }

    illust.illust_id = parse_number(illust_details, "id");
    illust_details.at("title").get_to(illust.title);
    illust.ai_generated = illust_details.at("ai_type").get<int>() == 2;
    illust_details.at("upload_timestamp").get_to(illust.upload_time);

    if (full_data) {
        const bool has_comment = illust_details.contains("comment_html")
            && illust_details["comment_html"].is_string();
        if (has_comment) {
            illust.comment_html = illust_details["comment_html"].get<std::string>();
        }
        illust_details.at("display_tags").get_to(illust.tags);
    }

    if (!illust_details.contains("manga_a")) {
        // Single-page illustration: the image URLs sit directly on the details object.
        illust.images = {get_illust_images(illust_details, metadata_at(0))};
    } else {
        // Multi-page illustration: one entry of "manga_a" per page.
        const nlohmann::json& manga_a = illust_details["manga_a"];
        illust.images.reserve(manga_a.size());
        for (size_t page = 0; page < manga_a.size(); page++) {
            illust.images.push_back(get_illust_images(manga_a[page], metadata_at(page)));
        }
    }

    illust.page_count = parse_number(illust_details, "page_count");
}
// Deserializes one page of illustrations together with the paging totals.
// All three keys ("illusts", "total", "lastPage") are required.
void from_json(const nlohmann::json& j, Illusts& illusts) {
    const nlohmann::json& entries = j.at("illusts");
    entries.get_to(illusts.illusts);

    const nlohmann::json& total = j.at("total");
    total.get_to(illusts.total_illusts);

    const nlohmann::json& last_page = j.at("lastPage");
    last_page.get_to(illusts.total_pages);
}
// Deserializes a search response.
//
// Illustrations are read from "illust"."data", the total count from
// "illust"."total", and tag translations from "tagTranslation". The page count
// is derived from the total at 60 illustrations per page and capped at 10.
void from_json(const nlohmann::json& j, SearchResults& search_results) {
    const nlohmann::json& tag_translations = j.at("tagTranslation");
    // Looks up the "en" translation of a tag; "tagTranslation" is only
    // consulted when it is an object.
    auto get_translated_tag = [&](const std::string& japanese) -> std::optional<std::string> {
        if (!tag_translations.is_object() || !tag_translations.contains(japanese)) {
            return std::nullopt;
        }
        const nlohmann::json& tag = tag_translations[japanese];
        return tag.contains("en")
            ? std::optional(tag["en"].get<std::string>())
            : std::nullopt;
    };

    const nlohmann::json& illusts = j.at("illust").at("data");
    search_results.illusts.illusts.reserve(illusts.size());
    for (const nlohmann::json& i : illusts) {
        const nlohmann::json& i_tags = i.at("tags");
        std::vector<Tag> tags;
        tags.reserve(i_tags.size());

        for (const nlohmann::json& tag : i_tags) {
            std::string japanese = tag.get<std::string>();
            // Translate first so the Japanese name can then be moved, not copied.
            std::optional<std::string> english = get_translated_tag(japanese);
            tags.push_back({std::move(japanese), std::move(english)});
        }

        Illust illust = {
            .username = "",
            .user_display_name = i.at("userName").get<std::string>(),
            .user_id = to_ull(i.at("userId").get<std::string>()),
            .user_profile_pictures = get_profile_pictures(i.at("profileImageUrl").get<std::string>()),

            .illust_id = to_ull(i.at("id").get<std::string>()),
            .title = i.at("title").get<std::string>(),
            .ai_generated = i.at("aiType").get<int>() == 2,
            // pixiv does have a createDate field, but it can't be portably parsed by strptime
            // and i cba to use regex for it, especially when it's not even used in this context
            .upload_time = -1,

            .comment_html = std::nullopt,
            .tags = std::move(tags),
            .images = {get_illust_images(i, std::nullopt)},
            .page_count = i.at("pageCount").get<size_t>()
        };
        // Move: the local is not used again, and copying would duplicate its tag and image vectors.
        search_results.illusts.illusts.push_back(std::move(illust));
    }

    j.at("illust").at("total").get_to(search_results.illusts.total_illusts);
    // Round up to whole pages of 60 illustrations...
    search_results.illusts.total_pages = search_results.illusts.total_illusts / 60;
    if (search_results.illusts.total_illusts % 60 != 0) {
        search_results.illusts.total_pages++;
    }
    // ...and never report more than 10 pages.
    if (search_results.illusts.total_pages > 10) {
        search_results.illusts.total_pages = 10;
    }

    search_results.tag_translations.reserve(tag_translations.size());
    for (const auto& item : tag_translations.items()) {
        std::optional<std::string> translated_tag = get_translated_tag(item.key());
        if (translated_tag) {
            search_results.tag_translations.insert({item.key(), std::move(*translated_tag)});
        }
    }
}
// Deserializes a search suggestion. "tag_name" and "type" are required;
// "tag_translation" is only read (and is then required) when "type" equals
// "tag_translation".
void from_json(const nlohmann::json& j, SearchSuggestion& search_suggestion) {
    j.at("tag_name").get_to(search_suggestion.tag);

    const nlohmann::json::string_t& suggestion_type = j.at("type").get_ref<const nlohmann::json::string_t&>();
    if (suggestion_type != "tag_translation") {
        return;
    }
    search_suggestion.english_tag = j.at("tag_translation").get<std::string>();
}
// Matches "/c/<width>x<height><rest>"; group 1 is the resolution, group 2 the rest of the path.
static std::regex resolution_path_regex("/c/(\\d+x\\d+)(.+)");
// Rewrites a resized cover image URL so that it requests the 1920x960 variant.
// Returns std::nullopt when the path has no "/c/<width>x<height>" prefix or
// when the URL already is the 1920x960 variant.
static inline std::optional<std::string> get_1920x960_cover_image(blankie::murl::Url url) {
    std::smatch match;
    const bool matched = std::regex_match(url.path, match, resolution_path_regex);
    if (!matched || match[1] == "1920x960") {
        return std::nullopt;
    }

    // Build the new path in full before assigning: `match` refers into url.path.
    std::string upscaled_path = "/c/1920x960";
    upscaled_path += match.str(2);
    url.path = std::move(upscaled_path);
    return url.to_string();
}
// Matches "/c/<anything>/<path>_master<digits>[.<ext>]"; group 1 is the path
// without the resize prefix and "_master" suffix, group 2 the optional extension.
static std::regex thumbnail_path_regex("/c/[^/]+(/.+)_master\\d+(\\.\\w{3,4})?");
// Derives the URL of the original cover image from a resized ("/c/...") one.
//
// The extension comes from cover_image["profile_cover_ext"] when that is a
// non-empty string; otherwise the thumbnail's own extension (if any) is reused.
// Returns std::nullopt when the path does not look like a resized cover image.
static inline std::optional<std::string> get_original_cover_image(blankie::murl::Url url, const nlohmann::json& cover_image) {
    std::smatch sm;
    if (!std::regex_match(url.path, sm, thumbnail_path_regex)) {
        return std::nullopt;
    }

    // Copy both groups out *before* touching url.path: the sub-matches are
    // iterator pairs into url.path and are invalidated once it is reassigned.
    std::string original_path = sm.str(1);
    const std::string thumbnail_extension = sm.str(2);

    if (cover_image.contains("profile_cover_ext") && cover_image["profile_cover_ext"].is_string()
            && !cover_image["profile_cover_ext"].get_ref<const nlohmann::json::string_t&>().empty()) {
        original_path += '.';
        original_path += cover_image["profile_cover_ext"].get_ref<const nlohmann::json::string_t&>();
    } else {
        original_path += thumbnail_extension;
    }

    url.path = std::move(original_path);
    return url.to_string();
}
// Matches "<path>_<2+ digits><.ext>"; group 1 is the path without the numeric
// suffix, group 2 the extension (including the dot).
static std::regex profile_picture_thumbnail_path_regex("(/.+)_\\d{2,}(\\.\\w{3,4})");
// Strips the trailing "_<digits>" suffix from a profile picture URL to obtain
// the original. Returns std::nullopt when the path carries no such suffix.
static inline std::optional<std::string> get_original_profile_picture(blankie::murl::Url url) {
    std::smatch match;
    if (std::regex_match(url.path, match, profile_picture_thumbnail_path_regex)) {
        // Assemble the new path in full first: `match` refers into url.path.
        std::string stripped_path = match.str(1);
        stripped_path += match.str(2);
        url.path = std::move(stripped_path);
        return url.to_string();
    }
    return std::nullopt;
}
// Builds the profile picture set from a JSON object: "main_s" (optional) and
// "main" (required) become thumbnails, in that order, and the original is
// derived from the "main" URL when possible.
static Images get_profile_pictures(const nlohmann::json& j) {
    Images images;

    const bool has_small = j.contains("main_s");
    if (has_small) {
        images.thumbnails.push_back(j["main_s"].get<std::string>());
    }

    std::string main_url = j.at("main").get<std::string>();
    images.thumbnails.push_back(std::move(main_url));

    images.original = get_original_profile_picture(images.thumbnails.back().url);
    return images;
}
// Builds the profile picture set from a single URL: the URL itself is the only
// thumbnail, and the original is derived from it when possible.
static Images get_profile_pictures(const std::string& url) {
    Images images;
    images.original = get_original_profile_picture(url);
    images.thumbnails.push_back(url);
    return images;
}
// Matches an illustration path with an optional "/c/<spec>/img-master" or
// "/img-original" prefix and an optional "_master1200"/"_square1200" suffix;
// group 1 is "/img/<...>/<id>_p<page>".
static std::regex illust_360x360_thumbnail_path_regex(
    "(?:/c/[^/]+?/img-master|/img-original)?"
    "(/img/.*/\\d+_p\\d+)(?:_master1200|_square1200)?\\.\\w{3,4}"
);
// Derives the URL of the 360x360 square thumbnail from another URL of the same
// illustration page. Returns std::nullopt when the path is not recognised.
static inline std::optional<std::string> get_360x360_illust_thumbnail(blankie::murl::Url url) {
    std::smatch match;
    const bool matched = std::regex_match(url.path, match, illust_360x360_thumbnail_path_regex);
    if (!matched) {
        return std::nullopt;
    }

    // Assemble the new path in full first: `match` refers into url.path.
    std::string thumbnail_path = "/c/360x360_10_webp/img-master";
    thumbnail_path += match.str(1);
    thumbnail_path += "_square1200.jpg";
    url.path = std::move(thumbnail_path);
    return url.to_string();
}
// Matches "/c/<spec>/<...>/img/<...>_master1200.<ext>" (or "_square1200");
// group 1 is "img/<...>" without the size suffix and extension.
// Both suffix alternatives include the leading underscore so that it never
// ends up inside group 1.
static std::regex illust_original_image_path_regex("/c/.+?/(img/.+)(?:_master1200|_square1200)\\.\\w{3,4}");
// Derives the URL of the original image from a resized illustration URL.
//
// The result has no file extension; the fragment is set to "guess_extension"
// instead. Returns std::nullopt when the path is not a recognised resized path.
static inline std::optional<std::string> get_original_illust_image(blankie::murl::Url url) {
    using namespace std::string_literals;

    std::smatch sm;
    if (!std::regex_match(url.path, sm, illust_original_image_path_regex)) {
        return std::nullopt;
    }

    // The right-hand side is fully evaluated before url.path (which `sm` refers into) is replaced.
    url.path = "/img-original/"s + sm.str(1);
    url.fragment = "guess_extension";
    return url.to_string();
}
// Matches "/c/<width>x<height>" followed by "/" or "_" and the rest of the path.
static std::regex illust_size_regex(
    "/c/(\\d+)x(\\d+)[/_].+"
);
// Estimates the pixel size of a thumbnail from its URL and the original's size.
//
// - Paths starting with "/img-master/" are treated as the original scaled down
//   so that its longer side is 1200 (or left untouched when both sides already
//   fit within 1200).
// - Paths of the form "/c/<width>x<height>..." take the width from the URL and
//   scale the height to keep the original's aspect ratio.
//
// Returns std::nullopt when the original size is unknown, when the path
// matches neither form, or when the original width is 0 (no aspect ratio can
// be computed).
static std::optional<std::pair<uint64_t, uint64_t>> get_thumbnail_size(blankie::murl::Url thumbnail_url, std::optional<std::pair<uint64_t, uint64_t>> original_size) {
    if (!original_size) {
        return std::nullopt;
    }
    const uint64_t original_width = original_size->first;
    const uint64_t original_height = original_size->second;

    uint64_t thumbnail_width, thumbnail_height;
    if (thumbnail_url.path.starts_with("/img-master/")) {
        if (original_width <= 1200 && original_height <= 1200) {
            return original_size;
        }

        // one side is implicitly 1200; the divisor below is always > 1200 here
        if (original_width >= original_height) {
            thumbnail_width = 1200;
            // derived from original_height / (original_width / thumbnail_width)
            // to make it more accurate without using floats
            thumbnail_height = original_height * thumbnail_width / original_width;
        } else {
            thumbnail_height = 1200;
            // derived from original_width / (original_height / thumbnail_height)
            // to make it more accurate without using floats
            thumbnail_width = original_width * thumbnail_height / original_height;
        }
    } else {
        std::smatch sm;
        if (!std::regex_match(thumbnail_url.path, sm, illust_size_regex)) {
            return std::nullopt;
        }
        // The width comes from externally supplied metadata; a value of 0
        // would otherwise cause an integer division by zero below.
        if (original_width == 0) {
            return std::nullopt;
        }

        thumbnail_width = to_ull(sm.str(1));
        // The height in the URL (group 2) is deliberately not used.
        // derived from original_height / (original_width / thumbnail_width)
        // to make it more accurate without using floats
        thumbnail_height = original_height * thumbnail_width / original_width;
    }

    std::pair<uint64_t, uint64_t> thumbnail_size = {thumbnail_width, thumbnail_height};
    return thumbnail_size;
}
// Collects the image URLs of one illustration page.
//
// Thumbnails are appended in the order "url_ss", "url_placeholder",
// "url_small", "url_s", "url" (each only when present and a string). The
// original is "url_big" when available, otherwise it is derived from the last
// thumbnail. When "url_s" is missing, a 360x360 thumbnail derived from the
// original (or last thumbnail) is inserted at the position "url_s" would have
// had. `image_metadata`, when given, supplies the original's dimensions.
static Images get_illust_images(const nlohmann::json& image, std::optional<nlohmann::json> image_metadata) {
    std::optional<std::pair<uint64_t, uint64_t>> original_size;
    if (image_metadata) {
        const auto width = to_ull(image_metadata->at("illust_image_width").get_ref<const nlohmann::json::string_t&>());
        const auto height = to_ull(image_metadata->at("illust_image_height").get_ref<const nlohmann::json::string_t&>());
        original_size.emplace(width, height);
    }

    Images images;
    // Appends image[key] as a thumbnail; reports whether anything was added.
    auto append_thumbnail = [&](const char* key) -> bool {
        if (image.contains(key) && image[key].is_string()) {
            std::string url = image[key].get<std::string>();
            images.thumbnails.push_back({url, get_thumbnail_size(url, original_size)});
            return true;
        }
        return false;
    };

    for (const char* small_key : {"url_ss", "url_placeholder", "url_small"}) {
        append_thumbnail(small_key);
    }
    // Remember where "url_s" would have gone so a derived 360x360 thumbnail
    // can take its place later; -1 means "url_s" was present.
    const ssize_t add_360x360_to = append_thumbnail("url_s")
        ? -1
        : static_cast<ssize_t>(images.thumbnails.size());
    append_thumbnail("url");

    const bool has_big_url = image.contains("url_big") && image["url_big"].is_string();
    if (has_big_url) {
        images.original = {image["url_big"].get<std::string>(), original_size};
    } else {
        std::optional<std::string> original_url = get_original_illust_image(images.thumbnail_or_original().url);
        if (original_url) {
            images.original = {std::move(*original_url), original_size};
        }
    }

    if (add_360x360_to < 0) {
        return images;
    }
    std::optional<std::string> c_360x360 = get_360x360_illust_thumbnail(images.original_or_thumbnail().url);
    if (c_360x360) {
        images.thumbnails.insert(images.thumbnails.begin() + add_360x360_to, {*c_360x360, get_thumbnail_size(*c_360x360, original_size)});
    }
    return images;
}