// coyote/models.cpp
//
// 218 lines
// 7.9 KiB
// C++

#include <cerrno>
#include <ctime>
#include <regex>
#include <stdexcept>
#include <string>
#include <system_error>
#include <nlohmann/json.hpp>
#include "models.h"
#include "numberhelper.h"
// Regex fragment for a host name: one or more dot-terminated labels made of
// ASCII letters, digits and '-', followed by a final label of at least two
// ASCII letters. Meant to be spliced into larger patterns via string literal
// concatenation.
#define DOMAIN_RE "(?:[a-zA-Z0-9-]+\\.)+[a-zA-Z]{2,}"
// Regex fragment for an account username: ASCII letters, digits, '-' and '_'.
// https://docs.joinmastodon.org/methods/accounts/#422-unprocessable-entity
// https://akko.erincandescent.net/@postmaster-emeritus
#define USERNAME_RE "[a-zA-Z0-9\\-_]+"
using json = nlohmann::json;
// Converts an RFC 3339 timestamp string to a time_t; defined further down in
// this file.
static time_t parse_rfc3339(const std::string& str);
void from_json(const json& j, Emoji& emoji) {
j.at("shortcode").get_to(emoji.shortcode);
j.at("url").get_to(emoji.url);
}
// Fills an AccountField (one profile metadata row) from its JSON object.
// "name" and "value" are required. "verified_at" may be missing or null, in
// which case verified_at is set to the sentinel -1.
void from_json(const json& j, AccountField& field) {
    j.at("name").get_to(field.name);
    field.value_html = j.at("value").get<std::string>();

    // Some servers omit verified_at entirely instead of sending null:
    // https://social.kernel.org/@monsieuricon/Ac6oYwtLhess6uil1c
    // https://social.kernel.org/@monsieuricon/Ac8RXJRqxTfUrM1xQG
    const bool has_verified_at = j.contains("verified_at") && !j["verified_at"].is_null();
    if (has_verified_at) {
        const std::string& verified_at = j["verified_at"].get_ref<const std::string&>();
        field.verified_at = parse_rfc3339(verified_at);
    } else {
        field.verified_at = -1;
    }
}
static std::regex host_regex("https?://(" DOMAIN_RE ")/.*", std::regex::ECMAScript | std::regex::icase);
void from_json(const json& j, Account& account) {
using namespace std::string_literals;
j.at("id").get_to(account.id);
j.at("username").get_to(account.username);
j.at("display_name").get_to(account.display_name);
j.at("bot").get_to(account.bot);
account.created_at = parse_rfc3339(j.at("created_at").get_ref<const std::string&>());
account.note_html = j.at("note").get<std::string>();
j.at("avatar").get_to(account.avatar);
j.at("avatar_static").get_to(account.avatar_static);
j.at("header").get_to(account.header);
j.at("followers_count").get_to(account.followers_count);
j.at("following_count").get_to(account.following_count);
j.at("statuses_count").get_to(account.statuses_count);
j.at("emojis").get_to(account.emojis);
j.at("fields").get_to(account.fields);
std::smatch sm;
const std::string& url = j.at("url").get_ref<const std::string&>();
if (!std::regex_match(url, sm, host_regex)) {
throw std::runtime_error("failed to find host in url: "s + url);
}
account.server = sm.str(1);
if (account.display_name.empty()) {
// https://mastodonapp.uk/@probertd8
account.display_name = account.username;
}
}
void from_json(const json& j, Size& size) {
j.at("width").get_to(size.width);
j.at("height").get_to(size.height);
}
void from_json(const json& j, Media& media) {
j.at("type").get_to(media.type);
j.at("url").get_to(media.url);
if (!j.at("preview_url").is_null()) {
media.preview_url = j["preview_url"].get<std::string>();
}
if (!j.at("remote_url").is_null()) {
media.remote_url = j["remote_url"].get<std::string>();
}
// .contains() check: https://social.platypus-sandbox.com/@ashten/AcPl5zVFxeGu7eSKSe
if (j.contains("meta") && (media.type == "image" || media.type == "video" || media.type == "gifv")) {
// .contains() checks: https://social.platypus-sandbox.com/@ashten/AcPlHNRnHGzjZMGX56
const json& meta = j["meta"];
if (meta.contains("original")) {
media.size = meta["original"].get<Size>();
}
if (meta.contains("small")) {
media.preview_size = meta["small"].get<Size>();
}
}
if (!j.at("description").is_null()) {
media.description = j["description"].get<std::string>();
}
}
void from_json(const json& j, PollOption& option) {
j.at("title").get_to(option.title);
j.at("votes_count").get_to(option.votes_count);
}
void from_json(const json& j, Poll& poll) {
if (!j.at("expires_at").is_null()) {
poll.expires_at = parse_rfc3339(j["expires_at"].get_ref<const std::string&>());
} else {
poll.expires_at = -1;
}
j.at("expired").get_to(poll.expired);
if (j.contains("voters_count") && !j["voters_count"].is_null()) {
j.at("voters_count").get_to(poll.voters_count);
} else {
poll.voters_count = -1;
}
j.at("votes_count").get_to(poll.votes_count);
j.at("options").get_to(poll.options);
j.at("emojis").get_to(poll.emojis);
}
static std::regex akkoma_status_url_regex("https?://(" DOMAIN_RE ")/(?:@|users/)(" USERNAME_RE ")/.+");
void from_json(const json& j, Post& post) {
j.at("id").get_to(post.id);
post.created_at = parse_rfc3339(j.at("created_at").get_ref<const std::string&>());
if (!j.at("in_reply_to_id").is_null()) {
post.in_reply_to_id = j["in_reply_to_id"].get<std::string>();
}
if (!j.at("in_reply_to_account_id").is_null()) {
post.in_reply_to_account_id = j["in_reply_to_account_id"].get<std::string>();
}
j.at("sensitive").get_to(post.sensitive);
j.at("spoiler_text").get_to(post.spoiler_text);
j.at("replies_count").get_to(post.replies_count);
j.at("reblogs_count").get_to(post.reblogs_count);
j.at("favourites_count").get_to(post.favorites_count);
// https://social.kernel.org/@monsieuricon/Ac6oYwtLhess6uil1c
if (j.contains("edited_at") && !j["edited_at"].is_null()) {
post.edited_at = parse_rfc3339(j["edited_at"].get_ref<const std::string&>());
} else {
post.edited_at = -1;
}
post.content = j.at("content").get<std::string>();
if (!j.at("reblog").is_null()) {
post.reblog = std::make_unique<Post>();
from_json(j["reblog"].get<json>(), *post.reblog.get());
}
j.at("media_attachments").get_to(post.media_attachments);
j.at("emojis").get_to(post.emojis);
// https://social.kernel.org/@monsieuricon/Ac6oYwtLhess6uil1c
if (j.contains("poll") && !j["poll"].is_null()) {
post.poll = j["poll"].get<Poll>();
}
// empty account with username accessible: https://dlx.pink/notice/AbtdJkjioOo8ZSdDhw
// empty account with username inaccesible: https://dlx.pink/notice/AbD2kgNviafFEsebqq
if (j.at("account").size()) {
j["account"].get_to(post.account);
} else {
std::smatch sm;
const std::string& url = j.at("url").get_ref<const std::string&>();
if (!std::regex_match(url, sm, akkoma_status_url_regex)) {
return;
}
post.account = {
.username = sm.str(2),
.server = sm.str(1),
.display_name = sm.str(2),
};
}
}
void from_json(const json& j, PostContext& context) {
j.at("ancestors").get_to(context.ancestors);
j.at("descendants").get_to(context.descendants);
}
void from_json(const json& j, Instance& instance) {
j.at("title").get_to(instance.title);
j.at("description").get_to(instance.description);
j.at("thumbnail").at("url").get_to(instance.thumbnail);
j.at("contact").at("email").get_to(instance.contact_email);
j.at("contact").at("account").get_to(instance.contact_account);
json rules = j.at("rules");
instance.rules.reserve(rules.size());
for (const json& i : rules) {
instance.rules.push_back(i.at("text").get<std::string>());
}
}
// Matches an RFC 3339 timestamp. Groups 1-6 are year, month, day, hour, minute
// and second; fractional seconds are accepted but not captured. The zone is
// either "Z" (group 7) or a signed hour offset (group 8) plus minutes
// (group 9). Matching is case-insensitive.
static std::regex rfc3339_re(R"EOF((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.\d+)?(?:(Z)|([+-]\d{2}):(\d{2})))EOF", std::regex::ECMAScript | std::regex::icase);

// Converts an RFC 3339 timestamp to seconds since the Unix epoch.
//
// The result is independent of the process time zone: the broken-down time is
// converted as UTC and the timestamp's own UTC offset is then subtracted.
// Fractional seconds are discarded.
//
// Throws std::invalid_argument if str is not an RFC 3339 timestamp, and
// std::system_error if the time cannot be represented as a time_t.
time_t parse_rfc3339(const std::string& str) {
    using namespace std::string_literals;

    std::smatch sm;
    if (!std::regex_match(str, sm, rfc3339_re)) {
        throw std::invalid_argument("unknown date format: "s + str);
    }

    // Every captured group consists of digits only (plus a leading sign for
    // group 8, which is handled separately), so std::stoi cannot fail here.
    struct tm tm = {};
    tm.tm_sec = std::stoi(sm.str(6));
    tm.tm_min = std::stoi(sm.str(5));
    tm.tm_hour = std::stoi(sm.str(4));
    tm.tm_mday = std::stoi(sm.str(3));
    tm.tm_mon = std::stoi(sm.str(2)) - 1;
    tm.tm_year = std::stoi(sm.str(1)) - 1900;
    // Sentinel: timegm() fills in tm_yday on success, which lets us tell a
    // failure apart from the valid result -1 (1969-12-31T23:59:59Z).
    tm.tm_yday = -1;

    // UTC offset in seconds. The sign belongs to the whole hh:mm offset, so it
    // has to be applied to the minutes as well (and survives "-00:30").
    long utc_offset = 0;
    if (!sm[7].matched) {
        const std::string hours = sm.str(8);
        const long magnitude = std::stoi(hours.substr(1)) * 60L * 60L + std::stoi(sm.str(9)) * 60L;
        utc_offset = hours[0] == '-' ? -magnitude : magnitude;
    }

    // mktime() would interpret the fields as local time and ignore the
    // offset; timegm() treats them as UTC.
    errno = 0;
    const time_t utc = timegm(&tm);
    if (utc == static_cast<time_t>(-1) && tm.tm_yday < 0) {
        throw std::system_error(errno != 0 ? errno : EOVERFLOW, std::generic_category(), "timegm()");
    }
    return utc - utc_offset;
}