Fix tombstone parsing

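Apparently the "epitaph" field was removed from the tombstone JSON, so the tombstone text is now read from tombstoneInfo.richText.text (with the trailing " Learn more" suffix stripped).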
This commit is contained in:
Zed 2020-06-12 08:01:31 +02:00
parent bd386b3acf
commit 7643293f6b
2 changed files with 3 additions and 29 deletions

View File

@ -1,28 +1,19 @@
import strutils, strformat, times, uri, tables import strutils, strformat, times, uri, tables, xmltree, htmlparser
import xmltree, htmlparser
import regex import regex
import types, utils, query import types, utils, query
from unicode import Rune, `$`
const const
ytRegex = re"([A-z.]+\.)?youtu(be\.com|\.be)" ytRegex = re"([A-z.]+\.)?youtu(be\.com|\.be)"
twRegex = re"(www\.|mobile\.)?twitter\.com" twRegex = re"(www\.|mobile\.)?twitter\.com"
igRegex = re"(www\.)?instagram.com" igRegex = re"(www\.)?instagram.com"
cards = "cards.twitter.com/cards" cards = "cards.twitter.com/cards"
tco = "https://t.co" tco = "https://t.co"
nbsp = $Rune(0x000A0)
wwwRegex = re"https?://(www[0-9]?\.)?" wwwRegex = re"https?://(www[0-9]?\.)?"
m3u8Regex = re"""url="(.+.m3u8)"""" m3u8Regex = re"""url="(.+.m3u8)""""
manifestRegex = re"(.+(.ts|.m3u8|.vmap))" manifestRegex = re"(.+(.ts|.m3u8|.vmap))"
userpicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$" userpicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$"
extRegex = re"(\.[A-z]+)$" extRegex = re"(\.[A-z]+)$"
tombstoneRegex = re"\n* *Learn more"
proc stripText*(text: string): string =
text.replace(nbsp, " ").strip()
proc stripHtml*(text: string): string = proc stripHtml*(text: string): string =
var html = parseHtml(text) var html = parseHtml(text)
@ -129,9 +120,6 @@ proc getLink*(tweet: Tweet; focus=true): string =
result = &"/{username}/status/{tweet.id}" result = &"/{username}/status/{tweet.id}"
if focus: result &= "#m" if focus: result &= "#m"
proc getTombstone*(text: string): string =
text.replace(tombstoneRegex, "").stripText().strip(chars={' ', '\n'})
proc getTwitterLink*(path: string; params: Table[string, string]): string = proc getTwitterLink*(path: string; params: Table[string, string]): string =
let let
twitter = parseUri("https://twitter.com") twitter = parseUri("https://twitter.com")

View File

@ -109,22 +109,8 @@ proc getBanner*(js: JsonNode): string =
return "#161616" return "#161616"
proc getTombstone*(js: JsonNode): string = proc getTombstone*(js: JsonNode): string =
let epitaph = js{"epitaph"}.getStr result = js{"tombstoneInfo", "richText", "text"}.getStr
case epitaph result.removeSuffix(" Learn more")
of "Suspended":
result = "This tweet is from a suspended account."
of "Protected":
result = "This account owner limits who can view their tweets."
of "Missing":
result = "This tweet is unavailable."
of "Deactivated":
result = "This tweet is from an account that no longer exists."
of "Bounced", "BounceDeleted":
result = "This tweet violated the Twitter rules."
else:
result = js{"tombstoneInfo", "richText", "text"}.getStr
if epitaph.len > 0 or result.len > 0:
echo "Unknown tombstone (", epitaph, "): ", result
template getSlice(text: string; slice: seq[int]): string = template getSlice(text: string; slice: seq[int]): string =
text.runeSubStr(slice[0], slice[1] - slice[0]) text.runeSubStr(slice[0], slice[1] - slice[0])