Fix tombstone parsing
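Apparently Twitter just got rid of the "epitaph" field, oh well. The tombstone message is now read straight from "tombstoneInfo.richText.text", with the trailing " Learn more" removed.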
This commit is contained in:
parent
bd386b3acf
commit
7643293f6b
|
@ -1,28 +1,19 @@
|
||||||
import strutils, strformat, times, uri, tables
|
import strutils, strformat, times, uri, tables, xmltree, htmlparser
|
||||||
import xmltree, htmlparser
|
|
||||||
import regex
|
import regex
|
||||||
|
|
||||||
import types, utils, query
|
import types, utils, query
|
||||||
|
|
||||||
from unicode import Rune, `$`
|
|
||||||
|
|
||||||
const
|
const
|
||||||
ytRegex = re"([A-z.]+\.)?youtu(be\.com|\.be)"
|
ytRegex = re"([A-z.]+\.)?youtu(be\.com|\.be)"
|
||||||
twRegex = re"(www\.|mobile\.)?twitter\.com"
|
twRegex = re"(www\.|mobile\.)?twitter\.com"
|
||||||
igRegex = re"(www\.)?instagram.com"
|
igRegex = re"(www\.)?instagram.com"
|
||||||
cards = "cards.twitter.com/cards"
|
cards = "cards.twitter.com/cards"
|
||||||
tco = "https://t.co"
|
tco = "https://t.co"
|
||||||
nbsp = $Rune(0x000A0)
|
|
||||||
|
|
||||||
wwwRegex = re"https?://(www[0-9]?\.)?"
|
wwwRegex = re"https?://(www[0-9]?\.)?"
|
||||||
m3u8Regex = re"""url="(.+.m3u8)""""
|
m3u8Regex = re"""url="(.+.m3u8)""""
|
||||||
manifestRegex = re"(.+(.ts|.m3u8|.vmap))"
|
manifestRegex = re"(.+(.ts|.m3u8|.vmap))"
|
||||||
userpicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$"
|
userpicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$"
|
||||||
extRegex = re"(\.[A-z]+)$"
|
extRegex = re"(\.[A-z]+)$"
|
||||||
tombstoneRegex = re"\n* *Learn more"
|
|
||||||
|
|
||||||
proc stripText*(text: string): string =
|
|
||||||
text.replace(nbsp, " ").strip()
|
|
||||||
|
|
||||||
proc stripHtml*(text: string): string =
|
proc stripHtml*(text: string): string =
|
||||||
var html = parseHtml(text)
|
var html = parseHtml(text)
|
||||||
|
@ -129,9 +120,6 @@ proc getLink*(tweet: Tweet; focus=true): string =
|
||||||
result = &"/{username}/status/{tweet.id}"
|
result = &"/{username}/status/{tweet.id}"
|
||||||
if focus: result &= "#m"
|
if focus: result &= "#m"
|
||||||
|
|
||||||
proc getTombstone*(text: string): string =
|
|
||||||
text.replace(tombstoneRegex, "").stripText().strip(chars={' ', '\n'})
|
|
||||||
|
|
||||||
proc getTwitterLink*(path: string; params: Table[string, string]): string =
|
proc getTwitterLink*(path: string; params: Table[string, string]): string =
|
||||||
let
|
let
|
||||||
twitter = parseUri("https://twitter.com")
|
twitter = parseUri("https://twitter.com")
|
||||||
|
|
|
@ -109,22 +109,8 @@ proc getBanner*(js: JsonNode): string =
|
||||||
return "#161616"
|
return "#161616"
|
||||||
|
|
||||||
proc getTombstone*(js: JsonNode): string =
|
proc getTombstone*(js: JsonNode): string =
|
||||||
let epitaph = js{"epitaph"}.getStr
|
result = js{"tombstoneInfo", "richText", "text"}.getStr
|
||||||
case epitaph
|
result.removeSuffix(" Learn more")
|
||||||
of "Suspended":
|
|
||||||
result = "This tweet is from a suspended account."
|
|
||||||
of "Protected":
|
|
||||||
result = "This account owner limits who can view their tweets."
|
|
||||||
of "Missing":
|
|
||||||
result = "This tweet is unavailable."
|
|
||||||
of "Deactivated":
|
|
||||||
result = "This tweet is from an account that no longer exists."
|
|
||||||
of "Bounced", "BounceDeleted":
|
|
||||||
result = "This tweet violated the Twitter rules."
|
|
||||||
else:
|
|
||||||
result = js{"tombstoneInfo", "richText", "text"}.getStr
|
|
||||||
if epitaph.len > 0 or result.len > 0:
|
|
||||||
echo "Unknown tombstone (", epitaph, "): ", result
|
|
||||||
|
|
||||||
template getSlice(text: string; slice: seq[int]): string =
|
template getSlice(text: string; slice: seq[int]): string =
|
||||||
text.runeSubStr(slice[0], slice[1] - slice[0])
|
text.runeSubStr(slice[0], slice[1] - slice[0])
|
||||||
|
|
Loading…
Reference in New Issue