Add experimental user parser

This commit is contained in:
Zed 2022-01-16 06:00:11 +01:00
parent fcfc1ef497
commit cdf49dcddd
8 changed files with 270 additions and 29 deletions

View File

@ -7,6 +7,7 @@
# disable annoying warnings # disable annoying warnings
warning("GcUnsafe2", off) warning("GcUnsafe2", off)
hint("XDeclaredButNotUsed", off) hint("XDeclaredButNotUsed", off)
hint("XCannotRaiseY", off)
hint("User", off) hint("User", off)
const const

View File

@ -2,6 +2,7 @@
import asyncdispatch, httpclient, uri, strutils import asyncdispatch, httpclient, uri, strutils
import packedjson import packedjson
import types, query, formatters, consts, apiutils, parser import types, query, formatters, consts, apiutils, parser
import experimental/parser/user
proc getGraphListBySlug*(name, list: string): Future[List] {.async.} = proc getGraphListBySlug*(name, list: string): Future[List] {.async.} =
let let
@ -32,14 +33,14 @@ proc getListMembers*(list: List; after=""): Future[Result[Profile]] {.async.} =
proc getProfile*(username: string): Future[Profile] {.async.} = proc getProfile*(username: string): Future[Profile] {.async.} =
let let
ps = genParams({"screen_name": username}) ps = genParams({"screen_name": username})
js = await fetch(userShow ? ps, Api.userShow) json = await fetchRaw(userShow ? ps, Api.userShow)
result = parseUserShow(js, username=username) result = parseUser(json)
proc getProfileById*(userId: string): Future[Profile] {.async.} = proc getProfileById*(userId: string): Future[Profile] {.async.} =
let let
ps = genParams({"user_id": userId}) ps = genParams({"user_id": userId})
js = await fetch(userShow ? ps, Api.userShow) json = await fetchRaw(userShow ? ps, Api.userShow)
result = parseUserShow(js, id=userId) result = parseUser(json)
proc getTimeline*(id: string; after=""; replies=false): Future[Timeline] {.async.} = proc getTimeline*(id: string; after=""; replies=false): Future[Timeline] {.async.} =
let let

View File

@ -1,7 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
import httpclient, asyncdispatch, options, times, strutils, uri import httpclient, asyncdispatch, options, sequtils, strutils, uri
import packedjson, zippy import jsony, packedjson, zippy
import types, tokens, consts, parserutils, http_pool import types, tokens, consts, parserutils, http_pool
from experimental/types/common import Errors, ErrorObj
const const
rlRemaining = "x-rate-limit-remaining" rlRemaining = "x-rate-limit-remaining"
@ -40,7 +41,14 @@ proc genHeaders*(token: Token = nil): HttpHeaders =
"DNT": "1" "DNT": "1"
}) })
proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} = template updateToken() =
if api != Api.search and resp.headers.hasKey(rlRemaining):
let
remaining = parseInt(resp.headers[rlRemaining])
reset = parseInt(resp.headers[rlReset])
token.setRateLimit(api, remaining, reset)
template fetchImpl(result, fetchBody) {.dirty.} =
once: once:
pool = HttpPool() pool = HttpPool()
@ -48,37 +56,21 @@ proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
if token.tok.len == 0: if token.tok.len == 0:
raise rateLimitError() raise rateLimitError()
let headers = genHeaders(token)
try: try:
var resp: AsyncResponse var resp: AsyncResponse
var body = pool.use(headers): result = pool.use(genHeaders(token)):
resp = await c.get($url) resp = await c.get($url)
await resp.body await resp.body
if body.len > 0: if result.len > 0:
if resp.headers.getOrDefault("content-encoding") == "gzip": if resp.headers.getOrDefault("content-encoding") == "gzip":
body = uncompress(body, dfGzip) result = uncompress(result, dfGzip)
else: else:
echo "non-gzip body, url: ", url, ", body: ", body echo "non-gzip body, url: ", url, ", body: ", result
if body.startsWith('{') or body.startsWith('['): fetchBody
result = parseJson(body)
else:
echo resp.status, ": ", body
result = newJNull()
if api != Api.search and resp.headers.hasKey(rlRemaining): release(token, used=true)
let
remaining = parseInt(resp.headers[rlRemaining])
reset = parseInt(resp.headers[rlReset])
token.setRateLimit(api, remaining, reset)
if result.getError notin {invalidToken, forbidden, badToken}:
release(token, used=true)
else:
echo "fetch error: ", result.getError
release(token, invalid=true)
raise rateLimitError()
if resp.status == $Http400: if resp.status == $Http400:
raise newException(InternalError, $url) raise newException(InternalError, $url)
@ -89,3 +81,35 @@ proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
if "length" notin e.msg and "descriptor" notin e.msg: if "length" notin e.msg and "descriptor" notin e.msg:
release(token, invalid=true) release(token, invalid=true)
raise rateLimitError() raise rateLimitError()
proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
  ## Requests `url` through `fetchImpl` and returns the response parsed as
  ## JSON. A body that does not start with '{' or '[' is logged together
  ## with the response status and replaced by a JSON null node.
  ##
  ## When the parsed response carries an `invalidToken`, `forbidden` or
  ## `badToken` error, the token is released as invalid and
  ## `rateLimitError()` is raised.
  var body: string
  fetchImpl body:
    if not (body.startsWith('{') or body.startsWith('[')):
      echo resp.status, ": ", body
      result = newJNull()
    else:
      result = parseJson(body)

    updateToken()

    if result.getError in {invalidToken, forbidden, badToken}:
      echo "fetch error: ", result.getError
      release(token, invalid=true)
      raise rateLimitError()
proc fetchRaw*(url: Uri; api: Api): Future[string] {.async.} =
  ## Requests `url` through `fetchImpl` and returns the raw response body.
  ## A body that does not start with '{' or '[' is logged together with the
  ## response status and the result is truncated to an empty string.
  ##
  ## When the body starts with `{"errors` and any listed error code is
  ## `invalidToken`, `forbidden` or `badToken`, the token is released as
  ## invalid and `rateLimitError()` is raised.
  fetchImpl result:
    if not result.startsWith('{') and not result.startsWith('['):
      echo resp.status, ": ", result
      result.setLen(0)

    updateToken()

    if result.startsWith("{\"errors"):
      let apiErrors = result.fromJson(Errors).errors
      if apiErrors.anyIt(it.code in {invalidToken, forbidden, badToken}):
        echo "fetch error: ", apiErrors
        release(token, invalid=true)
        raise rateLimitError()

View File

@ -0,0 +1,67 @@
import std/[macros, htmlgen, unicode]
import ../types/common
import ".."/../[formatters, utils]
type
  ReplaceSliceKind = enum
    ## What to emit in place of a slice of the text.
    rkRemove,   ## drop the slice entirely
    rkUrl,      ## anchor built from `url` / `display`
    rkHashtag,  ## anchor pointing at a hashtag search
    rkMention   ## anchor built from `url`, titled with `display`

  ReplaceSlice* = object
    ## One pending replacement over a range of rune indices.
    slice: Slice[int]       ## inclusive rune-index range to replace
    kind: ReplaceSliceKind  ## selects the replacement markup
    url, display: string    ## link target and visible/title text
proc cmp*(x, y: ReplaceSlice): int =
  ## Orders replacement slices by the index at which they start.
  ##
  ## Both operands are compared on `slice.a`. Comparing a start against an
  ## end (`x.slice.a` vs `y.slice.b`) is not a valid ordering: an element
  ## would not compare equal to itself, and overlapping slices could sort
  ## inconsistently.
  cmp(x.slice.a, y.slice.a)
proc dedupSlices*(s: var seq[ReplaceSlice]) =
  ## Removes, in place, every slice whose start index equals that of an
  ## earlier slice, keeping the first one seen. Removal uses `del`, so the
  ## relative order of the remaining elements is not preserved.
  var keep = 0
  while keep < s.len:
    let start = s[keep].slice.a
    var probe = keep + 1
    while probe < s.len:
      if s[probe].slice.a == start:
        # `del` moves the last element into `probe`; re-check this index.
        s.del probe
      else:
        inc probe
    inc keep
proc extractUrls*(result: var seq[ReplaceSlice]; url: Url;
                  textLen: int; hideTwitter = false) =
  ## Appends the replacement for `url` to `result`.
  ##
  ## The slice spans `url.indices[0] ..< url.indices[1]`. With
  ## `hideTwitter` set, a Twitter link whose slice reaches `textLen` is
  ## turned into an `rkRemove` entry (only if it starts before `textLen`)
  ## instead of a link; every other URL becomes an `rkUrl` entry showing
  ## the shortened expanded URL.
  let
    target = url.expandedUrl
    span = url.indices[0] ..< url.indices[1]
    hidden = hideTwitter and span.b.succ >= textLen and target.isTwitterUrl

  if not hidden:
    result.add ReplaceSlice(kind: rkUrl, url: target,
                            display: target.shortLink, slice: span)
  elif span.a < textLen:
    result.add ReplaceSlice(kind: rkRemove, slice: span)
proc replacedWith*(runes: seq[Rune]; repls: openArray[ReplaceSlice];
                   textSlice: Slice[int]): string =
  ## Rebuilds the text in `runes`, substituting each slice in `repls` with
  ## its markup. Text between replacements is copied verbatim, starting at
  ## `textSlice.a` and stopping before `textSlice.b`. `repls` is walked in
  ## the order given, each gap starting right after the previous slice.
  result = newStringOfCap(runes.len)

  # Index of the first rune not yet copied or replaced.
  var cursor = textSlice.a

  for rep in repls:
    result.add $runes[cursor ..< rep.slice.a]

    case rep.kind
    of rkRemove:
      discard
    of rkUrl:
      result.add a(rep.display, href = rep.url)
    of rkMention:
      result.add a($runes[rep.slice], href = rep.url, title = rep.display)
    of rkHashtag:
      let
        sigil = $runes[rep.slice.a]
        tag = $runes[rep.slice.a.succ .. rep.slice.b]
      result.add a(sigil & tag, href = "/search?q=%23" & tag)

    cursor = rep.slice.b.succ

  # Copy whatever text remains after the last replacement.
  if cursor < textSlice.b:
    result.add $runes[cursor ..< textSlice.b]

View File

@ -0,0 +1,68 @@
import std/[algorithm, unicode, re, strutils]
import jsony
import utils, slices
import ../types/user as userType
from ../../types import Profile, Error
let
  # Mention pattern: an "@" at the start of the text or after a character
  # that is not a letter, digit or one of - _ . / ?, followed by 1-15
  # handle characters. The letter ranges are spelled out as `A-Za-z`: the
  # single range `A-z` also covers the ASCII punctuation between 'Z' and
  # 'a' ([ \ ] ^ _ `), which linked "@[x]" as a user and kept "[@user"
  # from being linked.
  unRegex = re"(^|[^A-Za-z0-9-_./?])@([A-Za-z0-9_]{1,15})"
  unReplace = "$1<a href=\"/$2\">@$2</a>"

  # Hashtag/cashtag pattern: "#" or "$" followed by word characters; the
  # replacement links to a "%23"-prefixed search for the captured word.
  htRegex = re"(^|[^\w-_./?])([#$])([\w_]+)"
  htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"
proc expandProfileEntities(profile: var Profile; user: User) =
  ## Fills `profile.website` from the first URL entity, if any, and
  ## rewrites `profile.bio` so that the URLs listed in the description
  ## entities, @mentions and #/$ tags become HTML anchors.
  let
    bioRunes = profile.bio.toRunes
    websiteUrls = user.entities.url.urls

  if websiteUrls.len > 0:
    profile.website = websiteUrls[0].expandedUrl

  var repls: seq[ReplaceSlice]
  for link in user.entities.description.urls:
    extractUrls(repls, link, bioRunes.high)

  # One replacement per start index, ordered before substitution.
  dedupSlices(repls)
  sort(repls, cmp)

  let linked = replacedWith(bioRunes, repls, 0 .. bioRunes.len)
  profile.bio = linked
    .replacef(unRegex, unReplace)
    .replacef(htRegex, htReplace)
proc getBanner(user: User): string =
  ## Picks the banner value for `user`: the banner URL with "/1500x500"
  ## appended when one is set, otherwise the link colour prefixed with
  ## '#', otherwise the fixed colour "#161616".
  if user.profileBannerUrl.len > 0:
    user.profileBannerUrl & "/1500x500"
  elif user.profileLinkColor.len > 0:
    '#' & user.profileLinkColor
  else:
    "#161616"
proc parseUser*(json: string): Profile =
  ## Builds a `Profile` from a raw user JSON document.
  ##
  ## Returns a default (empty) `Profile` when `json` is empty or reports
  ## `userNotFound`, and a profile with only `suspended` set when it
  ## reports `suspended`. Any other reported error is logged and parsing
  ## continues.
  # `fetchRaw` hands back an empty string for non-JSON responses; there is
  # nothing to deserialize in that case, so skip the JSON parser entirely.
  if json.len == 0:
    return

  handleErrors:
    case error
    of suspended: return Profile(suspended: true)
    of userNotFound: return
    else: echo "[error - parseUser]: ", error

  let user = json.fromJson(User)

  result = Profile(
    id: user.idStr,
    username: user.screenName,
    fullname: user.name,
    location: user.location,
    bio: user.description,
    following: user.friendsCount,
    followers: user.followersCount,
    tweets: user.statusesCount,
    likes: user.favouritesCount,
    media: user.mediaCount,
    verified: user.verified,
    protected: user.protected,
    joinDate: parseTwitterDate(user.createdAt),
    banner: getBanner(user),
    # Strip the "_normal" marker from the picture URL.
    userPic: getImageUrl(user.profileImageUrlHttps).replace("_normal", "")
  )

  # Bio text is plain at this point; turn its entities into links.
  result.expandProfileEntities(user)

View File

@ -0,0 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-only
import std/[sugar, strutils, times]
import ../types/common
import ../../utils as uutils
# Parses `time` with the format `f`, interpreted as UTC. When `time` is not
# exactly `flen` characters long, the template returns from the *calling*
# proc instead, leaving that proc's result at its default value.
template parseTime(time: string; f: static string; flen: int): DateTime =
  if time.len != flen:
    return
  time.parse(f, utc())

proc parseIsoDate*(date: string): DateTime =
  ## Parses a 20-character timestamp such as "2022-01-16T06:00:11Z".
  ## Input of any other length yields a default `DateTime`.
  parseTime(date, "yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20)

proc parseTwitterDate*(date: string): DateTime =
  ## Parses a 30-character timestamp such as
  ## "Sun Jan 16 06:00:11 +0000 2022". Input of any other length yields a
  ## default `DateTime`.
  parseTime(date, "ddd MMM dd hh:mm:ss \'+0000\' yyyy", 30)
proc getImageUrl*(url: string): string =
  ## Returns `url` with the `twimg` prefix removed, then the `https`
  ## prefix removed from what is left. A prefix that is not present is
  ## left alone.
  result = url
  result.removePrefix(twimg)
  result.removePrefix(https)
# Runs `body` when the `json` string in the caller's scope starts with
# `{"errors`. The code of the first reported error is made available to
# `body` as `error`. An error payload with an empty list is skipped, so
# indexing the first entry cannot fail.
template handleErrors*(body) =
  if json.startsWith("{\"errors"):
    let reported = json.fromJson(Errors).errors
    if reported.len > 0:
      let error {.inject.} = reported[0].code
      body

View File

@ -0,0 +1,30 @@
from ../../types import Error
type
  Url* = object
    ## A URL entity attached to a piece of text.
    url*, expandedUrl*, displayUrl*: string
    indices*: array[2, int]  ## start and end index of the URL in the text

  ErrorCode* = enum
    ## Named error codes with explicit numeric values.
    ## NOTE(review): presumably mirrors the codes of the remote API; confirm.
    null = 0
    noUserMatches = 17
    protectedUser = 22
    couldntAuth = 32
    doesntExist = 34
    userNotFound = 50
    suspended = 63
    rateLimited = 88
    invalidToken = 89
    listIdOrSlug = 112
    forbidden = 200
    badToken = 239
    noCsrf = 353

  ErrorObj* = object
    ## A single entry of an error response.
    code*: Error      ## typed with `Error` from the project-wide types
    message*: string

  Errors* = object
    ## Top-level shape of an error response: a list of error entries.
    errors*: seq[ErrorObj]

View File

@ -0,0 +1,28 @@
import common
type
  User* = object
    ## Raw user record as deserialized from JSON; `parseUser` maps it onto
    ## a `Profile`.
    idStr*, name*, screenName*, location*, description*: string
    entities*: Entities
    createdAt*: string  ## timestamp text, parsed later
    followersCount*, friendsCount*, favouritesCount*: int
    statusesCount*, mediaCount*: int
    verified*, protected*: bool
    profileBannerUrl*, profileImageUrlHttps*, profileLinkColor*: string

  Entities* = object
    ## URL entities attached to a user.
    url*: Urls          ## entities for the user's website field
    description*: Urls  ## entities found in the description text

  Urls* = object
    ## Wrapper around a list of URL entities.
    urls*: seq[Url]