Implement card fetching and parsing

This commit is contained in:
Zed 2019-07-11 19:22:23 +02:00
parent 1f90e2ab3e
commit d3a7ca834b
7 changed files with 113 additions and 13 deletions

View File

@ -163,6 +163,37 @@ proc getConversationPolls*(convo: Conversation) {.async.} =
futs.add convo.replies.map(getPolls)
await all(futs)
proc getCard*(tweet: Tweet) {.async.} =
  ## Downloads the card page for `tweet` and parses it into `tweet.card`.
  ##
  ## Does nothing when the tweet carries no card, or when `fetchHtml` yields
  ## `nil` for the card URL.
  if tweet.card.isSome:
    let
      reqHeaders = newHttpHeaders({
        "Accept": cardAccept,
        "Referer": $(base / getLink(tweet)),
        "User-Agent": agent,
        "Authority": "twitter.com",
        "Accept-Language": lang,
      })
      # The card's `query` is the path of the card page relative to `base`.
      cardUrl = base / tweet.card.get().query
      page = await fetchHtml(cardUrl, reqHeaders)

    if page != nil:
      # Parsed in place: `parseCard` takes the card as a `var` parameter.
      parseCard(tweet.card.get(), page)
proc getCards*(thread: Thread) {.async.} =
  ## Fetches and parses the cards of every tweet in `thread` that has one,
  ## awaiting all of the fetches together. A `nil` thread is ignored.
  if thread != nil:
    let withCards = thread.tweets.filterIt(it.card.isSome)
    await all(withCards.map(getCard))
proc getConversationCards*(convo: Conversation) {.async.} =
  ## Fetches cards for every part of a conversation: the main tweet, the
  ## threads before and after it, and each reply thread. All fetches are
  ## started first and then awaited together.
  let pending = @[
    getCard(convo.tweet),
    getCards(convo.before),
    getCards(convo.after)
  ] & convo.replies.map(getCards)

  await all(pending)
proc getPhotoRail*(username: string): Future[seq[GalleryPhoto]] {.async.} =
let headers = newHttpHeaders({
"Accept": jsonAccept,
@ -234,9 +265,12 @@ proc getTweet*(username, id: string): Future[Conversation] {.async.} =
result = parseConversation(html)
let vidsFut = getConversationVideos(result)
let pollFut = getConversationPolls(result)
await all(vidsFut, pollFut)
let
vidsFut = getConversationVideos(result)
pollFut = getConversationPolls(result)
cardFut = getConversationCards(result)
await all(vidsFut, pollFut, cardFut)
proc finishTimeline(json: JsonNode; query: Option[Query]; after: string): Future[Timeline] {.async.} =
if json == nil: return Timeline()
@ -257,8 +291,9 @@ proc finishTimeline(json: JsonNode; query: Option[Query]; after: string): Future
thread = parseThread(html)
vidsFut = getVideos(thread)
pollFut = getPolls(thread)
cardFut = getCards(thread)
await all(vidsFut, pollFut)
await all(vidsFut, pollFut, cardFut)
result.tweets = thread.tweets
proc getTimeline*(username, after: string): Future[Timeline] {.async.} =

View File

@ -81,3 +81,7 @@ proc getTime*(tweet: Tweet): string =
proc getLink*(tweet: Tweet | Quote): string =
  ## Returns the site-relative path of a tweet or quote, in the form
  ## `/<username>/status/<id>`.
  result = "/" & $tweet.profile.username & "/status/" & $tweet.id
proc getUrls*(text: string): seq[string] =
  ## Collects the substrings of `text` matched by `urlRegex`, in match order.
  ## For each match, the first captured slice of group 0 is taken.
  # temporary
  for found in text.findAll(urlRegex):
    let span = found.group(0)[0]
    result.add text[span]

View File

@ -178,3 +178,32 @@ proc parsePhotoRail*(node: XmlNode): seq[GalleryPhoto] =
tweetId: img.attr("data-tweet-id"),
color: img.attr("background-color").replace("style: ", "")
)
proc parseCard*(card: var Card; node: XmlNode) =
  ## Fills `card` in place from the card document `node`.
  ##
  ## * `kind` is chosen from the `content` of the `card_name` meta tag. When
  ##   none of the known names matches, `kind` is left as it was.
  ## * `title`, `text` and `dest` are read from the card's title, paragraph
  ##   and destination elements.
  ## * `image` is set only when the card contains an image; otherwise it is
  ##   left untouched.
  ## * For live events, the parsed title becomes the text and the title is
  ##   replaced by the attribution category.
  let cardKind = node.select("head > meta[name*=card_name]").attr("content")

  # "summary_large_image" must be tested before "summary", since the latter
  # is a substring of the former.
  if "summary_large_image" in cardKind:
    card.kind = summaryLarge
  elif "summary" in cardKind:
    card.kind = summary
  elif "live_event" in cardKind:
    card.kind = liveEvent
  elif "player" in cardKind:
    card.kind = player
  elif "promo_website" in cardKind:
    card.kind = promoWebsite

  card.title = node.selectText("h2.TwitterCard-title")
  card.text = node.selectText("p.tcu-resetMargin")
  card.dest = node.selectText("span.SummaryCard-destination")

  let image = node.select(".tcu-imageWrapper > img")
  if image != nil:
    # workaround for issue 11713: the attribute arrives with "gname" where
    # "g&name" is expected, so the ampersand is put back.
    card.image = image.attr("data-src").replace("gname", "g&name")

  if card.kind == liveEvent:
    card.text = card.title
    card.title = node.selectText(".TwitterCard-attribution--category")

View File

@ -169,8 +169,26 @@ proc getQuoteMedia*(quote: var Quote; node: XmlNode) =
proc getTweetCards*(tweet: Tweet; node: XmlNode) =
  ## Detects a poll or card on the tweet element `node` and records it on
  ## `tweet`.
  ##
  ## * Nothing happens when `data-has-cards` is `"false"`.
  ## * When `data-card2-type` mentions a poll, an empty `Poll` is attached
  ##   and no card is created.
  ## * Otherwise, when a `.card2 > div` element exists, a `Card` is attached
  ##   with the tweet's id and the element's `data-src` as its query. Its
  ##   `url` is the first URL found in the tweet text, or is left empty when
  ##   the text contains none.
  if node.attr("data-has-cards") == "false": return

  let cardType = node.attr("data-card2-type")
  if "poll" in cardType:
    tweet.poll = some(Poll())
    return

  let cardDiv = node.select(".card2 > div")
  if cardDiv == nil: return

  var card = Card(
    id: tweet.id,
    query: cardDiv.attr("data-src")
  )

  # temporary solution
  let urls = getUrls(node.selectText(".tweet-text"))
  # A tweet can have a card without any URL in its text; indexing an empty
  # seq here would raise, so the url is only set when a match exists.
  if urls.len > 0:
    card.url = urls[0]

  tweet.card = some(card)
proc getMoreReplies*(node: XmlNode): int =
let text = node.innerText().strip()

View File

@ -21,7 +21,7 @@ const
proc initQuery*(filters, includes, excludes, separator: string; name=""): Query =
var sep = separator.strip().toUpper()
Query(
queryType: custom,
kind: custom,
filters: filters.split(",").filterIt(it in validFilters),
includes: includes.split(",").filterIt(it in validFilters),
excludes: excludes.split(",").filterIt(it in validFilters),
@ -31,7 +31,7 @@ proc initQuery*(filters, includes, excludes, separator: string; name=""): Query
proc getMediaQuery*(name: string): Query =
Query(
queryType: media,
kind: media,
filters: @["twimg", "native_video"],
fromUser: name,
sep: "OR"
@ -39,7 +39,7 @@ proc getMediaQuery*(name: string): Query =
proc getReplyQuery*(name: string): Query =
  ## Builds the `replies` query for user `name`, with native retweets
  ## included.
  result = Query(
    kind: replies,
    fromUser: name,
    includes: @["nativeretweets"]
  )
@ -61,8 +61,8 @@ proc genQueryParam*(query: Query): string =
return strip(param & filters.join(&" {query.sep} "))
proc genQueryUrl*(query: Query): string =
result = &"/{query.queryType}?"
if query.queryType != custom: return
result = &"/{query.kind}?"
if query.kind != custom: return
var params: seq[string]
if query.filters.len > 0:

View File

@ -31,11 +31,11 @@ db("cache.db", "", "", ""):
.}: Time
type
QueryType* = enum
QueryKind* = enum
replies, media, custom = "search"
Query* = object
queryType*: QueryType
kind*: QueryKind
filters*: seq[string]
includes*: seq[string]
excludes*: seq[string]
@ -70,6 +70,19 @@ type
status*: string
leader*: int
CardKind* = enum
  ## Card layouts told apart by `parseCard`, which matches substrings of the
  ## card page's `card_name` meta content. `summary` is the first member and
  ## therefore the value of a `Card` whose kind was never assigned.
  summary, summaryLarge, liveEvent, player, promoWebsite
Card* = object
  ## A card attached to a tweet. `getTweetCards` creates it with `id`,
  ## `query` and `url`; `parseCard` fills in the remaining fields from the
  ## fetched card page.
  kind*: CardKind  ## layout, chosen by `parseCard`
  id*: string      ## id of the tweet the card belongs to
  query*: string   ## `data-src` of the card element; joined to the base
                   ## URL by `getCard` to fetch the card page
  url*: string     ## URL taken from the tweet text (first match)
  title*: string   ## card title; for live events, the attribution category
  dest*: string    ## text of the card's destination element
  text*: string    ## card paragraph; for live events, the parsed title
  image*: string   ## `data-src` of the card image, when one is present
Quote* = object
id*: string
profile*: Profile
@ -104,6 +117,7 @@ type
stats*: TweetStats
retweet*: Option[Retweet]
quote*: Option[Quote]
card*: Option[Card]
gif*: Option[Gif]
video*: Option[Video]
photos*: seq[string]

View File

@ -14,7 +14,7 @@ proc getTabClass(timeline: Timeline; tab: string): string =
if timeline.query.isNone:
if tab == "tweets":
classes.add "active"
elif $timeline.query.get().queryType == tab:
elif $timeline.query.get().kind == tab:
classes.add "active"
return classes.join(" ")