Implement card fetching and parsing

This commit is contained in:
Zed 2019-07-11 19:22:23 +02:00
parent 1f90e2ab3e
commit d3a7ca834b
7 changed files with 113 additions and 13 deletions

View File

@ -163,6 +163,37 @@ proc getConversationPolls*(convo: Conversation) {.async.} =
futs.add convo.replies.map(getPolls) futs.add convo.replies.map(getPolls)
await all(futs) await all(futs)
proc getCard*(tweet: Tweet) {.async.} =
  ## Fetches the card page referenced by `tweet.card` and parses it into that
  ## card. Does nothing when the tweet has no card or when no HTML comes back.
  if not tweet.card.isSome:
    return

  # The tweet's own link is sent as the referer of the card request.
  let referer = $(base / getLink(tweet))
  let headers = newHttpHeaders({
    "Accept": cardAccept,
    "Referer": referer,
    "User-Agent": agent,
    "Authority": "twitter.com",
    "Accept-Language": lang,
  })

  # The card's `query` is the path of its page relative to `base`.
  let cardUrl = base / get(tweet.card).query
  let page = await fetchHtml(cardUrl, headers)

  if page != nil:
    parseCard(get(tweet.card), page)
proc getCards*(thread: Thread) {.async.} =
  ## Starts a card fetch for every tweet in `thread` that has a card and
  ## waits for all of them. A nil thread is ignored.
  if thread.isNil:
    return

  var pending: seq[Future[void]]
  for tweet in thread.tweets:
    if tweet.card.isSome:
      pending.add getCard(tweet)

  await all(pending)
proc getConversationCards*(convo: Conversation) {.async.} =
  ## Fetches the cards of the conversation's main tweet, of the threads
  ## before and after it, and of each reply thread, awaiting them together.
  var pending = @[
    getCard(convo.tweet),
    getCards(convo.before),
    getCards(convo.after)
  ]

  for replyThread in convo.replies:
    pending.add getCards(replyThread)

  await all(pending)
proc getPhotoRail*(username: string): Future[seq[GalleryPhoto]] {.async.} = proc getPhotoRail*(username: string): Future[seq[GalleryPhoto]] {.async.} =
let headers = newHttpHeaders({ let headers = newHttpHeaders({
"Accept": jsonAccept, "Accept": jsonAccept,
@ -234,9 +265,12 @@ proc getTweet*(username, id: string): Future[Conversation] {.async.} =
result = parseConversation(html) result = parseConversation(html)
let vidsFut = getConversationVideos(result) let
let pollFut = getConversationPolls(result) vidsFut = getConversationVideos(result)
await all(vidsFut, pollFut) pollFut = getConversationPolls(result)
cardFut = getConversationCards(result)
await all(vidsFut, pollFut, cardFut)
proc finishTimeline(json: JsonNode; query: Option[Query]; after: string): Future[Timeline] {.async.} = proc finishTimeline(json: JsonNode; query: Option[Query]; after: string): Future[Timeline] {.async.} =
if json == nil: return Timeline() if json == nil: return Timeline()
@ -257,8 +291,9 @@ proc finishTimeline(json: JsonNode; query: Option[Query]; after: string): Future
thread = parseThread(html) thread = parseThread(html)
vidsFut = getVideos(thread) vidsFut = getVideos(thread)
pollFut = getPolls(thread) pollFut = getPolls(thread)
cardFut = getCards(thread)
await all(vidsFut, pollFut) await all(vidsFut, pollFut, cardFut)
result.tweets = thread.tweets result.tweets = thread.tweets
proc getTimeline*(username, after: string): Future[Timeline] {.async.} = proc getTimeline*(username, after: string): Future[Timeline] {.async.} =

View File

@ -81,3 +81,7 @@ proc getTime*(tweet: Tweet): string =
proc getLink*(tweet: Tweet | Quote): string = proc getLink*(tweet: Tweet | Quote): string =
&"/{tweet.profile.username}/status/{tweet.id}" &"/{tweet.profile.username}/status/{tweet.id}"
proc getUrls*(text: string): seq[string] =
  ## Collects, in match order, the slice of `text` covered by the first
  ## capture of group 0 for every `urlRegex` match.
  # temporary
  for found in text.findAll(urlRegex):
    let span = found.group(0)[0]
    result.add text[span]

View File

@ -178,3 +178,32 @@ proc parsePhotoRail*(node: XmlNode): seq[GalleryPhoto] =
tweetId: img.attr("data-tweet-id"), tweetId: img.attr("data-tweet-id"),
color: img.attr("background-color").replace("style: ", "") color: img.attr("background-color").replace("style: ", "")
) )
proc parseCard*(card: var Card; node: XmlNode) =
  ## Fills `card` from the HTML of a fetched card page rooted at `node`.
  ##
  ## The kind is taken from the `card_name` meta tag; title, text,
  ## destination and image come from the card markup. When the meta tag is
  ## missing or names no recognised kind, `card.kind` is left as it was.
  ## When no image element is found, `card.image` is left as it was.
  let meta = node.select("head > meta[name*=card_name]")
  # `select` can come back nil (it is nil-checked for the image below), so
  # the attribute is only read when the meta tag was actually found.
  let cardKind = if meta != nil: meta.attr("content") else: ""

  # "summary_large_image" has to be tested before "summary", which it
  # contains as a substring.
  if "summary_large_image" in cardKind:
    card.kind = summaryLarge
  elif "summary" in cardKind:
    card.kind = summary
  elif "live_event" in cardKind:
    card.kind = liveEvent
  elif "player" in cardKind:
    card.kind = player
  elif "promo_website" in cardKind:
    card.kind = promoWebsite

  card.title = node.selectText("h2.TwitterCard-title")
  card.text = node.selectText("p.tcu-resetMargin")
  card.dest = node.selectText("span.SummaryCard-destination")

  let image = node.select(".tcu-imageWrapper > img")
  if image != nil:
    # workaround for issue 11713
    card.image = image.attr("data-src").replace("gname", "g&name")

  # Live events show the parsed title as body text and use the attribution
  # category as their title instead.
  if card.kind == liveEvent:
    card.text = card.title
    card.title = node.selectText(".TwitterCard-attribution--category")

View File

@ -169,8 +169,26 @@ proc getQuoteMedia*(quote: var Quote; node: XmlNode) =
proc getTweetCards*(tweet: Tweet; node: XmlNode) =
  ## Detects the card attached to the tweet element `node`.
  ##
  ## Poll cards only set `tweet.poll`. Any other card is stored in
  ## `tweet.card` with the tweet id and the `data-src` of the card container,
  ## plus the first URL found in the tweet text when there is one.
  if node.attr("data-has-cards") == "false": return

  let cardType = node.attr("data-card2-type")
  if "poll" in cardType:
    tweet.poll = some(Poll())
    return

  let cardDiv = node.select(".card2 > div")
  if cardDiv == nil: return

  var card = Card(
    id: tweet.id,
    query: cardDiv.attr("data-src")
  )

  # temporary solution: take the card URL from the tweet text.
  # The text may contain no URL at all; indexing an empty seq would raise,
  # so the URL is only set when at least one was found.
  let text = node.selectText(".tweet-text")
  let urls = getUrls(text)
  if urls.len > 0:
    card.url = urls[0]

  tweet.card = some(card)
proc getMoreReplies*(node: XmlNode): int = proc getMoreReplies*(node: XmlNode): int =
let text = node.innerText().strip() let text = node.innerText().strip()

View File

@ -21,7 +21,7 @@ const
proc initQuery*(filters, includes, excludes, separator: string; name=""): Query = proc initQuery*(filters, includes, excludes, separator: string; name=""): Query =
var sep = separator.strip().toUpper() var sep = separator.strip().toUpper()
Query( Query(
queryType: custom, kind: custom,
filters: filters.split(",").filterIt(it in validFilters), filters: filters.split(",").filterIt(it in validFilters),
includes: includes.split(",").filterIt(it in validFilters), includes: includes.split(",").filterIt(it in validFilters),
excludes: excludes.split(",").filterIt(it in validFilters), excludes: excludes.split(",").filterIt(it in validFilters),
@ -31,7 +31,7 @@ proc initQuery*(filters, includes, excludes, separator: string; name=""): Query
proc getMediaQuery*(name: string): Query = proc getMediaQuery*(name: string): Query =
Query( Query(
queryType: media, kind: media,
filters: @["twimg", "native_video"], filters: @["twimg", "native_video"],
fromUser: name, fromUser: name,
sep: "OR" sep: "OR"
@ -39,7 +39,7 @@ proc getMediaQuery*(name: string): Query =
proc getReplyQuery*(name: string): Query =
  ## Builds a `replies` query for user `name` that includes native retweets.
  result = Query(
    kind: replies,
    fromUser: name,
    includes: @["nativeretweets"]
  )
@ -61,8 +61,8 @@ proc genQueryParam*(query: Query): string =
return strip(param & filters.join(&" {query.sep} ")) return strip(param & filters.join(&" {query.sep} "))
proc genQueryUrl*(query: Query): string = proc genQueryUrl*(query: Query): string =
result = &"/{query.queryType}?" result = &"/{query.kind}?"
if query.queryType != custom: return if query.kind != custom: return
var params: seq[string] var params: seq[string]
if query.filters.len > 0: if query.filters.len > 0:

View File

@ -31,11 +31,11 @@ db("cache.db", "", "", ""):
.}: Time .}: Time
type type
QueryType* = enum QueryKind* = enum
replies, media, custom = "search" replies, media, custom = "search"
Query* = object Query* = object
queryType*: QueryType kind*: QueryKind
filters*: seq[string] filters*: seq[string]
includes*: seq[string] includes*: seq[string]
excludes*: seq[string] excludes*: seq[string]
@ -70,6 +70,19 @@ type
status*: string status*: string
leader*: int leader*: int
CardKind* = enum
## Card kinds that `parseCard` derives from the card page's `card_name`
## meta tag.
summary, summaryLarge, liveEvent, player, promoWebsite
Card* = object
## A card attached to a tweet. `getTweetCards` creates it with `id`, `query`
## and `url`; `parseCard` fills in the remaining fields from the card page.
kind*: CardKind ## set by `parseCard` from the `card_name` meta tag
id*: string ## id of the tweet the card belongs to
query*: string ## `data-src` of the card container; appended to `base` to fetch the card
url*: string ## first URL found in the tweet text
title*: string ## card title; for live events, the attribution category
dest*: string ## text of the card's destination element
text*: string ## card body text; for live events, the originally parsed title
image*: string ## `data-src` of the card image, when one is present
Quote* = object Quote* = object
id*: string id*: string
profile*: Profile profile*: Profile
@ -104,6 +117,7 @@ type
stats*: TweetStats stats*: TweetStats
retweet*: Option[Retweet] retweet*: Option[Retweet]
quote*: Option[Quote] quote*: Option[Quote]
card*: Option[Card]
gif*: Option[Gif] gif*: Option[Gif]
video*: Option[Video] video*: Option[Video]
photos*: seq[string] photos*: seq[string]

View File

@ -14,7 +14,7 @@ proc getTabClass(timeline: Timeline; tab: string): string =
if timeline.query.isNone: if timeline.query.isNone:
if tab == "tweets": if tab == "tweets":
classes.add "active" classes.add "active"
elif $timeline.query.get().queryType == tab: elif $timeline.query.get().kind == tab:
classes.add "active" classes.add "active"
return classes.join(" ") return classes.join(" ")