Add parser utils to parserutils.nim
This commit is contained in:
parent
c559ab1f1a
commit
de9e452a7c
|
@ -12,6 +12,7 @@ Inspired by the [invidio.us](https://github.com/omarroth/invidious) project.
|
||||||
- Dark theme
|
- Dark theme
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/zedeus/nitter
|
git clone https://github.com/zedeus/nitter
|
||||||
cd nitter
|
cd nitter
|
||||||
|
@ -23,6 +24,9 @@ like the title, this will change as the project matures a bit. For now the focus
|
||||||
is on implementing missing features.
|
is on implementing missing features.
|
||||||
|
|
||||||
## Todo (roughly in this order)
|
## Todo (roughly in this order)
|
||||||
|
|
||||||
|
- Line connecting posts in threads
|
||||||
|
- "Show Thread" button
|
||||||
- Twitter "Cards" (link previews)
|
- Twitter "Cards" (link previews)
|
||||||
- Nitter link previews
|
- Nitter link previews
|
||||||
- Search (+ hashtag search)
|
- Search (+ hashtag search)
|
||||||
|
@ -39,6 +43,7 @@ is on implementing missing features.
|
||||||
- Nitter logo
|
- Nitter logo
|
||||||
|
|
||||||
## Why?
|
## Why?
|
||||||
|
|
||||||
It's basically impossible to use Twitter without JavaScript enabled. If you try,
|
It's basically impossible to use Twitter without JavaScript enabled. If you try,
|
||||||
you're redirected to the legacy mobile version which is awful both functionally
|
you're redirected to the legacy mobile version which is awful both functionally
|
||||||
and aesthetically. For privacy-minded folks, preventing JavaScript analytics and
|
and aesthetically. For privacy-minded folks, preventing JavaScript analytics and
|
||||||
|
|
122
src/parser.nim
122
src/parser.nim
|
@ -1,103 +1,71 @@
|
||||||
import xmltree, sequtils, strtabs, strutils, strformat, json, times
|
import xmltree, sequtils, strtabs, strutils, strformat
|
||||||
import nimquery, regex
|
import nimquery
|
||||||
|
|
||||||
import ./types, ./formatters
|
import ./types, ./parserutils
|
||||||
|
|
||||||
proc getAttr(node: XmlNode; attr: string; default=""): string =
|
|
||||||
if node.isNil or node.attrs.isNil: return default
|
|
||||||
return node.attrs.getOrDefault(attr)
|
|
||||||
|
|
||||||
proc selectAttr(node: XmlNode; selector: string; attr: string; default=""): string =
|
|
||||||
let res = node.querySelector(selector)
|
|
||||||
if res == nil: "" else: res.getAttr(attr, default)
|
|
||||||
|
|
||||||
proc selectText(node: XmlNode; selector: string): string =
|
|
||||||
let res = node.querySelector(selector)
|
|
||||||
result = if res == nil: "" else: res.innerText()
|
|
||||||
|
|
||||||
proc parsePopupProfile*(node: XmlNode): Profile =
|
proc parsePopupProfile*(node: XmlNode): Profile =
|
||||||
let profile = node.querySelector(".profile-card")
|
let profile = node.querySelector(".profile-card")
|
||||||
if profile.isNil: return
|
if profile.isNil: return
|
||||||
|
|
||||||
result = Profile(
|
result = Profile(
|
||||||
fullname: profile.selectText(".fullname").strip(),
|
fullname: profile.getName(".fullname"),
|
||||||
username: profile.selectText(".username").strip(chars={'@', ' '}),
|
username: profile.getUsername(".username"),
|
||||||
description: profile.selectText(".bio"),
|
description: profile.getBio(".bio"),
|
||||||
verified: profile.selectText(".Icon.Icon--verified").len > 0,
|
userpic: profile.getAvatar(".ProfileCard-avatarImage"),
|
||||||
protected: profile.selectText(".Icon.Icon--protected").len > 0,
|
verified: isVerified(profile),
|
||||||
userpic: profile.selectAttr(".ProfileCard-avatarImage", "src").getUserpic(),
|
protected: isProtected(profile),
|
||||||
banner: profile.selectAttr("svg > image", "xlink:href").replace("600x200", "1500x500")
|
banner: getBanner(profile)
|
||||||
)
|
)
|
||||||
|
result.getPopupStats(profile)
|
||||||
if result.banner.len == 0:
|
|
||||||
result.banner = profile.selectAttr(".ProfileCard-bg", "style")
|
|
||||||
|
|
||||||
let stats = profile.querySelectorAll(".ProfileCardStats-statLink")
|
|
||||||
for s in stats:
|
|
||||||
let text = s.getAttr("title").split(" ")[0]
|
|
||||||
case s.getAttr("href").split("/")[^1]
|
|
||||||
of "followers": result.followers = text
|
|
||||||
of "following": result.following = text
|
|
||||||
else: result.tweets = text
|
|
||||||
|
|
||||||
proc parseIntentProfile*(profile: XmlNode): Profile =
|
proc parseIntentProfile*(profile: XmlNode): Profile =
|
||||||
result = Profile(
|
result = Profile(
|
||||||
fullname: profile.selectText("a.fn.url.alternate-context").strip(),
|
fullname: profile.getName("a.fn.url.alternate-context"),
|
||||||
username: profile.selectText(".nickname").strip(chars={'@', ' '}),
|
username: profile.getUsername(".nickname"),
|
||||||
userpic: profile.querySelector(".profile.summary").selectAttr("img.photo", "src").getUserPic(),
|
description: profile.getBio("p.note"),
|
||||||
description: profile.selectText("p.note").strip(),
|
userpic: profile.querySelector(".profile.summary").getAvatar("img.photo"),
|
||||||
verified: not profile.querySelector("li.verified").isNil,
|
verified: not profile.querySelector("li.verified").isNil,
|
||||||
protected: not profile.querySelector("li.protected").isNil,
|
protected: not profile.querySelector("li.protected").isNil,
|
||||||
banner: "background-color: #161616",
|
banner: getBanner(profile)
|
||||||
tweets: "?"
|
|
||||||
)
|
)
|
||||||
|
result.getIntentStats(profile)
|
||||||
for stat in profile.querySelectorAll("dd.count > a"):
|
|
||||||
case stat.getAttr("href").split("/")[^1]
|
|
||||||
of "followers": result.followers = stat.innerText()
|
|
||||||
of "following": result.following = stat.innerText()
|
|
||||||
|
|
||||||
proc parseTweetProfile*(profile: XmlNode): Profile =
|
proc parseTweetProfile*(profile: XmlNode): Profile =
|
||||||
result = Profile(
|
result = Profile(
|
||||||
fullname: profile.getAttr("data-name"),
|
fullname: profile.getAttr("data-name"),
|
||||||
username: profile.getAttr("data-screen-name"),
|
username: profile.getAttr("data-screen-name"),
|
||||||
userpic: profile.selectAttr(".avatar", "src").getUserpic(),
|
userpic: profile.getAvatar(".avatar"),
|
||||||
verified: profile.selectText(".Icon.Icon--verified").len > 0
|
verified: isVerified(profile)
|
||||||
|
)
|
||||||
|
|
||||||
|
proc parseQuote*(tweet: XmlNode): Tweet =
  ## Builds a `Tweet` from the `.QuoteTweet-innerContainer` element found
  ## under `tweet`.
  ##
  ## `result` is left at its default value when `tweet` contains no such
  ## element.
  let quote = tweet.querySelector(".QuoteTweet-innerContainer")
  if quote.isNil: return

  result = Tweet(
    id: quote.getAttr("data-item-id"),
    link: quote.getAttr("href"),
    text: quote.selectText(".QuoteTweet-text")
  )

  # The handle comes from `data-screen-name`, the same attribute
  # `parseTweetProfile` uses for `username`; the display name is read from
  # the element whose class is spelled `QuoteTweet-fullname`.
  result.profile = Profile(
    fullname: quote.selectText(".QuoteTweet-fullname"),
    username: quote.getAttr("data-screen-name"),
    verified: isVerified(quote)
  )
|
||||||
|
|
||||||
proc parseTweet*(tweet: XmlNode): Tweet =
|
proc parseTweet*(tweet: XmlNode): Tweet =
|
||||||
let time = tweet.querySelector(".js-short-timestamp")
|
|
||||||
result = Tweet(
|
result = Tweet(
|
||||||
id: tweet.getAttr("data-item-id"),
|
id: tweet.getAttr("data-item-id"),
|
||||||
link: tweet.getAttr("data-permalink-path"),
|
link: tweet.getAttr("data-permalink-path"),
|
||||||
text: tweet.selectText(".tweet-text").stripTwitterUrls(),
|
profile: parseTweetProfile(tweet),
|
||||||
pinned: "pinned" in tweet.getAttr("class"),
|
text: getTweetText(tweet),
|
||||||
profile: parseTweetProfile(tweet),
|
time: getTimestamp(tweet),
|
||||||
time: fromUnix(parseInt(time.getAttr("data-time", "0"))),
|
shortTime: getShortTime(tweet),
|
||||||
shortTime: time.innerText(),
|
pinned: "pinned" in tweet.getAttr("class")
|
||||||
replies: "0",
|
|
||||||
likes: "0",
|
|
||||||
retweets: "0"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
for action in tweet.querySelectorAll(".ProfileTweet-actionCountForAria"):
|
result.getTweetStats(tweet)
|
||||||
let text = action.innerText.split()
|
result.getTweetMedia(tweet)
|
||||||
case text[1]
|
|
||||||
of "replies": result.replies = text[0]
|
|
||||||
of "likes": result.likes = text[0]
|
|
||||||
of "retweets": result.retweets = text[0]
|
|
||||||
else: discard
|
|
||||||
|
|
||||||
for photo in tweet.querySelectorAll(".AdaptiveMedia-photoContainer"):
|
|
||||||
result.photos.add photo.attrs["data-image-url"]
|
|
||||||
|
|
||||||
let player = tweet.selectAttr(".PlayableMedia-player", "style")
|
|
||||||
if player.len > 0:
|
|
||||||
let thumb = player.replace(re".+:url\('([^']+)'\)", "$1")
|
|
||||||
if "tweet_video" in thumb:
|
|
||||||
result.gif = some(thumb.replace(re".+thumb/([^\.']+)\.jpg.*", "$1"))
|
|
||||||
else:
|
|
||||||
result.videoThumb = some(thumb)
|
|
||||||
|
|
||||||
let by = tweet.selectText(".js-retweet-text > a > b")
|
let by = tweet.selectText(".js-retweet-text > a > b")
|
||||||
if by.len > 0:
|
if by.len > 0:
|
||||||
|
|
|
@ -0,0 +1,111 @@
|
||||||
|
import xmltree, strtabs, times
|
||||||
|
import nimquery, regex
|
||||||
|
|
||||||
|
import ./types, ./formatters
|
||||||
|
|
||||||
|
# Matches text ending in `:url('...')` and captures what is between the
# quotes; used with the replacement "$1".
const thumbRegex = re".+:url\('([^']+)'\)"

# Captures the segment between `thumb/` and `.jpg`; used with the
# replacement "$1".
const gifRegex = re".+thumb/([^\.']+)\.jpg.*"
|
||||||
|
|
||||||
|
proc getAttr*(node: XmlNode; attr: string; default=""): string =
  ## Returns the value of attribute `attr` on `node`.
  ##
  ## `default` is returned when `node` is nil, when it has no attribute
  ## table, or when the attribute itself is absent.
  if node.isNil or node.attrs.isNil:
    return default
  # Pass `default` through to the lookup; without it a missing attribute
  # yields "" even when the caller asked for a different fallback.
  return node.attrs.getOrDefault(attr, default)
|
||||||
|
|
||||||
|
proc selectAttr*(node: XmlNode; selector: string; attr: string;
                 default=""): string =
  ## Returns attribute `attr` of the element `querySelector` finds for
  ## `selector` under `node`.
  ##
  ## `default` is returned when nothing matches, so the fallback behaves the
  ## same whether the element or only the attribute is missing.
  let match = node.querySelector(selector)
  if match.isNil: default
  else: match.getAttr(attr, default)
|
||||||
|
|
||||||
|
proc selectText*(node: XmlNode; selector: string): string =
  ## Inner text of the element `querySelector` finds for `selector` under
  ## `node`, or "" when there is no match.
  let match = node.querySelector(selector)
  if match.isNil:
    return ""
  match.innerText()
|
||||||
|
|
||||||
|
proc isVerified*(profile: XmlNode): bool =
  ## True when the `.Icon.Icon--verified` element under `profile` has
  ## non-empty inner text.
  let badge = selectText(profile, ".Icon.Icon--verified")
  result = badge.len > 0
|
||||||
|
|
||||||
|
proc isProtected*(profile: XmlNode): bool =
  ## True when the `.Icon.Icon--protected` element under `profile` has
  ## non-empty inner text.
  let badge = selectText(profile, ".Icon.Icon--protected")
  result = badge.len > 0
|
||||||
|
|
||||||
|
proc getName*(profile: XmlNode; selector: string): string =
  ## Text of the element matched by `selector`, with leading and trailing
  ## whitespace removed.
  let raw = selectText(profile, selector)
  result = strip(raw)
|
||||||
|
|
||||||
|
proc getUsername*(profile: XmlNode; selector: string): string =
  ## Text of the element matched by `selector`, with any leading and
  ## trailing '@' and space characters removed.
  let raw = selectText(profile, selector)
  result = strip(raw, chars={'@', ' '})
|
||||||
|
|
||||||
|
proc getTweetText*(tweet: XmlNode): string =
  ## Returns the text of the `.tweet-text` element after passing it through
  ## `stripTwitterUrls`.
  ##
  ## When the hidden timeline link's `data-expanded-url` occurs in that text,
  ## a space is inserted in front of each occurrence first.
  const hiddenLink = ".tweet-text > a.twitter-timeline-link.u-hidden"
  let expanded = selectAttr(tweet, hiddenLink, "data-expanded-url")
  let body = selectText(tweet, ".tweet-text")

  if expanded.len == 0 or expanded notin body:
    return stripTwitterUrls(body)
  stripTwitterUrls(body.replace(expanded, " " & expanded))
|
||||||
|
|
||||||
|
proc getTime(tweet: XmlNode): XmlNode =
  ## Looks up the `.js-short-timestamp` element inside `tweet`.
  result = querySelector(tweet, ".js-short-timestamp")
|
||||||
|
|
||||||
|
proc getTimestamp*(tweet: XmlNode): Time =
  ## Converts the `data-time` attribute of the tweet's timestamp element
  ## into a `Time` via `fromUnix`.
  ##
  ## Falls back to Unix time 0 when the element or attribute is missing or
  ## the value is not an integer, instead of raising `ValueError`.
  let raw = getTime(tweet).getAttr("data-time", "0")
  let seconds =
    try: parseInt(raw)
    except ValueError: 0  # empty or malformed value: same fallback as "0"
  fromUnix(seconds)
|
||||||
|
|
||||||
|
proc getShortTime*(tweet: XmlNode): string =
  ## Inner text of the tweet's timestamp element, or "" when the tweet has
  ## no such element.
  let node = getTime(tweet)
  # `innerText` dereferences its argument, so a missing element has to be
  # handled before the call.
  if node.isNil: ""
  else: node.innerText()
|
||||||
|
|
||||||
|
proc getBio*(profile: XmlNode; selector: string): string =
  ## Text of the element matched by `selector`, with leading and trailing
  ## whitespace removed.
  let bio = selectText(profile, selector)
  strip(bio)
|
||||||
|
|
||||||
|
proc getAvatar*(profile: XmlNode; selector: string): string =
  ## Reads the `src` attribute of the element matched by `selector` and
  ## passes it through `getUserpic`.
  let src = selectAttr(profile, selector, "src")
  result = getUserpic(src)
|
||||||
|
|
||||||
|
proc getBanner*(tweet: XmlNode): string =
  ## Picks a banner value for the node, trying in order:
  ##
  ## 1. the `xlink:href` of `svg > image`, with "600x200" replaced by
  ##    "1500x500";
  ## 2. the `style` attribute of `.ProfileCard-bg`;
  ## 3. the literal "background-color: #161616".
  let imageUrl = selectAttr(tweet, "svg > image", "xlink:href")

  result =
    if imageUrl.len > 0:
      imageUrl.replace("600x200", "1500x500")
    else:
      selectAttr(tweet, ".ProfileCard-bg", "style")

  if result.len == 0:
    result = "background-color: #161616"
|
||||||
|
|
||||||
|
proc getPopupStats*(profile: var Profile; node: XmlNode) =
  ## Fills `profile.followers`, `profile.following` and `profile.tweets` from
  ## the `.ProfileCardStats-statLink` elements under `node`.
  ##
  ## The stored value is the first space-separated word of the link's
  ## `title`. The last path segment of its `href` selects the field; anything
  ## other than "followers" or "following" is stored in `tweets`.
  for link in node.querySelectorAll(".ProfileCardStats-statLink"):
    let count = link.getAttr("title").split(" ")[0]
    let kind = link.getAttr("href").split("/")[^1]
    if kind == "followers":
      profile.followers = count
    elif kind == "following":
      profile.following = count
    else:
      profile.tweets = count
|
||||||
|
|
||||||
|
proc getIntentStats*(profile: var Profile; node: XmlNode) =
  ## Fills `profile.followers` and `profile.following` from the
  ## `dd.count > a` links under `node`, and sets `profile.tweets` to "?".
  ##
  ## The last path segment of each link's `href` selects the field; the
  ## link's inner text is the value. Links with any other target are ignored.
  profile.tweets = "?"
  for s in node.querySelectorAll("dd.count > a"):
    let text = s.innerText()
    case s.getAttr("href").split("/")[^1]
    of "followers": profile.followers = text
    of "following": profile.following = text
    # Explicit fallthrough: a string `case` without `else` is only tolerated
    # by the compiler, not guaranteed.
    else: discard
|
||||||
|
|
||||||
|
proc getTweetStats*(tweet: var Tweet; node: XmlNode) =
  ## Sets `tweet.replies`, `tweet.retweets` and `tweet.likes` from the
  ## `.ProfileTweet-actionCountForAria` elements under `node`.
  ##
  ## All three start at "0". Each element's inner text is split on
  ## whitespace: the second word names the counter and the first word is its
  ## value. Elements with fewer than two words, or an unknown counter name,
  ## are skipped.
  tweet.replies = "0"
  tweet.retweets = "0"
  tweet.likes = "0"

  for action in node.querySelectorAll(".ProfileTweet-actionCountForAria"):
    let words = action.innerText.split()
    # Indexing `words[1]` on a shorter sequence would raise.
    if words.len < 2:
      continue
    case words[1]
    of "replies": tweet.replies = words[0]
    of "likes": tweet.likes = words[0]
    of "retweets": tweet.retweets = words[0]
    else: discard
|
||||||
|
|
||||||
|
proc getTweetMedia*(tweet: var Tweet; node: XmlNode) =
  ## Collects media references from `node` into `tweet`.
  ##
  ## - Every `.AdaptiveMedia-photoContainer` with a non-empty
  ##   `data-image-url` is appended to `tweet.photos`.
  ## - If a `.PlayableMedia-player` with a `style` attribute exists, the
  ##   result of applying `thumbRegex` to that style is examined: when it
  ##   contains "tweet_video", `tweet.gif` is set to the `gifRegex` capture;
  ##   otherwise `tweet.videoThumb` is set to the thumbnail itself.
  for photo in node.querySelectorAll(".AdaptiveMedia-photoContainer"):
    # Go through `getAttr` so a container without the attribute is skipped
    # instead of raising on the direct table lookup.
    let url = photo.getAttr("data-image-url")
    if url.len > 0:
      tweet.photos.add url

  let player = node.selectAttr(".PlayableMedia-player", "style")
  if player.len == 0:
    return

  let thumb = player.replace(thumbRegex, "$1")
  if "tweet_video" in thumb:
    tweet.gif = some(thumb.replace(gifRegex, "$1"))
  else:
    tweet.videoThumb = some(thumb)
|
Loading…
Reference in New Issue