Add parser utils to parserutils.nim
This commit is contained in:
		
							parent
							
								
									c559ab1f1a
								
							
						
					
					
						commit
						de9e452a7c
					
				| 
						 | 
					@ -12,6 +12,7 @@ Inspired by the [invidio.us](https://github.com/omarroth/invidious) project.
 | 
				
			||||||
- Dark theme
 | 
					- Dark theme
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Installation
 | 
					## Installation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```bash
 | 
					```bash
 | 
				
			||||||
git clone https://github.com/zedeus/nitter
 | 
					git clone https://github.com/zedeus/nitter
 | 
				
			||||||
cd nitter
 | 
					cd nitter
 | 
				
			||||||
| 
						 | 
					@ -23,6 +24,9 @@ like the title, this will change as the project matures a bit. For now the focus
 | 
				
			||||||
is on implementing missing features.
 | 
					is on implementing missing features.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Todo (roughly in this order)
 | 
					## Todo (roughly in this order)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Line connecting posts in threads
 | 
				
			||||||
 | 
					- "Show Thread" button
 | 
				
			||||||
- Twitter "Cards" (link previews)
 | 
					- Twitter "Cards" (link previews)
 | 
				
			||||||
- Nitter link previews
 | 
					- Nitter link previews
 | 
				
			||||||
- Search (+ hashtag search)
 | 
					- Search (+ hashtag search)
 | 
				
			||||||
| 
						 | 
					@ -39,6 +43,7 @@ is on implementing missing features.
 | 
				
			||||||
- Nitter logo
 | 
					- Nitter logo
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Why?
 | 
					## Why?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
It's basically impossible to use Twitter without JavaScript enabled. If you try,
 | 
					It's basically impossible to use Twitter without JavaScript enabled. If you try,
 | 
				
			||||||
you're redirected to the legacy mobile version which is awful both functionally
 | 
					you're redirected to the legacy mobile version which is awful both functionally
 | 
				
			||||||
and aesthetically. For privacy-minded folks, preventing JavaScript analytics and
 | 
					and aesthetically. For privacy-minded folks, preventing JavaScript analytics and
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										112
									
								
								src/parser.nim
								
								
								
								
							
							
						
						
									
										112
									
								
								src/parser.nim
								
								
								
								
							| 
						 | 
					@ -1,103 +1,71 @@
 | 
				
			||||||
import xmltree, sequtils, strtabs, strutils, strformat, json, times
 | 
					import xmltree, sequtils, strtabs, strutils, strformat
 | 
				
			||||||
import nimquery, regex
 | 
					import nimquery
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import ./types, ./formatters
 | 
					import ./types, ./parserutils
 | 
				
			||||||
 | 
					 | 
				
			||||||
proc getAttr(node: XmlNode; attr: string; default=""): string =
 | 
					 | 
				
			||||||
  if node.isNil or node.attrs.isNil: return default
 | 
					 | 
				
			||||||
  return node.attrs.getOrDefault(attr)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
proc selectAttr(node: XmlNode; selector: string; attr: string; default=""): string =
 | 
					 | 
				
			||||||
  let res = node.querySelector(selector)
 | 
					 | 
				
			||||||
  if res == nil: "" else: res.getAttr(attr, default)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
proc selectText(node: XmlNode; selector: string): string =
 | 
					 | 
				
			||||||
  let res = node.querySelector(selector)
 | 
					 | 
				
			||||||
  result = if res == nil: "" else: res.innerText()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
proc parsePopupProfile*(node: XmlNode): Profile =
 | 
					proc parsePopupProfile*(node: XmlNode): Profile =
 | 
				
			||||||
  let profile = node.querySelector(".profile-card")
 | 
					  let profile = node.querySelector(".profile-card")
 | 
				
			||||||
  if profile.isNil: return
 | 
					  if profile.isNil: return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  result = Profile(
 | 
					  result = Profile(
 | 
				
			||||||
     fullname: profile.selectText(".fullname").strip(),
 | 
					    fullname:    profile.getName(".fullname"),
 | 
				
			||||||
     username: profile.selectText(".username").strip(chars={'@', ' '}),
 | 
					    username:    profile.getUsername(".username"),
 | 
				
			||||||
     description: profile.selectText(".bio"),
 | 
					    description: profile.getBio(".bio"),
 | 
				
			||||||
     verified: profile.selectText(".Icon.Icon--verified").len > 0,
 | 
					    userpic:     profile.getAvatar(".ProfileCard-avatarImage"),
 | 
				
			||||||
     protected: profile.selectText(".Icon.Icon--protected").len > 0,
 | 
					    verified:    isVerified(profile),
 | 
				
			||||||
     userpic: profile.selectAttr(".ProfileCard-avatarImage", "src").getUserpic(),
 | 
					    protected:   isProtected(profile),
 | 
				
			||||||
     banner: profile.selectAttr("svg > image", "xlink:href").replace("600x200", "1500x500")
 | 
					    banner:      getBanner(profile)
 | 
				
			||||||
  )
 | 
					  )
 | 
				
			||||||
 | 
					  result.getPopupStats(profile)
 | 
				
			||||||
  if result.banner.len == 0:
 | 
					 | 
				
			||||||
      result.banner = profile.selectAttr(".ProfileCard-bg", "style")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  let stats = profile.querySelectorAll(".ProfileCardStats-statLink")
 | 
					 | 
				
			||||||
  for s in stats:
 | 
					 | 
				
			||||||
    let text = s.getAttr("title").split(" ")[0]
 | 
					 | 
				
			||||||
    case s.getAttr("href").split("/")[^1]
 | 
					 | 
				
			||||||
    of "followers": result.followers = text
 | 
					 | 
				
			||||||
    of "following": result.following = text
 | 
					 | 
				
			||||||
    else: result.tweets = text
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
proc parseIntentProfile*(profile: XmlNode): Profile =
 | 
					proc parseIntentProfile*(profile: XmlNode): Profile =
 | 
				
			||||||
  result = Profile(
 | 
					  result = Profile(
 | 
				
			||||||
    fullname: profile.selectText("a.fn.url.alternate-context").strip(),
 | 
					    fullname:    profile.getName("a.fn.url.alternate-context"),
 | 
				
			||||||
    username: profile.selectText(".nickname").strip(chars={'@', ' '}),
 | 
					    username:    profile.getUsername(".nickname"),
 | 
				
			||||||
    userpic: profile.querySelector(".profile.summary").selectAttr("img.photo", "src").getUserPic(),
 | 
					    description: profile.getBio("p.note"),
 | 
				
			||||||
    description: profile.selectText("p.note").strip(),
 | 
					    userpic:     profile.querySelector(".profile.summary").getAvatar("img.photo"),
 | 
				
			||||||
    verified:    not profile.querySelector("li.verified").isNil,
 | 
					    verified:    not profile.querySelector("li.verified").isNil,
 | 
				
			||||||
    protected:   not profile.querySelector("li.protected").isNil,
 | 
					    protected:   not profile.querySelector("li.protected").isNil,
 | 
				
			||||||
    banner: "background-color: #161616",
 | 
					    banner:      getBanner(profile)
 | 
				
			||||||
    tweets: "?"
 | 
					 | 
				
			||||||
  )
 | 
					  )
 | 
				
			||||||
 | 
					  result.getIntentStats(profile)
 | 
				
			||||||
  for stat in profile.querySelectorAll("dd.count > a"):
 | 
					 | 
				
			||||||
    case stat.getAttr("href").split("/")[^1]
 | 
					 | 
				
			||||||
    of "followers": result.followers = stat.innerText()
 | 
					 | 
				
			||||||
    of "following": result.following = stat.innerText()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
proc parseTweetProfile*(profile: XmlNode): Profile =
 | 
					proc parseTweetProfile*(profile: XmlNode): Profile =
 | 
				
			||||||
  result = Profile(
 | 
					  result = Profile(
 | 
				
			||||||
    fullname: profile.getAttr("data-name"),
 | 
					    fullname: profile.getAttr("data-name"),
 | 
				
			||||||
    username: profile.getAttr("data-screen-name"),
 | 
					    username: profile.getAttr("data-screen-name"),
 | 
				
			||||||
    userpic: profile.selectAttr(".avatar", "src").getUserpic(),
 | 
					    userpic:  profile.getAvatar(".avatar"),
 | 
				
			||||||
    verified: profile.selectText(".Icon.Icon--verified").len > 0
 | 
					    verified: isVerified(profile)
 | 
				
			||||||
 | 
					  )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc parseQuote*(tweet: XmlNode): Tweet =
					  ## Builds a `Tweet` from the quoted-tweet container found inside `tweet`.
					  ##
					  ## Returns an empty `Tweet` when `tweet` holds no
					  ## `.QuoteTweet-innerContainer` element.
					  let quote = tweet.querySelector(".QuoteTweet-innerContainer")
					  if quote.isNil: return Tweet()
					
					  result = Tweet(
					    id:   quote.getAttr("data-item-id"),
					    link: quote.getAttr("href"),
					    text: quote.selectText(".QuoteTweet-text")
					  )
					
					  # `data-screen-name` is the handle (parseTweetProfile maps it to
					  # `username`); the display name lives in the fullname element.
					  result.profile = Profile(
					    fullname: quote.selectText(".QuoteTweet-fullname"),
					    username: quote.getAttr("data-screen-name"),
					    verified: isVerified(quote)
					  )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
proc parseTweet*(tweet: XmlNode): Tweet =
 | 
					proc parseTweet*(tweet: XmlNode): Tweet =
 | 
				
			||||||
  let time = tweet.querySelector(".js-short-timestamp")
 | 
					 | 
				
			||||||
  result = Tweet(
 | 
					  result = Tweet(
 | 
				
			||||||
    id:        tweet.getAttr("data-item-id"),
 | 
					    id:        tweet.getAttr("data-item-id"),
 | 
				
			||||||
    link:      tweet.getAttr("data-permalink-path"),
 | 
					    link:      tweet.getAttr("data-permalink-path"),
 | 
				
			||||||
    text: tweet.selectText(".tweet-text").stripTwitterUrls(),
 | 
					 | 
				
			||||||
    pinned: "pinned" in tweet.getAttr("class"),
 | 
					 | 
				
			||||||
    profile:   parseTweetProfile(tweet),
 | 
					    profile:   parseTweetProfile(tweet),
 | 
				
			||||||
    time: fromUnix(parseInt(time.getAttr("data-time", "0"))),
 | 
					    text:      getTweetText(tweet),
 | 
				
			||||||
    shortTime: time.innerText(),
 | 
					    time:      getTimestamp(tweet),
 | 
				
			||||||
    replies: "0",
 | 
					    shortTime: getShortTime(tweet),
 | 
				
			||||||
    likes: "0",
 | 
					    pinned:    "pinned" in tweet.getAttr("class")
 | 
				
			||||||
    retweets: "0"
 | 
					 | 
				
			||||||
  )
 | 
					  )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for action in tweet.querySelectorAll(".ProfileTweet-actionCountForAria"):
 | 
					  result.getTweetStats(tweet)
 | 
				
			||||||
    let text = action.innerText.split()
 | 
					  result.getTweetMedia(tweet)
 | 
				
			||||||
    case text[1]
 | 
					 | 
				
			||||||
    of "replies": result.replies = text[0]
 | 
					 | 
				
			||||||
    of "likes": result.likes = text[0]
 | 
					 | 
				
			||||||
    of "retweets": result.retweets = text[0]
 | 
					 | 
				
			||||||
    else: discard
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  for photo in tweet.querySelectorAll(".AdaptiveMedia-photoContainer"):
 | 
					 | 
				
			||||||
    result.photos.add photo.attrs["data-image-url"]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  let player = tweet.selectAttr(".PlayableMedia-player", "style")
 | 
					 | 
				
			||||||
  if player.len > 0:
 | 
					 | 
				
			||||||
    let thumb = player.replace(re".+:url\('([^']+)'\)", "$1")
 | 
					 | 
				
			||||||
    if "tweet_video" in thumb:
 | 
					 | 
				
			||||||
      result.gif = some(thumb.replace(re".+thumb/([^\.']+)\.jpg.*", "$1"))
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
      result.videoThumb = some(thumb)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  let by = tweet.selectText(".js-retweet-text > a > b")
 | 
					  let by = tweet.selectText(".js-retweet-text > a > b")
 | 
				
			||||||
  if by.len > 0:
 | 
					  if by.len > 0:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,111 @@
 | 
				
			||||||
 | 
					import xmltree, strtabs, times
 | 
				
			||||||
 | 
					import nimquery, regex
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import ./types, ./formatters
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const
 | 
				
			||||||
 | 
					  thumbRegex = re".+:url\('([^']+)'\)"
 | 
				
			||||||
 | 
					  gifRegex = re".+thumb/([^\.']+)\.jpg.*"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getAttr*(node: XmlNode; attr: string; default=""): string =
					  ## Returns the value of attribute `attr` on `node`.
					  ##
					  ## `default` is returned when `node` is nil, when it has no attribute
					  ## table, or when `attr` is not present in that table.
					  if node.isNil or node.attrs.isNil: return default
					  return node.attrs.getOrDefault(attr, default)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc selectAttr*(node: XmlNode; selector: string; attr: string; default=""): string =
					  ## Returns attribute `attr` of the first descendant of `node` matching
					  ## `selector`.
					  ##
					  ## `default` is returned when nothing matches the selector; otherwise the
					  ## lookup is delegated to `getAttr` with the same `default`.
					  let res = node.querySelector(selector)
					  if res.isNil: default else: res.getAttr(attr, default)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc selectText*(node: XmlNode; selector: string): string =
					  ## Inner text of the first descendant of `node` matching `selector`.
					  ## Yields the empty string when no element matches.
					  let match = node.querySelector(selector)
					  if match != nil:
					    result = match.innerText()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc isVerified*(profile: XmlNode): bool =
					  ## True when the `.Icon.Icon--verified` element under `profile` has
					  ## non-empty inner text.
					  let badge = selectText(profile, ".Icon.Icon--verified")
					  result = badge.len > 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc isProtected*(profile: XmlNode): bool =
					  ## True when the `.Icon.Icon--protected` element under `profile` has
					  ## non-empty inner text.
					  let badge = selectText(profile, ".Icon.Icon--protected")
					  result = badge.len > 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getName*(profile: XmlNode; selector: string): string =
					  ## Text of the element matched by `selector`, with leading and trailing
					  ## whitespace removed.
					  let raw = selectText(profile, selector)
					  result = raw.strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getUsername*(profile: XmlNode; selector: string): string =
					  ## Text of the element matched by `selector`, with any leading or
					  ## trailing `@` and space characters removed.
					  let raw = selectText(profile, selector)
					  result = raw.strip(chars={'@', ' '})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getTweetText*(tweet: XmlNode): string =
					  ## Extracts the text of the `.tweet-text` element.
					  ##
					  ## When the hidden timeline link's `data-expanded-url` occurs in that
					  ## text, a space is inserted in front of it. The result is then passed
					  ## through `stripTwitterUrls`.
					  const hiddenLink = ".tweet-text > a.twitter-timeline-link.u-hidden"
					  let url = selectAttr(tweet, hiddenLink, "data-expanded-url")
					
					  result = selectText(tweet, ".tweet-text")
					  if url.len > 0 and url in result:
					    result = result.replace(url, " " & url)
					
					  result = stripTwitterUrls(result)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getTime(tweet: XmlNode): XmlNode =
					  ## Looks up the `.js-short-timestamp` element inside `tweet`.
					  result = querySelector(tweet, ".js-short-timestamp")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getTimestamp*(tweet: XmlNode): Time =
					  ## Reads the Unix timestamp stored in the `data-time` attribute of the
					  ## tweet's short-timestamp element.
					  ##
					  ## Returns the Unix epoch (`fromUnix(0)`) when the attribute is absent,
					  ## empty, or not a valid integer, rather than letting `parseInt` raise.
					  let raw = getTime(tweet).getAttr("data-time", "0")
					  try:
					    result = fromUnix(parseInt(raw))
					  except ValueError:
					    result = fromUnix(0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getShortTime*(tweet: XmlNode): string =
					  ## Inner text of the tweet's short-timestamp element.
					  ##
					  ## Returns the empty string when the element is missing, since
					  ## `innerText` cannot be called on a nil node.
					  let stamp = getTime(tweet)
					  if stamp.isNil: "" else: stamp.innerText()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getBio*(profile: XmlNode; selector: string): string =
					  ## Text of the element matched by `selector`, with leading and trailing
					  ## whitespace removed.
					  let raw = selectText(profile, selector)
					  result = raw.strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getAvatar*(profile: XmlNode; selector: string): string =
					  ## Takes the `src` attribute of the element matched by `selector` and
					  ## passes it through `getUserpic`.
					  let src = selectAttr(profile, selector, "src")
					  result = getUserpic(src)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getBanner*(tweet: XmlNode): string =
					  ## Picks the banner value for a profile node, trying in order:
					  ## the `xlink:href` of `svg > image` (with "600x200" replaced by
					  ## "1500x500"), the `style` attribute of `.ProfileCard-bg`, and
					  ## finally a fixed background colour.
					  let image = selectAttr(tweet, "svg > image", "xlink:href")
					
					  result =
					    if image.len == 0: selectAttr(tweet, ".ProfileCard-bg", "style")
					    else: image.replace("600x200", "1500x500")
					
					  if result.len == 0:
					    result = "background-color: #161616"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getPopupStats*(profile: var Profile; node: XmlNode) =
					  ## Fills the counters of `profile` from every
					  ## `.ProfileCardStats-statLink` element under `node`.
					  ##
					  ## The value is the first space-separated word of the link's `title`;
					  ## the last path segment of its `href` selects the field, with anything
					  ## other than "followers"/"following" stored as `tweets`.
					  for link in querySelectorAll(node, ".ProfileCardStats-statLink"):
					    let count = link.getAttr("title").split(" ")[0]
					    let kind = link.getAttr("href").split("/")[^1]
					    if kind == "followers":
					      profile.followers = count
					    elif kind == "following":
					      profile.following = count
					    else:
					      profile.tweets = count
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getIntentStats*(profile: var Profile; node: XmlNode) =
					  ## Sets `profile.tweets` to "?" and fills `followers`/`following` from
					  ## the `dd.count > a` links under `node`, keyed by the last path
					  ## segment of each link's `href`. Other links are ignored.
					  profile.tweets = "?"
					  for link in querySelectorAll(node, "dd.count > a"):
					    let kind = link.getAttr("href").split("/")[^1]
					    if kind == "followers":
					      profile.followers = link.innerText()
					    elif kind == "following":
					      profile.following = link.innerText()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getTweetStats*(tweet: var Tweet; node: XmlNode) =
					  ## Resets the reply, retweet and like counters of `tweet` to "0", then
					  ## fills them from the `.ProfileTweet-actionCountForAria` elements
					  ## under `node`.
					  ##
					  ## Each element's text is split on whitespace: the first word is the
					  ## count, the second names the counter. Elements with fewer than two
					  ## words or an unrecognised name are skipped.
					  tweet.replies = "0"
					  tweet.retweets = "0"
					  tweet.likes = "0"
					
					  for action in node.querySelectorAll(".ProfileTweet-actionCountForAria"):
					    let text = action.innerText.split()
					    # Guard the `text[1]` lookup against empty or single-word labels.
					    if text.len < 2: continue
					    case text[1]
					    of "replies":  tweet.replies = text[0]
					    of "likes":    tweet.likes = text[0]
					    of "retweets": tweet.retweets = text[0]
					    else: discard
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proc getTweetMedia*(tweet: var Tweet; node: XmlNode) =
					  ## Collects media references from `node` into `tweet`.
					  ##
					  ## Every `.AdaptiveMedia-photoContainer` with a non-empty
					  ## `data-image-url` is appended to `tweet.photos`. If a
					  ## `.PlayableMedia-player` element has a `style` attribute, the URL
					  ## captured by `thumbRegex` is stored as `gif` (reduced via `gifRegex`)
					  ## when it contains "tweet_video", and as `videoThumb` otherwise.
					  for photo in node.querySelectorAll(".AdaptiveMedia-photoContainer"):
					    # getAttr tolerates a missing attribute table or key, where a direct
					    # `attrs[...]` lookup would raise.
					    let url = photo.getAttr("data-image-url")
					    if url.len > 0:
					      tweet.photos.add url
					
					  let player = node.selectAttr(".PlayableMedia-player", "style")
					  if player.len == 0:
					    return
					
					  let thumb = player.replace(thumbRegex, "$1")
					  if "tweet_video" in thumb:
					    tweet.gif = some(thumb.replace(gifRegex, "$1"))
					  else:
					    tweet.videoThumb = some(thumb)
 | 
				
			||||||
		Loading…
	
		Reference in New Issue