Improve thread/timeline parsing

This commit is contained in:
Zed 2019-06-29 06:31:02 +02:00
parent 49daad3930
commit b31338dcc7
3 changed files with 21 additions and 18 deletions

3
.gitignore vendored
View File

@ -1,4 +1,5 @@
nitter nitter
*.html *.html
*.db *.db
tests/__pycache__ /tests/__pycache__
/tests/geckodriver.log

View File

@ -81,26 +81,34 @@ proc parseTweet*(node: XmlNode): Tweet =
if quote != nil: if quote != nil:
result.quote = some(parseQuote(quote)) result.quote = some(parseQuote(quote))
proc parseTweets*(node: XmlNode): Tweets = proc parseTweets*(nodes: XmlNode): Tweets =
if node == nil or node.kind == xnText: return if nodes == nil: return
for n in node.selectAll(".stream-item"): for n in nodes.filterIt(it.kind != xnText):
if "account" notin n.child("div").attr("class"): let class = n.attr("class").toLower()
if "tombstone" in class or "unavailable" in class:
result.add Tweet()
elif "morereplies" notin class:
result.add parseTweet(n) result.add parseTweet(n)
proc parseConversation*(node: XmlNode): Conversation = proc parseConversation*(node: XmlNode): Conversation =
result = Conversation( result = Conversation(
tweet: parseTweet(node.select(".permalink-tweet-container")), tweet: parseTweet(node.select(".permalink-tweet-container")),
before: parseTweets(node.select(".in-reply-to")) before: parseTweets(node.select(".in-reply-to .stream-items"))
) )
let replies = node.select(".replies-to", ".stream-items") let replies = node.select(".replies-to .stream-items")
if replies == nil: return if replies == nil: return
for reply in replies.filterIt(it.kind != xnText): for reply in replies.filterIt(it.kind != xnText):
if "selfThread" in reply.attr("class"): let class = reply.attr("class").toLower()
result.after = parseTweets(reply.select(".stream-items")) let thread = reply.select(".stream-items")
else:
if "self" in class:
result.after = parseTweets(thread)
elif "lone" in class:
result.replies.add parseTweets(reply) result.replies.add parseTweets(reply)
else:
result.replies.add parseTweets(thread)
proc parseVideo*(node: JsonNode): Video = proc parseVideo*(node: JsonNode): Video =
let track = node{"track"} let track = node{"track"}

View File

@ -18,11 +18,6 @@ proc select*(node: XmlNode; selector: string): XmlNode =
let nodes = node.selectAll(selector) let nodes = node.selectAll(selector)
if nodes.len > 0: nodes[0] else: nil if nodes.len > 0: nodes[0] else: nil
proc select*(node: XmlNode; parent, child: string): XmlNode =
let parentNode = node.select(parent)
if parentNode == nil: return
result = parentNode.select(child)
proc selectAttr*(node: XmlNode; selector: string; attr: string): string = proc selectAttr*(node: XmlNode; selector: string; attr: string): string =
let res = node.select(selector) let res = node.select(selector)
if res == nil: "" else: res.attr(attr) if res == nil: "" else: res.attr(attr)
@ -39,10 +34,10 @@ proc getHeader(profile: XmlNode): XmlNode =
result = profile.select(".ProfileCard-userFields") result = profile.select(".ProfileCard-userFields")
proc isVerified*(profile: XmlNode): bool = proc isVerified*(profile: XmlNode): bool =
getHeader(profile).selectText(".Icon.Icon--verified").len > 0 getHeader(profile).select(".Icon.Icon--verified") != nil
proc isProtected*(profile: XmlNode): bool = proc isProtected*(profile: XmlNode): bool =
getHeader(profile).selectText(".Icon.Icon--protected").len > 0 getHeader(profile).select(".Icon.Icon--protected") != nil
proc getName*(profile: XmlNode; selector: string): string = proc getName*(profile: XmlNode; selector: string): string =
profile.selectText(selector).stripText() profile.selectText(selector).stripText()
@ -92,7 +87,6 @@ proc getAvatar*(profile: XmlNode; selector: string): string =
proc getBanner*(tweet: XmlNode): string = proc getBanner*(tweet: XmlNode): string =
let url = tweet.selectAttr("svg > image", "xlink:href") let url = tweet.selectAttr("svg > image", "xlink:href")
if url.len > 0: if url.len > 0:
result = url.replace("600x200", "1500x500") result = url.replace("600x200", "1500x500")
else: else: