From 2ee79e7970258dda9edc658b2e962ca4f56875b2 Mon Sep 17 00:00:00 2001 From: Zed Date: Mon, 24 Jun 2019 22:55:37 +0200 Subject: [PATCH] Strip nbsp from account names --- src/parser.nim | 4 ++-- src/parserutils.nim | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/parser.nim b/src/parser.nim index 585334e..fd73727 100644 --- a/src/parser.nim +++ b/src/parser.nim @@ -34,7 +34,7 @@ proc parseIntentProfile*(profile: XmlNode): Profile = proc parseTweetProfile*(profile: XmlNode): Profile = result = Profile( - fullname: profile.getAttr("data-name"), + fullname: profile.getAttr("data-name").stripNbsp(), username: profile.getAttr("data-screen-name"), userpic: profile.getAvatar(".avatar"), verified: isVerified(profile) @@ -48,7 +48,7 @@ proc parseQuote*(quote: XmlNode): Quote = ) result.profile = Profile( - fullname: quote.selectText(".QuoteTweet-fullname"), + fullname: quote.selectText(".QuoteTweet-fullname").stripNbsp(), username: quote.getAttr("data-screen-name"), verified: isVerified(quote) ) diff --git a/src/parserutils.nim b/src/parserutils.nim index f883634..bb69035 100644 --- a/src/parserutils.nim +++ b/src/parserutils.nim @@ -3,6 +3,9 @@ import nimquery, regex import ./types, ./formatters, ./api +from unicode import Rune, `$` +const nbsp = $Rune(0x000A0) + const thumbRegex = re".+:url\('([^']+)'\)" gifRegex = re".+thumb/([^\.']+)\.jpg.*" @@ -26,6 +29,9 @@ proc getHeader(profile: XmlNode): XmlNode = if result.isNil: result = profile.querySelector(".ProfileCard-userFields") +proc stripNbsp*(text: string): string = + text.replace(nbsp, "") + proc isVerified*(profile: XmlNode): bool = getHeader(profile).selectText(".Icon.Icon--verified").len > 0 @@ -33,7 +39,7 @@ proc isProtected*(profile: XmlNode): bool = getHeader(profile).selectText(".Icon.Icon--protected").len > 0 proc getName*(profile: XmlNode; selector: string): string = - profile.selectText(selector).strip() + profile.selectText(selector).strip().stripNbsp() proc getUsername*(profile: XmlNode; selector: string): string = profile.selectText(selector).strip(chars={'@', ' '})