From 56a1386c9d94aa482844035c0f7169f04281cbea Mon Sep 17 00:00:00 2001
From: curben <2809763-curben@users.noreply.gitlab.com>
Date: Sat, 16 May 2020 00:28:22 +0100
Subject: [PATCH] fix: remove invalid domains - #15

---
 script.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/script.sh b/script.sh
index a99f5027..e0052fc6 100644
--- a/script.sh
+++ b/script.sh
@@ -35,9 +35,9 @@ sort -u > "urlhaus.txt"
 cat "urlhaus.txt" | \
 cut -f 1 -d "/" | \
 cut -f 1 -d ":" | \
-# Remove invalid query string, see #15
+# Remove invalid domains, see #15
+grep -vF "??" | \
 cut -f 1 -d "?" | \
-grep -vF "?" | \
 sort -u > "urlhaus-domains.txt"
 
 ## Parse online URLs only
@@ -51,8 +51,8 @@ sort -u > "urlhaus-online.txt"
 cat "urlhaus-online.txt" | \
 cut -f 1 -d "/" | \
 cut -f 1 -d ":" | \
+grep -vF "??" | \
 cut -f 1 -d "?" | \
-grep -vF "?" | \
 sort -u > "urlhaus-domains-online.txt"
 
 