diff --git a/script.sh b/script.sh
index a99f5027..e0052fc6 100644
--- a/script.sh
+++ b/script.sh
@@ -35,9 +35,9 @@ sort -u > "urlhaus.txt"
 cat "urlhaus.txt" | \
 cut -f 1 -d "/" | \
 cut -f 1 -d ":" | \
-# Remove invalid query string, see #15
+# Remove invalid domains, see #15
+grep -vF "??" | \
 cut -f 1 -d "?" | \
-grep -vF "?" | \
 sort -u > "urlhaus-domains.txt"

 ## Parse online URLs only
@@ -51,8 +51,8 @@ sort -u > "urlhaus-online.txt"
 cat "urlhaus-online.txt" | \
 cut -f 1 -d "/" | \
 cut -f 1 -d ":" | \
+grep -vF "??" | \
 cut -f 1 -d "?" | \
-grep -vF "?" | \
 sort -u > "urlhaus-domains-online.txt"