fix: remove top level query string

- Closes #15
This commit is contained in:
curben 2020-05-15 11:14:57 +01:00
parent baebc326f1
commit 8a026ce356
No known key found for this signature in database
GPG Key ID: 5D9DB57A25D34EE3
1 changed file with 5 additions and 0 deletions

View File

@@ -35,6 +35,9 @@ sort -u > "urlhaus.txt"
# Build the de-duplicated list "urlhaus-domains.txt" from "urlhaus.txt".
# Each line is truncated at the first "/", then at the first ":", then at
# the first "?" (invalid top-level query string, see #15); lines lacking a
# given delimiter pass through that stage unchanged.
# NOTE(review): presumably entries carry no "scheme://" prefix at this point,
# so what remains is the bare host -- confirm against the earlier steps.
# No extra `grep -vF "?"` is needed: after `cut -d "?" -f 1` no line can
# still contain a "?".
cut -f 1 -d "/" "urlhaus.txt" | \
cut -f 1 -d ":" | \
cut -f 1 -d "?" | \
sort -u > "urlhaus-domains.txt"
## Parse online URLs only
@@ -48,6 +51,8 @@ sort -u > "urlhaus-online.txt"
# Build the de-duplicated list "urlhaus-domains-online.txt" from
# "urlhaus-online.txt".
# Each line is truncated at the first "/", then at the first ":", then at
# the first "?" (invalid top-level query string, see #15); lines lacking a
# given delimiter pass through that stage unchanged.
# No extra `grep -vF "?"` is needed: after `cut -d "?" -f 1` no line can
# still contain a "?".
cut -f 1 -d "/" "urlhaus-online.txt" | \
cut -f 1 -d ":" | \
cut -f 1 -d "?" | \
sort -u > "urlhaus-domains-online.txt"