fix: remove invalid domains

- #15
This commit is contained in:
curben 2020-05-16 00:28:22 +01:00
parent 44fe93f9ec
commit 56a1386c9d
No known key found for this signature in database
GPG Key ID: 5D9DB57A25D34EE3
1 changed file with 3 additions and 3 deletions

View File

@@ -35,9 +35,9 @@ sort -u > "urlhaus.txt"
cat "urlhaus.txt" | \
cut -f 1 -d "/" | \
cut -f 1 -d ":" | \
# Remove invalid query string, see #15
# Remove invalid domains, see #15
grep -vF "??" | \
cut -f 1 -d "?" | \
grep -vF "?" | \
sort -u > "urlhaus-domains.txt"
## Parse online URLs only
@@ -51,8 +51,8 @@ sort -u > "urlhaus-online.txt"
cat "urlhaus-online.txt" | \
cut -f 1 -d "/" | \
cut -f 1 -d ":" | \
grep -vF "??" | \
cut -f 1 -d "?" | \
grep -vF "?" | \
sort -u > "urlhaus-domains-online.txt"