fix(script): remove invalid protocol

- #32
This commit is contained in:
MDLeom 2021-01-21 04:09:23 +00:00
parent 99cbc9f288
commit 0957c1533d
1 changed file with 2 additions and 0 deletions

View File

@@ -27,6 +27,8 @@ cut -f 6 -d '"' | \
cut -f 3- -d "/" | \
# Domain must have at least a 'dot'
grep -F "." | \
# Remove invalid protocol, see #32
sed -E "s/^(ttps:\/\/|https:\/|http\/)//g" | \
# Remove www.
sed "s/^www\.//g" | \
sort -u > "urlhaus.txt"