urlhaus-filter/utils/malware-url-top-domains.sh

24 lines
505 B
Bash
Raw Normal View History

#!/bin/sh
#
# Build malware-url-top-domains.txt: the URLs marked "online" in URLhaus.csv,
# with the scheme and a leading "www." removed, that contain at least one
# entry of urlhaus-top-domains.txt.
#
# Reads:  URLhaus.csv, urlhaus-top-domains.txt   (current directory)
# Writes: malware-url-top-domains.txt            (current directory)
# Needs:  dos2unix, grep, cut, sed, sort
#
# Exit status: `set -e` only looks at the last stage of the pipeline. grep
# exits 1 when it selects no line, so an empty result aborts the script
# (the output file is still created/truncated by the redirection).

set -e -x

## Parse malware URLs from popular URLhaus domains

# Convert DOS to Unix line ending
dos2unix < URLhaus.csv |
  # Parse online URLs only
  grep '"online"' |
  # Parse URLs: split on '"', the 6th field is the content of the 3rd
  # quoted CSV column
  cut -f 6 -d '"' |
  # Drop the first two '/'-separated fields, i.e. the "scheme://" prefix
  cut -f 3- -d '/' |
  # Remove www
  # Only matches domains that start with www
  # Not examplewww.com
  sed 's/^www\.//' |
  # Sort and remove duplicates
  sort -u |
  # Parse URLs from popular domains only
  # NOTE(review): -F -f is a fixed-string *substring* match against the whole
  # line, so an entry also matches inside a longer hostname or in the path.
  grep -F -f urlhaus-top-domains.txt > malware-url-top-domains.txt