urlhaus-filter/utils/malware-domains.sh

23 lines
556 B
Bash

#!/bin/sh
## Parse domains from URLhaus excluding popular domains
##
## Run from the directory that holds the input files:
##   URLhaus.csv              CSV dump to parse
##   urlhaus-top-domains.txt  hosts to exclude, one per line
## Writes the result to malware-domains.txt in the same directory.
## Needs dos2unix in addition to the standard POSIX text utilities.

#######################################
# Extract the hosts (domains and IP addresses) of every "online" entry in
# URLhaus.csv, normalise them, and drop the ones listed in
# urlhaus-top-domains.txt.
# Arguments: none
# Outputs:   malware-domains.txt (lower-case, sorted, unique, one host per
#            line); a diagnostic on stderr when an input is unreadable
# Returns:   1 without touching the output file if an input is unreadable;
#            otherwise the exit status of the final grep (which is 1 when
#            no host is left after the exclusion)
#######################################
parse_malware_domains() {
  # Bail out before the output redirection below gets a chance to truncate
  # a previously generated list to an empty file.
  for input_file in URLhaus.csv urlhaus-top-domains.txt; do
    if [ ! -r "$input_file" ]; then
      printf 'malware-domains.sh: cannot read %s\n' "$input_file" >&2
      return 1
    fi
  done

  # Convert DOS to Unix line ending (reads the file directly, no cat needed)
  dos2unix < URLhaus.csv |
    # Parse online URLs only
    grep '"online"' |
    # Take the 6th '"'-delimited field (presumably the URL column; verify
    # against the CSV layout), then keep only its host part
    cut -f 6 -d '"' |
    cut -f 3 -d '/' |
    # Drop an explicit port
    cut -f 1 -d ':' |
    # Host names are case-insensitive: fold to lower case so the www strip,
    # the de-duplication and the whole-line exclusion below do not miss
    # entries such as "WWW.Example.COM".
    # NOTE(review): assumes urlhaus-top-domains.txt is lower case - confirm
    tr '[:upper:]' '[:lower:]' |
    # Remove www
    # Only matches domains that start with www
    # Not examplewww.com
    sed 's/^www\.//' |
    # Sort and remove duplicates
    sort -u |
    # Exclude Umbrella Top 1M and well-known domains
    # grep inverse match whole line
    grep -Fx -vf urlhaus-top-domains.txt > malware-domains.txt
}

parse_malware_domains