urlhaus-filter/utils/malware-url-top-domains.sh

22 lines
494 B
Bash

#!/bin/sh
## Parse malware URLs from popular URLhaus domains
##
## Inputs (read from the current directory):
##   URLhaus.csv              - URLhaus CSV dump
##   urlhaus-top-domains.txt  - fixed strings (one per line) to filter by
## Output (written to the current directory):
##   malware-url-top-domains.txt - unique, scheme-less URLs of "online"
##                                 entries that contain any listed string
## Exit status: 1 if an input or dos2unix is missing; otherwise the status
## of the final grep (non-zero when nothing matched).

set -eu

# Plain sh has no portable pipefail, so a missing input or tool would
# otherwise go unnoticed and leave an empty output file behind. Check up
# front, before the output file is truncated.
for input in URLhaus.csv urlhaus-top-domains.txt; do
  if [ ! -r "$input" ]; then
    printf '%s: cannot read %s\n' "$0" "$input" >&2
    exit 1
  fi
done

if ! command -v dos2unix >/dev/null 2>&1; then
  printf '%s: dos2unix is required but was not found\n' "$0" >&2
  exit 1
fi

# Convert DOS to Unix line ending
dos2unix < URLhaus.csv |
  # Keep online URLs only
  grep '"online"' |
  # Parse URLs: take the 6th double-quote-delimited field
  cut -f 6 -d '"' |
  # Drop the first two '/'-delimited fields (the "scheme://" prefix);
  # anything after the host (port, path) is kept as-is
  cut -f 3- -d '/' |
  # Remove www
  # Only matches domains that start with www
  # Not examplewww.com
  sed -e 's/^www\.//' |
  # Sort and remove duplicates
  sort -u |
  # Include URLs from popular domains
  # NOTE(review): -F matches the listed strings anywhere in the line, not
  # only in the host part - confirm this is intended.
  grep -F -f urlhaus-top-domains.txt > malware-url-top-domains.txt