parent
63442cd576
commit
d4bef6923f
|
@ -4,7 +4,5 @@ set -e -x
|
|||
|
||||
## Parse domains from URLhaus excluding popular domains
|
||||
|
||||
cat urlhaus.txt | \
|
||||
# Exclude Umbrella Top 1M and well-known domains
|
||||
# Inverse match on whole lines: drop entries listed in urlhaus-top-domains.txt
|
||||
grep -Fx -vf urlhaus-top-domains.txt > malware-domains.txt
|
||||
cat urlhaus-domains.txt | \
|
||||
grep -F -vf urlhaus-top-domains.txt > malware-domains.txt
|
||||
|
|
|
@ -5,5 +5,4 @@ set -e -x
|
|||
## Parse malware URLs from popular URLhaus domains
|
||||
|
||||
cat urlhaus.txt | \
|
||||
# Parse URLs from popular domains only
|
||||
grep -F -f urlhaus-top-domains.txt > malware-url-top-domains.txt
|
||||
|
|
|
@ -17,12 +17,14 @@ dos2unix | \
|
|||
# Remove comments
|
||||
sed 's/^#.*//g' | \
|
||||
# Remove http(s)://
|
||||
cut -f 3 -d '/' | \
|
||||
# Remove port number
|
||||
cut -f 1 -d ':' | \
|
||||
# Remove www
|
||||
# Only matches domains that start with www
|
||||
# Not examplewww.com
|
||||
cut -f 3- -d '/' | \
|
||||
# Remove www.
|
||||
sed 's/^www\.//g' | \
|
||||
# Sort and remove duplicates
|
||||
sort -u > urlhaus.txt
|
||||
|
||||
## Parse domain and IP address only
|
||||
cat urlhaus.txt | \
|
||||
cut -f 1 -d '/' | \
|
||||
cut -f 1 -d ':' | \
|
||||
# Sort and remove duplicates
|
||||
sort -u > urlhaus-domains.txt
|
|
@ -14,11 +14,8 @@ dos2unix | \
|
|||
cut -f 2 -d ',' | \
|
||||
# Domain must contain at least one dot ('.')
|
||||
grep -F '.' | \
|
||||
# Remove www
|
||||
# Only matches domains that start with www
|
||||
# Not examplewww.com
|
||||
# Remove www.
|
||||
sed 's/^www\.//g' | \
|
||||
# Remove duplicates
|
||||
sort -u > top-1m.txt
|
||||
|
||||
# Merge Umbrella and self-maintained top domains
|
||||
|
|
|
@ -15,7 +15,6 @@ SIXTH_LINE="! Source: https://urlhaus.abuse.ch/api/"
|
|||
COMMENT="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE"
|
||||
|
||||
cat malware-domains.txt malware-url-top-domains.txt | \
|
||||
# Sort alphabetically
|
||||
sort | \
|
||||
# Append header comment to the filter list
|
||||
sed '1 i\'"$COMMENT"'' > ../urlhaus-filter.txt
|
||||
|
|
|
@ -4,7 +4,6 @@ set -e -x
|
|||
|
||||
## Parse popular domains from URLhaus
|
||||
|
||||
cat urlhaus.txt | \
|
||||
# Exclude Umbrella Top 1M and well-known domains
|
||||
cat urlhaus-domains.txt | \
|
||||
# Match whole lines only: keep entries listed in top-1m-well-known.txt
|
||||
grep -Fx -f top-1m-well-known.txt > urlhaus-top-domains.txt
|
||||
|
|
Loading…
Reference in New Issue