perf: grep against urlhaus-top-domains.txt instead of the much larger top-1m-well-known.txt

This commit is contained in:
curben 2019-05-12 12:29:52 +09:30
parent acd4e760a1
commit 013267e310
3 changed files with 4 additions and 5 deletions

View File

@@ -42,12 +42,12 @@ deploy:
# Process the Umbrella Top 1M
- sh ../utils/umbrella-top-1m.sh
# Parse domains from URLhaus excluding popular domains
- sh ../utils/malware-domains.sh
# Parse popular domains that also appear in URLhaus
- sh ../utils/urlhaus-top-domains.sh
# Parse domains from URLhaus excluding popular domains
- sh ../utils/malware-domains.sh
# Parse malware URLs from popular domains
- sh ../utils/malware-url-top-domains.sh

View File

@@ -19,4 +19,4 @@ sed -e 's/^www\.//g' | \
sort -u | \
# Exclude Umbrella Top 1M and well-known domains
# grep: inverted (-v), whole-line (-x), fixed-string (-F) match against the patterns in the file (-f)
grep -Fx -vf top-1m-well-known.txt > malware-domains.txt
grep -Fx -vf urlhaus-top-domains.txt > malware-domains.txt

View File

@@ -3,7 +3,6 @@
## Merge malware-domains.txt and malware-url-top-domains.txt,
## then append a header to instruct uBO to grab the filter daily.
CURRENT_TIME="$(date -R -u)"
FIRST_LINE="! Title: abuse.ch URLhaus Malicious URL Blocklist"
SECOND_LINE="! Updated: $CURRENT_TIME"