Merge branch 'perf' into 'master'
perf: grep using smaller file See merge request curben/urlhaus-filter!3
This commit is contained in:
commit
e258ffff3e
|
@ -42,12 +42,12 @@ deploy:
|
|||
# Process the Umbrella Top 1M
|
||||
- sh ../utils/umbrella-top-1m.sh
|
||||
|
||||
# Parse domains from URLhaus excluding popular domains
|
||||
- sh ../utils/malware-domains.sh
|
||||
|
||||
# Parse popular domains that also appear in URLhaus
|
||||
- sh ../utils/urlhaus-top-domains.sh
|
||||
|
||||
# Parse domains from URLhaus excluding popular domains
|
||||
- sh ../utils/malware-domains.sh
|
||||
|
||||
# Parse malware URLs from popular domains
|
||||
- sh ../utils/malware-url-top-domains.sh
|
||||
|
||||
|
|
|
@ -19,4 +19,4 @@ sed -e 's/^www\.//g' | \
|
|||
sort -u | \
|
||||
# Exclude Umbrella Top 1M and well-known domains
|
||||
# grep inverse match whole line
|
||||
grep -Fx -vf top-1m-well-known.txt > malware-domains.txt
|
||||
grep -Fx -vf urlhaus-top-domains.txt > malware-domains.txt
|
||||
|
|
|
@ -17,5 +17,5 @@ cut -f 1- -d ':' | \
|
|||
sed -e 's/^www\.//g' | \
|
||||
# Sort and remove duplicates
|
||||
sort -u | \
|
||||
# Include URLs from popular domains
|
||||
# Parse URLs from popular domains only
|
||||
grep -F -f urlhaus-top-domains.txt > malware-url-top-domains.txt
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
## Merge malware-domains.txt and malware-url-top-domains.txt,
|
||||
## and append a header to instruct uBO to grab the filter daily.
|
||||
|
||||
|
||||
CURRENT_TIME="$(date -R -u)"
|
||||
FIRST_LINE="! Title: abuse.ch URLhaus Malicious URL Blocklist"
|
||||
SECOND_LINE="! Updated: $CURRENT_TIME"
|
||||
|
|
|
@ -18,5 +18,5 @@ sed -e 's/^www\.//g' | \
|
|||
# Sort and remove duplicates
|
||||
sort -u | \
|
||||
# Keep only domains listed in Umbrella Top 1M and well-known domains
|
||||
# grep inverse match whole line
|
||||
# grep match whole line
|
||||
grep -Fx -f top-1m-well-known.txt > urlhaus-top-domains.txt
|
||||
|
|
Loading…
Reference in New Issue