From f9e1cb84ce0861f1ba7f0d038001a69d52766575 Mon Sep 17 00:00:00 2001 From: curben Date: Tue, 28 May 2019 09:59:02 +0930 Subject: [PATCH] fix: run dos2unix before text processing rename urlhaus.txt in tmp/ --- utils/malware-domains.sh | 2 +- utils/malware-url-top-domains.sh | 2 +- utils/prerequisites.sh | 6 +++--- utils/urlhaus-top-domains.sh | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/utils/malware-domains.sh b/utils/malware-domains.sh index 829cb264..cd8fc7be 100644 --- a/utils/malware-domains.sh +++ b/utils/malware-domains.sh @@ -4,7 +4,7 @@ set -e -x ## Parse domains from URLhaus excluding popular domains -cat URLhaus.txt | \ +cat urlhaus.txt | \ # Exclude Umbrella Top 1M and well-known domains # grep inverse match whole line grep -Fx -vf urlhaus-top-domains.txt > malware-domains.txt diff --git a/utils/malware-url-top-domains.sh b/utils/malware-url-top-domains.sh index f58fd92a..c31eb109 100644 --- a/utils/malware-url-top-domains.sh +++ b/utils/malware-url-top-domains.sh @@ -4,6 +4,6 @@ set -e -x ## Parse malware URLs from popular URLhaus domains -cat URLhaus.txt | \ +cat urlhaus.txt | \ # Parse URLs from popular domains only grep -F -f urlhaus-top-domains.txt > malware-url-top-domains.txt diff --git a/utils/prerequisites.sh b/utils/prerequisites.sh index 5ceb251b..98bf0097 100644 --- a/utils/prerequisites.sh +++ b/utils/prerequisites.sh @@ -12,10 +12,10 @@ cp ../src/exclude.txt . ## Clean up URLhaus.txt cat ../src/URLhaus.txt | \ -# Remove comment -sed '/^#/ d' | \ # Convert DOS to Unix line ending dos2unix | \ +# Remove comment +sed '/^#/ d' | \ # Remove http(s):// cut -f 3 -d '/' | \ # Remove www @@ -23,4 +23,4 @@ cut -f 3 -d '/' | \ # Not examplewww.com sed 's/^www\.//g' | \ # Sort and remove duplicates -sort -u > URLhaus.txt +sort -u > urlhaus.txt diff --git a/utils/urlhaus-top-domains.sh b/utils/urlhaus-top-domains.sh index 821d287e..a75d80ac 100644 --- a/utils/urlhaus-top-domains.sh +++ b/utils/urlhaus-top-domains.sh @@ -4,7 +4,7 @@ set -e -x ## Parse popular domains from URLhaus -cat URLhaus.txt | \ +cat urlhaus.txt | \ # Exclude Umbrella Top 1M and well-known domains # grep match whole line grep -Fx -f top-1m-well-known.txt > urlhaus-top-domains.txt