From 1b75e73adebe4449f4f8501481f8c4a7d9beebc7 Mon Sep 17 00:00:00 2001 From: curben <2809763-curben@users.noreply.gitlab.com> Date: Sat, 8 Feb 2020 03:32:36 +0000 Subject: [PATCH] build: safer shell script - https://sipb.mit.edu/doc/safe-shell/ --- script.sh | 100 ++++++++++++++++++++++++------------------------ utils/commit.sh | 2 +- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/script.sh b/script.sh index 885b9773..7f9f656e 100644 --- a/script.sh +++ b/script.sh @@ -1,88 +1,88 @@ #!/bin/sh -set -e -x +set -efux -o pipefail ## Create a temporary working folder -mkdir -p tmp/ && cd tmp/ +mkdir -p "tmp/" && cd "tmp/" ## Prepare datasets -wget https://urlhaus.abuse.ch/downloads/csv/ -O ../src/URLhaus.csv -wget https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip -O top-1m.csv.zip +wget "https://urlhaus.abuse.ch/downloads/csv/" -O "../src/URLhaus.csv" +wget "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" -O "top-1m.csv.zip" -cp ../src/exclude.txt . +cp "../src/exclude.txt" "." ## Clean up URLhaus.csv -cat ../src/URLhaus.csv | \ +cat "../src/URLhaus.csv" | \ # Convert DOS to Unix line ending dos2unix | \ # Remove comment -sed '/^#/d' | \ +sed "/^#/d" | \ # Parse URLs cut -f 6 -d '"' | \ -cut -f 3- -d '/' | \ +cut -f 3- -d "/" | \ # Domain must have at least a 'dot' -grep -F '.' | \ +grep -F "." | \ # Remove www. -sed 's/^www\.//g' | \ -sort -u > urlhaus.txt +sed "s/^www\.//g" | \ +sort -u > "urlhaus.txt" ## Parse domain and IP address only -cat urlhaus.txt | \ -cut -f 1 -d '/' | \ -cut -f 1 -d ':' | \ -sort -u > urlhaus-domains.txt +cat "urlhaus.txt" | \ +cut -f 1 -d "/" | \ +cut -f 1 -d ":" | \ +sort -u > "urlhaus-domains.txt" -cat ../src/URLhaus.csv | \ +cat "../src/URLhaus.csv" | \ dos2unix | \ -sed '/^#/d' | \ +sed "/^#/d" | \ # Parse online URLs only grep '"online"' | \ cut -f 6 -d '"' | \ -cut -f 3- -d '/' | \ -sed 's/^www\.//g' | \ -sort -u > urlhaus-online.txt +cut -f 3- -d "/" | \ +sed "s/^www\.//g" | \ +sort -u > "urlhaus-online.txt" -cat urlhaus-online.txt | \ -cut -f 1 -d '/' | \ -cut -f 1 -d ':' | \ -sort -u > urlhaus-domains-online.txt +cat "urlhaus-online.txt" | \ +cut -f 1 -d "/" | \ +cut -f 1 -d ":" | \ +sort -u > "urlhaus-domains-online.txt" ## Parse the Cisco Umbrella 1 Million -unzip -p top-1m.csv.zip | \ +unzip -p "top-1m.csv.zip" | \ dos2unix | \ # Parse domains only -cut -f 2 -d ',' | \ -grep -F '.' | \ +cut -f 2 -d "," | \ +grep -F "." | \ # Remove www. -sed 's/^www\.//g' | \ -sort -u > top-1m.txt +sed "s/^www\.//g" | \ +sort -u > "top-1m.txt" # Merge Umbrella and self-maintained top domains -cat top-1m.txt exclude.txt | \ -sort -u > top-1m-well-known.txt +cat "top-1m.txt" "exclude.txt" | \ +sort -u > "top-1m-well-known.txt" ## Parse popular domains from URLhaus -cat urlhaus-domains.txt | \ +cat "urlhaus-domains.txt" | \ # grep match whole line -grep -Fx -f top-1m-well-known.txt > urlhaus-top-domains.txt +grep -Fx -f "top-1m-well-known.txt" > "urlhaus-top-domains.txt" ## Parse domains from URLhaus excluding popular domains -cat urlhaus-domains.txt | \ -grep -F -vf urlhaus-top-domains.txt > malware-domains.txt +cat "urlhaus-domains.txt" | \ +grep -F -vf "urlhaus-top-domains.txt" > "malware-domains.txt" -cat urlhaus-domains-online.txt | \ -grep -F -vf urlhaus-top-domains.txt > malware-domains-online.txt +cat "urlhaus-domains-online.txt" | \ +grep -F -vf "urlhaus-top-domains.txt" > "malware-domains-online.txt" ## Parse malware URLs from popular domains -cat urlhaus.txt | \ -grep -F -f urlhaus-top-domains.txt > malware-url-top-domains.txt +cat "urlhaus.txt" | \ +grep -F -f "urlhaus-top-domains.txt" > "malware-url-top-domains.txt" -cat urlhaus-online.txt | \ -grep -F -f urlhaus-top-domains.txt > malware-url-top-domains-online.txt +cat "urlhaus-online.txt" | \ +grep -F -f "urlhaus-top-domains.txt" > "malware-url-top-domains-online.txt" ## Merge malware domains and URLs @@ -95,14 +95,14 @@ FIFTH_LINE="! License: https://creativecommons.org/publicdomain/zero/1.0/" SIXTH_LINE="! Source: https://urlhaus.abuse.ch/api/" COMMENT="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE" -cat malware-domains.txt malware-url-top-domains.txt | \ +cat "malware-domains.txt" "malware-url-top-domains.txt" | \ sort | \ -sed '1 i\'"$COMMENT"'' > ../urlhaus-filter.txt +sed '1 i\'"$COMMENT"'' > "../urlhaus-filter.txt" -cat malware-domains-online.txt malware-url-top-domains-online.txt | \ +cat "malware-domains-online.txt" "malware-url-top-domains-online.txt" | \ sort | \ sed '1 i\'"$COMMENT"'' | \ -sed '1s/Malicious/Online Malicious/' > ../urlhaus-filter-online.txt +sed "1s/Malicious/Online Malicious/" > "../urlhaus-filter-online.txt" ## Host-only blocklist @@ -113,13 +113,13 @@ FOURTH_LINE="# License: https://creativecommons.org/publicdomain/zero/1.0/" FIFTH_LINE="# Source: https://urlhaus.abuse.ch/api/" COMMENT="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE" -cat malware-domains.txt | \ +cat "malware-domains.txt" | \ sort | \ -sed '1 i\'"$COMMENT"'' > ../urlhaus-filter-hosts.txt +sed '1 i\'"$COMMENT"'' > "../urlhaus-filter-hosts.txt" -cat malware-domains-online.txt | \ +cat "malware-domains-online.txt" | \ sort | \ sed '1 i\'"$COMMENT"'' | \ -sed '1s/Malicious/Online Malicious/' > ../urlhaus-filter-hosts-online.txt +sed "1s/Malicious/Online Malicious/" > "../urlhaus-filter-hosts-online.txt" -cd ../ && rm -r tmp/ +cd ../ && rm -r "tmp/" diff --git a/utils/commit.sh b/utils/commit.sh index 15faddd2..7b3d5fc3 100644 --- a/utils/commit.sh +++ b/utils/commit.sh @@ -1,6 +1,6 @@ #!/bin/sh -set -e -x +set -efux -o pipefail ## Commit the filter update