From aaab4b82fb01615704a849f1d997a0b6e8438c75 Mon Sep 17 00:00:00 2001 From: MDLeom <2809763-curben@users.noreply.gitlab.com> Date: Sun, 2 Jan 2022 01:15:46 +0000 Subject: [PATCH] refactor: url encode space on the source --- src/script.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/script.sh b/src/script.sh index 15b94fae..7b476d68 100644 --- a/src/script.sh +++ b/src/script.sh @@ -39,14 +39,17 @@ sed 's/"//g' | \ cut -f 3- -d "/" | \ # Domain must have at least a 'dot' grep -F "." | \ -sed "s/^www\.//g" > "phishtank.txt" +sed "s/^www\.//g" | \ +# url encode space #11 +sed "s/ /%20/g" > "phishtank.txt" cat "openphish-raw.txt" | \ dos2unix | \ tr "[:upper:]" "[:lower:]" | \ cut -f 3- -d "/" | \ grep -F "." | \ -sed "s/^www\.//g" > "openphish.txt" +sed "s/^www\.//g" | \ +sed "s/ /%20/g" > "openphish.txt" ## Combine PhishTank and OpenPhish cat "phishtank.txt" "openphish.txt" | \ @@ -109,9 +112,7 @@ cat "phishing-domains.txt" | \ grep -F -vf "phishing-top-domains.txt" > "phishing-notop-domains-temp.txt" cat "phishing.txt" | \ -grep -F -f "phishing-top-domains.txt" | \ -# url encode space #11 -sed "s/ /%20/g" > "phishing-url-top-domains-temp.txt" +grep -F -f "phishing-top-domains.txt" > "phishing-url-top-domains-temp.txt" rm -f "phishing-url-top-domains.txt" "phishing-url-top-domains-raw.txt"