From 6e359f9a793c65edc0bedb1282961d9f5a4cff24 Mon Sep 17 00:00:00 2001
From: MDLeom <2809763-curben@users.noreply.gitlab.com>
Date: Sun, 16 Mar 2025 10:05:02 +0000
Subject: [PATCH] fix: remove trailing slash from domain to replace previous workaround 0578e6c16a922072b0cb9b2f489541cb0526ed09

---
 src/script.sh | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/script.sh b/src/script.sh
index 50d743a8..b16032c1 100644
--- a/src/script.sh
+++ b/src/script.sh
@@ -148,7 +148,10 @@ if [ -n "$(file 'phishtank.bz2' | grep 'bzip2 compressed data')" ]; then
   grep -F "." | \
   sed "s/^www\.//g" | \
   # url encode space #11
-  sed "s/ /%20/g" > "phishtank.txt"
+  sed "s/ /%20/g" | \
+  # remove trailing slash from domain except path #43
+  sed -r "s/(^[^\/]*)\/+$/\1/g" | \
+  sort -u > "phishtank.txt"
 else
   # cloudflare may impose captcha
   echo "phishtank.bz2 is not a bzip2, skipping it..."
@@ -161,7 +164,9 @@ tr "[:upper:]" "[:lower:]" | \
 cut -f 3- -d "/" | \
 grep -F "." | \
 sed "s/^www\.//g" | \
-sed "s/ /%20/g" > "openphish.txt"
+sed "s/ /%20/g" | \
+sed -r "s/(^[^\/]*)\/+$/\1/g" | \
+sort -u > "openphish.txt"
 
 gzip -dc "ipthreat.gz" | \
 # remove comment
@@ -171,7 +176,9 @@ tr "[:upper:]" "[:lower:]" | \
 cut -f 3- -d "/" | \
 grep -F "." | \
 sed "s/^www\.//g" | \
-sed "s/ /%20/g" > "ipthreat.txt"
+sed "s/ /%20/g" | \
+sed -r "s/(^[^\/]*)\/+$/\1/g" | \
+sort -u > "ipthreat.txt"
 
 ## Combine all sources
 cat "openphish.txt" "ipthreat.txt" "phishtank.txt" | \