From 5c7b1f4645575d739feec825ae7c0baa7f038a7c Mon Sep 17 00:00:00 2001 From: MDLeom <2809763-curben@users.noreply.gitlab.com> Date: Sat, 9 Mar 2024 04:06:37 +0000 Subject: [PATCH] feat(source): add mitchellkrogza/Phishing.Database ref #40 revert e68268f50612b8689962e593c8d50ca948c4bc45 --- README.md | 8 +++++--- src/script.sh | 13 +++++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bec70c07..400ea99b 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ - [CI Variables](#ci-variables) - [License](#license) -A blocklist of phishing websites, curated from [OpenPhish](https://openphish.com/). Blocklist is updated twice a day. +A blocklist of phishing websites, curated from [OpenPhish](https://openphish.com/) and [mitchellkrogza/Phishing.Database](https://github.com/mitchellkrogza/Phishing.Database/blob/master/phishing-domains-ACTIVE.txt). Blocklist is updated twice a day. | Client | mirror 1 | mirror 2 | mirror 3 | mirror 4 | mirror 5 | mirror 6 | | ------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | @@ -227,9 +227,9 @@ _Popular_ websites are as listed in the [Umbrella Popularity List](https://s3-us If you wish to exclude certain website(s) that you believe is sufficiently well-known, please create an [issue](https://gitlab.com/malware-filter/phishing-filter/issues) or [merge request](https://gitlab.com/malware-filter/phishing-filter/merge_requests). -This blocklist **only** accepts new phishing URLs from [OpenPhish](https://openphish.com/). +This blocklist **only** accepts new phishing URLs from [OpenPhish](https://openphish.com/) and [mitchellkrogza/Phishing.Database](https://github.com/mitchellkrogza/Phishing.Database). -Please report new phishing URL to [OpenPhish](https://openphish.com/faq.html). +Please report new phishing URL to [OpenPhish](https://openphish.com/faq.html) or [mitchellkrogza/Phishing.Database](https://github.com/mitchellkrogza/Phishing.Database/issues). ## See also @@ -259,6 +259,8 @@ filters: [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/) [OpenPhish](https://openphish.com/): Available [free of charge](https://openphish.com/terms.html) by OpenPhish +[mitchellkrogza/Phishing.Database](https://github.com/mitchellkrogza/Phishing.Database): MIT License + [Tranco List](https://tranco-list.eu/): [MIT License](https://choosealicense.com/licenses/mit/) [Umbrella Popularity List](https://s3-us-west-1.amazonaws.com/umbrella-static/index.html): Available free of charge by Cisco Umbrella diff --git a/src/script.sh b/src/script.sh index e6720c21..9db3c47c 100644 --- a/src/script.sh +++ b/src/script.sh @@ -56,6 +56,7 @@ cd "tmp/" ## Prepare datasets curl "https://openphish.com/feed.txt" -o "openphish-raw.txt" +curl "https://github.com/mitchellkrogza/Phishing.Database/raw/master/phishing-links-ACTIVE.txt" -o "phishing.db-raw.txt" curl "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" -o "top-1m-umbrella.zip" curl "https://tranco-list.eu/top-1m.csv.zip" -o "top-1m-tranco.zip" @@ -98,8 +99,16 @@ sed "s/^www\.//g" | \ # url encode space #11 sed "s/ /%20/g" > "openphish.txt" +cat "phishing.db-raw.txt" | \ +tr "[:upper:]" "[:lower:]" | \ +cut -f 3- -d "/" | \ +grep -F "." | \ +sed "s/^www\.//g" | \ +sed "s/ /%20/g" > "phishing.db.txt" + ## Combine all sources -sort -u "openphish.txt" > "phishing.txt" +cat "openphish.txt" "phishing.db.txt" | \ +sort -u > "phishing.txt" ## Parse domain and IP address only cat "phishing.txt" | \ @@ -225,7 +234,7 @@ SECOND_LINE="! Updated: $CURRENT_TIME" THIRD_LINE="! Expires: 1 day (update frequency)" FOURTH_LINE="! Homepage: https://gitlab.com/malware-filter/phishing-filter" FIFTH_LINE="! License: https://gitlab.com/malware-filter/phishing-filter#license" -SIXTH_LINE="! Sources: openphish.com" +SIXTH_LINE="! Sources: openphish.com, github.com/mitchellkrogza/Phishing.Database" COMMENT_UBO="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE" mkdir -p "../public/"