diff --git a/README.md b/README.md index 354730a8..f73a1451 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # Phishing URL Blocklist -> Announcement (2022/05/21): curben.gitlab.io has been migrated to malware-filter.gitlab.io - -A blocklist of phishing websites, based on the [PhishTank](https://www.phishtank.com/) and [OpenPhish](https://openphish.com/) lists. Blocklist is updated twice a day. +A blocklist of phishing websites, curated from [PhishTank](https://www.phishtank.com/), [OpenPhish](https://openphish.com/), [phishunt.io](https://phishunt.io/) and [mitchellkrogza/Phishing.Database](https://github.com/mitchellkrogza/Phishing.Database/blob/master/phishing-domains-ACTIVE.txt). Blocklist is updated twice a day. There are multiple formats available, refer to the appropriate section according to the program used: @@ -461,8 +459,7 @@ Please report new phishing URL to [PhishTank](https://www.phishtank.com/add_web_ ## See also -[Phishing Army](https://phishing.army/) by [Andrea Draghetti](https://www.andreadraghetti.it/) is available in domain-based format and utilises more sources (PhishTank, OpenPhish, -Cert.pl, PhishFindR, Urlscan.io and Phishunt.io). Its exclusion methods are outdated though: [Anudeep's whitelist](https://github.com/anudeepND/whitelist) was lasted updated in Dec 2021 and [Alexa](https://www.alexa.com/topsites) was deprecated in May 2022. +[Phishing Army](https://phishing.army/) by [Andrea Draghetti](https://www.andreadraghetti.it/) is available in domain-based format and utilises more sources. Its exclusion methods are not up-to-date though: [Anudeep's whitelist](https://github.com/anudeepND/whitelist) was last updated in Dec 2021 and [Alexa](https://www.alexa.com/topsites) was deprecated in May 2022. 
## FAQ and Guides @@ -480,10 +477,14 @@ _PhishTank is either trademark or registered trademark of Cisco Systems, Inc._ [OpenPhish](https://openphish.com/): Available [free of charge](https://openphish.com/terms.html) by OpenPhish -[Tranco List](https://tranco-list.eu/): MIT License +[Tranco List](https://tranco-list.eu/): [MIT License](https://choosealicense.com/licenses/mit/) [Umbrella Popularity List](https://s3-us-west-1.amazonaws.com/umbrella-static/index.html): Available free of charge by Cisco Umbrella -[csvquote](https://github.com/dbro/csvquote): [MIT License](https://choosealicense.com/licenses/mit/) +[csvquote](https://github.com/dbro/csvquote): MIT License + +[phishunt.io](https://phishunt.io/): All rights reserved by [Daniel López](https://twitter.com/0xDanielLopez) + +[mitchellkrogza/Phishing.Database](https://github.com/mitchellkrogza/Phishing.Database): MIT License This repository is not endorsed by PhishTank/OpenDNS and OpenPhish. diff --git a/src/script.sh b/src/script.sh index 48148c63..a2b9d1a8 100644 --- a/src/script.sh +++ b/src/script.sh @@ -38,6 +38,8 @@ cd "tmp/" ## Prepare datasets curl -L "https://data.phishtank.com/data/$PHISHTANK_API/online-valid.csv.bz2" -o "phishtank.bz2" curl -L "https://openphish.com/feed.txt" -o "openphish-raw.txt" +curl -L "https://phishunt.io/feed.txt" -o "phishunt-raw.txt" +curl -L "https://github.com/mitchellkrogza/Phishing.Database/raw/master/phishing-links-ACTIVE.txt" -o "phishing.db-raw.txt" curl -L "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" -o "top-1m-umbrella.zip" curl -L "https://tranco-list.eu/top-1m.csv.zip" -o "top-1m-tranco.zip" @@ -67,8 +69,22 @@ grep -F "." | \ sed "s/^www\.//g" | \ sed "s/ /%20/g" > "openphish.txt" -## Combine PhishTank and OpenPhish -cat "phishtank.txt" "openphish.txt" | \ +cat "phishunt-raw.txt" | \ +tr "[:upper:]" "[:lower:]" | \ +cut -f 3- -d "/" | \ +grep -F "." 
| \ +sed "s/^www\.//g" | \ +sed "s/ /%20/g" > "phishunt.txt" + +cat "phishing.db-raw.txt" | \ +tr "[:upper:]" "[:lower:]" | \ +cut -f 3- -d "/" | \ +grep -F "." | \ +sed "s/^www\.//g" | \ +sed "s/ /%20/g" > "phishing.db.txt" + +## Combine all sources +cat "phishtank.txt" "openphish.txt" "phishunt.txt" "phishing.db.txt" | \ sort -u > "phishing.txt" ## Parse domain and IP address only @@ -165,9 +181,8 @@ SECOND_LINE="! Updated: $CURRENT_TIME" THIRD_LINE="! Expires: 1 day (update frequency)" FOURTH_LINE="! Homepage: https://gitlab.com/malware-filter/phishing-filter" FIFTH_LINE="! License: https://gitlab.com/malware-filter/phishing-filter#license" -SIXTH_LINE="! Source: https://www.phishtank.com/ & https://openphish.com/" -ANNOUNCEMENT_1="\n! Announcement (2022/05/21): curben.gitlab.io has been migrated to malware-filter.gitlab.io" -COMMENT_UBO="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE\n$ANNOUNCEMENT_1" +SIXTH_LINE="! Sources: phishtank.com, openphish.com, phishunt.io, github.com/mitchellkrogza/Phishing.Database" +COMMENT_UBO="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE" mkdir -p "../public/" @@ -341,7 +356,7 @@ sed "2s/Domains Blocklist/Hosts Blocklist (IE)/" > "../public/phishing-filter.tp ## Clean up artifacts -rm "phishtank.csv" "top-1m-umbrella.zip" "top-1m-umbrella.txt" "top-1m-tranco.txt" "openphish-raw.txt" +rm "phishtank.csv" "top-1m-umbrella.zip" "top-1m-umbrella.txt" "top-1m-tranco.txt" "openphish-raw.txt" "phishunt-raw.txt" cd ../