From bb9e8ca7cf3009f51290e96ac0731bda8d181c48 Mon Sep 17 00:00:00 2001
From: curben <2809763-curben@users.noreply.gitlab.com>
Date: Wed, 1 Apr 2020 11:07:54 +0100
Subject: [PATCH] feat: use Tranco list

- replace Cisco Umbrella
- Tranco's ranking method is open-source
  * https://github.com/DistriNet/tranco-list
  * https://tranco-list.eu/
---
 script.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/script.sh b/script.sh
index 33a00bfa..add6cdd5 100644
--- a/script.sh
+++ b/script.sh
@@ -8,7 +8,7 @@ mkdir -p "tmp/" && cd "tmp/"
 
 ## Prepare datasets
 wget "https://urlhaus.abuse.ch/downloads/csv/" -O "urlhaus.zip"
-wget "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" -O "top-1m.csv.zip"
+wget "https://tranco-list.eu/top-1m.csv.zip" -O "top-1m.csv.zip"
 
 cp "../src/exclude.txt" "."
 
@@ -49,7 +49,7 @@ cut -f 1 -d ":" | \
 sort -u > "urlhaus-domains-online.txt"
 
 
-## Parse the Cisco Umbrella 1 Million
+## Parse the Tranco 1 Million
 unzip -p "top-1m.csv.zip" | \
 dos2unix | \
 # Parse domains only