Revert "fix: oisd.nl is down at the moment"

This reverts commit d931f137e2.
This commit is contained in:
MDLeom 2022-01-09 08:33:16 +00:00
parent d931f137e2
commit 0e7c5bb2af
No known key found for this signature in database
GPG Key ID: 32D3E28E96A695E8
1 changed files with 8 additions and 10 deletions

View File

@ -25,7 +25,7 @@ curl -L "https://data.phishtank.com/data/$PHISHTANK_API/online-valid.csv.bz2" -o
# Fetch the raw inputs. -L makes curl follow HTTP redirects; -o names the local output file.
curl -L "https://openphish.com/feed.txt" -o "openphish-raw.txt"
# Top-1m domain rankings (Umbrella and Tranco), downloaded as zip archives.
curl -L "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" -o "top-1m-umbrella.zip"
curl -L "https://tranco-list.eu/top-1m.csv.zip" -o "top-1m-tranco.zip"
# oisd.nl exclusion page, saved as HTML; it is parsed into "oisd-exclude.txt" further down.
#curl -L "https://oisd.nl/excludes.php" -o "oisd-exclude.html"
curl -L "https://oisd.nl/excludes.php" -o "oisd-exclude.html"
# Decompress to stdout (-c) and keep the original archive on disk (-k).
bunzip2 -kc "phishtank.bz2" > "phishtank.csv"
@ -90,15 +90,13 @@ grep -F "." | \
sed "s/^www\.//g" | \
sort -u > "top-1m-tranco.txt"
# ## Parse oisd exclusion list
# cat "oisd-exclude.html" | \
# # https://stackoverflow.com/a/47600828
# xmlstarlet format --recover --html 2>/dev/null | \
# xmlstarlet select --html --template --value-of '//a' | \
# ## Append new line https://unix.stackexchange.com/a/31955
# sed '$a\' > "oisd-exclude.txt"
## to be removed once oisd.nl is up
touch "oisd-exclude.txt"
## Parse oisd exclusion list
## Reads the downloaded "oisd-exclude.html", extracts the text of every <a>
## element and writes the result to "oisd-exclude.txt".
# Feed the file by redirection instead of `cat file |` (ShellCheck SC2002).
# The input redirection comes before 2>/dev/null so that a missing input file
# is still reported, while xmlstarlet's own parser warnings stay suppressed.
# --recover: keep whatever is parsable from the HTML, see
# https://stackoverflow.com/a/47600828
xmlstarlet format --recover --html < "oisd-exclude.html" 2>/dev/null | \
xmlstarlet select --html --template --value-of '//a' | \
## Append new line https://unix.stackexchange.com/a/31955
sed '$a\' > "oisd-exclude.txt"
# Merge Umbrella, Tranco and self-maintained top domains
cat "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" "oisd-exclude.txt" | \