parent
07a9806849
commit
7cba69f1d5
|
@ -9,7 +9,7 @@ build_job:
|
|||
stage: build
|
||||
|
||||
before_script:
|
||||
- 'which ssh-agent || (apk update && apk add curl openssh-client git grep)'
|
||||
- apk update && apk add curl openssh-client git grep xmlstarlet
|
||||
- eval $(ssh-agent -s)
|
||||
- echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add - > /dev/null
|
||||
- mkdir -p ~/.ssh
|
||||
|
|
|
@ -53,4 +53,4 @@ s3-us-gov-east-1.amazonaws.com
|
|||
s3-us-gov-west-1.amazonaws.com
|
||||
encrypted-tbn0.gstatic.com
|
||||
pomoc.o2.pl
|
||||
form.elementform.com
|
||||
form.elementform.com
|
||||
|
|
|
@ -23,6 +23,7 @@ curl -L "https://data.phishtank.com/data/$PHISHTANK_API/online-valid.csv.bz2" -o
|
|||
curl -L "https://openphish.com/feed.txt" -o "openphish-raw.txt"
|
||||
curl -L "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" -o "top-1m-umbrella.zip"
|
||||
curl -L "https://tranco-list.eu/top-1m.csv.zip" -o "top-1m-tranco.zip"
|
||||
curl -L "https://oisd.nl/excludes.php" -o "oisd-exclude.html"
|
||||
|
||||
bunzip2 -kc "phishtank.bz2" > "phishtank.csv"
|
||||
|
||||
|
@ -84,8 +85,16 @@ grep -F "." | \
|
|||
sed "s/^www\.//g" | \
|
||||
sort -u > "top-1m-tranco.txt"
|
||||
|
||||
## Parse oisd exclusion list
|
||||
cat "oisd-exclude.html" | \
|
||||
# https://stackoverflow.com/a/47600828
|
||||
xmlstarlet format --recover --html 2>/dev/null | \
|
||||
xmlstarlet select --html --template --value-of '//a' | \
|
||||
## Append new line https://unix.stackexchange.com/a/31955
|
||||
sed '$a\' > "oisd-exclude.txt"
|
||||
|
||||
# Merge Umbrella, Tranco and self-maintained top domains
|
||||
cat "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" | \
|
||||
cat "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" "oisd-exclude.txt" | \
|
||||
sort -u > "top-1m-well-known.txt"
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue