fix: skip tranco if download fails
This commit is contained in:
parent
f70b9f20b2
commit
70bf08a46d
|
@ -14,7 +14,7 @@ jobs:
|
|||
- name: Install Dependencies
|
||||
run: |
|
||||
apk update
|
||||
apk add brotli curl git grep jq zstd
|
||||
apk add brotli curl file grep jq zstd
|
||||
- name: Build
|
||||
env:
|
||||
CF_API: ${{ secrets.CF_API }}
|
||||
|
|
|
@ -12,7 +12,7 @@ build_job:
|
|||
stage: build
|
||||
|
||||
before_script:
|
||||
- apk update && apk add brotli curl grep jq zstd
|
||||
- apk update && apk add brotli curl file grep jq zstd
|
||||
|
||||
script:
|
||||
- sh src/script.sh
|
||||
|
|
|
@ -72,7 +72,7 @@ cd "tmp/"
|
|||
## Prepare datasets
|
||||
curl "https://urlhaus.abuse.ch/downloads/csv/" -o "urlhaus.zip"
|
||||
curl "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" -o "top-1m-umbrella.zip"
|
||||
curl "https://tranco-list.eu/top-1m.csv.zip" -o "top-1m-tranco.zip"
|
||||
curl "https://tranco-list.eu/download/daily/top-1m.csv.zip" -o "top-1m-tranco.zip"
|
||||
|
||||
## Cloudflare Radar
|
||||
if [ -n "$CF_API" ]; then
|
||||
|
@ -161,15 +161,21 @@ sed "s/^www\.//g" | \
|
|||
sort -u > "top-1m-umbrella.txt"
|
||||
|
||||
## Parse the Tranco 1 Million
|
||||
unzip "top-1m-tranco.zip" | \
|
||||
dos2unix | \
|
||||
tr "[:upper:]" "[:lower:]" | \
|
||||
# Parse domains only
|
||||
cut -f 2 -d "," | \
|
||||
grep -F "." | \
|
||||
# Remove www.
|
||||
sed "s/^www\.//g" | \
|
||||
sort -u > "top-1m-tranco.txt"
|
||||
if [ -n "$(file 'top-1m-tranco.zip' | grep 'Zip archive data')" ]; then
|
||||
unzip "top-1m-tranco.zip" | \
|
||||
dos2unix | \
|
||||
tr "[:upper:]" "[:lower:]" | \
|
||||
# Parse domains only
|
||||
cut -f 2 -d "," | \
|
||||
grep -F "." | \
|
||||
# Remove www.
|
||||
sed "s/^www\.//g" | \
|
||||
sort -u > "top-1m-tranco.txt"
|
||||
else
|
||||
# Tranco's download is unreliable; create a placeholder list so the merge below still has a file to read
|
||||
echo "top-1m-tranco.zip is not a zip, skipping it..."
|
||||
touch "top-1m-tranco.txt"
|
||||
fi
|
||||
|
||||
# Merge Umbrella, Tranco and self-maintained top domains
|
||||
cat "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" | \
|
||||
|
|
Loading…
Reference in New Issue