fix: skip tranco if download fails
This commit is contained in:
parent
7e8139510d
commit
b94d832896
|
@ -12,7 +12,7 @@ if [ -n "$BASH_VERSION" ]; then
|
|||
shopt -s expand_aliases
|
||||
fi
|
||||
|
||||
alias curl="curl -L"
|
||||
alias curl="curl -iL"
|
||||
alias rm="rm -rf"
|
||||
|
||||
## Use GNU grep, busybox grep is not as performant
|
||||
|
@ -150,15 +150,22 @@ sed "s/^www\.//g" | \
|
|||
sort -u > "top-1m-umbrella.txt"
|
||||
|
||||
## Parse the Tranco 1 Million
|
||||
unzip "top-1m-tranco.zip" | \
|
||||
dos2unix | \
|
||||
tr "[:upper:]" "[:lower:]" | \
|
||||
# Parse domains only
|
||||
cut -f 2 -d "," | \
|
||||
grep -F "." | \
|
||||
# Remove www.
|
||||
sed "s/^www\.//g" | \
|
||||
sort -u > "top-1m-tranco.txt"
|
||||
# Build "top-1m-tranco.txt" from the downloaded Tranco archive, but only when
# the download really is a zip archive. The Tranco download is unreliable, so
# anything else is skipped and an output file is still created so that later
# steps reading "top-1m-tranco.txt" find it.
#
# grep -q tests for the match through its exit status, so no output has to be
# captured and compared against the empty string.
if file 'top-1m-tranco.zip' | grep -q 'Zip archive data'; then
  # NOTE(review): this pipeline expects unzip to write the archive contents to
  # stdout; presumably unzip is aliased (e.g. to "unzip -p") in a part of the
  # script not visible here - confirm.
  unzip "top-1m-tranco.zip" |
    dos2unix |
    tr "[:upper:]" "[:lower:]" |
    # Parse domains only
    cut -f 2 -d "," |
    grep -F "." |
    # Remove www.
    sed "s/^www\.//g" |
    sort -u > "top-1m-tranco.txt"
else
  # tranco has unreliable download
  echo "top-1m-tranco.zip is not a zip, skipping it..."
  # touch creates the file if missing; an existing file keeps its contents.
  touch "top-1m-tranco.txt"
fi
|
||||
|
||||
|
||||
# Merge Umbrella, Tranco, Radar and self-maintained top domains
|
||||
cat "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" | \
|
||||
|
|
Loading…
Reference in New Issue