curben 2018-10-11 14:24:31 +10:30
parent 88d6447fe0
commit 6c030d840e
2 changed files with 2 additions and 2 deletions

View File

@@ -25,7 +25,7 @@ cut -f 1 -d ':' | \
# Remove www
# Only matches domains that start with www
# Not examplewww.com
-sed ':a;N;$!ba;s/\nwww\./\n/g' | \
+sed -e ':a' -e 'N' -e '$!ba' -e 's/\nwww\./\n/g' | \
# Sort and remove duplicates
sort -u | \
# Exclude Umbrella Top 1M. grep inverse match whole line

View File

@@ -16,7 +16,7 @@ cut -f 2 -d ',' | \
# Remove www
# Only matches domains that start with www
# Not examplewww.com
-sed ':a;N;$!ba;s/\nwww\./\n/g' | \
+sed -e ':a' -e 'N' -e '$!ba' -e 's/\nwww\./\n/g' | \
# Remove duplicates
sort -u > ../src/top-1m.txt