parent
6c030d840e
commit
1449c6ec47
|
@ -25,7 +25,7 @@ cut -f 1 -d ':' | \
|
|||
# Remove www
|
||||
# Only matches domains that start with www
|
||||
# Not examplewww.com
|
||||
sed -e ':a' -e 'N' -e '$!ba' -e 's/\nwww\./\n/g' | \
|
||||
sed -e 's/^www\.//g' | \
|
||||
# Sort and remove duplicates
|
||||
sort -u | \
|
||||
# Exclude Umbrella Top 1M. grep inverse match whole line
|
||||
|
|
|
@ -16,7 +16,7 @@ cut -f 2 -d ',' | \
|
|||
# Remove www
|
||||
# Only matches domains that start with www
|
||||
# Not examplewww.com
|
||||
sed -e ':a' -e 'N' -e '$!ba' -e 's/\nwww\./\n/g' | \
|
||||
sed -e 's/^www\.//g' | \
|
||||
# Remove duplicates
|
||||
sort -u > ../src/top-1m.txt
|
||||
|
||||
|
|
Loading…
Reference in New Issue