Remove top-1m.txt

The dataset is not in the public domain and may be subject to copyright claims by Umbrella/Cisco.
This commit is contained in:
curben 2018-10-22 13:40:22 +10:30
parent 28627e268d
commit 1547bb0e96
2 changed files with 3 additions and 916768 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -35,3 +35,6 @@ grep -Fx -vf ../src/top-1m.txt | \
grep -Fx -vf ../src/exclude.txt | \
# Append header comment to the filter list
sed '1 i\'"$COMMENT"'' > ../urlhaus-filter.txt
# Remove downloaded dataset
rm ../src/top-1m.txt