chore: remove unused oisd exclusion

This commit is contained in:
MDLeom 2025-02-15 01:12:03 +00:00
parent f1fa7e450c
commit 8506f18029
No known key found for this signature in database
GPG Key ID: 32D3E28E96A695E8
3 changed files with 2 additions and 16 deletions

View File

@ -14,7 +14,7 @@ jobs:
- name: Install Dependencies
run: |
apk update
apk add brotli curl git grep jq xmlstarlet zstd
apk add brotli curl git grep jq zstd
- name: Build
env:
PHISHTANK_API: ${{ secrets.PHISHTANK_API }}

View File

@ -12,7 +12,7 @@ build_job:
stage: build
before_script:
- apk update && apk add brotli curl grep jq xmlstarlet zstd
- apk update && apk add brotli curl grep jq zstd
script:
- sh src/script.sh

View File

@ -160,20 +160,6 @@ grep -F "." | \
sed "s/^www\.//g" | \
sort -u > "top-1m-tranco.txt"
# ## Parse oisd exclusion list
# cat "oisd-exclude.html" | \
# # https://stackoverflow.com/a/47600828
# xmlstarlet format --recover --html 2>/dev/null | \
# xmlstarlet select --html --template --value-of '//a' | \
# ## Append new line https://unix.stackexchange.com/a/31955
# sed '$a\' > "oisd-exclude.txt"
# # html-xml-utils
# cat "oisd-exclude.html" | \
# hxwls | \
# grep -F '?w=' | \
# sed 's/^?w=//g' > "oisd-exclude.txt"
# Merge Umbrella, Tranco and self-maintained (exclude.txt) top domains
cat "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" | \
sort -u > "top-1m-well-known.txt"