From 5a4a8bb9bc55a01d86826b8db3b84cbb0848950a Mon Sep 17 00:00:00 2001 From: MDLeom <2809763-curben@users.noreply.gitlab.com> Date: Thu, 1 Dec 2022 10:00:32 +0000 Subject: [PATCH] refactor: xmlstarlet -> html-xml-utils --- src/script.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/script.sh b/src/script.sh index 24c3be16..bb458b48 100644 --- a/src/script.sh +++ b/src/script.sh @@ -173,6 +173,12 @@ sort -u > "top-1m-tranco.txt" # ## Append new line https://unix.stackexchange.com/a/31955 # sed '$a\' > "oisd-exclude.txt" +# # html-xml-utils +# cat "oisd-exclude.html" | \ +# hxwls | \ +# grep -F '?w=' | \ +# sed 's/^?w=//g' > "oisd-exclude.txt" + # Merge Umbrella, Tranco, Radar and self-maintained top domains cat "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" | \ sort -u > "top-1m-well-known.txt"