parent
8ec5dc356a
commit
8267a13ec1
72
README.md
72
README.md
|
@ -1,12 +1,74 @@
|
|||
# Tracking URL
|
||||
# Tracking URL Blocklist
|
||||
|
||||
A list of javascript links (or webpages with javascript) that perform [browser fingerprinting](https://en.wikipedia.org/wiki/Web_tracking). The original data set is collected by DuckDuckGo through its [Tracker Radar](https://github.com/duckduckgo/tracker-radar) project.
|
||||
A blocklist of JavaScript links (or webpages with JavaScript) that perform [browser fingerprinting](https://en.wikipedia.org/wiki/Web_tracking). The original data set is collected by DuckDuckGo's [Tracker Radar](https://github.com/duckduckgo/tracker-radar) project. The blocklist is updated on the first day of the month.
|
||||
|
||||
Data is updated on the first day of the month.
|
||||
The list contains **all** the links that Tracker Radar detected as performing browser fingerprinting; no further processing is done to remove false positives.
|
||||
|
||||
The list contains **all** the links that Tracker Radar detected to have browser fingerprinting, there is no further processing to remove false positive. This is not a blocklist (yet).
|
||||
- uBlock Origin (uBO) -> [URL-based](#url-based) section
|
||||
- AdGuard browser extension -> [URL-based](#url-based) section
|
||||
- Vivaldi -> [URL-based (Vivaldi)](#url-based-vivaldi) section
|
||||
- [Plain URL](#plain-url) - original links without filter syntax
|
||||
|
||||
Download: https://curben.gitlab.io/malware-filter/tracking-data.txt
|
||||
Check out my other filters:
|
||||
- [urlhaus-filter](https://gitlab.com/curben/urlhaus-filter)
|
||||
- [phishing-filter](https://gitlab.com/curben/phishing-filter)
|
||||
- [pup-filter](https://gitlab.com/curben/pup-filter)
|
||||
|
||||
## URL-based
|
||||
|
||||
Import the following URL into uBO/AdGuard to subscribe:
|
||||
|
||||
- https://curben.gitlab.io/malware-filter/tracking-filter.txt
|
||||
|
||||
<details>
|
||||
<summary>Mirrors</summary>
|
||||
|
||||
- https://cdn.statically.io/gl/curben/tracking-filter/master/dist/tracking-filter.txt
|
||||
- https://glcdn.githack.com/curben/tracking-filter/raw/master/dist/tracking-filter.txt
|
||||
- https://raw.githubusercontent.com/curbengh/tracking-filter/master/dist/tracking-filter.txt
|
||||
- https://cdn.statically.io/gh/curbengh/tracking-filter/master/dist/tracking-filter.txt
|
||||
- https://gitcdn.xyz/repo/curbengh/tracking-filter/master/dist/tracking-filter.txt
|
||||
- https://cdn.jsdelivr.net/gh/curbengh/tracking-filter/dist/tracking-filter.txt
|
||||
|
||||
</details>
|
||||
|
||||
## URL-based (Vivaldi)
|
||||
|
||||
_Requires Vivaldi Desktop/Android 3.3+; the blocking level must be at least "Block Trackers"._
|
||||
|
||||
Import the following URL into Vivaldi's **Tracker Blocking Sources** to subscribe:
|
||||
|
||||
- https://curben.gitlab.io/malware-filter/tracking-filter-vivaldi.txt
|
||||
|
||||
<details>
|
||||
<summary>Mirrors</summary>
|
||||
|
||||
- https://cdn.statically.io/gl/curben/tracking-filter/master/dist/tracking-filter-vivaldi.txt
|
||||
- https://glcdn.githack.com/curben/tracking-filter/raw/master/dist/tracking-filter-vivaldi.txt
|
||||
- https://raw.githubusercontent.com/curbengh/tracking-filter/master/dist/tracking-filter-vivaldi.txt
|
||||
- https://cdn.statically.io/gh/curbengh/tracking-filter/master/dist/tracking-filter-vivaldi.txt
|
||||
- https://gitcdn.xyz/repo/curbengh/tracking-filter/master/dist/tracking-filter-vivaldi.txt
|
||||
- https://cdn.jsdelivr.net/gh/curbengh/tracking-filter/dist/tracking-filter-vivaldi.txt
|
||||
|
||||
</details>
|
||||
|
||||
## Plain URL
|
||||
|
||||
A list of URLs without any filter syntax.
|
||||
|
||||
- https://curben.gitlab.io/malware-filter/tracking-data.txt
|
||||
|
||||
<details>
|
||||
<summary>Mirrors</summary>
|
||||
|
||||
- https://cdn.statically.io/gl/curben/tracking-filter/master/dist/tracking-data.txt
|
||||
- https://glcdn.githack.com/curben/tracking-filter/raw/master/dist/tracking-data.txt
|
||||
- https://raw.githubusercontent.com/curbengh/tracking-filter/master/dist/tracking-data.txt
|
||||
- https://cdn.statically.io/gh/curbengh/tracking-filter/master/dist/tracking-data.txt
|
||||
- https://gitcdn.xyz/repo/curbengh/tracking-filter/master/dist/tracking-data.txt
|
||||
- https://cdn.jsdelivr.net/gh/curbengh/tracking-filter/dist/tracking-data.txt
|
||||
|
||||
</details>
|
||||
|
||||
## License
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -9,8 +9,8 @@ const f = async () => {
|
|||
const outputFile = join(__dirname, '../dist/tracking-data.txt')
|
||||
await writeFile(outputFile, `# Title: Tracking URL
|
||||
# Updated: ${new Date().toUTCString()}
|
||||
# Repo: https://gitlab.com/curben/tracking-data
|
||||
# License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
# Repo: https://gitlab.com/curben/tracking-filter
|
||||
# License: https://gitlab.com/curben/tracking-filter#license
|
||||
# Source: https://github.com/duckduckgo/tracker-radar\n`)
|
||||
const domains = join(__dirname, '../tmp/tracker-radar/domains')
|
||||
const countries = await readdir(domains)
|
||||
|
|
|
@ -2,17 +2,97 @@
|
|||
|
||||
set -efux -o pipefail
|
||||
|
||||
## Create a temporary working folder
|
||||
mkdir -p "tmp/"
|
||||
cd "tmp/"
|
||||
|
||||
|
||||
## Prepare datasets
|
||||
# Prepare datasets
|
||||
# rm -rf "tracker-radar/"
|
||||
# git clone --depth 1 "https://github.com/duckduckgo/tracker-radar" "tracker-radar/"
|
||||
|
||||
|
||||
# # Extract tracking links
|
||||
# node "../src/script.js"
|
||||
|
||||
|
||||
# # Cleanup
|
||||
# cat "../dist/tracking-data.txt" | \
|
||||
# # Remove comment
|
||||
# sed "/^#/d" | \
|
||||
# # Remove www.
|
||||
# sed "s/^www\.//g" > "tracking-url.txt"
|
||||
|
||||
|
||||
CURRENT_TIME="$(date -R -u)"
|
||||
FIRST_LINE="! Title: Tracking URL Blocklist"
|
||||
SECOND_LINE="! Updated: $CURRENT_TIME"
|
||||
THIRD_LINE="! Expires: 7 day (update frequency)"
|
||||
FOURTH_LINE="! Homepage: https://gitlab.com/curben/tracking-filter"
|
||||
FIFTH_LINE="! License: https://gitlab.com/curben/tracking-filter#license"
|
||||
SIXTH_LINE="! Source: https://github.com/duckduckgo/tracker-radar"
|
||||
COMMENT_UBO="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE"
|
||||
|
||||
|
||||
# uBO & Adguard
|
||||
cat "tracking-url.txt" | \
|
||||
sed "s/^/||/g" | \
|
||||
sed "s/$/\$all/g" | \
|
||||
sed '1 i\'"$COMMENT_UBO"'' > "../dist/tracking-filter.txt"
|
||||
|
||||
|
||||
# Vivaldi
|
||||
cat "tracking-url.txt" | \
|
||||
sed "s/^/||/g" | \
|
||||
sed "s/$/\$document/g" | \
|
||||
sed '1 i\'"$COMMENT_UBO"'' | \
|
||||
sed "1s/Blocklist/Blocklist (Vivaldi)/" > "../dist/tracking-filter-vivaldi.txt"
|
||||
|
||||
## This took more than 10 minutes on my not-so-shabby laptop, which is equivalent to *forever* on a puny CI/CD VM.
|
||||
# # Snort & Suricata
|
||||
# COMMENT_HASH=$(printf "$COMMENT_UBO" | sed "s/^!/#/g" | awk '{printf "%s\\n", $0}' | head -c -2)
|
||||
|
||||
# ## Temporarily disable command print
|
||||
# set +x
|
||||
|
||||
# rm -f "../dist/tracking-filter-snort2.rules" \
|
||||
# "../dist/tracking-filter-snort3.rules" \
|
||||
# "../dist/tracking-filter-suricata.rules"
|
||||
|
||||
# SID="400000001"
|
||||
|
||||
# while read URL; do
|
||||
# HOST=$(echo "$URL" | cut -d"/" -f1)
|
||||
# URI=$(echo "$URL" | sed -e "s/^$HOST//" -e "s/;/\\\;/g")
|
||||
|
||||
# # Snort2 only supports <=2047 characters of `content`
|
||||
# SN_RULE="alert tcp \$HOME_NET any -> \$EXTERNAL_NET [80,443] (msg:\"tracking-filter tracking link detected\"; flow:established,from_client; content:\"GET\"; http_method; content:\"$(echo $URI | cut -c -2047)\"; http_uri; nocase; content:\"$HOST\"; content:\"Host\"; http_header; classtype:attempted-recon; sid:$SID; rev:1;)"
|
||||
|
||||
# SN3_RULE="alert http \$HOME_NET any -> \$EXTERNAL_NET any (msg:\"tracking-filter tracking link detected\"; http_header:field host; content:\"$HOST\",nocase; http_uri; content:\"$URI\",nocase; classtype:attempted-recon; sid:$SID; rev:1;)"
|
||||
|
||||
# SR_RULE="alert http \$HOME_NET any -> \$EXTERNAL_NET any (msg:\"tracking-filter tracking link detected\"; flow:established,from_client; http.method; content:\"GET\"; http.uri; content:\"$URI\"; endswith; nocase; http.host; content:\"$HOST\"; classtype:attempted-recon; sid:$SID; rev:1;)"
|
||||
|
||||
# echo "$SN_RULE" >> "../dist/tracking-filter-snort2.rules"
|
||||
# echo "$SN3_RULE" >> "../dist/tracking-filter-snort3.rules"
|
||||
# echo "$SR_RULE" >> "../dist/tracking-filter-suricata.rules"
|
||||
|
||||
# SID=$(( $SID + 1 ))
|
||||
# done < "tracking-url.txt"
|
||||
|
||||
# ## Re-enable command print
|
||||
# set -x
|
||||
|
||||
# sed -i '1 i\'"$COMMENT_HASH"'' "../dist/tracking-filter-snort2.rules"
|
||||
# sed -i "1s/Blocklist/Snort2 Ruleset/" "../dist/tracking-filter-snort2.rules"
|
||||
|
||||
# sed -i '1 i\'"$COMMENT_HASH"'' "../dist/tracking-filter-snort3.rules"
|
||||
# sed -i "1s/Blocklist/Snort3 Ruleset/" "../dist/tracking-filter-snort3.rules"
|
||||
|
||||
# sed -i '1 i\'"$COMMENT_HASH"'' "../dist/tracking-filter-suricata.rules"
|
||||
# sed -i "1s/Blocklist/Suricata Ruleset/" "../dist/tracking-filter-suricata.rules"
|
||||
|
||||
|
||||
## Clean-up artifacts
|
||||
rm -rf "tracker-radar/"
|
||||
git clone --depth 1 "https://github.com/duckduckgo/tracker-radar" "tracker-radar/"
|
||||
|
||||
|
||||
## Parse links
|
||||
node "../src/script.js"
|
||||
|
||||
cd ../
|
||||
|
|
Loading…
Reference in New Issue