fix: dedup entries & parse .js links only
This commit is contained in:
parent
d880d44655
commit
b0477feea0
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,17 +1,13 @@
|
|||
'use strict'
|
||||
|
||||
const { join } = require('path')
|
||||
const { appendFile, readdir, readFile, writeFile } = require('fs').promises
|
||||
const { appendFile, readdir, readFile, rm } = require('fs').promises
|
||||
const { parse } = JSON
|
||||
|
||||
const f = async () => {
|
||||
// __dirname is src/
|
||||
const outputFile = join(__dirname, '../dist/tracking-data.txt')
|
||||
await writeFile(outputFile, `# Title: Tracking URL
|
||||
# Updated: ${new Date().toUTCString()}
|
||||
# Repo: https://gitlab.com/curben/tracking-filter
|
||||
# License: https://gitlab.com/curben/tracking-filter#license
|
||||
# Source: https://github.com/duckduckgo/tracker-radar\n`)
|
||||
const outputFile = join(__dirname, '../tmp/tracking-data-raw.txt')
|
||||
await rm(outputFile, { force: true })
|
||||
const domains = join(__dirname, '../tmp/tracker-radar/domains')
|
||||
const countries = await readdir(domains)
|
||||
for (const country of countries) {
|
||||
|
@ -21,8 +17,8 @@ const f = async () => {
|
|||
const { resources } = parse(data)
|
||||
const tracking = resources.filter(({ fingerprinting }) => fingerprinting === 1)
|
||||
for (const { rule } of tracking) {
|
||||
const link = rule.replace(/\\/g, '') + '\n'
|
||||
await appendFile(outputFile, link)
|
||||
const link = rule.replace(/\\/g, '').replace(/^www\./g, '')
|
||||
if (link.endsWith('.js')) await appendFile(outputFile, link + '\n')
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,20 +7,15 @@ cd "tmp/"
|
|||
|
||||
|
||||
# Prepare datasets
|
||||
# rm -rf "tracker-radar/"
|
||||
# git clone --depth 1 "https://github.com/duckduckgo/tracker-radar" "tracker-radar/"
|
||||
rm -rf "tracker-radar/"
|
||||
git clone --depth 1 "https://github.com/duckduckgo/tracker-radar" "tracker-radar/"
|
||||
|
||||
# Extract tracking links
|
||||
node "../src/script.js"
|
||||
|
||||
# # Extract tracking links
|
||||
# node "../src/script.js"
|
||||
|
||||
|
||||
# # Cleanup
|
||||
# cat "../dist/tracking-data.txt" | \
|
||||
# # Remove comment
|
||||
# sed "/^#/d" | \
|
||||
# # Remove www.
|
||||
# sed "s/^www\.//g" > "tracking-url.txt"
|
||||
# Cleanup
|
||||
cat "tracking-data-raw.txt" | \
|
||||
sort -u > "tracking-url.txt"
|
||||
|
||||
|
||||
CURRENT_TIME="$(date -R -u)"
|
||||
|
@ -32,6 +27,10 @@ FIFTH_LINE="! License: https://gitlab.com/curben/tracking-filter#license"
|
|||
SIXTH_LINE="! Source: https://github.com/duckduckgo/tracker-radar"
|
||||
COMMENT_UBO="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE"
|
||||
|
||||
# Original data
|
||||
cat "tracking-url.txt" | \
|
||||
sed '1 i\'"$COMMENT_UBO"'' | \
|
||||
sed "s/^!/#/g" > "../dist/tracking-data.txt"
|
||||
|
||||
# uBO & Adguard
|
||||
cat "tracking-url.txt" | \
|
||||
|
@ -39,7 +38,6 @@ sed "s/^/||/g" | \
|
|||
sed "s/$/\$all/g" | \
|
||||
sed '1 i\'"$COMMENT_UBO"'' > "../dist/tracking-filter.txt"
|
||||
|
||||
|
||||
# Vivaldi
|
||||
cat "tracking-url.txt" | \
|
||||
sed "s/^/||/g" | \
|
||||
|
|
Loading…
Reference in New Issue