malware-filter/src/build.js

'use strict'
// Attempt to download the GitLab job artifact and fail over to GitHub if unsuccessful.
// In GitLab Pages, the latest job status is marked as unknown/failed when the repo has a newer commit.
// The link to download the latest job artifact is also unavailable when that happens,
// unless it is queried manually through the API.
// Instead of using the API, I find it easier to fail over to GitHub.
// ref: https://gitlab.com/gitlab-org/gitlab/-/issues/29257
import { Extract } from 'unzipper'
import { dirname, join } from 'node:path'
import { mkdir, readdir, rm } from 'node:fs/promises'
import { pipeline } from 'node:stream/promises'
import { fileURLToPath } from 'node:url'
import { Readable } from 'node:stream'
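
// ES modules do not provide __dirname, so recreate it from import.meta.url.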
const __dirname = dirname(fileURLToPath(import.meta.url))
const rootPath = join(__dirname, '..')
const tmpPath = join(rootPath, 'tmp')
const publicPath = join(rootPath, 'public')
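
// Filter lists to bundle into the Pages site ('pup-filter' is currently excluded).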
const projects = [
  'urlhaus-filter',
  'phishing-filter',
  'tracking-filter',
  'vn-badsite-filter',
  'botnet-filter'
  // 'pup-filter'
]
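
// Check the pipeline badge SVG; its text contains "failed" when the latest GitLab pipeline failed.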
const pipelineStatus = async (url) => {
  console.log(`Checking pipeline from "${url}"`)
  try {
    const svg = await (await fetch(url)).text()
    if (svg.includes('failed')) throw new Error('last gitlab pipeline failed')
  } catch ({ message }) {
    throw new Error(message)
  }
}
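
// Download a project's GitLab Pages artifact and extract it;
// fall back to the nightly.link GitHub mirror if the download or the pipeline check fails.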
const dl = async (project) => {
  const filename = project + '.zip'
  const link = `https://gitlab.com/malware-filter/${project}/-/jobs/artifacts/main/download?job=pages`
  const pipelineUrl = `https://gitlab.com/malware-filter/${project}/badges/main/pipeline.svg`

  console.log(`Downloading ${filename} from "${link}"`)
  try {
    await pipeline(
      Readable.fromWeb((await fetch(link)).body),
      Extract({ path: rootPath })
    )
    await pipelineStatus(pipelineUrl)
  } catch ({ message }) {
    console.error(JSON.stringify({
      error: message,
      link,
      filename
    }))

    const mirrorLink = `https://nightly.link/curbengh/${project}/workflows/pages/main/public.zip`
    console.log(`Downloading ${filename} from "${mirrorLink}"`)
    try {
      await pipeline(
        Readable.fromWeb((await fetch(mirrorLink)).body),
        Extract({ path: publicPath })
      )
    } catch ({ message }) {
      throw new Error(JSON.stringify({
        error: message,
        link: mirrorLink,
        filename
      }))
    }
  }
}
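
// Prepare the output directories, download every project in parallel,
// then prune files that exceed the Cloudflare Pages size limit.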
const f = async () => {
  await mkdir(tmpPath, { recursive: true })
  await mkdir(publicPath, { recursive: true })

  await Promise.all(projects.map((project) => dl(project)))

  const files = await readdir(publicPath)
  await Promise.all(files.map(async (file) => {
    // Cloudflare Pages limits file size to 26.2 MB,
    // so remove the uncompressed phishing-filter rulesets.
    // Compressed (br/gz) files are excluded from this cleanup.
    if (file.startsWith('phishing-filter') && file.endsWith('.rules')) {
      await rm(join(publicPath, file))
    }
  }))
}
f()