malware-filter/src/build.js

'use strict'
// Attempt to download the GitLab job artifact and fail over to GitHub if unsuccessful.
// In GitLab Pages, the latest job status is marked as unknown/failed when the repo has a newer commit.
// The link to download the latest job artifact is also unavailable when that happens,
// unless it is queried manually through the API.
// Instead of using the API, I find it easier to fail over to GitHub.
// ref: https://gitlab.com/gitlab-org/gitlab/-/issues/29257
import { Extract } from 'unzipper'
import { dirname, join } from 'node:path'
import { mkdir, readdir, rm } from 'node:fs/promises'
import { pipeline } from 'node:stream/promises'
import { fileURLToPath } from 'node:url'
import { Readable } from 'node:stream'
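
// ES modules do not provide __dirname, so recreate it from import.meta.url.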
const __dirname = dirname(fileURLToPath(import.meta.url))
const rootPath = join(__dirname, '..')
const tmpPath = join(rootPath, 'tmp')
const publicPath = join(rootPath, 'public')
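
// Filter lists to bundle into the Pages site ('pup-filter' is currently excluded).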
const projects = [
  'urlhaus-filter',
  'phishing-filter',
  'tracking-filter',
  'vn-badsite-filter',
  'botnet-filter'
  // 'pup-filter'
]
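
// Check the pipeline badge SVG; its text contains "failed" when the latest GitLab pipeline failed.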
const pipelineStatus = async (url) => {
  console.log(`Checking pipeline from "${url}"`)
  try {
    const svg = await (await fetch(url)).text()
    if (svg.includes('failed')) throw new Error('last gitlab pipeline failed')
  } catch ({ message }) {
    throw new Error(message)
  }
}
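
// Download a project's GitLab Pages artifact and extract it;
// fall back to the nightly.link GitHub mirror if the download or the pipeline check fails.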
const dl = async (project) => {
  const filename = project + '.zip'
  const link = `https://gitlab.com/malware-filter/${project}/-/jobs/artifacts/main/download?job=pages`
  const pipelineUrl = `https://gitlab.com/malware-filter/${project}/badges/main/pipeline.svg`

  console.log(`Downloading ${filename} from "${link}"`)
  try {
    await pipeline(
      Readable.fromWeb((await fetch(link)).body),
      Extract({ path: rootPath })
    )
    await pipelineStatus(pipelineUrl)
  } catch ({ message }) {
    console.error(JSON.stringify({
      error: message,
      link,
      filename
    }))

    const mirrorLink = `https://nightly.link/curbengh/${project}/workflows/pages/main/public.zip`
    console.log(`Downloading ${filename} from "${mirrorLink}"`)
    try {
      await pipeline(
        Readable.fromWeb((await fetch(mirrorLink)).body),
        Extract({ path: publicPath })
      )
    } catch ({ message }) {
      throw new Error(JSON.stringify({
        error: message,
        link: mirrorLink,
        filename
      }))
    }
  }
}
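
// Prepare the output directories, download every project in parallel,
// then prune files that exceed the Cloudflare Pages size limit.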
const f = async () => {
  await mkdir(tmpPath, { recursive: true })
  await mkdir(publicPath, { recursive: true })

  await Promise.all(projects.map((project) => dl(project)))

  const files = await readdir(publicPath)
  await Promise.all(files.map(async (file) => {
    // Cloudflare Pages limits file size to 26.2 MB,
    // so remove the uncompressed phishing-filter rulesets.
    // Compressed (br/gz) files are excluded from this cleanup.
    if (file.startsWith('phishing-filter') && file.endsWith('.rules')) {
      await rm(join(publicPath, file))
    }
  }))
}
f()