refactor: move script executions from CI config to index.sh

Running the build steps from a single script makes them easier to test locally.
This commit is contained in:
curben 2019-05-27 15:01:57 +09:30
parent a2ce30132c
commit a7046c77a6
8 changed files with 46 additions and 28 deletions

View File

@ -29,36 +29,10 @@ deploy:
# Shallow cloning for faster cloning
- git clone --depth 3 git@gitlab.com:curben/urlhaus-filter.git build
# Change to the downloaded repo directory
- cd build/
# Create a temporary working folder
- mkdir tmp/ && cd tmp/
# Download URLhaus database and Umbrella Top 1M
- sh ../utils/prerequisites.sh
# Process the Umbrella Top 1M
- sh ../utils/umbrella-top-1m.sh
# Parse popular domains that also appear in URLhaus
- sh ../utils/urlhaus-top-domains.sh
# Parse domains from URLhaus excluding popular domains
- sh ../utils/malware-domains.sh
# Parse malware URLs from popular domains
- sh ../utils/malware-url-top-domains.sh
# Merge malware domains and URLs
- sh ../utils/urlhaus-filter.sh
# Commit the changes
- sh ../utils/commit.sh
# Clean up the working folder
- cd ../ && rm -r tmp/
# Run scripts
- sh index.sh
# Push the commit
- git push

32
index.sh Normal file
View File

@ -0,0 +1,32 @@
#!/bin/sh
# Build entry point: runs each step under utils/ in order inside a temporary
# tmp/ working folder, then removes that folder.
#
# Usage: sh index.sh
# Must be started from the directory that contains utils/, because every step
# is referenced as ../utils/<name>.sh from inside tmp/.

# -e: Fail the whole script if any command fails
# -x: Display running command
set -e -x

# Remember the starting directory so cleanup can return to it from tmp/.
root_dir=$(pwd)

# Clean up the working folder.
# Registered as an EXIT trap so it also runs when `set -e` aborts the script
# halfway; otherwise a failed step would leave tmp/ behind.
cleanup() {
  # ${root_dir:?} aborts instead of expanding to "/tmp" if the variable is empty.
  cd "$root_dir" && rm -rf -- "${root_dir:?}/tmp"
}
trap cleanup EXIT

# Discard a stale working folder from an earlier run that was interrupted
# before cleanup could run; a plain `mkdir tmp/` would fail on it.
rm -rf -- "${root_dir:?}/tmp"

# Create a temporary working folder
mkdir tmp/
cd tmp/

# Download URLhaus database and Umbrella Top 1M
sh ../utils/prerequisites.sh

# Process the Umbrella Top 1M
sh ../utils/umbrella-top-1m.sh

# Parse popular domains that also appear in URLhaus
sh ../utils/urlhaus-top-domains.sh

# Parse domains from URLhaus excluding popular domains
sh ../utils/malware-domains.sh

# Parse malware URLs from popular domains
sh ../utils/malware-url-top-domains.sh

# Merge malware domains and URLs
sh ../utils/urlhaus-filter.sh

# Commit the changes
sh ../utils/commit.sh

# tmp/ is removed by the EXIT trap registered above.

View File

@ -1,5 +1,7 @@
#!/bin/sh
set -e -x
## Parse domains from URLhaus excluding popular domains
cat URLhaus.csv | \

View File

@ -1,5 +1,7 @@
#!/bin/sh
set -e -x
## Parse malware URLs from popular URLhaus domains
cat URLhaus.csv | \

View File

@ -1,5 +1,7 @@
#!/bin/sh
set -e -x
# Download URLhaus database
wget https://urlhaus.abuse.ch/downloads/csv/ -O ../src/URLhaus.csv

View File

@ -1,5 +1,7 @@
#!/bin/sh
set -e -x
## Parse the Cisco Umbrella 1 Million
## More info:
## https://s3-us-west-1.amazonaws.com/umbrella-static/index.html

View File

@ -1,5 +1,7 @@
#!/bin/sh
set -e -x
## Merge malware-domains.txt malware-url-top-domains.txt,
## and append a header to instruct uBO to grab the filter daily.

View File

@ -1,5 +1,7 @@
#!/bin/sh
set -e -x
## Parse popular domains from URLhaus
cat URLhaus.csv | \