25 lines
		
	
	
		
			568 B
		
	
	
	
		
			Bash
		
	
	
	
			
		
		
	
	
			25 lines
		
	
	
		
			568 B
		
	
	
	
		
			Bash
		
	
	
	
#!/bin/sh

# Build malware-domains.txt from a URLhaus CSV dump.
#
# Inputs (read from the current directory):
#   URLhaus.csv              - URLhaus CSV export
#   urlhaus-top-domains.txt  - hosts to exclude, one per line
# Output:
#   malware-domains.txt      - sorted, de-duplicated hosts that are not
#                              listed in urlhaus-top-domains.txt
#
# NOTE: grep exits with status 1 when it selects no lines, so under
# `set -e` the script ends with a failure if nothing survives the
# exclusion filter (malware-domains.txt is still created, empty).

set -e -x

## Parse domains from URLhaus excluding popular domains

# Each stage ends with a trailing `|`, which lets the pipeline continue on
# the next line; comments can therefore sit between stages without relying
# on backslash continuations.

# Convert DOS to Unix line ending
dos2unix < URLhaus.csv |
# Parse online URLs only (currently disabled)
# grep '"online"' |
# Parse domains and IP address only:
#   6th double-quote-delimited field (presumably the URL column of the CSV)
cut -f 6 -d '"' |
#   3rd slash-delimited field, i.e. host[:port] of scheme://host[:port]/path
cut -f 3 -d '/' |
#   drop any :port suffix
cut -f 1 -d ':' |
# Remove www
# Only matches domains that start with www
# Not examplewww.com
sed 's/^www\.//' |
# Sort and remove duplicates
sort -u |
# Exclude Umbrella Top 1M and well-known domains
# grep inverse match whole line
grep -Fx -vf urlhaus-top-domains.txt > malware-domains.txt