trojanhorse-js
Version:
A comprehensive JavaScript library for fetching, managing, and analyzing global threat intelligence from multiple open-source feeds and security news sources. Unlike its mythological namesake, this Trojan protects your digital fortress.
549 lines (469 loc) • 16.9 kB
text/typescript
/**
* URLhaus Feed - Abuse.ch malicious URL feed integration
* CSV-based feed with no authentication required
*/
import axios, { AxiosInstance } from 'axios';
import {
ThreatIndicator,
ThreatFeedResult,
FeedConfiguration,
URLhausEntry,
TrojanHorseError,
RateLimitError
} from '../types';
/**
 * Bookkeeping exposed by the feed about its fetch activity.
 */
interface FeedStats {
  /** Number of fetches that completed successfully. */
  successCount: number;
  /** Number of failed fetch attempts plus CSV lines whose parsing threw. */
  errorCount: number;
  /** Running total of processed requests (advanced on each successful fetch). */
  requestsProcessed: number;
  /** Time of the most recent successful fetch, or `null` if none has happened yet. */
  lastFetch: Date | null;
  /** Last fetch time plus the feed's minimum fetch interval. */
  nextAllowedFetch: Date;
  /** Rate-limit settings copied from the feed configuration. */
  rateLimit: FeedConfiguration['rateLimit'];
}
/**
 * One cached fetch result together with the moment it was stored.
 */
interface CacheEntry {
  /** `Date.now()` value (epoch milliseconds) captured when the entry was stored. */
  timestamp: number;
  /** The feed result being cached. */
  data: ThreatFeedResult;
}
/**
 * Loosely typed view of the fields inspected on an error caught while fetching.
 * Both genuine axios errors and mocked look-alikes fit this shape.
 */
type RequestErrorLike = {
  name?: string;
  message?: string;
  code?: string;
  isAxiosError?: boolean;
  config?: unknown;
  request?: unknown;
  response?: {
    status?: number;
    statusText?: string;
    headers?: Record<string, unknown>;
  };
};

/**
 * Client for the abuse.ch URLhaus "recent URLs" CSV dump.
 *
 * Downloads the CSV from the configured endpoint, parses it into
 * `URLhausEntry` rows and converts those into `ThreatIndicator`s (one per URL
 * plus one derived host indicator). Results are cached in memory for
 * `cacheTTL` milliseconds and concurrent callers share a single in-flight
 * request.
 */
export class URLhausFeed {
  /** Largest `Content-Length` (in bytes) accepted by the response interceptor. */
  private static readonly MAX_RESPONSE_BYTES = 50 * 1024 * 1024; // 50MB max
  /** Wait time reported for HTTP 429 when `Retry-After` is absent or unusable. */
  private static readonly DEFAULT_RETRY_AFTER_MS = 300000;
  /** Dotted-quad host name (the URL parser has already rejected out-of-range octets). */
  private static readonly IPV4_HOST = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
  /** Severity keywords, matched against the lower-cased threat type and tags. */
  private static readonly CRITICAL_TERMS: readonly string[] = ['apt', 'targeted', 'zero-day'];
  private static readonly HIGH_TERMS: readonly string[] = ['ransomware', 'banking', 'stealer', 'trojan'];
  private static readonly LOW_TERMS: readonly string[] = ['adware', 'potentially unwanted', 'pup'];
  /**
   * Patterns tried in order by `extractMalwareFamily`. Specific family names
   * come first; the trailing entries are generic categories used as a fallback.
   */
  private static readonly FAMILY_PATTERNS: readonly RegExp[] = [
    /emotet/i,
    /trickbot/i,
    /dridex/i,
    /qakbot/i,
    /cobalt\s*strike/i,
    /metasploit/i,
    /mirai/i,
    /locky/i,
    /wannacry/i,
    /malware/i,
    /trojan/i,
    /ransomware/i,
    /phishing/i,
    /exploit/i
  ];

  private readonly axiosInstance: AxiosInstance;
  private config: FeedConfiguration; // Not readonly: replaced by updateConfig
  private lastFetchTime: number = 0;
  private readonly MIN_FETCH_INTERVAL = 300000; // 5 minutes
  private stats: FeedStats;
  private cache: Map<string, CacheEntry> = new Map();
  // In-flight requests keyed like `cache`, so concurrent callers share one download
  private promiseCache: Map<string, Promise<ThreatFeedResult>> = new Map();
  private readonly DEFAULT_CACHE_TTL = 600000; // 10 minutes

  /**
   * Build the feed with its default configuration and a dedicated axios instance.
   */
  constructor() {
    this.config = {
      name: 'URLhaus',
      type: 'csv',
      endpoint: 'https://urlhaus.abuse.ch/downloads/csv_recent/',
      authentication: {
        type: 'none',
        required: false
      },
      rateLimit: {
        requestsPerHour: 12, // Conservative rate limit
        burstLimit: 1
      },
      enabled: true,
      priority: 'high',
      sslPinning: true,
      timeout: 30000,
      retries: 3,
      cacheTTL: this.DEFAULT_CACHE_TTL
    };
    this.stats = {
      lastFetch: null,
      nextAllowedFetch: new Date(Date.now() + this.MIN_FETCH_INTERVAL),
      rateLimit: this.config.rateLimit,
      successCount: 0,
      errorCount: 0,
      requestsProcessed: 0
    };
    this.axiosInstance = axios.create({
      timeout: this.config.timeout ?? 30000,
      headers: {
        'User-Agent': 'TrojanHorse.js/1.0.1 (Threat Intelligence Library)',
        'Accept': 'text/csv',
        'Cache-Control': 'no-cache'
      },
      // NOTE: no pinning agent is installed here even though `sslPinning` is set above
      httpsAgent: undefined,
      validateStatus: (status) => status >= 200 && status < 300
    });
    this.setupInterceptors();
  }

  /**
   * Merge `newConfig` over the current configuration.
   *
   * @param newConfig - Fields to override; omitted fields keep their value.
   */
  public updateConfig(newConfig: Partial<FeedConfiguration>): void {
    this.config = { ...this.config, ...newConfig };
    // Compare against undefined so an explicit 0 (axios: "no timeout") is propagated too
    if (newConfig.timeout !== undefined) {
      this.axiosInstance.defaults.timeout = newConfig.timeout;
    }
  }

  /**
   * Fetch recent malicious URLs from URLhaus.
   *
   * Serves a cached result while it is fresh, joins an already running
   * request if there is one, and otherwise starts a new download.
   *
   * @returns The parsed feed result.
   * @throws RateLimitError when the server answers HTTP 429.
   * @throws TrojanHorseError for every other failure.
   */
  public async fetchThreatData(): Promise<ThreatFeedResult> {
    const cacheKey = 'recent_urls';

    const cached = this.getCachedData(cacheKey);
    if (cached) {
      return cached;
    }

    const inFlight = this.promiseCache.get(cacheKey);
    if (inFlight) {
      return inFlight;
    }

    const requestPromise = this.performFetch(cacheKey);
    this.promiseCache.set(cacheKey, requestPromise);
    try {
      return await requestPromise;
    } finally {
      // Drop the shared promise whether the request succeeded or failed
      this.promiseCache.delete(cacheKey);
    }
  }

  /**
   * Get feed configuration.
   *
   * @returns A shallow copy; nested objects are still shared with the feed.
   */
  public getConfig(): FeedConfiguration {
    return { ...this.config };
  }

  /**
   * Check if feed is available by issuing a HEAD request to the endpoint.
   *
   * @returns `true` only for an HTTP 200 answer; any error yields `false`.
   */
  public async checkAvailability(): Promise<boolean> {
    try {
      const response = await this.axiosInstance.head(this.config.endpoint);
      return response.status === 200;
    } catch {
      return false;
    }
  }

  /**
   * Get feed statistics.
   *
   * @returns A fresh snapshot; the fetch times are derived from the last successful fetch.
   */
  public getStats(): FeedStats {
    return {
      lastFetch: this.lastFetchTime ? new Date(this.lastFetchTime) : null,
      nextAllowedFetch: new Date(this.lastFetchTime + this.MIN_FETCH_INTERVAL),
      rateLimit: this.config.rateLimit,
      successCount: this.stats.successCount,
      errorCount: this.stats.errorCount,
      requestsProcessed: this.stats.requestsProcessed
    };
  }

  // === PRIVATE METHODS ===

  /**
   * Look up a cached result, evicting it when it has outlived the TTL.
   *
   * @param key - Cache key.
   * @returns The cached result, or `null` when missing or expired.
   */
  private getCachedData(key: string): ThreatFeedResult | null {
    const entry = this.cache.get(key);
    if (!entry) {
      return null;
    }
    // `??` keeps an explicit TTL of 0, which disables caching altogether
    const ttl = this.config.cacheTTL ?? this.DEFAULT_CACHE_TTL;
    if (ttl <= 0 || Date.now() - entry.timestamp > ttl) {
      this.cache.delete(key);
      return null;
    }
    return entry.data;
  }

  /**
   * Store a result in the cache, stamped with the current time.
   */
  private setCachedData(key: string, data: ThreatFeedResult): void {
    this.cache.set(key, {
      data,
      timestamp: Date.now()
    });
  }

  /**
   * Install the axios request/response interceptors.
   */
  private setupInterceptors(): void {
    // Request interceptor: pass-through hook kept as the place to attach logging
    this.axiosInstance.interceptors.request.use(
      (config) => config,
      (error) => Promise.reject(error)
    );
    // Response interceptor: reject bodies whose declared size is excessive (prevent DoS)
    this.axiosInstance.interceptors.response.use(
      (response) => {
        const declaredSize = Number.parseInt(String(response.headers?.['content-length'] ?? ''), 10);
        // A missing or non-numeric header parses to NaN, which never exceeds the limit
        if (declaredSize > URLhausFeed.MAX_RESPONSE_BYTES) {
          throw new TrojanHorseError(
            'Response too large',
            'RESPONSE_TOO_LARGE',
            response.status
          );
        }
        return response;
      },
      (error) => Promise.reject(error)
    );
  }

  /**
   * Throw when the previous fetch happened less than `MIN_FETCH_INTERVAL` ago.
   * Not called by any method in this class at present.
   *
   * @throws RateLimitError carrying the remaining wait time in milliseconds.
   */
  // @ts-ignore - Keep for future use
  private checkRateLimit(): void {
    const now = Date.now();
    const timeSinceLastFetch = now - this.lastFetchTime;
    if (timeSinceLastFetch < this.MIN_FETCH_INTERVAL) {
      const waitTime = this.MIN_FETCH_INTERVAL - timeSinceLastFetch;
      throw new RateLimitError(
        `URLhaus rate limit: must wait ${Math.ceil(waitTime / 1000)} seconds`,
        waitTime
      );
    }
  }

  /**
   * Download and parse the feed, retrying transient failures with
   * exponential backoff (1s, 2s, 4s, ...).
   *
   * @param cacheKey - Key under which a successful result is cached.
   * @returns The parsed feed result.
   * @throws RateLimitError on HTTP 429.
   * @throws TrojanHorseError for any other failure once retries are exhausted.
   */
  private async performFetch(cacheKey: string): Promise<ThreatFeedResult> {
    // `??` so that `retries: 0` really means "single attempt"
    const maxRetries = this.config.retries ?? 3;
    for (let attempt = 0; ; attempt++) {
      try {
        const response = await this.axiosInstance.get(this.config.endpoint);
        const csvData = response.data as string;
        const entries = this.parseCSV(csvData);
        const indicators = this.convertToThreatIndicators(entries);
        this.lastFetchTime = Date.now();
        this.stats.successCount++;
        this.stats.requestsProcessed++;
        this.stats.lastFetch = new Date();
        const result: ThreatFeedResult = {
          source: this.config.name,
          timestamp: new Date(),
          indicators,
          metadata: {
            totalCount: indicators.length,
            totalIndicators: indicators.length,
            requestsProcessed: this.stats.requestsProcessed
          }
        };
        this.setCachedData(cacheKey, result);
        return result;
      } catch (error: unknown) {
        this.stats.errorCount++;
        if (attempt < maxRetries && this.isRetryable(error)) {
          // Wait before retry (exponential backoff)
          await new Promise<void>(resolve => setTimeout(resolve, 2 ** attempt * 1000));
          continue;
        }
        return this.raiseFeedError(error);
      }
    }
  }

  /**
   * Decide whether a failed attempt is worth repeating. Errors raised by this
   * library itself (e.g. the response-size guard) and HTTP 4xx answers,
   * including 429, will not change on an immediate retry.
   */
  private isRetryable(error: unknown): boolean {
    if (error instanceof TrojanHorseError) {
      return false;
    }
    const status = ((error ?? {}) as RequestErrorLike).response?.status;
    return !(typeof status === 'number' && status >= 400 && status < 500);
  }

  /**
   * Translate a caught error into the library's error types and throw it.
   *
   * @param error - Whatever the request or parsing code threw.
   * @throws RateLimitError on HTTP 429.
   * @throws TrojanHorseError otherwise; an existing `TrojanHorseError` is
   *   rethrown untouched so its error code survives.
   */
  private raiseFeedError(error: unknown): never {
    if (error instanceof TrojanHorseError) {
      throw error;
    }
    const err = (error ?? {}) as RequestErrorLike;
    const details = { provider: 'URLhaus', originalError: error };
    const message = typeof err.message === 'string' ? err.message : '';
    const response =
      typeof err.response === 'object' && err.response !== null ? err.response : undefined;

    // Accept real axios errors as well as look-alikes (including mocked ones)
    const looksLikeAxios =
      axios.isAxiosError(error) ||
      Boolean(err.isAxiosError) ||
      err.name === 'AxiosError' ||
      Boolean(err.config) ||
      Boolean(err.request) ||
      response !== undefined;

    if (looksLikeAxios) {
      const status = response?.status;
      const statusText = response?.statusText || 'HTTP Error';
      // Rate limiting (429)
      if (status === 429) {
        throw new RateLimitError(
          'URLhaus rate limit exceeded',
          URLhausFeed.parseRetryAfterMs(response?.headers?.['retry-after'])
        );
      }
      // Server errors (5xx)
      if (status && status >= 500) {
        throw new TrojanHorseError(
          `URLhaus server error: ${status} ${statusText}`,
          'FEED_ERROR',
          status,
          details
        );
      }
      // Client errors (4xx)
      if (status && status >= 400) {
        throw new TrojanHorseError(
          `URLhaus HTTP error: ${status} ${statusText}`,
          'HTTP_ERROR',
          status,
          details
        );
      }
      // Timeout errors
      if (err.code === 'ECONNABORTED' || message.includes('timeout')) {
        throw new TrojanHorseError(
          `URLhaus request timeout: ${message || 'Request timeout'}`,
          'TIMEOUT_ERROR',
          undefined,
          details
        );
      }
      // Generic axios error
      throw new TrojanHorseError(
        `URLhaus feed error: ${message || 'Request failed'}`,
        'FEED_ERROR',
        status,
        details
      );
    }

    // Network errors that aren't axios errors
    if (message.includes('Network Error') || message.includes('fetch')) {
      throw new TrojanHorseError(
        `URLhaus network error: ${message}`,
        'NETWORK_ERROR',
        undefined,
        details
      );
    }
    throw new TrojanHorseError(
      'Unknown error fetching URLhaus data',
      'UNKNOWN_ERROR',
      undefined,
      details
    );
  }

  /**
   * Convert a `Retry-After` header value (delay in seconds) to milliseconds.
   * The HTTP-date form and missing/garbled values fall back to the default
   * instead of producing `NaN`.
   */
  private static parseRetryAfterMs(headerValue: unknown): number {
    const seconds = Number.parseInt(String(headerValue ?? ''), 10);
    return Number.isFinite(seconds) && seconds >= 0
      ? seconds * 1000
      : URLhausFeed.DEFAULT_RETRY_AFTER_MS;
  }

  /**
   * Parse the CSV dump into entries. Blank lines and `#` comment/header lines
   * are skipped; rows that cannot be parsed are dropped.
   *
   * @param csvData - Raw CSV text (LF or CRLF line endings).
   */
  private parseCSV(csvData: string): URLhausEntry[] {
    const entries: URLhausEntry[] = [];
    for (const line of csvData.trim().split(/\r?\n/)) {
      if (line.startsWith('#') || !line.trim()) {
        continue;
      }
      try {
        const entry = this.parseCSVLine(line);
        if (entry) {
          entries.push(entry);
        }
      } catch {
        // Count the failure but keep processing the remaining rows
        this.stats.errorCount++;
      }
    }
    return entries;
  }

  /**
   * Parse one CSV row.
   *
   * Two layouts are accepted:
   *   8 columns: id,dateadded,url,url_status,threat,tags,urlhaus_link,reporter
   *   9 columns: id,dateadded,url,url_status,last_online,threat,tags,urlhaus_link,reporter
   * NOTE(review): the 9-column layout follows the current URLhaus dump header -
   * confirm against the live feed.
   *
   * @returns The entry, or `null` when the row is too short, lacks id/date/url
   *   or carries an unparseable date.
   */
  private parseCSVLine(line: string): URLhausEntry | null {
    const columns = this.parseCSVColumns(line);
    if (columns.length < 8) {
      return null;
    }
    const [id, dateAdded, url, urlStatus] = columns;
    if (!id || !dateAdded || !url) {
      return null;
    }
    const parsedDate = new Date(dateAdded.trim());
    if (Number.isNaN(parsedDate.getTime())) {
      return null;
    }
    // With a last_online column present, everything after url_status shifts by one
    const threatIndex = columns.length >= 9 ? 5 : 4;
    const threat = columns[threatIndex];
    const tags = columns[threatIndex + 1];
    const reporter = columns[threatIndex + 3];
    return {
      id: id.trim(),
      dateAdded: parsedDate,
      url: url.trim(),
      urlStatus: (urlStatus?.trim() || 'offline') as 'online' | 'offline',
      threat: threat?.trim() || 'unknown',
      tags: tags ? tags.split(',').map(tag => tag.trim()).filter(Boolean) : [],
      reporter: reporter?.trim() || 'unknown'
    };
  }

  /**
   * Split one CSV row into fields. Commas inside double quotes do not split,
   * surrounding quotes are removed and a doubled quote (`""`) inside a quoted
   * field yields a literal `"`.
   */
  private parseCSVColumns(line: string): string[] {
    const columns: string[] = [];
    let current = '';
    let inQuotes = false;
    for (let i = 0; i < line.length; i++) {
      const char = line.charAt(i);
      if (char === '"') {
        if (inQuotes && line.charAt(i + 1) === '"') {
          current += '"';
          i++; // consume the second quote of the escape pair
        } else {
          inQuotes = !inQuotes;
        }
      } else if (char === ',' && !inQuotes) {
        columns.push(current);
        current = '';
      } else {
        current += char;
      }
    }
    columns.push(current);
    return columns;
  }

  /**
   * Turn entries into indicators: one `url` indicator per entry plus, when the
   * URL parses, one indicator for its host - `ip` for IPv4/IPv6 literals,
   * `domain` otherwise.
   */
  private convertToThreatIndicators(entries: URLhausEntry[]): ThreatIndicator[] {
    const indicators: ThreatIndicator[] = [];
    for (const entry of entries) {
      // Identical for every indicator derived from this entry, so compute once
      const severity = this.determineSeverity(entry.threat, entry.tags);
      const malwareFamily = this.extractMalwareFamily(entry.threat, entry.tags);

      indicators.push({
        type: 'url',
        value: entry.url,
        confidence: 0.85, // URLhaus has high confidence
        firstSeen: entry.dateAdded,
        lastSeen: entry.dateAdded, // URLhaus doesn't provide separate last seen
        source: 'URLhaus',
        tags: [entry.threat, ...entry.tags].filter(Boolean),
        malwareFamily,
        severity
      });

      let hostname: string;
      try {
        hostname = new URL(entry.url).hostname;
      } catch {
        // Invalid URL, skip derived indicators
        continue;
      }
      if (!hostname || hostname === entry.url) {
        continue;
      }

      // The URL parser reports IPv6 literals in brackets; strip them for the indicator value
      const isIPv6 = hostname.startsWith('[') && hostname.endsWith(']');
      const isIp = isIPv6 || URLhausFeed.IPV4_HOST.test(hostname);
      indicators.push({
        type: isIp ? 'ip' : 'domain',
        value: isIPv6 ? hostname.slice(1, -1) : hostname,
        confidence: isIp ? 0.8 : 0.75, // Slightly lower confidence for derived indicators
        firstSeen: entry.dateAdded,
        lastSeen: entry.dateAdded,
        source: 'URLhaus',
        tags: [entry.threat, ...entry.tags, 'derived-from-url'].filter(Boolean),
        malwareFamily,
        severity
      });
    }
    return indicators;
  }

  /**
   * Grade an entry by keywords in its threat type and tags. Critical terms
   * win over high, high over low; anything else is `medium`.
   */
  private determineSeverity(threat: string, tags: string[]): 'low' | 'medium' | 'high' | 'critical' {
    const haystack = [threat, ...tags].join(' ').toLowerCase();
    const mentions = (terms: readonly string[]): boolean =>
      terms.some(term => URLhausFeed.containsTerm(haystack, term));

    if (mentions(URLhausFeed.CRITICAL_TERMS)) {
      return 'critical';
    }
    if (mentions(URLhausFeed.HIGH_TERMS)) {
      return 'high';
    }
    if (mentions(URLhausFeed.LOW_TERMS)) {
      return 'low';
    }
    return 'medium'; // Default severity
  }

  /**
   * Keyword test used for severity grading. Longer terms match as substrings
   * (so "stealer" hits "redlinestealer"); terms of three letters or fewer must
   * not touch another letter, so "apt" hits "apt28" but not "fakecaptcha".
   *
   * @param text - Lower-cased text to search.
   * @param term - Lower-case keyword.
   */
  private static containsTerm(text: string, term: string): boolean {
    if (term.length > 3) {
      return text.includes(term);
    }
    const isLetter = (c: string): boolean => c >= 'a' && c <= 'z';
    let from = 0;
    for (;;) {
      const at = text.indexOf(term, from);
      if (at === -1) {
        return false;
      }
      // charAt yields '' outside the string, which is not a letter
      if (!isLetter(text.charAt(at - 1)) && !isLetter(text.charAt(at + term.length))) {
        return true;
      }
      from = at + 1;
    }
  }

  /**
   * Derive a malware family label from the threat type and tags.
   *
   * @returns The lower-cased text of the first matching pattern; otherwise the
   *   lower-cased threat type when it is set, not `unknown` and free of
   *   hyphens; otherwise `undefined`.
   */
  private extractMalwareFamily(threat: string, tags: string[]): string | undefined {
    const allText = [threat, ...tags].join(' ');
    for (const pattern of URLhausFeed.FAMILY_PATTERNS) {
      const match = allText.match(pattern);
      if (match) {
        return match[0].toLowerCase();
      }
    }
    // Fall back to the cleaned threat type if no specific pattern was found
    const threatLower = threat?.toLowerCase()?.trim();
    if (threatLower && threatLower !== 'unknown' && !threatLower.includes('-')) {
      return threatLower;
    }
    return undefined;
  }
}