dnsweeper
Version:
Advanced CLI tool for DNS record risk analysis and cleanup. Features CSV import for Cloudflare/Route53, automated risk assessment, and parallel DNS validation.
677 lines (590 loc) • 20 kB
text/typescript
import fs, { createReadStream } from 'fs';
import { Readable } from 'stream';
import Papa from 'papaparse';
import {
  detectCsvEncoding,
  readFileWithDetectedEncoding,
  evaluateDetectionReliability,
  type SupportedEncoding,
} from '../utils/encoding-detector.js';
import { CsvProcessingError } from './errors.js';
import { CSVBatchProcessor } from './performance/batch-processor.js';
import { MemoryOptimizer } from './performance/memory-optimizer.js';
import type { DNSRecordType, ICSVRecord } from '../types/index.js';
/**
 * Options accepted by the CSVProcessor constructor.
 * Any option left out falls back to CSVProcessor's `defaultOptions`.
 */
export interface ICSVParseOptions {
// Forwarded to Papa.parse as `skipEmptyLines`.
skipEmptyLines?: boolean;
// When true, every cell value is passed through `String.prototype.trim()` while parsing.
trimValues?: boolean;
// Delimiter used when delimiter auto-detection is off or produced no result.
delimiter?: string;
// Encoding used to read the file when `autoDetectEncoding` is false.
encoding?: SupportedEncoding;
// When true, the file encoding is detected with `detectCsvEncoding` before reading.
autoDetectEncoding?: boolean;
// When true, candidate delimiters are scored on the first lines of the file and the best one is used.
autoDetectDelimiter?: boolean;
}
/**
 * Outcome of parsing one CSV file with CSVProcessor.
 */
export interface ICSVParseResult {
// Rows that were successfully converted to DNS records.
records: ICSVRecord[];
// Errors collected while parsing the file.
errors: Papa.ParseError[];
// Parser metadata as reported by PapaParse.
meta: Papa.ParseMeta;
// Number of data rows the parser produced (converted or not).
totalRows: number;
// Number of rows that ended up in `records`.
validRows: number;
// Present only when encoding auto-detection ran for this file.
encodingInfo?: {
detectedEncoding: SupportedEncoding;
confidence: number;
reliability: 'high' | 'medium' | 'low';
bomPresent: boolean;
};
// Present only when delimiter auto-detection ran: the candidates found and the one chosen.
delimiters?: {
detected: string[];
used: string;
};
}
/** CSV layouts this processor knows how to convert. */
type CsvFormat = 'cloudflare' | 'route53' | 'generic';

/** Decoded file content plus whatever was learned while detecting encoding and delimiter. */
type LoadedCsv = {
  fileContent: string;
  encodingInfo?: ICSVParseResult['encodingInfo'];
  delimiters?: ICSVParseResult['delimiters'];
};

/**
 * Reads DNS-record CSV exports (Cloudflare, Route53, or a generic layout) and
 * converts every usable row into an `ICSVRecord`.
 *
 * Header names are lower-cased and trimmed before rows are converted, so the
 * row interfaces at the bottom of this file always use lower-case keys.
 */
export class CSVProcessor {
  /** Settings applied for any option the caller leaves out. */
  private defaultOptions: ICSVParseOptions = {
    skipEmptyLines: true,
    trimValues: true,
    delimiter: ',',
    encoding: 'utf-8',
    autoDetectEncoding: true,
    autoDetectDelimiter: true,
  };

  private batchProcessor: CSVBatchProcessor<ICSVRecord>;

  /**
   * @param options Overrides for `defaultOptions`; omitted keys keep their default.
   */
  constructor(private options: ICSVParseOptions = {}) {
    this.options = { ...this.defaultOptions, ...options };
    // Batch processor used by processBatchRecords for large record sets.
    this.batchProcessor = new CSVBatchProcessor({
      batchSize: 10000,
      concurrency: 4,
      onProgress: (processed, total) => {
        // Guard the division so an empty batch does not print "NaN%".
        const percent = total > 0 ? Math.round((processed / total) * 100) : 0;
        console.log(`CSV processing progress: ${processed}/${total} (${percent}%)`);
      },
    });
  }

  /**
   * Parse Cloudflare DNS export CSV format.
   * Format: Name,Type,Content,TTL,Priority
   *
   * @param filePath Path of the CSV file to read.
   * @returns Converted records plus parser errors and detection metadata.
   * @throws CsvProcessingError when the file cannot be read or decoded.
   */
  async parseCloudflare(filePath: string): Promise<ICSVParseResult> {
    const loaded = await this.readFileWithAutoDetection(filePath);
    return this.parseContent<ICloudflareCSVRow>(loaded, (row) => this.convertCloudflareRow(row));
  }

  /**
   * Parse Route53 CSV export format.
   * Format: Name,Type,Value,TTL,Weight,SetIdentifier
   *
   * @param filePath Path of the CSV file to read.
   * @returns Converted records plus parser errors and detection metadata.
   * @throws CsvProcessingError when the file cannot be read or decoded.
   */
  async parseRoute53(filePath: string): Promise<ICSVParseResult> {
    const loaded = await this.readFileWithAutoDetection(filePath);
    return this.parseContent<IRoute53CSVRow>(loaded, (row) => this.convertRoute53Row(row));
  }

  /**
   * Parse generic CSV format.
   * Format: domain,record_type,value,ttl,priority,weight,port
   *
   * @param filePath Path of the CSV file to read.
   * @returns Converted records plus parser errors and detection metadata.
   * @throws CsvProcessingError when the file cannot be read or decoded.
   */
  async parseGeneric(filePath: string): Promise<ICSVParseResult> {
    const loaded = await this.readFileWithAutoDetection(filePath);
    return this.parseContent<IGenericCSVRow>(loaded, (row) => this.convertGenericRow(row));
  }

  /**
   * Auto-detect the CSV format from the header line and parse accordingly.
   *
   * The file is read and decoded once; the decoded content is reused for the
   * actual parse instead of reading the file a second time.
   *
   * @param filePath Path of the CSV file to read.
   * @throws CsvProcessingError when the header matches none of the known layouts,
   *         or when the file cannot be read.
   */
  async parseAuto(filePath: string): Promise<ICSVParseResult> {
    const loaded = await this.readFileWithAutoDetection(filePath);
    const firstLine = loaded.fileContent.split('\n')[0]?.toLowerCase() || '';

    switch (this.detectFormat(firstLine)) {
      case 'cloudflare':
        return this.parseContent<ICloudflareCSVRow>(loaded, (row) =>
          this.convertCloudflareRow(row),
        );
      case 'route53':
        return this.parseContent<IRoute53CSVRow>(loaded, (row) => this.convertRoute53Row(row));
      case 'generic':
        return this.parseContent<IGenericCSVRow>(loaded, (row) => this.convertGenericRow(row));
      default:
        throw new CsvProcessingError('サポートされていないCSV形式です。ヘッダーが認識できません。', {
          filePath,
          firstLine,
        });
    }
  }

  /**
   * Parse a CSV file row by row, handing each converted record to `onRecord`
   * instead of collecting them.
   *
   * @param filePath Path of the CSV file to read.
   * @param onRecord Called once for every row that converts to a record.
   * @param format   Layout used to convert rows; defaults to `'generic'`.
   * @returns Number of records delivered and every error seen (parser errors
   *          and exceptions thrown while converting or delivering a row).
   */
  async parseStreaming(
    filePath: string,
    onRecord: (record: ICSVRecord) => void,
    format: 'cloudflare' | 'route53' | 'generic' = 'generic',
  ): Promise<{ totalProcessed: number; errors: Papa.ParseError[] }> {
    const { fileContent, encodingInfo, delimiters } =
      await this.readFileWithAutoDetection(filePath);
    const encoding = encodingInfo?.detectedEncoding || this.options.encoding || 'utf-8';

    // fs streams can only decode Node's built-in encodings. If the detected
    // encoding is not one of them, stream the content that was already decoded
    // during detection rather than letting createReadStream throw.
    const stream = Buffer.isEncoding(encoding)
      ? fs.createReadStream(filePath, { encoding })
      : Readable.from([fileContent]);

    return new Promise((resolve, reject) => {
      let totalProcessed = 0;
      let rowIndex = 0;
      const errors: Papa.ParseError[] = [];

      Papa.parse(stream, {
        header: true,
        skipEmptyLines: this.options.skipEmptyLines,
        delimiter: delimiters?.used || this.options.delimiter,
        transformHeader: (header) => header.toLowerCase().trim(),
        transform: (value) => (this.options.trimValues ? value.trim() : value),
        step: (row) => {
          // Index of the data row being handled, independent of how many rows succeeded.
          const currentRow = rowIndex++;
          if (row.errors && row.errors.length > 0) {
            errors.push(...row.errors);
          }
          try {
            const record = this.convertRowToRecord(row.data, format);
            if (record) {
              onRecord(record);
              totalProcessed++;
            }
          } catch (error: unknown) {
            errors.push(this.toRowError(error, currentRow));
          }
        },
        complete: () => {
          resolve({ totalProcessed, errors });
        },
        error: (error) => reject(error),
      });
    });
  }

  /**
   * Run Papa.parse over already-decoded content and convert every row.
   * Shared by all non-streaming parse methods.
   */
  private parseContent<TRow>(
    loaded: LoadedCsv,
    convertRow: (row: TRow) => ICSVRecord | null,
  ): Promise<ICSVParseResult> {
    return new Promise((resolve, reject) => {
      Papa.parse<TRow>(loaded.fileContent, {
        header: true,
        skipEmptyLines: this.options.skipEmptyLines,
        delimiter: loaded.delimiters?.used || this.options.delimiter,
        transformHeader: (header) => header.toLowerCase().trim(),
        transform: (value) => (this.options.trimValues ? value.trim() : value),
        complete: (results) => {
          try {
            const parsed = this.collectRecords(results, convertRow);
            parsed.encodingInfo = loaded.encodingInfo;
            parsed.delimiters = loaded.delimiters;
            resolve(parsed);
          } catch (error: unknown) {
            reject(error);
          }
        },
        error: (error: Error) => reject(error),
      });
    });
  }

  /**
   * Convert parsed rows into records. Rows whose converter returns `null` are
   * skipped; a converter that throws is recorded as an error for that row
   * instead of being dropped silently.
   */
  private collectRecords<TRow>(
    results: Papa.ParseResult<TRow>,
    convertRow: (row: TRow) => ICSVRecord | null,
  ): ICSVParseResult {
    const records: ICSVRecord[] = [];
    // Copy so the parser's own error array is not mutated.
    const errors: Papa.ParseError[] = [...results.errors];

    results.data.forEach((row, index) => {
      try {
        const record = convertRow(row);
        if (record) {
          records.push(record);
        }
      } catch (error: unknown) {
        errors.push(this.toRowError(error, index));
      }
    });

    return {
      records,
      errors,
      meta: results.meta,
      totalRows: results.data.length,
      validRows: records.length,
    };
  }

  /** Wrap an exception raised while handling a row as a parser-style error entry. */
  private toRowError(error: unknown, row: number): Papa.ParseError {
    return {
      type: 'FieldMismatch',
      code: 'TooFewFields',
      message: error instanceof Error ? error.message : 'Unknown error',
      row,
    };
  }

  /**
   * Identify the CSV layout from its header line.
   *
   * The header is normalised first (BOM and surrounding quotes removed, any of
   * `, ; TAB |` treated as the separator) so quoted, tab- or pipe-separated
   * headers are recognised as well as plain comma/semicolon ones.
   *
   * @returns The matching layout, or `null` when the header is not recognised.
   */
  private detectFormat(headerLine: string): CsvFormat | null {
    const normalized = headerLine
      .replace(/^\uFEFF/, '')
      .toLowerCase()
      .split(/[,;\t|]/)
      .map((column) =>
        column
          .trim()
          .replace(/^["']|["']$/g, '')
          .trim(),
      )
      .join(',');

    if (normalized.includes('name,type,content,ttl')) {
      return 'cloudflare';
    }
    if (normalized.includes('name,type,value,ttl')) {
      return 'route53';
    }
    if (normalized.includes('domain,record_type,value')) {
      return 'generic';
    }
    return null;
  }

  /** Dispatch a raw parsed row to the converter for `format`. */
  private convertRowToRecord(
    row: unknown,
    format: 'cloudflare' | 'route53' | 'generic',
  ): ICSVRecord | null {
    switch (format) {
      case 'cloudflare':
        return this.convertCloudflareRow(row as ICloudflareCSVRow);
      case 'route53':
        return this.convertRoute53Row(row as IRoute53CSVRow);
      case 'generic':
        return this.convertGenericRow(row as IGenericCSVRow);
      default:
        throw new Error(`Unsupported format: ${String(format)}`);
    }
  }

  /**
   * Parse a base-10 integer cell.
   * @returns `undefined` for an empty/missing cell or one that is not a number,
   *          so `NaN` never leaks into a record.
   */
  private static parseOptionalInt(raw: string | undefined): number | undefined {
    if (!raw) {
      return undefined;
    }
    const parsed = parseInt(raw, 10);
    return Number.isNaN(parsed) ? undefined : parsed;
  }

  /**
   * Convert one Cloudflare row. Returns `null` when name, type or content is
   * missing. TTL falls back to 3600 when absent or non-numeric.
   */
  private convertCloudflareRow(row: ICloudflareCSVRow): ICSVRecord | null {
    if (!row.name || !row.type || !row.content) {
      return null;
    }
    return {
      domain: row.name,
      type: row.type.toUpperCase() as DNSRecordType,
      value: row.content,
      ttl: CSVProcessor.parseOptionalInt(row.ttl) ?? 3600,
      priority: CSVProcessor.parseOptionalInt(row.priority),
    };
  }

  /**
   * Convert one Route53 row. Returns `null` when name, type or value is
   * missing. TTL falls back to 300 when absent or non-numeric.
   */
  private convertRoute53Row(row: IRoute53CSVRow): ICSVRecord | null {
    if (!row.name || !row.type || !row.value) {
      return null;
    }
    const type = row.type.toUpperCase() as DNSRecordType;
    let value = row.value;
    let priority: number | undefined;

    // MX values carry the priority in front of the host, e.g. "10 mail.example.com".
    // Only split when the first token really is a number; any run of whitespace
    // may separate the two parts.
    if (type === 'MX') {
      const match = /^(\d+)\s+(\S.*)$/.exec(value.trim());
      if (match) {
        const [, rawPriority = '', target = ''] = match;
        priority = parseInt(rawPriority, 10);
        value = target;
      }
    }

    return {
      domain: row.name,
      type,
      value,
      ttl: CSVProcessor.parseOptionalInt(row.ttl) ?? 300,
      priority,
      weight: CSVProcessor.parseOptionalInt(row.weight),
    };
  }

  /**
   * Convert one generic-layout row. Returns `null` when domain, record_type or
   * value is missing. TTL falls back to 3600 when absent or non-numeric.
   */
  private convertGenericRow(row: IGenericCSVRow): ICSVRecord | null {
    if (!row.domain || !row.record_type || !row.value) {
      return null;
    }
    return {
      domain: row.domain,
      type: row.record_type.toUpperCase() as DNSRecordType,
      value: row.value,
      ttl: CSVProcessor.parseOptionalInt(row.ttl) ?? 3600,
      priority: CSVProcessor.parseOptionalInt(row.priority),
      weight: CSVProcessor.parseOptionalInt(row.weight),
      port: CSVProcessor.parseOptionalInt(row.port),
    };
  }

  /**
   * Streaming parse of a large Cloudflare CSV export.
   *
   * Files under 10 MB are delegated to `parseCloudflare`. Larger files are read
   * through a stream with the same header normalisation and value trimming as
   * `parseCloudflare`, so results do not depend on file size.
   *
   * @param filePath   Path of the CSV file to read.
   * @param onProgress Called with the running row count every 1000 rows.
   */
  async parseStreamingCloudflare(
    filePath: string,
    onProgress?: (processed: number) => void,
  ): Promise<ICSVParseResult> {
    console.log(`Starting streaming CSV processing for: ${filePath}`);
    MemoryOptimizer.logMemoryUsage('Before streaming parse');

    const { size } = await fs.promises.stat(filePath);
    const fileSizeMB = size / (1024 * 1024);
    if (fileSizeMB < 10) {
      // Small files take the regular in-memory path.
      return this.parseCloudflare(filePath);
    }
    console.log(`Large file detected (${fileSizeMB.toFixed(2)}MB), using streaming processing`);

    const records: ICSVRecord[] = [];
    const errors: Papa.ParseError[] = [];
    let processedRows = 0;

    // Honour a configured encoding when Node can stream it; otherwise read as UTF-8.
    const configuredEncoding = this.options.encoding ?? 'utf-8';
    const encoding: BufferEncoding = Buffer.isEncoding(configuredEncoding)
      ? configuredEncoding
      : 'utf-8';
    const readStream = createReadStream(filePath, { encoding });

    return new Promise((resolve, reject) => {
      Papa.parse(readStream, {
        header: true,
        skipEmptyLines: this.options.skipEmptyLines,
        // An empty delimiter lets PapaParse guess it; a fixed one is used only
        // when the caller switched auto-detection off.
        delimiter: this.options.autoDetectDelimiter ? '' : this.options.delimiter,
        // Without this, "Name,Type,Content" headers never match the lower-case
        // keys the converter reads and every row would be discarded.
        transformHeader: (header) => header.toLowerCase().trim(),
        transform: (value) => (this.options.trimValues ? value.trim() : value),
        step: (result) => {
          processedRows++;
          try {
            if (result.errors && result.errors.length > 0) {
              errors.push(...result.errors);
              return;
            }
            const record = this.convertCloudflareRow(result.data as ICloudflareCSVRow);
            if (record) {
              records.push(record);
            }
            // Progress report and memory check every 1000 rows.
            if (processedRows % 1000 === 0) {
              onProgress?.(processedRows);
              MemoryOptimizer.checkMemoryWarning(512, (usage) => {
                console.warn(`Streaming parse memory warning: ${usage.heapUsed}MB`);
                MemoryOptimizer.forceGarbageCollection();
              });
            }
          } catch (error: unknown) {
            // Zero-based row index, matching the parser's own error entries.
            errors.push(this.toRowError(error, processedRows - 1));
          }
        },
        complete: (results) => {
          MemoryOptimizer.logMemoryUsage('After streaming parse');
          console.log(
            `Streaming parse completed: ${records.length} valid records from ${processedRows} total`,
          );
          resolve({
            records,
            errors,
            meta: results.meta,
            totalRows: processedRows,
            validRows: records.length,
          });
        },
        error: (error) => reject(error),
      });
    });
  }

  /**
   * Apply `processor` to every record, using the batch processor for large inputs.
   *
   * Fewer than 1000 records are mapped directly. Larger sets go through
   * `CSVBatchProcessor`; records that fail there are logged and left out of
   * the returned array.
   *
   * @param records   Records to transform.
   * @param processor Transformation applied to each record.
   * @returns The transformed records that were processed successfully.
   */
  async processBatchRecords(
    records: ICSVRecord[],
    processor: (record: ICSVRecord) => ICSVRecord,
  ): Promise<ICSVRecord[]> {
    if (records.length < 1000) {
      // Small data sets are processed directly. The wrapper keeps map's extra
      // (index, array) arguments away from the callback.
      return records.map((record) => processor(record));
    }

    console.log(`Processing ${records.length} records in batches`);
    MemoryOptimizer.logMemoryUsage('Before batch processing');
    const result = await this.batchProcessor.process(records, async (record) => {
      return processor(record);
    });
    MemoryOptimizer.logMemoryUsage('After batch processing');
    console.log(
      `Batch processing completed: ${result.successful.length} successful, ${result.failed.length} failed`,
    );

    if (result.failed.length > 0) {
      console.warn(`${result.failed.length} records failed processing`);
      result.failed.forEach((failure) => {
        console.warn(`Failed record:`, failure.error.message);
      });
    }
    return result.successful;
  }

  /**
   * Read a file and detect its encoding and delimiter.
   *
   * With `autoDetectEncoding` on, the encoding-detector helpers decide how the
   * file is decoded and which delimiters are plausible. Otherwise the file is
   * read with the configured encoding and delimiters are looked for in the
   * decoded text.
   *
   * @throws CsvProcessingError wrapping any failure while reading or detecting.
   */
  private async readFileWithAutoDetection(filePath: string): Promise<LoadedCsv> {
    try {
      if (this.options.autoDetectEncoding) {
        // Automatic encoding detection.
        const csvDetection = await detectCsvEncoding(filePath);
        const reliability = evaluateDetectionReliability(csvDetection);
        const { content } = await readFileWithDetectedEncoding(filePath);

        let delimiters: { detected: string[]; used: string } | undefined;
        if (this.options.autoDetectDelimiter && csvDetection.csvSpecificInfo.looksLikeCsv) {
          // Automatic delimiter detection.
          const detected = csvDetection.csvSpecificInfo.potentialDelimiters;
          delimiters = { detected, used: this.selectBestDelimiter(detected, content) };
        }

        return {
          fileContent: content,
          encodingInfo: {
            detectedEncoding: csvDetection.encoding,
            confidence: csvDetection.confidence,
            reliability: reliability.level,
            bomPresent: csvDetection.bomPresent,
          },
          delimiters,
        };
      }

      // Manually specified encoding. Read asynchronously so the event loop is
      // not blocked inside an async method.
      const encoding = (this.options.encoding || 'utf-8') as BufferEncoding;
      const fileContent = await fs.promises.readFile(filePath, encoding);

      let delimiters: { detected: string[]; used: string } | undefined;
      if (this.options.autoDetectDelimiter) {
        const detected = this.detectDelimitersInContent(fileContent);
        delimiters = { detected, used: this.selectBestDelimiter(detected, fileContent) };
      }
      return { fileContent, delimiters };
    } catch (error: unknown) {
      throw new CsvProcessingError(
        `ファイル読み込みまたはエンコーディング検出に失敗: ${filePath}`,
        { filePath, error: error instanceof Error ? error.message : 'Unknown error' },
      );
    }
  }

  /**
   * List the delimiter characters (`, ; TAB |`) that occur in the first five
   * lines of `content`.
   */
  private detectDelimitersInContent(content: string): string[] {
    const delimiters = [',', ';', '\t', '|'];
    const lines = content.split(/\r?\n/).slice(0, 5); // analyse the first 5 lines
    return delimiters.filter((delimiter) => lines.some((line) => line.includes(delimiter)));
  }

  /**
   * Choose the most plausible delimiter among `candidates`.
   *
   * Each candidate is scored on the first five lines as
   * (total occurrences) x (number of lines whose count equals the first
   * line's count), which favours a delimiter that appears consistently.
   * Ties keep the earlier candidate.
   *
   * @param candidates Delimiters to choose from; empty strings are ignored.
   * @param content    Decoded file content to sample.
   * @returns The chosen delimiter, or the configured delimiter (default `,`)
   *          when there is no usable candidate.
   */
  private selectBestDelimiter(candidates: string[], content: string): string {
    const usable = candidates.filter((candidate) => candidate.length > 0);
    if (usable.length === 0) {
      return this.options.delimiter || ',';
    }
    if (usable.length === 1) {
      return usable[0] || ',';
    }

    const sampleLines = content
      .split(/\r?\n/)
      .slice(0, 5)
      .filter((line) => line.trim() !== '');

    let best = usable[0] || ',';
    let bestScore = -1;
    for (const delimiter of usable) {
      let total = 0;
      let consistentLines = 0;
      let expectedCount = -1;
      for (const line of sampleLines) {
        // Plain string split counts literal occurrences; building a RegExp from
        // the candidate would misread letters or fail on unusual input.
        const count = line.split(delimiter).length - 1;
        if (expectedCount === -1) {
          expectedCount = count;
        }
        if (count > 0 && count === expectedCount) {
          consistentLines++;
        }
        total += count;
      }
      // Weight raw frequency by consistency across lines.
      const score = total * consistentLines;
      if (score > bestScore) {
        best = delimiter;
        bestScore = score;
      }
    }
    return best;
  }
}
// Type definitions for different CSV formats
/**
 * One parsed row of a Cloudflare export (Name,Type,Content,TTL,Priority).
 * Keys are lower-case because headers are lower-cased while parsing;
 * every cell is still a raw string at this point.
 */
interface ICloudflareCSVRow {
name: string;
type: string;
content: string;
// Defaults to '3600' in convertCloudflareRow when missing.
ttl?: string;
priority?: string;
}
/**
 * One parsed row of a Route53 export (Name,Type,Value,TTL,Weight,SetIdentifier).
 * Keys are lower-case because headers are lower-cased while parsing;
 * every cell is still a raw string at this point.
 */
interface IRoute53CSVRow {
  name: string;
  type: string;
  /** For MX records this may be "<priority> <host>", e.g. "10 mail.example.com". */
  value: string;
  /** Optional: the converter substitutes 300 when the cell is empty or the column is absent. */
  ttl?: string;
  weight?: string;
  setidentifier?: string;
}
/**
 * One parsed row of the generic layout
 * (domain,record_type,value,ttl,priority,weight,port).
 * Every cell is still a raw string at this point.
 */
interface IGenericCSVRow {
  domain: string;
  record_type: string;
  value: string;
  /** Optional: the converter substitutes 3600 when the cell is empty or the column is absent. */
  ttl?: string;
  priority?: string;
  weight?: string;
  port?: string;
}