@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
847 lines • 31.6 kB
JavaScript
/**
* CSV Processing Utility
* Converts CSV files to LLM-friendly text formats
* Uses streaming for memory efficiency with large files
*/
import csvParser from "csv-parser";
import { Readable } from "stream";
import { logger } from "./logger.js";
// ============================================================================
// Data Type Detection Patterns
// ============================================================================
const DATE_PATTERNS = [
{ regex: /^\d{4}-\d{2}-\d{2}$/, format: "YYYY-MM-DD" },
{ regex: /^\d{2}\/\d{2}\/\d{4}$/, format: "MM/DD/YYYY" },
{ regex: /^\d{2}-\d{2}-\d{4}$/, format: "DD-MM-YYYY" },
{ regex: /^\d{2}\.\d{2}\.\d{4}$/, format: "DD.MM.YYYY" },
{ regex: /^\d{4}\/\d{2}\/\d{2}$/, format: "YYYY/MM/DD" },
];
const DATETIME_PATTERNS = [
{ regex: /^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/, format: "ISO8601" },
{ regex: /^\d{2}\/\d{2}\/\d{4} \d{2}:\d{2}/, format: "MM/DD/YYYY HH:mm" },
];
const EMAIL_REGEX = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
const URL_REGEX = /^(https?:\/\/|www\.)[^\s]+$/i;
const INTEGER_REGEX = /^-?\d+$/;
const FLOAT_REGEX = /^-?\d+\.\d+$/;
const BOOLEAN_VALUES = new Set([
"true",
"false",
"yes",
"no",
"1",
"0",
"t",
"f",
"y",
"n",
]);
// ============================================================================
// Column Name Validation
// ============================================================================
/**
* Validate column name and return issues
*/
function validateColumnName(name) {
const issues = [];
if (!name || name.trim() === "") {
issues.push("Empty or blank column name");
return issues;
}
if (name !== name.trim()) {
issues.push("Leading or trailing whitespace");
}
if (/^\d/.test(name)) {
issues.push("Starts with a number");
}
if (/[^a-zA-Z0-9_\- ]/.test(name)) {
issues.push("Contains special characters");
}
if (name.length > 64) {
issues.push("Name exceeds 64 characters");
}
if (/\s{2,}/.test(name)) {
issues.push("Contains multiple consecutive spaces");
}
return issues;
}
// ============================================================================
// Data Type Detection
// ============================================================================
/**
* Detect the data type of a single value
*/
function detectValueType(value) {
if (value === "" || value === null || value === undefined) {
return "empty";
}
const trimmed = value.trim();
if (trimmed === "") {
return "empty";
}
// Check boolean first (before numbers since "1" and "0" could be both)
if (BOOLEAN_VALUES.has(trimmed.toLowerCase())) {
return "boolean";
}
// Check integer
if (INTEGER_REGEX.test(trimmed)) {
return "integer";
}
// Check float
if (FLOAT_REGEX.test(trimmed)) {
return "float";
}
// Check email
if (EMAIL_REGEX.test(trimmed)) {
return "email";
}
// Check URL
if (URL_REGEX.test(trimmed)) {
return "url";
}
// Check datetime (before date since datetime is more specific)
for (const pattern of DATETIME_PATTERNS) {
if (pattern.regex.test(trimmed)) {
return "datetime";
}
}
// Check date
for (const pattern of DATE_PATTERNS) {
if (pattern.regex.test(trimmed)) {
return "date";
}
}
return "string";
}
/**
* Detect date format from value
*/
function detectDateFormat(value) {
const trimmed = value.trim();
for (const pattern of DATETIME_PATTERNS) {
if (pattern.regex.test(trimmed)) {
return pattern.format;
}
}
for (const pattern of DATE_PATTERNS) {
if (pattern.regex.test(trimmed)) {
return pattern.format;
}
}
return undefined;
}
/**
* Determine the predominant type for a column based on sampled values
*/
function determineColumnType(types) {
const nonEmpty = types.filter((t) => t !== "empty");
if (nonEmpty.length === 0) {
return { type: "empty", confidence: 100 };
}
// Count occurrences of each type
const typeCounts = new Map();
for (const t of nonEmpty) {
typeCounts.set(t, (typeCounts.get(t) || 0) + 1);
}
// Find the most common type
let maxType = "string";
let maxCount = 0;
for (const [type, count] of typeCounts) {
if (count > maxCount) {
maxCount = count;
maxType = type;
}
}
// Calculate confidence
const confidence = Math.round((maxCount / nonEmpty.length) * 100);
// Consolidate integer and float into number if the column contains only numeric types
// This check must happen before the mixed-type check to avoid classifying numeric-only columns as mixed
if (typeCounts.has("integer") && typeCounts.has("float")) {
// Check if these are the only two types (purely numeric column)
if (typeCounts.size === 2) {
const totalNumeric = (typeCounts.get("integer") || 0) + (typeCounts.get("float") || 0);
const numericConfidence = Math.round((totalNumeric / nonEmpty.length) * 100);
return { type: "number", confidence: numericConfidence };
}
}
// If confidence is low and multiple types exist, mark as mixed
if (confidence < 70 && typeCounts.size > 1) {
return { type: "mixed", confidence };
}
return { type: maxType, confidence };
}
/**
* Analyze a single column and return rich metadata
*/
function analyzeColumn(columnName, columnIndex, values) {
const types = [];
const uniqueValues = new Set();
const numericValues = [];
let nullCount = 0;
let dateFormat;
for (const value of values) {
const trimmed = value?.trim() ?? "";
if (trimmed === "") {
nullCount++;
types.push("empty");
continue;
}
uniqueValues.add(trimmed);
const type = detectValueType(trimmed);
types.push(type);
// Collect numeric values for statistics
if (type === "integer" || type === "float") {
const num = parseFloat(trimmed);
if (!isNaN(num)) {
numericValues.push(num);
}
}
// Detect date format
if ((type === "date" || type === "datetime") && !dateFormat) {
dateFormat = detectDateFormat(trimmed);
}
}
const { type: detectedType, confidence } = determineColumnType(types);
// Get sample values (up to 5 unique non-empty)
const sampleValues = Array.from(uniqueValues).slice(0, 5);
// Calculate numeric statistics
let minValue;
let maxValue;
let avgValue;
if (numericValues.length > 0) {
minValue = Math.min(...numericValues);
maxValue = Math.max(...numericValues);
avgValue =
Math.round((numericValues.reduce((a, b) => a + b, 0) / numericValues.length) * 100) / 100;
}
// Validate column name
const nameIssues = validateColumnName(columnName);
const metadata = {
name: columnName,
index: columnIndex,
detectedType,
typeConfidence: confidence,
nullCount,
uniqueCount: uniqueValues.size,
sampleValues,
};
if (minValue !== undefined) {
metadata.minValue = minValue;
}
if (maxValue !== undefined) {
metadata.maxValue = maxValue;
}
if (avgValue !== undefined) {
metadata.avgValue = avgValue;
}
if (dateFormat) {
metadata.dateFormat = dateFormat;
}
if (nameIssues.length > 0) {
metadata.nameIssues = nameIssues;
}
return metadata;
}
/**
* Generate data quality warnings based on column analysis
*/
function generateDataQualityWarnings(columns, totalRows) {
const warnings = [];
for (const col of columns) {
// Check for high null rate (>20%)
const nullRate = totalRows > 0 ? col.nullCount / totalRows : 0;
if (nullRate > 0.2) {
warnings.push({
column: col.name,
type: "high_null_rate",
message: `Column has ${Math.round(nullRate * 100)}% empty/null values (${col.nullCount} of ${totalRows} rows)`,
severity: nullRate > 0.5 ? "warning" : "info",
affectedRows: col.nullCount,
});
}
// Check for invalid column names
if (col.nameIssues && col.nameIssues.length > 0) {
warnings.push({
column: col.name,
type: "invalid_name",
message: `Column name issues: ${col.nameIssues.join(", ")}`,
severity: col.name.trim() === "" ? "error" : "warning",
});
}
// Check for mixed types (low confidence)
if (col.detectedType === "mixed" || col.typeConfidence < 70) {
warnings.push({
column: col.name,
type: "mixed_types",
message: `Column has inconsistent data types (${col.typeConfidence}% confidence for ${col.detectedType})`,
severity: "warning",
});
}
// Check for potential duplicates (very low unique count)
if (totalRows > 10 && col.uniqueCount === 1 && col.nullCount === 0) {
warnings.push({
column: col.name,
type: "duplicates",
message: `All ${totalRows} rows have the same value`,
severity: "info",
affectedRows: totalRows,
});
}
// Check for all empty column
if (col.detectedType === "empty") {
warnings.push({
column: col.name,
type: "empty_values",
message: "Column is entirely empty",
severity: "warning",
affectedRows: totalRows,
});
}
}
return warnings;
}
/**
* Calculate overall data quality score
*/
function calculateDataQualityScore(columns, warnings, totalRows) {
if (columns.length === 0 || totalRows === 0) {
return 0;
}
let score = 100;
// Deduct for warnings
for (const warning of warnings) {
switch (warning.severity) {
case "error":
score -= 15;
break;
case "warning":
score -= 8;
break;
case "info":
score -= 3;
break;
}
}
// Deduct for overall null rate
const totalNulls = columns.reduce((sum, col) => sum + col.nullCount, 0);
const totalCells = columns.length * totalRows;
const overallNullRate = totalCells > 0 ? totalNulls / totalCells : 0;
score -= Math.round(overallNullRate * 30);
// Deduct for low type confidence
const avgConfidence = columns.reduce((sum, col) => sum + col.typeConfidence, 0) / columns.length;
if (avgConfidence < 80) {
score -= Math.round((80 - avgConfidence) / 2);
}
return Math.max(0, Math.min(100, score));
}
/**
* Analyze all columns in parsed CSV data
*/
function analyzeColumns(rows) {
if (rows.length === 0) {
return {
columnMetadata: [],
dataQualityWarnings: [],
dataQualityScore: 0,
};
}
const columnNames = Object.keys(rows[0]);
const columnMetadata = [];
for (let i = 0; i < columnNames.length; i++) {
const colName = columnNames[i];
const values = rows.map((row) => String(row[colName] ?? ""));
columnMetadata.push(analyzeColumn(colName, i, values));
}
const dataQualityWarnings = generateDataQualityWarnings(columnMetadata, rows.length);
const dataQualityScore = calculateDataQualityScore(columnMetadata, dataQualityWarnings, rows.length);
return {
columnMetadata,
dataQualityWarnings,
dataQualityScore,
};
}
/**
* Detect if the first row appears to be a header row
*
* Heuristics used:
* 1. Header values should be text/string type (not numbers, dates, emails, etc.)
* 2. Header values should be unique (no duplicate column names)
* 3. If data rows exist, headers should have different type profile than data
*
* @param headerValues - The values from the first row (potential headers)
* @param dataRows - Sample of data rows for comparison (optional)
* @returns true if the first row appears to be headers
*/
function detectHasHeaders(headerValues, dataRows) {
if (headerValues.length === 0) {
return false;
}
// Check 1: All header values should look like text labels, not data values
let textLikeCount = 0;
for (const value of headerValues) {
const trimmed = value?.trim() ?? "";
if (trimmed === "") {
continue; // Empty headers are allowed but don't count toward text-like
}
const type = detectValueType(trimmed);
// Headers are typically strings - not numbers, dates, emails, URLs, or booleans
if (type === "string") {
textLikeCount++;
}
}
// If most header values are text-like (not numeric/date/etc.), likely headers
const nonEmptyHeaders = headerValues.filter((v) => v?.trim()).length;
if (nonEmptyHeaders === 0) {
return false;
}
const textRatio = textLikeCount / nonEmptyHeaders;
// Check 2: Headers should be unique
const uniqueHeaders = new Set(headerValues.map((v) => v?.trim().toLowerCase()));
const hasUniqueHeaders = uniqueHeaders.size === headerValues.length;
// Check 3: Compare with data rows if available
if (dataRows && dataRows.length > 0) {
// If first data row has different type profile than headers, likely has headers
const firstDataRow = Object.values(dataRows[0] || {}).map((v) => String(v ?? ""));
let dataTextCount = 0;
for (const value of firstDataRow) {
const type = detectValueType(value?.trim() ?? "");
if (type === "string") {
dataTextCount++;
}
}
const dataTextRatio = firstDataRow.length > 0 ? dataTextCount / firstDataRow.length : 0;
// If headers are mostly text but data has more varied types, likely has headers
if (textRatio > 0.7 && dataTextRatio < textRatio - 0.2) {
return true;
}
}
// Default: if >70% of header values are text-like and unique, assume headers
return textRatio >= 0.7 && hasUniqueHeaders;
}
/**
* Detect if first line is CSV metadata (not actual data/headers)
* Common patterns:
* - Excel separator line: "SEP=,"
* - Lines with significantly different delimiter count than line 2
* - Lines that don't match CSV structure of subsequent lines
*/
function isMetadataLine(lines) {
if (!lines[0] || lines.length < 2) {
return false;
}
const firstLine = lines[0].trim();
const secondLine = lines[1].trim();
if (firstLine.match(/^sep=/i)) {
return true;
}
const firstCommaCount = (firstLine.match(/,/g) || []).length;
const secondCommaCount = (secondLine.match(/,/g) || []).length;
if (firstCommaCount === 0 && secondCommaCount > 0) {
return true;
}
if (secondCommaCount > 0 && firstCommaCount !== secondCommaCount) {
return true;
}
return false;
}
/**
* CSV processor for converting CSV data to LLM-optimized formats
*
* Supports three output formats:
* - raw: Original CSV format with proper escaping (RECOMMENDED for best LLM performance)
* - json: JSON array format (best for structured data processing)
* - markdown: Markdown table format (best for small datasets <100 rows)
*
* All formats use csv-parser for reliable parsing, then convert to the target format.
*
* @example
* ```typescript
* const csvBuffer = Buffer.from('name,age\nAlice,30\nBob,25');
* const result = await CSVProcessor.process(csvBuffer, {
* maxRows: 1000,
* formatStyle: 'raw'
* });
* console.log(result.content); // CSV string with proper escaping
* ```
*/
export class CSVProcessor {
/**
* Process CSV Buffer to LLM-friendly format
* Content already loaded by FileDetector
*
* @param content - CSV file as Buffer
* @param options - Processing options
* @returns Formatted CSV data ready for LLM (JSON or Markdown)
*/
static async process(content, options) {
const { maxRows: rawMaxRows = 1000, formatStyle = "raw", includeHeaders = true, sampleDataFormat = "json", extension = null, } = options || {};
const maxRows = Math.max(1, Math.min(10000, rawMaxRows));
logger.debug("[CSVProcessor] Starting CSV processing", {
contentSize: content.length,
formatStyle,
maxRows,
includeHeaders,
});
const csvString = content.toString("utf-8");
// For raw format, return original CSV with row limit (no parsing needed)
// This preserves the exact original format which works best for LLMs
if (formatStyle === "raw") {
const lines = csvString.split("\n");
const hasMetadataLine = isMetadataLine(lines);
if (hasMetadataLine) {
logger.debug("[CSVProcessor] Detected metadata line, skipping first line");
}
// Skip metadata line if present, then take header + maxRows data rows
const csvLines = hasMetadataLine
? lines.slice(1) // Skip metadata line
: lines;
const limitedLines = csvLines.slice(0, 1 + maxRows); // header + data rows
const limitedCSV = limitedLines.join("\n");
const rowCount = limitedLines
.slice(1)
.filter((line) => line.trim() !== "").length;
const originalRowCount = csvLines
.slice(1)
.filter((line) => line.trim() !== "").length;
const wasTruncated = rowCount < originalRowCount;
if (wasTruncated) {
logger.warn(`[CSVProcessor] CSV data truncated: showing ${rowCount} of ${originalRowCount} rows (limit: ${maxRows})`);
}
logger.debug(`[CSVProcessor] raw format: ${rowCount} rows (original: ${originalRowCount}) → ${limitedCSV.length} chars`, {
formatStyle: "raw",
originalSize: csvString.length,
limitedSize: limitedCSV.length,
});
logger.info("[CSVProcessor] ✅ Processed CSV file", {
formatStyle: "raw",
rowCount,
columnCount: (limitedLines[0] || "").split(",").length,
truncated: wasTruncated,
});
// Parse a sample for enhanced metadata analysis (raw format still benefits from column analysis)
const sampleForAnalysis = await this.parseCSVString(limitedCSV, Math.min(rowCount, 500));
const { columnMetadata, dataQualityWarnings, dataQualityScore } = analyzeColumns(sampleForAnalysis);
// Log data quality summary
if (dataQualityWarnings.length > 0) {
logger.debug("[CSVProcessor] Data quality warnings detected", {
warningCount: dataQualityWarnings.length,
score: dataQualityScore,
});
}
return {
type: "csv",
content: limitedCSV,
mimeType: "text/csv",
metadata: {
confidence: 100,
size: content.length,
rowCount,
totalLines: limitedLines.length,
columnCount: (limitedLines[0] || "").split(",").length,
extension,
columnMetadata,
dataQualityWarnings,
dataQualityScore,
hasHeaders: detectHasHeaders((limitedLines[0] || "").split(","), undefined),
detectedDelimiter: ",",
},
};
}
// Parse CSV for JSON and Markdown formats only
logger.debug("[CSVProcessor] Parsing CSV for structured format conversion", {
formatStyle,
maxRows,
});
const rows = await this.parseCSVString(csvString, maxRows);
// Filter out empty rows (empty objects or rows with only whitespace values from blank lines)
const nonEmptyRows = rows.filter((row) => {
if (!row || typeof row !== "object") {
return false;
}
const keys = Object.keys(row);
if (keys.length === 0) {
return false;
}
// Check if all values are empty or whitespace-only
return !Object.values(row).every((val) => val === "" || (typeof val === "string" && val.trim() === ""));
});
// Extract metadata from parsed results
const rowCount = nonEmptyRows.length;
const columnNames = nonEmptyRows.length > 0
? Object.keys(nonEmptyRows[0])
: [];
const columnCount = columnNames.length;
const hasEmptyColumns = columnNames.some((col) => !col || col.trim() === "");
const sampleRows = nonEmptyRows.slice(0, 3);
const sampleData = this.formatSampleData(sampleRows, sampleDataFormat, includeHeaders);
if (hasEmptyColumns) {
logger.warn("[CSVProcessor] CSV contains empty or blank column headers", {
columnNames,
});
}
if (rowCount === 0) {
logger.warn("[CSVProcessor] CSV file contains no data rows");
}
// Perform enhanced column analysis
const { columnMetadata, dataQualityWarnings, dataQualityScore } = analyzeColumns(nonEmptyRows);
// Log data quality summary
if (dataQualityWarnings.length > 0) {
logger.debug("[CSVProcessor] Data quality warnings detected", {
warningCount: dataQualityWarnings.length,
score: dataQualityScore,
});
}
// Format parsed data
logger.debug(`[CSVProcessor] Converting ${rowCount} rows to ${formatStyle} format`);
const formatted = this.formatForLLM(nonEmptyRows, formatStyle, includeHeaders);
logger.info("[CSVProcessor] ✅ Processed CSV file", {
formatStyle,
rowCount,
columnCount,
outputLength: formatted.length,
hasEmptyColumns,
dataQualityScore,
});
return {
type: "csv",
content: formatted,
mimeType: "text/csv",
metadata: {
confidence: 100,
size: content.length,
rowCount,
columnCount,
columnNames,
sampleData,
hasEmptyColumns,
extension,
columnMetadata,
dataQualityWarnings,
dataQualityScore,
hasHeaders: detectHasHeaders(columnNames, nonEmptyRows),
detectedDelimiter: ",",
},
};
}
/**
* Parse CSV string into array of row objects using streaming
* Memory-efficient for large files
*/
/**
* Parse CSV file from disk using streaming (memory efficient)
*
* @param filePath - Path to CSV file
* @param maxRows - Maximum rows to parse (default: 1000)
* @returns Array of row objects
*/
static async parseCSVFile(filePath, maxRows = 1000) {
const clampedMaxRows = Math.max(1, Math.min(10000, maxRows));
const fs = await import("fs");
logger.debug("[CSVProcessor] Starting file parsing", {
filePath,
maxRows: clampedMaxRows,
});
// Read first 2 lines to detect metadata
const fileHandle = await fs.promises.open(filePath, "r");
const firstLines = [];
const lineReader = fileHandle.createReadStream({ encoding: "utf-8" });
await new Promise((resolve) => {
let buffer = "";
lineReader.on("data", (chunk) => {
buffer += chunk.toString();
const lines = buffer.split("\n");
if (lines.length >= 2) {
firstLines.push(lines[0], lines[1]);
lineReader.destroy();
resolve();
}
});
lineReader.on("end", () => resolve());
});
await fileHandle.close();
const hasMetadataLine = isMetadataLine(firstLines);
const skipLines = hasMetadataLine ? 1 : 0;
if (hasMetadataLine) {
logger.debug("[CSVProcessor] Detected metadata line in file, will skip first line");
}
return new Promise((resolve, reject) => {
const rows = [];
let count = 0;
let lineCount = 0;
const source = fs.createReadStream(filePath, { encoding: "utf-8" });
const parser = csvParser();
const abort = () => {
source.destroy();
parser.destroy();
};
source
.pipe(parser)
.on("data", (row) => {
lineCount++;
if (lineCount <= skipLines) {
return;
}
rows.push(row);
count++;
if (count >= clampedMaxRows) {
logger.debug(`[CSVProcessor] Reached row limit ${clampedMaxRows}, stopping parse`);
abort();
resolve(rows);
}
})
.on("end", () => {
logger.debug(`[CSVProcessor] File parsing complete: ${rows.length} rows parsed`);
resolve(rows);
})
.on("error", (error) => {
logger.error("[CSVProcessor] File parsing failed:", error);
reject(error);
});
});
}
/**
* Parse CSV string to array of row objects
* Exposed for use by tools that need direct CSV parsing
*
* @param csvString - CSV data as string
* @param maxRows - Maximum rows to parse (default: 1000)
* @returns Array of row objects
*/
static async parseCSVString(csvString, maxRows = 1000) {
const clampedMaxRows = Math.max(1, Math.min(10000, maxRows));
logger.debug("[CSVProcessor] Starting string parsing", {
inputLength: csvString.length,
maxRows: clampedMaxRows,
});
// Detect and skip metadata line
const lines = csvString.split("\n");
const hasMetadataLine = isMetadataLine(lines);
const csvData = hasMetadataLine ? lines.slice(1).join("\n") : csvString;
if (hasMetadataLine) {
logger.debug("[CSVProcessor] Detected metadata line in string, skipping");
}
return new Promise((resolve, reject) => {
const rows = [];
let count = 0;
const source = Readable.from([csvData]);
const parser = csvParser();
const abort = () => {
source.destroy();
parser.destroy();
};
source
.pipe(parser)
.on("data", (row) => {
rows.push(row);
count++;
if (count >= clampedMaxRows) {
logger.debug(`[CSVProcessor] Reached row limit ${clampedMaxRows}, stopping parse`);
abort();
resolve(rows);
}
})
.on("end", () => {
logger.debug(`[CSVProcessor] String parsing complete: ${rows.length} rows parsed`);
resolve(rows);
})
.on("error", (error) => {
logger.error("[CSVProcessor] Parsing failed:", error);
reject(error);
});
});
}
/**
* Format parsed CSV data for LLM consumption
* Only used for JSON and Markdown formats (raw format handled separately)
*/
static formatForLLM(rows, formatStyle, includeHeaders) {
if (rows.length === 0) {
return "CSV file is empty or contains no data.";
}
if (formatStyle === "json") {
return JSON.stringify(rows, null, 2);
}
return this.toMarkdownTable(rows, includeHeaders);
}
/**
* Format as markdown table
* Best for small datasets (<100 rows)
*/
static toMarkdownTable(rows, includeHeaders) {
if (rows.length === 0) {
return "CSV file is empty or contains no data.";
}
const headers = Object.keys(rows[0]);
// Escape backslashes, pipes, and sanitize newlines to keep rows intact
const escapePipe = (str) => str.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
let markdown = "";
if (includeHeaders) {
markdown = "| " + headers.map(escapePipe).join(" | ") + " |\n";
markdown += "|" + headers.map(() => " --- ").join("|") + "|\n";
}
rows.forEach((row) => {
markdown +=
"| " +
headers
.map((h) => escapePipe(String(row[h] || "")))
.join(" | ") +
" |\n";
});
return markdown;
}
/**
* Format sample data according to the specified format
*
* @param sampleRows - Array of sample row objects
* @param format - Output format for sample data
* @param includeHeaders - Whether to include headers in CSV/markdown formats
* @returns Formatted sample data as string or array
*/
static formatSampleData(sampleRows, format, includeHeaders) {
if (sampleRows.length === 0) {
return format === "object" ? [] : "No data rows";
}
switch (format) {
case "object":
return sampleRows;
case "json":
return JSON.stringify(sampleRows, null, 2);
case "csv":
return this.toCSVString(sampleRows, includeHeaders);
case "markdown":
return this.toMarkdownTable(sampleRows, includeHeaders);
default:
return sampleRows;
}
}
/**
* Convert row objects to CSV string format
*
* @param rows - Array of row objects
* @param includeHeaders - Whether to include header row
* @returns CSV formatted string
*/
static toCSVString(rows, includeHeaders) {
if (rows.length === 0) {
return "";
}
const headers = Object.keys(rows[0]);
// Escape CSV values (wrap in quotes if contains comma, quote, or newline)
const escapeCSV = (value) => {
if (value.includes(",") || value.includes('"') || value.includes("\n")) {
return `"${value.replace(/"/g, '""')}"`;
}
return value;
};
const lines = [];
if (includeHeaders) {
lines.push(headers.map(escapeCSV).join(","));
}
rows.forEach((row) => {
const values = headers.map((h) => escapeCSV(String(row[h] ?? "")));
lines.push(values.join(","));
});
return lines.join("\n");
}
}
//# sourceMappingURL=csvProcessor.js.map