n8n
Version:
n8n Workflow Automation Tool
341 lines • 13.4 kB
JavaScript
;
// Module-interop helper (appears to be compiler-emitted): exposes property `k` of
// module `m` on object `o` under the name `k2` (which defaults to `k`).
// An already-defined `this.__createBinding` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Replace the descriptor with an enumerable live getter when it is missing, when it is
// an accessor on a non-ES module, or when it is a writable/configurable data property.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback used when Object.create is unavailable: copy the current value.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Module-interop helper: sets `o.default` to `v`. With Object.create available the
// property is defined as enumerable via defineProperty; otherwise it is plainly assigned.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// Module-interop helper: returns `mod` unchanged when it is flagged `__esModule`;
// otherwise builds a namespace-like object that re-exposes every own property of `mod`
// except "default", and sets `default` to `mod` itself.
var __importStar = (this && this.__importStar) || (function () {
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in/hasOwnProperty fallback when that function is unavailable.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
// `mod != null` lets both null and undefined through to an empty namespace object.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// Module-interop helper: returns `mod` as-is when it is flagged `__esModule`,
// otherwise wraps it as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
// Flag this module as an ES module for interop helpers that check `__esModule`.
Object.defineProperty(exports, "__esModule", { value: true });
// The constant exports are initialised to undefined here and assigned their real values
// further down, after the require calls.
exports.CSV_MAX_AGGREGATE_GROUPS = exports.CSV_DISTINCT_TRACK_LIMIT = exports.CSV_PROFILE_DISTINCT_LIMIT = exports.CSV_SAMPLE_VALUE_LIMIT = void 0;
// Function exports: these reference function declarations defined later in the file,
// which is valid because function declarations are hoisted.
exports.resolveCsvFile = resolveCsvFile;
exports.streamCsvRecords = streamCsvRecords;
exports.validateCsvColumns = validateCsvColumns;
exports.matchesFilters = matchesFilters;
exports.normaliseCsvValue = normaliseCsvValue;
exports.toCsvRecordValues = toCsvRecordValues;
exports.createCsvDistinctTracker = createCsvDistinctTracker;
exports.buildCsvAmbiguity = buildCsvAmbiguity;
exports.getSuggestedDisambiguatingColumns = getSuggestedDisambiguatingColumns;
exports.createCsvColumnProfileState = createCsvColumnProfileState;
exports.getLikelyDisambiguatingColumns = getLikelyDisambiguatingColumns;
exports.createCsvAggregateGroup = createCsvAggregateGroup;
exports.formatCsvAggregateGroup = formatCsvAggregateGroup;
exports.sortCsvAggregateResults = sortCsvAggregateResults;
const node_fs_1 = require("node:fs");
const node_path_1 = __importDefault(require("node:path"));
const fastest_levenshtein_1 = require("fastest-levenshtein");
const file_references_1 = require("./file-references");
// Maximum number of distinct sample values kept per column by createCsvColumnProfileState.
exports.CSV_SAMPLE_VALUE_LIMIT = 5;
// Not referenced in this file; presumably the distinct-value cap callers pass to
// createCsvColumnProfileState — confirm against callers.
exports.CSV_PROFILE_DISTINCT_LIMIT = 100;
// Not referenced in this file; presumably the per-column cap callers pass to
// createCsvDistinctTracker — confirm against callers.
exports.CSV_DISTINCT_TRACK_LIMIT = 10_000;
// Not referenced in this file; presumably an upper bound on aggregate groups enforced
// by callers — confirm against callers.
exports.CSV_MAX_AGGREGATE_GROUPS = 50_000;
// Wall-clock budget in milliseconds for one streamCsvRecords run before it is aborted.
const CSV_OPERATION_TIMEOUT_MS = 15_000;
/**
 * Tells whether a file entry should be treated as CSV.
 *
 * @param {{ mimeType?: string, relativePath: string }} file - File entry to inspect.
 * @returns {boolean} True when the MIME type is exactly `text/csv`, or the relative
 *   path ends in `.csv` (case-insensitive).
 */
function isCsvFile(file) {
    if (file.mimeType === 'text/csv') {
        return true;
    }
    const lowerCasedPath = file.relativePath.toLowerCase();
    return lowerCasedPath.endsWith('.csv');
}
/**
 * Resolves a file reference and ensures the resulting file is a CSV.
 *
 * @param {Array<object>} files - Candidate files handed to `resolveFileReference`.
 * @param {*} reference - Reference forwarded unchanged to `resolveFileReference`.
 * @returns {object} The resolved file entry.
 * @throws {Error} With the resolver's `error` text when the status is not `'found'`,
 *   or with a "not queryable as CSV" message when the file fails `isCsvFile`.
 */
function resolveCsvFile(files, reference) {
    const resolution = (0, file_references_1.resolveFileReference)(files, reference);
    if (resolution.status === 'found') {
        const candidate = resolution.file;
        if (isCsvFile(candidate)) {
            return candidate;
        }
        throw new Error(`File "${candidate.fileName}" is not queryable as CSV.`);
    }
    throw new Error(resolution.error);
}
/**
 * Streams a CSV file from the workspace and feeds each parsed row to the caller.
 *
 * The first line is used as the header row; empty lines are skipped, a leading BOM is
 * stripped and rows with a differing column count are tolerated (see parser options).
 *
 * @param {string} workspaceRoot - Directory that `file.relativePath` is joined onto.
 * @param {{ relativePath: string }} file - File entry to read.
 * @param {{ onHeaders?: (headers: string[]) => void,
 *           onRecord: (row: { record: object, fileLineNumber: number }) => void }} handlers
 *   `onHeaders` (optional) receives the parsed header row; `onRecord` is invoked once per
 *   record with the parser's `info.lines` value as `fileLineNumber`.
 * @returns {Promise<void>} Resolves once every record has been handed to `onRecord`.
 * @throws {Error} When reading or parsing fails, when a handler throws, or when the
 *   operation runs longer than CSV_OPERATION_TIMEOUT_MS.
 */
async function streamCsvRecords(workspaceRoot, file, handlers) {
    const filePath = node_path_1.default.join(workspaceRoot, file.relativePath);
    const { parse } = await Promise.resolve().then(() => __importStar(require('csv-parse')));
    const readStream = (0, node_fs_1.createReadStream)(filePath);
    const parser = parse({
        columns: (parsedHeaders) => {
            handlers.onHeaders?.(parsedHeaders);
            return parsedHeaders;
        },
        skip_empty_lines: true,
        bom: true,
        info: true,
        relax_column_count: true,
    });
    // pipe() does not forward errors from the source to the destination. Without this
    // listener a read failure (missing file, permission error) would surface as an
    // unhandled 'error' event on the read stream instead of rejecting this promise.
    readStream.on('error', (error) => {
        parser.destroy(error);
    });
    readStream.pipe(parser);
    const timeout = setTimeout(() => {
        parser.destroy(new Error('CSV operation exceeded the time limit'));
        readStream.destroy();
    }, CSV_OPERATION_TIMEOUT_MS);
    try {
        for await (const { record, info } of parser) {
            handlers.onRecord({ record, fileLineNumber: info.lines });
        }
    }
    finally {
        // Always release the timer and both streams, including on early exit or error.
        clearTimeout(timeout);
        readStream.destroy();
        parser.destroy();
    }
}
/**
 * Verifies that every requested column exists in the CSV header row.
 *
 * @param {string[]} headers - Header names found in the file.
 * @param {string} fileName - File name used in the error message.
 * @param {string[]} columns - Column names that must be present.
 * @throws {Error} For the first requested column that is not among `headers`.
 */
function validateCsvColumns(headers, fileName, columns) {
    const firstMissingIndex = columns.findIndex((column) => !headers.includes(column));
    if (firstMissingIndex === -1) {
        return;
    }
    const missingColumn = columns[firstMissingIndex];
    throw new Error(formatMissingCsvColumnError(fileName, missingColumn, headers));
}
/**
 * Checks whether a record satisfies every filter.
 *
 * @param {object} record - Parsed CSV row keyed by column name.
 * @param {Array<{ column: string, op: string, value: * }>} filters - Conditions to apply.
 *   `eq` compares the normalised cell for strict equality, `contains` tests whether the
 *   cell includes `value`, and any other op tests whether `value` includes the cell.
 * @returns {boolean} True when all filters pass (also true for an empty filter list).
 */
function matchesFilters(record, filters) {
    const passes = (filter) => {
        const cell = normaliseCsvValue(record[filter.column]);
        switch (filter.op) {
            case 'eq':
                return cell === filter.value;
            case 'contains':
                return cell.includes(filter.value);
            default:
                return filter.value.includes(cell);
        }
    };
    return filters.every(passes);
}
/**
 * Converts a raw cell value to a string.
 *
 * @param {*} value - Raw value read from a record.
 * @returns {string} An empty string for null/undefined, otherwise `String(value)`.
 */
function normaliseCsvValue(value) {
    return String(value ?? '');
}
/**
 * Projects a record onto the given columns, normalising every value to a string.
 *
 * @param {object} record - Parsed CSV row keyed by column name.
 * @param {string[]} columns - Columns to include, in output order.
 * @returns {Object<string, string>} Object with one normalised entry per column.
 */
function toCsvRecordValues(record, columns) {
    const toEntry = (column) => [column, normaliseCsvValue(record[column])];
    const entries = columns.map(toEntry);
    return Object.fromEntries(entries);
}
/**
 * Builds the error text for a requested column that is absent from the CSV headers.
 *
 * @param {string} fileName - File name shown in the message.
 * @param {string} requestedColumn - Column name that was not found.
 * @param {string[]} headers - Header names available in the file.
 * @returns {string} Message listing the available columns and, when close matches exist,
 *   a "Did you mean" hint.
 */
function formatMissingCsvColumnError(fileName, requestedColumn, headers) {
    const closestMatches = getClosestColumnMatches(requestedColumn, headers);
    let didYouMean = '';
    if (closestMatches.length > 0) {
        const quotedMatches = closestMatches.map((value) => `"${value}"`);
        didYouMean = ` Did you mean ${quotedMatches.join(', ')}?`;
    }
    return `CSV column "${requestedColumn}" not found in "${fileName}". Available columns: ${headers.join(', ')}.${didYouMean} Run csv_profile if you are uncertain about the schema.`;
}
/**
 * Finds up to three header names that resemble the requested column.
 *
 * A header qualifies when, compared case-insensitively, it contains the requested name
 * or is within an edit distance of 3. Results are ordered by edit distance, then by
 * `localeCompare` on the header name.
 *
 * @param {string} requestedColumn - Column name the caller asked for.
 * @param {string[]} headers - Header names available in the file.
 * @returns {string[]} At most three matching headers, closest first.
 */
function getClosestColumnMatches(requestedColumn, headers) {
    const needle = requestedColumn.toLowerCase();
    const ranked = [];
    headers.forEach((header) => {
        const lowered = header.toLowerCase();
        const editDistance = (0, fastest_levenshtein_1.distance)(needle, lowered);
        if (lowered.includes(needle) || editDistance <= 3) {
            ranked.push({ header, editDistance });
        }
    });
    ranked.sort((a, b) => a.editDistance - b.editDistance || a.header.localeCompare(b.header));
    return ranked.slice(0, 3).map((entry) => entry.header);
}
/**
 * Creates a tracker that collects distinct normalised values per column, up to a cap.
 *
 * @param {string[]} columns - Columns to track.
 * @param {number} limit - Maximum number of distinct values retained per column.
 * @returns {{ add: (record: object) => void,
 *             toOutput: () => Object<string, string[]>,
 *             columns: string[] }}
 *   `add` records one row, `toOutput` returns the collected values for every column that
 *   has at least one, and `columns` lists the tracked column names.
 */
function createCsvDistinctTracker(columns, limit) {
    const seenByColumn = new Map(columns.map((name) => [name, new Set()]));
    const add = (record) => {
        seenByColumn.forEach((seen, name) => {
            // Once a column has reached the cap no further values are stored for it.
            if (seen.size < limit) {
                seen.add(normaliseCsvValue(record[name]));
            }
        });
    };
    const toOutput = () => {
        const populated = [...seenByColumn.entries()]
            .filter(([, seen]) => seen.size > 0)
            .map(([name, seen]) => [name, [...seen]]);
        return Object.fromEntries(populated);
    };
    return {
        add,
        toOutput,
        columns: [...seenByColumn.keys()],
    };
}
/**
 * Describes a non-unique lookup result so the caller can ask for narrower filters.
 *
 * @param {number} matchedRows - Number of rows that matched.
 * @param {number} limit - Maximum number of rows that were returned.
 * @param {{ columns: string[], toOutput: () => object }} [tracker] - Optional distinct
 *   tracker whose columns and values are surfaced as hints.
 * @returns {{ matchedRows: number, message: string, suggestedColumns: string[],
 *             sampleDistinctValues: (object|undefined) }} Ambiguity description.
 */
function buildCsvAmbiguity(matchedRows, limit, tracker) {
    let message;
    if (matchedRows > limit) {
        message = `Matched ${matchedRows} rows and returned only the first ${limit}. This is not a unique result. Refine filters before answering.`;
    }
    else {
        message = `Matched ${matchedRows} rows. This is not a unique result. Refine filters before answering.`;
    }
    const suggestedColumns = tracker?.columns ?? [];
    const sampleDistinctValues = tracker?.toOutput();
    return { matchedRows, message, suggestedColumns, sampleDistinctValues };
}
/**
 * Suggests up to five headers that are not yet used as filter or selected columns,
 * ordered by `preferenceScore` (lowest score first).
 *
 * @param {string[]} headers - Header names available in the file.
 * @param {Array<{ column: string }>} filters - Filters already applied.
 * @param {string[]} selectedColumns - Columns already selected for output.
 * @returns {string[]} At most five unused headers, most preferred first.
 */
function getSuggestedDisambiguatingColumns(headers, filters, selectedColumns) {
    const usedNames = new Set();
    for (const filter of filters) {
        usedNames.add(filter.column);
    }
    for (const selected of selectedColumns) {
        usedNames.add(selected);
    }
    const unused = headers.filter((header) => !usedNames.has(header));
    unused.sort((a, b) => preferenceScore(a) - preferenceScore(b));
    return unused.slice(0, 5);
}
// Column names favoured when suggesting disambiguating columns; an earlier position
// yields a lower (better) score in preferenceScore.
const PREFERRED_DISAMBIGUATING_COLUMNS = [
    'Year',
    'Date',
    'Month',
    'Country',
    'Country Name',
    'Source',
    'Category',
    'Name',
];
/**
 * Scores a column name against the preferred list; lower scores sort first.
 *
 * @param {string} column - Column name to score.
 * @returns {number} The list index for a case-insensitive exact match, the index + 0.5
 *   of the first preferred name contained in the column name, or list length + 1 when
 *   nothing matches.
 */
function preferenceScore(column) {
    const loweredColumn = column.toLowerCase();
    const loweredPreferred = PREFERRED_DISAMBIGUATING_COLUMNS.map((candidate) => candidate.toLowerCase());
    const exactPosition = loweredPreferred.indexOf(loweredColumn);
    if (exactPosition !== -1) {
        return exactPosition;
    }
    const containedPosition = loweredPreferred.findIndex((candidate) => loweredColumn.includes(candidate));
    if (containedPosition === -1) {
        return PREFERRED_DISAMBIGUATING_COLUMNS.length + 1;
    }
    return containedPosition + 0.5;
}
/**
 * Creates an accumulator that profiles one CSV column value by value.
 *
 * @param {number} distinctLimit - Maximum number of distinct values tracked before the
 *   distinct count is reported as truncated.
 * @returns {{ add: (value: string) => void, toOutput: (name: string) => object }}
 *   `add` feeds one normalised cell value; `toOutput` returns the profile: name, inferred
 *   type, empty count, distinct count, truncation flag and sample values.
 */
function createCsvColumnProfileState(distinctLimit) {
    const seenValues = new Set();
    const samples = [];
    const tally = { empty: 0, nonEmpty: 0 };
    // Each flag stays true only while every non-empty value seen so far fits the type.
    const fits = { integer: true, number: true, boolean: true, date: true };
    let truncated = false;
    function add(value) {
        if (value === '') {
            tally.empty += 1;
            return;
        }
        tally.nonEmpty += 1;
        if (seenValues.size < distinctLimit) {
            seenValues.add(value);
        }
        else if (!seenValues.has(value)) {
            // A new value arrived after the cap was reached, so the count is a lower bound.
            truncated = true;
        }
        if (!samples.includes(value) && samples.length < exports.CSV_SAMPLE_VALUE_LIMIT) {
            samples.push(value);
        }
        if (fits.integer) {
            fits.integer = /^-?\d+$/.test(value);
        }
        if (fits.number) {
            fits.number = Number.isFinite(Number(value));
        }
        if (fits.boolean) {
            fits.boolean = /^(true|false|yes|no|0|1)$/i.test(value);
        }
        if (fits.date) {
            fits.date = isLikelyDate(value);
        }
    }
    function toOutput(name) {
        const inferredType = inferCsvColumnType({
            nonEmptyCount: tally.nonEmpty,
            allInteger: fits.integer,
            allNumber: fits.number,
            allBoolean: fits.boolean,
            allDate: fits.date,
        });
        return {
            name,
            inferredType,
            emptyCount: tally.empty,
            distinctCount: seenValues.size,
            distinctCountTruncated: truncated,
            sampleValues: samples,
        };
    }
    return { add, toOutput };
}
/**
 * Picks a column type label from the accumulated type flags.
 *
 * Precedence when several flags hold: boolean, integer, number, date, then string.
 *
 * @param {{ nonEmptyCount: number, allInteger: boolean, allNumber: boolean,
 *           allBoolean: boolean, allDate: boolean }} flags - Profile flags.
 * @returns {string} One of 'empty', 'boolean', 'integer', 'number', 'date', 'string'.
 */
function inferCsvColumnType({ nonEmptyCount, allInteger, allNumber, allBoolean, allDate, }) {
    if (nonEmptyCount === 0) {
        return 'empty';
    }
    const byPrecedence = [
        [allBoolean, 'boolean'],
        [allInteger, 'integer'],
        [allNumber, 'number'],
        [allDate, 'date'],
    ];
    const firstHit = byPrecedence.find(([holds]) => holds);
    return firstHit ? firstHit[1] : 'string';
}
/**
 * Heuristically decides whether a value looks like a year-first date.
 *
 * The value must have the shape YYYY-M[-D] (with `-` or `/` separators) and must also
 * be parseable by `Date.parse`.
 *
 * @param {string} value - Cell value to test.
 * @returns {boolean} True when both checks pass.
 */
function isLikelyDate(value) {
    const hasDateShape = /^\d{4}[-/]\d{1,2}([-/]\d{1,2})?$/.test(value);
    return hasDateShape && Number.isFinite(Date.parse(value));
}
/**
 * Picks up to five profiled columns that could help tell rows apart.
 *
 * A column qualifies when it has more than one distinct value, fewer distinct values
 * than there are rows, and an untruncated distinct count. Qualifying columns are
 * ordered by `preferenceScore` (lowest first).
 *
 * @param {Array<{ name: string, distinctCount?: number, distinctCountTruncated?: boolean }>} columnProfiles
 *   Column profiles to consider.
 * @param {number} rowCount - Total number of rows profiled.
 * @returns {string[]} Names of at most five qualifying columns.
 */
function getLikelyDisambiguatingColumns(columnProfiles, rowCount) {
    const isUseful = (profile) => {
        const distinct = profile.distinctCount ?? 0;
        if (profile.distinctCountTruncated) {
            return false;
        }
        return distinct > 1 && distinct < rowCount;
    };
    const names = columnProfiles.filter(isUseful).map((profile) => profile.name);
    names.sort((a, b) => preferenceScore(a) - preferenceScore(b));
    return names.slice(0, 5);
}
/**
 * Creates the accumulator for one aggregation group.
 *
 * @param {object} groupValues - Values that identify the group; stored by reference.
 * @param {string[]} metrics - Metric column names; each gets a fresh numeric state.
 * @returns {{ groupValues: object, count: number, metrics: Object<string, object> }}
 *   Group accumulator with a zero row count.
 */
function createCsvAggregateGroup(groupValues, metrics) {
    const metricStates = metrics.map((metricName) => [metricName, createNumericAggregateState()]);
    return {
        groupValues,
        count: 0,
        metrics: Object.fromEntries(metricStates),
    };
}
/**
 * Creates a running numeric summary (count, sum, min, max) for one metric.
 *
 * @returns {{ count: number, sum: number, min: (number|undefined),
 *             max: (number|undefined), skipped: number,
 *             add: (value: string) => void }}
 *   State object; `add` must be invoked as a method because it updates `this`.
 */
function createNumericAggregateState() {
    return {
        count: 0,
        sum: 0,
        min: undefined,
        max: undefined,
        skipped: 0,
        add(value) {
            const text = value.trim();
            const parsed = Number(text);
            // Blank cells and anything that is not a finite number are counted as
            // skipped rather than contributing 0 to the summary.
            const usable = text !== '' && Number.isFinite(parsed);
            if (!usable) {
                this.skipped += 1;
                return;
            }
            this.count += 1;
            this.sum += parsed;
            this.min = Math.min(this.min ?? parsed, parsed);
            this.max = Math.max(this.max ?? parsed, parsed);
        },
    };
}
/**
 * Flattens one aggregate group into an output row.
 *
 * The row starts with a copy of the group values, then `count` (when requested), then
 * `<fn>_<metric>` keys for every metric, following the order of `functions`.
 *
 * @param {{ groupValues: object, count: number, metrics: Object<string, object> }} group
 *   Accumulated group.
 * @param {Iterable<string>} functions - Requested functions: 'count', 'min', 'max',
 *   'sum', 'avg'. Other names are ignored.
 * @param {Iterable<string>} metrics - Metric column names to report.
 * @returns {object} Output row; metric values are null when the metric has no numeric
 *   samples.
 */
function formatCsvAggregateGroup(group, functions, metrics) {
    const row = { ...group.groupValues };
    for (const requested of functions) {
        if (requested === 'count') {
            row.count = group.count;
            break;
        }
    }
    for (const metric of metrics) {
        const stats = group.metrics[metric];
        for (const requested of functions) {
            switch (requested) {
                case 'min':
                    row[`min_${metric}`] = stats.min ?? null;
                    break;
                case 'max':
                    row[`max_${metric}`] = stats.max ?? null;
                    break;
                case 'sum':
                    row[`sum_${metric}`] = stats.count > 0 ? stats.sum : null;
                    break;
                case 'avg':
                    row[`avg_${metric}`] = stats.count > 0 ? stats.sum / stats.count : null;
                    break;
                default:
                    break;
            }
        }
    }
    return row;
}
/**
 * Sorts aggregate result rows in place by one column.
 *
 * @param {object[]} results - Rows to sort; mutated.
 * @param {{ column: string, direction?: string }} [orderBy] - Sort key; a direction of
 *   'desc' reverses the comparison, anything else sorts ascending. When omitted the
 *   rows are left untouched.
 * @returns {void}
 */
function sortCsvAggregateResults(results, orderBy) {
    if (orderBy) {
        const sign = orderBy.direction === 'desc' ? -1 : 1;
        const byColumn = (a, b) => sign * compareCsvAggregateValues(a[orderBy.column], b[orderBy.column]);
        results.sort(byColumn);
    }
}
/**
 * Comparator for aggregate output values.
 *
 * Identical values compare equal; a null/undefined value sorts after a present one;
 * two numbers compare numerically; everything else compares as strings through
 * `localeCompare`.
 *
 * @param {*} left - First value.
 * @param {*} right - Second value.
 * @returns {number} Negative, zero or positive, as expected by `Array.prototype.sort`.
 */
function compareCsvAggregateValues(left, right) {
    if (left === right) {
        return 0;
    }
    if (left == null) {
        return 1;
    }
    if (right == null) {
        return -1;
    }
    const bothNumbers = typeof left === 'number' && typeof right === 'number';
    return bothNumbers ? left - right : String(left).localeCompare(String(right));
}
//# sourceMappingURL=csv-helpers.js.map