n8n
Version:
n8n Workflow Automation Tool
207 lines • 8.81 kB
JavaScript
;
// Sets the `__esModule` flag on the CommonJS exports object (the conventional
// marker for a module compiled from ES-module syntax).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. The functions are declared further down the file;
// these assignments work here because function declarations are hoisted.
exports.queryCsv = queryCsv;
exports.profileCsv = profileCsv;
exports.distinctCsv = distinctCsv;
exports.aggregateCsv = aggregateCsv;
// Sibling module providing the helpers and CSV_* limits that every operation
// below calls (file resolution, record streaming, filter matching, etc.).
const csv_helpers_1 = require("./csv-helpers");
/**
 * Query one CSV file: return either the rows that satisfy `input.where`, or the
 * single row found at file line `input.rowNumber`.
 *
 * The file is streamed once. Every matching row is counted, but only the first
 * `input.limit` rows (default 20) are materialised in the result. When
 * `input.rowNumber` is given, the `where` filters are not used for matching
 * (their columns are still validated against the header row).
 *
 * @param workspaceRoot Forwarded unchanged to `streamCsvRecords`.
 * @param files Candidate files; `resolveCsvFile` selects the one named by `input.file`.
 * @param {object} input Query options: `file`, `select`, `where`, `rowNumber`, `limit`.
 * @returns {Promise<object>} File identity, the output `columns`, the collected
 *   `rows` / `rowNumbers` / `records`, the total match count (`rowCount`), a
 *   `truncated` flag, and, for filter queries that matched more than one row,
 *   an `ambiguity` description built by `buildCsvAmbiguity`.
 * @throws {Error} When neither `input.select` nor `input.rowNumber` is supplied.
 */
async function queryCsv(workspaceRoot, files, input) {
    const {
        resolveCsvFile,
        streamCsvRecords,
        validateCsvColumns,
        createCsvDistinctTracker,
        getSuggestedDisambiguatingColumns,
        matchesFilters,
        toCsvRecordValues,
        buildCsvAmbiguity,
        CSV_SAMPLE_VALUE_LIMIT,
    } = csv_helpers_1;

    const file = resolveCsvFile(files, input.file);
    const { select, rowNumber } = input;
    const byRowNumber = rowNumber !== undefined;
    if (!byRowNumber && select === undefined) {
        throw new Error('csv_query requires select unless rowNumber is provided.');
    }

    const maxRows = input.limit ?? 20;
    const filters = input.where ?? [];
    const headers = [];
    // `headers` is filled in place when the header row arrives, so this
    // reference stays correct even though it is taken before streaming starts.
    const columns = select ?? headers;
    const rows = [];
    const rowNumbers = [];
    const records = [];
    let matched = 0;
    let tracker;

    const handleHeaders = (parsedHeaders) => {
        headers.push(...parsedHeaders);
        const referenced = [...(select ?? []), ...filters.map((filter) => filter.column)];
        validateCsvColumns(headers, file.fileName, referenced);
        const candidates = getSuggestedDisambiguatingColumns(headers, filters, select ?? []);
        tracker = createCsvDistinctTracker(candidates, CSV_SAMPLE_VALUE_LIMIT);
    };

    const handleRecord = ({ record, fileLineNumber }) => {
        const wanted = byRowNumber
            ? fileLineNumber === rowNumber
            : matchesFilters(record, filters);
        if (!wanted) {
            return;
        }
        matched += 1;
        tracker?.add(record);
        // Keep counting matches past the limit, but stop collecting them.
        if (rows.length >= maxRows) {
            return;
        }
        const values = toCsvRecordValues(record, columns);
        rows.push(columns.map((column) => values[column]));
        rowNumbers.push(fileLineNumber);
        records.push({ rowNumber: fileLineNumber, fileLineNumber, values });
    };

    await streamCsvRecords(workspaceRoot, file, {
        onHeaders: handleHeaders,
        onRecord: handleRecord,
    });

    // No header row was seen: still run the selected columns through validation.
    if (headers.length === 0) {
        validateCsvColumns(headers, file.fileName, select ?? []);
    }

    const truncated = matched > rows.length;
    const isAmbiguous = !byRowNumber && (matched > 1 || truncated);
    return {
        fileName: file.fileName,
        relativePath: file.relativePath,
        columns,
        rowNumbers,
        rows,
        records,
        rowCount: matched,
        truncated,
        rowNumberBase: 'rowNumber is the CSV file line number; line 1 is the header row.',
        ambiguity: isAmbiguous ? buildCsvAmbiguity(matched, maxRows, tracker) : undefined,
    };
}
/**
 * Build a profile of one CSV file in a single streaming pass: total row count,
 * a few sample rows, and a per-column profile produced by the state objects
 * from `createCsvColumnProfileState`.
 *
 * @param workspaceRoot Forwarded unchanged to `streamCsvRecords`.
 * @param files Candidate files; `resolveCsvFile` selects the one named by `input.file`.
 * @param {object} input Options: `file`, `sampleSize` (default 5) and
 *   `distinctLimit` (default `CSV_PROFILE_DISTINCT_LIMIT`).
 * @returns {Promise<object>} File identity, `columns`, `rowCount`, `sampleRows`,
 *   `columnProfiles`, `likelyKeyColumns` (columns whose reported distinct count
 *   equals a non-zero row count) and `likelyDisambiguatingColumns`.
 */
async function profileCsv(workspaceRoot, files, input) {
    const {
        resolveCsvFile,
        streamCsvRecords,
        createCsvColumnProfileState,
        toCsvRecordValues,
        normaliseCsvValue,
        getLikelyDisambiguatingColumns,
    } = csv_helpers_1;

    const file = resolveCsvFile(files, input.file);
    const distinctLimit = input.distinctLimit ?? csv_helpers_1.CSV_PROFILE_DISTINCT_LIMIT;
    const sampleSize = input.sampleSize ?? 5;
    const headers = [];
    const sampleRows = [];
    // One profile accumulator per column name.
    const profileStates = new Map();
    let rowCount = 0;

    await streamCsvRecords(workspaceRoot, file, {
        onHeaders: (parsedHeaders) => {
            headers.push(...parsedHeaders);
            headers.forEach((header) => {
                profileStates.set(header, createCsvColumnProfileState(distinctLimit));
            });
        },
        onRecord: ({ record }) => {
            rowCount += 1;
            if (sampleRows.length < sampleSize) {
                sampleRows.push(toCsvRecordValues(record, headers));
            }
            headers.forEach((header) => {
                profileStates.get(header)?.add(normaliseCsvValue(record[header]));
            });
        },
    });

    const columnProfiles = headers.map((header) => {
        // Fall back to a fresh, empty state if a column somehow has none.
        const state = profileStates.get(header) ?? createCsvColumnProfileState(distinctLimit);
        return state.toOutput(header);
    });

    // A column is a key candidate when every row carries a different value.
    const likelyKeyColumns = [];
    for (const column of columnProfiles) {
        if (column.distinctCount === rowCount && rowCount > 0) {
            likelyKeyColumns.push(column.name);
        }
    }
    const likelyDisambiguatingColumns = getLikelyDisambiguatingColumns(columnProfiles, rowCount);

    return {
        fileName: file.fileName,
        relativePath: file.relativePath,
        columns: headers,
        rowCount,
        sampleRows,
        columnProfiles,
        likelyKeyColumns,
        likelyDisambiguatingColumns,
    };
}
/**
 * List the distinct (normalised) values of one column of a CSV file,
 * optionally restricted to rows that satisfy `input.where`.
 *
 * At most `CSV_DISTINCT_TRACK_LIMIT` distinct values are tracked, and at most
 * `input.limit` (default 50) of them are returned, in order of first
 * appearance.
 *
 * @param workspaceRoot Forwarded unchanged to `streamCsvRecords`.
 * @param files Candidate files; `resolveCsvFile` selects the one named by `input.file`.
 * @param {object} input Options: `file`, `column`, `where`, `limit`.
 * @returns {Promise<object>} File identity, the `column`, the returned `values`,
 *   `distinctCount` (number of tracked distinct values) and `truncated`, which
 *   is true when the tracking limit was hit or when more distinct values were
 *   tracked than returned.
 */
async function distinctCsv(workspaceRoot, files, input) {
    const {
        resolveCsvFile,
        streamCsvRecords,
        validateCsvColumns,
        matchesFilters,
        normaliseCsvValue,
    } = csv_helpers_1;

    const file = resolveCsvFile(files, input.file);
    const filters = input.where ?? [];
    const limit = input.limit ?? 50;
    const values = new Set();
    const outputValues = [];
    let distinctTruncated = false;

    await streamCsvRecords(workspaceRoot, file, {
        onHeaders: (headers) => {
            validateCsvColumns(headers, file.fileName, [
                input.column,
                ...filters.map((filter) => filter.column),
            ]);
        },
        onRecord: ({ record }) => {
            if (!matchesFilters(record, filters)) {
                return;
            }
            const value = normaliseCsvValue(record[input.column]);
            if (values.has(value)) {
                return;
            }
            if (values.size >= csv_helpers_1.CSV_DISTINCT_TRACK_LIMIT) {
                // Tracking is saturated, so a repeat of this value could not be
                // recognised later. Emitting it would let duplicates leak into
                // the output; flag the truncation and skip it instead.
                distinctTruncated = true;
                return;
            }
            values.add(value);
            if (outputValues.length < limit) {
                outputValues.push(value);
            }
        },
    });

    return {
        fileName: file.fileName,
        relativePath: file.relativePath,
        column: input.column,
        values: outputValues,
        distinctCount: values.size,
        truncated: distinctTruncated || values.size > outputValues.length,
    };
}
/**
 * Aggregate the rows of one CSV file, optionally filtered by `input.where` and
 * grouped by the columns in `input.groupBy`.
 *
 * Rows are streamed once. Each distinct combination of group-by values gets
 * its own group, up to `CSV_MAX_AGGREGATE_GROUPS`; rows that would open a
 * group beyond that cap are counted in `rowCount` but not aggregated. For an
 * ungrouped aggregate (`groupBy` absent or empty) that matches no rows, a
 * single empty group is still reported so the caller receives a zero result
 * rather than none.
 *
 * @param workspaceRoot Forwarded unchanged to `streamCsvRecords`.
 * @param files Candidate files; `resolveCsvFile` selects the one named by `input.file`.
 * @param {object} input Options: `file`, `functions` (default `['count']`),
 *   `metric` / `metrics`, `groupBy`, `where`, `orderBy`, `limit` (default 50).
 * @returns {Promise<object>} File identity, matched `rowCount`, the effective
 *   `functions` and `metrics`, the `groupBy` as given, up to `limit` formatted
 *   `results`, a `truncated` flag, and `skippedNonNumeric` (per metric, the sum
 *   of each group's `skipped` counter).
 * @throws {Error} When a function other than `count` is requested without any metric.
 */
async function aggregateCsv(workspaceRoot, files, input) {
    const {
        resolveCsvFile,
        streamCsvRecords,
        validateCsvColumns,
        matchesFilters,
        toCsvRecordValues,
        normaliseCsvValue,
        createCsvAggregateGroup,
        formatCsvAggregateGroup,
        sortCsvAggregateResults,
    } = csv_helpers_1;

    const file = resolveCsvFile(files, input.file);
    const functions = input.functions ?? ['count'];
    // Merge the singular and plural metric options, dropping duplicates.
    const metrics = Array.from(new Set([...(input.metric ? [input.metric] : []), ...(input.metrics ?? [])]));
    const needsMetric = functions.some((fn) => fn !== 'count');
    if (needsMetric && metrics.length === 0) {
        throw new Error('csv_aggregate requires metric or metrics for min, max, sum, or avg.');
    }

    const groupBy = input.groupBy ?? [];
    const filters = input.where ?? [];
    const groups = new Map();
    let rowCount = 0;
    let groupLimitReached = false;

    await streamCsvRecords(workspaceRoot, file, {
        onHeaders: (headers) => {
            validateCsvColumns(headers, file.fileName, [
                ...metrics,
                ...groupBy,
                ...filters.map((filter) => filter.column),
            ]);
        },
        onRecord: ({ record }) => {
            if (!matchesFilters(record, filters)) {
                return;
            }
            rowCount++;
            const groupValues = toCsvRecordValues(record, groupBy);
            const key = JSON.stringify(groupValues);
            let group = groups.get(key);
            if (!group) {
                if (groups.size >= csv_helpers_1.CSV_MAX_AGGREGATE_GROUPS) {
                    groupLimitReached = true;
                    return;
                }
                group = createCsvAggregateGroup(groupValues, metrics);
                groups.set(key, group);
            }
            group.count++;
            for (const metric of metrics) {
                group.metrics[metric].add(normaliseCsvValue(record[metric]));
            }
        },
    });

    // An ungrouped aggregate always yields exactly one result row. `groupBy: []`
    // is treated like an absent `groupBy`, matching how it is handled above.
    if (groups.size === 0 && groupBy.length === 0) {
        groups.set(JSON.stringify({}), createCsvAggregateGroup({}, metrics));
    }

    const results = Array.from(groups.values()).map((group) => formatCsvAggregateGroup(group, functions, metrics));
    sortCsvAggregateResults(results, input.orderBy);
    const limit = input.limit ?? 50;

    const skippedNonNumeric = {};
    for (const group of groups.values()) {
        for (const metric of metrics) {
            skippedNonNumeric[metric] = (skippedNonNumeric[metric] ?? 0) + group.metrics[metric].skipped;
        }
    }

    return {
        fileName: file.fileName,
        relativePath: file.relativePath,
        rowCount,
        functions,
        metrics,
        groupBy: input.groupBy,
        results: results.slice(0, limit),
        truncated: results.length > limit || groupLimitReached,
        skippedNonNumeric,
    };
}
//# sourceMappingURL=csv.operation.js.map