@dpkit/table
Version:
Data Package implementation in TypeScript.
32 lines • 5.91 kB
JavaScript
import { DataType } from "nodejs-polars";
import { col, lit } from "nodejs-polars";
import { matchField } from "../field/index.js";
import { normalizeField } from "../field/index.js";
import { getPolarsSchema } from "../schema/index.js";
// Number of leading rows that normalizeTable collects in order to read the table's schema.
const HEAD_ROWS = 100;
/**
 * Projects `table` onto the fields declared in `schema`.
 *
 * The first HEAD_ROWS rows are collected to obtain the table's schema, which
 * is converted with `getPolarsSchema` and passed to `normalizeFields`; the
 * resulting expressions are then handed to `table.select`.
 *
 * @param {object} table - Object exposing `head(n).collect()` and `select(...exprs)`.
 * @param {object} schema - Schema whose `fields` define the output columns.
 * @param {{ dontParse?: boolean }} [options] - `dontParse` is forwarded to `normalizeFields`.
 * @returns {Promise<*>} Whatever `table.select` returns for the built expressions.
 */
export async function normalizeTable(table, schema, options) {
  const { dontParse } = options ?? {};

  const head = await table.head(HEAD_ROWS).collect();
  const polarsSchema = getPolarsSchema(head.schema);
  const exprsByName = normalizeFields(schema, polarsSchema, { dontParse });

  // Walk schema.fields rather than Object.values(exprsByName): plain objects
  // enumerate integer-like keys ("2019", "2020", ...) first in ascending
  // order, which would move such columns ahead of the others and break the
  // schema's field order.
  const seenNames = new Set();
  const orderedExprs = [];
  for (const { name } of schema.fields) {
    // A repeated field name maps to a single record entry, so select it once.
    if (seenNames.has(name)) {
      continue;
    }
    seenNames.add(name);
    orderedExprs.push(exprsByName[name]);
  }

  return table.select(...orderedExprs);
}
/**
 * Builds one expression per schema field, keyed by field name.
 *
 * Each field is looked up in `polarsSchema` through `matchField`:
 * - no match: a null literal aliased to the field name;
 * - match: the matched column aliased to the field name, and, when the
 *   matched column's type equals `DataType.String`, passed through
 *   `normalizeField` with the field's `missingValues` (falling back to
 *   `schema.missingValues`).
 *
 * @param {object} schema - Schema; `fields` and `missingValues` are read.
 * @param {object} polarsSchema - Schema of the actual columns, handed to `matchField`.
 * @param {{ dontParse?: boolean }} [options] - `dontParse` is forwarded to `normalizeField`.
 * @returns {Object<string, *>} Expressions keyed by field name.
 */
export function normalizeFields(schema, polarsSchema, options) {
  const { dontParse } = options ?? {};

  const entries = schema.fields.map((field, index) => {
    const polarsField = matchField(index, field, schema, polarsSchema);
    if (!polarsField) {
      // Field is declared in the schema but has no matching column.
      return [field.name, lit(null).alias(field.name)];
    }

    const column = col(polarsField.name).alias(field.name);

    // TODO: Move this logic to normalizeField?
    if (!polarsField.type.equals(DataType.String)) {
      return [field.name, column];
    }

    // Field-level missingValues take precedence over the schema-level ones.
    const missingValues = field.missingValues ?? schema.missingValues;
    const mergedField = { ...field, missingValues };
    return [field.name, normalizeField(mergedField, column, { dontParse })];
  });

  // Object.fromEntries defines own data properties, so a field literally
  // named "__proto__" is stored like any other key instead of replacing the
  // record's prototype (which is what `record[name] = value` would do).
  return Object.fromEntries(entries);
}
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibm9ybWFsaXplLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vdGFibGUvbm9ybWFsaXplLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUVBLE9BQU8sRUFBRSxRQUFRLEVBQUUsTUFBTSxlQUFlLENBQUE7QUFDeEMsT0FBTyxFQUFFLEdBQUcsRUFBRSxHQUFHLEVBQUUsTUFBTSxlQUFlLENBQUE7QUFDeEMsT0FBTyxFQUFFLFVBQVUsRUFBRSxNQUFNLG1CQUFtQixDQUFBO0FBQzlDLE9BQU8sRUFBRSxjQUFjLEVBQUUsTUFBTSxtQkFBbUIsQ0FBQTtBQUNsRCxPQUFPLEVBQUUsZUFBZSxFQUFFLE1BQU0sb0JBQW9CLENBQUE7QUFJcEQsTUFBTSxTQUFTLEdBQUcsR0FBRyxDQUFBO0FBRXJCLE1BQU0sQ0FBQyxLQUFLLFVBQVUsY0FBYyxDQUNsQyxLQUFZLEVBQ1osTUFBYyxFQUNkLE9BRUM7SUFFRCxNQUFNLEVBQUUsU0FBUyxFQUFFLEdBQUcsT0FBTyxJQUFJLEVBQUUsQ0FBQTtJQUVuQyxNQUFNLElBQUksR0FBRyxNQUFNLEtBQUssQ0FBQyxJQUFJLENBQUMsU0FBUyxDQUFDLENBQUMsT0FBTyxFQUFFLENBQUE7SUFDbEQsTUFBTSxZQUFZLEdBQUcsZUFBZSxDQUFDLElBQUksQ0FBQyxNQUFNLENBQUMsQ0FBQTtJQUVqRCxPQUFPLEtBQUssQ0FBQyxNQUFNLENBQ2pCLEdBQUcsTUFBTSxDQUFDLE1BQU0sQ0FBQyxlQUFlLENBQUMsTUFBTSxFQUFFLFlBQVksRUFBRSxFQUFFLFNBQVMsRUFBRSxDQUFDLENBQUMsQ0FDdkUsQ0FBQTtBQUNILENBQUM7QUFFRCxNQUFNLFVBQVUsZUFBZSxDQUM3QixNQUFjLEVBQ2QsWUFBMEIsRUFDMUIsT0FFQztJQUVELE1BQU0sRUFBRSxTQUFTLEVBQUUsR0FBRyxPQUFPLElBQUksRUFBRSxDQUFBO0lBQ25DLE1BQU0sS0FBSyxHQUF5QixFQUFFLENBQUE7SUFFdEMsS0FBSyxNQUFNLENBQUMsS0FBSyxFQUFFLEtBQUssQ0FBQyxJQUFJLE1BQU0sQ0FBQyxNQUFNLENBQUMsT0FBTyxFQUFFLEVBQUUsQ0FBQztRQUNyRCxNQUFNLFdBQVcsR0FBRyxVQUFVLENBQUMsS0FBSyxFQUFFLEtBQUssRUFBRSxNQUFNLEVBQUUsWUFBWSxDQUFDLENBQUE7UUFDbEUsSUFBSSxJQUFJLEdBQUcsR0FBRyxDQUFDLElBQUksQ0FBQyxDQUFDLEtBQUssQ0FBQyxLQUFLLENBQUMsSUFBSSxDQUFDLENBQUE7UUFFdEMsSUFBSSxXQUFXLEVBQUUsQ0FBQztZQUNoQixJQUFJLEdBQUcsR0FBRyxDQUFDLFdBQVcsQ0FBQyxJQUFJLENBQUMsQ0FBQyxLQUFLLENBQUMsS0FBSyxDQUFDLElBQUksQ0FBQyxDQUFBO1lBRTlDLDJDQUEyQztZQUMzQyxJQUFJLFdBQVcsQ0FBQyxJQUFJLENBQUMsTUFBTSxDQUFDLFFBQVEsQ0FBQyxNQUFNLENBQUMsRUFBRSxDQUFDO2dCQUM3QyxNQUFNLGFBQWEsR0FBRyxLQUFLLENBQUMsYUFBYSxJQUFJLE1BQU0sQ0FBQyxhQUFhLENBQUE7Z0JBQ2pFLE1BQU0sV0FBVyxHQUFHLEVBQUUsR0FBRyxLQUFLLEVBQUUsYUFBYSxFQUFFLENBQUE7Z0JBQy9DLElBQUksR0FBRyxjQUFjLENBQUMsV0
FBVyxFQUFFLElBQUksRUFBRSxFQUFFLFNBQVMsRUFBRSxDQUFDLENBQUE7WUFDekQsQ0FBQztRQUNILENBQUM7UUFFRCxLQUFLLENBQUMsS0FBSyxDQUFDLElBQUksQ0FBQyxHQUFHLElBQUksQ0FBQTtJQUMxQixDQUFDO0lBRUQsT0FBTyxLQUFLLENBQUE7QUFDZCxDQUFDIiwic291cmNlc0NvbnRlbnQiOlsiaW1wb3J0IHR5cGUgeyBTY2hlbWEgfSBmcm9tIFwiQGRwa2l0L2NvcmVcIlxuaW1wb3J0IHR5cGUgeyBFeHByIH0gZnJvbSBcIm5vZGVqcy1wb2xhcnNcIlxuaW1wb3J0IHsgRGF0YVR5cGUgfSBmcm9tIFwibm9kZWpzLXBvbGFyc1wiXG5pbXBvcnQgeyBjb2wsIGxpdCB9IGZyb20gXCJub2RlanMtcG9sYXJzXCJcbmltcG9ydCB7IG1hdGNoRmllbGQgfSBmcm9tIFwiLi4vZmllbGQvaW5kZXgudHNcIlxuaW1wb3J0IHsgbm9ybWFsaXplRmllbGQgfSBmcm9tIFwiLi4vZmllbGQvaW5kZXgudHNcIlxuaW1wb3J0IHsgZ2V0UG9sYXJzU2NoZW1hIH0gZnJvbSBcIi4uL3NjaGVtYS9pbmRleC50c1wiXG5pbXBvcnQgdHlwZSB7IFBvbGFyc1NjaGVtYSB9IGZyb20gXCIuLi9zY2hlbWEvaW5kZXgudHNcIlxuaW1wb3J0IHR5cGUgeyBUYWJsZSB9IGZyb20gXCIuL1RhYmxlLnRzXCJcblxuY29uc3QgSEVBRF9ST1dTID0gMTAwXG5cbmV4cG9ydCBhc3luYyBmdW5jdGlvbiBub3JtYWxpemVUYWJsZShcbiAgdGFibGU6IFRhYmxlLFxuICBzY2hlbWE6IFNjaGVtYSxcbiAgb3B0aW9ucz86IHtcbiAgICBkb250UGFyc2U/OiBib29sZWFuXG4gIH0sXG4pIHtcbiAgY29uc3QgeyBkb250UGFyc2UgfSA9IG9wdGlvbnMgPz8ge31cblxuICBjb25zdCBoZWFkID0gYXdhaXQgdGFibGUuaGVhZChIRUFEX1JPV1MpLmNvbGxlY3QoKVxuICBjb25zdCBwb2xhcnNTY2hlbWEgPSBnZXRQb2xhcnNTY2hlbWEoaGVhZC5zY2hlbWEpXG5cbiAgcmV0dXJuIHRhYmxlLnNlbGVjdChcbiAgICAuLi5PYmplY3QudmFsdWVzKG5vcm1hbGl6ZUZpZWxkcyhzY2hlbWEsIHBvbGFyc1NjaGVtYSwgeyBkb250UGFyc2UgfSkpLFxuICApXG59XG5cbmV4cG9ydCBmdW5jdGlvbiBub3JtYWxpemVGaWVsZHMoXG4gIHNjaGVtYTogU2NoZW1hLFxuICBwb2xhcnNTY2hlbWE6IFBvbGFyc1NjaGVtYSxcbiAgb3B0aW9ucz86IHtcbiAgICBkb250UGFyc2U/OiBib29sZWFuXG4gIH0sXG4pIHtcbiAgY29uc3QgeyBkb250UGFyc2UgfSA9IG9wdGlvbnMgPz8ge31cbiAgY29uc3QgZXhwcnM6IFJlY29yZDxzdHJpbmcsIEV4cHI+ID0ge31cblxuICBmb3IgKGNvbnN0IFtpbmRleCwgZmllbGRdIG9mIHNjaGVtYS5maWVsZHMuZW50cmllcygpKSB7XG4gICAgY29uc3QgcG9sYXJzRmllbGQgPSBtYXRjaEZpZWxkKGluZGV4LCBmaWVsZCwgc2NoZW1hLCBwb2xhcnNTY2hlbWEpXG4gICAgbGV0IGV4cHIgPSBsaXQobnVsbCkuYWxpYXMoZmllbGQubmFtZSlcblxuICAgIGlmIChwb2xhcnNGaWVsZCkge1xuICAgICAgZXhwciA9IGNvbChwb2xhcnNGaWVsZC5uYW1lKS5hbGlhcyhmaWVsZC5uYW1lKV
xuXG4gICAgICAvLyBUT0RPOiBNb3ZlIHRoaXMgbG9naWMgdG8gbm9ybWFsaXplRmllbGQ/XG4gICAgICBpZiAocG9sYXJzRmllbGQudHlwZS5lcXVhbHMoRGF0YVR5cGUuU3RyaW5nKSkge1xuICAgICAgICBjb25zdCBtaXNzaW5nVmFsdWVzID0gZmllbGQubWlzc2luZ1ZhbHVlcyA/PyBzY2hlbWEubWlzc2luZ1ZhbHVlc1xuICAgICAgICBjb25zdCBtZXJnZWRGaWVsZCA9IHsgLi4uZmllbGQsIG1pc3NpbmdWYWx1ZXMgfVxuICAgICAgICBleHByID0gbm9ybWFsaXplRmllbGQobWVyZ2VkRmllbGQsIGV4cHIsIHsgZG9udFBhcnNlIH0pXG4gICAgICB9XG4gICAgfVxuXG4gICAgZXhwcnNbZmllbGQubmFtZV0gPSBleHByXG4gIH1cblxuICByZXR1cm4gZXhwcnNcbn1cbiJdfQ==