@dpkit/table
Version:
Data Package implementation in TypeScript.
31 lines • 5.7 kB
JavaScript
import { DataType } from "nodejs-polars";
import { col, lit } from "nodejs-polars";
import { matchField } from "../field/index.js";
import { parseField } from "../field/index.js";
import { getPolarsSchema } from "../schema/index.js";
// Number of leading rows normalizeTable collects in order to read the table's schema.
const HEAD_ROWS = 100;
/**
 * Projects a table onto the fields declared in a schema.
 *
 * The first HEAD_ROWS rows are collected to obtain the table's schema, which is
 * converted with getPolarsSchema and handed to normalizeFields. The resulting
 * expressions are then passed to `table.select` in the order the fields are
 * declared in `schema.fields`.
 *
 * @param {object} table - Object exposing `head(n).collect()` and `select(exprs)`
 *   (presumably a lazy polars frame — confirm against the Table type).
 * @param {object} schema - Schema whose `fields` array defines the output columns.
 * @param {object} [options]
 * @param {boolean} [options.noParse] - Forwarded to normalizeFields unchanged.
 * @returns {Promise<*>} Whatever `table.select` returns for the built expressions.
 */
export async function normalizeTable(table, schema, options) {
  const { noParse } = options ?? {};

  const head = await table.head(HEAD_ROWS).collect();
  const polarsSchema = getPolarsSchema(head.schema);
  const exprs = normalizeFields(schema, polarsSchema, { noParse });

  // Do not use Object.values(exprs) here: plain objects enumerate integer-like
  // keys (e.g. "2019", "2020") first and in ascending numeric order, which would
  // reorder the selected columns relative to the schema. Walk the schema instead.
  const seenNames = new Set();
  const orderedExprs = [];
  for (const { name } of schema.fields) {
    // A repeated field name maps to a single entry in `exprs`; emit it once.
    if (seenNames.has(name)) {
      continue;
    }
    seenNames.add(name);
    orderedExprs.push(exprs[name]);
  }

  return table.select(orderedExprs);
}
/**
 * Builds one expression per schema field, keyed by the field's name.
 *
 * For each field, matchField is asked for the corresponding polars field:
 * - no match: the expression is a null literal aliased to the field name;
 * - match: the expression is the matched column aliased to the field name, and,
 *   unless `noParse` is set, it is passed through parseField when the matched
 *   column's type equals DataType.String. The field given to parseField carries
 *   `missingValues` from the field itself, falling back to the schema's.
 *
 * @param {object} schema - Schema with a `fields` array and optional `missingValues`.
 * @param {object} polarsSchema - Passed to matchField as-is.
 * @param {object} [options]
 * @param {boolean} [options.noParse] - When truthy, string columns are not parsed.
 * @returns {Object<string, *>} Map from field name to its expression.
 */
export function normalizeFields(schema, polarsSchema, options) {
  const parsingEnabled = !options?.noParse;
  const fields = schema.fields;
  const byName = {};

  for (let position = 0; position < fields.length; position += 1) {
    const field = fields[position];
    const matched = matchField(position, field, schema, polarsSchema);

    if (!matched) {
      // Field is absent from the data: fill the column with nulls.
      byName[field.name] = lit(null).alias(field.name);
      continue;
    }

    const column = col(matched.name).alias(field.name);
    // The type check is only evaluated when parsing is enabled.
    const needsParsing = parsingEnabled && matched.type.equals(DataType.String);

    byName[field.name] = needsParsing
      ? parseField(
          { ...field, missingValues: field.missingValues ?? schema.missingValues },
          column,
        )
      : column;
  }

  return byName;
}
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibm9ybWFsaXplLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vdGFibGUvbm9ybWFsaXplLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUVBLE9BQU8sRUFBRSxRQUFRLEVBQUUsTUFBTSxlQUFlLENBQUE7QUFDeEMsT0FBTyxFQUFFLEdBQUcsRUFBRSxHQUFHLEVBQUUsTUFBTSxlQUFlLENBQUE7QUFDeEMsT0FBTyxFQUFFLFVBQVUsRUFBRSxNQUFNLG1CQUFtQixDQUFBO0FBQzlDLE9BQU8sRUFBRSxVQUFVLEVBQUUsTUFBTSxtQkFBbUIsQ0FBQTtBQUM5QyxPQUFPLEVBQUUsZUFBZSxFQUFFLE1BQU0sb0JBQW9CLENBQUE7QUFJcEQsTUFBTSxTQUFTLEdBQUcsR0FBRyxDQUFBO0FBRXJCLE1BQU0sQ0FBQyxLQUFLLFVBQVUsY0FBYyxDQUNsQyxLQUFZLEVBQ1osTUFBYyxFQUNkLE9BRUM7SUFFRCxNQUFNLEVBQUUsT0FBTyxFQUFFLEdBQUcsT0FBTyxJQUFJLEVBQUUsQ0FBQTtJQUVqQyxNQUFNLElBQUksR0FBRyxNQUFNLEtBQUssQ0FBQyxJQUFJLENBQUMsU0FBUyxDQUFDLENBQUMsT0FBTyxFQUFFLENBQUE7SUFDbEQsTUFBTSxZQUFZLEdBQUcsZUFBZSxDQUFDLElBQUksQ0FBQyxNQUFNLENBQUMsQ0FBQTtJQUVqRCxPQUFPLEtBQUssQ0FBQyxNQUFNLENBQ2pCLE1BQU0sQ0FBQyxNQUFNLENBQUMsZUFBZSxDQUFDLE1BQU0sRUFBRSxZQUFZLEVBQUUsRUFBRSxPQUFPLEVBQUUsQ0FBQyxDQUFDLENBQ2xFLENBQUE7QUFDSCxDQUFDO0FBRUQsTUFBTSxVQUFVLGVBQWUsQ0FDN0IsTUFBYyxFQUNkLFlBQTBCLEVBQzFCLE9BRUM7SUFFRCxNQUFNLEVBQUUsT0FBTyxFQUFFLEdBQUcsT0FBTyxJQUFJLEVBQUUsQ0FBQTtJQUNqQyxNQUFNLEtBQUssR0FBeUIsRUFBRSxDQUFBO0lBRXRDLEtBQUssTUFBTSxDQUFDLEtBQUssRUFBRSxLQUFLLENBQUMsSUFBSSxNQUFNLENBQUMsTUFBTSxDQUFDLE9BQU8sRUFBRSxFQUFFLENBQUM7UUFDckQsTUFBTSxXQUFXLEdBQUcsVUFBVSxDQUFDLEtBQUssRUFBRSxLQUFLLEVBQUUsTUFBTSxFQUFFLFlBQVksQ0FBQyxDQUFBO1FBQ2xFLElBQUksSUFBSSxHQUFHLEdBQUcsQ0FBQyxJQUFJLENBQUMsQ0FBQyxLQUFLLENBQUMsS0FBSyxDQUFDLElBQUksQ0FBQyxDQUFBO1FBRXRDLElBQUksV0FBVyxFQUFFLENBQUM7WUFDaEIsSUFBSSxHQUFHLEdBQUcsQ0FBQyxXQUFXLENBQUMsSUFBSSxDQUFDLENBQUMsS0FBSyxDQUFDLEtBQUssQ0FBQyxJQUFJLENBQUMsQ0FBQTtZQUU5QyxJQUFJLENBQUMsT0FBTyxJQUFJLFdBQVcsQ0FBQyxJQUFJLENBQUMsTUFBTSxDQUFDLFFBQVEsQ0FBQyxNQUFNLENBQUMsRUFBRSxDQUFDO2dCQUN6RCxNQUFNLGFBQWEsR0FBRyxLQUFLLENBQUMsYUFBYSxJQUFJLE1BQU0sQ0FBQyxhQUFhLENBQUE7Z0JBQ2pFLE1BQU0sV0FBVyxHQUFHLEVBQUUsR0FBRyxLQUFLLEVBQUUsYUFBYSxFQUFFLENBQUE7Z0JBQy9DLElBQUksR0FBRyxVQUFVLENBQUMsV0FBVy
xFQUFFLElBQUksQ0FBQyxDQUFBO1lBQ3RDLENBQUM7UUFDSCxDQUFDO1FBRUQsS0FBSyxDQUFDLEtBQUssQ0FBQyxJQUFJLENBQUMsR0FBRyxJQUFJLENBQUE7SUFDMUIsQ0FBQztJQUVELE9BQU8sS0FBSyxDQUFBO0FBQ2QsQ0FBQyIsInNvdXJjZXNDb250ZW50IjpbImltcG9ydCB0eXBlIHsgU2NoZW1hIH0gZnJvbSBcIkBkcGtpdC9jb3JlXCJcbmltcG9ydCB0eXBlIHsgRXhwciB9IGZyb20gXCJub2RlanMtcG9sYXJzXCJcbmltcG9ydCB7IERhdGFUeXBlIH0gZnJvbSBcIm5vZGVqcy1wb2xhcnNcIlxuaW1wb3J0IHsgY29sLCBsaXQgfSBmcm9tIFwibm9kZWpzLXBvbGFyc1wiXG5pbXBvcnQgeyBtYXRjaEZpZWxkIH0gZnJvbSBcIi4uL2ZpZWxkL2luZGV4LnRzXCJcbmltcG9ydCB7IHBhcnNlRmllbGQgfSBmcm9tIFwiLi4vZmllbGQvaW5kZXgudHNcIlxuaW1wb3J0IHsgZ2V0UG9sYXJzU2NoZW1hIH0gZnJvbSBcIi4uL3NjaGVtYS9pbmRleC50c1wiXG5pbXBvcnQgdHlwZSB7IFBvbGFyc1NjaGVtYSB9IGZyb20gXCIuLi9zY2hlbWEvaW5kZXgudHNcIlxuaW1wb3J0IHR5cGUgeyBUYWJsZSB9IGZyb20gXCIuL1RhYmxlLnRzXCJcblxuY29uc3QgSEVBRF9ST1dTID0gMTAwXG5cbmV4cG9ydCBhc3luYyBmdW5jdGlvbiBub3JtYWxpemVUYWJsZShcbiAgdGFibGU6IFRhYmxlLFxuICBzY2hlbWE6IFNjaGVtYSxcbiAgb3B0aW9ucz86IHtcbiAgICBub1BhcnNlPzogYm9vbGVhblxuICB9LFxuKSB7XG4gIGNvbnN0IHsgbm9QYXJzZSB9ID0gb3B0aW9ucyA/PyB7fVxuXG4gIGNvbnN0IGhlYWQgPSBhd2FpdCB0YWJsZS5oZWFkKEhFQURfUk9XUykuY29sbGVjdCgpXG4gIGNvbnN0IHBvbGFyc1NjaGVtYSA9IGdldFBvbGFyc1NjaGVtYShoZWFkLnNjaGVtYSlcblxuICByZXR1cm4gdGFibGUuc2VsZWN0KFxuICAgIE9iamVjdC52YWx1ZXMobm9ybWFsaXplRmllbGRzKHNjaGVtYSwgcG9sYXJzU2NoZW1hLCB7IG5vUGFyc2UgfSkpLFxuICApXG59XG5cbmV4cG9ydCBmdW5jdGlvbiBub3JtYWxpemVGaWVsZHMoXG4gIHNjaGVtYTogU2NoZW1hLFxuICBwb2xhcnNTY2hlbWE6IFBvbGFyc1NjaGVtYSxcbiAgb3B0aW9ucz86IHtcbiAgICBub1BhcnNlPzogYm9vbGVhblxuICB9LFxuKSB7XG4gIGNvbnN0IHsgbm9QYXJzZSB9ID0gb3B0aW9ucyA/PyB7fVxuICBjb25zdCBleHByczogUmVjb3JkPHN0cmluZywgRXhwcj4gPSB7fVxuXG4gIGZvciAoY29uc3QgW2luZGV4LCBmaWVsZF0gb2Ygc2NoZW1hLmZpZWxkcy5lbnRyaWVzKCkpIHtcbiAgICBjb25zdCBwb2xhcnNGaWVsZCA9IG1hdGNoRmllbGQoaW5kZXgsIGZpZWxkLCBzY2hlbWEsIHBvbGFyc1NjaGVtYSlcbiAgICBsZXQgZXhwciA9IGxpdChudWxsKS5hbGlhcyhmaWVsZC5uYW1lKVxuXG4gICAgaWYgKHBvbGFyc0ZpZWxkKSB7XG4gICAgICBleHByID0gY29sKHBvbGFyc0ZpZWxkLm5hbWUpLmFsaWFzKGZpZWxkLm5hbWUpXG5cbiAgICAgIGlmICghbm9QYXJzZSAmJiBwb2xhcnNGaWVsZC50eX
BlLmVxdWFscyhEYXRhVHlwZS5TdHJpbmcpKSB7XG4gICAgICAgIGNvbnN0IG1pc3NpbmdWYWx1ZXMgPSBmaWVsZC5taXNzaW5nVmFsdWVzID8/IHNjaGVtYS5taXNzaW5nVmFsdWVzXG4gICAgICAgIGNvbnN0IG1lcmdlZEZpZWxkID0geyAuLi5maWVsZCwgbWlzc2luZ1ZhbHVlcyB9XG4gICAgICAgIGV4cHIgPSBwYXJzZUZpZWxkKG1lcmdlZEZpZWxkLCBleHByKVxuICAgICAgfVxuICAgIH1cblxuICAgIGV4cHJzW2ZpZWxkLm5hbWVdID0gZXhwclxuICB9XG5cbiAgcmV0dXJuIGV4cHJzXG59XG4iXX0=