@dpkit/table
Version:
Data Package implementation in TypeScript.
185 lines • 26.8 kB
JavaScript
import { col } from "nodejs-polars";
import { getPolarsSchema } from "../schema/index.js";
/**
 * Infers a schema from the leading rows of a table.
 *
 * Takes the first `options.sampleRows` rows (100 when the option is not
 * provided), collects them, and hands the collected sample together with the
 * untouched `options` to `inferSchemaFromSample`.
 *
 * @param table - Object exposing `head(n)` whose result has an async `collect()`.
 * @param options - Optional inference options; forwarded unchanged.
 * @returns A promise resolving to whatever `inferSchemaFromSample` returns.
 */
export async function inferSchemaFromTable(table, options) {
    const requestedRows = options?.sampleRows;
    // Only a missing (undefined) value falls back to the default of 100.
    const rowLimit = requestedRows === undefined ? 100 : requestedRows;
    const collected = await table.head(rowLimit).collect();
    return inferSchemaFromSample(collected, options);
}
/**
 * Infers a schema (`{ fields: [...] }`) from an already-collected sample.
 *
 * For every requested field name the Polars type variant is translated via
 * `createTypeMapping()`. Columns that resolve to "string" are additionally
 * tested against the patterns from `createRegexMapping(options)`, in
 * insertion order; the first pattern with fewer non-matching rows than the
 * failure threshold contributes its patch to the field. That pattern step is
 * skipped when `options.keepStrings` is truthy or when `options.fieldTypes`
 * carries a truthy entry for the field.
 *
 * @param sample - Collected frame; `schema`, `height` and `filter` are used.
 * @param options - Optional inference options (`confidence` defaults to 0.9).
 * @returns The inferred schema after `enhanceField` / `enhanceSchema` ran.
 * @throws {Error} When a requested field name is absent from the sample schema.
 */
export function inferSchemaFromSample(sample, options) {
    const settings = options ?? {};
    const confidence = settings.confidence === undefined ? 0.9 : settings.confidence;
    const explicitTypes = settings.fieldTypes;
    const keepStrings = settings.keepStrings;

    const variantToType = createTypeMapping();
    const patternPatches = createRegexMapping(options);
    const polarsSchema = getPolarsSchema(sample.schema);
    const fieldNames = options?.fieldNames ?? polarsSchema.fields.map(entry => entry.name);

    // Number of non-matching rows at which a pattern is rejected; never below 1.
    const toleratedRows = sample.height - Math.floor(sample.height * confidence);
    const failureThreshold = toleratedRows || 1;

    const schema = { fields: [] };

    for (const name of fieldNames) {
        const polarsField = polarsSchema.fields.find(entry => entry.name === name);
        if (!polarsField) {
            throw new Error(`Field "${name}" not found in the table`);
        }

        // TODO: Remove this workaround once the issue is fixed
        // https://github.com/pola-rs/nodejs-polars/issues/372
        // Unknown variant names are retried with their last character dropped.
        const reportedVariant = polarsField.type.variant;
        const variant = variantToType[reportedVariant]
            ? reportedVariant
            : reportedVariant.slice(0, -1);

        const explicitType = explicitTypes?.[name];
        let type = explicitType ?? variantToType[variant] ?? "any";
        if (type === "array" && options?.arrayType === "list") {
            type = "list";
        }

        // First pattern whose non-matching row count stays under the threshold.
        let matchedEntry;
        if (!keepStrings && type === "string" && !explicitType) {
            matchedEntry = Object.entries(patternPatches).find(([regex]) => {
                const nonMatching = col(name).str.contains(regex).not();
                const failures = sample.filter(nonMatching).head(failureThreshold).height;
                return failures < failureThreshold;
            });
        }

        const field = matchedEntry
            ? { name, type, ...matchedEntry[1] }
            : { name, type };

        enhanceField(field, options);
        schema.fields.push(field);
    }

    enhanceSchema(schema, options);
    return schema;
}
function createTypeMapping() {
const mapping = {
Array: "array",
Bool: "boolean",
Categorical: "string",
Date: "date",
Datetime: "datetime",
Decimal: "number",
Float32: "number",
Float64: "number",
Int16: "integer",
Int32: "integer",
Int64: "integer",
Int8: "integer",
List: "array",
Null: "any",
Object: "object",
String: "string",
Struct: "object",
Time: "time",
UInt16: "integer",
UInt32: "integer",
UInt64: "integer",
UInt8: "integer",
Utf8: "string",
};
return mapping;
}
function createRegexMapping(options) {
const { commaDecimal, monthFirst } = options ?? {};
const mapping = {
// Numeric
"^\\d+$": { type: "integer" },
"^\\d{1,3}(,\\d{3})+$": commaDecimal
? { type: "number" }
: { type: "integer", groupChar: "," },
"^\\d+\\.\\d+$": commaDecimal
? { type: "integer", groupChar: "." }
: { type: "number" },
"^\\d{1,3}(,\\d{3})+\\.\\d+$": { type: "number", groupChar: "," },
"^\\d{1,3}(\\.\\d{3})+,\\d+$": {
type: "number",
groupChar: ".",
decimalChar: ",",
},
// Boolean
"^(true|True|TRUE|false|False|FALSE)$": { type: "boolean" },
// Date
"^\\d{4}-\\d{2}-\\d{2}$": { type: "date" },
"^\\d{4}/\\d{2}/\\d{2}$": { type: "date", format: "%Y/%m/%d" },
"^\\d{2}/\\d{2}/\\d{4}$": monthFirst
? { type: "date", format: "%m/%d/%Y" }
: { type: "date", format: "%d/%m/%Y" },
"^\\d{2}-\\d{2}-\\d{4}$": monthFirst
? { type: "date", format: "%m-%d-%Y" }
: { type: "date", format: "%d-%m-%Y" },
"^\\d{2}\\.\\d{2}\\.\\d{4}$": monthFirst
? { type: "date", format: "%m.%d.%Y" }
: { type: "date", format: "%d.%m.%Y" },
// Time
"^\\d{2}:\\d{2}:\\d{2}$": { type: "time" },
"^\\d{2}:\\d{2}$": { type: "time", format: "%H:%M" },
"^\\d{1,2}:\\d{2}:\\d{2}\\s*(am|pm|AM|PM)$": {
type: "time",
format: "%I:%M:%S %p",
},
"^\\d{1,2}:\\d{2}\\s*(am|pm|AM|PM)$": { type: "time", format: "%I:%M %p" },
"^\\d{2}:\\d{2}:\\d{2}[+-]\\d{2}:?\\d{2}$": { type: "time" },
// Datetime - ISO format
"^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z?$": { type: "datetime" },
"^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}[+-]\\d{2}:?\\d{2}$": {
type: "datetime",
},
"^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}$": {
type: "datetime",
format: "%Y-%m-%d %H:%M:%S",
},
"^\\d{2}/\\d{2}/\\d{4} \\d{2}:\\d{2}$": monthFirst
? { type: "datetime", format: "%m/%d/%Y %H:%M" }
: { type: "datetime", format: "%d/%m/%Y %H:%M" },
"^\\d{2}/\\d{2}/\\d{4} \\d{2}:\\d{2}:\\d{2}$": monthFirst
? { type: "datetime", format: "%m/%d/%Y %H:%M:%S" }
: { type: "datetime", format: "%d/%m/%Y %H:%M:%S" },
// Object
"^\\{": { type: "object" },
// Array
"^\\[": { type: "array" },
// List
// TODO: Support commaDecimal
"^\\d+,\\d+$": { type: "list", itemType: "integer" },
"^[\\d.]+,[\\d.]+$": { type: "list", itemType: "number" },
};
return mapping;
}
function enhanceField(field, options) {
if (field.type === "string") {
field.format = options?.stringFormat ?? field.format;
}
else if (field.type === "integer") {
field.groupChar = options?.groupChar ?? field.groupChar;
field.bareNumber = options?.bareNumber ?? field.bareNumber;
}
else if (field.type === "number") {
field.decimalChar = options?.decimalChar ?? field.decimalChar;
field.groupChar = options?.groupChar ?? field.groupChar;
field.bareNumber = options?.bareNumber ?? field.bareNumber;
}
else if (field.type === "boolean") {
field.trueValues = options?.trueValues ?? field.trueValues;
field.falseValues = options?.falseValues ?? field.falseValues;
}
else if (field.type === "datetime") {
field.format = options?.datetimeFormat ?? field.format;
}
else if (field.type === "date") {
field.format = options?.dateFormat ?? field.format;
}
else if (field.type === "time") {
field.format = options?.timeFormat ?? field.format;
}
else if (field.type === "list") {
field.delimiter = options?.listDelimiter ?? field.delimiter;
field.itemType = options?.listItemType ?? field.itemType;
}
else if (field.type === "geopoint") {
field.format = options?.geopointFormat ?? field.format;
}
else if (field.type === "geojson") {
field.format = options?.geojsonFormat ?? field.format;
}
}
function enhanceSchema(schema, options) {
schema.missingValues = options?.missingValues ?? schema.missingValues;
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"infer.js","sourceRoot":"","sources":["../../schema/infer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,GAAG,EAAE,MAAM,eAAe,CAAA;AACnC,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAA;AAepD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,KAAY,EACZ,OAA4B;IAE5B,MAAM,EAAE,UAAU,GAAG,GAAG,EAAE,GAAG,OAAO,IAAI,EAAE,CAAA;IAE1C,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,OAAO,EAAE,CAAA;IACrD,OAAO,qBAAqB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;AAC/C,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,MAAiB,EACjB,OAAmD;IAEnD,MAAM,EAAE,UAAU,GAAG,GAAG,EAAE,UAAU,EAAE,WAAW,EAAE,GAAG,OAAO,IAAI,EAAE,CAAA;IAEnE,MAAM,WAAW,GAAG,iBAAiB,EAAE,CAAA;IACvC,MAAM,YAAY,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAA;IAEhD,MAAM,YAAY,GAAG,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;IACnD,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;IAE9E,MAAM,gBAAgB,GACpB,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,CAAA;IAE7D,MAAM,MAAM,GAAW;QACrB,MAAM,EAAE,EAAE;KACX,CAAA;IAED,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,WAAW,GAAG,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAA;QAClE,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,UAAU,IAAI,0BAA0B,CAAC,CAAA;QAC3D,CAAC;QAED,uDAAuD;QACvD,sDAAsD;QACtD,IAAI,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC,OAAiB,CAAA;QAChD,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1B,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC;QAED,IAAI,IAAI,GAAG,UAAU,EAAE,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,IAAI,KAAK,CAAA;QAE9D,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;YACtD,IAAI,GAAG,MAAM,CAAA;QACf,CAAC;QAED,IAAI,KAAK,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;QAC1B,IAAI,CAAC,WAAW,IAAI,IAAI,KAAK,QAAQ,IAAI,CAAC,UAAU,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7D,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;gBAC1D,MAAM,QAAQ,GAAG,MAAM;qBACpB,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,
CAAC;qBAC3C,IAAI,CAAC,gBAAgB,CAAC,CAAC,MAAM,CAAA;gBAChC,IAAI,QAAQ,GAAG,gBAAgB,EAAE,CAAC;oBAChC,KAAK,GAAG,EAAE,GAAG,KAAK,EAAE,GAAG,KAAK,EAAE,CAAA;oBAC9B,MAAK;gBACP,CAAC;YACH,CAAC;QACH,CAAC;QAED,YAAY,CAAC,KAAK,EAAE,OAAO,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAC3B,CAAC;IAED,aAAa,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IAC9B,OAAO,MAAM,CAAA;AACf,CAAC;AAED,SAAS,iBAAiB;IACxB,MAAM,OAAO,GAAkC;QAC7C,KAAK,EAAE,OAAO;QACd,IAAI,EAAE,SAAS;QACf,WAAW,EAAE,QAAQ;QACrB,IAAI,EAAE,MAAM;QACZ,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE,QAAQ;QACjB,OAAO,EAAE,QAAQ;QACjB,OAAO,EAAE,QAAQ;QACjB,KAAK,EAAE,SAAS;QAChB,KAAK,EAAE,SAAS;QAChB,KAAK,EAAE,SAAS;QAChB,IAAI,EAAE,SAAS;QACf,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,KAAK;QACX,MAAM,EAAE,QAAQ;QAChB,MAAM,EAAE,QAAQ;QAChB,MAAM,EAAE,QAAQ;QAChB,IAAI,EAAE,MAAM;QACZ,MAAM,EAAE,SAAS;QACjB,MAAM,EAAE,SAAS;QACjB,MAAM,EAAE,SAAS;QACjB,KAAK,EAAE,SAAS;QAChB,IAAI,EAAE,QAAQ;KACf,CAAA;IAED,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,SAAS,kBAAkB,CAAC,OAA4B;IACtD,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,OAAO,IAAI,EAAE,CAAA;IAElD,MAAM,OAAO,GAAmC;QAC9C,UAAU;QACV,QAAQ,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;QAC7B,sBAAsB,EAAE,YAAY;YAClC,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE;YACpB,CAAC,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE,GAAG,EAAE;QACvC,eAAe,EAAE,YAAY;YAC3B,CAAC,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE,GAAG,EAAE;YACrC,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE;QACtB,6BAA6B,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,EAAE;QACjE,6BAA6B,EAAE;YAC7B,IAAI,EAAE,QAAQ;YACd,SAAS,EAAE,GAAG;YACd,WAAW,EAAE,GAAG;SACjB;QAED,UAAU;QACV,sCAAsC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;QAE3D,OAAO;QACP,wBAAwB,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;QAC1C,wBAAwB,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE;QAC9D,wBAAwB,EAAE,UAAU;YAClC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE;YACtC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE;QACxC,wBAAwB,EAAE,UAAU;YAClC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE;YACtC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE;QACxC,4BAA4B,EAAE,UAAU;YACtC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAA
M,EAAE,UAAU,EAAE;YACtC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE;QAExC,OAAO;QACP,wBAAwB,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;QAC1C,iBAAiB,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE;QACpD,2CAA2C,EAAE;YAC3C,IAAI,EAAE,MAAM;YACZ,MAAM,EAAE,aAAa;SACtB;QACD,oCAAoC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE;QAC1E,0CAA0C,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;QAE5D,wBAAwB;QACxB,+CAA+C,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE;QACrE,+DAA+D,EAAE;YAC/D,IAAI,EAAE,UAAU;SACjB;QACD,6CAA6C,EAAE;YAC7C,IAAI,EAAE,UAAU;YAChB,MAAM,EAAE,mBAAmB;SAC5B;QACD,sCAAsC,EAAE,UAAU;YAChD,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,gBAAgB,EAAE;YAChD,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,gBAAgB,EAAE;QAClD,6CAA6C,EAAE,UAAU;YACvD,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,mBAAmB,EAAE;YACnD,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,mBAAmB,EAAE;QAErD,SAAS;QACT,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;QAE1B,QAAQ;QACR,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE;QAEzB,OAAO;QACP,6BAA6B;QAC7B,aAAa,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE;QACpD,mBAAmB,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE;KAC1D,CAAA;IAED,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,SAAS,YAAY,CAAC,KAAY,EAAE,OAA4B;IAC9D,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC5B,KAAK,CAAC,MAAM,GAAG,OAAO,EAAE,YAAY,IAAI,KAAK,CAAC,MAAM,CAAA;IACtD,CAAC;SAAM,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACpC,KAAK,CAAC,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,KAAK,CAAC,SAAS,CAAA;QACvD,KAAK,CAAC,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,KAAK,CAAC,UAAU,CAAA;IAC5D,CAAC;SAAM,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QACnC,KAAK,CAAC,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,KAAK,CAAC,WAAW,CAAA;QAC7D,KAAK,CAAC,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,KAAK,CAAC,SAAS,CAAA;QACvD,KAAK,CAAC,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,KAAK,CAAC,UAAU,CAAA;IAC5D,CAAC;SAAM,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACpC,KAAK,CAAC,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,KAAK,CAAC,UAAU,CAAA;QAC1D,KAAK,CAAC,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,KAAK,CAAC,WAAW,CAAA;IAC/D,CAAC;SAAM,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;QACrC,KAAK,CAAC,MAAM,GAAG,OAAO,EAAE,cAAc,IAAI,
KAAK,CAAC,MAAM,CAAA;IACxD,CAAC;SAAM,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QACjC,KAAK,CAAC,MAAM,GAAG,OAAO,EAAE,UAAU,IAAI,KAAK,CAAC,MAAM,CAAA;IACpD,CAAC;SAAM,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QACjC,KAAK,CAAC,MAAM,GAAG,OAAO,EAAE,UAAU,IAAI,KAAK,CAAC,MAAM,CAAA;IACpD,CAAC;SAAM,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QACjC,KAAK,CAAC,SAAS,GAAG,OAAO,EAAE,aAAa,IAAI,KAAK,CAAC,SAAS,CAAA;QAC3D,KAAK,CAAC,QAAQ,GAAG,OAAO,EAAE,YAAY,IAAI,KAAK,CAAC,QAAQ,CAAA;IAC1D,CAAC;SAAM,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;QACrC,KAAK,CAAC,MAAM,GAAG,OAAO,EAAE,cAAc,IAAI,KAAK,CAAC,MAAM,CAAA;IACxD,CAAC;SAAM,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACpC,KAAK,CAAC,MAAM,GAAG,OAAO,EAAE,aAAa,IAAI,KAAK,CAAC,MAAM,CAAA;IACvD,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,MAAc,EAAE,OAA4B;IACjE,MAAM,CAAC,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,MAAM,CAAC,aAAa,CAAA;AACvE,CAAC","sourcesContent":["import type { Field, Schema } from \"@dpkit/core\"\nimport type { DataFrame } from \"nodejs-polars\"\nimport { col } from \"nodejs-polars\"\nimport { getPolarsSchema } from \"../schema/index.ts\"\nimport type { Table } from \"../table/index.ts\"\nimport type { SchemaOptions } from \"./Options.ts\"\n\n// TODO: Implement actual options usage for inferring\n// TODO: Review default values being {fields: []} vs undefined\n\nexport interface InferSchemaOptions extends SchemaOptions {\n  sampleRows?: number\n  confidence?: number\n  commaDecimal?: boolean\n  monthFirst?: boolean\n  keepStrings?: boolean\n}\n\nexport async function inferSchemaFromTable(\n  table: Table,\n  options?: InferSchemaOptions,\n) {\n  const { sampleRows = 100 } = options ?? {}\n\n  const sample = await table.head(sampleRows).collect()\n  return inferSchemaFromSample(sample, options)\n}\n\nexport function inferSchemaFromSample(\n  sample: DataFrame,\n  options?: Exclude<InferSchemaOptions, \"sampleRows\">,\n) {\n  const { confidence = 0.9, fieldTypes, keepStrings } = options ?? 
{}\n\n  const typeMapping = createTypeMapping()\n  const regexMapping = createRegexMapping(options)\n\n  const polarsSchema = getPolarsSchema(sample.schema)\n  const fieldNames = options?.fieldNames ?? polarsSchema.fields.map(f => f.name)\n\n  const failureThreshold =\n    sample.height - Math.floor(sample.height * confidence) || 1\n\n  const schema: Schema = {\n    fields: [],\n  }\n\n  for (const name of fieldNames) {\n    const polarsField = polarsSchema.fields.find(f => f.name === name)\n    if (!polarsField) {\n      throw new Error(`Field \"${name}\" not found in the table`)\n    }\n\n    // TODO: Remove this workaround once the issue is fixed\n    // https://github.com/pola-rs/nodejs-polars/issues/372\n    let variant = polarsField.type.variant as string\n    if (!typeMapping[variant]) {\n      variant = variant.slice(0, -1)\n    }\n\n    let type = fieldTypes?.[name] ?? typeMapping[variant] ?? \"any\"\n\n    if (type === \"array\" && options?.arrayType === \"list\") {\n      type = \"list\"\n    }\n\n    let field = { name, type }\n    if (!keepStrings && type === \"string\" && !fieldTypes?.[name]) {\n      for (const [regex, patch] of Object.entries(regexMapping)) {\n        const failures = sample\n          .filter(col(name).str.contains(regex).not())\n          .head(failureThreshold).height\n        if (failures < failureThreshold) {\n          field = { ...field, ...patch }\n          break\n        }\n      }\n    }\n\n    enhanceField(field, options)\n    schema.fields.push(field)\n  }\n\n  enhanceSchema(schema, options)\n  return schema\n}\n\nfunction createTypeMapping() {\n  const mapping: Record<string, Field[\"type\"]> = {\n    Array: \"array\",\n    Bool: \"boolean\",\n    Categorical: \"string\",\n    Date: \"date\",\n    Datetime: \"datetime\",\n    Decimal: \"number\",\n    Float32: \"number\",\n    Float64: \"number\",\n    Int16: \"integer\",\n    Int32: \"integer\",\n    Int64: \"integer\",\n    Int8: \"integer\",\n    List: \"array\",\n  
  Null: \"any\",\n    Object: \"object\",\n    String: \"string\",\n    Struct: \"object\",\n    Time: \"time\",\n    UInt16: \"integer\",\n    UInt32: \"integer\",\n    UInt64: \"integer\",\n    UInt8: \"integer\",\n    Utf8: \"string\",\n  }\n\n  return mapping\n}\n\nfunction createRegexMapping(options?: InferSchemaOptions) {\n  const { commaDecimal, monthFirst } = options ?? {}\n\n  const mapping: Record<string, Partial<Field>> = {\n    // Numeric\n    \"^\\\\d+$\": { type: \"integer\" },\n    \"^\\\\d{1,3}(,\\\\d{3})+$\": commaDecimal\n      ? { type: \"number\" }\n      : { type: \"integer\", groupChar: \",\" },\n    \"^\\\\d+\\\\.\\\\d+$\": commaDecimal\n      ? { type: \"integer\", groupChar: \".\" }\n      : { type: \"number\" },\n    \"^\\\\d{1,3}(,\\\\d{3})+\\\\.\\\\d+$\": { type: \"number\", groupChar: \",\" },\n    \"^\\\\d{1,3}(\\\\.\\\\d{3})+,\\\\d+$\": {\n      type: \"number\",\n      groupChar: \".\",\n      decimalChar: \",\",\n    },\n\n    // Boolean\n    \"^(true|True|TRUE|false|False|FALSE)$\": { type: \"boolean\" },\n\n    // Date\n    \"^\\\\d{4}-\\\\d{2}-\\\\d{2}$\": { type: \"date\" },\n    \"^\\\\d{4}/\\\\d{2}/\\\\d{2}$\": { type: \"date\", format: \"%Y/%m/%d\" },\n    \"^\\\\d{2}/\\\\d{2}/\\\\d{4}$\": monthFirst\n      ? { type: \"date\", format: \"%m/%d/%Y\" }\n      : { type: \"date\", format: \"%d/%m/%Y\" },\n    \"^\\\\d{2}-\\\\d{2}-\\\\d{4}$\": monthFirst\n      ? { type: \"date\", format: \"%m-%d-%Y\" }\n      : { type: \"date\", format: \"%d-%m-%Y\" },\n    \"^\\\\d{2}\\\\.\\\\d{2}\\\\.\\\\d{4}$\": monthFirst\n      ? 
{ type: \"date\", format: \"%m.%d.%Y\" }\n      : { type: \"date\", format: \"%d.%m.%Y\" },\n\n    // Time\n    \"^\\\\d{2}:\\\\d{2}:\\\\d{2}$\": { type: \"time\" },\n    \"^\\\\d{2}:\\\\d{2}$\": { type: \"time\", format: \"%H:%M\" },\n    \"^\\\\d{1,2}:\\\\d{2}:\\\\d{2}\\\\s*(am|pm|AM|PM)$\": {\n      type: \"time\",\n      format: \"%I:%M:%S %p\",\n    },\n    \"^\\\\d{1,2}:\\\\d{2}\\\\s*(am|pm|AM|PM)$\": { type: \"time\", format: \"%I:%M %p\" },\n    \"^\\\\d{2}:\\\\d{2}:\\\\d{2}[+-]\\\\d{2}:?\\\\d{2}$\": { type: \"time\" },\n\n    // Datetime - ISO format\n    \"^\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}Z?$\": { type: \"datetime\" },\n    \"^\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}[+-]\\\\d{2}:?\\\\d{2}$\": {\n      type: \"datetime\",\n    },\n    \"^\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2}$\": {\n      type: \"datetime\",\n      format: \"%Y-%m-%d %H:%M:%S\",\n    },\n    \"^\\\\d{2}/\\\\d{2}/\\\\d{4} \\\\d{2}:\\\\d{2}$\": monthFirst\n      ? { type: \"datetime\", format: \"%m/%d/%Y %H:%M\" }\n      : { type: \"datetime\", format: \"%d/%m/%Y %H:%M\" },\n    \"^\\\\d{2}/\\\\d{2}/\\\\d{4} \\\\d{2}:\\\\d{2}:\\\\d{2}$\": monthFirst\n      ? { type: \"datetime\", format: \"%m/%d/%Y %H:%M:%S\" }\n      : { type: \"datetime\", format: \"%d/%m/%Y %H:%M:%S\" },\n\n    // Object\n    \"^\\\\{\": { type: \"object\" },\n\n    // Array\n    \"^\\\\[\": { type: \"array\" },\n\n    // List\n    // TODO: Support commaDecimal\n    \"^\\\\d+,\\\\d+$\": { type: \"list\", itemType: \"integer\" },\n    \"^[\\\\d.]+,[\\\\d.]+$\": { type: \"list\", itemType: \"number\" },\n  }\n\n  return mapping\n}\n\nfunction enhanceField(field: Field, options?: InferSchemaOptions) {\n  if (field.type === \"string\") {\n    field.format = options?.stringFormat ?? field.format\n  } else if (field.type === \"integer\") {\n    field.groupChar = options?.groupChar ?? field.groupChar\n    field.bareNumber = options?.bareNumber ?? 
field.bareNumber\n  } else if (field.type === \"number\") {\n    field.decimalChar = options?.decimalChar ?? field.decimalChar\n    field.groupChar = options?.groupChar ?? field.groupChar\n    field.bareNumber = options?.bareNumber ?? field.bareNumber\n  } else if (field.type === \"boolean\") {\n    field.trueValues = options?.trueValues ?? field.trueValues\n    field.falseValues = options?.falseValues ?? field.falseValues\n  } else if (field.type === \"datetime\") {\n    field.format = options?.datetimeFormat ?? field.format\n  } else if (field.type === \"date\") {\n    field.format = options?.dateFormat ?? field.format\n  } else if (field.type === \"time\") {\n    field.format = options?.timeFormat ?? field.format\n  } else if (field.type === \"list\") {\n    field.delimiter = options?.listDelimiter ?? field.delimiter\n    field.itemType = options?.listItemType ?? field.itemType\n  } else if (field.type === \"geopoint\") {\n    field.format = options?.geopointFormat ?? field.format\n  } else if (field.type === \"geojson\") {\n    field.format = options?.geojsonFormat ?? field.format\n  }\n}\n\nfunction enhanceSchema(schema: Schema, options?: InferSchemaOptions) {\n  schema.missingValues = options?.missingValues ?? schema.missingValues\n}\n"]}