UNPKG

@dpkit/table

Version:

Data Package implementation in TypeScript.

316 lines (267 loc) 8.85 kB
import { DataFrame, DataType, Series } from "nodejs-polars"
import { describe, expect, it } from "vitest"
import { inferSchemaFromTable } from "./infer.ts"

// Test suite for `inferSchemaFromTable`.
//
// Every case builds a small in-memory polars DataFrame, converts it to a lazy
// frame with `.lazy()`, and compares the awaited result of
// `inferSchemaFromTable` against a literal expected schema via `toEqual`.
describe("inferSchemaFromTable", () => {
  it("should infer from native types", async () => {
    // `integer` is given an explicit Int32 dtype; `number` is left as a plain
    // array of JS floats so polars picks the dtype itself.
    const table = DataFrame({
      integer: Series("integer", [1, 2], DataType.Int32),
      number: [1.1, 2.2],
    }).lazy()

    const schema = {
      fields: [
        { name: "integer", type: "integer" },
        { name: "number", type: "number" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer numeric", async () => {
    // All columns are strings; the ones using "," as a thousands separator are
    // expected to report it through `groupChar`.
    const table = DataFrame({
      name1: ["1", "2", "3"],
      name2: ["1,000", "2,000", "3,000"],
      name3: ["1.1", "2.2", "3.3"],
      name4: ["1,000.1", "2,000.2", "3,000.3"],
    }).lazy()

    const schema = {
      fields: [
        { name: "name1", type: "integer" },
        { name: "name2", type: "integer", groupChar: "," },
        { name: "name3", type: "number" },
        { name: "name4", type: "number", groupChar: "," },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer numeric (commaDecimal)", async () => {
    // With `commaDecimal: true` the roles are swapped: "." is expected to be
    // reported as the group separator and "," as the decimal separator.
    const table = DataFrame({
      name1: ["1.000", "2.000", "3.000"],
      name2: ["1.000,5", "2.000,5", "3.000,5"],
    }).lazy()

    const schema = {
      fields: [
        { name: "name1", type: "integer", groupChar: "." },
        { name: "name2", type: "number", decimalChar: ",", groupChar: "." },
      ],
    }

    expect(await inferSchemaFromTable(table, { commaDecimal: true })).toEqual(
      schema,
    )
  })

  it("should infer booleans", async () => {
    // Covers lower-case, capitalised and upper-case spellings.
    const table = DataFrame({
      name1: ["true", "True", "TRUE"],
      name2: ["false", "False", "FALSE"],
    }).lazy()

    const schema = {
      fields: [
        { name: "name1", type: "boolean" },
        { name: "name2", type: "boolean" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer objects", async () => {
    // Both a populated and an empty JSON object literal.
    const table = DataFrame({
      name1: ['{"a": 1}'],
      name2: ["{}"],
    }).lazy()

    const schema = {
      fields: [
        { name: "name1", type: "object" },
        { name: "name2", type: "object" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer arrays", async () => {
    // Both a populated and an empty JSON array literal.
    const table = DataFrame({
      name1: ["[1,2,3]"],
      name2: ["[]"],
    }).lazy()

    const schema = {
      fields: [
        { name: "name1", type: "array" },
        { name: "name2", type: "array" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer dates with ISO format", async () => {
    const table = DataFrame({
      name1: ["2023-01-15", "2023-02-20", "2023-03-25"],
    }).lazy()

    // ISO dates are expected to come back without a `format` key.
    const schema = {
      fields: [{ name: "name1", type: "date" }],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer dates with slash format", async () => {
    const table = DataFrame({
      yearFirst: ["2023/01/15", "2023/02/20", "2023/03/25"],
      dayMonth: ["15/01/2023", "20/02/2023", "25/03/2023"],
      monthDay: ["01/15/2023", "02/20/2023", "03/25/2023"],
    }).lazy()

    // As pinned by these expectations, the day/month order of the reported
    // format follows the `monthFirst` option for both `dayMonth` and
    // `monthDay`, regardless of which order the sample values were written in.
    const schemaDefault = {
      fields: [
        { name: "yearFirst", type: "date", format: "%Y/%m/%d" },
        { name: "dayMonth", type: "date", format: "%d/%m/%Y" },
        { name: "monthDay", type: "date", format: "%d/%m/%Y" },
      ],
    }

    const schemaMonthFirst = {
      fields: [
        { name: "yearFirst", type: "date", format: "%Y/%m/%d" },
        { name: "dayMonth", type: "date", format: "%m/%d/%Y" },
        { name: "monthDay", type: "date", format: "%m/%d/%Y" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schemaDefault)
    expect(await inferSchemaFromTable(table, { monthFirst: true })).toEqual(
      schemaMonthFirst,
    )
  })

  it("should infer dates with hyphen format", async () => {
    const table = DataFrame({
      dayMonth: ["15-01-2023", "20-02-2023", "25-03-2023"],
    }).lazy()

    const schemaDefault = {
      fields: [{ name: "dayMonth", type: "date", format: "%d-%m-%Y" }],
    }

    const schemaMonthFirst = {
      fields: [{ name: "dayMonth", type: "date", format: "%m-%d-%Y" }],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schemaDefault)
    expect(await inferSchemaFromTable(table, { monthFirst: true })).toEqual(
      schemaMonthFirst,
    )
  })

  it("should infer times with standard format", async () => {
    const table = DataFrame({
      fullTime: ["14:30:45", "08:15:30", "23:59:59"],
      shortTime: ["14:30", "08:15", "23:59"],
    }).lazy()

    // HH:MM:SS is expected without a `format` key; HH:MM needs an explicit one.
    const schema = {
      fields: [
        { name: "fullTime", type: "time" },
        { name: "shortTime", type: "time", format: "%H:%M" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer times with 12-hour format", async () => {
    const table = DataFrame({
      fullTime: ["2:30:45 PM", "8:15:30 AM", "11:59:59 PM"],
      shortTime: ["2:30 PM", "8:15 AM", "11:59 PM"],
    }).lazy()

    const schema = {
      fields: [
        { name: "fullTime", type: "time", format: "%I:%M:%S %p" },
        { name: "shortTime", type: "time", format: "%I:%M %p" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer times with timezone offset", async () => {
    const table = DataFrame({
      name: ["14:30:45+01:00", "08:15:30-05:00", "23:59:59+00:00"],
    }).lazy()

    // A trailing UTC offset is still expected to yield a plain `time` field.
    const schema = {
      fields: [{ name: "name", type: "time" }],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer datetimes with ISO format", async () => {
    const table = DataFrame({
      standard: [
        "2023-01-15T14:30:45",
        "2023-02-20T08:15:30",
        "2023-03-25T23:59:59",
      ],
      utc: [
        "2023-01-15T14:30:45Z",
        "2023-02-20T08:15:30Z",
        "2023-03-25T23:59:59Z",
      ],
      withTz: [
        "2023-01-15T14:30:45+01:00",
        "2023-02-20T08:15:30-05:00",
        "2023-03-25T23:59:59+00:00",
      ],
      withSpace: [
        "2023-01-15 14:30:45",
        "2023-02-20 08:15:30",
        "2023-03-25 23:59:59",
      ],
    }).lazy()

    // Only the space-separated variant is expected to carry an explicit
    // `format`; the "T"-separated variants are expected without one.
    const schema = {
      fields: [
        { name: "standard", type: "datetime" },
        { name: "utc", type: "datetime" },
        { name: "withTz", type: "datetime" },
        { name: "withSpace", type: "datetime", format: "%Y-%m-%d %H:%M:%S" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })

  it("should infer datetimes with custom formats", async () => {
    const table = DataFrame({
      shortDayMonth: [
        "15/01/2023 14:30",
        "20/02/2023 08:15",
        "25/03/2023 23:59",
      ],
      fullDayMonth: [
        "15/01/2023 14:30:45",
        "20/02/2023 08:15:30",
        "25/03/2023 23:59:59",
      ],
      shortMonthDay: [
        "01/15/2023 14:30",
        "02/20/2023 08:15",
        "03/25/2023 23:59",
      ],
      fullMonthDay: [
        "01/15/2023 14:30:45",
        "02/20/2023 08:15:30",
        "03/25/2023 23:59:59",
      ],
    }).lazy()

    // As with plain dates, the expected day/month order follows the
    // `monthFirst` option for every column in this fixture.
    const schemaDefault = {
      fields: [
        { name: "shortDayMonth", type: "datetime", format: "%d/%m/%Y %H:%M" },
        { name: "fullDayMonth", type: "datetime", format: "%d/%m/%Y %H:%M:%S" },
        { name: "shortMonthDay", type: "datetime", format: "%d/%m/%Y %H:%M" },
        { name: "fullMonthDay", type: "datetime", format: "%d/%m/%Y %H:%M:%S" },
      ],
    }

    const schemaMonthFirst = {
      fields: [
        { name: "shortDayMonth", type: "datetime", format: "%m/%d/%Y %H:%M" },
        { name: "fullDayMonth", type: "datetime", format: "%m/%d/%Y %H:%M:%S" },
        { name: "shortMonthDay", type: "datetime", format: "%m/%d/%Y %H:%M" },
        { name: "fullMonthDay", type: "datetime", format: "%m/%d/%Y %H:%M:%S" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schemaDefault)
    expect(await inferSchemaFromTable(table, { monthFirst: true })).toEqual(
      schemaMonthFirst,
    )
  })

  it("should infer lists", async () => {
    // `singleValue` has no separator, so it is expected to stay a plain
    // number field rather than a one-item list.
    const table = DataFrame({
      numericList: ["1.5,2.3", "4.1,5.9", "7.2,8.6"],
      integerList: ["1,2", "3,4", "5,6"],
      singleValue: ["1.5", "2.3", "4.1"],
    }).lazy()

    const schema = {
      fields: [
        { name: "numericList", type: "list", itemType: "number" },
        { name: "integerList", type: "list", itemType: "integer" },
        { name: "singleValue", type: "number" },
      ],
    }

    expect(await inferSchemaFromTable(table)).toEqual(schema)
  })
})