@dpkit/table
Version:
Data Package implementation in TypeScript.
239 lines (200 loc) • 5.91 kB
text/typescript
import type { Schema } from "@dpkit/core"
import { DataFrame } from "nodejs-polars"
import { describe, expect, it } from "vitest"
import { inspectTable } from "../../table/index.ts"
describe("inspectTable", () => {
it("should inspect string to integer conversion errors", async () => {
const table = DataFrame({
id: ["1", "bad", "3", "4x"],
}).lazy()
const schema: Schema = {
fields: [{ name: "id", type: "integer" }],
}
const errors = await inspectTable(table, { schema })
expect(errors).toHaveLength(2)
expect(errors).toContainEqual({
type: "cell/type",
cell: "bad",
fieldName: "id",
rowNumber: 2,
})
expect(errors).toContainEqual({
type: "cell/type",
cell: "4x",
fieldName: "id",
rowNumber: 4,
})
})
it("should inspect string to number conversion errors", async () => {
const table = DataFrame({
price: ["10.5", "twenty", "30.75", "$40"],
}).lazy()
const schema: Schema = {
fields: [{ name: "price", type: "number" }],
}
const errors = await inspectTable(table, { schema })
expect(errors).toHaveLength(2)
expect(errors).toContainEqual({
type: "cell/type",
cell: "twenty",
fieldName: "price",
rowNumber: 2,
})
expect(errors).toContainEqual({
type: "cell/type",
cell: "$40",
fieldName: "price",
rowNumber: 4,
})
})
it("should inspect string to boolean conversion errors", async () => {
const table = DataFrame({
active: ["true", "yes", "false", "0", "1"],
}).lazy()
const schema: Schema = {
fields: [{ name: "active", type: "boolean" }],
}
const errors = await inspectTable(table, { schema })
expect(errors).toHaveLength(1)
expect(errors).toContainEqual({
type: "cell/type",
cell: "yes",
fieldName: "active",
rowNumber: 2,
})
})
it("should inspect string to date conversion errors", async () => {
const table = DataFrame({
created: ["2023-01-15", "Jan 15, 2023", "20230115", "not-a-date"],
}).lazy()
const schema: Schema = {
fields: [{ name: "created", type: "date" }],
}
const errors = await inspectTable(table, { schema })
expect(errors).toHaveLength(3)
expect(errors).toContainEqual({
type: "cell/type",
cell: "Jan 15, 2023",
fieldName: "created",
rowNumber: 2,
})
expect(errors).toContainEqual({
type: "cell/type",
cell: "20230115",
fieldName: "created",
rowNumber: 3,
})
expect(errors).toContainEqual({
type: "cell/type",
cell: "not-a-date",
fieldName: "created",
rowNumber: 4,
})
})
it("should inspect string to time conversion errors", async () => {
const table = DataFrame({
time: ["14:30:00", "2:30pm", "invalid", "14h30"],
}).lazy()
const schema: Schema = {
fields: [{ name: "time", type: "time" }],
}
const errors = await inspectTable(table, { schema })
expect(errors).toHaveLength(3)
expect(errors).toContainEqual({
type: "cell/type",
cell: "2:30pm",
fieldName: "time",
rowNumber: 2,
})
expect(errors).toContainEqual({
type: "cell/type",
cell: "invalid",
fieldName: "time",
rowNumber: 3,
})
expect(errors).toContainEqual({
type: "cell/type",
cell: "14h30",
fieldName: "time",
rowNumber: 4,
})
})
it("should inspect string to year conversion errors", async () => {
const table = DataFrame({
year: ["2023", "23", "MMXXIII", "two-thousand-twenty-three"],
}).lazy()
const schema: Schema = {
fields: [{ name: "year", type: "year" }],
}
const errors = await inspectTable(table, { schema })
expect(errors).toHaveLength(3)
expect(errors).toContainEqual({
type: "cell/type",
cell: "23",
fieldName: "year",
rowNumber: 2,
})
expect(errors).toContainEqual({
type: "cell/type",
cell: "MMXXIII",
fieldName: "year",
rowNumber: 3,
})
expect(errors).toContainEqual({
type: "cell/type",
cell: "two-thousand-twenty-three",
fieldName: "year",
rowNumber: 4,
})
})
it("should inspect string to datetime conversion errors", async () => {
const table = DataFrame({
timestamp: [
"2023-01-15T14:30:00",
"January 15, 2023 2:30 PM",
"2023-01-15 14:30",
"not-a-datetime",
],
}).lazy()
const schema: Schema = {
fields: [{ name: "timestamp", type: "datetime" }],
}
const errors = await inspectTable(table, { schema })
// Adjust the expectations to match actual behavior
expect(errors.length).toBeGreaterThan(0)
// Check for specific invalid values we expect to fail
expect(errors).toContainEqual({
type: "cell/type",
cell: "January 15, 2023 2:30 PM",
fieldName: "timestamp",
rowNumber: 2,
})
expect(errors).toContainEqual({
type: "cell/type",
cell: "not-a-datetime",
fieldName: "timestamp",
rowNumber: 4,
})
})
it("should pass validation when all cells are valid", async () => {
const table = DataFrame({
id: ["1", "2", "3", "4"],
}).lazy()
const schema: Schema = {
fields: [{ name: "id", type: "integer" }],
}
const errors = await inspectTable(table, { schema })
expect(errors).toHaveLength(0)
})
it("should inspect with non-string source data", async () => {
const table = DataFrame({
is_active: [true, false, 1, 0],
}).lazy()
const schema: Schema = {
fields: [{ name: "is_active", type: "boolean" }],
}
const errors = await inspectTable(table, { schema })
// Since the column isn't string type, inspectField will not process it
expect(errors).toHaveLength(0)
})
})