UNPKG

@dpkit/table

Version:

Data Package implementation in TypeScript.

239 lines (200 loc) 5.91 kB
import type { Schema } from "@dpkit/core" import { DataFrame } from "nodejs-polars" import { describe, expect, it } from "vitest" import { inspectTable } from "../../table/index.ts" describe("inspectTable", () => { it("should inspect string to integer conversion errors", async () => { const table = DataFrame({ id: ["1", "bad", "3", "4x"], }).lazy() const schema: Schema = { fields: [{ name: "id", type: "integer" }], } const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/type", cell: "bad", fieldName: "id", rowNumber: 2, }) expect(errors).toContainEqual({ type: "cell/type", cell: "4x", fieldName: "id", rowNumber: 4, }) }) it("should inspect string to number conversion errors", async () => { const table = DataFrame({ price: ["10.5", "twenty", "30.75", "$40"], }).lazy() const schema: Schema = { fields: [{ name: "price", type: "number" }], } const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/type", cell: "twenty", fieldName: "price", rowNumber: 2, }) expect(errors).toContainEqual({ type: "cell/type", cell: "$40", fieldName: "price", rowNumber: 4, }) }) it("should inspect string to boolean conversion errors", async () => { const table = DataFrame({ active: ["true", "yes", "false", "0", "1"], }).lazy() const schema: Schema = { fields: [{ name: "active", type: "boolean" }], } const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(1) expect(errors).toContainEqual({ type: "cell/type", cell: "yes", fieldName: "active", rowNumber: 2, }) }) it("should inspect string to date conversion errors", async () => { const table = DataFrame({ created: ["2023-01-15", "Jan 15, 2023", "20230115", "not-a-date"], }).lazy() const schema: Schema = { fields: [{ name: "created", type: "date" }], } const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(3) expect(errors).toContainEqual({ type: "cell/type", cell: "Jan 15, 2023", fieldName: "created", rowNumber: 2, }) expect(errors).toContainEqual({ type: "cell/type", cell: "20230115", fieldName: "created", rowNumber: 3, }) expect(errors).toContainEqual({ type: "cell/type", cell: "not-a-date", fieldName: "created", rowNumber: 4, }) }) it("should inspect string to time conversion errors", async () => { const table = DataFrame({ time: ["14:30:00", "2:30pm", "invalid", "14h30"], }).lazy() const schema: Schema = { fields: [{ name: "time", type: "time" }], } const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(3) expect(errors).toContainEqual({ type: "cell/type", cell: "2:30pm", fieldName: "time", rowNumber: 2, }) expect(errors).toContainEqual({ type: "cell/type", cell: "invalid", fieldName: "time", rowNumber: 3, }) expect(errors).toContainEqual({ type: "cell/type", cell: "14h30", fieldName: "time", rowNumber: 4, }) }) it("should inspect string to year conversion errors", async () => { const table = DataFrame({ year: ["2023", "23", "MMXXIII", "two-thousand-twenty-three"], }).lazy() const schema: Schema = { fields: [{ name: "year", type: "year" }], } const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(3) expect(errors).toContainEqual({ type: "cell/type", cell: "23", fieldName: "year", rowNumber: 2, }) expect(errors).toContainEqual({ type: "cell/type", cell: "MMXXIII", fieldName: "year", rowNumber: 3, }) expect(errors).toContainEqual({ type: "cell/type", cell: "two-thousand-twenty-three", fieldName: "year", rowNumber: 4, }) }) it("should inspect string to datetime conversion errors", async () => { const table = DataFrame({ timestamp: [ "2023-01-15T14:30:00", "January 15, 2023 2:30 PM", "2023-01-15 14:30", "not-a-datetime", ], }).lazy() const schema: Schema = { fields: [{ name: "timestamp", type: "datetime" }], } const errors = await inspectTable(table, { schema }) // Adjust the expectations to match actual behavior expect(errors.length).toBeGreaterThan(0) // Check for specific invalid values we expect to fail expect(errors).toContainEqual({ type: "cell/type", cell: "January 15, 2023 2:30 PM", fieldName: "timestamp", rowNumber: 2, }) expect(errors).toContainEqual({ type: "cell/type", cell: "not-a-datetime", fieldName: "timestamp", rowNumber: 4, }) }) it("should pass validation when all cells are valid", async () => { const table = DataFrame({ id: ["1", "2", "3", "4"], }).lazy() const schema: Schema = { fields: [{ name: "id", type: "integer" }], } const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should inspect with non-string source data", async () => { const table = DataFrame({ is_active: [true, false, 1, 0], }).lazy() const schema: Schema = { fields: [{ name: "is_active", type: "boolean" }], } const errors = await inspectTable(table, { schema }) // Since the column isn't string type, inspectField will not process it expect(errors).toHaveLength(0) }) })