UNPKG

@dpkit/table

Version:

Data Package implementation in TypeScript.

398 lines (341 loc) 9.43 kB
import type { Schema } from "@dpkit/core"
import { DataFrame } from "nodejs-polars"
import { describe, expect, it } from "vitest"
import { validateTable } from "../table/validate.ts"

/**
 * Builds a lazy table from the given columns, validates it against `schema`
 * via `validateTable`, and returns the reported errors.
 */
async function collectErrors(
  columns: Record<string, unknown[]>,
  schema: Schema,
) {
  const table = DataFrame(columns).lazy()
  const report = await validateTable(table, { schema })
  return report.errors
}

/** Expected shape of a `cell/type` error for a single offending cell. */
function cellTypeError(fieldName: string, rowNumber: number, cell: string) {
  return { type: "cell/type", cell, fieldName, rowNumber }
}

describe("validateField", () => {
  describe("field name validation", () => {
    it("should report an error when field names don't match", async () => {
      const errors = await collectErrors(
        { actual_id: [1, 2, 3] },
        { fields: [{ name: "id", type: "number" }] },
      )

      expect(errors).toContainEqual({
        type: "field/name",
        fieldName: "id",
        actualFieldName: "actual_id",
      })
    })

    it("should not report errors when field names match", async () => {
      const errors = await collectErrors(
        { id: [1, 2, 3] },
        { fields: [{ name: "id", type: "number" }] },
      )

      expect(errors).toHaveLength(0)
    })

    it("should be case-sensitive when comparing field names", async () => {
      const errors = await collectErrors(
        { ID: [1, 2, 3] },
        { fields: [{ name: "id", type: "number" }] },
      )

      expect(errors).toHaveLength(1)
      expect(errors).toContainEqual({
        type: "field/name",
        fieldName: "id",
        actualFieldName: "ID",
      })
    })
  })

  describe("field type validation", () => {
    it("should report an error when field types don't match", async () => {
      const errors = await collectErrors(
        { id: [true, false, true] },
        { fields: [{ name: "id", type: "integer" }] },
      )

      expect(errors).toHaveLength(1)
      expect(errors).toContainEqual({
        type: "field/type",
        fieldName: "id",
        fieldType: "integer",
        actualFieldType: "boolean",
      })
    })

    it("should not report errors when field types match", async () => {
      const errors = await collectErrors(
        { id: [1, 2, 3] },
        { fields: [{ name: "id", type: "number" }] },
      )

      expect(errors).toHaveLength(0)
    })
  })

  describe("cell types validation", () => {
    it("should validate string to integer conversion errors", async () => {
      const errors = await collectErrors(
        { id: ["1", "bad", "3", "4x"] },
        { fields: [{ name: "id", type: "integer" }] },
      )

      expect(errors).toHaveLength(2)
      expect(errors).toContainEqual(cellTypeError("id", 2, "bad"))
      expect(errors).toContainEqual(cellTypeError("id", 4, "4x"))
    })

    it("should validate string to number conversion errors", async () => {
      const errors = await collectErrors(
        { price: ["10.5", "twenty", "30.75", "$40"] },
        { fields: [{ name: "price", type: "number" }] },
      )

      expect(errors).toHaveLength(2)
      expect(errors).toContainEqual(cellTypeError("price", 2, "twenty"))
      expect(errors).toContainEqual(cellTypeError("price", 4, "$40"))
    })

    it("should validate string to boolean conversion errors", async () => {
      const errors = await collectErrors(
        { active: ["true", "yes", "false", "0", "1"] },
        { fields: [{ name: "active", type: "boolean" }] },
      )

      expect(errors).toHaveLength(1)
      expect(errors).toContainEqual(cellTypeError("active", 2, "yes"))
    })

    it("should validate string to date conversion errors", async () => {
      const errors = await collectErrors(
        { created: ["2023-01-15", "Jan 15, 2023", "20230115", "not-a-date"] },
        { fields: [{ name: "created", type: "date" }] },
      )

      expect(errors).toHaveLength(3)
      expect(errors).toContainEqual(cellTypeError("created", 2, "Jan 15, 2023"))
      expect(errors).toContainEqual(cellTypeError("created", 3, "20230115"))
      expect(errors).toContainEqual(cellTypeError("created", 4, "not-a-date"))
    })

    it("should validate string to time conversion errors", async () => {
      const errors = await collectErrors(
        { time: ["14:30:00", "2:30pm", "invalid", "14h30"] },
        { fields: [{ name: "time", type: "time" }] },
      )

      expect(errors).toHaveLength(3)
      expect(errors).toContainEqual(cellTypeError("time", 2, "2:30pm"))
      expect(errors).toContainEqual(cellTypeError("time", 3, "invalid"))
      expect(errors).toContainEqual(cellTypeError("time", 4, "14h30"))
    })

    it("should validate string to year conversion errors", async () => {
      const errors = await collectErrors(
        { year: ["2023", "23", "MMXXIII", "two-thousand-twenty-three"] },
        { fields: [{ name: "year", type: "year" }] },
      )

      expect(errors).toHaveLength(3)
      expect(errors).toContainEqual(cellTypeError("year", 2, "23"))
      expect(errors).toContainEqual(cellTypeError("year", 3, "MMXXIII"))
      expect(errors).toContainEqual(
        cellTypeError("year", 4, "two-thousand-twenty-three"),
      )
    })

    it("should validate string to datetime conversion errors", async () => {
      const errors = await collectErrors(
        {
          timestamp: [
            "2023-01-15T14:30:00",
            "January 15, 2023 2:30 PM",
            "2023-01-15 14:30",
            "not-a-datetime",
          ],
        },
        { fields: [{ name: "timestamp", type: "datetime" }] },
      )

      // The exact error count is not pinned here; the test only requires
      // that at least one error is reported.
      expect(errors.length).toBeGreaterThan(0)

      // Rows 2 and 4 are the specific invalid values asserted to fail.
      expect(errors).toContainEqual(
        cellTypeError("timestamp", 2, "January 15, 2023 2:30 PM"),
      )
      expect(errors).toContainEqual(
        cellTypeError("timestamp", 4, "not-a-datetime"),
      )
    })

    it("should pass validation when all cells are valid", async () => {
      const errors = await collectErrors(
        { id: ["1", "2", "3", "4"] },
        { fields: [{ name: "id", type: "integer" }] },
      )

      expect(errors).toHaveLength(0)
    })

    it("should validate with non-string source data", async () => {
      const errors = await collectErrors(
        { is_active: [true, false, true, false] },
        { fields: [{ name: "is_active", type: "boolean" }] },
      )

      // The column already has the expected type, so no errors are expected.
      expect(errors).toHaveLength(0)
    })
  })
})