// Package: @dpkit/table (version not specified)
// Data Package implementation in TypeScript.
// Listing metadata: 398 lines (341 loc), 9.43 kB, text/typescript
import type { Schema } from "@dpkit/core"
import { DataFrame } from "nodejs-polars"
import { describe, expect, it } from "vitest"
import { validateTable } from "../table/validate.ts"
describe("validateField", () => {
describe("field name validation", () => {
  // A column called "actual_id" checked against a schema field called "id"
  // is expected to produce a "field/name" error that carries both names.
  it("should report an error when field names don't match", async () => {
    const frame = DataFrame({ actual_id: [1, 2, 3] })
    const expected: Schema = { fields: [{ name: "id", type: "number" }] }

    const result = await validateTable(frame.lazy(), { schema: expected })

    expect(result.errors).toContainEqual({
      type: "field/name",
      fieldName: "id",
      actualFieldName: "actual_id",
    })
  })

  // Column name and schema field name are identical: no errors expected.
  it("should not report errors when field names match", async () => {
    const frame = DataFrame({ id: [1, 2, 3] })
    const expected: Schema = { fields: [{ name: "id", type: "number" }] }

    const result = await validateTable(frame.lazy(), { schema: expected })

    expect(result.errors).toHaveLength(0)
  })

  // "ID" and "id" differ only by case; exactly one "field/name" error is
  // expected for the pair.
  it("should be case-sensitive when comparing field names", async () => {
    const frame = DataFrame({ ID: [1, 2, 3] })
    const expected: Schema = { fields: [{ name: "id", type: "number" }] }

    const result = await validateTable(frame.lazy(), { schema: expected })

    expect(result.errors).toHaveLength(1)
    expect(result.errors).toContainEqual({
      type: "field/name",
      fieldName: "id",
      actualFieldName: "ID",
    })
  })
})
describe("field type validation", () => {
  // A column of booleans checked against an "integer" field is expected to
  // produce exactly one "field/type" error naming both the declared and the
  // actual type.
  it("should report an error when field types don't match", async () => {
    const frame = DataFrame({ id: [true, false, true] })
    const expected: Schema = { fields: [{ name: "id", type: "integer" }] }

    const result = await validateTable(frame.lazy(), { schema: expected })

    expect(result.errors).toHaveLength(1)
    expect(result.errors).toContainEqual({
      type: "field/type",
      fieldName: "id",
      fieldType: "integer",
      actualFieldType: "boolean",
    })
  })

  // A numeric column checked against a "number" field: no errors expected.
  it("should not report errors when field types match", async () => {
    const frame = DataFrame({ id: [1, 2, 3] })
    const expected: Schema = { fields: [{ name: "id", type: "number" }] }

    const result = await validateTable(frame.lazy(), { schema: expected })

    expect(result.errors).toHaveLength(0)
  })
})
describe("cell types validation", () => {
  // Each test below feeds a single column to validateTable together with a
  // one-field schema, then checks the reported "cell/type" errors. The
  // expected rowNumber values count from 1 (the second cell is row 2).

  it("should validate string to integer conversion errors", async () => {
    const frame = DataFrame({ id: ["1", "bad", "3", "4x"] })
    const expected: Schema = { fields: [{ name: "id", type: "integer" }] }

    const { errors: issues } = await validateTable(frame.lazy(), {
      schema: expected,
    })

    const rejected = [
      { cell: "bad", rowNumber: 2 },
      { cell: "4x", rowNumber: 4 },
    ]
    expect(issues).toHaveLength(2)
    for (const { cell, rowNumber } of rejected) {
      expect(issues).toContainEqual({
        type: "cell/type",
        cell,
        fieldName: "id",
        rowNumber,
      })
    }
  })

  it("should validate string to number conversion errors", async () => {
    const frame = DataFrame({ price: ["10.5", "twenty", "30.75", "$40"] })
    const expected: Schema = { fields: [{ name: "price", type: "number" }] }

    const { errors: issues } = await validateTable(frame.lazy(), {
      schema: expected,
    })

    const rejected = [
      { cell: "twenty", rowNumber: 2 },
      { cell: "$40", rowNumber: 4 },
    ]
    expect(issues).toHaveLength(2)
    for (const { cell, rowNumber } of rejected) {
      expect(issues).toContainEqual({
        type: "cell/type",
        cell,
        fieldName: "price",
        rowNumber,
      })
    }
  })

  it("should validate string to boolean conversion errors", async () => {
    const frame = DataFrame({ active: ["true", "yes", "false", "0", "1"] })
    const expected: Schema = { fields: [{ name: "active", type: "boolean" }] }

    const { errors: issues } = await validateTable(frame.lazy(), {
      schema: expected,
    })

    // Only "yes" is expected to be rejected.
    expect(issues).toHaveLength(1)
    expect(issues).toContainEqual({
      type: "cell/type",
      cell: "yes",
      fieldName: "active",
      rowNumber: 2,
    })
  })

  it("should validate string to date conversion errors", async () => {
    const frame = DataFrame({
      created: ["2023-01-15", "Jan 15, 2023", "20230115", "not-a-date"],
    })
    const expected: Schema = { fields: [{ name: "created", type: "date" }] }

    const { errors: issues } = await validateTable(frame.lazy(), {
      schema: expected,
    })

    const rejected = [
      { cell: "Jan 15, 2023", rowNumber: 2 },
      { cell: "20230115", rowNumber: 3 },
      { cell: "not-a-date", rowNumber: 4 },
    ]
    expect(issues).toHaveLength(3)
    for (const { cell, rowNumber } of rejected) {
      expect(issues).toContainEqual({
        type: "cell/type",
        cell,
        fieldName: "created",
        rowNumber,
      })
    }
  })

  it("should validate string to time conversion errors", async () => {
    const frame = DataFrame({
      time: ["14:30:00", "2:30pm", "invalid", "14h30"],
    })
    const expected: Schema = { fields: [{ name: "time", type: "time" }] }

    const { errors: issues } = await validateTable(frame.lazy(), {
      schema: expected,
    })

    const rejected = [
      { cell: "2:30pm", rowNumber: 2 },
      { cell: "invalid", rowNumber: 3 },
      { cell: "14h30", rowNumber: 4 },
    ]
    expect(issues).toHaveLength(3)
    for (const { cell, rowNumber } of rejected) {
      expect(issues).toContainEqual({
        type: "cell/type",
        cell,
        fieldName: "time",
        rowNumber,
      })
    }
  })

  it("should validate string to year conversion errors", async () => {
    const frame = DataFrame({
      year: ["2023", "23", "MMXXIII", "two-thousand-twenty-three"],
    })
    const expected: Schema = { fields: [{ name: "year", type: "year" }] }

    const { errors: issues } = await validateTable(frame.lazy(), {
      schema: expected,
    })

    const rejected = [
      { cell: "23", rowNumber: 2 },
      { cell: "MMXXIII", rowNumber: 3 },
      { cell: "two-thousand-twenty-three", rowNumber: 4 },
    ]
    expect(issues).toHaveLength(3)
    for (const { cell, rowNumber } of rejected) {
      expect(issues).toContainEqual({
        type: "cell/type",
        cell,
        fieldName: "year",
        rowNumber,
      })
    }
  })

  it("should validate string to datetime conversion errors", async () => {
    const frame = DataFrame({
      timestamp: [
        "2023-01-15T14:30:00",
        "January 15, 2023 2:30 PM",
        "2023-01-15 14:30",
        "not-a-datetime",
      ],
    })
    const expected: Schema = {
      fields: [{ name: "timestamp", type: "datetime" }],
    }

    const { errors: issues } = await validateTable(frame.lazy(), {
      schema: expected,
    })

    // Only a lower bound on the error count is asserted here; row 3
    // ("2023-01-15 14:30") is not checked either way.
    expect(issues.length).toBeGreaterThan(0)

    // These two values are the ones that must be reported as invalid.
    const rejected = [
      { cell: "January 15, 2023 2:30 PM", rowNumber: 2 },
      { cell: "not-a-datetime", rowNumber: 4 },
    ]
    for (const { cell, rowNumber } of rejected) {
      expect(issues).toContainEqual({
        type: "cell/type",
        cell,
        fieldName: "timestamp",
        rowNumber,
      })
    }
  })

  it("should pass validation when all cells are valid", async () => {
    const frame = DataFrame({ id: ["1", "2", "3", "4"] })
    const expected: Schema = { fields: [{ name: "id", type: "integer" }] }

    const { errors: issues } = await validateTable(frame.lazy(), {
      schema: expected,
    })

    expect(issues).toHaveLength(0)
  })

  it("should validate with non-string source data", async () => {
    const frame = DataFrame({ is_active: [true, false, true, false] })
    const expected: Schema = {
      fields: [{ name: "is_active", type: "boolean" }],
    }

    const { errors: issues } = await validateTable(frame.lazy(), {
      schema: expected,
    })

    // The column already holds booleans and the field is declared
    // "boolean", so no errors are expected.
    expect(issues).toHaveLength(0)
  })
})
})