@dpkit/table
Version:
Data Package implementation in TypeScript.
243 lines (202 loc) • 5.77 kB
text/typescript
import type { Schema } from "@dpkit/core"
import { DataFrame } from "nodejs-polars"
import { describe, expect, it } from "vitest"
import { normalizeTable } from "./normalize.ts"
describe("normalizeTable", () => {
it("should work with schema", async () => {
  // Baseline case: the frame's column names match the schema's field names.
  const schema: Schema = {
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "string" },
    ],
  }
  const source = DataFrame({
    id: [1, 2],
    name: ["english", "中文"],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // The rows are expected to come back with the same values they went in with.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: "english" },
    { id: 2, name: "中文" },
  ])
})
it("should work with less fields in data", async () => {
  // The schema declares a third field, `other`, that the frame does not contain.
  const schema: Schema = {
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "string" },
      { name: "other", type: "boolean" },
    ],
  }
  const source = DataFrame({
    id: [1, 2],
    name: ["english", "中文"],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // The missing field is expected to appear in every row with a null value.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: "english", other: null },
    { id: 2, name: "中文", other: null },
  ])
})
it("should work with more fields in data", async () => {
  // The frame carries an `other` column that the schema does not declare.
  const schema: Schema = {
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "string" },
    ],
  }
  const source = DataFrame({
    id: [1, 2],
    name: ["english", "中文"],
    other: [true, false],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // Only the fields named in the schema are expected in the resulting rows.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: "english" },
    { id: 2, name: "中文" },
  ])
})
it("should work based on fields order", async () => {
  // The schema does not set `fieldsMatch`, and none of the frame's column
  // names (`field1`, `field2`) match the schema's field names.
  const schema: Schema = {
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "string" },
    ],
  }
  const source = DataFrame({
    field1: [1, 2],
    field2: ["english", "中文"],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // The first column's values are expected under `id` and the second's under `name`.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: "english" },
    { id: 2, name: "中文" },
  ])
})
it("should work based on field names (equal)", async () => {
  // The schema lists `id` then `name` and sets `fieldsMatch` to "equal".
  const schema: Schema = {
    fieldsMatch: "equal",
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "string" },
    ],
  }
  // The frame declares the same two columns in the opposite order.
  const source = DataFrame({
    name: ["english", "中文"],
    id: [1, 2],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // Each value is expected under the field whose name matches its column.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: "english" },
    { id: 2, name: "中文" },
  ])
})
it("should work based on field names (subset)", async () => {
  // The schema lists `id` then `name` and sets `fieldsMatch` to "subset".
  const schema: Schema = {
    fieldsMatch: "subset",
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "string" },
    ],
  }
  // The frame declares the same two columns in the opposite order.
  const source = DataFrame({
    name: ["english", "中文"],
    id: [1, 2],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // Each value is expected under the field whose name matches its column.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: "english" },
    { id: 2, name: "中文" },
  ])
})
it("should work based on field names (superset)", async () => {
  // The schema lists `id` then `name` and sets `fieldsMatch` to "superset".
  const schema: Schema = {
    fieldsMatch: "superset",
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "string" },
    ],
  }
  // The frame declares the same two columns in the opposite order.
  const source = DataFrame({
    name: ["english", "中文"],
    id: [1, 2],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // Each value is expected under the field whose name matches its column.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: "english" },
    { id: 2, name: "中文" },
  ])
})
it("should work based on field names (partial)", async () => {
  // The schema lists `id` then `name` and sets `fieldsMatch` to "partial".
  const schema: Schema = {
    fieldsMatch: "partial",
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "string" },
    ],
  }
  // The frame declares the same two columns in the opposite order.
  const source = DataFrame({
    name: ["english", "中文"],
    id: [1, 2],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // Each value is expected under the field whose name matches its column.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: "english" },
    { id: 2, name: "中文" },
  ])
})
it("should parse string columns", async () => {
  // `id` is declared as an integer field, but the frame supplies it as strings.
  const schema: Schema = {
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "string" },
    ],
  }
  const source = DataFrame({
    id: ["1", "2"],
    name: ["english", "中文"],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // The string ids are expected to come back as the numbers 1 and 2.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: "english" },
    { id: 2, name: "中文" },
  ])
})
it("should read type errors as nulls", async () => {
  // `name` is declared as an integer field while the frame holds
  // non-numeric text in that column.
  const schema: Schema = {
    fields: [
      { name: "id", type: "integer" },
      { name: "name", type: "integer" },
    ],
  }
  const source = DataFrame({
    id: [1, 2],
    name: ["english", "中文"],
  }).lazy()

  const normalized = await normalizeTable(source, schema)
  const collected = await normalized.collect()

  // Values that do not fit the declared type are expected to become null.
  expect(collected.toRecords()).toEqual([
    { id: 1, name: null },
    { id: 2, name: null },
  ])
})
})