dataframe-js
Version:
Immutable and functional data structure for data scientists and developers
753 lines (661 loc) • 19.1 kB
JavaScript
import tape from "tape";
import { DataFrame } from "../src/index";
import { tryCatch } from "./utils";
// Local alias for the tape import; the suites below are registered through it.
const test = tape;
// Column-level operations: transpose, list, select, create/modify/drop,
// rename, cast, restructure, distinct, replace and column extraction.
test("DataFrame columns can be", assert => {
    // Expected cells of the transposed ragged frame, rebuilt for each assertion.
    const transposedCells = () => ({
        "0": [3, "3", undefined],
        "1": [6, "4", undefined],
        "2": [8, "5", undefined],
        "3": [undefined, "6", undefined]
    });

    // Dict-built frame whose columns hold 3, 4 and 0 values respectively.
    const ragged = new DataFrame(
        { column1: [3, 6, 8], column2: ["3", "4", "5", "6"], column3: [] },
        ["column1", "column2", "column3"]
    );

    const plainTranspose = ragged.transpose().toDict();
    assert.deepEqual(plainTranspose, transposedCells(), "transposed.");

    const namedTranspose = ragged.transpose(true).toDict();
    assert.deepEqual(
        namedTranspose,
        Object.assign(
            { rowNames: ["column1", "column2", "column3"] },
            transposedCells()
        ),
        "transposed, keeping columnNames as rowNames."
    );

    // Array-built frame: three rows of different lengths under six column names.
    const frame = new DataFrame(
        [[1, 6, 9, 10, 12], [1, 2], [6, 6, 9, 8, 9, 12]],
        ["c1", "c2", "c3", "c4", "c5", "c6"]
    );

    // Each call performs a new select, exactly like the inline chains would.
    const middleColumns = () => frame.select("c2", "c3", "c4");
    const leadingColumns = () => frame.select("c1", "c2", "c3");

    const listedNames = frame.listColumns();
    assert.deepEqual(
        listedNames,
        ["c1", "c2", "c3", "c4", "c5", "c6"],
        "listed."
    );

    const columnTotal = frame.listColumns().length;
    assert.equal(columnTotal, 6, "counted.");

    const selectedDict = middleColumns().toDict();
    assert.deepEqual(
        selectedDict,
        {
            c2: [6, 2, 6],
            c3: [9, undefined, 9],
            c4: [10, undefined, 8]
        },
        "selected."
    );

    const singleSelection = frame.select("c2").toArray();
    assert.deepEqual(singleSelection, [[6], [2], [6]], "selected individually.");

    // withColumn on a name that is not present yet appends the column.
    const withCreated = middleColumns()
        .withColumn("c5", row => row.get("c2") - 2)
        .toDict();
    assert.deepEqual(
        withCreated,
        {
            c2: [6, 2, 6],
            c3: [9, undefined, 9],
            c4: [10, undefined, 8],
            c5: [4, 0, 4]
        },
        "created."
    );

    // withColumn on an existing name overwrites that column.
    const withModified = middleColumns()
        .withColumn("c4", row => {
            if (row.get("c2")) {
                return row.get("c2") - 2;
            }
            return 0 - 2;
        })
        .toDict();
    assert.deepEqual(
        withModified,
        {
            c2: [6, 2, 6],
            c3: [9, undefined, 9],
            c4: [4, 0, 4]
        },
        "modified."
    );

    const withoutC4 = middleColumns()
        .drop("c4")
        .toDict();
    assert.deepEqual(
        withoutC4,
        {
            c2: [6, 2, 6],
            c3: [9, undefined, 9]
        },
        "deleted."
    );

    const renamedNames = middleColumns()
        .renameAll(["c16", "c17", "c18"])
        .listColumns();
    assert.deepEqual(renamedNames, ["c16", "c17", "c18"], "renamed.");

    const renamedFirstRow = middleColumns()
        .renameAll(["c16", "c17", "c18"])
        .toArray()[0];
    assert.deepEqual(
        renamedFirstRow,
        [6, 9, 10],
        "renamed without altering data."
    );

    const singleRename = middleColumns()
        .rename("c2", "cRenamed")
        .listColumns();
    assert.deepEqual(
        singleRename,
        ["cRenamed", "c3", "c4"],
        "renamed individually."
    );

    // Custom caster: stores ten times the numeric input, as a string.
    class CustomClass {
        constructor(raw) {
            this.value = String(Number(raw) * 10);
        }
    }

    const castFirstRow = leadingColumns()
        .castAll([String, Number, val => new CustomClass(val)])
        .toArray()[0];
    assert.deepEqual(castFirstRow, ["1", 6, { value: "90" }], "cast.");

    const singleCastFirstRow = leadingColumns()
        .cast("c2", String)
        .toArray()[0];
    assert.deepEqual(singleCastFirstRow, [1, "6", 9], "cast individually.");

    // "c36" is not a column of the frame, so it is expected to be all undefined.
    const restructuredDict = frame.restructure(["c2", "c3", "c36"]).toDict();
    assert.deepEqual(
        restructuredDict,
        {
            c2: [6, 2, 6],
            c3: [9, undefined, 9],
            c36: [undefined, undefined, undefined]
        },
        "restructured."
    );

    const reorderedRows = frame.restructure(["c2", "c3", "c1"]).toCollection();
    assert.deepEqual(
        reorderedRows,
        [
            { c2: 6, c3: 9, c1: 1 },
            { c2: 2, c3: undefined, c1: 1 },
            { c2: 6, c3: 9, c1: 6 }
        ],
        "restructured to reorder existing columns."
    );

    const uniqueC1 = frame.distinct("c1").toArray("c1");
    assert.deepEqual(
        uniqueC1,
        [1, 6],
        "distinct, giving a column of unique values."
    );

    const replacedEverywhere = middleColumns()
        .replace(undefined, 0)
        .toDict();
    assert.deepEqual(
        replacedEverywhere,
        {
            c2: [6, 2, 6],
            c3: [9, 0, 9],
            c4: [10, 0, 8]
        },
        "modified, replacing a value by another."
    );

    // Restricting the replacement to c2/c3 leaves the undefined in c4 alone.
    const replacedInSome = middleColumns()
        .replace(undefined, 0, ["c2", "c3"])
        .toDict();
    assert.deepEqual(
        replacedInSome,
        {
            c2: [6, 2, 6],
            c3: [9, 0, 9],
            c4: [10, undefined, 8]
        },
        "modified, replacing a value by another in some columns."
    );

    const c2Values = frame.toArray("c2");
    assert.deepEqual(c2Values, [6, 2, 6], "converted into Array.");

    assert.end();
});
// Failure case for column operations: renameAll with too few names.
test("DataFrame columns can't be ", assert => {
    // Two columns in the frame, only one replacement name supplied.
    const renameWithOneName = () =>
        new DataFrame([{ c1: 1, c2: 3 }]).renameAll(["c1"]);

    // tryCatch presumably hands back whatever the callback threw; only its
    // `name` is inspected here.
    const outcome = tryCatch(renameWithOneName);

    assert.equal(
        outcome.name,
        "WrongSchemaError",
        "renamed when providing different columns number, throwing WrongSchemaError."
    );
    assert.end();
});
// Row-level operations: counting, push, filter/find, map/chain, reduce,
// groupBy/aggregate, sortBy, union, shuffle/sample/bisect and pivot/melt.
// NOTE(review): some expectations below appear to depend on the order in which
// earlier statements ran (see the note above the union assertions), so keep the
// statement order intact when editing.
test("DataFrame rows can be ", assert => {
// Dict-built frame whose columns hold 3, 4 and 0 values; count() is expected
// to be 4, the length of the longest column.
const df1 = new DataFrame(
{
column1: [3, 6, 8],
column2: ["3", "4", "5", "6"],
column3: []
},
["column1", "column2", "column3"]
);
assert.equal(df1.count(), 4, "counted.");
assert.equal(
df1.countValue("4", "column2"),
1,
"counted based on a specific value in a column."
);
assert.equal(
df1.countValue(9, "column1"),
0,
"counted based on a specific value in a selected column."
);
// push accepts rows as arrays, as column-keyed objects, or as the rows obtained
// by spreading a DataFrame; each expectation starts again from df1's four
// original rows.
assert.deepEqual(
df1.push([1, 9, 6], [0, 5, 6]).toArray(),
[
[3, "3", undefined],
[6, "4", undefined],
[8, "5", undefined],
[undefined, "6", undefined],
[1, 9, 6],
[0, 5, 6]
],
"completed by pushing Arrays."
);
assert.deepEqual(
df1
.push(
{ column1: 1, column2: 9, column3: 6 },
{ column1: 0, column2: undefined, column3: 9 }
)
.toArray(),
[
[3, "3", undefined],
[6, "4", undefined],
[8, "5", undefined],
[undefined, "6", undefined],
[1, 9, 6],
[0, undefined, 9]
],
"completed by pushing dictionnaries."
);
assert.deepEqual(
df1.push(...[...df1]).toArray(),
[
[3, "3", undefined],
[6, "4", undefined],
[8, "5", undefined],
[undefined, "6", undefined],
[3, "3", undefined],
[6, "4", undefined],
[8, "5", undefined],
[undefined, "6", undefined]
],
"completed by pushing rows."
);
// filter takes either a predicate on the row or a column/value object.
assert.deepEqual(
df1.filter(line => line.get("column1") > 3).toArray(),
[[6, "4", undefined], [8, "5", undefined]],
"filtered."
);
assert.deepEqual(
df1.filter({ column1: 6 }).toArray(),
[[6, "4", undefined]],
"filtered by passing a column/value Object."
);
// find yields a single row (toArray gives a flat array) or undefined.
assert.deepEqual(
df1.find({ column1: 6 }).toArray(),
[6, "4", undefined],
"found a row and returned it."
);
assert.deepEqual(
df1.find({ column1: 12 }),
undefined,
"found nothing and returned undefined."
);
assert.deepEqual(
df1.map(line => line.set("column1", 3)).toArray(),
[
[3, "3", undefined],
[3, "4", undefined],
[3, "5", undefined],
[3, "6", undefined]
],
"modified."
);
assert.deepEqual(
df1
.filter(line => line.get("column1") > 3)
.map(line => line.set("column1", 3))
.toArray(),
[[3, "4", undefined], [3, "5", undefined]],
"filtered and modified."
);
// chain mixes predicates and row transformations in one call; the expected
// output matches the filter().map() pipeline above.
assert.deepEqual(
df1
.chain(
line => line.get("column1") > 3,
line => line.set("column1", 3)
)
.toArray(),
[[3, "4", undefined], [3, "5", undefined]],
"filtered and modified by chains (giving the same result, but faster)."
);
assert.deepEqual(
df1
.chain(
line => line.get("column1") > 3,
line => line.set("column1", 3),
line => line.get("column2") === "5"
)
.toArray(),
[[3, "5", undefined]],
"filtered and modified and filtered (again) by chains."
);
// Copy of df1 with falsy column1 values replaced by 0, so the numeric
// reductions below never add undefined.
const df2 = df1.withColumn(
"column1",
row => (row.get("column1") ? row.get("column1") : 0)
);
assert.equal(
df2.reduce((p, n) => n.get("column1") + p, 0),
17,
"reduced to obtain a value."
);
// Without an initial value the accumulator is a row: column1 is summed and
// column2 (strings) is concatenated in traversal order.
assert.deepEqual(
df2
.reduce((p, n) =>
n
.set("column1", p.get("column1") + n.get("column1"))
.set("column2", p.get("column2") + n.get("column2"))
)
.toArray(),
[17, "3456", undefined],
"reduced to obtain a row."
);
assert.deepEqual(
df2
.reduceRight((p, n) =>
n
.set("column1", p.get("column1") + n.get("column1"))
.set("column2", p.get("column2") + n.get("column2"))
)
.toArray(),
[17, "6543", undefined],
"reduced by right to obtain a row."
);
// Fixtures for the groupBy / sortBy / union assertions.
const df3 = new DataFrame(
{
id: [3, 6, 8, 1, 1, 3, 8],
value: [1, 0, 1, 1, 1, 2, 4]
},
["id", "value"]
);
const df4 = new DataFrame(
{
id: [3, 6, 8, 1, 1, 3, 8, 3],
id2: ["a", "a", "b", "c", "b", "b", "b", "a"],
value: [1, 0, 1, 1, 1, 2, 4, 6]
},
["id", "id2", "value"]
);
const df4b = new DataFrame(
{
name: ["Henry", "Jess", "William", "Clair", "Barbara", "John"],
test1: [95, 95, 95, 95, 94, 94],
test2: [90, 90, 95, 89, 94, 98],
test3: [76, 75, 76, 76, 99, 77],
isTall: [false, true, false, true, true, false]
},
["name", "test1", "test2", "test3", "isTall"]
);
// Each group is exposed as { groupKey, group }; the group DataFrame is turned
// into a dict so it can be compared with deepEqual.
assert.deepEqual(
df3
.groupBy("id")
.toCollection()
.map(({ groupKey, group }) => ({
groupKey,
group: group.toDict()
})),
[
{ groupKey: { id: 3 }, group: { id: [3, 3], value: [1, 2] } },
{ groupKey: { id: 6 }, group: { id: [6], value: [0] } },
{ groupKey: { id: 8 }, group: { id: [8, 8], value: [1, 4] } },
{ groupKey: { id: 1 }, group: { id: [1, 1], value: [1, 1] } }
],
"groupBy on a column."
);
// aggregate puts its result in a column named "aggregation".
assert.deepEqual(
df3
.groupBy("id")
.aggregate(group => group.count())
.toDict(),
{
id: [3, 6, 8, 1],
aggregation: [2, 1, 2, 2]
},
"groupBy and compute (by aggregation) the count by group."
);
assert.deepEqual(
df4
.groupBy("id", "id2")
.aggregate(group => group.count())
.toDict(),
{
id: [3, 6, 8, 1, 1, 3],
id2: ["a", "a", "b", "c", "b", "b"],
aggregation: [2, 1, 2, 1, 1, 1]
},
"groupBy on multiple columns and compute the count by group."
);
// sortBy takes one column name or an array of names; a second argument of
// true reverses the order.
assert.deepEqual(
df3.sortBy("id").toArray(),
[[1, 1], [1, 1], [3, 1], [3, 2], [6, 0], [8, 1], [8, 4]],
"sorted by a column."
);
assert.deepEqual(
df3.sortBy("id", true).toArray(),
[[8, 1], [8, 4], [6, 0], [3, 1], [3, 2], [1, 1], [1, 1]],
"sorted and reverse by a column."
);
assert.deepEqual(
df4b.sortBy(["test1", "test2", "isTall"]).toArray(),
[
["Barbara", 94, 94, 99, true],
["John", 94, 98, 77, false],
["Clair", 95, 89, 76, true],
["Henry", 95, 90, 76, false],
["Jess", 95, 90, 75, true],
["William", 95, 95, 76, false]
],
"sorted by three columns."
);
assert.deepEqual(
df4b.sortBy(["test1", "test2", "isTall"], true).toArray(),
[
["William", 95, 95, 76, false],
["Jess", 95, 90, 75, true],
["Henry", 95, 90, 76, false],
["Clair", 95, 89, 76, true],
["John", 94, 98, 77, false],
["Barbara", 94, 94, 99, true]
],
"sorted and reverse by three columns."
);
const df5 = new DataFrame(
{
id: [3, 1, 8],
value: [1, 0, 1]
},
["id", "value"]
);
// NOTE(review): in both union expectations the first seven rows are df3 in
// descending id order -- identical to the sortBy("id", true) expectation above
// -- rather than df3's construction order. This looks like the test relies on
// the earlier sortBy calls having reordered df3 in place; confirm against the
// sortBy implementation before moving or removing those assertions.
assert.deepEqual(
df3.union(df5).toArray(),
[
[8, 1],
[8, 4],
[6, 0],
[3, 1],
[3, 2],
[1, 1],
[1, 1],
[3, 1],
[1, 0],
[8, 1]
],
"concatenated with another DataFrame."
);
// Same dict as df5, but the column list is given as ["value", "id"].
const df5b = new DataFrame(
{
id: [3, 1, 8],
value: [1, 0, 1]
},
["value", "id"]
);
assert.deepEqual(
df3.union(df5b).toArray(),
[
[8, 1],
[8, 4],
[6, 0],
[3, 1],
[3, 2],
[1, 1],
[1, 1],
[1, 3],
[0, 1],
[1, 8]
],
"concatenated with another DataFrame, with columns not in the same order."
);
// Fixture for the pivot / melt assertions at the end of this suite.
const df6 = new DataFrame(
{
id: [3, 3, 1, 8],
id2: ["a", "b", "a", "c"],
value: [1, 2, 0, 1]
},
["id", "id2", "value"]
);
// Single-column frame holding 0..19; the shuffled order is only required to
// differ from the original one, so this check is probabilistic.
const df7 = new DataFrame([...Array(20).keys()].map(row => [row]), ["c1"]);
assert.isNotDeepEqual(
df7.shuffle().toArray(),
df7.toArray(),
"randomly shuffled."
);
assert.equal(
df7.shuffle().count(),
df7.count(),
"randomly shuffled and get the same length."
);
// Single-column frame holding 0..4999: a 0.2 fraction is expected to give
// exactly 1000 rows, and bisect(0.2) a 1000 / 4000 split.
const df8 = new DataFrame([...Array(5000).keys()].map(row => [row]), [
"c1"
]);
assert.equal(df8.sample(0.2).count(), 1000, "randomly sampled.");
assert.deepEqual(
df8.bisect(0.2).map(splittedDF => splittedDF.count()),
[1000, 4000],
"bisected by percentage into 2 DataFrames."
);
// Pivot df6 on id2 within each id group, summing "value"; combinations with no
// source row are expected to be undefined.
const pivotedDf6 = df6
.groupBy("id")
.pivot("id2", gdf => gdf.stat.sum("value"));
assert.deepEqual(
pivotedDf6.toCollection(),
[
{ id: 3, a: 1, b: 2, c: undefined },
{ id: 1, a: 0, b: undefined, c: undefined },
{ id: 8, a: undefined, b: undefined, c: 1 }
],
"pivoted."
);
// melt turns the pivoted columns back into (variable, value) rows per id.
assert.deepEqual(
pivotedDf6
.groupBy("id")
.melt()
.toCollection(),
[
{ id: 3, variable: "a", value: 1 },
{ id: 3, variable: "b", value: 2 },
{ id: 3, variable: "c", value: undefined },
{ id: 1, variable: "a", value: 0 },
{ id: 1, variable: "b", value: undefined },
{ id: 1, variable: "c", value: undefined },
{ id: 8, variable: "a", value: undefined },
{ id: 8, variable: "b", value: undefined },
{ id: 8, variable: "c", value: 1 }
],
"melted."
);
assert.end();
});
// Failure cases for row operations: union/innerJoin with bad arguments and
// sortBy on a column holding mixed types.
test("DataFrame rows can't be ", assert => {
    // tryCatch presumably hands back whatever the callback threw; each check
    // only compares the `name` of that result.
    const unionWithOtherColumns = tryCatch(() =>
        new DataFrame([{ c1: 1, c2: 3 }]).union(
            new DataFrame([{ c1: 1, c4: 3 }])
        )
    );
    assert.equal(
        unionWithOtherColumns.name,
        "WrongSchemaError",
        "concatenated when providing different columns, throwing WrongSchemaError."
    );

    const unionWithArray = tryCatch(() =>
        new DataFrame([{ c1: 1, c2: 3 }]).union([])
    );
    assert.equal(
        unionWithArray.name,
        "ArgumentTypeError",
        "concatened with not a DataFrame, throwing ArgumentTypeError."
    );

    const joinWithArray = tryCatch(() =>
        new DataFrame([{ c1: 1, c2: 3 }]).innerJoin([])
    );
    assert.equal(
        joinWithArray.name,
        "ArgumentTypeError",
        "joined with not a DataFrame, throwing ArgumentTypeError."
    );

    // c1 holds a number in one row and undefined in the other.
    const sortMixedColumn = tryCatch(() =>
        new DataFrame([
            { c1: 1, c2: 3 },
            { c1: undefined, c2: "4" }
        ]).sortBy("c1")
    );
    assert.equal(
        sortMixedColumn.name,
        "MixedTypeError",
        "sortBy on a mixed types column, throwing MixedTypeError."
    );

    assert.end();
});
// Module registration: per instance through the `modules` option, and
// globally through DataFrame.setDefaultModules.
test("DataFrame modules can be ", assert => {
    // Minimal module: keeps a reference to its DataFrame and exposes one method.
    // The assertions below reach it as `df.fakemodule`, which matches `name`.
    class FakeModule {
        constructor(dataframe) {
            this.df = dataframe;
            this.name = "fakemodule";
        }
        test(x) {
            return x * 2;
        }
    }

    const df = new DataFrame(
        {
            column1: [3, 6, 8],
            column2: ["3", "4", "5", "6", "yolo"],
            column3: []
        },
        ["column1", "column2", "column3"],
        { modules: [FakeModule] }
    );
    assert.equal(
        df.fakemodule.test(4),
        8,
        "registered in an DataFrame instance and used."
    );
    assert.equal(
        df.options.modules.length,
        DataFrame.defaultModules.length + 1,
        "listed from an instance where modules are changed from options."
    );
    assert.equal(
        new DataFrame([], []).options.modules.length,
        3,
        "listed from an instance where modules are default modules."
    );
    assert.equal(
        DataFrame.defaultModules.length,
        3,
        "listed from the default DataFrame static properties and counted."
    );

    // setDefaultModules changes state shared by every DataFrame created
    // afterwards. Snapshot the current defaults and put them back once the
    // assertion has run, so FakeModule does not leak into later suites.
    const originalDefaults = [...DataFrame.defaultModules];
    DataFrame.setDefaultModules(...originalDefaults, FakeModule);
    try {
        assert.equal(
            new DataFrame(
                {
                    column1: [3, 6, 8],
                    column2: ["3", "4", "5", "6", "yolo"],
                    column3: []
                },
                ["column1", "column2", "column3"]
            ).fakemodule.test(6),
            12,
            "registered as default DataFrame static properties and used."
        );
    } finally {
        DataFrame.setDefaultModules(...originalDefaults);
    }
    assert.end();
});
// Immutability: map must hand back a different DataFrame object, whether or
// not the callback changes any row.
test("DataFrame stay immutable when", assert => {
    const source = new DataFrame(
        [[1, 6, 9, 10, 12], [1, 2], [6, 6, 9, 8, 9, 12]],
        ["c1", "c2", "c3", "c4", "c5", "c6"]
    );

    const rewritten = source.map(row => row.set("c1", 18));
    assert.equal(Object.is(rewritten, source), false, "modified.");

    // Identity callback: still expected to produce a distinct object.
    const passedThrough = source.map(row => row);
    assert.equal(
        Object.is(passedThrough, source),
        false,
        "modified, even if nothing have changed."
    );

    assert.end();
});