UNPKG

lorix

Version:
981 lines (774 loc) 57.2 kB
import chai from "chai"; const { expect } = chai; import { // Very Small DataFrames verySmallDataFrame1, verySmallDataFrame2, verySmallDataFrame4, verySmallValidObjArray, verySmallInvalidObjArray, verySmallDataFrameCrossJoinResult, verySmallDataFrameInnerJoinResult, verySmallDataFrameLeftJoinResult, verySmallDataFrameRightJoinResult, verySmallDataFrameLeftAntiJoinResult, verySmallDataFrameRightAntiJoinResult, verySmallDataFrameFullOuterJoinResult, // Small DataFrames smallDataFrame1, smallDataFrame2, smallDataFrame4, smallDataFrame5, smallDataFrame1OrderByIdResult, smallDataFrame1OrderByNameResult, smallDataFrame1OrderByIdWeightResult, smallDataFrame1OrderByIdDescWeightAscResult, smallDataFrame1FilterIdResult, smallDataFrame1FilterWeightResult, smallDataFrame2DistinctAllResult, smallDataFrame2DistinctIdResult, smallDataFrame2DistinctNameWeightResult, smallDataFrame4ReplaceName, smallDataFrame4ReplaceNameColour, smallDataFrame4ReplaceAllName, smallDataFrame4ReplaceAllNameColour, smallDataFrame4RegexReplaceName, smallDataFrame4RegexReplaceNameGlobal, smallDataFrame4RegexReplaceNameColourGlobal, smallDataFrame1and2UnionByName, iris, irisGroupBySpeciesResult, } from "./sample_data.js" import { DataFrame } from "../src/dataframe.js"; describe("DataFrame class", () => { describe("Constructor", () => { it("Should have rows and columns properties.", () => { const df = new DataFrame(); expect(df).to.have.property('rows'); expect(df).to.have.property('columns'); }); it("Should create an empty DataFrame when no parameters are passed.", () => { const df = new DataFrame(); expect(df.rows.length).to.equal(0); expect(df.columns.length).to.equal(0); }); it("Should create DataFrame with populated rows and columns properties when parameters are passed.", () => { const df = verySmallDataFrame1; expect(df.rows.length).to.equal(3); expect(df.columns.length).to.equal(2); }); }); describe("Row iteration", () => { it("Should allow iteration over DataFrame rows in a for...of loop", () => { const df = verySmallDataFrame1; let rowChecker; for (const row of df) { rowChecker = row; } // rowChecker should be set to the last row expect(rowChecker["id"]).to.equal(verySmallDataFrame1.slice(-1).toArray()[0]["id"]); }); it("Should allow destructuring DataFrame rows", () => { const df = [...verySmallDataFrame1]; // Compare the expect(df.slice(-1)[0]["id"]).to.equal(verySmallDataFrame1.slice(-1).toArray()[0]["id"]); }); }); describe("select()", function() { beforeEach(function() { this.currentTest.df = verySmallDataFrame1; }); it("Should throw an error if no columns are selected", function() { expect(() => {this.test.df.select()}).to.throw(); }); it("Should throw an error if at least one column does not exist in the DataFrame", function() { expect(() => {this.test.df.select("id", "colthatdoesnotexist")}).to.throw(); }); it("Should return a new DataFrame with only the selected columns", function() { const df = this.test.df.select("name"); expect(df.columns).to.have.all.members(["name"]); expect(df.columns).to.not.have.all.members(["id"]); for (let row of df) { expect(row).to.haveOwnProperty("name"); expect(row).not.to.haveOwnProperty("id"); } }); }); describe("drop()", function() { beforeEach(function() { this.currentTest.df = verySmallDataFrame1; }); it("Should throw an error if no columns are dropped", function() { expect(() => {this.test.df.drop()}).to.throw(); }); it("Should throw an error if at least one column does not exist in the DataFrame", function() { expect(() => {this.test.df.drop("id", "colthatdoesnotexist")}).to.throw(); }); it("Should return a new DataFrame excluding the dropped columns", function() { const df = this.test.df.drop("name"); expect(df.columns).to.have.all.members(["id"]); expect(df.columns).to.not.have.all.members(["name"]); for (let row of df) { expect(row).to.haveOwnProperty("id"); expect(row).not.to.haveOwnProperty("name"); } }); }); describe("withColumn()", function() { beforeEach(function() { this.currentTest.df = verySmallDataFrame1; }); it("Should throw an error if column name is invalid", function() { // Is not a string expect(() => {this.test.df.withColumn(1, () => 1)}).to.throw(); expect(() => {this.test.df.withColumn(undefined, () => 1)}).to.throw(); expect(() => {this.test.df.withColumn(null, () => 1)}).to.throw(); // Starts with a number expect(() => {this.test.df.withColumn("1newCol", () => 1)}).to.throw(); // Starts with punctuation other than underscore ["-", "~", "!", "@", "#", "$", "%", "^", "&", "*", "(", ")", "[", "]", "+", "=", "{", "}", ":", ";", "'", "|", "\\", "/", "<", ">", ",", ".", "?"].forEach((invalidChar) => { expect(() => {this.test.df.withColumn(invalidChar + "newCol", () => 1)}).to.throw(); }); }); it("Should add a new column when a non-existing valid column name is specified", function() { const df = this.test.df.withColumn("newCol", () => 1); expect(df.columns.includes("newCol")).to.be.true; for (let row of df) { expect(row).to.haveOwnProperty("newCol"); expect(row["newCol"]).to.equal(1); } }); it("Should overwrite the values of a column when an existing column name is specified", function() { const newColName = "newCol"; const df = ( this.test.df .withColumn(newColName, () => 1) .withColumn(newColName, () => 2) ); const numInstances = df.columns.filter((col) => col == newColName).length; expect(numInstances).to.equal(1); // There shouldn't be more than one instance of a column name expect(df.columns.includes(newColName)).to.be.true; for (let row of df) { expect(row).to.haveOwnProperty(newColName); expect(row[newColName]).to.equal(2); } }); it("Should throw an error if no definition is provided for the new column", function() { expect(() => {this.test.df.withColumn("newCol")}).to.throw(); }); it("Should throw an error when a function is not passed as the column definition", function() { expect(() => {this.test.df.withColumn("newCol", 1)}).to.throw(); }); it("Should allow for existing columns to be referenced in the column defintion", function() { const existingCol = this.test.df.columns[0]; // Get the first column in test DataFrame const df = this.test.df.withColumn("newCol", (row) => row[existingCol]); expect(df.columns.includes("newCol")).to.be.true; for (let row of df) { expect(row).to.haveOwnProperty("newCol"); expect(row["newCol"]).to.equal(row[existingCol]); } }); // Temporarily commenting-out this test as supporting the ability to // detect references to columns that do not exist in the DataFrame, also // disables running methods of the column itself. Example: If an existing // column "latest_date" is a Date type, and you want to create a new column // derived from this, that leverages a method of the Date object, such as // .toISOString(), then an error will be generated; that is, this will error: // // df.withColumn("derived_date", (row) => row["latest_date"].toISOString()) // // This is due to the function validateFunctionReferencesWithProxy that checks // the column references made by the function in the `withColumn` using a Proxy // object. // it("Should throw an error when referencing a non-existent column", function() { // expect(() => {this.test.df.withColumn("newCol", (row) => row["nonExistingColumn"])}).to.throw(); // }); }); describe("fromArray()", function() { beforeEach(function() { this.currentTest.validArray = verySmallValidObjArray; this.currentTest.invalidArray = verySmallInvalidObjArray; }); it("Should return a DataFrame if a valid array of objects is passed", function() { expect(() => DataFrame.fromArray(this.test.validArray)).to.not.throw(); const df = DataFrame.fromArray(this.test.validArray); expect(df.rows.length).to.equal(this.test.validArray.length); }); it("Should throw an error if array is empty", function() { expect(() => DataFrame.fromArray([])).to.throw(); }); it("Should throw an error if properties are not the same for all objects", function() { expect(() => DataFrame.fromArray(this.test.invalidArray)).to.throw(); }); it("Should throw an error if an array is not passed", function() { expect(() => DataFrame.fromArray(() => "error")).to.throw(); }); }); describe("crossJoin()", function() { beforeEach(function() { this.currentTest.df1 = verySmallDataFrame1; this.currentTest.df2 = verySmallDataFrame1; this.currentTest.crossJoinResultDf = verySmallDataFrameCrossJoinResult; }); it("Should return the cross join between two DataFrames", function() { let result = this.test.df1.crossJoin(this.test.df2); expect(result.toArray()).to.deep.equal(this.test.crossJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.crossJoinResultDf.columns); }); it("Should throw an error if a DataFrame is not passed", function() { expect(() => this.test.df1.crossJoin(()=> "should error")).to.throw(); expect(() => this.test.df1.crossJoin([{"id": 1, "name": "test"}])).to.throw(); }); it("Should throw an error if no argument is passed", function() { expect(() => this.test.df1.crossJoin()).to.throw(); }); it("Should throw an error if more than one argument is passed", function() { expect(() => this.test.df1.crossJoin(this.test.df2, (l, r) => l.id == r.id)).to.throw(); }); }); describe("innerJoin()", function() { beforeEach(function() { this.currentTest.df1 = verySmallDataFrame1; this.currentTest.df2 = verySmallDataFrame2; this.currentTest.innerJoinResultDf = verySmallDataFrameInnerJoinResult; }); it("Should return the inner join between two DataFrames when using a single array join condition", function() { let result = this.test.df1.innerJoin(this.test.df2, ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.innerJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.innerJoinResultDf.columns); }); it("Should return the inner join between two DataFrames when using left and right array join conditions", function() { let result = this.test.df1.innerJoin(this.test.df2, ["id", "name"], ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.innerJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.innerJoinResultDf.columns); }); it("Should return the inner join between two DataFrames when using a function join condition", function() { let result = this.test.df1.innerJoin(this.test.df2, (l, r) => (l["id"] == r["id"]) && (l["name"] == r["name"])); expect(result.toArray()).to.deep.equal(this.test.innerJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.innerJoinResultDf.columns); }); it( "Should throw an error if a DataFrame is not passed", function() { expect(() => this.test.df1.innerJoin(()=> "should error"), ["id"]).to.throw(); }); it("Should throw an error if no argument is passed", function() { expect(() => this.test.df1.innerJoin()).to.throw(); }); it("Should throw an error if more than three arguments are passed", function() { expect(() => this.test.df1.innerJoin(this.test.df2, ["id"], 1, 1)).to.throw(); }); it("Should throw an error if a non-existent column is passed using a single array join condition", function() { expect(() => this.test.df1.innerJoin(this.test.df2, ["invalidCol"])).to.throw(); expect(() => this.test.df1.innerJoin(this.test.df2, ["id", "invalidCol"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using left and right array join conditions", function() { expect(() => this.test.df1.innerJoin(this.test.df2, ["id"], ["invalidCol"])).to.throw(); expect(() => this.test.df1.innerJoin(this.test.df2, ["invalidCol"], ["id"])).to.throw(); expect(() => this.test.df1.innerJoin(this.test.df2, ["id", "invalidCol"], ["id"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using a function join condition", function() { expect(() => this.test.df1.innerJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.invalidCol == "test") )).to.throw(); expect(() => this.test.df1.innerJoin(this.test.df2, (l, r) => (l.id == r.id) || (r.invalidCol == "test") )).to.throw(); }); it("Should throw an error if an empty array is passed using a single array join condition", function() { expect(() => this.test.df1.innerJoin(this.test.df2, [])).to.throw(); }); it("Should throw an error if an empty array is passed using left and right array join conditions", function() { expect(() => this.test.df1.innerJoin(this.test.df2, [], [])).to.throw(); expect(() => this.test.df1.innerJoin(this.test.df2, ["id"], [])).to.throw(); expect(() => this.test.df1.innerJoin(this.test.df2, [], ["id"])).to.throw(); }); it("Should throw an error if an left and right array join conditions are of different lengths", function() { expect(() => this.test.df1.innerJoin(this.test.df2, ["id"], ["id", "name"])).to.throw(); }); it("Should throw an error if there are overlapping columns using a function join condition", function() { expect(() => this.test.df1.innerJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.name == "error") )).to.throw(); }); it("Should throw an error if there are overlapping columns using a single array join condition", function() { expect(() => this.test.df1.innerJoin(this.test.df2, ["id"])).to.throw(); }); it("Should throw an error if there are overlapping columns using left and right array join conditions", function() { expect(() => this.test.df1.innerJoin(this.test.df2, ["id"], ["id"])).to.throw(); expect(() => this.test.df1.innerJoin(this.test.df3, ["id"], ["idCol"])).to.throw(); }); }); describe("leftJoin()", function() { beforeEach(function() { this.currentTest.df1 = verySmallDataFrame1; this.currentTest.df2 = verySmallDataFrame2; this.currentTest.leftJoinResultDf = verySmallDataFrameLeftJoinResult; }); it("Should return the left join between two DataFrames when using a single array join condition", function() { let result = this.test.df1.leftJoin(this.test.df2, ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.leftJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.leftJoinResultDf.columns); }); it("Should return the left join between two DataFrames when using left and right array join conditions", function() { let result = this.test.df1.leftJoin(this.test.df2, ["id", "name"], ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.leftJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.leftJoinResultDf.columns); }); it("Should return the left join between two DataFrames when using a function join condition", function() { let result = this.test.df1.leftJoin(this.test.df2, (l, r) => (l["id"] == r["id"]) && (l["name"] == r["name"])); expect(result.toArray()).to.deep.equal(this.test.leftJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.leftJoinResultDf.columns); }); it( "Should throw an error if a DataFrame is not passed", function() { expect(() => this.test.df1.leftJoin(()=> "should error"), ["id"]).to.throw(); }); it("Should throw an error if no argument is passed", function() { expect(() => this.test.df1.leftJoin()).to.throw(); }); it("Should throw an error if more than three arguments are passed", function() { expect(() => this.test.df1.leftJoin(this.test.df2, ["id"], 1, 1)).to.throw(); }); it("Should throw an error if a non-existent column is passed using a single array join condition", function() { expect(() => this.test.df1.leftJoin(this.test.df2, ["invalidCol"])).to.throw(); expect(() => this.test.df1.leftJoin(this.test.df2, ["id", "invalidCol"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using left and right array join conditions", function() { expect(() => this.test.df1.leftJoin(this.test.df2, ["id"], ["invalidCol"])).to.throw(); expect(() => this.test.df1.leftJoin(this.test.df2, ["invalidCol"], ["id"])).to.throw(); expect(() => this.test.df1.leftJoin(this.test.df2, ["id", "invalidCol"], ["id"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using a function join condition", function() { expect(() => this.test.df1.leftJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.invalidCol == "test") )).to.throw(); expect(() => this.test.df1.leftJoin(this.test.df2, (l, r) => (l.id == r.id) || (r.invalidCol == "test") )).to.throw(); }); it("Should throw an error if an empty array is passed using a single array join condition", function() { expect(() => this.test.df1.leftJoin(this.test.df2, [])).to.throw(); }); it("Should throw an error if an empty array is passed using left and right array join conditions", function() { expect(() => this.test.df1.leftJoin(this.test.df2, [], [])).to.throw(); expect(() => this.test.df1.leftJoin(this.test.df2, ["id"], [])).to.throw(); expect(() => this.test.df1.leftJoin(this.test.df2, [], ["id"])).to.throw(); }); it("Should throw an error if an left and right array join conditions are of different lengths", function() { expect(() => this.test.df1.leftJoin(this.test.df2, ["id"], ["id", "name"])).to.throw(); }); it("Should throw an error if there are overlapping columns using a function join condition", function() { expect(() => this.test.df1.leftJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.name == "error") )).to.throw(); }); it("Should throw an error if there are overlapping columns using a single array join condition", function() { expect(() => this.test.df1.leftJoin(this.test.df2, ["id"])).to.throw(); }); it("Should throw an error if there are overlapping columns using left and right array join conditions", function() { expect(() => this.test.df1.leftJoin(this.test.df2, ["id"], ["id"])).to.throw(); expect(() => this.test.df1.leftJoin(this.test.df3, ["id"], ["idCol"])).to.throw(); }); }); describe("rightJoin()", function() { beforeEach(function() { this.currentTest.df1 = verySmallDataFrame1; this.currentTest.df2 = verySmallDataFrame2; this.currentTest.rightJoinResultDf = verySmallDataFrameRightJoinResult; }); it("Should return the right join between two DataFrames when using a single array join condition", function() { let result = this.test.df1.rightJoin(this.test.df2, ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.rightJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.rightJoinResultDf.columns); }); it("Should return the right join between two DataFrames when using right and right array join conditions", function() { let result = this.test.df1.rightJoin(this.test.df2, ["id", "name"], ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.rightJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.rightJoinResultDf.columns); }); it("Should return the right join between two DataFrames when using a function join condition", function() { let result = this.test.df1.rightJoin(this.test.df2, (l, r) => (l["id"] == r["id"]) && (l["name"] == r["name"])); expect(result.toArray()).to.deep.equal(this.test.rightJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.rightJoinResultDf.columns); }); it( "Should throw an error if a DataFrame is not passed", function() { expect(() => this.test.df1.rightJoin(()=> "should error"), ["id"]).to.throw(); }); it("Should throw an error if no argument is passed", function() { expect(() => this.test.df1.rightJoin()).to.throw(); }); it("Should throw an error if more than three arguments are passed", function() { expect(() => this.test.df1.rightJoin(this.test.df2, ["id"], 1, 1)).to.throw(); }); it("Should throw an error if a non-existent column is passed using a single array join condition", function() { expect(() => this.test.df1.rightJoin(this.test.df2, ["invalidCol"])).to.throw(); expect(() => this.test.df1.rightJoin(this.test.df2, ["id", "invalidCol"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using right and right array join conditions", function() { expect(() => this.test.df1.rightJoin(this.test.df2, ["id"], ["invalidCol"])).to.throw(); expect(() => this.test.df1.rightJoin(this.test.df2, ["invalidCol"], ["id"])).to.throw(); expect(() => this.test.df1.rightJoin(this.test.df2, ["id", "invalidCol"], ["id"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using a function join condition", function() { expect(() => this.test.df1.rightJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.invalidCol == "test") )).to.throw(); expect(() => this.test.df1.rightJoin(this.test.df2, (l, r) => (l.id == r.id) || (r.invalidCol == "test") )).to.throw(); }); it("Should throw an error if an empty array is passed using a single array join condition", function() { expect(() => this.test.df1.rightJoin(this.test.df2, [])).to.throw(); }); it("Should throw an error if an empty array is passed using right and right array join conditions", function() { expect(() => this.test.df1.rightJoin(this.test.df2, [], [])).to.throw(); expect(() => this.test.df1.rightJoin(this.test.df2, ["id"], [])).to.throw(); expect(() => this.test.df1.rightJoin(this.test.df2, [], ["id"])).to.throw(); }); it("Should throw an error if an right and right array join conditions are of different lengths", function() { expect(() => this.test.df1.rightJoin(this.test.df2, ["id"], ["id", "name"])).to.throw(); }); it("Should throw an error if there are overlapping columns using a function join condition", function() { expect(() => this.test.df1.rightJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.name == "error") )).to.throw(); }); it("Should throw an error if there are overlapping columns using a single array join condition", function() { expect(() => this.test.df1.rightJoin(this.test.df2, ["id"])).to.throw(); }); it("Should throw an error if there are overlapping columns using right and right array join conditions", function() { expect(() => this.test.df1.rightJoin(this.test.df2, ["id"], ["id"])).to.throw(); expect(() => this.test.df1.rightJoin(this.test.df3, ["id"], ["idCol"])).to.throw(); }); }); describe("leftAntiJoin()", function() { beforeEach(function() { this.currentTest.df1 = verySmallDataFrame1; this.currentTest.df2 = verySmallDataFrame2; this.currentTest.leftAntiJoinResultDf = verySmallDataFrameLeftAntiJoinResult; }); it("Should return the left anti join between two DataFrames when using a single array join condition", function() { let result = this.test.df1.leftAntiJoin(this.test.df2, ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.leftAntiJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.leftAntiJoinResultDf.columns); }); it("Should return the left anti join between two DataFrames when using left and right array join conditions", function() { let result = this.test.df1.leftAntiJoin(this.test.df2, ["id", "name"], ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.leftAntiJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.leftAntiJoinResultDf.columns); }); it("Should return the left anti join between two DataFrames when using a function join condition", function() { let result = this.test.df1.leftAntiJoin(this.test.df2, (l, r) => (l["id"] == r["id"]) && (l["name"] == r["name"])); expect(result.toArray()).to.deep.equal(this.test.leftAntiJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.leftAntiJoinResultDf.columns); }); it( "Should throw an error if a DataFrame is not passed", function() { expect(() => this.test.df1.leftAntiJoin(()=> "should error"), ["id"]).to.throw(); }); it("Should throw an error if no argument is passed", function() { expect(() => this.test.df1.leftAntiJoin()).to.throw(); }); it("Should throw an error if more than three arguments are passed", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["id"], 1, 1)).to.throw(); }); it("Should throw an error if a non-existent column is passed using a single array join condition", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["invalidCol"])).to.throw(); expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["id", "invalidCol"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using left and right array join conditions", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["id"], ["invalidCol"])).to.throw(); expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["invalidCol"], ["id"])).to.throw(); expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["id", "invalidCol"], ["id"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using a function join condition", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.invalidCol == "test") )).to.throw(); expect(() => this.test.df1.leftAntiJoin(this.test.df2, (l, r) => (l.id == r.id) || (r.invalidCol == "test") )).to.throw(); }); it("Should throw an error if an empty array is passed using a single array join condition", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, [])).to.throw(); }); it("Should throw an error if an empty array is passed using left and right array join conditions", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, [], [])).to.throw(); expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["id"], [])).to.throw(); expect(() => this.test.df1.leftAntiJoin(this.test.df2, [], ["id"])).to.throw(); }); it("Should throw an error if an left and right array join conditions are of different lengths", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["id"], ["id", "name"])).to.throw(); }); it("Should throw an error if there are overlapping columns using a function join condition", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.name == "error") )).to.throw(); }); it("Should throw an error if there are overlapping columns using a single array join condition", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["id"])).to.throw(); }); it("Should throw an error if there are overlapping columns using left and right array join conditions", function() { expect(() => this.test.df1.leftAntiJoin(this.test.df2, ["id"], ["id"])).to.throw(); expect(() => this.test.df1.leftAntiJoin(this.test.df3, ["id"], ["idCol"])).to.throw(); }); }); describe("rightAntiJoin()", function() { beforeEach(function() { this.currentTest.df1 = verySmallDataFrame1; this.currentTest.df2 = verySmallDataFrame2; this.currentTest.rightAntiJoinResultDf = verySmallDataFrameRightAntiJoinResult; }); it("Should return the right anti join between two DataFrames when using a single array join condition", function() { let result = this.test.df1.rightAntiJoin(this.test.df2, ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.rightAntiJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.rightAntiJoinResultDf.columns); }); it("Should return the right anti join between two DataFrames when using right and right array join conditions", function() { let result = this.test.df1.rightAntiJoin(this.test.df2, ["id", "name"], ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.rightAntiJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.rightAntiJoinResultDf.columns); }); it("Should return the right anti join between two DataFrames when using a function join condition", function() { let result = this.test.df1.rightAntiJoin(this.test.df2, (l, r) => (l["id"] == r["id"]) && (l["name"] == r["name"])); expect(result.toArray()).to.deep.equal(this.test.rightAntiJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.rightAntiJoinResultDf.columns); }); it( "Should throw an error if a DataFrame is not passed", function() { expect(() => this.test.df1.rightAntiJoin(()=> "should error"), ["id"]).to.throw(); }); it("Should throw an error if no argument is passed", function() { expect(() => this.test.df1.rightAntiJoin()).to.throw(); }); it("Should throw an error if more than three arguments are passed", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["id"], 1, 1)).to.throw(); }); it("Should throw an error if a non-existent column is passed using a single array join condition", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["invalidCol"])).to.throw(); expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["id", "invalidCol"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using right and right array join conditions", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["id"], ["invalidCol"])).to.throw(); expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["invalidCol"], ["id"])).to.throw(); expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["id", "invalidCol"], ["id"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using a function join condition", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.invalidCol == "test") )).to.throw(); expect(() => this.test.df1.rightAntiJoin(this.test.df2, (l, r) => (l.id == r.id) || (r.invalidCol == "test") )).to.throw(); }); it("Should throw an error if an empty array is passed using a single array join condition", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, [])).to.throw(); }); it("Should throw an error if an empty array is passed using right and right array join conditions", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, [], [])).to.throw(); expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["id"], [])).to.throw(); expect(() => this.test.df1.rightAntiJoin(this.test.df2, [], ["id"])).to.throw(); }); it("Should throw an error if an right and right array join conditions are of different lengths", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["id"], ["id", "name"])).to.throw(); }); it("Should throw an error if there are overlapping columns using a function join condition", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.name == "error") )).to.throw(); }); it("Should throw an error if there are overlapping columns using a single array join condition", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["id"])).to.throw(); }); it("Should throw an error if there are overlapping columns using right and right array join conditions", function() { expect(() => this.test.df1.rightAntiJoin(this.test.df2, ["id"], ["id"])).to.throw(); expect(() => this.test.df1.rightAntiJoin(this.test.df3, ["id"], ["idCol"])).to.throw(); }); }); describe("fullOuterJoin()", function() { beforeEach(function() { this.currentTest.df1 = verySmallDataFrame2; this.currentTest.df2 = verySmallDataFrame4; this.currentTest.fullOuterJoinResultDf = verySmallDataFrameFullOuterJoinResult; }); it("Should return the full outer join between two DataFrames when using a single array join condition", function() { let result = this.test.df1.fullOuterJoin(this.test.df2, ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.fullOuterJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.fullOuterJoinResultDf.columns); }); it("Should return the full outer join between two DataFrames when using right and right array join conditions", function() { let result = this.test.df1.fullOuterJoin(this.test.df2, ["id", "name"], ["id", "name"]); expect(result.toArray()).to.deep.equal(this.test.fullOuterJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.fullOuterJoinResultDf.columns); }); it("Should return the full outer join between two DataFrames when using a function join condition", function() { let result = this.test.df1.fullOuterJoin(this.test.df2, (l, r) => (l["id"] == r["id"]) && (l["name"] == r["name"])); expect(result.toArray()).to.deep.equal(this.test.fullOuterJoinResultDf.toArray()); expect(result.columns).to.deep.equal(this.test.fullOuterJoinResultDf.columns); }); it( "Should throw an error if a DataFrame is not passed", function() { expect(() => this.test.df1.fullOuterJoin(()=> "should error"), ["id"]).to.throw(); }); it("Should throw an error if no argument is passed", function() { expect(() => this.test.df1.fullOuterJoin()).to.throw(); }); it("Should throw an error if more than three arguments are passed", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["id"], 1, 1)).to.throw(); }); it("Should throw an error if a non-existent column is passed using a single array join condition", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["invalidCol"])).to.throw(); expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["id", "invalidCol"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using right and right array join conditions", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["id"], ["invalidCol"])).to.throw(); expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["invalidCol"], ["id"])).to.throw(); expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["id", "invalidCol"], ["id"])).to.throw(); }); it("Should throw an error if a non-existent column is passed using a function join condition", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.invalidCol == "test") )).to.throw(); expect(() => this.test.df1.fullOuterJoin(this.test.df2, (l, r) => (l.id == r.id) || (r.invalidCol == "test") )).to.throw(); }); it("Should throw an error if an empty array is passed using a single array join condition", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, [])).to.throw(); }); it("Should throw an error if an empty array is passed using right and right array join conditions", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, [], [])).to.throw(); expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["id"], [])).to.throw(); expect(() => this.test.df1.fullOuterJoin(this.test.df2, [], ["id"])).to.throw(); }); it("Should throw an error if an right and right array join conditions are of different lengths", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["id"], ["id", "name"])).to.throw(); }); it("Should throw an error if there are overlapping columns using a function join condition", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, (l, r) => (l.id == r.id) && (r.name == "error") )).to.throw(); }); it("Should throw an error if there are overlapping columns using a single array join condition", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["id"])).to.throw(); }); it("Should throw an error if there are overlapping columns using right and right array join conditions", function() { expect(() => this.test.df1.fullOuterJoin(this.test.df2, ["id"], ["id"])).to.throw(); expect(() => this.test.df1.fullOuterJoin(this.test.df3, ["id"], ["idCol"])).to.throw(); }); }); describe("orderBy()", function() { beforeEach(function() { this.currentTest.df = smallDataFrame1; this.currentTest.orderByIdResultDf = smallDataFrame1OrderByIdResult; this.currentTest.orderByNameResultDf = smallDataFrame1OrderByNameResult; this.currentTest.orderByIdWeightResultDf = smallDataFrame1OrderByIdWeightResult; this.currentTest.orderByIdWeightDescResultDf = smallDataFrame1OrderByIdDescWeightAscResult; }); it("Should return a new DataFrame ordered by the specified columns and sort order", function() { let result1 = this.test.df.orderBy(["id"]); expect(result1.toArray()).to.deep.equal(this.test.orderByIdResultDf.toArray()); expect(result1.columns).to.deep.equal(this.test.orderByIdResultDf.columns); let result2 = this.test.df.orderBy(["name"]); expect(result2.toArray()).to.deep.equal(this.test.orderByNameResultDf.toArray()); expect(result2.columns).to.deep.equal(this.test.orderByNameResultDf.columns); let result3 = this.test.df.orderBy(["id", "weight"]); expect(result3.toArray()).to.deep.equal(this.test.orderByIdWeightResultDf.toArray()); expect(result3.columns).to.deep.equal(this.test.orderByIdWeightResultDf.columns); let result4 = this.test.df.orderBy(["id", "weight"], ["desc", "asc"]); expect(result4.toArray()).to.deep.equal(this.test.orderByIdWeightDescResultDf.toArray()); expect(result4.columns).to.deep.equal(this.test.orderByIdWeightDescResultDf.columns); }); it("Should throw an error if no columns are specified", function() { expect(() => this.test.df.orderBy([])).to.throw(); }); it("Should throw an error if an array is not passed", function() { expect(() => this.test.df.orderBy("notAnArray")).to.throw(); }); it("Should throw an error if an array is not passed for sort order", function() { expect(() => this.test.df.orderBy(["id"], "notAnArray")).to.throw(); }); it("Should throw an error if no valid columns are specified", function() { expect(() => this.test.df.orderBy(["invalidColumn"])).to.throw(); }); it("Should throw an error if no valid values are specified for sort order", function() { expect(() => this.test.df.orderBy(["id"], ["invalidValue"])).to.throw(); }); }); describe("groupBy()", function() { beforeEach(function() { this.currentTest.df = iris; this.currentTest.groupBySpeciesResultDf = irisGroupBySpeciesResult; }); it("Should return a new DataFrame grouped by the specified columns with the specified aggregations", function() { let result1 = ( this.test.df .groupBy( ["species"], { "sepal_length": ["min", "max", "mean", "count", "sum"] } ) ); expect(result1.toArray()).to.deep.equal(this.test.groupBySpeciesResultDf.toArray()); expect(result1.columns).to.deep.equal(this.test.groupBySpeciesResultDf.columns); }); it("Should throw an error if no columns are specified", function() { expect(() => this.test.df.groupBy([])).to.throw(); }); it("Should throw an error if an array is not passed", function() { expect(() => this.test.df.groupBy("notAnArray")).to.throw(); }); it("Should throw an error if no valid columns are specified", function() { expect(() => this.test.df.groupBy(["invalidColumn"])).to.throw(); }); it("Should throw an error if no valid values are specified for aggregation", function() { expect(() => this.test.df.groupBy( ["species"], {"sepal_length": ["min", "max", "invalidAgg"]} )).to.throw(); }); }); describe("filter()", function() { beforeEach(function() { this.currentTest.df = smallDataFrame1; this.currentTest.filterIdResultDf = smallDataFrame1FilterIdResult; this.currentTest.filterWeightResultDf = smallDataFrame1FilterWeightResult; }); it("Should return a new DataFrame filtered by the specified columns", function() { let resultId = this.test.df.filter((row) => row["id"] == 100); let resultWeight = this.test.df.filter((row) => row["weight"] < 80); // Filter based on id expect(resultId.toArray()).to.deep.equal(this.test.filterIdResultDf.toArray()); expect(resultId.columns).to.deep.equal(this.test.filterIdResultDf.columns); // Filter based on weight expect(resultWeight.toArray()).to.deep.equal(this.test.filterWeightResultDf.toArray()); expect(resultWeight.columns).to.deep.equal(this.test.filterWeightResultDf.columns); }); it("Should throw an error if no function is specified", function() { expect(() => this.test.df.filter()).to.throw(); }); it("Should throw an error when a type other than a function is passed", function() { expect(() => {this.test.df.filter(1)}).to.throw(); expect(() => {this.test.df.filter("stringInsteadOfFunction")}).to.throw(); }); it("Should throw an error if more than one argument is passed", function() { expect(() => {this.test.df.filter(((row) => row["weight"] < 80), "test")}).to.throw(); }); // Temporarily commenting-out this test as supporting the ability to // detect references to columns that do not exist in the DataFrame, also // disables running methods of the column itself. Example: If an existing // column "latest_date" is a Date type, and you want to create a new column // derived from this, that leverages a method of the Date object, such as // .toISOString(), then an error will be generated; that is, this will error: // // df.filter((row) => row["latest_date"].toISOString().split("T")[0] == "2023-02-06"); // // This is due to the function validateFunctionReferencesWithProxy that checks // the column references made by the function in the `filter` using a Proxy // object. // it("Should throw an error when referencing a non-existent column", function() { // expect(() => {this.test.df.filter((row) => row["nonExistingColumn"] > 1)}).to.throw(); // }); }); describe("distinct()", function() { beforeEach(function() { this.currentTest.df = smallDataFrame2; this.currentTest.distinctAllResultDf = smallDataFrame2DistinctAllResult; this.currentTest.distinctIdResultDf = smallDataFrame2DistinctIdResult; this.currentTest.distinctNameWeightResultDf = smallDataFrame2DistinctNameWeightResult; }); it("Should return a new DataFrame with duplicate rows dropped as per the specified columns", function() { let resultAll = this.test.df.distinct(); let resultId = this.test.df.distinct(["id"]); let resultNameWeight = this.test.df.distinct(["name", "weight"]); // Distinct based on all columns expect(resultAll.toArray()).to.deep.equal(this.test.distinctAllResultDf.toArray()); expect(resultAll.columns).to.deep.equal(this.test.distinctAllResultDf.columns); // Distinct based on id expect(resultId.toArray()).to.deep.equal(this.test.distinctIdResultDf.toArray()); expect(resultId.columns).to.deep.equal(this.test.distinctIdResultDf.columns); // Distinct based on name and weight expect(resultNameWeight.toArray()).to.deep.equal(this.test.distinctNameWeightResultDf.toArray()); expect(resultNameWeight.columns).to.deep.equal(this.test.distinctNameWeightResultDf.columns); }); it("Should throw an error when a type other than an array is passed", function() { expect(() => {this.test.df.distinct(1)}).to.throw(); expect(() => {this.test.df.distinct("stringInsteadOfFunction")}).to.throw(); }); it("Should throw an error when referencing a non-existent column", function() { expect(() => {this.test.df.distinct(["nonExistingColumn"])}).to.throw(); }); it("Should throw an error if more than one argument is passed", function() { expect(() => {this.test.df.distinct(["name", "weight"], "anotherArgument")}).to.throw(); }); }); describe("replace()", function() { beforeEach(function() { this.currentTest.df = smallDataFrame4; this.currentTest.replaceNameResultDf = smallDataFrame4ReplaceName; this.currentTest.replaceNameColourResultDf = smallDataFrame4ReplaceNameColour; }); it("Should return a new DataFrame with values correctly replaced across the specified columns", function() { let resultName = this.test.df.replace(["name"