UNPKG

@jrc03c/js-math-tools

Version:
280 lines (228 loc) 9.09 kB
import { assert } from "../assert.mjs"
import { filter } from "../filter.mjs"
import { forEach } from "../for-each.mjs"
import { isArray } from "../is-array.mjs"
import { isDataFrame } from "../is-dataframe.mjs"
import { isJagged } from "../is-jagged.mjs"
import { isSeries } from "../is-series.mjs"
import { isUndefined } from "../is-undefined.mjs"
import { map } from "../map.mjs"
import { MathError } from "../math-error.mjs"
import { ndarray } from "../ndarray.mjs"
import { range } from "../range.mjs"
import { set } from "../set.mjs"
import { shape } from "../shape.mjs"

/**
 * Builds a new DataFrame by appending `x` to a copy of `df` (obtained via
 * `df.copy()`); `df` itself is not written to by this function.
 *
 * With axis 0, `x` is treated as one or more rows stacked beneath the
 * existing rows; with axis 1, `x` is treated as one or more columns placed to
 * the right of the existing columns. Whenever the shapes disagree, the shorter
 * side is padded with `undefined` so that the result stays rectangular, and
 * any missing row / column names are generated as `"row" + n` / `"col" + n`.
 *
 * @param {DataFrame} df - the DataFrame to append to
 * @param {Array|Series|DataFrame} x - a non-jagged 1- or 2-dimensional array,
 *   a Series, or a DataFrame
 * @param {0|1|"horizontal"|"vertical"} [axis=0] - `"horizontal"` is treated
 *   as 0 and `"vertical"` as 1, which is the mapping stated in this
 *   function's own assertion message
 * @returns {DataFrame} the modified copy of `df`
 * @throws {MathError} if `x` is an array with more than 2 dimensions, or is
 *   not an array, Series, or DataFrame. An invalid `axis` or a jagged array
 *   is rejected through `assert`.
 */
function dfAppend(df, x, axis) {
  if (isUndefined(axis)) {
    axis = 0
  }

  assert(
    axis === 0 || axis === 1 || axis === "vertical" || axis === "horizontal",
    'The only valid axis values for use when appending data to a DataFrame are 0, 1, "vertical", and "horizontal". Note that 0 == "horizontal" and 1 == "vertical".',
  )

  // every branch below compares against the numeric form only, so the string
  // aliases have to be converted first; otherwise "horizontal" would silently
  // be handled as axis 1
  if (axis === "horizontal") {
    axis = 0
  } else if (axis === "vertical") {
    axis = 1
  }

  // appending arrays is relatively straightforward: either all of the rows in
  // `x` are appended to the `values` in the current DataFrame, or each row in
  // `x` is concatenated with each corresponding row in `values`
  if (isArray(x)) {
    assert(
      !isJagged(x),
      "The array of data you're trying to append to this DataFrame is jagged!",
    )

    const xShape = shape(x)

    // if `x` is a vector...
    if (xShape.length === 1) {
      // if the `axis` is 0, then we'll assume that `x` must be a row that we
      // should append to the bottom of the stack of rows in `values`; and if
      // `x` is longer than the width of the current DataFrame, then we'll have
      // to extend all of the rows to keep the shape square; or if `x` is
      // shorter than the width of the current DataFrame, then we'll extend `x`
      // until it's the right length
      if (axis === 0) {
        const out = df.copy()

        // store a copy of `x`: the padding loop below pushes onto every row,
        // and must not lengthen the caller's own array
        out._values.push(x.slice())

        const maxRowLength = Math.max(df.shape[1], xShape[0])

        forEach(out._values, row => {
          while (row.length < maxRowLength) {
            row.push(undefined)
          }
        })

        while (out._index.length < out._values.length) {
          out._index.push("row" + out._index.length)
        }

        while (out._columns.length < maxRowLength) {
          out._columns.push("col" + out._columns.length)
        }

        return out
      }

      // otherwise, if the `axis` is 1, then we'll assume that `x` is a column
      // that should be appended to the right of the existing columns; and if
      // `x` is longer than the height of the current DataFrame, then we'll
      // have to extend all of the columns to keep the shape square; or if `x`
      // is shorter than the height of the current DataFrame, then we'll extend
      // `x` until it's the right height
      else {
        const maxColLength = Math.max(df.shape[0], xShape[0])
        const out = df.copy()

        range(0, maxColLength).forEach(i => {
          // add a blank row when `x` is taller than the DataFrame
          // (presumably `ndarray(n)` yields an empty-valued array of length
          // `n` -- confirm against ../ndarray.mjs)
          if (i >= out._values.length) {
            out._values.push(ndarray(df.shape[1]))
          }

          // `x[i]` is `undefined` past the end of `x`, which pads the column
          out._values[i].push(x[i])
        })

        while (out._index.length < out._values.length) {
          out._index.push("row" + out._index.length)
        }

        while (out._columns.length < out._values[0].length) {
          out._columns.push("col" + out._columns.length)
        }

        return out
      }
    }

    // otherwise, if `x` is a matrix...
    else if (xShape.length === 2) {
      // if the `axis` is 0, then we'll assume that `x` contains rows that
      // ought to be stacked beneath the rows in the current DataFrame; and if
      // `x` is wider than the width of the current DataFrame, then we'll have
      // to extend the rows of the current DataFrame to keep the shape square;
      // or if the current DataFrame is wider than `x`, then we'll extend the
      // rows of `x` until they're the right length
      if (axis === 0) {
        const maxRowLength = Math.max(
          ...map(x, row => row.length).concat([df.shape[1]]),
        )

        const out = df.copy()

        // copy each incoming row before padding so that the caller's nested
        // arrays are left untouched
        const incomingRows = map(x, row => row.slice())

        out._values = map(out._values.concat(incomingRows), row => {
          while (row.length < maxRowLength) {
            row.push(undefined)
          }

          return row
        })

        while (out._index.length < out._values.length) {
          out._index.push("row" + out._index.length)
        }

        while (out._columns.length < maxRowLength) {
          out._columns.push("col" + out._columns.length)
        }

        return out
      }

      // otherwise, if the `axis` is 1, then we'll assume that `x` contains
      // rows that ought to be stacked to the right of the rows in the current
      // DataFrame; and if `x` is taller than the height of the current
      // DataFrame, then we'll have to extend the columns of the current
      // DataFrame to keep the shape square; or if the current DataFrame is
      // taller than `x`, then we'll extend the columns of `x` until they're
      // the right length
      else {
        const maxRowLength =
          Math.max(...map(x, row => row.length)) + df.shape[1]

        const maxColLength = Math.max(df.shape[0], xShape[0])
        const out = df.copy()

        range(0, maxColLength).forEach(i => {
          if (i >= out._values.length) {
            out._values.push(ndarray(df.shape[1]))
          }

          // `concat` returns a new array, so the rows of `x` are not modified
          out._values[i] = out._values[i].concat(x[i])

          while (out._values[i].length < maxRowLength) {
            out._values[i].push(undefined)
          }
        })

        while (out._index.length < out._values.length) {
          out._index.push("row" + out._index.length)
        }

        while (out._columns.length < maxRowLength) {
          out._columns.push("col" + out._columns.length)
        }

        return out
      }
    } else {
      throw new MathError(
        "Only 1- and 2-dimensional arrays can be appended to a DataFrame!",
      )
    }
  }

  // appending a Series is virtually the same as appending a vector but with
  // two differences:
  // 1) `x` (the incoming Series) will have its own index, and the values of
  //    that index can't conflict with the names of the current DataFrame's
  //    rows or columns; therefore, any conflicting index names appearing in
  //    `x` must be renamed (by appending a "(2)" to the end of each) before
  //    returning the new DataFrame, and...
  // 2) the name of the Series will be used as its row or column name in the
  //    new DataFrame (e.g., if the `axis` is 0, then `x`'s name will
  //    correspond to the new row containing `x`'s values)
  else if (isSeries(x)) {
    const out = dfAppend(df, x.values, axis)

    // NOTE(review): this assumes that `out.index` / `out.columns` expose the
    // underlying arrays rather than copies -- confirm against the DataFrame
    // class
    if (axis === 0) {
      out.index[out.index.length - 1] =
        out.index.indexOf(x.name) > -1 ? x.name + " (2)" : x.name
    } else {
      out.columns[out.columns.length - 1] =
        out.columns.indexOf(x.name) > -1 ? x.name + " (2)" : x.name
    }

    return out
  }

  // appending a DataFrame is slightly trickier than appending a Series, but
  // all of the same ideas apply
  else if (isDataFrame(x)) {
    if (axis === 0) {
      const out = df.copy()
      const maxRowLength = set(out._columns.concat(x._columns)).length

      // widen the existing rows to make room for columns that only `x` has
      forEach(out._values, row => {
        while (row.length < maxRowLength) {
          row.push(undefined)
        }
      })

      x.apply(row => {
        const rowCopy = row.copy()
        const temp = []

        // first line up the values of `x` with the columns already in `out`,
        // removing each matched value from the row copy as we go...
        forEach(out._columns, col => {
          const index = rowCopy._index.indexOf(col)

          if (index > -1) {
            temp.push(rowCopy._values[index])
            rowCopy._values.splice(index, 1)
            rowCopy._index.splice(index, 1)
          } else {
            temp.push(undefined)
          }
        })

        // ...so that whatever remains belongs to the columns unique to `x`
        out._values.push(temp.concat(rowCopy._values))
      }, 1)

      out._columns = out._columns.concat(
        filter(x._columns, c => out._columns.indexOf(c) < 0),
      )

      // the appended rows receive generated names here; the row names of `x`
      // are not carried over by this branch
      while (out._index.length < out._values.length) {
        const newRowName = "row" + out._index.length

        out._index.push(
          newRowName + (df._index.indexOf(newRowName) > -1 ? " (2)" : ""),
        )
      }

      return out
    } else {
      const out = df.copy()

      // rows whose names exist in both frames are joined side by side; rows
      // found only in `df` are padded on the right
      forEach(out._index, (rowName, i) => {
        const xIndex = x._index.indexOf(rowName)

        if (xIndex > -1) {
          out._values[i] = out._values[i].concat(x._values[xIndex])
        } else {
          out._values[i] = out._values[i].concat(ndarray(x.shape[1]))
        }
      })

      // rows found only in `x` are added at the bottom, padded on the left
      // (`out._columns` still holds only the original columns at this point)
      forEach(x._index, (rowName, i) => {
        const outIndex = out._index.indexOf(rowName)

        if (outIndex < 0) {
          out._index.push(rowName)
          out._values.push(ndarray(out._columns.length).concat(x._values[i]))
        }
      })

      out._columns = out._columns.concat(
        map(x._columns, c => c + (out._columns.indexOf(c) > -1 ? " (2)" : "")),
      )

      return out
    }
  } else {
    throw new MathError(
      "Only 1- or 2-dimensional arrays, Series, and DataFrames can be appended to a DataFrame!",
    )
  }
}

export { dfAppend }