UNPKG

@severo_tests/hyparquet

Version:

Parquet file parser for JavaScript

134 lines (119 loc) 3.68 kB
/**
 * Build a tree from the flat list of schema elements.
 *
 * The schema array is read as a depth-first (pre-order) listing: each element
 * is immediately followed by its `num_children` subtrees.
 *
 * @import {SchemaElement, SchemaTree} from '../src/types.d.ts'
 * @param {SchemaElement[]} schema
 * @param {number} rootIndex index of the root element
 * @param {string[]} path path to the element
 * @returns {SchemaTree} tree of schema elements
 */
function schemaTree(schema, rootIndex, path) {
  const element = schema[rootIndex]
  const children = []
  // Number of flat schema entries consumed by this subtree, including itself
  let count = 1

  // Read the specified number of children
  if (element.num_children) {
    while (children.length < element.num_children) {
      // The next unread entry is the root of the next child subtree
      const childElement = schema[rootIndex + count]
      const child = schemaTree(schema, rootIndex + count, [...path, childElement.name])
      count += child.count
      children.push(child)
    }
  }

  return { count, element, children, path }
}

/**
 * Get schema elements from the root to the given element name.
 *
 * The returned list always starts with the root node, followed by one node
 * per entry of `name`.
 *
 * @param {SchemaElement[]} schema
 * @param {string[]} name path to the element
 * @returns {SchemaTree[]} list of schema elements
 * @throws {Error} if any part of the path does not match a child name
 */
export function getSchemaPath(schema, name) {
  let tree = schemaTree(schema, 0, [])
  const path = [tree]
  for (const part of name) {
    const child = tree.children.find(child => child.element.name === part)
    if (!child) throw new Error(`parquet schema element not found: ${name}`)
    path.push(child)
    tree = child
  }
  return path
}

/**
 * Get the max repetition level for a given schema path.
 *
 * Counts the elements on the path whose repetition type is REPEATED.
 *
 * @param {SchemaTree[]} schemaPath
 * @returns {number} max repetition level
 */
export function getMaxRepetitionLevel(schemaPath) {
  let maxLevel = 0
  for (const { element } of schemaPath) {
    if (element.repetition_type === 'REPEATED') {
      maxLevel++
    }
  }
  return maxLevel
}

/**
 * Get the max definition level for a given schema path.
 *
 * Counts the elements on the path, excluding the first (root) entry, whose
 * repetition type is anything other than REQUIRED.
 *
 * @param {SchemaTree[]} schemaPath
 * @returns {number} max definition level
 */
export function getMaxDefinitionLevel(schemaPath) {
  let maxLevel = 0
  for (const { element } of schemaPath.slice(1)) {
    if (element.repetition_type !== 'REQUIRED') {
      maxLevel++
    }
  }
  return maxLevel
}

/**
 * Check if a column is list-like.
 *
 * A node is list-like when it is annotated with converted type LIST, has
 * exactly one child, and that child is REPEATED with at most one child of
 * its own.
 *
 * @param {SchemaTree} schema
 * @returns {boolean} true if list-like
 */
export function isListLike(schema) {
  if (!schema) return false
  if (schema.element.converted_type !== 'LIST') return false
  // Exactly one child is required; a childless LIST node is malformed and
  // must yield false rather than crash on the child lookup below.
  if (schema.children.length !== 1) return false

  const firstChild = schema.children[0]
  if (firstChild.children.length > 1) return false
  if (firstChild.element.repetition_type !== 'REPEATED') return false

  return true
}

/**
 * Check if a column is map-like.
 *
 * A node is map-like when it is annotated with converted type MAP, has
 * exactly one child, and that child is REPEATED with exactly two children.
 * Children named `key` and `value`, when present, must not be REPEATED;
 * children with other names are not rejected.
 *
 * @param {SchemaTree} schema
 * @returns {boolean} true if map-like
 */
export function isMapLike(schema) {
  if (!schema) return false
  if (schema.element.converted_type !== 'MAP') return false
  // Exactly one child is required; a childless MAP node is malformed and
  // must yield false rather than crash on the child lookup below.
  if (schema.children.length !== 1) return false

  const firstChild = schema.children[0]
  if (firstChild.children.length !== 2) return false
  if (firstChild.element.repetition_type !== 'REPEATED') return false

  const keyChild = firstChild.children.find(child => child.element.name === 'key')
  if (keyChild?.element.repetition_type === 'REPEATED') return false

  const valueChild = firstChild.children.find(child => child.element.name === 'value')
  if (valueChild?.element.repetition_type === 'REPEATED') return false

  return true
}

/**
 * Returns true if a column is non-nested.
 *
 * A flat column has a path of exactly two nodes (root and the column), is
 * not REPEATED, and has no children.
 *
 * @param {SchemaTree[]} schemaPath
 * @returns {boolean}
 */
export function isFlatColumn(schemaPath) {
  if (schemaPath.length !== 2) return false
  const [, column] = schemaPath
  if (column.element.repetition_type === 'REPEATED') return false
  if (column.children.length) return false
  return true
}