
@regrapes/access-db-parser

A pure JavaScript parser for Microsoft Access database files (.mdb, .accdb)

333 lines (314 loc) 13.4 kB
import createDebug from 'debug'
import type { TableObject } from './TableObject'
import {
  MEMO,
  parseDataPageHeader,
  parseRelativeObjectMetadataStruct,
  parseTableData,
  parseTableHead,
  TDEF_HEADER,
} from './parsing-primitives'
import type { Column, PropType, TableHeader } from './parsing-primitives'
import type { ALL_VERSIONS, Dict, PossibleTypes } from './types'
import { DataType, parseType } from './utils'

const debug = createDebug('access-db-parser:AccessTable')

// Parses a single Access table (its definition pages plus linked data pages)
// into a column-oriented object: column name -> array of cell values.
export class AccessTable {
  private parsedTable: Dict<Array<PossibleTypes>> = {}
  private columns: Dict<Column>
  private tableHeader: TableHeader

  public constructor(
    private table: TableObject,
    private version: ALL_VERSIONS,
    private pageSize: number,
    private dataPages: Dict<Buffer>,
    private tableDefs: Dict<Buffer>,
    private textEncoding: BufferEncoding,
    private sanitizeTextBuffer?: (buffer: Buffer) => Buffer
  ) {
    ;[this.columns, this.tableHeader] = this.getTableColumns()
  }

  // Parse the table-definition page(s) and return the column map (keyed by
  // column index) together with the parsed table header.
  private getTableColumns(): [Dict<Column>, TableHeader] {
    let tableHeader: TableHeader
    let colNames: PropType<ReturnType<typeof parseTableData>, 'columnNames'>
    let columns: Array<Column>
    try {
      tableHeader = parseTableHead(this.table.value, this.version)
      let mergedData = this.table.value.slice(tableHeader.tDefHeaderEnd)
      if (tableHeader.TDEF_header.nextPagePtr) {
        mergedData = Buffer.concat([mergedData, this.mergeTableData(tableHeader.TDEF_header.nextPagePtr)])
      }
      const parsedData = parseTableData(mergedData, tableHeader.realIndexCount, tableHeader.columnCount, this.version)
      columns = parsedData.column as any
      colNames = parsedData.columnNames
      // REMOVE FOR NOW
      // (tableHeader as any).column = parsedData.column;
      // (tableHeader as any).columnNames = parsedData.columnNames;
    } catch (err) {
      throw new Error(`Failed to parse table header`)
    }
    // const colNames = tableHeader.columnNames;
    // const columns = tableHeader.column;
    columns.forEach((column, index) => {
      column.colNameStr = colNames[index].colNameStr
    })
    const offset = Math.min(...columns.map(c => c.columnIndex))
    const columnDict: Dict<Column> = {}
    for (const x of columns) columnDict[x.columnIndex - offset] = x
    if (Object.keys(columnDict).length !== columns.length) {
      for (const x of columns) columnDict[x.columnID] = x
    }
    if (Object.keys(columnDict).length !== tableHeader.columnCount)
      throw new Error(`Expected ${tableHeader.columnCount} columns got ${Object.keys(columnDict).length}`)
    return [columnDict, tableHeader]
  }

  // Follow the chain of table-definition pages and concatenate their payloads
  // (everything after each TDEF header).
  private mergeTableData(firstPage: number): Buffer {
    let table = this.tableDefs[firstPage * this.pageSize]!
    let parsedHeader = TDEF_HEADER.parse(table)
    let data = table.slice(parsedHeader.headerEnd)
    while (parsedHeader.nextPagePtr) {
      table = this.tableDefs[parsedHeader.nextPagePtr * this.pageSize]!
      parsedHeader = TDEF_HEADER.parse(table)
      data = Buffer.concat([data, table.slice(parsedHeader.headerEnd)])
    }
    return data
  }

  // Build an empty result keyed by column name; used when the table has no
  // linked data pages.
  private createEmptyTable() {
    const parsedTable: Dict<Array<PossibleTypes>> = {}
    const [columns] = this.getTableColumns()
    for (const i of Object.keys(columns)) {
      const column = columns[i]!
      parsedTable[column.colNameStr] = []
    }
    return parsedTable
  }

  // Resolve an overflow record pointer (low byte = record slot, remaining
  // bytes = page number) to the raw bytes of that record.
  private getOverflowRecord(recordPointer: number): Buffer | undefined {
    const recordOffset = (recordPointer & 0xff) >>> 0
    const pageNum = recordPointer >>> 8
    const recordPage = this.dataPages[pageNum * this.pageSize]
    if (!recordPage) return
    const parsedData = parseDataPageHeader(recordPage, this.version)
    if (recordOffset > parsedData.recordOffsets.length) return
    let start = parsedData.recordOffsets[recordOffset]
    if ((start & 0x8000) >>> 0) start = (start & 0xfff) >>> 0
    else debug(`Overflow record flag is not present ${start}`)
    let record: Buffer
    if (recordOffset === 0) {
      record = recordPage.slice(start)
    } else {
      let end = parsedData.recordOffsets[recordOffset - 1]
      if (end & 0x8000 && (end & 0xff) !== 0) {
        end &= 0xfff
      }
      record = recordPage.slice(start, end)
    }
    return record
  }

  // Parse one fixed-length column from a record; boolean values are read from
  // the null bitmap instead of the record data.
  private parseFixedLengthData(originalRecord: Buffer, column: Column, nullTable: Array<boolean>) {
    const columnName = column.colNameStr
    let parsedType: PossibleTypes
    if (column.type === DataType.Boolean) {
      if (column.columnID > nullTable.length)
        throw new Error(
          `Failed to parse bool field, Column not found in nullTable column: ${columnName}, column id: ${column.columnID}, nullTable: ${nullTable}`
        )
      parsedType = nullTable[column.columnID]
    } else {
      if (column.fixedOffset > originalRecord.length)
        throw new Error(`Column offset is bigger than the length of the record ${column.fixedOffset}`)
      const record = originalRecord.slice(column.fixedOffset)
      parsedType = parseType(column.type, record, this.version, undefined, this.textEncoding, this.sanitizeTextBuffer)
    }
    if (this.parsedTable[columnName] === undefined) this.parsedTable[columnName] = []
    this.parsedTable[columnName]!.push(parsedType)
    return parsedType
  }

  // Parse the variable-length field offset metadata stored at the end of a
  // record (read through the reversed copy of the record).
  private parseDynamicLengthRecordsMetadata(reverseRecord: Buffer, originalRecord: Buffer, nullTableLength: number) {
    if (this.version > 3) {
      reverseRecord = reverseRecord.slice(nullTableLength + 1)
      if (reverseRecord.length > 1 && reverseRecord[0] === 0) reverseRecord = reverseRecord.slice(1)
      return parseRelativeObjectMetadataStruct(reverseRecord, undefined, this.version)
    }
    const variableLengthJumpTableCNT = Math.floor((originalRecord.length - 1) / 256)
    reverseRecord = reverseRecord.slice(nullTableLength)
    let relativeRecordMetadata: ReturnType<typeof parseRelativeObjectMetadataStruct>
    try {
      relativeRecordMetadata = parseRelativeObjectMetadataStruct(reverseRecord, variableLengthJumpTableCNT, this.version)
      relativeRecordMetadata.relativeMetadataEnd += nullTableLength
    } catch {
      throw new Error('Failed parsing record')
    }
    if (relativeRecordMetadata && relativeRecordMetadata.variableLengthFieldCount !== this.tableHeader.variableColumns) {
      const tmpBuffer = Buffer.allocUnsafe(2)
      tmpBuffer.writeUInt16LE(this.tableHeader.variableColumns)
      const metadataStart = reverseRecord.indexOf(tmpBuffer)
      if (metadataStart !== 1 && metadataStart < 10) {
        reverseRecord = reverseRecord.slice(metadataStart)
        try {
          relativeRecordMetadata = parseRelativeObjectMetadataStruct(reverseRecord, variableLengthJumpTableCNT, this.version)
        } catch {
          throw new Error(`Failed to parse record metadata: ${originalRecord}`)
        }
        relativeRecordMetadata.relativeMetadataEnd += metadataStart
      } else {
        debug(
          `Record did not parse correctly. Number of columns: ${this.tableHeader.variableColumns}.
          Number of parsed columns: ${relativeRecordMetadata.variableLengthFieldCount}`
        )
        return
      }
    }
    return relativeRecordMetadata
  }

  // Parse a memo field: the data is either inline, in a single overflow record
  // (LVAL type 1), or spread across a chain of overflow blocks (LVAL type 2).
  private parseMemo(relativeObjData: Buffer) {
    debug(`Parsing memo field ${relativeObjData}`)
    const parsedMemo = MEMO.parse(relativeObjData)
    let memoData: Buffer
    let memoType: DataType
    if (parsedMemo.memoLength & 0x80000000) {
      debug('Memo data inline')
      memoData = relativeObjData.slice(parsedMemo.memoEnd)
      memoType = DataType.Text
    } else if (parsedMemo.memoLength & 0x40000000) {
      debug('LVAL type 1')
      const tmp = this.getOverflowRecord(parsedMemo.recordPointer)
      if (tmp === undefined) {
        throw new Error('LVAL type 1 memoData is undefined')
      }
      memoData = tmp
      memoType = DataType.Text
    } else {
      debug('LVAL type 2')
      const dataBlocks: Array<Buffer> = []
      let { recordPointer } = parsedMemo
      while (recordPointer) {
        const record = this.getOverflowRecord(recordPointer)
        if (record === undefined) {
          throw new Error('LVAL type 2 memoData is undefined')
        }
        dataBlocks.push(record.subarray(4))
        recordPointer = record.readInt32LE()
      }
      memoData = Buffer.concat(dataBlocks)
      memoType = DataType.Text
    }
    return parseType(memoType, memoData, memoData.length, this.version, this.textEncoding, this.sanitizeTextBuffer)
  }

  // Parse every variable-length column of a record using the relative offsets
  // from its metadata.
  private parseDynamicLengthData(
    originalRecord: Buffer,
    relativeRecordMetadata: ReturnType<typeof parseRelativeObjectMetadataStruct>,
    relativeRecordsColumnMap: Dict<Column>
  ): void {
    const relativeOffsets = relativeRecordMetadata.variableLengthFieldOffsets
    let jumpTableAddition = 0
    let i = -1
    for (const columnIndex of Object.keys(relativeRecordsColumnMap)) {
      i += 1
      const column = relativeRecordsColumnMap[columnIndex]!
      const colName = column.colNameStr
      if (this.version === 3) {
        if (relativeRecordMetadata.variableLengthJumpTable.includes(i)) jumpTableAddition = (jumpTableAddition + 0x100) >>> 0
      }
      let relStart = relativeOffsets[i]
      let relEnd: number
      if (i + 1 === relativeOffsets.length) relEnd = relativeRecordMetadata.varLenCount
      else relEnd = relativeOffsets[i + 1]
      if (this.version > 3) {
        if (relEnd > originalRecord.length) relEnd = (relEnd & 0xff) >>> 0
        if (relStart > originalRecord.length) relStart = (relStart & 0xff) >>> 0
      }
      if (relStart === relEnd) {
        if (this.parsedTable[colName] === undefined) this.parsedTable[colName] = []
        this.parsedTable[colName]!.push('')
        continue
      }
      const relativeObjData = originalRecord.slice(relStart + jumpTableAddition, relEnd + jumpTableAddition)
      let parsedType: PossibleTypes
      if (column.type === DataType.Memo) {
        try {
          parsedType = this.parseMemo(relativeObjData)
        } catch {
          debug(`Failed to parse memo field. Using data as bytes`)
          parsedType = relativeObjData.toString()
        }
      } else {
        parsedType = parseType(
          column.type,
          relativeObjData,
          relativeObjData.length,
          this.version,
          this.textEncoding,
          this.sanitizeTextBuffer
        )
      }
      if (this.parsedTable[colName] === undefined) this.parsedTable[colName] = []
      this.parsedTable[colName]!.push(parsedType)
    }
  }

  // Parse a single record: read the null bitmap from the end of the record,
  // then the fixed-length columns, then the variable-length columns.
  private parseRow(record: Buffer): void {
    const originalRecord = Buffer.allocUnsafe(record.length)
    record.copy(originalRecord)
    let reverseRecord = Buffer.allocUnsafe(record.length)
    record.copy(reverseRecord)
    reverseRecord = reverseRecord.reverse()
    const nullTableLen = Math.floor((this.tableHeader.columnCount + 7) / 8)
    const nullTable: Array<boolean> = []
    if (nullTableLen && nullTableLen < originalRecord.length) {
      const nullTableBuffer = record.slice(nullTableLen === 0 ? 0 : record.length - nullTableLen)
      debug(record.slice(record.length - nullTableLen))
      for (let i = 0; i < nullTableBuffer.length; i++) {
        const byte = nullTableBuffer[i]
        for (let j = 0; j < 8; j++) {
          if ((byte & (1 << j)) === 0) nullTable.push(false)
          else nullTable.push(true)
        }
      }
    } else {
      debug(`Failed to parse null table column count ${this.tableHeader.columnCount}`)
      return
    }
    if (this.version > 3) record = record.slice(2)
    else record = record.slice(1)
    const relativeRecordsColumnMap: Dict<Column> = {}
    for (const i of Object.keys(this.columns)) {
      const column = this.columns[i]!
      if (!column.columnFlags.fixedLength) {
        relativeRecordsColumnMap[i] = column
        continue
      }
      this.parseFixedLengthData(record, column, nullTable)
    }
    if (relativeRecordsColumnMap) {
      const metadata = this.parseDynamicLengthRecordsMetadata(reverseRecord, originalRecord, nullTableLen)
      if (metadata === undefined) return
      this.parseDynamicLengthData(originalRecord, metadata, relativeRecordsColumnMap)
    }
  }

  // Parse all records on the table's linked data pages and return a map of
  // column name -> array of values.
  public parse() {
    if (!this.table.linkedPages) return this.createEmptyTable()
    for (const dataChunk of this.table.linkedPages) {
      const originalData = dataChunk
      const parsedData = parseDataPageHeader(originalData, this.version)
      let lastOffset: number | undefined
      for (const recOffset of parsedData.recordOffsets) {
        if ((recOffset & 0x8000) >>> 0) {
          // Flagged offset: skip the record itself but keep its offset as the
          // boundary of the next record.
          lastOffset = (recOffset & 0xfff) >>> 0
          continue
        }
        if ((recOffset & 0x4000) >>> 0) {
          // This slot stores a 4-byte pointer to an overflow record instead of
          // inline record data.
          const recPtrOffset = (recOffset & 0xfff) >>> 0
          lastOffset = recPtrOffset
          const overflowRecPtrBuffer = originalData.slice(recPtrOffset, recPtrOffset + 4)
          const overflowRecPtr = overflowRecPtrBuffer.readUInt32LE(0)
          const record = this.getOverflowRecord(overflowRecPtr)
          if (record !== undefined) this.parseRow(record)
          continue
        }
        let record: Buffer
        if (!lastOffset) record = originalData.slice(recOffset)
        else record = originalData.slice(recOffset, lastOffset)
        lastOffset = recOffset
        if (record) this.parseRow(record)
      }
    }
    return this.parsedTable
  }
}
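
For orientation, here is a minimal sketch of how AccessTable might be driven. The TableObject, page maps, version, and page size are produced elsewhere in the package by the database-level parser; the './AccessTable' module path and the declared inputs below are assumptions made only for illustration, based on the constructor signature shown above.

// Sketch only: the declared values are assumed to be prepared by the
// package's database-level parser; this file defines just the table class.
import type { TableObject } from './TableObject'
import type { ALL_VERSIONS, Dict } from './types'
import { AccessTable } from './AccessTable' // assumed file name for this module

declare const table: TableObject        // parsed table-definition object
declare const version: ALL_VERSIONS     // database format version (the code branches on 3 vs. > 3)
declare const pageSize: number          // page size of the database file
declare const dataPages: Dict<Buffer>   // raw data pages keyed by byte offset (page number * page size)
declare const tableDefs: Dict<Buffer>   // raw table-definition pages keyed by byte offset

const accessTable = new AccessTable(table, version, pageSize, dataPages, tableDefs, 'utf8')

// parse() returns a column-oriented result: column name -> array of cell values,
// e.g. { Id: [1, 2], Name: ['a', 'b'] }
const rows = accessTable.parse()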