UNPKG

accessdb-parser

Version:

A pure JavaScript parser for Microsoft Access database files (.mdb, .accdb)

489 lines (478 loc) 17.8 kB
import {
    categorizePages,
    DataType,
    parseType,
} from "./utils";
import {
    ACCESSHEADER,
    TDEF_HEADER,
    MEMO,
    parseDataPageHeader,
    parseTableHead,
    parseTableData,
    parseRelativeObjectMetadataStruct,
} from "./parsing-primitives";
import { Dico } from "./types";

// Page sizes selected by parseFileHeader depending on the detected version.
const PAGE_SIZE_V3 = 0x800;
const PAGE_SIZE_V4 = 0x1000;

// Raw `jetVersion` values as read from the file header.
const VERSION_3 = 0x00;
const VERSION_4 = 0x01;
const VERSION_5 = 0x02;
const VERSION_2010 = 0x03;

/** Version numbers exposed to callers and to the parsing primitives. */
enum ALL_VERSIONS {
    VERSION_3 = 3,
    VERSION_4 = 4,
    VERSION_5 = 5,
    VERSION_2010 = 2010,
}

/** Header versions that use the larger (V4) page size. */
const NEW_VERSIONS = [VERSION_4, VERSION_5, VERSION_2010];

/** Catalog `Flags` values that identify system tables. */
const SYSTEM_TABLE_FLAGS = [-0x80000000, -0x00000002, 0x80000000, 0x00000002];

/** Catalog `Type` value of the entries that parseCatalog keeps. */
const USER_TABLE_TYPE = 1;

/** A table definition page together with the data pages owned by it. */
class TableObject {
    public value: Buffer;
    public linkedPages: Array<Buffer> = [];

    /**
     * @param _offset Offset of the definition page (currently unused, kept for callers).
     * @param value Raw bytes of the table definition page.
     */
    public constructor(_offset: number, value: Buffer) {
        this.value = value;
    }
}

type Line = Array<string>;

interface Table {
    fields: Line;
    lines: Array<Line>;
}

/**
 * Entry point of the parser: reads the file header, categorizes the pages,
 * links data pages to their table definition and builds the table catalog.
 */
export class AccessParser {
    private dbData: Buffer;
    private tableDefs: Dico<Buffer>;
    private dataPages: Dico<Buffer>;
    // private allPages: Dico<Buffer>;
    private tablesWithData: Dico<TableObject>;
    private version = ALL_VERSIONS.VERSION_3;
    private pageSize = PAGE_SIZE_V3;
    private catalog: Dico<number>;

    /**
     * @param dbData Full content of the database file.
     * @throws Error if the header, the catalog or a table header cannot be parsed.
     */
    public constructor(dbData: Buffer) {
        this.dbData = dbData;
        this.parseFileHeader();
        [this.tableDefs, this.dataPages, /*this.allPages*/] = categorizePages(this.dbData, this.pageSize);
        this.tablesWithData = this.linkTablesToData();
        this.catalog = this.parseCatalog();
    }

    /**
     * Reads the file header and sets `version` and `pageSize` accordingly.
     *
     * @throws Error if the header cannot be parsed or the version is unknown.
     */
    private parseFileHeader(): void {
        let head: ReturnType<typeof ACCESSHEADER.parse>;
        try {
            head = ACCESSHEADER.parse(this.dbData);
        } catch {
            throw new Error("Failed to parse DB file header. Check it is a valid file header");
        }

        const version = head.jetVersion;
        if (NEW_VERSIONS.includes(version)) {
            if (version === VERSION_4)
                this.version = ALL_VERSIONS.VERSION_4;
            else if (version === VERSION_5)
                this.version = ALL_VERSIONS.VERSION_5;
            else if (version === VERSION_2010)
                this.version = ALL_VERSIONS.VERSION_2010;
            this.pageSize = PAGE_SIZE_V4;
        } else if (version !== VERSION_3) {
            // NOTE(review): the message announces a fallback to version 3 but the code throws.
            // Behaviour kept as-is because callers may rely on the exception.
            throw new Error(`Unknown database version ${version} Trying to parse database as version 3`);
        }
    }

    /**
     * Groups every data page under the table definition page that owns it.
     * Data pages whose header cannot be parsed, or whose owner is not a known
     * table definition, are skipped.
     *
     * @returns Table objects keyed by the offset of their definition page.
     */
    private linkTablesToData(): Dico<TableObject> {
        const tablesWithData: Dico<TableObject> = {};
        for (const key of Object.keys(this.dataPages)) {
            const data = this.dataPages[key]!;
            let parsedDP: ReturnType<typeof parseDataPageHeader>;
            try {
                parsedDP = parseDataPageHeader(data, this.version);
            } catch {
                // Report the page key rather than dumping the whole page buffer.
                console.error(`Failed to parse data page at offset ${key}`);
                continue;
            }

            const pageOffset = parsedDP.owner * this.pageSize;
            // Direct lookup instead of rebuilding and scanning the key list for every page.
            const tablePageValue = this.tableDefs[pageOffset];
            if (tablePageValue === undefined)
                continue;

            let table = tablesWithData[pageOffset];
            if (table === undefined) {
                table = new TableObject(pageOffset, tablePageValue);
                tablesWithData[pageOffset] = table;
            }
            table.linkedPages.push(data);
        }
        return tablesWithData;
    }

    /**
     * Parses the catalog table (the table definition located on page 2) and
     * builds the mapping from table name to table id.
     *
     * Only entries whose `Type` is 1 and whose `Flags` is 0 are kept.
     *
     * @throws Error if the catalog page is missing or lacks a required column.
     */
    private parseCatalog(): Dico<number> {
        const catalogPage = this.tablesWithData[2 * this.pageSize];
        if (catalogPage === undefined)
            throw new Error("Could not find the catalog table in Database");

        const accessTable = new AccessTable(catalogPage, this.version, this.pageSize, this.dataPages, this.tableDefs);
        const catalog = accessTable.parse();

        const names = catalog["Name"];
        const types = catalog["Type"];
        const flags = catalog["Flags"];
        const ids = catalog["Id"];
        if (names === undefined || types === undefined || flags === undefined || ids === undefined)
            throw new Error("The catalog is missing required fields");

        const tablesMapping: Dico<number> = {};
        // The row index must advance for every row, including rows whose name
        // is not a string, otherwise it drifts out of step with the other columns.
        for (let i = 0; i < names.length; ++i) {
            const tableName = names[i];
            if (typeof tableName !== "string")
                continue;
            if (types[i] !== USER_TABLE_TYPE)
                continue;

            const flag = flags[i];
            const id = ids[i];
            if (typeof flag !== "number" || typeof id !== "number")
                continue;

            // TODO: CHECK IF 0 IS THE RIGHT FLAG TO SET
            if (!SYSTEM_TABLE_FLAGS.includes(flag) && flag === 0)
                tablesMapping[tableName] = id;
        }
        return tablesMapping;
    }

    /**
     * Parses a table into a dictionary of columns (column name -> values).
     *
     * @throws Error if the table is unknown, has no definition page, or has no data page.
     */
    private parseTableUnformatted(tableName: string): Dico<Array<string | number | boolean>> {
        const tableId = this.catalog[tableName];
        if (tableId === undefined)
            throw new Error(`Could not find table ${tableName} in Database`);

        const tableOffset = tableId * this.pageSize;
        const table = this.tablesWithData[tableOffset];
        if (table === undefined) {
            const tableDef = this.tableDefs[tableOffset];
            if (tableDef === undefined)
                throw new Error(`Could not find table ${tableName} offset ${tableOffset}`);
            // The definition exists but no data page is linked to it.
            throw new Error("Empty table");
        }

        const accessTable = new AccessTable(table, this.version, this.pageSize, this.dataPages, this.tableDefs);
        return accessTable.parse();
    }

    /**
     * Parses a table and returns it row by row, every value converted to a string.
     *
     * @param name Name of the table, as returned by `getTables`.
     * @returns The column names and one array of stringified values per row.
     * @throws Error if the table cannot be found or parsed.
     */
    public parseTable(name: string): Table {
        const table = this.parseTableUnformatted(name);
        const fields = Object.keys(table);
        const firstField = fields[0];
        if (firstField === undefined)
            return { fields: [], lines: [] };

        // The first column determines the number of rows.
        const linesNumber = table[firstField]!.length;
        const lines: Array<Line> = [];
        for (let i = 0; i < linesNumber; ++i) {
            const line: Array<string> = [];
            for (const field of fields)
                line.push(table[field]![i].toString());
            lines.push(line);
        }
        return { fields, lines };
    }

    /** @returns The names of the tables found in the catalog. */
    public getTables(): Array<string> {
        return Object.keys(this.catalog);
    }

    /** @returns The detected database version (3, 4, 5 or 2010). */
    public getVersion(): number {
        return this.version;
    }
}

type PropType<TObj, TProp extends keyof TObj> = TObj[TProp];
type Column = PropType<ReturnType<typeof parseTableData>, "column">[0] & { colNameStr: string };
type TableHeader = ReturnType<typeof parseTableHead>;
type RelativeMetadata = ReturnType<typeof parseRelativeObjectMetadataStruct>;
type ParsedTable = Dico<Array<string | number | boolean>>;

/** Parses the rows of a single table from its definition page and linked data pages. */
class AccessTable {
    private version: ALL_VERSIONS;
    private pageSize: number;
    private dataPages: Dico<Buffer>;
    private tableDefs: Dico<Buffer>;
    private table: TableObject;
    private parsedTable: ParsedTable;
    private columns: Dico<Column>;
    private tableHeader: TableHeader;

    /**
     * @throws Error if the table header cannot be parsed.
     */
    public constructor(table: TableObject, version: ALL_VERSIONS, pageSize: number, dataPages: Dico<Buffer>, tableDefs: Dico<Buffer>) {
        this.version = version;
        this.pageSize = pageSize;
        this.dataPages = dataPages;
        this.tableDefs = tableDefs;
        this.table = table;
        this.parsedTable = {};
        [this.columns, this.tableHeader] = this.getTableColumns();
    }

    /**
     * Parses the table definition (following continuation pages if any) and
     * returns the columns keyed by position, along with the table header.
     *
     * @throws Error if the definition cannot be parsed or the number of
     *         distinct columns does not match the header's column count.
     */
    private getTableColumns(): [Dico<Column>, TableHeader] {
        let tableHeader: TableHeader;
        let colNames: PropType<ReturnType<typeof parseTableData>, "columnNames">;
        let columns: Array<Column>;
        try {
            tableHeader = parseTableHead(this.table.value, this.version);
            let mergedData = this.table.value.slice(tableHeader.tDefHeaderEnd);
            if (tableHeader.TDEF_header.nextPagePtr) {
                mergedData = Buffer.concat([
                    mergedData,
                    this.mergeTableData(tableHeader.TDEF_header.nextPagePtr),
                ]);
            }

            const parsedData = parseTableData(
                mergedData,
                tableHeader.realIndexCount,
                tableHeader.columnCount,
                this.version,
            );
            // The parsed columns get their `colNameStr` attached just below.
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            columns = parsedData.column as any;
            colNames = parsedData.columnNames;
        } catch {
            throw new Error(`Failed to parse table header`);
        }

        columns.forEach((c, i) => {
            c.colNameStr = colNames[i].colNameStr;
        });

        // Key the columns by their index relative to the smallest index.
        const offset = Math.min(...columns.map(c => c.columnIndex));
        const columnDict: Dico<Column> = {};
        for (const x of columns)
            columnDict[x.columnIndex - offset] = x;

        // Colliding indexes: additionally key the columns by their id.
        if (Object.keys(columnDict).length !== columns.length) {
            for (const x of columns)
                columnDict[x.columnID] = x;
        }

        if (Object.keys(columnDict).length !== tableHeader.columnCount)
            throw new Error(`Expected ${tableHeader.columnCount} columns got ${Object.keys(columnDict).length}`);

        return [columnDict, tableHeader];
    }

    /**
     * Concatenates the payload (everything after the TDEF header) of a chain
     * of table definition pages.
     *
     * @param firstPage Page number of the first continuation page.
     * @throws Error if a page of the chain is missing or the chain loops.
     */
    private mergeTableData(firstPage: number): Buffer {
        const chunks: Array<Buffer> = [];
        const visited = new Set<number>();
        let pagePtr: number = firstPage;
        do {
            // A corrupted file could make the chain loop forever.
            if (visited.has(pagePtr))
                throw new Error(`Table definition pages form a cycle at page ${pagePtr}`);
            visited.add(pagePtr);

            const page = this.tableDefs[pagePtr * this.pageSize];
            if (page === undefined)
                throw new Error(`Could not find table definition page ${pagePtr}`);

            const parsedHeader = TDEF_HEADER.parse(page);
            chunks.push(page.slice(parsedHeader.headerEnd));
            pagePtr = parsedHeader.nextPagePtr;
        } while (pagePtr);

        // Single concatenation instead of re-copying the accumulated data on every page.
        return Buffer.concat(chunks);
    }

    /** @returns A table with every column present and no rows. */
    private createEmptyTable(): ParsedTable {
        const parsedTable: ParsedTable = {};
        for (const key of Object.keys(this.columns)) {
            const column = this.columns[key]!;
            parsedTable[column.colNameStr] = [];
        }
        return parsedTable;
    }

    /**
     * Resolves a record pointer: the low byte is the record index inside the
     * page and the remaining bits are the page number.
     *
     * @returns The record bytes, or `undefined` if the page or record does not exist.
     */
    private getOverflowRecord(recordPointer: number): Buffer | undefined {
        const recordOffset = (recordPointer & 0xFF) >>> 0;
        const pageNum = recordPointer >>> 8;
        const recordPage = this.dataPages[pageNum * this.pageSize];
        if (!recordPage)
            return;

        const parsedData = parseDataPageHeader(recordPage, this.version);
        // `>=`: an index equal to the length is already out of bounds.
        if (recordOffset >= parsedData.recordOffsets.length)
            return;

        let start = parsedData.recordOffsets[recordOffset];
        if ((start & 0x8000) >>> 0)
            start = (start & 0xFFF) >>> 0;
        else
            console.log(`Overflow record flag is not present ${start}`);

        // The first record runs to the end of the page; any other record ends
        // where the previous record starts.
        if (recordOffset === 0)
            return recordPage.slice(start);

        let end = parsedData.recordOffsets[recordOffset - 1];
        if ((end & 0x8000) >>> 0)
            end = (end & 0xFFF) >>> 0;
        return recordPage.slice(start, end);
    }

    /**
     * Parses one fixed-length column of a record and appends the value to the
     * parsed table. Boolean values are read from the null table.
     *
     * @throws Error if the column lies outside the null table or the record.
     */
    private parseFixedLengthData(originalRecord: Buffer, column: Column, nullTable: Array<boolean>): void {
        const columnName = column.colNameStr;
        let parsedType: boolean | string | number;
        if (column.type === DataType.Boolean) {
            // `>=`: an id equal to the length has no entry in the null table.
            if (column.columnID >= nullTable.length)
                throw new Error(`Failed to parse bool field, Column not found in nullTable column: ${columnName}, column id: ${column.columnID}, nullTable: ${nullTable}`);
            parsedType = nullTable[column.columnID];
        } else {
            if (column.fixedOffset > originalRecord.length)
                throw new Error(`Column offset is bigger than the length of the record ${column.fixedOffset}`);
            const record = originalRecord.slice(column.fixedOffset);
            // Same (type, data, length, version) argument order as the other call
            // sites; the version used to be passed in the length position.
            parsedType = parseType(column.type, record, record.length, this.version);
        }

        if (this.parsedTable[columnName] === undefined)
            this.parsedTable[columnName] = [];
        this.parsedTable[columnName]!.push(parsedType);
    }

    /**
     * Parses the variable-length metadata located at the end of a record.
     *
     * @param reverseRecord The record with its bytes reversed.
     * @param originalRecord The record as stored.
     * @param nullTableLength Size in bytes of the null table.
     * @returns The metadata, or `undefined` if it is inconsistent with the table header.
     * @throws Error if the metadata structure cannot be parsed.
     */
    private parseDynamicLengthRecordsMetadata(reverseRecord: Buffer, originalRecord: Buffer, nullTableLength: number): RelativeMetadata | undefined {
        if (this.version > 3) {
            reverseRecord = reverseRecord.slice(nullTableLength + 1);
            if (reverseRecord.length > 1 && reverseRecord[0] === 0)
                reverseRecord = reverseRecord.slice(1);
            return parseRelativeObjectMetadataStruct(reverseRecord, undefined, this.version);
        }

        const variableLengthJumpTableCNT = Math.floor((originalRecord.length - 1) / 256);
        reverseRecord = reverseRecord.slice(nullTableLength);
        let relativeRecordMetadata: RelativeMetadata;
        try {
            relativeRecordMetadata = parseRelativeObjectMetadataStruct(reverseRecord, variableLengthJumpTableCNT, this.version);
            relativeRecordMetadata.relativeMetadataEnd += nullTableLength;
        } catch {
            throw new Error("Failed parsing record");
        }

        if (relativeRecordMetadata && relativeRecordMetadata.variableLengthFieldCount !== this.tableHeader.variableColumns) {
            // The field count does not match the header: search for the expected
            // count near the start of the buffer and re-parse from there.
            const tmpBuffer = Buffer.allocUnsafe(2);
            tmpBuffer.writeUInt16LE(this.tableHeader.variableColumns, 0);
            const metadataStart = reverseRecord.indexOf(tmpBuffer);
            // indexOf returns -1 when the pattern is absent.
            if (metadataStart !== -1 && metadataStart < 10) {
                reverseRecord = reverseRecord.slice(metadataStart);
                try {
                    relativeRecordMetadata = parseRelativeObjectMetadataStruct(reverseRecord, variableLengthJumpTableCNT, this.version);
                } catch {
                    throw new Error(`Failed to parse record metadata: ${originalRecord}`);
                }
                relativeRecordMetadata.relativeMetadataEnd += metadataStart;
            } else {
                console.log(`Record did not parse correctly. Number of columns: ${this.tableHeader.variableColumns}. Number of parsed columns: ${relativeRecordMetadata.variableLengthFieldCount}`);
                return;
            }
        }
        return relativeRecordMetadata;
    }

    /**
     * Parses a memo field: inline data, or data stored in another record
     * ("LVAL type 1"). "LVAL type 2" is not supported; the raw field is then
     * parsed with the column's own type.
     *
     * @throws Error if an LVAL type 1 record cannot be resolved.
     */
    private parseMemo(relativeObjData: Buffer, column: Column): string | number | boolean {
        console.log(`Parsing memo field ${relativeObjData}`);
        const parsedMemo = MEMO.parse(relativeObjData);
        let memoData: Buffer;
        let memoType: DataType;
        if (parsedMemo.memoLength & 0x80000000) {
            console.log("Memo data inline");
            memoData = relativeObjData.slice(parsedMemo.memoEnd);
            memoType = DataType.Text;
        } else if (parsedMemo.memoLength & 0x40000000) {
            console.log("LVAL type 1");
            const tmp = this.getOverflowRecord(parsedMemo.recordPointer);
            if (tmp === undefined)
                throw new Error("LVAL type 1 memoData is undefined");
            memoData = tmp;
            memoType = DataType.Text;
        } else {
            console.log("LVAL type 2");
            console.log("memo lval type 2 currently not supported");
            memoData = relativeObjData;
            memoType = column.type;
        }
        return parseType(memoType, memoData, memoData.length, this.version);
    }

    /**
     * Parses every variable-length column of a record and appends the values
     * to the parsed table. A field with zero length yields an empty string.
     */
    private parseDynamicLengthData(
        originalRecord: Buffer,
        relativeRecordMetadata: RelativeMetadata,
        relativeRecordsColumnMap: Dico<Column>,
    ): void {
        const relativeOffsets = relativeRecordMetadata.variableLengthFieldOffsets;
        let jumpTableAddition = 0;
        let i = -1;
        for (const columnIndex of Object.keys(relativeRecordsColumnMap)) {
            i += 1;
            const column = relativeRecordsColumnMap[columnIndex]!;
            const colName = column.colNameStr;

            if (this.version === ALL_VERSIONS.VERSION_3) {
                // Each jump table entry shifts the following fields by 0x100.
                if (relativeRecordMetadata.variableLengthJumpTable.includes(i))
                    jumpTableAddition = (jumpTableAddition + 0x100) >>> 0;
            }

            let relStart = relativeOffsets[i];
            let relEnd: number;
            if (i + 1 === relativeOffsets.length)
                relEnd = relativeRecordMetadata.varLenCount;
            else
                relEnd = relativeOffsets[i + 1];

            if (this.version > 3) {
                // Offsets beyond the record length are reduced to their low byte.
                if (relEnd > originalRecord.length)
                    relEnd = (relEnd & 0xFF) >>> 0;
                if (relStart > originalRecord.length)
                    relStart = (relStart & 0xFF) >>> 0;
            }

            if (this.parsedTable[colName] === undefined)
                this.parsedTable[colName] = [];

            if (relStart === relEnd) {
                this.parsedTable[colName]!.push("");
                continue;
            }

            const relativeObjData = originalRecord.slice(relStart + jumpTableAddition, relEnd + jumpTableAddition);
            let parsedType: string | number | boolean;
            if (column.type === DataType.Memo) {
                try {
                    parsedType = this.parseMemo(relativeObjData, column);
                } catch {
                    console.log(`Failed to parse memo field. Using data as bytes`);
                    parsedType = relativeObjData.toString();
                }
            } else {
                parsedType = parseType(column.type, relativeObjData, relativeObjData.length, this.version);
            }
            this.parsedTable[colName]!.push(parsedType);
        }
    }

    /**
     * Parses one record: decodes the null table stored at the end of the
     * record, then the fixed-length columns, then the variable-length ones.
     *
     * @throws Error if the null table cannot be read or a column fails to parse.
     */
    private parseRow(record: Buffer): void {
        const originalRecord = Buffer.from(record);
        const reverseRecord = Buffer.from(record).reverse();

        const nullTableLen = Math.floor((this.tableHeader.columnCount + 7) / 8);
        if (!nullTableLen || nullTableLen >= originalRecord.length)
            throw new Error(`Failed to parse null table column count ${this.tableHeader.columnCount}`);

        // Expand the bitmap into one boolean per bit. The bound is the bitmap
        // size: bounding by the (still empty) output array never decoded anything.
        const nullTableBuffer = record.slice(record.length - nullTableLen);
        const nullTable: Array<boolean> = [];
        for (let i = 0; i < nullTableBuffer.length * 8; ++i)
            nullTable.push((nullTableBuffer[Math.floor(i / 8)] & (1 << (i % 8))) !== 0);

        // Skip the leading bytes of the record (2 for versions above 3, 1 otherwise)
        // before reading fixed-length columns.
        if (this.version > 3)
            record = record.slice(2);
        else
            record = record.slice(1);

        const relativeRecordsColumnMap: Dico<Column> = {};
        let hasDynamicColumns = false;
        for (const key of Object.keys(this.columns)) {
            const column = this.columns[key]!;
            if (!column.columnFlags.fixedLength) {
                relativeRecordsColumnMap[key] = column;
                hasDynamicColumns = true;
                continue;
            }
            this.parseFixedLengthData(record, column, nullTable);
        }

        // An object is always truthy, so test for actual variable-length columns.
        if (hasDynamicColumns) {
            const metadata = this.parseDynamicLengthRecordsMetadata(reverseRecord, originalRecord, nullTableLen);
            if (metadata === undefined)
                return;
            this.parseDynamicLengthData(originalRecord, metadata, relativeRecordsColumnMap);
        }
    }

    /**
     * Parses every record of every data page linked to the table.
     *
     * @returns The parsed values keyed by column name.
     * @throws Error if a data page or a record cannot be parsed.
     */
    public parse(): ParsedTable {
        // An array is always truthy, so test its length.
        if (this.table.linkedPages.length === 0)
            return this.createEmptyTable();

        for (const dataChunk of this.table.linkedPages) {
            const parsedData = parseDataPageHeader(dataChunk, this.version);

            // Start of the previously visited record, i.e. the end of the current one.
            let lastOffset: number | undefined = undefined;
            for (const recOffset of parsedData.recordOffsets) {
                // Entries flagged 0x8000 are skipped; they only move the boundary.
                if ((recOffset & 0x8000) >>> 0) {
                    lastOffset = (recOffset & 0xFFF) >>> 0;
                    continue;
                }

                // Entries flagged 0x4000 hold a 4-byte pointer to the actual record.
                if ((recOffset & 0x4000) >>> 0) {
                    const recPtrOffset = (recOffset & 0xFFF) >>> 0;
                    lastOffset = recPtrOffset;
                    const overflowRecPtr = dataChunk.slice(recPtrOffset, recPtrOffset + 4).readUInt32LE(0);
                    const overflowRecord = this.getOverflowRecord(overflowRecPtr);
                    if (overflowRecord !== undefined && overflowRecord.length > 0)
                        this.parseRow(overflowRecord);
                    continue;
                }

                let record: Buffer;
                if (!lastOffset)
                    record = dataChunk.slice(recOffset);
                else
                    record = dataChunk.slice(recOffset, lastOffset);
                lastOffset = recOffset;

                // A Buffer is always truthy; skip empty records explicitly.
                if (record.length > 0)
                    this.parseRow(record);
            }
        }
        return this.parsedTable;
    }
}