UNPKG

pdf-parse-test

Version:

Pure TypeScript, cross-platform module for extracting text, images, and tabular data from PDFs. Run directly in your browser or in Node!

180 lines 6.1 kB
/**
 * Result codes of Rectangle#isNeighbour. Shaped like a compiled TypeScript
 * numeric enum: every name maps to its number and every number maps back to
 * its name.
 */
const RelativeDirections = Object.freeze({
  None: 0,
  Left: 1,
  Right: 2,
  Top: 3,
  Bottom: 4,
  0: 'None',
  1: 'Left',
  2: 'Right',
  3: 'Top',
  4: 'Bottom',
});

/**
 * An axis-aligned rectangle (one table cell) that accumulates the text of the
 * items positioned inside it.
 */
export class Rectangle {
  id;
  x;
  y;
  width;
  height;
  x2;
  y2;
  text;

  /**
   * @param {*} id - Identifier, only used by toString().
   * @param {number} x - First corner, x coordinate.
   * @param {number} y - First corner, y coordinate.
   * @param {number} width - Extent along x; x2 is derived as x + width.
   * @param {number} height - Extent along y; y2 is derived as y + height.
   */
  constructor(id, x, y, width, height) {
    this.id = id;
    this.x = x;
    this.y = y;
    this.width = width;
    this.height = height;
    this.x2 = x + width;
    this.y2 = y + height;
    this.text = '';
  }

  /** @returns {string} The id followed by the collected text. */
  toString() {
    return `${this.id} ${this.text}`;
  }

  /**
   * Appends the item's text to this rectangle when the item's position
   * (transform[4], transform[5]) lies inside the rectangle, edges included.
   *
   * @param {{transform: number[], str?: string, hasEOL?: boolean}} item
   * @returns {boolean} true when the item lies inside (even if nothing was
   *   appended), false otherwise.
   */
  tryAddText(item) {
    const x = item.transform[4];
    const y = item.transform[5];
    const isInside = x >= this.x && y >= this.y && x <= this.x2 && y <= this.y2;
    if (!isInside) {
      return false;
    }

    // A missing `str` is treated as empty text; interpolating it directly
    // would append the literal word "undefined" to the cell.
    const str = item.str ?? '';

    // Leading empty items are consumed without adding anything (not even a
    // line break) so a cell never starts with blank content.
    if (str.length === 0 && this.text.length === 0) {
      return true;
    }

    this.text += `${str}${item.hasEOL ? '\n' : ''}`;
    return true;
  }

  /**
   * Reports whether `rect` shares an edge with this rectangle.
   *
   * Horizontal neighbours must agree with this rectangle in height and y,
   * vertical neighbours in width and x; every comparison passes when the
   * absolute difference is strictly below `distance`.
   *
   * - Left:   rect.x2 matches this.x
   * - Right:  rect.x  matches this.x2
   * - Top:    rect.y2 matches this.y
   * - Bottom: rect.y  matches this.y2
   *
   * @param {Rectangle} rect - Candidate neighbour.
   * @param {number} [distance=1] - Exclusive tolerance for each comparison.
   * @returns {number} A RelativeDirections value; None when not adjacent.
   */
  isNeighbour(rect, distance = 1) {
    const heightOk = Math.abs(this.height - rect.height) < distance;
    const yOk = Math.abs(this.y - rect.y) < distance;
    if (heightOk && yOk) {
      if (Math.abs(this.x - rect.x2) < distance) {
        return RelativeDirections.Left;
      }
      if (Math.abs(this.x2 - rect.x) < distance) {
        return RelativeDirections.Right;
      }
    }

    const widthOk = Math.abs(this.width - rect.width) < distance;
    const xOk = Math.abs(this.x - rect.x) < distance;
    if (widthOk && xOk) {
      if (Math.abs(this.y - rect.y2) < distance) {
        return RelativeDirections.Top;
      }
      if (Math.abs(this.y2 - rect.y) < distance) {
        return RelativeDirections.Bottom;
      }
    }

    return RelativeDirections.None;
  }
}

/**
 * A table assembled from adjacent rectangles. `grid` is an array of rows and
 * each row is an array of Rectangle cells.
 */
export class Table {
  grid;

  // Bounding box of the table. The sentinels keep isInside() false until
  // initMinMax() has run. The "max" sentinels are -Number.MAX_VALUE because
  // Number.MIN_VALUE is the smallest *positive* double, not the most negative.
  minTableX1 = Number.MAX_VALUE;
  minTableY1 = Number.MAX_VALUE;
  maxTableX2 = -Number.MAX_VALUE;
  maxTableY2 = -Number.MAX_VALUE;

  /** @param {Rectangle} rect - First cell; becomes the only cell of row 0. */
  constructor(rect) {
    this.grid = [[rect]];
  }

  /**
   * Total number of cells over all rows. Recomputed on every access so the
   * value stays correct while addRectangle() keeps growing the grid.
   *
   * @returns {number}
   */
  get cellCount() {
    let count = 0;
    for (const row of this.grid) {
      count += row.length;
    }
    return count;
  }

  /** @returns {number} maxTableX2 - minTableX1. */
  get width() {
    return this.maxTableX2 - this.minTableX1;
  }

  /** @returns {number} maxTableY2 - minTableY1. */
  get height() {
    return this.maxTableY2 - this.minTableY1;
  }

  /**
   * Offers a text item to the tables of a page and stores it in the first
   * cell that contains it. Tables with fewer than 3 cells are skipped, as are
   * tables whose bounding box (see initMinMax) does not contain the item.
   *
   * @param {Table[]} pageTables
   * @param {{transform: number[], str?: string, hasEOL?: boolean}} item
   * @returns {boolean} true when some cell accepted the item.
   */
  static tryAddText(pageTables, item) {
    for (const table of pageTables) {
      if (table.cellCount < 3) {
        continue;
      }
      if (!table.isInside(item)) {
        continue;
      }
      for (const row of table.grid) {
        for (const rectangle of row) {
          if (rectangle.tryAddText(item)) {
            return true;
          }
        }
      }
    }
    return false;
  }

  /**
   * Attaches `rect` to the first table that has a cell it continues, or
   * starts a new table with it when there is none.
   *
   * - rect is the Right neighbour of a cell: appended to that cell's row.
   * - rect is the Bottom neighbour of a cell in the last row: becomes a new
   *   row. When a following row already exists the scan simply continues, so
   *   the rectangle can still be matched as a Right neighbour there.
   * - Left and Top neighbours are not attached.
   *
   * @param {Table[]} pageTables - Mutated in place.
   * @param {Rectangle} rect
   * @returns {boolean} Always true.
   */
  static addRectangle(pageTables, rect) {
    for (const table of pageTables) {
      for (let rowIndex = 0; rowIndex < table.grid.length; rowIndex++) {
        const row = table.grid[rowIndex];
        for (let colIndex = 0; row && colIndex < row.length; colIndex++) {
          const dir = row[colIndex]?.isNeighbour(rect);
          if (dir === RelativeDirections.Right) {
            row.push(rect);
            return true;
          }
          if (dir === RelativeDirections.Bottom && table.grid[rowIndex + 1] === undefined) {
            table.grid.push([rect]);
            return true;
          }
        }
      }
    }
    pageTables.push(new Table(rect));
    return true;
  }

  /**
   * @returns {string[][]} The trimmed text of every cell, row by row.
   */
  getTableArray() {
    return this.grid.map((row) => row.map((rect) => rect.text.trim()));
  }

  /**
   * Computes the table's bounding box from its cells. Call this once the grid
   * is complete; isInside() rejects every item before that.
   *
   * All cells are scanned rather than only the first and the last one, so a
   * table whose last row is shorter than an earlier row still gets a box that
   * covers every cell.
   *
   * @throws {Error} 'malformed table' when the grid is empty, or its first
   *   row has no first cell, or its last row has no last cell.
   */
  initMinMax() {
    const firstRow = this.grid[0];
    const lastRow = this.grid[this.grid.length - 1];
    if (firstRow === undefined || lastRow === undefined) {
      throw new Error('malformed table');
    }
    const firstRect = firstRow[0];
    const lastRect = lastRow[lastRow.length - 1];
    if (firstRect === undefined || lastRect === undefined) {
      throw new Error('malformed table');
    }

    let minX = firstRect.x;
    let minY = firstRect.y;
    let maxX = lastRect.x2;
    let maxY = lastRect.y2;
    for (const row of this.grid) {
      if (!row) {
        continue;
      }
      for (const rect of row) {
        if (rect === undefined) {
          continue;
        }
        minX = Math.min(minX, rect.x);
        minY = Math.min(minY, rect.y);
        maxX = Math.max(maxX, rect.x2);
        maxY = Math.max(maxY, rect.y2);
      }
    }

    this.minTableX1 = minX;
    this.minTableY1 = minY;
    this.maxTableX2 = maxX;
    this.maxTableY2 = maxY;
  }

  /**
   * @param {{transform: number[]}} item
   * @returns {boolean} true when the item's position (transform[4],
   *   transform[5]) lies within the bounding box, edges included.
   */
  isInside(item) {
    const x = item.transform[4];
    const y = item.transform[5];
    return x >= this.minTableX1 && y >= this.minTableY1 && x <= this.maxTableX2 && y <= this.maxTableY2;
  }

  /** @returns {string} Cells joined by tabs, rows joined by newlines. */
  toString() {
    return this.grid.map((row) => row.map((cell) => cell.text).join('\t')).join('\n');
  }
}
//# sourceMappingURL=TableUtil.js.map