UNPKG

@briancullen/aws-textract-parser

Version:

Library for converting AWS Textract responses into a more usable structure.

141 lines (140 loc) 4.94 kB
import { Geometry } from './model/Geometry';

/**
 * Properties common to every block type except the document block.
 */
export interface BaseBlockProperties {
  /** Unique id that identifies the block. */
  readonly id: string;

  /** Location information describing where the block can be found. */
  readonly geometry: Geometry;
}

/**
 * Implemented by blocks that carry recognised text.
 */
export interface TextProvider {
  /** The text that was recognised for this block. */
  readonly text: string;

  /**
   * Confidence, expressed as a percentage, that the text
   * has been identified correctly.
   */
  readonly confidence: number;
}

/**
 * Implemented by every node that makes up the parsed tree.
 *
 * @typeparam P type of this node's parent
 * @typeparam C type of this node's children
 */
export interface TreeNode<P extends Block, C extends Block> {
  /**
   * Gets the parent of this node.
   *
   * @returns the parent node, or undefined when this node is the root
   */
  parent(): P | undefined;

  /**
   * Gets the children of this node.
   *
   * @returns the child nodes, or an empty array when this node is a leaf
   */
  children(): C[];

  /**
   * Checks whether the given node is a child of this node.
   *
   * @param item the node to look for
   * @returns true when the item is a child of this node, false otherwise
   */
  hasChild(item: C): boolean;
}

/**
 * Information made available about the document that has been
 * processed. At present only a single piece of information is provided.
 */
export interface DocumentMetadata {
  /** Number of pages in the processed document. */
  readonly pages: number;
}

/**
 * Gives access to the information held for a Word block.
 * When present, Word blocks are always the leaves of the tree;
 * they never have any child nodes.
 *
 * In the current implementation the parent of a Word block is
 * always a Line block.
 */
export interface WordBlock
  extends BaseBlockProperties,
    TextProvider,
    TreeNode<LineBlock, Block> {
  /**
   * Constant identifying the type of block. Can be used for type
   * discrimination, see [[BlockType]] for more details.
   */
  readonly blockType: BlockType.Word;
}

/**
 * Gives access to the information held for a Line block.
 * Line blocks are always children of a Page block and each one
 * represents a single line of text in the processed document.
 *
 * A line is further split into words, which become Word blocks
 * that are added as the children of the Line block.
 */
export interface LineBlock
  extends BaseBlockProperties,
    TextProvider,
    TreeNode<PageBlock, WordBlock> {
  /**
   * Constant identifying the type of block. Can be used for type
   * discrimination, see [[BlockType]] for more details.
   */
  readonly blockType: BlockType.Line;
}

/**
 * Gives access to the information held for a Page block.
 * A Page block is the root of everything processed from a single
 * page of the document. Its parent is always the Document block.
 *
 * A page is broken down into a number of lines; the Line blocks
 * created to represent those lines are the children of the Page block.
 */
export interface PageBlock
  extends BaseBlockProperties,
    TreeNode<Document, LineBlock> {
  /**
   * Constant identifying the type of block. Can be used for type
   * discrimination, see [[BlockType]] for more details.
   */
  readonly blockType: BlockType.Page;
}

/**
 * Gives access to the information held for the Document block.
 * The Document block is always the root of the tree and there is
 * exactly one Document block per Textract response.
 *
 * It has one Page block child for each page in the processed document.
 */
export interface Document extends TreeNode<Block, PageBlock> {
  /**
   * Constant identifying the type of block. Can be used for type
   * discrimination, see [[BlockType]] for more details.
   */
  readonly blockType: BlockType.Document;

  /** Metadata describing the processed document. */
  readonly metadata: DocumentMetadata;
}

/** Union of all the possible block types. */
export declare type Block = Document | PageBlock | LineBlock | WordBlock;

/**
 * Shorthand alias for an array of blocks of one particular type.
 *
 * @typeparam T the type of block stored in the array
 */
export declare type Blocks<T extends Block> = T[];

/**
 * Identifies the different types of block that can appear
 * in the returned tree.
 */
export declare enum BlockType {
  /** Indicates a Word block. */
  Word = "WORD",
  /** Indicates a Line block. */
  Line = "LINE",
  /** Indicates a Page block. */
  Page = "PAGE",
  /** Indicates a Document block. */
  Document = "DOCUMENT"
}