@briancullen/aws-textract-parser
Version:
Library for converting AWS Textract responses into a more usable structure.
98 lines (97 loc) • 4.54 kB
TypeScript
import { AWSError, Textract } from 'aws-sdk';
import { DocumentFactory } from './factory';
import { Document } from './types';
/**
* Callback signature to be implemented by code that wants to use callbacks to process
* the data returned by Textract after it has been parsed by this library.
*
* Pass a function of this type to `TextractParser.parseDetectTextCallback`.
*
* - `err`: the AWS error, or null (presumably null when the call succeeded; confirm against the implementation).
* - `data`: the parsed Document, or null (presumably null when an error occurred or the response could not be parsed; confirm against the implementation).
*/
export declare type ParsedDetectTextCallback = (err: AWSError | null, data: Document | null) => void;
/**
* Callback signature used by AWS to return data from Textract.
*
* This is the type of the function returned by `TextractParser.parseDetectTextCallback`,
* so that it can be handed directly to the AWS Textract client.
*
* - `err`: the AWS error, or null.
* - `data`: the raw DetectDocumentText response, or null.
*/
export declare type TextractDetectTextCallback = (err: AWSError | null, data: Textract.Types.DetectDocumentTextResponse | null) => void;
/**
* Unified type to accept responses from either the synchronous (DetectDocumentText)
* or asynchronous (GetDocumentTextDetection) detect text operations.
*
* NOTE(review): this is declared as an intersection (`&`) of the two response types, not a
* union. That presumably relies on both AWS response shapes consisting of optional members,
* so that either response is assignable to the intersection; confirm against the aws-sdk typings.
*/
export declare type TextractDetectTextResponse = Textract.DetectDocumentTextResponse & Textract.GetDocumentTextDetectionResponse;
/**
* This class provides methods to process the information returned by Textract into
* a tree structure that is easier to work with.
*
* A single instance of this class is automatically created for you and provided as
* the default export from this library.
*/
export declare class TextractParser {
/** Presumably holds the DocumentFactory passed to the constructor; the assignment is not visible in this declaration. */
private readonly factory;
/** @hidden */
constructor(factory: DocumentFactory);
/**
* Method that parses the Textract response synchronously.
*
* For example, it can be used as part of processing the result of a promise
* as shown below.
*
* ```typescript
* textract.detectDocumentText(request).promise()
* .then(data => textractParser.parseDetectTextResponse(data))
* .then(parsedData => console.log(parsedData))
* .catch(err => console.log(err))
* ```
*
* **NOTE**: If used to process a GetDocumentTextDetectionResponse then all data should be
* contained within a single response. If a NextToken is detected on the response then null will
* be returned. See [[parseGetTextDetection]] for a helper method which will aggregate the
* responses from the GetDocumentTextDetection operation.
*
* @param response the response object returned from Textract
* @returns Document that acts as the root node for the processed tree, or null if the
* response is incomplete (i.e. a NextToken is present on the response)
*/
parseDetectTextResponse(response: TextractDetectTextResponse): Document | null;
/**
* Method that acts as a proxy for the standard Textract callback.
*
* This proxy will process the data returned by Textract and call the provided callback
* with the processed information. It can be invoked as shown, where myCallback is written
* by the user of the library.
*
* ```typescript
* textract.detectDocumentText(request, textractParser.parseDetectTextCallback(myCallback))
* ```
*
* @param callback the callback to be invoked with the processed data or error
* @returns callback function that can be used with the AWS Textract invocation
*/
parseDetectTextCallback(callback: ParsedDetectTextCallback): TextractDetectTextCallback;
/**
* Method that retrieves the result of an asynchronous document text detection operation
* (which may require multiple requests to AWS) and produces a tree of the results.
*
* An example of how to use this method is shown below.
*
* ```typescript
* const jobId = 'your-job-id'
* const client = new AWS.Textract()
*
* textractParser.parseGetTextDetection(client, jobId)
* .then(parsedData => console.log(parsedData))
* .catch(err => console.log(err))
* ```
*
* If the specified Textract job is not marked as SUCCEEDED or the AWS operations fail
* to return the results then the Promise will be rejected.
*
* **NOTE**: This method will try to retrieve all the results for the Textract job and
* process them in memory. For extremely large documents, memory may become an issue.
*
* @param textract the AWS client to use for retrieving the Textract results
* @param jobId the id of the Textract job for which we want to parse the results
* @returns Promise for a document that acts as the root node for the processed tree
*/
parseGetTextDetection(textract: Textract, jobId: string): Promise<Document>;
/**
* @hidden
* Private helper; judging by its name it presumably fetches the GetDocumentTextDetection
* response(s) for [[parseGetTextDetection]]. Its signature is not visible in this declaration.
*/
private getGetTextDetectionResponse;
}
// Re-export the types declared in './types' so consumers can import them from the package root.
export { Document, PageBlock, LineBlock, WordBlock, Block, Blocks, BlockType, DocumentMetadata } from './types';
// Re-export the geometry types declared in './model/Geometry'.
export { Geometry, BoundaryBox, Polygon, Point } from './model/Geometry';
/** The TextractParser instance that this library provides as its default export. */
declare const _default: TextractParser;
export default _default;