/*
 * @mescius/dspdfviewer
 * Version:
 * Document Solutions PDF Viewer
 * 449 lines (448 loc) • 20.2 kB
 * TypeScript
 */
import { SearchResult, FindOptions, PdfSearcherOptions } from "./types";
import PdfReportPlugin from "../plugin";
//@ts-ignore
import { PluginModel, SearchFeature } from "@grapecity/viewer-core";
import { IPdfSearcher } from "./IPdfSearcher";
import { IGcTextRect } from "../Models/GcMeasurementTypes";
import { ICustomHighlight } from "../HighlightManager/types";
/**
* PDF document searcher.
* Generates asynchronous search results (see search method).
* The GcPdfSearcher API was designed to be used internally with Search Panel UI.
**/
export declare class GcPdfSearcher implements IPdfSearcher {
/** Plugin instance this searcher belongs to (same type as the constructor argument). */
readonly _plugin: PdfReportPlugin;
// Internal state. Private members carry no type information in this declaration file.
private _cancellation?;
private _extractTextPromises;
private _normalizedQuery;
private _pageContents;
private _pageContentsEndings;
/** NOTE(review): presumably one text-content entry per page - confirm against the implementation. */
_pageTextContents: any[];
/** NOTE(review): presumably match start indices grouped by page index - confirm against the implementation. */
_pageMatches: number[][];
/** NOTE(review): presumably match lengths parallel to the entries of _pageMatches - confirm against the implementation. */
_pageMatchesLength: number[][];
private _pageAcroFormResults;
/** NOTE(review): presumably the results collected by the current search - confirm against the implementation. */
allResults: SearchResult[];
private _pagesCount;
private _pdfDocument;
private _rawQuery;
private _state;
private _totalResultsCount;
private _totalResultsCountPromise;
private _firstSearchResult?;
private _selectedSearchResult?;
/**
* Creates a searcher for the given plugin.
* @param _plugin - The PDF report plugin instance.
*/
constructor(_plugin: PdfReportPlugin);
/**
* Gets the viewer instance (typed as GcPdfViewer).
**/
get viewer(): import("..").GcPdfViewer;
/**
* Retrieves the content of a specific page.
* @param {number} pageIndex - The index of the page.
* @returns {Promise<string>} The content of the specified page.
*/
fetchPageContent(pageIndex: number): Promise<string>;
/**
* Retrieves the text rectangles and styles of a specific page.
* @param {number} pageIndex - The index of the page.
* @returns {Promise<{ items: IGcTextRect[], styles: any[] }>} The text rectangles and styles of the specified page.
*/
fetchPageTextRects(pageIndex: number): Promise<{
items: IGcTextRect[];
styles: any[];
}>;
/**
* Retrieves the line endings of the content of a specific page.
* @param {number} pageIndex - The index of the page.
* @returns {Promise<{ [x: number]: boolean }>} The line endings of the content of the specified page,
* as a map from a numeric key to a boolean flag.
*/
fetchPageContentLineEndings(pageIndex: number): Promise<{
[x: number]: boolean;
}>;
/**
* Creates a highlight for a specified portion of text on a given page.
*
* @param {number} pageIndex - The index of the page where the text is located (0-based).
* @param {number} startCharIndex - The starting character index (0-based) of the text segment to highlight.
* @param {number} endCharIndex - The ending character index (0-based) of the text segment to highlight.
* The character at this index will be excluded from the highlight.
* @param {Object} args - Parameters to customize the highlight, such as color, border color, and width.
* NOTE(review): earlier documentation described this parameter as optional, but it is declared as required in this signature.
* @param {string} [args.color='rgba(255, 255, 0, 0.5)'] - The fill color for the highlight, specified in `rgba`, `hex`, or named color format.
* Default is a semi-transparent yellow (`rgba(255, 255, 0, 0.5)` or `#FFFF00`) which provides a clear highlight.
* @param {string} [args.borderColor='rgba(255, 165, 0, 0.75)'] - The color of the highlight border, specified in `rgba`, `hex`, or named color format.
* Default is a semi-transparent orange (`rgba(255, 165, 0, 0.75)` or `#FFA500`) which provides a contrasting border.
* @param {number} [args.borderWidth=2] - The width of the highlight border in pixels (default is 2 pixels).
* @param {Function} [args.paintHandler] - A custom function for handling the painting of the highlight (optional).
* @param {boolean} [args.clearPrevious=false] - If `true`, removes existing highlights before applying the new one.
* NOTE(review): `clearPrevious` is not part of the declared `args` type in this signature - confirm whether it is supported here.
*
* @returns {Promise<ICustomHighlight | null>} A promise that resolves to the created highlight, or `null`
* (presumably when the highlight could not be created - confirm against the implementation).
*
* @example
* ```javascript
* // Create a highlight for the text from character 10 to character 20 on the first page with custom highlight colors.
* viewer.searcher.createHighlightFromTextSegment(0, 10, 20, {
* color: 'rgba(173, 216, 230, 0.5)', // semi-transparent light blue
* borderColor: 'blue', // named color for the border
* borderWidth: 4 // custom border width
* });
* ```
*/
createHighlightFromTextSegment(pageIndex: number, startCharIndex: number, endCharIndex: number, args: {
color?: string;
borderColor?: string;
borderWidth?: number;
paintHandler?: any;
}): Promise<ICustomHighlight | null>;
/**
* Toggles the visibility of the Search UI.
* @param {boolean} [forceExpand=true] - Indicates whether to force expanding the Search UI. Default is true.
* @param {boolean} [replaceMode=false] - Enables the replace mode in the Search UI. Default is false.
*/
toggle(forceExpand?: boolean, replaceMode?: boolean): void;
/**
*
* @ignore Exclude from docs
*/
close(): void;
/**
* Updates the state of the search UI.
* NOTE(review): all parameters are untyped in this declaration; presumably `state` and `previous` are the
* new and prior search states and `matchesCount` is the number of matches - confirm against the implementation.
* @param state - The state value to apply.
* @param previous - The previous value.
* @param matchesCount - The matches count.
*/
updateUIState(state: any, previous: any, matchesCount: any): void;
/**
* Issues the next search result, primarily used internally by the PDF Searcher.
*
* @param result - The search result to be processed.
* @param cancellation - The cancellation token to handle the possibility of cancellation.
*
* @returns A Promise that resolves with the processed search result.
*/
nextSearchResult(result: SearchResult, cancellation: any): Promise<SearchResult>;
/**
* Checks whether a specific search result is currently selected.
*
* @param result - The search result to be checked for selection.
*
* @returns A boolean indicating whether the provided search result is currently selected.
*/
isResultSelected(result: SearchResult): boolean;
/**
* Retrieves non-empty searcher options.
*
* @returns The current state of the PDF searcher options, ensuring that it is a valid object.
*/
get state(): PdfSearcherOptions;
/**
* Gets highlightAll option.
**/
get highlightAll(): boolean;
/**
* Sets highlightAll option.
**/
set highlightAll(checked: boolean);
/**
* Internal property.
* @ignore
**/
get findController(): any;
/**
* Generates a unique hash ID for the given search result.
*
* @param {SearchResult} searchResult - The search result object to generate a hash ID for.
* @returns {string} A unique hash ID based on the properties of the search result.
*/
hashSearchResultId(searchResult: SearchResult): string;
/**
* Render highlight for the current search result.
**/
applyHighlight(): void;
/**
* Clear search results.
* This method must be called when the SearchPanel is closed.
**/
resetResults(): void;
/**
* Repaint highlight for visible pages.
**/
updateAllPages(): void;
/**
* Gets selected search result, or undefined if nothing is selected.
**/
get selectedSearchResult(): SearchResult | undefined;
/**
* Gets selected search result index. Returns -1 if nothing is selected.
**/
get selectedSearchResultIndex(): number;
/**
* Gets the search result at the given index.
* NOTE(review): presumably indexes into the collected search results (see allResults) - confirm against the implementation.
* @param i - The index of the search result.
**/
getSearchResultByIndex(i: number): SearchResult;
/**
* Gets total search results count.
**/
get totalResultsCount(): number;
/**
* Gets total search results count promise. The value may be null.
**/
get totalResultsCountPromise(): Promise<number> | null;
/**
* Asynchronously generates search results based on the provided search options.
* @example
* ```javascript
* // Highlight all search results without opening SearchPanel.
* const searcher = viewer.searcher;
* const searchIterator = searcher.search({ Text: "test", MatchCase: true, HighlightAll: true });
* searchIterator.next();
* searcher.applyHighlight();
* ```
* @example
* ```javascript
* // Iterate all search results
* const searcher = viewer.searcher;
* var searchResults = [];
* const searchIterator = searcher.search({ Text: textToSearch, MatchCase: true });
* var searchResult = await searchIterator.next();
* if (searchResult.value)
* searcher.highlight(searchResult.value)
* while (searchResult.value && !searchResult.done) {
* const searchResultValue = searchResult.value;
* searchResults.push(`index: ${searchResultValue.ItemIndex}, text: ${searchResultValue.DisplayText}, pageIndex: ${searchResultValue.PageIndex}`);
* searchResult = await searchIterator.next();
* }
* console.log("Search results: " + (searchResults.length ? searchResults.join("; ") : "No search results"));
* ```
* @example
* ```javascript
* // Open the document, find the text 'wildlife' and highlight the first result:
* async function loadPdfViewer(selector) {
* var viewer = new DsPdfViewer(selector, { restoreViewStateOnLoad: false });
* viewer.addDefaultPanels();
* var afterOpenPromise = new Promise((resolve)=>{ viewer.onAfterOpen.register(()=>{ resolve(); }); });
* await viewer.open('wetlands.pdf');
* await afterOpenPromise;
* var findOptions = { Text: 'wildlife' };
* var searchIterator = await viewer.searcher.search(findOptions);
* var searchResult = await searchIterator.next();
* viewer.searcher.cancel();
* viewer.searcher.highlight(searchResult.value);
* }
* loadPdfViewer('#root');
* ```
* @example
* ```javascript
* // Open the document, find the text 'wildlife' and print search results to the console:
* async function loadPdfViewer(selector) {
* var viewer = new DsPdfViewer(selector);
* viewer.addDefaultPanels();
* // Register the handler before opening, as in the other examples.
* var afterOpenPromise = new Promise((resolve)=>{ viewer.onAfterOpen.register(()=>{ resolve(); }); });
* await viewer.open('wetlands.pdf');
* await afterOpenPromise;
* var findOptions = {
* Text: 'wildlife',
* MatchCase: true,
* WholeWord: true,
* StartsWith: false,
* EndsWith: false,
* Wildcards: false,
* Proximity: false,
* SearchBackward: false,
* HighlightAll: true
* };
* var searcher = viewer.searcher;
* var searchIterator = await searcher.search(findOptions);
* var resultsCount = 0;
* var searchResult;
* do {
* searchResult = await searchIterator.next();
* if (searchResult.value) {
* // this could be either result or progress message (ItemIndex < 0)
* if(searchResult.value.ItemIndex >= 0) {
* console.log('next search result:');
* console.log(searchResult.value);
* resultsCount++;
* } else {
* console.log('search progress, page index is ' + searchResult.value.PageIndex);
* }
* }
* else {
* console.log("Search completed");
* break;
* }
* }
* while(!searchResult.done);
* console.log('Total results count is ' + resultsCount);
* }
* loadPdfViewer('#root');
* ```
* @param options - Search options to customize the search.
* @returns An asynchronous iterable iterator that yields search results.
*/
//@ts-ignore
search(options: FindOptions): AsyncIterableIterator<SearchResult>;
/**
* Navigates to a page containing the result and highlights found text.
* @param searchResult - The search result to highlight. The declared type also accepts null.
* @param pageIndex - Optional page index.
* NOTE(review): presumably overrides or supplements the page of the search result - confirm against the implementation.
* @returns A promise that resolves without a value.
* @example
* ```javascript
* // Open the document, find the text 'wildlife' and highlight the first result:
* async function loadPdfViewer(selector) {
* var viewer = new GcPdfViewer(selector, { restoreViewStateOnLoad: false });
* viewer.addDefaultPanels();
* var afterOpenPromise = new Promise((resolve)=>{ viewer.onAfterOpen.register(()=>{ resolve(); }); });
* await viewer.open('wetlands.pdf');
* await afterOpenPromise;
* var findOptions = { Text: 'wildlife' };
* var searchIterator = await viewer.searcher.search(findOptions);
* var searchResult = await searchIterator.next();
* viewer.searcher.cancel();
* viewer.searcher.highlight(searchResult.value);
* }
* loadPdfViewer('#root');
* ```
**/
highlight(searchResult: SearchFeature.SearchResult | null, pageIndex?: number): Promise<void>;
/**
* Cancel search task.
* @example
* ```javascript
* // Open the document, find the text 'wildlife' and highlight the first result:
* async function loadPdfViewer(selector) {
* var viewer = new GcPdfViewer(selector, { restoreViewStateOnLoad: false });
* viewer.addDefaultPanels();
* var afterOpenPromise = new Promise((resolve)=>{ viewer.onAfterOpen.register(()=>{ resolve(); }); });
* await viewer.open('wetlands.pdf');
* await afterOpenPromise;
* var findOptions = { Text: 'wildlife' };
* var searchIterator = await viewer.searcher.search(findOptions);
* var searchResult = await searchIterator.next();
* viewer.searcher.cancel();
* viewer.searcher.highlight(searchResult.value);
* }
* loadPdfViewer('#root');
* ```
**/
cancel(): void;
/**
* Internal initialization routine.
* NOTE(review): what exactly is initialized is not visible from this declaration.
*/
_initialize(): void;
/**
* @return {string} The (current) normalized search query.
*/
get _query(): string;
/**
* Extract all text from pdf document once.
* */
_extractText(): void;
/**
* Internal reset routine.
* NOTE(review): presumably resets the internal search state - confirm against the implementation.
*/
_reset(): void;
/**
* Helper for multi-term search that fills the `matchesWithLength` array
* and handles cases where one search term includes another search term (for
* example, "tamed tame" or "this is"). It looks for intersecting terms in
* the `matches` and keeps elements with a longer match length.
*/
_prepareMatches(matchesWithLength: any, matches: any, matchesLength: any): void;
/**
* Determine if the search query constitutes a "whole word", by comparing the
* first/last character type with the preceding/following character type.
*/
_isEntireWord(content: any, lineEndings: {
[x: number]: boolean;
}, startIdx: any, length: any): boolean;
/**
* NOTE(review): presumably checks whether the match at `startIdx` is located at the start of a word
* (used by the StartsWith search option) - confirm against the implementation.
*/
_isStartsWith(content: any, lineEndings: {
[x: number]: boolean;
}, startIdx: any, _length: any): boolean;
/**
* NOTE(review): presumably checks whether the match at `startIdx` with the given `length` is located
* at the end of a word (used by the EndsWith search option) - confirm against the implementation.
*/
_isEndsWith(content: any, lineEndings: {
[x: number]: boolean;
}, startIdx: any, length: any): boolean;
/**
* Looks for a phrase match of `query` in `pageContent`.
* NOTE(review): presumably the lookup begins at `startIndex` and null means that no match was found - confirm.
* @param pageContent - The content of the page.
* @param query - The query string to search for.
* @param startIndex - The start index.
* @param lineEndings - Line endings map of the page content.
* @returns An object with `matchIdx` and `queryLen`, or null.
*/
_findPhraseMathIndex(pageContent: string, query: string, startIndex: number, lineEndings: {
[x: number]: boolean;
}): {
matchIdx: number;
queryLen: number;
} | null;
/**
* Calculates the phrase matches in the given page content based on the specified query and matching options.
* @param {string} query - The query string to search for.
* @param {Object.<number, boolean>} pageContentsEndings - An object representing the line endings on the page, where keys are character indices and values indicate line endings.
* @param {string} pageContent - The content of the page.
* @param {boolean} entireWord - Whether to match the entire word only.
* @param {boolean} startsWith - Whether the query should match phrases that start with the query string.
* @param {boolean} endsWith - Whether the query should match phrases that end with the query string.
* @param {boolean} wildcards - Whether the query allows wildcards for matching.
* @returns {Object} An object containing `matches`, an array of character indices on the page where matches were found,
* and `matchesLength`, an array of lengths for each matched text.
*/
_calculatePhraseMatch(query: string, pageContentsEndings: {
[x: number]: boolean;
}, pageContent: string, entireWord: boolean, startsWith: boolean, endsWith: boolean, wildcards: boolean): {
matches: number[];
matchesLength: number[];
};
/**
* NOTE(review): presumably returns the index of the end of the line that contains `startIndex` - confirm against the implementation.
* @param startIndex - The start index.
* @param pageContent - The content of the page.
* @param lineEndings - Line endings map of the page content.
*/
findLineEndIndex(startIndex: number, pageContent: string, lineEndings: {
[x: number]: boolean;
}): number;
/**
* Word-based counterpart of _calculatePhraseMatch with the same parameter list and result shape.
* NOTE(review): presumably matches each word of the query separately - confirm against the implementation.
* @returns An object containing the `matches` and `matchesLength` arrays.
*/
_calculateWordMatch(query: string, pageContentsEndings: {
[x: number]: boolean;
}, pageContent: any, entireWord: any, startsWith: any, endsWith: any, wildcards: any): {
matches: number[];
matchesLength: number[];
};
/**
* Calculates matches for the given page content.
* NOTE(review): the query and options are not parameters here, so they are presumably taken from the
* current searcher state - confirm against the implementation.
* @param pageContent - The content of the page.
* @param pageContentsEndings - Line endings map of the page content.
* @returns An object containing the `matches` and `matchesLength` arrays.
*/
_calculateMatch(pageContent: string, pageContentsEndings: {
[x: number]: boolean;
}): {
matches: number[];
matchesLength: number[];
};
// Private helper; its signature is not exposed in this declaration file.
private handleProximitySearch;
/**
* Validates proximity conditions based on the number of words between matches.
*
* @param uniqueMatches - Array of unique matches containing matchIndex, matchLength, and wordsBetweenCount.
* @param proximityCounts - Array of proximity counts specifying the maximum allowed distance between words.
* @returns {boolean} - Returns true if all proximity conditions are satisfied, otherwise false.
*/
private validateProximityConditions;
// Private helper; its signature is not exposed in this declaration file.
private collectMatchesForOrder;
/**
* Extracts a substring from `pageContent` between `startInd` and `endInd`, and optionally fills line endings with spaces
* based on `pageContentsEndings` mapping.
*
* @param {number} startInd - The start index of the substring in `pageContent`.
* @param {number} endInd - The end index of the substring in `pageContent`.
* @param {string} pageContent - The full content of the page from which the substring is extracted.
* @param {{ [x: number]: boolean }} pageContentsEndings - A mapping indicating where line endings occur.
* @param {boolean} [fillLineEndingsWithSpaces=true] - Whether to fill line endings with spaces if they do not already exist.
*
* @returns {string} The substring with optional spaces inserted at line breaks.
*/
getContentSnippet(startInd: number, endInd: number, pageContent: string, pageContentsEndings: {
[x: number]: boolean;
}, fillLineEndingsWithSpaces?: boolean): string;
/**
* NOTE(review): presumably counts the words located between `startInd` and `endInd` in `pageContent`
* (see the proximity search helpers) - confirm against the implementation.
* @returns A number (presumably the word count).
*/
_countWordsBetweenTerms(startInd: number, endInd: number, pageContent: string, pageContentsEndings: {
[x: number]: boolean;
}): number;
/**
* Deduplicates matches based on matchIndex, storing matchLength and wordsBetweenCount.
*
* @param matches - Array of matches containing matchIndex, matchLength, and wordsBetweenCount.
* @returns {Array} - Returns an array of unique matches with matchIndex, matchLength, and wordsBetweenCount.
*/
private deduplicateMatches;
/**
* Checks all words in proximity search are found.
* Example:
* Text: Note that some annotation types may not display in certain viewers
* Search should be successful: annotation around(1) may around(2) in
* Search should fail: annotation around(1) may around(1) in
* @ignore
* @param queryArray
* @param matchIndicesToKeep
* @param pageContent
* @returns Whether the check succeeded (see the summary above).
*/
_proximityCheckAllWordsInResults(queryArray: RegExpMatchArray, matchIndicesToKeep: {
matchIndex: number;
matchLength: number;
}[], pageContent: string): boolean;
/**
* NOTE(review): presumably looks up the match for `subquery` among the given match indices and lengths,
* starting at `startIndex` - confirm against the implementation.
* @returns An object with `mathIndex` and `mathLength` (spelling as declared).
*/
_findMathIndexByQuery(pageContent: string, mathes: number[], mathesLength: number[], subquery: string, startIndex: number): {
mathIndex: number;
mathLength: number;
};
/**
* @ignore Exclude from documentation. Base interface implementation.
* @param page - The page data.
* @param results - The search results passed for the page.
* @returns The page view (PluginModel.PageView).
*/
renderHighlightPage(page: PluginModel.IPageData, results: SearchFeature.SearchResult[]): PluginModel.PageView;
}