UNPKG

@mescius/dspdfviewer

Version:
449 lines (448 loc) 20.2 kB
import { SearchResult, FindOptions, PdfSearcherOptions } from "./types";
import PdfReportPlugin from "../plugin";
//@ts-ignore
import { PluginModel, SearchFeature } from "@grapecity/viewer-core";
import { IPdfSearcher } from "./IPdfSearcher";
import { IGcTextRect } from "../Models/GcMeasurementTypes";
import { ICustomHighlight } from "../HighlightManager/types";
/**
 * PDF document searcher.
 * Generates asynchronous search results (see the `search` method).
 * The GcPdfSearcher API was designed to be used internally with the Search Panel UI.
 *
 * In the examples below an instance of this class is reached through `viewer.searcher`.
 */
export declare class GcPdfSearcher implements IPdfSearcher {
    /** The owning plugin instance, as passed to the constructor. */
    readonly _plugin: PdfReportPlugin;
    private _cancellation?;
    private _extractTextPromises;
    private _normalizedQuery;
    private _pageContents;
    private _pageContentsEndings;
    // NOTE(review): the three arrays below are presumably indexed by page index — confirm against the implementation.
    _pageTextContents: any[];
    _pageMatches: number[][];
    _pageMatchesLength: number[][];
    private _pageAcroFormResults;
    // NOTE(review): presumably the search results collected so far — confirm against the implementation.
    allResults: SearchResult[];
    private _pagesCount;
    private _pdfDocument;
    private _rawQuery;
    private _state;
    private _totalResultsCount;
    private _totalResultsCountPromise;
    private _firstSearchResult?;
    private _selectedSearchResult?;
    /**
     * @param _plugin - The plugin instance this searcher belongs to (exposed as the read-only `_plugin` property).
     */
    constructor(_plugin: PdfReportPlugin);
    /**
     * Gets the viewer instance associated with this searcher.
     */
    get viewer(): import("..").GcPdfViewer;
    /**
     * Retrieves the content of a specific page.
     * @param {number} pageIndex - The index of the page.
     * @returns {Promise<string>} The content of the specified page.
     */
    fetchPageContent(pageIndex: number): Promise<string>;
    /**
     * Retrieves the text rectangles and styles of a specific page.
     * @param {number} pageIndex - The index of the page.
     * @returns {Promise<{ items: IGcTextRect[], styles: any[] }>} The text rectangles and styles of the specified page.
     */
    fetchPageTextRects(pageIndex: number): Promise<{
        items: IGcTextRect[];
        styles: any[];
    }>;
    /**
     * Retrieves the line endings of the content of a specific page.
     * @param {number} pageIndex - The index of the page.
     * @returns {Promise<{ [x: number]: boolean }>} The line endings of the content of the specified page,
     * as a map keyed by character index.
     */
    fetchPageContentLineEndings(pageIndex: number): Promise<{
        [x: number]: boolean;
    }>;
    /**
     * Creates a highlight for a specified portion of text on a given page.
     *
     * @param {number} pageIndex - The index of the page where the text is located (0-based).
     * @param {number} startCharIndex - The starting character index (0-based) of the text segment to highlight.
     * @param {number} endCharIndex - The ending character index (0-based) of the text segment to highlight.
     * The character at this index will be excluded from the highlight.
     * @param {Object} args - Parameters to customize the highlight, such as color, border color, and width.
     * NOTE(review): this parameter is declared as required in the signature, although every property is optional.
     * @param {string} [args.color='rgba(255, 255, 0, 0.5)'] - The fill color for the highlight, specified in `rgba`, `hex`, or named color format.
     * Default is a semi-transparent yellow (`rgba(255, 255, 0, 0.5)` or `#FFFF00`) which provides a clear highlight.
     * @param {string} [args.borderColor='rgba(255, 165, 0, 0.75)'] - The color of the highlight border, specified in `rgba`, `hex`, or named color format.
     * Default is a semi-transparent orange (`rgba(255, 165, 0, 0.75)` or `#FFA500`) which provides a contrasting border.
     * @param {number} [args.borderWidth=2] - The width of the highlight border in pixels (default is 2 pixels).
     * @param {Function} [args.paintHandler] - A custom function for handling the painting of the highlight (optional).
     *
     * NOTE(review): earlier documentation also listed an `args.clearPrevious` option, but it is not part of the
     * declared `args` type, so passing it in an object literal is a type error — confirm whether the
     * implementation supports it.
     *
     * @returns {Promise<ICustomHighlight | null>} A promise that resolves to the created highlight, or `null`
     * (presumably when the highlight could not be created — confirm against the implementation).
     *
     * @example
     * ```javascript
     * // Create a highlight for the text from character 10 to character 20 on the first page with custom colors.
     * viewer.searcher.createHighlightFromTextSegment(0, 10, 20, {
     *   color: 'rgba(173, 216, 230, 0.5)',  // semi-transparent light blue
     *   borderColor: 'blue',                // named color for the border
     *   borderWidth: 4                      // custom border width
     * });
     * ```
     */
    createHighlightFromTextSegment(pageIndex: number, startCharIndex: number, endCharIndex: number, args: {
        color?: string;
        borderColor?: string;
        borderWidth?: number;
        paintHandler?: any;
    }): Promise<ICustomHighlight | null>;
    /**
     * Toggles the visibility of the Search UI.
     * @param {boolean} [forceExpand=true] - Indicates whether to force expanding the Search UI. Default is true.
     * @param {boolean} [replaceMode=false] - Enables the replace mode in the Search UI. Default is false.
     */
    toggle(forceExpand?: boolean, replaceMode?: boolean): void;
    /**
     * Closes the searcher.
     * NOTE(review): presumably closes the Search UI — confirm against the implementation.
     * @ignore Exclude from docs
     */
    close(): void;
    /**
     * Updates the state of the Search UI.
     * NOTE(review): all parameters are typed `any`; their exact shape is not visible from this declaration.
     * @param state - The new state.
     * @param previous - Presumably the previous state — confirm.
     * @param matchesCount - Presumably the number of matches to display — confirm.
     */
    updateUIState(state: any, previous: any, matchesCount: any): void;
    /**
     * Issues the next search result, primarily used internally by the PDF Searcher.
     *
     * @param result - The search result to be processed.
     * @param cancellation - The cancellation token to handle the possibility of cancellation.
     *
     * @returns A Promise that resolves with the processed search result.
     */
    nextSearchResult(result: SearchResult, cancellation: any): Promise<SearchResult>;
    /**
     * Checks whether a specific search result is currently selected.
     *
     * @param result - The search result to be checked for selection.
     *
     * @returns A boolean indicating whether the provided search result is currently selected.
     */
    isResultSelected(result: SearchResult): boolean;
    /**
     * Retrieves non-empty searcher options.
     *
     * @returns The current state of the PDF searcher options, ensuring that it is a valid object.
     */
    get state(): PdfSearcherOptions;
    /**
     * Gets the highlightAll option.
     */
    get highlightAll(): boolean;
    /**
     * Sets the highlightAll option.
     */
    set highlightAll(checked: boolean);
    /**
     * Internal property.
     * @ignore
     */
    get findController(): any;
    /**
     * Generates a unique hash ID for the given search result.
     *
     * @param {SearchResult} searchResult - The search result object to generate a hash ID for.
     * @returns {string} A unique hash ID based on the properties of the search result.
     */
    hashSearchResultId(searchResult: SearchResult): string;
    /**
     * Renders the highlight for the current search result.
     */
    applyHighlight(): void;
    /**
     * Clears search results.
     * This method must be called when the SearchPanel is closed.
     */
    resetResults(): void;
    /**
     * Repaints the highlight for visible pages.
     */
    updateAllPages(): void;
    /**
     * Gets the selected search result, or `undefined` if nothing is selected.
     */
    get selectedSearchResult(): SearchResult | undefined;
    /**
     * Gets the selected search result index. Returns -1 if nothing is selected.
     */
    get selectedSearchResultIndex(): number;
    /**
     * Gets a search result by its index.
     * NOTE(review): behavior for an out-of-range index is not visible from this declaration.
     * @param i - The index of the search result.
     */
    getSearchResultByIndex(i: number): SearchResult;
    /**
     * Gets the total search results count.
     */
    get totalResultsCount(): number;
    /**
     * Gets the total search results count promise, or `null` when no such promise is available.
     */
    get totalResultsCountPromise(): Promise<number> | null;
    /**
     * Asynchronously generates search results based on the provided search options.
     * @example
     * ```javascript
     * // Highlight all search results without opening SearchPanel.
     * const searchIterator = viewer.searcher.search({ Text: "test", MatchCase: true, HighlightAll: true });
     * searchIterator.next();
     * viewer.searcher.applyHighlight();
     * ```
     * @example
     * ```javascript
     * // Iterate all search results
     * const searcher = viewer.searcher;
     * var searchResults = [];
     * const searchIterator = searcher.search({ Text: textToSearch, MatchCase: true });
     * var searchResult = await searchIterator.next();
     * if (searchResult.value)
     *     searcher.highlight(searchResult.value)
     * while (searchResult.value && !searchResult.done) {
     *     const searchResultValue = searchResult.value;
     *     searchResults.push(`index: ${searchResultValue.ItemIndex}, text: ${searchResultValue.DisplayText}, pageIndex: ${searchResultValue.PageIndex}`);
     *     searchResult = await searchIterator.next();
     * }
     * console.log("Search results: " + (searchResults.length ? searchResults.join("; ") : "No search results"));
     * ```
     * @example
     * ```javascript
     * // Open the document, find the text 'wildlife' and highlight the first result:
     * async function loadPdfViewer(selector) {
     *   var viewer = new DsPdfViewer(selector, { restoreViewStateOnLoad: false });
     *   viewer.addDefaultPanels();
     *   var afterOpenPromise = new Promise((resolve)=>{ viewer.onAfterOpen.register(()=>{ resolve(); }); });
     *   await viewer.open('wetlands.pdf');
     *   await afterOpenPromise;
     *   var findOptions = { Text: 'wildlife' };
     *   var searchIterator = await viewer.searcher.search(findOptions);
     *   var searchResult = await searchIterator.next();
     *   viewer.searcher.cancel();
     *   viewer.searcher.highlight(searchResult.value);
     * }
     * loadPdfViewer('#root');
     * ```
     * @example
     * ```javascript
     * // Open the document, find the text 'wildlife' and print search results to the console:
     * async function loadPdfViewer(selector) {
     *   var viewer = new DsPdfViewer(selector);
     *   viewer.addDefaultPanels();
     *   await viewer.open('wetlands.pdf');
     *   await (new Promise((resolve)=>{
     *     viewer.onAfterOpen.register(()=>{
     *       resolve();
     *     });
     *   }));
     *   var findOptions = {
     *     Text: 'wildlife',
     *     MatchCase: true,
     *     WholeWord: true,
     *     StartsWith: false,
     *     EndsWith: false,
     *     Wildcards: false,
     *     Proximity: false,
     *     SearchBackward: false,
     *     HighlightAll: true
     *   };
     *   var searcher = viewer.searcher;
     *   var searchIterator = await searcher.search(findOptions);
     *   var resultsCount = 0;
     *   var searchResult;
     *   do {
     *     searchResult = await searchIterator.next();
     *     if (searchResult.value) {
     *       // this could be either result or progress message (ItemIndex < 0)
     *       if(searchResult.value.ItemIndex >= 0) {
     *         console.log('next search result:');
     *         console.log(searchResult.value);
     *         resultsCount++;
     *       } else {
     *         console.log('search progress, page index is ' + searchResult.value.PageIndex);
     *       }
     *     }
     *     else {
     *       console.log("Search completed");
     *       break;
     *     }
     *   }
     *   while(!searchResult.done);
     *   console.log('Total results count is ' + resultsCount);
     * }
     * ```
     * @param options - Search options to customize the search.
     * @returns An asynchronous iterable iterator that yields search results. As the last example shows,
     * a yielded value may also be a progress message, recognizable by `ItemIndex < 0`.
     */
    //@ts-ignore
    search(options: FindOptions): AsyncIterableIterator<SearchResult>;
    /**
     * Navigates to a page containing the result and highlights found text.
     * @param searchResult - The search result to highlight. The signature also accepts `null`
     * (presumably to clear the current highlight — confirm against the implementation).
     * @param pageIndex - Optional page index. NOTE(review): its exact role is not visible from this declaration.
     * @example
     * ```javascript
     * // Open the document, find the text 'wildlife' and highlight the first result:
     * async function loadPdfViewer(selector) {
     *   var viewer = new GcPdfViewer(selector, { restoreViewStateOnLoad: false });
     *   viewer.addDefaultPanels();
     *   var afterOpenPromise = new Promise((resolve)=>{ viewer.onAfterOpen.register(()=>{ resolve(); }); });
     *   await viewer.open('wetlands.pdf');
     *   await afterOpenPromise;
     *   var findOptions = { Text: 'wildlife' };
     *   var searchIterator = await viewer.searcher.search(findOptions);
     *   var searchResult = await searchIterator.next();
     *   viewer.searcher.cancel();
     *   viewer.searcher.highlight(searchResult.value);
     * }
     * loadPdfViewer('#root');
     * ```
     */
    highlight(searchResult: SearchFeature.SearchResult | null, pageIndex?: number): Promise<void>;
    /**
     * Cancels the search task.
     * @example
     * ```javascript
     * // Open the document, find the text 'wildlife' and highlight the first result:
     * async function loadPdfViewer(selector) {
     *   var viewer = new GcPdfViewer(selector, { restoreViewStateOnLoad: false });
     *   viewer.addDefaultPanels();
     *   var afterOpenPromise = new Promise((resolve)=>{ viewer.onAfterOpen.register(()=>{ resolve(); }); });
     *   await viewer.open('wetlands.pdf');
     *   await afterOpenPromise;
     *   var findOptions = { Text: 'wildlife' };
     *   var searchIterator = await viewer.searcher.search(findOptions);
     *   var searchResult = await searchIterator.next();
     *   viewer.searcher.cancel();
     *   viewer.searcher.highlight(searchResult.value);
     * }
     * loadPdfViewer('#root');
     * ```
     */
    cancel(): void;
    /**
     * Internal initialization routine.
     * NOTE(review): what exactly is initialized is not visible from this declaration.
     */
    _initialize(): void;
    /**
     * @return {string} The (current) normalized search query.
     */
    get _query(): string;
    /**
     * Extracts all text from the PDF document once.
     */
    _extractText(): void;
    /**
     * Internal reset routine.
     * NOTE(review): presumably resets the internal search state — confirm against the implementation.
     */
    _reset(): void;
    /**
     * Helper for multi-term search that fills the `matchesWithLength` array
     * and handles cases where one search term includes another search term (for
     * example, "tamed tame" or "this is"). It looks for intersecting terms in
     * the `matches` and keeps elements with a longer match length.
     */
    _prepareMatches(matchesWithLength: any, matches: any, matchesLength: any): void;
    /**
     * Determines if the search query constitutes a "whole word", by comparing the
     * first/last character type with the preceding/following character type.
     */
    _isEntireWord(content: any, lineEndings: {
        [x: number]: boolean;
    }, startIdx: any, length: any): boolean;
    /**
     * NOTE(review): presumably the "starts with" counterpart of `_isEntireWord` — confirm against the implementation.
     * The leading underscore of `_length` suggests the parameter is unused.
     */
    _isStartsWith(content: any, lineEndings: {
        [x: number]: boolean;
    }, startIdx: any, _length: any): boolean;
    /**
     * NOTE(review): presumably the "ends with" counterpart of `_isEntireWord` — confirm against the implementation.
     */
    _isEndsWith(content: any, lineEndings: {
        [x: number]: boolean;
    }, startIdx: any, length: any): boolean;
    /**
     * Looks up a phrase match in the page content.
     * NOTE(review): "Math" in the method name looks like a misspelling of "Match"; the name is part of the
     * public surface and is therefore kept as-is.
     * @param pageContent - The content of the page.
     * @param query - The query string to search for.
     * @param startIndex - Presumably the character index at which the lookup starts — confirm.
     * @param lineEndings - Line endings map keyed by character index.
     * @returns An object with the match index (`matchIdx`) and `queryLen`, or `null`
     * (presumably when no match is found — confirm).
     */
    _findPhraseMathIndex(pageContent: string, query: string, startIndex: number, lineEndings: {
        [x: number]: boolean;
    }): {
        matchIdx: number;
        queryLen: number;
    } | null;
    /**
     * Calculates the phrase matches in the given page content based on the specified query and matching options.
     * @param {string} query - The query string to search for.
     * @param {Object.<number, boolean>} pageContentsEndings - An object representing the line endings on the page, where keys are character indices and values indicate line endings.
     * @param {string} pageContent - The content of the page.
     * @param {boolean} entireWord - Whether to match the entire word only.
     * @param {boolean} startsWith - Whether the query should match phrases that start with the query string.
     * @param {boolean} endsWith - Whether the query should match phrases that end with the query string.
     * @param {boolean} wildcards - Whether the query allows wildcards for matching.
     * @returns {{ matches: number[], matchesLength: number[] }} An object containing the matches and their lengths:
     * `matches` is an array of character indices on the page where matches were found;
     * `matchesLength` is an array of lengths for each matched text.
     */
    _calculatePhraseMatch(query: string, pageContentsEndings: {
        [x: number]: boolean;
    }, pageContent: string, entireWord: boolean, startsWith: boolean, endsWith: boolean, wildcards: boolean): {
        matches: number[];
        matchesLength: number[];
    };
    /**
     * NOTE(review): presumably returns the index of the end of the line containing `startIndex` — confirm
     * against the implementation.
     * @param startIndex - A character index in `pageContent`.
     * @param pageContent - The content of the page.
     * @param lineEndings - Line endings map keyed by character index.
     */
    findLineEndIndex(startIndex: number, pageContent: string, lineEndings: {
        [x: number]: boolean;
    }): number;
    /**
     * Word-based counterpart of `_calculatePhraseMatch`; takes the same parameters and returns the same
     * `{ matches, matchesLength }` shape.
     * NOTE(review): how the query is split into words is not visible from this declaration.
     */
    _calculateWordMatch(query: string, pageContentsEndings: {
        [x: number]: boolean;
    }, pageContent: any, entireWord: any, startsWith: any, endsWith: any, wildcards: any): {
        matches: number[];
        matchesLength: number[];
    };
    /**
     * Calculates matches for a page.
     * NOTE(review): the query and matching options are not parameters here; presumably they are taken from
     * the searcher's current state — confirm against the implementation.
     * @param pageContent - The content of the page.
     * @param pageContentsEndings - Line endings map keyed by character index.
     * @returns An object with the match indices (`matches`) and the corresponding lengths (`matchesLength`).
     */
    _calculateMatch(pageContent: string, pageContentsEndings: {
        [x: number]: boolean;
    }): {
        matches: number[];
        matchesLength: number[];
    };
    private handleProximitySearch;
    /**
     * Validates proximity conditions based on the number of words between matches.
     *
     * @param uniqueMatches - Array of unique matches containing matchIndex, matchLength, and wordsBetweenCount.
     * @param proximityCounts - Array of proximity counts specifying the maximum allowed distance between words.
     * @returns {boolean} - Returns true if all proximity conditions are satisfied, otherwise false.
     */
    private validateProximityConditions;
    private collectMatchesForOrder;
    /**
     * Extracts a substring from `pageContent` between `startInd` and `endInd`, and optionally fills line endings with spaces
     * based on `pageContentsEndings` mapping.
     *
     * @param {number} startInd - The start index of the substring in `pageContent`.
     * @param {number} endInd - The end index of the substring in `pageContent`.
     * @param {string} pageContent - The full content of the page from which the substring is extracted.
     * @param {{ [x: number]: boolean }} pageContentsEndings - A mapping indicating where line endings occur.
     * @param {boolean} [fillLineEndingsWithSpaces=true] - Whether to fill line endings with spaces if they don't already exist.
     *
     * @returns {string} The substring with optional spaces inserted at line breaks.
     */
    getContentSnippet(startInd: number, endInd: number, pageContent: string, pageContentsEndings: {
        [x: number]: boolean;
    }, fillLineEndingsWithSpaces?: boolean): string;
    /**
     * NOTE(review): presumably counts the words located between `startInd` and `endInd` in `pageContent`
     * (used by proximity search) — confirm against the implementation.
     */
    _countWordsBetweenTerms(startInd: number, endInd: number, pageContent: string, pageContentsEndings: {
        [x: number]: boolean;
    }): number;
    /**
     * Deduplicates matches based on matchIndex, storing matchLength and wordsBetweenCount.
     *
     * @param matches - Array of matches containing matchIndex, matchLength, and wordsBetweenCount.
     * @returns {Array} - Returns an array of unique matches with matchIndex, matchLength, and wordsBetweenCount.
     */
    private deduplicateMatches;
    /**
     * Checks that all words in a proximity search are found.
     * Example:
     *  Text: Note that some annotation types may not display in certain viewers
     *  Search should be successful: annotation around(1) may around(2) in
     *  Search should fail: annotation around(1) may around(1) in
     * @ignore
     * @param queryArray
     * @param matchIndicesToKeep
     * @param pageContent
     */
    _proximityCheckAllWordsInResults(queryArray: RegExpMatchArray, matchIndicesToKeep: {
        matchIndex: number;
        matchLength: number;
    }[], pageContent: string): boolean;
    /**
     * NOTE(review): "Math"/"mathes" in the method, parameter and result names look like misspellings of
     * "Match"/"matches"; the names are part of the public surface and are therefore kept as-is.
     * Presumably looks up the match for `subquery` among the given matches, starting at `startIndex` — confirm
     * against the implementation.
     */
    _findMathIndexByQuery(pageContent: string, mathes: number[], mathesLength: number[], subquery: string, startIndex: number): {
        mathIndex: number;
        mathLength: number;
    };
    /**
     * @ignore Exclude from documentation. Base interface implementation.
     * @param page
     * @param results
     * @returns
     */
    renderHighlightPage(page: PluginModel.IPageData, results: SearchFeature.SearchResult[]): PluginModel.PageView;
}