related-documents
Version:
Find related text documents.
70 lines (69 loc) • 2.22 kB
TypeScript
/**
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { TfIdf } from "natural";
import type { Stemmer, Tokenizer } from "natural";
/**
* Signature of the function used to turn a document into an array of
* strings. Per the documentation on `Related.serialize`, each string
* corresponds to a specific part of the document such as `title` or
* `summary`.
*/
export declare type serializer = (item: any) => string[];
/**
* Configuration accepted by the `Related` constructor.
*/
export interface Options {
/** Function that converts a document into an array of string parts. */
serializer: serializer;
/**
* Numeric weights.
* NOTE(review): presumably one weight per part returned by `serializer`
* (compare `Related.numParts`) - confirm against the implementation.
*/
weights: number[];
/** Optional stemmer from `natural`; may be omitted. */
stemmer?: Stemmer;
/**
* Optional tokenizer from `natural`.
* NOTE(review): `Related.tokenizer` is typed as non-optional, so a default
* is presumably supplied when this is omitted - confirm.
*/
tokenizer?: Tokenizer;
}
/**
* Finds related text documents. Constructed from a collection of documents
* and an {@link Options} object; exposes the serialize / tokenize / stem
* steps individually and a `rank` method that scores documents.
*
* Only the type surface is declared here; the implementation lives in the
* compiled JavaScript that accompanies this declaration.
*/
export declare class Related {
// Private state. The declaration does not expose the types of these
// members; presumably they back the public accessors of similar name.
private stems_;
private tfidfs_;
private documents_;
private options_;
private debug;
/**
* @param documents The documents to work with (untyped; shape is defined
* by the caller and interpreted through `options.serializer`).
* @param options Serializer, weights and optional stemmer/tokenizer.
*/
constructor(documents: any[], options: Options);
/** The documents held by this instance (typed as `any`). */
get documents(): any;
/** The numeric weights; readable and writable. */
get weights(): number[];
set weights(weights: number[]);
/**
* Number of parts.
* NOTE(review): presumably the number of strings the serializer yields
* per document / the length of `weights` - confirm.
*/
get numParts(): number;
/**
* Stems as a three-level nested string array.
* NOTE(review): nesting presumably is document -> part -> stem - confirm.
*/
get stems(): string[][][];
/**
* The `TfIdf` instances from `natural` used by this object.
* NOTE(review): presumably one per document part - confirm.
*/
get tfidfs(): TfIdf[];
/** The serializer function; readable and writable. */
get serializer(): serializer;
set serializer(serializer: serializer);
/** The tokenizer; readable and writable. Never `undefined` per its type. */
get tokenizer(): Tokenizer;
set tokenizer(tokenizer: Tokenizer);
/** The stemmer, or `undefined` when none is configured; readable and writable. */
get stemmer(): Stemmer | undefined;
set stemmer(stemmer: Stemmer | undefined);
// Private helpers; signatures are not exposed by the declaration.
private reset;
private prepare;
/**
* Serialize according to the {@link Options.serializer} where an object is
* serialized into an array of strings based upon specific parts of the
* document such as `title`, `summary`, etc.
*/
serialize(document: any): string[];
/**
* Tokenize the individual serialized parts of the document.
*
* @param parts The serialized string parts of a document.
* @returns One array of tokens per input part.
*/
tokenize(parts: string[]): string[][];
/**
* Run the stemmer over the words.
*
* @param parts Arrays of words, one array per document part.
* @returns Arrays of strings in the same two-level shape as the input.
*/
stem(parts: string[][]): string[][];
/**
* Convert the document into parts, each having an array of stems or words
*/
private process;
/**
* Rank against the given document.
*
* @param document The document to rank against (untyped).
* @returns An array of entries, each carrying a `document` together with
* numeric `relative` and `absolute` values.
* NOTE(review): the meaning and scale of `relative` vs `absolute`, and the
* ordering of the returned array, are not visible in this declaration -
* confirm against the implementation.
*/
rank(document: any): {
relative: number;
absolute: number;
document: any;
}[];
}