UNPKG

@azure/search-documents

Version:
1,190 lines (1,189 loc) 120 kB
import type { OperationOptions } from "@azure-rest/core-client"; import type { PagedAsyncIterableIterator } from "./static-helpers/pagingHelpers.js"; import type { AIFoundryModelCatalogName, AIServicesAccountKey, AsciiFoldingTokenFilter, AzureOpenAIModelName, CognitiveServicesAccount as BaseCognitiveServicesAccount, KnowledgeBaseModel as BaseKnowledgeBaseModel, SearchIndexerSkill as BaseSearchIndexerSkill, BinaryQuantizationCompression, BM25Similarity, CharFilterName, ChatCompletionExtraParametersBehavior, ChatCompletionResponseFormat, CjkBigramTokenFilter, ClassicSimilarity, ClassicTokenizer, CognitiveServicesAccountKey, CommonGramTokenFilter, CommonModelParameters, ConditionalSkill, ContentUnderstandingSkill, CorsOptions, CustomEntity, CustomNormalizer, DefaultCognitiveServicesAccount, DictionaryDecompounderTokenFilter, DistanceScoringFunction, DocumentExtractionSkill, DocumentIntelligenceLayoutSkillChunkingProperties, DocumentIntelligenceLayoutSkillExtractionOptions, DocumentIntelligenceLayoutSkillMarkdownHeaderDepth, DocumentIntelligenceLayoutSkillOutputFormat, DocumentIntelligenceLayoutSkillOutputMode, EdgeNGramTokenFilterSide, EdgeNGramTokenizer, ElisionTokenFilter, EntityLinkingSkill, EntityRecognitionSkillV3, FieldMapping, FreshnessScoringFunction, HighWaterMarkChangeDetectionPolicy, IndexingSchedule, IndexProjectionMode, KeepTokenFilter, KeywordMarkerTokenFilter, KnowledgeSourceContentExtractionMode, KnownBlobIndexerDataToExtract, KnownBlobIndexerImageAction, KnownBlobIndexerParsingMode, KnownBlobIndexerPDFTextRotationAlgorithm, KnownCharFilterName, KnownCustomEntityLookupSkillLanguage, KnownImageAnalysisSkillLanguage, KnownImageDetail, KnownIndexerExecutionEnvironment, KnownKeyPhraseExtractionSkillLanguage, KnownLexicalAnalyzerName, KnownLexicalTokenizerName, KnownOcrSkillLanguage, KnownPIIDetectionSkillMaskingMode, KnownRegexFlags, KnownSearchFieldDataType, KnownSearchIndexerDataSourceType, KnownSplitSkillLanguage, KnownTextSplitMode, 
KnownTextTranslationSkillLanguage, KnownTokenFilterName, KnownVectorSearchAlgorithmKind, KnownVectorSearchAlgorithmMetric, KnownVisualFeature, LanguageDetectionSkill, LengthTokenFilter, LexicalAnalyzerName, LexicalNormalizerName, LexicalTokenizerName, LimitTokenFilter, LuceneStandardAnalyzer, MagnitudeScoringFunction, MappingCharFilter, MarkdownHeaderDepth, MarkdownParsingSubmode, MergeSkill, MicrosoftLanguageStemmingTokenizer, MicrosoftLanguageTokenizer, NativeBlobSoftDeleteDeletionDetectionPolicy, NGramTokenizer, OcrLineEnding, PathHierarchyTokenizerV2 as PathHierarchyTokenizer, PatternCaptureTokenFilter, PatternReplaceCharFilter, PatternReplaceTokenFilter, PhoneticTokenFilter, ScalarQuantizationCompression, ScoringFunctionAggregation, SearchAlias, SearchIndexerDataContainer, SearchIndexerDataNoneIdentity, SearchIndexerDataUserAssignedIdentity, SearchIndexerIndexProjectionSelector, SearchIndexerKnowledgeStoreProjection, SearchIndexKnowledgeSourceParameters, SearchSuggester, SemanticSearch, SentimentSkillV3, ServiceCounters, ServiceLimits, ShaperSkill, ShingleTokenFilter, SnowballTokenFilter, SoftDeleteColumnDeletionDetectionPolicy, SqlIntegratedChangeTrackingPolicy, StemmerOverrideTokenFilter, StemmerTokenFilter, StopAnalyzer, StopwordsTokenFilter, SynonymTokenFilter, TagScoringFunction, TextWeights, TokenFilterName, TruncateTokenFilter, UaxUrlEmailTokenizer, UniqueTokenFilter, VectorEncodingFormat, VectorSearchProfile, VectorSearchVectorizerKind, WebKnowledgeSourceParameters, WordDelimiterTokenFilter } from "./models/azure/search/documents/indexes/index.js"; import type { AIServices, KnowledgeSourceVectorizer as BaseKnowledgeSourceVectorizer } from "./models/azure/search/documents/knowledgeBases/index.js"; import type { KnowledgeBase } from "./knowledgeBaseModels.js"; /** * Options for a list skillsets operation. */ export type ListSkillsetsOptions = OperationOptions; /** * Options for a list synonymMaps operation. 
*/ export type ListSynonymMapsOptions = OperationOptions; /** * Options for a list indexes operation. */ export type ListIndexesOptions = OperationOptions; /** * Options for a list indexers operation. */ export type ListIndexersOptions = OperationOptions; /** * Options for a list data sources operation. */ export type ListDataSourceConnectionsOptions = OperationOptions; /** * Options for get index operation. */ export type GetIndexOptions = OperationOptions; /** * Options for get skillset operation. */ export type GetSkillSetOptions = OperationOptions; /** * Options for get synonymmaps operation. */ export type GetSynonymMapsOptions = OperationOptions; /** * Options for get indexer operation. */ export type GetIndexerOptions = OperationOptions; /** * Options for get datasource operation. */ export type GetDataSourceConnectionOptions = OperationOptions; /** * Options for get index statistics operation. */ export type GetIndexStatisticsOptions = OperationOptions; /** * Statistics for a given index. Statistics are collected periodically and are not guaranteed to * always be up-to-date. */ export interface SearchIndexStatistics { /** * The number of documents in the index. * **NOTE: This property will not be serialized. It can only be populated by the server.** */ readonly documentCount: number; /** * The amount of storage in bytes consumed by the index. * **NOTE: This property will not be serialized. It can only be populated by the server.** */ readonly storageSize: number; /** * The amount of memory in bytes consumed by vectors in the index. * NOTE: This property will not be serialized. It can only be populated by the server. */ readonly vectorIndexSize: number; } /** * Response from a get service statistics request. If successful, it includes service level * counters and limits. */ export interface SearchServiceStatistics { /** * Service level resource counters. */ counters: ServiceCounters; /** * Service level general limits. 
*/ limits: ServiceLimits; } /** * Options for get service statistics operation. */ export type GetServiceStatisticsOptions = OperationOptions; /** * Options for get indexer status operation. */ export type GetIndexerStatusOptions = OperationOptions; /** * Options for reset indexer operation. */ export type ResetIndexerOptions = OperationOptions; /** * Options for run indexer operation. */ export type RunIndexerOptions = OperationOptions; /** * Options for create index operation. */ export type CreateIndexOptions = OperationOptions; /** * Options for create skillset operation. */ export type CreateSkillsetOptions = OperationOptions; /** * Options for create alias operation. */ export type CreateAliasOptions = OperationOptions; /** * Options for create or update alias operation. */ export interface CreateOrUpdateAliasOptions extends OperationOptions { /** * If set to true, Resource will be updated only if the etag matches. */ onlyIfUnchanged?: boolean; } /** * Options for delete alias operation. */ export interface DeleteAliasOptions extends OperationOptions { /** * If set to true, Resource will be deleted only if the etag matches. */ onlyIfUnchanged?: boolean; } /** * Options for get alias operation. */ export type GetAliasOptions = OperationOptions; /** * Options for list aliases operation. */ export type ListAliasesOptions = OperationOptions; /** * Search Alias object. */ export type SearchIndexAlias = SearchAlias; /** * Options for create synonymmap operation. */ export type CreateSynonymMapOptions = OperationOptions; /** * Options for create indexer operation. */ export type CreateIndexerOptions = OperationOptions; /** * Options for create datasource operation. */ export type CreateDataSourceConnectionOptions = OperationOptions; /** * Options for create/update index operation. 
*/ export interface CreateOrUpdateIndexOptions extends OperationOptions { /** * Allows new analyzers, tokenizers, token filters, or char filters to be added to an index by * taking the index offline for at least a few seconds. This temporarily causes indexing and * query requests to fail. Performance and write availability of the index can be impaired for * several minutes after the index is updated, or longer for very large indexes. */ allowIndexDowntime?: boolean; /** * If set to true, Resource will be updated only if the etag matches. */ onlyIfUnchanged?: boolean; } /** * Options for create/update skillset operation. */ export interface CreateOrUpdateSkillsetOptions extends OperationOptions { /** * If set to true, Resource will be updated only if the etag matches. */ onlyIfUnchanged?: boolean; /** * Ignores cache reset requirements. */ skipIndexerResetRequirementForCache?: boolean; /** * Disables cache reprocessing change detection. */ disableCacheReprocessingChangeDetection?: boolean; } /** * Options for create/update synonymmap operation. */ export interface CreateOrUpdateSynonymMapOptions extends OperationOptions { /** * If set to true, Resource will be updated only if the etag matches. */ onlyIfUnchanged?: boolean; } /** * Options for create/update indexer operation. */ export interface CreateorUpdateIndexerOptions extends OperationOptions { /** * If set to true, Resource will be updated only if the etag matches. */ onlyIfUnchanged?: boolean; /** * Ignores cache reset requirements. */ skipIndexerResetRequirementForCache?: boolean; /** * Disables cache reprocessing change detection. */ disableCacheReprocessingChangeDetection?: boolean; } /** * Options for create/update datasource operation. */ export interface CreateorUpdateDataSourceConnectionOptions extends OperationOptions { /** * If set to true, Resource will be updated only if the etag matches. */ onlyIfUnchanged?: boolean; /** * Ignores cache reset requirements. 
*/ skipIndexerResetRequirementForCache?: boolean; } /** * Options for delete index operation. */ export interface DeleteIndexOptions extends OperationOptions { /** * If set to true, Resource will be deleted only if the etag matches. */ onlyIfUnchanged?: boolean; } /** * Options for delete skillset operation. */ export interface DeleteSkillsetOptions extends OperationOptions { /** * If set to true, Resource will be deleted only if the etag matches. */ onlyIfUnchanged?: boolean; } /** * Options for delete synonymmap operation. */ export interface DeleteSynonymMapOptions extends OperationOptions { /** * If set to true, Resource will be deleted only if the etag matches. */ onlyIfUnchanged?: boolean; } /** * Options for delete indexer operation. */ export interface DeleteIndexerOptions extends OperationOptions { /** * If set to true, Resource will be deleted only if the etag matches. */ onlyIfUnchanged?: boolean; } /** * Options for delete datasource operation. */ export interface DeleteDataSourceConnectionOptions extends OperationOptions { /** * If set to true, Resource will be deleted only if the etag matches. */ onlyIfUnchanged?: boolean; } /** * Specifies some text and analysis components used to break that text into tokens. */ export interface AnalyzeRequest { /** * The text to break into tokens. */ text: string; /** * The name of the analyzer to use to break the given text. If this parameter is not specified, * you must specify a tokenizer instead. The tokenizer and analyzer parameters are mutually * exclusive. {@link KnownAnalyzerNames} is an enum containing built-in analyzer names. * NOTE: Either analyzerName or tokenizerName is required in an AnalyzeRequest. */ analyzerName?: LexicalAnalyzerName; /** * The name of the tokenizer to use to break the given text. If this parameter is not specified, * you must specify an analyzer instead. The tokenizer and analyzer parameters are mutually * exclusive. 
{@link KnownTokenizerNames} is an enum containing built-in tokenizer names. * NOTE: Either analyzerName or tokenizerName is required in an AnalyzeRequest. */ tokenizerName?: LexicalTokenizerName; /** * The name of the normalizer to use to normalize the given text. {@link KnownNormalizerNames} is * an enum containing built-in normalizer names. */ normalizerName?: LexicalNormalizerName; /** * An optional list of token filters to use when breaking the given text. This parameter can only * be set when using the tokenizer parameter. */ tokenFilters?: TokenFilterName[]; /** * An optional list of character filters to use when breaking the given text. This parameter can * only be set when using the tokenizer parameter. */ charFilters?: CharFilterName[]; } /** * Options for analyze text operation. */ export type AnalyzeTextOptions = OperationOptions & AnalyzeRequest; /** * Flexibly separates text into terms via a regular expression pattern. This analyzer is * implemented using Apache Lucene. */ export interface PatternAnalyzer { /** * Polymorphic Discriminator */ odatatype: "#Microsoft.Azure.Search.PatternAnalyzer"; /** * The name of the analyzer. It must only contain letters, digits, spaces, dashes or underscores, * can only start and end with alphanumeric characters, and is limited to 128 characters. */ name: string; /** * A value indicating whether terms should be lower-cased. Default is true. Default value: true. */ lowerCaseTerms?: boolean; /** * A regular expression pattern to match token separators. Default is an expression that matches * one or more non-word characters. Default value: `\W+`. */ pattern?: string; /** * Regular expression flags. Possible values include: 'CANON_EQ', 'CASE_INSENSITIVE', 'COMMENTS', * 'DOTALL', 'LITERAL', 'MULTILINE', 'UNICODE_CASE', 'UNIX_LINES' */ flags?: RegexFlags[]; /** * A list of stopwords. */ stopwords?: string[]; } /** * Allows you to take control over the process of converting text into indexable/searchable tokens. 
* It's a user-defined configuration consisting of a single predefined tokenizer and one or more * filters. The tokenizer is responsible for breaking text into tokens, and the filters for * modifying tokens emitted by the tokenizer. */ export interface CustomAnalyzer { /** * Polymorphic Discriminator */ odatatype: "#Microsoft.Azure.Search.CustomAnalyzer"; /** * The name of the analyzer. It must only contain letters, digits, spaces, dashes or underscores, * can only start and end with alphanumeric characters, and is limited to 128 characters. */ name: string; /** * The name of the tokenizer to use to divide continuous text into a sequence of tokens, such as * breaking a sentence into words. {@link KnownTokenizerNames} is an enum containing built-in * tokenizer names. */ tokenizerName: LexicalTokenizerName; /** * A list of token filters used to filter out or modify the tokens generated by a tokenizer. For * example, you can specify a lowercase filter that converts all characters to lowercase. The * filters are run in the order in which they are listed. */ tokenFilters?: TokenFilterName[]; /** * A list of character filters used to prepare input text before it is processed by the * tokenizer. For instance, they can replace certain characters or symbols. The filters are run * in the order in which they are listed. */ charFilters?: CharFilterName[]; } /** * Contains the possible cases for Analyzer. */ export type LexicalAnalyzer = CustomAnalyzer | PatternAnalyzer | LuceneStandardAnalyzer | StopAnalyzer; /** * A skill that calls a language model via Azure AI Foundry's Chat Completions endpoint. */ export interface ChatCompletionSkill extends WebApiSkill { /** * Polymorphic discriminator, which specifies the different types this object can be */ odatatype: "#Microsoft.Skills.Custom.ChatCompletionSkill"; /** * API key for authenticating to the model. Both apiKey and authIdentity cannot be specified at * the same time. 
*/ apiKey?: string; /** * Common language model parameters that customers can tweak. If omitted, reasonable defaults will * be applied. */ commonModelParameters?: CommonModelParameters; /** * Open-type dictionary for model-specific parameters that should be appended to the chat * completions call. Follows Azure AI Foundry’s extensibility pattern. */ extraParameters?: { [propertyName: string]: unknown; }; /** * How extra parameters are handled by Azure AI Foundry. Default is 'error'. */ extraParametersBehavior?: ChatCompletionExtraParametersBehavior; /** * Determines how the LLM should format its response. Defaults to 'text' response type. */ responseFormat?: ChatCompletionResponseFormat; } /** * A skill that can call a Web API endpoint, allowing you to extend a skillset by having it call * your custom code. */ export interface WebApiSkill extends BaseSearchIndexerSkill { /** * Polymorphic discriminator, which specifies the different types this object can be */ odatatype: "#Microsoft.Skills.Custom.WebApiSkill" | "#Microsoft.Skills.Custom.ChatCompletionSkill"; /** * The url for the Web API. */ uri: string; /** * The headers required to make the http request. */ httpHeaders?: { [propertyName: string]: string; }; /** * The method for the http request. */ httpMethod?: string; /** * The desired timeout for the request. Default is 30 seconds. */ timeout?: string; /** * The desired batch size which indicates number of documents. */ batchSize?: number; /** * If set, the number of parallel calls that can be made to the Web API. */ degreeOfParallelism?: number; /** * Applies to custom skills that connect to external code in an Azure function or some other * application that provides the transformations. This value should be the application ID created * for the function or app when it was registered with Azure Active Directory. 
When specified, the * custom skill connects to the function or app using a managed ID (either system or * user-assigned) of the search service and the access token of the function or app, using this * value as the resource id for creating the scope of the access token. */ authResourceId?: string; /** * The user-assigned managed identity used for outbound connections. If an authResourceId is * provided and it's not specified, the system-assigned managed identity is used. On updates to * the indexer, if the identity is unspecified, the value remains unchanged. If set to "none", the * value of this property is cleared. */ authIdentity?: SearchIndexerDataIdentity; } /** * Contains the possible cases for WebApiSkill. */ export type WebApiSkills = WebApiSkill | ChatCompletionSkill; /** * Contains the possible cases for Skill. */ export type SearchIndexerSkill = AzureOpenAIEmbeddingSkill | ConditionalSkill | CustomEntityLookupSkill | DocumentExtractionSkill | DocumentIntelligenceLayoutSkill | ContentUnderstandingSkill | EntityLinkingSkill | EntityRecognitionSkill | EntityRecognitionSkillV3 | ImageAnalysisSkill | KeyPhraseExtractionSkill | LanguageDetectionSkill | MergeSkill | OcrSkill | PIIDetectionSkill | SentimentSkill | SentimentSkillV3 | ShaperSkill | SplitSkill | TextTranslationSkill | WebApiSkills; /** * A skill that extracts content and layout information (as markdown), via Azure AI Services, from * files within the enrichment pipeline. */ export interface DocumentIntelligenceLayoutSkill extends BaseSearchIndexerSkill { /** * Polymorphic discriminator, which specifies the different types this object can be */ odatatype: "#Microsoft.Skills.Util.DocumentIntelligenceLayoutSkill"; /** * Controls the format of the output produced by the skill. Default is 'markdown'. */ outputFormat?: DocumentIntelligenceLayoutSkillOutputFormat; /** * Controls the cardinality of the output produced by the skill. Default is 'oneToMany'. */ outputMode?: DocumentIntelligenceLayoutSkillOutputMode; /** * The depth of headers in the markdown output. 
Default is h6. */ markdownHeaderDepth?: DocumentIntelligenceLayoutSkillMarkdownHeaderDepth; /** * Options controlling the content extracted from the document by the skill. */ extractionOptions?: DocumentIntelligenceLayoutSkillExtractionOptions[]; /** * Properties controlling how the content is chunked. */ chunkingProperties?: DocumentIntelligenceLayoutSkillChunkingProperties; } /** * Contains the possible cases for CognitiveServicesAccount. */ export type CognitiveServicesAccount = DefaultCognitiveServicesAccount | CognitiveServicesAccountKey | AIServicesAccountKey | AIServicesAccountIdentity; /** * The multi-region account of an Azure AI service resource that's attached to a skillset. */ export interface AIServicesAccountIdentity extends BaseCognitiveServicesAccount { /** * Polymorphic discriminator, which specifies the different types this object can be */ odatatype: "#Microsoft.Azure.Search.AIServicesByIdentity"; /** * The user-assigned managed identity used for connections to AI Service. If not specified, the * system-assigned managed identity is used. On updates to the skillset, if the identity is * unspecified, the value remains unchanged. If set to "none", the value of this property is * cleared. */ identity?: SearchIndexerDataIdentity; /** * The subdomain url for the corresponding AI Service. */ subdomainUrl: string; } /** * Tokenizer that uses regex pattern matching to construct distinct tokens. This tokenizer is * implemented using Apache Lucene. */ export interface PatternTokenizer { /** * Polymorphic Discriminator */ odatatype: "#Microsoft.Azure.Search.PatternTokenizer"; /** * The name of the tokenizer. It must only contain letters, digits, spaces, dashes or * underscores, can only start and end with alphanumeric characters, and is limited to 128 * characters. */ name: string; /** * A regular expression pattern to match token separators. Default is an expression that matches * one or more whitespace characters. Default value: `\W+`. 
*/ pattern?: string; /** * Regular expression flags. Possible values include: 'CANON_EQ', 'CASE_INSENSITIVE', 'COMMENTS', * 'DOTALL', 'LITERAL', 'MULTILINE', 'UNICODE_CASE', 'UNIX_LINES' */ flags?: RegexFlags[]; /** * The zero-based ordinal of the matching group in the regular expression pattern to extract into * tokens. Use -1 if you want to use the entire pattern to split the input into tokens, * irrespective of matching groups. Default is -1. Default value: -1. */ group?: number; } /** * Breaks text following the Unicode Text Segmentation rules. This tokenizer is implemented using * Apache Lucene. */ export interface LuceneStandardTokenizer { /** * Polymorphic Discriminator */ odatatype: "#Microsoft.Azure.Search.StandardTokenizerV2" | "#Microsoft.Azure.Search.StandardTokenizer"; /** * The name of the tokenizer. It must only contain letters, digits, spaces, dashes or * underscores, can only start and end with alphanumeric characters, and is limited to 128 * characters. */ name: string; /** * The maximum token length. Default is 255. Tokens longer than the maximum length are split. The * maximum token length that can be used is 300 characters. Default value: 255. */ maxTokenLength?: number; } /** * Generates n-grams of the given size(s) starting from the front or the back of an input token. * This token filter is implemented using Apache Lucene. */ export interface EdgeNGramTokenFilter { /** * Polymorphic Discriminator */ odatatype: "#Microsoft.Azure.Search.EdgeNGramTokenFilterV2" | "#Microsoft.Azure.Search.EdgeNGramTokenFilter"; /** * The name of the token filter. It must only contain letters, digits, spaces, dashes or * underscores, can only start and end with alphanumeric characters, and is limited to 128 * characters. */ name: string; /** * The minimum n-gram length. Default is 1. Maximum is 300. Must be less than the value of * maxGram. Default value: 1. */ minGram?: number; /** * The maximum n-gram length. Default is 2. Maximum is 300. Default value: 2. 
*/ maxGram?: number; /** * Specifies which side of the input the n-gram should be generated from. Default is "front". * Possible values include: 'Front', 'Back' */ side?: EdgeNGramTokenFilterSide; } /** * Emits the entire input as a single token. This tokenizer is implemented using Apache Lucene. */ export interface KeywordTokenizer { /** * Polymorphic Discriminator */ odatatype: "#Microsoft.Azure.Search.KeywordTokenizerV2" | "#Microsoft.Azure.Search.KeywordTokenizer"; /** * The name of the tokenizer. It must only contain letters, digits, spaces, dashes or * underscores, can only start and end with alphanumeric characters, and is limited to 128 * characters. */ name: string; /** * The maximum token length. Default is 256. Tokens longer than the maximum length are split. The * maximum token length that can be used is 300 characters. Default value: 256. */ maxTokenLength?: number; } /** * Contains the possible cases for Tokenizer. */ export type LexicalTokenizer = ClassicTokenizer | EdgeNGramTokenizer | KeywordTokenizer | MicrosoftLanguageTokenizer | MicrosoftLanguageStemmingTokenizer | NGramTokenizer | PathHierarchyTokenizer | PatternTokenizer | LuceneStandardTokenizer | UaxUrlEmailTokenizer; /** * Definition of additional projections to azure blob, table, or files, of enriched data. */ export interface SearchIndexerKnowledgeStore { /** * The connection string to the storage account projections will be stored in. */ storageConnectionString: string; /** * A list of additional projections to perform during indexing. */ projections: SearchIndexerKnowledgeStoreProjection[]; /** * The user-assigned managed identity used for connections to Azure Storage when writing * knowledge store projections. If the connection string indicates an identity (ResourceId) and * it's not specified, the system-assigned managed identity is used. On updates to the indexer, * if the identity is unspecified, the value remains unchanged. If set to "none", the value of * this property is cleared. 
*/ identity?: SearchIndexerDataIdentity; } /** * Contains the possible cases for Similarity. */ export type SimilarityAlgorithm = ClassicSimilarity | BM25Similarity; /** * Generates n-grams of the given size(s). This token filter is implemented using Apache Lucene. */ export interface NGramTokenFilter { /** * Polymorphic Discriminator */ odatatype: "#Microsoft.Azure.Search.NGramTokenFilterV2" | "#Microsoft.Azure.Search.NGramTokenFilter"; /** * The name of the token filter. It must only contain letters, digits, spaces, dashes or * underscores, can only start and end with alphanumeric characters, and is limited to 128 * characters. */ name: string; /** * The minimum n-gram length. Default is 1. Maximum is 300. Must be less than the value of * maxGram. Default value: 1. */ minGram?: number; /** * The maximum n-gram length. Default is 2. Maximum is 300. Default value: 2. */ maxGram?: number; } /** * Contains the possible cases for TokenFilter. */ export type TokenFilter = AsciiFoldingTokenFilter | CjkBigramTokenFilter | CommonGramTokenFilter | DictionaryDecompounderTokenFilter | EdgeNGramTokenFilter | ElisionTokenFilter | KeepTokenFilter | KeywordMarkerTokenFilter | LengthTokenFilter | LimitTokenFilter | NGramTokenFilter | PatternCaptureTokenFilter | PatternReplaceTokenFilter | PhoneticTokenFilter | ShingleTokenFilter | SnowballTokenFilter | StemmerTokenFilter | StemmerOverrideTokenFilter | StopwordsTokenFilter | SynonymTokenFilter | TruncateTokenFilter | UniqueTokenFilter | WordDelimiterTokenFilter; /** * Contains the possible cases for CharFilter. */ export type CharFilter = MappingCharFilter | PatternReplaceCharFilter; /** * Contains the possible cases for LexicalNormalizer. */ export type LexicalNormalizer = CustomNormalizer; /** * Contains the possible cases for ScoringFunction. */ export type ScoringFunction = DistanceScoringFunction | FreshnessScoringFunction | MagnitudeScoringFunction | TagScoringFunction; /** * Defines values for ComplexDataType. 
* Possible values include: 'Edm.ComplexType', 'Collection(Edm.ComplexType)' * @readonly */ export type ComplexDataType = "Edm.ComplexType" | "Collection(Edm.ComplexType)"; /** * Represents a field in an index definition, which describes the name, data type, and search * behavior of a field. */ export type SearchField = SimpleField | ComplexField; /** * Represents a field in an index definition, which describes the name, data type, and search * behavior of a field. */ export interface SimpleField { /** * The name of the field, which must be unique within the fields collection of the index or * parent field. */ name: string; /** * The data type of the field. */ type: SearchFieldDataType; /** * A value indicating whether the field uniquely identifies documents in the index. Exactly one * top-level field in each index must be chosen as the key field and it must be of type * Edm.String. Key fields can be used to look up documents directly and update or delete specific * documents. Default is false. */ key?: boolean; /** * A value indicating whether the field can be returned in a search result. You can disable this * option if you want to use a field (for example, margin) as a filter, sorting, or scoring * mechanism but do not want the field to be visible to the end user. This property must be false * for key fields. This property can be changed on existing fields. Enabling this property does * not cause any increase in index storage requirements. Default is true for vector fields, false * otherwise. */ hidden?: boolean; /** * An immutable value indicating whether the field will be persisted separately on disk to be * returned in a search result. You can disable this option if you don't plan to return the field * contents in a search response to save on storage overhead. This can only be set during index * creation and only for vector fields. This property cannot be changed for existing fields or set * as false for new fields. 
If this property is set to `false`, the property `hidden` must be set * to `true`. This property must be true or unset for key fields, for new fields, and for * non-vector fields, and it must be null for complex fields. Disabling this property will reduce * index storage requirements. The default is true for vector fields. */ stored?: boolean; /** * A value indicating whether the field is full-text searchable. This means it will undergo * analysis such as word-breaking during indexing. If you set a searchable field to a value like * "sunny day", internally it will be split into the individual tokens "sunny" and "day". This * enables full-text searches for these terms. Fields of type Edm.String or Collection(Edm.String) * are searchable by default. This property must be false for simple * fields of other non-string data types. * Note: searchable fields consume extra space in your index to accommodate additional tokenized * versions of the field value for full-text searches. If you want to save space in your index and * you don't need a field to be included in searches, set searchable to false. Default is false. */ searchable?: boolean; /** * A value indicating whether to enable the field to be referenced in $filter queries. * `filterable` differs from `searchable` in how strings are handled. Fields of type Edm.String or * Collection(Edm.String) that are filterable do not undergo word-breaking, so comparisons are for * exact matches only. For example, if you set such a field f to "sunny day", $filter=f eq 'sunny' * will find no matches, but $filter=f eq 'sunny day' will. Default is false. */ filterable?: boolean; /** * A value indicating whether to enable the field to be referenced in $orderby expressions. By * default, the service sorts results by score, but in many experiences users will want * to sort by fields in the documents. A simple field can be sortable only if it is single-valued * (it has a single value in the scope of the parent document). 
Simple collection fields cannot * be sortable, since they are multi-valued. Simple sub-fields of complex collections are also * multi-valued, and therefore cannot be sortable. This is true whether it's an immediate parent * field, or an ancestor field, that's the complex collection. The default is false. */ sortable?: boolean; /** * A value indicating whether to enable the field to be referenced in facet queries. Typically * used in a presentation of search results that includes hit count by category (for example, * search for digital cameras and see hits by brand, by megapixels, by price, and so on). * Fields of type Edm.GeographyPoint or Collection(Edm.GeographyPoint) cannot be facetable. * Default is false for all other simple fields. */ facetable?: boolean; /** * The name of the analyzer to use for the field. This option can be used only with * searchable fields and it can't be set together with either `searchAnalyzerName` or * `indexAnalyzerName`. Once the analyzer is chosen, it cannot be changed for the field. */ analyzerName?: LexicalAnalyzerName; /** * The name of the analyzer used at search time for the field. This option can be used only with * searchable fields. It must be set together with `indexAnalyzerName` and it cannot be set * together with the `analyzerName` option. This property cannot be set to the name of a language * analyzer; use the `analyzerName` property instead if you need a language analyzer. This * analyzer can be updated on an existing field. */ searchAnalyzerName?: LexicalAnalyzerName; /** * The name of the analyzer used at indexing time for the field. This option can be used only with * searchable fields. It must be set together with `searchAnalyzerName` and it cannot be set * together with the `analyzerName` option. Once the analyzer is chosen, it cannot be changed for * the field. KnownAnalyzerNames is an enum containing known values. */ indexAnalyzerName?: LexicalAnalyzerName; /** * A list of the names of synonym maps to associate with this field. 
This option can be used only * with searchable fields. Currently only one synonym map per field is supported. Assigning a * synonym map to a field ensures that query terms targeting that field are expanded at * query-time using the rules in the synonym map. This attribute can be changed on existing * fields. */ synonymMapNames?: string[]; /** * The name of the normalizer used at indexing time for the field. */ normalizerName?: LexicalNormalizerName; /** * The dimensionality of the vector field. */ vectorSearchDimensions?: number; /** * The name of the vector search profile that specifies the algorithm and vectorizer to use when * searching the vector field. */ vectorSearchProfileName?: string; /** * The encoding format to interpret the field contents. */ vectorEncodingFormat?: VectorEncodingFormat; /** A value indicating whether the field should be used for sensitivity label filtering. This enables document-level filtering based on Microsoft Purview sensitivity labels. */ hasSensitivityLabel?: boolean; } export declare function isComplexField(field: SearchField): field is ComplexField; /** * Represents a field in an index definition, which describes the name, data type, and search * behavior of a field. */ export interface ComplexField { /** * The name of the field, which must be unique within the fields collection of the index or * parent field. */ name: string; /** * The data type of the field. * Possible values include: 'Edm.ComplexType','Collection(Edm.ComplexType)' */ type: ComplexDataType; /** * A list of sub-fields. */ fields?: SearchField[]; } /** * Represents a synonym map definition. */ export interface SynonymMap { /** * The name of the synonym map. */ name: string; /** * An array of synonym rules in the specified synonym map format. */ synonyms: string[]; /** * A description of an encryption key that you create in Azure Key Vault. 
This key is used to * provide an additional level of encryption-at-rest for your data when you want full assurance * that no one, not even Microsoft, can decrypt your data in Azure AI Search. Once you * have encrypted your data, it will always remain encrypted. Azure AI Search will ignore * attempts to set this property to null. You can change this property as needed if you want to * rotate your encryption key; Your data will be unaffected. Encryption with customer-managed * keys is not available for free search services, and is only available for paid services * created on or after January 1, 2019. */ encryptionKey?: SearchResourceEncryptionKey; /** * The ETag of the synonym map. */ etag?: string; } /** * An iterator for listing the indexes that exist in the Search service. Will make requests * as needed during iteration. Use .byPage() to make one request to the server * per iteration. */ export type IndexIterator = PagedAsyncIterableIterator<SearchIndex, SearchIndex[], {}>; /** * An iterator for listing the knowledge bases that exist in the Search service. Will make requests * as needed during iteration. Use .byPage() to make one request to the server per iteration. */ export type KnowledgeBaseIterator = PagedAsyncIterableIterator<KnowledgeBase, KnowledgeBase[], {}>; /** * An iterator for listing the knowledge sources that exist in the Search service. Will make requests * as needed during iteration. Use .byPage() to make one request to the server per iteration. */ export type KnowledgeSourceIterator = PagedAsyncIterableIterator<KnowledgeSource, KnowledgeSource[], {}>; /** * An iterator for listing the aliases that exist in the Search service. This will make requests * as needed during iteration. Use .byPage() to make one request to the server * per iteration. */ export type AliasIterator = PagedAsyncIterableIterator<SearchIndexAlias, SearchIndexAlias[], {}>; /** * An iterator for listing the indexes that exist in the Search service. 
Will make requests * as needed during iteration. Use .byPage() to make one request to the server * per iteration. */ export type IndexNameIterator = PagedAsyncIterableIterator<string, string[], {}>; /** * Represents a search index definition, which describes the fields and search behavior of an * index. */ export interface SearchIndex { /** * The name of the index. */ name: string; /** * The description of the index. */ description?: string; /** * The fields of the index. */ fields: SearchField[]; /** * The scoring profiles for the index. */ scoringProfiles?: ScoringProfile[]; /** * The name of the scoring profile to use if none is specified in the query. If this property is * not set and no scoring profile is specified in the query, then default scoring (tf-idf) will * be used. */ defaultScoringProfile?: string; /** * Options to control Cross-Origin Resource Sharing (CORS) for the index. */ corsOptions?: CorsOptions; /** * The suggesters for the index. */ suggesters?: SearchSuggester[]; /** * The analyzers for the index. */ analyzers?: LexicalAnalyzer[]; /** * The tokenizers for the index. */ tokenizers?: LexicalTokenizer[]; /** * The token filters for the index. */ tokenFilters?: TokenFilter[]; /** * The character filters for the index. */ charFilters?: CharFilter[]; /** * The normalizers for the index. */ normalizers?: LexicalNormalizer[]; /** * A description of an encryption key that you create in Azure Key Vault. This key is used to * provide an additional level of encryption-at-rest for your data when you want full assurance * that no one, not even Microsoft, can decrypt your data in Azure AI Search. Once you * have encrypted your data, it will always remain encrypted. Azure AI Search will ignore * attempts to set this property to null. You can change this property as needed if you want to * rotate your encryption key; Your data will be unaffected. 
Encryption with customer-managed * keys is not available for free search services, and is only available for paid services * created on or after January 1, 2019. */ encryptionKey?: SearchResourceEncryptionKey; /** * The type of similarity algorithm to be used when scoring and ranking the documents matching a * search query. The similarity algorithm can only be defined at index creation time and cannot * be modified on existing indexes. If null, the ClassicSimilarity algorithm is used. */ similarity?: SimilarityAlgorithm; /** * Defines parameters for a search index that influence semantic capabilities. */ semanticSearch?: SemanticSearch; /** * Contains configuration options related to vector search. */ vectorSearch?: VectorSearch; /** * The ETag of the index. */ etag?: string; /** A value indicating whether the index is leveraging Purview-specific features. This property defaults to false and cannot be changed after index creation. */ purviewEnabled?: boolean; } /** * Represents an indexer. */ export interface SearchIndexer { /** * The name of the indexer. */ name: string; /** * The description of the indexer. */ description?: string; /** * The name of the datasource from which this indexer reads data. */ dataSourceName: string; /** * The name of the skillset executing with this indexer. */ skillsetName?: string; /** * The name of the index to which this indexer writes data. */ targetIndexName: string; /** * The schedule for this indexer. */ schedule?: IndexingSchedule; /** * Parameters for indexer execution. */ parameters?: IndexingParameters; /** * Defines mappings between fields in the data source and corresponding target fields in the * index. */ fieldMappings?: FieldMapping[]; /** * Output field mappings are applied after enrichment and immediately before indexing. */ outputFieldMappings?: FieldMapping[]; /** * A value indicating whether the indexer is disabled. Default is false. Default value: false. */ isDisabled?: boolean; /** * The ETag of the indexer. 
*/ etag?: string; /** * A description of an encryption key that you create in Azure Key Vault. This key is used to * provide an additional level of encryption-at-rest for your indexer definition (as well as * indexer execution status) when you want full assurance that no one, not even Microsoft, can * decrypt them in Azure AI Search. Once you have encrypted your indexer definition, it * will always remain encrypted. Azure AI Search will ignore attempts to set this property * to null. You can change this property as needed if you want to rotate your encryption key; * Your indexer definition (and indexer execution status) will be unaffected. Encryption with * customer-managed keys is not available for free search services, and is only available for * paid services created on or after January 1, 2019. */ encryptionKey?: SearchResourceEncryptionKey; } /** * A customer-managed encryption key in Azure Key Vault. Keys that you create and manage can be * used to encrypt or decrypt data-at-rest in Azure AI Search, such as indexes and synonym * maps. */ export interface SearchResourceEncryptionKey { /** * The name of your Azure Key Vault key to be used to encrypt your data at rest. */ keyName: string; /** * The version of your Azure Key Vault key to be used to encrypt your data at rest. */ keyVersion?: string; /** * The URI of your Azure Key Vault, also referred to as DNS name, that contains the key to be * used to encrypt your data at rest. An example URI might be * https://my-keyvault-name.vault.azure.net. */ vaultUrl: string; /** * An AAD Application ID that was granted the required access permissions to the Azure Key Vault * that is to be used when encrypting your data at rest. The Application ID should not be * confused with the Object ID for your AAD Application. */ applicationId?: string; /** * The authentication key of the specified AAD application. */ applicationSecret?: string; /** * An explicit managed identity to use for this encryption key. 
If not specified and the access * credentials property is null, the system-assigned managed identity is used. On update to the * resource, if the explicit identity is unspecified, it remains unchanged. If "none" is specified, * the value of this property is cleared. */ identity?: SearchIndexerDataIdentity; } /** * A list of skills. */ export interface SearchIndexerSkillset { /** * The name of the skillset. */ name: string; /** * The description of the skillset. */ description?: string; /** * A list of skills in the skillset. */ skills: SearchIndexerSkill[]; /** * Details about cognitive services to be used when running skills. */ cognitiveServicesAccount?: CognitiveServicesAccount; /** * Definition of additional projections to azure blob, table, or files, of enriched data. */ knowledgeStore?: SearchIndexerKnowledgeStore; /** * Definition of additional projections to secondary search index(es). */ indexProjection?: SearchIndexerIndexProjection; /** * The ETag of the skillset. */ etag?: string; /** * A description of an encryption key that you create in Azure Key Vault. This key is used to * provide an additional level of encryption-at-rest for your skillset definition when you want * full assurance that no one, not even Microsoft, can decrypt your skillset definition in Azure * AI Search. Once you have encrypted your skillset definition, it will always remain * encrypted. Azure AI Search will ignore attempts to set this property to null. You can * change this property as needed if you want to rotate your encryption key; Your skillset * definition will be unaffected. Encryption with customer-managed keys is not available for free * search services, and is only available for paid services created on or after January 1, 2019. */ encryptionKey?: SearchResourceEncryptionKey; } /** * Defines parameters for a search index that influence scoring in search queries. */ export interface ScoringProfile { /** * The name of the scoring profile. 
*/ name: string; /** * Parameters that boost scoring based on text matches in certain index fields. */ textWeights?: TextWeights; /** * The collection of functions that influence the scoring of documents. */ functions?: ScoringFunction[]; /** * A value indicating how the results of individual scoring functions should be combined. * Defaults to "Sum". Ignored if there are no scoring functions. Possible values include: 'sum', * 'average', 'minimum', 'maximum', 'firstMatching' */ functionAggregation?: ScoringFunctionAggregation; } /** * Defines values for TokenizerName. * @readonly */ export declare enum KnownTokenizerNames { /** * Grammar-based tokenizer that is suitable for processing most European-language documents. See * http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/ClassicTokenizer.html */ Classic = "classic", /** * Tokenizes the input from an edge into n-grams of the given size(s). See * https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.html */ EdgeNGram = "edgeNGram", /** * Emits the entire input as a single token. See