UNPKG

vietnamese-text-search

Version:
37 lines 8.96 kB
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.default=void 0;var _utils=require("./utils"),_config=_interopRequireDefault(require("./config")),_textHandler=_interopRequireDefault(require("./text-handler"));function _interopRequireDefault(obj){return obj&&obj.__esModule?obj:{default:obj}}var _default={/** * @description Create new text indices from a single text object * @param {TextIndex} textIndex * @param {TextObject} textObj * @param {CreateIndexOptions} options * @returns {{bucket: string, keywords: Keyword[]}} { bucket: string, keywords: [] } */createIndexForTextObj(textBucket={},textObj,{textKeyName=_config.default.DefaultKeyName,textValueName=_config.default.DefaultValueName,keywords:newKeywords=[]}){const textKey=textObj[textKeyName],textValue=textObj[textValueName],bucket=textObj.bucket;let keywords=newKeywords,pureKeywords=keywords.map(kw=>(0,_utils.removeAccents)(kw));if(!keywords.length){const{keywords:_keywords,pureKeywords:_pureKeywords}=_textHandler.default.extractKeywordsFromText(textValue,!0);keywords=_keywords,pureKeywords=_pureKeywords}let textIndex=null;textBucket[bucket]?textIndex=textBucket[bucket].textIndex:(textBucket[bucket]={textIndex:{},textDict:{}},textIndex=textBucket[bucket].textIndex);const length=pureKeywords.length;for(let i=0;i<length;){const keyword=keywords[i],firstKeywordChars=(0,_utils.getLigatures)(keyword),pureKeyword=pureKeywords[i],firstPureKeywordChars=(0,_utils.removeAccents)(firstKeywordChars);// nested level 0 if(textIndex[firstPureKeywordChars]){const textIndexL0=textIndex[firstPureKeywordChars];// nested level 1 if(textIndexL0[firstKeywordChars]){const textIndexL1=textIndexL0[firstKeywordChars];// nested level 2 if(textIndexL1[pureKeyword]){const textIndexL2=textIndexL1[pureKeyword];// nested level 3 textIndexL2[keyword]?textBucket[bucket].textIndex[firstPureKeywordChars][firstKeywordChars][pureKeyword][keyword].add(textKey):textBucket[bucket].textIndex[firstPureKeywordChars][firstKeywordChars][pureKeyword][keyword]=new Set([textKey])}else textBucket[bucket].textIndex[firstPureKeywordChars][firstKeywordChars][pureKeyword]={[keyword]:new Set([textKey])}}else textBucket[bucket].textIndex[firstPureKeywordChars][firstKeywordChars]={[pureKeyword]:{[keyword]:new Set([textKey])}}}else textBucket[bucket].textIndex[firstPureKeywordChars]={[firstKeywordChars]:{[pureKeyword]:{[keyword]:new Set([textKey])}}};i+=1}return textBucket[bucket].textDict[textKey]=textObj,{bucket,keywords}},/** * @description Remove created text indices of a single text object * @param {TextIndex} textIndex * @param {TextObject} textObj * @param {RemoveIndexOptions} options * @returns {{bucket: string, keywords: Keyword[]}} { bucket:string, keywords: [] } */removeIndexOfTextObj(textBucket={},textObj,{textKeyName=_config.default.DefaultKeyName,textValueName=_config.default.DefaultValueName,keywords:removedKeywords=[]}){const bucket=textObj.bucket,textKey=textObj[textKeyName],textValue=textObj[textValueName];let keywords=removedKeywords,pureKeywords=keywords.map(kw=>(0,_utils.removeAccents)(kw));if(!keywords.length){const{keywords:_keywords,pureKeywords:_pureKeywords}=_textHandler.default.extractKeywordsFromText(textValue,!0);keywords=_keywords,pureKeywords=_pureKeywords}let textIndex=null;textBucket[bucket]?textIndex=textBucket[bucket].textIndex:(textBucket[bucket]={textIndex:{},textDict:{}},textIndex=textBucket[bucket].textIndex);const length=pureKeywords.length;for(let i=0;i<length;){const keyword=keywords[i],firstKeywordChars=(0,_utils.getLigatures)(keyword),pureKeyword=pureKeywords[i],firstPureKeywordChars=(0,_utils.removeAccents)(firstKeywordChars);// nested level 0 if(textIndex[firstPureKeywordChars]){const textIndexL0=textIndex[firstPureKeywordChars],textIndexL0Size=Object.keys(textIndexL0).length;// nested level 1 if(textIndexL0[firstKeywordChars]){const textIndexL1=textIndexL0[firstKeywordChars],textIndexL1Size=Object.keys(textIndexL1).length;// nested level 2 if(textIndexL1[pureKeyword]){const textIndexL2=textIndexL1[pureKeyword],textIndexL2Size=Object.keys(textIndexL2).length;// nested level 3 if(textIndexL2[keyword]){const textIndexL3=textIndexL2[keyword],textIndexL3Size=Object.keys(textIndexL3).length;textIndexL3.has(textKey)&&(textBucket[bucket].textIndex[firstPureKeywordChars][firstKeywordChars][pureKeyword][keyword].delete(textKey),1>=textIndexL3Size&&delete textBucket[bucket].textIndex[firstPureKeywordChars][firstKeywordChars][pureKeyword][keyword],1>=textIndexL2Size&&delete textBucket[bucket].textIndex[firstPureKeywordChars][firstKeywordChars][pureKeyword],1>=textIndexL1Size&&delete textBucket[bucket].textIndex[firstPureKeywordChars][firstKeywordChars],1>=textIndexL0Size&&delete textBucket[bucket].textIndex[firstPureKeywordChars])}}}}i+=1}return delete textBucket[bucket].textDict[textKey],{bucket,removedKeywords:keywords}},/** * @description Update created text indices of a single text object * @param {TextBucket} textBucket * @param {TextObject} oldTextObj * @param {TextObject} textObj * @param {UpdateIndexOptions} options * @returns {Promise<{bucket: string, newKeywords: Keyword[], removedKeywords: Keyword[], nUpdated: number, nAdded: number}>} { bucket: string, newKeywords: [], removedKeywords: [], nUpdated: number, nAdded: number } */async updateIndexOfTextObj(textBucket,oldTextObj,textObj,{textKeyName=_config.default.DefaultKeyName,textValueName=_config.default.DefaultValueName,keywords:_keywords=[],oldKeywords:_oldKeywords=[]}){const bucket=textObj.bucket,textKey=textObj[textKeyName],textValue=textObj[textValueName],textOldValue=oldTextObj?oldTextObj[textValueName]:"";let keywords=_keywords,oldKeywords=_oldKeywords;if(!keywords.length){const{keywords:keywordsFromTextObj}=_textHandler.default.extractKeywordsFromText(textValue,!0);keywords=keywordsFromTextObj}if(!oldKeywords.length){const{keywords:keywordsFromOldTextObj}=oldTextObj?_textHandler.default.extractKeywordsFromText(textOldValue,!0):{keywords:[]};oldKeywords=keywordsFromOldTextObj}const{diff1:removedKeywords,diff2:newKeywords}=(0,_utils.intersect)(oldKeywords,keywords),baseOptions={textKeyName,textValueName};if(!removedKeywords.length&&!newKeywords.length){const isSame=(0,_utils.compare2Objs)(oldTextObj,textObj);return isSame&&(textBucket[bucket].textDict[textKey]=textObj),{bucket,newKeywords:[],removedKeywords:[],nUpdated:1,nAdded:0}}removedKeywords.length&&oldTextObj&&this.removeIndexOfTextObj(textBucket,oldTextObj,{...baseOptions,keywords:removedKeywords});const oldObj={...(oldTextObj||{})},newObj={...textObj};delete oldObj[textKeyName];const newTextObj={...oldObj,...newObj};return newKeywords.length&&this.createIndexForTextObj(textBucket,newTextObj,{...baseOptions,keywords:newKeywords}),{bucket,newKeywords,removedKeywords,nUpdated:+!!oldTextObj,nAdded:+!oldTextObj}},/** * Create text indices from many text objects, if the second argument is passed to the function, * then using this argument as global textBucket for indexing new text objects. * @param {TextObject[]} textObjs * @param {TextBucket} textBucket * @param {CreateIndexOptions} options * @returns {Promise<{textBucket: TextBucket, totalIndices: number, totalObjects: number, emptyKeywordKeys: TextKey[], details: object}>} { textBucket: {}, totalIndices: number, totalObjects: number, emptyKeywordKeys: [], details: {} } */async createTextIndexByManyTextObjs(textObjs=[],textBucket=null,{textKeyName=_config.default.DefaultKeyName,textValueName=_config.default.DefaultValueName}){try{const length=textObjs.length;(0,_utils.log)(`[${new Date}]: Start indexing for ${length} objects...`),(0,_utils.log)(`[${new Date}]: Extracting keywords from text objects...`);// ouput: [{ keywords: [], pureKeywords: [], textKey, textValue },...] const keywordObjs=await _textHandler.default.extractKeywordsFromManyTextObjs(textObjs,{toLower:!0,textKeyName,textValueName}),emptyKeywordKeys=[];let totalIndices=0,totalObjects=0;const details={},globalTextBucket=keywordObjs.reduce((accObj,{keywords,pureKeywords,...textObj})=>{// sometime input text is not at normal form (e.g. '𝐕𝐄𝐑𝐒𝐀𝐂𝐄 𝐀𝐔𝐓𝐇𝐄𝐍𝐓𝐈𝐂 𝟑𝟒𝐦𝐦') if(keywords.length){const{bucket}=this.createIndexForTextObj(accObj,textObj,{keywords,textKeyName,textValueName});totalIndices+=1,totalObjects+=1,details[bucket]?(details[bucket].nAdded+=1,details[bucket].nIndices+=keywords.length):details[bucket]={nAdded:1,nIndices:keywords.length,errorKeys:[]}}else emptyKeywordKeys.push(textObj[textKeyName]);return accObj},textBucket||{});return(0,_utils.log)(`[${new Date}]: Finish indexing for ${totalObjects} objects`),(0,_utils.log)(`[${new Date}]: Error object keys ${emptyKeywordKeys}`),{textBucket:globalTextBucket,totalObjects,totalIndices,emptyKeywordKeys,details}}catch(err){throw(0,_utils.log)("createTextIndexByManyTextObjs error"),err}}};exports.default=_default;