UNPKG

@restnfeel/agentc-starter-kit

Version:

한국어 기업용 CMS 모듈 - Task Master AI와 함께 빠르게 웹사이트를 구현할 수 있는 재사용 가능한 컴포넌트 시스템

233 lines (231 loc) 7.12 kB
/**
 * Escapes every regular-expression metacharacter in `value` so it can be
 * embedded in a dynamically built pattern as a literal.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text that is safe to interpolate into `new RegExp(...)`.
 */
const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

/**
 * Reports whether `term` occurs in `text` without an ASCII word character
 * (`[A-Za-z0-9_]`) directly before or after it.
 *
 * This keeps "ai" from matching inside "email" while still letting a Korean
 * term match when a particle is attached ("인공지능을"), because Hangul is not
 * covered by `\w` in a non-Unicode pattern. Matching is case-insensitive.
 *
 * @param {string} text - Text to search.
 * @param {string} term - Term to look for.
 * @returns {boolean} True when the term is present as described above.
 */
function containsTerm(text, term) {
  if (typeof term !== "string" || term.length === 0) {
    return false;
  }
  const pattern = new RegExp(`(?:^|\\W)${escapeRegExp(term)}(?:\\W|$)`, "i");
  return pattern.test(text);
}

/**
 * Ordered intent rules: [intent label, English cue (whole word), Korean cue].
 * The first rule that matches wins, so the order is part of the behavior.
 */
const INTENT_RULES = Object.freeze([
  ["how-to", /\bhow\b/i, "어떻게"],
  ["definition", /\bwhat\b/i, "무엇"],
  ["location", /\bwhere\b/i, "어디"],
  ["time", /\bwhen\b/i, "언제"],
  ["explanation", /\bwhy\b/i, "왜"],
]);

/**
 * Pre-processes free-text search queries (English and Korean): normalizes the
 * text, optionally removes stop words and appends synonyms, and derives
 * keywords, entities, an intent label and a language code.
 */
class QueryProcessor {
  /**
   * @param {object} [config] - Overrides for the defaults below.
   * @param {boolean} [config.enableSpellCheck=false] - Stored but not read by
   *   any method of this class.
   * @param {boolean} [config.enableSynonymExpansion=true] - Append synonyms.
   * @param {boolean} [config.enableStopWordRemoval=true] - Drop stop words.
   * @param {number} [config.minQueryLength=3] - Minimum trimmed length.
   * @param {number} [config.maxQueryLength=500] - Maximum trimmed length.
   */
  constructor(config = {}) {
    this.config = {
      enableSpellCheck: false,
      enableSynonymExpansion: true,
      enableStopWordRemoval: true,
      minQueryLength: 3,
      maxQueryLength: 500,
      ...config,
    };
    this.initializeStopWords();
    this.initializeSynonyms();
  }

  /**
   * Runs the full pipeline on a query.
   *
   * @param {string} query - Raw user query.
   * @returns {Promise<{original: string, processed: string, keywords: string[],
   *   entities: string[], intent: string, language: string}>} Analysis result.
   * @throws {Error} "Invalid query: too short or too long" when the query is
   *   not a string or its trimmed length is outside the configured bounds.
   */
  async processQuery(query) {
    if (!this.isValidQuery(query)) {
      throw new Error("Invalid query: too short or too long");
    }

    // Step 1: Normalize text
    let processed = this.normalizeText(query.trim());

    // Step 2: Remove stop words (optional)
    if (this.config.enableStopWordRemoval) {
      processed = this.removeStopWords(processed);
    }

    // Step 3: Extract keywords (before expansion, so synonyms are not counted)
    const keywords = this.extractKeywords(processed);

    // Step 4: Expand with synonyms (optional)
    if (this.config.enableSynonymExpansion) {
      processed = this.expandWithSynonyms(processed);
    }

    // Step 5: Detect language
    const language = this.detectLanguage(query);

    // Step 6: Extract entities from the RAW query. Normalization replaces
    // "@", ".", "-" and "/" with spaces, so e-mail, phone and date patterns
    // could never match the processed text.
    const entities = this.extractEntities(query);

    // Step 7: Determine intent
    const intent = this.determineIntent(query);

    return {
      original: query,
      processed: processed.trim(),
      keywords,
      entities,
      intent,
      language,
    };
  }

  /**
   * Checks that the query is a string whose trimmed length lies within
   * `minQueryLength`..`maxQueryLength` (inclusive).
   *
   * @param {*} query - Candidate query.
   * @returns {boolean} True when the query is acceptable.
   */
  isValidQuery(query) {
    if (typeof query !== "string") {
      return false;
    }
    const { minQueryLength, maxQueryLength } = this.config;
    // Fall back only when the option is missing; an explicit 0 is honored.
    const min = minQueryLength == null ? 3 : minQueryLength;
    const max = maxQueryLength == null ? 500 : maxQueryLength;
    const length = query.trim().length;
    return length >= min && length <= max;
  }

  /**
   * Lower-cases the text, replaces every character that is not `\w`,
   * whitespace or a Hangul syllable with a space, and collapses whitespace.
   *
   * @param {string} text - Input text.
   * @returns {string} Normalized text.
   */
  normalizeText(text) {
    return text
      .normalize("NFC") // compose decomposed Hangul so the 가-힣 range applies
      .toLowerCase()
      .replace(/[^\w\s가-힣]/g, " ")
      .replace(/\s+/g, " ")
      .trim();
  }

  /**
   * Removes space-separated tokens that are in the stop-word set.
   *
   * @param {string} text - Normalized text.
   * @returns {string} Text without stop words.
   */
  removeStopWords(text) {
    return text
      .split(" ")
      .filter((word) => !this.stopWords.has(word))
      .join(" ");
  }

  /**
   * Returns up to ten distinct tokens ordered by descending frequency (ties
   * keep first-seen order). Tokens need at least 3 characters, or at least 2
   * when they contain Hangul, since most Korean content words are two
   * syllables long.
   *
   * @param {string} text - Normalized text.
   * @returns {string[]} Keywords, most frequent first.
   */
  extractKeywords(text) {
    const hangul = /[가-힣]/;
    const frequency = new Map();
    for (const word of text.split(" ")) {
      const minLength = hangul.test(word) ? 2 : 3;
      if (word.length >= minLength) {
        frequency.set(word, (frequency.get(word) || 0) + 1);
      }
    }
    return Array.from(frequency.entries())
      .sort((a, b) => b[1] - a[1])
      .map(([word]) => word)
      .slice(0, 10);
  }

  /**
   * Appends the synonyms of every known term found in `text`. Terms are
   * matched against the input text only, so synonyms added for one entry
   * cannot trigger another entry, and a term embedded in a longer ASCII word
   * ("ai" in "email") does not match.
   *
   * @param {string} text - Normalized text.
   * @returns {string} Text followed by the matching synonyms.
   */
  expandWithSynonyms(text) {
    const additions = [];
    for (const [word, synonyms] of this.synonyms) {
      if (containsTerm(text, word)) {
        additions.push(...synonyms);
      }
    }
    return additions.length > 0 ? `${text} ${additions.join(" ")}` : text;
  }

  /**
   * Returns "ko" when Hangul syllables make up more than 10% of the
   * characters, otherwise "en".
   *
   * @param {string} text - Text to inspect.
   * @returns {"ko"|"en"} Language code.
   */
  detectLanguage(text) {
    const composed = text.normalize("NFC");
    const hangulCount = (composed.match(/[가-힣]/g) || []).length;
    return hangulCount > composed.length * 0.1 ? "ko" : "en";
  }

  /**
   * Finds e-mail addresses, phone numbers, dates and integers and returns
   * them as `"type:value"` strings, grouped by type in that order. The digit
   * groups of a phone number or date are also reported as `number` entities.
   *
   * @param {string} text - Text to scan; pass un-normalized text so that
   *   punctuation-dependent patterns can match.
   * @returns {string[]} Entities found.
   */
  extractEntities(text) {
    const patterns = {
      email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
      phone: /\b\d{3}-\d{3,4}-\d{4}\b/g,
      date: /\b\d{4}[-/.]\d{1,2}[-/.]\d{1,2}\b/g,
      number: /\b\d+\b/g,
    };
    const entities = [];
    for (const [type, pattern] of Object.entries(patterns)) {
      const matches = text.match(pattern);
      if (matches) {
        entities.push(...matches.map((match) => `${type}:${match}`));
      }
    }
    return entities;
  }

  /**
   * Classifies the query by its first matching cue word. English cues must
   * be whole words (so "show" is not "how"); Korean cues match as substrings
   * because particles attach directly to the word.
   *
   * @param {string} query - Raw query.
   * @returns {string} "how-to", "definition", "location", "time",
   *   "explanation" or "general".
   */
  determineIntent(query) {
    const composed = query.normalize("NFC");
    for (const [intent, englishCue, koreanCue] of INTENT_RULES) {
      if (englishCue.test(composed) || composed.includes(koreanCue)) {
        return intent;
      }
    }
    return "general";
  }

  /** Builds the combined English and Korean stop-word set. */
  initializeStopWords() {
    const stopWordsList = [
      // English
      "a", "an", "and", "are", "as", "at", "be", "by", "for", "from",
      "has", "he", "in", "is", "it", "its", "of", "on", "that", "the",
      "to", "was", "will", "with", "or", "but", "not", "this", "these",
      "they", "their", "we", "you", "your", "have", "had", "can", "could",
      // Korean
      "그", "이", "저", "것", "수", "있", "없", "하", "되", "시", "가", "를",
      "에", "의", "은", "는", "을", "에서", "로", "으로", "와", "과", "도",
      "만", "뿐", "까지", "부터", "이다", "있다", "없다",
    ];
    this.stopWords = new Set(stopWordsList);
  }

  /** Builds the default term -> synonyms map. */
  initializeSynonyms() {
    this.synonyms = new Map([
      ["ai", ["artificial intelligence", "machine learning", "deep learning"]],
      ["인공지능", ["AI", "머신러닝", "딥러닝", "기계학습"]],
      ["개발", ["프로그래밍", "코딩", "소프트웨어 개발"]],
      ["문서", ["파일", "자료", "데이터"]],
      ["검색", ["찾기", "조회", "탐색"]],
    ]);
  }

  /**
   * Registers (or replaces) the synonyms of a term.
   *
   * @param {string} word - Term to expand; matched case-insensitively.
   * @param {string[]} synonyms - Words appended when the term is found.
   */
  addSynonyms(word, synonyms) {
    this.synonyms.set(word, synonyms);
  }

  /**
   * Adds custom stop words, lower-cased to match normalized text.
   *
   * @param {string[]} words - Words to ignore.
   */
  addStopWords(words) {
    words.forEach((word) => this.stopWords.add(word.toLowerCase()));
  }
}

export { QueryProcessor };
//# sourceMappingURL=query-processor.js.map