UNPKG

scai

Version:

> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.

40 lines (39 loc) 1.61 kB
// src/utils/sanitizeQuery.ts
import { STOP_WORDS } from '../fileRules/stopWords.js';

/**
 * Stage 1: normalize junk out of a raw query string.
 *
 * Removes quotes, parentheses, boolean operators (`AND` / `OR`, in any
 * letter case), `field:` prefixes, `<placeholder>` spans, comparison and
 * wildcard operators, and backslashes, then collapses runs of whitespace
 * and trims the result.
 *
 * @param {string} raw - Query text as received from the caller.
 * @returns {string} The cleaned query; may be an empty string.
 */
export function basicCleanup(raw) {
  return raw
    .replace(/["']/g, '') // remove quotes
    .replace(/[()]/g, ' ') // replace parens with space
    .replace(/\bOR\b|\bAND\b/gi, ' ') // strip boolean operators
    .replace(/\w+:/g, '') // remove field prefixes like path:, ext:
    // Placeholders have to be stripped BEFORE the operator pass below: that
    // pass turns every '<' and '>' into a space, after which this pattern can
    // never match. A space (not '') is substituted so neighbouring words are
    // not glued together; the whitespace collapse at the end tidies it up.
    .replace(/<[^>]+>/g, ' ') // strip placeholders like <endpoint>
    .replace(/[<>=*]/g, ' ') // remove invalid operators
    .replace(/\\/g, '') // remove stray backslashes
    .replace(/\s+/g, ' ') // collapse whitespace
    .trim();
}

/**
 * Wrap text in double quotes, doubling any embedded double quote.
 *
 * @param {string} text - Text to quote.
 * @returns {string} The quoted text.
 */
function quoteFtsPhrase(text) {
  return `"${text.replace(/"/g, '""')}"`;
}

/**
 * Stage 2: tokenize a query and prepare it for FTS5.
 *
 * The input is passed through `basicCleanup`, lower-cased, and has `|`
 * replaced by spaces. Then:
 *
 * - If what remains is a single term made only of word characters, `-`, `.`
 *   and `/`, it is returned as one quoted prefix term (`"term"*`). The
 *   length and stop-word filters are not applied on this path.
 * - Otherwise the text is split on whitespace. Tokens ending in a
 *   `name.ext` shape are quoted verbatim; every other token is reduced to
 *   `[a-z0-9_]` characters and given a trailing `*`. Tokens of two
 *   characters or fewer and tokens found in `STOP_WORDS` are dropped, and
 *   the survivors are joined with ` OR `.
 *
 * @param {string} input - Raw query text.
 * @returns {string} The prepared query, or `'*'` when no usable token remains.
 */
export function sanitizeQueryForFts(input) {
  // basicCleanup has already replaced all parentheses with spaces, so there
  // are no "(a|b)" groups left to expand; every '|' is simply a separator.
  const cleaned = basicCleanup(input)
    .toLowerCase()
    .replace(/[|]/g, ' ');

  // Fast path: one path- or identifier-like term. The anchored character
  // class contains no whitespace, so a match implies a single token.
  if (/^[\w\-./]+$/.test(cleaned)) {
    return `${quoteFtsPhrase(cleaned)}*`;
  }

  const tokens = cleaned
    .split(/\s+/)
    .map(token =>
      // Filename-like tokens (something.ext) are kept intact inside quotes.
      /[\w]+\.[a-z0-9]+$/.test(token)
        ? quoteFtsPhrase(token)
        // Everything else is reduced to plain word characters. '*' and '"'
        // are tolerated by the pattern but were already removed upstream.
        : token.replace(/[^a-z0-9_*"]/gi, '')
    )
    .filter(token => token.length > 2 && !STOP_WORDS.has(token.replace(/[*"]/g, '')))
    // Quoted tokens stay exact; bare tokens become prefix terms.
    .map(token => (token.startsWith('"') ? token : `${token}*`));

  return tokens.length > 0 ? tokens.join(' OR ') : '*';
}