scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.
40 lines (39 loc) • 1.61 kB
JavaScript
// src/utils/sanitizeQuery.ts
import { STOP_WORDS } from '../fileRules/stopWords.js';
// Stage 1: normalize junk out of query
/**
 * Stage-1 cleanup: removes query syntax and punctuation from a raw search
 * string and returns the remaining words separated by single spaces.
 *
 * The passes run in a fixed order, and the order matters:
 *  1. quotes are deleted, parentheses become spaces;
 *  2. the boolean keywords OR / AND (any case, whole words) become spaces;
 *  3. any `word:` prefix is deleted (e.g. `path:`, `ext:`; note this also
 *     matches things like the `http:` of a URL);
 *  4. angle-bracket placeholders such as `<endpoint>` are deleted;
 *  5. leftover `<`, `>`, `=`, `*` become spaces, backslashes are deleted;
 *  6. whitespace runs are collapsed and the result is trimmed.
 *
 * @param {string} raw - The user-supplied query text.
 * @returns {string} The cleaned query; an empty string if nothing is left.
 */
export function basicCleanup(raw) {
    return raw
        .replace(/["']/g, '') // remove quotes
        .replace(/[()]/g, ' ') // replace parens with space
        .replace(/\bOR\b|\bAND\b/gi, ' ') // strip boolean operators
        .replace(/\w+:/g, '') // remove field prefixes like path:, ext:
        // Placeholders have to be stripped BEFORE the operator pass below:
        // once '<' and '>' have been turned into spaces there is nothing left
        // to match and the placeholder word would leak into the query.
        // Only whitespace-free <...> tokens count as placeholders, so loose
        // comparison operators ("a < b and c > d") do not swallow the words
        // between them.
        .replace(/<[^<>\s]+>/g, '') // strip placeholders like <endpoint>
        .replace(/[<>=*]/g, ' ') // remove invalid operators
        .replace(/\\/g, '') // remove stray backslashes
        .replace(/\s+/g, ' ') // collapse whitespace
        .trim();
}
// Stage 2: tokenize & prepare for FTS5
/**
 * Stage-2 sanitizer: turns free-form user input into a match expression
 * intended for FTS5.
 *
 * The input is run through `basicCleanup`, lower-cased, and has `|`
 * characters replaced by spaces. Then:
 *  - if what remains is one whitespace-free term made only of word
 *    characters, `-`, `.` and `/`, it is returned as a quoted prefix term
 *    (`"term"*`) without any stop-word or length filtering;
 *  - otherwise the text is split on whitespace; filename-like pieces
 *    (ending in `.ext`) are quoted as-is, every other piece is reduced to
 *    `[a-z0-9_*"]` characters and given a trailing `*`; pieces of length
 *    two or less and stop words are dropped; survivors are joined with
 *    ` OR `.
 *
 * @param {string} input - Raw query text.
 * @returns {string} The sanitized expression, or `'*'` when no term survives.
 */
export function sanitizeQueryForFts(input) {
    // Wrap a term in double quotes, doubling any embedded double quote.
    const quoteTerm = (text) => '"' + text.replace(/"/g, '""') + '"';

    const lowered = basicCleanup(input).toLowerCase();
    // NOTE(review): basicCleanup in this file already removes parentheses and
    // quotes, so the grouped-OR expansion here and the quote doubling in
    // quoteTerm look like defensive no-ops — confirm before removing them.
    const ungrouped = lowered.replace(/\(([^)]+)\)/g, (_, inner) => inner.replace(/[|]/g, ' ')); // expand grouped ORs
    const normalized = ungrouped.replace(/[|]/g, ' '); // handle standalone ORs

    // Fast path: a single path/identifier-like term becomes a quoted prefix query.
    const isSingleTerm = /^[\w\-./]+$/.test(normalized) && !/\s/.test(normalized);
    if (isSingleTerm) {
        return quoteTerm(normalized) + '*';
    }

    const terms = [];
    for (const piece of normalized.split(/\s+/)) {
        let term;
        if (/[\w]+\.[a-z0-9]+$/.test(piece)) {
            // Looks like a filename: keep the dot by quoting the whole piece.
            term = quoteTerm(piece);
        }
        else {
            term = piece
                .replace(/[^a-z0-9_*"]/gi, '')
                .replace(/'/g, "''");
        }

        // Length is measured on the processed term, quotes included.
        if (term.length <= 2) {
            continue;
        }
        // Stop words are compared without the '*' and '"' decoration.
        if (STOP_WORDS.has(term.replace(/[*"]/g, ''))) {
            continue;
        }

        // Quoted terms stay exact; bare terms become prefix matches.
        terms.push(term.startsWith('"') ? term : term + '*');
    }

    if (terms.length === 0) {
        return '*';
    }
    return terms.join(' OR ');
}