UNPKG

openai-code

Version:

An unofficial proxy layer that lets you use Anthropic Claude Code with any OpenAI API backend.

110 lines (97 loc) 3.98 kB
import { writeFileSync, existsSync } from 'node:fs'
import { join } from 'node:path'
import { safeReadFileSync, ensureAbsolutePath } from './fs.mjs'
import { getEnv } from './env.mjs'
import { md5 } from './md5.mjs'

// Module state: the project directory the index belongs to (set by
// loadFromDisk/storeToDisk) and the in-memory index itself.
let workingDir = ''
let documents = [] // [{ path, embedding, hash }, ...]

// On-disk file name for the persisted index; overridable via env var.
export const storeFileName = getEnv('OPENAI_CODE_STORE_FILE_NAME') || 'CLAUDE_VECTORDB.json'

/**
 * Dot product of two equal-length numeric vectors, manually unrolled by 4
 * to encourage V8/TurboFan vectorization.
 *
 * NOTE: despite the name this is a plain dot product, not a matrix multiply;
 * it equals cosine similarity only if both vectors are already L2-normalized
 * (presumably the embedding provider normalizes them — verify upstream).
 *
 * @param {number[]} vectorA
 * @param {number[]} vectorB - must have at least vectorA.length elements
 * @returns {number} sum of element-wise products
 */
export const matmul = (vectorA, vectorB) => {
  const dimensions = vectorA.length
  let result = 0.0
  let j = 0
  const unrollFactor = 4
  const length = Math.floor(dimensions / unrollFactor) * unrollFactor
  // unroll by 4 for JIT optimization (SIMD vector intrinsics on CPU)
  for (; j < length; j += unrollFactor) {
    result +=
      vectorA[j] * vectorB[j] +
      vectorA[j + 1] * vectorB[j + 1] +
      vectorA[j + 2] * vectorB[j + 2] +
      vectorA[j + 3] * vectorB[j + 3]
  }
  // tail loop: remaining elements when dimensions is not a multiple of 4
  for (; j < dimensions; j++) {
    result += vectorA[j] * vectorB[j]
  }
  return result
}

/**
 * Look up an indexed document by (relative or absolute) file path.
 * @param {string} path
 * @returns {{path: string, embedding: number[], hash: string}|undefined}
 */
export const getByPath = (path) => {
  const absolutePath = ensureAbsolutePath(workingDir, path)
  return documents.find(({ path: p }) => p === absolutePath)
}

/**
 * Remove a document from the index by path and persist the change.
 * No-op (and no disk write) when the path is not indexed.
 * @param {string} path
 */
export const removeByPath = (path) => {
  const absolutePath = ensureAbsolutePath(workingDir, path)
  const index = documents.findIndex(({ path: p }) => p === absolutePath)
  if (index > -1) {
    documents.splice(index, 1)
    // only rewrite the store when something actually changed
    storeToDisk(workingDir)
  }
}

/**
 * Append a document to the index (hashing current file content for
 * later change detection) and persist. Skipped when embedding is falsy.
 * Does not dedupe: callers are expected to use updateByPath for
 * already-indexed paths.
 * @param {string} path
 * @param {number[]} embedding
 */
export const addToIndex = (path, embedding) => {
  if (!embedding) return
  const absolutePath = ensureAbsolutePath(workingDir, path)
  documents.push({
    path: absolutePath,
    embedding,
    hash: md5(safeReadFileSync(absolutePath)),
  })
  storeToDisk(workingDir)
}

/**
 * Replace the embedding and content hash of an already-indexed path and
 * persist. No-op when the path is not indexed or embedding is falsy.
 * @param {string} path
 * @param {number[]} embedding
 */
export const updateByPath = (path, embedding) => {
  if (!embedding) return
  const absolutePath = ensureAbsolutePath(workingDir, path)
  const document = getByPath(absolutePath)
  if (document) {
    document.embedding = embedding
    document.hash = md5(safeReadFileSync(absolutePath))
    storeToDisk(workingDir)
  }
}

/**
 * Whether the file content on disk has changed since it was indexed.
 * Returns false for paths that are not indexed at all (callers are
 * expected to addToIndex those separately).
 * @param {string} path
 * @returns {boolean}
 */
export const needsUpdate = (path) => {
  const absolutePath = ensureAbsolutePath(workingDir, path)
  const document = getByPath(absolutePath)
  if (!document) return false
  return document.hash !== md5(safeReadFileSync(absolutePath))
}

/**
 * Return the topK documents closest to the query embedding,
 * scored by dot product (~75ms/100k docs at dim 1024).
 *
 * Scores of the topK hits are min-max normalized into [0, 1]; the weakest
 * hit (normalized score 0) is cut off. When all topK scores are identical
 * (including a single-document index) every hit is kept with score 1 —
 * previously this degenerate case normalized everything to 0 and returned
 * an empty result set.
 *
 * @param {{embedding: number[]}} query
 * @param {number} [topK=5]
 * @returns {{score: number, path: string, source: string}[]}
 */
export const search = (query, topK = 5) => {
  const queryVector = query.embedding
  const results = documents.map((item) => ({
    score: matmul(item.embedding, queryVector),
    path: item.path,
  }))
  results.sort((a, b) => b.score - a.score)
  const topKResults = results.slice(0, topK)
  const maxScore = topKResults[0]?.score || 0
  const minScore = topKResults[topKResults.length - 1]?.score || 0
  const scoreRange = maxScore - minScore
  return topKResults
    // normalize scores into [0, 1]; equal scores all count as full matches
    .map((result) => ({
      ...result,
      score: scoreRange ? (result.score - minScore) / scoreRange : 1,
    }))
    // cutting off results with a score of 0 (the weakest of the topK)
    .filter((result) => result.score > 0)
    // attach current file content only for the survivors
    .map((result) => ({
      ...result,
      source: safeReadFileSync(result.path, 'utf8'),
    }))
}

/**
 * Load the persisted index for a project directory into memory.
 * A missing or corrupt store file yields an empty index instead of
 * crashing — the index can always be rebuilt from source files.
 * @param {string} _workingDir
 * @returns {Array} the loaded documents array
 */
export const loadFromDisk = (_workingDir) => {
  workingDir = _workingDir
  const filePath = join(workingDir, storeFileName)
  documents = []
  if (existsSync(filePath)) {
    try {
      // full-file JSON read/parse: good enough for up to 10k files
      documents = JSON.parse(safeReadFileSync(filePath, 'utf-8'))
    } catch {
      // corrupt/truncated store file: fall back to an empty index
      documents = []
    }
  }
  return documents
}

/**
 * Persist the in-memory index to disk for a project directory.
 * @param {string} _workingDir
 */
export const storeToDisk = (_workingDir) => {
  workingDir = _workingDir
  const filePath = join(workingDir, storeFileName)
  // single synchronous write of the whole index: good enough for up to 10k files
  const data = JSON.stringify(documents)
  return writeFileSync(filePath, data, 'utf-8')
}