// openai-code — an unofficial proxy layer that lets you use Anthropic Claude Code
// with any OpenAI API backend.
import { writeFileSync, existsSync } from 'node:fs'
import { join } from 'node:path'
import { safeReadFileSync, ensureAbsolutePath } from './fs.mjs'
import { getEnv } from './env.mjs'
import { md5 } from './md5.mjs'
// Absolute path of the directory the index is rooted at; set as a side
// effect of loadFromDisk()/storeToDisk() and used to resolve relative paths.
let workingDir = '';
// In-memory vector index, mirrored to disk on every mutation.
let documents = [] // [{ path, embedding, hash }, ...]
// On-disk store file name, overridable via the OPENAI_CODE_STORE_FILE_NAME env var.
export const storeFileName = getEnv('OPENAI_CODE_STORE_FILE_NAME') || 'CLAUDE_VECTORDB.json'
/**
 * Dot product of two equal-length numeric vectors.
 * The main loop is manually unrolled by 4 so the JIT (V8/TurboFan) can keep
 * the accumulation in tight, SIMD-friendly form; a tail loop handles the
 * leftover elements when the dimension is not a multiple of 4.
 * @param {number[]} vectorA
 * @param {number[]} vectorB
 * @returns {number} the scalar dot product
 */
export const matmul = (vectorA, vectorB) => {
  const dimensions = vectorA.length
  const blockEnd = dimensions - (dimensions % 4) // largest multiple of the unroll factor
  let sum = 0.0
  let i = 0
  // Unrolled by 4: fold four products into the accumulator per iteration,
  // preserving the exact floating-point accumulation order.
  while (i < blockEnd) {
    sum += vectorA[i] * vectorB[i] + vectorA[i + 1] * vectorB[i + 1] + vectorA[i + 2] * vectorB[i + 2] + vectorA[i + 3] * vectorB[i + 3]
    i += 4
  }
  // Tail: remaining 0-3 elements.
  while (i < dimensions) {
    sum += vectorA[i] * vectorB[i]
    i += 1
  }
  return sum
}
/**
 * Looks up an indexed document by file path.
 * @param {string} path - absolute or working-dir-relative path
 * @returns {{path: string, embedding: number[], hash: string}|undefined}
 *   the matching index entry, or undefined when the path is not indexed
 */
export const getByPath = (path) => {
  const target = ensureAbsolutePath(workingDir, path);
  for (const document of documents) {
    if (document.path === target) return document;
  }
  return undefined;
};
/**
 * Removes a document from the in-memory index by path and persists the
 * change to disk. The path is resolved against the current working directory.
 * @param {string} path - absolute or working-dir-relative path
 */
export const removeByPath = (path) => {
  const absolutePath = ensureAbsolutePath(workingDir, path);
  const index = documents.findIndex(({ path: p }) => p === absolutePath);
  if (index === -1) return; // nothing removed — skip the redundant disk write
  documents.splice(index, 1);
  storeToDisk(workingDir);
};
/**
 * Adds (or replaces) a document in the index and persists the store.
 * Stores the md5 hash of the file contents so needsUpdate() can later
 * detect edits. A falsy embedding is ignored (e.g. the embedding request
 * failed upstream).
 * @param {string} path - absolute or working-dir-relative path
 * @param {number[]} embedding - embedding vector for the file contents
 */
export const addToIndex = (path, embedding) => {
  if (!embedding) return;
  const absolutePath = ensureAbsolutePath(workingDir, path);
  // Upsert: drop any stale entry for the same path so the index never holds
  // duplicates (a duplicate would be scored twice by search()).
  const existing = documents.findIndex(({ path: p }) => p === absolutePath);
  if (existing > -1) documents.splice(existing, 1);
  documents.push({ path: absolutePath, embedding, hash: md5(safeReadFileSync(absolutePath)) });
  storeToDisk(workingDir);
};
/**
 * Replaces the embedding of an already indexed document and refreshes its
 * content hash, then persists the store. No-op when the embedding is falsy
 * or the path is not indexed.
 * @param {string} path - absolute or working-dir-relative path
 * @param {number[]} embedding - new embedding vector
 */
export const updateByPath = (path, embedding) => {
  if (!embedding) return;
  const absolutePath = ensureAbsolutePath(workingDir, path);
  const document = getByPath(absolutePath);
  if (!document) return; // not indexed — nothing to update
  document.embedding = embedding;
  document.hash = md5(safeReadFileSync(absolutePath));
  storeToDisk(workingDir);
};
/**
 * Checks whether a file changed on disk since it was indexed, by comparing
 * the stored md5 hash against the file's current hash.
 * @param {string} path - absolute or working-dir-relative path
 * @returns {boolean} true when re-embedding is required; false when the
 *   file is unchanged or not indexed at all
 */
export const needsUpdate = (path) => {
  const absolutePath = ensureAbsolutePath(workingDir, path)
  const document = getByPath(absolutePath)
  if (!document) return false
  const currentHash = md5(safeReadFileSync(absolutePath))
  return currentHash !== document.hash
}
/**
 * Finds the topK documents most similar to the query embedding.
 * Similarity is the raw dot product (this equals cosine similarity only when
 * the embeddings are L2-normalized — assumed here; TODO confirm upstream).
 * Scores are min-max normalized into [0, 1] over the topK slice; entries that
 * normalize to exactly 0 are dropped, and each surviving result is hydrated
 * with the file's current contents. ~75ms/100k docs (dim 1024).
 * @param {{embedding: number[]}} query
 * @param {number} [topK=5] - maximum number of results
 * @returns {{score: number, path: string, source: string}[]}
 */
export const search = (query, topK = 5) => {
  const queryVector = query.embedding
  const scored = documents.map(({ embedding, path }) => ({
    score: matmul(embedding, queryVector),
    path,
  }))
  scored.sort((a, b) => b.score - a.score)
  const topKResults = scored.slice(0, topK)
  const maxScore = topKResults[0]?.score || 0
  const minScore = topKResults[topKResults.length - 1]?.score || 0
  const scoreRange = maxScore - minScore
  return topKResults
    // Normalize to [0, 1]. When all topK scores are equal (e.g. a single
    // indexed document) scoreRange is 0 — score them 1 instead of 0 so the
    // cutoff filter below does not wipe out every result.
    .map(result => ({
      ...result,
      score: scoreRange ? (result.score - minScore) / scoreRange : 1,
    }))
    .filter(result => result.score > 0) // cutting off results with a score of 0
    .map(result => ({ ...result, source: safeReadFileSync(result.path, "utf8") }))
}
/**
 * Loads the vector store from <workingDir>/<storeFileName> into memory.
 * Falls back to an empty index when the file is missing, unreadable, or
 * contains corrupt JSON, instead of throwing and aborting startup — the
 * index is simply rebuilt as files are re-embedded.
 * @param {string} _workingDir - project directory the store lives in
 * @returns {{path: string, embedding: number[], hash: string}[]} the loaded documents
 */
export const loadFromDisk = (_workingDir) => {
  workingDir = _workingDir
  const filePath = join(workingDir, storeFileName)
  documents = []
  if (existsSync(filePath)) {
    try {
      const data = safeReadFileSync(filePath, 'utf-8') // good enough for up to 10k files
      const parsed = JSON.parse(data)
      // Only accept the expected array shape; anything else is treated as corrupt.
      if (Array.isArray(parsed)) documents = parsed
    } catch {
      documents = [] // corrupt store file — start fresh rather than crash
    }
  }
  return documents
}
/**
 * Serializes the in-memory index to <workingDir>/<storeFileName>.
 * A single synchronous write is acceptable for the expected store size
 * (good enough for up to 10k files).
 * @param {string} _workingDir - project directory to write the store into
 */
export const storeToDisk = (_workingDir) => {
  workingDir = _workingDir
  const targetFile = join(workingDir, storeFileName)
  const serialized = JSON.stringify(documents)
  return writeFileSync(targetFile, serialized, 'utf-8')
}