vexify
Version:
Portable vector database with in-process ONNX embeddings. Zero-config semantic search via SQLite. No external servers required.
171 lines (144 loc) • 4.77 kB
JavaScript
;
const http = require('http');
const https = require('https');
const { getModelDimension } = require('../config/defaults');
/**
 * Client that requests text embeddings from a vLLM server over HTTP(S),
 * using the `/v1/models` endpoint for reachability checks and
 * `/v1/embeddings` for the embeddings themselves.
 */
class VLLMEmbedder {
  /**
   * @param {object} [options]
   * @param {string} [options.modelName] - Sent as `model` in embedding requests.
   * @param {string} [options.host] - Base URL of the server; defaults to
   *   `http://localhost:8000` when missing or empty.
   * @param {number} [options.dimension] - When set, embeddings longer than
   *   this are truncated to this many components.
   */
  constructor(options = {}) {
    this.modelName = options.modelName;
    this.host = options.host || 'http://localhost:8000';
    this.dimension = options.dimension;
    // Cache for detectHost(); stays null until the first detection finishes.
    this.detectedHost = null;
  }

  /**
   * Picks the host to talk to and caches the answer.
   *
   * The configured host is probed first, then `http://localhost:8000` as a
   * fallback. If neither answers, the configured host is cached anyway so
   * later calls do not pay for the probes again.
   *
   * @returns {Promise<string>} Base URL to use for requests.
   */
  async detectHost() {
    if (this.detectedHost) return this.detectedHost;

    const fallbackHost = 'http://localhost:8000';
    // Avoid probing the same URL twice when the fallback is the configured host.
    const candidates = this.host === fallbackHost
      ? [this.host]
      : [this.host, fallbackHost];

    for (const candidate of candidates) {
      if (await this._tryHost(candidate)) {
        this.detectedHost = candidate;
        return this.detectedHost;
      }
    }

    this.detectedHost = this.host;
    return this.detectedHost;
  }

  /**
   * Probes `GET /v1/models` on a host.
   *
   * Never rejects: a malformed URL, an unsupported protocol, a connection
   * error, or a timeout all resolve to `false`.
   *
   * @param {string} hostUrl - Base URL to probe.
   * @param {number} [timeoutMs=500] - Socket timeout in milliseconds.
   * @returns {Promise<boolean>} `true` only when the server answers with HTTP 200.
   */
  async _tryHost(hostUrl, timeoutMs = 500) {
    return new Promise((resolve) => {
      try {
        const url = new URL('/v1/models', hostUrl);
        const protocol = url.protocol === 'https:' ? https : http;
        const req = protocol.get(url, { timeout: timeoutMs }, (res) => {
          // Discard the body so the socket is released instead of left paused.
          res.resume();
          resolve(res.statusCode === 200);
        });
        req.on('error', () => resolve(false));
        req.on('timeout', () => {
          req.destroy();
          resolve(false);
        });
      } catch (error) {
        // `new URL` and `protocol.get` throw synchronously on bad input.
        resolve(false);
      }
    });
  }

  /**
   * Embeds a single text, retrying failed requests with a linearly growing
   * delay (1s, 2s, ...) between attempts.
   *
   * The detected host is passed to the request explicitly; `this.host` is
   * never modified, so concurrent calls cannot interfere with each other.
   *
   * @param {string} text - Text to embed; must contain non-whitespace characters.
   * @param {number} [retries=3] - Total number of attempts. Values below 1 or
   *   non-numeric values are treated as a single attempt; fractions are floored.
   * @returns {Promise<number[]>} The embedding vector.
   * @throws {Error} If `text` is empty, or the error of the last failed attempt.
   */
  async embed(text, retries = 3) {
    if (!text || text.trim().length === 0) {
      throw new Error('Cannot embed empty text');
    }

    const effectiveHost = await this.detectHost();
    // Guarantee at least one whole attempt so the call can never fall through
    // and resolve to undefined.
    const attempts = Math.max(1, Math.floor(Number(retries)) || 1);

    let lastError;
    for (let attempt = 1; attempt <= attempts; attempt++) {
      try {
        return await this._embedOnce(text, effectiveHost);
      } catch (error) {
        lastError = error;
        if (attempt < attempts) {
          await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
        }
      }
    }
    throw lastError;
  }

  /**
   * Performs one `POST /v1/embeddings` request.
   *
   * @param {string} text - Text to embed.
   * @param {string} [host=this.host] - Base URL of the server to call.
   * @returns {Promise<number[]>} The first embedding in the response,
   *   truncated to `this.dimension` when that is set and the vector is longer.
   * @throws {Error} On request/response stream errors, timeout, unparsable
   *   JSON, a server-reported error, or a response without an embedding.
   */
  async _embedOnce(text, host = this.host) {
    return new Promise((resolve, reject) => {
      const data = JSON.stringify({
        model: this.modelName,
        input: text,
        encoding_format: 'float'
      });
      const url = new URL('/v1/embeddings', host);
      const options = {
        hostname: url.hostname,
        port: url.port || (url.protocol === 'https:' ? 443 : 80),
        path: url.pathname,
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(data)
        },
        timeout: 300000
      };
      const protocol = url.protocol === 'https:' ? https : http;

      const req = protocol.request(options, (res) => {
        // Decode as UTF-8 at the stream level so a multi-byte character split
        // across two chunks is not corrupted by string concatenation.
        res.setEncoding('utf8');
        let responseData = '';
        res.on('data', (chunk) => {
          responseData += chunk;
        });
        res.on('end', () => {
          let response;
          try {
            response = JSON.parse(responseData);
          } catch (e) {
            reject(new Error(`Failed to parse response: ${e.message}. Data: ${responseData.substring(0, 200)}`));
            return;
          }

          const items = response && Array.isArray(response.data) ? response.data : [];
          const embedding = items.length > 0 && items[0] ? items[0].embedding : undefined;

          if (Array.isArray(embedding)) {
            if (this.dimension && embedding.length > this.dimension) {
              resolve(embedding.slice(0, this.dimension));
            } else {
              resolve(embedding);
            }
          } else if (response && response.error) {
            reject(new Error(`vLLM error: ${JSON.stringify(response.error)}`));
          } else {
            reject(new Error(`No embedding in response. Response: ${responseData.substring(0, 200)}`));
          }
        });
        res.on('error', (error) => {
          reject(new Error(`Response stream error: ${error.message}`));
        });
      });

      req.on('error', (error) => {
        reject(new Error(`vLLM request failed: ${error.message}`));
      });
      req.on('timeout', () => {
        req.destroy();
        reject(new Error('vLLM request timeout after 300s'));
      });
      req.write(data);
      req.end();
    });
  }

  /**
   * Checks whether the detected host currently answers `GET /v1/models`
   * with HTTP 200, using a 2 second timeout.
   *
   * @returns {Promise<boolean>} `true` when reachable, `false` otherwise.
   */
  async checkConnection() {
    const effectiveHost = await this.detectHost();
    return this._tryHost(effectiveHost, 2000);
  }

  /**
   * Returns the embedding dimension: the configured `dimension` if set,
   * otherwise the value `getModelDimension` reports for the model. If that
   * lookup throws, a warning is logged and 768 is returned.
   *
   * @returns {number} Embedding dimension.
   */
  getDimension() {
    if (this.dimension) return this.dimension;
    try {
      return getModelDimension(this.modelName);
    } catch (e) {
      console.warn(`Model ${this.modelName} not in registry, defaulting to 768: ${e.message}`);
      return 768;
    }
  }
}
// CommonJS export: consumers obtain the class via `const { VLLMEmbedder } = require(...)`.
module.exports = { VLLMEmbedder };