UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

176 lines (175 loc) 5.65 kB
/** * ScalarQuantizer — SQ8 标量量化器 * * 将 Float32 向量映射到 Uint8 (0-255),使用 per-dimension min/max 线性缩放: * q_i = round((v_i - min_i) / (max_i - min_i) * 255) * v̂_i = q_i / 255 * (max_i - min_i) + min_i * * 优势: * - 内存: 768 维 × 4 bytes → 768 维 × 1 byte = 75% 节省 * - Recall: > 95% (误差极小) * * @module infrastructure/vector/ScalarQuantizer */ export class ScalarQuantizer { #dimension; /** 每维最小值 */ #mins; /** 每维最大值 */ #maxs; /** 每维范围 (max - min), 预计算避免重复减法 */ #ranges; #trained = false; /** @param dimension 向量维度 */ constructor(dimension) { this.#dimension = dimension; this.#mins = new Float32Array(dimension); this.#maxs = new Float32Array(dimension); this.#ranges = new Float32Array(dimension); } get dimension() { return this.#dimension; } get trained() { return this.#trained; } /** * 训练量化器 — 从一批向量中统计 per-dimension min/max * @param vectors 训练集 (建议 ≥ 100 条) */ train(vectors) { if (!vectors || vectors.length === 0) { throw new Error('ScalarQuantizer.train() requires at least 1 vector'); } const dim = this.#dimension; const mins = new Float32Array(dim).fill(Infinity); const maxs = new Float32Array(dim).fill(-Infinity); for (const vec of vectors) { for (let i = 0; i < dim; i++) { const v = vec[i] || 0; if (v < mins[i]) { mins[i] = v; } if (v > maxs[i]) { maxs[i] = v; } } } // 防止 range = 0 (所有值相同的维度), 设置最小范围 const ranges = new Float32Array(dim); for (let i = 0; i < dim; i++) { const range = maxs[i] - mins[i]; ranges[i] = range > 1e-10 ? range : 1e-10; } this.#mins = mins; this.#maxs = maxs; this.#ranges = ranges; this.#trained = true; } /** 量化单个向量 */ encode(vector) { if (!this.#trained) { throw new Error('ScalarQuantizer not trained. Call train() first.'); } const dim = this.#dimension; const result = new Uint8Array(dim); const mins = this.#mins; const ranges = this.#ranges; for (let i = 0; i < dim; i++) { const v = vector[i] || 0; // 钳位到 [min, max] 范围, 然后映射到 [0, 255] const normalized = (v - mins[i]) / ranges[i]; result[i] = Math.max(0, Math.min(255, Math.round(normalized * 255))); } return result; } /** 批量量化 */ encodeBatch(vectors) { return vectors.map((v) => this.encode(v)); } /** 反量化 (用于精排 re-rank) */ decode(quantized) { if (!this.#trained) { throw new Error('ScalarQuantizer not trained.'); } const dim = this.#dimension; const result = new Float32Array(dim); const mins = this.#mins; const ranges = this.#ranges; for (let i = 0; i < dim; i++) { result[i] = (quantized[i] / 255) * ranges[i] + mins[i]; } return result; } /** * 量化空间内的距离计算 (避免反量化, 整数运算) * 使用 L2 on quantized space 近似余弦距离 * * @returns 距离值 (越小越相似) */ distance(a, b) { const dim = this.#dimension; let sum = 0; for (let i = 0; i < dim; i++) { const diff = a[i] - b[i]; sum += diff * diff; } // 归一化到 ~[0, 1] 范围: max possible = 255² × dim return sum / (255 * 255 * dim); } /** * 混合距离: 量化粗排 + 原始精排 * 用于搜索时: 先用 SQ8 快速过滤, 再用 Float32 精确计算 * * @returns } */ hybridDistance(quantizedA, originalA, quantizedB, originalB) { return { coarse: this.distance(quantizedA, quantizedB), fine: ScalarQuantizer.#cosineDistanceFloat(originalA, originalB), }; } /** 余弦距离 (Float32 精确计算) */ static #cosineDistanceFloat(a, b) { if (!a || !b || a.length === 0) { return 1; } const len = Math.min(a.length, b.length); let dot = 0, normA = 0, normB = 0; for (let i = 0; i < len; i++) { dot += a[i] * b[i]; normA += a[i] * a[i]; normB += b[i] * b[i]; } const denom = Math.sqrt(normA) * Math.sqrt(normB); return denom === 0 ? 1 : 1 - dot / denom; } /** * 序列化量化器参数 * @returns } */ serialize() { return { dimension: this.#dimension, mins: Array.from(this.#mins), maxs: Array.from(this.#maxs), }; } /** * 从序列化数据恢复量化器 * @param data */ static deserialize(data) { const q = new ScalarQuantizer(data.dimension); q.#mins = new Float32Array(data.mins); q.#maxs = new Float32Array(data.maxs); const ranges = new Float32Array(data.dimension); for (let i = 0; i < data.dimension; i++) { const range = q.#maxs[i] - q.#mins[i]; ranges[i] = range > 1e-10 ? range : 1e-10; } q.#ranges = ranges; q.#trained = true; return q; } }