@ai-on-browser/data-analysis-models
Version:
Data analysis model package without any dependencies
132 lines (113 loc) • 3.4 kB
JavaScript
import Matrix from '../util/matrix.js'
/**
* Kullback-Leibler importance estimation procedure
*/
export default class KLIEP {
// https://github.com/hoxo-m/densratio
/**
* @param {number[]} sigma Sigmas of normal distribution
* @param {number} fold Number of folds
* @param {number} kernelNum Number of kernels
*/
constructor(sigma, fold, kernelNum) {
this._sigma_cand = sigma
this._fold = fold
this._kernelNum = kernelNum
}
_kernel_gaussian(x, c, s) {
const k = []
for (let i = 0; i < c.rows; i++) {
const ki = []
for (let j = 0; j < x.rows; j++) {
const r = Matrix.sub(c.row(i), x.row(j))
ki.push(Math.exp(-r.reduce((ss, v) => ss + v ** 2, 0) / (2 * s ** 2)))
}
k.push(ki)
}
return Matrix.fromArray(k)
}
_optimize_alpha(a, b) {
const c = Matrix.div(b, b.norm() ** 2)
let alpha = Matrix.ones(a.rows, 1)
alpha.add(Matrix.mult(c, 1 - b.tDot(alpha).toScaler()))
alpha.map(v => (v < 0 ? 0 : v))
alpha.div(b.tDot(alpha).toScaler())
let score = a.tDot(alpha)
score.map(Math.log)
score = score.mean()
for (let k = 3; k >= -3; k--) {
const epsilon = 10 ** k
const epsa = Matrix.mult(a, epsilon)
for (let i = 0; i < 100; i++) {
const alpha0 = epsa.dot(Matrix.div(1, a.tDot(alpha)))
alpha0.add(alpha)
alpha0.add(Matrix.mult(c, 1 - b.tDot(alpha0).toScaler()))
alpha0.map(v => (v < 0 ? 0 : v))
alpha0.div(b.tDot(alpha0).toScaler())
let newScore = a.tDot(alpha0)
newScore.map(Math.log)
newScore = newScore.mean()
if (newScore <= score) {
break
}
alpha = alpha0
score = newScore
}
}
return alpha
}
/**
* Fit model.
* @param {Array<Array<number>>} x1 Numerator data
* @param {Array<Array<number>>} x2 Denominator data
*/
fit(x1, x2) {
x1 = Matrix.fromArray(x1)
x2 = Matrix.fromArray(x2)
const n1 = x1.rows
const kn = Math.min(this._kernelNum, n1)
const centers = x1.sample(kn)[0]
this._centers = centers
this._sigma = this._sigma_cand[0]
if (this._sigma_cand.length > 1) {
let best_score = -Infinity
const cvCls = Array.from({ length: n1 }, (_, i) => i % this._fold)
for (const sgm of this._sigma_cand) {
const phi1 = this._kernel_gaussian(x1, centers, sgm)
const phi2 = this._kernel_gaussian(x2, centers, sgm)
const phi2mean = phi2.mean(1)
for (let i = cvCls.length - 1; i > 0; i--) {
const r = Math.floor(Math.random() * (i + 1))
;[cvCls[i], cvCls[r]] = [cvCls[r], cvCls[i]]
}
let totalScore = 0
for (let i = 0; i < this._fold; i++) {
const idx = cvCls.map(c => c === i)
const nidx = cvCls.map(c => c !== i)
const alpha = this._optimize_alpha(phi1.col(idx), phi2mean)
const score = phi1.col(nidx).tDot(alpha)
score.map(Math.log)
totalScore += score.mean()
}
totalScore /= this._fold
if (totalScore > best_score) {
best_score = totalScore
this._sigma = sgm
}
}
}
const phi1 = this._kernel_gaussian(x1, centers, this._sigma)
const phi2 = this._kernel_gaussian(x2, centers, this._sigma)
this._kw = this._optimize_alpha(phi1, phi2.mean(1))
}
/**
* Returns estimated values.
* @param {Array<Array<number>>} x Sample data
* @returns {number[]} Predicted values
*/
predict(x) {
x = Matrix.fromArray(x)
const phi = this._kernel_gaussian(x, this._centers, this._sigma)
return phi.tDot(this._kw).value
}
}