@astermind/astermind-premium
Version:
Astermind Premium - Premium ML Toolkit
125 lines • 4.29 kB
JavaScript
// string-kernel-elm.ts — String Kernel ELM
// String kernels for text/DNA/protein sequences
import { KernelELM } from '@astermind/astermind-elm';
import { requireLicense } from '../core/license.js';
/**
 * String Kernel ELM for sequence data (text/DNA/protein).
 *
 * Strings are turned into L1-normalized n-gram count vectors over a
 * vocabulary learned during training, and those vectors are handed to a
 * KernelELM instance.
 *
 * Supported `kernelType` values: 'ngram', 'spectrum', 'subsequence'.
 * All three currently use the same contiguous n-gram extraction
 * ('subsequence' is a simplified implementation).
 */
export class StringKernelELM {
  /**
   * @param {object} options
   * @param {Array} options.categories - Class labels; a label's position is its class index.
   * @param {string} [options.kernelType='ngram'] - 'ngram' | 'spectrum' | 'subsequence'.
   * @param {number} [options.n=3] - N-gram length (also passed as the polynomial degree).
   * @param {number} [options.lambda=0.5] - Stored in `this.options`; not read by this class.
   * @param {string} [options.activation='relu'] - Stored in `this.options`; not read by this class.
   * @param {number} [options.maxLen=100] - Stored in `this.options`; not read by this class.
   */
  constructor(options) {
    this.trained = false;
    // Insertion-ordered set of n-grams; iteration order defines feature columns.
    this.vocabulary = new Set();
    requireLicense(); // Premium feature - requires valid license
    this.categories = options.categories;
    this.options = {
      categories: options.categories,
      kernelType: options.kernelType ?? 'ngram',
      n: options.n ?? 3,
      lambda: options.lambda ?? 0.5,
      activation: options.activation ?? 'relu',
      maxLen: options.maxLen ?? 100,
    };
    // Use polynomial kernel as base (will be adapted for strings)
    this.kelm = new KernelELM({
      categories: this.options.categories,
      kernel: 'polynomial',
      degree: this.options.n,
    });
  }

  /**
   * Train on string sequences. Rebuilds the vocabulary from `X`.
   *
   * @param {Iterable<string>} X - Training strings.
   * @param {Array} y - Labels; numbers are used as class indices as-is, anything
   *   else is looked up in `options.categories` (labels not found map to -1).
   */
  train(X, y) {
    const { categories } = this.options;
    const labelIndices = y.map((label) =>
      typeof label === 'number' ? label : categories.indexOf(label));
    // Learn the vocabulary from the training data and vectorize it.
    const featureVectors = this._stringsToFeatures(X, true);
    // Optional calls: skipped silently when the KernelELM instance does not
    // expose these methods. NOTE(review): confirm against the KernelELM API.
    this.kelm.setCategories?.(categories);
    this.kelm.trainFromData?.(featureVectors, labelIndices);
    this.trained = true;
  }

  /**
   * Convert strings to L1-normalized n-gram count vectors.
   *
   * @param {Iterable<string>} strings - Input strings.
   * @param {boolean} [fitVocabulary=true] - When true, the vocabulary is cleared
   *   and rebuilt from `strings` (training). When false, the existing vocabulary
   *   is reused and n-grams not in it are ignored (prediction), so vectors keep
   *   the same length and column order as the training vectors.
   * @returns {number[][]} One vector per input string, one column per vocabulary entry.
   */
  _stringsToFeatures(strings, fitVocabulary = true) {
    // Extract once per string; reused for vocabulary building and counting.
    const ngramLists = Array.from(strings, (s) => this._extractNgrams(s));

    if (fitVocabulary) {
      this.vocabulary.clear();
      for (const ngrams of ngramLists) {
        for (const ngram of ngrams) {
          this.vocabulary.add(ngram);
        }
      }
    }

    // n-gram -> column index, in vocabulary insertion order (O(1) lookups).
    const columnOf = new Map();
    for (const ngram of this.vocabulary) {
      columnOf.set(ngram, columnOf.size);
    }

    return ngramLists.map((ngrams) => {
      const feature = new Array(columnOf.size).fill(0);
      let total = 0;
      for (const ngram of ngrams) {
        const idx = columnOf.get(ngram);
        if (idx !== undefined) {
          feature[idx] += 1;
          total += 1;
        }
      }
      // Normalize so the counted n-grams sum to 1; all-zero vectors stay zero.
      if (total > 0) {
        for (let i = 0; i < feature.length; i++) {
          feature[i] /= total;
        }
      }
      return feature;
    });
  }

  /**
   * Extract all contiguous substrings of length `options.n` from `s`.
   *
   * @param {string} s - Input string.
   * @returns {string[]} N-grams in order of occurrence (duplicates kept); empty
   *   when `s` is shorter than `n` or `kernelType` is not a supported value.
   */
  _extractNgrams(s) {
    const { kernelType, n } = this.options;
    const ngrams = [];
    // 'subsequence' is simplified to the same sliding window as n-gram/spectrum.
    const isSupported = kernelType === 'ngram'
      || kernelType === 'spectrum'
      || kernelType === 'subsequence';
    if (!isSupported) {
      return ngrams;
    }
    for (let i = 0; i <= s.length - n; i++) {
      ngrams.push(s.substring(i, i + n));
    }
    return ngrams;
  }

  /**
   * Predict on strings using the vocabulary learned in `train`.
   *
   * @param {Iterable<string>} X - Strings to classify.
   * @param {number} [topK=3] - Maximum predictions kept per input string.
   * @returns {{label: *, prob: number}[]} Flat list: up to `topK` entries for the
   *   first string, followed by those for the second, and so on.
   * @throws {Error} If called before `train`.
   */
  predict(X, topK = 3) {
    if (!this.trained) {
      throw new Error('Model must be trained before prediction');
    }
    // Reuse the trained vocabulary: rebuilding it here would change the feature
    // dimensionality and column order the model was trained with.
    const featureVectors = this._stringsToFeatures(X, false);
    const results = [];
    for (const features of featureVectors) {
      // Optional call: yields no predictions when the method is absent.
      const preds = this.kelm.predictFromVector?.([features], topK) || [];
      for (const pred of preds.slice(0, topK)) {
        results.push({
          label: pred.label || this.options.categories[pred.index || 0],
          prob: pred.prob || 0,
        });
      }
    }
    return results;
  }
}
//# sourceMappingURL=string-kernel-elm.js.map