UNPKG

@astermind/astermind-premium

Version:

Astermind Premium - Premium ML Toolkit

125 lines 4.29 kB
// string-kernel-elm.ts — String Kernel ELM
// String kernels for text/DNA/protein sequences
import { KernelELM } from '@astermind/astermind-elm';
import { requireLicense } from '../core/license.js';

/**
 * String Kernel ELM for sequence data (text / DNA / protein strings).
 *
 * Each input string is turned into a normalized bag-of-n-grams vector over a
 * vocabulary learned during training; those vectors are handed to a
 * polynomial-kernel KernelELM.
 *
 * Notes on the current implementation:
 * - `kernelType` accepts 'ngram', 'spectrum' and 'subsequence'; all three
 *   extract the same contiguous n-grams. Any other value yields no n-grams.
 * - `lambda`, `activation` and `maxLen` are stored in `this.options` but are
 *   not read by any code in this class.
 */
export class StringKernelELM {
    /**
     * @param {object} options
     * @param {Array} options.categories - Class labels; also used to map non-numeric labels to indices.
     * @param {string} [options.kernelType='ngram'] - 'ngram' | 'spectrum' | 'subsequence'.
     * @param {number} [options.n=3] - N-gram length; also passed as the polynomial kernel degree.
     * @param {number} [options.lambda=0.5] - Stored only.
     * @param {string} [options.activation='relu'] - Stored only.
     * @param {number} [options.maxLen=100] - Stored only.
     */
    constructor(options) {
        this.trained = false;
        this.vocabulary = new Set();
        requireLicense(); // Premium feature - requires valid license
        this.categories = options.categories;
        this.options = {
            categories: options.categories,
            kernelType: options.kernelType ?? 'ngram',
            n: options.n ?? 3,
            lambda: options.lambda ?? 0.5,
            activation: options.activation ?? 'relu',
            maxLen: options.maxLen ?? 100,
        };
        // Use polynomial kernel as base (will be adapted for strings)
        this.kelm = new KernelELM({
            categories: this.options.categories,
            kernel: 'polynomial',
            degree: this.options.n,
        });
    }

    /**
     * Train on string sequences. Rebuilds the n-gram vocabulary from `X`.
     *
     * @param {string[]} X - Training strings.
     * @param {Array<number|*>} y - One label per string. Numbers are used as
     *   class indices as-is; anything else is looked up in `options.categories`
     *   (a label that is not found becomes -1, the `indexOf` result).
     */
    train(X, y) {
        const { categories } = this.options;
        // Prepare labels
        const labelIndices = y.map((label) => (typeof label === 'number' ? label : categories.indexOf(label)));
        // Convert strings to feature vectors (this call defines the vocabulary)
        const featureVectors = this._stringsToFeatures(X);
        // Train KELM. The optional calls are skipped silently when the
        // KernelELM instance does not expose these methods.
        this.kelm.setCategories?.(categories);
        this.kelm.trainFromData?.(featureVectors, labelIndices);
        this.trained = true;
    }

    /**
     * Convert strings to normalized n-gram count vectors.
     *
     * @param {string[]} strings - Input strings.
     * @param {boolean} [rebuildVocabulary=true] - When true (the default, used
     *   for training) the vocabulary is cleared and rebuilt from `strings`.
     *   When false (used for prediction) the existing vocabulary is kept, so
     *   the vectors share the dimension and column order of the training
     *   vectors; n-grams that are not in the vocabulary are ignored.
     * @returns {number[][]} One vector per string; each vector sums to 1, or
     *   is all zeros when the string contributes no known n-gram.
     */
    _stringsToFeatures(strings, rebuildVocabulary = true) {
        // Extract once per string and reuse for both passes.
        const ngramsPerString = Array.from(strings, (s) => this._extractNgrams(s));
        if (rebuildVocabulary) {
            this.vocabulary.clear();
            for (const ngrams of ngramsPerString) {
                for (const ngram of ngrams) {
                    this.vocabulary.add(ngram);
                }
            }
        }
        // A Set iterates in insertion order, so these indices are identical to
        // positions in Array.from(this.vocabulary) — without the O(V) indexOf
        // scan per n-gram.
        const indexByNgram = new Map();
        for (const ngram of this.vocabulary) {
            indexByNgram.set(ngram, indexByNgram.size);
        }
        const dimension = indexByNgram.size;
        return ngramsPerString.map((ngrams) => {
            const feature = new Array(dimension).fill(0);
            let total = 0;
            for (const ngram of ngrams) {
                const idx = indexByNgram.get(ngram);
                if (idx !== undefined) {
                    feature[idx] += 1;
                    total += 1;
                }
            }
            // Normalize counts to relative frequencies
            if (total > 0) {
                for (let i = 0; i < dimension; i++) {
                    feature[i] /= total;
                }
            }
            return feature;
        });
    }

    /**
     * Extract contiguous n-grams (length `options.n`) from a string.
     *
     * @param {string} s - Input string.
     * @returns {string[]} N-grams in order of occurrence, duplicates kept.
     *   Empty when the string is shorter than `n` or `kernelType` is not one
     *   of 'ngram', 'spectrum', 'subsequence'.
     */
    _extractNgrams(s) {
        const { kernelType, n } = this.options;
        const ngrams = [];
        // 'subsequence' is a simplified variant: it uses the same contiguous
        // windows as 'ngram' / 'spectrum'.
        if (kernelType !== 'ngram' && kernelType !== 'spectrum' && kernelType !== 'subsequence') {
            return ngrams;
        }
        for (let i = 0; i <= s.length - n; i++) {
            ngrams.push(s.substring(i, i + n));
        }
        return ngrams;
    }

    /**
     * Predict on strings using the vocabulary learned by `train`.
     *
     * @param {string[]} X - Strings to classify.
     * @param {number} [topK=3] - Maximum number of predictions kept per string.
     * @returns {Array<{label: *, prob: number}>} Predictions for all inputs,
     *   concatenated in input order (up to `topK` entries per input).
     * @throws {Error} If called before `train`.
     */
    predict(X, topK = 3) {
        if (!this.trained) {
            throw new Error('Model must be trained before prediction');
        }
        // Keep the training vocabulary: rebuilding it here would change the
        // feature dimension and ordering the model was trained on.
        const featureVectors = this._stringsToFeatures(X, false);
        const results = [];
        for (const features of featureVectors) {
            const preds = this.kelm.predictFromVector?.([features], topK) || [];
            for (const pred of preds.slice(0, topK)) {
                results.push({
                    label: pred.label || this.options.categories[pred.index || 0],
                    prob: pred.prob || 0,
                });
            }
        }
        return results;
    }
}
//# sourceMappingURL=string-kernel-elm.js.map