neurex

A trainable neural network in NodeJS, designed for ease of implementation and ANN modelling.

/**
 * Neurex - Feedforward Neural Network NodeJS library
 * Author: Kark Angelo V. Pada
 *
 * Copyright (c) all rights reserved
 *
 * Licensed under the MIT License.
 * See LICENSE file in the project root for full license information.
 */

/** import necessary modules */
const fs = require('fs');
const zlib = require('zlib');
const path = require('path');
const activation = require('../gpu/kernels/activations');
const detect = require('../gpu/detectGPU');
const { computeWeightGradients, scaleGradients } = require('../gpu/kernels/gradientKernels');
const optimizers = require('../optimizers');
const lossFunctions = require('../loss_functions');

/**
 * Neurex is a configurable feedforward artificial neural network.
 *
 * This class allows you to define the architecture of a neural network by specifying the number of layers,
 * neurons per layer, and activation functions. It supports training with various optimizers, saving
 * model state, and provides utility methods for inspecting the model structure.
 * @class
 * @property {Array<Array<Array<number>>>} weights - The weights for each layer, organized as a 3D array [layer][input][output].
 * @property {Array<Array<number>>} biases - The biases for each layer, organized as a 2D array [layer][neuron].
 * @property {number} learning_rate - The learning rate used during training.
 * @property {number} num_layers - The total number of layers in the network.
 * @property {Array<Function>} activation_functions - The activation functions for each layer.
 * @property {Array<Function>} derivative_functions - The derivatives of the activation functions for each layer.
 * @property {Array<number>} number_of_neurons - The number of neurons in each layer.
 * @property {number} input_size - The number of input features (input layer size).
 * @property {number} epoch_count - The number of epochs the model has been trained for.
 * @property {string} optimizer - The name of the optimizer used for training.
 * @property {Object} optimizerStates - Internal state for optimizers, storing per-layer weight and bias states.
 */
class Neurex {
    constructor() {
        this.weights = [];
        this.biases = [];
        this.num_layers = 0;
        this.input_size = 0;
        this.accuracy = '';
        this.loss_function = '';
        this.output_size = 0;
        this.task = null;
        this.epoch_count = 0;
        this.batch_size = 0;
        this.layers = []; // layers (except input-type layers) and their details are stored here
        this.hasSequentiallyBuild = false;
        this.hasBuilt = false;

        // default configs
        this.optimizer = 'sgd';
        this.learning_rate = 0.001;
        this.randMin = -1;
        this.randMax = 1;

        // Optimizer state for each layer (weights and biases)
        this.optimizerStates = {
            weights: [], // Array of state objects for each layer's weights
            biases: []   // Array of state objects for each layer's biases
        };

        this.onGPU = true;
        this.isfailed = false;
    }

    /**
     * @typedef {Object} NeurexConfig
     * @property {number} [learning_rate] - Learning rate for training.
     * @property {string} [optimizer] - Optimizer to use [available: sgd, adam, adagrad, rmsprop, adadelta].
     * @property {number} [randMin] - Minimum value for random initialization of weights/biases.
     * @property {number} [randMax] - Maximum value for random initialization of weights/biases.
     */

    /**
     * Allows configuration of your neural network's parameters.
     * @method configure
     * @param {NeurexConfig} configs - Configuration options for the neural network.
     *
     * All options are optional. Be careful when tweaking them, as they affect your model's performance.
     *
     * Default configuration:
     * learning_rate: 0.001
     * optimizer: 'sgd'
     * randMin: -1
     * randMax: 1
     */
    configure(configs) {
        if (configs.learning_rate !== undefined) this.learning_rate = configs.learning_rate;
        if (configs.optimizer !== undefined) this.optimizer = configs.optimizer;
        if (configs.randMin !== undefined) this.randMin = configs.randMin;
        if (configs.randMax !== undefined) this.randMax = configs.randMax;
    }
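
    /*
     * Usage sketch (illustrative, not part of the library): overriding the
     * defaults before building. Any option omitted keeps its default value.
     *
     *   const model = new Neurex();
     *   model.configure({
     *       learning_rate: 0.01,   // step size for the optimizer
     *       optimizer: 'adam',     // one of: sgd, adam, adagrad, rmsprop, adadelta
     *       randMin: -0.5,         // lower bound for uniform weight/bias init
     *       randMax: 0.5           // upper bound for uniform weight/bias init
     *   });
     */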

    /**
     * @method modelSummary
     * Shows the model architecture.
     */
    modelSummary() {
        console.log("_______________________________________________________________");
        console.log("                         Model Summary                         ");
        console.log("_______________________________________________________________");
        console.log(`Input size: ${this.input_size}`);
        console.log(`Number of layers: ${this.num_layers}`);
        console.log("---------------------------------------------------------------");
        console.log("Layer (type)              Output Shape        Activation       ");
        console.log("===============================================================");
        this.layers.forEach(layer => {
            const layerName = layer.layer_name === "connected_layer" ? "Connected Layer" : layer.layer_name;
            const activationName = layer.activation_function ? layer.activation_function.name : 'None';
            const outputShape = layer.layer_size;
            console.log(
                `${layerName.padEnd(26, ' ')}(None, ${outputShape})           ${activationName.padEnd(10, ' ')}`
            );
        });
        const total_weights = this.weights.flat(Infinity).length;
        const total_biases = this.biases.flat(Infinity).length;
        console.log("===============================================================");
        console.log("Total layers: " + this.num_layers);
        console.log("Total learnable parameters:", total_weights + total_biases);
        console.log("===============================================================");
    }

    /**
     * @method saveModel
     * @param {string} modelName - the filename of your model
     *
     * saveModel() allows you to save your model's architecture, weights, and biases, as well as other parameters.
     * The model is exported as a .nrx (Neurex) model, and a metadata.json is generated alongside the model file.
     */
    saveModel(modelName = null) {
        console.log("\n[TASK]------- Saving model's architecture...");
        let fileName = modelName;
        if (!modelName) {
            fileName = `Model_${new Date().toISOString().replace(/[:.]/g, '-')}`;
        }
        const data = {
            "task": this.task,
            "loss_function": this.loss_function,
            "epoch": this.epoch_count,
            "batch_size": this.batch_size,
            "optimizer": this.optimizer,
            "learning_rate": this.learning_rate,
            "layers": this.layers.map(layer => ({
                layer_name: layer.layer_name,
                activation_function_name: layer.activation_function ? layer.activation_function.name : null,
                derivative_activation_function_name: layer.derivative_activation_function ? layer.derivative_activation_function.name : null,
                layer_size: layer.layer_size || null,
                feedforward: layer.feedforward,
                backpropagate: layer.backpropagate
            })),
            "input_size": this.input_size,
            "output_size": this.output_size,
            "num_layers": this.num_layers,
            "weights": this.weights,
            "biases": this.biases,
        };
        const metadata = [
            this.epoch_count,
            this.optimizer,
            this.loss_function,
            this.task
        ];
        this.#save(data, fileName, metadata);
    }
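
    /*
     * Usage sketch (illustrative): saving after training. With an explicit name,
     * this writes `my_model.nrx` plus `metadata.json` next to the main script;
     * with no argument, a timestamped name such as `Model_<ISO date>.nrx` is generated.
     *
     *   model.saveModel('my_model');
     *   // or
     *   model.saveModel(); // auto-generated filename
     */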

    /**
     * @method sequentialBuild
     *
     * Interface for stacking layer types. No weights or biases are initialized here.
     * @param {Object} layer_data
     */
    sequentialBuild(layer_data) {
        try {
            if (!layer_data || layer_data.length < 2) {
                throw new Error("[ERROR]------- No layers");
            }
            layer_data.forEach(layer => {
                // extract input size
                if (layer.layer_name === "input_layer") {
                    this.input_size = layer.layer_size;
                } else {
                    this.layers.push(layer);
                }
            });
            this.hasSequentiallyBuild = true;
            this.num_layers = this.layers.length;
            return layer_data;
        } catch (err) {
            console.error(err);
        }
    }

    /**
     * Initializes weights and biases for the layers.
     */
    build() {
        try {
            if (!this.hasSequentiallyBuild || this.layers.length == 0) {
                throw new Error('[ERROR]------- Use sequentialBuild() first to build your model');
            }
            let prev_size = this.input_size;
            // initialize weights and biases
            this.layers.forEach(layer_data => {
                if (layer_data.layer_name === "connected_layer") {
                    let layer_size = layer_data.layer_size;

                    // initialize biases
                    let generated_biases = [];
                    for (let j = 0; j < layer_size; j++) {
                        generated_biases.push(Math.random() * (this.randMax - this.randMin) + this.randMin);
                    }
                    this.biases.push(generated_biases);

                    // initialize weights
                    let layerWeights = [];
                    for (let r = 0; r < prev_size; r++) {
                        let row = [];
                        for (let c = 0; c < layer_size; c++) {
                            row.push(Math.random() * (this.randMax - this.randMin) + this.randMin);
                        }
                        layerWeights.push(row);
                    }
                    this.weights.push(layerWeights);
                    prev_size = layer_size;
                } else {
                    // for other layer types
                }
            });
            this.hasBuilt = true;
        } catch (error) {
            console.error(error);
        }
    }
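
    /*
     * Shape note (illustrative): for input_size = 4 and connected layers of
     * sizes 3 and 2, build() allocates the following, with every value drawn
     * uniformly from [randMin, randMax]:
     *
     *   weights[0]: 4 x 3   (prev_size x layer_size)
     *   biases[0]:  3
     *   weights[1]: 3 x 2
     *   biases[1]:  2
     */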

    /**
     * Trains the neural network using the provided training data, target values, number of epochs, and batch size.
     *
     * This method iteratively performs forward propagation, computes the loss, backpropagates the error,
     * and updates the weights and biases using the configured optimizer.
     *
     * @method train
     * @param {Array<Array<number>>} trainX - The input training data. Each element is an array representing a single sample's features.
     * @param {Array<number>} trainY - The target values (ground truth) corresponding to each sample in trainX.
     * @param {string} loss - loss function to use: mse, mae, binary_cross_entropy, categorical_cross_entropy, sparse_categorical_cross_entropy
     * @param {Number} epoch - the number of training iterations
     * @param {Number} batch_size - mini-batch size
     *
     * @throws {Error} Throws an error if any required parameter is missing.
     * @returns Nothing; progress for every epoch is printed to the console.
     *
     * @example
     * // Example usage:
     *
     * const { Neurex, Layers } = require('neurex');
     * const model = new Neurex();
     * const layer = new Layers();
     *
     * model.sequentialBuild([
     *     layer.inputShape(X_train),
     *     layer.connectedLayer("relu", 3),
     *     layer.connectedLayer("relu", 3),
     *     layer.connectedLayer("softmax", 2)
     * ]);
     * model.build();
     *
     * model.train(X_train, Y_train, 'categorical_cross_entropy', 2000, 12);
     *
     * // After training, you can use the network for predictions.
     */
    train(trainX, trainY, loss, epoch, batch_size) {
        try {
            if (!this.hasBuilt || this.biases.length == 0 || this.weights.length == 0) {
                this.isfailed = true;
                throw new Error("[FAILED]------- No model has been built. Use build() first");
            }
            if (!trainX || !trainY || !loss) {
                this.isfailed = true;
                throw new Error(`[FAILED]------- There is/are missing parameter/s. Failed to start training...`);
            }
            if (epoch == 0 || batch_size == 0 || !epoch || !batch_size) {
                this.isfailed = true;
                throw new Error("[FAILED]------- Epoch or batch size cannot be zero");
            }

            // Resolve the output layer only after the model is known to exist;
            // doing this earlier would throw an uncaught TypeError on an unbuilt model.
            const lastLayerObject = this.layers[this.layers.length - 1];
            this.output_size = lastLayerObject.layer_size;

            // Initialize optimizer state for each layer
            this.optimizerStates = {
                weights: Array(this.num_layers).fill().map(() => ({})),
                biases: Array(this.num_layers).fill().map(() => ({}))
            };

            const { gpu, backend, isGPUAvailable, isSoftwareGPU } = detect();
            if (!isGPUAvailable || isSoftwareGPU) {
                console.log(`[INFO]------- Falling back to CPU mode (no GPU acceleration)`);
                this.onGPU = false;
            } else {
                console.log(`[INFO]-------- Backend Detected: ${backend}. Using ${gpu}`);
                this.onGPU = true;
            }

            this.loss_function = loss.toLowerCase();
            const loss_function = lossFunctions[this.loss_function];
            const optimizerFn = optimizers[this.optimizer.toLowerCase()];
            this.epoch_count = epoch;
            this.batch_size = batch_size;
            const batchSize = batch_size;

            // Infer the task type from the output layer's activation and the loss function
            const lastLayerActivation = lastLayerObject.activation_function.name;
            const lossLower = loss.toLowerCase();

            // Regression: linear activation in the output layer, loss mse/mae
            if (lastLayerActivation === 'linear' && (lossLower === 'mse' || lossLower === 'mae')) {
                this.task = 'regression';
            }
            // Binary classification: sigmoid activation in the output layer, loss binary_cross_entropy
            else if (lastLayerActivation === "sigmoid" && lossLower === 'binary_cross_entropy') {
                this.task = 'binary_classification';
            }
            // Multi-class classification: softmax activation in the output layer, loss categorical_cross_entropy (labels must be one-hot encoded)
            else if (lastLayerActivation === 'softmax' && lossLower === 'categorical_cross_entropy') {
                // verify that every row of trainY matches the output layer size
                trainY.forEach(row => {
                    if (this.output_size != row.length) {
                        this.isfailed = true;
                        throw new Error(`[ERROR]------- Output shape mismatch. The size of the output layer must be the same as the number of classes`);
                    }
                });
                // check whether Y_train is one-hot encoded
                const isOneHotEncoded = this.#isOneHotEncoded(trainY);
                if (isOneHotEncoded) {
                    this.task = 'multi_class_classification';
                } else {
                    this.isfailed = true;
                    throw new Error("[ERROR]------- Y_train must be one-hot encoded for the categorical_cross_entropy loss. Use 'sparse_categorical_cross_entropy' instead if Y_train contains integer-encoded labels.");
                }
            } else if (lastLayerActivation === 'softmax' && lossLower === 'sparse_categorical_cross_entropy') {
                this.task = 'multi_class_classification';
            } else {
                this.isfailed = true;
                throw new Error(`[ERROR]------- Using ${lossLower} with an output size of ${this.output_size} and a ${lastLayerActivation} function in the output layer is currently unavailable for this core's task.`);
            }

            if (!optimizerFn) {
                this.isfailed = true;
                throw new Error(`Unknown optimizer: ${this.optimizer}`);
            }
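
            // Label format note (illustrative): for a 3-class problem,
            // categorical_cross_entropy expects one one-hot row per sample:
            //   Y_train = [[1, 0, 0], [0, 0, 1], ...]
            // while sparse_categorical_cross_entropy expects integer class labels
            // wrapped in single-element rows (see the sparse delta step below):
            //   Y_train = [[0], [2], ...]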

            console.log("\n[TASK]------- Training session is starting\n");

            // epoch loop
            for (let current_epoch = 0; current_epoch < epoch; current_epoch++) {
                let totalepochLoss = 0;
                let numBatches = 0; // counts batches per epoch

                // mini-batch loop
                for (let batchStart = 0; batchStart < trainX.length; batchStart += batchSize) {
                    numBatches++;
                    const batchEnd = Math.min(batchStart + batchSize, trainX.length);
                    const actualBatchSize = batchEnd - batchStart;

                    // Initialize accumulators for gradients
                    let weightGrads = this.weights.map(layer => layer.map(row => row.map(() => 0)));
                    let biasGrads = this.biases.map(layer => layer.map(() => 0));
                    let batchLoss = 0;

                    // Accumulate gradients for each sample in the batch
                    for (let sample_index = batchStart; sample_index < batchEnd; sample_index++) {
                        let input = trainX[sample_index];
                        let actual = trainY[sample_index];

                        // feed forward
                        const { predictions, activations, zs } = this.#Feedforward(input);
                        const deltas = [];

                        // === STEP 1: Compute delta for output layer === //
                        let output_layer_index = this.num_layers - 1;
                        let dOutputlayer = [];
                        const network_output_layer = this.layers[output_layer_index];
                        batchLoss += loss_function(predictions, actual);
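
                        // Gradient note (illustrative): for sigmoid + binary cross-entropy
                        // and softmax + categorical cross-entropy, the loss derivative with
                        // respect to the pre-activation z simplifies to
                        //   dL/dz_j = predictions[j] - actual[j]
                        // which is why no separate activation derivative appears in those
                        // branches below. For regression, the chain rule is applied
                        // explicitly (the MSE gradient, up to a constant factor):
                        //   dL/dz_j = (predictions[j] - actual[j]) * f'(z_j)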
                        for (let j = 0; j < network_output_layer.layer_size; j++) {
                            if (this.task === "binary_classification") {
                                // binary classification
                                dOutputlayer.push(predictions[j] - actual[j]);
                            } else if (this.task === "multi_class_classification") {
                                if (lastLayerActivation === 'softmax' && lossLower === "categorical_cross_entropy") {
                                    dOutputlayer.push(predictions[j] - actual[j]);
                                } else if (lastLayerActivation === 'softmax' && lossLower === "sparse_categorical_cross_entropy") {
                                    dOutputlayer = [...predictions];
                                    dOutputlayer[actual[0]] -= 1;
                                } else {
                                    throw new Error(`[ERROR]------- Unknown loss function for multi-class classification. Loss: ${lossLower} is unknown.`);
                                }
                            } else {
                                // regression tasks: single- or multi-output regression
                                const error = predictions[j] - actual[j];
                                const dAct = network_output_layer.derivative_activation_function(zs[output_layer_index][j]);
                                dOutputlayer.push(error * dAct);
                            }
                        }
                        deltas[output_layer_index] = dOutputlayer;

                        // === STEP 2: Backpropagate through the hidden layers === //
                        const allDeltas = this.#backpropagation(activations, zs, deltas);

                        // === STEP 3: Accumulate Gradients === //
                        for (let l = 0; l < this.num_layers; l++) {
                            const delta = allDeltas[l];
                            const a_prev = activations[l];

                            // GPU-accelerated weight gradient (outer product)
                            const weightGrad = computeWeightGradients(this.onGPU, a_prev, delta);
                            weightGrads[l] = weightGrads[l].map((row, i) =>
                                row.map((val, j) => val + weightGrad[i][j])
                            );

                            // Accumulate bias gradients (still CPU for now)
                            for (let j = 0; j < biasGrads[l].length; j++) {
                                biasGrads[l][j] += delta[j];
                            }
                        }
                    }

                    batchLoss /= actualBatchSize;
                    totalepochLoss += batchLoss;

                    // Divide accumulated gradients by the actual batch size
                    for (let l = 0; l < this.num_layers; l++) {
                        for (let i = 0; i < weightGrads[l].length; i++) {
                            weightGrads[l][i] = scaleGradients(this.onGPU, weightGrads[l][i], actualBatchSize);
                        }
                        biasGrads[l] = scaleGradients(this.onGPU, biasGrads[l], actualBatchSize);
                    }

                    for (let l = 0; l < this.num_layers; l++) {
                        // Update weights
                        this.optimizerStates.weights[l] = optimizerFn(
                            this.onGPU,
                            this.weights[l],
                            weightGrads[l],
                            this.optimizerStates.weights[l],
                            this.learning_rate
                        );
                        // Update biases
                        this.optimizerStates.biases[l] = optimizerFn(
                            this.onGPU,
                            this.biases[l],
                            biasGrads[l],
                            this.optimizerStates.biases[l],
                            this.learning_rate
                        );
                    }
                }

                let AverageEpochLoss = totalepochLoss / numBatches;
                let logMessage = `[Epoch] ${current_epoch + 1}/${epoch} | [Loss]: ${AverageEpochLoss.toFixed(7)}`;

                if (this.task === 'binary_classification' || this.task === 'multi_class_classification') {
                    let epochPredictions = [];
                    for (let i = 0; i < trainX.length; i++) {
                        epochPredictions.push(this.#Feedforward(trainX[i]).predictions);
                    }
                    const accuracy = this.#calculateClassificationAccuracy(epochPredictions, trainY, this.task);
                    logMessage += ` | [Accuracy in Training]: ${accuracy.toFixed(2)}%`;
                }
                console.log(logMessage);
            }
        } catch (error) {
            console.log(error);
        }
    }

    /**
     * @method predict
     * @param {Array} input - input data
     * @returns Array of predictions based on the input data
     * @throws Error when there is a shape mismatch or no input data
     */
    predict(input) {
        try {
            if (!input) {
                throw new Error("\n[ERROR]-------No inputs");
            }
            if (input[0].length != this.input_size) {
                throw new Error(`\n[ERROR]-------Shape Mismatch | Input shape length: ${input[0].length} | Expecting ${this.input_size}`);
            }
            let outputs = [];
            for (let sample_index = 0; sample_index < input.length; sample_index++) {
                let input_data = input[sample_index];
                const { predictions } = this.#Feedforward(input_data);
                outputs.push(predictions);
            }
            return outputs;
        } catch (error) {
            console.log(error);
        }
    }
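
    /*
     * Usage sketch (illustrative): predict() always takes a 2D array,
     * one row per sample, and returns one output row per sample.
     *
     *   const outputs = model.predict([
     *       [5.1, 3.5, 1.4, 0.2],
     *       [6.7, 3.0, 5.2, 2.3]
     *   ]);
     *   // e.g. softmax outputs: [[0.91, 0.06, 0.03], [0.02, 0.11, 0.87]]
     */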

    // ========= Private methods ======= //

    // backpropagation
    #backpropagation(activations, zs, deltas) {
        // Iterate from the second-to-last layer (this.num_layers - 2) down to the first layer (0).
        for (let layer_index = this.num_layers - 2; layer_index >= 0; layer_index--) {
            const next_weights = this.weights[layer_index + 1];
            const next_delta = deltas[layer_index + 1];
            if (!Array.isArray(next_delta)) {
                throw new Error(`deltaNext at layer ${layer_index + 1} is undefined`);
            }
            // Get the current layer object, which holds its own backpropagation logic
            const currentLayer = this.layers[layer_index];
            // Call the layer's specific backpropagation method
            const current_delta = currentLayer.backpropagate(this.onGPU, next_weights, next_delta, zs, layer_index);
            deltas[layer_index] = current_delta;
        }
        return deltas;
    }

    // forward propagation
    #Feedforward(input) {
        let current_input = input;
        let all_layer_outputs = [input];
        let zs = [];
        for (let layer_index = 0; layer_index < this.num_layers; layer_index++) {
            const current_layer = this.layers[layer_index];
            const layer_weights = this.weights[layer_index];
            const layer_biases = this.biases[layer_index];
            // Call the layer's specific feedforward method
            const { outputs, z_values } = current_layer.feedforward(this.onGPU, current_input, layer_weights, layer_biases);
            zs.push(z_values);
            current_input = outputs;
            all_layer_outputs.push(current_input);
        }
        // after all layers have produced their outputs, return the final current_input as the predictions
        return {
            predictions: current_input,
            activations: all_layer_outputs,
            zs: zs
        };
    }

    // saving model
    #save(data, fileName, meta) {
        if (this.isfailed) {
            console.log('[FAILED]------- Failed to save model');
        } else {
            const dir = path.dirname(require.main.filename);
            const metadata = {
                "Date Created": `${new Date().toISOString().replace(/[:.]/g, '-')}`,
                "Number of epoch to train": meta[0],
                "Optimizer": meta[1],
                "Loss function": meta[2],
                "Task": meta[3],
                "Trained using": "Neurex",
                "Note": "This model can only be used with the Neurex library. It cannot be used directly in other ML frameworks. DO NOT modify any of the parameters."
            };

            // Serialize and compress the model data
            const jsonString = JSON.stringify(data);
            const compressedData = zlib.deflateSync(jsonString);

            // Define file format:
            // [HEADER (4 bytes)] + [VERSION (1 byte)] + [DATA (compressed)]
            const header = Buffer.from("NRX2");   // Magic bytes
            const version = Buffer.from([0x02]);  // Version 2

            // Combine all parts
            const finalBuffer = Buffer.concat([header, version, compressedData]);

            const nrxFilePath = path.join(dir, `${fileName}.nrx`);
            const metadataFilePath = path.join(dir, `metadata.json`);

            fs.writeFileSync(nrxFilePath, finalBuffer);
            fs.writeFileSync(metadataFilePath, JSON.stringify(metadata, null, 2));
            console.log(`[SUCCESS]------- Model is saved as ${fileName}.nrx`);
        }
    }
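
    /*
     * Format note (illustrative): a .nrx file written by #save() can be decoded
     * by reversing the layout above. This sketch assumes only Node's built-in
     * modules; the library's own loader (if any) is the authoritative reader.
     *
     *   const fs = require('fs');
     *   const zlib = require('zlib');
     *
     *   const buf = fs.readFileSync('my_model.nrx');
     *   const magic = buf.subarray(0, 4).toString();  // "NRX2"
     *   const version = buf[4];                       // 0x02
     *   const payload = zlib.inflateSync(buf.subarray(5));
     *   const model = JSON.parse(payload.toString()); // { task, weights, biases, ... }
     */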

    #calculateClassificationAccuracy(predictions, actuals, taskType) {
        let correctPredictions = 0;
        for (let i = 0; i < predictions.length; i++) {
            let predictedLabel;
            let actualLabel;
            if (taskType === 'binary_classification') {
                predictedLabel = predictions[i][0] >= 0.5 ? 1 : 0;
                actualLabel = actuals[i][0]; // Assuming actuals are also arrays like [[0], [1]]
            } else if (taskType === 'multi_class_classification') {
                // Find the index of the maximum value in predictions for the predicted class
                predictedLabel = predictions[i].indexOf(Math.max(...predictions[i]));
                if (Array.isArray(actuals[i]) && actuals[i].length === 1) {
                    // If actuals[i] is an array with a single element (e.g., [0], [1]), it's integer-encoded.
                    actualLabel = actuals[i][0]; // Directly take the integer label
                } else if (Array.isArray(actuals[i]) && actuals[i].length > 1) {
                    // Otherwise, assume one-hot encoding if it's an array with multiple elements (e.g., [1,0,0])
                    actualLabel = actuals[i].indexOf(1);
                } else {
                    // Fallback for a direct integer label if actuals[i] is not an array (e.g., 0, 1, 2 directly).
                    // This case might not be hit if Y_train is always provided as arrays of arrays.
                    actualLabel = actuals[i];
                }
            }
            if (predictedLabel === actualLabel) {
                correctPredictions++;
            }
        }
        return (correctPredictions / predictions.length) * 100;
    }

    #isOneHotEncoded(Y_train) {
        /**
         * Checks if all rows in Y_train are one-hot encoded.
         * Each row must:
         * - Contain only 0s and 1s
         * - Have exactly one "1"
         */
        for (let i = 0; i < Y_train.length; i++) {
            const row = Y_train[i];
            if (!Array.isArray(row)) return false;
            let onesCount = 0;
            for (let j = 0; j < row.length; j++) {
                if (row[j] !== 0 && row[j] !== 1) return false;
                if (row[j] === 1) onesCount++;
            }
            if (onesCount !== 1) return false;
        }
        return true;
    }
}

module.exports = Neurex;
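
/*
 * End-to-end sketch (illustrative): a minimal regression setup, assuming the
 * Layers class exported by this package provides a 'linear' activation (see
 * the @example on train() for a classification equivalent). A linear output
 * activation with 'mse' loss is what train() recognizes as a regression task.
 *
 *   const { Neurex, Layers } = require('neurex');
 *   const model = new Neurex();
 *   const layer = new Layers();
 *
 *   const X_train = [[0], [1], [2], [3]];
 *   const Y_train = [[1], [3], [5], [7]]; // y = 2x + 1
 *
 *   model.sequentialBuild([
 *       layer.inputShape(X_train),
 *       layer.connectedLayer("relu", 4),
 *       layer.connectedLayer("linear", 1)
 *   ]);
 *   model.build();
 *   model.train(X_train, Y_train, 'mse', 500, 2);
 *   console.log(model.predict([[4]])); // should approach [[9]]
 */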