UNPKG

ml-q-learning

Version:

Library implementing the q-learning algorithm and several exploration algorithms.

71 lines (70 loc) 2.71 kB
"use strict";
/**
 * Action-selection policies for q-learning.
 *
 * Every policy receives `actionsStats`, an array holding one numeric score per
 * action, and returns the index of the action to take.
 */
Object.defineProperty(exports, "__esModule", { value: true });
const tfjs_core_1 = require("@tensorflow/tfjs-core");

/**
 * Reads the first element of a tensor and disposes the tensor afterwards.
 *
 * @param {object} tensor - Tensor owned by the caller; it is disposed here.
 * @returns {number} First value stored in the tensor.
 */
function readScalarAndDispose(tensor) {
    try {
        return tensor.dataSync()[0];
    }
    finally {
        // Dispose even when the read throws so the tensor is not leaked.
        tensor.dispose();
    }
}

/**
 * Computes the exploration rate for a given episode: `epsilonDecrease` raised
 * to the number of completed decay steps, never lower than `minEpsilon`.
 *
 * @param {number} minEpsilon - Lower bound of the exploration rate.
 * @param {number} epsilonDecrease - Multiplicative decay applied per step.
 * @param {number} episodeDenominator - Number of episodes per decay step.
 * @param {number} episode - Current episode number.
 * @returns {number} Exploration rate to use for this episode.
 */
function decayedEpsilon(minEpsilon, epsilonDecrease, episodeDenominator, episode) {
    const decaySteps = Math.floor(episode / episodeDenominator);
    return Math.max(minEpsilon, Math.pow(epsilonDecrease, decaySteps));
}

/**
 * Returns the index of the largest value in `array`.
 *
 * @param {number[]} array - Values to scan.
 * @returns {number} Index reported by the tensor `argMax` operation.
 */
function argMax(array) {
    const argMaxTensor = tfjs_core_1.tidy(() => tfjs_core_1.tensor1d(array).argMax());
    return readScalarAndDispose(argMaxTensor);
}
exports.argMax = argMax;

/**
 * Picks an action uniformly at random.
 *
 * @param {number[]} actionsStats - One score per action (only its length is used).
 * @returns {number} Random index in `[0, actionsStats.length)`.
 */
function randomPickAction(actionsStats) {
    return Math.floor(Math.random() * actionsStats.length);
}
exports.randomPickAction = randomPickAction;

/**
 * Picks the action with the highest score.
 *
 * @param {number[]} actionsStats - One score per action.
 * @returns {number} Index of the best-scoring action.
 */
function greedyPickAction(actionsStats) {
    return argMax(actionsStats);
}
exports.greedyPickAction = greedyPickAction;

/**
 * Builds an epsilon-greedy policy: with probability `epsilon` a uniformly
 * random action is chosen, otherwise the greedy one.
 *
 * @param {number} [epsilon=0.05] - Exploration probability.
 * @returns {function(number[]): number} Policy mapping scores to an action index.
 */
function epsilonGreedyPickAction(epsilon = 0.05) {
    return (actionsStats) => {
        // Math.random() is in [0, 1): a strict comparison makes the exploration
        // probability exactly epsilon, so epsilon = 0 never explores.
        if (Math.random() < epsilon) {
            return randomPickAction(actionsStats);
        }
        return greedyPickAction(actionsStats);
    };
}
exports.epsilonGreedyPickAction = epsilonGreedyPickAction;

/**
 * Builds an epsilon-greedy policy whose epsilon decays with the episode number.
 *
 * @param {number} [minEpsilon=0.05] - Lower bound of the exploration rate.
 * @param {number} [epsilonDecrease=0.99] - Multiplicative decay per step.
 * @param {number} [episodeDenominator=1] - Number of episodes per decay step.
 * @returns {function(number[], number): number} Policy taking the scores and
 *   the current episode and returning an action index.
 */
function decayingEpsilonGreedyPickAction(minEpsilon = 0.05, epsilonDecrease = 0.99, episodeDenominator = 1) {
    return (actionsStats, episode) => {
        const epsilon = decayedEpsilon(minEpsilon, epsilonDecrease, episodeDenominator, episode);
        return epsilonGreedyPickAction(epsilon)(actionsStats);
    };
}
exports.decayingEpsilonGreedyPickAction = decayingEpsilonGreedyPickAction;

/**
 * Samples an action from the softmax distribution of the scores, so that
 * higher-scoring actions are proportionally more likely to be chosen.
 *
 * @param {number[]} actionsStats - One score per action.
 * @returns {number} Sampled action index.
 */
function softmaxPickAction(actionsStats) {
    // A single action leaves nothing to sample; tf.multinomial also requires
    // at least two outcomes, so answer directly.
    if (actionsStats.length === 1) {
        return 0;
    }
    const sampleTensor = tfjs_core_1.tidy(() => {
        const probabilities = tfjs_core_1.tensor1d(actionsStats).softmax();
        // Draw exactly one index. `normalized = true` tells multinomial that
        // the input already holds probabilities, so softmax is not applied twice.
        return tfjs_core_1.multinomial(probabilities, 1, undefined, true);
    });
    return readScalarAndDispose(sampleTensor);
}
exports.softmaxPickAction = softmaxPickAction;

/**
 * Builds a policy that, with probability `epsilon`, samples an action from the
 * softmax distribution of the scores and otherwise picks the greedy action.
 *
 * @param {number} [epsilon=0.05] - Probability of using softmax sampling.
 * @returns {function(number[]): number} Policy mapping scores to an action index.
 */
function epsilonSoftmaxGreedyPickAction(epsilon = 0.05) {
    return (actionsStats) => {
        // Strict comparison for the same reason as in epsilonGreedyPickAction.
        if (Math.random() < epsilon) {
            return softmaxPickAction(actionsStats);
        }
        return greedyPickAction(actionsStats);
    };
}
exports.epsilonSoftmaxGreedyPickAction = epsilonSoftmaxGreedyPickAction;

/**
 * Builds an epsilon-softmax-greedy policy whose epsilon decays with the
 * episode number.
 *
 * @param {number} [minEpsilon=0.05] - Lower bound of the exploration rate.
 * @param {number} [epsilonDecrease=0.99] - Multiplicative decay per step.
 * @param {number} [episodeDenominator=1] - Number of episodes per decay step.
 * @returns {function(number[], number): number} Policy taking the scores and
 *   the current episode and returning an action index.
 */
function decayingEpsilonSoftmaxGreedyPickAction(minEpsilon = 0.05, epsilonDecrease = 0.99, episodeDenominator = 1) {
    return (actionsStats, episode) => {
        const epsilon = decayedEpsilon(minEpsilon, epsilonDecrease, episodeDenominator, episode);
        return epsilonSoftmaxGreedyPickAction(epsilon)(actionsStats);
    };
}
exports.decayingEpsilonSoftmaxGreedyPickAction = decayingEpsilonSoftmaxGreedyPickAction;