/*
 * ml-q-learning
 * Version: (not specified)
 * Library implementing the Q-learning algorithm and several exploration algorithms.
 * 71 lines (70 loc) • 2.71 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
const tfjs_core_1 = require("@tensorflow/tfjs-core");
/**
 * Returns the index of the largest entry of `array`, computed through
 * a one-dimensional tfjs tensor.
 *
 * @param {number[]} array - Values to scan (handed to `tensor1d` as-is).
 * @returns {number} The first element reported by the arg-max tensor.
 */
function argMax(array) {
    // Intermediate tensors created inside tidy() are released by tfjs;
    // only the returned arg-max tensor survives and is disposed below.
    const winner = tfjs_core_1.tidy(() => tfjs_core_1.tensor1d(array).argMax());
    const [bestIndex] = winner.dataSync();
    winner.dispose();
    return bestIndex;
}
exports.argMax = argMax;
/**
 * Picks an action index uniformly at random.
 *
 * @param {ArrayLike<*>} actionsStats - Per-action values; only `length` is read.
 * @returns {number} `Math.floor(Math.random() * actionsStats.length)`, i.e. an
 *   integer from 0 up to (but excluding) the length; 0 when the input is empty.
 */
function randomPickAction(actionsStats) {
    const actionCount = actionsStats.length;
    const scaledDraw = Math.random() * actionCount;
    return Math.floor(scaledDraw);
}
exports.randomPickAction = randomPickAction;
/**
 * Greedy policy: picks the action whose value is the largest.
 *
 * @param {number[]} actionsStats - Per-action values, forwarded to `argMax`.
 * @returns {number} Whatever index `argMax` reports for `actionsStats`.
 */
function greedyPickAction(actionsStats) {
    return argMax(actionsStats);
}
exports.greedyPickAction = greedyPickAction;
/**
 * Builds an epsilon-greedy picker.
 *
 * @param {number} [epsilon=0.05] - Threshold compared against `Math.random()`;
 *   a draw less than or equal to it triggers a random pick.
 * @returns {function(number[]): number} Picker that delegates to
 *   `randomPickAction` when exploring and to `greedyPickAction` otherwise.
 */
function epsilonGreedyPickAction(epsilon = 0.05) {
    return (actionsStats) => {
        const shouldExplore = Math.random() <= epsilon;
        const pickAction = shouldExplore ? randomPickAction : greedyPickAction;
        return pickAction(actionsStats);
    };
}
exports.epsilonGreedyPickAction = epsilonGreedyPickAction;
/**
 * Builds an epsilon-greedy picker whose epsilon decays with the episode number.
 *
 * The epsilon used for a pick is
 * `max(minEpsilon, epsilonDecrease ^ floor(episode / episodeDenominator))`.
 *
 * @param {number} [minEpsilon=0.05] - Lower bound for epsilon.
 * @param {number} [epsilonDecrease=0.99] - Base of the exponential decay.
 * @param {number} [episodeDenominator=1] - Number of episodes per decay step.
 * @returns {function(number[], number): number} Picker taking the per-action
 *   values and the current episode, returning the chosen action index.
 */
function decayingEpsilonGreedyPickAction(minEpsilon = 0.05, epsilonDecrease = 0.99, episodeDenominator = 1) {
    return (actionsStats, episode) => {
        const decaySteps = Math.floor(episode / episodeDenominator);
        const decayedEpsilon = Math.pow(epsilonDecrease, decaySteps);
        // Math.max() propagates NaN, and `Math.random() <= NaN` is always false,
        // so a missing/non-numeric episode (or 0 / 0) would silently switch
        // exploration off. Keep the documented floor in that case instead.
        const epsilon = Number.isNaN(decayedEpsilon)
            ? minEpsilon
            : Math.max(minEpsilon, decayedEpsilon);
        return epsilonGreedyPickAction(epsilon)(actionsStats);
    };
}
exports.decayingEpsilonGreedyPickAction = decayingEpsilonGreedyPickAction;
/**
 * Samples one action index from the softmax distribution over the
 * per-action values (higher values are proportionally more likely).
 *
 * @param {number[]} actionsStats - Per-action values used as softmax inputs.
 * @returns {number} The sampled action index.
 */
function softmaxPickAction(actionsStats) {
    // tfjs' multinomial rejects distributions with fewer than two outcomes;
    // with a single action there is nothing to sample anyway.
    if (actionsStats.length === 1) {
        return 0;
    }
    const sampledAction = tfjs_core_1.tidy(() => {
        const probabilities = tfjs_core_1.tensor1d(actionsStats).softmax();
        // Draw exactly ONE sample: the sampled value itself is the action index.
        // `normalized = true` tells multinomial the input already holds
        // probabilities, so softmax is not applied a second time.
        return tfjs_core_1.multinomial(probabilities, 1, undefined, true);
    });
    try {
        return sampledAction.dataSync()[0];
    }
    finally {
        // Release the tensor even if reading it back fails.
        sampledAction.dispose();
    }
}
exports.softmaxPickAction = softmaxPickAction;
/**
 * Builds a picker that is greedy most of the time and falls back to
 * softmax sampling when exploring.
 *
 * @param {number} [epsilon=0.05] - Threshold compared against `Math.random()`;
 *   a draw less than or equal to it triggers a softmax pick.
 * @returns {function(number[]): number} Picker that delegates to
 *   `softmaxPickAction` when exploring and to `greedyPickAction` otherwise.
 */
function epsilonSoftmaxGreedyPickAction(epsilon = 0.05) {
    return (actionsStats) => {
        const shouldExplore = Math.random() <= epsilon;
        const pickAction = shouldExplore ? softmaxPickAction : greedyPickAction;
        return pickAction(actionsStats);
    };
}
exports.epsilonSoftmaxGreedyPickAction = epsilonSoftmaxGreedyPickAction;
/**
 * Builds an epsilon-softmax-greedy picker whose epsilon decays with the
 * episode number.
 *
 * The epsilon used for a pick is
 * `max(minEpsilon, epsilonDecrease ^ floor(episode / episodeDenominator))`.
 *
 * @param {number} [minEpsilon=0.05] - Lower bound for epsilon.
 * @param {number} [epsilonDecrease=0.99] - Base of the exponential decay.
 * @param {number} [episodeDenominator=1] - Number of episodes per decay step.
 * @returns {function(number[], number): number} Picker taking the per-action
 *   values and the current episode, returning the chosen action index.
 */
function decayingEpsilonSoftmaxGreedyPickAction(minEpsilon = 0.05, epsilonDecrease = 0.99, episodeDenominator = 1) {
    return (actionsStats, episode) => {
        const decaySteps = Math.floor(episode / episodeDenominator);
        const decayedEpsilon = Math.pow(epsilonDecrease, decaySteps);
        // Math.max() propagates NaN, and `Math.random() <= NaN` is always false,
        // so a missing/non-numeric episode (or 0 / 0) would silently switch
        // exploration off. Keep the documented floor in that case instead.
        const epsilon = Number.isNaN(decayedEpsilon)
            ? minEpsilon
            : Math.max(minEpsilon, decayedEpsilon);
        return epsilonSoftmaxGreedyPickAction(epsilon)(actionsStats);
    };
}
exports.decayingEpsilonSoftmaxGreedyPickAction = decayingEpsilonSoftmaxGreedyPickAction;