tabular-sarsa
Version:
A tabular implementation of the SARSA reinforcement learning algorithm, which is related to Q-learning.
78 lines (73 loc) • 1.99 kB
JavaScript
/**
 * Transition table of a small Markov decision process with 4 states and
 * 2 actions per state.
 *
 * `states[s].actions[a]` holds the `reward` for taking action `a` in state
 * `s` and the index of the `nextState` that action leads to.
 */
const markovDecisionProcess = {
  numberOfPossibleStates: 4,
  numberOfPossibleActions: 2,
  states: [
    // State 0: action 0 -> state 2, action 1 -> state 3
    { actions: [{ reward: 1, nextState: 2 }, { reward: -2, nextState: 3 }] },
    // State 1: action 0 -> state 2, action 1 -> state 3
    { actions: [{ reward: -7, nextState: 2 }, { reward: 100, nextState: 3 }] },
    // State 2: action 0 -> state 0, action 1 -> state 3
    { actions: [{ reward: 1, nextState: 0 }, { reward: -5, nextState: 3 }] },
    // State 3: action 0 -> state 1, action 1 -> state 0
    { actions: [{ reward: -10, nextState: 1 }, { reward: -10, nextState: 0 }] },
  ],
};
/**
 * Stateful environment that walks a deterministic Markov decision process.
 *
 * The MDP description must expose a `states` array in which
 * `states[s].actions[a]` is an object carrying the `reward` for taking
 * action `a` in state `s` and the index of the resulting `nextState`.
 */
class MdpEnvironment {
  /**
   * Creates an environment positioned at a uniformly random starting state.
   *
   * @param {object} mdp - MDP description (see the class comment).
   */
  constructor(mdp) {
    this._mdp = mdp;
    // Sample the start state over the states that actually exist. The
    // previous code called Object.keys() on the numeric state count, which
    // yields an empty array, so the start state was always 0.
    this._state = Math.floor(Math.random() * mdp.states.length);
  }

  /**
   * @returns {number} Index of the state the environment is currently in.
   */
  getCurrentState() {
    return this._state;
  }

  /**
   * Applies an action in the current state and moves to its successor state.
   *
   * @param {number} action - Index into the current state's `actions` array.
   * @returns {number} The reward attached to the chosen transition.
   */
  takeAction(action) {
    const actionInfo = this._mdp.states[this._state].actions[action];
    this._state = actionInfo.nextState;
    return actionInfo.reward;
  }
}
// Public API: the dimensions of the built-in MDP plus a factory that
// creates a fresh environment over it.
const { numberOfPossibleStates, numberOfPossibleActions } = markovDecisionProcess;

module.exports.numberOfPossibleStates = numberOfPossibleStates;
module.exports.numberOfPossibleActions = numberOfPossibleActions;

/**
 * Builds a new environment backed by the module's built-in MDP.
 * Works both as a plain call and with `new`.
 *
 * @returns {MdpEnvironment} A freshly initialised environment.
 */
module.exports.Environment = function () {
  const environment = new MdpEnvironment(markovDecisionProcess);
  return environment;
};