/*
 * Package: @ai-on-browser/data-analysis-models
 * Description: Data analysis model package without any dependencies.
 * Listing metadata: 81 lines (68 loc), 1.75 kB, JavaScript. Version not recorded.
 */
import { RLRealRange, RLEnvironmentBase } from './base.js'
/**
 * In-hypercube environment.
 *
 * The state vector is the position (first `d` entries) followed by the
 * velocity (last `d` entries). An action is an array whose first element is an
 * index in `0 .. 2d - 1`: `Math.floor(index / 2)` picks the axis, and an even
 * index means the positive direction while an odd index means the negative one.
 */
export default class InHypercubeRLEnvironment extends RLEnvironmentBase {
	/**
	 * @param {number} [d] Dimension of the environment
	 */
	constructor(d = 2) {
		super()

		this._d = d
		this._position = new Array(this._d).fill(0)
		this._velocity = new Array(this._d).fill(0)

		// Magnitude written into the velocity component selected by an action.
		this._force = 0.1
		// Axis whose negative boundary counts as the goal.
		this._success_dim = 0
		// Distance from the origin at which a coordinate hits the boundary.
		this._fail_position = 1
		// An episode is reported as done once `epoch` reaches this value.
		this._max_step = 20
		this._reward = {
			goal: 1,
			step: 0,
			fail: 0,
		}
	}

	/**
	 * Action space: a single list holding the indices `0 .. 2d - 1`.
	 *
	 * @type {number[][]}
	 */
	get actions() {
		const indices = Array.from({ length: this._d * 2 }, (_, idx) => idx)
		return [indices]
	}

	/**
	 * State space: `d` ranges built from `(-fail_position, fail_position)` for
	 * the position, followed by `d` ranges built from `(-1, 1)` for the velocity.
	 *
	 * @type {RLRealRange[]}
	 */
	get states() {
		const limit = this._fail_position
		const positionRanges = Array.from({ length: this._d }, () => new RLRealRange(-limit, limit))
		const velocityRanges = Array.from({ length: this._d }, () => new RLRealRange(-1, 1))
		return [...positionRanges, ...velocityRanges]
	}

	/**
	 * Resets the base environment, then puts position and velocity back to zero.
	 *
	 * @returns {number[]} State after the reset
	 */
	reset() {
		super.reset()

		const dim = this._d
		this._position = new Array(dim).fill(0)
		this._velocity = new Array(dim).fill(0)
		return this.state()
	}

	/**
	 * Returns the current state as a fresh array: position, then velocity.
	 *
	 * @returns {number[]} Current state
	 */
	state() {
		const { _position: position, _velocity: velocity } = this
		return [...position, ...velocity]
	}

	/**
	 * Overwrites the current position and velocity from a state vector.
	 *
	 * @param {number[]} state Position (first `d` entries) followed by velocity
	 */
	setState(state) {
		const dim = this._d
		this._position = state.slice(0, dim)
		this._velocity = state.slice(dim)
	}

	/**
	 * Computes the outcome of applying an action to a state without touching
	 * the environment's own position and velocity.
	 *
	 * @param {number[]} state Position (first `d` entries) followed by velocity
	 * @param {number[]} action Array whose first element is the action index
	 * @returns {{state: number[], reward: number, done: boolean}} Next state, reward and termination flag
	 */
	test(state, action) {
		const dim = this._d
		const limit = this._fail_position

		// `slice` copies, so the caller's state array is left as it was.
		const position = state.slice(0, dim)
		const velocity = state.slice(dim)

		// The chosen axis gets its velocity replaced (not accumulated); every
		// other axis keeps the velocity carried in by `state`.
		const axis = Math.floor(action[0] / 2)
		const direction = action[0] % 2 === 0 ? 1 : -1
		velocity[axis] = this._force * direction

		for (let k = 0; k < dim; k++) {
			position[k] += velocity[k]
		}

		// Success takes precedence: a failure is only reported when the goal
		// boundary has not been reached.
		const success = position[this._success_dim] <= -limit
		const fail = success ? false : position.some(coordinate => Math.abs(coordinate) >= limit)
		const done = this.epoch >= this._max_step || success || fail

		let reward = this._reward.step
		if (fail) {
			reward = this._reward.fail
		} else if (success) {
			reward = this._reward.goal
		}

		return {
			state: [...position, ...velocity],
			reward,
			done,
		}
	}
}