UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

81 lines (62 loc) 1.7 kB
import { RLRealRange, RLEnvironmentBase } from './base.js'

/**
 * Pendulum environment.
 *
 * The observable state is the triple `[cos(theta), sin(theta), dtheta]` and the
 * only action is a torque, which `test` clips to `[-max_torque, max_torque]`.
 */
export default class PendulumRLEnvironment extends RLEnvironmentBase {
	constructor() {
		super()
		// Current angle and angular velocity.
		this._theta = 0
		this._dtheta = 0

		// Constants read by the dynamics in `test`.
		this._mass = 1
		this._length = 1
		this._max_speed = 8
		this._max_torque = 2
		this._g = 9.8
		this._dt = 0.05
		// Threshold compared against `this.epoch` when reporting `done`.
		this._max_step = 200
	}

	/**
	 * Action ranges: a single real range spanning `[-max_torque, max_torque]`.
	 *
	 * @type {RLRealRange[]}
	 */
	get actions() {
		const torqueLimit = this._max_torque
		return [new RLRealRange(-torqueLimit, torqueLimit)]
	}

	/**
	 * State ranges, in the same order as the values returned by `state()`:
	 * two `[-1, 1]` ranges followed by `[-max_speed, max_speed]`.
	 *
	 * @type {RLRealRange[]}
	 */
	get states() {
		const speedLimit = this._max_speed
		const cosRange = new RLRealRange(-1, 1)
		const sinRange = new RLRealRange(-1, 1)
		const speedRange = new RLRealRange(-speedLimit, speedLimit)
		return [cosRange, sinRange, speedRange]
	}

	/**
	 * Resets the base environment, then draws a random angle from `[-PI, PI)`
	 * and a random angular velocity from `[-0.5, 0.5)`.
	 *
	 * @returns {number[]} The state after the reset
	 */
	reset() {
		super.reset()

		// Draw the angle sample first, then the velocity sample.
		const angleSample = Math.random()
		const speedSample = Math.random()
		this._theta = angleSample * 2 * Math.PI - Math.PI
		this._dtheta = speedSample - 0.5

		return this.state()
	}

	/**
	 * Returns the current state.
	 *
	 * @returns {number[]} `[cos(theta), sin(theta), dtheta]`
	 */
	state() {
		const angle = this._theta
		return [Math.cos(angle), Math.sin(angle), this._dtheta]
	}

	/**
	 * Overwrites the current angle and angular velocity from a state triple.
	 *
	 * @param {number[]} state `[cos(theta), sin(theta), dtheta]`
	 */
	setState(state) {
		const sinValue = state[1]
		const cosValue = state[0]
		// Recover the angle from its sine/cosine pair.
		this._theta = Math.atan2(sinValue, cosValue)
		this._dtheta = state[2]
	}

	/**
	 * Computes the outcome of applying an action to a given state. The stored
	 * angle and angular velocity of this instance are not modified.
	 *
	 * @param {number[]} state `[cos(theta), sin(theta), dtheta]`
	 * @param {number[]} action Array whose first element is the torque to apply
	 * @returns {{state: number[], reward: number, done: boolean}} Next state, reward (negated cost) and termination flag
	 */
	test(state, action) {
		// Restricts `value` to the closed interval [lower, upper].
		const limitTo = (value, lower, upper) => {
			if (value < lower) return lower
			if (value > upper) return upper
			return value
		}

		const gravity = this._g
		const mass = this._mass
		const length = this._length
		const step = this._dt

		let angle = Math.atan2(state[1], state[0])
		let speed = state[2]
		const torque = limitTo(action[0], -this._max_torque, this._max_torque)

		// The cost is evaluated on the state *before* the update.
		const angleCost = this._angle_normalize(angle) ** 2
		const speedCost = 0.1 * speed ** 2
		const torqueCost = 0.001 * torque ** 2
		const cost = angleCost + speedCost + torqueCost

		// Integrate velocity first, then advance the angle with the new
		// (not yet clipped) velocity; the speed limit is applied afterwards.
		const gravityTerm = ((-3 * gravity) / (2 * length)) * Math.sin(angle + Math.PI)
		const torqueTerm = (3 / (mass * length ** 2)) * torque
		speed += (gravityTerm + torqueTerm) * step
		angle += speed * step
		speed = limitTo(speed, -this._max_speed, this._max_speed)

		return {
			state: [Math.cos(angle), Math.sin(angle), speed],
			reward: -cost,
			// NOTE(review): `epoch` is not defined in this class; presumably maintained by RLEnvironmentBase.
			done: this.epoch >= this._max_step,
		}
	}

	/**
	 * Wraps an angle into the interval `[-PI, PI)`.
	 *
	 * @param {number} t Angle to wrap
	 * @returns {number} Equivalent angle within `[-PI, PI)`
	 */
	_angle_normalize(t) {
		const fullTurn = 2 * Math.PI
		// Shift so the target interval becomes [0, 2*PI), wrap, then shift back.
		let shifted = t + Math.PI
		while (shifted < 0) {
			shifted += fullTurn
		}
		while (shifted >= fullTurn) {
			shifted -= fullTurn
		}
		return shifted - Math.PI
	}
}