UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

ai-on-browser.github.io/docs

ai-on-browser/ai-on-browser.github.io

100 lines (86 loc) • 2.31 kB

JavaScript

import { RLRealRange, RLEnvironmentBase } from './base.js'

/**
 * Cartpole environment
 *
 * A cart moves along one axis while a pendulum is attached to it. The state is
 * `[position, angle, cart velocity, pendulum angular velocity]` and the single
 * action component chooses the direction of a fixed-magnitude force on the cart.
 */
export default class CartPoleRLEnvironment extends RLEnvironmentBase {
	constructor() {
		super()

		// Current state of the simulation; `reset()` overwrites these with small random values.
		this._position = 0
		this._angle = 0
		this._cart_velocity = 0
		this._pendulum_velocity = 0

		// Physical parameters of the model.
		// The alternative "OpenAI gym setting" is cart weight 1.0, pendulum weight 0.1
		// and pendulum length 0.5; it was never selectable (it lived behind `if (true) … else`).
		this._cart_weight = 0.711
		this._pendulum_weight = 0.209
		this._pendulum_length = 0.326

		// Gravitational acceleration used in the equations of motion.
		this._g = 9.8
		// Integration time step applied by `test()`.
		this._t = 0.02
		// Magnitude of the force applied to the cart; the action only selects its sign.
		this._force = 10

		// An episode fails once |position| exceeds this bound ...
		this._fail_position = 2.4
		// ... or once |angle| reaches 12 degrees (stored in radians).
		this._fail_angle = (12 / 180) * Math.PI
		// Episodes are reported as done once `this.epoch` reaches this value.
		this._max_step = 200

		this._reward = {
			goal: 1,
			step: 1,
			fail: 0,
		}
	}

	/**
	 * Available actions: one action component whose value is either `0` or `1`.
	 * `test()` treats `0` as a negative force and any other value as a positive force.
	 *
	 * @type {Array<Array<number>>}
	 */
	get actions() {
		return [[0, 1]]
	}

	/**
	 * Value ranges of the state components, in the order
	 * `[position, angle, cart velocity, pendulum velocity]`.
	 *
	 * @type {RLRealRange[]}
	 */
	get states() {
		return [
			new RLRealRange(-this._fail_position, this._fail_position),
			new RLRealRange(-this._fail_angle, this._fail_angle),
			new RLRealRange(-2, 2),
			new RLRealRange(-3, 3),
		]
	}

	/**
	 * Reset the environment. Delegates to the base class first, then draws every
	 * state component uniformly from [-0.05, 0.05).
	 *
	 * @returns {number[]} The new state
	 */
	reset() {
		super.reset()
		this._position = Math.random() * 0.1 - 0.05
		this._angle = Math.random() * 0.1 - 0.05
		this._cart_velocity = Math.random() * 0.1 - 0.05
		this._pendulum_velocity = Math.random() * 0.1 - 0.05

		return this.state()
	}

	/**
	 * Returns the current state.
	 *
	 * @returns {number[]} `[position, angle, cart velocity, pendulum velocity]`
	 */
	state() {
		return [this._position, this._angle, this._cart_velocity, this._pendulum_velocity]
	}

	/**
	 * Overwrite the current state.
	 *
	 * @param {number[]} state `[position, angle, cart velocity, pendulum velocity]`
	 */
	setState(state) {
		this._position = state[0]
		this._angle = state[1]
		this._cart_velocity = state[2]
		this._pendulum_velocity = state[3]
	}

	/**
	 * Compute the outcome of applying an action to a given state. The stored
	 * state fields of this environment are not modified.
	 *
	 * @param {number[]} state `[position, angle, cart velocity, pendulum velocity]`
	 * @param {number[]} action Action; only `action[0]` is read (`0` pushes in the negative direction, anything else in the positive direction)
	 * @returns {{state: number[], reward: number, done: boolean}} Next state, reward and termination flag
	 */
	test(state, action) {
		const [x, theta, dx, dtheta] = state

		const force = this._force * (action[0] === 0 ? -1 : 1)
		const M = this._cart_weight
		const m = this._pendulum_weight
		const l = this._pendulum_length
		const sinTheta = Math.sin(theta)
		const cosTheta = Math.cos(theta)

		// Angular acceleration of the pendulum, then the cart acceleration that depends on it.
		const ddtheta =
			((M + m) * this._g * sinTheta - cosTheta * (force + m * l * dtheta ** 2 * sinTheta)) /
			(l * ((4 / 3) * (M + m) - m * cosTheta ** 2))
		const ddx = (force + m * l * (dtheta ** 2 * sinTheta - ddtheta * cosTheta)) / (M + m)

		// Explicit Euler step: positions advance with the old velocities,
		// velocities advance with the accelerations computed above.
		const nextX = x + dx * this._t
		const nextTheta = theta + dtheta * this._t
		const nextDx = dx + ddx * this._t
		const nextDtheta = dtheta + ddtheta * this._t

		// The angle bound is inclusive while the position bound is exclusive.
		const fail = Math.abs(nextTheta) >= this._fail_angle || Math.abs(nextX) > this._fail_position
		// `epoch` is not defined in this class; it is expected to come from RLEnvironmentBase.
		const done = this.epoch >= this._max_step || fail
		const reward = fail ? this._reward.fail : done ? this._reward.goal : this._reward.step

		return {
			state: [nextX, nextTheta, nextDx, nextDtheta],
			reward,
			done,
		}
	}
}