UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

225 lines (211 loc) 6.49 kB
import { RLEnvironmentBase, RLRealRange, RLStepResult } from './base.js' /** * Breaker environment */ export default class BreakerRLEnvironment extends RLEnvironmentBase { constructor() { super() this._size = [300, 500] this._padding = [ [30, 30], [370, 30], ] this._block_size = [30, 10] this._paddle_baseline = 20 this._paddle_size = [60, 8] this._ball_radius = 3 this._ball_speed = 3 this._paddle_speed = 5 this._block_positions = [] for (let i = this._padding[0][0]; i < this._size[0] - this._padding[0][1]; i += this._block_size[0]) { for (let j = this._padding[1][0]; j < this._size[1] - this._padding[1][1]; j += this._block_size[1]) { this._block_positions.push([i + this._block_size[0] / 2, j + this._block_size[1] / 2]) } } this._ball_position = [0, 0] this._paddle_position = 0 this._ball_velocity = [0, 0] this._block_existances = [] this._reward = { break: 100, step: 0.1, hit: 100, goal: 1000, fail: -1000, } } get actions() { return [[-1, 0, 1]] } get states() { const block_existance = [] for (let i = 0; i < this._block_positions.length; i++) { block_existance[i] = [0, 1] } return [ new RLRealRange(0, this._size[0]), new RLRealRange(0, this._size[1]), new RLRealRange(-this._ball_speed, this._ball_speed), new RLRealRange(-this._ball_speed, this._ball_speed), new RLRealRange(0, this._size[0]), ...block_existance, ] } reset() { super.reset() this._paddle_position = this._size[0] / 2 this._ball_position = [ this._size[0] / 2, this._paddle_baseline + this._paddle_size[1] / 2 + this._ball_radius * 2, ] const vx = Math.random() * (this._ball_speed - 0.1) * 2 - (this._ball_speed - 0.1) const vy = Math.sqrt(Math.abs(vx ** 2 - this._ball_speed ** 2)) this._ball_velocity = [vx, vy] this._block_existances = [] for (let i = 0; i < this._block_positions.length; i++) { this._block_existances[i] = 1 } return this.state() } state() { return [...this._ball_position, ...this._ball_velocity, this._paddle_position, ...this._block_existances] } setState(state) { this._ball_position[0] = state[0] this._ball_position[1] = state[1] this._ball_velocity[0] = state[2] this._ball_velocity[1] = state[3] this._paddle_position = state[4] this._block_existances = state.slice(5) } test(state, action) { let move_paddle = state[4] + this._paddle_speed * action[0] if (move_paddle < this._paddle_size[0] / 2) { move_paddle = this._paddle_size[0] / 2 } else if (move_paddle > this._size[0] - this._paddle_size[0] / 2) { move_paddle = this._size[0] - this._paddle_size[0] / 2 } const move_ball = [state[0], state[1]] const velo_ball = [state[2], state[3]] for (let i = 0; i < 2; i++) { move_ball[i] += velo_ball[i] } const block_existance = state.slice(5) let min_d = Infinity let new_velo = velo_ball let hit_paddle = false { const [d, v, p] = this._check_contact(move_ball, [move_paddle, this._paddle_baseline], this._paddle_size) if (d < min_d) { min_d = d if (p) { new_velo = [ Math.sin((p * Math.PI) / 2) * this._ball_speed, Math.cos((p * Math.PI) / 2) * this._ball_speed, ] } else { new_velo = [velo_ball[0] * v[0], velo_ball[1] * v[1]] } hit_paddle = true } } for (const x of [-10, this._size[0] + 10]) { const [d] = this._check_contact(move_ball, [x, this._size[1] / 2], [20, this._size[1]]) if (d < min_d) { min_d = d new_velo = [-Math.sign(x) * Math.abs(velo_ball[0]), velo_ball[1]] hit_paddle = false } } { const [d] = this._check_contact(move_ball, [this._size[0] / 2, this._size[1] + 10], [this._size[0], 20]) if (d < min_d) { min_d = d new_velo = [velo_ball[0], -Math.abs(velo_ball[1])] hit_paddle = false } } const [under_d] = this._check_contact(move_ball, [this._size[0] / 2, -10], [this._size[0], 20]) let erace_block = -1 for (let i = 0; i < block_existance.length; i++) { if (block_existance[i] === 0) { continue } const [d, v] = this._check_contact(move_ball, this._block_positions[i], this._block_size) if (d < min_d) { min_d = d new_velo = [velo_ball[0] * v[0], velo_ball[1] * v[1]] erace_block = i hit_paddle = false } } let reward = this._reward.step if (erace_block >= 0) { block_existance[erace_block] = 0 reward = this._reward.break } if (under_d < Infinity) { reward = this._reward.fail } else if (hit_paddle) { reward = this._reward.hit } const done = block_existance.every(e => e === 0) || under_d < Infinity return new RLStepResult(this, [...move_ball, ...new_velo, move_paddle, ...block_existance], reward, done) } _check_contact(ball_c, block_c, block_size) { for (let i = 0; i < 2; i++) { if ( ball_c[i] + this._ball_radius < block_c[i] - block_size[i] / 2 || block_c[i] + block_size[i] / 2 < ball_c[i] - this._ball_radius ) { return [Infinity, []] } } let d = Infinity for (const [rl, ud] of [ [-1, -1], [-1, 1], [1, -1], [1, 1], ]) { if (rl * ball_c[0] <= rl * block_c[0] + block_size[0] / 2) { continue } if (ud * ball_c[1] <= ud * block_c[1] + block_size[1] / 2) { continue } const r = Math.sqrt( (ball_c[0] - (block_c[0] + (rl * block_size[0]) / 2)) ** 2 + (ball_c[1] - (block_c[1] + (ud * block_size[1]) / 2)) ** 2 ) if (r > this._ball_radius) { return [Infinity, []] } d = r } if (d === Infinity) { if (ball_c[0] + this._ball_radius < block_c[0] - block_size[0] / 2) { d = block_c[0] - block_size[0] / 2 - ball_c[0] } else if (ball_c[1] + this._ball_radius < block_c[1] - block_size[1] / 2) { d = block_c[1] - block_size[1] / 2 - ball_c[1] } else if (ball_c[0] - this._ball_radius > block_c[0] + block_size[0] / 2) { d = ball_c[0] - (block_c[0] + block_size[0] / 2) } else if (ball_c[1] - this._ball_radius > block_c[1] + block_size[1] / 2) { d = ball_c[1] - (block_c[1] + block_size[1] / 2) } else { d = 0 } } let off = null if (ball_c[1] >= block_c[1]) { off = ball_c[1] - (block_c[1] + block_size[1] / 2) } else { off = block_c[1] - block_size[1] / 2 - ball_c[1] } if (block_c[0] - block_size[0] / 2 - off < ball_c[0] && ball_c[0] < block_c[0] + block_size[0] / 2 + off) { return [d, [1, -1], (ball_c[0] - block_c[0]) / (block_size[0] / 2 + off + 1)] } else { return [d, [-1, 1]] } } }