// @ai-on-browser/data-analysis-models
// Version:
// Data analysis model package without any dependencies
// 180 lines (161 loc) • 3.81 kB
// JavaScript
import { RLIntRange, RLEnvironmentBase } from './base.js'
/**
 * Grid world environment
 *
 * The agent starts at the origin cell. An episode is done when the agent stands on
 * the cell whose coordinate is `size - 1` on every axis, or when the step limit
 * (`_max_step`, 0 = no limit) has been reached.
 * Walls are kept as a list of points and rasterised on demand by the `map` getter.
 */
export default class GridMazeRLEnvironment extends RLEnvironmentBase {
	constructor() {
		super()
		// Wall cells as [x, y] pairs. A cell listed twice is toggled back to open (see `map`).
		this._points = []
		this._dim = 2
		this._size = [20, 10]
		this._position = Array(this._dim).fill(0)
		// A falsy value (0) disables the step limit checked in `test`.
		this._max_step = 0
		this._reward = this._default_reward
		// Lazily allocated raster reused by the `map` getter.
		this.__map = null
	}

	/**
	 * Fresh copy of the default reward table.
	 *
	 * @returns {{step: number, wall: number, goal: number, fail: number}} Default rewards
	 */
	get _default_reward() {
		return {
			step: -1,
			wall: -2,
			goal: 5,
			fail: -100,
		}
	}

	/**
	 * Size of the grid along each axis.
	 *
	 * @returns {number[]} The internal size array (not a copy)
	 */
	get size() {
		return this._size
	}

	/**
	 * Available action indices, one list per action component.
	 *
	 * @returns {number[][]} `[[0, 1]]` in one dimension, `[[0, 1, 2, 3]]` otherwise
	 */
	get actions() {
		return this._dim === 1 ? [[0, 1]] : [[0, 1, 2, 3]]
	}

	/**
	 * Coordinate offsets, indexed by action index.
	 *
	 * @returns {number[][]} Offset applied to the position for each action
	 */
	get _action_move() {
		if (this._dim === 1) {
			return [[1], [-1]]
		}
		return [
			[1, 0],
			[0, 1],
			[-1, 0],
			[0, -1],
		]
	}

	/**
	 * State space: one integer range `[0, size - 1]` per axis.
	 *
	 * @returns {RLIntRange[]} Range of each coordinate
	 */
	get states() {
		return Array.from({ length: this._dim }, (_, i) => new RLIntRange(0, this._size[i] - 1))
	}

	/**
	 * Rasterised wall map, rebuilt from `_points` on every access.
	 *
	 * Cells are falsy when open and truthy when blocked. Because walls are applied
	 * with a numeric toggle, a cell holds `false`, `1` or `0` rather than a strict boolean.
	 * The start cell and the goal cell are always forced open.
	 *
	 * @returns {Array<Array<boolean | number>>} The internal map, indexed as `map[x][y]`
	 */
	get map() {
		const [width, height] = this._size
		this.__map ??= []
		// Allocate only the rows that are still missing.
		for (let i = this.__map.length; i < width; i++) {
			this.__map[i] = Array(height)
		}
		for (let i = 0; i < width; i++) {
			this.__map[i].fill(false)
		}
		// Toggle instead of set, so that a point listed twice cancels itself out.
		this._points.forEach(p => {
			this.__map[p[0]][p[1]] = 1 - this.__map[p[0]][p[1]]
		})
		this.__map[0][0] = false
		this.__map[width - 1][height - 1] = false
		return this.__map
	}

	/**
	 * Select the reward scheme.
	 *
	 * `'achieve'` makes the step reward the Euclidean distance of the current position
	 * from the origin and zeroes every other reward. Any other value restores the defaults.
	 *
	 * @param {string} value Name of the reward scheme
	 */
	set reward(value) {
		if (value !== 'achieve') {
			this._reward = this._default_reward
			return
		}
		// The getter below has its own `this`, so capture the environment explicitly.
		const env = this
		this._reward = {
			get step() {
				// Summed over every coordinate so that it also works when `_dim` is 1.
				return Math.sqrt(env._position.reduce((sum, v) => sum + v ** 2, 0))
			},
			wall: 0,
			goal: 0,
			fail: 0,
		}
	}

	/**
	 * Reset the environment and put the agent back on the origin cell.
	 *
	 * @returns {number[]} The new internal position array (not a copy)
	 */
	reset() {
		super.reset()
		this._position = Array(this._dim).fill(0)
		return this._position
	}

	/**
	 * Remove every wall.
	 */
	resetMap() {
		this._points = []
	}

	/**
	 * Replace the walls with a randomly generated maze.
	 *
	 * Corridors are carved on the even coordinates, two cells at a time, starting from
	 * the origin. Each walk continues until it has no uncarved neighbour left, then
	 * restarts from a randomly chosen cell that was already visited.
	 */
	resetMapAsMaze() {
		const [width, height] = this._size
		const wall = Array.from({ length: width }, () => Array(height).fill(true))
		const pending = [[0, 0]]
		wall[0][0] = false
		// Candidate directions; the order is fixed so that the random pick is unbiased by iteration.
		const directions = [
			[-1, 0],
			[1, 0],
			[0, -1],
			[0, 1],
		]
		while (pending.length > 0) {
			// Fisher-Yates shuffle, so that the cell popped next is a random one.
			for (let i = pending.length - 1; i > 0; i--) {
				const r = Math.floor(Math.random() * (i + 1))
				;[pending[i], pending[r]] = [pending[r], pending[i]]
			}
			let [x, y] = pending.pop()
			while (true) {
				// A direction is usable when the cell two steps away is inside the grid and still solid.
				// Out-of-range lookups yield `undefined`, which rejects the direction.
				const usable = directions.filter(([dx, dy]) => wall[x + dx * 2]?.[y + dy * 2] === true)
				if (usable.length === 0) {
					break
				}
				const [dx, dy] = usable[Math.floor(Math.random() * usable.length)]
				wall[x + dx][y + dy] = false
				wall[x + dx * 2][y + dy * 2] = false
				x += dx * 2
				y += dy * 2
				pending.push([x, y])
			}
		}

		this._points = []
		for (let i = 0; i < width; i++) {
			for (let j = 0; j < height; j++) {
				// With an even size the carving never reaches the last row/column.
				// Leave such a cell open when its inner neighbour is open,
				// so that the trailing line does not become a solid wall.
				const openLastRow = width % 2 === 0 && i === width - 1 && !wall[i - 1][j]
				const openLastColumn = height % 2 === 0 && j === height - 1 && !wall[i][j - 1]
				if (openLastRow || openLastColumn) {
					continue
				}
				if (wall[i][j]) this._points.push([i, j])
			}
		}
	}

	/**
	 * Current position of the agent.
	 *
	 * @returns {number[]} The internal position array (not a copy)
	 */
	state() {
		return this._position
	}

	/**
	 * Set the position of the agent.
	 *
	 * @param {number[]} state New position; stored by reference
	 */
	setState(state) {
		this._position = state
	}

	/**
	 * Evaluate an action from a state without modifying the environment position.
	 *
	 * Moving outside the grid or into a wall keeps the agent in place and yields the wall reward.
	 *
	 * @param {number[]} state Position to move from
	 * @param {number[]} action Action; only `action[0]` (an index into the moves) is read
	 * @returns {{state: number[], reward: number, done: boolean}} Resulting position, reward and termination flag
	 */
	test(state, action) {
		let reward = this._reward.step
		// `concat` copies an array state without touching the caller's array.
		let mov_state = [].concat(state)
		const map = this.map
		const moves = this._action_move[action[0]]
		for (let i = 0; i < moves.length; i++) {
			mov_state[i] += moves[i]
		}

		const outside = mov_state.some((s, i) => s < 0 || this._size[i] <= s)
		// The second index falls back to 0 so that a one-dimensional state reads column 0.
		if (outside || map[mov_state[0]][mov_state[1] || 0]) {
			reward = this._reward.wall
			mov_state = [].concat(state)
		}

		// Coerced to a boolean: a bare `&&` would leak the number 0 into `done` when no limit is set.
		const fail = Boolean(this._max_step && this._max_step <= this.epoch)
		const done = fail || mov_state.every((v, i) => v === this._size[i] - 1)
		if (done) reward = this._reward.goal
		// Running out of steps overrides the goal reward.
		if (fail) reward = this._reward.fail
		return {
			state: mov_state,
			reward,
			done,
		}
	}
}