UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

338 lines (298 loc) 6.7 kB
import { RLEnvironmentBase, RLStepResult } from './base.js'

// Cell / piece codes. All of them are non-zero, so the `!agent` checks below
// only trigger when no agent was passed.
const EMPTY = 1
const BLACK = 2
const WHITE = 3

/**
 * Returns the opposing piece colour.
 *
 * @param {number} p Piece code
 * @returns {number} WHITE for BLACK, BLACK for WHITE, EMPTY for anything else
 */
const flipPiece = p => {
	if (p === BLACK) {
		return WHITE
	} else if (p === WHITE) {
		return BLACK
	}
	return EMPTY
}

/**
 * Reversi environment
 *
 * A state is a flat array: element 0 is the colour whose turn it is, followed
 * by one entry per cell (first board index outermost) holding EMPTY, OWN or
 * OTHER relative to the observing agent.
 */
export default class ReversiRLEnvironment extends RLEnvironmentBase {
	constructor() {
		super()

		this._size = [8, 8]
		this._board = new ReversiBoard(this._size)
		this._turn = BLACK

		this._reward = {
			win: 1,
			lose: -1,
			step: 0,
		}
	}

	static BLACK = BLACK
	static WHITE = WHITE
	static EMPTY = EMPTY

	// Agent-relative cell codes used inside states. Numerically they coincide
	// with BLACK (2) and WHITE (3).
	static OWN = 2
	static OTHER = 3

	/**
	 * Action space: a single dimension containing EMPTY (pass) followed by one
	 * position label per cell.
	 *
	 * Labels use the format parsed by ReversiBoard: a letter for the second
	 * board index followed by the 1-based first board index (e.g. 'a1').
	 *
	 * @returns {Array<Array<number | string>>} One-element array holding the action list
	 */
	get actions() {
		const a = [EMPTY]
		const base = 'a'.charCodeAt(0)
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				// Letter comes from j and digit from i so that every cell gets
				// its own label and the label round-trips through ReversiBoard.
				a.push(`${String.fromCharCode(base + j)}${i + 1}`)
			}
		}
		return [a]
	}

	/**
	 * State space: the possible turn values followed by the possible values
	 * of each cell.
	 *
	 * @returns {Array<Array<number>>} Allowed values per state element
	 */
	get states() {
		const s = [[BLACK, WHITE]]
		for (let i = 0; i < this._size[0] * this._size[1]; i++) {
			s.push([EMPTY, ReversiRLEnvironment.OWN, ReversiRLEnvironment.OTHER])
		}
		return s
	}

	/**
	 * Encodes a board as a state array from one agent's point of view.
	 *
	 * @param {ReversiBoard} board Board to encode
	 * @param {number} agentturn Colour of the observing agent
	 * @param {number} gameturn Colour whose turn it is
	 * @returns {number[]} State array
	 */
	_makeState(board, agentturn, gameturn) {
		const s = [gameturn]
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				const p = board.at([i, j])
				if (p === EMPTY) {
					s.push(p)
				} else if (p === agentturn) {
					s.push(ReversiRLEnvironment.OWN)
				} else {
					s.push(ReversiRLEnvironment.OTHER)
				}
			}
		}
		return s
	}

	/**
	 * Decodes a state array into a new board.
	 *
	 * @param {number[]} state State array as produced by _makeState
	 * @param {number} turn Colour of the agent the state is relative to
	 * @returns {ReversiBoard} Newly created board
	 */
	_state2board(state, turn) {
		const board = new ReversiBoard(this._size)
		const opturn = flipPiece(turn)
		// p starts at 1 because state[0] holds the game turn.
		for (let i = 0, p = 1; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++, p++) {
				if (state[p] === EMPTY) {
					board._board[i][j] = EMPTY
				} else if (state[p] === ReversiRLEnvironment.OWN) {
					board._board[i][j] = turn
				} else {
					board._board[i][j] = opturn
				}
			}
		}
		return board
	}

	/**
	 * Validates an agent identifier.
	 *
	 * @param {number} agent Agent colour
	 * @throws {Error} If reset has not been called yet or the agent is neither BLACK nor WHITE
	 */
	_checkAgent(agent) {
		if (!this._agents) {
			throw new Error('Agent does not exist. Call reset to set agents.')
		}
		if (agent !== BLACK && agent !== WHITE) {
			throw new Error('Unknown agent.')
		}
	}

	/**
	 * Resets the game to the initial position with BLACK to move.
	 *
	 * @returns {number[]} Initial state from BLACK's point of view
	 */
	reset() {
		super.reset()

		this._agents = [BLACK, WHITE]
		this._board.reset()
		this._turn = BLACK

		return this.state(BLACK)
	}

	/**
	 * Returns the current state.
	 *
	 * @param {number} [agent] Observing agent; defaults to the colour to move
	 * @returns {number[]} State array
	 */
	state(agent) {
		if (!agent) {
			agent = this._turn
		}
		this._checkAgent(agent)
		return this._makeState(this._board, agent, this._turn)
	}

	/**
	 * Replaces the current turn and board with those described by a state.
	 *
	 * @param {number[]} state State array
	 * @param {number} [agent] Agent the state is relative to; defaults to the
	 *   turn stored in the state, mirroring the default used by state()
	 */
	setState(state, agent) {
		this._turn = state[0]
		// Without a colour, OWN cells would decode to undefined and OTHER
		// cells to EMPTY, so fall back to the side to move.
		const viewer = agent || this._turn
		this._board = this._state2board(state, viewer)
	}

	/**
	 * Performs an action by delegating to the base class.
	 *
	 * @param {Array} action Action to perform
	 * @param {number} [agent] Acting agent; defaults to the colour to move
	 * @returns {*} Whatever the base class step returns
	 */
	step(action, agent) {
		if (!agent) {
			agent = this._turn
		}
		return super.step(action, agent)
	}

	/**
	 * Evaluates an action against the given state. Only a board decoded from
	 * `state` is modified; this environment's own board and turn are not
	 * touched.
	 *
	 * Each RLStepResult is built with (this, state, reward, done, invalid).
	 * The last argument is true when the action is rejected: acting out of
	 * turn, passing while a move is available, or a placement that flips
	 * nothing.
	 *
	 * @param {number[]} state State array to start from
	 * @param {Array} action Array whose first element is EMPTY (pass) or a position (string label or index pair)
	 * @param {number} [agent] Acting agent; defaults to the colour to move
	 * @returns {RLStepResult} Result of the action
	 */
	test(state, action, agent) {
		if (!agent) {
			agent = this._turn
		}
		this._checkAgent(agent)

		const gameturn = state[0]
		const board = this._state2board(state, agent)

		// A finished game without a winner (a tie) yields the lose reward,
		// because board.winner is null in that case.
		const getreward = done => {
			if (!done) {
				return this._reward.step
			}
			return board.winner === agent ? this._reward.win : this._reward.lose
		}

		if (agent !== gameturn) {
			const done = board.finish
			return new RLStepResult(this, state, getreward(done), done, true)
		}

		if (action[0] === EMPTY) {
			// Passing is only allowed when the agent has no placement available.
			const choices = board.choices(agent)
			const done = board.finish
			const invalid = choices.length > 0
			return new RLStepResult(
				this,
				invalid ? state : this._makeState(board, agent, flipPiece(gameturn)),
				getreward(done),
				done,
				invalid
			)
		}

		const changed = board.set(action[0], agent)
		const done = board.finish
		if (!changed) {
			return new RLStepResult(this, state, getreward(done), done, true)
		}
		return new RLStepResult(this, this._makeState(board, agent, flipPiece(gameturn)), getreward(done), done)
	}
}

/**
 * Reversi board holding one piece code (EMPTY, BLACK or WHITE) per cell.
 */
class ReversiBoard {
	/**
	 * @param {number[]} size Board extent as [first dimension, second dimension]
	 */
	constructor(size) {
		this._size = size
		this.reset()
	}

	/**
	 * @returns {number[]} Board extent
	 */
	get size() {
		return this._size
	}

	/**
	 * @returns {{black: number, white: number}} Number of pieces of each colour
	 */
	get count() {
		let b = 0
		let w = 0
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				if (this._board[i][j] === WHITE) {
					w++
				} else if (this._board[i][j] === BLACK) {
					b++
				}
			}
		}
		return {
			black: b,
			white: w,
		}
	}

	/**
	 * @returns {boolean} true when neither colour has a placement available
	 */
	get finish() {
		return this.choices(BLACK).length + this.choices(WHITE).length === 0
	}

	/**
	 * @returns {number | null} Colour with more pieces once the game is finished;
	 *   null while the game is running or when the counts are equal
	 */
	get winner() {
		if (!this.finish) {
			return null
		}
		const count = this.count
		if (count.black > count.white) {
			return BLACK
		} else if (count.black < count.white) {
			return WHITE
		}
		return null
	}

	/**
	 * Renders the board as text: 'x' for BLACK, 'o' for WHITE, '-' otherwise,
	 * cells separated by spaces and each row ended by a newline.
	 *
	 * @returns {string} Text representation
	 */
	toString() {
		let buf = ''
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				if (j > 0) {
					buf += ' '
				}
				if (this._board[i][j] === BLACK) {
					buf += 'x'
				} else if (this._board[i][j] === WHITE) {
					buf += 'o'
				} else {
					buf += '-'
				}
			}
			buf += '\n'
		}
		return buf
	}

	/**
	 * @param {number} turn Current colour
	 * @returns {number} Opposing colour
	 */
	nextTurn(turn) {
		return flipPiece(turn)
	}

	/**
	 * @returns {ReversiBoard} Independent copy of this board
	 */
	copy() {
		const cp = new ReversiBoard(this._size)
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				cp._board[i][j] = this._board[i][j]
			}
		}
		return cp
	}

	/**
	 * Piece-count difference from one colour's point of view.
	 *
	 * @param {number} turn Colour to score for; anything other than BLACK is scored as WHITE
	 * @returns {number} Own piece count minus the opponent's
	 */
	score(turn) {
		const count = this.count
		if (turn === BLACK) {
			return count.black - count.white
		} else {
			return count.white - count.black
		}
	}

	/**
	 * Normalises a position to an index pair.
	 *
	 * A string such as 'c4' maps its second character (a digit) to the first
	 * index, 1-based, and its first character (a letter from 'a') to the
	 * second index. Non-string values are returned unchanged.
	 *
	 * @param {string | number[]} p Position label or index pair
	 * @returns {number[]} Index pair [i, j]; may contain NaN for malformed labels
	 */
	_toIndex(p) {
		if (typeof p === 'string') {
			return [Number(p[1]) - 1, p.charCodeAt(0) - 'a'.charCodeAt(0)]
		}
		return p
	}

	/**
	 * Reads the piece at a position.
	 *
	 * @param {string | number[]} p Position label or index pair
	 * @returns {number} Piece code stored at that cell
	 */
	at(p) {
		const [i, j] = this._toIndex(p)
		return this._board[i][j]
	}

	/**
	 * Places a piece and flips the captured pieces.
	 *
	 * @param {string | number[]} p Position label or index pair
	 * @param {number} turn Colour being placed
	 * @returns {boolean} false (board unchanged) when the placement flips nothing, true otherwise
	 */
	set(p, turn) {
		const [i, j] = this._toIndex(p)
		const flips = this.flipPositions(i, j, turn)
		if (flips.length === 0) {
			return false
		}
		this._board[i][j] = turn
		for (const [ti, tj] of flips) {
			this._board[ti][tj] = turn
		}
		return true
	}

	/**
	 * Clears the board and places the four starting pieces around the centre.
	 */
	reset() {
		this._board = []
		for (let i = 0; i < this._size[0]; i++) {
			this._board[i] = Array(this._size[1]).fill(EMPTY)
		}
		const cx = Math.floor(this._size[0] / 2)
		const cy = Math.floor(this._size[1] / 2)
		this._board[cx - 1][cy - 1] = WHITE
		this._board[cx - 1][cy] = BLACK
		this._board[cx][cy - 1] = BLACK
		this._board[cx][cy] = WHITE
	}

	/**
	 * Lists every cell where the given colour can be placed.
	 *
	 * @param {number} turn Colour to place
	 * @returns {Array<number[]>} Index pairs of the available placements
	 */
	choices(turn) {
		const c = []
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				if (this.flipPositions(i, j, turn).length > 0) {
					c.push([i, j])
				}
			}
		}
		return c
	}

	/**
	 * Computes the pieces that would be flipped by placing a colour at a cell.
	 *
	 * @param {number} i First index of the cell
	 * @param {number} j Second index of the cell
	 * @param {number} turn Colour being placed
	 * @returns {Array<number[]>} Index pairs to flip; empty when the cell is
	 *   outside the board, not an integer position, already occupied, or
	 *   `turn` is EMPTY
	 */
	flipPositions(i, j, turn) {
		// NaN indices (from malformed position labels) pass the range
		// comparisons below, so reject non-integers explicitly instead of
		// failing on an undefined row.
		if (!Number.isInteger(i) || !Number.isInteger(j)) {
			return []
		}
		if (i < 0 || j < 0 || this._size[0] <= i || this._size[1] <= j) {
			return []
		} else if (turn === EMPTY || this._board[i][j] !== EMPTY) {
			return []
		}
		const p = []
		for (const [di, dj] of [
			[1, 1],
			[1, 0],
			[1, -1],
			[0, -1],
			[-1, -1],
			[-1, 0],
			[-1, 1],
			[0, 1],
		]) {
			let ti = i
			let tj = j
			// Opposing pieces seen so far in this direction; they only count
			// if the run is closed by a piece of the placing colour.
			const tmp = []
			while (true) {
				ti += di
				tj += dj
				if (ti < 0 || tj < 0 || this._size[0] <= ti || this._size[1] <= tj) {
					break
				} else if (this._board[ti][tj] === turn) {
					p.push(...tmp)
					break
				} else if (this._board[ti][tj] === EMPTY) {
					break
				}
				tmp.push([ti, tj])
			}
		}
		return p
	}
}