UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

312 lines (274 loc) 6.47 kB
import { RLEnvironmentBase, RLStepResult } from './base.js'

// Cell / player codes used by the board. BLACK and WHITE double as agent identifiers.
const EMPTY = 1
const BLACK = 2
const WHITE = 3

/**
 * Gomoku environment
 *
 * Two agents (BLACK and WHITE) alternately place stones on an 8x8 board.
 * A state is a flat array: index 0 holds the color whose turn it is, followed by one
 * entry per cell (row-major) encoded relative to the observing agent as EMPTY, OWN or OTHER.
 */
export default class GomokuRLEnvironment extends RLEnvironmentBase {
	constructor() {
		super()
		this._size = [8, 8]
		this._board = new GomokuBoard(this._size)
		this._reward = {
			win: 1,
			lose: -1,
			step: 0,
		}
	}

	static BLACK = BLACK
	static WHITE = WHITE
	static EMPTY = EMPTY
	// Agent-relative cell codes used inside state arrays.
	static OWN = 2
	static OTHER = 3

	/**
	 * Action space: a single dimension whose values are the cell names `"<row>_<col>"`.
	 *
	 * @type {string[][]}
	 */
	get actions() {
		const a = []
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				a.push(`${i}_${j}`)
			}
		}
		return [a]
	}

	/**
	 * State space: the possible values of every state entry (turn first, then each cell).
	 *
	 * @type {number[][]}
	 */
	get states() {
		const s = [[BLACK, WHITE]]
		for (let i = 0; i < this._size[0] * this._size[1]; i++) {
			s.push([EMPTY, GomokuRLEnvironment.OWN, GomokuRLEnvironment.OTHER])
		}
		return s
	}

	/**
	 * Encode a board as a state array seen from one agent.
	 *
	 * @param {GomokuBoard} board Board to encode
	 * @param {number} agentturn Color of the observing agent
	 * @param {number} gameturn Color that moves next; stored at index 0
	 * @returns {number[]} Encoded state
	 */
	_makeState(board, agentturn, gameturn) {
		const s = [gameturn]
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				const p = board.at([i, j])
				s.push(p === EMPTY ? p : p === agentturn ? GomokuRLEnvironment.OWN : GomokuRLEnvironment.OTHER)
			}
		}
		return s
	}

	/**
	 * Decode a state array into a fresh board.
	 *
	 * @param {number[]} state Encoded state (index 0 is skipped here)
	 * @param {number} turn Color of the agent the state was encoded for
	 * @returns {GomokuBoard} Board in which OWN cells hold `turn` and OTHER cells hold the opposite color
	 */
	_state2board(state, turn) {
		const board = new GomokuBoard(this._size)
		const opturn = board.nextTurn(turn)
		// p starts at 1 because state[0] is the game turn, not a cell.
		for (let i = 0, p = 1; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++, p++) {
				if (state[p] === GomokuRLEnvironment.OWN) {
					board.set([i, j], turn)
				} else if (state[p] === GomokuRLEnvironment.OTHER) {
					board.set([i, j], opturn)
				}
			}
		}
		return board
	}

	/**
	 * Validate an agent identifier.
	 *
	 * @param {number} agent Agent color
	 * @throws {Error} If `reset` has not been called yet, or if the agent is neither BLACK nor WHITE
	 */
	_checkAgent(agent) {
		if (!this._agents) {
			throw new Error('Agent does not exist. Call reset to set agents.')
		}
		if (agent !== BLACK && agent !== WHITE) {
			throw new Error('Unknown agent.')
		}
	}

	/**
	 * Start a new game: register both agents, clear the board and give the first move to BLACK.
	 *
	 * @returns {number[]} Initial state seen from BLACK
	 */
	reset() {
		super.reset()
		this._agents = [BLACK, WHITE]

		this._board.reset()
		this._turn = BLACK

		return this.state(BLACK)
	}

	/**
	 * Current state seen from an agent.
	 *
	 * @param {number} [agent] Agent color; defaults to the agent whose turn it is
	 * @returns {number[]} Encoded state
	 */
	state(agent) {
		if (!agent) {
			agent = this._turn
		}
		this._checkAgent(agent)
		return this._makeState(this._board, agent, this._turn)
	}

	/**
	 * Replace the current game position with the one described by a state array.
	 *
	 * @param {number[]} state Encoded state
	 * @param {number} [agent] Agent the state was encoded for; defaults to the turn stored in the state
	 */
	setState(state, agent) {
		this._turn = state[0]
		// Without a perspective the OWN cells cannot be mapped to a color, so fall back to
		// the same default `state()` uses (the agent whose turn it is).
		this._board = this._state2board(state, agent || this._turn)
	}

	/**
	 * Apply an action; delegates to the base class after defaulting the agent.
	 *
	 * @param {string[]} action Action, `["<row>_<col>"]`
	 * @param {number} [agent] Agent color; defaults to the agent whose turn it is
	 * @returns {*} Whatever the base class `step` returns (not visible in this file)
	 */
	step(action, agent) {
		if (!agent) {
			agent = this._turn
		}
		return super.step(action, agent)
	}

	/**
	 * Evaluate an action against a given state without touching the current game.
	 *
	 * The move is rejected (state returned unchanged, last RLStepResult argument `true`) when it
	 * is not the agent's turn or when the stone cannot be placed on the chosen cell.
	 *
	 * @param {number[]} state Encoded state seen from `agent`
	 * @param {string[]} action Action, `["<row>_<col>"]`
	 * @param {number} [agent] Agent color; defaults to the agent whose turn it is
	 * @returns {RLStepResult} Result built from (environment, next state, reward, done flag[, rejected flag])
	 * @throws {Error} If the agent is invalid (see `_checkAgent`)
	 */
	test(state, action, agent) {
		if (!agent) {
			agent = this._turn
		}
		this._checkAgent(agent)
		const gameturn = state[0]
		const board = this._state2board(state, agent)
		// Reward for `agent`: step reward while the game runs, win reward if it won, lose reward
		// otherwise. NOTE: a full board with no winner therefore also yields the lose reward.
		const getreward = done =>
			!done ? this._reward.step : board.winner === agent ? this._reward.win : this._reward.lose
		if (agent !== gameturn) {
			const done = board.finish
			return new RLStepResult(this, state, getreward(done), done, true)
		}
		const choice = action[0].split('_').map(v => +v)
		const changed = board.set(choice, agent)
		const done = board.finish
		if (!changed) {
			return new RLStepResult(this, state, getreward(done), done, true)
		}
		return new RLStepResult(this, this._makeState(board, agent, board.nextTurn(gameturn)), getreward(done), done)
	}
}

/**
 * Gomoku board: a grid of EMPTY / BLACK / WHITE cells plus the number of stones placed.
 */
class GomokuBoard {
	/**
	 * @param {number[]} size Board size as [rows, columns]
	 */
	constructor(size) {
		this._size = size
		// Number of aligned stones needed to win.
		this._a = 5
		this._count = 0

		this.reset()
	}

	/**
	 * Board size as [rows, columns].
	 *
	 * @type {number[]}
	 */
	get size() {
		return this._size
	}

	/**
	 * `true` when a color has won or every cell is occupied.
	 *
	 * @type {boolean}
	 */
	get finish() {
		return this.winner !== null || this._count === this._size[0] * this._size[1]
	}

	/**
	 * Winning color, or `null` when nobody has a winning row. WHITE is checked before BLACK.
	 *
	 * @type {number | null}
	 */
	get winner() {
		if (this.row(WHITE, this._a).length > 0) {
			return WHITE
		} else if (this.row(BLACK, this._a).length > 0) {
			return BLACK
		}
		return null
	}

	/**
	 * Text rendering: one line per row, cells separated by spaces
	 * (`x` = BLACK, `o` = WHITE, `-` = empty).
	 *
	 * @returns {string} Rendered board; every row ends with a newline
	 */
	toString() {
		let buf = ''
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				if (j > 0) {
					buf += ' '
				}
				if (this._board[i][j] === BLACK) {
					buf += 'x'
				} else if (this._board[i][j] === WHITE) {
					buf += 'o'
				} else {
					buf += '-'
				}
			}
			buf += '\n'
		}
		return buf
	}

	/**
	 * Opposite color. Any value other than BLACK maps to BLACK.
	 *
	 * @param {number} turn Color
	 * @returns {number} WHITE for BLACK, otherwise BLACK
	 */
	nextTurn(turn) {
		return turn === BLACK ? WHITE : BLACK
	}

	/**
	 * Independent copy of the board (cells and stone count).
	 *
	 * @returns {GomokuBoard} Copied board
	 */
	copy() {
		const cp = new GomokuBoard(this._size)
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				cp._board[i][j] = this._board[i][j]
			}
		}
		cp._count = this._count
		return cp
	}

	/**
	 * Score of the position for a color: `cells * 100 - stones placed` if it has won,
	 * the negation of that if the opposite color has won, `0` otherwise.
	 *
	 * @param {number} turn Color to score for
	 * @returns {number} Score
	 */
	score(turn) {
		const winner = this.winner
		const nt = this.nextTurn(turn)
		if (winner === turn) return this._size[0] * this._size[1] * 100 - this._count
		if (winner === nt) return -this._size[0] * this._size[1] * 100 + this._count
		return 0
	}

	/**
	 * Content of a cell.
	 *
	 * @param {number[]} p Position as [row, column]
	 * @returns {number} EMPTY, BLACK or WHITE
	 */
	at(p) {
		return this._board[p[0]][p[1]]
	}

	/**
	 * Place a stone.
	 *
	 * @param {number[]} p Position as [row, column]
	 * @param {number} turn Color to place
	 * @returns {boolean} `true` if the stone was placed; `false` if the cell is occupied or off the board
	 */
	set(p, turn) {
		// An off-board position reads as `undefined`, which is rejected like an occupied cell
		// instead of raising a TypeError on the missing row.
		if (this._board[p[0]]?.[p[1]] !== EMPTY) {
			return false
		}
		this._board[p[0]][p[1]] = turn
		this._count++
		return true
	}

	/**
	 * Clear every cell and the stone count.
	 */
	reset() {
		this._count = 0
		this._board = []
		for (let i = 0; i < this._size[0]; i++) {
			this._board[i] = Array(this._size[1]).fill(EMPTY)
		}
	}

	/**
	 * Positions where a stone can still be placed.
	 *
	 * @returns {number[][]} Empty cells as [row, column]; an empty list once the game is finished
	 */
	choices() {
		const c = []
		if (this.finish) {
			return c
		}
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				if (this._board[i][j] === EMPTY) {
					c.push([i, j])
				}
			}
		}
		return c
	}

	/**
	 * Find straight lines made of exactly `length` stones of one color.
	 *
	 * Lines are searched down-right, down, down-left and right. A line that is preceded or
	 * followed by another stone of the same color is not reported, so longer runs do not count.
	 *
	 * @param {number} turn Color to search for
	 * @param {number} length Number of stones in a line
	 * @param {boolean} [separate] If `true`, a line may contain one empty cell between its stones
	 * @returns {{path: number[][], s: number[][]}[]} Found lines. `path` lists the stone positions;
	 * `s` lists empty cells: the gap when the line has one, otherwise the empty cells directly
	 * after and before the line
	 */
	row(turn, length, separate = false) {
		const p = []
		const checkInbound = (i, j) => {
			return 0 <= i && i < this._size[0] && 0 <= j && j < this._size[1]
		}
		const d = [
			[1, 1],
			[1, 0],
			[1, -1],
			[0, 1],
		]
		for (let i = 0; i < this._size[0]; i++) {
			for (let j = 0; j < this._size[1]; j++) {
				if (this._board[i][j] !== turn) {
					continue
				}
				for (const [di, dj] of d) {
					if (checkInbound(i - di, j - dj) && this._board[i - di][j - dj] === turn) {
						// Not the first stone of its run in this direction.
						continue
					} else if (!checkInbound(i + di * (length - 1), j + dj * (length - 1))) {
						// Not enough room for `length` stones.
						continue
					}
					const path = [[i, j]]
					let ti = i + di
					let tj = j + dj
					let sep = false
					const s = []
					for (let t = 1; t < length; t++, ti += di, tj += dj) {
						if (!checkInbound(ti, tj)) {
							break
						} else if (separate && !sep && this._board[ti][tj] === EMPTY) {
							// First gap: record it and move on without counting it as a stone
							// (t-- cancels the t++ performed by `continue`).
							sep = true
							t--
							s.push([ti, tj])
							continue
						} else if (this._board[ti][tj] !== turn) {
							break
						}
						path.push([ti, tj])
						if (t === length - 1) {
							// Look one cell past the last stone.
							ti += di
							tj += dj
							if (checkInbound(ti, tj) && this._board[ti][tj] === turn) {
								// Run is longer than `length`.
								break
							}
							if (!sep) {
								if (checkInbound(ti, tj) && this._board[ti][tj] === EMPTY) {
									s.push([ti, tj])
								}
								if (checkInbound(i - di, j - dj) && this._board[i - di][j - dj] === EMPTY) {
									s.push([i - di, j - dj])
								}
							}
							p.push({
								path: path,
								s: s,
							})
						}
					}
				}
			}
		}
		return p
	}
}