UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

127 lines (106 loc) 2.44 kB
import { RLIntRange, RLEnvironmentBase } from './base.js'

// Suit identifiers stored on each card. Scoring only looks at `value`.
const SPADE = 0
const DIAMOND = 1
const HEART = 2
const CLUB = 3

/**
 * A 52-card deck (4 suits x values 1..13) that is shuffled whenever it is built.
 */
class Deck {
	constructor() {
		this.cards = []
		this.init()
	}

	/**
	 * Rebuild the full 52-card deck and shuffle it.
	 */
	init() {
		this.cards = []
		for (const suit of [SPADE, HEART, DIAMOND, CLUB]) {
			for (let value = 1; value <= 13; value++) {
				this.cards.push({ suit, value })
			}
		}
		this.shuffle()
	}

	/**
	 * Shuffle the remaining cards in place (Fisher-Yates, driven by Math.random).
	 */
	shuffle() {
		for (let i = this.cards.length - 1; i > 0; i--) {
			const r = Math.floor(Math.random() * (i + 1))
			;[this.cards[i], this.cards[r]] = [this.cards[r], this.cards[i]]
		}
	}

	/**
	 * Remove and return the last card of the deck.
	 *
	 * @returns {{suit: number, value: number} | undefined} Drawn card, or undefined when the deck is empty
	 */
	pop() {
		return this.cards.pop()
	}
}

/**
 * Blackjack environment
 */
export default class BlackjackRLEnvironment extends RLEnvironmentBase {
	constructor() {
		super()
		this._deck = new Deck()
		this._dealer_hands = []
		this._player_hands = []
		this._done = false

		this._reward = {
			bust: -1,
			win: 1,
			step: 0,
		}

		this.reset()
	}

	/**
	 * Available actions: a single dimension where 1 draws a card and any other value stands.
	 */
	get actions() {
		return [[0, 1]]
	}

	/**
	 * State dimensions: player's hand total, dealer's first card (capped at 10), usable-ace flag.
	 */
	get states() {
		return [new RLIntRange(2, 31), new RLIntRange(1, 10), [0, 1]]
	}

	/**
	 * Score a hand. Cards above 10 count as 10, and one ace counts as 11
	 * when doing so keeps the total at 21 or below.
	 *
	 * @param {{suit: number, value: number}[]} hands Cards in the hand
	 * @returns {[number, boolean]} Hand total and whether an ace was counted as 11
	 */
	_sumhands(hands) {
		let sumhands = hands.reduce((s, c) => s + Math.min(10, c.value), 0)
		// With every ace counted as 1, promoting one ace adds 10, so the total must be at most 11.
		const usableace = sumhands <= 11 && hands.some(c => c.value === 1)
		if (usableace) {
			sumhands += 10
		}
		return [sumhands, usableace]
	}

	/**
	 * Start a new round: rebuild the deck and deal two cards each to the dealer and the player.
	 *
	 * @returns {number[]} Initial state
	 */
	reset() {
		super.reset()
		this._deck.init()
		this._dealer_hands = [this._deck.pop(), this._deck.pop()]
		this._player_hands = [this._deck.pop(), this._deck.pop()]
		this._done = false

		return this.state()
	}

	/**
	 * Current observation.
	 *
	 * @returns {number[]} [player's hand total, dealer's first card capped at 10, 1 if the player has a usable ace else 0]
	 */
	state() {
		const [sumhands, usableace] = this._sumhands(this._player_hands)
		return [sumhands, Math.min(10, this._dealer_hands[0].value), usableace ? 1 : 0]
	}

	/**
	 * Apply an action.
	 *
	 * When `action[0]` is 1 the player draws a card; going over 21 ends the round with the bust reward.
	 * Any other value stands: the dealer draws until reaching at least 17 and the round ends.
	 *
	 * @param {number[]} action Action to take
	 * @returns {{state: number[], reward: number, done: boolean}} Resulting state, reward and end flag
	 */
	step(action) {
		if (action[0] === 1) {
			this._player_hands.push(this._deck.pop())
			if (this._sumhands(this._player_hands)[0] > 21) {
				this._done = true
				return {
					state: this.state(),
					reward: this._reward.bust,
					done: true,
				}
			}
			return {
				state: this.state(),
				reward: this._reward.step,
				done: false,
			}
		}

		this._done = true
		while (this._sumhands(this._dealer_hands)[0] < 17) {
			this._dealer_hands.push(this._deck.pop())
		}
		const [playerTotal] = this._sumhands(this._player_hands)
		const [dealerTotal] = this._sumhands(this._dealer_hands)
		// A dealer bust is a win for the player. Without this check the score
		// difference would be negative (e.g. 20 - 22) and penalize a winning stand.
		const reward = dealerTotal > 21 ? this._reward.win : playerTotal - dealerTotal
		return {
			state: this.state(),
			reward,
			done: true,
		}
	}
}