UNPKG

q-exp

Version:

Reinforcement learning (Q-Learning) library

404 lines (329 loc) • 9.19 kB

JavaScript

"use strict"; /** * Tic-Tac-Toe strategy learning sample */ var colors = require('colors'); var prompt = require('prompt'); var _ = require('underscore'); var ql = require('../main.js'); var State = require('../state.js'); var fs = require('fs'); ql.isVerbose = true; // Make sure it's gonna go verbose var isVsHuman = process.argv.splice(2).indexOf('play')>=0; /*----------------- * c00 | c10 | c20 *----------------- * c01 | c11 | c21 *----------------- * c02 | c12 | c22 *----------------- */ var actionSet = [ 'c00','c10','c20', 'c01','c11','c21', 'c02','c12','c22' ]; var ttt = {} var b1 = ''; var b2 = '✓'; ttt.agentVsHuman = function agentVsHuman(){ var alpha; // Initialise bot & human handler var bot = ql.newAgent('tictactoe-1',actionSet,alpha=0.44) .then(ql.bindRewardMeasure( rewardOf(b1) )) .then(ql.bindActionCostMeasure( costOfAct )) .then(ql.bindStateGenerator( takeMove(b1) )) .then(ql.bindStatePrinter( statePrint(b1,b2) )) .then(ql.load('./agent')); var board = boardToState(emptyBoard(),b1); // Bot starts the game bot.then(ql.start(board)) .then(humanTake); // Human takes the next turn } ttt.agentVsAgent = function agentVsAgent(){ var alpha; // Initialise bots var bot1 = ql.newAgent('tictactoe-1',actionSet,alpha=0.35) .then(ql.bindRewardMeasure( rewardOf(b1) )) .then(ql.bindActionCostMeasure( costOfAct )) .then(ql.bindStateGenerator( takeMove(b1) )) .then(ql.bindStatePrinter( statePrint(b1,b2) )) .then(ql.load('./agent')); var bot2 = ql.newAgent('tictactoe-2',actionSet,alpha=0.35) .then(ql.bindRewardMeasure( rewardOf(b2) )) .then(ql.bindActionCostMeasure( costOfAct )) .then(ql.bindStateGenerator( takeMove(b2) )) .then(ql.bindStatePrinter( statePrint(b2,b1) )) .then(ql.load('./agent')); var board = boardToState(emptyBoard(),b1); // Start the game Promise.all([bot1,bot2]) .then(function(bots){ let bot1 = bots[0]; let bot2 = bots[1]; // Bot1 makes the first move of the game Promise.resolve(bot1) .then(ql.start(board)) .then((_bot1) => { // Bot2 takes the next move handoverTo(_bot1,bot2) }) .catch((e) => { console.error('FATAL '.red + e); console.error(e.stack); }) }) } function humanTake(bot){ // Human takes the next move console.log('HUMAN takes a move'.magenta); var state = flipSide(bot.state); var board = stateToBoard(state,b2,b1); // Print the board board.map((row,j) => { console.log(' ' + row.map((c,i) => c == 0 ? `[ ${i}${j}]`.white : c == b1 ? '[ ' + c.red + ' ]' : '[ ' + c.green + ' ]' ).join('-')) }) prompt.start(); prompt.get(['move'], (err,res) => { console.log('You picked: '.yellow + res['move']); // Apply an action var action = 'c' + res['move']; var state_ = takeMove(b1)(state,action); // Switch over to bot botTake(bot,state_); }) } function botTake(bot,state){ console.log('Bot takes a move'.magenta); // Get the current state var state_ = flipSide(state); bot = ql.setState(state_)(bot); var reward = rewardOf(b1)(state_); console.log('Bot perceives a state reward of : '.cyan + reward); // Conclude the game function conclude(reward){ if (Math.abs(reward)>=100 || isEnd){ // The game has ended if (reward>=100){ console.log('¬¬¬¬¬¬ BOT WON! ¬¬¬¬¬¬ '.red) } else if (reward<=-100){ console.log('¬¬¬¬¬¬ YOU WON! ¬¬¬¬¬¬ '.green) } else{ console.log('¬¬¬¬¬¬ DRAW! ¬¬¬¬¬¬ '.cyan) } } return Promise.reject('Game Ended'); } // Check if the game has ended var isEnd = !isAvailableToMove(state_) || Math.abs(reward)>=100; Promise.resolve(bot) .then(function(_bot) { // If the game is over, skips // Otherwise, the bot makes a move return isEnd ? conclude(reward) : ql.step(_bot) }) .then((_bot) => { var reward = rewardOf(b1)(_bot.state); // Game has ended? if (Math.abs(reward)>=100) conclude(reward); else if (!isAvailableToMove(_bot.state)) conclude(reward); else humanTake(_bot); // Handover to human }) .catch((e) => { if (e!='Game Ended'){ console.error('FATAL '.red + e); console.error(e); } }) } /** * Turn handover between bots */ function handoverTo(from,to){ console.log(to.name.green + ' now takes turn'.magenta); // Get the current state var state = flipSide(from.state); var me = ql.setState(state)(to); var reward = rewardOf(b1)(state); console.log(to.name.green + ' perceives a state reward of : '.cyan + reward); // Check if the game has ended var isEnd = !isAvailableToMove(state); if (Math.abs(reward)>=100 || isEnd){ // A winner has been decided! if (me.name=='tictactoe-1'){ if (reward >= 100) console.log('TICTACTOE-1 WON!'.green); else if (reward <= -100) console.log('TICTACTOE-1 LOST!'.red); else console.log('TICTACTOE-1 DREW'.silver); // Learn from its recent move Promise.resolve(me) .then(ql.learn) .then(ql.save('./agent')) .then(ql.saveAs('./agent/tictactoe-2')) } else{ // Skip this turn, and handover to the bot1 console.log('skips the turn'.magenta) var opponent = from; Promise.resolve(me) .then((myself) => handoverTo(myself,opponent)) } } else{ // Still in the game, just learn // and move on if (me.name=='tictactoe-1'){ var opponent = from; Promise.resolve(me) .then(ql.learn) .then(ql.step) .then((myself) => handoverTo(myself,opponent)) .catch((e) => { console.error('BOT1 ERROR '.red + e); console.error(e.stack) }) } else{ // Me takes the current state from opponent // makes a new move // then handover the turns var opponent = from; Promise.resolve(me) .then(ql.step) .then((myself) => handoverTo(myself,opponent)) .catch((e) => { console.error('BOT2 ERROR '.red + e); console.error(e.stack) }) } } } function flipSide(state){ var sides = state.hash.split(':'); return new State([sides[1] + ':' + sides[0]]) } function rewardOf(piece){ return function(state){ var mystate = state.hash.split(':')[0]; var theirstate = state.hash.split(':')[1]; if (mystate.length==0) return 0; // Measure the score based on how close we win // or lose var score = 0; winningPatterns().forEach((pattern) => { // Skip if winner has been decided if (Math.abs(score)>=100) return; let agg = 0; pattern.forEach((act) => { if (mystate.indexOf(act)>=0) agg++; if (theirstate.indexOf(act)>=0) agg--; }) // Win? if (agg==3) score = 100; // Almost win? else if (agg==2) score += 30; // Lost? if (agg==-3) score = -100; // Almost lost? else if (agg==-2) score -= 30; }) return score; } } function costOfAct(state,action){ // Invalid moves result minus value if (state.hash.indexOf(action)>=0) return -Infinity; // Otherwise, we blindly guess the cost return Math.random()*10; } function emptyBoard(){ return [[0,0,0],[0,0,0],[0,0,0]]; } function transpose(board){ var t = board.map((row,i) => row.map((c,j) => board[j][i]) ) return t; } function takeMove(piece){ return function(state,action){ // Convert the state back to a board var board = stateToBoard(state,piece,'X'); // Take a move! var move = actionToMove(action); board[move[1]][move[0]] = piece; // Convert the board back to the state and return return boardToState(board,piece); } } function actionToMove(a){ var m = a.match(/c*(\d)(\d)/); return [m[1],m[2]]; } // Convert a board to a state string // based on the perspective of a player function boardToState(board,piece){ var a = []; var b = []; board.forEach(function(row,j){ row.forEach(function(c,i){ if (c==piece) a.push(`c${i}${j}`); else if (c!=0) b.push(`c${i}${j}`); }) }) var s = a.join(',') + ':' + b.join(','); return new State([s]); } // Convert a state string back to a board function stateToBoard(state,piece,theirPiece){ var players = state.hash.split(':'); var board = emptyBoard(); var represent = [piece,theirPiece]; players.forEach((p,n) => { if (p.length==0) return; // No move representation p.split(',').forEach((action) => { var move = actionToMove(action); var i = move[0]; var j = move[1]; board[j][i] = represent[n]; }) }) return board; } function statePrint(piece,theirPiece){ return function(state){ var board = stateToBoard(state,piece,theirPiece); board.forEach((row) => { var r = row.map((u) => u==piece ? u.green : u==theirPiece ? u.red : '0' ); console.log(' [' + r.join('-') + ']'); }) } } function winningPatterns(){ return [ ['c00','c11','c22'], // diagonal ['c20','c11','c02'], // diagonal ['c00','c10','c20'], // first row ['c01','c11','c21'], // second row ['c02','c12','c22'], // third row ['c00','c01','c02'], // first column ['c10','c11','c12'], // second column ['c20','c21','c22'] // third column ]; } function isAvailableToMove(state){ var validMoves = _.reject(state.hash.split(/,|:/),_.isEmpty); var numMoves = validMoves.length; return numMoves < 9; } // Start isVsHuman ? ttt.agentVsHuman() : ttt.agentVsAgent();