// q-exp
// Version:
// Reinforcement learning (Q-Learning) library
// 404 lines (329 loc) • 9.19 kB
// JavaScript
;
/**
* Tic-Tac-Toe strategy learning sample
*/
var colors = require('colors');
var prompt = require('prompt');
var _ = require('underscore');
var ql = require('../main.js');
var State = require('../state.js');
var fs = require('fs');
ql.isVerbose = true; // Switch on the library's verbose flag (presumably enables its diagnostic output; confirm in main.js)
// True when `play` is given on the command line (human vs. agent mode).
// Uses slice() rather than splice() so that process.argv itself is left
// untouched for any other code that reads it later.
var isVsHuman = process.argv.slice(2).indexOf('play')>=0;
/*-----------------
* c00 | c10 | c20
*-----------------
* c01 | c11 | c21
*-----------------
* c02 | c12 | c22
*-----------------
*/
// Every playable cell, named `c<column><row>` and listed row by row
// (top-left to bottom-right), matching the board diagram above.
var actionSet = [0,1,2].reduce(function(cells,row){
  return cells.concat([0,1,2].map(function(col){
    return 'c' + col + row;
  }));
}, []);
// Namespace for the two game modes defined below.
var ttt = {};
// Piece symbols of the two players. Both must be non-empty strings:
// an empty string compares loosely equal to the empty-cell marker `0`,
// which makes every empty cell look like a placed piece.
var b1 = '✗';
var b2 = '✓';
/**
 * Start a game between the agent and a human player.
 *
 * Builds the 'tictactoe-1' agent, binds its reward / cost / state
 * generator / printer callbacks, applies `ql.load('./agent')`
 * (presumably restoring a previously saved policy - confirm in main.js),
 * lets the agent open the game and then hands the turn to the human.
 * Any failure along the chain is reported instead of being left as an
 * unhandled promise rejection.
 */
ttt.agentVsHuman = function agentVsHuman(){
  // Third argument of ql.newAgent (named alpha by the original author)
  var alpha = 0.44;
  // Initialise bot & human handler
  var bot = ql.newAgent('tictactoe-1',actionSet,alpha)
    .then(ql.bindRewardMeasure( rewardOf(b1) ))
    .then(ql.bindActionCostMeasure( costOfAct ))
    .then(ql.bindStateGenerator( takeMove(b1) ))
    .then(ql.bindStatePrinter( statePrint(b1,b2) ))
    .then(ql.load('./agent'));
  var board = boardToState(emptyBoard(),b1);
  // Bot starts the game
  bot.then(ql.start(board))
    .then(humanTake) // Human takes the next turn
    .catch((e) => {
      console.error('FATAL '.red + e);
      console.error(e.stack);
    });
};
/**
 * Start a self-play game between two agents.
 *
 * 'tictactoe-1' plays piece b1 and 'tictactoe-2' plays piece b2; both
 * go through the same binding chain and `ql.load('./agent')`. Bot 1
 * makes the opening move, after which the turns alternate through
 * handoverTo(). A single trailing catch reports failures from agent
 * creation / loading as well as from the opening move.
 */
ttt.agentVsAgent = function agentVsAgent(){
  // Third argument of ql.newAgent (named alpha by the original author)
  var alpha = 0.35;

  // Build one fully bound agent playing `mine` against `theirs`
  function newBot(name,mine,theirs){
    return ql.newAgent(name,actionSet,alpha)
      .then(ql.bindRewardMeasure( rewardOf(mine) ))
      .then(ql.bindActionCostMeasure( costOfAct ))
      .then(ql.bindStateGenerator( takeMove(mine) ))
      .then(ql.bindStatePrinter( statePrint(mine,theirs) ))
      .then(ql.load('./agent'));
  }

  // Initialise bots
  var pendingBot1 = newBot('tictactoe-1',b1,b2);
  var pendingBot2 = newBot('tictactoe-2',b2,b1);
  var board = boardToState(emptyBoard(),b1);

  // Start the game
  Promise.all([pendingBot1,pendingBot2])
    .then(function(bots){
      var first = bots[0];
      var second = bots[1];
      // Bot1 makes the first move of the game; returning the chain lets
      // the catch below see its failures too
      return Promise.resolve(first)
        .then(ql.start(board))
        .then((started) => {
          // Bot2 takes the next move
          handoverTo(started,second);
        });
    })
    .catch((e) => {
      console.error('FATAL '.red + e);
      console.error(e.stack);
    });
};
/**
 * Let the human player make the next move.
 *
 * Prints the board from the human's point of view (free cells show
 * their `<column><row>` coordinates), prompts for a move and passes the
 * resulting state on to botTake(). Input that is not a free cell on
 * the 3x3 board is rejected and the prompt is shown again; a prompt
 * error (e.g. the prompt being cancelled) ends the turn with a message
 * instead of crashing on an undefined result.
 *
 * @param {Object} bot - agent whose `state` holds the board after its move
 */
function humanTake(bot){
  // Human takes the next move
  console.log('HUMAN takes a move'.magenta);
  var state = flipSide(bot.state);
  var board = stateToBoard(state,b2,b1);
  // Print the board
  board.forEach((row,j) => {
    console.log(' ' + row.map((c,i) =>
      c === 0 ? `[ ${i}${j}]`.white :
      c === b1 ? '[ ' + c.red + ' ]' :
      '[ ' + c.green + ' ]'
    ).join('-'))
  })
  prompt.start();
  prompt.get(['move'], (err,res) => {
    if (err){
      console.error('FATAL '.red + err);
      return;
    }
    console.log('You picked: '.yellow + res['move']);
    // Accept "12" as well as "c12"; both digits must address the 3x3 board
    var picked = /^c?([0-2])([0-2])$/.exec(String(res['move']).trim());
    var action = picked ? 'c' + picked[1] + picked[2] : null;
    if (action === null || state.hash.indexOf(action)>=0){
      console.log('Invalid move, please pick a free cell'.red);
      humanTake(bot);
      return;
    }
    // Apply an action
    var state_ = takeMove(b1)(state,action);
    // Switch over to bot
    botTake(bot,state_);
  })
}
/**
 * Let the bot answer the human's move.
 *
 * The incoming state is flipped to the bot's perspective and checked
 * for a finished game. If the game goes on, the bot takes one
 * `ql.step`, the result is checked again, and the turn returns to the
 * human. The end of the game is always announced exactly once (win,
 * loss or draw) and no longer relies on a rejected promise for control
 * flow, so no unhandled rejection is left behind.
 *
 * @param {Object} bot   - the agent playing against the human
 * @param {Object} state - board state as produced by the human's move
 */
function botTake(bot,state){
  console.log('Bot takes a move'.magenta);
  // Get the current state
  var state_ = flipSide(state);
  var agent = ql.setState(state_)(bot);
  var reward = rewardOf(b1)(state_);
  console.log('Bot perceives a state reward of : '.cyan + reward);

  // A game is over when somebody has won or no free cell is left
  function isGameOver(s,r){
    return Math.abs(r)>=100 || !isAvailableToMove(s);
  }

  // Announce the result from the bot's point of view
  function conclude(finalReward){
    if (finalReward>=100){
      console.log('¬¬¬¬¬¬ BOT WON! ¬¬¬¬¬¬ '.red)
    }
    else if (finalReward<=-100){
      console.log('¬¬¬¬¬¬ YOU WON! ¬¬¬¬¬¬ '.green)
    }
    else{
      console.log('¬¬¬¬¬¬ DRAW! ¬¬¬¬¬¬ '.cyan)
    }
  }

  // The human's move may already have ended the game
  if (isGameOver(state_,reward)){
    conclude(reward);
    return;
  }

  Promise.resolve(agent)
    .then((_bot) => ql.step(_bot)) // The bot makes a move
    .then((_bot) => {
      var rewardAfter = rewardOf(b1)(_bot.state);
      // Game has ended?
      if (isGameOver(_bot.state,rewardAfter)) conclude(rewardAfter);
      else humanTake(_bot); // Handover to human
    })
    .catch((e) => {
      console.error('FATAL '.red + e);
      console.error(e);
    })
}
/**
 * Turn handover between bots
 *
 * Gives `to` the board left behind by `from` (flipped to `to`'s
 * perspective) and decides what happens next:
 *  - game over, `to` is tictactoe-1: report the result, run ql.learn
 *    and apply ql.save / ql.saveAs;
 *  - game over, `to` is tictactoe-2: skip the turn so that tictactoe-1
 *    gets to observe the final state;
 *  - otherwise: tictactoe-1 runs ql.learn before ql.step, tictactoe-2
 *    only runs ql.step; then the turn is handed back.
 * Every promise chain ends in a catch so failures are reported.
 *
 * @param {Object} from - agent that has just moved
 * @param {Object} to   - agent that moves next
 */
function handoverTo(from,to){
  console.log(to.name.green + ' now takes turn'.magenta);
  // Get the current state
  var state = flipSide(from.state);
  var me = ql.setState(state)(to);
  var opponent = from;
  var reward = rewardOf(b1)(state);
  console.log(to.name.green + ' perceives a state reward of : '.cyan + reward);

  var isBot1 = me.name === 'tictactoe-1';
  var reportError = (e) => {
    console.error((isBot1 ? 'BOT1 ERROR ' : 'BOT2 ERROR ').red + e);
    console.error(e.stack)
  };

  // Check if the game has ended
  var isEnd = !isAvailableToMove(state);
  if (Math.abs(reward)>=100 || isEnd){
    if (isBot1){
      // A winner has been decided (or the board is full)
      if (reward >= 100) console.log('TICTACTOE-1 WON!'.green);
      else if (reward <= -100) console.log('TICTACTOE-1 LOST!'.red);
      // `grey` is a style the colors package provides; `silver` is not,
      // so the old code printed "undefined" here
      else console.log('TICTACTOE-1 DREW'.grey);
      // Learn from its recent move
      Promise.resolve(me)
        .then(ql.learn)
        .then(ql.save('./agent'))
        .then(ql.saveAs('./agent/tictactoe-2'))
        .catch(reportError);
    }
    else{
      // Skip this turn, and handover to the bot1
      console.log('skips the turn'.magenta)
      Promise.resolve(me)
        .then((myself) => handoverTo(myself,opponent))
        .catch(reportError);
    }
    return;
  }

  // Still in the game: bot1 learns from the new state first, then
  // whoever's turn it is makes a move and hands the turn back
  var turn = Promise.resolve(me);
  if (isBot1) turn = turn.then(ql.learn);
  turn
    .then(ql.step)
    .then((myself) => handoverTo(myself,opponent))
    .catch(reportError);
}
/**
 * Swap the two halves of a `mine:theirs` state hash, i.e. look at the
 * same board from the other player's side.
 *
 * @param {Object} state - object exposing the hash as `state.hash`
 * @returns {State} new State built from the swapped hash
 */
function flipSide(state){
  const [mine, theirs] = state.hash.split(':');
  return new State([`${theirs}:${mine}`]);
}
/**
 * Build a reward function for a player.
 *
 * The returned function scores a `mine:theirs` state: 100 for a
 * completed line of mine, -100 for a completed line of theirs, and
 * +/-30 for every line where one side leads by two cells. Scoring
 * stops as soon as the absolute score reaches 100. A state without any
 * of my moves scores 0.
 *
 * @param {string} piece - the player's piece (not used by the scoring)
 * @returns {function(Object): number}
 */
function rewardOf(piece){
  return function measure(state){
    const sides = state.hash.split(':');
    const mine = sides[0];
    const theirs = sides[1];
    if (mine.length === 0) return 0;

    let total = 0;
    for (const line of winningPatterns()){
      // Nothing more to score once the game is decided
      if (Math.abs(total) >= 100) break;

      // Cells of mine on this line minus cells of theirs
      let balance = 0;
      for (const cell of line){
        if (mine.indexOf(cell) >= 0) balance += 1;
        if (theirs.indexOf(cell) >= 0) balance -= 1;
      }

      switch (balance){
        case 3: total = 100; break;   // win
        case 2: total += 30; break;   // almost win
        case -3: total = -100; break; // lost
        case -2: total -= 30; break;  // almost lost
      }
    }
    return total;
  };
}
/**
 * Cost estimate of taking `action` in `state`.
 *
 * @param {Object} state  - object exposing the state hash as `state.hash`
 * @param {string} action - cell name such as 'c11'
 * @returns {number} -Infinity when the cell already appears in the
 *   hash (invalid move), otherwise a random guess in [0, 10)
 */
function costOfAct(state,action){
  const alreadyTaken = state.hash.indexOf(action) !== -1;
  return alreadyTaken ? -Infinity : Math.random()*10;
}
/**
 * Create a fresh 3x3 board in which every cell is the number 0 (empty).
 *
 * @returns {number[][]} three independent rows of three zeros
 */
function emptyBoard(){
  const board = [];
  for (let row = 0; row < 3; row++){
    board.push([0,0,0]);
  }
  return board;
}
/**
 * Transpose a square board: cell [i][j] of the result is cell [j][i]
 * of the input. The input is not modified.
 *
 * @param {Array[]} board - square grid of cells
 * @returns {Array[]} new, transposed grid
 */
function transpose(board){
  const flipped = [];
  for (let i = 0; i < board.length; i++){
    const line = [];
    for (let j = 0; j < board[i].length; j++){
      line.push(board[j][i]);
    }
    flipped.push(line);
  }
  return flipped;
}
/**
 * Build the state generator for a player.
 *
 * The returned function rebuilds the board from `state` (the other
 * side's cells are filled with the placeholder 'X'), puts `piece` on
 * the cell named by `action` and encodes the board as a state again,
 * seen from `piece`'s side.
 *
 * @param {string} piece - the moving player's piece
 * @returns {function(Object, string): State}
 */
function takeMove(piece){
  return function applyMove(state,action){
    // State -> board
    const grid = stateToBoard(state,piece,'X');
    // Place the piece
    const [col, row] = actionToMove(action);
    grid[row][col] = piece;
    // Board -> state
    return boardToState(grid,piece);
  };
}
/**
 * Extract the two coordinate digits from an action name such as 'c12'.
 *
 * @param {string} a - action name
 * @returns {string[]} the two digits as strings, in the order they
 *   appear in the name (column, then row)
 */
function actionToMove(a){
  const found = a.match(/c*(\d)(\d)/);
  // Captures 1 and 2 are the two digits
  return found.slice(1,3);
}
/**
 * Convert a board to a state string based on the perspective of a
 * player.
 *
 * The hash has the form `mine:theirs`, each half being a
 * comma-separated list of `c<column><row>` cell names. Cells holding
 * the number 0 are empty and are skipped. Strict comparisons are used
 * so that an empty cell can never be mistaken for a piece that merely
 * coerces to 0 (such as '' or '0').
 *
 * @param {Array[]} board - grid of cells: 0 or a piece
 * @param {*} piece       - the piece of the player whose view is encoded
 * @returns {State} new State wrapping the hash
 */
function boardToState(board,piece){
  var mine = [];
  var theirs = [];
  board.forEach(function(row,j){
    row.forEach(function(c,i){
      if (c === 0) return; // empty cell
      if (c === piece) mine.push(`c${i}${j}`);
      else theirs.push(`c${i}${j}`);
    })
  })
  var s = mine.join(',') + ':' + theirs.join(',');
  return new State([s]);
}
/**
 * Convert a state string back to a board.
 *
 * The cells listed in the first half of the `mine:theirs` hash are
 * filled with `piece`, those in the second half with `theirPiece`;
 * all other cells stay 0.
 *
 * @param {Object} state  - object exposing the hash as `state.hash`
 * @param {*} piece       - marker for the first half of the hash
 * @param {*} theirPiece  - marker for the second half of the hash
 * @returns {Array[]} 3x3 grid
 */
function stateToBoard(state,piece,theirPiece){
  const board = emptyBoard();
  const markers = [piece,theirPiece];
  const halves = state.hash.split(':');
  for (let n = 0; n < halves.length; n++){
    // A side without any move is encoded as an empty string
    if (halves[n].length === 0) continue;
    for (const action of halves[n].split(',')){
      const [i, j] = actionToMove(action);
      board[j][i] = markers[n];
    }
  }
  return board;
}
/**
 * Build a state printer for a player.
 *
 * The returned function logs the board row by row: own pieces in
 * green, the opponent's in red and empty cells as '0'. Cells are
 * matched with strict equality, so the empty-cell marker 0 can never
 * be taken for a piece that merely coerces to 0 (which used to print
 * `undefined`, as numbers carry no colour properties).
 *
 * @param {string} piece      - the player's own piece
 * @param {string} theirPiece - the opponent's piece
 * @returns {function(Object): void}
 */
function statePrint(piece,theirPiece){
  return function(state){
    var board = stateToBoard(state,piece,theirPiece);
    board.forEach((row) => {
      var r = row.map((u) =>
        u === piece ? u.green :
        u === theirPiece ? u.red :
        '0'
      );
      console.log(' [' + r.join('-') + ']');
    })
  }
}
/**
 * List the eight winning lines of the board as triples of
 * `c<column><row>` cell names: both diagonals first, then the three
 * rows, then the three columns.
 *
 * @returns {string[][]} freshly built list of lines
 */
function winningPatterns(){
  const axis = [0,1,2];
  const cell = (col,row) => `c${col}${row}`;
  const diagonals = [
    axis.map((k) => cell(k,k)),     // c00, c11, c22
    axis.map((k) => cell(2-k,k))    // c20, c11, c02
  ];
  const rows = axis.map((row) => axis.map((col) => cell(col,row)));
  const columns = axis.map((col) => axis.map((row) => cell(col,row)));
  return diagonals.concat(rows, columns);
}
/**
 * Tell whether at least one cell of the board is still free.
 *
 * Counts the moves recorded on both sides of the `mine:theirs` hash
 * (empty fragments produced by the split are ignored) using the native
 * Array filter, so no utility library is needed.
 *
 * @param {Object} state - object exposing the hash as `state.hash`
 * @returns {boolean} true while fewer than 9 moves have been made
 */
function isAvailableToMove(state){
  var takenCells = state.hash.split(/,|:/).filter((token) => token.length > 0);
  return takenCells.length < 9;
}
// Start: play against a human when requested, otherwise run self-play
if (isVsHuman){
  ttt.agentVsHuman();
}
else{
  ttt.agentVsAgent();
}