ppo-tfjs

Version:

Proximal Policy Optimization (PPO) in TensorFlow.js

92 lines (91 loc) • 3.33 kB

HTML

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>ppo-tfjs (box)</title>
</head>
<body>
    <!-- Load TensorFlow.js first, then ppo.js, which is expected to provide the global PPO class used below. -->
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@latest"></script>
    <script src="ppo.js"></script>
    <script>
        // Side length of the square canvas in pixels. Environment coordinates
        // are multiplied by this value when drawing.
        const CANVAS_SIZE = 500;
        // Distance the agent moves per step for an action component of 1.
        const STEP_SCALE = 0.05;
        // The episode ends once the step counter exceeds this value.
        const MAX_STEPS = 30;
        // The episode ends once the reward (negative distance to the goal)
        // rises above -GOAL_TOLERANCE.
        const GOAL_TOLERANCE = 0.01;

        const canvas = document.createElement('canvas');
        canvas.width = CANVAS_SIZE;
        canvas.height = CANVAS_SIZE;
        document.body.appendChild(canvas);
        const ctx = canvas.getContext('2d');

        /**
         * Two-dimensional "reach the goal" environment rendered on the canvas.
         *
         * The agent and the goal are points whose coordinates are drawn from
         * Math.random() on reset. Each step moves the agent by the action
         * scaled with STEP_SCALE; the reward is the negative Euclidean
         * distance between agent and goal.
         */
        class Env {
            /**
             * Declares the action space (Box, shape [2], bounds -1..1) and the
             * observation space (Box, shape [4], float32).
             */
            constructor() {
                this.actionSpace = {
                    'class': 'Box',
                    'shape': [2],
                    'low': [-1, -1],
                    'high': [1, 1],
                };
                this.observationSpace = {
                    'class': 'Box',
                    'shape': [4],
                    'dtype': 'float32',
                };
            }

            /**
             * Advances the environment by one step and draws the movement.
             *
             * Must be called after reset(), which creates the agent, goal and
             * step counter this method reads.
             *
             * @param {number[]} action - Indexable pair; action[0] moves the
             *     agent along x and action[1] along y.
             * @returns {Promise<Array>} Resolves to [observation, reward, done],
             *     where observation is [agentX, agentY, goalX, goalY].
             */
            async step(action) {
                const oldAgent = [...this.agent];
                this.agent[0] += action[0] * STEP_SCALE;
                this.agent[1] += action[1] * STEP_SCALE;
                this.i += 1;

                const dx = this.agent[0] - this.goal[0];
                const dy = this.agent[1] - this.goal[1];
                const reward = -Math.sqrt(dx * dx + dy * dy);

                const goalReached = reward > -GOAL_TOLERANCE;
                // The counter is incremented before this check, so an episode
                // lasts at most MAX_STEPS + 1 steps.
                const done = this.i > MAX_STEPS || goalReached;
                if (goalReached) {
                    console.log('Goal reached:', reward);
                }

                // Mark the new position and connect it to the previous one.
                ctx.fillStyle = 'blue';
                ctx.fillRect(this.agent[0] * CANVAS_SIZE, this.agent[1] * CANVAS_SIZE, 2, 2);
                ctx.beginPath();
                ctx.moveTo(oldAgent[0] * CANVAS_SIZE, oldAgent[1] * CANVAS_SIZE);
                ctx.lineTo(this.agent[0] * CANVAS_SIZE, this.agent[1] * CANVAS_SIZE);
                ctx.stroke();

                // Wait about 1 ms before returning; presumably this gives the
                // browser a chance to repaint between steps (TODO confirm).
                await new Promise((resolve) => setTimeout(resolve, 1));

                return [
                    [this.agent[0], this.agent[1], this.goal[0], this.goal[1]],
                    reward,
                    done,
                ];
            }

            /**
             * Starts a new episode: picks random agent and goal positions,
             * resets the step counter and redraws the canvas.
             *
             * @returns {number[]} Initial observation
             *     [agentX, agentY, goalX, goalY].
             */
            reset() {
                this.agent = [
                    Math.random(),
                    Math.random(),
                ];
                this.goal = [
                    Math.random(),
                    Math.random(),
                ];
                this.i = 0;

                ctx.clearRect(0, 0, CANVAS_SIZE, CANVAS_SIZE);
                ctx.fillStyle = 'red';
                ctx.fillRect(this.goal[0] * CANVAS_SIZE, this.goal[1] * CANVAS_SIZE, 10, 10);
                // draw point
                ctx.fillStyle = 'blue';
                ctx.fillRect(this.agent[0] * CANVAS_SIZE, this.agent[1] * CANVAS_SIZE, 10, 10);

                return [this.agent[0], this.agent[1], this.goal[0], this.goal[1]];
            }
        }

        const env = new Env();
        const ppo = new PPO(env, {'nSteps': 1024, 'nEpochs': 50, 'verbose': 1});

        // Run training in an async wrapper so it can be awaited, and report a
        // rejected learn() call instead of leaving the promise unhandled.
        (async () => {
            try {
                await ppo.learn({
                    'totalTimesteps': 100000,
                    'callback': {
                        'onTrainingStart': function (p) {
                            console.log(p.config);
                        },
                    },
                });
            } catch (err) {
                console.error('Training failed:', err);
            }
        })();
    </script>
</body>
</html>