ppo-tfjs
Version:
Proximal Policy Optimization (PPO) in TensorFlow.js
92 lines (91 loc) • 3.33 kB
HTML
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>ppo-tfjs (box)</title>
</head>
<body>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@latest"></script>
<script src="ppo.js"></script>
<script>
// Create the 500x500 drawing surface that the environment renders onto.
// `canvas` and `ctx` are script-level bindings; the Env class below draws
// through `ctx`.
const canvas = Object.assign(document.createElement('canvas'), {
    width: 500,
    height: 500,
})
const ctx = canvas.getContext('2d')
document.body.appendChild(canvas)
/**
 * Toy 2-D "reach the goal" environment rendered on the page's canvas.
 *
 * State is an agent position and a goal position, both sampled with
 * Math.random() on reset (so they start inside the unit square). Each step
 * moves the agent by `action * stepSize` per axis and returns the negative
 * Euclidean distance to the goal as the reward.
 *
 * Observation layout: [agentX, agentY, goalX, goalY].
 *
 * Drawing goes through the script-level `ctx` 2-D canvas context.
 */
class Env {
    /**
     * @param {object} [options]
     * @param {number} [options.scale=500] Pixels per unit when drawing; also the
     *     side length of the square cleared on reset.
     * @param {number} [options.stepSize=0.05] Distance moved per unit of action.
     * @param {number} [options.maxSteps=30] The episode ends once the step
     *     counter exceeds this value.
     * @param {number} [options.goalTolerance=0.01] The episode ends when the
     *     agent is closer to the goal than this distance.
     */
    constructor({ scale = 500, stepSize = 0.05, maxSteps = 30, goalTolerance = 0.01 } = {}) {
        this.actionSpace = {
            'class': 'Box',
            'shape': [2],
            'low': [-1, -1],
            'high': [1, 1],
        }
        this.observationSpace = {
            'class': 'Box',
            'shape': [4],
            'dtype': 'float32'
        }
        this.scale = scale
        this.stepSize = stepSize
        this.maxSteps = maxSteps
        this.goalTolerance = goalTolerance
    }

    /**
     * Current observation as a fresh array.
     * @returns {number[]} [agentX, agentY, goalX, goalY]
     */
    observation() {
        return [this.agent[0], this.agent[1], this.goal[0], this.goal[1]]
    }

    /**
     * Advance the environment by one action and draw the move.
     *
     * `reset()` must have been called first so that agent/goal exist.
     *
     * @param {number[]} action Two components (x, y). Each is clamped to the
     *     bounds declared in `actionSpace` before being applied.
     * @returns {Promise<[number[], number, boolean]>} [observation, reward, done]
     */
    async step(action) {
        const { low, high } = this.actionSpace
        const [prevX, prevY] = this.agent

        // Honour the declared Box bounds even if the caller passes raw,
        // unclipped samples; otherwise a single step could exceed stepSize.
        const moveX = Math.min(Math.max(action[0], low[0]), high[0])
        const moveY = Math.min(Math.max(action[1], low[1]), high[1])

        this.agent[0] += moveX * this.stepSize
        this.agent[1] += moveY * this.stepSize
        this.i += 1

        const dx = this.agent[0] - this.goal[0]
        const dy = this.agent[1] - this.goal[1]
        const reward = -Math.sqrt(dx * dx + dy * dy)

        const reachedGoal = reward > -this.goalTolerance
        const done = this.i > this.maxSteps || reachedGoal
        if (reachedGoal) {
            console.log('Goal reached:', reward)
        }

        // Mark the new position and connect it to the previous one.
        ctx.fillStyle = 'blue'
        ctx.fillRect(this.agent[0] * this.scale, this.agent[1] * this.scale, 2, 2)
        ctx.beginPath()
        ctx.moveTo(prevX * this.scale, prevY * this.scale)
        ctx.lineTo(this.agent[0] * this.scale, this.agent[1] * this.scale)
        ctx.stroke()

        // Yield to the event loop for ~1 ms between steps (presumably so the
        // browser gets a chance to repaint the canvas during training).
        await new Promise(resolve => setTimeout(resolve, 1))

        return [this.observation(), reward, done]
    }

    /**
     * Start a new episode: sample new agent and goal positions, zero the step
     * counter, clear the canvas and draw both markers.
     *
     * @returns {number[]} Initial observation [agentX, agentY, goalX, goalY].
     */
    reset() {
        this.agent = [
            Math.random(),
            Math.random(),
        ]
        this.goal = [
            Math.random(),
            Math.random(),
        ]
        this.i = 0

        ctx.clearRect(0, 0, this.scale, this.scale)
        // Goal marker.
        ctx.fillStyle = 'red'
        ctx.fillRect(this.goal[0] * this.scale, this.goal[1] * this.scale, 10, 10)
        // Agent start marker.
        ctx.fillStyle = 'blue'
        ctx.fillRect(this.agent[0] * this.scale, this.agent[1] * this.scale, 10, 10)

        return this.observation()
    }
}
// Wire the environment into the PPO trainer. `PPO` is not defined in this
// script; presumably it is provided by the ppo.js include above.
const env = new Env()
const ppo = new PPO(env, {'nSteps': 1024, 'nEpochs': 50, 'verbose': 1})

// Start training immediately. The promise is not awaited by anything else, so
// failures are caught here and reported explicitly instead of surfacing only
// as an unhandled promise rejection.
;(async () => {
    try {
        await ppo.learn({
            'totalTimesteps': 100000,
            'callback': {
                // Logs the `config` of whatever object the trainer passes in.
                'onTrainingStart': function (p) {
                    console.log(p.config)
                }
            }
        })
    } catch (err) {
        console.error('PPO training failed:', err)
    }
})()
</script>
</body>
</html>