// arc-agents — a library for creating and deploying gaming agents at scale.
// Package metadata: version unspecified; 116 lines (101 loc), 3.76 kB, JavaScript.
const AutonomousAgentWrapper = require('./autonomous-agent-wrapper')
/**
 * Agent wrapper that chooses actions by Monte Carlo sampling from the model's
 * output probabilities. The most common sampled action is held for several
 * frames (its sample count) to reduce frame-to-frame jitter, and is resampled
 * early if the policy shifts past a similarity threshold.
 */
class ProbabilisticAgentWrapper extends AutonomousAgentWrapper {
  /**
   * @param {object} model - Must expose `outputGroups` (array of group names),
   *   `getProbabilities(input)` and `selectActionOneHead(probabilities, key)`
   *   (contract inferred from usage here — confirm against the model class).
   * @param {object} [config={}] - Optional settings:
   *   - `numSamples[group]`: samples drawn per decision for that group (default 1).
   *   - `policySimilarityThreshold`: similarity below which the held action is
   *     resampled (default 0.8).
   */
  constructor(model, config = {}) {
    super(model, config);
    this.numSamples = {};        // samples per decision, per output group
    this.framesRemaining = {};   // frames left before the held action expires
    this.actionSubkeys = {};     // cached input key names, per output group
    this.currentAction = {};     // currently held action, as a list of sub-keys
    this.previousPolicy = {};    // policy vector used for the last resample
    model.outputGroups.forEach((actionGroup) => {
      this.numSamples[actionGroup] = config.numSamples?.[actionGroup] ?? 1;
      this.framesRemaining[actionGroup] = 0;
      this.actionSubkeys[actionGroup] = [];
      this.currentAction[actionGroup] = [];
      this.previousPolicy[actionGroup] = null;
    });
    // Generalized: previously hard-coded to 0.8; the default is unchanged.
    this.policySimilarityThreshold = config.policySimilarityThreshold ?? 0.8;
  }

  /**
   * Similarity of two equal-length probability vectors: 1 minus their L1
   * distance. For probability distributions the L1 distance is at most 2,
   * so the result lies in [-1, 1]; identical vectors score 1.
   */
  static policySimilarity = (A, B) =>
    1 - A.reduce((sum, value, i) => sum + Math.abs(value - B[i]), 0);

  /**
   * Returns the most frequent element of `array` and its count, or null for
   * an empty array. Ties keep the earliest-seen element.
   *
   * Fix: counts are kept in a Map instead of a plain object. With the old
   * `modeMap[el] === undefined` check, an element whose string form matched
   * an Object.prototype property (e.g. "constructor") was never initialized,
   * `modeMap[el]++` produced NaN, and that element could never be selected.
   */
  static getMostCommonAction(array) {
    if (array.length === 0) return null;
    const counts = new Map();
    let maxEl = array[0];
    let maxCount = 1;
    for (const el of array) {
      const count = (counts.get(el) ?? 0) + 1;
      counts.set(el, count);
      if (count > maxCount) {
        maxEl = el;
        maxCount = count;
      }
    }
    return { action: maxEl, count: maxCount };
  }

  /** Draw `numSamples[actionKey]` independent action samples from the model. */
  monteCarloSampling(probabilities, actionKey) {
    const actionArray = [];
    for (let i = 0; i < this.numSamples[actionKey]; i++) {
      actionArray.push(this.model.selectActionOneHead(probabilities, actionKey));
    }
    return actionArray;
  }

  /**
   * Encode a boolean input map as a canonical string key, e.g.
   * `{up: true, left: true, down: false}` -> "left-up" (sorted, '-'-joined).
   * Returns undefined when no inputs are active. The group's sub-key names
   * are cached from the first input object seen.
   */
  convertInputToString(input, actionKey) {
    if (this.actionSubkeys[actionKey].length === 0) {
      // Removed the pointless identity `.map((x) => x)` on Object.keys.
      this.actionSubkeys[actionKey] = Object.keys(input);
    }
    const active = this.actionSubkeys[actionKey].filter((dir) => input[dir]);
    return active.length > 0 ? active.sort().join('-') : undefined;
  }

  /** Expand the currently held action back into a boolean input map. */
  assignSampledInput(actionKey) {
    const inputs = {};
    this.actionSubkeys[actionKey].forEach((inputKey) => {
      inputs[inputKey] = this.currentAction[actionKey].includes(inputKey);
    });
    return inputs;
  }

  /**
   * Select an action for one output group. With numSamples > 1, the most
   * common of several samples is chosen and held for `count` frames; a policy
   * shift below the similarity threshold forces an early resample. With
   * numSamples <= 1 this falls through to a single direct draw.
   */
  sampleAction(probabilities, actionKey) {
    if (this.numSamples[actionKey] <= 1) {
      return this.model.selectActionOneHead(probabilities, actionKey);
    }
    let policyChange = false;
    if (this.previousPolicy[actionKey] !== null) {
      const similarity = ProbabilisticAgentWrapper.policySimilarity(
        probabilities[actionKey][0],
        this.previousPolicy[actionKey]
      );
      policyChange = similarity < this.policySimilarityThreshold;
    }
    if (this.framesRemaining[actionKey] === 0 || policyChange) {
      const samples = this.monteCarloSampling(probabilities, actionKey);
      const { action, count } = ProbabilisticAgentWrapper.getMostCommonAction(
        samples.map((input) => this.convertInputToString(input, actionKey))
      );
      // `action` is undefined when every sample had no active inputs.
      this.currentAction[actionKey] = action ? action.split('-') : [];
      this.framesRemaining[actionKey] = count;
      this.previousPolicy[actionKey] = probabilities[actionKey][0];
    } else {
      this.framesRemaining[actionKey] -= 1;
    }
    return this.assignSampledInput(actionKey);
  }

  /** Merge per-group sampled inputs into one combined selection object. */
  selectionFunction(input) {
    const probabilities = this.model.getProbabilities(input);
    let selection = {};
    this.model.outputGroups.forEach((actionGroup) => {
      selection = { ...selection, ...this.sampleAction(probabilities, actionGroup) };
    });
    return selection;
  }

  /**
   * Entry point: delegates to the parent wrapper with our selection function.
   * NOTE(review): `this.selectionFunction` is passed unbound — assumes the
   * parent invokes it with the correct `this` (e.g. via .call) — confirm.
   */
  selectAction(input) {
    return super.selectAction(input, this.selectionFunction);
  }
}

module.exports = ProbabilisticAgentWrapper;