UNPKG

arc-agents

Version:

A library for creating and deploying gaming agents at scale

116 lines (101 loc) 3.76 kB
const AutonomousAgentWrapper = require('./autonomous-agent-wrapper')

/** Similarity threshold used when the config does not provide `policySimilarityThreshold`. */
const DEFAULT_POLICY_SIMILARITY_THRESHOLD = 0.8

/**
 * Agent wrapper that smooths stochastic action selection.
 *
 * For every output group configured with more than one sample, the wrapper
 * draws several actions from the model, commits to the most frequent one and
 * keeps returning it on subsequent calls. It resamples once the hold counter
 * reaches zero, or earlier when the current policy differs too much from the
 * policy that produced the held action.
 *
 * Output groups with a sample count of 1 (the default) are passed straight
 * through to `model.selectActionOneHead`.
 *
 * NOTE(review): `this.model` is read by several methods but never assigned in
 * this file; presumably the parent constructor sets it from `model` — confirm.
 */
class ProbabilisticAgentWrapper extends AutonomousAgentWrapper {
  /**
   * @param {object} model - Must expose `outputGroups` (iterable with
   *   `forEach`), `getProbabilities(input)` and
   *   `selectActionOneHead(probabilities, actionKey)`.
   * @param {object} [config={}] - Also forwarded unchanged to the parent class.
   * @param {Object<string, number>} [config.numSamples] - Samples to draw per
   *   output group; groups that are not listed use 1.
   * @param {number} [config.policySimilarityThreshold=0.8] - A resample is
   *   forced when the similarity to the previous policy drops below this value.
   */
  constructor(model, config = {}) {
    super(model, config)

    // Per-output-group state, keyed by the group name.
    this.numSamples = {}
    this.framesRemaining = {}
    this.actionSubkeys = {}
    this.currentAction = {}
    this.previousPolicy = {}

    model.outputGroups.forEach((actionGroup) => {
      this.numSamples[actionGroup] = config.numSamples?.[actionGroup] ?? 1
      this.framesRemaining[actionGroup] = 0
      this.actionSubkeys[actionGroup] = []
      this.currentAction[actionGroup] = []
      this.previousPolicy[actionGroup] = null
    })

    this.policySimilarityThreshold =
      config?.policySimilarityThreshold ?? DEFAULT_POLICY_SIMILARITY_THRESHOLD
  }

  /**
   * Similarity of two policies, defined as one minus the sum of absolute
   * element-wise differences.
   *
   * @param {number[]} A - Policy to iterate over.
   * @param {number[]} B - Policy indexed in parallel with `A`.
   * @returns {number} 1 for identical policies, lower as they diverge. The
   *   result is NaN if `B` has no numeric value at some index of `A`.
   */
  static policySimilarity(A, B) {
    const l1Distance = A.reduce((sum, value, i) => sum + Math.abs(value - B[i]), 0)
    return 1 - l1Distance
  }

  /**
   * Finds the most frequent element of an array.
   *
   * On ties the element that reached the winning count first is returned; if
   * every element occurs once, that is the first element.
   *
   * @param {Array} array - Elements to tally.
   * @returns {{action: *, count: number}|null} The winner and its number of
   *   occurrences, or null for an empty array.
   */
  static getMostCommonAction(array) {
    if (array.length === 0) return null

    // A Map keeps the tally safe for keys such as "constructor" or
    // "__proto__" and keeps `undefined` distinct from the string "undefined".
    const counts = new Map()
    let maxEl = array[0]
    let maxCount = 1
    for (const el of array) {
      const count = (counts.get(el) ?? 0) + 1
      counts.set(el, count)
      if (count > maxCount) {
        maxEl = el
        maxCount = count
      }
    }
    return { action: maxEl, count: maxCount }
  }

  /**
   * Draws `numSamples[actionKey]` actions from the model for one output group.
   *
   * @param {object} probabilities - Value returned by `model.getProbabilities`.
   * @param {string} actionKey - Output group to sample.
   * @returns {Array} One `model.selectActionOneHead` result per sample.
   */
  monteCarloSampling(probabilities, actionKey) {
    const actionArray = []
    for (let i = 0; i < this.numSamples[actionKey]; i++) {
      actionArray.push(this.model.selectActionOneHead(probabilities, actionKey))
    }
    return actionArray
  }

  /**
   * Encodes a sampled input as a string so identical samples can be counted.
   *
   * The key names of the first input seen for an output group are cached in
   * `actionSubkeys` and reused for every later call.
   *
   * @param {object} input - Sampled action; truthy values mark active keys.
   * @param {string} actionKey - Output group the input belongs to.
   * @returns {string|undefined} Active keys, sorted and joined with '-', or
   *   undefined when no key is active. Key names containing '-' would not
   *   survive the `split('-')` done in `sampleAction`.
   */
  convertInputToString(input, actionKey) {
    if (this.actionSubkeys[actionKey].length === 0) {
      this.actionSubkeys[actionKey] = Object.keys(input)
    }
    const activeKeys = this.actionSubkeys[actionKey].filter((key) => input[key])
    return activeKeys.length > 0 ? activeKeys.sort().join('-') : undefined
  }

  /**
   * Expands the currently held action back into an input object.
   *
   * @param {string} actionKey - Output group to expand.
   * @returns {Object<string, boolean>} Every cached sub-key mapped to whether
   *   it is part of `currentAction[actionKey]`.
   */
  assignSampledInput(actionKey) {
    const inputs = {}
    for (const inputKey of this.actionSubkeys[actionKey]) {
      inputs[inputKey] = this.currentAction[actionKey].includes(inputKey)
    }
    return inputs
  }

  /**
   * Selects the action for one output group.
   *
   * With a sample count of 1 or less the model's own selection is returned.
   * Otherwise the held action is returned, after resampling it when the hold
   * counter is zero or the policy has changed beyond the threshold. After a
   * resample the winning action is reused for as many further calls as it had
   * occurrences among the samples.
   *
   * @param {object} probabilities - Value returned by `model.getProbabilities`;
   *   `probabilities[actionKey][0]` is used as the policy vector.
   * @param {string} actionKey - Output group to select for.
   * @returns {object} Input object for this output group.
   */
  sampleAction(probabilities, actionKey) {
    if (!(this.numSamples[actionKey] > 1)) {
      return this.model.selectActionOneHead(probabilities, actionKey)
    }

    const currentPolicy = probabilities[actionKey][0]
    const previousPolicy = this.previousPolicy[actionKey]
    // No previous policy means nothing has been sampled yet for this group.
    const policyChanged =
      previousPolicy !== null &&
      ProbabilisticAgentWrapper.policySimilarity(currentPolicy, previousPolicy) <
        this.policySimilarityThreshold

    if (this.framesRemaining[actionKey] === 0 || policyChanged) {
      const sampledKeys = this.monteCarloSampling(probabilities, actionKey).map((input) =>
        this.convertInputToString(input, actionKey)
      )
      const { action, count } = ProbabilisticAgentWrapper.getMostCommonAction(sampledKeys)
      // An undefined winner means "no key active" was the most common sample.
      this.currentAction[actionKey] = action ? action.split('-') : []
      this.framesRemaining[actionKey] = count
      this.previousPolicy[actionKey] = currentPolicy
    } else {
      this.framesRemaining[actionKey] -= 1
    }
    return this.assignSampledInput(actionKey)
  }

  /**
   * Builds the full action by merging the selection of every output group.
   *
   * @param {*} input - Passed to `model.getProbabilities`.
   * @returns {object} Merged input object; later groups overwrite equal keys.
   */
  selectionFunction(input) {
    const probabilities = this.model.getProbabilities(input)
    let selection = {}
    for (const actionGroup of this.model.outputGroups) {
      selection = { ...selection, ...this.sampleAction(probabilities, actionGroup) }
    }
    return selection
  }

  /**
   * Delegates to the parent's `selectAction`, supplying this class's
   * selection strategy.
   *
   * The strategy is wrapped in an arrow function so that `this` still refers
   * to this wrapper if the parent calls it as a plain function (the parent's
   * implementation is not visible here).
   *
   * @param {*} input - Forwarded to the parent.
   * @returns {*} Whatever the parent's `selectAction` returns.
   */
  selectAction(input) {
    return super.selectAction(input, (...args) => this.selectionFunction(...args))
  }
}

module.exports = ProbabilisticAgentWrapper