|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const getRandomInt = (min, max) => { |
|
min = Math.ceil(min) |
|
max = Math.floor(max) |
|
return Math.floor(Math.random() * (max - min + 1)) + min |
|
} |
|
|
|
|
|
|
|
|
|
class ReplyBuffer { |
|
|
|
|
|
|
|
|
|
|
|
|
|
constructor(limit = 500, onDiscard = () => {}) { |
|
this._limit = limit |
|
this._onDiscard = onDiscard |
|
|
|
this._buffer = new Array(limit).fill() |
|
this._pool = [] |
|
|
|
this.size = 0 |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
add(transition) { |
|
let { id, priority = 1 } = transition |
|
if (id === undefined || id < 0 || priority < 1) |
|
throw new Error('Invalid arguments') |
|
|
|
id = id % this._limit |
|
|
|
if (this._buffer[id]) { |
|
const start = this._pool.indexOf(id) |
|
let deleteCount = 0 |
|
while (this._pool[start + deleteCount] == id) |
|
deleteCount++ |
|
|
|
this._pool.splice(start, deleteCount) |
|
|
|
this._onDiscard(this._buffer[id]) |
|
} else |
|
this.size++ |
|
|
|
while (priority--) |
|
this._pool.push(id) |
|
|
|
this._buffer[id] = transition |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sample(n = 1) { |
|
if (this.size < n) |
|
return [] |
|
|
|
const |
|
sampleIndices = new Set(), |
|
samples = [] |
|
|
|
let counter = n |
|
while (counter--) |
|
while (sampleIndices.size < this.size) { |
|
const randomIndex = this._pool[getRandomInt(0, this._pool.length - 1)] |
|
if (sampleIndices.has(randomIndex)) |
|
continue |
|
|
|
sampleIndices.add(randomIndex) |
|
|
|
const { id, state, action, reward } = this._buffer[randomIndex] |
|
const nextId = id + 1 |
|
const next = this._buffer[nextId % this._limit] |
|
|
|
if (next && next.id === nextId) { |
|
samples.push({ state, action, reward, nextState: next.state}) |
|
break |
|
} |
|
} |
|
|
|
return samples.length == n ? samples : [] |
|
} |
|
} |
|
|
|
|
|
(() => { |
|
return |
|
|
|
const rb = new ReplyBuffer(5) |
|
rb.add({id: 0, state: 0}) |
|
rb.add({id: 1, state: 1}) |
|
rb.add({id: 2, state: 2, priority: 3}) |
|
|
|
console.assert(rb.size === 3) |
|
console.assert(rb._pool.length === 5) |
|
console.assert(rb._buffer[0].id === 0) |
|
|
|
rb.add({id: 2, state: 2}) |
|
rb.add({id: 4, state: 4, priority: 2}) |
|
|
|
console.assert(rb.size === 4) |
|
console.assert(rb._pool.length === 5) |
|
console.assert(JSON.stringify(rb._pool) === '[0,1,2,4,4]') |
|
|
|
rb.add({id: 5, state: 0, priority: 2}) |
|
|
|
console.assert(rb.size === 4) |
|
console.assert(rb._pool.length === 6) |
|
console.assert(rb._buffer.length === 5) |
|
console.assert(rb._buffer[0].id === 5) |
|
console.assert(JSON.stringify(rb._pool) === '[1,2,4,4,0,0]') |
|
|
|
console.assert(rb.sample(999).length === 0, 'Too many samples') |
|
|
|
let samples1 = rb.sample(2) |
|
console.assert(samples1.length === 2, 'Only two samples possible') |
|
console.assert(samples1[0].nextState === (samples1[0].state + 1) % 5, 'Next state should be valid', samples1) |
|
|
|
rb.add({id: 506, state: 506, priority: 3}) |
|
|
|
let samples2 = rb.sample(1) |
|
console.assert(samples2.length === 1, 'Only one suitable sample with valid next state') |
|
console.assert(samples2[0].state === 4, 'Sample with id:4') |
|
console.assert(rb._buffer[1].id === 506, '506 % 5 = 1') |
|
|
|
console.assert(rb.sample(2).length === 0, |
|
'Can not sample 2 transitions since next state is available only for one state') |
|
})() |
|
|