diffusion
Version:
Diffusion JavaScript client
709 lines (708 loc) • 30.6 kB
JavaScript
"use strict";
/**
* @module diffusion.datatypes
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.JSONDeltaImpl = void 0;
var errors_1 = require("./../../../errors/errors");
var json_pointer_1 = require("./../../data/json/json-pointer");
var json_pointer_map_1 = require("./../../data/json/json-pointer-map");
var span_parser_1 = require("./../../data/json/span-parser");
/**
* A consumer for the span parser that handles any match and only holds on to
* the last pointer
*/
var LastResult = /** @class */ (function () {
    /**
     * Span parser consumer that accepts every match but only remembers the
     * most recently accepted pointer and its byte range.
     *
     * @param splitStructures  a pointer map that receives one entry for each
     *                         split structure reported to this consumer
     */
    function LastResult(splitStructures) {
        this.splitStructures = splitStructures;
    }
    var proto = LastResult.prototype;
    /**
     * Remember a JSON pointer, replacing any pointer remembered earlier.
     *
     * @param pointer  the JSON pointer
     * @param start    the starting position in the buffer
     * @param length   the number of bytes in the buffer
     */
    proto.accept = function (pointer, start, length) {
        this.last = pointer;
        this.lastStart = start;
        this.lastLength = length;
    };
    /**
     * Tell whether any pointer has been remembered yet.
     *
     * @return `true` if {@link accept} has been called with a JSON pointer
     */
    proto.foundLast = function () {
        return typeof this.last !== 'undefined';
    };
    /**
     * Replay the remembered pointer, with its byte range, to another consumer.
     *
     * @param delegate  the consumer whose `accept` receives the pointer
     */
    proto.consumeLast = function (delegate) {
        var pointer = this.last;
        var start = this.lastStart;
        var length = this.lastLength;
        delegate.accept(pointer, start, length);
    };
    /**
     * Record the end of a split structure in the pointer map supplied to the
     * constructor.
     *
     * @param pointer  the JSON pointer
     * @param count    the number of tokens in the split structure
     * @param start    the starting position in the buffer
     * @param length   the number of bytes in the buffer
     */
    proto.splitStructureEnd = function (pointer, count, start, length) {
        var structure = {
            start: start,
            length: length,
            elements: count
        };
        this.splitStructures.put(pointer, structure);
    };
    return LastResult;
}());
/**
* Check if a {@link BufferSlice} contains the same bytes as the original {@link Bytes}
*
* @param original the original bytes
* @param other the buffer to compare to
* @param start the offset of the first byte to compare in the buffer
* @param length the number of bytes to compare
* @return `true` if the slice of the `other` buffer is equal to the
* bytes contained in `original`
*/
function isPartOf(original, other, start, length) {
    // `start` is relative to the slice, so shift it by the slice's own offset
    // to address the underlying buffer.
    var absoluteStart = other.$offset + start;
    var matches = original.equalBytes(other.$buffer, absoluteStart, length);
    return matches;
}
/**
* Incrementally parse the binary delta to calculate a structural delta.
*
* The result reports all differences, but is not necessarily minimal. In
* particular there are occasional 'false positive' REMOVE/INSERT pairs.
*
* ## The Algorithm
*
* We re-constitute the match and insert visitor callbacks into a sequence
* of delete, insert, and match edits. The binary delta format guarantees
* there will not be consecutive callbacks of the same type, and that match
* callbacks are presented in order with no overlaps. Delete and match
* edits will be contiguous with no gap between the end of one edit and
* the start of the next.
*
* The processing uses two SpanParsers, one for the old token stream and one
* for the new token stream. We move forward along each parser driven by
* the binary delta edits, skipping old values covered by a match, adding a
* REMOVE change for each old value covered by a delete, and adding an
* INSERT change for each new value covered by an insert.
*
* The SpanParsers calculate the appropriate JSON pointers to return for a
* span. A span finishes at the end of the first token found after the
* target offset, with these exceptions:
*
* # If a split is found in a field name, the span continues until the
* first pointer is found in the value, potentially consuming one or more
* structure start tokens. The reporting of a change that affects a field
* does not distinguish between the field name and its value, except if the
* value is a structure and the change only affects part of the structure.
* # End structure tokens are eagerly consumed. This has two benefits.
* First, it collapses empty structures to the appropriate parent pointer.
* Second, the closing tokens of non-empty structures are associated with
* the last pointer, which simplifies boundary detection.
*
* The implementation relies on the SpanParser not consuming start structure
* tokens eagerly at a split unless instructed to do so (see
* {@link SpanParser.spanToNext}). This provides a tighter alignment
* between the detected binary differences and the token parsers, allowing a
* simpler pairing of tokens between the two streams. I'm not certain there
* aren't additional edge cases introduced by the eager consumption of
* structure tokens for field names.
*
* The two token streams are treated symmetrically. Each SpanParser is only
* moved in a forward direction; there is no backtracking. We maintain the
* current byte position in each binary value as {@link DeltaVisitor.oldOffset} and
* {@link DeltaVisitor.newOffset}. After each edit is processed,
* `oldParser.nextByte() >= oldOffset` and
* `newParser.nextByte() >= newOffset`.
*
* The fun happens at the edges, specifically where a token covers one or
* more binary edits. We call such a misaligned edge a 'split'.
*
* ### Splits
*
* The appropriate processing of a split depends on the type of edit in
* which it is found. For all edit types, we detect and process
* _trailing edge_ splits; i.e. splits detected at the end of a span
* because the end of the last parsed token is after the end edge. It turns
* out we must also consider _leading edge_ splits for match edits.
*
* #### insert edits
*
* We insert everything in the span. If a trailing edge split is detected,
* we do one of two things.
* * If the old stream parser is at a split, there is nothing to do.
* The split will have been considered and processed for an earlier edit.
* * If the old stream parser is at `oldOffset` (i.e. is not at a
* split token), we consume and add a REMOVE change for the next pointer in
* the old stream. The old stream is otherwise unaffected by the insert
* edit, so `oldOffset` is the start of the next delete or match
* edit. Consuming the token might move it further into other delete or
* match edits. In all cases, correct handling of the insert split requires
* adding a REMOVE change for the first pointer found.
*
* Further split detection is required to detect differences that only
* affect the CBOR structure. If the parser structure depth is different at
* the end of the span than it was at start, the last token is treated in
* the same manner as a trailing edge split. Otherwise comparing 'b' with
* `['a', 'b', 'c']` incorrectly generates
* `INSERT /0,INSERT /2` rather than
* `INSERT /0,REMOVE /1,INSERT /1,INSERT /2`.
*
* #### delete edits
*
* The processing of splits for delete edits is the inverse of that for
* insert edits. (Swap INSERT and REMOVE, old stream and new stream).
*
* #### match edits
*
* When processing match edits we move both parsers. Either or both can
* have a trailing edge split. If both are split, the corresponding pointers
* are used for the REMOVE/INSERT pair. If neither are split, or if only one
* is split, we do nothing. As far as I can tell, there can only be a single
* split if the match ends with a start structure token (`{` or
* `[`), so differences in the structure content will be detected
* by later edits.
*
* I haven't fully convinced myself that it is correct to do nothing in all
* single split cases, but it appears to work. I've experimented with a
* variant where a single split matches the next pointer eagerly consumed
* from the opposite stream, similar to the approach taken for
* insert/delete. This produces less satisfactory matching between
* structures. E.g. for `[]` with `['a' 'b']` it produces
* `REMOVE /0,INSERT /0/0,INSERT/0/1` where the current
* implementation produces `INSERT/0/0,INSERT/0/1`. It may be
* worth revisiting this in conjunction with the post-processing fix to
* collapse complete spans into the parent.
*
* Consideration of parser depth is unnecessary for match trailing edge
* splits. A difference in structure depth must correspond to a binary
* difference covering structure delimiters, which will be handled by insert
* or delete processing.
*
* There is a further complication to deal with for matches. A token in one
* stream can match the end and start of two tokens in the other stream. We
* deal with that by checking for a leading edge split. If the new stream
* parser is at the expected offset, and the old stream parser isn't, we've
* found a token spanning across two matches in the old stream. The previous
* insert edit will have inserted the first pointer for the new stream. We
* add the next pointer found in the new stream in the match span (if any). Similar
* processing is performed if the old stream parser is at the expected
* offset, but the new stream parser is not.
*
* ### Heuristic clean up
*
* The basic approach of using the binary delta to identify splits and
* matching the splits to the appropriate tokens produces a reasonable but
* imperfect result. Further processing is performed to improve the output.
*
* False positives (redundant REMOVE/INSERT pairs) occur reasonably
* frequently in the raw results. These are identified and removed by
* comparing each potential REMOVE with the previous INSERT, and each
* potential INSERT with the previous REMOVE. If the associated values are
* equal, and the pointers are compatible, both changes are dropped.
*
* The raw results might contain pointers to the entire contents of a
* structure. These are replaced with a single pointer to the structure.
*/
var DeltaVisitor = /** @class */ (function () {
/**
* Construct a DeltaVisitor
*
* Creates one {@link SpanParser} for each of the two values and stores the
* supplied maps and factory functions for use by the visitor callbacks.
*
* @param oldValue the old value
* @param newValue the new value
* @param inserted the parts of the second JSON value not found in the
* first JSON value. This map should be empty. It will be
* filled by the DeltaVisitor.
* @param removed the parts of the first JSON value not found in the
* second JSON value. This map should be empty. It will be
* filled by the DeltaVisitor.
* @param partOf a function to return a {@link JSON} object from a part
* of a {@link BufferSlice}.
* @param copyPartOf a function to return a {@link JSON} object from a copy
* of part of a {@link BufferSlice}.
*/
function DeltaVisitor(oldValue, newValue, inserted, removed, partOf, copyPartOf) {
var _this = this;
/**
* The split structures that are part of the old JSON value not found in the new JSON value
*/
this.removedSplitStructures = new json_pointer_map_1.JSONPointerMap();
/**
* The split structures that are part of the new JSON value not found in the old JSON value
*/
this.insertedSplitStructures = new json_pointer_map_1.JSONPointerMap();
/**
* The end of the last match edit, which is also where the next delete
* edit (if any) starts. After each edit is processed,
* `oldParser.nextByte() >= oldOffset`.
*/
this.oldOffset = 0;
/**
* The newParser position where the next match or insert will start,
* which is the cumulative length of the match and insert edits. May be
* less than or greater than oldOffset. After each edit is processed,
* `newParser.nextByte() >= newOffset`.
*/
this.newOffset = 0;
/**
* A JSON pointer that is part of the old value but hasn't yet been found
* in the new value. `null` when no remove is pending.
*/
this.pendingRemove = null;
/**
* A JSON pointer that is part of the new value but hasn't yet been found
* in the old value. `null` when no insert is pending.
*/
this.pendingInsert = null;
/**
* A consumer for the span parser that inserts values
*/
this.inserter = {
/**
* Accept a JSON pointer found in the new value
*
* @param pointer the JSON pointer of the inserted value
* @param start the starting position in the {@link newValue} buffer
* @param length the number of bytes in the {@link newValue} buffer
*/
accept: function (pointer, start, length) {
if (_this.pendingRemove !== null &&
_this.pendingRemove.equalIgnoringIndexes(pointer) &&
isPartOf(_this.pendingRemoveValue, _this.newValue, start, length)) {
// value to insert is equal to the pending removed value
// and their pointers are sufficiently similar. Treat as a
// false positive and discard both.
_this.pendingRemove = null;
_this.pendingRemoveValue = null;
}
else {
// share the underlying bytes to avoid copying at the
// expense of pinning a potentially large array in memory. We
// expect the user to be using the structural delta to
// decide whether to take action, then throwing everything
// away if not. If it turns out that users often keep the
// structural delta but not the value it might be worth
// providing API control.
_this.addInsert(pointer, _this.partOf(_this.newValue, start, length));
// flush pending remove. Subsequent inserts are less likely
// to match it.
_this.addRemove(null, null);
}
},
/**
* Signal the end of a split structure in the new value. The structure
* is recorded in {@link insertedSplitStructures}.
*
* @param pointer the JSON pointer of the inserted value
* @param count the number of tokens in the split structure
* @param start the starting position in the {@link newValue} buffer
* @param length the number of bytes in the {@link newValue} buffer
*/
splitStructureEnd: function (pointer, count, start, length) {
_this.insertedSplitStructures.put(pointer, {
start: start,
length: length,
elements: count
});
}
};
/**
* A consumer for the span parser that removes values
*/
this.remover = {
/**
* Accept a JSON pointer found in the old value
*
* @param pointer the JSON pointer of the removed value
* @param start the starting position in the {@link oldValue} buffer
* @param length the number of bytes in the {@link oldValue} buffer
*/
accept: function (pointer, start, length) {
if (_this.pendingInsert !== null &&
_this.pendingInsert.equalIgnoringIndexes(pointer) &&
isPartOf(_this.pendingInsertValue, _this.oldValue, start, length)) {
// value to remove is equal to the pending inserted value
// and their pointers are sufficiently similar. Treat as a
// false positive and discard both.
_this.pendingInsert = null;
_this.pendingInsertValue = null;
}
else {
// in contrast to the insert consumer, we copy byte ranges
// to avoid pinning the entire old byte array in memory.
_this.addRemove(pointer, _this.copyPartOf(_this.oldValue, start, length));
// flush pending insert. Subsequent removes are less likely
// to match it.
_this.addInsert(null, null);
}
},
/**
* Signal the end of a split structure in the old value. The structure
* is recorded in {@link removedSplitStructures}.
*
* @param pointer the JSON pointer of the removed value
* @param count the number of tokens in the split structure
* @param start the starting position in the {@link oldValue} buffer
* @param length the number of bytes in the {@link oldValue} buffer
*/
splitStructureEnd: function (pointer, count, start, length) {
_this.removedSplitStructures.put(pointer, {
start: start,
length: length,
elements: count
});
}
};
this.oldParser = new span_parser_1.SpanParser(oldValue);
this.newParser = new span_parser_1.SpanParser(newValue);
this.oldValue = oldValue;
this.newValue = newValue;
this.inserted = inserted;
this.removed = removed;
this.partOf = partOf;
this.copyPartOf = copyPartOf;
}
/**
* Called by the {@link BinaryDelta} when a match of binary data has been
* found.
*
* @param start the starting position in the old buffer
* @param length the number of matching bytes
* @return `true`
* @throws an {@link InvalidDataError} if either buffer contains invalid binary data
*/
DeltaVisitor.prototype.match = function (start, length) {
// the old bytes between the end of the previous match and the start of
// this one were not matched, so they are processed as a delete edit
// before the match itself.
this.handleDelete(this.oldOffset, start - this.oldOffset);
this.handleMatch(start, length);
return true;
};
/**
* Handle deleted binary data
*
* @param start the starting position of the deleted data in the old buffer
* @param length the number of deleted bytes
* @throws an {@link InvalidDataError} if either buffer contains invalid binary data
*/
DeltaVisitor.prototype.handleDelete = function (start, length) {
this.checkInvariants();
var end = start + length;
// nothing to do if oldParser is already at or past the end of the
// deleted range (this includes a zero-length delete).
if (this.oldParser.nextByte() < end &&
(this.oldParser.spanTo(end, this.remover) !== 0 || this.oldParser.nextByte() > end)) {
// the end is split. Iff newParser is not split, insert the
// next pointer found. This will process at most one pointer
// because newParser.nextByte() >= newOffset, so we can use
// the stateless insert consumer.
this.newParser.spanToNext(this.newOffset + 1, this.inserter);
}
};
/**
* Handle a match of binary data
*
* @param start the starting position in the old buffer
* @param length the number of matching bytes
* @throws an {@link InvalidDataError} if either buffer contains invalid binary data
*/
DeltaVisitor.prototype.handleMatch = function (start, length) {
this.checkInvariants();
// remember where the match starts in the new value before newOffset is
// advanced to the end of the match.
var newStart = this.newOffset;
var end = start + length;
this.newOffset += length;
this.oldOffset = end;
// leading edge split detection: compare each parser's position with
// the start of the match in its own stream.
var oldNextByte = this.oldParser.nextByte();
var newNextByte = this.newParser.nextByte();
if (newNextByte > newStart && oldNextByte === start) {
// new stream split affects two tokens in old stream. Remove
// the second. This will remove exactly one pointer.
this.oldParser.spanToNext(start + 1, this.remover);
}
else if (oldNextByte > start && newNextByte === newStart) {
// old stream split affects two tokens in new stream. Insert
// the second. This will insert exactly one pointer.
this.newParser.spanToNext(newStart + 1, this.inserter);
}
// move both parsers to the end of the match, keeping only the last
// pointer seen in each stream.
var lastOld = new LastResult(this.removedSplitStructures);
var lastNew = new LastResult(this.insertedSplitStructures);
this.oldParser.spanTo(end, lastOld);
this.newParser.spanTo(this.newOffset, lastNew);
// trailing edge split detection: a stream is split if a pointer was
// found and the parser finished beyond the end of the match.
var oldSplit = lastOld.foundLast() && this.oldParser.nextByte() > end;
var newSplit = lastNew.foundLast() && this.newParser.nextByte() > this.newOffset;
if (oldSplit && newSplit) {
// both streams are split: report the last pointers as a
// REMOVE/INSERT pair. A single split is deliberately ignored.
lastOld.consumeLast(this.remover);
lastNew.consumeLast(this.inserter);
}
};
/**
* Called by the {@link BinaryDelta} when inserted binary data has been
* found.
*
* @param bytes the inserted data
* @return `true`
* @throws an {@link InvalidDataError} if the invariants verified by
* {@link checkInvariants} do not hold
*/
DeltaVisitor.prototype.insert = function (bytes) {
this.checkInvariants();
this.newOffset += bytes.length;
// nothing to do if newParser is already at or past the end of the
// inserted range.
if (this.newParser.nextByte() < this.newOffset &&
(this.newParser.spanTo(this.newOffset, this.inserter) !== 0
|| this.newParser.nextByte() > this.newOffset)) {
// the end is split. Iff oldParser is not split, remove the
// next pointer found. This will process at most one pointer
// because oldParser.nextByte() >= oldOffset, so we
// can use the stateless remove consumer.
this.oldParser.spanToNext(this.oldOffset + 1, this.remover);
}
return true;
};
/**
* Called by the {@link BinaryDelta} when the end of the buffer has been found.
*
* @throws an {@link InvalidDataError} if either buffer contains invalid binary data
*/
DeltaVisitor.prototype.end = function () {
// any old bytes after the last match were not matched; process them
// as a final delete edit.
this.handleDelete(this.oldOffset, this.oldValue.$length - this.oldOffset);
// confirm whatever is still pending, leaving nothing pending.
this.addInsert(null, null);
this.addRemove(null, null);
// heuristic clean up of fully removed / fully inserted structures.
this.replaceFullRemovedStructures();
this.replaceFullInsertedStructures();
};
/**
* Add a JSON pointer that is pending an insert. Any previous pending
* insert is confirmed and placed into the {@link inserted} map.
*
* Passing `null` for both arguments confirms the previous pending insert
* without making a new one pending.
*
* @param nextPointer the next pointer pending an insert
* @param nextValue the data associated with the insert
*/
DeltaVisitor.prototype.addInsert = function (nextPointer, nextValue) {
if (this.pendingInsert !== null) {
this.inserted.put(this.pendingInsert, this.pendingInsertValue);
}
this.pendingInsert = nextPointer;
this.pendingInsertValue = nextValue;
};
/**
* Add a JSON pointer that is pending a remove. Any previous pending
* remove is confirmed and placed into the {@link removed} map.
*
* Passing `null` for both arguments confirms the previous pending remove
* without making a new one pending.
*
* @param nextPointer the next pointer pending a remove
* @param nextValue the data associated with the remove
*/
DeltaVisitor.prototype.addRemove = function (nextPointer, nextValue) {
if (this.pendingRemove !== null) {
this.removed.put(this.pendingRemove, this.pendingRemoveValue);
}
this.pendingRemove = nextPointer;
this.pendingRemoveValue = nextValue;
};
/**
* No change. Intentionally does nothing, so both result maps are left
* untouched.
*
* NOTE(review): presumably invoked by the {@link BinaryDelta} when the two
* values are identical - confirm against the BinaryDelta implementation.
*/
DeltaVisitor.prototype.noChange = function () {
// no-op
};
/**
* Check if an invalid state has occurred, i.e. either parser is positioned
* before the offset recorded for its stream.
*
* @throws an {@link InvalidDataError} if either buffer contains invalid binary data
*/
DeltaVisitor.prototype.checkInvariants = function () {
if (this.oldParser.nextByte() < this.oldOffset ||
this.newParser.nextByte() < this.newOffset) {
throw new errors_1.InvalidDataError('Invalid binary delta');
}
};
/**
* Heuristic post-processing: replace split structures for which removed
* includes every entry with a single pointer.
*/
DeltaVisitor.prototype.replaceFullRemovedStructures = function () {
var i = this.removedSplitStructures.postOrder();
while (i.hasNext()) {
var s = i.next();
var split = s.value;
var entry = this.removed.getEntry(s.pointer);
// the structure is fully removed when the removed map holds as many
// children under its pointer as the structure has elements.
if (entry !== null && entry.numberOfChildren() === split.elements) {
// as for individual removes, the bytes of the old value are copied.
entry.setValue(this.copyPartOf(this.oldValue, split.start, split.length));
entry.removeDescendants();
}
}
};
/**
* Heuristic post-processing: replace split structures for which
* inserted includes every entry with a single pointer.
*/
DeltaVisitor.prototype.replaceFullInsertedStructures = function () {
var i = this.insertedSplitStructures.postOrder();
while (i.hasNext()) {
var s = i.next();
var split = s.value;
var entry = this.inserted.getEntry(s.pointer);
// the structure is fully inserted when the inserted map holds as many
// children under its pointer as the structure has elements.
if (entry !== null && entry.numberOfChildren() === split.elements) {
// as for individual inserts, the bytes of the new value are shared.
entry.setValue(this.partOf(this.newValue, split.start, split.length));
entry.removeDescendants();
}
}
};
return DeltaVisitor;
}());
/**
* Implementation of the change map
*
* @inheritdoc
*/
var ChangeMapImpl = /** @class */ (function () {
    /**
     * Create a ChangeMapImpl
     *
     * The pointer map is iterated once, here, to build the array returned by
     * {@link entrySet}; each entry holds the pointer as a string and the
     * result of calling `get()` on the stored value.
     *
     * @param parts  the JSON pointers contained in the change
     */
    function ChangeMapImpl(parts) {
        /**
         * The array of key-value pairs contained in the change map
         */
        this.entries = [];
        this.parts = parts;
        var i = parts.iterator();
        while (i.hasNext()) {
            var n = i.next();
            this.entries.push({
                key: n.pointer.toString(),
                value: n.value.get()
            });
        }
        this.length = this.entries.length;
    }
    /**
     * Look up the value recorded for a JSON pointer.
     *
     * @param key  the JSON pointer expression
     * @return the value recorded for the pointer, or `undefined` if the
     *         change map has no value for that pointer
     * @inheritdoc
     */
    ChangeMapImpl.prototype.get = function (key) {
        var json = this.parts.get(json_pointer_1.JSONPointer.parse(key));
        // A pointer that is not part of the change has no stored value.
        // Report that as an absent result instead of failing with a
        // TypeError when dereferencing it.
        if (json === null || json === undefined) {
            return undefined;
        }
        return json.get();
    };
    /**
     * Return the key-value pairs collected when the map was constructed.
     *
     * @return the array of `{ key, value }` entries
     * @inheritdoc
     */
    ChangeMapImpl.prototype.entrySet = function () {
        return this.entries;
    };
    /**
     * Check whether the underlying pointer map contains a JSON pointer.
     *
     * @param key  the JSON pointer expression
     * @return the result of the pointer map's `contains` check
     * @inheritdoc
     */
    ChangeMapImpl.prototype.containsKey = function (key) {
        return this.parts.contains(json_pointer_1.JSONPointer.parse(key));
    };
    /**
     * Create a change map from the pointer map's `descendants` view of a
     * JSON pointer.
     *
     * @param pointer  the JSON pointer expression
     * @return a new change map
     * @inheritdoc
     */
    ChangeMapImpl.prototype.descendants = function (pointer) {
        return new ChangeMapImpl(this.parts.descendants(json_pointer_1.JSONPointer.parse(pointer)));
    };
    /**
     * Create a change map from the pointer map's `intersection` view of a
     * JSON pointer.
     *
     * @param pointer  the JSON pointer expression
     * @return a new change map
     * @inheritdoc
     */
    ChangeMapImpl.prototype.intersection = function (pointer) {
        return new ChangeMapImpl(this.parts.intersection(json_pointer_1.JSONPointer.parse(pointer)));
    };
    return ChangeMapImpl;
}());
/**
* Implementation of {@link JSONDelta}.
*
* @inheritdoc
*/
var JSONDeltaImpl = /** @class */ (function () {
    /**
     * Create a new JSONDeltaImpl
     *
     * @param factory      constructor function for creating {@link JSON} objects
     * @param original     the original JSON data
     * @param newValue     the modified JSON data
     * @param binaryDelta  the binary delta between the original and the modified
     *                     data. If no binary delta is provided (`undefined` or
     *                     `null`) the JSON delta will assume that all old JSON
     *                     data is removed and replaced by the new JSON data.
     * @throws an {@link InvalidDataError} if a binaryDelta was supplied but it could not be parsed
     */
    function JSONDeltaImpl(factory, original, newValue, binaryDelta) {
        /**
         * The parts of the second JSON value not found in the first JSON value.
         */
        this.insertedMap = new json_pointer_map_1.JSONPointerMap();
        /**
         * The parts of the first JSON value not found in the second JSON value.
         */
        this.removedMap = new json_pointer_map_1.JSONPointerMap();
        this.factory = factory;
        if (binaryDelta !== undefined && binaryDelta !== null) {
            // the visitor fills both maps while the binary delta is replayed
            binaryDelta.visit(new DeltaVisitor(original, newValue, this.insertedMap, this.removedMap, this.partOf.bind(this), this.copyPartOf.bind(this)));
        }
        else {
            // Without a binary delta the whole old value counts as removed and
            // the whole new value counts as inserted, both at the root pointer.
            this.removedMap.put(json_pointer_1.ROOT, original);
            this.insertedMap.put(json_pointer_1.ROOT, newValue);
        }
    }
    /**
     * Return a {@link JSON} object from a part of a {@link BufferSlice}.
     * The slice's underlying buffer is passed to the factory as-is, not copied.
     *
     * @param value   the buffer slice to construct the JSON from
     * @param start   the offset in the buffer slice
     * @param length  the number of bytes to use from the buffer slice
     * @return a JSON object constructed from the part of the buffer slice
     */
    JSONDeltaImpl.prototype.partOf = function (value, start, length) {
        return new this.factory(value.$buffer, value.$offset + start, length);
    };
    /**
     * Return a {@link JSON} object from a part of a {@link BufferSlice}. The
     * buffer is copied before being passed to the JSON constructor.
     *
     * @param value   the buffer slice to construct the JSON from
     * @param start   the offset in the buffer slice
     * @param length  the number of bytes to use from the buffer slice
     * @return a JSON object constructed from the part of the buffer slice
     */
    JSONDeltaImpl.prototype.copyPartOf = function (value, start, length) {
        var offsetStart = value.$offset + start;
        // copy into a fresh array so the result does not keep the source
        // buffer alive
        var buffer = new Uint8Array(length);
        buffer.set(value.$buffer.subarray(offsetStart, offsetStart + length));
        return new this.factory(buffer, 0, length);
    };
    /**
     * Return a change map built from the removed pointer map.
     *
     * @inheritdoc
     */
    JSONDeltaImpl.prototype.removed = function () {
        return new ChangeMapImpl(this.removedMap);
    };
    /**
     * Return a change map built from the inserted pointer map.
     *
     * @inheritdoc
     */
    JSONDeltaImpl.prototype.inserted = function () {
        return new ChangeMapImpl(this.insertedMap);
    };
    /**
     * Report whether either pointer map is non-empty.
     *
     * @inheritdoc
     */
    JSONDeltaImpl.prototype.hasChanges = function () {
        return this.removedMap.size !== 0 || this.insertedMap.size !== 0;
    };
    /**
     * Convert the JSON delta to a string
     *
     * @returns a string representation of the JSON delta
     */
    JSONDeltaImpl.prototype.toString = function () {
        return ['REMOVE ', this.removedMap, ' INSERT ', this.insertedMap].join('');
    };
    return JSONDeltaImpl;
}());
exports.JSONDeltaImpl = JSONDeltaImpl;