UNPKG

diffusion

Version:

Diffusion JavaScript client

709 lines (708 loc) 30.6 kB
"use strict";
/**
 * @module diffusion.datatypes
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.JSONDeltaImpl = void 0;
var errors_1 = require("./../../../errors/errors");
var json_pointer_1 = require("./../../data/json/json-pointer");
var json_pointer_map_1 = require("./../../data/json/json-pointer-map");
var span_parser_1 = require("./../../data/json/span-parser");
/**
 * A consumer for the span parser that handles any match and only holds on to
 * the last pointer
 */
var LastResult = /** @class */ (function () {
    /**
     * Create a LastResult consumer
     *
     * @param splitStructures  a pointer map that will be filled with any split
     *                         structures found in the matching data
     */
    function LastResult(splitStructures) {
        this.splitStructures = splitStructures;
    }
    /**
     * Accept a JSON pointer found. Each call overwrites the previously
     * recorded pointer, so only the most recent one is retained.
     *
     * @param pointer  the JSON pointer
     * @param start    the starting position in the buffer
     * @param length   the number of bytes in the buffer
     */
    LastResult.prototype.accept = function (pointer, start, length) {
        this.last = pointer;
        this.lastStart = start;
        this.lastLength = length;
    };
    /**
     * Check if a last pointer has been found
     *
     * @return  `true` if {@link accept} has been called with a JSON pointer
     */
    LastResult.prototype.foundLast = function () {
        // `last` is never initialised by the constructor, so it stays
        // undefined until accept() is called.
        /* tslint:disable-next-line:strict-type-predicates */
        return this.last !== undefined;
    };
    /**
     * Pass the last pointer to the delegate consumer
     *
     * @param delegate  the delegate consumer
     */
    LastResult.prototype.consumeLast = function (delegate) {
        delegate.accept(this.last, this.lastStart, this.lastLength);
    };
    /**
     * Signal the end of a split structure. The structure is recorded in the
     * pointer map supplied to the constructor.
     *
     * @param pointer  the JSON pointer
     * @param count    the number of tokens in the split structure
     * @param start    the starting position in the buffer
     * @param length   the number of bytes in the buffer
     */
    LastResult.prototype.splitStructureEnd = function (pointer, count, start, length) {
        this.splitStructures.put(pointer, { start: start, length: length, elements: count });
    };
    return LastResult;
}());
/**
 * Check if a {@link BufferSlice} contains the same bytes as the original {@link Bytes}
 *
 * @param original  the original bytes
 * @param other     the buffer to compare to
 * @param start     the offset of the first byte to compare in the buffer,
 *                  relative to the start of the slice
 * @param length    the number of bytes to compare
 * @return          `true` if the slice of the `other` buffer is equal to the
 *                  bytes contained in `original`
 */
function isPartOf(original, other, start, length) {
    // translate the slice-relative offset into an offset in the backing buffer
    return original.equalBytes(other.$buffer, other.$offset + start, length);
}
/**
 * Incrementally parse the binary delta to calculate a structural delta.
 *
 * The result reports all differences, but is not necessarily minimal. In
 * particular there are occasional 'false positive' REMOVE/INSERT pairs.
 *
 * ## The Algorithm
 *
 * We re-constitute the match and insert visitor callbacks into a sequence
 * of delete, insert, and match edits. The binary delta format guarantees
 * there will not be consecutive callbacks of the same type, and that match
 * callbacks are presented in order with no overlaps. Delete and match
 * edits will be contiguous with no gap between the end of one edit and
 * the start of the next.
 *
 * The processing uses two SpanParsers, one for the old token stream and one
 * for the new token stream. We move forward along each parser driven by
 * the binary delta edits, skipping old values covered by a match, adding a
 * REMOVE change for each old value covered by a delete, and adding an
 * INSERT change for each new value covered by an insert.
 *
 * The SpanParsers calculate the appropriate JSON pointers to return for a
 * span. A span finishes at the end of the first token found after the
 * target offset, with these exceptions:
 *
 * # If a split is found in a field name, the span continues until the
 *   first pointer is found in the value, potentially consuming one or more
 *   structure start tokens. The reporting of a change that affects a field
 *   does not distinguish between the field name and its value, except if the
 *   value is a structure and the change only affects part of the structure.
 * # End structure tokens are eagerly consumed. This has two benefits.
 *   First, it collapses empty structures to the appropriate parent pointer.
 *   Second, the closing tokens of non-empty structures are associated with
 *   the last pointer, which simplifies boundary detection.
 *
 * The implementation relies on the SpanParser not consuming start structure
 * tokens eagerly at a split unless instructed to do so (see
 * {@link SpanParser.spanToNext}). This provides a tighter alignment
 * between the detected binary differences and the token parsers, allowing a
 * simpler pairing of tokens between the two streams. I'm not certain there
 * aren't additional edge cases introduced by the eager consumption of
 * structure tokens for field names.
 *
 * The two token streams are treated symmetrically. Each SpanParser is only
 * moved in a forward direction; there is no backtracking. We maintain the
 * current byte position in each binary value as {@link DeltaVisitor.oldOffset} and
 * {@link DeltaVisitor.newOffset}. After each edit is processed,
 * `oldParser.nextByte() >= oldOffset` and
 * `newParser.nextByte() >= newOffset`.
 *
 * The fun happens at the edges, specifically where a token covers one or
 * more binary edits. We call such a misaligned edge a 'split'.
 *
 * ### Splits
 *
 * The appropriate processing of a split depends on the type of edit in
 * which it is found. For all edit types, we detect and process
 * _trailing edge_ splits; i.e. splits detected at the end of a span
 * because the end of the last parsed token is after the end edge. It turns
 * out we must also consider _leading edge_ splits for match edits.
 *
 * #### insert edits
 *
 * We insert everything in the span. If a trailing edge split is detected,
 * we do one of two things.
 *
 * * If the old stream parser is at a split, there is nothing to do.
 *   The split will have been considered and processed for an earlier edit.
 * * If the old stream parser is at `oldOffset` (i.e. is not at a
 *   split token), we consume and add a REMOVE change for the next pointer in
 *   the old stream. The old stream is otherwise unaffected by the insert
 *   edit, so `oldOffset` is the start of the next delete or match
 *   edit. Consuming the token might move it further into other delete or
 *   match edits. In all cases, correct handling of the insert split requires
 *   adding a REMOVE change for the first pointer found.
 *
 * Further split detection is required to detect differences that only
 * affect the CBOR structure. If the parser structure depth is different at
 * the end of the span than it was at start, the last token is treated in
 * the same manner as a trailing edge split. Otherwise comparing 'b' with
 * `['a', 'b', 'c']` incorrectly generates
 * `INSERT /0,INSERT /2` rather than
 * `INSERT /0,REMOVE /1,INSERT /1,INSERT /2`.
 *
 * #### delete edits
 *
 * The processing of splits for delete edits is the inverse of that for
 * insert edits. (Swap INSERT and REMOVE, old stream and new stream).
 *
 * #### match edits
 *
 * When processing match edits we move both parsers. Either or both can
 * have a trailing edge split. If both are split, the corresponding pointers
 * are used for the REMOVE/INSERT pair. If neither are split, or if only one
 * is split, we do nothing. As far as I can tell, there can only be a single
 * split if the match ends with a start structure token (`{` or
 * `[`), so differences in the structure content will be detected
 * by later edits.
 *
 * I haven't fully convinced myself that it is correct to do nothing in all
 * single split cases, but it appears to work. I've experimented with a
 * variant where a single split matches the next pointer eagerly consumed
 * from the opposite stream, similar to the approach taken for
 * insert/delete. This produces less satisfactory matching between
 * structures. E.g. for `[]` with `['a' 'b']` it produces
 * `REMOVE /0,INSERT /0/0,INSERT/0/1` where the current
 * implementation produces `INSERT/0/0,INSERT/0/1`. It may be
 * worth revisiting this in conjunction with the post-processing fix to
 * collapse complete spans into the parent.
 *
 * Consideration of parser depth is unnecessary for match trailing edge
 * splits. A difference in structure depth must correspond to a binary
 * difference covering structure delimiters, which will be handled by insert
 * or delete processing.
 *
 * There is a further complication to deal with for matches. A token in one
 * stream can match the end and start of two tokens in the other stream. We
 * deal with that by checking for a leading edge split. If the new stream
 * parser is at the expected offset, and the old stream parser isn't, we've
 * found a token spanning across two matches in the old stream. The previous
 * insert edit will have inserted the first pointer for the new stream. We
 * add the next found in the new stream in the match span (if any). Similar
 * processing is performed if the old stream parser is at the expected
 * offset, but the new stream parser is not.
 *
 * ### Heuristic clean up
 *
 * The basic approach of using the binary delta to identify splits and
 * matching the splits to the appropriate tokens produces a reasonable but
 * imperfect result. Further processing is performed to improve the output.
 *
 * False positives (redundant REMOVE/INSERT pairs) occur reasonably
 * frequently in the raw results. These are identified and removed by
 * comparing each potential REMOVE with the previous INSERT, and each
 * potential INSERT with the previous REMOVE. If the associated values are
 * equal, and the pointers are compatible, both changes are dropped.
 *
 * The raw results might contain pointers to the entire contents of a
 * structure. These are replaced with a single pointer to the structure.
 */
var DeltaVisitor = /** @class */ (function () {
    /**
     * Construct a DeltaVisitor
     *
     * @param oldValue    the old value
     * @param newValue    the new value
     * @param inserted    the parts of the second JSON value not found in the
     *                    first JSON value. This map should be empty. It will be
     *                    filled by the DeltaVisitor.
     * @param removed     the parts of the first JSON value not found in the
     *                    second JSON value. This map should be empty. It will be
     *                    filled by the DeltaVisitor.
     * @param partOf      a function to return a {@link JSON} object from a part
     *                    of a {@link BufferSlice}.
     * @param copyPartOf  a function to return a {@link JSON} object from a copy
     *                    of part of a {@link BufferSlice}.
     */
    function DeltaVisitor(oldValue, newValue, inserted, removed, partOf, copyPartOf) {
        var _this = this;
        /**
         * The split structures that are part of the old JSON value not found in the new JSON value
         */
        this.removedSplitStructures = new json_pointer_map_1.JSONPointerMap();
        /**
         * The split structures that are part of the new JSON value not found in the old JSON value
         */
        this.insertedSplitStructures = new json_pointer_map_1.JSONPointerMap();
        /**
         * The end of the last match edit. After each edit is processed,
         * `oldParser.nextByte() >= oldOffset`.
         */
        this.oldOffset = 0;
        /**
         * The newParser position where the next match or insert will start,
         * which is the cumulative length of the match and insert edits. May be
         * less than or greater than oldOffset. After each edit is processed,
         * `newParser.nextByte() >= newOffset`.
         */
        this.newOffset = 0;
        /**
         * A JSON pointer that is part of the old value but hasn't yet been found
         * in the new value. Its value is held in `pendingRemoveValue`, which is
         * only meaningful while this pointer is non-null.
         */
        this.pendingRemove = null;
        /**
         * A JSON pointer that is part of the new value but hasn't yet been found
         * in the old value. Its value is held in `pendingInsertValue`, which is
         * only meaningful while this pointer is non-null.
         */
        this.pendingInsert = null;
        /**
         * A consumer for the span parser that inserts values
         */
        this.inserter = {
            /**
             * Accept a JSON pointer found in the new value
             *
             * @param pointer  the JSON pointer of the inserted value
             * @param start    the starting position in the {@link newValue} buffer
             * @param length   the number of bytes in the {@link newValue} buffer
             */
            accept: function (pointer, start, length) {
                if (_this.pendingRemove !== null
                    && _this.pendingRemove.equalIgnoringIndexes(pointer)
                    && isPartOf(_this.pendingRemoveValue, _this.newValue, start, length)) {
                    // value to insert is equal to the pending removed value
                    // and their pointers are sufficiently similar. Treat as a
                    // false positive and discard both.
                    _this.pendingRemove = null;
                    _this.pendingRemoveValue = null;
                }
                else {
                    // share the underlying bytes to avoid copying at the
                    // expense of pinning a potentially large array in memory. We
                    // expect the user to be using the structural delta to
                    // decide whether to take action, then throwing everything
                    // away if not. If it turns out that users often keep the
                    // structural delta but not the value it might be worth
                    // providing API control.
                    _this.addInsert(pointer, _this.partOf(_this.newValue, start, length));
                    // flush pending remove. Subsequent inserts are less likely
                    // to match it.
                    _this.addRemove(null, null);
                }
            },
            /**
             * Signal the end of a split structure
             *
             * @param pointer  the JSON pointer of the inserted value
             * @param count    the number of tokens in the split structure
             * @param start    the starting position in the {@link newValue} buffer
             * @param length   the number of bytes in the {@link newValue} buffer
             */
            splitStructureEnd: function (pointer, count, start, length) {
                _this.insertedSplitStructures.put(pointer, { start: start, length: length, elements: count });
            }
        };
        /**
         * A consumer for the span parser that removes values
         */
        this.remover = {
            /**
             * Accept a JSON pointer found in the old value
             *
             * @param pointer  the JSON pointer of the removed value
             * @param start    the starting position in the {@link oldValue} buffer
             * @param length   the number of bytes in the {@link oldValue} buffer
             */
            accept: function (pointer, start, length) {
                if (_this.pendingInsert !== null
                    && _this.pendingInsert.equalIgnoringIndexes(pointer)
                    && isPartOf(_this.pendingInsertValue, _this.oldValue, start, length)) {
                    // value to remove is equal to the pending inserted value
                    // and their pointers are sufficiently similar. Treat as a
                    // false positive and discard both.
                    _this.pendingInsert = null;
                    _this.pendingInsertValue = null;
                }
                else {
                    // in contrast to the insert consumer, we copy byte ranges
                    // to avoid pinning the entire old byte array in memory.
                    _this.addRemove(pointer, _this.copyPartOf(_this.oldValue, start, length));
                    // flush pending insert. Subsequent removes are less likely
                    // to match it.
                    _this.addInsert(null, null);
                }
            },
            /**
             * Signal the end of a split structure
             *
             * @param pointer  the JSON pointer of the removed value
             * @param count    the number of tokens in the split structure
             * @param start    the starting position in the {@link oldValue} buffer
             * @param length   the number of bytes in the {@link oldValue} buffer
             */
            splitStructureEnd: function (pointer, count, start, length) {
                _this.removedSplitStructures.put(pointer, { start: start, length: length, elements: count });
            }
        };
        this.oldParser = new span_parser_1.SpanParser(oldValue);
        this.newParser = new span_parser_1.SpanParser(newValue);
        this.oldValue = oldValue;
        this.newValue = newValue;
        this.inserted = inserted;
        this.removed = removed;
        this.partOf = partOf;
        this.copyPartOf = copyPartOf;
    }
    /**
     * Called by the {@link BinaryDelta} when a match of binary data has been
     * found. Any gap between the end of the previous match and the start of
     * this one is first processed as a delete edit.
     *
     * @param start   the starting position in the old buffer
     * @param length  the number of matching bytes
     * @return        `true`
     * @throws        an {@link InvalidDataError} if either buffer contains invalid binary data
     */
    DeltaVisitor.prototype.match = function (start, length) {
        this.handleDelete(this.oldOffset, start - this.oldOffset);
        this.handleMatch(start, length);
        return true;
    };
    /**
     * Handle deleted binary data
     *
     * @param start   the starting position of the deleted data in the old buffer
     * @param length  the number of deleted bytes
     * @throws        an {@link InvalidDataError} if either buffer contains invalid binary data
     */
    DeltaVisitor.prototype.handleDelete = function (start, length) {
        this.checkInvariants();
        var end = start + length;
        if (this.oldParser.nextByte() < end
            && (this.oldParser.spanTo(end, this.remover) !== 0
                || this.oldParser.nextByte() > end)) {
            // the end is split. Iff newParser is not split, insert the
            // next pointer found. This will process at most one pointer
            // because newParser.nextByte() >= newOffset, so we can use
            // the stateless insert consumer.
            this.newParser.spanToNext(this.newOffset + 1, this.inserter);
        }
    };
    /**
     * Handle a match of binary data
     *
     * @param start   the starting position in the old buffer
     * @param length  the number of matching bytes
     * @throws        an {@link InvalidDataError} if either buffer contains invalid binary data
     */
    DeltaVisitor.prototype.handleMatch = function (start, length) {
        this.checkInvariants();
        var newStart = this.newOffset;
        var end = start + length;
        this.newOffset += length;
        this.oldOffset = end;
        // leading edge split detection uses the parser positions from before
        // either parser is advanced for this match
        var oldNextByte = this.oldParser.nextByte();
        var newNextByte = this.newParser.nextByte();
        if (newNextByte > newStart && oldNextByte === start) {
            // new stream split affects two tokens in old stream. Remove
            // the second. This will remove exactly one pointer.
            this.oldParser.spanToNext(start + 1, this.remover);
        }
        else if (oldNextByte > start && newNextByte === newStart) {
            // old stream split affects two tokens in new stream. Insert
            // the second. This will insert exactly one pointer.
            this.newParser.spanToNext(newStart + 1, this.inserter);
        }
        var lastOld = new LastResult(this.removedSplitStructures);
        var lastNew = new LastResult(this.insertedSplitStructures);
        this.oldParser.spanTo(end, lastOld);
        this.newParser.spanTo(this.newOffset, lastNew);
        // trailing edge split: the last token parsed ends beyond the match
        var oldSplit = lastOld.foundLast() && this.oldParser.nextByte() > end;
        var newSplit = lastNew.foundLast() && this.newParser.nextByte() > this.newOffset;
        if (oldSplit && newSplit) {
            // only a split in both streams yields a REMOVE/INSERT pair
            lastOld.consumeLast(this.remover);
            lastNew.consumeLast(this.inserter);
        }
    };
    /**
     * Called by the {@link BinaryDelta} when inserted binary data has been
     * found.
     *
     * @param bytes  the inserted data
     * @return       `true`
     * @throws       an {@link InvalidDataError} if either buffer contains invalid binary data
     */
    DeltaVisitor.prototype.insert = function (bytes) {
        this.checkInvariants();
        this.newOffset += bytes.length;
        if (this.newParser.nextByte() < this.newOffset
            && (this.newParser.spanTo(this.newOffset, this.inserter) !== 0
                || this.newParser.nextByte() > this.newOffset)) {
            // the end is split. Iff oldParser is not split, remove the
            // next pointer found. This will process at most one pointer
            // because oldParser.nextByte() >= oldOffset, so we
            // can use the stateless remove consumer.
            this.oldParser.spanToNext(this.oldOffset + 1, this.remover);
        }
        return true;
    };
    /**
     * Called by the {@link BinaryDelta} when the end of the buffer has been found.
     *
     * Treats the remainder of the old value as deleted, flushes the pending
     * insert and remove, then applies the heuristic post-processing.
     *
     * @throws  an {@link InvalidDataError} if either buffer contains invalid binary data
     */
    DeltaVisitor.prototype.end = function () {
        this.handleDelete(this.oldOffset, this.oldValue.$length - this.oldOffset);
        this.addInsert(null, null);
        this.addRemove(null, null);
        this.replaceFullRemovedStructures();
        this.replaceFullInsertedStructures();
    };
    /**
     * Add a JSON pointer that is pending an insert. Any previous pending
     * insert is confirmed and placed into the {@link inserted} map.
     *
     * @param nextPointer  the next pointer pending an insert, or `null` to
     *                     only flush the current pending insert
     * @param nextValue    the data associated with the insert
     */
    DeltaVisitor.prototype.addInsert = function (nextPointer, nextValue) {
        if (this.pendingInsert !== null) {
            this.inserted.put(this.pendingInsert, this.pendingInsertValue);
        }
        this.pendingInsert = nextPointer;
        this.pendingInsertValue = nextValue;
    };
    /**
     * Add a JSON pointer that is pending a remove. Any previous pending
     * remove is confirmed and placed into the {@link removed} map.
     *
     * @param nextPointer  the next pointer pending a remove, or `null` to
     *                     only flush the current pending remove
     * @param nextValue    the data associated with the remove
     */
    DeltaVisitor.prototype.addRemove = function (nextPointer, nextValue) {
        if (this.pendingRemove !== null) {
            this.removed.put(this.pendingRemove, this.pendingRemoveValue);
        }
        this.pendingRemove = nextPointer;
        this.pendingRemoveValue = nextValue;
    };
    /**
     * No change
     */
    DeltaVisitor.prototype.noChange = function () {
        // no-op
    };
    /**
     * Check if an invalid state has occurred, i.e. either parser is behind
     * the offset recorded for its stream.
     *
     * @throws  an {@link InvalidDataError} if either buffer contains invalid binary data
     */
    DeltaVisitor.prototype.checkInvariants = function () {
        if (this.oldParser.nextByte() < this.oldOffset ||
            this.newParser.nextByte() < this.newOffset) {
            throw new errors_1.InvalidDataError('Invalid binary delta');
        }
    };
    /**
     * Heuristic post-processing: replace split structures for which removed
     * includes every entry with a single pointer.
     */
    DeltaVisitor.prototype.replaceFullRemovedStructures = function () {
        var i = this.removedSplitStructures.postOrder();
        while (i.hasNext()) {
            var s = i.next();
            var split = s.value;
            var entry = this.removed.getEntry(s.pointer);
            if (entry !== null && entry.numberOfChildren() === split.elements) {
                // as for other removed values, copy rather than share the old bytes
                entry.setValue(this.copyPartOf(this.oldValue, split.start, split.length));
                entry.removeDescendants();
            }
        }
    };
    /**
     * Heuristic post-processing: replace split structures for which
     * inserted includes every entry with a single pointer.
     */
    DeltaVisitor.prototype.replaceFullInsertedStructures = function () {
        var i = this.insertedSplitStructures.postOrder();
        while (i.hasNext()) {
            var s = i.next();
            var split = s.value;
            var entry = this.inserted.getEntry(s.pointer);
            if (entry !== null && entry.numberOfChildren() === split.elements) {
                // as for other inserted values, share the new value's bytes
                entry.setValue(this.partOf(this.newValue, split.start, split.length));
                entry.removeDescendants();
            }
        }
    };
    return DeltaVisitor;
}());
/**
 * Implementation of the change map
 *
 * @inheritdoc
 */
var ChangeMapImpl = /** @class */ (function () {
    /**
     * Create a ChangeMapImpl. The entry set is computed once, eagerly, from
     * the supplied pointer map.
     *
     * @param parts  the JSON pointers contained in the change
     */
    function ChangeMapImpl(parts) {
        /**
         * The array of key-value pairs contained in the change map
         */
        this.entries = [];
        this.parts = parts;
        var i = parts.iterator();
        while (i.hasNext()) {
            var n = i.next();
            this.entries.push({
                key: n.pointer.toString(),
                value: n.value.get()
            });
        }
        this.length = this.entries.length;
    }
    /**
     * @inheritdoc
     */
    ChangeMapImpl.prototype.get = function (key) {
        var part = this.parts.get(json_pointer_1.JSONPointer.parse(key));
        // JSONPointerMap#get is defined elsewhere; if it yields null or
        // undefined for a pointer with no entry, report "no value" instead of
        // failing with a TypeError on the dereference.
        if (part === null || part === undefined) {
            return null;
        }
        return part.get();
    };
    /**
     * @inheritdoc
     */
    ChangeMapImpl.prototype.entrySet = function () {
        return this.entries;
    };
    /**
     * @inheritdoc
     */
    ChangeMapImpl.prototype.containsKey = function (key) {
        return this.parts.contains(json_pointer_1.JSONPointer.parse(key));
    };
    /**
     * @inheritdoc
     */
    ChangeMapImpl.prototype.descendants = function (pointer) {
        return new ChangeMapImpl(this.parts.descendants(json_pointer_1.JSONPointer.parse(pointer)));
    };
    /**
     * @inheritdoc
     */
    ChangeMapImpl.prototype.intersection = function (pointer) {
        return new ChangeMapImpl(this.parts.intersection(json_pointer_1.JSONPointer.parse(pointer)));
    };
    return ChangeMapImpl;
}());
/**
 * Implementation of {@link JSONDelta}.
 *
 * @inheritdoc
 */
var JSONDeltaImpl = /** @class */ (function () {
    /**
     * Create a new JSONDeltaImpl
     *
     * @param factory      constructor function for creating {@link JSON} objects
     * @param original     the original JSON data
     * @param newValue     the modified JSON data
     * @param binaryDelta  the binary delta between the original and the modified
     *                     data. If no binary delta is provided the JSON delta
     *                     will assume that all old JSON data is removed and
     *                     replaced by the new JSON data.
     * @throws             an {@link InvalidDataError} if a binaryDelta was supplied but it could not be parsed
     */
    function JSONDeltaImpl(factory, original, newValue, binaryDelta) {
        /**
         * The parts of the second JSON value not found in the first JSON value.
         */
        this.insertedMap = new json_pointer_map_1.JSONPointerMap();
        /**
         * The parts of the first JSON value not found in the second JSON value.
         */
        this.removedMap = new json_pointer_map_1.JSONPointerMap();
        this.factory = factory;
        if (binaryDelta !== undefined) {
            binaryDelta.visit(new DeltaVisitor(original, newValue, this.insertedMap, this.removedMap, this.partOf.bind(this), this.copyPartOf.bind(this)));
        }
        else {
            // whole-value replacement: the entire original value is removed
            // and the entire new value is inserted, both at the root pointer
            this.removedMap.put(json_pointer_1.ROOT, original);
            this.insertedMap.put(json_pointer_1.ROOT, newValue);
        }
    }
    /**
     * Return a {@link JSON} object from a part of a {@link BufferSlice}. The
     * backing buffer is passed to the JSON constructor without copying.
     *
     * @param value   the buffer slice to construct the JSON from
     * @param start   the offset in the buffer slice
     * @param length  the number of bytes to use from the buffer slice
     * @return        a JSON object constructed from the part of the buffer slice
     */
    JSONDeltaImpl.prototype.partOf = function (value, start, length) {
        return new this.factory(value.$buffer, value.$offset + start, length);
    };
    /**
     * Return a {@link JSON} object from a part of a {@link BufferSlice}. The
     * buffer is copied before being passed to the JSON constructor.
     *
     * @param value   the buffer slice to construct the JSON from
     * @param start   the offset in the buffer slice
     * @param length  the number of bytes to use from the buffer slice
     * @return        a JSON object constructed from the part of the buffer slice
     */
    JSONDeltaImpl.prototype.copyPartOf = function (value, start, length) {
        var offsetStart = value.$offset + start;
        var buffer = new Uint8Array(length);
        buffer.set(value.$buffer.subarray(offsetStart, offsetStart + length));
        return new this.factory(buffer, 0, length);
    };
    /**
     * @inheritdoc
     */
    JSONDeltaImpl.prototype.removed = function () {
        return new ChangeMapImpl(this.removedMap);
    };
    /**
     * @inheritdoc
     */
    JSONDeltaImpl.prototype.inserted = function () {
        return new ChangeMapImpl(this.insertedMap);
    };
    /**
     * @inheritdoc
     */
    JSONDeltaImpl.prototype.hasChanges = function () {
        return this.removedMap.size !== 0 || this.insertedMap.size !== 0;
    };
    /**
     * Convert the JSON delta to a string
     *
     * @returns  a string representation of the JSON delta
     */
    JSONDeltaImpl.prototype.toString = function () {
        return ['REMOVE ', this.removedMap, ' INSERT ', this.insertedMap].join('');
    };
    return JSONDeltaImpl;
}());
exports.JSONDeltaImpl = JSONDeltaImpl;