// position-strings
// Version:
// Lexicographically-ordered position strings for collaborative lists and text
// 342 lines (323 loc) • 12.2 kB
// text/typescript
import { IDs } from "./ids";
import { assert, LastInternal, precond } from "./util";
/**
 * Generates lexicographically-ordered "position strings" for
 * collaborative lists and text.
 *
 * A collaborative list (or text string) needs a way to refer to
 * "positions" within the list such that each position:
 * 1. Points to a specific list element (or text character).
 * 2. Is global (all users agree on it) and immutable (it does not
 * change over time).
 * 3. Can be sorted.
 * 4. Is unique, even when different users concurrently create positions
 * at the same place.
 *
 * `PositionSource` hands out such positions as strings that are compared
 * lexicographically. The core operation is `createBetween`, which
 * returns a new position string lying strictly between two existing ones.
 *
 * The strings additionally have these bonus properties:
 * - 5. (Non-Interleaving) If two `PositionSource`s concurrently create a
 * (forward or backward) sequence of positions at the same place, the two
 * sequences are not interleaved. For example, if Alice types "Hello"
 * while Bob types "World" at the same place, each using a
 * `PositionSource` to create one position per character, the resulting
 * order is "HelloWorld" or "WorldHello", never "HWeolrllod".
 * - 6. If a `PositionSource` creates positions in a forward (increasing)
 * sequence, their string lengths grow only logarithmically, not linearly.
 *
 * Position strings are printable ASCII. Specifically, they
 * contain alphanumeric characters, `','`, and `'.'`.
 * Also, the special string `PositionSource.LAST` is `'~'`.
 *
 * Further reading:
 * - [Fractional indexing](https://www.figma.com/blog/realtime-editing-of-ordered-sequences/#fractional-indexing),
 * a related scheme that satisfies 1-3 but not 4-6.
 * - [List CRDTs](https://mattweidner.com/2022/10/21/basic-list-crdt.html)
 * and how they map to position strings. `PositionSource` uses an optimized
 * variant of that link's string implementation.
 * - [Paper about interleaving](https://www.repository.cam.ac.uk/handle/1810/290391)
 * in collaborative text editors.
 */
export class PositionSource {
  /**
   * A string that sorts before every position.
   *
   * Value: `""`.
   */
  static readonly FIRST: string = "";

  /**
   * A string that sorts after every position.
   *
   * Value: `"~"`.
   */
  static readonly LAST: string = LastInternal;

  /**
   * The unique ID for this `PositionSource`.
   */
  readonly ID: string;

  /**
   * The long name given to our waypoints: `,${ID}.`.
   */
  private readonly longName: string;

  /**
   * The form of longName used when our ID is the very first ID in a
   * position: `${ID}.`.
   * (Without this, every position would begin with a redundant ','.)
   */
  private readonly firstName: string;

  /**
   * Bookkeeping for the waypoints we created: maps each waypoint's prefix
   * (see getPrefix) to the last (most recent) valueSeq used with it.
   * The stored value is always the right-side version (odd valueSeq).
   */
  private lastValueSeqs = new Map<string, number>();

  /**
   * Constructs a new `PositionSource`.
   *
   * A single `PositionSource` may be shared between all documents
   * (lists/text strings) in the same JavaScript runtime.
   *
   * For efficiency (shorter position strings), avoid using more than one
   * `PositionSource` for the same document within one JavaScript runtime.
   * The exception is when multiple logical users share the runtime;
   * in that case one `PositionSource` per user is recommended.
   *
   * @param options.ID A unique ID for this `PositionSource`. Defaults to
   * `IDs.random()`.
   *
   * If provided, `options.ID` is passed to `IDs.validate` and must satisfy:
   * - It is unique across the entire collaborative application, i.e.,
   * all `PositionSource`s whose positions may be compared to ours. This
   * includes past `PositionSource`s, even if they correspond to the same
   * user/device.
   * - It does not contain `','` or `'.'`.
   * - The first character is lexicographically less than `'~'` (code point 126).
   *
   * If `options.ID` contains non-alphanumeric characters, then created
   * positions will contain those characters in addition to
   * alphanumeric characters, `','`, and `'.'`.
   */
  constructor(options?: { ID?: string }) {
    const requestedID = options?.ID;
    if (requestedID !== undefined) {
      IDs.validate(requestedID);
    }
    this.ID = requestedID ?? IDs.random();
    this.firstName = `${this.ID}.`;
    this.longName = `,${this.ID}.`;
  }

  /**
   * Creates and returns a new position strictly between `left` and `right`
   * (`left < new < right`).
   *
   * The new position is unique across the entire collaborative application,
   * even when other `PositionSource`s call this method concurrently.
   *
   * The requirements `left < right` and `right <= PositionSource.LAST`
   * are checked with `precond`.
   *
   * @param left Defaults to `PositionSource.FIRST` (insert at the beginning).
   *
   * @param right Defaults to `PositionSource.LAST` (insert at the end).
   */
  createBetween(
    left: string = PositionSource.FIRST,
    right: string = PositionSource.LAST
  ): string {
    precond(left < right, "left must be less than right:", left, "!<", right);
    precond(
      right <= PositionSource.LAST,
      "right must be less than or equal to LAST:",
      right,
      "!<=",
      PositionSource.LAST
    );

    // FIRST and LAST are sentinels, not real positions.
    const hasLeft = left !== PositionSource.FIRST;
    const hasRight = right !== PositionSource.LAST;

    let created: string;
    if (hasRight && (!hasLeft || right.startsWith(left))) {
      // The new position becomes a left child of right.
      // This case always appends a waypoint.
      created = this.appendWaypoint(leftVersion(right));
    } else if (!hasLeft) {
      // Right child of left, where the ancestor is FIRST.
      created = this.appendWaypoint("");
    } else {
      // Right child of left. See whether left's prefix can be reused:
      // it has to be one of ours, and right must not share that prefix
      // (otherwise the result would be > right, because right's older
      // valueSeq would be compared against our newer one).
      const prefix = getPrefix(left);
      const previousSeq = this.lastValueSeqs.get(prefix);
      const rightSharesPrefix = hasRight && right.startsWith(prefix);
      if (previousSeq === undefined || rightSharesPrefix) {
        // Cannot reuse: append a waypoint below left instead.
        created = this.appendWaypoint(left);
      } else {
        // Reuse the prefix with the next valueSeq in the sequence.
        const valueSeq = nextOddValueSeq(previousSeq);
        created = prefix + stringifyBase52(valueSeq);
        this.lastValueSeqs.set(prefix, valueSeq);
      }
    }

    // Sanity check on the result's ordering.
    assert(
      left < created && created < right,
      "Bad position:",
      left,
      created,
      right
    );
    return created;
  }

  /**
   * Adds a waypoint of ours below the given ancestor (= prefix adjusted
   * for side) and returns a unique new position that uses that waypoint.
   *
   * lastValueSeqs is updated to record the valueSeq that was used.
   */
  private appendWaypoint(ancestor: string): string {
    // Find where our ID already occurs in ancestor, if anywhere.
    // Since ',' and '.' are only used as delimiters, an occurrence of
    // this.longName (= `,${ID}.`) in ancestor must come from a waypoint
    // that we created. A leading firstName is the same thing without
    // the ','.
    const ownStart = ancestor.startsWith(this.firstName)
      ? 0
      : ancestor.lastIndexOf(this.longName);

    let waypointName: string;
    if (ownStart === -1) {
      // Our ID is not in ancestor yet: spell the name out in full.
      waypointName = ancestor === "" ? this.firstName : this.longName;
    } else {
      // Our ID is already there, so use a short name: the index of our
      // existing waypoint among the long-name waypoints, counted backwards
      // from the end. Every long name ends with '.', and '.' appears
      // nowhere else, so it suffices to count the '.'s from ownStart on
      // (one fewer than the number of pieces split() produces) and
      // subtract one to make the index zero-based.
      const dotCount = ancestor.slice(ownStart).split(".").length - 1;
      waypointName = stringifyShortName(dotCount - 1);
    }

    const prefix = ancestor + waypointName;
    const previousSeq = this.lastValueSeqs.get(prefix);
    // Take the next odd (right-side) valueSeq; a new waypoint starts at 1.
    let valueSeq = 1;
    if (previousSeq !== undefined) {
      valueSeq = nextOddValueSeq(previousSeq);
    }
    this.lastValueSeqs.set(prefix, valueSeq);
    return prefix + stringifyBase52(valueSeq);
  }
}
/**
 * Extracts position's *prefix*: everything up to and including the last
 * waypoint name, i.e., the position with its final valueSeq removed.
 */
function getPrefix(position: string): string {
  // A waypoint name ends with '.' (long names) or with a digit (short
  // names). A valueSeq consists of letters only, so neither character can
  // occur inside it.
  const waypointEndChars = ".0123456789";
  // `end` is the candidate prefix length. The final character is skipped
  // from the start, and an empty prefix is never returned from the scan.
  let end = position.length - 1;
  while (end > 0) {
    if (waypointEndChars.includes(position[end - 1])) {
      // position[end - 1] is the last waypoint char, so the prefix ends here.
      return position.slice(0, end);
    }
    end--;
  }
  assert(false, "No last waypoint char found (not a position?)", position);
  return "";
}
/**
 * Converts position into its "left" variant: the same string, but ending
 * with a "left" marker rather than the default "right" marker.
 *
 * The result is the ancestor under which position's left descendants
 * are created.
 */
function leftVersion(position: string) {
  // Going from the (odd) valueSeq to the one below it only touches the
  // final base52 digit, so just that digit is decoded and re-encoded.
  const lastIndex = position.length - 1;
  const lastDigit = parseBase52(position[lastIndex]);
  assert(
    lastDigit % 2 === 1,
    "Bad valueSeq (not a position?)",
    lastDigit,
    position
  );
  const leftDigit = stringifyBase52(lastDigit - 1);
  return position.slice(0, lastIndex) + leftDigit;
}
/**
 * Encodes n as a short waypoint name: the leading part (n divided by 10)
 * is written in base 52, and the final digit is written in base 10 using
 * an ordinary digit character. Values below 10 are a single digit.
 *
 * Ending on a digit makes the end of a short name easy to locate
 * in getPrefix: it is the last digit in the string.
 */
function stringifyShortName(n: number): string {
  const isSingleDigit = n < 10;
  // Letters for everything above the final decimal digit (none if n < 10).
  const leading = isSingleDigit ? "" : stringifyBase52(Math.floor(n / 10));
  // 48 is the char code of '0'.
  const trailing = String.fromCharCode(48 + (isSingleDigit ? n : n % 10));
  return leading + trailing;
}
/**
 * Encodes n in base 52, using letters as the "digits": 'A'-'Z' for
 * 0-25 and 'a'-'z' for 26-51, so that digits are ordered by code point.
 * Zero is encoded as "A".
 */
function stringifyBase52(n: number): string {
  if (n === 0) return "A";
  let encoded = "";
  // Peel off the least significant digit each round and prepend it.
  for (let rest = n; rest > 0; rest = Math.floor(rest / 52)) {
    const digit = rest % 52;
    // 65 is 'A'; 71 + 26 = 97 is 'a'.
    const code = digit >= 26 ? 71 + digit : 65 + digit;
    encoded = String.fromCharCode(code) + encoded;
  }
  return encoded;
}
/**
 * Decodes a string of base-52 letter "digits" ('A'-'Z' for 0-25,
 * 'a'-'z' for 26-51), most significant digit first, into a number.
 * The empty string decodes to 0.
 */
function parseBase52(s: string): number {
  return s.split("").reduce((total, letter) => {
    const code = letter.charCodeAt(0);
    // Char codes from 97 ('a') upward are the lowercase digits 26-51.
    const digit = code >= 97 ? code - 71 : code - 65;
    return 52 * total + digit;
  }, 0);
}
// Natural log of 52, used to count base-52 digits.
const log52 = Math.log(52);
/**
 * Advances an odd valueSeq to the next odd valueSeq in the special
 * sequence described below. Equivalently: map n to its valueIndex,
 * add 2, and map back.
 *
 * Properties of the sequence:
 * 1. Every member is a nonnegative integer (but not every
 * nonnegative integer is a member).
 * 2. The members' base-52 representations appear in lexicographic
 * order, and none of them is a prefix of another.
 * 3. The n-th member has O(log(n)) base-52 digits.
 *
 * Properties (2) and (3) mirror ordinary counting, with the difference
 * that the order is the (base-52) lexicographic order rather than the
 * usual order by magnitude. The members also happen to be in order by
 * magnitude, although that fact is not relied upon.
 *
 * How the sequence is built:
 * - Start with 0.
 * - Enumerate 26^1 numbers (A, B, ..., Z).
 * - Add 1, multiply by 52, then enumerate 26^2 numbers
 * (aA, aB, ..., mz).
 * - Add 1, multiply by 52, then enumerate 26^3 numbers
 * (nAA, nAB, ..., tZz).
 * - Continue this pattern forever, enumerating 26^d d-digit numbers for
 * each d >= 1. Picture a decimal point in front of each number: each d
 * uses up 2^(-d) of the unit interval, so the sequence never "reaches 1"
 * (i.e., never overflows to d+1 digits when d digits were intended).
 *
 * I believe this is related to
 * [Elias gamma coding](https://en.wikipedia.org/wiki/Elias_gamma_coding).
 */
function nextOddValueSeq(n: number): number {
  // d = number of base-52 digits of n (0 counts as one digit).
  let d = 1;
  if (n !== 0) {
    d = Math.floor(Math.log(n) / log52) + 1;
  }
  // One can calculate that the last d-digit member is 52^d - 26^d - 1.
  const lastWithDDigits = Math.pow(52, d) - Math.pow(26, d) - 1;
  // At the end of a length, the first step moves to the next length
  // (n -> (n + 1) * 52) and the second step is n -> n + 1.
  // Anywhere else, both steps are simply n -> n + 1.
  return n === lastWithDDigits ? (n + 1) * 52 + 1 : n + 2;
}