@atproto/api
Version:
Client library for atproto and Bluesky
415 lines (356 loc) • 12.1 kB
text/typescript
/*
= Rich Text Manipulation
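When we sanitize rich text, we have to update the facet (entity) indices as
the text is modified. Every modification can be modeled as an insert() or a
delete() on the rich text string. The possible scenarios are outlined below,
along with their expected behaviors.

NOTE: Slices are start-inclusive, end-exclusive. Facet indices are byte
offsets (byteStart/byteEnd); the diagrams below use ASCII text, so one
character corresponds to one index.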
== richTextInsert()

Target string:

   0 1 2 3 4 5 6 7 8 910   // string indices
   h e l l o   w o r l d   // string value
       ^-------^           // target slice {start: 2, end: 7}

Scenarios:

A: ^                       // insert "test" at 0
B:         ^               // insert "test" at 4
C:                 ^       // insert "test" at 8

A = before -> move both by num added
B = inner  -> move end by num added
C = after  -> noop

Results:

A: 0 1 2 3 4 5 6 7 8 910   // string indices
   t e s t h e l l o   w   // string value
               ^-------^   // target slice {start: 6, end: 11}

B: 0 1 2 3 4 5 6 7 8 910   // string indices
   h e l l t e s t o   w   // string value
       ^---------------^   // target slice {start: 2, end: 11}

C: 0 1 2 3 4 5 6 7 8 910   // string indices
   h e l l o   w o t e s   // string value
       ^-------^           // target slice {start: 2, end: 7}
== richTextDelete()

Target string:

   0 1 2 3 4 5 6 7 8 910   // string indices
   h e l l o   w o r l d   // string value
       ^-------^           // target slice {start: 2, end: 7}

Scenarios:

A: ^---------------^       // remove slice {start: 0, end: 9}
B:               ^-----^   // remove slice {start: 7, end: 11}
C:         ^-----------^   // remove slice {start: 4, end: 11}
D:       ^-^               // remove slice {start: 3, end: 5}
E:   ^-----^               // remove slice {start: 1, end: 5}
F: ^-^                     // remove slice {start: 0, end: 2}

A = entirely outer   -> delete slice
B = entirely after   -> noop
C = partially after  -> move end to remove-start
D = entirely inner   -> move end by num removed
E = partially before -> move start to remove-start index, move end by num removed
F = entirely before  -> move both by num removed

Results:

A: 0 1 2 3 4 5 6 7 8 910   // string indices
   l d                     // string value
                           // target slice (deleted)

B: 0 1 2 3 4 5 6 7 8 910   // string indices
   h e l l o   w           // string value
       ^-------^           // target slice {start: 2, end: 7}

C: 0 1 2 3 4 5 6 7 8 910   // string indices
   h e l l                 // string value
       ^-^                 // target slice {start: 2, end: 4}

D: 0 1 2 3 4 5 6 7 8 910   // string indices
   h e l   w o r l d       // string value
       ^---^               // target slice {start: 2, end: 5}

E: 0 1 2 3 4 5 6 7 8 910   // string indices
   h   w o r l d           // string value
     ^-^                   // target slice {start: 1, end: 3}

F: 0 1 2 3 4 5 6 7 8 910   // string indices
   l l o   w o r l d       // string value
   ^-------^               // target slice {start: 0, end: 5}
*/
import { AppBskyFeedPost, AppBskyRichtextFacet, AtpBaseClient } from '../client'
import { detectFacets } from './detection'
import { sanitizeRichText } from './sanitization'
import { UnicodeString } from './unicode'
// Short aliases for the generated lexicon types used throughout this file.

/** A rich text facet: an `index` (`byteStart`/`byteEnd`) plus a list of `features`. */
export type Facet = AppBskyRichtextFacet.Main
/** Facet feature describing a link (carries a `uri`). */
export type FacetLink = AppBskyRichtextFacet.Link
/** Facet feature describing a mention (carries a `did`). */
export type FacetMention = AppBskyRichtextFacet.Mention
/** Facet feature describing a tag. */
export type FacetTag = AppBskyRichtextFacet.Tag
/**
 * Legacy annotation format (`type`, `index.start`/`index.end`, `value`).
 * RichText converts these into facets when no facets are supplied.
 */
export type Entity = AppBskyFeedPost.Entity
/**
 * Input accepted by the RichText constructor.
 */
export interface RichTextProps {
/** The raw text content. */
text: string
/** Facets annotating byte ranges of `text`; take precedence over `entities`. */
facets?: Facet[]
/**
* Legacy annotations; only consulted when `facets` is missing or empty.
* @deprecated Use facets instead
*/
entities?: Entity[]
}
/**
 * Options accepted by the RichText constructor.
 */
export interface RichTextOpts {
/**
* When truthy, the constructor runs `sanitizeRichText` with
* `cleanNewlines: true` and copies the result into the new instance.
*/
cleanNewlines?: boolean
}
/**
 * One contiguous piece of rich text together with the facet (if any) that
 * annotates it. Plain, un-annotated text has no facet.
 */
export class RichTextSegment {
  public text: string
  public facet?: Facet

  /**
   * @param text - The text covered by this segment.
   * @param facet - The facet annotating the text, when there is one.
   */
  constructor(text: string, facet?: Facet) {
    this.text = text
    this.facet = facet
  }

  /** The link feature of this segment's facet, or `undefined` if it has none. */
  get link(): FacetLink | undefined {
    const { facet } = this
    if (facet == null) {
      return undefined
    }
    return facet.features.find(AppBskyRichtextFacet.isLink)
  }

  /** Whether this segment's facet carries a link feature. */
  isLink() {
    return Boolean(this.link)
  }

  /** The mention feature of this segment's facet, or `undefined` if it has none. */
  get mention(): FacetMention | undefined {
    const { facet } = this
    if (facet == null) {
      return undefined
    }
    return facet.features.find(AppBskyRichtextFacet.isMention)
  }

  /** Whether this segment's facet carries a mention feature. */
  isMention() {
    return Boolean(this.mention)
  }

  /** The tag feature of this segment's facet, or `undefined` if it has none. */
  get tag(): FacetTag | undefined {
    const { facet } = this
    if (facet == null) {
      return undefined
    }
    return facet.features.find(AppBskyRichtextFacet.isTag)
  }

  /** Whether this segment's facet carries a tag feature. */
  isTag() {
    return Boolean(this.tag)
  }
}
/**
 * A piece of text together with the facets (links, mentions, tags, ...) that
 * annotate ranges of it.
 *
 * Facet ranges are `byteStart`/`byteEnd` offsets into `unicodeText` and are
 * start-inclusive, end-exclusive. `insert()` and `delete()` edit the text and
 * shift the facet ranges accordingly; the scenario labels used in those
 * methods (A, B, C, ...) refer to the diagrams in the comment at the top of
 * this file.
 */
export class RichText {
  /** The text content; facet offsets index into this value. */
  unicodeText: UnicodeString
  /** Facets annotating the text; sorted by `byteStart` at construction and after detection. */
  facets?: Facet[]

  /**
   * @param props - The text plus optional facets. Legacy `entities` are
   *   converted to facets only when no (non-empty) `facets` are supplied.
   * @param opts - When `cleanNewlines` is set, the freshly built instance is
   *   passed through `sanitizeRichText` and the result is copied back in.
   */
  constructor(props: RichTextProps, opts?: RichTextOpts) {
    this.unicodeText = new UnicodeString(props.text)
    this.facets = props.facets
    if (!this.facets?.length && props.entities?.length) {
      this.facets = entitiesToFacets(this.unicodeText, props.entities)
    }
    if (this.facets) {
      // drop negative-length facets and keep the rest ordered by start offset
      this.facets = this.facets.filter(facetFilter).sort(facetSort)
    }
    if (opts?.cleanNewlines) {
      sanitizeRichText(this, { cleanNewlines: true }).copyInto(this)
    }
  }

  /** The text as a regular string. */
  get text() {
    return this.unicodeText.toString()
  }

  /** Length of the text in the same unit as the facet offsets. */
  get length() {
    return this.unicodeText.length
  }

  /** Length of the text as reported by `unicodeText.graphemeLength`. */
  get graphemeLength() {
    return this.unicodeText.graphemeLength
  }

  /** Returns a new RichText with the same text and a deep copy of the facets. */
  clone() {
    return new RichText({
      text: this.unicodeText.utf16,
      facets: cloneDeep(this.facets),
    })
  }

  /**
   * Overwrites `target`'s text and facets with this instance's.
   * The facets are deep-copied; the `unicodeText` object itself is shared.
   */
  copyInto(target: RichText) {
    target.unicodeText = this.unicodeText
    target.facets = cloneDeep(this.facets)
  }

  /**
   * Walks the text from start to end and yields it as consecutive segments.
   * Text not covered by a facet is yielded without a facet. A facet that
   * starts before the current cursor (i.e. overlaps an earlier facet) is
   * skipped, and a facet covering only whitespace is yielded as plain text.
   */
  *segments(): Generator<RichTextSegment, void, void> {
    const facets = this.facets || []
    if (!facets.length) {
      yield new RichTextSegment(this.unicodeText.utf16)
      return
    }

    let textCursor = 0
    let facetCursor = 0
    do {
      const currFacet = facets[facetCursor]
      if (textCursor < currFacet.index.byteStart) {
        // plain text between the previous facet and this one
        yield new RichTextSegment(
          this.unicodeText.slice(textCursor, currFacet.index.byteStart),
        )
      } else if (textCursor > currFacet.index.byteStart) {
        // facet overlaps text that was already emitted: skip it
        facetCursor++
        continue
      }
      if (currFacet.index.byteStart < currFacet.index.byteEnd) {
        const subtext = this.unicodeText.slice(
          currFacet.index.byteStart,
          currFacet.index.byteEnd,
        )
        if (!subtext.trim()) {
          // don't attach a facet to whitespace-only text
          yield new RichTextSegment(subtext)
        } else {
          yield new RichTextSegment(subtext, currFacet)
        }
      }
      textCursor = currFacet.index.byteEnd
      facetCursor++
    } while (facetCursor < facets.length)

    if (textCursor < this.unicodeText.length) {
      // trailing plain text after the last facet
      yield new RichTextSegment(
        this.unicodeText.slice(textCursor, this.unicodeText.length),
      )
    }
  }

  /**
   * Inserts `insertText` at `insertIndex` and shifts the facets to match.
   *
   * @param insertIndex - Offset into the text, in the same unit as the facet
   *   offsets (it is passed to `unicodeText.slice`).
   * @param insertText - The string to insert.
   * @returns This instance, for chaining.
   */
  insert(insertIndex: number, insertText: string) {
    this.unicodeText = new UnicodeString(
      this.unicodeText.slice(0, insertIndex) +
        insertText +
        this.unicodeText.slice(insertIndex),
    )

    if (!this.facets?.length) {
      return this
    }

    // Facet offsets are byte offsets, so the shift must be measured the same
    // way. `insertText.length` counts UTF-16 code units, which is wrong for
    // any non-ASCII text (e.g. "é" is 1 code unit but 2 bytes).
    const numBytesAdded = new UnicodeString(insertText).length
    for (const ent of this.facets) {
      // see comment at top of file for labels of each scenario
      // scenario A (before)
      if (insertIndex <= ent.index.byteStart) {
        // move both by num added
        ent.index.byteStart += numBytesAdded
        ent.index.byteEnd += numBytesAdded
      }
      // scenario B (inner)
      else if (
        insertIndex >= ent.index.byteStart &&
        insertIndex < ent.index.byteEnd
      ) {
        // move end by num added
        ent.index.byteEnd += numBytesAdded
      }
      // scenario C (after)
      // noop
    }
    return this
  }

  /**
   * Removes the slice `[removeStartIndex, removeEndIndex)` from the text and
   * shifts, shrinks, or drops the facets to match.
   *
   * @param removeStartIndex - Start offset (inclusive), in facet-offset units.
   * @param removeEndIndex - End offset (exclusive), in facet-offset units.
   * @returns This instance, for chaining.
   */
  delete(removeStartIndex: number, removeEndIndex: number) {
    this.unicodeText = new UnicodeString(
      this.unicodeText.slice(0, removeStartIndex) +
        this.unicodeText.slice(removeEndIndex),
    )

    if (!this.facets?.length) {
      return this
    }

    const numBytesRemoved = removeEndIndex - removeStartIndex
    for (const ent of this.facets) {
      // see comment at top of file for labels of each scenario
      // scenario A (entirely outer)
      if (
        removeStartIndex <= ent.index.byteStart &&
        removeEndIndex >= ent.index.byteEnd
      ) {
        // delete slice (will get removed in final pass)
        ent.index.byteStart = 0
        ent.index.byteEnd = 0
      }
      // scenario B (entirely after)
      else if (removeStartIndex > ent.index.byteEnd) {
        // noop
      }
      // scenario C (partially after)
      else if (
        removeStartIndex > ent.index.byteStart &&
        removeStartIndex <= ent.index.byteEnd &&
        removeEndIndex > ent.index.byteEnd
      ) {
        // move end to remove start
        ent.index.byteEnd = removeStartIndex
      }
      // scenario D (entirely inner)
      else if (
        removeStartIndex >= ent.index.byteStart &&
        removeEndIndex <= ent.index.byteEnd
      ) {
        // move end by num removed
        ent.index.byteEnd -= numBytesRemoved
      }
      // scenario E (partially before)
      else if (
        removeStartIndex < ent.index.byteStart &&
        removeEndIndex >= ent.index.byteStart &&
        removeEndIndex <= ent.index.byteEnd
      ) {
        // move start to remove-start index, move end by num removed
        ent.index.byteStart = removeStartIndex
        ent.index.byteEnd -= numBytesRemoved
      }
      // scenario F (entirely before)
      else if (removeEndIndex < ent.index.byteStart) {
        // move both by num removed
        ent.index.byteStart -= numBytesRemoved
        ent.index.byteEnd -= numBytesRemoved
      }
    }

    // filter out any facets that were made irrelevant (empty or inverted)
    this.facets = this.facets.filter(
      (ent) => ent.index.byteStart < ent.index.byteEnd,
    )
    return this
  }

  /**
   * Detects facets such as links and mentions
   * Note: Overwrites the existing facets with auto-detected facets
   *
   * For every detected mention, the value initially stored in `did` is sent
   * to `resolveHandle` as the handle and replaced by the resolved DID. When
   * resolution fails or yields nothing, `did` is set to an empty string.
   *
   * @param agent - Client used to call `com.atproto.identity.resolveHandle`.
   */
  async detectFacets(agent: AtpBaseClient) {
    this.facets = detectFacets(this.unicodeText)
    if (this.facets) {
      const promises: Promise<void>[] = []
      for (const facet of this.facets) {
        for (const feature of facet.features) {
          if (AppBskyRichtextFacet.isMention(feature)) {
            promises.push(
              agent.com.atproto.identity
                .resolveHandle({ handle: feature.did })
                .then((res) => res?.data.did)
                // best effort: a failed lookup becomes an empty DID below
                .catch((_) => undefined)
                .then((did) => {
                  feature.did = did || ''
                }),
            )
          }
        }
      }
      await Promise.allSettled(promises)
      this.facets.sort(facetSort)
    }
  }

  /**
   * Detects facets such as links and mentions but does not resolve them
   * Will produce invalid facets! For instance, mentions will not have their DIDs set.
   * Note: Overwrites the existing facets with auto-detected facets
   */
  detectFacetsWithoutResolution() {
    this.facets = detectFacets(this.unicodeText)
    if (this.facets) {
      this.facets.sort(facetSort)
    }
  }
}
/** Comparator ordering facets by ascending `byteStart`. */
const facetSort = (a: Facet, b: Facet) => {
  const startOfA = a.index.byteStart
  const startOfB = b.index.byteStart
  return startOfA - startOfB
}
/**
 * Predicate that keeps facets whose range is not inverted.
 * Zero-length facets (start equal to end) are considered valid and kept;
 * only negative-length facets are discarded.
 */
const facetFilter = (facet: Facet) => {
  const { byteStart, byteEnd } = facet.index
  return byteEnd >= byteStart
}
/**
 * Converts legacy entities into facets.
 *
 * Only `link` and `mention` entities are converted; any other entity type is
 * dropped. The entity's `index.start`/`index.end` are translated through
 * `text.utf16IndexToUtf8Index` to obtain the facet's `byteStart`/`byteEnd`.
 *
 * @param text - The text the entities refer to.
 * @param entities - The legacy entities to convert.
 * @returns One facet per converted entity, in input order.
 */
function entitiesToFacets(text: UnicodeString, entities: Entity[]): Facet[] {
  const converted: Facet[] = []
  for (const { type, index, value } of entities) {
    const isLink = type === 'link'
    if (!isLink && type !== 'mention') {
      // unsupported entity type: nothing to emit
      continue
    }
    const byteStart = text.utf16IndexToUtf8Index(index.start)
    const byteEnd = text.utf16IndexToUtf8Index(index.end)
    const feature: Facet['features'][number] = isLink
      ? { $type: 'app.bsky.richtext.facet#link', uri: value }
      : { $type: 'app.bsky.richtext.facet#mention', did: value }
    converted.push({
      $type: 'app.bsky.richtext.facet',
      index: { byteStart, byteEnd },
      features: [feature],
    })
  }
  return converted
}
/**
 * Deep-copies a value by round-tripping it through JSON.
 * `undefined` is passed through untouched because it cannot be serialized.
 *
 * @param v - The value to copy.
 * @returns A structurally equal copy sharing no object references with `v`.
 */
function cloneDeep<T>(v: T): T {
  return v === undefined ? v : JSON.parse(JSON.stringify(v))
}