@atproto/api
Version:
Client library for atproto and Bluesky
389 lines (371 loc) • 11.6 kB
text/typescript
import { AtpAgent, RichText, RichTextSegment } from '../src'
import {
isLink,
isMention,
isTag,
} from '../src/client/types/app/bsky/richtext/facet'
describe('detectFacets', () => {
const agent = new AtpAgent({ service: 'http://localhost' })
// Replace handle resolution with a stub: every handle resolves successfully
// to a fake DID of the form `did:fake:<handle>`.
agent.com.atproto.identity.resolveHandle = async (params) => {
  const did = `did:fake:${params?.handle}`
  return { success: true, headers: {}, data: { did } }
}
// Family emoji (man, woman, girl, girl) joined by zero-width joiners. The
// joiners are written as escapes so the invisible characters survive
// copy/paste and re-encoding of this file.
const familyEmoji = '👨\u200D👩\u200D👧\u200D👧'
// Input texts. `outputs` below is a parallel array: outputs[i] is the expected
// segmentation of inputs[i].
const inputs = [
  // Mentions
  'no mention',
  '@handle.com middle end',
  'start @handle.com end',
  'start middle @handle.com',
  '@handle.com @handle.com @handle.com',
  '@full123-chars.test',
  'not@right',
  '@handle.com!@#$chars',
  '@handle.com\n@handle.com',
  'parenthetical (@handle.com)',
  `${familyEmoji} @handle.com ${familyEmoji}`,
  // Links with an explicit scheme
  'start https://middle.com end',
  'start https://middle.com/foo/bar end',
  'start https://middle.com/foo/bar?baz=bux end',
  'start https://middle.com/foo/bar?baz=bux#hash end',
  'https://start.com/foo/bar?baz=bux#hash middle end',
  'start middle https://end.com/foo/bar?baz=bux#hash',
  'https://newline1.com\nhttps://newline2.com',
  `${familyEmoji} https://middle.com ${familyEmoji}`,
  // Bare domains (no scheme)
  'start middle.com end',
  'start middle.com/foo/bar end',
  'start middle.com/foo/bar?baz=bux end',
  'start middle.com/foo/bar?baz=bux#hash end',
  'start.com/foo/bar?baz=bux#hash middle end',
  'start middle end.com/foo/bar?baz=bux#hash',
  'newline1.com\nnewline2.com',
  'a example.com/index.php php link',
  'a trailing bsky.app: colon',
  // Text that looks domain-like but is expected to stay plain
  'not.. a..url ..here',
  'e.g.',
  'something-cool.jpg',
  'website.com.jpg',
  'e.g./foo',
  'website.com.jpg/foo',
  // Trailing slashes, whitespace, punctuation and parentheses around links
  'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
  'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/ ',
  'https://foo.com https://bar.com/whatever https://baz.com',
  'punctuation https://foo.com, https://bar.com/whatever; https://baz.com.',
  'parenthentical (https://foo.com)',
  'except for https://foo.com/thing_(cool)',
]
// Expected segments per input. Each segment is `[text]` for plain text or
// `[text, target]` for a faceted segment, where `target` is the mention DID
// or the link URI.
const outputs: string[][][] = [
  [['no mention']],
  [['@handle.com', 'did:fake:handle.com'], [' middle end']],
  [['start '], ['@handle.com', 'did:fake:handle.com'], [' end']],
  [['start middle '], ['@handle.com', 'did:fake:handle.com']],
  [
    ['@handle.com', 'did:fake:handle.com'],
    [' '],
    ['@handle.com', 'did:fake:handle.com'],
    [' '],
    ['@handle.com', 'did:fake:handle.com'],
  ],
  [['@full123-chars.test', 'did:fake:full123-chars.test']],
  [['not@right']],
  [['@handle.com', 'did:fake:handle.com'], ['!@#$chars']],
  [
    ['@handle.com', 'did:fake:handle.com'],
    ['\n'],
    ['@handle.com', 'did:fake:handle.com'],
  ],
  [['parenthetical ('], ['@handle.com', 'did:fake:handle.com'], [')']],
  [
    [`${familyEmoji} `],
    ['@handle.com', 'did:fake:handle.com'],
    [` ${familyEmoji}`],
  ],
  [['start '], ['https://middle.com', 'https://middle.com'], [' end']],
  [
    ['start '],
    ['https://middle.com/foo/bar', 'https://middle.com/foo/bar'],
    [' end'],
  ],
  [
    ['start '],
    [
      'https://middle.com/foo/bar?baz=bux',
      'https://middle.com/foo/bar?baz=bux',
    ],
    [' end'],
  ],
  [
    ['start '],
    [
      'https://middle.com/foo/bar?baz=bux#hash',
      'https://middle.com/foo/bar?baz=bux#hash',
    ],
    [' end'],
  ],
  [
    [
      'https://start.com/foo/bar?baz=bux#hash',
      'https://start.com/foo/bar?baz=bux#hash',
    ],
    [' middle end'],
  ],
  [
    ['start middle '],
    [
      'https://end.com/foo/bar?baz=bux#hash',
      'https://end.com/foo/bar?baz=bux#hash',
    ],
  ],
  [
    ['https://newline1.com', 'https://newline1.com'],
    ['\n'],
    ['https://newline2.com', 'https://newline2.com'],
  ],
  [
    [`${familyEmoji} `],
    ['https://middle.com', 'https://middle.com'],
    [` ${familyEmoji}`],
  ],
  [['start '], ['middle.com', 'https://middle.com'], [' end']],
  [
    ['start '],
    ['middle.com/foo/bar', 'https://middle.com/foo/bar'],
    [' end'],
  ],
  [
    ['start '],
    ['middle.com/foo/bar?baz=bux', 'https://middle.com/foo/bar?baz=bux'],
    [' end'],
  ],
  [
    ['start '],
    [
      'middle.com/foo/bar?baz=bux#hash',
      'https://middle.com/foo/bar?baz=bux#hash',
    ],
    [' end'],
  ],
  [
    [
      'start.com/foo/bar?baz=bux#hash',
      'https://start.com/foo/bar?baz=bux#hash',
    ],
    [' middle end'],
  ],
  [
    ['start middle '],
    ['end.com/foo/bar?baz=bux#hash', 'https://end.com/foo/bar?baz=bux#hash'],
  ],
  [
    ['newline1.com', 'https://newline1.com'],
    ['\n'],
    ['newline2.com', 'https://newline2.com'],
  ],
  [
    ['a '],
    ['example.com/index.php', 'https://example.com/index.php'],
    [' php link'],
  ],
  [['a trailing '], ['bsky.app', 'https://bsky.app'], [': colon']],
  [['not.. a..url ..here']],
  [['e.g.']],
  [['something-cool.jpg']],
  [['website.com.jpg']],
  [['e.g./foo']],
  [['website.com.jpg/foo']],
  [
    ['Classic article '],
    [
      'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
      'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
    ],
  ],
  [
    ['Classic article '],
    [
      'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
      'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
    ],
    [' '],
  ],
  [
    ['https://foo.com', 'https://foo.com'],
    [' '],
    ['https://bar.com/whatever', 'https://bar.com/whatever'],
    [' '],
    ['https://baz.com', 'https://baz.com'],
  ],
  [
    ['punctuation '],
    ['https://foo.com', 'https://foo.com'],
    [', '],
    ['https://bar.com/whatever', 'https://bar.com/whatever'],
    ['; '],
    ['https://baz.com', 'https://baz.com'],
    ['.'],
  ],
  [['parenthentical ('], ['https://foo.com', 'https://foo.com'], [')']],
  [
    ['except for '],
    ['https://foo.com/thing_(cool)', 'https://foo.com/thing_(cool)'],
  ],
]
// Runs facet detection over every fixture input and compares the flattened
// segments against the expected output at the same index.
it('correctly handles a set of text inputs', async () => {
  // The fixtures are parallel arrays; fail loudly if they drift apart rather
  // than silently ignoring surplus expected outputs.
  expect(outputs).toHaveLength(inputs.length)
  for (const [i, input] of inputs.entries()) {
    const rt = new RichText({ text: input })
    await rt.detectFacets(agent)
    expect(Array.from(rt.segments(), segmentToOutput)).toEqual(outputs[i])
  }
})
describe('correctly detects tags inline', () => {
  // 64-character tag body: the longest tag these cases expect to be detected
  // (the 65-character variant below expects no match).
  const tag64 = `thisisa64characterstring_${'a'.repeat(39)}`
  // Each case is [input text, expected tag values, expected facet ranges].
  // The ranges are consistent with UTF-8 byte offsets into the input (for
  // example, the butterfly emoji accounts for 4 bytes). Invisible and
  // combining characters are written as `\u` escapes so they cannot be lost
  // or mangled when the file is re-encoded; a doubled backslash (`\\u200B`)
  // is literal label text, not the character itself.
  const inputs: [
    string,
    string[],
    { byteStart: number; byteEnd: number }[],
  ][] = [
    ['#a', ['a'], [{ byteStart: 0, byteEnd: 2 }]],
    [
      '#a #b',
      ['a', 'b'],
      [
        { byteStart: 0, byteEnd: 2 },
        { byteStart: 3, byteEnd: 5 },
      ],
    ],
    ['#1', [], []],
    ['#1a', ['1a'], [{ byteStart: 0, byteEnd: 3 }]],
    ['#tag', ['tag'], [{ byteStart: 0, byteEnd: 4 }]],
    ['body #tag', ['tag'], [{ byteStart: 5, byteEnd: 9 }]],
    ['#tag body', ['tag'], [{ byteStart: 0, byteEnd: 4 }]],
    ['body #tag body', ['tag'], [{ byteStart: 5, byteEnd: 9 }]],
    ['body #1', [], []],
    ['body #1a', ['1a'], [{ byteStart: 5, byteEnd: 8 }]],
    ['body #a1', ['a1'], [{ byteStart: 5, byteEnd: 8 }]],
    ['#', [], []],
    ['#?', [], []],
    ['text #', [], []],
    ['text # text', [], []],
    [`body #${tag64}`, [tag64], [{ byteStart: 5, byteEnd: 70 }]],
    [`body #thisisa65characterstring_${'a'.repeat(39)}b`, [], []],
    [`body #${tag64}!`, [tag64], [{ byteStart: 5, byteEnd: 70 }]],
    [
      'its a #double#rainbow',
      ['double#rainbow'],
      [{ byteStart: 6, byteEnd: 21 }],
    ],
    ['##hashash', ['#hashash'], [{ byteStart: 0, byteEnd: 9 }]],
    ['##', [], []],
    ['some #n0n3s@n5e!', ['n0n3s@n5e'], [{ byteStart: 5, byteEnd: 15 }]],
    [
      'works #with,punctuation',
      ['with,punctuation'],
      [{ byteStart: 6, byteEnd: 23 }],
    ],
    [
      'strips trailing #punctuation, #like. #this!',
      ['punctuation', 'like', 'this'],
      [
        { byteStart: 16, byteEnd: 28 },
        { byteStart: 30, byteEnd: 35 },
        { byteStart: 37, byteEnd: 42 },
      ],
    ],
    [
      'strips #multi_trailing___...',
      ['multi_trailing'],
      [{ byteStart: 7, byteEnd: 22 }],
    ],
    [
      'works with #🦋 emoji, and #butter🦋fly',
      ['🦋', 'butter🦋fly'],
      [
        { byteStart: 11, byteEnd: 16 },
        { byteStart: 28, byteEnd: 42 },
      ],
    ],
    [
      '#same #same #but #diff',
      ['same', 'same', 'but', 'diff'],
      [
        { byteStart: 0, byteEnd: 5 },
        { byteStart: 6, byteEnd: 11 },
        { byteStart: 12, byteEnd: 16 },
        { byteStart: 17, byteEnd: 22 },
      ],
    ],
    // '#' + U+FE0F (variation selector-16) + U+20E3 (combining keycap).
    ['this #\uFE0F\u20E3tag should not be a tag', [], []],
    [
      'this ##\uFE0F\u20E3tag should be a tag',
      ['#\uFE0F\u20E3tag'],
      [
        {
          byteStart: 5,
          byteEnd: 16,
        },
      ],
    ],
    [
      'this #t\nag should be a tag',
      ['t'],
      [
        {
          byteStart: 5,
          byteEnd: 7,
        },
      ],
    ],
    // U+200B is a zero-width space.
    ['no match (\\u200B): #\u200B', [], []],
    ['no match (\\u200Ba): #\u200Ba', [], []],
    ['match (a\\u200Bb): #a\u200Bb', ['a'], [{ byteStart: 18, byteEnd: 20 }]],
    ['match (ab\\u200B): #ab\u200B', ['ab'], [{ byteStart: 18, byteEnd: 21 }]],
    // U+20E2 is a combining enclosing mark.
    ['no match (\\u20e2tag): #\u20E2tag', [], []],
    // NOTE(review): the label says "no match", but the expectation is that the
    // leading 'a' is detected as a tag. The label is left as-is because it is
    // part of the input text and therefore of the byte offsets.
    ['no match (a\\u20e2b): #a\u20E2b', ['a'], [{ byteStart: 21, byteEnd: 23 }]],
    // U+FF03 is the full-width number sign.
    [
      'match full width number sign (tag): \uFF03tag',
      ['tag'],
      [{ byteStart: 36, byteEnd: 42 }],
    ],
    [
      'match full width number sign (tag): \uFF03#\uFE0F\u20E3tag',
      ['#\uFE0F\u20E3tag'],
      [{ byteStart: 36, byteEnd: 49 }],
    ],
    ['no match 1?: #1?', [], []],
  ]
  // One generated test per case, titled with the input text.
  it.each(inputs)('%s', async (input, tags, indices) => {
    const rt = new RichText({ text: input })
    await rt.detectFacets(agent)
    const detectedTags: string[] = []
    const detectedIndices: { byteStart: number; byteEnd: number }[] = []
    for (const { facet } of rt.segments()) {
      // Plain-text segments carry no facet.
      if (!facet) continue
      for (const feature of facet.features) {
        if (isTag(feature)) {
          detectedTags.push(feature.tag)
        }
      }
      // The index of every facet is recorded, whatever features it carries.
      detectedIndices.push(facet.index)
    }
    expect(detectedTags).toEqual(tags)
    expect(detectedIndices).toEqual(indices)
  })
})
})
/**
 * Flattens a rich-text segment into the tuple shape used by the expected
 * outputs in this file.
 *
 * @param segment - Segment yielded by `RichText#segments()`.
 * @returns `[text]` when the segment has no facet; otherwise `[text, target]`,
 *   where `target` is the DID of the facet's first feature if it is a mention,
 *   its URI if it is a link, and `''` in every other case.
 */
function segmentToOutput(segment: RichTextSegment): string[] {
  const { text, facet } = segment
  if (!facet) {
    return [text]
  }

  // Only the first feature decides the target.
  const [first] = facet.features
  let target: string | undefined
  if (first !== undefined) {
    if (isMention(first)) {
      target = first.did
    } else if (isLink(first)) {
      target = first.uri
    }
  }
  return [text, target || '']
}