@atproto/api
Version:
Client library for atproto and Bluesky
118 lines (108 loc) • 3.03 kB
text/typescript
import TLDs from 'tlds'
import { AppBskyRichtextFacet } from '../client'
import { UnicodeString } from './unicode'
import {
MENTION_REGEX,
TAG_REGEX,
TRAILING_PUNCTUATION_REGEX,
URL_REGEX,
} from './util'
export type Facet = AppBskyRichtextFacet.Main
export function detectFacets(text: UnicodeString): Facet[] | undefined {
let match
const facets: Facet[] = []
{
// mentions
const re = MENTION_REGEX
while ((match = re.exec(text.utf16))) {
if (!isValidDomain(match[3]) && !match[3].endsWith('.test')) {
continue // probably not a handle
}
const start = text.utf16.indexOf(match[3], match.index) - 1
facets.push({
$type: 'app.bsky.richtext.facet',
index: {
byteStart: text.utf16IndexToUtf8Index(start),
byteEnd: text.utf16IndexToUtf8Index(start + match[3].length + 1),
},
features: [
{
$type: 'app.bsky.richtext.facet#mention',
did: match[3], // must be resolved afterwards
},
],
})
}
}
{
// links
const re = URL_REGEX
while ((match = re.exec(text.utf16))) {
let uri = match[2]
if (!uri.startsWith('http')) {
const domain = match.groups?.domain
if (!domain || !isValidDomain(domain)) {
continue
}
uri = `https://${uri}`
}
const start = text.utf16.indexOf(match[2], match.index)
const index = { start, end: start + match[2].length }
// strip ending puncuation
if (/[.,;:!?]$/.test(uri)) {
uri = uri.slice(0, -1)
index.end--
}
if (/[)]$/.test(uri) && !uri.includes('(')) {
uri = uri.slice(0, -1)
index.end--
}
facets.push({
index: {
byteStart: text.utf16IndexToUtf8Index(index.start),
byteEnd: text.utf16IndexToUtf8Index(index.end),
},
features: [
{
$type: 'app.bsky.richtext.facet#link',
uri,
},
],
})
}
}
{
const re = TAG_REGEX
while ((match = re.exec(text.utf16))) {
const leading = match[1]
let tag = match[2]
if (!tag) continue
// strip ending punctuation and any spaces
tag = tag.trim().replace(TRAILING_PUNCTUATION_REGEX, '')
if (tag.length === 0 || tag.length > 64) continue
const index = match.index + leading.length
facets.push({
index: {
byteStart: text.utf16IndexToUtf8Index(index),
byteEnd: text.utf16IndexToUtf8Index(index + 1 + tag.length),
},
features: [
{
$type: 'app.bsky.richtext.facet#tag',
tag: tag,
},
],
})
}
}
return facets.length > 0 ? facets : undefined
}
function isValidDomain(str: string): boolean {
return !!TLDs.find((tld) => {
const i = str.lastIndexOf(tld)
if (i === -1) {
return false
}
return str.charAt(i - 1) === '.' && i === str.length - tld.length
})
}