UNPKG

@konemono/nostr-content-parser

Version:
676 lines (581 loc) 19.2 kB
// 修正版 parseContent.ts - ハッシュタグオプション追加 import { NIP19_PATTERNS, URL_PATTERN, TokenType, LN_URL_PATTERN, LNBC_PATTERN, CASHU_TOKEN_PATTERN, BITCOIN_ADDRESS_PATTERNS, EMAIL_PATTERN, CUSTOM_EMOJI_PATTERN, HASHTAG_PATTERN, LN_ADDRESS_PATTERN, NIP19_PLAIN_PATTERNS, NIP_IDENTIFIER_PATTERN, Token, parseNipIdentifier, isLightningAddress, findCustomEmojiMetadata, cleanUrlEnd, RELAY_URL_PATTERN, NIP19_TYPE_MAP, NIP19SubType, LEGACY_REFERENCE_PATTERN, findLegacyReferenceMetadata, } from "./patterns.js"; function createToken( type: TokenType, content: string, start: number, end: number, metadata: Record<string, unknown> = {} ): Token { return { type, content, start, end, metadata }; } function isOverlapping( start1: number, end1: number, start2: number, end2: number ): boolean { return start1 < end2 && start2 < end1; } function detectUrlTypeFromExtension(url: string): string | undefined { const ext = url.split("?")[0].split("#")[0].split(".").pop()?.toLowerCase(); if (!ext) return; const videoExt = ["mp4", "webm", "mov", "mkv"]; const audioExt = ["mp3", "wav", "ogg", "flac"]; const imageExt = ["jpg", "jpeg", "png", "gif", "webp", "bmp", "svg"]; if (videoExt.includes(ext)) return "video"; if (audioExt.includes(ext)) return "audio"; if (imageExt.includes(ext)) return "image"; return; } // tタグからハッシュタグセットを作成 function extractHashtagsFromTags(tags: string[][]): Set<string> { const hashtags = new Set<string>(); for (const tag of tags) { if (tag.length >= 2 && tag[0] === "t") { hashtags.add(tag[1].toLowerCase()); } } return hashtags; } // 同期版:拡張子ベースの判定のみ function findUrlTokensSync(content: string): Token[] { const urlTokens: Token[] = []; const pattern = new RegExp(URL_PATTERN.source, URL_PATTERN.flags); let match: RegExpExecArray | null; while ((match = pattern.exec(content)) !== null) { const originalUrl = match[0]; const cleanedUrl = cleanUrlEnd(originalUrl); const start = match.index; const end = start + cleanedUrl.length; const scheme = cleanedUrl.startsWith("https://") ? "https" : cleanedUrl.startsWith("http://") ? "http" : null; const metadata: Record<string, unknown> = { scheme }; const detectedType = detectUrlTypeFromExtension(cleanedUrl); if (detectedType) { metadata.type = detectedType; } urlTokens.push( createToken(TokenType.URL, cleanedUrl, start, end, metadata) ); if (cleanedUrl !== originalUrl) { const removedPart = originalUrl.slice(cleanedUrl.length); urlTokens.push( createToken( TokenType.TEXT, removedPart, start + cleanedUrl.length, start + originalUrl.length ) ); } } return urlTokens; } // 非同期版:HTTPヘッダーも確認 async function findUrlTokensAsync(content: string): Promise<Token[]> { const urlTokens: Token[] = []; const pattern = new RegExp(URL_PATTERN.source, URL_PATTERN.flags); let match: RegExpExecArray | null; while ((match = pattern.exec(content)) !== null) { const originalUrl = match[0]; const cleanedUrl = cleanUrlEnd(originalUrl); const start = match.index; const end = start + cleanedUrl.length; const scheme = cleanedUrl.startsWith("https://") ? "https" : cleanedUrl.startsWith("http://") ? "http" : null; const metadata: Record<string, unknown> = { scheme }; const detectedType = detectUrlTypeFromExtension(cleanedUrl); if (detectedType) { metadata.type = detectedType; } else { const fetchedType = await fetchUrlContentType(cleanedUrl); if (fetchedType) metadata.type = fetchedType; } urlTokens.push( createToken(TokenType.URL, cleanedUrl, start, end, metadata) ); if (cleanedUrl !== originalUrl) { const removedPart = originalUrl.slice(cleanedUrl.length); urlTokens.push( createToken( TokenType.TEXT, removedPart, start + cleanedUrl.length, start + originalUrl.length ) ); } } return urlTokens; } const PATTERN_CONFIGS = [ { patterns: { nip_identifier: NIP_IDENTIFIER_PATTERN }, handler: (match: RegExpExecArray, type: string) => { try { const nipInfo = parseNipIdentifier(match[0]); return { type: TokenType.NIP_IDENTIFIER, metadata: nipInfo }; } catch { return null; } }, }, { patterns: { legacy_reference: LEGACY_REFERENCE_PATTERN }, handler: (match: RegExpExecArray, type: string, tags: string[][]) => { const metadata = findLegacyReferenceMetadata(match[0], tags); return { type: TokenType.LEGACY_REFERENCE, metadata: metadata || { tagIndex: -1 }, }; }, }, { patterns: { [TokenType.RELAY]: RELAY_URL_PATTERN }, handler: (match: RegExpExecArray, type: string, tags: string[][]) => { const url = match[0]; const scheme = url.startsWith("wss://") ? "wss" : url.startsWith("ws://") ? "ws" : null; return { type: TokenType.RELAY, metadata: scheme ? { scheme } : {}, }; }, }, { patterns: { ln_url: LN_URL_PATTERN }, handler: () => ({ type: TokenType.LN_URL }), }, { patterns: { lnbc: LNBC_PATTERN }, handler: () => ({ type: TokenType.LNBC }), }, { patterns: { cashu_token: CASHU_TOKEN_PATTERN }, handler: () => ({ type: TokenType.CASHU_TOKEN }), }, { patterns: BITCOIN_ADDRESS_PATTERNS, handler: (match: RegExpExecArray, addressType: string) => ({ type: TokenType.BITCOIN_ADDRESS, metadata: { addressType }, }), }, { patterns: { email: EMAIL_PATTERN }, handler: (match: RegExpExecArray) => { const emailLike = match[0]; const isLN = isLightningAddress(emailLike); return { type: isLN ? TokenType.LN_ADDRESS : TokenType.EMAIL, metadata: isLN ? { domain: emailLike.split("@")[1] } : {}, }; }, }, { patterns: { custom_emoji: CUSTOM_EMOJI_PATTERN }, handler: (match: RegExpExecArray, type: string, tags: string[][]) => { const emojiName = match[1]; const metadata = findCustomEmojiMetadata(emojiName, tags); return { type: TokenType.CUSTOM_EMOJI, metadata: { name: emojiName, ...metadata }, }; }, }, ]; function processNip19Patterns( content: string, patterns: typeof NIP19_PATTERNS, matches: Token[], protectedRanges: Token[] ): void { Object.entries(patterns).forEach(([oldType, rawPattern]) => { const pattern = new RegExp(rawPattern.source, rawPattern.flags); let match: RegExpExecArray | null; while ((match = pattern.exec(content)) !== null) { const [matchedContent] = match; const start = match.index; const end = start + matchedContent.length; // 既存のマッチとの重複チェック const hasOverlap = matches.some((m) => isOverlapping(start, end, m.start, m.end) ); // 保護された範囲(URL等)との重複チェック const isInProtectedRange = protectedRanges.some((p) => isOverlapping(start, end, p.start, p.end) ); if (!hasOverlap && !isInProtectedRange) { // NIP19統合: subTypeをmetadataに格納 const subType = NIP19_TYPE_MAP[oldType] || oldType; matches.push( createToken(TokenType.NIP19, matchedContent, start, end, { subType: subType, hasNostrPrefix: matchedContent.startsWith("nostr:"), plainNip19: matchedContent.replace(/^nostr:/, ""), }) ); } } }); } function processPatterns( content: string, matches: Token[], tags: string[][] = [], protectedRanges: Token[] = [], hashtagsFromTagsOnly: boolean = true ): void { const validHashtags = hashtagsFromTagsOnly ? extractHashtagsFromTags(tags) : null; for (const config of PATTERN_CONFIGS) { for (const [patternType, rawPattern] of Object.entries(config.patterns)) { const pattern = new RegExp(rawPattern.source, rawPattern.flags); let match: RegExpExecArray | null; while ((match = pattern.exec(content)) !== null) { const start = match.index; const end = start + match[0].length; // 既存のマッチとの重複チェック if (matches.some((m) => isOverlapping(start, end, m.start, m.end))) continue; // 保護された範囲との重複チェック if ( protectedRanges.some((p) => isOverlapping(start, end, p.start, p.end)) ) continue; const result = config.handler(match, patternType, tags); if (result) { matches.push( createToken( result.type, match[0], start, end, "metadata" in result ? result.metadata : {} ) ); } } } } // ハッシュタグの処理(別途処理) processHashtagPatterns(content, matches, protectedRanges, validHashtags); } function processHashtagPatterns( content: string, matches: Token[], protectedRanges: Token[], validHashtags: Set<string> | null ): void { const pattern = new RegExp(HASHTAG_PATTERN.source, HASHTAG_PATTERN.flags); let match: RegExpExecArray | null; while ((match = pattern.exec(content)) !== null) { const start = match.index; const end = start + match[0].length; const hashtag = match[0].slice(1); // # を除去 // 既存のマッチとの重複チェック if (matches.some((m) => isOverlapping(start, end, m.start, m.end))) continue; // 保護された範囲との重複チェック if (protectedRanges.some((p) => isOverlapping(start, end, p.start, p.end))) continue; // tタグ検証が有効な場合のチェック if (validHashtags && !validHashtags.has(hashtag.toLowerCase())) { continue; } matches.push( createToken(TokenType.HASHTAG, match[0], start, end, { tag: hashtag, validated: validHashtags !== null, }) ); } } //重なったトークン同士があったとき、どちらを優先するか const PRIORITY: Record<TokenType, number> = { [TokenType.URL]: 15, [TokenType.NIP19]: 10, [TokenType.RELAY]: 10, [TokenType.CASHU_TOKEN]: 2, [TokenType.LNBC]: 2, [TokenType.LN_URL]: 2, [TokenType.LN_ADDRESS]: 2, [TokenType.CUSTOM_EMOJI]: 1, [TokenType.BITCOIN_ADDRESS]: 1, [TokenType.EMAIL]: 1, [TokenType.LEGACY_REFERENCE]: 1, [TokenType.HASHTAG]: 0, [TokenType.NIP_IDENTIFIER]: 0, [TokenType.TEXT]: 0, }; function removeOverlaps(matches: Token[]): Token[] { const sorted = [...matches].sort((a, b) => { if (a.start !== b.start) return a.start - b.start; if (a.end !== b.end) return b.end - a.end; // 長い方を先に return (PRIORITY[b.type] ?? 0) - (PRIORITY[a.type] ?? 0); }); const result: Token[] = []; for (const token of sorted) { const overlapIndex = result.findIndex((t) => isOverlapping(t.start, t.end, token.start, token.end) ); if (overlapIndex === -1) { result.push(token); } else { const existing = result[overlapIndex]; const tokenPriority = PRIORITY[token.type] ?? 0; const existingPriority = PRIORITY[existing.type] ?? 0; // 同じ位置でも「より外側のトークンを優先」 const tokenLength = token.end - token.start; const existingLength = existing.end - existing.start; const shouldReplace = tokenPriority > existingPriority || (tokenPriority === existingPriority && tokenLength > existingLength); if (shouldReplace) { result.splice(overlapIndex, 1, token); } // else: skip token } } return result.sort((a, b) => a.start - b.start); // 再整列 } function buildTokens(content: string, matches: Token[]): Token[] { const filteredMatches = removeOverlaps(matches); // テキストトークンを挿入 const tokens: Token[] = []; let currentPos = 0; for (const match of filteredMatches) { if (match.start > currentPos) { tokens.push( createToken( TokenType.TEXT, content.slice(currentPos, match.start), currentPos, match.start ) ); } tokens.push(match); currentPos = match.end; } if (currentPos < content.length) { tokens.push( createToken( TokenType.TEXT, content.slice(currentPos), currentPos, content.length ) ); } return tokens; } export interface ParseOptions { includeNostrPrefixOnly?: boolean; hashtagsFromTagsOnly?: boolean; } // 同期版:detectUrlType = falseの場合 export function parseContent( content: string, tags: string[][] = [], options: ParseOptions = {} ): Token[] { if (!content) return []; const { includeNostrPrefixOnly = true, hashtagsFromTagsOnly = true } = options; // URLを検出(拡張子ベースのみ) const urlTokens = findUrlTokensSync(content); // NIP-19パターンを処理(URLの範囲を除外) processNip19Patterns(content, NIP19_PATTERNS, urlTokens, urlTokens); if (!includeNostrPrefixOnly) { processNip19Patterns(content, NIP19_PLAIN_PATTERNS, urlTokens, urlTokens); } // その他のパターンを処理(URLの範囲を除外) processPatterns(content, urlTokens, tags, urlTokens, hashtagsFromTagsOnly); return buildTokens(content, urlTokens); } // 非同期版:detectUrlType = trueの場合 export async function parseContentAsync( content: string, tags: string[][] = [], options: ParseOptions = {} ): Promise<Token[]> { if (!content) return []; const { includeNostrPrefixOnly = true, hashtagsFromTagsOnly = true } = options; // URLを検出(HTTPヘッダーも確認) const urlTokens = await findUrlTokensAsync(content); // NIP-19パターンを処理(URLの範囲を除外) processNip19Patterns(content, NIP19_PATTERNS, urlTokens, urlTokens); if (!includeNostrPrefixOnly) { processNip19Patterns(content, NIP19_PLAIN_PATTERNS, urlTokens, urlTokens); } // その他のパターンを処理(URLの範囲を除外) processPatterns(content, urlTokens, tags, urlTokens, hashtagsFromTagsOnly); return buildTokens(content, urlTokens); } export function filterTokens<T extends TokenType>( tokens: Token[], types: T | T[] ): Token[] { const typeSet = new Set(Array.isArray(types) ? types : [types]); return tokens.filter((token) => typeSet.has(token.type as T)); } export function filterTokensBy( tokens: Token[], predicate: (token: Token) => boolean ): Token[] { return tokens.filter(predicate); } // NIP19統合後のフィルター関数 export function getNip19Entities(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.NIP19); } // 特定のNIP19サブタイプでフィルター export function filterNip19BySubType( tokens: Token[], subType: NIP19SubType | NIP19SubType[] ): Token[] { const subTypeSet = new Set(Array.isArray(subType) ? subType : [subType]); return tokens.filter( (token) => token.type === TokenType.NIP19 && token.metadata?.subType && subTypeSet.has(token.metadata.subType as NIP19SubType) ); } // 個別のNIP19サブタイプ取得関数 export function getNpubs(tokens: Token[]): Token[] { return filterNip19BySubType(tokens, NIP19SubType.NPUB); } export function getNprofiles(tokens: Token[]): Token[] { return filterNip19BySubType(tokens, NIP19SubType.NPROFILE); } export function getNotes(tokens: Token[]): Token[] { return filterNip19BySubType(tokens, NIP19SubType.NOTE); } export function getNevents(tokens: Token[]): Token[] { return filterNip19BySubType(tokens, NIP19SubType.NEVENT); } export function getNaddrs(tokens: Token[]): Token[] { return filterNip19BySubType(tokens, NIP19SubType.NADDR); } export function getNsecs(tokens: Token[]): Token[] { return filterNip19BySubType(tokens, NIP19SubType.NSEC); } export function getNipIdentifiers(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.NIP_IDENTIFIER); } // 旧タイプ引用を取得する関数 export function getLegacyReferences(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.LEGACY_REFERENCE); } export function getUrls(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.URL); } export function getCustomEmojis(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.CUSTOM_EMOJI); } export function getHashtags(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.HASHTAG); } // tタグで検証されたハッシュタグのみを取得 export function getValidatedHashtags(tokens: Token[]): Token[] { return tokens.filter( (token) => token.type === TokenType.HASHTAG && token.metadata?.validated === true ); } export function getLightningAddresses(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.LN_ADDRESS); } export function getLightningUrls(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.LN_URL); } export function getLightningInvoices(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.LNBC); } export function getBitcoinAddresses(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.BITCOIN_ADDRESS); } export function getCashuTokens(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.CASHU_TOKEN); } export function getEmails(tokens: Token[]): Token[] { return filterTokens(tokens, TokenType.EMAIL); } export function resetPatterns(): void { const allPatterns = [ ...Object.values(NIP19_PATTERNS), ...Object.values(NIP19_PLAIN_PATTERNS), ...Object.values(BITCOIN_ADDRESS_PATTERNS), URL_PATTERN, RELAY_URL_PATTERN, LN_ADDRESS_PATTERN, LN_URL_PATTERN, LNBC_PATTERN, EMAIL_PATTERN, CASHU_TOKEN_PATTERN, CUSTOM_EMOJI_PATTERN, HASHTAG_PATTERN, NIP_IDENTIFIER_PATTERN, LEGACY_REFERENCE_PATTERN, ]; allPatterns.forEach((pattern) => (pattern.lastIndex = 0)); } // モジュール内キャッシュ const urlTypeCache = new Map<string, string>(); // cleanedUrl → "image"/"video"/... async function fetchUrlContentType(url: string): Promise<string | undefined> { if (urlTypeCache.has(url)) { return urlTypeCache.get(url); } try { const res = await fetch(url, { method: "HEAD" }); const contentType = res.headers.get("Content-Type") || ""; let type: string | undefined; if (contentType.startsWith("video/")) type = "video"; else if (contentType.startsWith("audio/")) type = "audio"; else if (contentType.startsWith("image/")) type = "image"; if (type) { urlTypeCache.set(url, type); // 成功したものだけキャッシュ } return type; } catch { return undefined; // ネットワークエラー時はキャッシュしない } }