UNPKG

@konemono/nostr-content-parser

Version:
465 lines 18.4 kB
// 修正版 parseContent.ts - ハッシュタグオプション追加 import { NIP19_PATTERNS, URL_PATTERN, TokenType, LN_URL_PATTERN, LNBC_PATTERN, CASHU_TOKEN_PATTERN, BITCOIN_ADDRESS_PATTERNS, EMAIL_PATTERN, CUSTOM_EMOJI_PATTERN, HASHTAG_PATTERN, LN_ADDRESS_PATTERN, NIP19_PLAIN_PATTERNS, NIP_IDENTIFIER_PATTERN, parseNipIdentifier, isLightningAddress, findCustomEmojiMetadata, cleanUrlEnd, RELAY_URL_PATTERN, NIP19_TYPE_MAP, NIP19SubType, LEGACY_REFERENCE_PATTERN, findLegacyReferenceMetadata, } from "./patterns.js"; function createToken(type, content, start, end, metadata = {}) { return { type, content, start, end, metadata }; } function isOverlapping(start1, end1, start2, end2) { return start1 < end2 && start2 < end1; } function detectUrlTypeFromExtension(url) { const ext = url.split("?")[0].split("#")[0].split(".").pop()?.toLowerCase(); if (!ext) return; const videoExt = ["mp4", "webm", "mov", "mkv"]; const audioExt = ["mp3", "wav", "ogg", "flac"]; const imageExt = ["jpg", "jpeg", "png", "gif", "webp", "bmp", "svg"]; if (videoExt.includes(ext)) return "video"; if (audioExt.includes(ext)) return "audio"; if (imageExt.includes(ext)) return "image"; return; } // tタグからハッシュタグセットを作成 function extractHashtagsFromTags(tags) { const hashtags = new Set(); for (const tag of tags) { if (tag.length >= 2 && tag[0] === "t") { hashtags.add(tag[1].toLowerCase()); } } return hashtags; } // 同期版:拡張子ベースの判定のみ function findUrlTokensSync(content) { const urlTokens = []; const pattern = new RegExp(URL_PATTERN.source, URL_PATTERN.flags); let match; while ((match = pattern.exec(content)) !== null) { const originalUrl = match[0]; const cleanedUrl = cleanUrlEnd(originalUrl); const start = match.index; const end = start + cleanedUrl.length; const scheme = cleanedUrl.startsWith("https://") ? "https" : cleanedUrl.startsWith("http://") ? "http" : null; const metadata = { scheme }; const detectedType = detectUrlTypeFromExtension(cleanedUrl); if (detectedType) { metadata.type = detectedType; } urlTokens.push(createToken(TokenType.URL, cleanedUrl, start, end, metadata)); if (cleanedUrl !== originalUrl) { const removedPart = originalUrl.slice(cleanedUrl.length); urlTokens.push(createToken(TokenType.TEXT, removedPart, start + cleanedUrl.length, start + originalUrl.length)); } } return urlTokens; } // 非同期版:HTTPヘッダーも確認 async function findUrlTokensAsync(content) { const urlTokens = []; const pattern = new RegExp(URL_PATTERN.source, URL_PATTERN.flags); let match; while ((match = pattern.exec(content)) !== null) { const originalUrl = match[0]; const cleanedUrl = cleanUrlEnd(originalUrl); const start = match.index; const end = start + cleanedUrl.length; const scheme = cleanedUrl.startsWith("https://") ? "https" : cleanedUrl.startsWith("http://") ? "http" : null; const metadata = { scheme }; const detectedType = detectUrlTypeFromExtension(cleanedUrl); if (detectedType) { metadata.type = detectedType; } else { const fetchedType = await fetchUrlContentType(cleanedUrl); if (fetchedType) metadata.type = fetchedType; } urlTokens.push(createToken(TokenType.URL, cleanedUrl, start, end, metadata)); if (cleanedUrl !== originalUrl) { const removedPart = originalUrl.slice(cleanedUrl.length); urlTokens.push(createToken(TokenType.TEXT, removedPart, start + cleanedUrl.length, start + originalUrl.length)); } } return urlTokens; } const PATTERN_CONFIGS = [ { patterns: { nip_identifier: NIP_IDENTIFIER_PATTERN }, handler: (match, type) => { try { const nipInfo = parseNipIdentifier(match[0]); return { type: TokenType.NIP_IDENTIFIER, metadata: nipInfo }; } catch { return null; } }, }, { patterns: { legacy_reference: LEGACY_REFERENCE_PATTERN }, handler: (match, type, tags) => { const metadata = findLegacyReferenceMetadata(match[0], tags); return { type: TokenType.LEGACY_REFERENCE, metadata: metadata || { tagIndex: -1 }, }; }, }, { patterns: { [TokenType.RELAY]: RELAY_URL_PATTERN }, handler: (match, type, tags) => { const url = match[0]; const scheme = url.startsWith("wss://") ? "wss" : url.startsWith("ws://") ? "ws" : null; return { type: TokenType.RELAY, metadata: scheme ? { scheme } : {}, }; }, }, { patterns: { ln_url: LN_URL_PATTERN }, handler: () => ({ type: TokenType.LN_URL }), }, { patterns: { lnbc: LNBC_PATTERN }, handler: () => ({ type: TokenType.LNBC }), }, { patterns: { cashu_token: CASHU_TOKEN_PATTERN }, handler: () => ({ type: TokenType.CASHU_TOKEN }), }, { patterns: BITCOIN_ADDRESS_PATTERNS, handler: (match, addressType) => ({ type: TokenType.BITCOIN_ADDRESS, metadata: { addressType }, }), }, { patterns: { email: EMAIL_PATTERN }, handler: (match) => { const emailLike = match[0]; const isLN = isLightningAddress(emailLike); return { type: isLN ? TokenType.LN_ADDRESS : TokenType.EMAIL, metadata: isLN ? { domain: emailLike.split("@")[1] } : {}, }; }, }, { patterns: { custom_emoji: CUSTOM_EMOJI_PATTERN }, handler: (match, type, tags) => { const emojiName = match[1]; const metadata = findCustomEmojiMetadata(emojiName, tags); return { type: TokenType.CUSTOM_EMOJI, metadata: { name: emojiName, ...metadata }, }; }, }, ]; function processNip19Patterns(content, patterns, matches, protectedRanges) { Object.entries(patterns).forEach(([oldType, rawPattern]) => { const pattern = new RegExp(rawPattern.source, rawPattern.flags); let match; while ((match = pattern.exec(content)) !== null) { const [matchedContent] = match; const start = match.index; const end = start + matchedContent.length; // 既存のマッチとの重複チェック const hasOverlap = matches.some((m) => isOverlapping(start, end, m.start, m.end)); // 保護された範囲(URL等)との重複チェック const isInProtectedRange = protectedRanges.some((p) => isOverlapping(start, end, p.start, p.end)); if (!hasOverlap && !isInProtectedRange) { // NIP19統合: subTypeをmetadataに格納 const subType = NIP19_TYPE_MAP[oldType] || oldType; matches.push(createToken(TokenType.NIP19, matchedContent, start, end, { subType: subType, hasNostrPrefix: matchedContent.startsWith("nostr:"), plainNip19: matchedContent.replace(/^nostr:/, ""), })); } } }); } function processPatterns(content, matches, tags = [], protectedRanges = [], hashtagsFromTagsOnly = true) { const validHashtags = hashtagsFromTagsOnly ? extractHashtagsFromTags(tags) : null; for (const config of PATTERN_CONFIGS) { for (const [patternType, rawPattern] of Object.entries(config.patterns)) { const pattern = new RegExp(rawPattern.source, rawPattern.flags); let match; while ((match = pattern.exec(content)) !== null) { const start = match.index; const end = start + match[0].length; // 既存のマッチとの重複チェック if (matches.some((m) => isOverlapping(start, end, m.start, m.end))) continue; // 保護された範囲との重複チェック if (protectedRanges.some((p) => isOverlapping(start, end, p.start, p.end))) continue; const result = config.handler(match, patternType, tags); if (result) { matches.push(createToken(result.type, match[0], start, end, "metadata" in result ? result.metadata : {})); } } } } // ハッシュタグの処理(別途処理) processHashtagPatterns(content, matches, protectedRanges, validHashtags); } function processHashtagPatterns(content, matches, protectedRanges, validHashtags) { const pattern = new RegExp(HASHTAG_PATTERN.source, HASHTAG_PATTERN.flags); let match; while ((match = pattern.exec(content)) !== null) { const start = match.index; const end = start + match[0].length; const hashtag = match[0].slice(1); // # を除去 // 既存のマッチとの重複チェック if (matches.some((m) => isOverlapping(start, end, m.start, m.end))) continue; // 保護された範囲との重複チェック if (protectedRanges.some((p) => isOverlapping(start, end, p.start, p.end))) continue; // tタグ検証が有効な場合のチェック if (validHashtags && !validHashtags.has(hashtag.toLowerCase())) { continue; } matches.push(createToken(TokenType.HASHTAG, match[0], start, end, { tag: hashtag, validated: validHashtags !== null, })); } } //重なったトークン同士があったとき、どちらを優先するか const PRIORITY = { [TokenType.URL]: 15, [TokenType.NIP19]: 10, [TokenType.RELAY]: 10, [TokenType.CASHU_TOKEN]: 2, [TokenType.LNBC]: 2, [TokenType.LN_URL]: 2, [TokenType.LN_ADDRESS]: 2, [TokenType.CUSTOM_EMOJI]: 1, [TokenType.BITCOIN_ADDRESS]: 1, [TokenType.EMAIL]: 1, [TokenType.LEGACY_REFERENCE]: 1, [TokenType.HASHTAG]: 0, [TokenType.NIP_IDENTIFIER]: 0, [TokenType.TEXT]: 0, }; function removeOverlaps(matches) { const sorted = [...matches].sort((a, b) => { if (a.start !== b.start) return a.start - b.start; if (a.end !== b.end) return b.end - a.end; // 長い方を先に return (PRIORITY[b.type] ?? 0) - (PRIORITY[a.type] ?? 0); }); const result = []; for (const token of sorted) { const overlapIndex = result.findIndex((t) => isOverlapping(t.start, t.end, token.start, token.end)); if (overlapIndex === -1) { result.push(token); } else { const existing = result[overlapIndex]; const tokenPriority = PRIORITY[token.type] ?? 0; const existingPriority = PRIORITY[existing.type] ?? 0; // 同じ位置でも「より外側のトークンを優先」 const tokenLength = token.end - token.start; const existingLength = existing.end - existing.start; const shouldReplace = tokenPriority > existingPriority || (tokenPriority === existingPriority && tokenLength > existingLength); if (shouldReplace) { result.splice(overlapIndex, 1, token); } // else: skip token } } return result.sort((a, b) => a.start - b.start); // 再整列 } function buildTokens(content, matches) { const filteredMatches = removeOverlaps(matches); // テキストトークンを挿入 const tokens = []; let currentPos = 0; for (const match of filteredMatches) { if (match.start > currentPos) { tokens.push(createToken(TokenType.TEXT, content.slice(currentPos, match.start), currentPos, match.start)); } tokens.push(match); currentPos = match.end; } if (currentPos < content.length) { tokens.push(createToken(TokenType.TEXT, content.slice(currentPos), currentPos, content.length)); } return tokens; } // 同期版:detectUrlType = falseの場合 export function parseContent(content, tags = [], options = {}) { if (!content) return []; const { includeNostrPrefixOnly = true, hashtagsFromTagsOnly = true } = options; // URLを検出(拡張子ベースのみ) const urlTokens = findUrlTokensSync(content); // NIP-19パターンを処理(URLの範囲を除外) processNip19Patterns(content, NIP19_PATTERNS, urlTokens, urlTokens); if (!includeNostrPrefixOnly) { processNip19Patterns(content, NIP19_PLAIN_PATTERNS, urlTokens, urlTokens); } // その他のパターンを処理(URLの範囲を除外) processPatterns(content, urlTokens, tags, urlTokens, hashtagsFromTagsOnly); return buildTokens(content, urlTokens); } // 非同期版:detectUrlType = trueの場合 export async function parseContentAsync(content, tags = [], options = {}) { if (!content) return []; const { includeNostrPrefixOnly = true, hashtagsFromTagsOnly = true } = options; // URLを検出(HTTPヘッダーも確認) const urlTokens = await findUrlTokensAsync(content); // NIP-19パターンを処理(URLの範囲を除外) processNip19Patterns(content, NIP19_PATTERNS, urlTokens, urlTokens); if (!includeNostrPrefixOnly) { processNip19Patterns(content, NIP19_PLAIN_PATTERNS, urlTokens, urlTokens); } // その他のパターンを処理(URLの範囲を除外) processPatterns(content, urlTokens, tags, urlTokens, hashtagsFromTagsOnly); return buildTokens(content, urlTokens); } export function filterTokens(tokens, types) { const typeSet = new Set(Array.isArray(types) ? types : [types]); return tokens.filter((token) => typeSet.has(token.type)); } export function filterTokensBy(tokens, predicate) { return tokens.filter(predicate); } // NIP19統合後のフィルター関数 export function getNip19Entities(tokens) { return filterTokens(tokens, TokenType.NIP19); } // 特定のNIP19サブタイプでフィルター export function filterNip19BySubType(tokens, subType) { const subTypeSet = new Set(Array.isArray(subType) ? subType : [subType]); return tokens.filter((token) => token.type === TokenType.NIP19 && token.metadata?.subType && subTypeSet.has(token.metadata.subType)); } // 個別のNIP19サブタイプ取得関数 export function getNpubs(tokens) { return filterNip19BySubType(tokens, NIP19SubType.NPUB); } export function getNprofiles(tokens) { return filterNip19BySubType(tokens, NIP19SubType.NPROFILE); } export function getNotes(tokens) { return filterNip19BySubType(tokens, NIP19SubType.NOTE); } export function getNevents(tokens) { return filterNip19BySubType(tokens, NIP19SubType.NEVENT); } export function getNaddrs(tokens) { return filterNip19BySubType(tokens, NIP19SubType.NADDR); } export function getNsecs(tokens) { return filterNip19BySubType(tokens, NIP19SubType.NSEC); } export function getNipIdentifiers(tokens) { return filterTokens(tokens, TokenType.NIP_IDENTIFIER); } // 旧タイプ引用を取得する関数 export function getLegacyReferences(tokens) { return filterTokens(tokens, TokenType.LEGACY_REFERENCE); } export function getUrls(tokens) { return filterTokens(tokens, TokenType.URL); } export function getCustomEmojis(tokens) { return filterTokens(tokens, TokenType.CUSTOM_EMOJI); } export function getHashtags(tokens) { return filterTokens(tokens, TokenType.HASHTAG); } // tタグで検証されたハッシュタグのみを取得 export function getValidatedHashtags(tokens) { return tokens.filter((token) => token.type === TokenType.HASHTAG && token.metadata?.validated === true); } export function getLightningAddresses(tokens) { return filterTokens(tokens, TokenType.LN_ADDRESS); } export function getLightningUrls(tokens) { return filterTokens(tokens, TokenType.LN_URL); } export function getLightningInvoices(tokens) { return filterTokens(tokens, TokenType.LNBC); } export function getBitcoinAddresses(tokens) { return filterTokens(tokens, TokenType.BITCOIN_ADDRESS); } export function getCashuTokens(tokens) { return filterTokens(tokens, TokenType.CASHU_TOKEN); } export function getEmails(tokens) { return filterTokens(tokens, TokenType.EMAIL); } export function resetPatterns() { const allPatterns = [ ...Object.values(NIP19_PATTERNS), ...Object.values(NIP19_PLAIN_PATTERNS), ...Object.values(BITCOIN_ADDRESS_PATTERNS), URL_PATTERN, RELAY_URL_PATTERN, LN_ADDRESS_PATTERN, LN_URL_PATTERN, LNBC_PATTERN, EMAIL_PATTERN, CASHU_TOKEN_PATTERN, CUSTOM_EMOJI_PATTERN, HASHTAG_PATTERN, NIP_IDENTIFIER_PATTERN, LEGACY_REFERENCE_PATTERN, ]; allPatterns.forEach((pattern) => (pattern.lastIndex = 0)); } // モジュール内キャッシュ const urlTypeCache = new Map(); // cleanedUrl → "image"/"video"/... async function fetchUrlContentType(url) { if (urlTypeCache.has(url)) { return urlTypeCache.get(url); } try { const res = await fetch(url, { method: "HEAD" }); const contentType = res.headers.get("Content-Type") || ""; let type; if (contentType.startsWith("video/")) type = "video"; else if (contentType.startsWith("audio/")) type = "audio"; else if (contentType.startsWith("image/")) type = "image"; if (type) { urlTypeCache.set(url, type); // 成功したものだけキャッシュ } return type; } catch { return undefined; // ネットワークエラー時はキャッシュしない } } //# sourceMappingURL=parseContent.js.map