@konemono/nostr-content-parser
Version:
Parse Nostr content into tokens
465 lines • 18.4 kB
JavaScript
// Revised parseContent.ts - adds the hashtag option
import { NIP19_PATTERNS, URL_PATTERN, TokenType, LN_URL_PATTERN, LNBC_PATTERN, CASHU_TOKEN_PATTERN, BITCOIN_ADDRESS_PATTERNS, EMAIL_PATTERN, CUSTOM_EMOJI_PATTERN, HASHTAG_PATTERN, LN_ADDRESS_PATTERN, NIP19_PLAIN_PATTERNS, NIP_IDENTIFIER_PATTERN, parseNipIdentifier, isLightningAddress, findCustomEmojiMetadata, cleanUrlEnd, RELAY_URL_PATTERN, NIP19_TYPE_MAP, NIP19SubType, LEGACY_REFERENCE_PATTERN, findLegacyReferenceMetadata, } from "./patterns.js";
/**
 * Builds the plain token record used throughout the parser.
 *
 * @param {string} type - Token type identifier.
 * @param {string} content - Text covered by the token.
 * @param {number} start - Start offset of the token.
 * @param {number} end - End offset of the token.
 * @param {object} [metadata={}] - Extra data attached to the token.
 * @returns {{type: string, content: string, start: number, end: number, metadata: object}}
 *   A new object holding the five values as given.
 */
function createToken(type, content, start, end, metadata = {}) {
    const token = { type };
    token.content = content;
    token.start = start;
    token.end = end;
    token.metadata = metadata;
    return token;
}
/**
 * Tells whether two ranges share at least one position.
 * A range that merely touches the other (one ends where the other starts)
 * does not count as overlapping.
 *
 * @param {number} start1 - Start of the first range.
 * @param {number} end1 - End of the first range.
 * @param {number} start2 - Start of the second range.
 * @param {number} end2 - End of the second range.
 * @returns {boolean} True when the ranges overlap.
 */
function isOverlapping(start1, end1, start2, end2) {
    // Written as a negated "<" (not ">=") so NaN operands still yield false.
    if (!(start1 < end2)) {
        return false;
    }
    return start2 < end1;
}
/**
 * Guesses the media kind of a URL from the file extension of its last path
 * segment. The query string and fragment are ignored.
 *
 * Only the path is inspected: a host-only URL such as "https://example.mov"
 * has no file extension, so its top-level domain is never mistaken for one.
 * Strings without a "scheme://" prefix are treated entirely as a path.
 *
 * @param {string} url - URL (or bare file name) to inspect.
 * @returns {"video"|"audio"|"image"|undefined} The detected kind, or
 *   undefined when the extension is missing or not recognised.
 */
function detectUrlTypeFromExtension(url) {
    const withoutQueryOrFragment = url.split("?")[0].split("#")[0];
    let path = withoutQueryOrFragment;
    const schemeEnd = withoutQueryOrFragment.indexOf("://");
    if (schemeEnd !== -1) {
        // Skip the authority (host[:port]); the path begins at the next "/".
        const pathStart = withoutQueryOrFragment.indexOf("/", schemeEnd + 3);
        if (pathStart === -1) {
            return;
        }
        path = withoutQueryOrFragment.slice(pathStart);
    }
    const lastSegment = path.slice(path.lastIndexOf("/") + 1);
    const dotIndex = lastSegment.lastIndexOf(".");
    if (dotIndex === -1) {
        return;
    }
    const ext = lastSegment.slice(dotIndex + 1).toLowerCase();
    if (!ext) {
        return;
    }
    const videoExt = ["mp4", "webm", "mov", "mkv"];
    const audioExt = ["mp3", "wav", "ogg", "flac"];
    const imageExt = ["jpg", "jpeg", "png", "gif", "webp", "bmp", "svg"];
    if (videoExt.includes(ext)) {
        return "video";
    }
    if (audioExt.includes(ext)) {
        return "audio";
    }
    if (imageExt.includes(ext)) {
        return "image";
    }
    return;
}
/**
 * Builds the set of hashtags declared by the event's "t" tags.
 *
 * Malformed entries (a tag that is not an array, or a "t" tag whose value is
 * not a string) are skipped instead of throwing, and a null/undefined tag
 * list yields an empty set.
 *
 * @param {Iterable<Array>|null|undefined} tags - Event tags, e.g. [["t", "nostr"], ...].
 * @returns {Set<string>} Lower-cased values of all well-formed "t" tags.
 */
function extractHashtagsFromTags(tags) {
    const hashtags = new Set();
    if (tags == null) {
        return hashtags;
    }
    for (const tag of tags) {
        if (!Array.isArray(tag) || tag[0] !== "t") {
            continue;
        }
        const value = tag[1];
        if (typeof value === "string") {
            hashtags.add(value.toLowerCase());
        }
    }
    return hashtags;
}
// Synchronous variant: the media type is inferred from the file extension only.
/**
 * Scans `content` with URL_PATTERN and emits one URL token per match, plus a
 * TEXT token for any trailing characters that cleanUrlEnd removed.
 *
 * @param {string} content - Text to scan.
 * @returns {Array<object>} Tokens in match order.
 */
function findUrlTokensSync(content) {
    const tokens = [];
    // Private copy of the regex so the shared pattern's lastIndex is untouched.
    const urlRegex = new RegExp(URL_PATTERN.source, URL_PATTERN.flags);
    for (let found = urlRegex.exec(content); found !== null; found = urlRegex.exec(content)) {
        const rawUrl = found[0];
        const url = cleanUrlEnd(rawUrl);
        const urlStart = found.index;
        const urlEnd = urlStart + url.length;
        let scheme = null;
        if (url.startsWith("https://")) {
            scheme = "https";
        }
        else if (url.startsWith("http://")) {
            scheme = "http";
        }
        const metadata = { scheme };
        const mediaType = detectUrlTypeFromExtension(url);
        if (mediaType) {
            metadata.type = mediaType;
        }
        tokens.push(createToken(TokenType.URL, url, urlStart, urlEnd, metadata));
        if (url === rawUrl) {
            continue;
        }
        // Keep whatever was cut off as plain text
        // (assumes cleanUrlEnd returns a prefix of its input - TODO confirm).
        const trimmedTail = rawUrl.slice(url.length);
        tokens.push(createToken(TokenType.TEXT, trimmedTail, urlEnd, urlStart + rawUrl.length));
    }
    return tokens;
}
// Asynchronous variant: also consults the HTTP Content-Type header.
/**
 * Scans `content` with URL_PATTERN and emits one URL token per match, plus a
 * TEXT token for any trailing characters that cleanUrlEnd removed.
 *
 * When the extension does not reveal a media type, fetchUrlContentType is
 * asked for it. All lookups are started while scanning and awaited together,
 * so total latency is that of the slowest request rather than the sum of all
 * requests; each distinct URL is looked up once per call.
 *
 * @param {string} content - Text to scan.
 * @returns {Promise<Array<object>>} Tokens in match order.
 */
async function findUrlTokensAsync(content) {
    const pattern = new RegExp(URL_PATTERN.source, URL_PATTERN.flags);
    const found = [];
    // cleanedUrl -> pending lookup; de-duplicates repeated URLs in one note.
    const pendingTypes = new Map();
    let match;
    while ((match = pattern.exec(content)) !== null) {
        const originalUrl = match[0];
        const cleanedUrl = cleanUrlEnd(originalUrl);
        const detectedType = detectUrlTypeFromExtension(cleanedUrl);
        if (!detectedType && !pendingTypes.has(cleanedUrl)) {
            pendingTypes.set(cleanedUrl, fetchUrlContentType(cleanedUrl));
        }
        found.push({ originalUrl, cleanedUrl, start: match.index, detectedType });
    }
    const pendingUrls = [...pendingTypes.keys()];
    const resolvedTypes = await Promise.all(pendingTypes.values());
    const fetchedTypes = new Map(pendingUrls.map((url, index) => [url, resolvedTypes[index]]));
    const urlTokens = [];
    for (const { originalUrl, cleanedUrl, start, detectedType } of found) {
        const end = start + cleanedUrl.length;
        const scheme = cleanedUrl.startsWith("https://")
            ? "https"
            : cleanedUrl.startsWith("http://")
                ? "http"
                : null;
        const metadata = { scheme };
        const type = detectedType || fetchedTypes.get(cleanedUrl);
        if (type) {
            metadata.type = type;
        }
        urlTokens.push(createToken(TokenType.URL, cleanedUrl, start, end, metadata));
        if (cleanedUrl !== originalUrl) {
            // Keep whatever was cut off as plain text.
            const removedPart = originalUrl.slice(cleanedUrl.length);
            urlTokens.push(createToken(TokenType.TEXT, removedPart, end, start + originalUrl.length));
        }
    }
    return urlTokens;
}
// Matchers applied, in this order, by processPatterns. Each entry maps pattern
// names to regular expressions and supplies a handler that receives
// (match, patternName, tags) and returns `{ type, metadata? }`, or null to
// discard the match.
const PATTERN_CONFIGS = [
    {
        patterns: { nip_identifier: NIP_IDENTIFIER_PATTERN },
        handler(match) {
            try {
                const metadata = parseNipIdentifier(match[0]);
                return { type: TokenType.NIP_IDENTIFIER, metadata };
            }
            catch {
                // Parsing failed: drop this match.
                return null;
            }
        },
    },
    {
        patterns: { legacy_reference: LEGACY_REFERENCE_PATTERN },
        handler(match, _patternName, tags) {
            const found = findLegacyReferenceMetadata(match[0], tags);
            // No metadata found for the reference: mark it with tagIndex -1.
            return {
                type: TokenType.LEGACY_REFERENCE,
                metadata: found ? found : { tagIndex: -1 },
            };
        },
    },
    {
        patterns: { [TokenType.RELAY]: RELAY_URL_PATTERN },
        handler(match) {
            const relayUrl = match[0];
            if (relayUrl.startsWith("wss://")) {
                return { type: TokenType.RELAY, metadata: { scheme: "wss" } };
            }
            if (relayUrl.startsWith("ws://")) {
                return { type: TokenType.RELAY, metadata: { scheme: "ws" } };
            }
            return { type: TokenType.RELAY, metadata: {} };
        },
    },
    {
        patterns: { ln_url: LN_URL_PATTERN },
        handler() {
            return { type: TokenType.LN_URL };
        },
    },
    {
        patterns: { lnbc: LNBC_PATTERN },
        handler() {
            return { type: TokenType.LNBC };
        },
    },
    {
        patterns: { cashu_token: CASHU_TOKEN_PATTERN },
        handler() {
            return { type: TokenType.CASHU_TOKEN };
        },
    },
    {
        patterns: BITCOIN_ADDRESS_PATTERNS,
        // The pattern name doubles as the address type.
        handler(_match, addressType) {
            return {
                type: TokenType.BITCOIN_ADDRESS,
                metadata: { addressType },
            };
        },
    },
    {
        patterns: { email: EMAIL_PATTERN },
        handler(match) {
            const address = match[0];
            if (isLightningAddress(address)) {
                return {
                    type: TokenType.LN_ADDRESS,
                    metadata: { domain: address.split("@")[1] },
                };
            }
            return { type: TokenType.EMAIL, metadata: {} };
        },
    },
    {
        patterns: { custom_emoji: CUSTOM_EMOJI_PATTERN },
        handler(match, _patternName, tags) {
            // Capture group 1 holds the emoji name.
            const name = match[1];
            const extra = findCustomEmojiMetadata(name, tags);
            return {
                type: TokenType.CUSTOM_EMOJI,
                metadata: { name, ...extra },
            };
        },
    },
];
/**
 * Runs every regex in `patterns` over `content` and appends a NIP19 token to
 * `matches` for each hit that overlaps neither an existing match nor a
 * protected range.
 *
 * @param {string} content - Text to scan.
 * @param {Object<string, RegExp>} patterns - Regexes keyed by their legacy type name.
 * @param {Array<object>} matches - Collected tokens; mutated in place.
 * @param {Array<object>} protectedRanges - Ranges (e.g. URLs) that must not be matched into.
 * @returns {void}
 */
function processNip19Patterns(content, patterns, matches, protectedRanges) {
    const touchesAny = (ranges, start, end) => ranges.some((range) => isOverlapping(start, end, range.start, range.end));
    for (const [oldType, rawPattern] of Object.entries(patterns)) {
        // Fresh regex per pattern so shared lastIndex state is never touched.
        const regex = new RegExp(rawPattern.source, rawPattern.flags);
        for (let hit = regex.exec(content); hit !== null; hit = regex.exec(content)) {
            const text = hit[0];
            const start = hit.index;
            const end = start + text.length;
            if (touchesAny(matches, start, end) || touchesAny(protectedRanges, start, end)) {
                continue;
            }
            // NIP19 unification: the specific kind is stored as metadata.subType.
            const subType = NIP19_TYPE_MAP[oldType] || oldType;
            const metadata = {
                subType: subType,
                hasNostrPrefix: text.startsWith("nostr:"),
                plainNip19: text.replace(/^nostr:/, ""),
            };
            matches.push(createToken(TokenType.NIP19, text, start, end, metadata));
        }
    }
}
/**
 * Applies every entry of PATTERN_CONFIGS to `content`, then processes hashtags.
 * A hit is skipped when it overlaps an existing match or a protected range, or
 * when its handler returns a falsy value.
 *
 * @param {string} content - Text to scan.
 * @param {Array<object>} matches - Collected tokens; mutated in place.
 * @param {Array<Array>} [tags=[]] - Event tags handed to the handlers.
 * @param {Array<object>} [protectedRanges=[]] - Ranges that must not be matched into.
 * @param {boolean} [hashtagsFromTagsOnly=true] - When truthy, only hashtags
 *   found in the "t" tags are accepted.
 * @returns {void}
 */
function processPatterns(content, matches, tags = [], protectedRanges = [], hashtagsFromTagsOnly = true) {
    let validHashtags = null;
    if (hashtagsFromTagsOnly) {
        validHashtags = extractHashtagsFromTags(tags);
    }
    const touchesAny = (ranges, start, end) => ranges.some((range) => isOverlapping(start, end, range.start, range.end));
    for (const { patterns, handler } of PATTERN_CONFIGS) {
        for (const [patternType, rawPattern] of Object.entries(patterns)) {
            const regex = new RegExp(rawPattern.source, rawPattern.flags);
            for (let hit = regex.exec(content); hit !== null; hit = regex.exec(content)) {
                const text = hit[0];
                const start = hit.index;
                const end = start + text.length;
                // Skip hits that collide with already collected matches...
                if (touchesAny(matches, start, end)) {
                    continue;
                }
                // ...or with protected ranges.
                if (touchesAny(protectedRanges, start, end)) {
                    continue;
                }
                const outcome = handler(hit, patternType, tags);
                if (!outcome) {
                    continue;
                }
                const metadata = "metadata" in outcome ? outcome.metadata : {};
                matches.push(createToken(outcome.type, text, start, end, metadata));
            }
        }
    }
    // Hashtags are handled in a separate pass.
    processHashtagPatterns(content, matches, protectedRanges, validHashtags);
}
/**
 * Appends a HASHTAG token to `matches` for every HASHTAG_PATTERN hit that
 * overlaps neither an existing match nor a protected range and, when
 * `validHashtags` is given, whose lower-cased name is contained in it.
 *
 * @param {string} content - Text to scan.
 * @param {Array<object>} matches - Collected tokens; mutated in place.
 * @param {Array<object>} protectedRanges - Ranges that must not be matched into.
 * @param {Set<string>|null} validHashtags - Allowed hashtag names, or null to accept all.
 * @returns {void}
 */
function processHashtagPatterns(content, matches, protectedRanges, validHashtags) {
    const regex = new RegExp(HASHTAG_PATTERN.source, HASHTAG_PATTERN.flags);
    const touchesAny = (ranges, start, end) => ranges.some((range) => isOverlapping(start, end, range.start, range.end));
    for (let hit = regex.exec(content); hit !== null; hit = regex.exec(content)) {
        const raw = hit[0];
        const start = hit.index;
        const end = start + raw.length;
        const name = raw.slice(1); // drop the leading "#"
        if (touchesAny(matches, start, end)) {
            continue;
        }
        if (touchesAny(protectedRanges, start, end)) {
            continue;
        }
        // With "t"-tag validation enabled, unknown hashtags are ignored.
        const isRejected = validHashtags && !validHashtags.has(name.toLowerCase());
        if (isRejected) {
            continue;
        }
        const metadata = {
            tag: name,
            validated: validHashtags !== null,
        };
        matches.push(createToken(TokenType.HASHTAG, raw, start, end, metadata));
    }
}
// Decides which token wins when two tokens overlap.
// removeOverlaps looks up each token's type in this table (types that are
// missing count as 0); on an overlap, the token with the larger number
// replaces the other one.
const PRIORITY = {
[TokenType.URL]: 15,
[TokenType.NIP19]: 10,
[TokenType.RELAY]: 10,
[TokenType.CASHU_TOKEN]: 2,
[TokenType.LNBC]: 2,
[TokenType.LN_URL]: 2,
[TokenType.LN_ADDRESS]: 2,
[TokenType.CUSTOM_EMOJI]: 1,
[TokenType.BITCOIN_ADDRESS]: 1,
[TokenType.EMAIL]: 1,
[TokenType.LEGACY_REFERENCE]: 1,
[TokenType.HASHTAG]: 0,
[TokenType.NIP_IDENTIFIER]: 0,
[TokenType.TEXT]: 0,
};
/**
 * Returns a copy of `matches` in which overlapping tokens have been resolved.
 *
 * Tokens are visited ordered by start, then longer-first, then by PRIORITY.
 * A token that overlaps an already kept one replaces it only if it has a
 * higher priority, or the same priority and a greater length; otherwise it
 * is dropped.
 *
 * @param {Array<object>} matches - Candidate tokens (not modified).
 * @returns {Array<object>} Kept tokens sorted by start offset.
 */
function removeOverlaps(matches) {
    const priorityOf = (token) => PRIORITY[token.type] ?? 0;
    const lengthOf = (token) => token.end - token.start;
    const ordered = Array.from(matches).sort((a, b) => a.start !== b.start
        ? a.start - b.start
        : a.end !== b.end
            ? b.end - a.end // longer token first
            : priorityOf(b) - priorityOf(a));
    const kept = [];
    for (const candidate of ordered) {
        const clashIndex = kept.findIndex((other) => isOverlapping(other.start, other.end, candidate.start, candidate.end));
        if (clashIndex === -1) {
            kept.push(candidate);
            continue;
        }
        const incumbent = kept[clashIndex];
        const candidatePriority = priorityOf(candidate);
        const incumbentPriority = priorityOf(incumbent);
        // On equal priority the longer (outer) token is preferred.
        const candidateWins = candidatePriority > incumbentPriority ||
            (candidatePriority === incumbentPriority && lengthOf(candidate) > lengthOf(incumbent));
        if (candidateWins) {
            kept[clashIndex] = candidate;
        }
        // Otherwise the candidate is simply dropped.
    }
    // Restore positional order.
    return kept.sort((a, b) => a.start - b.start);
}
/**
 * Produces the final token list: the overlap-free matches with TEXT tokens
 * inserted for every stretch of `content` that no match covers.
 *
 * @param {string} content - The parsed text.
 * @param {Array<object>} matches - Tokens collected by the pattern passes.
 * @returns {Array<object>} Tokens in positional order.
 */
function buildTokens(content, matches) {
    const tokens = [];
    const pushText = (from, to) => {
        tokens.push(createToken(TokenType.TEXT, content.slice(from, to), from, to));
    };
    let cursor = 0;
    for (const token of removeOverlaps(matches)) {
        // Fill the gap before this token with plain text.
        if (cursor < token.start) {
            pushText(cursor, token.start);
        }
        tokens.push(token);
        cursor = token.end;
    }
    // Trailing text after the last token.
    if (cursor < content.length) {
        pushText(cursor, content.length);
    }
    return tokens;
}
// Synchronous version: for the case detectUrlType = false.
/**
 * Parses `content` into tokens without any network access; URL media types
 * come from the file extension only.
 *
 * @param {string} content - Text to parse; a falsy value yields [].
 * @param {Array<Array>} [tags=[]] - Event tags passed to the pattern handlers.
 * @param {object} [options={}]
 * @param {boolean} [options.includeNostrPrefixOnly=true] - When falsy,
 *   NIP19_PLAIN_PATTERNS are applied in addition to NIP19_PATTERNS.
 * @param {boolean} [options.hashtagsFromTagsOnly=true] - When truthy, only
 *   hashtags found in the "t" tags are tokenized.
 * @returns {Array<object>} Tokens in positional order.
 */
export function parseContent(content, tags = [], options = {}) {
    if (!content) {
        return [];
    }
    const {
        includeNostrPrefixOnly = true,
        hashtagsFromTagsOnly = true,
    } = options;
    // URLs first (extension-based type only). The same array then serves both
    // as the list of collected matches and as the ranges protected from
    // further matching.
    const matches = findUrlTokensSync(content);
    const nip19PatternSets = includeNostrPrefixOnly
        ? [NIP19_PATTERNS]
        : [NIP19_PATTERNS, NIP19_PLAIN_PATTERNS];
    for (const patternSet of nip19PatternSets) {
        processNip19Patterns(content, patternSet, matches, matches);
    }
    // Everything else, again excluding what is already matched.
    processPatterns(content, matches, tags, matches, hashtagsFromTagsOnly);
    return buildTokens(content, matches);
}
// Asynchronous version: for the case detectUrlType = true.
/**
 * Parses `content` into tokens; URL tokens are produced by
 * findUrlTokensAsync, which may consult HTTP headers for the media type.
 *
 * @param {string} content - Text to parse; a falsy value yields [].
 * @param {Array<Array>} [tags=[]] - Event tags passed to the pattern handlers.
 * @param {object} [options={}]
 * @param {boolean} [options.includeNostrPrefixOnly=true] - When falsy,
 *   NIP19_PLAIN_PATTERNS are applied in addition to NIP19_PATTERNS.
 * @param {boolean} [options.hashtagsFromTagsOnly=true] - When truthy, only
 *   hashtags found in the "t" tags are tokenized.
 * @returns {Promise<Array<object>>} Tokens in positional order.
 */
export async function parseContentAsync(content, tags = [], options = {}) {
    if (!content) {
        return [];
    }
    const {
        includeNostrPrefixOnly = true,
        hashtagsFromTagsOnly = true,
    } = options;
    // URLs first. The same array then serves both as the list of collected
    // matches and as the ranges protected from further matching.
    const matches = await findUrlTokensAsync(content);
    const nip19PatternSets = includeNostrPrefixOnly
        ? [NIP19_PATTERNS]
        : [NIP19_PATTERNS, NIP19_PLAIN_PATTERNS];
    for (const patternSet of nip19PatternSets) {
        processNip19Patterns(content, patternSet, matches, matches);
    }
    // Everything else, again excluding what is already matched.
    processPatterns(content, matches, tags, matches, hashtagsFromTagsOnly);
    return buildTokens(content, matches);
}
/**
 * Keeps only the tokens whose `type` is one of the requested types.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @param {string|Array<string>} types - A single type or a list of types.
 * @returns {Array<object>} New array with the matching tokens, in input order.
 */
export function filterTokens(tokens, types) {
    const wanted = Array.isArray(types) ? new Set(types) : new Set([types]);
    return tokens.filter((token) => {
        const { type } = token;
        return wanted.has(type);
    });
}
/**
 * Keeps the tokens for which `predicate` returns a truthy value.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @param {function(object, number, Array<object>): *} predicate - Called as
 *   for Array.prototype.filter.
 * @returns {Array<object>} New array with the accepted tokens.
 */
export function filterTokensBy(tokens, predicate) {
    const accepted = tokens.filter(predicate);
    return accepted;
}
// Filter functions after the NIP19 unification
/**
 * Returns the tokens whose type is TokenType.NIP19.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The NIP19 tokens.
 */
export function getNip19Entities(tokens) {
return filterTokens(tokens, TokenType.NIP19);
}
// Filter by specific NIP19 subtype(s)
/**
 * Keeps the NIP19 tokens whose `metadata.subType` is truthy and is one of
 * the requested subtypes.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @param {string|Array<string>} subType - A single subtype or a list of subtypes.
 * @returns {Array<object>} The matching NIP19 tokens, in input order.
 */
export function filterNip19BySubType(tokens, subType) {
    const wanted = new Set(Array.isArray(subType) ? subType : [subType]);
    return tokens.filter((token) => {
        if (token.type !== TokenType.NIP19) {
            return false;
        }
        const tokenSubType = token.metadata?.subType;
        return tokenSubType && wanted.has(tokenSubType);
    });
}
// Getters for the individual NIP19 subtypes
/**
 * Returns the NIP19 tokens whose subtype is NIP19SubType.NPUB.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} Result of filterNip19BySubType for that subtype.
 */
export function getNpubs(tokens) {
return filterNip19BySubType(tokens, NIP19SubType.NPUB);
}
/**
 * Returns the NIP19 tokens whose subtype is NIP19SubType.NPROFILE.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} Result of filterNip19BySubType for that subtype.
 */
export function getNprofiles(tokens) {
return filterNip19BySubType(tokens, NIP19SubType.NPROFILE);
}
/**
 * Returns the NIP19 tokens whose subtype is NIP19SubType.NOTE.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} Result of filterNip19BySubType for that subtype.
 */
export function getNotes(tokens) {
return filterNip19BySubType(tokens, NIP19SubType.NOTE);
}
/**
 * Returns the NIP19 tokens whose subtype is NIP19SubType.NEVENT.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} Result of filterNip19BySubType for that subtype.
 */
export function getNevents(tokens) {
return filterNip19BySubType(tokens, NIP19SubType.NEVENT);
}
/**
 * Returns the NIP19 tokens whose subtype is NIP19SubType.NADDR.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} Result of filterNip19BySubType for that subtype.
 */
export function getNaddrs(tokens) {
return filterNip19BySubType(tokens, NIP19SubType.NADDR);
}
/**
 * Returns the NIP19 tokens whose subtype is NIP19SubType.NSEC.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} Result of filterNip19BySubType for that subtype.
 */
export function getNsecs(tokens) {
return filterNip19BySubType(tokens, NIP19SubType.NSEC);
}
/**
 * Returns the tokens whose type is TokenType.NIP_IDENTIFIER.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getNipIdentifiers(tokens) {
return filterTokens(tokens, TokenType.NIP_IDENTIFIER);
}
// Getter for legacy-style references
/**
 * Returns the tokens whose type is TokenType.LEGACY_REFERENCE.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getLegacyReferences(tokens) {
return filterTokens(tokens, TokenType.LEGACY_REFERENCE);
}
/**
 * Returns the tokens whose type is TokenType.URL.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getUrls(tokens) {
return filterTokens(tokens, TokenType.URL);
}
/**
 * Returns the tokens whose type is TokenType.CUSTOM_EMOJI.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getCustomEmojis(tokens) {
return filterTokens(tokens, TokenType.CUSTOM_EMOJI);
}
/**
 * Returns the tokens whose type is TokenType.HASHTAG.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getHashtags(tokens) {
return filterTokens(tokens, TokenType.HASHTAG);
}
// Only the hashtags that were validated against the "t" tags
/**
 * Returns the HASHTAG tokens whose `metadata.validated` is exactly true.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The validated hashtag tokens, in input order.
 */
export function getValidatedHashtags(tokens) {
    return tokens.filter((token) => {
        if (token.type !== TokenType.HASHTAG) {
            return false;
        }
        return token.metadata?.validated === true;
    });
}
/**
 * Returns the tokens whose type is TokenType.LN_ADDRESS.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getLightningAddresses(tokens) {
return filterTokens(tokens, TokenType.LN_ADDRESS);
}
/**
 * Returns the tokens whose type is TokenType.LN_URL.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getLightningUrls(tokens) {
return filterTokens(tokens, TokenType.LN_URL);
}
/**
 * Returns the tokens whose type is TokenType.LNBC.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getLightningInvoices(tokens) {
return filterTokens(tokens, TokenType.LNBC);
}
/**
 * Returns the tokens whose type is TokenType.BITCOIN_ADDRESS.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getBitcoinAddresses(tokens) {
return filterTokens(tokens, TokenType.BITCOIN_ADDRESS);
}
/**
 * Returns the tokens whose type is TokenType.CASHU_TOKEN.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getCashuTokens(tokens) {
return filterTokens(tokens, TokenType.CASHU_TOKEN);
}
/**
 * Returns the tokens whose type is TokenType.EMAIL.
 *
 * @param {Array<object>} tokens - Tokens to filter.
 * @returns {Array<object>} The matching tokens.
 */
export function getEmails(tokens) {
return filterTokens(tokens, TokenType.EMAIL);
}
/**
 * Sets `lastIndex` to 0 on every shared pattern imported by this module, so
 * a regex left mid-scan does not affect its next use.
 *
 * @returns {void}
 */
export function resetPatterns() {
    const groupedPatterns = [
        NIP19_PATTERNS,
        NIP19_PLAIN_PATTERNS,
        BITCOIN_ADDRESS_PATTERNS,
    ].flatMap((group) => Object.values(group));
    const standalonePatterns = [
        URL_PATTERN,
        RELAY_URL_PATTERN,
        LN_ADDRESS_PATTERN,
        LN_URL_PATTERN,
        LNBC_PATTERN,
        EMAIL_PATTERN,
        CASHU_TOKEN_PATTERN,
        CUSTOM_EMOJI_PATTERN,
        HASHTAG_PATTERN,
        NIP_IDENTIFIER_PATTERN,
        LEGACY_REFERENCE_PATTERN,
    ];
    for (const regex of [...groupedPatterns, ...standalonePatterns]) {
        regex.lastIndex = 0;
    }
}
// Module-level cache: cleanedUrl -> "image" / "video" / "audio".
// Capped so that a long-running process parsing many distinct URLs does not
// grow it without bound; the oldest entry is evicted first (a Map iterates
// in insertion order).
const URL_TYPE_CACHE_LIMIT = 1000;
const urlTypeCache = new Map();
/**
 * Determines the media kind of a URL from the Content-Type header of a HEAD
 * request. Successful detections are cached per URL.
 *
 * The header is compared case-insensitively, because media type names are
 * case-insensitive (e.g. "Image/PNG" is an image).
 *
 * @param {string} url - URL to probe.
 * @returns {Promise<"video"|"audio"|"image"|undefined>} The detected kind, or
 *   undefined when the type is something else or the request fails.
 */
async function fetchUrlContentType(url) {
    if (urlTypeCache.has(url)) {
        return urlTypeCache.get(url);
    }
    try {
        const res = await fetch(url, { method: "HEAD" });
        const contentType = (res.headers.get("Content-Type") || "").trim().toLowerCase();
        const type = ["video", "audio", "image"].find((kind) => contentType.startsWith(`${kind}/`));
        if (type) {
            if (urlTypeCache.size >= URL_TYPE_CACHE_LIMIT) {
                urlTypeCache.delete(urlTypeCache.keys().next().value);
            }
            urlTypeCache.set(url, type); // only successful detections are cached
        }
        return type;
    }
    catch {
        // Best effort: network errors are not cached, so a later call retries.
        return undefined;
    }
}
//# sourceMappingURL=parseContent.js.map