@konemono/nostr-content-parser
Parse Nostr content into tokens
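A minimal usage sketch, assuming the package's public entry point re-exports parseContent, filterTokens, and TokenType (the test suite below imports them from the source files directly):

// Hypothetical top-level import; the tests import from ../src/ instead.
import {
  parseContent,
  filterTokens,
  TokenType,
} from "@konemono/nostr-content-parser";

const content =
  "GM nostr:npub1sjcvg64knxkrt6ev52rywzu9uzqakgy8ehhk8yezxmpewsthst6sw3jqcw #nostr";
const tags = []; // event tags, e.g. [["emoji", "pepe", "https://example.com/pepe.png"]]

// Each token has the shape { type, content, start, end, metadata }.
const tokens = parseContent(content, tags, { hashtagsFromTagsOnly: false });
const hashtags = filterTokens(tokens, TokenType.HASHTAG);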
JavaScript
import { describe, it, expect, beforeEach, vi } from "vitest";
import {
parseContent,
filterTokens,
resetPatterns,
parseContentAsync,
} from "../src/parseContent.js";
import {
isLightningAddress,
findCustomEmojiMetadata,
parseNipIdentifier,
findLegacyReferenceMetadata,
cleanUrlEnd,
TokenType, // TokenType is exported from patterns.ts
} from "../src/patterns";
// Test data
const TEST_NPUB =
"npub1sjcvg64knxkrt6ev52rywzu9uzqakgy8ehhk8yezxmpewsthst6sw3jqcw";
const TEST_NOTE =
"note10vns6zm2xecfs43n40fawleevvtqlc73dv3ptj5xr9nwysywjq9sjyv6r0";
const TEST_NEVENT =
"nevent1qvzqqqqqqypzpp9sc34tdxdvxh4jeg5xgu9ctcypmvsg0n00vwfjydkrjaqh0qh4qyxhwumn8ghj77tpvf6jumt9qys8wumn8ghj7un9d3shjtt2wqhxummnw3ezuamfwfjkgmn9wshx5uqpzamhxue69uhkummnw3ezu6t5w3skumt09ekk2mspzdmhxue69uhhwmm59ehx7um5wghxuet5qyghwumn8ghj7mnxwfjkccte9eshquqqypajwrgtdgm8pxzkxw4a84ml8933vrlr694jy9w2scvkdcjq36gqknr2489";
const TEST_NPROFILE =
"nprofile1qyxhwumn8ghj77tpvf6jumt9qqsgfvxyd2mfntp4avk29pj8pwz7pqwmyzrummmrjv3rdsuhg9mc9agccpc2g";
//
describe("parseContent - Core Functionality", () => {
beforeEach(() => {
resetPatterns();
});
it("should parse plain text", () => {
const content = "Hello world!";
const tokens = parseContent(content);
expect(tokens).toEqual([
{
type: TokenType.TEXT,
content: "Hello world!",
start: 0,
end: 12,
metadata: {},
},
]);
});
it("should parse NIP-19 entities with 'nostr:' prefix", () => {
const content = `Hello nostr:${TEST_NPUB} world!`;
const tokens = parseContent(content);
expect(tokens).toHaveLength(3);
expect(tokens[1]).toEqual({
type: TokenType.NIP19,
content: `nostr:${TEST_NPUB}`,
start: 6,
end: 75,
metadata: {
hasNostrPrefix: true,
subType: "npub",
plainNip19: TEST_NPUB,
},
});
});
it("should parse multiple NIP-19 entities without 'nostr:' prefix", () => {
const content = `${TEST_NPUB} and ${TEST_NOTE}`;
const tokens = parseContent(content, [], { includeNostrPrefixOnly: false });
expect(tokens).toHaveLength(3);
expect(tokens[0].type).toBe(TokenType.NIP19);
expect(tokens[2].type).toBe(TokenType.NIP19);
});
it("should parse URLs", () => {
const content =
'Check https://example.com and <script src="https://njump.me/embed/[nip-19-entity]" />';
const tokens = parseContent(content);
expect(tokens).toHaveLength(5);
expect(tokens[1].content).toBe("https://example.com");
expect(tokens[3].content).toBe("https://njump.me/embed/[nip-19-entity]");
});
it("should parse custom emojis with and without tags", () => {
const content = "Hello :pepe: world :unknown_emoji:";
const tags = [["emoji", "pepe", "https://example.com/pepe.png"]];
const tokens = parseContent(content, tags);
expect(
tokens.filter((t) => t.type === TokenType.CUSTOM_EMOJI)
).toHaveLength(2);
expect(tokens[1].metadata.url).toBe("https://example.com/pepe.png");
expect(tokens[3].metadata.url).toBeUndefined();
});
it("should parse hashtags", () => {
const content = "Learning #nostr and #bitcoin today";
const tokens = parseContent(content, [], { hashtagsFromTagsOnly: false });
const hashtagTokens = tokens.filter((t) => t.type === TokenType.HASHTAG);
expect(hashtagTokens).toHaveLength(2);
expect(hashtagTokens[0].metadata.tag).toBe("nostr");
expect(hashtagTokens[1].metadata.tag).toBe("bitcoin");
});
});
describe("parseContent - Edge Cases & Mixed Content", () => {
it("should handle empty or null content", () => {
expect(parseContent("")).toHaveLength(0);
expect(parseContent(null)).toHaveLength(0);
});
it("should clean trailing punctuation from URLs", () => {
const content = "Visit https://google.com.";
const tokens = parseContent(content);
expect(tokens[1].content).toBe("https://google.com");
});
it("should handle URLs inside parentheses", () => {
const content =
"Look here (https://en.wikipedia.org). https://example.com/path.) (https://example.com/path.)";
const tokens = parseContent(content);
expect(tokens[1].content).toBe("https://en.wikipedia.org");
expect(tokens[2].content).toBe(")");
expect(tokens[3].content).toBe(". ");
expect(tokens[4].content).toBe("https://example.com/path");
expect(tokens[7].content).toBe("https://example.com/path");
});
it("should handle URLs with IP addresses and ports", () => {
const content = "Server address is http://192.168.1.1:8080/api/data";
const tokens = parseContent(content);
expect(tokens[1].content).toBe("http://192.168.1.1:8080/api/data");
});
// Skipped for now: an emoji immediately after a URL is an edge case, and ports also use ":" in URLs, which makes the two hard to tell apart.
it.skip("should separate URL and custom emoji correctly", () => {
const content = "This is great! http://example.org/:great_emoji:";
const tags = [["emoji", "great_emoji", "https://example.org/great.png"]];
const tokens = parseContent(content, tags);
expect(tokens).toHaveLength(3);
expect(tokens[1].content).toBe("http://example.org/");
expect(tokens[2].content).toBe(":great_emoji:");
});
it("should handle malformed NIP-19 entities as text", () => {
const content =
"npub1short note1toolong123456789012345678901234567890123456789012345678901234567890";
const tokens = parseContent(content);
expect(tokens.every((t) => t.type === TokenType.TEXT)).toBe(true);
});
});
describe("parseContent - Cryptocurrency Parsing", () => {
it("should parse Lightning addresses and distinguish from emails", () => {
const content =
"Send sats to alice@getalby.com or contact me at alice@gmail.com";
const tokens = parseContent(content);
const lnAddresses = tokens.filter((t) => t.type === TokenType.LN_ADDRESS);
const emails = tokens.filter((t) => t.type === TokenType.EMAIL);
expect(lnAddresses).toHaveLength(1);
expect(lnAddresses[0].content).toBe("alice@getalby.com");
expect(emails).toHaveLength(1);
expect(emails[0].content).toBe("alice@gmail.com");
});
it("should parse Bitcoin addresses of different types", () => {
const content =
"1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa or bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4";
const tokens = parseContent(content);
const btcAddresses = tokens.filter(
(t) => t.type === TokenType.BITCOIN_ADDRESS
);
expect(btcAddresses).toHaveLength(2);
expect(btcAddresses[0].metadata.addressType).toBe("legacy");
expect(btcAddresses[1].metadata.addressType).toBe("bech32");
});
it("should parse Lightning URLs and Cashu tokens with full strings", () => {
const fullLnUrl =
"lnurl1dp68gurn8ghj7amfdchxummpw3ek2urx4a6x2mrwdanx4a6hjem9wa6hjem9w3ezuamfdeuaxenp05gqmp";
const fullCashuToken =
"cashuAeyJ0b2tlbiI6W3sicHJvb2ZzIjpbeyJpZCI6IjAwOWExZjI5MzI1M2U0MWUiLCJhbW91bnQiOjIsInNlY3JldCI6IjQwNzkxNWJjMjEyYmUxMDFkZDMxMzA5MzMxNGU3MzQ0MjA2MzQyM2VhNGU5NzY5ZGE3NTg1NzM5NjA2NzQyYWIiLCJDIjoiMDJiYzkwOTc5OTdkODFhZmIyY2MxNDAzNGUyNzNhNzEyZDUzMDJlMTU1MGI5OWY0NzI0YjA4OWQxNzNhZGU3OGZjIn1dLCJtaW50IjoiaHR0cHM6Ly9taW50LXRlc3QuZXhhbXBsZS5jb20ifV0sIm1lbW8iOiJjYXNodSBwYXltZW50In0=";
const content = `LNURL: ${fullLnUrl} Cashu: ${fullCashuToken}`;
const tokens = parseContent(content);
const lnUrlToken = tokens.find((t) => t.type === TokenType.LN_URL);
const cashuToken = tokens.find((t) => t.type === TokenType.CASHU_TOKEN);
expect(lnUrlToken).toBeDefined();
expect(lnUrlToken.content).toBe(fullLnUrl);
expect(cashuToken).toBeDefined();
expect(cashuToken.content).toBe(fullCashuToken);
});
});
describe("parseContent - New Patterns & Special Cases", () => {
it("should parse NIP-A0, NIP-B1 identifiers", () => {
const content = "This is NIP-01 and NIP-C7, also NIP-A0 and NIP-B1.";
const tokens = parseContent(content);
const nipTokens = tokens.filter((t) => t.type === TokenType.NIP_IDENTIFIER);
expect(nipTokens).toHaveLength(4);
expect(nipTokens[0].content).toBe("NIP-01");
expect(nipTokens[1].content).toBe("NIP-C7");
expect(nipTokens[2].content).toBe("NIP-A0");
expect(nipTokens[3].content).toBe("NIP-B1");
});
it("should parse legacy references like #[0]", () => {
const content = "Check out #[0] for more info.";
const tags = [
["p", TEST_NPUB],
["e", TEST_NOTE],
];
const tokens = parseContent(content, tags);
const legacyToken = tokens.find(
(t) => t.type === TokenType.LEGACY_REFERENCE
);
expect(legacyToken).toBeDefined();
expect(legacyToken.content).toBe("#[0]");
expect(legacyToken.metadata.tagType).toBe("p");
expect(legacyToken.metadata.referenceId).toBe(TEST_NPUB);
});
it("should handle out-of-bounds legacy references", () => {
const content = "Check out #[99] which does not exist.";
const tags = [["p", TEST_NPUB]];
const tokens = parseContent(content, tags);
const legacyToken = tokens.find(
(t) => t.type === TokenType.LEGACY_REFERENCE
);
expect(legacyToken).toBeDefined();
expect(legacyToken.metadata.tagIndex).toBe(99);
expect(legacyToken.metadata.tagType).toBeUndefined();
});
});
describe("parseContent - Media Type Detection", () => {
it("should detect image type based on file extension", () => {
const content = "This is an image: https://example.com/photo.png";
const tokens = parseContent(content);
const urlToken = tokens.find((t) => t.type === TokenType.URL);
expect(urlToken?.metadata?.type).toBe("image");
});
it("should detect video type using async HEAD request", async () => {
globalThis.fetch = vi.fn().mockResolvedValueOnce({
headers: {
get: (key) =>
key.toLowerCase() === "content-type" ? "video/mp4" : null,
},
});
const content = "https://example.com/videofile";
const tokens = await parseContentAsync(content);
const urlToken = tokens.find((t) => t.type === TokenType.URL);
expect(urlToken?.metadata?.type).toBe("video");
expect(globalThis.fetch).toHaveBeenCalledWith(content, { method: "HEAD" });
});
it("should not perform async request with parseContent", () => {
const fetchSpy = vi.fn();
globalThis.fetch = fetchSpy;
const content = "https://example.com/unknown";
parseContent(content);
expect(fetchSpy).not.toHaveBeenCalled();
});
});
describe("Utility Functions", () => {
const sampleTokens = [
{ type: TokenType.TEXT, content: "hello" },
{ type: TokenType.NIP19, content: TEST_NPUB },
{ type: TokenType.URL, content: "https://example.com" },
{ type: TokenType.HASHTAG, content: "#nostr", metadata: { tag: "nostr" } },
];
it("should filter tokens by single type", () => {
const textTokens = filterTokens(sampleTokens, TokenType.TEXT);
expect(textTokens).toHaveLength(1);
expect(textTokens[0].type).toBe(TokenType.TEXT);
});
it("should filter tokens by multiple types", () => {
const filtered = filterTokens(sampleTokens, [
TokenType.NIP19,
TokenType.HASHTAG,
]);
expect(filtered).toHaveLength(2);
expect(filtered.map((t) => t.type)).toEqual([
TokenType.NIP19,
TokenType.HASHTAG,
]);
});
it("should check if an email is a Lightning Address", () => {
expect(isLightningAddress("satoshi@getalby.com")).toBe(true);
expect(isLightningAddress("alice@gmail.com")).toBe(false);
});
it("should find custom emoji metadata from tags", () => {
const tags = [["emoji", "pepe", "https://example.com/pepe.png"]];
expect(findCustomEmojiMetadata("pepe", tags)?.url).toBe(
"https://example.com/pepe.png"
);
expect(findCustomEmojiMetadata("unknown", tags)).toBeNull();
});
it("should parse NIP identifiers correctly", () => {
const nip01 = parseNipIdentifier("NIP-01");
expect(nip01.number).toBe("01");
expect(nip01.hasAlpha).toBe(false);
expect(nip01.hasDigit).toBe(true);
const nipA0 = parseNipIdentifier("NIP-A0");
expect(nipA0.number).toBe("A0");
expect(nipA0.hasAlpha).toBe(true);
expect(nipA0.hasDigit).toBe(true);
});
it("should find legacy reference metadata from tags", () => {
const tags = [
["p", TEST_NPUB],
["e", TEST_NOTE],
];
const metadataP = findLegacyReferenceMetadata("#[0]", tags);
expect(metadataP?.tagIndex).toBe(0);
expect(metadataP?.tagType).toBe("p");
expect(metadataP?.referenceId).toBe(TEST_NPUB);
expect(metadataP?.referenceType).toBe("npub");
const metadataE = findLegacyReferenceMetadata("#[1]", tags);
expect(metadataE?.tagIndex).toBe(1);
expect(metadataE?.tagType).toBe("e");
expect(metadataE?.referenceId).toBe(TEST_NOTE);
expect(metadataE?.referenceType).toBe("note");
const metadataOutOfBounds = findLegacyReferenceMetadata("#[99]", tags);
expect(metadataOutOfBounds?.tagIndex).toBe(99);
expect(metadataOutOfBounds?.tagType).toBeUndefined();
});
it("should clean trailing characters from URL", () => {
expect(cleanUrlEnd("https://example.com.")).toBe("https://example.com");
expect(cleanUrlEnd("https://example.com/path).")).toBe(
"https://example.com/path"
);
expect(cleanUrlEnd("https://example.com/test?a=1]")).toBe(
"https://example.com/test?a=1"
);
expect(cleanUrlEnd("https://example.com/path...")).toBe(
"https://example.com/path"
);
expect(cleanUrlEnd("http://nostr.com/path.note1.")).toBe(
"http://nostr.com/path.note1"
);
});
});