@konemono/nostr-content-parser
Parse Nostr content into tokens
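A minimal usage sketch, assuming the package's public entry point re-exports parseContent, filterTokens, and TokenType (the test suite below imports them from the source files directly):

// Hypothetical top-level import; the tests import from ../src/ instead.
import {
  parseContent,
  filterTokens,
  TokenType,
} from "@konemono/nostr-content-parser";

const content =
  "GM nostr:npub1sjcvg64knxkrt6ev52rywzu9uzqakgy8ehhk8yezxmpewsthst6sw3jqcw #nostr";
const tags = []; // event tags, e.g. [["emoji", "pepe", "https://example.com/pepe.png"]]

// Each token has the shape { type, content, start, end, metadata }.
const tokens = parseContent(content, tags, { hashtagsFromTagsOnly: false });
const hashtags = filterTokens(tokens, TokenType.HASHTAG);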
JavaScript
import { describe, it, expect, beforeEach, vi } from "vitest";
import {
parseContent,
filterTokens,
resetPatterns,
parseContentAsync,
} from "../src/parseContent.js";
import {
isLightningAddress,
findCustomEmojiMetadata,
parseNipIdentifier,
findLegacyReferenceMetadata,
cleanUrlEnd,
TokenType, // TokenType is exported from patterns.ts
} from "../src/patterns";
// Test data
const TEST_NPUB =
"npub1sjcvg64knxkrt6ev52rywzu9uzqakgy8ehhk8yezxmpewsthst6sw3jqcw";
const TEST_NOTE =
"note10vns6zm2xecfs43n40fawleevvtqlc73dv3ptj5xr9nwysywjq9sjyv6r0";
const TEST_NEVENT =
"nevent1qvzqqqqqqypzpp9sc34tdxdvxh4jeg5xgu9ctcypmvsg0n00vwfjydkrjaqh0qh4qyxhwumn8ghj77tpvf6jumt9qys8wumn8ghj7un9d3shjtt2wqhxummnw3ezuamfwfjkgmn9wshx5uqpzamhxue69uhkummnw3ezu6t5w3skumt09ekk2mspzdmhxue69uhhwmm59ehx7um5wghxuet5qyghwumn8ghj7mnxwfjkccte9eshquqqypajwrgtdgm8pxzkxw4a84ml8933vrlr694jy9w2scvkdcjq36gqknr2489";
const TEST_NPROFILE =
"nprofile1qyxhwumn8ghj77tpvf6jumt9qqsgfvxyd2mfntp4avk29pj8pwz7pqwmyzrummmrjv3rdsuhg9mc9agccpc2g";
//
describe("parseContent - Core Functionality", () => {
beforeEach(() => {
resetPatterns();
});
it("should parse plain text", () => {
const content = "Hello world!";
const tokens = parseContent(content);
expect(tokens).toEqual([
{
type: TokenType.TEXT,
content: "Hello world!",
start: 0,
end: 12,
metadata: {},
},
]);
});
it("should parse NIP-19 entities with 'nostr:' prefix", () => {
const content = `Hello nostr:${TEST_NPUB} world!`;
const tokens = parseContent(content);
expect(tokens).toHaveLength(3);
expect(tokens[1]).toEqual({
type: TokenType.NIP19,
content: `nostr:${TEST_NPUB}`,
start: 6,
end: 75,
metadata: {
hasNostrPrefix: true,
subType: "npub",
plainNip19: TEST_NPUB,
},
});
});
it("should parse multiple NIP-19 entities without 'nostr:' prefix", () => {
const content = `${TEST_NPUB} and ${TEST_NOTE}`;
const tokens = parseContent(content, [], { includeNostrPrefixOnly: false });
expect(tokens).toHaveLength(3);
expect(tokens[0].type).toBe(TokenType.NIP19);
expect(tokens[2].type).toBe(TokenType.NIP19);
});
it("should parse URLs", () => {
const content =
'Check https://example.com and <script src="https://njump.me/embed/[nip-19-entity]" />';
const tokens = parseContent(content);
expect(tokens).toHaveLength(5);
expect(tokens[1].content).toBe("https://example.com");
expect(tokens[3].content).toBe("https://njump.me/embed/[nip-19-entity]");
});
it("should parse custom emojis with and without tags", () => {
const content = "Hello :pepe: world :unknown_emoji:";
const tags = [["emoji", "pepe", "https://example.com/pepe.png"]];
const tokens = parseContent(content, tags);
expect(
tokens.filter((t) => t.type === TokenType.CUSTOM_EMOJI)
).toHaveLength(2);
expect(tokens[1].metadata.url).toBe("https://example.com/pepe.png");
expect(tokens[3].metadata.url).toBeUndefined();
});
it("should parse hashtags", () => {
const content = "Learning #nostr and #bitcoin today";
const tokens = parseContent(content, [], { hashtagsFromTagsOnly: false });
const hashtagTokens = tokens.filter((t) => t.type === TokenType.HASHTAG);
expect(hashtagTokens).toHaveLength(2);
expect(hashtagTokens[0].metadata.tag).toBe("nostr");
expect(hashtagTokens[1].metadata.tag).toBe("bitcoin");
});
});
describe("parseContent - Edge Cases & Mixed Content", () => {
it("should handle empty or null content", () => {
expect(parseContent("")).toHaveLength(0);
expect(parseContent(null)).toHaveLength(0);
});
it("should clean trailing punctuation from URLs", () => {
const content = "Visit https://google.com.";
const tokens = parseContent(content);
expect(tokens[1].content).toBe("https://google.com");
});
it("should handle URLs inside parentheses", () => {
const content =
"Look here (https://en.wikipedia.org). https://example.com/path.) (https://example.com/path.)";
const tokens = parseContent(content);
expect(tokens[1].content).toBe("https://en.wikipedia.org");
expect(tokens[2].content).toBe(")");
expect(tokens[3].content).toBe(". ");
expect(tokens[4].content).toBe("https://example.com/path");
expect(tokens[7].content).toBe("https://example.com/path");
});
it("should handle URLs with IP addresses and ports", () => {
const content = "Server address is http://192.168.1.1:8080/api/data";
const tokens = parseContent(content);
expect(tokens[1].content).toBe("http://192.168.1.1:8080/api/data");
});
// Skipped for now: an emoji immediately after a URL is an edge case, and ports also use ":" in URLs, which makes the two hard to tell apart.
it.skip("should separate URL and custom emoji correctly", () => {
const content = "This is great! http://example.org/:great_emoji:";
const tags = [["emoji", "great_emoji", "https://example.org/great.png"]];
const tokens = parseContent(content, tags);
expect(tokens).toHaveLength(3);
expect(tokens[1].content).toBe("http://example.org/");
expect(tokens[2].content).toBe(":great_emoji:");
});
it("should handle malformed NIP-19 entities as text", () => {
const content =
"npub1short note1toolong123456789012345678901234567890123456789012345678901234567890";
const tokens = parseContent(content);
expect(tokens.every((t) => t.type === TokenType.TEXT)).toBe(true);
});
});
describe("parseContent - Cryptocurrency Parsing", () => {
it("should parse Lightning addresses and distinguish from emails", () => {
const content =
"Send sats to alice@getalby.com or contact me at alice@gmail.com";
const tokens = parseContent(content);
const lnAddresses = tokens.filter((t) => t.type === TokenType.LN_ADDRESS);
const emails = tokens.filter((t) => t.type === TokenType.EMAIL);
expect(lnAddresses).toHaveLength(1);
expect(lnAddresses[0].content).toBe("alice@getalby.com");
expect(emails).toHaveLength(1);
expect(emails[0].content).toBe("alice@gmail.com");
});
it("should parse Bitcoin addresses of different types", () => {
const content =
"1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa or bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4";
const tokens = parseContent(content);
const btcAddresses = tokens.filter(
(t) => t.type === TokenType.BITCOIN_ADDRESS
);
expect(btcAddresses).toHaveLength(2);
expect(btcAddresses[0].metadata.addressType).toBe("legacy");
expect(btcAddresses[1].metadata.addressType).toBe("bech32");
});
it("should parse Lightning URLs and Cashu tokens with full strings", () => {
const fullLnUrl =
"lnurl1dp68gurn8ghj7amfdchxummpw3ek2urx4a6x2mrwdanx4a6hjem9wa6hjem9w3ezuamfdeuaxenp05gqmp";
const fullCashuToken =
"cashuAeyJ0b2tlbiI6W3sicHJvb2ZzIjpbeyJpZCI6IjAwOWExZjI5MzI1M2U0MWUiLCJhbW91bnQiOjIsInNlY3JldCI6IjQwNzkxNWJjMjEyYmUxMDFkZDMxMzA5MzMxNGU3MzQ0MjA2MzQyM2VhNGU5NzY5ZGE3NTg1NzM5NjA2NzQyYWIiLCJDIjoiMDJiYzkwOTc5OTdkODFhZmIyY2MxNDAzNGUyNzNhNzEyZDUzMDJlMTU1MGI5OWY0NzI0YjA4OWQxNzNhZGU3OGZjIn1dLCJtaW50IjoiaHR0cHM6Ly9taW50LXRlc3QuZXhhbXBsZS5jb20ifV0sIm1lbW8iOiJjYXNodSBwYXltZW50In0=";
const content = `LNURL: ${fullLnUrl} Cashu: ${fullCashuToken}`;
const tokens = parseContent(content);
const lnUrlToken = tokens.find((t) => t.type === TokenType.LN_URL);
const cashuToken = tokens.find((t) => t.type === TokenType.CASHU_TOKEN);
expect(lnUrlToken).toBeDefined();
expect(lnUrlToken.content).toBe(fullLnUrl);
expect(cashuToken).toBeDefined();
expect(cashuToken.content).toBe(fullCashuToken);
});
});
describe("parseContent - New Patterns & Special Cases", () => {
it("should parse NIP-A0, NIP-B1 identifiers", () => {
const content = "This is NIP-01 and NIP-C7, also NIP-A0 and NIP-B1.";
const tokens = parseContent(content);
const nipTokens = tokens.filter((t) => t.type === TokenType.NIP_IDENTIFIER);
expect(nipTokens).toHaveLength(4);
expect(nipTokens[0].content).toBe("NIP-01");
expect(nipTokens[1].content).toBe("NIP-C7");
expect(nipTokens[2].content).toBe("NIP-A0");
expect(nipTokens[3].content).toBe("NIP-B1");
});
it("should parse legacy references like #[0]", () => {
const content = "Check out #[0] for more info.";
const tags = [
["p", TEST_NPUB],
["e", TEST_NOTE],
];
const tokens = parseContent(content, tags);
const legacyToken = tokens.find(
(t) => t.type === TokenType.LEGACY_REFERENCE
);
expect(legacyToken).toBeDefined();
expect(legacyToken.content).toBe("#[0]");
expect(legacyToken.metadata.tagType).toBe("p");
expect(legacyToken.metadata.referenceId).toBe(TEST_NPUB);
});
it("should handle out-of-bounds legacy references", () => {
const content = "Check out #[99] which does not exist.";
const tags = [["p", TEST_NPUB]];
const tokens = parseContent(content, tags);
const legacyToken = tokens.find(
(t) => t.type === TokenType.LEGACY_REFERENCE
);
expect(legacyToken).toBeDefined();
expect(legacyToken.metadata.tagIndex).toBe(99);
expect(legacyToken.metadata.tagType).toBeUndefined();
});
});
describe("parseContent - Media Type Detection", () => {
it("should detect image type based on file extension", () => {
const content = "This is an image: https://example.com/photo.png";
const tokens = parseContent(content);
const urlToken = tokens.find((t) => t.type === TokenType.URL);
expect(urlToken?.metadata?.type).toBe("image");
});
it("should detect video type using async HEAD request", async () => {
globalThis.fetch = vi.fn().mockResolvedValueOnce({
headers: {
get: (key) =>
key.toLowerCase() === "content-type" ? "video/mp4" : null,
},
});
const content = "https://example.com/videofile";
const tokens = await parseContentAsync(content);
const urlToken = tokens.find((t) => t.type === TokenType.URL);
expect(urlToken?.metadata?.type).toBe("video");
expect(globalThis.fetch).toHaveBeenCalledWith(content, { method: "HEAD" });
});
it("should not perform async request with parseContent", () => {
const fetchSpy = vi.fn();
globalThis.fetch = fetchSpy;
const content = "https://example.com/unknown";
parseContent(content);
expect(fetchSpy).not.toHaveBeenCalled();
});
});
describe("Utility Functions", () => {
const sampleTokens = [
{ type: TokenType.TEXT, content: "hello" },
{ type: TokenType.NIP19, content: TEST_NPUB },
{ type: TokenType.URL, content: "https://example.com" },
{ type: TokenType.HASHTAG, content: "#nostr", metadata: { tag: "nostr" } },
];
it("should filter tokens by single type", () => {
const textTokens = filterTokens(sampleTokens, TokenType.TEXT);
expect(textTokens).toHaveLength(1);
expect(textTokens[0].type).toBe(TokenType.TEXT);
});
it("should filter tokens by multiple types", () => {
const filtered = filterTokens(sampleTokens, [
TokenType.NIP19,
TokenType.HASHTAG,
]);
expect(filtered).toHaveLength(2);
expect(filtered.map((t) => t.type)).toEqual([
TokenType.NIP19,
TokenType.HASHTAG,
]);
});
it("should check if an email is a Lightning Address", () => {
expect(isLightningAddress("satoshi@getalby.com")).toBe(true);
expect(isLightningAddress("alice@gmail.com")).toBe(false);
});
it("should find custom emoji metadata from tags", () => {
const tags = [["emoji", "pepe", "https://example.com/pepe.png"]];
expect(findCustomEmojiMetadata("pepe", tags)?.url).toBe(
"https://example.com/pepe.png"
);
expect(findCustomEmojiMetadata("unknown", tags)).toBeNull();
});
it("should parse NIP identifiers correctly", () => {
const nip01 = parseNipIdentifier("NIP-01");
expect(nip01.number).toBe("01");
expect(nip01.hasAlpha).toBe(false);
expect(nip01.hasDigit).toBe(true);
const nipA0 = parseNipIdentifier("NIP-A0");
expect(nipA0.number).toBe("A0");
expect(nipA0.hasAlpha).toBe(true);
expect(nipA0.hasDigit).toBe(true);
});
it("should find legacy reference metadata from tags", () => {
const tags = [
["p", TEST_NPUB],
["e", TEST_NOTE],
];
const metadataP = findLegacyReferenceMetadata("#[0]", tags);
expect(metadataP?.tagIndex).toBe(0);
expect(metadataP?.tagType).toBe("p");
expect(metadataP?.referenceId).toBe(TEST_NPUB);
expect(metadataP?.referenceType).toBe("npub");
const metadataE = findLegacyReferenceMetadata("#[1]", tags);
expect(metadataE?.tagIndex).toBe(1);
expect(metadataE?.tagType).toBe("e");
expect(metadataE?.referenceId).toBe(TEST_NOTE);
expect(metadataE?.referenceType).toBe("note");
const metadataOutOfBounds = findLegacyReferenceMetadata("#[99]", tags);
expect(metadataOutOfBounds?.tagIndex).toBe(99);
expect(metadataOutOfBounds?.tagType).toBeUndefined();
});
it("should clean trailing characters from URL", () => {
expect(cleanUrlEnd("https://example.com.")).toBe("https://example.com");
expect(cleanUrlEnd("https://example.com/path).")).toBe(
"https://example.com/path"
);
expect(cleanUrlEnd("https://example.com/test?a=1]")).toBe(
"https://example.com/test?a=1"
);
expect(cleanUrlEnd("https://example.com/path...")).toBe(
"https://example.com/path"
);
expect(cleanUrlEnd("http://nostr.com/path.note1.")).toBe(
"http://nostr.com/path.note1"
);
});
});