UNPKG

@cyclonedx/cdxgen

Version:

Creates CycloneDX Software Bill of Materials (SBOM) from source or container image

github.com/cdxgen/cdxgen

494 lines (441 loc) • 18.6 kB

JavaScript

import { strict as assert } from "node:assert";

import { describe, test } from "poku";

import { IriValidationStrategy, parseIRI, validateIri } from "./iri.js";

// Absolute IRIs that the suites below expect to pass validation.
// Every entry sits on its own line so that a trailing `//` comment can never
// swallow the entries that follow it.
const VALID_ABSOLUTE_IRIS = [
  "file://foo",
  "ftp://ftp.is.co.za/rfc/rfc1808.txt",
  "http://www.ietf.org/rfc/rfc2396.txt",
  "mailto:John.Doe@example.com",
  "news:comp.infosystems.www.servers.unix",
  "tel:+1-816-555-1212",
  "telnet://192.0.2.16:80/",
  "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
  "http://example.com",
  "http://example.com/",
  "http://example.com/foo",
  "http://example.com/foo/bar",
  "http://example.com/foo/bar/",
  "http://example.com/foo/bar?q=1&r=2",
  "http://example.com/foo/bar/?q=1&r=2",
  "http://example.com#toto",
  "http://example.com/#toto",
  "http://example.com/foo#toto",
  "http://example.com/foo/bar#toto",
  "http://example.com/foo/bar/#toto",
  "http://example.com/foo/bar?q=1&r=2#toto",
  "http://example.com/foo/bar/?q=1&r=2#toto",
  "http://example.com/foo/bar/.././baz",
  "file:///foo/bar",
  "mailto:user@host?subject=blah",
  "http://www.yahoo.com",
  "http://www.yahoo.com/",
  "http://1.2.3.4/",
  "http://www.yahoo.com/stuff",
  "http://www.yahoo.com/stuff/",
  "http://www.yahoo.com/hello%20world/",
  "http://www.yahoo.com?name=obi",
  "http://www.yahoo.com?name=obi+wan&status=jedi",
  "http://www.yahoo.com?onery",
  "http://www.yahoo.com#bottom",
  "http://www.yahoo.com/yelp.html#bottom",
  "ftp://www.yahoo.com/",
  "ftp://www.yahoo.com/hello",
  "http://www.yahoo.com?name=%00%01",
  "http://www.yaho%6f.com", // Lowercase hex in percent encoding
  "http://www.yahoo.com/hello%00world/",
  "http://www.yahoo.com/hello+world/",
  "http://www.yahoo.com?name=obi&",
  "http://www.yahoo.com?name=obi&type=",
  "http://www.yahoo.com/yelp.html#",
  "http://example.org/aaa/bbb#ccc",
  "mailto:local@domain.org",
  "mailto:local@domain.org#frag",
  "HTTP://EXAMPLE.ORG/AAA/BBB#CCC",
  "http://example.org/aaa%2fbbb#ccc",
  "http://example.org/aaa%2Fbbb#ccc",
  "http://example.com/%2F",
  "http://example.com/?%2F",
  "http://example.com/#?%2F",
  "http://example.com/aaa%2Fbbb",
  "http://example.org:80/aaa/bbb#ccc",
  "http://example.org:/aaa/bbb#ccc",
  "http://example.org./aaa/bbb#ccc",
  "http://example.123./aaa/bbb#ccc",
  "http://example.org",
  // NOTE(review): this entry was labelled "HTML entity in path (treated as
  // literal)", but the literal below contains the raw character U+0237 rather
  // than an entity such as `&#567;` — confirm which form is intended.
  "http://example/Andrȷ",
  "file:///C:/DEV/Haskell/lib/HXmlToolbox-3.01/examples/",
  // HTTPS
  "https://secure.example.com/",
  "https://example.com:443/path?query=value#frag",
  // WebSockets
  "ws://websocket.example.com/socket",
  "wss://secure.websocket.example.com/socket",
  // LDAP
  "ldap://ldap.example.com/dc=example,dc=com",
  // IPv6 literals
  "http://[2001:db8::1]/",
  "http://[::1]:8080/path",
  "https://[2001:db8::1]:8443/secure",
  // Unicode in path/query/fragment
  "http://example.com/路径/测试",
  "http://example.com/search?q=搜索词",
  "http://example.com/page#章节",
  // Complex userinfo
  "http://user:pass@example.com:8080/path?query=1#frag",
  "http://user@example.com/path",
  // Empty components
  "http://example.com?",
  "http://example.com#",
  "http://example.com/?",
  "http://example.com/#",
  // Special characters in path
  "http://example.com/path;param=value",
  "http://example.com/~user",
  "http://example.com/$path",
  "http://example.com/path,with,commas",
  // Percent-encoding variations
  "http://example.com/%E2%9C%93", // ✓ checkmark
  "http://example.com/%F0%9F%98%8A", // 😊 emoji
  "http://example.com/p%C3%A5th", // 'å' in UTF-8
  // Query strings with special values
  "http://example.com/path?a=b=c&d=e%26f", // value of d is 'e&f'
  "http://example.com/path?param=value%23withhash", // '#' is %23
  // Fragments with special content
  "http://example.com/path#section?notquery", // '?' is allowed in fragment
  "http://example.com/path#fragment%20with%20space",
  // Authority with trailing dot
  "http://example.com./",
  // Port edge cases
  "http://example.com:0/",
  "http://example.com:65535/",
  // Query with no value
  "http://example.com/path?a&b=c",
  "http://example.com/path?a=&b=c",
  // Fragment-only navigation
  "http://example.com/path#onlyfragment",
  // Path with encoded slash
  "http://example.com/path%2Fto%2Fresource",
  // Percent-encoded uppercase/lowercase mix
  "http://example.com/p%C3%A4th", // ä
  "http://example.com/p%e2%82%ac", // € (Euro sign)
  // Multiple slashes in path (valid)
  "http://example.com/a//b///c////d",
  // Colon in path (valid in absolute IRIs)
  "http://example.com/some:thing",
  "http://example.com/path:to:resource",
  // At symbol in path (valid)
  "http://example.com/user@example.com",
  "http://example.com/path@boo",
  // Query with equals in value
  "http://example.com/path?filter=category:books&sort=date",
  // Fragment with encoded characters
  "http://example.com/page#%E2%9C%93", // ✓ in fragment
];

// IRIs that should fail for both Pragmatic and Strict/Parse strategies
const ALWAYS_INVALID_ABSOLUTE_IRIS = [
  "",
  "foo", // No scheme
  "http://example.com/beepbeep\u0007\u0007", // Control characters
  "http://example.com/\n", // Control character
];

// IRIs that should fail for Strict/Parse but might pass Pragmatic
// Note: Some original comments suggested these might be invalid per Strict,
// but the original test expected them to pass Strict. Adjusted based on RFC 3987.
const STRICTLY_INVALID_ABSOLUTE_IRIS = [
  "http://www yahoo.com", // Space in authority (not % encoded)
  "http://www.yahoo.com/hello world/", // Space in path (not % encoded)
  "http://www.yahoo.com/yelp.html#\"'", // Quote in fragment (not % encoded)
  "http://example.com/ ", // Space in path (not % encoded)
  "http://example.com/%", // Incomplete percent encoding
  "http://example.com/A%Z", // Invalid hex in percent encoding
  "http://example.com/%ZZ", // Invalid hex in percent encoding
  "http://example.com/%AZ", // Invalid hex in percent encoding
  "http://example.com/A C", // Space in path (not % encoded)
  "http://example.com/A`C", // Backtick not generally allowed unencoded in path
  "http://example.com/A<C", // Less-than not allowed unencoded
  "http://example.com/A>C", // Greater-than not allowed unencoded
  "http://example.com/A^C", // Caret not generally allowed unencoded in path
  "http://example.com/A\\C", // Backslash not allowed unencoded
  "http://example.com/A{C", // Left brace not generally allowed unencoded in path
  "http://example.com/A|C", // Pipe not allowed unencoded
  "http://example.com/A}C", // Right brace not generally allowed unencoded in path
  "http://example.com/A[C", // Left bracket not generally allowed unencoded in path (outside IPv6)
  "http://example.com/A]C", // Right bracket not generally allowed unencoded in path (outside IPv6)
  "http://example.com/A[**]C", // Brackets with content not allowed unencoded in path
  "http://[xyz]/", // Invalid IPv6
  "http://]/", // Invalid authority start
  "http://example.org/[2010:836B:4179::836B:4179]", // Bracketed IPv6-style literal in path (brackets belong only in the host)
  "http://example.org/abc#[2010:836B:4179::836B:4179]", // Bracketed IPv6-style literal in fragment
  "http://example.org/xxx/[qwerty]#a[b]", // Brackets with non-IPv6 content in path/fragment
  // Iprivate characters are NOT allowed in path or fragment (per RFC 3987)
  "http://example.com/\uE000", // Iprivate in path
  "http://example.com/#\uE000", // Iprivate in fragment
  // Bad characters based on RFC 3987 ranges (ucschar/iprivate)
  // These are simplified checks. Full validation is complex.
  // Control characters
  "http://\u0000", // Null char in scheme/host
  "http://example.com/\u0000", // Null char in path
  "http://example.com/?\u0000", // Null char in query
  "http://example.com/#\u0000", // Null char in fragment
  // Characters outside defined ranges (simplified examples)
  // Note: Full range checking is complex in JS. These are indicative.
  // '\uFFFF' is often a non-character
  // 'http://\uFFFF', // Non-character in scheme/host
  // 'http://example.com/?\uFFFF', // Non-character in query
  // Bad host structure
  "http://[/", // Malformed IPv6 start
  "http://[::1]a/", // Garbage after IPv6 literal
  // Fuzzing examples (simplified representation)
  // 'http://\u034F@[]', // Combining grapheme joiner, malformed authority
  // Represented more simply:
  "http://@[]", // Empty userinfo, empty host
];

// Main suite: runs each fixture list above through parseIRI/validateIri.
// The assertions below expect validateIri to return `undefined` for an
// accepted IRI and an `Error` instance for a rejected one.
describe("IRI Parser and Validator", () => {
  describe("Valid IRIs", () => {
    for (const iri of VALID_ABSOLUTE_IRIS) {
      test(`should validate '${iri}' as valid`, () => {
        // Smoke-check the parser: the call must not throw. Its result is
        // deliberately not asserted here.
        parseIRI(iri);

        // Test Parse validation strategy
        const parseError = validateIri(iri, IriValidationStrategy.Parse);
        assert.strictEqual(
          parseError,
          undefined,
          `Validate (Parse) failed: ${parseError?.message}`,
        );

        // Test Pragmatic strategy
        const pragmaticError = validateIri(
          iri,
          IriValidationStrategy.Pragmatic,
        );
        assert.strictEqual(
          pragmaticError,
          undefined,
          `Validate (Pragmatic) failed: ${pragmaticError?.message}`,
        );
      });
    }
  });

  describe("Always Invalid IRIs", () => {
    for (const iri of ALWAYS_INVALID_ABSOLUTE_IRIS) {
      test(`should validate '${iri}' as invalid (All strategies)`, () => {
        // Smoke-check the parser: the call must not throw on bad input.
        parseIRI(iri);

        // Test Parse validation strategy
        const parseError = validateIri(iri, IriValidationStrategy.Parse);
        assert.ok(
          parseError instanceof Error,
          `Validate (Parse) should have failed for '${iri}'`,
        );

        // Test Pragmatic strategy
        const pragmaticError = validateIri(
          iri,
          IriValidationStrategy.Pragmatic,
        );
        assert.ok(
          pragmaticError instanceof Error,
          `Validate (Pragmatic) should have failed for '${iri}'`,
        );

        // Test Strict strategy
        const strictError = validateIri(iri, IriValidationStrategy.Strict);
        assert.ok(
          strictError instanceof Error,
          `Validate (Strict) should have failed for '${iri}'`,
        );
      });
    }
  });

  describe("Strictly Invalid IRIs (RFC 3987 syntax)", () => {
    for (const iri of STRICTLY_INVALID_ABSOLUTE_IRIS) {
      test(`should validate '${iri}' as invalid (Parse/Strict strategies)`, () => {
        // Smoke-check the parser: the call must not throw on bad input.
        parseIRI(iri);

        // Test Parse validation strategy (main focus)
        const parseError = validateIri(iri, IriValidationStrategy.Parse);
        assert.ok(
          parseError instanceof Error,
          `Validate (Parse) should have failed for '${iri}': ${parseError?.message}`,
        );
      });
    }
  });

  describe("Edge Cases and Strategy Handling", () => {
    test("should handle invalid strategy gracefully", () => {
      const error = validateIri("http://example.com/", "foo");
      assert.ok(error instanceof Error);
      assert.ok(error.message.includes("Not supported validation strategy"));
    });

    test("should not validate with the none strategy", () => {
      assert.strictEqual(
        validateIri("", IriValidationStrategy.None),
        undefined,
      );
      assert.strictEqual(
        validateIri("\n", IriValidationStrategy.None),
        undefined,
      );
      assert.strictEqual(
        validateIri("http://example.com/\u0000", IriValidationStrategy.None),
        undefined,
      );
    });

    test("should identify structural errors in parsing", () => {
      // Missing scheme
      const missingScheme = parseIRI("notascheme");
      assert.strictEqual(missingScheme.valid, false);
      assert.ok(missingScheme.error);

      // Missing colon after scheme
      const missingColon = parseIRI("http//example.com");
      assert.strictEqual(missingColon.valid, false);
      assert.ok(missingColon.error);

      // Malformed authority start.
      // Parsing might fail here or later; whether `valid` is false depends on
      // the robustness of parseHostPort, so only "does not throw" is checked.
      parseIRI("http://[invalid:::ipv6]");

      // Incomplete components.
      // Might parse, but the structure is odd; the parser should handle it
      // robustly. The key is that validateIri catches issues, so again only
      // "does not throw" is checked.
      parseIRI("http://example.com/path??query");
    });
  });
});

// biome-ignore-start lint/style/useTemplate: This is a unit test
// --- ReDoS Resilience Tests ---
// Oversized or highly repetitive inputs consumed by the "ReDoS Resilience"
// suite, which only requires each validation to finish quickly.
const REDOS_RESILIENCE_TESTS = [
  // Very long scheme (100000 characters before "://"); scanning it must stay cheap
  "a".repeat(100000) + "://example.com",
  // Authority with many '@' signs (tests findUserInfoEnd logic)
  "http://" + "user@".repeat(10000) + "example.com",
  // Authority with deeply nested brackets (tests IP literal logic robustness)
  "http://[" + "[".repeat(10000) + "xyz" + "]".repeat(10000) + "]/path",
  // Very long path segment (tests path segment validation loop)
  "http://example.com/" + "a".repeat(100000),
  // Very long query with repeated invalid patterns (tests iquery validation)
  "http://example.com/path?" + "invalid%".repeat(10000),
  // Very long fragment with repeated invalid patterns (tests ifragment validation)
  "http://example.com/path#" + "invalid%".repeat(10000),
  // Complex percent-encoding pattern that could trip up regex backtracking
  "http://example.com/" + "%A".repeat(50000), // Incomplete percent-encoding
  // Repeated groups that might stress regex engines
  "http://[" + "1234:5678:".repeat(10000) + "]/path",
];

// --- UNC Path Tests ---
// UNC paths use the 'file' scheme. RFC 8089 defines the syntax.
// file://host/path or file:///path (localhost)
const UNC_PATH_TESTS = [
  // Basic UNC path
  "file://server/share/file.txt",
  // UNC path with authority and path
  "file://hostname/path/to/resource",
  // Local file path (3 slashes)
  "file:///C:/Users/name/file.txt",
  "file:///etc/passwd",
  // UNC path with IPv4 literal
  "file://192.168.1.1/share/folder",
  // Edge case: file: with empty host and path
  "file:///", // Root
  // file: with just a scheme (edge case, might be valid as an empty opaque part)
  // "file:" // This is valid according to RFC 3987 if the scheme allows an empty path/authority
];

// --- Unicode and International Domain Name (IDN) Tests ---
const UNICODE_IDN_TESTS = [
  "http://例子.中国/path", // "example.china"
  "http://παράδειγμα.δοκιμή/προσωπικός_φάκελος/", // "example.test/personal_folder"
  "http://ουτοπία.δπθ.gr/οδηγίες.html", // "utopia.edu.gr/instructions.html"
];

const MORE_EDGE_CASE_TESTS = [
  // Query containing '#'
  "http://example.com/path?param=value%23withhash", // '#' is %23
  // Fragment containing '?'
  "http://example.com/path#section?notquery", // '?' is allowed in fragment
  // Path with '@'
  "http://example.com/path@boo", // '@' is allowed in path
  // Path with ':'
  "http://example.com/some:thing", // ':' is allowed in path (not at start of segment in relative IRIs, but absolute is okay)
  // Multiple consecutive slashes in path (valid)
  "http://example.com/a//b///c",
  // Percent-encoded multi-byte UTF-8 sequence written with uppercase hex digits
  "http://example.com/p%C4%8Ath", // %C4%8A is UTF-8 for 'Ċ' (U+010A)
  // Percent-encoding normalization (should pass, even if not normalized)
  "http://example.com/p%61th", // 'a' is %61
  // Query with '=', '&', in values
  "http://example.com/path?a=b=c&d=e%26f", // value of d is 'e&f'
];
// biome-ignore-end lint/style/useTemplate: This is a unit test

// Each oversized input must be validated within the time budget below; the
// outcome (valid or invalid) is not pinned, only that a result comes back.
describe("ReDoS Resilience", () => {
  for (const iri of REDOS_RESILIENCE_TESTS) {
    test(`should handle potentially ReDoS-inducing IRI quickly: ${iri.substring(0, 50)}...`, () => {
      // Time the call with the monotonic clock so that wall-clock adjustments
      // cannot distort the measured duration.
      const start = performance.now();
      const error = validateIri(iri, IriValidationStrategy.Parse);
      const duration = performance.now() - start;

      // Assert it finishes in a reasonable time (e.g., < 100ms)
      // Note: Time-based tests can be flaky in CI, consider adjusting threshold or skipping in CI
      assert.ok(
        duration < 100,
        `Validation took too long (${duration.toFixed(1)}ms): ${iri.substring(0, 50)}...`,
      );

      // It should either be valid or invalid, but not hang or throw unexpectedly
      // Most of these should be invalid
      assert.ok(
        error instanceof Error || error === undefined,
        `Unexpected result for ReDoS test: ${error}`,
      );
    });
  }
});

// file-scheme IRIs must be accepted by both the parser and the Parse strategy.
describe("UNC Paths", () => {
  for (const iri of UNC_PATH_TESTS) {
    test(`should parse and validate UNC path IRI: ${iri}`, () => {
      const parseResult = parseIRI(iri);
      assert.strictEqual(
        parseResult.valid,
        true,
        `Parsing failed for UNC path: ${parseResult.error}`,
      );

      const validateError = validateIri(iri, IriValidationStrategy.Parse);
      assert.strictEqual(
        validateError,
        undefined,
        `Validation (Parse) failed for UNC path: ${validateError?.message}`,
      );
    });
  }
});

// Non-ASCII hosts and paths must parse structurally; the validation verdict
// is intentionally left open.
describe("Unicode and IDN", () => {
  for (const iri of UNICODE_IDN_TESTS) {
    test(`should parse and validate Unicode/IDN IRI: ${iri}`, () => {
      const parseResult = parseIRI(iri);
      const validateError = validateIri(iri, IriValidationStrategy.Parse);

      assert.strictEqual(
        parseResult.valid,
        true,
        `Structural parsing failed for Unicode IRI: ${parseResult.error}`,
      );

      // The validation verdict is not pinned: a failure is tolerated in case
      // ucschar is not supported by validation. Only the return contract
      // (undefined or an Error) is checked.
      assert.ok(
        validateError === undefined || validateError instanceof Error,
        `Unexpected validation result for Unicode IRI: ${validateError}`,
      );
    });
  }
});

// Further syntactically unusual but valid IRIs: both the parser and the Parse
// strategy must accept them.
describe("Additional Edge Cases", () => {
  for (const iri of MORE_EDGE_CASE_TESTS) {
    test(`should parse and validate edge case IRI: ${iri}`, () => {
      const parseResult = parseIRI(iri);
      assert.strictEqual(
        parseResult.valid,
        true,
        `Parsing failed for edge case: ${parseResult.error}`,
      );

      const validateError = validateIri(iri, IriValidationStrategy.Parse);
      assert.strictEqual(
        validateError,
        undefined,
        `Validation (Parse) failed for edge case: ${validateError?.message}`,
      );
    });
  }
});