@cyclonedx/cdxgen
Version:
Creates CycloneDX Software Bill of Materials (SBOM) from source or container image
494 lines (441 loc) • 18.6 kB
JavaScript
import { strict as assert } from "node:assert";
import { describe, test } from "poku";
import { IriValidationStrategy, parseIRI, validateIri } from "./iri.js";
// Absolute IRIs that the "Valid IRIs" suite expects validateIri to accept
// (return undefined) under both the Parse and the Pragmatic strategies.
const VALID_ABSOLUTE_IRIS = [
"file://foo",
"ftp://ftp.is.co.za/rfc/rfc1808.txt",
"http://www.ietf.org/rfc/rfc2396.txt",
"mailto:John.Doe@example.com",
"news:comp.infosystems.www.servers.unix",
"tel:+1-816-555-1212",
"telnet://192.0.2.16:80/",
"urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
"http://example.com",
"http://example.com/",
"http://example.com/foo",
"http://example.com/foo/bar",
"http://example.com/foo/bar/",
"http://example.com/foo/bar?q=1&r=2",
"http://example.com/foo/bar/?q=1&r=2",
"http://example.com#toto",
"http://example.com/#toto",
"http://example.com/foo#toto",
"http://example.com/foo/bar#toto",
"http://example.com/foo/bar/#toto",
"http://example.com/foo/bar?q=1&r=2#toto",
"http://example.com/foo/bar/?q=1&r=2#toto",
"http://example.com/foo/bar/.././baz",
"file:///foo/bar",
"mailto:user@host?subject=blah",
"http://www.yahoo.com",
"http://www.yahoo.com/",
"http://1.2.3.4/",
"http://www.yahoo.com/stuff",
"http://www.yahoo.com/stuff/",
"http://www.yahoo.com/hello%20world/",
"http://www.yahoo.com?name=obi",
"http://www.yahoo.com?name=obi+wan&status=jedi",
"http://www.yahoo.com?onery",
"http://www.yahoo.com#bottom",
"http://www.yahoo.com/yelp.html#bottom",
"ftp://www.yahoo.com/",
"ftp://www.yahoo.com/hello",
"http://www.yahoo.com?name=%00%01",
"http://www.yaho%6f.com", // Lowercase hex in percent encoding
"http://www.yahoo.com/hello%00world/",
"http://www.yahoo.com/hello+world/",
"http://www.yahoo.com?name=obi&",
"http://www.yahoo.com?name=obi&type=",
"http://www.yahoo.com/yelp.html#",
"http://example.org/aaa/bbb#ccc",
"mailto:local@domain.org",
"mailto:local@domain.org#frag",
"HTTP://EXAMPLE.ORG/AAA/BBB#CCC",
"http://example.org/aaa%2fbbb#ccc",
"http://example.org/aaa%2Fbbb#ccc",
"http://example.com/%2F",
"http://example.com/?%2F",
"http://example.com/#?%2F",
"http://example.com/aaa%2Fbbb",
"http://example.org:80/aaa/bbb#ccc",
"http://example.org:/aaa/bbb#ccc",
"http://example.org./aaa/bbb#ccc",
"http://example.123./aaa/bbb#ccc",
"http://example.org",
"http://example/Andrȷ", // U+0237 (dotless j) in path. NOTE(review): the previous comment called this an HTML entity; confirm the literal was not meant to be the text "&#567;"
"file:///C:/DEV/Haskell/lib/HXmlToolbox-3.01/examples/",
// HTTPS
"https://secure.example.com/",
"https://example.com:443/path?query=value#frag",
// WebSockets
"ws://websocket.example.com/socket",
"wss://secure.websocket.example.com/socket",
// LDAP
"ldap://ldap.example.com/dc=example,dc=com",
// IPv6 literals
"http://[2001:db8::1]/",
"http://[::1]:8080/path",
"https://[2001:db8::1]:8443/secure",
// Unicode in path/query/fragment
"http://example.com/路径/测试",
"http://example.com/search?q=搜索词",
"http://example.com/page#章节",
// Complex userinfo
"http://user:pass@example.com:8080/path?query=1#frag",
"http://user@example.com/path",
// Empty components
"http://example.com?",
"http://example.com#",
"http://example.com/?",
"http://example.com/#",
// Special characters in path
"http://example.com/path;param=value",
"http://example.com/~user",
"http://example.com/$path",
"http://example.com/path,with,commas",
// Percent-encoding variations
"http://example.com/%E2%9C%93", // ✓ checkmark
"http://example.com/%F0%9F%98%8A", // 😊 emoji
"http://example.com/p%C3%A5th", // 'å' in UTF-8
// Query strings with special values
"http://example.com/path?a=b=c&d=e%26f", // value of d is 'e&f'
"http://example.com/path?param=value%23withhash", // '#' is %23
// Fragments with special content
"http://example.com/path#section?notquery", // '?' is allowed in fragment
"http://example.com/path#fragment%20with%20space",
// Authority with trailing dot
"http://example.com./",
// Port edge cases
"http://example.com:0/",
"http://example.com:65535/",
// Query with no value
"http://example.com/path?a&b=c",
"http://example.com/path?a=&b=c",
// Path followed by a fragment
"http://example.com/path#onlyfragment",
// Path with encoded slash
"http://example.com/path%2Fto%2Fresource",
// Percent-encoded uppercase/lowercase mix
"http://example.com/p%C3%A4th", // ä
"http://example.com/p%e2%82%ac", // € (Euro sign)
// Multiple slashes in path (valid)
"http://example.com/a//b///c////d",
// Colon in path (valid in absolute IRIs)
"http://example.com/some:thing",
"http://example.com/path:to:resource",
// At symbol in path (valid)
"http://example.com/user@example.com",
"http://example.com/path@boo",
// Query with a colon inside a value
"http://example.com/path?filter=category:books&sort=date",
// Fragment with encoded characters
"http://example.com/page#%E2%9C%93", // ✓ in fragment
];
// IRIs that the "Always Invalid IRIs" suite expects validateIri to reject
// (return an Error) under every checked strategy: Parse, Pragmatic and Strict.
const ALWAYS_INVALID_ABSOLUTE_IRIS = [
"", // Empty string
"foo", // No scheme
"http://example.com/beepbeep\u0007\u0007", // BEL (U+0007) control characters in path
"http://example.com/\n", // Line feed control character in path
];
// IRIs that the "Strictly Invalid IRIs" suite expects the Parse strategy to
// reject. Only Parse is asserted in this file; these inputs might still pass
// Pragmatic.
// Note: some original comments suggested these might be invalid per Strict,
// but the original test expected them to pass Strict. Adjusted based on RFC 3987.
const STRICTLY_INVALID_ABSOLUTE_IRIS = [
"http://www yahoo.com", // Space in authority (not % encoded)
"http://www.yahoo.com/hello world/", // Space in path (not % encoded)
"http://www.yahoo.com/yelp.html#\"'", // Quote in fragment (not % encoded)
"http://example.com/ ", // Space in path (not % encoded)
"http://example.com/%", // Incomplete percent encoding
"http://example.com/A%Z", // Invalid hex in percent encoding
"http://example.com/%ZZ", // Invalid hex in percent encoding
"http://example.com/%AZ", // Invalid hex in percent encoding
"http://example.com/A C", // Space in path (not % encoded)
"http://example.com/A`C", // Backtick not generally allowed unencoded in path
"http://example.com/A<C", // Less-than not allowed unencoded
"http://example.com/A>C", // Greater-than not allowed unencoded
"http://example.com/A^C", // Caret not generally allowed unencoded in path
"http://example.com/A\\C", // Backslash not allowed unencoded
"http://example.com/A{C", // Left brace not generally allowed unencoded in path
"http://example.com/A|C", // Pipe not allowed unencoded
"http://example.com/A}C", // Right brace not generally allowed unencoded in path
"http://example.com/A[C", // Left bracket not generally allowed unencoded in path (outside IPv6)
"http://example.com/A]C", // Right bracket not generally allowed unencoded in path (outside IPv6)
"http://example.com/A[**]C", // Brackets with content not allowed unencoded in path
"http://[xyz]/", // Invalid IPv6
"http://]/", // Invalid authority start
"http://example.org/[2010:836B:4179::836B:4179]", // Bracketed IPv6-style literal in the path (brackets are unencoded outside the authority)
"http://example.org/abc#[2010:836B:4179::836B:4179]", // Bracketed IPv6-style literal in the fragment
"http://example.org/xxx/[qwerty]#a[b]", // Brackets with non-IPv6 content in path/fragment
// Iprivate characters are NOT allowed in path or fragment (per RFC 3987)
"http://example.com/\uE000", // Iprivate in path
"http://example.com/#\uE000", // Iprivate in fragment
// Bad characters based on RFC 3987 ranges (ucschar/iprivate)
// These are simplified checks. Full validation is complex.
// Control characters
"http://\u0000", // Null char in host
"http://example.com/\u0000", // Null char in path
"http://example.com/?\u0000", // Null char in query
"http://example.com/#\u0000", // Null char in fragment
// Characters outside defined ranges (simplified examples)
// Note: Full range checking is complex in JS. These are indicative.
// '\uFFFF' is often a non-character
// 'http://\uFFFF', // Non-character in scheme/host
// 'http://example.com/?\uFFFF', // Non-character in query
// Bad host structure
"http://[/", // Malformed IPv6 start (bracket never closed)
"http://[::1]a/", // Garbage after IPv6 literal
// Fuzzing examples (simplified representation)
// 'http://\u034F@[]', // Combining grapheme joiner, malformed authority
// Represented more simply:
"http://@[]", // Empty userinfo, empty bracketed host
];
/**
 * Core suite for parseIRI / validateIri.
 *
 * - "Valid IRIs": every entry of VALID_ABSOLUTE_IRIS must be accepted by the
 *   Parse and Pragmatic strategies.
 * - "Always Invalid IRIs": every entry of ALWAYS_INVALID_ABSOLUTE_IRIS must be
 *   rejected by Parse, Pragmatic and Strict.
 * - "Strictly Invalid IRIs": every entry of STRICTLY_INVALID_ABSOLUTE_IRIS must
 *   be rejected by Parse (no other strategy is asserted).
 * - "Edge Cases and Strategy Handling": unknown strategy, the None strategy and
 *   structural parse errors.
 *
 * In the three list-driven groups parseIRI is also invoked once per IRI purely
 * as a smoke check (it must not throw); its return value is not asserted there.
 */
describe("IRI Parser and Validator", () => {
  /**
   * Asserts that validateIri accepts the IRI, i.e. returns undefined.
   * @param {string} iri - IRI under test.
   * @param {*} strategy - IriValidationStrategy member handed to validateIri.
   * @param {string} label - Strategy name interpolated into the failure message.
   */
  const assertAccepted = (iri, strategy, label) => {
    const error = validateIri(iri, strategy);
    assert.strictEqual(
      error,
      undefined,
      `Validate (${label}) failed: ${error?.message}`,
    );
  };

  /**
   * Asserts that validateIri rejects the IRI, i.e. returns an Error instance.
   * @param {string} iri - IRI under test.
   * @param {*} strategy - IriValidationStrategy member handed to validateIri.
   * @param {string} label - Strategy name interpolated into the failure message.
   */
  const assertRejected = (iri, strategy, label) => {
    assert.ok(
      validateIri(iri, strategy) instanceof Error,
      `Validate (${label}) should have failed for '${iri}'`,
    );
  };

  describe("Valid IRIs", () => {
    for (const iri of VALID_ABSOLUTE_IRIS) {
      test(`should validate '${iri}' as valid`, () => {
        // Smoke check only: the parser must not throw on a valid IRI.
        parseIRI(iri);
        assertAccepted(iri, IriValidationStrategy.Parse, "Parse");
        assertAccepted(iri, IriValidationStrategy.Pragmatic, "Pragmatic");
      });
    }
  });

  describe("Always Invalid IRIs", () => {
    for (const iri of ALWAYS_INVALID_ABSOLUTE_IRIS) {
      test(`should validate '${iri}' as invalid (All strategies)`, () => {
        // Smoke check only: the parser must report problems without throwing.
        parseIRI(iri);
        assertRejected(iri, IriValidationStrategy.Parse, "Parse");
        assertRejected(iri, IriValidationStrategy.Pragmatic, "Pragmatic");
        assertRejected(iri, IriValidationStrategy.Strict, "Strict");
      });
    }
  });

  describe("Strictly Invalid IRIs (RFC 3987 syntax)", () => {
    for (const iri of STRICTLY_INVALID_ABSOLUTE_IRIS) {
      test(`should validate '${iri}' as invalid (Parse/Strict strategies)`, () => {
        // Smoke check only: the parser must not throw on malformed input.
        parseIRI(iri);
        // Only the Parse strategy is asserted for this list.
        const parseError = validateIri(iri, IriValidationStrategy.Parse);
        assert.ok(
          parseError instanceof Error,
          `Validate (Parse) should have failed for '${iri}': ${parseError?.message}`,
        );
      });
    }
  });

  describe("Edge Cases and Strategy Handling", () => {
    test("should handle invalid strategy gracefully", () => {
      const error = validateIri("http://example.com/", "foo");
      assert.ok(error instanceof Error);
      assert.ok(error.message.includes("Not supported validation strategy"));
    });

    test("should not validate with the none strategy", () => {
      // With the None strategy even clearly malformed input yields undefined.
      for (const input of ["", "\n", "http://example.com/\u0000"]) {
        assert.strictEqual(
          validateIri(input, IriValidationStrategy.None),
          undefined,
        );
      }
    });

    test("should identify structural errors in parsing", () => {
      const structurallyBroken = [
        "notascheme", // Missing scheme
        "http//example.com", // Missing colon after scheme
      ];
      for (const iri of structurallyBroken) {
        const result = parseIRI(iri);
        assert.strictEqual(result.valid, false);
        assert.ok(result.error);
      }

      // The remaining inputs are smoke checks: parseIRI must not throw, but
      // its verdict is intentionally left unasserted.
      // Malformed authority: whether this is flagged depends on how robust the
      // host/port parsing is (the original note names parseHostPort).
      parseIRI("http://[invalid:::ipv6]");
      // Doubled '?': may parse even though the structure is odd; validateIri is
      // the layer expected to catch real issues.
      parseIRI("http://example.com/path??query");
    });
  });
});
// biome-ignore-start lint/style/useTemplate: This is a unit test
// --- ReDoS Resilience Tests ---
// Oversized or highly repetitive inputs consumed by the "ReDoS Resilience"
// suite, which only checks that validation returns within its time budget.
const REDOS_RESILIENCE_TESTS = [
  // 100k-character scheme candidate (should fail quickly on missing ':')
  "a".repeat(100_000) + "://example.com",
  // 10k '@' separators in the authority (tests findUserInfoEnd logic)
  "http://" + "user@".repeat(10_000) + "example.com",
  // 10k nested '[' ... ']' pairs around the host (tests IP literal logic robustness)
  "http://[" + "[".repeat(10_000) + "xyz" + "]".repeat(10_000) + "]/path",
  // Single 100k-character path segment (tests path segment validation loop)
  "http://example.com/" + "a".repeat(100_000),
  // Query made of 10k chunks that each end in a dangling '%' (tests iquery validation)
  "http://example.com/path?" + "invalid%".repeat(10_000),
  // Fragment made of 10k chunks that each end in a dangling '%' (tests ifragment validation)
  "http://example.com/path#" + "invalid%".repeat(10_000),
  // 50k incomplete "%A" escapes that could trip up regex backtracking
  "http://example.com/" + "%A".repeat(50_000),
  // 10k repeated hex groups inside brackets that might stress regex engines
  "http://[" + "1234:5678:".repeat(10_000) + "]/path",
];
// --- UNC Path Tests ---
// UNC paths use the 'file' scheme. RFC 8089 defines the syntax.
// file://host/path or file:///path (localhost)
// The "UNC Paths" suite expects each entry to parse as valid and to pass the
// Parse validation strategy.
const UNC_PATH_TESTS = [
// Basic UNC path
"file://server/share/file.txt",
// UNC path with authority and path
"file://hostname/path/to/resource",
// Local file path (3 slashes)
"file:///C:/Users/name/file.txt",
"file:///etc/passwd",
// UNC path with IPv4 literal
"file://192.168.1.1/share/folder",
// Edge case: file: with empty host and root-only path
"file:///", // Root
// file: with just a scheme (edge case, might be valid as an empty opaque part)
// "file:" // This is valid according to RFC 3987 if the scheme allows an empty path/authority
];
// --- Unicode and International Domain Name (IDN) Tests ---
// IRIs with non-ASCII host and path components. The "Unicode and IDN" suite
// asserts only that parseIRI reports them as valid; the validateIri result is
// not asserted there.
const UNICODE_IDN_TESTS = [
"http://例子.中国/path", // "example.china"
"http://παράδειγμα.δοκιμή/προσωπικός_φάκελος/", // "example.test/personal_folder"
"http://ουτοπία.δπθ.gr/οδηγίες.html", // "utopia.δπθ.gr/instructions.html" (the "δπθ" label is left untranslated)
];
// Extra valid IRIs for the "Additional Edge Cases" suite, which expects each
// one to parse as valid and to pass the Parse validation strategy.
const MORE_EDGE_CASE_TESTS = [
// Query containing '#'
"http://example.com/path?param=value%23withhash", // '#' is %23
// Fragment containing '?'
"http://example.com/path#section?notquery", // '?' is allowed in fragment
// Path with '@'
"http://example.com/path@boo", // '@' is allowed in path
// Path with ':'
"http://example.com/some:thing", // ':' is allowed in path (not at start of segment in relative IRIs, but absolute is okay)
// Multiple consecutive slashes in path (valid)
"http://example.com/a//b///c",
// Percent-encoded multi-byte UTF-8 sequence written with uppercase hex digits
"http://example.com/p%C4%8Ath", // %C4%8A is the UTF-8 encoding of 'Ċ' (U+010A)
// Percent-encoding normalization (should pass, even if not normalized)
"http://example.com/p%61th", // 'a' is %61
// Query with '=', '&', in values
"http://example.com/path?a=b=c&d=e%26f", // value of d is 'e&f'
];
// biome-ignore-end lint/style/useTemplate: This is a unit test
/**
 * Guards against catastrophic slowdowns: every entry of REDOS_RESILIENCE_TESTS
 * is validated with the Parse strategy and must finish within the time budget.
 * Whether the input is accepted or rejected does not matter, only that
 * validateIri returns (an Error or undefined) instead of hanging or throwing.
 */
describe("ReDoS Resilience", () => {
  // Per-call budget in milliseconds.
  // Note: time-based tests can be flaky in CI; consider adjusting the threshold or skipping in CI.
  const maxDurationMs = 100;

  for (const iri of REDOS_RESILIENCE_TESTS) {
    // The inputs are huge, so only a short prefix is shown in titles and messages.
    const preview = `${iri.substring(0, 50)}...`;
    test(`should handle potentially ReDoS-inducing IRI quickly: ${preview}`, () => {
      // performance.now() is monotonic, so the measured span cannot be skewed
      // by wall-clock adjustments the way a Date.now() difference can.
      const startedAt = performance.now();
      const error = validateIri(iri, IriValidationStrategy.Parse);
      const duration = performance.now() - startedAt;
      assert.ok(
        duration < maxDurationMs,
        `Validation took too long (${duration.toFixed(1)}ms): ${preview}`,
      );
      // It should either be valid or invalid, but not hang or throw unexpectedly.
      // Most of these should be invalid.
      assert.ok(
        error instanceof Error || error === undefined,
        `Unexpected result for ReDoS test: ${error}`,
      );
    });
  }
});
/**
 * Each entry of UNC_PATH_TESTS must be reported as valid by parseIRI and must
 * pass validateIri under the Parse strategy.
 */
describe("UNC Paths", () => {
  for (const uncIri of UNC_PATH_TESTS) {
    const title = `should parse and validate UNC path IRI: ${uncIri}`;
    test(title, () => {
      // Structural parse first ...
      const { valid, error: parseFailure } = parseIRI(uncIri);
      assert.strictEqual(
        valid,
        true,
        `Parsing failed for UNC path: ${parseFailure}`,
      );

      // ... then validation: undefined means the IRI was accepted.
      const rejection = validateIri(uncIri, IriValidationStrategy.Parse);
      assert.strictEqual(
        rejection,
        undefined,
        `Validation (Parse) failed for UNC path: ${rejection?.message}`,
      );
    });
  }
});
/**
 * Each entry of UNICODE_IDN_TESTS must be reported as structurally valid by
 * parseIRI. Validation with the Parse strategy is exercised as well, but its
 * outcome is not asserted.
 */
describe("Unicode and IDN", () => {
  for (const iri of UNICODE_IDN_TESTS) {
    test(`should parse and validate Unicode/IDN IRI: ${iri}`, () => {
      const parseResult = parseIRI(iri);
      // Smoke check only: validation must not throw. Its result is left
      // unasserted because, per the original note, a failure is expected if
      // ucschar is not yet supported in validation.
      validateIri(iri, IriValidationStrategy.Parse);
      assert.strictEqual(
        parseResult.valid,
        true,
        `Structural parsing failed for Unicode IRI: ${parseResult.error}`,
      );
    });
  }
});
/**
 * Each entry of MORE_EDGE_CASE_TESTS must be reported as valid by parseIRI and
 * must pass validateIri under the Parse strategy.
 */
describe("Additional Edge Cases", () => {
  for (const candidate of MORE_EDGE_CASE_TESTS) {
    const checkCandidate = () => {
      // Structural parse must succeed.
      const { valid, error: parseFailure } = parseIRI(candidate);
      assert.strictEqual(
        valid,
        true,
        `Parsing failed for edge case: ${parseFailure}`,
      );

      // Validation must accept the IRI (undefined means no error).
      const rejection = validateIri(candidate, IriValidationStrategy.Parse);
      assert.strictEqual(
        rejection,
        undefined,
        `Validation (Parse) failed for edge case: ${rejection?.message}`,
      );
    };
    test(`should parse and validate edge case IRI: ${candidate}`, checkCandidate);
  }
});