UNPKG

nehonix-uri-processor

Version:

A powerful URI processor for encoding, decoding, and analyzing URI data securely.

788 lines 28.5 kB
import type { NehonixURIProcessor } from "..";
import type {
  DetectedPattern,
  MaliciousPatternOptions,
  MaliciousPatternType,
} from "../services/MaliciousPatterns.service";

/**
 * Supported encoding types. This string-literal union lists every encoding
 * scheme name accepted by the `encode` / `decode` signatures declared in
 * this file.
 *
 * NOTE(review): both "rawHexadecimal" and "rawHex" are present; they look
 * like aliases of each other — confirm against the implementation.
 */
export type ENC_TYPE =
  | "percentEncoding"
  | "doublepercent"
  | "base64"
  | "hex"
  | "unicode"
  | "htmlEntity"
  | "punycode"
  | "asciihex"
  | "asciioct"
  | "rot13"
  | "base32"
  | "urlSafeBase64"
  | "jsEscape"
  | "cssEscape"
  | "utf7"
  | "quotedPrintable"
  | "decimalHtmlEntity"
  | "rawHexadecimal"
  | "jwt"
  | "url"
  | "rawHex";

export type DEC_FEATURE_TYPE = "url" | "any";

/** A single partially-detected encoding together with its confidence. */
interface PartialEncodingInfo {
  type: string;
  confidence: number;
}

/**
 * Result of encoding detection. Describes the outcome of an attempt to
 * identify the encoding scheme used in a given string: every detected type,
 * the most likely type, and a confidence score for that detection.
 */
export interface EncodingDetectionResult {
  partialEncodings?: PartialEncodingInfo[];
  /** All encoding types detected in the input string. */
  types: string[];
  /** The encoding type considered the most likely based on analysis. */
  mostLikely: ENC_TYPE | "plainText" | "mixedEncoding";
  /**
   * Confidence level of the most likely encoding detection, usually
   * between 0 and 1.
   */
  confidence: number;
  /**
   * Indicates whether nested encoding was detected (i.e., one encoding
   * within another).
   */
  isNested?: boolean;
  /** If nested encoding is detected, lists the inner encoding types. */
  nestedTypes?: (ENC_TYPE | "mixedEncoding")[];
}

/**
 * Result of nested encoding detection. Provides more specific information
 * about nested encoding scenarios, including the outer and inner encoding
 * types and a confidence score.
 */
export interface NestedEncodingResult {
  /** Indicates whether nested encoding was detected. */
  isNested: boolean;
  /** The outer layer encoding type. */
  outerType: string;
  /** The inner layer encoding type. */
  innerType: string;
  /** Confidence level of the nested encoding detection. */
  confidenceScore: number;
}

/**
 * Result of URL analysis: the base URL, the extracted parameters, and any
 * potential vulnerabilities detected.
 */
export interface URLAnalysisResult {
  /** The base URL without parameters. */
  baseURL: string;
  /** The extracted URL parameters and their values. */
  parameters: {
    [key: string]: string;
  };
  /** Descriptions of potential security vulnerabilities found in the URL. */
  potentialVulnerabilities: string[];
  // The misspelled name is part of the public API and is kept for compatibility.
  vulnerabilitieDetails?: DetectedPattern[];
}

/**
 * Result of WAF bypass variants generation: the encoding variants generated
 * for Web Application Firewall (WAF) bypass testing.
 */
export interface WAFBypassVariants {
  /** Standard percent-encoded version of the input string. */
  percentEncoding: string;
  /** Double percent-encoded version of the input string. */
  doublePercentEncoding: string;
  /** A version with mixed encoding types. */
  mixedEncoding: string;
  /** A version with alternating character case. */
  alternatingCase: string;
  /** A fully hexadecimal-encoded version of the input string. */
  fullHexEncoding: string;
  /** A version with unicode character representations. */
  unicodeVariant: string;
  /** A version with HTML entity representations. */
  htmlEntityVariant: string;
}

/**
 * Result of the `detectAndDecode` operation: the decoded value, the detected
 * encoding type, and the confidence level.
 */
export interface DecodeResult {
  /** Returns the decoded string value. */
  val: () => string;
  /** The detected encoding type of the input string. */
  encodingType: string;
  /** Confidence level of the encoding detection. */
  confidence: number;
  /** If nested encoding is detected, lists the inner encoding types. */
  nestedTypes?: string[];
  // NOTE(review): the fields below are undocumented in this declaration
  // file; confirm their exact semantics against the decoder implementation.
  original?: string;
  attemptedDecode?: string;
  attemptedVal?: string | undefined;
  decodingHistory?: {
    result: string;
    type: string;
    confidence: number;
  }[];
}

/**
 * Interface for the NehonixURIProcessor class. Defines the public methods
 * for encoding and decoding strings, analyzing URLs, and generating WAF
 * bypass variants.
 */
export interface INehonixURIProcessor {
  /**
   * Automatically detects and decodes a URI based on the detected encoding type.
   * @param input The URI string to decode.
   * @returns The decoded string with encoding information.
   */
  detectAndDecode(input: string): DecodeResult;
  /**
   * Decodes a string according to a specific encoding type.
   * @param input The string to decode.
   * @param encodingType The encoding type to use.
   * @param maxRecursionDepth Maximum recursion depth for nested decoding (default: 5).
   * @returns The decoded string.
   */
  decode(
    input: string,
    encodingType: ENC_TYPE,
    maxRecursionDepth?: number
  ): string;
  /**
   * Encodes a string according to a specific encoding type.
   * @param input The string to encode.
   * @param encodingType The encoding type to use.
   * @returns The encoded string.
   */
  encode(input: string, encodingType: ENC_TYPE): string;
  /**
   * Decodes percent encoding (URL).
   * @param input The string to decode.
   * @returns The decoded string.
   */
  decodePercentEncoding(input: string): string;
  /**
   * Decodes double percent encoding.
   * @param input The string to decode.
   * @returns The decoded string.
   */
  decodeDoublePercentEncoding(input: string): string;
  /**
   * Decodes base64 encoding.
   * @param input The string to decode.
   * @returns The decoded string.
   */
  decodeBase64(input: string): string;
  /**
   * Decodes hexadecimal encoding.
   * @param input The string to decode.
   * @returns The decoded string.
   */
  decodeHex(input: string): string;
  /**
   * Decodes Unicode encoding.
   * @param input The string to decode.
   * @returns The decoded string.
   */
  decodeUnicode(input: string): string;
  /**
   * Decodes HTML entities.
   * @param input The string to decode.
   * @returns The decoded string.
   */
  decodeHTMLEntities(input: string): string;
  /**
   * Decodes punycode.
   * @param input The string to decode.
   * @returns The decoded string.
   */
  decodePunycode(input: string): string;
  /**
   * Decodes a raw hexadecimal string (without prefixes).
   * @param input The hexadecimal string to decode.
   * @returns The decoded string.
   */
  decodeRawHex(input: string): string;
  /**
   * Decodes a JWT token.
   * @param input The JWT token to decode.
   * @returns The decoded JWT as a formatted string.
   */
  decodeJWT(input: string): string;
  /**
   * Encodes with percent encoding (URL).
   * @param input The string to encode.
   * @param encodeSpaces Whether to encode spaces as %20 (default: false).
   * @returns The encoded string.
   */
  encodePercentEncoding(input: string, encodeSpaces?: boolean): string;
  /**
   * Encodes with double percent encoding.
   * @param input The string to encode.
   * @returns The encoded string.
   */
  encodeDoublePercentEncoding(input: string): string;
  /**
   * Encodes in base64.
   * @param input The string to encode.
   * @returns The encoded string.
   */
  encodeBase64(input: string): string;
  /**
   * Encodes in hexadecimal (format \xXX).
   * @param input The string to encode.
   * @returns The encoded string.
   */
  encodeHex(input: string): string;
  /**
   * Encodes in Unicode (format \uXXXX).
   * @param input The string to encode.
   * @returns The encoded string.
   */
  encodeUnicode(input: string): string;
  /**
   * Encodes in HTML entities.
   * @param input The string to encode.
   * @returns The encoded string.
   */
  encodeHTMLEntities(input: string): string;
  /**
   * Encodes in punycode.
   * @param input The string to encode.
   * @returns The encoded string.
   */
  encodePunycode(input: string): string;
  /**
   * Encodes in ASCII with hexadecimal representation.
   * @param input The string to encode.
   * @returns The encoded string.
   */
  encodeASCIIWithHex(input: string): string;
  /**
   * Encodes in ASCII with octal representation.
   * @param input The string to encode.
   * @returns The encoded string.
   */
  encodeASCIIWithOct(input: string): string;
  /**
   * Encodes all characters in percent encoding.
   * @param input The string to encode.
   * @returns The encoded string.
   */
  encodeAllChars(input: string): string;
  /**
   * Analyzes a URL and extracts potentially vulnerable parameters.
   * @param url The URL to analyze.
   * @returns An object containing information about the URL and parameters.
   */
  analyzeURL(url: string): URLAnalysisResult;
  /**
   * Generates encoding variants of a string for WAF bypass testing.
   * @param input The string to encode.
   * @returns An object containing different encoding variants.
   */
  generateWAFBypassVariants(input: string): WAFBypassVariants;
  /**
   * Automatically detects the encoding type of a URI string.
   * @param input The URI string to analyze.
   * @param depth Current recursion depth (internal use).
   * @returns An object containing the detected encoding types and their probability.
   */
  detectEncoding(input: string, depth?: number): EncodingDetectionResult;
}

/** The constructor (static side) type of the NehonixURIProcessor class. */
export type NehonixURIProcessorType = typeof NehonixURIProcessor;

/**
 * Real-world application (RWA): encoding user input for various contexts.
 * Names the context in which user input will be used, allowing for
 * appropriate encoding selection.
 */
export type RWA_TYPES =
  | "url"
  | "urlParam"
  | "html"
  | "htmlAttr"
  | "js"
  | "jsString"
  | "css"
  | "cssSelector"
  | "email"
  | "emailSubject"
  | "command"
  | "xml"
  | "json"
  | "obfuscate"
  | "idnDomain";

/**
 * Options for URI validation.
 */
export interface UrlValidationOptions {
  /**
   * If `true`, requires a leading slash before paths or query parameters
   * (e.g., `/path` or `/?query`).
   * If `false`, allows query parameters without a leading slash (e.g., `?query`).
   * @default false
   */
  strictMode?: boolean;
  /**
   * If `true`, allows Unicode escape sequences (e.g., `\u0068`) in query parameters.
   * If `false`, rejects URIs containing Unicode escape sequences.
   * @default true
   */
  allowUnicodeEscapes?: boolean;
  /**
   * If `true`, rejects URIs with duplicate query parameter keys (e.g., `?p1=a&p1=b`).
   * If `false`, allows duplicate keys.
   * @default true
   */
  rejectDuplicateParams?: boolean;
  /**
   * If `true`, only allows https:// URLs (rejects http://).
   * If `false`, allows both http:// and https:// URLs.
   * @default false
   */
  httpsOnly?: boolean;
  /**
   * Maximum allowed length for the entire URL.
   * Set to 0 to disable length checking.
   * The type also accepts the literal `"NO_LIMIT"`.
   * NOTE(review): presumably `"NO_LIMIT"` disables the length check as
   * well — confirm against the validator implementation.
   * @default 2048
   */
  maxUrlLength?: number | "NO_LIMIT";
  /**
   * List of allowed top-level domains (e.g., ['com', 'org', 'net']).
   * If empty, all TLDs are allowed.
   * @default []
   */
  allowedTLDs?: string[];
  /**
   * List of allowed protocols (e.g., ['https', 'http', 'ftp']).
   * Only relevant if requireProtocol is true.
   * @default ['http', 'https']
   */
  allowedProtocols?: string[];
  /**
   * If `true`, requires the protocol to be explicitly specified in the URL.
   * If `false`, adds https:// if no protocol is specified.
   * @default false
   */
  requireProtocol?: boolean;
  /**
   * If `true`, validates that the URL has a path or query string.
   * If `false`, allows bare domains like 'example.com'.
   * @default false
   */
  requirePathOrQuery?: boolean;
  /**
   * If `true`, validates each parameter value against URI encoding standards.
   * If `false`, performs basic validation only.
   * @default false
   */
  strictParamEncoding?: boolean;
  /**
   * If `true`, finds keys in the query parameters that map to the same value
   * (e.g., `{ param1: "value1", param2: "value1", param3: "value2" }`),
   * then groups keys by their values and filters for values shared by
   * multiple keys.
   * @default false
   */
  rejectDuplicatedValues?: boolean;
  /**
   * If `true`, allows localhost URLs (e.g., http://localhost:8080).
   * These would otherwise be rejected due to domain validation rules.
   * @default false
   */
  allowLocalhost?: boolean;
  /**
   * Custom validation rules to apply to the URL.
   * Each rule specifies a URL component (e.g., 'hostname', 'pathname') or the
   * marker 'literal', a comparison operator, and a value to compare against.
   * If provided, the URL is validated against each rule, and the results are
   * reported in the UrlCheckResult.
   * @example
   * // Validate that the hostname is 'nehonix.space' and the pathname is '/api'
   * const options: UrlValidationOptions = {
   *   customValidations: [
   *     ['hostname', '===', 'nehonix.space'],
   *     ['pathname', '===', '/api'],
   *     ['literal', '!=', 'nehonix.space'] // Compare a literal value (e.g., URL string)
   *   ]
   * };
   */
  customValidations?: ComparisonRule[];
  /**
   * The value to use as the left operand for 'literal' comparisons in
   * customValidations. Required if any rule uses 'literal' as the component.
   * With the "@this" option, the URL string itself is used as the left
   * operand for comparisons.
   * @example
   * // Compare a custom string to 'nehonix.space'
   * // literalValue: 'nehonix.space', rule: ['literal', '==', 'nehonix.space']
   * // output: true
   * @example
   * // input = "https://google.com"
   * // literalValue: '@this', rule: ['literal', '==', 'nehonix.space']
   * // output: false
   */
  literalValue?: "@this" | string | number;
  /**
   * If true, enables debug logging for custom validations, printing the
   * actual values of components or literal inputs to aid in troubleshooting.
   * @default false
   */
  debug?: boolean;
  // The options below carry no documentation in this declaration file.
  // NOTE(review): their semantics and defaults must be confirmed against
  // the validator implementation before being documented here.
  fullCustomValidation?: Record<string, string | number>;
  allowInternationalChars?: boolean;
  allowIPAddresses?: boolean;
  ipv4Only?: boolean;
  allowFragments?: boolean;
  allowCredentials?: boolean;
  maxQueryParams?: number;
  maxPathSegments?: number;
  disallowedKeywords?: string[];
  validationLevel?: UrlValidationLevel;
  disallowEmptyParameterValues?: boolean;
  requireSecureProtocolForNonLocalhost?: boolean;
  allowSubdomains?: boolean;
  allowedDomains?: string[];
  minUrlLength?: number;
  allowDataUrl?: boolean;
  allowMailto?: boolean;
}

export type UrlValidationLevel = "strict" | "moderate" | "relaxed";

/** Synchronous validation options combined with the async-only features. */
export type AsyncUrlValidationOptions = UrlValidationOptions &
  AsyncUrlValidationOptFeature;

/**
 * Options available only on the asynchronous validation path.
 * NOTE(review): undocumented in this file; confirm semantics and defaults
 * against the malicious-pattern service.
 */
export type AsyncUrlValidationOptFeature = {
  detectMaliciousPatterns?: boolean;
  maliciousPatternSensitivity?: number;
  maliciousPatternMinScore?: number;
  ignoreMaliciousPatternTypes?: MaliciousPatternType[];
  customMaliciousPatterns?: MaliciousPatternOptions["customPatterns"];
};

/**
 * A custom validation rule for URLs, expressed as a 3-tuple:
 *
 * 0. the URL component to compare (e.g., 'hostname', 'pathname', or a
 *    `fullCustomValidation.<key>` / `fcv.<key>` path), or 'literal' for a
 *    direct comparison against `UrlValidationOptions.literalValue`;
 * 1. the comparison operator (e.g., '===', '==');
 * 2. the value to compare against (string or number).
 *
 * @example
 * // Rule to check if hostname is 'nehonix.space'
 * const rule: ComparisonRule = ['hostname', '===', 'nehonix.space'];
 * @example
 * // Rule for literal comparison
 * const literalRule: ComparisonRule = ['literal', '!=', 'nehonix.space'];
 */
export type ComparisonRule = [
  ValidUrlComponents | custumValidUriComponent,
  comparisonOperator,
  string | number
];

/** Component names accepted as the first element of a ComparisonRule. */
export type ValidUrlComponents =
  | "href"
  | "origin"
  | "protocol"
  | "username"
  | "password"
  | "host"
  | "hostname"
  | "port"
  | "pathname"
  | "search"
  | "hash"
  | `fullCustomValidation.${string}`
  | `fcv.${string}`;

// The misspelled name is part of the public API and is kept for compatibility.
export type custumValidUriComponent = "literal";

/**
 * Comparison operators for custom validation rules.
 */
export type comparisonOperator =
  | "==="
  | "=="
  | "<="
  | ">="
  | "!="
  | "!=="
  | "<"
  | ">";

/**
 * Represents the detailed result of a URL validation process.
 * Provides a comprehensive breakdown of the validation checks performed
 * on a URL, including overall validity and specific details for each
 * validation step.
 */
export interface UrlCheckResult {
  /**
   * Indicates whether the URL is valid based on all validation checks.
   * `true` if all checks pass, `false` if any check fails.
   */
  isValid: boolean;
  /**
   * The reason for the failure.
   */
  cause?: string;
  /**
   * Contains detailed results for each validation check performed on the URL.
   * Each property corresponds to a specific validation aspect and is optional,
   * as not all validations may be relevant depending on the provided options.
   */
  validationDetails: {
    /**
     * Results of custom validation rules applied to the URL.
     * @example
     * // Example result for custom validations
     * {
     *   isValid: true,
     *   message: "Validation passed: hostname === nehonix.space; Validation passed: pathname === /api",
     *   results: [
     *     {
     *       isValid: true,
     *       message: "Validation passed: hostname === nehonix.space",
     *       rule: ["hostname", "===", "nehonix.space"]
     *     },
     *     {
     *       isValid: true,
     *       message: "Validation passed: pathname === /api",
     *       rule: ["pathname", "===", "/api"]
     *     }
     *   ]
     * }
     */
    customValidations?: {
      /** Whether all custom validation rules passed. */
      isValid: boolean;
      /** A summary of the validation outcomes, combining messages for each rule. */
      message: string;
      /** Detailed results for each custom validation rule. */
      results: {
        /** Whether the specific rule passed. */
        isValid: boolean;
        /** A message describing the rule's outcome (e.g., success or failure details). */
        message: string;
        /** The ComparisonRule that was evaluated. */
        rule: ComparisonRule;
      }[];
    };
    /**
     * Validation result for the URL length check.
     */
    length?: {
      /** Indicates if the URL length is within the specified limit. */
      isValid: boolean;
      /** Descriptive message about the length validation result. */
      message?: string;
      /** The actual length of the URL in characters. */
      actualLength?: number;
      /** The maximum allowed length as specified in options. */
      maxLength?: number | "NO_LIMIT";
      minLength?: number;
    };
    /**
     * Validation result for checking if the URL is empty or contains only whitespace.
     */
    emptyCheck?: {
      /** Indicates if the URL is non-empty. */
      isValid: boolean;
      /** Descriptive message about the empty check result. */
      message?: string;
    };
    /**
     * Validation result for the URL protocol check.
     */
    protocol?: {
      /** Indicates if the protocol is valid and allowed. */
      isValid: boolean;
      /** Descriptive message about the protocol validation result. */
      message?: string;
      /** The detected protocol in the URL (e.g., 'http', 'https'). */
      detectedProtocol?: string;
      /** The list of allowed protocols specified in options. */
      allowedProtocols?: string[];
    };
    /**
     * Validation result for the HTTPS-only requirement.
     */
    httpsOnly?: {
      /** Indicates if the URL uses HTTPS when required. */
      isValid: boolean;
      /** Descriptive message about the HTTPS-only validation result. */
      message?: string;
    };
    /**
     * Validation result for the domain structure check.
     */
    domain?: {
      /** Indicates if the domain structure is valid. */
      isValid: boolean;
      /** Descriptive message about the domain validation result. */
      message?: string;
      /** The hostname extracted from the URL. */
      hostname?: string;
      error?: string;
      type?: "INV_DOMAIN_ERR" | "INV_STRUCTURE" | "ERR_UNKNOWN";
    };
    /**
     * Validation result for the top-level domain (TLD) check.
     */
    tld?: {
      /** Indicates if the TLD is valid and allowed. */
      isValid: boolean;
      /** Descriptive message about the TLD validation result. */
      message?: string;
      /** The detected TLD in the URL (e.g., 'com', 'org'). */
      detectedTld?: string;
      /** The list of allowed TLDs specified in options. */
      allowedTlds?: string[];
    };
    /**
     * Validation result for the path or query string requirement.
     */
    pathOrQuery?: {
      /** Indicates if the URL satisfies path or query requirements. */
      isValid: boolean;
      /** Descriptive message about the path/query validation result. */
      message?: string;
    };
    /**
     * Validation result for strict mode path requirements.
     */
    strictMode?: {
      /** Indicates if the URL satisfies strict mode path requirements. */
      isValid: boolean;
      /** Descriptive message about the strict mode validation result. */
      message?: string;
    };
    /**
     * Validation result for checking unencoded spaces in the query string.
     */
    querySpaces?: {
      /** Indicates if the query string is free of unencoded spaces. */
      isValid: boolean;
      /** Descriptive message about the query spaces validation result. */
      message?: string;
    };
    /**
     * Validation result for strict parameter encoding check.
     */
    paramEncoding?: {
      /** Indicates if query parameters are properly encoded. */
      isValid: boolean;
      /** Descriptive message about the parameter encoding validation result. */
      message?: string;
      /** List of parameters that failed encoding validation, if any. */
      invalidParams?: string[];
    };
    /**
     * Validation result for duplicate query parameter keys check.
     */
    duplicateParams?: {
      /** Indicates if there are no duplicate query parameter keys. */
      isValid: boolean;
      /** Descriptive message about the duplicate parameters validation result. */
      message?: string;
      /** List of query parameter keys that are duplicated, if any. */
      duplicatedKeys?: string[];
    };
    /**
     * Validation result for duplicate query parameter values check.
     */
    duplicateValues?: {
      /** Indicates if there are no duplicate query parameter values. */
      isValid: boolean;
      /** Descriptive message about the duplicate values validation result. */
      message?: string;
      /** List of query parameter values that are duplicated, if any. */
      duplicatedValues?: string[];
    };
    /**
     * Validation result for Unicode escape sequences check.
     */
    unicodeEscapes?: {
      /** Indicates if the URL is free of disallowed Unicode escape sequences. */
      isValid: boolean;
      /** Descriptive message about the Unicode escapes validation result. */
      message?: string;
    };
    /**
     * Validation result for URL parsing.
     */
    parsing?: {
      /** Indicates if the URL was parsed successfully. */
      isValid: boolean;
      /** Descriptive message about the parsing validation result. */
      message?: string;
    };
    // The remaining entries carry no documentation in this declaration file.
    // NOTE(review): each presumably reports the check for the like-named
    // option in UrlValidationOptions — confirm against the implementation.
    internationalChars?: {
      isValid: boolean;
      message: string;
      containsNonAscii?: boolean;
      containsPunycode?: boolean;
    };
    dataUrl?: {
      isValid: boolean;
      message?: string;
    };
    mailto?: {
      isValid: boolean;
      message?: string;
    };
    disallowedKeywords?: {
      isValid: boolean;
      message?: string;
      foundKeywords?: string[];
    };
    secureNonLocalhost?: {
      isValid: boolean;
      message?: string;
    };
    allowSubdomains?: {
      isValid: boolean;
      message?: string;
    };
    credentials?: {
      isValid: boolean;
      message?: string;
    };
    fragments?: {
      isValid: boolean;
      message?: string;
    };
    allowedDomains?: {
      isValid: boolean;
      message?: string;
      hostname?: string;
      allowedDomains?: string[];
    };
    ipAddress?: {
      isValid: boolean;
      message?: string;
      ipAddress?: string;
      hostname?: string;
      isIPv4?: boolean;
      isIPv6?: boolean;
    };
    pathSegments?: {
      isValid: boolean;
      segmentCount?: number;
      maxSegments?: number;
      message?: string;
    };
    queryParamCount?: {
      isValid: boolean;
      paramCount?: number;
      maxParams?: number;
      message?: string;
    };
    emptyParams?: {
      isValid: boolean;
      message: string;
      emptyParams?: string[];
    };
  };
}

/**
 * UrlCheckResult whose `validationDetails` additionally carries the
 * async-only `maliciousPatterns` entry.
 */
export type AsyncUrlCheckResult = Omit<UrlCheckResult, "validationDetails"> & {
  validationDetails: UrlCheckResult["validationDetails"] &
    AsyncUrlCheckResComponent;
};

/**
 * Extra validation detail produced by the asynchronous validation path.
 * NOTE(review): field semantics are undocumented here; confirm against the
 * malicious-pattern service.
 */
export type AsyncUrlCheckResComponent = {
  maliciousPatterns?: {
    isValid?: boolean;
    message?: string;
    error?: string;
    detectedPatterns?: DetectedPattern[];
    score?: number;
    confidence?: string;
    recommendation?: string;
  };
};

/**
 * NOTE(review): undocumented handler options; `maxIterations` and
 * `output.encodeUrl` semantics must be confirmed against the caller.
 */
export interface UriHandlerInterface {
  maxIterations?: number;
  output?: {
    encodeUrl?: boolean;
  };
}

export {};
//# sourceMappingURL=index.d.ts.map