nehonix-uri-processor
Version:
A powerful URI processor for encoding, decoding, and analyzing URI data securely.
1,342 lines (1,335 loc) • 477 kB
JavaScript
'use strict';
var punycode = require('punycode');
var chalk = require('chalk');
var idnaUts46Hx = require('idna-uts46-hx');
var React = require('react');
var express = require('express');
var fs = require('fs');
var path = require('path');
var crypto$1 = require('crypto');
var ipRangeCheck = require('ip-range-check');
/**
 * Builds a frozen, prototype-less namespace object for a CommonJS module.
 *
 * Every own enumerable key of `e` except `default` is re-exposed on the
 * namespace: an existing accessor descriptor is reused as-is, a plain value is
 * exposed through a live getter. The module itself is stored under `default`.
 *
 * @param {object} e The module object to wrap (may be null/undefined)
 * @returns {object} The frozen namespace object
 */
function _interopNamespaceDefault(e) {
    const namespace = Object.create(null);
    const exportedKeys = e ? Object.keys(e) : [];
    for (const key of exportedKeys) {
        if (key === 'default') {
            continue;
        }
        const descriptor = Object.getOwnPropertyDescriptor(e, key);
        const exposed = descriptor.get
            ? descriptor
            : { enumerable: true, get: () => e[key] };
        Object.defineProperty(namespace, key, exposed);
    }
    namespace.default = e;
    return Object.freeze(namespace);
}
// Namespace objects for the `fs` and `path` modules, built with
// _interopNamespaceDefault: each module's own keys plus the module itself
// under `default`.
var fs__namespace = /*#__PURE__*/_interopNamespaceDefault(fs);
var path__namespace = /*#__PURE__*/_interopNamespaceDefault(path);
/**
 * Static console logger with level filtering, an optional
 * `[timestamp] [prefix] [LEVEL]` header and optional chalk colouring.
 *
 * Nothing is written unless `AppLogger.debugs_state` is truthy. A message is
 * also dropped when its level ranks above `AppLogger.currentLevel` in
 * `AppLogger.logLevels`.
 */
class AppLogger {
    /**
     * Merge new settings into the logger options.
     * @param {object} [options] Partial options; a truthy `level` also becomes
     *        the active log level. Calling without an argument is a no-op.
     */
    static configure(options) {
        this.options = { ...this.options, ...options };
        if (options && options.level) {
            this.currentLevel = options.level;
        }
    }
    /**
     * Format the current time according to `options.timestampFormat`.
     * @returns {string} `[timestamp]`, or "" when timestamps are disabled
     */
    static formatTimestamp() {
        if (!this.options.timestamp) {
            return "";
        }
        const now = new Date();
        let timestamp;
        switch (this.options.timestampFormat) {
            case "locale":
                timestamp = now.toLocaleString();
                break;
            case "time":
                timestamp = now.toLocaleTimeString();
                break;
            case "iso":
            default:
                // Unknown formats fall back to ISO 8601.
                timestamp = now.toISOString();
        }
        return `[${timestamp}]`;
    }
    /**
     * Apply a chalk colour, returning the text unchanged when chalk has no
     * function of that name.
     */
    static applyColor(text, colorName) {
        if (typeof chalk[colorName] === "function") {
            return chalk[colorName](text);
        }
        return text;
    }
    /**
     * Core logging method.
     * @param {object} props
     * @param {Array} props.messages Values to print
     * @param {string} props.type Console method to use ("log", "info", "table", ...)
     * @param {string} props.level Key of `AppLogger.logLevels`
     * @param {string} props.colorName chalk colour used when colorize is on
     */
    static appLog(props) {
        // Output is globally gated: skip all formatting when nothing is printed.
        if (!this.debugs_state) {
            return;
        }
        if (this.logLevels[props.level] > this.logLevels[this.currentLevel]) {
            return;
        }
        if (props.type === "table") {
            const [tableData, columns] = props.messages;
            if (tableData !== null && typeof tableData === "object") {
                // Pass the data itself (never the header) as the first argument,
                // otherwise console.table just prints the header string.
                if (columns === undefined) {
                    console.table(tableData);
                }
                else {
                    console.table(tableData, columns);
                }
                return;
            }
        }
        const colorize = Boolean(this.options.colorize);
        // Only strings are coloured; objects and other types are left as-is.
        const paint = (value) => colorize && typeof value === "string"
            ? this.applyColor(value, props.colorName)
            : value;
        const prefix = this.options.prefix ? `[${this.options.prefix}]` : "";
        const levelTag = `[${props.level.toUpperCase()}]`;
        const header = [this.formatTimestamp(), prefix, levelTag]
            .filter(Boolean)
            .join(" ");
        const formattedMessages = props.messages.map(paint);
        if (header) {
            formattedMessages.unshift(paint(header));
        }
        // Non-tabular "table" payloads degrade to a plain log line.
        const method = props.type === "table" ? "log" : props.type;
        console[method](...formattedMessages);
    }
    /**
     * Log an informational message (console.log)
     */
    static log(...messages) {
        this.appLog({
            messages,
            type: "log",
            level: "info",
            colorName: "green",
        });
    }
    /**
     * Log an informational message (console.info)
     */
    static info(...messages) {
        this.appLog({
            messages,
            type: "info",
            level: "info",
            colorName: "green",
        });
    }
    /**
     * Log a warning message
     */
    static warn(...messages) {
        this.appLog({
            messages,
            type: "warn",
            level: "warn",
            colorName: "yellow",
        });
    }
    /**
     * Log an error message
     */
    static error(...messages) {
        this.appLog({
            messages,
            type: "error",
            level: "error",
            colorName: "red",
        });
    }
    /**
     * Log a debug message
     */
    static debug(...messages) {
        this.appLog({
            messages,
            type: "debug",
            level: "debug",
            colorName: "blue",
        });
    }
    /**
     * Log a verbose message
     */
    static verbose(...messages) {
        this.appLog({
            messages,
            type: "log",
            level: "verbose",
            colorName: "magenta",
        });
    }
    /**
     * Log detailed or silly debug information
     */
    static silly(...messages) {
        this.appLog({
            messages,
            type: "log",
            level: "silly",
            colorName: "gray",
        });
    }
    /**
     * Log data as a table
     * @param {object|Array} tableData Rows to display
     * @param {string[]} [columns] Optional subset of columns to display
     */
    static table(tableData, columns) {
        this.appLog({
            messages: columns === undefined ? [tableData] : [tableData, columns],
            type: "table",
            level: "info",
            colorName: "cyan",
        });
    }
    /**
     * Log start of a process with a title
     */
    static start(title) {
        this.appLog({
            messages: [`▶ ${title}`],
            type: "info",
            level: "info",
            colorName: "cyan",
        });
    }
    /**
     * Log successful completion of a process
     */
    static success(message) {
        this.appLog({
            messages: [`✅ ${message}`],
            type: "info",
            level: "info",
            colorName: "green",
        });
    }
    /**
     * Log failure of a process
     */
    static fail(message) {
        this.appLog({
            messages: [`❌ ${message}`],
            type: "error",
            level: "error",
            colorName: "red",
        });
    }
}
// Lower number = higher priority; a message is shown when its rank does not
// exceed the rank of the current level.
AppLogger.logLevels = {
    error: 0,
    warn: 1,
    info: 2,
    debug: 3,
    verbose: 4,
    silly: 5,
};
AppLogger.currentLevel = "info";
AppLogger.options = {
    timestamp: true,
    timestampFormat: "iso",
    colorize: true,
    prefix: "",
};
// Master switch: no output at all while this is false.
AppLogger.debugs_state = false;
/**
 * Character -> HTML named entity table.
 *
 * Keys are the literal characters, values are the matching named character
 * references.
 *
 * NOTE(review): the values were reconstructed as the standard HTML named
 * entities for each key; confirm the exact spellings (for example `&apos;`
 * versus a numeric reference) against the upstream package.
 */
const htmlEntities = {
    // Special characters
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    // NOTE(review): keyed on U+00A0 (no-break space), the character &nbsp;
    // denotes — confirm the key was not meant to be a plain space.
    "\u00A0": "&nbsp;",
    "¢": "&cent;",
    "£": "&pound;",
    "¥": "&yen;",
    "€": "&euro;",
    "©": "&copy;",
    "®": "&reg;",
    "™": "&trade;",
    // Mathematical symbols
    "×": "&times;",
    "÷": "&divide;",
    "±": "&plusmn;",
    "≠": "&ne;",
    "≤": "&le;",
    "≥": "&ge;",
    "∑": "&sum;",
    "∏": "&prod;",
    "∫": "&int;",
    "∞": "&infin;",
    "√": "&radic;",
    // Greek letters
    Α: "&Alpha;",
    α: "&alpha;",
    Β: "&Beta;",
    β: "&beta;",
    Γ: "&Gamma;",
    γ: "&gamma;",
    Δ: "&Delta;",
    δ: "&delta;",
    Ε: "&Epsilon;",
    ε: "&epsilon;",
    Ζ: "&Zeta;",
    ζ: "&zeta;",
    Η: "&Eta;",
    η: "&eta;",
    Θ: "&Theta;",
    θ: "&theta;",
    Ι: "&Iota;",
    ι: "&iota;",
    Κ: "&Kappa;",
    κ: "&kappa;",
    Λ: "&Lambda;",
    λ: "&lambda;",
    Μ: "&Mu;",
    μ: "&mu;",
    Ν: "&Nu;",
    ν: "&nu;",
    Ξ: "&Xi;",
    ξ: "&xi;",
    Ο: "&Omicron;",
    ο: "&omicron;",
    Π: "&Pi;",
    π: "&pi;",
    Ρ: "&Rho;",
    ρ: "&rho;",
    Σ: "&Sigma;",
    σ: "&sigma;",
    ς: "&sigmaf;",
    Τ: "&Tau;",
    τ: "&tau;",
    Υ: "&Upsilon;",
    υ: "&upsilon;",
    Φ: "&Phi;",
    φ: "&phi;",
    Χ: "&Chi;",
    χ: "&chi;",
    Ψ: "&Psi;",
    ψ: "&psi;",
    Ω: "&Omega;",
    ω: "&omega;",
    // Arrows
    "←": "&larr;",
    "↑": "&uarr;",
    "→": "&rarr;",
    "↓": "&darr;",
    "↔": "&harr;",
    // Accented characters
    À: "&Agrave;",
    à: "&agrave;",
    Á: "&Aacute;",
    á: "&aacute;",
    Â: "&Acirc;",
    â: "&acirc;",
    Ã: "&Atilde;",
    ã: "&atilde;",
    Ä: "&Auml;",
    ä: "&auml;",
    Å: "&Aring;",
    å: "&aring;",
    Æ: "&AElig;",
    æ: "&aelig;",
    Ç: "&Ccedil;",
    ç: "&ccedil;",
    È: "&Egrave;",
    è: "&egrave;",
    É: "&Eacute;",
    é: "&eacute;",
    Ê: "&Ecirc;",
    ê: "&ecirc;",
    Ë: "&Euml;",
    ë: "&euml;",
    Ì: "&Igrave;",
    ì: "&igrave;",
    Í: "&Iacute;",
    í: "&iacute;",
    Î: "&Icirc;",
    î: "&icirc;",
    Ï: "&Iuml;",
    ï: "&iuml;",
    Ð: "&ETH;",
    ð: "&eth;",
    Ñ: "&Ntilde;",
    ñ: "&ntilde;",
    Ò: "&Ograve;",
    ò: "&ograve;",
    Ó: "&Oacute;",
    ó: "&oacute;",
    Ô: "&Ocirc;",
    ô: "&ocirc;",
    Õ: "&Otilde;",
    õ: "&otilde;",
    Ö: "&Ouml;",
    ö: "&ouml;",
    Ø: "&Oslash;",
    ø: "&oslash;",
    Ù: "&Ugrave;",
    ù: "&ugrave;",
    Ú: "&Uacute;",
    ú: "&uacute;",
    Û: "&Ucirc;",
    û: "&ucirc;",
    Ü: "&Uuml;",
    ü: "&uuml;",
    Ý: "&Yacute;",
    ý: "&yacute;",
    Þ: "&THORN;",
    þ: "&thorn;",
    ÿ: "&yuml;",
    // Special punctuation
    "–": "&ndash;",
    "—": "&mdash;",
    // "'": '&lsquo;',
    "''": "&rsquo;",
    "‚": "&sbquo;",
    // '"': '&ldquo;',
    // '"': '&rdquo;',
    "„": "&bdquo;",
    "†": "&dagger;",
    "‡": "&Dagger;",
    "•": "&bull;",
    "…": "&hellip;",
    "‰": "&permil;",
    "′": "&prime;",
    "″": "&Prime;",
    "‹": "&lsaquo;",
    "›": "&rsaquo;",
    "«": "&laquo;",
    "»": "&raquo;",
    "°": "&deg;",
    "§": "&sect;",
    "¶": "&para;",
    "‾": "&oline;",
    "¨": "&uml;",
    "¯": "&macr;",
    "¸": "&cedil;",
    "¹": "&sup1;",
    "²": "&sup2;",
    "³": "&sup3;",
    "´": "&acute;",
    "·": "&middot;",
    "¼": "&frac14;",
    "½": "&frac12;",
    "¾": "&frac34;",
    "¿": "&iquest;",
    "¡": "&iexcl;",
};
class NDS {
// private static hasBase64Pattern = NehonixCoreUtils.hasBase64Pattern;
// // private static hasPercentEncoding = NehonixSharedUtils.hasPercentEncoding;
// private static enc: typeof NehonixEncService = NehonixEncService;
// private static hasDoublePercentEncoding =
// NehonixCoreUtils.hasDoublePercentEncoding;
// private static hasHexEncoding = NehonixCoreUtils.hasHexEncoding;
// private static hasUnicodeEncoding = NehonixCoreUtils.hasUnicodeEncoding;
// private static hasRawHexString = NehonixCoreUtils.hasRawHexString;
// private static calculateBase64Confidence = NES.calculateBase64Confidence;
// private static hasHTMLEntityEncoding = NehonixCoreUtils.hasHTMLEntityEncoding;
// private static hasJWTFormat = NehonixCoreUtils.hasJWTFormat;
// private static hasPunycode = NehonixCoreUtils.hasPunycode;
// private static decodeBase64 = NehonixCoreUtils.decodeB64;
// private static decodeRawHexWithoutPrefix = NehonixCoreUtils.drwp;
// In your detectEncoding function or a new function
static detectMixedEncodings(input) {
const detectedEncodings = [];
// Check for percent encoding
if (/%[0-9A-Fa-f]{2}/.test(input)) {
detectedEncodings.push("percentEncoding");
}
// Check for Base64 content
const base64Regex = /[A-Za-z0-9+/=]{4,}/g;
const potentialBase64 = input.match(base64Regex);
if (potentialBase64) {
for (const match of potentialBase64) {
if (NehonixCommonUtils.isBase64(match)) {
detectedEncodings.push("base64");
break;
}
}
}
// Add more checks as needed
return detectedEncodings;
}
/**
* Automatically detects and decodes a URI based on the detected encoding type
* @param input The URI string to decode
* @returns The decoded string according to the most probable encoding type
*/
static detectAndDecode(input) {
// Special case for URLs with parameters
if (input.includes("?") && input.includes("=")) {
const urlParts = input.split("?");
const basePath = urlParts[0];
const queryString = urlParts[1];
// Split query parameters
const params = queryString.split("&");
const decodedParams = params.map((param) => {
const [key, value] = param.split("=");
if (!value)
return param; // Handle cases where parameter has no value
// Try to detect encoding for each parameter value
const detection = NDS.detectEncoding(value);
if (detection.confidence > 0.8) {
try {
let decodedValue = value;
switch (detection.mostLikely) {
case "base64":
let base64Input = value;
// Ensure proper padding
while (base64Input.length % 4 !== 0) {
base64Input += "=";
}
base64Input = base64Input.replace(/-/g, "+").replace(/_/g, "/");
decodedValue = NehonixCommonUtils.decodeB64(base64Input);
// Check if the result is still Base64-encoded
if (NehonixCoreUtils.hasBase64Pattern(decodedValue)) {
let nestedBase64 = decodedValue;
while (nestedBase64.length % 4 !== 0) {
nestedBase64 += "=";
}
nestedBase64 = nestedBase64
.replace(/-/g, "+")
.replace(/_/g, "/");
decodedValue = NehonixCommonUtils.decodeB64(nestedBase64);
}
// Handle case where decoded value contains '&' (e.g., 'true&')
if (decodedValue.includes("&")) {
return `${key}=${decodedValue.split("&")[0]}`; // Take only the first part
}
break;
case "rawHexadecimal":
if (/^[0-9A-Fa-f]+$/.test(value) && value.length % 2 === 0) {
decodedValue = NDS.decodeRawHex(value);
}
break;
case "percentEncoding":
decodedValue = NDS.decodePercentEncoding(value);
break;
case "doublepercent":
decodedValue = NDS.decodeDoublePercentEncoding(value);
break;
}
// Validate the decoded value to ensure it's readable text
const printableChars = decodedValue.replace(/[^\x20-\x7E]/g, "").length;
const printableRatio = printableChars / decodedValue.length;
// Only use decoded value if it's mostly printable characters
if (printableRatio > 0.7) {
return `${key}=${decodedValue}`;
}
}
catch (e) {
AppLogger.warn(`Failed to decode parameter ${key}: ${e}`);
}
}
return param; // Keep original for non-decodable params
});
// Reconstruct URL with decoded parameters
const decodedQueryString = decodedParams.join("&");
const decodedURL = `${basePath}?${decodedQueryString}`;
if (decodedURL !== input) {
const paramEncoding = params
.map((param) => {
const [key, value] = param.split("=");
if (value) {
return NDS.detectEncoding(value).mostLikely;
}
return "none";
})
.find((type) => type !== "plainText" && type !== "none") ||
"unknown";
return {
val: () => decodedURL,
encodingType: paramEncoding,
confidence: 0.85,
};
}
}
// Process nested encoding
const detection = NDS.detectEncoding(input);
let decodedValue = input;
if (detection.isNested && detection.nestedTypes) {
try {
decodedValue = input;
for (const encType of detection.nestedTypes) {
decodedValue = NDS.decode({
encodingType: encType,
input,
});
}
return {
val: () => decodedValue,
encodingType: detection.mostLikely,
confidence: detection.confidence,
nestedTypes: detection.nestedTypes,
};
}
catch (e) {
AppLogger.error(`Error while decoding nested encodings:`, e);
}
}
try {
switch (detection.mostLikely) {
case "percentEncoding":
decodedValue = NDS.decodePercentEncoding(input);
break;
case "doublepercent":
decodedValue = NDS.decodeDoublePercentEncoding(input);
break;
case "base64":
let base64Input = input;
while (base64Input.length % 4 !== 0) {
base64Input += "=";
}
decodedValue = NehonixCommonUtils.decodeB64(base64Input.replace(/-/g, "+").replace(/_/g, "/"));
break;
case "hex":
decodedValue = NDS.decodeHex(input);
break;
case "rawHexadecimal":
decodedValue = NDS.decodeRawHex(input);
break;
case "unicode":
decodedValue = NDS.decodeUnicode(input);
break;
case "htmlEntity":
decodedValue = NDS.decodeHTMLEntities(input);
break;
case "punycode":
decodedValue = NDS.decodePunycode(input);
break;
case "jwt":
decodedValue = NDS.decodeJWT(input);
break;
default:
if (input.includes("=")) {
const parts = input.split("=");
const value = parts[parts.length - 1];
if (value &&
value.length >= 6 &&
/^[0-9A-Fa-f]+$/.test(value) &&
value.length % 2 === 0) {
try {
const decodedParam = NDS.decodeRawHex(value);
const printableChars = decodedParam.replace(/[^\x20-\x7E]/g, "").length;
const printableRatio = printableChars / decodedParam.length;
if (printableRatio > 0.7) {
decodedValue = input.replace(value, decodedParam);
return {
val: () => decodedValue,
encodingType: "rawHexadecimal",
confidence: 0.8,
};
}
}
catch {
// Fall through to return original
}
}
}
decodedValue = input;
}
const printableChars = decodedValue.replace(/[^\x20-\x7E]/g, "").length;
const printableRatio = printableChars / decodedValue.length;
if (printableRatio < 0.7 && detection.mostLikely !== "plainText") {
AppLogger.warn(`Decoded value contains too many unprintable characters (${printableRatio.toFixed(2)}), reverting to original`);
decodedValue = input;
}
}
catch (e) {
AppLogger.error(`Error while decoding using ${detection.mostLikely}:`, e);
decodedValue = input;
}
return {
val: () => decodedValue,
encodingType: detection.mostLikely,
confidence: detection.confidence,
};
}
// Decode JWT
static decodeJWT(input) {
const parts = input.split(".");
if (parts.length !== 3)
throw new Error("Invalid JWT format");
try {
// Décoder seulement les parties header et payload (pas la signature)
const header = NehonixCommonUtils.decodeB64(parts[0].replace(/-/g, "+").replace(/_/g, "/"));
const payload = NehonixCommonUtils.decodeB64(parts[1].replace(/-/g, "+").replace(/_/g, "/"));
// Formater en JSON pour une meilleure lisibilité
const headerObj = JSON.parse(header);
const payloadObj = JSON.parse(payload);
return JSON.stringify({
header: headerObj,
payload: payloadObj,
signature: "[signature]", // Ne pas décoder la signature
}, null, 2);
}
catch (e) {
throw new Error(`JWT decoding failed: ${e.message}`);
}
}
// =============== DECODING METHODS ===============
/**
* Decodes percent encoding (URL)
*/
static decodePercentEncoding(input) {
try {
return decodeURIComponent(input);
}
catch (e) {
// In case of error (invalid sequence), try to decode valid parts
AppLogger.warn("Error while percent-decoding, attempting partial decoding");
return input.replace(/%[0-9A-Fa-f]{2}/g, (match) => {
try {
return decodeURIComponent(match);
}
catch {
return match;
}
});
}
}
/**
// * Decodes double percent encoding
// */
// static decodeDoublePercentEncoding(input: string): string {
// // First decode %25XX to %XX, then decode %XX
// const firstPass = input.replace(/%25([0-9A-Fa-f]{2})/g, (match, hex) => {
// return `%${hex}`;
// });
// return NDS.decodePercentEncoding(firstPass);
// }
/**
* Decodes hexadecimal encoding
*/
/**
* Fix 1: Proper hex string decoding implementation
*/
static decodeHex(input) {
// Remove any whitespace and convert to lowercase
input = input.trim().toLowerCase();
// Check if input is a valid hex string
if (!/^[0-9a-f]+$/.test(input)) {
if (this.throwError) {
throw new Error("Invalid hex string");
}
}
// Ensure even number of characters
if (input.length % 2 !== 0) {
throw new Error("Hex string must have an even number of characters");
}
try {
let result = "";
for (let i = 0; i < input.length; i += 2) {
const hexByte = input.substring(i, i + 2);
const charCode = parseInt(hexByte, 16);
result += String.fromCharCode(charCode);
}
return result;
}
catch (e) {
throw new Error(`Hex decoding failed: ${e.message}`);
}
}
/**
* Decodes Unicode encoding
*/
static decodeUnicode(input) {
try {
// Replace \uXXXX and \u{XXXXX} with their equivalent characters
return input
.replace(/\\u([0-9A-Fa-f]{4})/g, (match, hex) => {
return String.fromCodePoint(parseInt(hex, 16));
})
.replace(/\\u\{([0-9A-Fa-f]+)\}/g, (match, hex) => {
return String.fromCodePoint(parseInt(hex, 16));
});
}
catch (e) {
throw new Error(`Unicode decoding failed: ${e.message}`);
}
}
/**
* Decodes HTML entities
*/
static decodeHTMLEntities(input) {
const entities = htmlEntities;
// Replace named entities
let result = input;
for (const [entity, char] of Object.entries(entities)) {
result = result.replace(new RegExp(entity, "g"), char);
}
// Replace numeric entities (decimal)
result = result.replace(/&#(\d+);/g, (match, dec) => {
return String.fromCodePoint(parseInt(dec, 10));
});
// Replace numeric entities (hexadecimal)
result = result.replace(/&#x([0-9A-Fa-f]+);/g, (match, hex) => {
return String.fromCodePoint(parseInt(hex, 16));
});
return result;
}
/**
* Decodes punycode
* Note: Requires the 'punycode' library
*/
static decodePunycode(input) {
try {
// If the punycode module is available
if (typeof require !== "undefined") {
// For URLs with international domains
return input.replace(/xn--[a-z0-9-]+/g, (match) => {
try {
return punycode.decode(match.replace("xn--", ""));
}
catch {
return match;
}
});
}
else {
// Alternative for browser (less accurate)
// For a complete browser implementation, include a punycode library
AppLogger.warn("Punycode module not available, limited punycode decoding");
return input;
}
}
catch (e) {
throw new Error(`Punycode decoding failed: ${e.message}`);
}
}
/**
* Automatically detects the encoding type(s) of a string (URI or raw text)
* @param input The string to analyze
* @param depth Internal recursion depth (default: 0)
* @returns An object with detected types, confidence scores and the most likely one
*/
static detectEncoding(input, depth = 0) {
const MAX_DEPTH = 3;
if (depth > MAX_DEPTH || !input || input.length < 2) {
return {
types: ["plainText"],
mostLikely: "plainText",
confidence: 1.0,
};
}
const detectionScores = {};
const utils = NehonixCommonUtils;
const isValidUrl = NehonixCoreUtils.isValidUrl(input, NDS.default_checkurl_opt);
// First, check for mixed encoding patterns
const percentEncodedSegments = input.match(/%[0-9A-Fa-f]{2}/g);
const hasPercentEncodedParts = percentEncodedSegments && percentEncodedSegments.length > 0;
// Check what percentage of the input is percent-encoded
if (hasPercentEncodedParts) {
const encodedCharCount = percentEncodedSegments.length * 3; // Each %XX is 3 chars
const encodedRatio = encodedCharCount / input.length;
if (encodedRatio > 0.8) {
// Mostly percent-encoded
detectionScores["percentEncoding"] = 0.9;
}
else {
// Partially percent-encoded
detectionScores["partialPercentEncoding"] = 0.75 + encodedRatio * 0.2;
// Also recognize it's still partially plain text
detectionScores["plainText"] = 0.5 + (1 - encodedRatio) * 0.4;
}
}
// Special handling for URLs
try {
if (isValidUrl) {
// URL parameters may have individual encodings
const url = new URL(input);
if (url.search && url.search.length > 1) {
// Track URL parameter encodings
let hasEncodedParams = false;
for (const [_, value] of new URLSearchParams(url.search)) {
// Check for common encodings in parameter values
if (/%[0-9A-Fa-f]{2}/.test(value)) {
detectionScores["percentEncoding"] = Math.max(detectionScores["percentEncoding"] || 0, 0.85);
hasEncodedParams = true;
}
if (/^[A-Za-z0-9+\/=]{4,}$/.test(value)) {
detectionScores["base64"] = Math.max(detectionScores["base64"] || 0, 0.82);
hasEncodedParams = true;
}
if (/^[0-9A-Fa-f]+$/.test(value) && value.length % 2 === 0) {
detectionScores["rawHexadecimal"] = Math.max(detectionScores["rawHexadecimal"] || 0, 0.8);
hasEncodedParams = true;
}
if (/\\u[0-9A-Fa-f]{4}/.test(value)) {
detectionScores["unicode"] = Math.max(detectionScores["unicode"] || 0, 0.85);
hasEncodedParams = true;
}
if (/\\x[0-9A-Fa-f]{2}/.test(value)) {
detectionScores["jsEscape"] = Math.max(detectionScores["jsEscape"] || 0, 0.83);
hasEncodedParams = true;
}
}
if (hasEncodedParams) {
detectionScores["url"] = 0.9; // High confidence this is a URL with encoded params
}
}
}
}
catch (e) {
// URL parsing failed, continue with normal detection
}
// Standard encoding detection checks
const detectionChecks = [
{ type: "doublepercent", fn: utils.isDoublePercent, score: 0.95 },
{
type: "percentEncoding",
fn: utils.isPercentEncoding,
score: 0.9,
partialDetectionFn: (s) => {
const matches = s.match(/%[0-9A-Fa-f]{2}/g);
const isPartial = matches !== null && matches.length > 0;
const ratio = isPartial ? (matches.length * 3) / s.length : 0;
return { isPartial, ratio };
},
},
{
type: "base64",
fn: utils.isBase64,
score: 0.9,
minLength: 4,
partialDetectionFn: (s) => {
const base64Segments = s.match(/[A-Za-z0-9+\/=]{4,}/g);
const isPartial = base64Segments !== null &&
base64Segments.some((seg) => seg.length >= 4);
let totalBase64Length = 0;
if (isPartial && base64Segments) {
totalBase64Length = base64Segments.reduce((sum, seg) => sum + seg.length, 0);
}
return { isPartial, ratio: totalBase64Length / s.length };
},
},
{
type: "urlSafeBase64",
fn: utils.isUrlSafeBase64,
score: 0.93,
minLength: 4,
},
{ type: "base32", fn: utils.isBase32, score: 0.88, minLength: 8 },
{ type: "asciihex", fn: utils.isAsciiHex, score: 0.85 },
{ type: "asciioct", fn: utils.isAsciiOct, score: 0.85 },
{
type: "hex",
fn: utils.isHex,
score: 0.8,
minLength: 6,
partialDetectionFn: (s) => {
const hexSegments = s.match(/[0-9A-Fa-f]{6,}/g);
const isPartial = hexSegments !== null && hexSegments.length > 0;
let totalHexLength = 0;
if (isPartial && hexSegments) {
totalHexLength = hexSegments.reduce((sum, seg) => sum + seg.length, 0);
}
return { isPartial, ratio: totalHexLength / s.length };
},
},
{
type: "rawHexadecimal",
fn: utils.hasRawHexString,
score: 0.85,
minLength: 4,
},
{
type: "unicode",
fn: utils.isUnicode,
score: 0.8,
partialDetectionFn: (s) => {
const unicodeMatches = s.match(/\\u[0-9A-Fa-f]{4}/g);
const isPartial = unicodeMatches !== null && unicodeMatches.length > 0;
let totalUnicodeLength = 0;
if (isPartial && unicodeMatches) {
totalUnicodeLength = unicodeMatches.reduce((sum, seg) => sum + seg.length, 0);
}
return { isPartial, ratio: totalUnicodeLength / s.length };
},
},
{
type: "htmlEntity",
fn: utils.isHtmlEntity,
score: 0.8,
partialDetectionFn: (s) => {
const entityMatches = s.match(/&[a-zA-Z]+;|&#[0-9]+;|&#x[0-9a-fA-F]+;/g);
const isPartial = entityMatches !== null && entityMatches.length > 0;
let totalEntityLength = 0;
if (isPartial && entityMatches) {
totalEntityLength = entityMatches.reduce((sum, seg) => sum + seg.length, 0);
}
return { isPartial, ratio: totalEntityLength / s.length };
},
},
{ type: "decimalHtmlEntity", fn: utils.isDecimalHtmlEntity, score: 0.83 },
{ type: "quotedPrintable", fn: utils.isQuotedPrintable, score: 0.77 },
{ type: "punycode", fn: utils.isPunycode, score: 0.9 },
{ type: "rot13", fn: utils.isRot13.bind(utils), score: 0.9 },
{ type: "utf7", fn: utils.isUtf7, score: 0.75 },
{
type: "jsEscape",
fn: utils.isJsEscape,
score: 0.8,
partialDetectionFn: (s) => {
const jsEscapeMatches = s.match(/\\x[0-9A-Fa-f]{2}|\\u[0-9A-Fa-f]{4}|\\[0-7]{3}/g);
const isPartial = jsEscapeMatches !== null && jsEscapeMatches.length > 0;
let totalEscapeLength = 0;
if (isPartial && jsEscapeMatches) {
totalEscapeLength = jsEscapeMatches.reduce((sum, seg) => sum + seg.length, 0);
}
return { isPartial, ratio: totalEscapeLength / s.length };
},
},
{ type: "cssEscape", fn: utils.isCssEscape, score: 0.78 },
{ type: "jwt", fn: utils.hasJWTFormat, score: 0.95, minLength: 15 },
];
for (const { type, fn, score, minLength, partialDetectionFn, } of detectionChecks) {
// Skip checks if input is too short for this encoding
if (minLength && input.length < minLength)
continue;
try {
// First, try full detection
if (fn(input)) {
detectionScores[type] = score;
// Try to verify by decoding and checking result
try {
const decoded = NDS.decodeSingle(input, type);
if (decoded && decoded !== input) {
// Calculate how "sensible" the decoded result is
const printableChars = decoded.replace(/[^\x20-\x7E]/g, "").length;
const printableRatio = printableChars / decoded.length;
if (printableRatio > 0.8) {
// Boost confidence for successful decoding
detectionScores[type] += 0.05;
}
else if (printableRatio < 0.5) {
// Reduce confidence for gibberish output
detectionScores[type] -= 0.1;
}
}
}
catch (_) {
// Failed to decode, reduce confidence slightly
detectionScores[type] -= 0.1;
}
}
// Then, try partial detection if available
else if (partialDetectionFn) {
const partialResult = partialDetectionFn(input);
if (partialResult.isPartial) {
// Calculate confidence based on the ratio of encoded content
const partialConfidence = 0.6 + partialResult.ratio * 0.3;
detectionScores[`partial${type.charAt(0).toUpperCase() + type.slice(1)}`] = partialConfidence;
// If a significant portion is encoded, try to decode those parts
if (partialResult.ratio > 0.3) {
try {
const partialDecode = NDS.tryPartialDecode(input, type);
if (partialDecode.success) {
// Successful partial decoding boosts confidence
detectionScores[`partial${type.charAt(0).toUpperCase() + type.slice(1)}`] += 0.05;
}
}
catch (_) {
// Partial decoding failed, continue
}
}
}
}
}
catch (e) {
// Skip failed detection checks
}
}
// Try recursive nested encoding detection if we're still shallow
if (depth < MAX_DEPTH) {
const nested = NDS.detectNestedEncoding(input, depth + 1);
if (nested.isNested) {
const nestedKey = `nested:${nested.outerType}+${nested.innerType}`;
detectionScores[nestedKey] = nested.confidenceScore;
}
}
// Check for mixed encoding patterns
if (Object.keys(detectionScores).length > 1) {
// If we have multiple different encodings, it might be mixed
const encodingTypes = Object.keys(detectionScores);
if (encodingTypes.some((type) => type.startsWith("partial"))) {
detectionScores["mixedEncoding"] = 0.85; // High confidence this is mixed encoding
}
}
// Fallback: plain text if no encodings detected
if (Object.keys(detectionScores).length === 0) {
detectionScores["plainText"] = 1.0;
}
else {
// Always include plainText as a possibility with appropriate confidence
// The more encoded it seems, the less likely it's plain text
const maxNonPlainTextScore = Math.max(...Object.entries(detectionScores)
.filter(([type]) => type !== "plainText")
.map(([_, score]) => score));
if (maxNonPlainTextScore < 0.8) {
// If other encoding confidence is low, plain text is still likely
detectionScores["plainText"] = 1.0 - maxNonPlainTextScore;
}
}
// Sort by confidence
const sorted = Object.entries(detectionScores).sort((a, b) => b[1] - a[1]);
// Build the result
const result = {
types: sorted.map(([type]) => type),
mostLikely: sorted[0][0],
confidence: sorted[0][1],
};
// Add partial encoding info if detected
const partialEncodings = sorted
.filter(([type]) => type.startsWith("partial"))
.map(([type, score]) => ({
type: type.replace("partial", "").toLowerCase(),
confidence: score,
}));
if (partialEncodings.length > 0) {
result.partialEncodings = partialEncodings;
}
// Include nested encoding info if available
if (depth < MAX_DEPTH) {
const nested = NDS.detectNestedEncoding(input, depth + 1);
if (nested.isNested) {
result.isNested = true;
if (nested.outerType && nested.innerType)
result.nestedTypes = [nested.outerType, nested.innerType];
}
}
return result;
}
/**
* Attempts to decode parts of a string that appear to be encoded
* @param input The potentially partially encoded string
* @param encodingType The encoding type to try
* @returns Object indicating success and decoded parts
*/
static tryPartialDecode(input, encodingType) {
try {
switch (encodingType) {
case "percentEncoding":
// Replace percent-encoded segments
return {
success: true,
decoded: input.replace(/%[0-9A-Fa-f]{2}/g, (match) => {
try {
return decodeURIComponent(match);
}
catch {
return match;
}
}),
};
case "htmlEntity":
// Replace HTML entities
return {
success: true,
decoded: input.replace(/&[a-zA-Z]+;|&#[0-9]+;|&#x[0-9a-fA-F]+;/g, (match) => {
try {
const tempEl = document.createElement("div");
tempEl.innerHTML = match;
return tempEl.textContent || match;
}
catch {
return match;
}
}),
};
case "unicode":
// Replace Unicode escape sequences
return {
success: true,
decoded: input.replace(/\\u[0-9A-Fa-f]{4}/g, (match) => {
try {
return String.fromCharCode(parseInt(match.slice(2), 16));
}
catch {
return match;
}
}),
};
case "jsEscape":
// Replace JavaScript escape sequences
return {
success: true,
decoded: input.replace(/\\x[0-9A-Fa-f]{2}|\\u[0-9A-Fa-f]{4}|\\[0-7]{3}/g, (match) => {
try {
return JSON.parse(`"${match}"`);
}
catch {
return match;
}
}),
};
default:
return { success: false };
}
}
catch (e) {
return { success: false };
}
}
/**
* Helper function to detect nested encodings
* @param input The string to analyze
* @param depth Current recursion depth
* @returns Information about detected nested encodings
*/
static detectNestedEncoding(input, depth = 0) {
// Implementation similar to the original, with improved detection for partial encodings
const MAX_DEPTH = 3;
if (depth > MAX_DEPTH) {
return { isNested: false, confidenceScore: 0 };
}
try {
// First identify the most likely outer encoding
const outerResult = NDS.detectEncoding(input, depth);
if (outerResult.mostLikely === "plainText") {
return { isNested: false, confidenceScore: 0 };
}
// Try to decode the outer layer
let decoded;
try {
decoded = NDS.decodeSingle(input, outerResult.mostLikely);
}
catch (e) {
return { isNested: false, confidenceScore: 0 };
}
if (!decoded || decoded === input) {
return { isNested: false, confidenceScore: 0 };
}
// Check for inner encoding in the decoded result
const innerResult = NDS.detectEncoding(decoded, depth + 1);
if (innerResult.mostLikely === "plainText") {
return { isNested: false, confidenceScore: 0 };
}
// Validate by trying to decode both layers
try {
const fullyDecoded = NDS.decodeSingle(decoded, innerResult.mostLikely);
if (fullyDecoded &&
fullyDecoded !== decoded &&
fullyDecoded !== input) {
// Calculate confidence based on how "clean" the decoded result is
const printableRatio = fullyDecoded.replace(/[^\x20-\x7E]/g, "").length /
fullyDecoded.length;
const confidenceBoost = printableRatio > 0.8 ? 0.1 : 0;
return {
isNested: true,
outerType: outerResult.mostLikely,
innerType: innerResult.mostLikely,
confidenceScore: Math.min(0.95, outerResult.confidence * 0.7 +
innerResult.confidence * 0.3 +
confidenceBoost),
};
}
}
catch (e) {
// Decoding failed, probably not nested
}
return { isNested: false, confidenceScore: 0 };
}
catch (e) {
return { isNested: false, confidenceScore: 0 };
}
}
//new
/**
* Decodes ROT13 encoded text
*/
static decodeRot13(input) {
return input.replace(/[a-zA-Z]/g, (char) => {
const code = char.charCodeAt(0);
// For uppercase letters (A-Z)
if (code >= 65 && code <= 90) {
return String.fromCharCode(((code - 65 + 13) % 26) + 65);
}
// For lowercase letters (a-z)
else if (code >= 97 && code <= 122) {
return String.fromCharCode(((code - 97 + 13) % 26) + 97);
}
return char;
});
}
/**
* Decodes Base32 encoded text
*/
static decodeBase32(input) {
// Base32 alphabet (RFC 4648)
const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
// Remove padding characters and whitespace
const cleanInput = input
.toUpperCase()
.replace(/=+$/, "")
.replace(/\s/g, "");
let bits = "";
let result = "";
// Convert each character to its 5-bit binary representation
for (let i = 0; i < cleanInput.length; i++) {
const char = cleanInput[i];
const index = alphabet.indexOf(char);
if (index === -1)
throw new Error(`Invalid Base32 character: ${char}`);
// Convert to 5-bit binary
bits += index.toString(2).padStart(5, "0");
}
// Process 8 bits at a time to construct bytes
for (let i = 0; i + 8 <= bits.length; i += 8) {
const byte = bits.substring(i, i + 8);
result += String.fromCharCode(parseInt(byte, 2));
}
return result;
}
/**
* Decodes URL-safe Base64 encoded text
*/
static decodeUrlSafeBase64(input) {
// Convert URL-safe characters back to standard Base64
const standardBase64 = input
.replace(/-/g, "+")
.replace(/_/g, "/")
.replace(/=+$/, ""); // Remove padding if present
// Add padding if needed
let padded = standardBase64;
while (padded.length % 4 !== 0) {
padded += "=";
}
return NehonixCommonUtils.decodeB64(padded);
}
/**
* Decodes JavaScript escape sequences
*/
static decodeJsEscape(input) {
if (!input.includes("\\"))
return input;
try {
// Handle various JavaScript escape sequences
return input.replace(/\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|[0-7]{1,3}|.)/g, (match, escape) => {
if (escape.startsWith("x")) {
// Hex escape \xFF
return String.fromCharCode(parseInt(escape.substring(1), 16));
}
else if (escape.startsWith("u")) {
// Unicode escape \uFFFF
return String.fromCharCode(parseInt(escape.substring(1), 16));