/*
 * utf8-sanitize
 * Version:
 * A performant zero-dependency utility to clean UTF-8 text, fix mojibake from latin1, verify string length, and sanitize input
 * 83 lines (72 loc) • 3.7 kB
 * JavaScript
 */
// test util
const test = require('node:test');
const assert = require('node:assert');
const {
FixLatin1Corrupt,
VerifyByteLength,
SanitizeInput,
FullSanitize,
MAX_SAFE_CHAR_LIMIT,
} = require('../index.js');
// --- Test Suite for FixLatin1Corrupt ---
/**
 * Test suite for FixLatin1Corrupt.
 *
 * Checks two things:
 *   1. a plain ASCII string is returned unchanged;
 *   2. each UTF-8-read-as-single-byte ("mojibake") fixture is turned back into
 *      the intended text.
 *
 * The corrupt inputs are written with \u escapes on purpose: literal mojibake
 * in a source file is easily "repaired" by editors, formatters or text
 * pipelines, which silently turns `input` into `expected` and makes the test
 * pass even when the function does nothing.
 *
 * NOTE(review): the accented-letter inputs (C3 xx sequences) are unambiguous.
 * The punctuation inputs (E2 80 xx / E2 84 xx sequences) were reconstructed
 * using the Windows-1252 mapping for bytes 0x80-0x9F; the closing curly quote
 * keeps U+FFFD for the unmapped byte 0x9D. Confirm these against the
 * package's own fixtures.
 */
test.describe('FixLatin1Corrupt', () => {
  test('should not alter correct strings', () => {
    const correct = 'This is a correct string.';
    assert.strictEqual(FixLatin1Corrupt(correct), correct);
  });

  const corruptionTests = [
    // C3 A7 -> U+00C3 U+00A7
    { desc: 'Basic accented characters', input: 'Fran\u00C3\u00A7ois', expected: 'François' },
    // C3 A9 -> U+00C3 U+00A9
    { desc: 'Accented café', input: 'caf\u00C3\u00A9', expected: 'café' },
    // C3 B1 -> U+00C3 U+00B1
    { desc: 'Spanish ñ', input: 'El Ni\u00C3\u00B1o', expected: 'El Niño' },
    // E2 80 9C -> U+00E2 U+20AC U+0153; E2 80 9D -> U+00E2 U+20AC U+FFFD
    { desc: 'Curly quotes', input: '\u00E2\u20AC\u0153quoted\u00E2\u20AC\uFFFD', expected: '“quoted”' },
    // E2 80 99 -> U+00E2 U+20AC U+2122
    { desc: 'Curly apostrophe', input: 'that\u00E2\u20AC\u2122s right', expected: 'that’s right' },
    // E2 80 93 -> U+00E2 U+20AC U+201C; E2 80 94 -> U+00E2 U+20AC U+201D
    { desc: 'En and Em dashes', input: '\u00E2\u20AC\u201C and \u00E2\u20AC\u201D', expected: '– and —' },
    // E2 84 A2 -> U+00E2 U+201E U+00A2
    { desc: 'Trademark symbol', input: '80\u00E2\u20AC\u2122s music\u00E2\u201E\u00A2', expected: '80’s music™' },
    // C3 BC -> U+00C3 U+00BC
    { desc: 'German Umlaut', input: 'K\u00C3\u00BCnstler', expected: 'Künstler' },
    // E2 80 A6 -> U+00E2 U+20AC U+00A6
    { desc: 'Ellipsis', input: '\u00E2\u20AC\u00A6 etc.', expected: '… etc.' },
    {
      desc: 'Full sample sentence',
      input:
        'Fran\u00C3\u00A7ois said \u00E2\u20AC\u0153caf\u00C3\u00A9\u00E2\u20AC\uFFFD. That\u00E2\u20AC\u2122s all.',
      expected: 'François said “café”. That’s all.',
    },
  ];

  for (const { desc, input, expected } of corruptionTests) {
    test(`should repair: ${desc}`, () => {
      // Guard the fixture itself: if input already equals expected, the
      // assertion below would pass for a function that repairs nothing.
      assert.notStrictEqual(input, expected, `fixture "${desc}" is not corrupt`);
      assert.strictEqual(FixLatin1Corrupt(input), expected);
    });
  }
});
// --- Test Suite for VerifyByteLength ---
/**
 * Test suite for VerifyByteLength.
 *
 * Covers three expectations of the function's boolean result:
 *   - `true` for an ordinary string,
 *   - `false` for null, undefined, a number and a plain object,
 *   - `true` for strings whose UTF-16 length matches their `split('')` length.
 */
test.describe('VerifyByteLength', () => {
  test('should return true for valid strings', () => {
    const verdict = VerifyByteLength('hello');
    assert.strictEqual(verdict, true, 'Should pass for a simple string');
  });

  test('should return false for non-string or invalid input', () => {
    // [value, assertion message] pairs, checked in this order.
    const rejectedInputs = [
      [null, 'Should fail for null'],
      [undefined, 'Should fail for undefined'],
      [123, 'Should fail for a number'],
      [{}, 'Should fail for an object'],
    ];
    for (const [value, message] of rejectedInputs) {
      assert.strictEqual(VerifyByteLength(value), false, message);
    }
  });

  test('should return true for strings with valid structural integrity', () => {
    // For any JavaScript string, `s.split('').length === s.length`, because
    // both count UTF-16 code units: 'abc' gives 3 and 3, and the emoji gives
    // 2 and 2 (one surrogate pair).
    // NOTE(review): presumably this targets such a check inside
    // VerifyByteLength — the implementation is not visible from this file;
    // confirm against index.js.
    const acceptedInputs = [
      ['abc', 'Should pass for simple ASCII'],
      ['👍', 'Should pass for multi-byte Unicode characters'],
    ];
    for (const [value, message] of acceptedInputs) {
      assert.strictEqual(VerifyByteLength(value), true, message);
    }
  });
});
// --- Test Suite for SanitizeInput ---
/**
 * SanitizeInput, `html` mode: markup in the input must not survive as markup.
 *
 * An expected value equal to the raw `<script>` input would assert that no
 * sanitisation happens at all. This test pins the properties that matter
 * instead: the result is a string, it differs from the input, and it contains
 * no raw angle brackets.
 *
 * NOTE(review): the exact escaped spelling (e.g. `&lt;` / `&quot;` versus
 * numeric entities) is not visible from this file; once confirmed against
 * index.js, tighten this to a strict equality on the full escaped string.
 */
test('SanitizeInput should sanitize for HTML', () => {
  const input = '<script>alert("XSS")</script>';
  const sanitized = SanitizeInput(input, { mode: 'html' });

  assert.strictEqual(typeof sanitized, 'string');
  assert.notStrictEqual(sanitized, input, 'HTML mode must change input that contains markup');
  assert.doesNotMatch(sanitized, /[<>]/, 'No raw angle brackets may remain after HTML sanitising');
});
// --- Test Suite for the Full Pipeline ---
/**
 * FullSanitize pipeline: the mojibake is repaired first, then the markup is
 * sanitised for HTML.
 *
 * The input starts with the two-character corruption of "é" (UTF-8 bytes
 * C3 A9 read as single-byte characters), written with \u escapes so tooling
 * cannot silently repair the fixture. The test checks both halves of the
 * pipeline: the text begins with the repaired "é ", and no raw angle brackets
 * remain.
 *
 * NOTE(review): the exact escaped spelling of the tag is not visible from
 * this file; tighten to a strict equality once confirmed against index.js.
 */
test('FullSanitize should repair, then sanitize a string', () => {
  const input = '\u00C3\u00A9 <script>go</script>';
  const result = FullSanitize(input, { mode: 'html' });

  assert.strictEqual(typeof result, 'string');
  // '\u00E9' is "é": the repair step must have run before sanitising.
  assert.ok(result.startsWith('\u00E9 '), 'Mojibake prefix should be repaired to "é "');
  assert.doesNotMatch(result, /[<>]/, 'No raw angle brackets may remain after HTML sanitising');
});
// --- Test valid MAX_SAFE_CHAR_LIMIT---
// Pins the exported MAX_SAFE_CHAR_LIMIT to its exact numeric value, so any
// change to the constant has to be made deliberately here as well.
test('Value test for MAX_SAFE_CHAR_LIMIT', () => {
  const pinnedLimit = 268_435_400;
  assert.strictEqual(MAX_SAFE_CHAR_LIMIT, pinnedLimit);
});