UNPKG

@himorishige/noren-core

Version:

Core PII detection, masking, and tokenization library built on Web Standards

59 lines (58 loc) 2.29 kB
// Stream utilities for PII redaction with binary data support import { redactText } from './index.js'; import { isBinaryChunk } from './utils.js'; /** * Create a TransformStream that processes text chunks for PII redaction * while preserving binary chunks unchanged */ export function createRedactionTransform(registry, options = {}) { const { window = 96, policy = {} } = options; const dec = new TextDecoder(); const enc = new TextEncoder(); let tail = ''; let tailIsBinary = false; return new TransformStream({ async transform(chunk, controller) { // Check if current chunk is binary data if (isBinaryChunk(chunk)) { // If we have text tail, process it first if (tail && !tailIsBinary) { const redacted = await redactText(registry, tail, policy); controller.enqueue(enc.encode(redacted)); tail = ''; } // Pass binary chunk through unchanged controller.enqueue(chunk); return; } try { // Decode as text const text = dec.decode(chunk, { stream: true }); const buf = tail + text; const cut = Math.max(0, buf.length - window); const head = buf.slice(0, cut); if (head) { const redacted = await redactText(registry, head, policy); controller.enqueue(enc.encode(redacted)); } tail = buf.slice(cut); tailIsBinary = false; } catch (_error) { // If text decoding fails, treat as binary if (tail && !tailIsBinary) { const redacted = await redactText(registry, tail, policy); controller.enqueue(enc.encode(redacted)); tail = ''; } controller.enqueue(chunk); } }, async flush(controller) { if (tail && !tailIsBinary) { const redacted = await redactText(registry, tail, policy); controller.enqueue(enc.encode(redacted)); } }, }); }