mixpart
Version:
High-performance streaming multipart/mixed parser for Node.js
477 lines (475 loc) • 15.7 kB
JavaScript
;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// src/index.ts
var index_exports = {};
__export(index_exports, {
MultipartParseError: () => MultipartParseError,
extractBoundary: () => extractBoundary,
parseMultipartStream: () => parseMultipartStream
});
module.exports = __toCommonJS(index_exports);
// src/parser.ts
var MultipartParseError = class extends Error {
constructor(message) {
super(message);
this.name = "MultipartParseError";
}
};
function createSearch(pattern) {
const needle = new TextEncoder().encode(pattern);
return (haystack, start = 0) => Buffer.prototype.indexOf.call(haystack, needle, start);
}
function createPartialTailSearch(pattern) {
const needle = new TextEncoder().encode(pattern);
const byteIndexes = {};
for (let i = 0; i < needle.length; ++i) {
const byte = needle[i];
if (byteIndexes[byte] === void 0) byteIndexes[byte] = [];
byteIndexes[byte].push(i);
}
return function(haystack) {
const haystackEnd = haystack.length - 1;
if (haystack[haystackEnd] in byteIndexes) {
const indexes = byteIndexes[haystack[haystackEnd]];
for (let i = indexes.length - 1; i >= 0; --i) {
for (let j = indexes[i], k = haystackEnd; j >= 0 && haystack[k] === needle[j]; --j, --k) {
if (j === 0) return k;
}
}
}
return -1;
};
}
function parseHeaders(headerBytes) {
const headerText = new TextDecoder("iso-8859-1").decode(headerBytes);
const lines = headerText.trim().split(/\r?\n/);
const headerInit = [];
for (const line of lines) {
const colonIndex = line.indexOf(":");
if (colonIndex > 0) {
const name = line.slice(0, colonIndex).trim();
const value = line.slice(colonIndex + 1).trim();
headerInit.push([name, value]);
}
}
return new Headers(headerInit);
}
function extractBoundary(contentType) {
const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|([^;]+))/i);
if (!boundaryMatch) {
throw new MultipartParseError("No boundary found in Content-Type header");
}
return boundaryMatch[1] ?? boundaryMatch[2];
}
var AsyncMessageQueue = class {
queue = [];
waiters = [];
finished = false;
cancelled = false;
error = null;
/**
* Producer: Enqueue a message for consumption
*/
enqueue(message) {
if (this.finished || this.cancelled) return;
if (this.waiters.length > 0) {
const waiter = this.waiters.shift();
waiter.resolve(message);
} else {
this.queue.push(message);
}
}
/**
* Producer: Signal completion (with optional error)
*/
finish(error) {
if (this.finished) return;
this.finished = true;
this.error = error || null;
while (this.waiters.length > 0) {
const waiter = this.waiters.shift();
if (error) {
waiter.reject(error);
} else {
waiter.resolve(null);
}
}
}
/**
* Consumer: Cancel the queue (stops accepting new messages and notifies waiters)
*/
cancel() {
if (this.cancelled || this.finished) return;
this.cancelled = true;
while (this.waiters.length > 0) {
const waiter = this.waiters.shift();
waiter.resolve(null);
}
}
/**
* Consumer: Dequeue next message (or null if finished/cancelled)
*/
async dequeue() {
if (this.queue.length > 0) {
return this.queue.shift();
}
if (this.finished || this.cancelled) {
if (this.error) throw this.error;
return null;
}
return new Promise((resolve, reject) => {
this.waiters.push({ resolve, reject });
});
}
/**
* Check if the queue is in a terminal state
*/
get isTerminal() {
return this.finished || this.cancelled;
}
};
async function* parseMultipartStream(response, options) {
if (!response.body) {
throw new MultipartParseError("Response body is null");
}
const contentType = response.headers.get("content-type");
if (!contentType) {
throw new MultipartParseError("Missing Content-Type header");
}
const boundary = extractBoundary(contentType);
const parser = new StreamingMultipartParser(boundary, options);
yield* parser.parseStream(response.body);
}
var StreamingMultipartParser = class {
boundary;
findOpeningBoundary;
openingBoundaryLength;
findBoundary;
findPartialTailBoundary;
boundaryLength;
findDoubleNewline;
// Safety limits
maxHeaderSize;
maxBoundaryBuffer;
state = 0 /* Start */;
buffer = null;
currentHeaders = new Headers();
currentPayloadController = null;
constructor(boundary, options = {}) {
this.boundary = boundary;
this.findOpeningBoundary = createSearch(`--${boundary}`);
this.openingBoundaryLength = 2 + boundary.length;
this.findBoundary = createSearch(`\r
--${boundary}`);
this.findPartialTailBoundary = createPartialTailSearch(`\r
--${boundary}`);
this.boundaryLength = 4 + boundary.length;
this.findDoubleNewline = createSearch("\r\n\r\n");
this.maxHeaderSize = options.maxHeaderSize ?? 65536;
this.maxBoundaryBuffer = options.maxBoundaryBuffer ?? 8192;
}
async *parseStream(stream) {
const reader = stream.getReader();
const messageQueue = new AsyncMessageQueue();
const producer = this.startProducer(reader, messageQueue);
try {
yield* this.consumeMessages(messageQueue);
} finally {
messageQueue.cancel();
this.closeCurrentPayload();
try {
await reader.cancel();
} catch (error) {
}
await producer;
}
}
/**
* Producer: Continuously read chunks and parse messages
*/
async startProducer(reader, messageQueue) {
try {
while (!messageQueue.isTerminal) {
let result;
try {
result = await reader.read();
} catch (readError) {
if (readError instanceof Error && (readError.name === "AbortError" || readError.constructor.name === "AbortError" || readError.name === "TimeoutError" || readError.constructor.name === "TimeoutError")) {
break;
}
throw readError;
}
const { done, value } = result;
if (done) {
if (this.buffer !== null && this.buffer.length > 0) {
const messages2 = this.write(new Uint8Array(0));
for (const message of messages2) {
if (messageQueue.isTerminal) break;
messageQueue.enqueue(message);
}
}
if (this.state !== 4 /* Done */) {
if (this.state === 0 /* Start */) {
throw new MultipartParseError(
"Invalid multipart stream: missing initial boundary"
);
}
throw new MultipartParseError("Unexpected end of stream");
}
break;
}
if (!(value instanceof Uint8Array)) {
throw new MultipartParseError(
`Invalid chunk type: expected Uint8Array, got ${typeof value}`
);
}
const messages = this.write(value);
for (const message of messages) {
if (messageQueue.isTerminal) break;
messageQueue.enqueue(message);
}
}
if (!messageQueue.isTerminal) {
messageQueue.finish();
}
} catch (error) {
this.closeCurrentPayload(error);
if (!messageQueue.isTerminal) {
messageQueue.finish(error);
}
} finally {
try {
reader.releaseLock();
} catch (error) {
}
}
}
/**
* Consumer: Yield messages from the queue
*/
async *consumeMessages(messageQueue) {
while (true) {
const message = await messageQueue.dequeue();
if (message === null) {
break;
}
yield message;
}
}
/**
* Process a chunk of data through the state machine and return any complete messages.
*
* Returns an array because a single chunk can contain multiple complete messages
* when small messages with headers + body + boundary all fit in one network chunk.
* All messages must be captured and queued to maintain proper message ordering.
*/
write(chunk) {
const newMessages = [];
if (this.state === 4 /* Done */) {
throw new MultipartParseError("Unexpected data after end of stream");
}
let index = 0;
let chunkLength = chunk.length;
if (this.buffer !== null) {
const newSize = this.buffer.length + chunkLength;
const maxAllowedSize = this.state === 2 /* Header */ ? this.maxHeaderSize : this.maxBoundaryBuffer;
if (newSize > maxAllowedSize) {
throw new MultipartParseError(
`Buffer size limit exceeded: ${newSize} bytes > ${maxAllowedSize} bytes. This may indicate malformed multipart data with ${this.state === 2 /* Header */ ? "oversized headers" : "invalid boundaries"}.`
);
}
const newChunk = new Uint8Array(newSize);
newChunk.set(this.buffer, 0);
newChunk.set(chunk, this.buffer.length);
chunk = newChunk;
chunkLength = chunk.length;
this.buffer = null;
}
if (chunkLength === 0 && this.state === 0 /* Start */) {
throw new MultipartParseError(
"Invalid multipart stream: missing initial boundary"
);
}
while (true) {
if (this.state === 3 /* Body */) {
if (chunkLength - index < this.boundaryLength) {
const remainingData = chunk.subarray(index);
if (remainingData.length > this.maxBoundaryBuffer) {
throw new MultipartParseError(
`Boundary buffer limit exceeded: ${remainingData.length} > ${this.maxBoundaryBuffer}`
);
}
this.buffer = remainingData;
break;
}
const boundaryIndex = this.findBoundary(chunk, index);
if (boundaryIndex === -1) {
const partialTailIndex = this.findPartialTailBoundary(chunk);
if (partialTailIndex === -1) {
this.writeBody(index === 0 ? chunk : chunk.subarray(index));
} else {
this.writeBody(chunk.subarray(index, partialTailIndex));
const partialBoundary = chunk.subarray(partialTailIndex);
if (partialBoundary.length > this.maxBoundaryBuffer) {
throw new MultipartParseError(
`Partial boundary too large: ${partialBoundary.length} > ${this.maxBoundaryBuffer}`
);
}
this.buffer = partialBoundary;
}
break;
}
this.writeBody(chunk.subarray(index, boundaryIndex));
this.finishMessage();
index = boundaryIndex + this.boundaryLength;
this.state = 1 /* AfterBoundary */;
}
if (this.state === 1 /* AfterBoundary */) {
if (chunkLength - index < 2) {
const remainingData = chunk.subarray(index);
if (remainingData.length > this.maxBoundaryBuffer) {
throw new MultipartParseError(
`After-boundary buffer limit exceeded: ${remainingData.length} > ${this.maxBoundaryBuffer}`
);
}
this.buffer = remainingData;
break;
}
if (chunk[index] === 45 && chunk[index + 1] === 45) {
this.state = 4 /* Done */;
break;
}
if (chunk[index] === 13 && chunk[index + 1] === 10) {
index += 2;
} else if (chunk[index] === 10) {
index += 1;
} else {
throw new MultipartParseError(
`Invalid character after boundary: expected CRLF or LF, got 0x${chunk[index].toString(16)}`
);
}
this.state = 2 /* Header */;
}
if (this.state === 2 /* Header */) {
if (chunkLength - index < 4) {
const remainingData = chunk.subarray(index);
if (remainingData.length > this.maxHeaderSize) {
throw new MultipartParseError(
`Header buffer limit exceeded: ${remainingData.length} > ${this.maxHeaderSize}`
);
}
this.buffer = remainingData;
break;
}
let headerEndIndex = this.findDoubleNewline(chunk, index);
let headerEndOffset = 4;
if (headerEndIndex === -1) {
const lfDoubleNewline = createSearch("\n\n");
headerEndIndex = lfDoubleNewline(chunk, index);
headerEndOffset = 2;
}
if (headerEndIndex === -1) {
const headerData = chunk.subarray(index);
if (headerData.length > this.maxHeaderSize) {
throw new MultipartParseError(
`Headers too large: ${headerData.length} > ${this.maxHeaderSize} bytes`
);
}
this.buffer = headerData;
break;
}
const headerBytes = chunk.subarray(index, headerEndIndex);
this.currentHeaders = parseHeaders(headerBytes);
const message = this.createStreamingMessage();
newMessages.push(message);
index = headerEndIndex + headerEndOffset;
this.state = 3 /* Body */;
continue;
}
if (this.state === 0 /* Start */) {
if (chunkLength < this.openingBoundaryLength) {
if (chunk.length > this.maxBoundaryBuffer) {
throw new MultipartParseError(
`Initial chunk too large for boundary detection: ${chunk.length} > ${this.maxBoundaryBuffer}`
);
}
this.buffer = chunk;
break;
}
const boundaryIndex = this.findOpeningBoundary(chunk);
if (boundaryIndex !== 0) {
throw new MultipartParseError(
"Invalid multipart stream: missing initial boundary"
);
}
index = this.openingBoundaryLength;
this.state = 1 /* AfterBoundary */;
}
}
return newMessages;
}
createStreamingMessage() {
const headers = new Headers(this.currentHeaders);
const payload = new ReadableStream({
start: (controller) => {
this.currentPayloadController = controller;
}
});
this.currentHeaders = new Headers();
return {
headers,
payload
};
}
writeBody(chunk) {
if (this.currentPayloadController) {
this.currentPayloadController.enqueue(chunk);
}
}
finishMessage() {
if (this.currentPayloadController) {
this.currentPayloadController.close();
this.currentPayloadController = null;
}
}
/**
* Close current payload controller if open (used during cleanup)
* If an error is provided, forwards it to the payload consumer
*/
closeCurrentPayload(error) {
if (this.currentPayloadController) {
try {
if (error) {
this.currentPayloadController.error(error);
} else {
this.currentPayloadController.close();
}
} catch (controllerError) {
}
this.currentPayloadController = null;
}
}
};
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
MultipartParseError,
extractBoundary,
parseMultipartStream
});
//# sourceMappingURL=index.js.map