parse-multipart-data
Version:
A javascript/nodejs multipart/form-data parser which operates on raw data.
206 lines (191 loc) • 6.35 kB
text/typescript
/**
* Multipart Parser (Finite State Machine)
* usage:
* const multipart = require('./multipart.js');
* const body = multipart.DemoData(); // raw body
* const body = Buffer.from(event['body-json'].toString(),'base64'); // AWS case
* const boundary = multipart.getBoundary(event.params.header['content-type']);
* const parts = multipart.Parse(body,boundary);
* each part is:
* { filename: 'A.txt', type: 'text/plain', data: <Buffer 41 41 41 41 42 42 42 42> }
* or { name: 'key', data: <Buffer 41 41 41 41 42 42 42 42> }
*/
type Part = {
contentDispositionHeader: string
contentTypeHeader: string
part: number[]
}
type Input = {
filename?: string
name?: string
type: string
data: Buffer
}
enum ParsingState {
INIT,
READING_HEADERS,
READING_DATA,
READING_PART_SEPARATOR
}
export function parse(multipartBodyBuffer: Buffer, boundary: string): Input[] {
let lastline = ''
let contentDispositionHeader = ''
let contentTypeHeader = ''
let state: ParsingState = ParsingState.INIT
let buffer: number[] = []
const allParts: Input[] = []
let currentPartHeaders: string[] = []
for (let i = 0; i < multipartBodyBuffer.length; i++) {
const oneByte: number = multipartBodyBuffer[i]
const prevByte: number | null = i > 0 ? multipartBodyBuffer[i - 1] : null
// 0x0a => \n
// 0x0d => \r
const newLineDetected: boolean = oneByte === 0x0a && prevByte === 0x0d
const newLineChar: boolean = oneByte === 0x0a || oneByte === 0x0d
if (!newLineChar) lastline += String.fromCharCode(oneByte)
if (ParsingState.INIT === state && newLineDetected) {
// searching for boundary
if ('--' + boundary === lastline) {
state = ParsingState.READING_HEADERS // found boundary. start reading headers
}
lastline = ''
} else if (ParsingState.READING_HEADERS === state && newLineDetected) {
// parsing headers. Headers are separated by an empty line from the content. Stop reading headers when the line is empty
if (lastline.length) {
currentPartHeaders.push(lastline)
} else {
// found empty line. search for the headers we want and set the values
for (const h of currentPartHeaders) {
if (h.toLowerCase().startsWith('content-disposition:')) {
contentDispositionHeader = h
} else if (h.toLowerCase().startsWith('content-type:')) {
contentTypeHeader = h
}
}
state = ParsingState.READING_DATA
buffer = []
}
lastline = ''
} else if (ParsingState.READING_DATA === state) {
// parsing data
if (lastline.length > boundary.length + 4) {
lastline = '' // mem save
}
if ('--' + boundary === lastline) {
const j = buffer.length - lastline.length
const part = buffer.slice(0, j - 1)
allParts.push(
process({ contentDispositionHeader, contentTypeHeader, part })
)
buffer = []
currentPartHeaders = []
lastline = ''
state = ParsingState.READING_PART_SEPARATOR
contentDispositionHeader = ''
contentTypeHeader = ''
} else {
buffer.push(oneByte)
}
if (newLineDetected) {
lastline = ''
}
} else if (ParsingState.READING_PART_SEPARATOR === state) {
if (newLineDetected) {
state = ParsingState.READING_HEADERS
}
}
}
return allParts
}
// read the boundary from the content-type header sent by the http client
// this value may be similar to:
// 'multipart/form-data; boundary=----WebKitFormBoundaryvm5A9tzU1ONaGP5B',
export function getBoundary(header: string): string {
const items = header.split(';')
if (items) {
for (let i = 0; i < items.length; i++) {
const item = new String(items[i]).trim()
if (item.indexOf('boundary') >= 0) {
const k = item.split('=')
return new String(k[1]).trim().replace(/^["']|["']$/g, '')
}
}
}
return ''
}
export function DemoData(): { body: Buffer; boundary: string } {
let body = 'trash1\r\n'
body += '------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n'
body += 'Content-Type: text/plain\r\n'
body +=
'Content-Disposition: form-data; name="uploads[]"; filename="A.txt"\r\n'
body += '\r\n'
body += '@11X'
body += '111Y\r\n'
body += '111Z\rCCCC\nCCCC\r\nCCCCC@\r\n\r\n'
body += '------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n'
body += 'Content-Type: text/plain\r\n'
body +=
'Content-Disposition: form-data; name="uploads[]"; filename="B.txt"\r\n'
body += '\r\n'
body += '@22X'
body += '222Y\r\n'
body += '222Z\r222W\n2220\r\n666@\r\n'
body += '------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n'
body += 'Content-Disposition: form-data; name="input1"\r\n'
body += '\r\n'
body += 'value1\r\n'
body += '------WebKitFormBoundaryvef1fLxmoUdYZWXp--\r\n'
return {
body: Buffer.from(body),
boundary: '----WebKitFormBoundaryvef1fLxmoUdYZWXp'
}
}
function process(part: Part): Input {
// will transform this object:
// { header: 'Content-Disposition: form-data; name="uploads[]"; filename="A.txt"',
// info: 'Content-Type: text/plain',
// part: 'AAAABBBB' }
// into this one:
// { filename: 'A.txt', type: 'text/plain', data: <Buffer 41 41 41 41 42 42 42 42> }
const obj = function (str: string) {
const k = str.split('=')
const a = k[0].trim()
const b = JSON.parse(k[1].trim())
const o = {}
Object.defineProperty(o, a, {
value: b,
writable: true,
enumerable: true,
configurable: true
})
return o
}
const header = part.contentDispositionHeader.split(';')
const filenameData = header[2]
let input = {}
if (filenameData) {
input = obj(filenameData)
const contentType = part.contentTypeHeader.split(':')[1].trim()
Object.defineProperty(input, 'type', {
value: contentType,
writable: true,
enumerable: true,
configurable: true
})
}
// always process the name field
Object.defineProperty(input, 'name', {
value: header[1].split('=')[1].replace(/"/g, ''),
writable: true,
enumerable: true,
configurable: true
})
Object.defineProperty(input, 'data', {
value: Buffer.from(part.part),
writable: true,
enumerable: true,
configurable: true
})
return input as Input
}