id3-parser
Version:
A pure JavaScript id3 tag parser.
344 lines (324 loc) • 12.1 kB
text/typescript
import FARME_TYPES, { FrameTypeValueMap } from '../constants/frameTypes';
import GENRES from '../constants/genres';
import IMAGE_TYPES from '../constants/imageTypes';
import { IBytes, IID3V2Tag, ITXXXMap, IV2VersionInfo } from '../interface';
import { getEndpointOfBytes, readBytesToISO8859, readBytesToString, readBytesToUTF8, skipPaddingZeros } from '../bytesUtil';
// Minimum byte length accepted by parseV2Data: the 10-byte tag header plus one 10-byte frame header.
const V2_MIN_LENGTH = 20; // TAG HEADER(10) + ONE FRAME HEADER(10)
/**
 * Parse ID3v2 metadata from binary bytes.
 * @description
 * Reads the 10-byte tag header, skips the optional extended header and then
 * parses every frame located inside the tag.
 *
 * The tag size stored in the header excludes the 10-byte header itself but
 * includes the extended header, so the frame area always ends at `10 + tagSize`.
 * @param bytes binary bytes, expected to start with the ID3v2 tag header.
 * @returns the parsed tag, or `false` when the input is too short or does not
 * start with a valid ID3v2 header.
 * @throws Error when the tag uses unsynchronisation, which is not supported.
 */
export default function parseV2Data(bytes: IBytes): false | IID3V2Tag {
  if (!bytes || bytes.length < V2_MIN_LENGTH) {
    return false;
  }
  const tags = parseV2Header(bytes.slice(0, 10));
  if (!tags) {
    return false;
  }
  const flags = tags.version.flags;
  // Currently do not support unsynchronisation
  if (flags.unsync) {
    throw new Error('no support for unsynchronisation');
  }
  // Size of the fixed tag header that precedes everything else.
  const tagHeaderSize = 10;
  let framesStart = tagHeaderSize;
  // Skip the extended header if one exists.
  if (flags.xheader) {
    const sizeField = bytes.slice(10, 14);
    // * v2.3: the size is a plain 32-bit integer (usually 6 or 10) that does NOT
    //   count its own 4 bytes, so they have to be added.
    // * otherwise (v2.4): the size is a synchsafe integer covering the whole
    //   extended header, size field included.
    framesStart += tags.version.minor === 3
      ? calcFrameSize(sizeField) + 4
      : calcTagSize(sizeField);
  }
  const tagSize = calcTagSize(bytes.slice(6, 10));
  // tagSize already contains the extended header, so the end of the frame area
  // is measured from the end of the fixed header, not from framesStart.
  parseV2Frames(bytes.slice(framesStart, tagHeaderSize + tagSize), tags);
  return tags;
}
/**
 * Parse ID3v2 tag header.
 * @description
 * The header occupies 10 bytes laid out as:
 * $49 44 33 yy yy xx zz zz zz zz
 *
 * "$49 44 33" is the text 'ID3', yy yy are the minor version and revision
 * (each less than $FF), xx is the 'flags' byte and zz zz zz zz is the tag size
 * (each byte less than $80). The size bytes are not interpreted here.
 * @param bytes binary bytes, at least 10 of them.
 * @returns an object holding the version info, or `false` when the input is
 * shorter than 10 bytes or does not begin with 'ID3'.
 */
function parseV2Header(bytes: IBytes) {
  if (!bytes || bytes.length < 10 || readBytesToUTF8(bytes, 3) !== 'ID3') {
    return false;
  }
  const minor = bytes[3];
  const revision = bytes[4];
  const flagByte = bytes[5];
  // Tells whether the given bit of the flags byte is set.
  const hasFlag = (mask: number) => (flagByte & mask) !== 0;
  const version: IV2VersionInfo = {
    major: 2,
    minor,
    revision,
    flags: {
      unsync: hasFlag(0x80), // Unsynchronisation
      xheader: hasFlag(0x40), // Extended header
      experimental: hasFlag(0x20), // Experimental indicator
    },
  };
  return { version };
}
/**
 * Calculate the total tag size, but excluding the header size (10 bytes).
 * @description
 * Only the low 7 bits of each of the first four bytes are used; they are
 * concatenated, most significant byte first, into a 28-bit number.
 * @param bytes binary bytes; the first four are read.
 * @returns the decoded size.
 */
export function calcTagSize(bytes: IBytes): number {
  let total = 0;
  for (let index = 0; index < 4; index++) {
    // Shift what has been accumulated by 7 bits, then append the next 7 bits.
    total = total * 0x80 + (bytes[index] & 0x7f);
  }
  return total;
}
/**
 * Calculate frame size (just content size, exclude 10 bytes header size).
 * @description
 * Reads the first four bytes as an unsigned big-endian 32-bit integer.
 * @param bytes binary bytes; the first four are read.
 * @returns the decoded size, or 0 when fewer than four bytes are available.
 */
export function calcFrameSize(bytes: IBytes): number {
  if (bytes.length < 4) {
    return 0;
  }
  let size = 0;
  for (let index = 0; index < 4; index++) {
    // Multiplication instead of bit shifts keeps values above 2^31 positive.
    size = size * 0x100 + bytes[index];
  }
  return size;
}
/**
 * Walk through the frame area of a tag and store every recognised frame on `tags`.
 * @description
 * Frames are read back to back using the 10-byte frame header layout
 * (4-char ID, 4-byte size, 2 flag bytes). Walking stops at the first frame
 * whose size is 0, which is treated as the start of the padding.
 *
 * Frames whose ID maps to 'array' in FrameTypeValueMap are accumulated in an
 * array under their tag name; for any other frame a later occurrence replaces
 * an earlier one.
 * @param bytes binary bytes of the frame area (tag header already removed).
 * @param tags tag object that receives the parsed values; mutated in place.
 */
function parseV2Frames(bytes: IBytes, tags: IID3V2Tag) {
  const { version } = tags;
  let offset = 0;
  while (offset < bytes.length) {
    // The size field sits right after the 4-char frame ID.
    const contentSize = calcFrameSize(bytes.slice(offset + 4));
    // the left data would be '\u0000\u0000...', just a padding
    if (contentSize === 0) {
      break;
    }
    // * < v2.3, frame ID is 3 chars, size is 3 bytes making a total size of 6 bytes
    // * >= v2.3, frame ID is 4 chars, size is 4 bytes, flags are 2 bytes, total 10 bytes
    const frameBytes = bytes.slice(offset, offset + 10 + contentSize);
    if (frameBytes.length === 0) {
      break;
    }
    // Advance by what was actually sliced, which may be shorter than declared.
    offset += frameBytes.length;

    const frame = parseFrame(frameBytes, version.minor, contentSize);
    if (!frame.tag) {
      continue;
    }
    const key = frame.tag;
    if (FrameTypeValueMap[frame.id as string] !== 'array') {
      tags[key] = frame.value;
    } else if (tags[key]) {
      tags[key].push(frame.value);
    } else {
      tags[key] = [frame.value];
    }
  }
}
/**
 * Parse id3 frame.
 * @description
 * Declared ID3v2 frames are of different types:
 * 1. Unique file identifier
 * 2. Text information frames
 * 3. ...
 *
 * For frames that allow different types of text encoding, the first byte after header (bytes[10])
 * represents encoding. Its value is of:
 * 1. 00 <---> ISO-8859-1 (ASCII), default encoding, represented as <text string>/<full text string>
 * 2. 01 <---> UCS-2 encoded Unicode with BOM.
 * 3. 02 <---> UTF-16BE encoded Unicode without BOM.
 * 4. 03 <---> UTF-8 encoded Unicode.
 *
 * And 2-4 represented as <text string according to encoding>/<full text string according to encoding>
 *
 * Handled frames are: text frames (T***, including TXXX and TCON genre lookup), URL frames (W***),
 * COMM, USLT, APIC, IPLS and OWNE. Any other known frame gets its tag name but a `null` value.
 * @param bytes Binary bytes of one frame, starting at its 10-byte header.
 * @param minor Minor version, 2/3/4
 * @param size Frame size.
 * @returns `{ id, tag, value }`. `tag` stays `null` when the frame ID is not listed in the frame
 * type table or when the second flag byte is non-zero; callers use that to skip the frame.
 */
function parseFrame(bytes: IBytes, minor: number, size: number) {
  const result = {
    id: null as string | null,
    tag: null as string | null,
    value: null as string | object | null,
  };
  const header = {
    id: readBytesToUTF8(bytes, 4),
    type: null as string | null,
    size,
    flags: [
      bytes[8],
      bytes[9],
    ],
  };
  // The first character of the ID selects the frame family ('T' text, 'W' URL, ...).
  header.type = header.id[0];
  result.id = header.id;
  if (minor === 4) {
    // TODO: parse v2.4 frame
  }
  // No support for compressed, unsynchronised, etc frames
  if (header.flags[1] !== 0) {
    return result;
  }
  if (!(header.id in FARME_TYPES)) {
    return result;
  }
  result.tag = FARME_TYPES[header.id];
  let encoding = 0;
  let variableStart = 0;
  let variableLength = 0;
  /**
   * Text information frames, structure is:
   * <Header for 'Text information frame', ID: "T000" - "TZZZ", excluding "TXXX">
   * Text encoding $xx
   * Information <text string according to encoding>
   */
  if (header.type === 'T') {
    encoding = bytes[10];
    // If is User defined text information frame (TXXX), then we should handle specially.
    // <Header for 'User defined text information frame', ID: "TXXX" >
    // Text encoding $xx
    // Description < text string according to encoding > $00(00)
    // Value < text string according to encoding >
    if (header.id === 'TXXX') {
      variableStart = 11;
      variableLength = getEndpointOfBytes(bytes, encoding, variableStart) - variableStart;
      const value = {
        description: readBytesToString(bytes.slice(variableStart), encoding, variableLength),
        value: '',
      } as ITXXXMap;
      // Step over the description and its terminator, then any further zero bytes.
      variableStart += variableLength + 1;
      variableStart = skipPaddingZeros(bytes, variableStart);
      value.value = readBytesToString(bytes.slice(variableStart), encoding);
      result.value = value;
    } else {
      result.value = readBytesToString(bytes.slice(11), encoding);
      // Specially handle the 'Content type'.
      if (header.id === 'TCON' && result.value !== null) {
        if (result.value[0] === '(') {
          // "(n)(m)..." form: translate every numeric reference into its genre name.
          const handledTCON = result.value.match(/\(\d+\)/g);
          if (handledTCON) {
            result.value = handledTCON.map(
              (v: string) => GENRES[+v.slice(1, -1)],
            ).join(',');
          }
        } else {
          // Bare number form: translate it; anything else is kept as free text.
          const genre = parseInt(result.value, 10);
          if (!isNaN(genre)) {
            result.value = GENRES[genre];
          }
        }
      }
    }
  }
  // URL link frames
  // Always encoded as ISO-8859-1.
  else if (header.type === 'W') {
    // User defined URL link frame
    if (header.id === 'WXXX' && bytes[10] === 0) {
      result.value = readBytesToISO8859(bytes.slice(11));
    } else {
      result.value = readBytesToISO8859(bytes.slice(10));
    }
  }
  // Comments or Unsynchronised lyric/text transcription.
  /**
   * Comments frame:
   * <Header for 'Comment', ID: "COMM">
   * Text encoding $xx
   * Language $xx xx xx
   * Short content descrip. <text string according to encoding> $00 (00)
   * The actual text <full text string according to encoding>
   *
   * Unsynchronised lyrics/text transcription frame:
   * <Header for 'Unsynchronised lyrics/text transcription', ID: "USLT">
   * Text encoding $xx
   * Language $xx xx xx
   * Content descriptor <text string according to encoding> $00 (00)
   * Lyrics/text <full text string according to encoding>
   */
  else if (header.id === 'COMM' || header.id === 'USLT') {
    encoding = bytes[10];
    // 10 header bytes + 1 encoding byte + 3 language bytes.
    variableStart = 14;
    const language = readBytesToISO8859(bytes.slice(11), 3);
    variableLength = getEndpointOfBytes(bytes, encoding, variableStart) - variableStart;
    const description = readBytesToString(bytes.slice(variableStart), encoding, variableLength);
    variableStart = skipPaddingZeros(bytes, variableStart + variableLength + 1);
    result.value = {
      language,
      description,
      value: readBytesToString(bytes.slice(variableStart), encoding),
    };
  }
  /**
   * Attached picture frame, format is:
   * <Header for 'Attached picture', ID: "APIC">
   * Text encoding $xx
   * MIME type <text string> $00
   * Picture type $xx
   * Description <text string according to encoding> $00 (00)
   * Picture data <binary data>
   */
  else if (header.id === 'APIC') {
    encoding = bytes[10];
    const image = {
      type: null as string | null,
      mime: null as string | null,
      description: null as string | null,
      data: null as ArrayLike<number> | null,
    };
    variableStart = 11;
    // MIME is always encoded as ISO-8859, So always pass 0 to encoding argument.
    variableLength = getEndpointOfBytes(bytes, 0, variableStart) - variableStart;
    image.mime = readBytesToString(bytes.slice(variableStart), 0, variableLength);
    image.type = IMAGE_TYPES[bytes[variableStart + variableLength + 1]] || 'other';
    // Skip $00 and $xx(Picture type).
    variableStart += variableLength + 2;
    // Look for the $00 that ends the description. The scan is bounded by the frame
    // length so that a frame without any terminator cannot loop forever.
    let terminator = -1;
    for (let i = variableStart; i < bytes.length; i++) {
      if (bytes[i] === 0) {
        terminator = i;
        break;
      }
    }
    if (terminator === -1) {
      // Malformed frame: the description never ends, so there is no picture data to expose.
      image.data = bytes.slice(bytes.length);
    } else {
      variableLength = terminator - variableStart;
      image.description = variableLength === 0
        ? null
        : readBytesToString(bytes.slice(variableStart), encoding, variableLength);
      // check $00 at start of the image binary data
      variableStart = skipPaddingZeros(bytes, variableStart + variableLength + 1);
      image.data = bytes.slice(variableStart);
    }
    result.value = image;
  }
  /**
   * Involved people list frame:
   * <Header for 'Involved people list', ID: "IPLS">
   * Text encoding $xx
   * People list strings <text strings according to encoding>
   */
  else if (header.id === 'IPLS') {
    encoding = bytes[10];
    result.value = readBytesToString(bytes.slice(11), encoding);
  }
  /**
   * Ownership frame:
   * <Header for 'Ownership frame', ID: "OWNE">
   * Text encoding $xx
   * Price payed <text string> $00
   * Date of purch. <text string>
   * Seller <text string according to encoding>
   */
  else if (header.id === 'OWNE') {
    encoding = bytes[10];
    variableStart = 11;
    // The price is a plain <text string> ended by a single $00, so it is located with
    // encoding 0 (as done for the APIC MIME type). getEndpointOfBytes yields an absolute
    // position; subtracting the start turns it into the length of the price text.
    variableLength = getEndpointOfBytes(bytes, 0, variableStart) - variableStart;
    const pricePayed = readBytesToISO8859(bytes.slice(variableStart), variableLength);
    // Step over the price and its $00 terminator.
    variableStart += variableLength + 1;
    // The purchase date is always 8 characters long.
    const dateOfPurch = readBytesToISO8859(bytes.slice(variableStart), 8);
    variableStart += 8;
    result.value = {
      pricePayed,
      dateOfPurch,
      seller: readBytesToString(bytes.slice(variableStart), encoding),
    };
  }
  else {
    // Do nothing to other frames.
  }
  return result;
}