@lingo-reader/mobi-parser
Version:
A mobi and kf8 parser
1,349 lines (1,341 loc) • 43.4 kB
JavaScript
import { unzlibSync } from 'fflate';
import { parsexml } from '@lingo-reader/shared';
/**
 * Named HTML entities understood by `unescapeHTML`, keyed by the complete
 * entity text (leading `&` and trailing `;` included) because the lookup is
 * done with the whole regex match.
 */
const htmlEntityMap = {
  "&lt;": "<",
  "&gt;": ">",
  "&amp;": "&",
  "&quot;": '"',
  "&apos;": "'"
};

/**
 * Replace HTML character references in a string with the characters they denote.
 *
 * Handles hexadecimal (`&#x41;`) and decimal (`&#65;`) numeric references and
 * the named entities listed in `htmlEntityMap`. Anything that cannot be
 * resolved (unknown name, code point above U+10FFFF) is left untouched.
 *
 * @param {string} str - text that may contain HTML character references
 * @returns {string} the text with resolvable references replaced
 */
function unescapeHTML(str) {
  // Fast path: no reference can exist without an ampersand.
  if (!str.includes("&")) {
    return str;
  }
  return str.replace(/&(#x[\dA-Fa-f]+|#\d+|[a-zA-Z]+);/g, (match, entity) => {
    if (entity.startsWith("#")) {
      const isHex = entity.startsWith("#x");
      const codePoint = Number.parseInt(entity.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws a RangeError above U+10FFFF; keep the
      // original text instead of aborting the whole replacement.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    }
    return htmlEntityMap[match] ?? match;
  });
}
// MIME type strings used when classifying embedded resources
// (Kf8.replaceResources compares against MIME.CSS and MIME.SVG).
const MIME = {
XML: "application/xml",
XHTML: "application/xhtml+xml",
HTML: "text/html",
CSS: "text/css",
SVG: "image/svg+xml"
};
/**
 * Leading-byte signatures (lower-case hex) mapped to the type string that
 * `getFileMimeType` reports for them.
 */
const fileSignatures = {
  "ffd8ff": "image/jpeg",
  "89504e47": "image/png",
  "47494638": "image/gif",
  "424d": "image/bmp",
  "3c737667": "image/svg+xml",
  "00000018": "video/mp4",
  "00000020": "video/mp4",
  "1a45dfa3": "video/mkv",
  "1f43b675": "video/webm",
  "494433": "audio/mp3",
  "52494646": "audio/wav",
  "4f676753": "audio/ogg",
  "00010000": "font/ttf",
  "74727565": "font/ttf",
  "4f54544f": "font/otf",
  "774f4646": "font/woff",
  "774f4632": "font/woff2",
  "504c": "font/eot"
};

/**
 * Guess a resource's type from its first bytes.
 *
 * @param {Uint8Array} fileBuffer - raw resource bytes
 * @returns {string} the matching type from `fileSignatures`, or "unknown"
 */
function getFileMimeType(fileBuffer) {
  // Only the first 12 bytes are rendered to hex; every signature is shorter.
  const hexHeader = Array.from(
    fileBuffer.slice(0, 12),
    (byte) => byte.toString(16).padStart(2, "0")
  ).join("");
  const matchedSignature = Object.keys(fileSignatures).find(
    (candidate) => hexHeader.startsWith(candidate)
  );
  if (matchedSignature === undefined) {
    return "unknown";
  }
  return fileSignatures[matchedSignature];
}
/**
 * Wrap resource data in a Blob and expose it through an object URL.
 *
 * `filename` and `imageSaveDir` are accepted but not used by this body.
 *
 * @param {BlobPart} data - resource bytes or text
 * @param {string} type - MIME type given to the Blob
 * @param {string} filename - unused here
 * @param {string} imageSaveDir - unused here
 * @returns {string} an object URL referring to the Blob
 */
function saveResource(data, type, filename, imageSaveDir) {
  const blob = new Blob([data], { type });
  return URL.createObjectURL(blob);
}
// Maps the numeric `encoding` header field to a TextDecoder label
// (looked up by getDecoder).
const mobiEncoding = {
1252: "windows-1252",
65001: "utf-8"
};
// Language tags keyed by the header's `localeLanguage` byte. Within each list
// the entry is selected with `localeRegion >> 2` (see
// MobiFile.parseFirstRecord), falling back to index 0; `null` marks a region
// slot with no tag.
const mobiLang = {
1: ["ar", "ar-SA", "ar-IQ", "ar-EG", "ar-LY", "ar-DZ", "ar-MA", "ar-TN", "ar-OM", "ar-YE", "ar-SY", "ar-JO", "ar-LB", "ar-KW", "ar-AE", "ar-BH", "ar-QA"],
2: ["bg"],
3: ["ca"],
4: ["zh", "zh-TW", "zh-CN", "zh-HK", "zh-SG"],
5: ["cs"],
6: ["da"],
7: ["de", "de-DE", "de-CH", "de-AT", "de-LU", "de-LI"],
8: ["el"],
9: ["en", "en-US", "en-GB", "en-AU", "en-CA", "en-NZ", "en-IE", "en-ZA", "en-JM", null, "en-BZ", "en-TT", "en-ZW", "en-PH"],
10: ["es", "es-ES", "es-MX", null, "es-GT", "es-CR", "es-PA", "es-DO", "es-VE", "es-CO", "es-PE", "es-AR", "es-EC", "es-CL", "es-UY", "es-PY", "es-BO", "es-SV", "es-HN", "es-NI", "es-PR"],
11: ["fi"],
12: ["fr", "fr-FR", "fr-BE", "fr-CA", "fr-CH", "fr-LU", "fr-MC"],
13: ["he"],
14: ["hu"],
15: ["is"],
16: ["it", "it-IT", "it-CH"],
17: ["ja"],
18: ["ko"],
19: ["nl", "nl-NL", "nl-BE"],
20: ["no", "nb", "nn"],
21: ["pl"],
22: ["pt", "pt-BR", "pt-PT"],
23: ["rm"],
24: ["ro"],
25: ["ru"],
26: ["hr", null, "sr"],
27: ["sk"],
28: ["sq"],
29: ["sv", "sv-SE", "sv-FI"],
30: ["th"],
31: ["tr"],
32: ["ur"],
33: ["id"],
34: ["uk"],
35: ["be"],
36: ["sl"],
37: ["et"],
38: ["lv"],
39: ["lt"],
41: ["fa"],
42: ["vi"],
43: ["hy"],
44: ["az"],
45: ["eu"],
46: ["hsb"],
47: ["mk"],
48: ["st"],
49: ["ts"],
50: ["tn"],
52: ["xh"],
53: ["zu"],
54: ["af"],
55: ["ka"],
56: ["fo"],
57: ["hi"],
58: ["mt"],
59: ["se"],
62: ["ms"],
63: ["kk"],
65: ["sw"],
67: ["uz", null, "uz-UZ"],
68: ["tt"],
69: ["bn"],
70: ["pa"],
71: ["gu"],
72: ["or"],
73: ["ta"],
74: ["te"],
75: ["kn"],
76: ["ml"],
77: ["as"],
78: ["mr"],
79: ["sa"],
82: ["cy", "cy-GB"],
83: ["gl", "gl-ES"],
87: ["kok"],
97: ["ne"],
98: ["fy"]
};
// Binary layout tables consumed by getStruct. Every entry has the form
// [byte offset, byte length, "string" | "uint"], where the offset is relative
// to the start of the buffer handed to getStruct.

// Read from the first 78 bytes of the file (MobiFile.parsePdbHeader).
const pdbHeader = {
name: [0, 32, "string"],
type: [60, 4, "string"],
creator: [64, 4, "string"],
numRecords: [76, 2, "uint"]
};
// Read from the first 16 bytes of the first record (MobiFile.parseFirstRecord).
const palmdocHeader = {
compression: [0, 2, "uint"],
numTextRecords: [8, 2, "uint"],
recordSize: [10, 2, "uint"],
encryption: [12, 2, "uint"]
};
// Read from the whole first record; its `magic` must be "MOBI".
const mobiHeader = {
magic: [16, 4, "string"],
length: [20, 4, "uint"],
type: [24, 4, "uint"],
encoding: [28, 4, "uint"],
uid: [32, 4, "uint"],
version: [36, 4, "uint"],
titleOffset: [84, 4, "uint"],
titleLength: [88, 4, "uint"],
localeRegion: [94, 1, "uint"],
localeLanguage: [95, 1, "uint"],
resourceStart: [108, 4, "uint"],
huffcdic: [112, 4, "uint"],
numHuffcdic: [116, 4, "uint"],
exthFlag: [128, 4, "uint"],
trailingFlags: [240, 4, "uint"],
indx: [244, 4, "uint"]
};
// Additional fields read from the first record when the header version is >= 8.
const kf8Header = {
resourceStart: [108, 4, "uint"],
fdst: [192, 4, "uint"],
numFdst: [196, 4, "uint"],
frag: [248, 4, "uint"],
skel: [252, 4, "uint"],
guide: [260, 4, "uint"]
};
// Start of the EXTH block (getExth); records begin at offset 12.
const exthHeader = {
magic: [0, 4, "string"],
length: [4, 4, "uint"],
count: [8, 4, "uint"]
};
// Start of an INDX record (getIndexData).
const indxHeader = {
magic: [0, 4, "string"],
length: [4, 4, "uint"],
type: [8, 4, "uint"],
idxt: [20, 4, "uint"],
numRecords: [24, 4, "uint"],
encoding: [28, 4, "uint"],
language: [32, 4, "uint"],
total: [36, 4, "uint"],
ordt: [40, 4, "uint"],
ligt: [44, 4, "uint"],
numLigt: [48, 4, "uint"],
numCncx: [52, 4, "uint"]
};
// TAGX section, located `indx.length` bytes into the main INDX record.
const tagxHeader = {
magic: [0, 4, "string"],
length: [4, 4, "uint"],
numControlBytes: [8, 4, "uint"]
};
// HUFF record; offset1/offset2 locate the two lookup tables built by huffcdic.
const huffHeader = {
magic: [0, 4, "string"],
offset1: [8, 4, "uint"],
offset2: [12, 4, "uint"]
};
// CDIC record; dictionary data starts `length` bytes into the record.
const cdicHeader = {
magic: [0, 4, "string"],
length: [4, 4, "uint"],
numEntries: [8, 4, "uint"],
codeLength: [12, 4, "uint"]
};
// FDST record (Kf8.innerInit); the entry table starts at offset 12.
const fdstHeader = {
magic: [0, 4, "string"],
numEntries: [8, 4, "uint"]
};
// FONT resource record (getFont).
const fontHeader = {
flags: [8, 4, "uint"],
dataStart: [12, 4, "uint"],
keyLength: [16, 4, "uint"],
keyStart: [20, 4, "uint"]
};
/**
 * Read the display name of the input file.
 *
 * @param {{ name?: string }} file - the object handed to the parser
 * @returns {string} `file.name`, or "" when it is null/undefined
 */
function getMobiFileName(file) {
  return file.name ?? "";
}
/**
 * Copy exactly the bytes covered by a typed-array view into a new ArrayBuffer.
 *
 * @param {Uint8Array} buffer - view whose window should be copied
 * @returns {ArrayBuffer} a buffer holding only the view's bytes
 */
function bufferToArrayBuffer(buffer) {
  const { buffer: backingStore, byteOffset, byteLength } = buffer;
  const windowEnd = byteOffset + byteLength;
  return backingStore.slice(byteOffset, windowEnd);
}
/**
 * Normalise the parser input to an ArrayBuffer.
 *
 * @param {Uint8Array | { arrayBuffer(): Promise<ArrayBuffer> }} file - raw
 *   bytes, or an object exposing `arrayBuffer()` (for example a Blob)
 * @returns {Promise<ArrayBuffer>} the file contents
 */
async function toArrayBuffer(file) {
  if (file instanceof Uint8Array) {
    return bufferToArrayBuffer(file);
  }
  return await file.arrayBuffer();
}
// Shared TextDecoder constructed without a label.
const decoder = new TextDecoder();
// Decode a buffer to text with the shared decoder above.
const getString = (buffer) => decoder.decode(buffer);
/**
 * Read an unsigned integer from the start of an ArrayBuffer using DataView's
 * default byte order.
 *
 * The width is taken from the buffer size: 4 bytes -> 32-bit, 2 bytes ->
 * 16-bit, anything else -> the first byte only.
 *
 * @param {ArrayBuffer} buffer - bytes holding the value
 * @returns {number} the decoded unsigned integer
 */
function getUint(buffer) {
  const view = new DataView(buffer);
  switch (buffer.byteLength) {
    case 4:
      return view.getUint32(0);
    case 2:
      return view.getUint16(0);
    default:
      return view.getUint8(0);
  }
}
/**
 * Decode a fixed-layout structure from a buffer.
 *
 * @param {Record<string, [number, number, string]>} def - layout table; each
 *   entry is [byte offset, byte length, "string" | other]
 * @param {ArrayBuffer} buffer - bytes to read from
 * @returns {Record<string, string | number>} one decoded value per layout key;
 *   "string" fields go through getString, all others through getUint
 */
function getStruct(def, buffer) {
  const parsed = {};
  for (const field in def) {
    const [offset, size, kind] = def[field];
    const bytes = buffer.slice(offset, offset + size);
    if (kind === "string") {
      parsed[field] = getString(bytes);
    } else {
      parsed[field] = getUint(bytes);
    }
  }
  return parsed;
}
/**
 * Concatenate typed arrays into one new array of the first array's type.
 *
 * @param {ArrayLike<number>[]} arrays - typed arrays to join, in order
 * @returns {Uint8Array} the joined data; an empty `Uint8Array` when `arrays`
 *   is empty (there is no first element to take a constructor from)
 */
function concatTypedArrays(arrays) {
  if (arrays.length === 0) {
    return new Uint8Array(0);
  }
  let totalLength = 0;
  for (const array of arrays) {
    totalLength += array.length;
  }
  const result = new arrays[0].constructor(totalLength);
  let offset = 0;
  for (const array of arrays) {
    result.set(array, offset);
    offset += array.length;
  }
  return result;
}
/**
 * Build a TextDecoder for a MOBI encoding id.
 *
 * @param {number | string} x - key into `mobiEncoding`
 * @returns {TextDecoder} decoder for the mapped label; an unmapped id passes
 *   `undefined` to the TextDecoder constructor
 */
const getDecoder = (x) => {
  const label = mobiEncoding[x];
  return new TextDecoder(label);
};
function getVarLen(byteArray, i = 0) {
let value = 0;
let length = 0;
for (const byte of byteArray.subarray(i, i + 4)) {
value = value << 7 | (byte & 127) >>> 0;
length++;
if (byte & 128) {
break;
}
}
return { value, length };
}
/**
 * Decode the variable-width integer stored in the last (up to) four bytes of
 * an array. A byte with its high bit set restarts the accumulation, so the
 * value is built from the last such byte onward.
 *
 * @param {Uint8Array} byteArray - data ending with the encoded value
 * @returns {number} the decoded value
 */
function getVarLenFromEnd(byteArray) {
  return byteArray.subarray(-4).reduce((accumulated, current) => {
    const base = (current & 0x80) !== 0 ? 0 : accumulated;
    return (base << 7) | (current & 0x7f);
  }, 0);
}
/**
 * Count the 1 bits of a value interpreted as an unsigned 32-bit integer.
 *
 * The value is coerced with `>>> 0` and shifted with `>>>` so that inputs with
 * bit 31 set (e.g. 0xFFFFFFFF) are counted in full; a signed `>>` would turn
 * them negative after the first shift and stop the loop early.
 *
 * @param {number} x - value whose set bits are counted
 * @returns {number} number of set bits (0-32)
 */
function countBitsSet(x) {
  let count = 0;
  for (let bits = x >>> 0; bits !== 0; bits >>>= 1) {
    count += bits & 1;
  }
  return count;
}
/**
 * Count the consecutive 0 bits at the low end of a 32-bit integer.
 *
 * @param {number} x - value to inspect
 * @returns {number} number of trailing zero bits; 32 when no bit is set
 *   (without that guard the scan would never find a 1 bit and loop forever)
 */
function countUnsetEnd(x) {
  let bits = x | 0;
  if (bits === 0) {
    return 32;
  }
  let count = 0;
  while ((bits & 1) === 0) {
    bits >>>= 1;
    count++;
  }
  return count;
}
/**
 * Decompress one PalmDOC-compressed text record.
 *
 * Control byte meaning, as implemented below:
 *   0          -> emitted as-is
 *   1..8       -> copy that many following bytes verbatim
 *   9..127     -> emitted as-is
 *   128..191   -> with the next byte forms a back-reference: 11-bit distance,
 *                 3-bit length (+3), copied byte by byte from earlier output
 *   192..255   -> a space followed by the byte with its high bit cleared
 *
 * @param {Uint8Array} array - compressed bytes
 * @returns {Uint8Array} decompressed bytes
 */
function decompressPalmDOC(array) {
  const out = [];
  let cursor = 0;
  while (cursor < array.length) {
    const control = array[cursor];
    if (control === 0 || (control > 8 && control <= 127)) {
      out.push(control);
    } else if (control <= 8) {
      const literalStart = cursor + 1;
      cursor += control;
      for (const literal of array.subarray(literalStart, cursor + 1)) {
        out.push(literal);
      }
    } else if (control <= 191) {
      const pair = (control << 8) | array[cursor + 1];
      cursor += 1;
      const distance = (pair & 16383) >>> 3;
      const runLength = (pair & 7) + 3;
      // Copy one byte at a time: the run may overlap bytes it just produced.
      for (let copied = 0; copied < runLength; copied++) {
        out.push(out[out.length - distance]);
      }
    } else {
      out.push(32, control ^ 128);
    }
    cursor += 1;
  }
  return Uint8Array.from(out);
}
/**
 * Build a decompressor for HUFF/CDIC-compressed text records.
 *
 * Reads the HUFF record at `mobi.huffcdic` and the `mobi.numHuffcdic - 1`
 * CDIC records that follow it, then returns a function that expands a
 * compressed byte array using those tables.
 *
 * @param {object} mobi - parsed MOBI header (uses `huffcdic`, `numHuffcdic`)
 * @param {(index: number) => ArrayBuffer} loadRecord - record loader
 * @returns {(byteArray: Uint8Array) => Uint8Array} the decompress function
 * @throws {Error} when a record does not carry the "HUFF" / "CDIC" magic
 */
function huffcdic(mobi, loadRecord) {
const huffRecord = loadRecord(mobi.huffcdic);
const { magic, offset1, offset2 } = getStruct(huffHeader, huffRecord);
if (magic !== "HUFF") {
throw new Error("Invalid HUFF record");
}
// table1: 256 four-byte entries starting at offset1, indexed by the top 8
// bits of the bit window. Each becomes [found flag, code length, value].
const table1 = Array.from(
{ length: 256 },
(_, i) => offset1 + i * 4
).map((offset) => getUint(huffRecord.slice(offset, offset + 4))).map((x) => [x & 128, x & 31, x >>> 8]);
// table2: 32 pairs of 32-bit values starting at offset2, indexed by code
// length; a dummy [0, 0] is prepended so that index 0 is unused.
const table2 = [[0, 0], ...Array.from(
{ length: 32 },
(_, i) => offset2 + i * 8
).map((offset) => [
getUint(huffRecord.slice(offset, offset + 4)),
getUint(huffRecord.slice(offset + 4, offset + 8))
])];
// dictionary entries are [bytes, flag]; a falsy flag means the bytes are
// themselves still compressed (expanded lazily in decompress below).
const dictionary = [];
for (let i = 1; i < mobi.numHuffcdic; i++) {
const record = loadRecord(mobi.huffcdic + i);
const cdic = getStruct(cdicHeader, record);
if (cdic.magic !== "CDIC") {
throw new Error("Invalid CDIC record");
}
// Entries held by this record: at most 2^codeLength, capped by what is left.
const n = Math.min(1 << cdic.codeLength, cdic.numEntries - dictionary.length);
const buffer = record.slice(cdic.length);
for (let i2 = 0; i2 < n; i2++) {
// A table of 16-bit offsets points at each entry; the entry starts with a
// 16-bit word: low 15 bits = byte length, top bit = flag.
const offset = getUint(buffer.slice(i2 * 2, i2 * 2 + 2));
const x = getUint(buffer.slice(offset, offset + 2));
const length = x & 32767;
const decompressed = x & 32768;
const value = new Uint8Array(buffer.slice(offset + 2, offset + 2 + length));
dictionary.push([value, decompressed]);
}
}
const decompress = (byteArray) => {
let output = new Uint8Array();
const bitLength = byteArray.byteLength * 8;
// `i` is a bit position; it advances by the length of each decoded code.
for (let i = 0; i < bitLength; ) {
const bits = Number(read32Bits(byteArray, i));
let [found, codeLength, value] = table1[bits >>> 24];
if (!found) {
// Not resolved by table1: lengthen the code until the window's prefix is
// no longer below the first value stored in table2 for that length.
while (bits >>> 32 - codeLength < table2[codeLength][0])
codeLength += 1;
value = table2[codeLength][1];
}
i += codeLength;
if (i > bitLength) {
break;
}
const code = value - (bits >>> 32 - codeLength);
let [result, decompressed] = dictionary[code];
if (!decompressed) {
// Expand the entry recursively and cache the expanded bytes.
result = decompress(result);
dictionary[code] = [result, true];
}
output = concatTypedArrays([output, result]);
}
return output;
};
return decompress;
}
/**
 * Read 32 bits starting at an arbitrary bit position.
 *
 * Bytes past the end of the array are read as 0.
 *
 * @param {Uint8Array} byteArray - source bytes
 * @param {number} from - bit offset of the first bit to read
 * @returns {bigint} the 32-bit window as an unsigned BigInt
 */
function read32Bits(byteArray, from) {
  const firstByte = from >> 3;
  const endBit = from + 32;
  const lastByte = endBit >> 3;
  let accumulator = 0n;
  let index = firstByte;
  while (index <= lastByte) {
    accumulator = (accumulator << 8n) | BigInt(byteArray[index] ?? 0);
    index += 1;
  }
  // Drop the bits of the last byte that lie beyond the window, then keep 32.
  const surplus = 8n - BigInt(endBit & 7);
  return (accumulator >> surplus) & 0xFFFFFFFFn;
}
// EXTH record types understood by getExth, keyed by numeric record type.
// Each entry is [result property name, "string" | "uint", multi-valued?].
// A "// many" comment follows every multi-valued entry: getExth collects
// those values into an array instead of overwriting.
const exthRecordType = {
100: ["creator", "string", true],
// many
101: ["publisher", "string", false],
103: ["description", "string", false],
104: ["isbn", "string", false],
105: ["subject", "string", true],
// many
106: ["date", "string", false],
108: ["contributor", "string", true],
// many
109: ["rights", "string", false],
110: ["subjectCode", "string", true],
// many
112: ["source", "string", true],
// many
113: ["asin", "string", false],
121: ["boundary", "uint", false],
122: ["fixedLayout", "string", false],
125: ["numResources", "uint", false],
126: ["originalResolution", "string", false],
127: ["zeroGutter", "string", false],
128: ["zeroMargin", "string", false],
129: ["coverURI", "string", false],
132: ["regionMagnification", "string", false],
201: ["coverOffset", "uint", false],
202: ["thumbnailOffset", "uint", false],
503: ["title", "string", false],
524: ["language", "string", true],
// many
527: ["pageProgressionDirection", "string", false]
};
/**
 * Parse an EXTH metadata block.
 *
 * @param {ArrayBuffer} buf - bytes starting at the "EXTH" magic
 * @param {number} encoding - MOBI encoding id used for string records
 * @returns {Record<string, string | number | string[]>} values keyed by the
 *   names in `exthRecordType`; record types not listed there are skipped
 * @throws {Error} when the block does not start with "EXTH"
 */
function getExth(buf, encoding) {
  const header = getStruct(exthHeader, buf);
  if (header.magic !== "EXTH") {
    throw new Error("Invalid EXTH header");
  }
  const textDecoder = getDecoder(encoding.toString());
  const results = {};
  // Records start right after the 12-byte header. Each one is a 4-byte type,
  // a 4-byte total length (these 8 bytes included), then the payload.
  let cursor = 12;
  for (let remaining = header.count; remaining > 0; remaining--) {
    const recordType = getUint(buf.slice(cursor, cursor + 4));
    const recordLength = getUint(buf.slice(cursor + 4, cursor + 8));
    if (recordType in exthRecordType) {
      const [name, valueKind, isMany] = exthRecordType[recordType];
      const payload = buf.slice(cursor + 8, cursor + recordLength);
      const value = valueKind === "uint" ? getUint(payload) : textDecoder.decode(payload);
      if (!isMany) {
        results[name] = value;
      } else {
        if (results[name] == null) {
          results[name] = [];
        }
        results[name].push(value);
      }
    }
    cursor += recordLength;
  }
  return results;
}
/**
 * Build a function that strips trailing entries from a text record.
 *
 * Bit 0 of `trailingFlags` selects the multibyte trailer; every other set bit
 * stands for one trailing entry whose size is stored at its own end.
 *
 * @param {number} trailingFlags - flags field from the MOBI header
 * @returns {(array: Uint8Array) => Uint8Array} function returning a view of
 *   the record without its trailing data
 */
function getRemoveTrailingEntries(trailingFlags) {
  const hasMultibyteTrailer = (trailingFlags & 1) !== 0;
  const entryCount = countBitsSet(trailingFlags >>> 1);
  return (array) => {
    let trimmed = array;
    for (let remaining = entryCount; remaining > 0; remaining--) {
      const entrySize = getVarLenFromEnd(trimmed);
      trimmed = trimmed.subarray(0, -entrySize);
    }
    if (hasMultibyteTrailer) {
      // The low two bits of the last byte give the trailer size minus one.
      const trailerSize = (trimmed[trimmed.length - 1] & 3) + 1;
      trimmed = trimmed.subarray(0, -trailerSize);
    }
    return trimmed;
  };
}
/**
 * Extract font data from a FONT resource record.
 *
 * Flag bit 2 means the start of the data is XOR-ed with a key stored in the
 * record; flag bit 1 means the data is zlib-compressed.
 *
 * @param {ArrayBuffer} buf - the whole FONT record
 * @returns {Uint8Array} the font bytes; when decompression fails a warning is
 *   logged and the (de-obfuscated) data is returned as-is
 */
function getFont(buf) {
  const header = getStruct(fontHeader, buf);
  const payload = new Uint8Array(buf.slice(header.dataStart));
  const isObfuscated = (header.flags & 2) !== 0;
  const isCompressed = (header.flags & 1) !== 0;
  if (isObfuscated) {
    const span = header.keyLength === 16 ? 1024 : 1040;
    const key = new Uint8Array(buf.slice(header.keyStart, header.keyStart + header.keyLength));
    const limit = Math.min(span, payload.length);
    for (let index = 0; index < limit; index++) {
      payload[index] = payload[index] ^ key[index % key.length];
    }
  }
  if (!isCompressed) {
    return payload;
  }
  try {
    return unzlibSync(payload);
  } catch (e) {
    console.warn(e);
    console.warn("Failed to decompress font");
    return payload;
  }
}
/**
 * Parse an index (INDX) structure and its associated CNCX string records.
 *
 * @param {number} indxIndex - record index of the main INDX record
 * @param {(index: number) => ArrayBuffer} loadRecord - record loader
 * @returns {{ table: { name: string, tagMap: Record<number, number[]> }[],
 *             cncx: Record<number, string> }} the index entries in order and
 *   the CNCX strings keyed by their offset
 * @throws {Error} when an INDX or TAGX magic is missing
 */
function getIndexData(indxIndex, loadRecord) {
const indxRecord = loadRecord(indxIndex);
const indx = getStruct(indxHeader, indxRecord);
if (indx.magic !== "INDX")
throw new Error("Invalid INDX record");
const decoder2 = getDecoder(indx.encoding.toString());
// CNCX records follow the index data records. Each holds strings prefixed by
// a variable-width length; a string's key is its byte position, plus 65536
// for every preceding CNCX record.
const cncx = {};
let cncxRecordOffset = 0;
for (let i = 0; i < indx.numCncx; i++) {
const record = loadRecord(indxIndex + indx.numRecords + i + 1);
const array = new Uint8Array(record);
for (let pos = 0; pos < array.byteLength; ) {
const index = pos;
const { value, length } = getVarLen(array, pos);
pos += length;
const result = record.slice(pos, pos + value);
pos += value;
cncx[cncxRecordOffset + index] = decoder2.decode(result);
}
cncxRecordOffset += 65536;
}
// The TAGX section starts right after the INDX header of the main record.
const tagxBuffer = indxRecord.slice(indx.length);
const tagx = getStruct(tagxHeader, tagxBuffer);
if (tagx.magic !== "TAGX")
throw new Error("Invalid TAGX section");
// After the 12-byte TAGX header come 4-byte tag definitions, destructured
// below as [tag, numValues, mask, end].
const numTags = (tagx.length - 12) / 4;
const tagTable = Array.from(
{ length: numTags },
(_, i) => new Uint8Array(tagxBuffer.slice(12 + i * 4, 12 + i * 4 + 4))
);
const table = [];
for (let i = 0; i < indx.numRecords; i++) {
const record = loadRecord(indxIndex + 1 + i);
const array = new Uint8Array(record);
const indx2 = getStruct(indxHeader, record);
if (indx2.magic !== "INDX") {
throw new Error("Invalid INDX record");
}
for (let j = 0; j < indx2.numRecords; j++) {
// Entry offsets are 16-bit values stored 4 bytes past `indx2.idxt`.
const offsetOffset = indx2.idxt + 4 + 2 * j;
const offset = getUint(record.slice(offsetOffset, offsetOffset + 2));
// An entry is: 1-byte name length, the name, the control bytes, then the
// variable-width tag values.
const length = getUint(record.slice(offset, offset + 1));
const name = getString(record.slice(offset + 1, offset + 1 + length));
const tags = [];
const startPos = offset + 1 + length;
let controlByteIndex = 0;
let pos = startPos + tagx.numControlBytes;
// First pass: for each tag work out how its values are counted. `tags`
// rows are [tag, valueCount, valueBytes, numValues].
for (const [tag, numValues, mask, end] of tagTable) {
if (end & 1) {
// This definition only marks the move to the next control byte.
controlByteIndex++;
continue;
}
const offset2 = startPos + controlByteIndex;
const value = getUint(record.slice(offset2, offset2 + 1)) & mask;
if (value === mask) {
if (countBitsSet(mask) > 1) {
// All mask bits set on a multi-bit mask: a byte count follows instead.
const { value: value2, length: length2 } = getVarLen(array, pos);
tags.push([tag, 0, value2, numValues]);
pos += length2;
} else {
tags.push([tag, 1, 0, numValues]);
}
} else {
// Otherwise the masked bits, shifted down, are the value count.
tags.push([tag, value >> countUnsetEnd(mask), 0, numValues]);
}
}
// Second pass: read the values themselves, in tag order.
const tagMap = {};
for (const [tag, valueCount, valueBytes, numValues] of tags) {
const values = [];
if (valueCount !== 0) {
for (let i2 = 0; i2 < valueCount * numValues; i2++) {
const { value, length: length2 } = getVarLen(array, pos);
values.push(value);
pos += length2;
}
} else {
// Count unknown: keep reading until `valueBytes` bytes are consumed.
let count = 0;
while (count < valueBytes) {
const { value, length: length2 } = getVarLen(array, pos);
values.push(value);
pos += length2;
count += length2;
}
}
tagMap[tag] = values;
}
table.push({ name, tagMap });
}
}
return { table, cncx };
}
/**
 * Read the NCX index and arrange its entries into a tree.
 *
 * @param {number} indxIndex - record index of the NCX INDX record
 * @param {(index: number) => ArrayBuffer} loadRecord - record loader
 * @returns {object[]} the entries whose heading level is 0; entries that have
 *   a `firstChild` carry a `children` array of the entries naming them as
 *   parent
 */
function getNCX(indxIndex, loadRecord) {
  const { table, cncx } = getIndexData(indxIndex, loadRecord);
  const firstOf = (values) => values?.[0];
  const items = table.map((entry, index) => {
    const tags = entry.tagMap;
    return {
      index,
      offset: firstOf(tags[1]),
      size: firstOf(tags[2]),
      label: cncx[firstOf(tags[3])] ?? "",
      headingLevel: firstOf(tags[4]),
      pos: tags[6],
      parent: firstOf(tags[21]),
      firstChild: firstOf(tags[22]),
      lastChild: firstOf(tags[23])
    };
  });
  const attachChildren = (item) => {
    if (item.firstChild != null) {
      const ownChildren = items.filter((candidate) => candidate.parent === item.index);
      item.children = ownChildren.map(attachChildren);
    }
    return item;
  };
  const roots = items.filter((item) => item.headingLevel === 0);
  return roots.map(attachChildren);
}
// Matches `<pagebreak ...>` / `<mbp:pagebreak ...>` tags, case-insensitively.
// Note: carries the `g` flag, so `.test`/`.exec` calls are stateful.
const mbpPagebreakRegex = /<\s*(?:mbp:)?pagebreak[^>]*>/gi;
/**
 * Format a `kindle:pos` URI.
 *
 * Both numbers are written in upper-case base 32, zero-padded to 4 digits
 * (fid) and 10 digits (off).
 *
 * @param {number} [fid=0] - fragment id
 * @param {number} [off=0] - offset within the fragment
 * @returns {string} URI of the form `kindle:pos:fid:XXXX:off:XXXXXXXXXX`
 */
function makePosURI(fid = 0, off = 0) {
  const toPaddedBase32 = (num, width) => num.toString(32).toUpperCase().padStart(width, "0");
  const fidPart = toPaddedBase32(fid, 4);
  const offPart = toPaddedBase32(off, 10);
  return `kindle:pos:fid:${fidPart}:off:${offPart}`;
}
// First quoted `id`, `name` or `aid` attribute (preceded by whitespace).
const selectorReg = /\s(id|name|aid)\s*=\s*['"]([^'"]*)['"]/i;
/**
 * Build an attribute selector from the first id/name/aid attribute found in a
 * piece of markup.
 *
 * @param {string} str - markup to search
 * @returns {string} a selector such as `[id="x"]`, or "" when none is found
 */
function getFragmentSelector(str) {
  const found = selectorReg.exec(str);
  if (found === null) {
    return "";
  }
  const attribute = found[1];
  const attributeValue = found[2];
  return `[${attribute}="${attributeValue}"]`;
}
// Captures the fid and off parts of a `kindle:pos:fid:…:off:…` URI.
const kindlePosRegex = /kindle:pos:fid:(\w+):off:(\w+)/;
/**
 * Parse a `kindle:pos` URI.
 *
 * @param {string} str - text containing a `kindle:pos:fid:…:off:…` URI
 * @returns {{ fid: number, off: number }} both parts parsed as base-32 numbers
 * @throws {TypeError} when `str` contains no such URI
 */
function parsePosURI(str) {
  const [, fidText, offText] = str.match(kindlePosRegex);
  const fid = Number.parseInt(fidText, 32);
  const off = Number.parseInt(offText, 32);
  return { fid, off };
}
// Matches `kindle:flow:<id>` / `kindle:embed:<id>` resource URIs with an
// optional `?mime=<type>` suffix. Groups: 1 = "flow" | "embed", 2 = id,
// 3 = MIME type (undefined when absent).
const kindleResourceRegex = /kindle:(flow|embed):(\w+)(?:\?mime=(\w+\/[-+.\w]+))?/;
// Helpers used by the MobiFile constructor to initialise instance fields
// (NOTE(review): these look like build-tool output for class field syntax).
var __defProp$2 = Object.defineProperty;
// Defines `key` through Object.defineProperty (enumerable, configurable,
// writable) when it already exists on `obj`; otherwise plainly assigns it.
var __defNormalProp$2 = (obj, key, value) => key in obj ? __defProp$2(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
// Same as above, after converting any non-symbol key to a string.
var __publicField$2 = (obj, key, value) => __defNormalProp$2(obj, typeof key !== "symbol" ? key + "" : key, value);
/**
 * Low-level reader for a MOBI / KF8 container.
 *
 * Parses the PDB record table and the header record(s), and exposes loaders
 * for raw records, decompressed text records and resources. For a combined
 * file whose EXTH block declares a KF8 boundary, the KF8 part is used and all
 * record indices become relative to that boundary (`start`).
 */
class MobiFile {
  /**
   * @param {ArrayBuffer} file - the complete file contents
   * @throws {Error} when the MOBI header is missing or the compression type
   *   is not supported
   */
  constructor(file) {
    __publicField$2(this, "fileArrayBuffer");
    // extract from pdb header
    __publicField$2(this, "recordsOffset");
    __publicField$2(this, "recordsMagic");
    // book start index in records
    __publicField$2(this, "start", 0);
    // extract from first record
    __publicField$2(this, "pdbHeader");
    __publicField$2(this, "mobiHeader");
    __publicField$2(this, "palmdocHeader");
    __publicField$2(this, "kf8Header");
    __publicField$2(this, "exth");
    __publicField$2(this, "isKf8", false);
    // resource start index in records
    __publicField$2(this, "resourceStart");
    __publicField$2(this, "decoder");
    __publicField$2(this, "encoder");
    __publicField$2(this, "removeTrailingEntries");
    __publicField$2(this, "decompress");
    this.fileArrayBuffer = file;
    this.parsePdbHeader();
    this.parseFirstRecord(this.loadRecord(0));
    this.resourceStart = this.mobiHeader.resourceStart;
    if (!this.isKf8) {
      // The EXTH block is optional, so `exth` may be undefined here.
      const boundary = this.exth?.boundary ?? 4294967295;
      if (boundary < 4294967295) {
        // parseFirstRecord overwrites the headers as it goes; remember the
        // MOBI ones so a failed KF8 probe cannot leave a half-updated state.
        const primary = {
          palmdocHeader: this.palmdocHeader,
          mobiHeader: this.mobiHeader,
          kf8Header: this.kf8Header,
          exth: this.exth,
          isKf8: this.isKf8
        };
        try {
          this.parseFirstRecord(this.loadRecord(boundary));
          this.resourceStart = this.kf8Header?.resourceStart ?? this.mobiHeader.resourceStart;
          this.start = boundary;
          this.isKf8 = true;
        } catch (e) {
          console.warn(e);
          console.warn("Failed to open KF8 section; falling back to MOBI");
          Object.assign(this, primary);
          this.resourceStart = primary.mobiHeader.resourceStart;
          this.start = 0;
        }
      }
    }
    this.setup();
  }

  /** Decode bytes with the book's text encoding. */
  decode(arr) {
    return this.decoder.decode(arr);
  }

  /** Encode a string with the TextEncoder created in `setup`. */
  encode(str) {
    return this.encoder.encode(str);
  }

  /**
   * Load a raw record.
   * @param {number} index - record index relative to `start`
   * @returns {ArrayBuffer} a copy of the record's bytes
   */
  loadRecord(index) {
    const [start, end] = this.recordsOffset[this.start + index];
    return this.fileArrayBuffer.slice(start, end);
  }

  /** Return the first four bytes (as text) of the record at `index`. */
  loadMagic(index) {
    return this.recordsMagic[this.start + index];
  }

  /**
   * Load text record `index` (0-based; record 0 of the book is the header, so
   * the data lives at `index + 1`), strip trailing entries and decompress it.
   * @returns {Uint8Array} the decompressed bytes
   */
  loadTextBuffer(index) {
    const record = new Uint8Array(this.loadRecord(index + 1));
    return this.decompress(this.removeTrailingEntries(record));
  }

  /**
   * Load a resource record.
   * @param {number} index - index relative to `resourceStart`
   * @returns {{ type: string, raw: Uint8Array }} sniffed type and payload
   */
  loadResource(index) {
    const buf = this.loadRecord(this.resourceStart + index);
    const magic = getString(buf.slice(0, 4));
    let data;
    if (magic === "FONT") {
      data = getFont(buf);
    } else if (magic === "VIDE" || magic === "AUDI") {
      // Media records: the payload starts 12 bytes in.
      data = new Uint8Array(buf.slice(12));
    } else {
      data = new Uint8Array(buf);
    }
    return {
      type: getFileMimeType(data),
      raw: data
    };
  }

  /** Return the NCX tree, or undefined when the header names no index. */
  getNCX() {
    const index = this.mobiHeader.indx;
    if (index < 4294967295) {
      return getNCX(index, this.loadRecord.bind(this));
    }
    return void 0;
  }

  /** Collect book metadata, preferring EXTH values over header values. */
  getMetadata() {
    const mobi = this.mobiHeader;
    const exth = this.exth;
    return {
      identifier: this.mobiHeader.uid.toString(),
      title: exth?.title || mobi.title,
      author: exth?.creator?.map(unescapeHTML) ?? [],
      publisher: exth?.publisher ?? "",
      // language in exth is many, we use the first one in this case
      language: exth?.language?.[0] ?? mobi.language,
      published: exth?.date ?? "",
      description: exth?.description ?? "",
      subject: exth?.subject?.map(unescapeHTML) ?? [],
      rights: exth?.rights ?? "",
      contributor: exth?.contributor ?? []
    };
  }

  /**
   * Load the cover image, falling back to the thumbnail.
   * @returns {{ type: string, raw: Uint8Array } | undefined} the image, or
   *   undefined when the book declares neither
   */
  getCoverImage() {
    const NONE = 4294967295;
    // No EXTH block means no cover information at all.
    const coverOffset = Number(this.exth?.coverOffset ?? NONE);
    const thumbnailOffset = Number(this.exth?.thumbnailOffset ?? NONE);
    const offset = coverOffset < NONE ? coverOffset : thumbnailOffset < NONE ? thumbnailOffset : void 0;
    // Compare against undefined explicitly: offset 0 (the first resource) is
    // a valid cover location.
    if (offset !== void 0) {
      return this.loadResource(offset);
    }
    return void 0;
  }

  /** Parse the PDB header and the record offset table that follows it. */
  parsePdbHeader() {
    const pdb = getStruct(pdbHeader, this.fileArrayBuffer.slice(0, 78));
    pdb.name = pdb.name.replace(/\0.*$/, "");
    this.pdbHeader = pdb;
    // Record table: 8 bytes per record, the first 4 being the start offset.
    const recordsBuffer = this.fileArrayBuffer.slice(78, 78 + pdb.numRecords * 8);
    const recordsStart = Array.from(
      { length: pdb.numRecords },
      (_, i) => getUint(recordsBuffer.slice(i * 8, i * 8 + 4))
    );
    // A record ends where the next one starts; the last one has no end
    // (undefined), which makes slice() run to the end of the file.
    this.recordsOffset = recordsStart.map(
      (start, i) => [start, recordsStart[i + 1]]
    );
    this.recordsMagic = recordsStart.map(
      (val) => getString(this.fileArrayBuffer.slice(val, val + 4))
    );
  }

  // palmdocHeader, mobiHeader, isKf8, exth
  /**
   * Parse a header record and store the results on the instance.
   * @param {ArrayBuffer} firstRecord - record holding the PalmDOC/MOBI headers
   * @throws {Error} when the record has no "MOBI" magic
   */
  parseFirstRecord(firstRecord) {
    this.palmdocHeader = getStruct(palmdocHeader, firstRecord.slice(0, 16));
    const mobi = getStruct(mobiHeader, firstRecord);
    if (mobi.magic !== "MOBI") {
      throw new Error("Missing MOBI header");
    }
    const { titleOffset, titleLength, localeLanguage, localeRegion } = mobi;
    // Language codes missing from the table yield undefined; report "unknown"
    // for them instead of failing.
    const lang = mobiLang[localeLanguage.toString()];
    const mobiHeaderExtends = {
      title: getString(firstRecord.slice(titleOffset, titleOffset + titleLength)),
      language: lang?.[localeRegion >> 2] ?? lang?.[0] ?? "unknown"
    };
    this.mobiHeader = Object.assign(mobi, mobiHeaderExtends);
    this.kf8Header = mobi.version >= 8 ? getStruct(kf8Header, firstRecord) : void 0;
    this.isKf8 = mobi.version >= 8;
    // Flag bit 0x40 signals an EXTH block right after the MOBI header.
    this.exth = mobi.exthFlag & 64 ? getExth(firstRecord.slice(mobi.length + 16), mobi.encoding) : void 0;
  }

  // setup decoder, encoder, decompress, removeTrailingEntries
  /**
   * @throws {Error} when the compression type is not 1, 2 or 17480
   */
  setup() {
    this.decoder = getDecoder(this.mobiHeader.encoding.toString());
    this.encoder = new TextEncoder();
    const compression = this.palmdocHeader.compression;
    if (compression === 1) {
      this.decompress = (f) => f;
    } else if (compression === 2) {
      this.decompress = decompressPalmDOC;
    } else if (compression === 17480) {
      this.decompress = huffcdic(this.mobiHeader, this.loadRecord.bind(this));
    } else {
      throw new Error("Unsupported compression");
    }
    const trailingFlags = this.mobiHeader.trailingFlags;
    this.removeTrailingEntries = getRemoveTrailingEntries(trailingFlags);
  }
}
// Helpers used by the Kf8 constructor to initialise instance fields
// (NOTE(review): these look like build-tool output for class field syntax).
var __defProp$1 = Object.defineProperty;
// Defines `key` through Object.defineProperty (enumerable, configurable,
// writable) when it already exists on `obj`; otherwise plainly assigns it.
var __defNormalProp$1 = (obj, key, value) => key in obj ? __defProp$1(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
// Same as above, after converting any non-symbol key to a string.
var __publicField$1 = (obj, key, value) => __defNormalProp$1(obj, typeof key !== "symbol" ? key + "" : key, value);
/**
 * Create a ready-to-use KF8 parser: construct it, read the file, then build
 * its tables.
 *
 * @param {Uint8Array | Blob} file - the book file
 * @param {string} [resourceSaveDir] - forwarded to the Kf8 constructor
 * @returns {Promise<Kf8>} the initialised parser
 */
async function initKf8File(file, resourceSaveDir) {
  const parser = new Kf8(file, resourceSaveDir);
  await parser.innerLoadFile();
  await parser.innerInit();
  return parser;
}
/**
 * KF8 book parser.
 *
 * Builds the chapter list from the skeleton/fragment indices, exposes the
 * table of contents, and turns chapters into HTML whose `kindle:` resource
 * references are replaced by object URLs.
 */
class Kf8 {
  /**
   * @param {Uint8Array | Blob} file - the book file
   * @param {string} [resourceSaveDir="./images"] - forwarded to saveResource
   */
  constructor(file, resourceSaveDir = "./images") {
    this.file = file;
    __publicField$1(this, "fileArrayBuffer");
    __publicField$1(this, "mobiFile");
    __publicField$1(this, "fileName", "");
    __publicField$1(this, "fdstTable", []);
    __publicField$1(this, "fullRawLength", 0);
    __publicField$1(this, "skelTable", []);
    __publicField$1(this, "fragTable", []);
    __publicField$1(this, "chapters", []);
    __publicField$1(this, "toc", []);
    __publicField$1(this, "fragmentOffsets", /* @__PURE__ */ new Map());
    __publicField$1(this, "fragmentSelectors", /* @__PURE__ */ new Map());
    __publicField$1(this, "rawHead", new Uint8Array());
    __publicField$1(this, "rawTail", new Uint8Array());
    __publicField$1(this, "lastLoadedHead", -1);
    __publicField$1(this, "lastLoadedTail", -1);
    __publicField$1(this, "resourceCache", /* @__PURE__ */ new Map());
    __publicField$1(this, "chapterCache", /* @__PURE__ */ new Map());
    __publicField$1(this, "idToChapter", /* @__PURE__ */ new Map());
    __publicField$1(this, "resourceSaveDir", "./images");
    this.fileName = getMobiFileName(file);
    this.resourceSaveDir = resourceSaveDir;
  }

  /** @returns {{ fileName: string }} basic information about the input file */
  getFileInfo() {
    return {
      fileName: this.fileName
    };
  }

  /** @returns {object} the metadata reported by the underlying MobiFile */
  getMetadata() {
    return this.mobiFile.getMetadata();
  }

  /**
   * @returns {string} an object URL for the cover image, or "" when the book
   *   has none; the URL is cached under the key "cover"
   */
  getCoverImage() {
    if (this.resourceCache.has("cover")) {
      return this.resourceCache.get("cover");
    }
    const coverImage = this.mobiFile.getCoverImage();
    let coverUrl = "";
    if (coverImage) {
      coverUrl = saveResource(coverImage.raw, coverImage.type, "cover", this.resourceSaveDir);
      this.resourceCache.set("cover", coverUrl);
    }
    return coverUrl;
  }

  /** @returns {object[]} the chapters in reading order */
  getSpine() {
    return this.chapters;
  }

  /** @returns {object[]} the table of contents built in `innerInit` */
  getToc() {
    return this.toc;
  }

  /** Read the input into memory and parse the container headers. */
  async innerLoadFile() {
    this.fileArrayBuffer = await toArrayBuffer(this.file);
    this.mobiFile = new MobiFile(this.fileArrayBuffer);
  }

  /**
   * Build the FDST, skeleton and fragment tables, the chapter list and the
   * table of contents.
   * @throws {Error} when the FDST record is missing
   */
  async innerInit() {
    const loadRecord = this.mobiFile.loadRecord.bind(this.mobiFile);
    const kf8Header = this.mobiFile.kf8Header;
    const fdstBuffer = this.mobiFile.loadRecord(kf8Header.fdst);
    const fdst = getStruct(fdstHeader, fdstBuffer);
    if (fdst.magic !== "FDST") {
      throw new Error("Missing FDST record");
    }
    // FDST entries are [start, end] pairs of 32-bit offsets into the raw text.
    const fdstTable = Array.from(
      { length: fdst.numEntries },
      (_, i) => 12 + i * 8
    ).map((offset) => [
      getUint(fdstBuffer.slice(offset, offset + 4)),
      getUint(fdstBuffer.slice(offset + 4, offset + 8))
    ]);
    this.fdstTable = fdstTable;
    // An empty table leaves the length at 0, which loadRaw treats as unknown.
    this.fullRawLength = fdstTable[fdstTable.length - 1]?.[1] ?? 0;
    const skelData = getIndexData(kf8Header.skel, loadRecord);
    const skelTable = skelData.table.map(({ name, tagMap }, index) => ({
      index,
      name,
      numFrag: tagMap[1][0],
      offset: tagMap[6][0],
      length: tagMap[6][1]
    }));
    this.skelTable = skelTable;
    const fragData = getIndexData(kf8Header.frag, loadRecord);
    const fragTable = fragData.table.map(({ name, tagMap }) => ({
      insertOffset: Number.parseInt(name, 10),
      selector: fragData.cncx[tagMap[2][0]],
      index: tagMap[4][0],
      offset: tagMap[6][0],
      length: tagMap[6][1]
    }));
    this.fragTable = fragTable;
    // One chapter per skeleton; each takes the next `numFrag` fragments.
    const chapters = this.skelTable.reduce((acc, skel, index) => {
      const last = acc[acc.length - 1];
      const fragStart = last?.fragEnd ?? 0;
      const fragEnd = fragStart + skel.numFrag;
      const frags = this.fragTable.slice(fragStart, fragEnd);
      const length = skel.length + frags.reduce((a, v) => a + v.length, 0);
      const totalLength = (last?.totalLength ?? 0) + length;
      const chapter = { id: index.toString(), skel, frags, fragEnd, length, totalLength };
      this.idToChapter.set(index, chapter);
      acc.push(chapter);
      return acc;
    }, []);
    this.chapters = chapters;
    const ncx = this.mobiFile.getNCX();
    if (ncx) {
      const map = ({ label, pos, children }) => {
        // An entry without a position tag falls back to makePosURI's defaults.
        const [fid, off] = pos ?? [];
        const href = makePosURI(fid, off);
        if (fid != null) {
          // Remember which offsets inside each fragment are link targets so
          // loadText can resolve their selectors while it has the text.
          const arr = this.fragmentOffsets.get(fid);
          if (arr) {
            arr.push(off);
          } else {
            this.fragmentOffsets.set(fid, [off]);
          }
        }
        return { label, href, children: children?.map(map) };
      };
      this.toc = ncx.map(map);
    }
  }

  /**
   * @returns {{ label: string, type: string[], href: string }[] | undefined}
   *   the guide entries, or undefined when the header names no guide index
   */
  getGuide() {
    const index = this.mobiFile.kf8Header.guide;
    if (index < 4294967295) {
      const loadRecord = this.mobiFile.loadRecord.bind(this.mobiFile);
      const { table, cncx } = getIndexData(index, loadRecord);
      return table.map(({ name, tagMap }) => ({
        // The label tag may be absent from an entry.
        label: cncx[tagMap[1]?.[0]] ?? "",
        type: name?.split(/\s/),
        href: makePosURI(tagMap[6]?.[0] ?? tagMap[3]?.[0])
      }));
    }
    return void 0;
  }

  /**
   * Return bytes [start, end) of the decompressed text.
   *
   * Text records are decompressed on demand and accumulated either from the
   * front (`rawHead`) or from the back (`rawTail`), whichever is closer to
   * the requested range.
   */
  loadRaw(start, end) {
    const distanceHead = end - this.rawHead.length;
    const distanceEnd = this.fullRawLength === 0 ? Infinity : this.fullRawLength - this.rawTail.length - start;
    if (distanceHead < 0 || distanceHead < distanceEnd) {
      while (this.rawHead.length < end) {
        this.lastLoadedHead++;
        const index = this.lastLoadedHead;
        const data = this.mobiFile.loadTextBuffer(index);
        this.rawHead = concatTypedArrays([this.rawHead, data]);
      }
      return this.rawHead.slice(start, end);
    }
    while (this.fullRawLength - this.rawTail.length > start) {
      this.lastLoadedTail++;
      const index = this.mobiFile.palmdocHeader.numTextRecords - 1 - this.lastLoadedTail;
      const data = this.mobiFile.loadTextBuffer(index);
      this.rawTail = concatTypedArrays([data, this.rawTail]);
    }
    const rawTailStart = this.fullRawLength - this.rawTail.length;
    return this.rawTail.slice(start - rawTailStart, end - rawTailStart);
  }

  /**
   * Assemble a chapter's markup by inserting its fragments into its skeleton.
   * @param {object} chapter - an entry of `this.chapters`
   * @returns {string} the decoded chapter markup
   */
  loadText(chapter) {
    const { skel, frags, length } = chapter;
    const raw = this.loadRaw(skel.offset, skel.offset + length);
    let skeleton = raw.slice(0, skel.length);
    for (const frag of frags) {
      const insertOffset = frag.insertOffset - skel.offset;
      const offset = skel.length + frag.offset;
      const fragRaw = raw.slice(offset, offset + frag.length);
      skeleton = concatTypedArrays([
        skeleton.slice(0, insertOffset),
        fragRaw,
        skeleton.slice(insertOffset)
      ]);
      const offsets = this.fragmentOffsets.get(frag.index);
      if (offsets) {
        // Decode once per fragment rather than once per offset.
        const fragText = this.mobiFile.decode(fragRaw);
        for (const offset2 of offsets) {
          const selector = getFragmentSelector(fragText.slice(offset2));
          if (selector) {
            this.cacheFragmentSelector(frag.index, offset2, selector);
          }
        }
      }
    }
    return this.mobiFile.decode(skeleton);
  }

  /**
   * @param {string} id - chapter id as found in the spine
   * @returns {{ html: string, css: { id: string, href: string }[] } | undefined}
   *   the processed chapter, or undefined for an unknown id
   */
  loadChapter(id) {
    const numId = Number.parseInt(id, 10);
    if (Number.isNaN(numId)) {
      return void 0;
    }
    if (this.chapterCache.has(numId)) {
      return this.chapterCache.get(numId);
    }
    const chapter = this.idToChapter.get(numId);
    if (chapter) {
      const processed = this.replace(this.loadText(chapter));
      this.chapterCache.set(numId, processed);
      return processed;
    }
    return void 0;
  }

  /** Remember the selector found at `offset` inside fragment `id`. */
  cacheFragmentSelector(id, offset, selector) {
    let selectors = this.fragmentSelectors.get(id);
    if (!selectors) {
      selectors = /* @__PURE__ */ new Map();
      this.fragmentSelectors.set(id, selectors);
    }
    selectors.set(offset, selector);
  }

  /** @returns {Uint8Array | undefined} the raw bytes of flow `index` */
  loadFlow(index) {
    if (index < 4294967295) {
      return this.loadRaw(this.fdstTable[index][0], this.fdstTable[index][1]);
    }
    return void 0;
  }

  /**
   * Resolve a `kindle:pos` link to a chapter id and an element selector.
   * @param {string} href - link target taken from the book
   * @returns {{ id: string, selector: string } | undefined} undefined when
   *   the link is not a `kindle:pos` URI or points at no known fragment
   */
  resolveHref(href) {
    if (/^(?!blob|kindle)\w+:/i.test(href)) {
      return void 0;
    }
    // Only `kindle:pos` URIs can be parsed; blob:, kindle:flow/embed and
    // relative links are not positions inside the book.
    if (!kindlePosRegex.test(href)) {
      return void 0;
    }
    const { fid, off } = parsePosURI(href);
    const chapter = this.chapters.find(
      (chapter2) => chapter2.frags.some(
        (frag2) => frag2.index === fid
      )
    );
    if (!chapter) {
      return void 0;
    }
    const id = chapter.id;
    const savedSelector = this.fragmentSelectors.get(fid)?.get(off);
    if (savedSelector) {
      return { id, selector: savedSelector };
    }
    const { skel, frags } = chapter;
    const frag = frags.find((frag2) => frag2.index === fid);
    const offset = skel.offset + skel.length + frag.offset;
    const fragRaw = this.loadRaw(offset, offset + frag.length);
    const str = this.mobiFile.decode(fragRaw).slice(off);
    const selector = getFragmentSelector(str);
    this.cacheFragmentSelector(fid, off, selector);
    return { id, selector };
  }

  /**
   * Replace every `kindle:flow:` / `kindle:embed:` reference in `str` with an
   * object URL for the referenced data. CSS and SVG resources are processed
   * recursively so that references inside them are replaced as well.
   * @param {string} str - markup or stylesheet text
   * @returns {string} the text with resolvable references replaced
   */
  replaceResources(str) {
    return str.replace(
      new RegExp(kindleResourceRegex, "gi"),
      (matched, resourceType, id, type) => {
        if (this.resourceCache.has(matched)) {
          return this.resourceCache.get(matched);
        }
        // Resource ids are written in base 32; embed ids are 1-based.
        const index = Number.parseInt(id, 32);
        const raw = resourceType === "flow" ? this.loadFlow(index) : this.mobiFile.loadResource(index - 1).raw;
        if (raw === void 0) {
          // Nothing to point at: leave the reference untouched.
          return matched;
        }
        let blobData = "";
        if (type === MIME.CSS || type === MIME.SVG) {
          const text = this.mobiFile.decode(raw);
          const textReplaced = this.replaceResources(text);
          blobData = textReplaced;
        } else {
          blobData = raw;
        }
        const url = saveResource(blobData, type, id, this.resourceSaveDir);
        this.resourceCache.set(matched, url);
        return url;
      }
    );
  }

  /**
   * Split a chapter document into body markup and linked stylesheets, with
   * all kindle resource references replaced.
   * @param {string} str - the chapter document
   * @returns {{ html: string, css: { id: string, href: string }[] }}
   */
  replace(str) {
    const cssUrls = [];
    const head = str.match(/<head[^>]*>([\s\S]*)<\/head>/i)?.[1] ?? "";
    const links = head.match(/<link[^>]*>/gi) ?? [];
    for (const link of links) {
      const linkHref = link.match(/href="([^"]*)"/i)?.[1];
      const id = link.match(kindleResourceRegex)?.[2];
      // Skip links that are not kindle resources (or carry no href).
      if (linkHref === void 0 || id === void 0) {
        continue;
      }
      const href = this.replaceResources(linkHref);
      cssUrls.push({
        id,
        href
      });
    }
    // A document without a <body> element is treated as body content as a whole.
    const body = str.match(/<body[^>]*>([\s\S]*)<\/body>/i)?.[1] ?? str;
    const bodyReplaced = this.replaceResources(body);
    return {
      html: bodyReplaced,
      css: cssUrls
    };
  }

  /**
   * Revoke every object URL handed out so far and drop the caches that refer
   * to them, so later calls create fresh URLs instead of returning revoked
   * ones.
   */
  destroy() {
    this.resourceCache.forEach((url) => {
      URL.revokeObjectURL(url);
    });
    this.resourceCache.clear();
    this.chapterCache.clear();
  }
}
// Helpers used by the Mobi constructor to initialise instance fields
// (NOTE(review): these look like build-tool output for class field syntax).
var __defProp = Object.defineProperty;
// Defines `key` through Object.defineProperty (enumerable, configurable,
// writable) when it already exists on `obj`; otherwise plainly assigns it.
var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
// Same as above, after converting any non-symbol key to a string.
var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
/**
 * Create a ready-to-use MOBI parser: construct it, read the file, then run
 * its initialisation.
 *
 * @param {Uint8Array | Blob} file - the book file
 * @param {string} [resourceSaveDir] - forwarded to the Mobi constructor
 * @returns {Promise<Mobi>} the initialised parser
 */
async function initMobiFile(file, resourceSaveDir) {
  const parser = new Mobi(file, resourceSaveDir);
  await parser.innerLoadFile();
  await parser.innerInit();
  return parser;
}
/**
 * Reader for MOBI books that address content by `filepos` and `recindex`.
 *
 * The concatenated text records are split into chapters at page-break
 * markers, a table of contents is built from the guide's `toc` reference, and
 * record-index attributes in chapter markup are rewritten to resource URLs.
 */
class Mobi {
  /**
   * @param file - book source; handed to `getMobiFileName` and `toArrayBuffer`.
   * @param {string} [resourceSaveDir="./images"] - forwarded to `saveResource`.
   */
  constructor(file, resourceSaveDir = "./images") {
    this.file = file;
    this.fileArrayBuffer = void 0;
    this.mobiFile = void 0;
    this.fileName = getMobiFileName(file);
    // chapter
    this.chapters = [];
    this.idToChapter = /* @__PURE__ */ new Map();
    this.toc = [];
    this.resourceSaveDir = resourceSaveDir;
    this.chapterCache = /* @__PURE__ */ new Map();
    this.resourceCache = /* @__PURE__ */ new Map();
    // TODO: optimize the logic
    this.recindexReg = /recindex=["']?(\d+)["']?/;
    this.mediarecindexReg = /mediarecindex=["']?(\d+)["']?/;
    this.fileposReg = /filepos=["']?(\d+)["']?/;
  }

  /** @returns {{ fileName: string }} basic information about the opened file. */
  getFileInfo() {
    return {
      fileName: this.fileName
    };
  }

  /** @returns {Array} the chapters in reading order. */
  getSpine() {
    return this.chapters;
  }

  /**
   * Return the processed content of a chapter, caching the result.
   *
   * @param {string} id - chapter id as found in the spine.
   * @returns {{ html: string, css: Array } | undefined} processed chapter, or
   *   `undefined` when `id` is not numeric or no such chapter exists.
   */
  loadChapter(id) {
    const numId = Number.parseInt(id, 10);
    if (Number.isNaN(numId)) {
      return void 0;
    }
    if (this.chapterCache.has(numId)) {
      return this.chapterCache.get(numId);
    }
    const chapter = this.idToChapter.get(numId);
    if (!chapter) {
      return void 0;
    }
    const processedChapter = this.replace(chapter.text);
    this.chapterCache.set(numId, processedChapter);
    return processedChapter;
  }

  /** @returns {Array} the table of contents built by `innerInit`. */
  getToc() {
    return this.toc;
  }

  /**
   * @returns {string} URL of the cover image, or `""` when the book has none.
   */
  getCoverImage() {
    if (this.resourceCache.has("cover")) {
      return this.resourceCache.get("cover");
    }
    const coverImage = this.mobiFile.getCoverImage();
    let coverUrl = "";
    if (coverImage) {
      coverUrl = saveResource(coverImage.raw, coverImage.type, "cover", this.resourceSaveDir);
      this.resourceCache.set("cover", coverUrl);
    }
    return coverUrl;
  }

  /** @returns the metadata reported by the underlying `MobiFile`. */
  getMetadata() {
    return this.mobiFile.getMetadata();
  }

  /** Read the source into memory and wrap it in a `MobiFile`. */
  async innerLoadFile() {
    this.fileArrayBuffer = await toArrayBuffer(this.file);
    this.mobiFile = new MobiFile(this.fileArrayBuffer);
  }

  /**
   * Split the book text into chapters and build the table of contents.
   * Must run after `innerLoadFile`, which provides `this.mobiFile`.
   */
  async innerInit() {
    const { palmdocHeader } = this.mobiFile;
    const buffers = [];
    for (let i = 0; i < palmdocHeader.numTextRecords; i++) {
      buffers.push(this.mobiFile.loadTextBuffer(i));
    }
    const array = concatTypedArrays(buffers);
    // One character per array element, so string indices line up with the
    // positions in the raw text that `filepos` values are compared against.
    const str = Array.from(
      array,
      (val) => String.fromCharCode(val)
    ).join("");
    const chapters = [];
    const idToChapter = /* @__PURE__ */ new Map();
    const matches = Array.from(str.matchAll(mbpPagebreakRegex));
    // Synthetic zero-length match so the text before the first page break
    // becomes chapter 0.
    matches.unshift({ index: 0, input: "", groups: void 0, 0: "" });
    matches.forEach((match, id) => {
      const start = match.index;
      // The final chapter runs to the end of the text. Leaving `end` undefined
      // would make every `ch.end > filepos` lookup skip that chapter.
      const end = matches[id + 1]?.index ?? str.length;
      const section = str.slice(start + match[0].length, end);
      const buffer = Uint8Array.from(section, (c) => c.charCodeAt(0));
      const chapter = {
        id: String(id),
        text: this.mobiFile.decode(buffer.buffer),
        start,
        end,
        size: buffer.length
      };
      chapters.push(chapter);
      idToChapter.set(id, chapter);
    });
    // Drop the document trailer, but only when a closing body tag is present:
    // slicing with a -1 index would silently cut the last character.
    const lastChapter = chapters[chapters.length - 1];
    const bodyCloseIndex = lastChapter.text.indexOf("</body>");
    if (bodyCloseIndex !== -1) {
      lastChapter.text = lastChapter.text.slice(0, bodyCloseIndex);
    }
    // Everything before the opening body tag (which may carry attributes) is
    // header markup; it is searched for guide references below.
    const firstChapter = chapters[0];
    const firstChapterText = firstChapter.text;
    const bodyOpen = firstChapterText.match(/<body\b[^>]*>/i);
    let referenceStr = firstChapterText;
    if (bodyOpen) {
      referenceStr = firstChapterText.slice(0, bodyOpen.index);
      firstChapter.text = firstChapterText.slice(bodyOpen.index + bodyOpen[0].length);
    }
    this.chapters = chapters;
    this.idToChapter = idToChapter;
    const tocChapter = this.findTocChapter(referenceStr);
    if (tocChapter) {
      // Quote bare filepos values so the chapter can be parsed as XML.
      const wrappedChapterStr = `<wrapper>${tocChapter.text.replace(/filepos=(\d+)/gi, 'filepos="$1"')}</wrapper>`;
      const tocAst = await parsexml(wrappedChapterStr, {
        preserveChildrenOrder: true,
        explicitChildren: true,
        childkey: "children"
      });
      const toc = [];
      this.parseNavMap(tocAst.wrapper?.children, toc);
      this.toc = toc;
    }
  }

  /**
   * Locate the chapter that the guide's `toc` reference points into.
   *
   * @param {string} referenceStr - header markup containing `<reference>` tags.
   * @returns the chapter containing the toc position, or `undefined`.
   */
  findTocChapter(referenceStr) {
    // Match one tag at a time: a greedy `.*` would merge several references
    // that share a line and hide every reference but the first.
    const refs = referenceStr.match(/<reference[^>]*>/gi) ?? [];
    const typeReg = /type="(.+?)"/;
    const fileposReg = /filepos=["']?(\d+)/;
    for (const ref of refs) {
      const type = ref.match(typeReg)?.[1].trim();
      const filepos = ref.match(fileposReg)?.[1];
      if (type === "toc" && filepos) {
        const tocPos = Number.parseInt(filepos, 10);
        return this.chapters.find((ch) => ch.end > tocPos);
      }
    }
    return void 0;
  }

  /**
   * Convert parsed `<p>` / `<blockquote>` nodes into table-of-contents items.
   *
   * A node with a direct `<a>` child becomes an item; nested `<p>` /
   * `<blockquote>` nodes become that item's `children`. A node without its
   * own link is treated as a plain wrapper and its nested entries are added
   * to `toc` directly rather than being dropped.
   *
   * @param {Array} [children] - parsed child nodes, each tagged with `#name`.
   * @param {Array} toc - output list; items are appended in document order.
   */
  parseNavMap(children, toc) {
    for (const child of children ?? []) {
      const childName = child["#name"];
      if (childName !== "p" && childName !== "blockquote") {
        continue;
      }
      const hasNested = Boolean(child.p || child.blockquote);
      const anchor = child.a?.[0];
      if (!anchor) {
        if (hasNested) {
          this.parseNavMap(child.children, toc);
        }
        continue;
      }
      const item = {
        label: anchor._,
        href: `filepos:${Number(anchor.$?.filepos)}`
      };
      toc.push(item);
      if (hasNested) {
        item.children = [];
        this.parseNavMap(child.children, item.children);
      }
    }
  }

  /**
   * Store the resource with the given record index and return its URL.
   *
   * @param {number} index - 1-based index as written in `recindex` attributes.
   * @returns {string} URL produced by `saveResource` (cached per index).
   */
  loadResource(index) {
    const cacheKey = String(index);
    if (this.resourceCache.has(cacheKey)) {
      return this.resourceCache.get(cacheKey);
    }
    const { type, raw } = this.mobiFile.loadResource(index - 1);
    const resourceUrl = saveResource(raw, type, cacheKey, this.resourceSaveDir);
    this.resourceCache.set(cacheKey, resourceUrl);
    return resourceUrl;
  }

  /**
   * Rewrite record-index and filepos attributes in chapter markup.
   *
   * - `<img recindex=N>` becomes `<img src="...">`
   * - `<video|audio mediarecindex=N recindex=M>` becomes `src="..."` and
   *   `poster="..."`
   * - `<a filepos=N>` becomes `<a href="filepos:N">`
   *
   * Tags that lack the relevant attribute are returned unchanged.
   *
   * @param {string} html - raw chapter markup.
   * @returns {{ html: string, css: Array }} rewritten markup; `css` is always
   *   an empty array here.
   */
  replace(html) {
    html = html.replace(
      /<img[^>]*>/g,
      (matched) => {
        const recindex = matched.match(this.recindexReg)?.[1];
        if (recindex === undefined) {
          return matched;
        }
        const url = this.loadResource(Number.parseInt(recindex, 10));
        return matched.replace(this.recindexReg, `src="${url}"`);
      }
    );
    html = html.replace(
      /<(video|audio)[^>]*>/g,
      (matched) => {
        // Read the media index with its own pattern: `recindexReg` also
        // matches inside "mediarecindex=" and would pick up the poster index
        // whenever the poster attribute is written first.
        const mediarecindex = matched.match(this.mediarecindexReg)?.[1];
        if (mediarecindex !== undefined) {
          const mediaUrl = this.loadResource(Number.parseInt(mediarecindex, 10));
          matched = matched.replace(this.mediarecindexReg, `src="${mediaUrl}"`);
        }
        // With the media attribute rewritten, any remaining recindex is the poster.
        const recindex = matched.match(this.recindexReg)?.[1];
        if (recindex !== undefined) {
          const posterUrl = this.loadResource(Number.parseInt(recindex, 10));
          matched = matched.replace(this.recindexReg, `poster="${posterUrl}"`);
        }
        return matched;
      }
    );
    html = html.replace(
      // `\b` keeps tags such as <abbr> or <audio> out of the anchor rewrite.
      /<a\b[^>]*>/g,
      (matched) => {
        const fileposMatch = matched.match(this.fileposReg);
        if (!fileposMatch) {
          return matched;
        }
        return matched.replace(this.fileposReg, `href="filepos:${fileposMatch[1]}"`);
      }
    );
    return {
      html,
      css: []
    };
  }

  /**
   * Resolve a `filepos:N` href to the chapter that contains position N.
   *
   * @param {string} href - link target produced by `replace` or the toc.
   * @returns {{ id: string, selector: string } | undefined} chapter id plus a
   *   selector for the target, or `undefined` when the href is not a filepos
   *   link or lies beyond the last chapter.
   */
  resolveHref(href) {
    const hrefmatch = href.match(/filepos:(\d+)/);
    if (!hrefmatch) {
      return void 0;
    }
    const filepos = hrefmatch[1];
    const fileposNum = Number(filepos);
    const chapter = this.chapters.find((ch) => ch.end > fileposNum);
    if (!chapter) {
      return void 0;
    }
    return { id: chapter.id, selector: `[id="filepos:${filepos}"]` };
  }

  /**
   * Revoke every stored resource URL and reset the caches.
   *
   * Cached chapters are dropped too, because their markup embeds the URLs
   * that have just been revoked.
   */
  destroy() {
    for (const url of this.resourceCache.values()) {
      URL.revokeObjectURL(url);
    }
    this.resourceCache.clear();
    this.chapterCache.clear();
  }
}
export { initKf8File, initMobiFile };