UNPKG

@lingo-reader/mobi-parser

Version:
1,349 lines (1,341 loc) 43.4 kB
import { unzlibSync } from 'fflate'; import { parsexml } from '@lingo-reader/shared'; const htmlEntityMap = { "&lt;": "<", "&gt;": ">", "&amp;": "&", "&quot;": '"', "&#39;": "'" }; function unescapeHTML(str) { if (!str.includes("&")) { return str; } return str.replace(/&(#x[\dA-Fa-f]+|#\d+|[a-zA-Z]+);/g, (match, entity) => { if (entity.startsWith("#x")) { return String.fromCodePoint(Number.parseInt(entity.slice(2), 16)); } else if (entity.startsWith("#")) { return String.fromCodePoint(Number.parseInt(entity.slice(1), 10)); } else { return htmlEntityMap[match] || match; } }); } const MIME = { XML: "application/xml", XHTML: "application/xhtml+xml", HTML: "text/html", CSS: "text/css", SVG: "image/svg+xml" }; const fileSignatures = { "ffd8ff": "image/jpeg", "89504e47": "image/png", "47494638": "image/gif", "424d": "image/bmp", "3c737667": "image/svg+xml", "00000018": "video/mp4", "00000020": "video/mp4", "1a45dfa3": "video/mkv", "1f43b675": "video/webm", "494433": "audio/mp3", "52494646": "audio/wav", "4f676753": "audio/ogg", "00010000": "font/ttf", "74727565": "font/ttf", "4f54544f": "font/otf", "774f4646": "font/woff", "774f4632": "font/woff2", "504c": "font/eot" }; function getFileMimeType(fileBuffer) { const header = fileBuffer.slice(0, 12); const hexHeader = Array.from(header).map((b) => b.toString(16).padStart(2, "0")).join(""); for (const [signature, type] of Object.entries(fileSignatures)) { if (hexHeader.startsWith(signature)) { return type; } } return "unknown"; } function saveResource(data, type, filename, imageSaveDir) { { return URL.createObjectURL(new Blob([data], { type })); } } const mobiEncoding = { 1252: "windows-1252", 65001: "utf-8" }; const mobiLang = { 1: ["ar", "ar-SA", "ar-IQ", "ar-EG", "ar-LY", "ar-DZ", "ar-MA", "ar-TN", "ar-OM", "ar-YE", "ar-SY", "ar-JO", "ar-LB", "ar-KW", "ar-AE", "ar-BH", "ar-QA"], 2: ["bg"], 3: ["ca"], 4: ["zh", "zh-TW", "zh-CN", "zh-HK", "zh-SG"], 5: ["cs"], 6: ["da"], 7: ["de", "de-DE", "de-CH", "de-AT", "de-LU", "de-LI"], 8: ["el"], 9: ["en", "en-US", "en-GB", "en-AU", "en-CA", "en-NZ", "en-IE", "en-ZA", "en-JM", null, "en-BZ", "en-TT", "en-ZW", "en-PH"], 10: ["es", "es-ES", "es-MX", null, "es-GT", "es-CR", "es-PA", "es-DO", "es-VE", "es-CO", "es-PE", "es-AR", "es-EC", "es-CL", "es-UY", "es-PY", "es-BO", "es-SV", "es-HN", "es-NI", "es-PR"], 11: ["fi"], 12: ["fr", "fr-FR", "fr-BE", "fr-CA", "fr-CH", "fr-LU", "fr-MC"], 13: ["he"], 14: ["hu"], 15: ["is"], 16: ["it", "it-IT", "it-CH"], 17: ["ja"], 18: ["ko"], 19: ["nl", "nl-NL", "nl-BE"], 20: ["no", "nb", "nn"], 21: ["pl"], 22: ["pt", "pt-BR", "pt-PT"], 23: ["rm"], 24: ["ro"], 25: ["ru"], 26: ["hr", null, "sr"], 27: ["sk"], 28: ["sq"], 29: ["sv", "sv-SE", "sv-FI"], 30: ["th"], 31: ["tr"], 32: ["ur"], 33: ["id"], 34: ["uk"], 35: ["be"], 36: ["sl"], 37: ["et"], 38: ["lv"], 39: ["lt"], 41: ["fa"], 42: ["vi"], 43: ["hy"], 44: ["az"], 45: ["eu"], 46: ["hsb"], 47: ["mk"], 48: ["st"], 49: ["ts"], 50: ["tn"], 52: ["xh"], 53: ["zu"], 54: ["af"], 55: ["ka"], 56: ["fo"], 57: ["hi"], 58: ["mt"], 59: ["se"], 62: ["ms"], 63: ["kk"], 65: ["sw"], 67: ["uz", null, "uz-UZ"], 68: ["tt"], 69: ["bn"], 70: ["pa"], 71: ["gu"], 72: ["or"], 73: ["ta"], 74: ["te"], 75: ["kn"], 76: ["ml"], 77: ["as"], 78: ["mr"], 79: ["sa"], 82: ["cy", "cy-GB"], 83: ["gl", "gl-ES"], 87: ["kok"], 97: ["ne"], 98: ["fy"] }; const pdbHeader = { name: [0, 32, "string"], type: [60, 4, "string"], creator: [64, 4, "string"], numRecords: [76, 2, "uint"] }; const palmdocHeader = { compression: [0, 2, "uint"], numTextRecords: [8, 2, "uint"], recordSize: [10, 2, "uint"], encryption: [12, 2, "uint"] }; const mobiHeader = { magic: [16, 4, "string"], length: [20, 4, "uint"], type: [24, 4, "uint"], encoding: [28, 4, "uint"], uid: [32, 4, "uint"], version: [36, 4, "uint"], titleOffset: [84, 4, "uint"], titleLength: [88, 4, "uint"], localeRegion: [94, 1, "uint"], localeLanguage: [95, 1, "uint"], resourceStart: [108, 4, "uint"], huffcdic: [112, 4, "uint"], numHuffcdic: [116, 4, "uint"], exthFlag: [128, 4, "uint"], trailingFlags: [240, 4, "uint"], indx: [244, 4, "uint"] }; const kf8Header = { resourceStart: [108, 4, "uint"], fdst: [192, 4, "uint"], numFdst: [196, 4, "uint"], frag: [248, 4, "uint"], skel: [252, 4, "uint"], guide: [260, 4, "uint"] }; const exthHeader = { magic: [0, 4, "string"], length: [4, 4, "uint"], count: [8, 4, "uint"] }; const indxHeader = { magic: [0, 4, "string"], length: [4, 4, "uint"], type: [8, 4, "uint"], idxt: [20, 4, "uint"], numRecords: [24, 4, "uint"], encoding: [28, 4, "uint"], language: [32, 4, "uint"], total: [36, 4, "uint"], ordt: [40, 4, "uint"], ligt: [44, 4, "uint"], numLigt: [48, 4, "uint"], numCncx: [52, 4, "uint"] }; const tagxHeader = { magic: [0, 4, "string"], length: [4, 4, "uint"], numControlBytes: [8, 4, "uint"] }; const huffHeader = { magic: [0, 4, "string"], offset1: [8, 4, "uint"], offset2: [12, 4, "uint"] }; const cdicHeader = { magic: [0, 4, "string"], length: [4, 4, "uint"], numEntries: [8, 4, "uint"], codeLength: [12, 4, "uint"] }; const fdstHeader = { magic: [0, 4, "string"], numEntries: [8, 4, "uint"] }; const fontHeader = { flags: [8, 4, "uint"], dataStart: [12, 4, "uint"], keyLength: [16, 4, "uint"], keyStart: [20, 4, "uint"] }; function getMobiFileName(file) { let fileName = ""; { fileName = file.name ?? ""; } return fileName; } function bufferToArrayBuffer(buffer) { return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength); } async function toArrayBuffer(file) { { return file instanceof Uint8Array ? bufferToArrayBuffer(file) : await file.arrayBuffer(); } } const decoder = new TextDecoder(); const getString = (buffer) => decoder.decode(buffer); function getUint(buffer) { const l = buffer.byteLength; const func = l === 4 ? "getUint32" : l === 2 ? "getUint16" : "getUint8"; return new DataView(buffer)[func](0); } function getStruct(def, buffer) { const res = {}; for (const key in def) { const [start, len, type] = def[key]; res[key] = type === "string" ? getString(buffer.slice(start, start + len)) : getUint(buffer.slice(start, start + len)); } return res; } function concatTypedArrays(arrays) { const totalLength = arrays.reduce((sum, arr) => sum + arr.length, 0); const result = new arrays[0].constructor(totalLength); let offset = 0; for (const array of arrays) { result.set(array, offset); offset += array.length; } return result; } const getDecoder = (x) => new TextDecoder(mobiEncoding[x]); function getVarLen(byteArray, i = 0) { let value = 0; let length = 0; for (const byte of byteArray.subarray(i, i + 4)) { value = value << 7 | (byte & 127) >>> 0; length++; if (byte & 128) { break; } } return { value, length }; } function getVarLenFromEnd(byteArray) { let value = 0; for (const byte of byteArray.subarray(-4)) { if (byte & 128) { value = 0; } value = value << 7 | byte & 127; } return value; } function countBitsSet(x) { let count = 0; for (; x > 0; x = x >> 1) { if ((x & 1) === 1) { count++; } } return count; } function countUnsetEnd(x) { let count = 0; while ((x & 1) === 0) { x = x >> 1; count++; } return count; } function decompressPalmDOC(array) { const output = []; for (let i = 0; i < array.length; i++) { const byte = array[i]; if (byte === 0) { output.push(0); } else if (byte <= 8) { for (const x of array.subarray(i + 1, (i += byte) + 1)) output.push(x); } else if (byte <= 127) { output.push(byte); } else if (byte <= 191) { const bytes = byte << 8 | array[i++ + 1]; const distance = (bytes & 16383) >>> 3; const length = (bytes & 7) + 3; for (let j = 0; j < length; j++) output.push(output[output.length - distance]); } else { output.push(32, byte ^ 128); } } return Uint8Array.from(output); } function huffcdic(mobi, loadRecord) { const huffRecord = loadRecord(mobi.huffcdic); const { magic, offset1, offset2 } = getStruct(huffHeader, huffRecord); if (magic !== "HUFF") { throw new Error("Invalid HUFF record"); } const table1 = Array.from( { length: 256 }, (_, i) => offset1 + i * 4 ).map((offset) => getUint(huffRecord.slice(offset, offset + 4))).map((x) => [x & 128, x & 31, x >>> 8]); const table2 = [[0, 0], ...Array.from( { length: 32 }, (_, i) => offset2 + i * 8 ).map((offset) => [ getUint(huffRecord.slice(offset, offset + 4)), getUint(huffRecord.slice(offset + 4, offset + 8)) ])]; const dictionary = []; for (let i = 1; i < mobi.numHuffcdic; i++) { const record = loadRecord(mobi.huffcdic + i); const cdic = getStruct(cdicHeader, record); if (cdic.magic !== "CDIC") { throw new Error("Invalid CDIC record"); } const n = Math.min(1 << cdic.codeLength, cdic.numEntries - dictionary.length); const buffer = record.slice(cdic.length); for (let i2 = 0; i2 < n; i2++) { const offset = getUint(buffer.slice(i2 * 2, i2 * 2 + 2)); const x = getUint(buffer.slice(offset, offset + 2)); const length = x & 32767; const decompressed = x & 32768; const value = new Uint8Array(buffer.slice(offset + 2, offset + 2 + length)); dictionary.push([value, decompressed]); } } const decompress = (byteArray) => { let output = new Uint8Array(); const bitLength = byteArray.byteLength * 8; for (let i = 0; i < bitLength; ) { const bits = Number(read32Bits(byteArray, i)); let [found, codeLength, value] = table1[bits >>> 24]; if (!found) { while (bits >>> 32 - codeLength < table2[codeLength][0]) codeLength += 1; value = table2[codeLength][1]; } i += codeLength; if (i > bitLength) { break; } const code = value - (bits >>> 32 - codeLength); let [result, decompressed] = dictionary[code]; if (!decompressed) { result = decompress(result); dictionary[code] = [result, true]; } output = concatTypedArrays([output, result]); } return output; }; return decompress; } function read32Bits(byteArray, from) { const startByte = from >> 3; const end = from + 32; const endByte = end >> 3; let bits = 0n; for (let i = startByte; i <= endByte; i++) { bits = bits << 8n | BigInt(byteArray[i] ?? 0); } return bits >> 8n - BigInt(end & 7) & 0xFFFFFFFFn; } const exthRecordType = { 100: ["creator", "string", true], // many 101: ["publisher", "string", false], 103: ["description", "string", false], 104: ["isbn", "string", false], 105: ["subject", "string", true], // many 106: ["date", "string", false], 108: ["contributor", "string", true], // many 109: ["rights", "string", false], 110: ["subjectCode", "string", true], // many 112: ["source", "string", true], // many 113: ["asin", "string", false], 121: ["boundary", "uint", false], 122: ["fixedLayout", "string", false], 125: ["numResources", "uint", false], 126: ["originalResolution", "string", false], 127: ["zeroGutter", "string", false], 128: ["zeroMargin", "string", false], 129: ["coverURI", "string", false], 132: ["regionMagnification", "string", false], 201: ["coverOffset", "uint", false], 202: ["thumbnailOffset", "uint", false], 503: ["title", "string", false], 524: ["language", "string", true], // many 527: ["pageProgressionDirection", "string", false] }; function getExth(buf, encoding) { const { magic, count } = getStruct(exthHeader, buf); if (magic !== "EXTH") { throw new Error("Invalid EXTH header"); } const decoder2 = getDecoder(encoding.toString()); const results = {}; let offset = 12; for (let i = 0; i < count; i++) { const type = getUint(buf.slice(offset, offset + 4)); const length = getUint(buf.slice(offset + 4, offset + 8)); if (type in exthRecordType) { const [name, typ, ismany] = exthRecordType[type]; const data = buf.slice(offset + 8, offset + length); const value = typ === "uint" ? getUint(data) : decoder2.decode(data); if (ismany) { results[name] ?? (results[name] = []); results[name].push(value); } else { results[name] = value; } } offset += length; } return results; } function getRemoveTrailingEntries(trailingFlags) { const multibyte = trailingFlags & 1; const numTrailingEntries = countBitsSet(trailingFlags >>> 1); return (array) => { for (let i = 0; i < numTrailingEntries; i++) { const length = getVarLenFromEnd(array); array = array.subarray(0, -length); } if (multibyte) { const length = (array[array.length - 1] & 3) + 1; array = array.subarray(0, -length); } return array; }; } function getFont(buf) { const { flags, dataStart, keyLength, keyStart } = getStruct(fontHeader, buf); const array = new Uint8Array(buf.slice(dataStart)); if (flags & 2) { const bytes = keyLength === 16 ? 1024 : 1040; const key = new Uint8Array(buf.slice(keyStart, keyStart + keyLength)); const length = Math.min(bytes, array.length); for (let i = 0; i < length; i++) array[i] = array[i] ^ key[i % key.length]; } if (flags & 1) { try { return unzlibSync(array); } catch (e) { console.warn(e); console.warn("Failed to decompress font"); } } return array; } function getIndexData(indxIndex, loadRecord) { const indxRecord = loadRecord(indxIndex); const indx = getStruct(indxHeader, indxRecord); if (indx.magic !== "INDX") throw new Error("Invalid INDX record"); const decoder2 = getDecoder(indx.encoding.toString()); const cncx = {}; let cncxRecordOffset = 0; for (let i = 0; i < indx.numCncx; i++) { const record = loadRecord(indxIndex + indx.numRecords + i + 1); const array = new Uint8Array(record); for (let pos = 0; pos < array.byteLength; ) { const index = pos; const { value, length } = getVarLen(array, pos); pos += length; const result = record.slice(pos, pos + value); pos += value; cncx[cncxRecordOffset + index] = decoder2.decode(result); } cncxRecordOffset += 65536; } const tagxBuffer = indxRecord.slice(indx.length); const tagx = getStruct(tagxHeader, tagxBuffer); if (tagx.magic !== "TAGX") throw new Error("Invalid TAGX section"); const numTags = (tagx.length - 12) / 4; const tagTable = Array.from( { length: numTags }, (_, i) => new Uint8Array(tagxBuffer.slice(12 + i * 4, 12 + i * 4 + 4)) ); const table = []; for (let i = 0; i < indx.numRecords; i++) { const record = loadRecord(indxIndex + 1 + i); const array = new Uint8Array(record); const indx2 = getStruct(indxHeader, record); if (indx2.magic !== "INDX") { throw new Error("Invalid INDX record"); } for (let j = 0; j < indx2.numRecords; j++) { const offsetOffset = indx2.idxt + 4 + 2 * j; const offset = getUint(record.slice(offsetOffset, offsetOffset + 2)); const length = getUint(record.slice(offset, offset + 1)); const name = getString(record.slice(offset + 1, offset + 1 + length)); const tags = []; const startPos = offset + 1 + length; let controlByteIndex = 0; let pos = startPos + tagx.numControlBytes; for (const [tag, numValues, mask, end] of tagTable) { if (end & 1) { controlByteIndex++; continue; } const offset2 = startPos + controlByteIndex; const value = getUint(record.slice(offset2, offset2 + 1)) & mask; if (value === mask) { if (countBitsSet(mask) > 1) { const { value: value2, length: length2 } = getVarLen(array, pos); tags.push([tag, 0, value2, numValues]); pos += length2; } else { tags.push([tag, 1, 0, numValues]); } } else { tags.push([tag, value >> countUnsetEnd(mask), 0, numValues]); } } const tagMap = {}; for (const [tag, valueCount, valueBytes, numValues] of tags) { const values = []; if (valueCount !== 0) { for (let i2 = 0; i2 < valueCount * numValues; i2++) { const { value, length: length2 } = getVarLen(array, pos); values.push(value); pos += length2; } } else { let count = 0; while (count < valueBytes) { const { value, length: length2 } = getVarLen(array, pos); values.push(value); pos += length2; count += length2; } } tagMap[tag] = values; } table.push({ name, tagMap }); } } return { table, cncx }; } function getNCX(indxIndex, loadRecord) { const { table, cncx } = getIndexData(indxIndex, loadRecord); const items = table.map(({ tagMap }, index) => ({ index, offset: tagMap[1]?.[0], size: tagMap[2]?.[0], label: cncx[tagMap[3]?.[0]] ?? "", headingLevel: tagMap[4]?.[0], pos: tagMap[6], parent: tagMap[21]?.[0], firstChild: tagMap[22]?.[0], lastChild: tagMap[23]?.[0] })); const getChildren = (item) => { if (item.firstChild == null) return item; item.children = items.filter((x) => x.parent === item.index).map(getChildren); return item; }; return items.filter((item) => item.headingLevel === 0).map(getChildren); } const mbpPagebreakRegex = /<\s*(?:mbp:)?pagebreak[^>]*>/gi; function makePosURI(fid = 0, off = 0) { return `kindle:pos:fid:${fid.toString(32).toUpperCase().padStart(4, "0")}:off:${off.toString(32).toUpperCase().padStart(10, "0")}`; } const selectorReg = /\s(id|name|aid)\s*=\s*['"]([^'"]*)['"]/i; function getFragmentSelector(str) { const match = str.match(selectorReg); if (!match) { return ""; } const [, attr, value] = match; return `[${attr}="${value}"]`; } const kindlePosRegex = /kindle:pos:fid:(\w+):off:(\w+)/; function parsePosURI(str) { const [fid, off] = str.match(kindlePosRegex).slice(1); return { fid: Number.parseInt(fid, 32), off: Number.parseInt(off, 32) }; } const kindleResourceRegex = /kindle:(flow|embed):(\w+)(?:\?mime=(\w+\/[-+.\w]+))?/; var __defProp$2 = Object.defineProperty; var __defNormalProp$2 = (obj, key, value) => key in obj ? __defProp$2(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value; var __publicField$2 = (obj, key, value) => __defNormalProp$2(obj, typeof key !== "symbol" ? key + "" : key, value); class MobiFile { constructor(file) { __publicField$2(this, "fileArrayBuffer"); // extract from pdb header __publicField$2(this, "recordsOffset"); __publicField$2(this, "recordsMagic"); // book start index in records __publicField$2(this, "start", 0); // extract from first record __publicField$2(this, "pdbHeader"); __publicField$2(this, "mobiHeader"); __publicField$2(this, "palmdocHeader"); __publicField$2(this, "kf8Header"); __publicField$2(this, "exth"); __publicField$2(this, "isKf8", false); // resource start index in records __publicField$2(this, "resourceStart"); __publicField$2(this, "decoder"); __publicField$2(this, "encoder"); __publicField$2(this, "removeTrailingEntries"); __publicField$2(this, "decompress"); this.fileArrayBuffer = file; this.parsePdbHeader(); this.parseFirstRecord(this.loadRecord(0)); this.resourceStart = this.mobiHeader.resourceStart; if (!this.isKf8) { const boundary = this.exth.boundary ?? 4294967295; if (boundary < 4294967295) { try { this.parseFirstRecord(this.loadRecord(boundary)); this.resourceStart = this.kf8Header?.resourceStart ?? this.mobiHeader.resourceStart; this.start = boundary; this.isKf8 = true; } catch (e) { } } } this.setup(); } decode(arr) { return this.decoder.decode(arr); } encode(str) { return this.encoder.encode(str); } loadRecord(index) { const [start, end] = this.recordsOffset[this.start + index]; return this.fileArrayBuffer.slice(start, end); } loadMagic(index) { return this.recordsMagic[this.start + index]; } loadTextBuffer(index) { return this.decompress( this.removeTrailingEntries( new Uint8Array( this.loadRecord(index + 1) ) ) ); } loadResource(index) { const buf = this.loadRecord(this.resourceStart + index); const magic = getString(buf.slice(0, 4)); let data; if (magic === "FONT") { data = getFont(buf); } else if (magic === "VIDE" || magic === "AUDI") { data = new Uint8Array(buf.slice(12)); } else { data = new Uint8Array(buf); } return { type: getFileMimeType(data), raw: data }; } getNCX() { const index = this.mobiHeader.indx; if (index < 4294967295) { return getNCX(index, this.loadRecord.bind(this)); } return void 0; } getMetadata() { const mobi = this.mobiHeader; const exth = this.exth; return { identifier: this.mobiHeader.uid.toString(), title: exth?.title || mobi.title, author: exth?.creator?.map(unescapeHTML) ?? [], publisher: exth?.publisher ?? "", // language in exth is many, we use the first one in this case language: exth?.language?.[0] ?? mobi.language, published: exth?.date ?? "", description: exth?.description ?? "", subject: exth?.subject?.map(unescapeHTML) ?? [], rights: exth?.rights ?? "", contributor: exth?.contributor ?? [] }; } getCoverImage() { const exth = this.exth; const coverOffset = Number(exth.coverOffset ?? 4294967295); const thumbnailOffset = Number(exth.thumbnailOffset ?? 4294967295); const offset = coverOffset < 4294967295 ? coverOffset : thumbnailOffset < 4294967295 ? thumbnailOffset : void 0; if (offset) { return this.loadResource(offset); } return void 0; } parsePdbHeader() { const pdb = getStruct(pdbHeader, this.fileArrayBuffer.slice(0, 78)); pdb.name = pdb.name.replace(/\0.*$/, ""); this.pdbHeader = pdb; const recordsBuffer = this.fileArrayBuffer.slice(78, 78 + pdb.numRecords * 8); const recordsStart = Array.from( { length: pdb.numRecords }, (_, i) => getUint(recordsBuffer.slice(i * 8, i * 8 + 4)) ); this.recordsOffset = recordsStart.map( (start, i) => [start, recordsStart[i + 1]] ); this.recordsMagic = recordsStart.map( (val) => getString(this.fileArrayBuffer.slice(val, val + 4)) ); } // palmdocHeader, mobiHeader, isKf8, exth parseFirstRecord(firstRecord) { this.palmdocHeader = getStruct(palmdocHeader, firstRecord.slice(0, 16)); const mobi = getStruct(mobiHeader, firstRecord); if (mobi.magic !== "MOBI") { throw new Error("Missing MOBI header"); } const { titleOffset, titleLength, localeLanguage, localeRegion } = mobi; const lang = mobiLang[localeLanguage.toString()]; const mobiHeaderExtends = { title: getString(firstRecord.slice(titleOffset, titleOffset + titleLength)), language: lang[localeRegion >> 2] ?? lang[0] ?? "unknown" }; this.mobiHeader = Object.assign(mobi, mobiHeaderExtends); this.kf8Header = mobi.version >= 8 ? getStruct(kf8Header, firstRecord) : void 0; this.isKf8 = mobi.version >= 8; this.exth = mobi.exthFlag & 64 ? getExth(firstRecord.slice(mobi.length + 16), mobi.encoding) : void 0; } // setup decoder, encoder, decompress, removeTrailingEntries setup() { this.decoder = getDecoder(this.mobiHeader.encoding.toString()); this.encoder = new TextEncoder(); const compression = this.palmdocHeader.compression; if (compression === 1) { this.decompress = (f) => f; } else if (compression === 2) { this.decompress = decompressPalmDOC; } else if (compression === 17480) { this.decompress = huffcdic(this.mobiHeader, this.loadRecord.bind(this)); } else { throw new Error("Unsupported compression"); } const trailingFlags = this.mobiHeader.trailingFlags; this.removeTrailingEntries = getRemoveTrailingEntries(trailingFlags); } } var __defProp$1 = Object.defineProperty; var __defNormalProp$1 = (obj, key, value) => key in obj ? __defProp$1(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value; var __publicField$1 = (obj, key, value) => __defNormalProp$1(obj, typeof key !== "symbol" ? key + "" : key, value); async function initKf8File(file, resourceSaveDir) { const kf8 = new Kf8(file, resourceSaveDir); await kf8.innerLoadFile(); await kf8.innerInit(); return kf8; } class Kf8 { constructor(file, resourceSaveDir = "./images") { this.file = file; __publicField$1(this, "fileArrayBuffer"); __publicField$1(this, "mobiFile"); __publicField$1(this, "fileName", ""); __publicField$1(this, "fdstTable", []); __publicField$1(this, "fullRawLength", 0); __publicField$1(this, "skelTable", []); __publicField$1(this, "fragTable", []); __publicField$1(this, "chapters", []); __publicField$1(this, "toc", []); __publicField$1(this, "fragmentOffsets", /* @__PURE__ */ new Map()); __publicField$1(this, "fragmentSelectors", /* @__PURE__ */ new Map()); __publicField$1(this, "rawHead", new Uint8Array()); __publicField$1(this, "rawTail", new Uint8Array()); __publicField$1(this, "lastLoadedHead", -1); __publicField$1(this, "lastLoadedTail", -1); __publicField$1(this, "resourceCache", /* @__PURE__ */ new Map()); __publicField$1(this, "chapterCache", /* @__PURE__ */ new Map()); __publicField$1(this, "idToChapter", /* @__PURE__ */ new Map()); __publicField$1(this, "resourceSaveDir", "./images"); this.fileName = getMobiFileName(file); this.resourceSaveDir = resourceSaveDir; } getFileInfo() { return { fileName: this.fileName }; } getMetadata() { return this.mobiFile.getMetadata(); } getCoverImage() { if (this.resourceCache.has("cover")) { return this.resourceCache.get("cover"); } const coverImage = this.mobiFile.getCoverImage(); let coverUrl = ""; if (coverImage) { coverUrl = saveResource(coverImage.raw, coverImage.type, "cover", this.resourceSaveDir); this.resourceCache.set("cover", coverUrl); } return coverUrl; } getSpine() { return this.chapters; } getToc() { return this.toc; } async innerLoadFile() { this.fileArrayBuffer = await toArrayBuffer(this.file); this.mobiFile = new MobiFile(this.fileArrayBuffer); } async innerInit() { const loadRecord = this.mobiFile.loadRecord.bind(this.mobiFile); const kf8Header = this.mobiFile.kf8Header; const fdstBuffer = this.mobiFile.loadRecord(kf8Header.fdst); const fdst = getStruct(fdstHeader, fdstBuffer); if (fdst.magic !== "FDST") { throw new Error("Missing FDST record"); } const fdstTable = Array.from( { length: fdst.numEntries }, (_, i) => 12 + i * 8 ).map((offset) => [ getUint(fdstBuffer.slice(offset, offset + 4)), getUint(fdstBuffer.slice(offset + 4, offset + 8)) ]); this.fdstTable = fdstTable; this.fullRawLength = fdstTable[fdstTable.length - 1][1]; const skelData = getIndexData(kf8Header.skel, loadRecord); const skelTable = skelData.table.map(({ name, tagMap }, index) => ({ index, name, numFrag: tagMap[1][0], offset: tagMap[6][0], length: tagMap[6][1] })); this.skelTable = skelTable; const fragData = getIndexData(kf8Header.frag, loadRecord); const fragTable = fragData.table.map(({ name, tagMap }) => ({ insertOffset: Number.parseInt(name), selector: fragData.cncx[tagMap[2][0]], index: tagMap[4][0], offset: tagMap[6][0], length: tagMap[6][1] })); this.fragTable = fragTable; const chapters = this.skelTable.reduce((acc, skel, index) => { const last = acc[acc.length - 1]; const fragStart = last?.fragEnd ?? 0; const fragEnd = fragStart + skel.numFrag; const frags = this.fragTable.slice(fragStart, fragEnd); const length = skel.length + frags.reduce((a, v) => a + v.length, 0); const totalLength = (last?.totalLength ?? 0) + length; const chapter = { id: index.toString(), skel, frags, fragEnd, length, totalLength }; this.idToChapter.set(index, chapter); acc.push(chapter); return acc; }, []); this.chapters = chapters; const ncx = this.mobiFile.getNCX(); if (ncx) { const map = ({ label, pos, children }) => { const [fid, off] = pos; const href = makePosURI(fid, off); const arr = this.fragmentOffsets.get(fid); if (arr) { arr.push(off); } else { this.fragmentOffsets.set(fid, [off]); } return { label, href, children: children?.map(map) }; }; this.toc = ncx.map(map); } } getGuide() { const index = this.mobiFile.kf8Header.guide; if (index < 4294967295) { const loadRecord = this.mobiFile.loadRecord.bind(this.mobiFile); const { table, cncx } = getIndexData(index, loadRecord); return table.map(({ name, tagMap }) => ({ label: cncx[tagMap[1][0]] ?? "", type: name?.split(/\s/), href: makePosURI(tagMap[6]?.[0] ?? tagMap[3]?.[0]) })); } return void 0; } loadRaw(start, end) { const distanceHead = end - this.rawHead.length; const distanceEnd = this.fullRawLength === 0 ? Infinity : this.fullRawLength - this.rawTail.length - start; if (distanceHead < 0 || distanceHead < distanceEnd) { while (this.rawHead.length < end) { this.lastLoadedHead++; const index = this.lastLoadedHead; const data = this.mobiFile.loadTextBuffer(index); this.rawHead = concatTypedArrays([this.rawHead, data]); } return this.rawHead.slice(start, end); } while (this.fullRawLength - this.rawTail.length > start) { this.lastLoadedTail++; const index = this.mobiFile.palmdocHeader.numTextRecords - 1 - this.lastLoadedTail; const data = this.mobiFile.loadTextBuffer(index); this.rawTail = concatTypedArrays([data, this.rawTail]); } const rawTailStart = this.fullRawLength - this.rawTail.length; return this.rawTail.slice(start - rawTailStart, end - rawTailStart); } loadText(chapter) { const { skel, frags, length } = chapter; const raw = this.loadRaw(skel.offset, skel.offset + length); let skeleton = raw.slice(0, skel.length); for (const frag of frags) { const insertOffset = frag.insertOffset - skel.offset; const offset = skel.length + frag.offset; const fragRaw = raw.slice(offset, offset + frag.length); skeleton = concatTypedArrays([ skeleton.slice(0, insertOffset), fragRaw, skeleton.slice(insertOffset) ]); const offsets = this.fragmentOffsets.get(frag.index); if (offsets) { for (const offset2 of offsets) { const str = this.mobiFile.decode(fragRaw.buffer).slice(offset2); const selector = getFragmentSelector(str); if (selector) { this.cacheFragmentSelector(frag.index, offset2, selector); } } } } return this.mobiFile.decode(skeleton.buffer); } loadChapter(id) { const numId = Number.parseInt(id); if (Number.isNaN(numId)) { return void 0; } if (this.chapterCache.has(numId)) { return this.chapterCache.get(numId); } const chapter = this.idToChapter.get(numId); if (chapter) { const processed = this.replace(this.loadText(chapter)); this.chapterCache.set(numId, processed); return processed; } return void 0; } cacheFragmentSelector(id, offset, selector) { const map = this.fragmentSelectors.get(id); if (map) { map.set(offset, selector); } else { const map2 = /* @__PURE__ */ new Map(); this.fragmentSelectors.set(id, map2); map2.set(offset, selector); } } loadFlow(index) { if (index < 4294967295) { return this.loadRaw(this.fdstTable[index][0], this.fdstTable[index][1]); } return void 0; } resolveHref(href) { if (/^(?!blob|kindle)\w+:/i.test(href)) { return void 0; } const { fid, off } = parsePosURI(href); const chapter = this.chapters.find( (chapter2) => chapter2.frags.some( (frag2) => frag2.index === fid ) ); if (!chapter) { return void 0; } const id = chapter.id; const savedSelector = this.fragmentSelectors.get(fid)?.get(off); if (savedSelector) { return { id, selector: savedSelector }; } const { skel, frags } = chapter; const frag = frags.find((frag2) => frag2.index === fid); const offset = skel.offset + skel.length + frag.offset; const fragRaw = this.loadRaw(offset, offset + frag.length); const str = this.mobiFile.decode(fragRaw.buffer).slice(off); const selector = getFragmentSelector(str); this.cacheFragmentSelector(fid, off, selector); return { id, selector }; } replaceResources(str) { return str.replace( new RegExp(kindleResourceRegex, "gi"), (matched, resourceType, id, type) => { if (this.resourceCache.has(matched)) { return this.resourceCache.get(matched); } const raw = resourceType === "flow" ? this.loadFlow(Number.parseInt(id)) : this.mobiFile.loadResource(Number.parseInt(id) - 1).raw; let blobData = ""; if (type === MIME.CSS || type === MIME.SVG) { const text = this.mobiFile.decode(raw?.buffer); const textReplaced = this.replaceResources(text); blobData = textReplaced; } else { blobData = raw; } const url = saveResource(blobData, type, id, this.resourceSaveDir); this.resourceCache.set(matched, url); return url; } ); } replace(str) { const cssUrls = []; const head = str.match(/<head[^>]*>([\s\S]*)<\/head>/i)[1]; const links = head.match(/<link[^>]*>/gi) ?? []; for (const link of links) { const linkHref = link.match(/href="([^"]*)"/i)[1]; const id = link.match(kindleResourceRegex)[2]; const href = this.replaceResources(linkHref); cssUrls.push({ id, href }); } const body = str.match(/<body[^>]*>([\s\S]*)<\/body>/i)[1]; const bodyReplaced = this.replaceResources(body); return { html: bodyReplaced, css: cssUrls }; } destroy() { this.resourceCache.forEach((url) => { { URL.revokeObjectURL(url); } }); } } var __defProp = Object.defineProperty; var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value; var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value); async function initMobiFile(file, resourceSaveDir) { const mobi = new Mobi(file, resourceSaveDir); await mobi.innerLoadFile(); await mobi.innerInit(); return mobi; } class Mobi { constructor(file, resourceSaveDir = "./images") { this.file = file; __publicField(this, "fileArrayBuffer"); __publicField(this, "mobiFile"); __publicField(this, "fileName", ""); // chapter __publicField(this, "chapters", []); __publicField(this, "idToChapter", /* @__PURE__ */ new Map()); __publicField(this, "toc", []); __publicField(this, "resourceSaveDir", "./images"); __publicField(this, "chapterCache", /* @__PURE__ */ new Map()); __publicField(this, "resourceCache", /* @__PURE__ */ new Map()); // TODO: optimize the logic __publicField(this, "recindexReg", /recindex=["']?(\d+)["']?/); __publicField(this, "mediarecindexReg", /mediarecindex=["']?(\d+)["']?/); __publicField(this, "fileposReg", /filepos=["']?(\d+)["']?/); this.fileName = getMobiFileName(file); this.resourceSaveDir = resourceSaveDir; } getFileInfo() { return { fileName: this.fileName }; } getSpine() { return this.chapters; } loadChapter(id) { const numId = Number.parseInt(id); if (Number.isNaN(numId)) { return void 0; } if (this.chapterCache.has(numId)) { return this.chapterCache.get(numId); } const chapter = this.idToChapter.get(numId); if (!chapter) { return void 0; } const processedChapter = this.replace(chapter.text); this.chapterCache.set(numId, processedChapter); return processedChapter; } getToc() { return this.toc; } getCoverImage() { if (this.resourceCache.has("cover")) { return this.resourceCache.get("cover"); } const coverImage = this.mobiFile.getCoverImage(); let coverUrl = ""; if (coverImage) { coverUrl = saveResource(coverImage.raw, coverImage.type, "cover", this.resourceSaveDir); this.resourceCache.set("cover", coverUrl); } return coverUrl; } getMetadata() { return this.mobiFile.getMetadata(); } async innerLoadFile() { this.fileArrayBuffer = await toArrayBuffer(this.file); this.mobiFile = new MobiFile(this.fileArrayBuffer); } async innerInit() { const { palmdocHeader } = this.mobiFile; const buffers = []; for (let i = 0; i < palmdocHeader.numTextRecords; i++) { buffers.push(this.mobiFile.loadTextBuffer(i)); } const array = concatTypedArrays(buffers); const str = Array.from( array, (val) => String.fromCharCode(val) ).join(""); const chapters = []; const idToChapter = /* @__PURE__ */ new Map(); let id = 0; const matches = Array.from(str.matchAll(mbpPagebreakRegex)); matches.unshift({ index: 0, input: "", groups: void 0, 0: "" }); for (let i = 0; i < matches.length; i++) { const match = matches[i]; const start = match.index; const matched = match[0]; const end = matches[i + 1]?.index; const section = str.slice(start + matched.length, end); const buffer = Uint8Array.from(section, (c) => c.charCodeAt(0)); const text = this.mobiFile.decode(buffer.buffer); const chapter = { id: String(id), text, start, end, size: buffer.length }; chapters.push(chapter); idToChapter.set(id, chapter); id++; } const lastChapterText = chapters[chapters.length - 1].text; chapters[chapters.length - 1].text = lastChapterText.slice(0, lastChapterText.indexOf("</body>")); const firstChapterText = chapters[0].text; const bodyOpenTagIndex = firstChapterText.indexOf("<body>"); chapters[0].text = firstChapterText.slice(bodyOpenTagIndex + "<body>".length); this.chapters = chapters; this.idToChapter = idToChapter; const referenceStr = firstChapterText.slice(0, bodyOpenTagIndex); const tocChapterStr = this.findTocChapter(referenceStr); if (tocChapterStr) { const wrappedChapterStr = `<wrapper>${tocChapterStr.text.replace(/filepos=(\d+)/gi, 'filepos="$1"')}</wrapper>`; const tocAst = await parsexml(wrappedChapterStr, { preserveChildrenOrder: true, explicitChildren: true, childkey: "children" }); const toc = []; this.parseNavMap(tocAst.wrapper.children, toc); this.toc = toc; } } findTocChapter(referenceStr) { const tocPosReg = /<reference.*\/>/g; const refs = referenceStr.match(tocPosReg); const typeReg = /type="(.+?)"/; const fileposReg = /filepos=(.*)/; if (refs) { for (const ref of refs) { const type = ref.match(typeReg)?.[1].trim(); const filepos = ref.match(fileposReg)?.[1].trim(); if (type === "toc" && filepos) { const tocPos = Number.parseInt(filepos, 10); const chapter = this.chapters.find((ch) => ch.end > tocPos); return chapter; } } } return void 0; } parseNavMap(children, toc) { for (const child of children) { const childName = child["#name"]; if (childName === "p" || childName === "blockquote") { let subItem = { label: "", href: "" }; if (child.a) { const a = child.a[0]; const label = a._; const filepos = Number(a.$.filepos); subItem = { label, href: `filepos:${filepos}` }; toc.push(subItem); } if (child.p || child.blockquote) { subItem.children = []; this.parseNavMap(child.children, subItem.children); } } } } loadResource(index) { if (this.resourceCache.has(String(index))) { return this.resourceCache.get(String(index)); } const { type, raw } = this.mobiFile.loadResource(index - 1); const resourceUrl = saveResource(raw, type, String(index), this.resourceSaveDir); this.resourceCache.set(String(index), resourceUrl); return resourceUrl; } replace(html) { html = html.replace( /<img[^>]*>/g, (matched) => { const recindex = matched.match(this.recindexReg)[1]; const url = this.loadResource(Number.parseInt(recindex)); return matched.replace(this.recindexReg, `src="${url}"`); } ); html = html.replace( /<(video|audio)[^>]*>/g, (matched) => { const mediarecindex = matched.match(this.recindexReg)[1]; const mediaUrl = this.loadResource(Number.parseInt(mediarecindex)); matched = matched.replace(this.mediarecindexReg, `src="${mediaUrl}"`); const recindex = matched.match(this.recindexReg)?.[1]; if (recindex) { const posterUrl = this.loadResource(Number.parseInt(recindex)); matched = matched.replace(this.recindexReg, `poster="${posterUrl}"`); } return matched; } ); html = html.replace( /<a[^>]*>/g, (matched) => { const fileposMatch = matched.match(this.fileposReg); if (!fileposMatch) { return matched; } const filepos = fileposMatch[1]; return matched.replace(this.fileposReg, `href="filepos:${filepos}"`); } ); return { html, css: [] }; } resolveHref(href) { const hrefmatch = href.match(/filepos:(\d+)/); if (!hrefmatch) { return void 0; } const filepos = hrefmatch[1]; const fileposNum = Number(filepos); const chapter = this.chapters.find((ch) => ch.end > fileposNum); if (chapter) { return { id: chapter.id, selector: `[id="filepos:${filepos}"]` }; } return void 0; } destroy() { this.resourceCache.forEach((url) => { { URL.revokeObjectURL(url); } }); this.resourceCache.clear(); } } export { initKf8File, initMobiFile };