@lenml/char-card-reader
Version:
SillyTavern character card info reader
191 lines (160 loc) • 6.29 kB
text/typescript
import { WebPChunk } from "./types";
/**
 * Minimal WebP (RIFF container) helpers: enumerating top-level chunks and
 * pulling the EXIF `UserComment` text out of an `EXIF` chunk.
 */
export namespace WebP {
  /** Bytes occupied by the leading `RIFF` + size + `WEBP` signature. */
  const RIFF_HEADER_SIZE = 12;
  /** Bytes occupied by a chunk header: 4-byte FourCC + 4-byte little-endian size. */
  const CHUNK_HEADER_SIZE = 8;
  /** Size of one TIFF IFD entry: tag(2) + type(2) + count(4) + value/offset(4). */
  const IFD_ENTRY_SIZE = 12;
  /** IFD0 tag whose value is the offset of the Exif SubIFD. */
  const TAG_EXIF_IFD_POINTER = 0x8769;
  /** Exif SubIFD tag holding the user comment. */
  const TAG_USER_COMMENT = 0x9286;

  /**
   * Decodes UTF-16 bytes. A leading byte-order mark decides the endianness;
   * without one, `littleEndianFallback` (the TIFF byte order) is used.
   */
  function decodeUtf16(
    bytes: Uint8Array,
    littleEndianFallback: boolean
  ): string {
    let littleEndian = littleEndianFallback;
    if (bytes.length >= 2) {
      if (bytes[0] === 0xff && bytes[1] === 0xfe) {
        littleEndian = true;
      } else if (bytes[0] === 0xfe && bytes[1] === 0xff) {
        littleEndian = false;
      }
    }
    // TextDecoder strips a BOM that matches its own encoding by default.
    return new TextDecoder(littleEndian ? "utf-16le" : "utf-16be").decode(
      bytes
    );
  }

  /**
   * Extracts the EXIF `UserComment` (tag 0x9286) text from an `EXIF` chunk.
   *
   * Lookup path: IFD0 -> Exif SubIFD (via tag 0x8769) -> UserComment. When
   * IFD0 has no usable SubIFD pointer, IFD0 itself is searched.
   *
   * @param fullFileBuffer - The complete file contents.
   * @param chunk - Location of the chunk; `offset` must point at the chunk's
   *   FourCC (i.e. the start of the 8-byte chunk header). `length` is
   *   currently not consulted: reads are bounded by the buffer only.
   * @returns The decoded comment, or `undefined` when the TIFF header is not
   *   recognised, the first IFD offset is 0/unreadable, or no UserComment
   *   entry is found.
   */
  export function extract_user_comment(
    fullFileBuffer: Uint8Array,
    chunk: { offset: number; length: number }
  ): string | undefined {
    // Layout: [FourCC: 4 bytes][size: 4 bytes][payload...]
    let tiffStart = chunk.offset + CHUNK_HEADER_SIZE;
    if (tiffStart >= fullFileBuffer.length) return undefined;

    // Some writers leave the JPEG APP1 prefix "Exif\0\0" in front of the TIFF
    // header; it does not belong in a WebP EXIF chunk, so skip its 6 bytes.
    if (
      fullFileBuffer[tiffStart] === 0x45 && // 'E'
      fullFileBuffer[tiffStart + 1] === 0x78 && // 'x'
      fullFileBuffer[tiffStart + 2] === 0x69 && // 'i'
      fullFileBuffer[tiffStart + 3] === 0x66 // 'f'
    ) {
      tiffStart += 6;
    }

    // Byte order: "II" (0x4949) = little endian, "MM" (0x4D4D) = big endian.
    const byteOrderMarker =
      (fullFileBuffer[tiffStart] << 8) | fullFileBuffer[tiffStart + 1];
    let isLittleEndian: boolean;
    if (byteOrderMarker === 0x4949) {
      isLittleEndian = true;
    } else if (byteOrderMarker === 0x4d4d) {
      isLittleEndian = false;
    } else {
      console.error("无法识别的 TIFF 头:", byteOrderMarker.toString(16));
      return undefined;
    }

    // Readers take offsets relative to the TIFF header and yield 0 for any
    // read that would fall outside the buffer.
    const view = new DataView(
      fullFileBuffer.buffer,
      fullFileBuffer.byteOffset,
      fullFileBuffer.byteLength
    );
    const readU16 = (offset: number): number => {
      const p = tiffStart + offset;
      if (p < 0 || p + 2 > fullFileBuffer.length) return 0;
      return view.getUint16(p, isLittleEndian);
    };
    const readU32 = (offset: number): number => {
      const p = tiffStart + offset;
      if (p < 0 || p + 4 > fullFileBuffer.length) return 0;
      return view.getUint32(p, isLittleEndian);
    };

    // The offset of the first IFD sits in the 4 bytes after the 4-byte header.
    const firstIFDOffset = readU32(4);
    if (firstIFDOffset === 0) return undefined;

    /** Linear scan of one IFD for `targetTagId`; `null` when absent. */
    const findTagInIFD = (
      ifdOffset: number,
      targetTagId: number
    ): { type: number; count: number; valueOffsetOrData: number } | null => {
      const numEntries = readU16(ifdOffset);
      for (let i = 0; i < numEntries; i++) {
        const entryOffset = ifdOffset + 2 + i * IFD_ENTRY_SIZE;
        if (readU16(entryOffset) === targetTagId) {
          return {
            type: readU16(entryOffset + 2),
            count: readU32(entryOffset + 4),
            // Position of the 4-byte field that holds either the value
            // itself or a pointer to it.
            valueOffsetOrData: entryOffset + 8,
          };
        }
      }
      return null;
    };

    // A. Follow the Exif SubIFD pointer from IFD0 when there is a usable one.
    let targetIFDOffset = firstIFDOffset;
    const exifPtrEntry = findTagInIFD(firstIFDOffset, TAG_EXIF_IFD_POINTER);
    if (exifPtrEntry) {
      const exifIFDOffset = readU32(exifPtrEntry.valueOffsetOrData);
      // 0 means "unreadable or absent"; following it would parse the TIFF
      // header itself as an IFD, so keep searching IFD0 instead.
      if (exifIFDOffset !== 0) {
        targetIFDOffset = exifIFDOffset;
      }
    }

    // B. Look for UserComment in the selected IFD.
    const userCommentEntry = findTagInIFD(targetIFDOffset, TAG_USER_COMMENT);
    if (!userCommentEntry) return undefined;

    const { count, valueOffsetOrData } = userCommentEntry;
    // UserComment is type 7 (UNDEFINED, 1 byte per unit). Values of up to
    // 4 bytes are stored inline in the entry; anything longer (the realistic
    // case for character cards) is stored behind a pointer.
    const rawDataOffset =
      count <= 4 ? valueOffsetOrData : readU32(valueOffsetOrData);
    const absDataStart = tiffStart + rawDataOffset;
    // subarray creates a view; the payload can be large, so avoid copying it.
    const rawData = fullFileBuffer.subarray(absDataStart, absDataStart + count);

    // The first 8 bytes may carry a character-code identifier.
    const header = String.fromCharCode(...rawData.subarray(0, 8));
    if (header.startsWith("ASCII\0\0\0")) {
      return new TextDecoder("utf-8").decode(rawData.subarray(8));
    }
    if (header.startsWith("UNICODE\0")) {
      // The byte order of the text is not fixed by the identifier: honour a
      // BOM if present, otherwise follow the TIFF byte order.
      return decodeUtf16(rawData.subarray(8), isLittleEndian);
    }
    // No recognised identifier: decode everything as UTF-8.
    return new TextDecoder("utf-8").decode(rawData);
  }

  /**
   * Walks the top-level chunks that follow the 12-byte RIFF/WEBP header.
   *
   * The signature itself is not validated. Parsing stops at the first chunk
   * whose declared payload would run past the end of `data`.
   *
   * @param data - The complete file contents.
   * @returns One record per chunk: FourCC `type`, `offset` of the chunk
   *   header, payload `length`, and a hex `preview` of up to 16 bytes starting
   *   at the payload (the window is not clamped to the payload, so for
   *   payloads shorter than 16 bytes it continues into the following bytes).
   */
  export function parse_chunks(data: Uint8Array): WebPChunk[] {
    const chunks: WebPChunk[] = [];
    const len = data.length;
    let offset = RIFF_HEADER_SIZE;

    while (offset + CHUNK_HEADER_SIZE <= len) {
      const type = String.fromCharCode(
        data[offset],
        data[offset + 1],
        data[offset + 2],
        data[offset + 3]
      );
      // `>>> 0` keeps the size unsigned. Without it, a size with the top bit
      // set turns negative, slips past the bounds check below and can stall
      // or rewind the cursor (an endless loop on crafted input).
      const chunkLength =
        (data[offset + 4] |
          (data[offset + 5] << 8) |
          (data[offset + 6] << 16) |
          (data[offset + 7] << 24)) >>>
        0;
      const payloadStart = offset + CHUNK_HEADER_SIZE;
      const payloadEnd = payloadStart + chunkLength;
      if (payloadEnd > len) break;

      chunks.push({
        type,
        offset,
        length: chunkLength,
        preview: Array.from(data.subarray(payloadStart, payloadStart + 16))
          .map((b) => b.toString(16).padStart(2, "0"))
          .join(" "),
      });

      // Odd-sized payloads are followed by one padding byte.
      offset = payloadEnd + (chunkLength % 2);
    }
    return chunks;
  }
}