UNPKG

@sanity/import

Version:

Import documents to a Sanity dataset

119 lines (106 loc) 3.6 kB
const REPLACEMENT_CHAR = '\uFFFD'

/**
 * Keys matching this pattern are appended to a path with dot notation; every
 * other key is written in bracket notation. Hoisted so the regex is not
 * re-created for each key visited.
 */
const SIMPLE_KEY_PATTERN = /^[a-zA-Z_][a-zA-Z0-9_]*$/

/**
 * Error thrown when a Unicode replacement character (U+FFFD) is detected
 * during validation.
 */
export class ReplacementCharError extends Error {
  constructor(message: string) {
    super(message)
    this.name = 'ReplacementCharError'
  }
}

/**
 * Check if a string contains a Unicode replacement character (U+FFFD).
 *
 * @param str - The string to inspect.
 * @returns The index of the first occurrence, or null if not found.
 */
export function checkStringForReplacementChar(str: string): number | null {
  const index = str.indexOf(REPLACEMENT_CHAR)
  return index === -1 ? null : index
}

/**
 * Recursively search a value for strings (including object keys) containing
 * U+FFFD.
 *
 * Path format: array elements are written as `[i]`, keys that look like plain
 * identifiers as `.key` (or just `key` at the root), and all other keys as
 * `["key"]`. Bracket notation is used at the root as well, so a root key such
 * as `"a.b"` or `""` can never be confused with a nested path or with the
 * root value itself.
 *
 * @param obj - The value to search. Non-string primitives never match.
 * @param currentPath - Path prefix of `obj` within the enclosing structure.
 * @returns The path to the first occurrence (the empty string when `obj`
 *   itself is the offending string and no prefix was given), or null if no
 *   replacement character was found.
 */
export function findReplacementCharInObject(obj: unknown, currentPath: string = ''): string | null {
  if (obj === null || obj === undefined) {
    return null
  }

  if (typeof obj === 'string') {
    return checkStringForReplacementChar(obj) === null ? null : currentPath
  }

  if (Array.isArray(obj)) {
    for (const [i, element] of obj.entries()) {
      const result = findReplacementCharInObject(element, `${currentPath}[${i}]`)
      if (result !== null) {
        return result
      }
    }
    return null
  }

  if (typeof obj === 'object') {
    for (const key of Object.keys(obj)) {
      // With an empty prefix this is simply `["key"]`.
      const bracketPath = `${currentPath}["${key}"]`

      // Check the key itself for replacement characters
      if (checkStringForReplacementChar(key) !== null) {
        return bracketPath
      }

      // Build the path for this key. Keys that are not plain identifiers get
      // bracket notation regardless of depth, so the reported path stays
      // unambiguous even for root-level keys.
      let keyPath: string
      if (!SIMPLE_KEY_PATTERN.test(key)) {
        keyPath = bracketPath
      } else if (currentPath) {
        keyPath = `${currentPath}.${key}`
      } else {
        keyPath = key
      }

      const result = findReplacementCharInObject((obj as Record<string, unknown>)[key], keyPath)
      if (result !== null) {
        return result
      }
    }
  }

  return null
}

/**
 * Validate that a raw NDJSON line doesn't contain U+FFFD.
 *
 * @param line - The raw line text to check.
 * @param lineNumber - Line number to report; interpolated into the message as given.
 * @returns An error message if found, or null if clean.
 */
export function validateLineForReplacementChar(line: string, lineNumber: number): string | null {
  if (checkStringForReplacementChar(line) === null) {
    return null
  }
  return `Unicode replacement character (U+FFFD) found on line ${lineNumber}. This usually indicates encoding issues in the source data.`
}

/**
 * Validate that an assetMap doesn't contain U+FFFD in any key or in any
 * string nested inside its values.
 *
 * All top-level keys are checked before any value is inspected, so a bad key
 * is always reported ahead of a bad value.
 *
 * @param assetMap - The map to validate.
 * @throws ReplacementCharError if a replacement character is found; the
 *   message contains the path to the first offending key or value.
 */
export function validateAssetMapForReplacementChars(assetMap: Record<string, unknown>): void {
  // Check keys first
  for (const key of Object.keys(assetMap)) {
    if (checkStringForReplacementChar(key) !== null) {
      throw new ReplacementCharError(
        `Unicode replacement character (U+FFFD) found at assetMap["${key}"] (in key). This usually indicates encoding issues in the source data.`,
      )
    }
  }

  // Check values
  for (const [key, value] of Object.entries(assetMap)) {
    const path = findReplacementCharInObject(value, '')
    if (path === null) {
      continue
    }

    let fullPath: string
    if (!path) {
      // Empty path: the value itself is the offending string.
      fullPath = `assetMap["${key}"]`
    } else if (path.startsWith('[')) {
      // Bracket-notation paths attach directly, without a dot.
      fullPath = `assetMap["${key}"]${path}`
    } else {
      fullPath = `assetMap["${key}"].${path}`
    }

    throw new ReplacementCharError(
      `Unicode replacement character (U+FFFD) found at ${fullPath}. This usually indicates encoding issues in the source data.`,
    )
  }
}