@sanity/import
Version:
Import documents to a Sanity dataset
119 lines (106 loc) • 3.6 kB
text/typescript
/**
 * The Unicode replacement character (U+FFFD).
 * The checks in this module report its presence as a likely encoding problem in the source data.
 */
const REPLACEMENT_CHAR = '\uFFFD'
/**
 * Error type raised when a Unicode replacement character (U+FFFD) is detected.
 * Carries a distinct `name` so it can be told apart from a plain `Error`.
 */
export class ReplacementCharError extends Error {
  // Set as an instance field so every instance reports this name instead of the inherited "Error".
  override name = 'ReplacementCharError'

  /**
   * @param message - Human-readable description of where the character was found.
   */
  constructor(message: string) {
    super(message)
  }
}
/**
 * Locate the first Unicode replacement character (U+FFFD) in a string.
 *
 * @param str - The string to scan.
 * @returns The zero-based index of the first U+FFFD, or `null` when the string contains none.
 */
export function checkStringForReplacementChar(str: string): number | null {
  const position = str.indexOf(REPLACEMENT_CHAR)
  // `indexOf` signals "not found" with -1; callers of this function get `null` instead.
  if (position < 0) {
    return null
  }
  return position
}
/** Matches object keys that can be written in dot notation without quoting. */
const DOT_NOTATION_KEY = /^[a-zA-Z_][a-zA-Z0-9_]*$/

/**
 * Recursively search a value for strings containing U+FFFD.
 *
 * Strings, array elements, object keys and object values are inspected depth-first,
 * and the search stops at the first hit. Values that are neither strings, arrays nor
 * objects (numbers, booleans, `null`, `undefined`, ...) are never reported.
 *
 * The returned path uses `[i]` for array indices, `.key` for identifier-like keys and
 * `["key"]` for every other key (including the empty key and keys that themselves
 * contain U+FFFD). Bracket notation is applied at the root as well, so a top-level key
 * such as `my-key` yields `["my-key"]` rather than an ambiguous bare `my-key`, and an
 * empty top-level key can no longer be mistaken for the root itself.
 *
 * @param obj - The value to inspect.
 * @param currentPath - Path of `obj` relative to the search root; the empty string
 *   denotes the root.
 * @returns The path to the first occurrence (the empty string when `obj` itself is the
 *   offending string and no `currentPath` was given), or `null` if not found.
 */
export function findReplacementCharInObject(obj: unknown, currentPath: string = ''): string | null {
  if (obj === null || obj === undefined) {
    return null
  }

  if (typeof obj === 'string') {
    return checkStringForReplacementChar(obj) === null ? null : currentPath
  }

  if (Array.isArray(obj)) {
    for (const [i, element] of obj.entries()) {
      const result = findReplacementCharInObject(element, `${currentPath}[${i}]`)
      if (result !== null) {
        return result
      }
    }
    return null
  }

  if (typeof obj === 'object') {
    const record = obj as Record<string, unknown>
    for (const key of Object.keys(record)) {
      // Build the path for this key. Anything that is not identifier-like is quoted,
      // regardless of depth, so the path stays unambiguous and never collapses to ''.
      let keyPath: string
      if (!DOT_NOTATION_KEY.test(key)) {
        keyPath = `${currentPath}["${key}"]`
      } else if (currentPath) {
        keyPath = `${currentPath}.${key}`
      } else {
        keyPath = key
      }

      // Check the key itself before descending into its value. A key containing
      // U+FFFD is never identifier-like, so keyPath is in bracket notation here.
      if (checkStringForReplacementChar(key) !== null) {
        return keyPath
      }

      const result = findReplacementCharInObject(record[key], keyPath)
      if (result !== null) {
        return result
      }
    }
  }

  return null
}
/**
 * Check a raw NDJSON line for the Unicode replacement character (U+FFFD).
 *
 * @param line - The raw line text to scan.
 * @param lineNumber - Line number to mention in the resulting message.
 * @returns An error message when the line contains U+FFFD, or `null` when it is clean.
 */
export function validateLineForReplacementChar(line: string, lineNumber: number): string | null {
  // Clean lines are the common case, so bail out first.
  if (checkStringForReplacementChar(line) === null) {
    return null
  }
  return `Unicode replacement character (U+FFFD) found on line ${lineNumber}. This usually indicates encoding issues in the source data.`
}
/**
 * Ensure an assetMap is free of the Unicode replacement character (U+FFFD).
 *
 * Every top-level key is checked first; only when all keys are clean are the values
 * searched (recursively) in entry order. The first problem encountered is reported.
 *
 * @param assetMap - The map to validate.
 * @throws ReplacementCharError when a top-level key, or any string nested in a value,
 *   contains U+FFFD. The message names the offending location.
 */
export function validateAssetMapForReplacementChars(assetMap: Record<string, unknown>): void {
  // Pass 1: top-level keys.
  const offendingKey = Object.keys(assetMap).find(
    (candidate) => checkStringForReplacementChar(candidate) !== null,
  )
  if (offendingKey !== undefined) {
    throw new ReplacementCharError(
      `Unicode replacement character (U+FFFD) found at assetMap["${offendingKey}"] (in key). This usually indicates encoding issues in the source data.`,
    )
  }

  // Pass 2: values, searched recursively.
  for (const [entryKey, entryValue] of Object.entries(assetMap)) {
    const location = findReplacementCharInObject(entryValue, '')
    if (location === null) {
      continue
    }

    const base = `assetMap["${entryKey}"]`
    let fullPath = base
    if (location !== '') {
      // Bracketed sub-paths attach directly; dotted ones need a separator.
      fullPath = location.startsWith('[') ? `${base}${location}` : `${base}.${location}`
    }

    throw new ReplacementCharError(
      `Unicode replacement character (U+FFFD) found at ${fullPath}. This usually indicates encoding issues in the source data.`,
    )
  }
}