UNPKG

@hey-api/json-schema-ref-parser

Version:

Parse, Resolve, and Dereference JSON Schema $ref pointers

688 lines (635 loc) 23 kB
import type { $RefParser } from '.';
import type { ParserOptions } from './options';
import Pointer from './pointer';
import $Ref from './ref';
import type $Refs from './refs';
import type { JSONSchema } from './types';
import { MissingPointerError } from './util/errors';
import * as url from './util/url';

/**
 * Everything recorded about one JSON Reference found while crawling the schema.
 * Entries are collected by `inventory$Ref` and consumed by `remap`.
 */
export interface InventoryEntry {
  /** The JSON Reference (e.g. {$ref: string}) */
  $ref: any;
  /** Is this $ref pointer DIRECTLY circular? (i.e. it references itself) */
  circular: any;
  /** How far from the JSON Schema root is this $ref pointer? */
  depth: any;
  /** Does this $ref extend its resolved value? (i.e. it has extra properties, in addition to "$ref") */
  extended: any;
  /** Does this $ref pointer point to a file other than the main JSON Schema file? */
  external: any;
  /** The file that the $ref pointer resolves to */
  file: any;
  /** The hash within `file` that the $ref pointer resolves to */
  hash: any;
  /** The number of indirect references that were traversed to resolve the value */
  indirections: any;
  /** The key in `parent` that is the $ref pointer (`null` when `parent` itself is the $ref) */
  key: any;
  /** The original container type in the external file (only set for external refs) */
  originalContainerType?: 'schemas' | 'parameters' | 'requestBodies' | 'responses' | 'headers';
  /** The object that contains this $ref pointer */
  parent: any;
  /** The path to the $ref pointer, from the JSON Schema root */
  pathFromRoot: any;
  /** The resolved value of the $ref pointer */
  value: any;
}

/**
 * Fast lookup using Map instead of linear search with deep equality.
 *
 * Entries are keyed by the identity of their parent object plus the property key,
 * so two structurally equal parents are still treated as distinct locations.
 */
const createInventoryLookup = () => {
  const lookup = new Map<string, InventoryEntry>();
  // Use WeakMap to avoid polluting objects
  const objectIds = new WeakMap<object, string>();
  let idCounter = 0;

  // Returns a stable id for `obj`, assigning a new one the first time it is seen.
  const getObjectId = (obj: any) => {
    if (!objectIds.has(obj)) {
      objectIds.set(obj, `obj_${++idCounter}`);
    }
    return objectIds.get(obj)!;
  };

  const createInventoryKey = ($refParent: any, $refKey: any) =>
    // Use WeakMap-based lookup to avoid polluting the actual schema objects
    `${getObjectId($refParent)}_${$refKey}`;

  return {
    add: (entry: InventoryEntry) => {
      const key = createInventoryKey(entry.parent, entry.key);
      lookup.set(key, entry);
    },
    find: ($refParent: any, $refKey: any) => {
      const key = createInventoryKey($refParent, $refKey);
      const result = lookup.get(key);
      return result;
    },
    remove: (entry: InventoryEntry) => {
      const key = createInventoryKey(entry.parent, entry.key);
      lookup.delete(key);
    },
  };
};

/**
 * Determine the container type from a JSON Pointer path.
 * Analyzes the path tokens to identify the appropriate OpenAPI component container.
 *
 * NOTE(review): only the singular `requestBody` token is matched, so a path through
 * `components/requestBodies/...` falls through to "schemas" - confirm whether that is intended.
 *
 * @param path - The JSON Pointer path to analyze
 * @returns The container type: "schemas", "parameters", "requestBodies", "responses", or "headers"
 */
const getContainerTypeFromPath = (
  path: string,
): 'schemas' | 'parameters' | 'requestBodies' | 'responses' | 'headers' => {
  const tokens = Pointer.parse(path);
  const has = (t: string) => tokens.includes(t);

  // Prefer more specific containers first
  if (has('parameters')) {
    return 'parameters';
  }
  if (has('requestBody')) {
    return 'requestBodies';
  }
  if (has('headers')) {
    return 'headers';
  }
  if (has('responses')) {
    return 'responses';
  }
  if (has('schema')) {
    return 'schemas';
  }

  // default: treat as schema-like
  return 'schemas';
};

/**
 * Inventories the given JSON Reference (i.e. records detailed information about it so we can
 * optimize all $refs in the schema), and then crawls the resolved value.
 */
const inventory$Ref = <S extends object = JSONSchema>({
  $refKey,
  $refParent,
  $refs,
  indirections,
  inventory,
  inventoryLookup,
  options,
  path,
  pathFromRoot,
  resolvedRefs = new Map(),
  visitedObjects = new WeakSet(),
}: {
  /**
   * The key in `$refParent` that is a JSON Reference
   */
  $refKey: string | null;
  /**
   * The object that contains a JSON Reference as one of its keys
   */
  $refParent: any;
  $refs: $Refs<S>;
  /**
   * The number of indirect references traversed so far to reach this JSON Reference
   */
  indirections: number;
  /**
   * An array of already-inventoried $ref pointers
   */
  inventory: Array<InventoryEntry>;
  /**
   * Fast lookup for inventory entries
   */
  inventoryLookup: ReturnType<typeof createInventoryLookup>;
  options: ParserOptions;
  /**
   * The full path of the JSON Reference at `$refKey`, possibly with a JSON Pointer in the hash
   */
  path: string;
  /**
   * The path of the JSON Reference at `$refKey`, from the schema root
   */
  pathFromRoot: string;
  /**
   * Cache for resolved $ref targets to avoid redundant resolution
   */
  resolvedRefs?: Map<string, any>;
  /**
   * Set of already visited objects to avoid infinite loops and redundant processing
   */
  visitedObjects?: WeakSet<object>;
}) => {
  const $ref = $refKey === null ? $refParent : $refParent[$refKey];
  const $refPath = url.resolve(path, $ref.$ref);

  // Check cache first to avoid redundant resolution
  let pointer = resolvedRefs.get($refPath);
  if (!pointer) {
    try {
      pointer = $refs._resolve($refPath, pathFromRoot, options);
    } catch (error) {
      if (error instanceof MissingPointerError) {
        // The ref couldn't be resolved in the target file. This commonly
        // happens when a wrapper file redirects via $ref to a versioned
        // file, and the bundler's crawl path retains the wrapper URL.
        // Try resolving the hash fragment against other files in $refs
        // that might contain the target schema.
        const hash = url.getHash($refPath);
        if (hash) {
          const baseFile = url.stripHash($refPath);
          for (const filePath of Object.keys($refs._$refs)) {
            if (filePath === baseFile) continue;
            try {
              pointer = $refs._resolve(filePath + hash, pathFromRoot, options);
              if (pointer) break;
            } catch {
              // try next file
            }
          }
        }
        if (!pointer) {
          console.warn(`Skipping unresolvable $ref: ${$refPath}`);
          return;
        }
      } else {
        throw error;
      }
    }

    if (pointer) {
      resolvedRefs.set($refPath, pointer);
    }
  }

  // Nothing was resolved (null or undefined): there is nothing to inventory.
  if (!pointer) return;

  const parsed = Pointer.parse(pathFromRoot);
  const depth = parsed.length;
  const file = url.stripHash(pointer.path);
  const hash = url.getHash(pointer.path);
  const external = file !== $refs._root$Ref.path;
  const extended = $Ref.isExtended$Ref($ref);
  indirections += pointer.indirections;

  // Check if this exact location (parent + key + pathFromRoot) has already been inventoried
  const existingEntry = inventoryLookup.find($refParent, $refKey);
  if (existingEntry && existingEntry.pathFromRoot === pathFromRoot) {
    // This exact location has already been inventoried, so we don't need to process it again
    // unless the new occurrence is shallower or reached through fewer indirections.
    if (depth < existingEntry.depth || indirections < existingEntry.indirections) {
      removeFromInventory(inventory, existingEntry);
      inventoryLookup.remove(existingEntry);
    } else {
      return;
    }
  }

  const newEntry: InventoryEntry = {
    $ref, // The JSON Reference (e.g. {$ref: string})
    circular: pointer.circular, // Is this $ref pointer DIRECTLY circular? (i.e. it references itself)
    depth, // How far from the JSON Schema root is this $ref pointer?
    extended, // Does this $ref extend its resolved value? (i.e. it has extra properties, in addition to "$ref")
    external, // Does this $ref pointer point to a file other than the main JSON Schema file?
    file, // The file that the $ref pointer resolves to
    hash, // The hash within `file` that the $ref pointer resolves to
    indirections, // The number of indirect references that were traversed to resolve the value
    key: $refKey, // The key in `parent` that is the $ref pointer
    originalContainerType: external ? getContainerTypeFromPath(pointer.path) : undefined, // The original container type in the external file
    parent: $refParent, // The object that contains this $ref pointer
    pathFromRoot, // The path to the $ref pointer, from the JSON Schema root
    value: pointer.value, // The resolved value of the $ref pointer
  };

  inventory.push(newEntry);
  inventoryLookup.add(newEntry);

  // Recursively crawl the resolved value.
  // When the resolution followed a $ref chain to a different file,
  // use the resolved file as the base path so that local $ref values
  // (e.g. #/components/schemas/SiblingSchema) inside the resolved
  // value resolve against the correct file.
  if (!existingEntry || external) {
    let crawlPath = pointer.path;

    const originalFile = url.stripHash($refPath);
    if (file !== originalFile) {
      crawlPath = file + url.getHash(pointer.path);
    }

    crawl({
      $refs,
      indirections: indirections + 1,
      inventory,
      inventoryLookup,
      key: null,
      options,
      parent: pointer.value,
      path: crawlPath,
      pathFromRoot,
      resolvedRefs,
      visitedObjects,
    });
  }
};

/**
 * Recursively crawls the given value, and inventories all JSON references.
 */
const crawl = <S extends object = JSONSchema>({
  $refs,
  indirections,
  inventory,
  inventoryLookup,
  key,
  options,
  parent,
  path,
  pathFromRoot,
  resolvedRefs = new Map(),
  visitedObjects = new WeakSet(),
}: {
  $refs: $Refs<S>;
  /**
   * The number of indirect references traversed so far to reach the value being crawled
   */
  indirections: number;
  /**
   * An array of already-inventoried $ref pointers
   */
  inventory: Array<InventoryEntry>;
  /**
   * Fast lookup for inventory entries
   */
  inventoryLookup: ReturnType<typeof createInventoryLookup>;
  /**
   * The property key of `parent` to be crawled
   */
  key: string | null;
  options: ParserOptions;
  /**
   * The object containing the value to crawl. If the value is not an object or array, it will be ignored.
   */
  parent: object | $RefParser;
  /**
   * The full path of the property being crawled, possibly with a JSON Pointer in the hash
   */
  path: string;
  /**
   * The path of the property being crawled, from the schema root
   */
  pathFromRoot: string;
  /**
   * Cache for resolved $ref targets to avoid redundant resolution
   */
  resolvedRefs?: Map<string, any>;
  /**
   * Set of already visited objects to avoid infinite loops and redundant processing
   */
  visitedObjects?: WeakSet<object>;
}) => {
  const obj = key === null ? parent : parent[key as keyof typeof parent];

  if (obj && typeof obj === 'object' && !ArrayBuffer.isView(obj)) {
    // Early exit if we've already processed this exact object
    if (visitedObjects.has(obj)) return;

    if ($Ref.isAllowed$Ref(obj)) {
      inventory$Ref({
        $refKey: key,
        $refParent: parent,
        $refs,
        indirections,
        inventory,
        inventoryLookup,
        options,
        path,
        pathFromRoot,
        resolvedRefs,
        visitedObjects,
      });
    } else {
      // Mark this object as visited BEFORE processing its children
      visitedObjects.add(obj);

      // Crawl the object in a specific order that's optimized for bundling.
      // This is important because it determines how `pathFromRoot` gets built,
      // which later determines which keys get dereferenced and which ones get remapped
      const keys = Object.keys(obj).sort((a, b) => {
        // Most people will expect references to be bundled into the "definitions" property,
        // so we always crawl that property first, if it exists.
        if (a === 'definitions') {
          return -1;
        } else if (b === 'definitions') {
          return 1;
        } else {
          // Otherwise, crawl the keys based on their length.
          // This produces the shortest possible bundled references
          return a.length - b.length;
        }
      }) as Array<keyof typeof obj>;

      for (const key of keys) {
        const keyPath = Pointer.join(path, key);
        const keyPathFromRoot = Pointer.join(pathFromRoot, key);
        const value = obj[key];

        if ($Ref.isAllowed$Ref(value)) {
          inventory$Ref({
            $refKey: key,
            $refParent: obj,
            $refs,
            indirections,
            inventory,
            inventoryLookup,
            options,
            path,
            pathFromRoot: keyPathFromRoot,
            resolvedRefs,
            visitedObjects,
          });
        } else {
          crawl({
            $refs,
            indirections,
            inventory,
            inventoryLookup,
            key,
            options,
            parent: obj,
            path: keyPath,
            pathFromRoot: keyPathFromRoot,
            resolvedRefs,
            visitedObjects,
          });
        }
      }
    }
  }
};

/**
 * Remap external refs by hoisting resolved values into a shared container in the root schema
 * and pointing all occurrences to those internal definitions. Internal refs remain internal.
 *
 * Mutates both `parser.schema` and `inventory` (the latter is sorted in place).
 */
function remap(parser: $RefParser, inventory: Array<InventoryEntry>) {
  const root = parser.schema as any;

  // Group & sort all the $ref pointers, so they're in the order that we need to dereference/remap them
  inventory.sort((a: InventoryEntry, b: InventoryEntry) => {
    if (a.file !== b.file) {
      // Group all the $refs that point to the same file
      return a.file < b.file ? -1 : +1;
    } else if (a.hash !== b.hash) {
      // Group all the $refs that point to the same part of the file
      return a.hash < b.hash ? -1 : +1;
    } else if (a.circular !== b.circular) {
      // If the $ref points to itself, then sort it higher than other $refs that point to this $ref
      return a.circular ? -1 : +1;
    } else if (a.extended !== b.extended) {
      // If the $ref extends the resolved value, then sort it lower than other $refs that don't extend the value
      return a.extended ? +1 : -1;
    } else if (a.indirections !== b.indirections) {
      // Sort direct references higher than indirect references
      return a.indirections - b.indirections;
    } else if (a.depth !== b.depth) {
      // Sort $refs by how close they are to the JSON Schema root
      return a.depth - b.depth;
    } else {
      // Determine how far each $ref is from the "definitions" property.
      // Most people will expect references to be bundled into the "definitions" property if possible.
      const aDefinitionsIndex = a.pathFromRoot.lastIndexOf('/definitions');
      const bDefinitionsIndex = b.pathFromRoot.lastIndexOf('/definitions');
      if (aDefinitionsIndex !== bDefinitionsIndex) {
        // Give higher priority to the $ref that's closer to the "definitions" property
        return bDefinitionsIndex - aDefinitionsIndex;
      } else {
        // All else is equal, so use the shorter path, which will produce the shortest possible reference
        return a.pathFromRoot.length - b.pathFromRoot.length;
      }
    }
  });

  // Ensure or return a container by component type. Prefer OpenAPI-aware placement;
  // otherwise use existing root containers; otherwise create components/*.
  const ensureContainer = (
    type: 'schemas' | 'parameters' | 'requestBodies' | 'responses' | 'headers',
  ) => {
    const isOas3 = !!(root && typeof root === 'object' && typeof root.openapi === 'string');
    const isOas2 = !!(root && typeof root === 'object' && typeof root.swagger === 'string');

    if (isOas3) {
      if (!root.components || typeof root.components !== 'object') {
        root.components = {};
      }
      if (!root.components[type] || typeof root.components[type] !== 'object') {
        root.components[type] = {};
      }
      return { obj: root.components[type], prefix: `#/components/${type}` } as const;
    }

    if (isOas2) {
      if (type === 'schemas') {
        if (!root.definitions || typeof root.definitions !== 'object') {
          root.definitions = {};
        }
        return { obj: root.definitions, prefix: '#/definitions' } as const;
      }
      if (type === 'parameters') {
        if (!root.parameters || typeof root.parameters !== 'object') {
          root.parameters = {};
        }
        return { obj: root.parameters, prefix: '#/parameters' } as const;
      }
      if (type === 'responses') {
        if (!root.responses || typeof root.responses !== 'object') {
          root.responses = {};
        }
        return { obj: root.responses, prefix: '#/responses' } as const;
      }
      // requestBodies/headers don't exist as reusable containers in OAS2; fallback to definitions
      if (!root.definitions || typeof root.definitions !== 'object') {
        root.definitions = {};
      }
      return { obj: root.definitions, prefix: '#/definitions' } as const;
    }

    // No explicit version: prefer existing containers
    if (root && typeof root === 'object') {
      if (root.components && typeof root.components === 'object') {
        if (!root.components[type] || typeof root.components[type] !== 'object') {
          root.components[type] = {};
        }
        return { obj: root.components[type], prefix: `#/components/${type}` } as const;
      }
      if (root.definitions && typeof root.definitions === 'object') {
        return { obj: root.definitions, prefix: '#/definitions' } as const;
      }
      // Create components/* by default if nothing exists
      if (!root.components || typeof root.components !== 'object') {
        root.components = {};
      }
      if (!root.components[type] || typeof root.components[type] !== 'object') {
        root.components[type] = {};
      }
      return { obj: root.components[type], prefix: `#/components/${type}` } as const;
    }

    // Fallback
    root.definitions = root.definitions || {};
    return { obj: root.definitions, prefix: '#/definitions' } as const;
  };

  /**
   * Choose the appropriate component container for bundling.
   * Prioritizes the original container type from external files over usage location.
   *
   * @param entry - The inventory entry containing reference information
   * @returns The container type to use for bundling
   */
  const chooseComponent = (entry: InventoryEntry) => {
    // If we have the original container type from the external file, use it
    if (entry.originalContainerType) {
      return entry.originalContainerType;
    }

    // Fallback to usage path for internal references or when original type is not available
    return getContainerTypeFromPath(entry.pathFromRoot);
  };

  // Track names per (container prefix) and per target
  const targetToNameByPrefix = new Map<string, Map<string, string>>();
  const usedNamesByObj = new Map<any, Set<string>>();

  // Replaces every character outside [A-Za-z0-9_-] with an underscore.
  const sanitize = (name: string) => name.replace(/[^A-Za-z0-9_-]/g, '_');

  // Sanitized file name of `filePath` without its hash or extension ('schema' if empty).
  const baseName = (filePath: string) => {
    try {
      const withoutHash = filePath.split('#')[0]!;
      const parts = withoutHash.split('/');
      const filename = parts[parts.length - 1] || 'schema';
      const dot = filename.lastIndexOf('.');
      return sanitize(dot > 0 ? filename.substring(0, dot) : filename);
    } catch {
      return 'schema';
    }
  };

  // Sanitized last segment of a JSON Pointer hash ('root' for an empty or bare "#" hash).
  const lastToken = (hash: string) => {
    if (!hash || hash === '#') {
      return 'root';
    }
    const tokens = hash.replace(/^#\//, '').split('/');
    return sanitize(tokens[tokens.length - 1] || 'root');
  };

  // Reserves and returns a name not yet used in `containerObj`, appending _2, _3, ... on collision.
  const uniqueName = (containerObj: any, proposed: string) => {
    if (!usedNamesByObj.has(containerObj)) {
      usedNamesByObj.set(containerObj, new Set<string>(Object.keys(containerObj || {})));
    }
    const used = usedNamesByObj.get(containerObj)!;
    let name = proposed;
    let i = 2;
    while (used.has(name)) {
      name = `${proposed}_${i++}`;
    }
    used.add(name);
    return name;
  };

  for (const entry of inventory) {
    // Safety check: ensure entry and entry.$ref are valid objects
    if (!entry || !entry.$ref || typeof entry.$ref !== 'object') {
      continue;
    }

    // Keep internal refs internal. However, if the $ref extends the resolved value
    // (i.e. it has additional properties in addition to "$ref"), then we must
    // preserve the original $ref rather than rewriting it to the resolved hash.
    if (!entry.external) {
      if (!entry.extended && entry.$ref && typeof entry.$ref === 'object') {
        entry.$ref.$ref = entry.hash;
      }
      continue;
    }

    // Avoid changing direct self-references; keep them internal
    if (entry.circular) {
      if (entry.$ref && typeof entry.$ref === 'object') {
        entry.$ref.$ref = entry.pathFromRoot;
      }
      continue;
    }

    // Choose appropriate container based on original location in external file
    const component = chooseComponent(entry);
    const { obj: container, prefix } = ensureContainer(component);

    const targetKey = `${entry.file}::${entry.hash}`;
    if (!targetToNameByPrefix.has(prefix)) {
      targetToNameByPrefix.set(prefix, new Map<string, string>());
    }
    const namesForPrefix = targetToNameByPrefix.get(prefix)!;

    let defName = namesForPrefix.get(targetKey);
    if (!defName) {
      // If the external file is one of the original input sources, prefer its assigned prefix
      let proposedBase = baseName(entry.file);
      try {
        const parserAny: any = parser as any;
        if (
          parserAny &&
          parserAny.sourcePathToPrefix &&
          typeof parserAny.sourcePathToPrefix.get === 'function'
        ) {
          const withoutHash = (entry.file || '').split('#')[0];
          const mapped = parserAny.sourcePathToPrefix.get(withoutHash);
          if (mapped && typeof mapped === 'string') {
            proposedBase = mapped;
          }
        }
      } catch {
        // Ignore errors
      }

      // Try without prefix first (cleaner names)
      const schemaName = lastToken(entry.hash);
      let proposed = schemaName;

      // Check if this name would conflict with existing schemas from other files
      if (!usedNamesByObj.has(container)) {
        usedNamesByObj.set(container, new Set<string>(Object.keys(container || {})));
      }
      const used = usedNamesByObj.get(container)!;

      // If the name is already used, add the file prefix
      if (used.has(proposed)) {
        proposed = `${proposedBase}_${schemaName}`;
      }

      defName = uniqueName(container, proposed);
      namesForPrefix.set(targetKey, defName);

      // Store the resolved value under the container
      container[defName] = entry.value;
    }

    // Point the occurrence to the internal definition, preserving extensions.
    // When `key` is null the parent object IS the $ref, so there is no slot to replace:
    // rewrite it in place instead of creating a stray "null" property on it.
    const refPath = `${prefix}/${defName}`;
    if (entry.extended || entry.key === null) {
      entry.$ref.$ref = refPath;
    } else {
      entry.parent[entry.key] = { $ref: refPath };
    }
  }
}

/**
 * Removes `entry` from `inventory` in place. Does nothing when the entry is not present
 * (an unguarded `splice(-1, 1)` would otherwise delete the last, unrelated entry).
 */
function removeFromInventory(inventory: Array<InventoryEntry>, entry: any) {
  const index = inventory.indexOf(entry);
  if (index !== -1) {
    inventory.splice(index, 1);
  }
}

/**
 * Bundles all external JSON references into the main JSON schema, thus resulting in a schema that
 * only has *internal* references, not any *external* references.
 * This method mutates the JSON schema object, adding new references and re-mapping existing ones.
 *
 * @param parser
 * @param options
 */
export function bundle(parser: $RefParser, options: ParserOptions): void {
  const inventory: Array<InventoryEntry> = [];
  const inventoryLookup = createInventoryLookup();

  const visitedObjects = new WeakSet<object>();
  const resolvedRefs = new Map<string, any>();

  crawl<JSONSchema>({
    $refs: parser.$refs,
    indirections: 0,
    inventory,
    inventoryLookup,
    key: 'schema',
    options,
    parent: parser,
    path: parser.$refs._root$Ref.path + '#',
    pathFromRoot: '#',
    resolvedRefs,
    visitedObjects,
  });

  remap(parser, inventory);
}