UNPKG

@docling/docling-core

Version:

TypeScript definitions and functions for using Docling output.

110 lines (106 loc) 3.19 kB
"use strict";

// ---------------------------------------------------------------------------
// CommonJS interop helpers (bundler-style boilerplate).
// They expose the public API as lazy getters so that exports declared later
// in this file can be referenced before they are assigned.
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/** Defines every entry of `all` on `target` as an enumerable getter. */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as getters, skipping keys
 * that `to` already owns and the key named by `except`. Enumerability follows
 * the source property's descriptor.
 */
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from)) {
      if (!__hasOwnProp.call(to, key) && key !== except) {
        __defProp(to, key, {
          get: () => from[key],
          enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
        });
      }
    }
  }
  return to;
};

/** Wraps `mod` in a fresh object flagged with `__esModule: true`. */
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var index_exports = {};
__export(index_exports, {
  isDocling: () => isDocling,
  isDoclingDocItem: () => isDoclingDocItem,
  iterateDocumentItems: () => iterateDocumentItems,
  resolveDocumentItem: () => resolveDocumentItem
});
module.exports = __toCommonJS(index_exports);

// src/types/typing.ts

/**
 * Returns true when `value` can safely be used as the right-hand operand of
 * the `in` operator (a non-null object or a function). The `in` operator
 * throws a TypeError for anything else.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isObjectLike(value) {
  return value !== null && typeof value === "object" || typeof value === "function";
}

/**
 * Builds a predicate that accepts document items whose `label` is one of
 * `labels`.
 *
 * @param {...string} labels - Accepted label values.
 * @returns {(item: unknown) => boolean}
 */
function isByLabel(...labels) {
  return function(item) {
    return isDocling.DocItem(item) && labels.includes(item.label);
  };
}

/** Label-based predicates, one per document item kind. */
var isDoclingDocItem = {
  CodeItem: isByLabel("code"),
  ListItem: isByLabel("list_item"),
  PictureItem: isByLabel("picture"),
  SectionHeaderItem: isByLabel("section_header"),
  TableItem: isByLabel("document_index", "table"),
  TextItem: isByLabel(
    "caption",
    "checkbox_selected",
    "checkbox_unselected",
    "footnote",
    "page_footer",
    "page_header",
    "paragraph",
    "reference",
    "text"
  )
};

/**
 * Structural predicates for document nodes. Every predicate returns `false`
 * (rather than throwing) for `null`, `undefined` and primitive inputs.
 */
var isDocling = {
  /** True when `item` carries `schema_name === "DoclingDocument"`. */
  Document(item) {
    return isObjectLike(item) && "schema_name" in item && item.schema_name === "DoclingDocument";
  },
  /** True when `item` has a `self_ref` property. */
  NodeItem(item) {
    return isObjectLike(item) && "self_ref" in item;
  },
  /** True for nodes whose `self_ref` is `#/body` or starts with `#/groups/`. */
  GroupItem(item) {
    return isDocling.NodeItem(item)
      // A non-string self_ref cannot name a group; avoid calling startsWith on it.
      && typeof item.self_ref === "string"
      && (item.self_ref.startsWith("#/groups/") || item.self_ref === "#/body");
  },
  /** True for nodes that are not groups. */
  DocItem(item) {
    return isDocling.NodeItem(item) && !isDocling.GroupItem(item);
  },
  ...isDoclingDocItem
};

// src/index.ts

/**
 * Walks the document tree depth-first and yields `[item, level]` pairs, where
 * `level` is the depth relative to the traversal root (root is level 0).
 *
 * @param {object} doc - Document to traverse; a falsy value yields nothing.
 * @param {object} [options]
 * @param {object} [options.root] - Node to start from; defaults to `doc.body`.
 * @param {boolean} [options.withGroups] - Also yield group nodes.
 * @param {number} [options.pageNo] - Only yield document items that have a
 *   `prov` entry with this `page_no`. Groups are not filtered by page.
 * @param {boolean} [options.traversePictures] - Descend into the children of
 *   picture items (skipped by default).
 * @returns {Generator<[object, number]>}
 */
function* iterateDocumentItems(doc, options = {}) {
  if (doc) {
    yield* traverse(options.root ?? doc.body);
  }

  function* traverse(item, level = 0) {
    if (!isDocling.GroupItem(item) || options.withGroups) {
      if (isDocling.DocItem(item)) {
        if (options.pageNo === void 0 || item.prov?.some((prov) => prov.page_no === options.pageNo)) {
          yield [item, level];
        }
      } else {
        yield [item, level];
      }
    }

    // Pictures are treated as leaves unless the caller opts in.
    if (isDocling.PictureItem(item) && !options.traversePictures) {
      return;
    }

    for (const childRef of item.children ?? []) {
      const child = resolveDocumentItem(doc, childRef);
      // Dangling references resolve to undefined and are skipped here.
      if (isDocling.NodeItem(child)) {
        yield* traverse(child, level + 1);
      }
    }
  }
}

/**
 * Resolves a reference object against the document by splitting `item.$ref`
 * on "/", dropping the first segment (e.g. "#"), and following the remaining
 * segments as property names starting from `doc`.
 *
 * @param {object} doc - Document to resolve against.
 * @param {{ $ref: string }} item - Reference such as `{ $ref: "#/texts/0" }`.
 * @returns {*} The referenced value, or `undefined` when any segment of the
 *   path does not exist.
 */
function resolveDocumentItem(doc, item) {
  const parts = item.$ref.split("/").slice(1);
  return parts.reduce(
    // Optional chaining: a missing intermediate segment yields undefined
    // instead of throwing.
    (item2, p) => item2?.[p],
    doc
  );
}

// Annotate the CommonJS export names for ESM import in node:
// (never executed; kept so that static analysis can detect the named exports)
0 && (module.exports = {
  isDocling,
  isDoclingDocItem,
  iterateDocumentItems,
  resolveDocumentItem
});