UNPKG

@docling/docling-core

Version:

TypeScript definitions and functions for using Docling output.

131 lines (127 loc) 4.07 kB
"use strict";

// Bundler-generated CommonJS interop helpers.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/** Defines every entry of `all` on `target` as an enumerable getter. */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as live getters, skipping
 * keys `to` already owns and the single key named by `except`.
 */
const __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (const key of __getOwnPropNames(from)) {
      if (!__hasOwnProp.call(to, key) && key !== except) {
        __defProp(to, key, {
          get: () => from[key],
          enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
        });
      }
    }
  }
  return to;
};

/** Wraps a module namespace object in an object flagged with `__esModule`. */
const __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
const index_exports = {};
__export(index_exports, {
  isDocling: () => isDocling,
  isDoclingAnnotation: () => isDoclingAnnotation,
  isDoclingDocItem: () => isDoclingDocItem,
  iterateDocumentItems: () => iterateDocumentItems,
  resolveDocumentItem: () => resolveDocumentItem
});
// Guarded so that evaluating this file without a CommonJS `module` binding
// (for example as an ES module) does not raise a ReferenceError.
if (typeof module !== "undefined") {
  module.exports = __toCommonJS(index_exports);
}

// src/types/typing.ts

/**
 * Reports whether `value` can be used as the right-hand side of `in` and
 * have properties read from it without throwing.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isObjectLike(value) {
  return value !== null && (typeof value === "object" || typeof value === "function");
}

/**
 * Builds a predicate that accepts document items whose `label` is one of
 * `labels`.
 *
 * @param {...string} labels - Accepted `label` values.
 * @returns {(item: unknown) => boolean}
 */
function isItemByLabel(...labels) {
  return function (item) {
    return isDocling.DocItem(item) && labels.includes(item.label);
  };
}

/** Predicates that classify a document item by its `label`. */
const isDoclingDocItem = {
  CodeItem: isItemByLabel("code"),
  ListItem: isItemByLabel("list_item"),
  PictureItem: isItemByLabel("chart", "picture"),
  SectionHeaderItem: isItemByLabel("section_header"),
  TableItem: isItemByLabel("document_index", "table"),
  TextItem: isItemByLabel(
    "caption",
    "checkbox_selected",
    "checkbox_unselected",
    "footnote",
    "page_footer",
    "page_header",
    "paragraph",
    "reference",
    "text"
  )
};

/**
 * Builds a predicate that accepts annotations whose `kind` is one of `kinds`.
 * Values that cannot carry properties are rejected rather than dereferenced.
 *
 * @param {...string} kinds - Accepted `kind` values.
 * @returns {(item: unknown) => boolean}
 */
function isPictureAnnotationByKind(...kinds) {
  return function (item) {
    return isObjectLike(item) && kinds.includes(item.kind);
  };
}

/** Predicates that classify a picture annotation by its `kind`. */
const isDoclingAnnotation = {
  PictureBarChart: isPictureAnnotationByKind("bar_chart_data"),
  PictureClassification: isPictureAnnotationByKind("classification"),
  PictureDescription: isPictureAnnotationByKind("description"),
  PictureMisc: isPictureAnnotationByKind("misc"),
  PictureMolecule: isPictureAnnotationByKind("molecule_data"),
  PictureLineChart: isPictureAnnotationByKind("line_chart_data"),
  PicturePieChart: isPictureAnnotationByKind("pie_chart_data"),
  PictureScatterChart: isPictureAnnotationByKind("scatter_chart_data"),
  PictureStackedBarChart: isPictureAnnotationByKind(
    "stacked_bar_chart_data"
  )
};

/**
 * Structural type guards. Every guard returns `false` for `null`,
 * `undefined` and primitives instead of throwing. The guards call each other
 * through `isDocling` rather than `this`, so they may be detached and passed
 * around as plain functions.
 */
const isDocling = {
  /** A node that is not a group. */
  DocItem(item) {
    return isDocling.NodeItem(item) && !isDocling.GroupItem(item);
  },
  /** An object whose `schema_name` is `"DoclingDocument"`. */
  Document(item) {
    return isObjectLike(item) && "schema_name" in item && item.schema_name === "DoclingDocument";
  },
  /** A node whose `self_ref` is `"#/body"` or starts with `"#/groups/"`. */
  GroupItem(item) {
    if (!isDocling.NodeItem(item) || typeof item.self_ref !== "string") {
      return false;
    }
    return item.self_ref.startsWith("#/groups/") || item.self_ref === "#/body";
  },
  /** Any object that has a `self_ref` property. */
  NodeItem(item) {
    return isObjectLike(item) && "self_ref" in item;
  },
  ...isDoclingAnnotation,
  ...isDoclingDocItem
};

// src/index.ts

/**
 * Walks the document tree depth-first and yields `[item, level]` pairs,
 * where `level` is the depth below the starting node (which is level 0).
 *
 * Child references that do not resolve to a node are skipped. Nothing is
 * yielded when `doc` is falsy or when there is no starting node.
 *
 * @param {object} doc - Document whose `$ref` pointers are resolved against.
 * @param {object} [options]
 * @param {object} [options.root] - Starting node; defaults to `doc.body`.
 * @param {boolean} [options.withGroups] - Also yield group nodes.
 * @param {number} [options.pageNo] - Only yield non-group items that have a
 *   `prov` entry with this `page_no`.
 * @param {boolean} [options.traversePictures] - Descend into the children of
 *   picture items.
 * @returns {Generator<[object, number], void, unknown>}
 */
function* iterateDocumentItems(doc, options = {}) {
  // The default parameter only covers `undefined`; tolerate `null` as well.
  const opts = options ?? {};
  if (!doc) {
    return;
  }
  const root = opts.root ?? doc.body;
  if (root == null) {
    return;
  }
  yield* traverse(root);

  function* traverse(item, level = 0) {
    if (!isDocling.GroupItem(item) || opts.withGroups) {
      if (isDocling.DocItem(item)) {
        // The page filter applies to non-group items only.
        const onRequestedPage =
          opts.pageNo === void 0 ||
          item.prov?.some((prov) => prov.page_no === opts.pageNo);
        if (onRequestedPage) {
          yield [item, level];
        }
      } else {
        yield [item, level];
      }
    }
    // A picture is yielded above, but its children are opt-in.
    if (isDocling.PictureItem(item) && !opts.traversePictures) {
      return;
    }
    for (const childRef of item.children ?? []) {
      const child = resolveDocumentItem(doc, childRef);
      if (isDocling.NodeItem(child)) {
        yield* traverse(child, level + 1);
      }
    }
  }
}

/**
 * Resolves a reference such as `{ $ref: "#/texts/0" }` against `doc` by
 * following each "/"-separated segment after the first.
 *
 * @param {object} doc - Object to resolve against.
 * @param {{ $ref: string }} item - Reference to resolve.
 * @returns {unknown} The referenced value, or `undefined` when `item` has no
 *   string `$ref` or a segment along the path is missing.
 */
function resolveDocumentItem(doc, item) {
  const ref = item?.$ref;
  if (typeof ref !== "string") {
    return undefined;
  }
  return ref
    .split("/")
    .slice(1)
    .reduce((node, part) => node?.[part], doc);
}

// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  isDocling,
  isDoclingAnnotation,
  isDoclingDocItem,
  iterateDocumentItems,
  resolveDocumentItem
});