UNPKG

@docling/docling-core

Version:

TypeScript definitions and functions for using Docling output.

80 lines (78 loc) 2.02 kB
// src/types/typing.ts

/**
 * Reports whether `value` can safely be used as the right-hand operand of
 * the `in` operator (which throws a TypeError for null, undefined and
 * primitives).
 */
function isObjectLike(value) {
  return (
    value !== null &&
    (typeof value === "object" || typeof value === "function")
  );
}

/**
 * Builds a guard that accepts document items whose `label` is one of the
 * given labels.
 *
 * @param labels Labels the returned guard should accept.
 * @returns A function returning `true` when its argument passes
 *   `isDocling.DocItem` and its `label` is listed in `labels`.
 */
function isByLabel(...labels) {
  // `isDocling` is declared further down; it is only dereferenced when the
  // returned guard is called, by which time it has been initialised.
  return function (item) {
    return isDocling.DocItem(item) && labels.includes(item.label);
  };
}

/** Label-based guards, one per kind of document item. */
var isDoclingDocItem = {
  CodeItem: isByLabel("code"),
  ListItem: isByLabel("list_item"),
  PictureItem: isByLabel("picture"),
  SectionHeaderItem: isByLabel("section_header"),
  TableItem: isByLabel("document_index", "table"),
  TextItem: isByLabel(
    "caption",
    "checkbox_selected",
    "checkbox_unselected",
    "footnote",
    "page_footer",
    "page_header",
    "paragraph",
    "reference",
    "text"
  )
};

/**
 * Structural guards for Docling output. Every guard returns `false` (rather
 * than throwing) for null, undefined and primitive inputs.
 */
var isDocling = {
  /** True when `item` carries `schema_name === "DoclingDocument"`. */
  Document(item) {
    return (
      isObjectLike(item) &&
      "schema_name" in item &&
      item.schema_name === "DoclingDocument"
    );
  },
  /** True when `item` is an object that has a `self_ref` property. */
  NodeItem(item) {
    return isObjectLike(item) && "self_ref" in item;
  },
  /**
   * True when `item` is a node whose `self_ref` is `"#/body"` or starts
   * with `"#/groups/"`.
   */
  GroupItem(item) {
    return (
      isDocling.NodeItem(item) &&
      // Guard against a non-string self_ref so startsWith cannot throw.
      typeof item.self_ref === "string" &&
      (item.self_ref.startsWith("#/groups/") || item.self_ref === "#/body")
    );
  },
  /** True when `item` is a node that is not a group. */
  DocItem(item) {
    return isDocling.NodeItem(item) && !isDocling.GroupItem(item);
  },
  ...isDoclingDocItem
};

// src/index.ts

/**
 * Walks a document tree depth-first in child order, yielding
 * `[item, level]` pairs where `level` is the depth below the starting node
 * (the starting node itself is level 0).
 *
 * Nothing is yielded when `doc` is falsy or when there is no starting node
 * (neither `options.root` nor `doc.body`).
 *
 * @param doc Document whose references are resolved during the walk.
 * @param options Optional settings:
 *   - `root`: node to start from; defaults to `doc.body`.
 *   - `withGroups`: also yield group nodes (they are always descended into).
 *   - `pageNo`: only yield doc items having a `prov` entry with this
 *     `page_no`; non-matching items are skipped but their children are
 *     still visited.
 *   - `traversePictures`: descend into the children of picture items.
 */
function* iterateDocumentItems(doc, options = {}) {
  if (!doc) {
    return;
  }
  const root = options.root ?? doc.body;
  if (root == null) {
    // No body and no explicit root: there is nothing to walk.
    return;
  }
  yield* traverse(root);

  function* traverse(item, level = 0) {
    if (!isDocling.GroupItem(item) || options.withGroups) {
      if (isDocling.DocItem(item)) {
        // The page filter applies to doc items only.
        if (
          options.pageNo === void 0 ||
          item.prov?.some((prov) => prov.page_no === options.pageNo)
        ) {
          yield [item, level];
        }
      } else {
        // Not a doc item (in practice a group when `withGroups` is set):
        // yielded regardless of the page filter.
        yield [item, level];
      }
    }
    // Pictures are leaves unless the caller opts in to traversing them.
    if (isDocling.PictureItem(item) && !options.traversePictures) {
      return;
    }
    for (const childRef of item.children ?? []) {
      const child = resolveDocumentItem(doc, childRef);
      // Dangling references resolve to undefined and are skipped.
      if (isDocling.NodeItem(child)) {
        yield* traverse(child, level + 1);
      }
    }
  }
}

/**
 * Resolves a `{ $ref }` reference against `doc`.
 *
 * The `$ref` string is split on "/", the first segment is dropped, and the
 * remaining segments are followed as property keys starting at `doc`.
 *
 * @param doc Object the path is resolved against.
 * @param item Reference object with a string `$ref` property.
 * @returns The value at the path, or `undefined` when any segment along
 *   the way is missing.
 */
function resolveDocumentItem(doc, item) {
  const path = item.$ref.split("/").slice(1);
  // Optional chaining keeps a missing intermediate segment from throwing.
  return path.reduce((node, key) => node?.[key], doc);
}

export {
  isDocling,
  isDoclingDocItem,
  iterateDocumentItems,
  resolveDocumentItem
};