UNPKG

@docling/docling-core

Version:

TypeScript definitions and functions for using Docling output.

100 lines (98 loc) 2.85 kB
// src/types/typing.ts function isItemByLabel(...labels) { return function(item) { return isDocling.DocItem(item) && labels.includes(item.label); }; } var isDoclingDocItem = { CodeItem: isItemByLabel("code"), ListItem: isItemByLabel("list_item"), PictureItem: isItemByLabel("chart", "picture"), SectionHeaderItem: isItemByLabel("section_header"), TableItem: isItemByLabel("document_index", "table"), TextItem: isItemByLabel( "caption", "checkbox_selected", "checkbox_unselected", "footnote", "page_footer", "page_header", "paragraph", "reference", "text" ) }; function isPictureAnnotationByKind(...kinds) { return function(item) { return kinds.includes(item.kind); }; } var isDoclingAnnotation = { PictureBarChart: isPictureAnnotationByKind("bar_chart_data"), PictureClassification: isPictureAnnotationByKind("classification"), PictureDescription: isPictureAnnotationByKind("description"), PictureMisc: isPictureAnnotationByKind("misc"), PictureMolecule: isPictureAnnotationByKind("molecule_data"), PictureLineChart: isPictureAnnotationByKind("line_chart_data"), PicturePieChart: isPictureAnnotationByKind("pie_chart_data"), PictureScatterChart: isPictureAnnotationByKind("scatter_chart_data"), PictureStackedBarChart: isPictureAnnotationByKind( "stacked_bar_chart_data" ) }; var isDocling = { DocItem(item) { return isDocling.NodeItem(item) && !isDocling.GroupItem(item); }, Document(item) { return "schema_name" in item && item.schema_name === "DoclingDocument"; }, GroupItem(item) { return isDocling.NodeItem(item) && (item.self_ref.startsWith("#/groups/") || item.self_ref === "#/body"); }, NodeItem(item) { return "self_ref" in item; }, ...isDoclingAnnotation, ...isDoclingDocItem }; // src/index.ts function* iterateDocumentItems(doc, options = {}) { if (doc) { yield* traverse(options.root ?? doc.body); } function* traverse(item, level = 0) { if (!isDocling.GroupItem(item) || options.withGroups) { if (isDocling.DocItem(item)) { if (options.pageNo === void 0 || item.prov?.some((prov) => prov.page_no === options.pageNo)) { yield [item, level]; } } else { yield [item, level]; } } if (isDocling.PictureItem(item) && !options.traversePictures) { return; } for (const childRef of item.children ?? []) { const child = resolveDocumentItem(doc, childRef); if (isDocling.NodeItem(child)) { yield* traverse(child, level + 1); } } } } function resolveDocumentItem(doc, item) { const parts = item.$ref.split("/").slice(1); return parts.reduce( (item2, p) => item2[p], doc ); } export { isDocling, isDoclingAnnotation, isDoclingDocItem, iterateDocumentItems, resolveDocumentItem };