@docling/docling-core
Version:
TypeScript definitions and functions for using Docling output.
80 lines (78 loc) • 2.02 kB
JavaScript
// src/types/typing.ts
/**
 * Builds a guard that accepts document items carrying one of the given labels.
 *
 * @param {...string} labels Label values the returned guard should accept.
 * @returns {Function} Predicate taking an item; it is truthy only when
 *   `isDocling.DocItem(item)` holds and `item.label` is one of `labels`.
 */
function isByLabel(...labels) {
  return (candidate) => {
    // Only leaf document items are label-checked; anything else is rejected first.
    const isDocItem = isDocling.DocItem(candidate);
    return isDocItem && labels.includes(candidate.label);
  };
}
/**
 * Label-based guards for the individual document item kinds.
 *
 * Each entry pairs a guard name with the `label` values it accepts; the guards
 * themselves are produced by `isByLabel`, in the order listed here.
 */
var isDoclingDocItem = Object.fromEntries(
  [
    ["CodeItem", ["code"]],
    ["ListItem", ["list_item"]],
    ["PictureItem", ["picture"]],
    ["SectionHeaderItem", ["section_header"]],
    ["TableItem", ["document_index", "table"]],
    [
      "TextItem",
      [
        "caption",
        "checkbox_selected",
        "checkbox_unselected",
        "footnote",
        "page_footer",
        "page_header",
        "paragraph",
        "reference",
        "text"
      ]
    ]
  ].map(([guardName, acceptedLabels]) => [guardName, isByLabel(...acceptedLabels)])
);
/**
 * Tells whether the `in` operator may be applied to `value`.
 *
 * `in` throws a TypeError when its right-hand operand is `null`, `undefined`
 * or any other primitive, so the guards below check this first.
 */
function supportsInOperator(value) {
  return value !== null && (typeof value === "object" || typeof value === "function");
}
/**
 * Runtime guards for Docling document structures.
 *
 * Every guard returns `false` (instead of throwing) when handed `null`,
 * `undefined` or a primitive, so they are safe to call on unvalidated data such
 * as the result of resolving a reference that points nowhere.
 */
var isDocling = {
  /** True when `item` declares `schema_name === "DoclingDocument"`. */
  Document(item) {
    return supportsInOperator(item) && "schema_name" in item && item.schema_name === "DoclingDocument";
  },
  /** True when `item` has a `self_ref` property. */
  NodeItem(item) {
    return supportsInOperator(item) && "self_ref" in item;
  },
  /** True for nodes whose `self_ref` is `#/body` or starts with `#/groups/`. */
  GroupItem(item) {
    return isDocling.NodeItem(item) && (item.self_ref.startsWith("#/groups/") || item.self_ref === "#/body");
  },
  /** True for nodes that are not group items. */
  DocItem(item) {
    return isDocling.NodeItem(item) && !isDocling.GroupItem(item);
  },
  // Label-specific guards (CodeItem, TextItem, ...) are exposed alongside the structural ones.
  ...isDoclingDocItem
};
// src/index.ts
/**
 * Lazily walks a Docling document's item tree depth-first, parents before children.
 *
 * @param doc Document to walk. When falsy, nothing is yielded.
 * @param options Optional traversal settings:
 *   - `root`: item to start from; defaults to `doc.body`.
 *   - `withGroups`: also yield group items (they are skipped by default, but
 *     their children are always visited).
 *   - `pageNo`: only yield document items that have a `prov` entry whose
 *     `page_no` equals this value. Group items are not filtered by page.
 *   - `traversePictures`: descend into the children of picture items
 *     (by default a picture is yielded but its children are not visited).
 * @yields `[item, level]` pairs, where `level` is the depth below the start
 *   item (the start item itself is level 0).
 */
function* iterateDocumentItems(doc, options = {}) {
  if (doc) {
    yield* traverse(options.root ?? doc.body);
  }
  function* traverse(item, level = 0) {
    if (!isDocling.GroupItem(item) || options.withGroups) {
      if (isDocling.DocItem(item)) {
        // Without a page filter every document item is emitted; with one, items
        // lacking provenance on that page (or lacking `prov` entirely) are dropped.
        if (options.pageNo === void 0 || item.prov?.some((prov) => prov.page_no === options.pageNo)) {
          yield [item, level];
        }
      } else {
        yield [item, level];
      }
    }
    // Pictures are leaves unless the caller explicitly asks to look inside them.
    if (isDocling.PictureItem(item) && !options.traversePictures) {
      return;
    }
    for (const childRef of item.children ?? []) {
      const child = resolveDocumentItem(doc, childRef);
      // A reference may resolve to `undefined` (or another non-object value).
      // Skip it here instead of letting the guard's `in` check throw and abort
      // the whole traversal.
      if (typeof child === "object" && child !== null && isDocling.NodeItem(child)) {
        yield* traverse(child, level + 1);
      }
    }
  }
}
/**
 * Resolves a `{ $ref: "#/path/to/item" }` reference against a document.
 *
 * The reference is split on `/`, the leading segment (`#`) is dropped, and the
 * remaining segments are followed as property keys starting from `doc`. A bare
 * `"#"` therefore resolves to `doc` itself.
 *
 * @param doc Document the reference is relative to.
 * @param item Reference object; its `$ref` string holds the path.
 * @returns The value found at the path, or `undefined` when any segment along
 *   the way is missing (previously only a missing last segment gave
 *   `undefined`, while a missing intermediate segment threw a TypeError).
 */
function resolveDocumentItem(doc, item) {
  const segments = item.$ref.split("/").slice(1);
  return segments.reduce(
    // Optional chaining stops the walk quietly once the path leaves the document.
    (node, key) => node?.[key],
    doc
  );
}
export {
isDocling,
isDoclingDocItem,
iterateDocumentItems,
resolveDocumentItem
};