// @docling/docling-core
// Version:
// TypeScript definitions and functions for using Docling output.
// 131 lines (127 loc) • 4.07 kB
// JavaScript
;
// Bundler interop helpers. The built-ins are captured once under local aliases
// so every helper below goes through the same references.
var {
  defineProperty: __defProp,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyNames: __getOwnPropNames,
} = Object;
var { hasOwnProperty: __hasOwnProp } = Object.prototype;

// Defines one enumerable getter on `target` per enumerable key of `all`;
// each value of `all` is used directly as the getter function.
var __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, {
      get: all[exportName],
      enumerable: true,
    });
  }
};

// Mirrors the own string-keyed properties of `from` onto `to` as live getters.
// Keys that `to` already owns, and the key equal to `except`, are skipped.
// `desc` is scratch storage only; callers in this file do not pass it.
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Keep the source's enumerability; treat a missing descriptor as enumerable.
      enumerable: !desc || desc.enumerable,
    });
  }
  return to;
};

// Builds a fresh object carrying a non-enumerable `__esModule: true` marker and
// live getters for everything on `mod`.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
// src/index.ts
// Public API surface of the package. Every entry is a getter thunk rather than
// a value, so the bindings declared further down the file are read when the
// property is accessed, not at this point.
var index_exports = {};
__export(index_exports, {
isDocling: () => isDocling,
isDoclingAnnotation: () => isDoclingAnnotation,
isDoclingDocItem: () => isDoclingDocItem,
iterateDocumentItems: () => iterateDocumentItems,
resolveDocumentItem: () => resolveDocumentItem
});
// Publish the surface as the CommonJS export. __toCommonJS copies it onto a
// fresh object flagged with `__esModule`, so this must run after __export above.
module.exports = __toCommonJS(index_exports);
// src/types/typing.ts
/**
 * Builds a type guard that accepts document items carrying one of the given labels.
 *
 * @param {...string} labels - Labels the returned guard should accept.
 * @returns {(item: object) => boolean} Guard that passes only when
 *   `isDocling.DocItem(item)` holds and `item.label` is one of `labels`.
 */
function isItemByLabel(...labels) {
  const accepted = new Set(labels);
  return (item) => isDocling.DocItem(item) && accepted.has(item.label);
}
// Label-based type guards for document items. Each row pairs the guard's name
// with the labels it accepts; the guards are created in row order.
var isDoclingDocItem = Object.fromEntries(
  [
    ["CodeItem", ["code"]],
    ["ListItem", ["list_item"]],
    ["PictureItem", ["chart", "picture"]],
    ["SectionHeaderItem", ["section_header"]],
    ["TableItem", ["document_index", "table"]],
    [
      "TextItem",
      [
        "caption",
        "checkbox_selected",
        "checkbox_unselected",
        "footnote",
        "page_footer",
        "page_header",
        "paragraph",
        "reference",
        "text",
      ],
    ],
  ].map(([guardName, acceptedLabels]) => [guardName, isItemByLabel(...acceptedLabels)])
);
/**
 * Builds a type guard that accepts picture annotations of one of the given kinds.
 *
 * @param {...string} kinds - Values of `kind` the returned guard should accept.
 * @returns {(item: unknown) => boolean} Guard that is true when `item.kind` is
 *   one of `kinds`. `null` and `undefined` are rejected instead of throwing.
 */
function isPictureAnnotationByKind(...kinds) {
  return function(item) {
    // `== null` matches both null and undefined, the only values on which a
    // property read would throw.
    return item != null && kinds.includes(item.kind);
  };
}
// Kind-based type guards for picture annotations. Each row pairs the guard's
// name with the `kind` value it accepts; the guards are created in row order.
var isDoclingAnnotation = Object.fromEntries(
  [
    ["PictureBarChart", "bar_chart_data"],
    ["PictureClassification", "classification"],
    ["PictureDescription", "description"],
    ["PictureMisc", "misc"],
    ["PictureMolecule", "molecule_data"],
    ["PictureLineChart", "line_chart_data"],
    ["PicturePieChart", "pie_chart_data"],
    ["PictureScatterChart", "scatter_chart_data"],
    ["PictureStackedBarChart", "stacked_bar_chart_data"],
  ].map(([guardName, kind]) => [guardName, isPictureAnnotationByKind(kind)])
);
/**
 * Reports whether `key in value` holds, without throwing.
 * The `in` operator raises a TypeError when its right-hand side is not an
 * object, so primitives, `null` and `undefined` are rejected up front.
 *
 * @param {unknown} value - Candidate container.
 * @param {string} key - Property name to look for (own or inherited).
 * @returns {boolean}
 */
function hasKey(value, key) {
  const isObjectLike = value !== null && (typeof value === "object" || typeof value === "function");
  return isObjectLike && key in value;
}

// Structural type guards for Docling documents and nodes. The annotation and
// doc-item guards declared earlier are merged in, so all of them are reachable
// from this one object. Every guard defined here returns false for values it
// cannot inspect instead of throwing.
var isDocling = {
  /** A node that is not a group. */
  DocItem(item) {
    return isDocling.NodeItem(item) && !isDocling.GroupItem(item);
  },
  /** An object whose `schema_name` is exactly "DoclingDocument". */
  Document(item) {
    return hasKey(item, "schema_name") && item.schema_name === "DoclingDocument";
  },
  /**
   * A node whose `self_ref` is "#/body" or starts with "#/groups/".
   * NOTE(review): "#/body" is the only root container recognized here; confirm
   * whether any other root-level group should also match.
   */
  GroupItem(item) {
    if (!isDocling.NodeItem(item) || typeof item.self_ref !== "string") {
      return false;
    }
    return item.self_ref.startsWith("#/groups/") || item.self_ref === "#/body";
  },
  /** Anything that exposes a `self_ref` property. */
  NodeItem(item) {
    return hasKey(item, "self_ref");
  },
  ...isDoclingAnnotation,
  ...isDoclingDocItem
};
// src/index.ts
/**
 * Walks the document tree depth-first, yielding `[item, level]` pairs in
 * pre-order. Yields nothing when `doc` is falsy.
 *
 * @param {object} doc - Document whose references are resolved during the walk.
 * @param {object} [options]
 * @param {object} [options.root] - Node to start from; defaults to `doc.body`.
 * @param {boolean} [options.withGroups] - Also yield group nodes.
 * @param {number} [options.pageNo] - Only yield doc items having a `prov`
 *   entry with this `page_no`. Groups are not filtered by page.
 * @param {boolean} [options.traversePictures] - Descend into the children of
 *   picture items; by default their subtrees are skipped.
 * @yields {[object, number]} The visited node and its depth below the start node.
 */
function* iterateDocumentItems(doc, options = {}) {
  // Decides whether a visited node is reported to the caller.
  function shouldEmit(node) {
    if (isDocling.GroupItem(node) && !options.withGroups) {
      return false;
    }
    if (!isDocling.DocItem(node)) {
      return true;
    }
    if (options.pageNo === undefined) {
      return true;
    }
    // A doc item without `prov` never matches a page filter.
    return Boolean(node.prov?.some((entry) => entry.page_no === options.pageNo));
  }

  function* walk(node, depth = 0) {
    if (shouldEmit(node)) {
      yield [node, depth];
    }

    const skipSubtree = isDocling.PictureItem(node) && !options.traversePictures;
    if (skipSubtree) {
      return;
    }

    const childRefs = node.children ?? [];
    for (const ref of childRefs) {
      const resolved = resolveDocumentItem(doc, ref);
      // Anything that does not resolve to a node is ignored.
      if (isDocling.NodeItem(resolved)) {
        yield* walk(resolved, depth + 1);
      }
    }
  }

  if (!doc) {
    return;
  }
  yield* walk(options.root ?? doc.body);
}
/**
 * Resolves a `{ $ref }` pointer such as "#/texts/0" against the document.
 *
 * @param {object} doc - Document to resolve against.
 * @param {{ $ref: string }} item - Reference whose `$ref` is a "/"-separated
 *   path; the first segment (the "#") is discarded.
 * @returns {*} The value found at that path, or `undefined` when any segment
 *   along the way is missing. A bare "#" resolves to `doc` itself.
 */
function resolveDocumentItem(doc, item) {
  const segments = item.$ref.split("/").slice(1);
  return segments.reduce(
    // Optional chaining stops a dangling reference from throwing part-way down
    // the path; it resolves to undefined instead.
    (node, segment) => node?.[segment],
    doc
  );
}
// Annotate the CommonJS export names for ESM import in node.
// The `0 &&` guard short-circuits, so this assignment never runs; the statement
// exists only so the export names appear as literal text in the file.
// NOTE(review): presumably read by Node's static analysis of CommonJS exports;
// keep the statement's shape intact and in sync with the index_exports list.
0 && (module.exports = {
isDocling,
isDoclingAnnotation,
isDoclingDocItem,
iterateDocumentItems,
resolveDocumentItem
});