@reactodia/workspace
Version:
Reactodia Workspace -- a library for visual interaction with graphs in the form of a diagram.
661 lines (616 loc) • 25.3 kB
text/typescript
import { HashMap, HashSet } from '@reactodia/hashmap';
import {
ElementTypeGraph, ElementTypeModel, LinkTypeModel, ElementModel, LinkModel, PropertyTypeModel,
ElementIri, ElementTypeIri, LinkTypeIri, PropertyTypeIri, SubtypeEdge,
hashSubtypeEdge, equalSubtypeEdges,
} from '../model';
import {
DataProvider, DataProviderLinkCount, DataProviderLookupParams, DataProviderLookupItem,
} from '../dataProvider';
import { makeCaseInsensitiveFilter } from '../utils';
import { MemoryDataset, IndexQuadBy, indexedDataset } from './memoryDataset';
import * as Rdf from './rdfModel';
import { owl, rdf, rdfs, schema } from './vocabulary';
/**
* Options for {@link RdfDataProvider}.
*
* Every option is optional; an omitted option falls back to the default
* listed on the corresponding property.
*
* @see {@link RdfDataProvider}
*/
export interface RdfDataProviderOptions {
/**
* Whether to support blank node terms when accessing the data.
*
* When set to `false`, `RdfDataProvider.addGraph()` skips every quad whose
* subject, object or graph term is a blank node.
*
* @default true
*/
readonly acceptBlankNodes?: boolean;
/**
* RDF/JS-compatible term factory to create RDF terms.
*
* If omitted, `Rdf.DefaultDataFactory` is used.
*/
readonly factory?: Rdf.DataFactory;
/**
* IRI of the predicate which connects a resource to its types.
*
* It is used to compute element types of a resource, to count type instances
* and to discover declared element and link types.
*
* @default "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
*/
readonly typePredicate?: string;
/**
* IRI of the predicate to read literal labels from.
*
* Pass `null` to disable labels: returned models then have empty labels
* and a text-only lookup returns no items.
*
* @default "http://www.w3.org/2000/01/rdf-schema#label"
*/
readonly labelPredicate?: string | null;
/**
* IRI of the predicate to read an image (the first IRI or literal value)
* for each element returned from `RdfDataProvider.lookup()`.
*
* Pass `null` to skip image lookup.
*
* @default "https://schema.org/thumbnailUrl"
*/
readonly imagePredicate?: string | null;
/**
* IRIs of the base types for element types: a resource typed (via `typePredicate`)
* with one of them is reported by `RdfDataProvider.knownElementTypes()`
* even when it has no instances.
*
* **Default**:
* ```json
* [
* "http://www.w3.org/2002/07/owl#Class",
* "http://www.w3.org/2000/01/rdf-schema#Class"
* ]
* ```
*/
readonly elementTypeBaseTypes?: ReadonlyArray<string>;
/**
* IRI of the predicate which connects a derived element type to its base type;
* it is used to build subtype edges in `RdfDataProvider.knownElementTypes()`.
*
* Pass `null` to skip collecting subtype edges.
*
* @default "http://www.w3.org/2000/01/rdf-schema#subClassOf"
*/
readonly elementSubtypePredicate?: string | null;
/**
* IRIs of the base types for link types: a resource typed (via `typePredicate`)
* with one of them is reported by `RdfDataProvider.knownLinkTypes()`
* even when no quad uses it as a predicate.
*
* **Default**:
* ```json
* [
* "http://www.w3.org/2002/07/owl#ObjectProperty",
* "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property"
* ]
* ```
*/
readonly linkTypeBaseTypes?: ReadonlyArray<string>;
}
/**
* Prefix prepended to a blank node identifier by `encodeTerm()` to represent
* the blank node as an IRI string; `decodeTerm()` treats any IRI starting with
* this prefix as an encoded blank node and strips the prefix back off.
*/
const BLANK_PREFIX = 'urn:reactodia:blank:rdf:';
/**
 * Data provider which serves graph data from an in-memory
 * [RDF/JS-compatible](https://rdf.js.org/data-model-spec/) graph dataset.
 *
 * Quads are loaded with {@link RdfDataProvider.addGraph}. Every query method
 * reads the in-memory dataset synchronously and returns an already resolved promise;
 * the optional `signal` parameters are accepted but not consulted.
 *
 * @category Data
 */
export class RdfDataProvider implements DataProvider {
    /** Term factory used to create RDF terms when decoding IRIs. */
    readonly factory: Rdf.DataFactory;

    private readonly dataset: MemoryDataset;
    private readonly acceptBlankNodes: boolean;
    private readonly typePredicate: Rdf.NamedNode;
    private readonly labelPredicate: Rdf.NamedNode | null;
    private readonly imagePredicate: Rdf.NamedNode | null;
    private readonly elementTypeBaseTypes: ReadonlyArray<Rdf.NamedNode>;
    private readonly elementSubtypePredicate: Rdf.NamedNode | null;
    private readonly linkTypeBaseTypes: ReadonlyArray<Rdf.NamedNode>;
    /** Shared empty set used for lookup items without collected links. */
    private readonly EMPTY_LINKS: ReadonlySet<LinkTypeIri> = new Set();

    /**
     * Creates a provider with an empty dataset.
     *
     * @param options provider configuration; a `null` value for an optional predicate
     * disables that predicate, while an omitted one falls back to its default.
     */
    constructor(options: RdfDataProviderOptions = {}) {
        const factory = options.factory ?? Rdf.DefaultDataFactory;
        const toNode = (iri: string) => factory.namedNode(iri);
        // An explicit `null` turns the predicate off; `undefined` selects the fallback.
        const toOptionalNode = (iri: string | null | undefined, fallback: string) =>
            iri === null ? null : factory.namedNode(iri ?? fallback);

        this.factory = factory;
        this.dataset = indexedDataset(
            IndexQuadBy.S |
            IndexQuadBy.SP |
            IndexQuadBy.O |
            IndexQuadBy.OP
        );
        this.acceptBlankNodes = options.acceptBlankNodes ?? true;
        this.typePredicate = toNode(options.typePredicate ?? rdf.type);
        this.labelPredicate = toOptionalNode(options.labelPredicate, rdfs.label);
        this.imagePredicate = toOptionalNode(options.imagePredicate, schema.thumbnailUrl);
        this.elementTypeBaseTypes = (options.elementTypeBaseTypes ?? [owl.Class, rdfs.Class])
            .map(iri => toNode(iri));
        this.elementSubtypePredicate = toOptionalNode(
            options.elementSubtypePredicate, rdfs.subClassOf
        );
        this.linkTypeBaseTypes = (options.linkTypeBaseTypes ?? [owl.ObjectProperty, rdf.Property])
            .map(iri => toNode(iri));
    }

    /**
     * Adds quads to the in-memory dataset.
     *
     * When blank nodes are not accepted, quads having a blank node as the subject,
     * object or graph are skipped.
     */
    addGraph(quads: Iterable<Rdf.Quad>): void {
        if (this.acceptBlankNodes) {
            this.dataset.addAll(quads);
            return;
        }
        for (const quad of quads) {
            const mentionsBlankNode =
                quad.subject.termType === 'BlankNode' ||
                quad.object.termType === 'BlankNode' ||
                quad.graph.termType === 'BlankNode';
            if (mentionsBlankNode) {
                continue;
            }
            this.dataset.add(quad);
        }
    }

    /**
     * Encodes a named or blank node as an IRI string
     * (delegates to the module-level `encodeTerm()`).
     */
    encodeTerm(term: Rdf.NamedNode | Rdf.BlankNode): string {
        return encodeTerm(term);
    }

    /**
     * Decodes an IRI string back into a named or blank node using the provider's
     * term factory (delegates to the module-level `decodeTerm()`).
     */
    decodeTerm(
        iri: ElementIri | ElementTypeIri | LinkTypeIri | PropertyTypeIri
    ): Rdf.NamedNode | Rdf.BlankNode {
        return decodeTerm(iri, this.factory);
    }

    /**
     * Collects all known element types together with subtype edges between them.
     *
     * A type is known when it is the object of a type statement, is declared as an
     * instance of one of the element base types, or takes part in a subtype statement.
     * Blank node types without a label are left out together with their edges.
     */
    knownElementTypes(params: {
        signal?: AbortSignal;
    }): Promise<ElementTypeGraph> {
        const instanceCounts = this.computeTypeCounts();
        // Registers a type with zero instances unless it has been counted already.
        const registerType = (typeId: ElementTypeIri): void => {
            if (!instanceCounts.has(typeId)) {
                instanceCounts.set(typeId, 0);
            }
        };

        for (const baseType of this.elementTypeBaseTypes) {
            const declarations = this.dataset.iterateMatches(null, this.typePredicate, baseType);
            for (const {subject} of declarations) {
                if (isResourceTerm(subject)) {
                    registerType(this.encodeTerm(subject));
                }
            }
        }

        const subtypeEdges = new HashSet(hashSubtypeEdge, equalSubtypeEdges);
        if (this.elementSubtypePredicate) {
            const statements = this.dataset.iterateMatches(
                null, this.elementSubtypePredicate, null
            );
            for (const {subject, object} of statements) {
                if (!(isResourceTerm(subject) && isResourceTerm(object))) {
                    continue;
                }
                const derivedTypeId: ElementTypeIri = this.encodeTerm(subject);
                const baseTypeId: ElementTypeIri = this.encodeTerm(object);
                registerType(derivedTypeId);
                registerType(baseTypeId);
                subtypeEdges.add([derivedTypeId, baseTypeId]);
            }
        }

        const elementTypes: ElementTypeModel[] = [];
        const hiddenTypes = new Set<ElementTypeIri>();
        for (const [typeId, count] of instanceCounts) {
            const typeTerm = this.decodeTerm(typeId);
            let label: Rdf.Literal[] = [];
            if (this.labelPredicate) {
                label = findLiterals(this.dataset, typeTerm, this.labelPredicate);
            }
            // Anonymous (blank node) types are only useful when they carry a label.
            if (typeTerm.termType === 'BlankNode' && label.length === 0) {
                hiddenTypes.add(typeId);
                continue;
            }
            elementTypes.push({id: typeId, label, count});
        }

        const subtypeOf: SubtypeEdge[] = [];
        for (const edge of subtypeEdges.values()) {
            const [derived, base] = edge;
            if (hiddenTypes.has(derived) || hiddenTypes.has(base)) {
                continue;
            }
            subtypeOf.push(edge);
        }

        const graph: ElementTypeGraph = {elementTypes, subtypeOf};
        return Promise.resolve(graph);
    }

    /**
     * Collects all known link types: every named predicate used in the dataset
     * plus resources declared as instances of one of the link base types.
     * Blank node link types without a label are left out.
     */
    knownLinkTypes(params: {
        signal?: AbortSignal;
    }): Promise<LinkTypeModel[]> {
        const usageCounts = this.computeLinkCounts();
        for (const baseType of this.linkTypeBaseTypes) {
            const declarations = this.dataset.iterateMatches(null, this.typePredicate, baseType);
            for (const {subject} of declarations) {
                if (!isResourceTerm(subject)) {
                    continue;
                }
                const declaredId: LinkTypeIri = this.encodeTerm(subject);
                if (!usageCounts.has(declaredId)) {
                    usageCounts.set(declaredId, 0);
                }
            }
        }

        // Keys of the counts map are unique, so a plain list keeps the same items and order.
        const linkTypes: LinkTypeModel[] = [];
        for (const [linkTypeId, count] of usageCounts) {
            const linkTypeTerm = this.decodeTerm(linkTypeId);
            let label: Rdf.Literal[] = [];
            if (this.labelPredicate) {
                label = findLiterals(this.dataset, linkTypeTerm, this.labelPredicate);
            }
            if (linkTypeTerm.termType === 'BlankNode' && label.length === 0) {
                continue;
            }
            linkTypes.push({id: linkTypeId, label, count});
        }
        return Promise.resolve(linkTypes);
    }

    /**
     * Returns a model (labels and instance count) for each requested element type.
     */
    elementTypes(params: {
        classIds: ReadonlyArray<ElementTypeIri>;
        signal?: AbortSignal;
    }): Promise<Map<ElementTypeIri, ElementTypeModel>> {
        const result = new Map<ElementTypeIri, ElementTypeModel>();
        for (const classId of params.classIds) {
            const classTerm = this.decodeTerm(classId);
            let count = 0;
            for (const _quad of this.dataset.iterateMatches(null, this.typePredicate, classTerm)) {
                count += 1;
            }
            let label: Rdf.Literal[] = [];
            if (this.labelPredicate) {
                label = findLiterals(this.dataset, classTerm, this.labelPredicate);
            }
            result.set(classId, {id: classId, label, count});
        }
        return Promise.resolve(result);
    }

    /**
     * Returns a model (labels only) for each requested property type.
     */
    propertyTypes(params: {
        propertyIds: ReadonlyArray<PropertyTypeIri>;
        signal?: AbortSignal;
    }): Promise<Map<PropertyTypeIri, PropertyTypeModel>> {
        const result = new Map<PropertyTypeIri, PropertyTypeModel>();
        for (const propertyId of params.propertyIds) {
            const propertyTerm = this.decodeTerm(propertyId);
            let label: Rdf.Literal[] = [];
            if (this.labelPredicate) {
                label = findLiterals(this.dataset, propertyTerm, this.labelPredicate);
            }
            result.set(propertyId, {id: propertyId, label});
        }
        return Promise.resolve(result);
    }

    /**
     * Returns a model (labels and usage count) for each requested link type.
     */
    linkTypes(params: {
        linkTypeIds: ReadonlyArray<LinkTypeIri>;
        signal?: AbortSignal;
    }): Promise<Map<LinkTypeIri, LinkTypeModel>> {
        const requestedIds = params.linkTypeIds;
        const usageCounts = this.computeLinkCounts(requestedIds);
        const result = new Map<LinkTypeIri, LinkTypeModel>();
        for (const linkTypeId of requestedIds) {
            const linkTypeTerm = this.decodeTerm(linkTypeId);
            let label: Rdf.Literal[] = [];
            if (this.labelPredicate) {
                label = findLiterals(this.dataset, linkTypeTerm, this.labelPredicate);
            }
            const count = usageCounts.get(linkTypeId) ?? 0;
            result.set(linkTypeId, {id: linkTypeId, label, count});
        }
        return Promise.resolve(result);
    }

    /**
     * Returns element models (types and literal properties) for the requested elements.
     * Elements which never occur as a subject in the dataset are omitted from the result.
     */
    elements(params: {
        elementIds: ReadonlyArray<ElementIri>;
        signal?: AbortSignal;
    }): Promise<Map<ElementIri, ElementModel>> {
        const found = new Map<ElementIri, ElementModel>();
        for (const elementId of params.elementIds) {
            const elementTerm = this.decodeTerm(elementId);
            if (!this.dataset.hasMatches(elementTerm, null, null)) {
                continue;
            }
            found.set(elementId, {
                id: elementId,
                types: findTypes(this.dataset, elementTerm, this.typePredicate),
                properties: findProperties(this.dataset, elementTerm),
            });
        }
        return Promise.resolve(found);
    }

    /**
     * Returns links which connect a `primary` element with a `secondary` one
     * (in either direction), optionally restricted to the given link types.
     *
     * Link properties are the literal values stated about the link quad itself.
     */
    links(params: {
        primary: ReadonlyArray<ElementIri>;
        secondary: ReadonlyArray<ElementIri>;
        linkTypeIds?: ReadonlyArray<LinkTypeIri>;
        signal?: AbortSignal;
    }): Promise<LinkModel[]> {
        const collectTerms = (iris: ReadonlyArray<ElementIri>) => {
            const terms = new HashSet<Rdf.NamedNode | Rdf.BlankNode>(
                Rdf.hashTerm, Rdf.equalTerms
            );
            for (const iri of iris) {
                terms.add(this.decodeTerm(iri));
            }
            return terms;
        };
        const primaryTerms = collectTerms(params.primary);
        const secondaryTerms = collectTerms(params.secondary);
        const allowedLinkTypes = params.linkTypeIds
            ? new Set<string>(params.linkTypeIds) : undefined;

        const found: LinkModel[] = [];
        // TODO avoid full scan
        for (const quad of this.dataset) {
            const {subject, predicate, object} = quad;
            if (!isResourceTerm(subject)) {
                continue;
            }
            if (predicate.termType !== 'NamedNode') {
                continue;
            }
            if (!isResourceTerm(object)) {
                continue;
            }
            const forward = primaryTerms.has(subject) && secondaryTerms.has(object);
            if (!forward && !(secondaryTerms.has(subject) && primaryTerms.has(object))) {
                continue;
            }
            if (allowedLinkTypes && !allowedLinkTypes.has(predicate.value)) {
                continue;
            }
            const properties = findProperties(this.dataset, quad);
            found.push({
                sourceId: this.encodeTerm(subject),
                targetId: this.encodeTerm(object),
                linkTypeId: this.encodeTerm(predicate),
                properties,
            });
        }
        return Promise.resolve(found);
    }

    /**
     * Counts, per link type, the outgoing and incoming links of the element
     * which lead to or come from other resources (named or blank nodes).
     *
     * Link types with outgoing links are listed first, followed by
     * the types having incoming links only.
     */
    connectedLinkStats(params: {
        elementId: ElementIri;
        inexactCount?: boolean;
        signal?: AbortSignal;
    }): Promise<DataProviderLinkCount[]> {
        const elementTerm = this.decodeTerm(params.elementId);
        const increment = (counter: Map<LinkTypeIri, number>, linkTypeId: LinkTypeIri): void => {
            counter.set(linkTypeId, (counter.get(linkTypeId) ?? 0) + 1);
        };

        const outgoing = new Map<LinkTypeIri, number>();
        for (const {predicate, object} of this.dataset.iterateMatches(elementTerm, null, null)) {
            if (predicate.termType === 'NamedNode' && isResourceTerm(object)) {
                increment(outgoing, this.encodeTerm(predicate));
            }
        }

        const incoming = new Map<LinkTypeIri, number>();
        for (const {predicate, subject} of this.dataset.iterateMatches(null, null, elementTerm)) {
            if (predicate.termType === 'NamedNode' && isResourceTerm(subject)) {
                increment(incoming, this.encodeTerm(predicate));
            }
        }

        const stats: DataProviderLinkCount[] = [];
        for (const [linkTypeId, outCount] of outgoing) {
            stats.push({
                id: linkTypeId,
                inCount: incoming.get(linkTypeId) ?? 0,
                outCount,
            });
        }
        for (const [linkTypeId, inCount] of incoming) {
            if (!outgoing.has(linkTypeId)) {
                stats.push({
                    id: linkTypeId,
                    inCount,
                    outCount: 0,
                });
            }
        }
        return Promise.resolve(stats);
    }

    /**
     * Looks up elements using the first applicable strategy:
     *  1. elements connected to `refElementId` (optionally via `refElementLinkId` and
     *     in `linkDirection`), additionally narrowed down by `elementTypeId` if given;
     *  2. otherwise, instances of `elementTypeId`;
     *  3. otherwise, resources having a label which matches `text`.
     *
     * For strategies 1 and 2 the `text` filter is applied afterwards to the labels of
     * each candidate (only when a label predicate is configured). At most `limit` items
     * are returned.
     */
    lookup(params: DataProviderLookupParams): Promise<DataProviderLookupItem[]> {
        type ResourceTerm = Rdf.NamedNode | Rdf.BlankNode;
        interface Candidate {
            readonly term: ResourceTerm;
            outLinks?: Set<LinkTypeIri>;
            inLinks?: Set<LinkTypeIri>;
        }

        const candidates = new HashMap<ResourceTerm, Candidate>(
            Rdf.hashTerm, Rdf.equalTerms
        );
        // Returns the candidate for the term, creating it on first encounter.
        const candidateFor = (term: ResourceTerm): Candidate => {
            let candidate = candidates.get(term);
            if (!candidate) {
                candidate = {term};
                candidates.set(term, candidate);
            }
            return candidate;
        };
        const addIfMissing = (term: ResourceTerm): void => {
            if (!candidates.has(term)) {
                candidates.set(term, {term});
            }
        };

        let pendingTextFilter = params.text ? makeCaseInsensitiveFilter(params.text) : undefined;

        if (params.refElementId) {
            const {linkDirection} = params;
            const refTerm = this.decodeTerm(params.refElementId);
            const refLinkTerm = params.refElementLinkId
                ? this.decodeTerm(params.refElementLinkId) : null;

            if (!linkDirection || linkDirection === 'out') {
                const outgoing = this.dataset.iterateMatches(refTerm, refLinkTerm, null);
                for (const {predicate, object} of outgoing) {
                    if (predicate.termType === 'NamedNode' && isResourceTerm(object)) {
                        const candidate = candidateFor(object);
                        const linkTypes = candidate.outLinks ?? new Set<LinkTypeIri>();
                        linkTypes.add(this.encodeTerm(predicate));
                        candidate.outLinks = linkTypes;
                    }
                }
            }

            if (!linkDirection || linkDirection === 'in') {
                const incoming = this.dataset.iterateMatches(null, refLinkTerm, refTerm);
                for (const {predicate, subject} of incoming) {
                    if (predicate.termType === 'NamedNode' && isResourceTerm(subject)) {
                        const candidate = candidateFor(subject);
                        const linkTypes = candidate.inLinks ?? new Set<LinkTypeIri>();
                        linkTypes.add(this.encodeTerm(predicate));
                        candidate.inLinks = linkTypes;
                    }
                }
            }

            // join with filtered by type
            if (params.elementTypeId) {
                const typeTerm = this.decodeTerm(params.elementTypeId);
                // Iterate over a snapshot because candidates are removed on the go.
                for (const {term} of Array.from(candidates.values())) {
                    if (!this.dataset.hasMatches(term, this.typePredicate, typeTerm)) {
                        candidates.delete(term);
                    }
                }
            }
        } else if (params.elementTypeId) {
            const typeTerm = this.decodeTerm(params.elementTypeId);
            for (const {subject} of this.dataset.iterateMatches(null, this.typePredicate, typeTerm)) {
                if (isResourceTerm(subject)) {
                    addIfMissing(subject);
                }
            }
        } else if (pendingTextFilter && this.labelPredicate) {
            for (const {subject, object} of this.dataset.iterateMatches(null, this.labelPredicate, null)) {
                if (
                    isResourceTerm(subject) &&
                    object.termType === 'Literal' &&
                    pendingTextFilter(object.value)
                ) {
                    addIfMissing(subject);
                }
            }
            // Candidates already match the text, so there is nothing left to filter later.
            pendingTextFilter = undefined;
        }

        const textFilter = pendingTextFilter;
        const maxItems = typeof params.limit === 'number'
            ? params.limit : Number.POSITIVE_INFINITY;
        const foundItems: DataProviderLookupItem[] = [];
        for (const candidate of candidates.values()) {
            if (foundItems.length >= maxItems) {
                break;
            }

            const properties: { [id: string]: ReadonlyArray<Rdf.NamedNode | Rdf.Literal> } = {};
            if (this.labelPredicate) {
                const labels = findLiterals(this.dataset, candidate.term, this.labelPredicate);
                if (textFilter && !labels.some(label => textFilter(label.value))) {
                    continue;
                }
                properties[this.labelPredicate.value] = labels;
            }
            if (this.imagePredicate) {
                const image = findFirstIriOrLiteral(
                    this.dataset, candidate.term, this.imagePredicate
                );
                if (image) {
                    properties[this.imagePredicate.value] = [image];
                }
            }

            const element: ElementModel = {
                id: this.encodeTerm(candidate.term),
                types: findTypes(this.dataset, candidate.term, this.typePredicate),
                properties,
            };
            foundItems.push({
                element,
                inLinks: candidate.inLinks ?? this.EMPTY_LINKS,
                outLinks: candidate.outLinks ?? this.EMPTY_LINKS,
            });
        }
        return Promise.resolve(foundItems);
    }

    /**
     * Counts type statements per type, considering only named or blank node types.
     */
    private computeTypeCounts(): Map<ElementTypeIri, number> {
        const counts = new Map<ElementTypeIri, number>();
        for (const {object} of this.dataset.iterateMatches(null, this.typePredicate, null)) {
            if (!isResourceTerm(object)) {
                continue;
            }
            const typeId: ElementTypeIri = this.encodeTerm(object);
            counts.set(typeId, (counts.get(typeId) ?? 0) + 1);
        }
        return counts;
    }

    /**
     * Counts quads per named predicate by scanning the whole dataset.
     *
     * @param onlyLinkTypes when provided, only these link types are counted
     */
    private computeLinkCounts(
        onlyLinkTypes?: ReadonlyArray<LinkTypeIri>
    ): Map<LinkTypeIri, number> {
        const allowed = onlyLinkTypes ? new Set(onlyLinkTypes) : undefined;
        const counts = new Map<LinkTypeIri, number>();
        for (const {predicate} of this.dataset) {
            if (predicate.termType !== 'NamedNode') {
                continue;
            }
            const linkTypeId: LinkTypeIri = this.encodeTerm(predicate);
            if (allowed && !allowed.has(linkTypeId)) {
                continue;
            }
            counts.set(linkTypeId, (counts.get(linkTypeId) ?? 0) + 1);
        }
        return counts;
    }
}
/**
 * Type guard which tells whether the term is a resource,
 * i.e. either a named node or a blank node.
 */
function isResourceTerm(term: Rdf.Term): term is Rdf.NamedNode | Rdf.BlankNode {
    const {termType} = term;
    return termType === 'NamedNode' || termType === 'BlankNode';
}
/**
 * Finds the first value of `predicate` for `subject` which is
 * either a named node or a literal.
 *
 * @returns the found term or `undefined` when there is no such value
 */
function findFirstIriOrLiteral(
    dataset: MemoryDataset,
    subject: Rdf.NamedNode | Rdf.BlankNode | Rdf.Quad,
    predicate: Rdf.NamedNode
): Rdf.NamedNode | Rdf.Literal | undefined {
    for (const quad of dataset.iterateMatches(subject, predicate, null)) {
        if (!Rdf.equalTerms(quad.predicate, predicate)) {
            continue;
        }
        const value = quad.object;
        if (value.termType === 'NamedNode' || value.termType === 'Literal') {
            return value;
        }
    }
    return undefined;
}
/**
 * Collects encoded IRIs of all resource (named or blank node) values
 * of `predicate` for `subject`.
 *
 * @returns type IRIs without duplicates, sorted with the default string ordering
 */
function findTypes(
    dataset: MemoryDataset,
    subject: Rdf.NamedNode | Rdf.BlankNode | Rdf.Quad,
    predicate: Rdf.NamedNode
): ElementTypeIri[] {
    const uniqueTypes = new Set<ElementTypeIri>();
    for (const {object} of dataset.iterateMatches(subject, predicate, null)) {
        if (!isResourceTerm(object)) {
            continue;
        }
        uniqueTypes.add(encodeTerm(object));
    }
    const sortedTypes: ElementTypeIri[] = [...uniqueTypes];
    sortedTypes.sort();
    return sortedTypes;
}
/**
 * Collects all literal values of `predicate` for `subject`
 * in the order they are returned by the dataset.
 */
function findLiterals(
    dataset: MemoryDataset,
    subject: Rdf.NamedNode | Rdf.BlankNode | Rdf.Quad,
    predicate: Rdf.NamedNode
): Rdf.Literal[] {
    const found: Rdf.Literal[] = [];
    for (const quad of dataset.iterateMatches(subject, predicate, null)) {
        if (!Rdf.equalTerms(quad.predicate, predicate)) {
            continue;
        }
        const value = quad.object;
        if (value.termType === 'Literal') {
            found.push(value);
        }
    }
    return found;
}
/**
 * Groups literal values stated about `subject` by the encoded IRI of their predicate.
 *
 * Only quads with a named node predicate and a literal object are considered.
 */
function findProperties(
    dataset: MemoryDataset,
    subject: Rdf.NamedNode | Rdf.BlankNode | Rdf.Quad
): { [id: string]: ReadonlyArray<Rdf.NamedNode | Rdf.Literal> } {
    const grouped: { [id: string]: Array<Rdf.NamedNode | Rdf.Literal> } = {};
    for (const {predicate, object} of dataset.iterateMatches(subject, null, null)) {
        if (predicate.termType !== 'NamedNode' || object.termType !== 'Literal') {
            continue;
        }
        const key = encodeTerm(predicate);
        // Check own properties only so inherited object members are never mistaken for values.
        const existing = Object.prototype.hasOwnProperty.call(grouped, key)
            ? grouped[key] : undefined;
        if (existing) {
            existing.push(object);
        } else {
            grouped[key] = [object];
        }
    }
    return grouped;
}
/**
 * Encodes a resource term as an IRI string: a named node is represented by its IRI,
 * a blank node by its identifier prefixed with `BLANK_PREFIX`.
 *
 * @throws Error when the term is neither a named node nor a blank node
 */
function encodeTerm(term: Rdf.NamedNode | Rdf.BlankNode): string {
    if (term.termType === 'NamedNode') {
        return term.value;
    }
    if (term.termType === 'BlankNode') {
        return BLANK_PREFIX + term.value;
    }
    throw new Error(
        `Unexpected term type to encode: ${(term as Rdf.Term).termType}`
    );
}
/**
 * Decodes an IRI string produced by `encodeTerm()`: an IRI starting with `BLANK_PREFIX`
 * becomes a blank node with the prefix removed, any other IRI becomes a named node.
 *
 * @param iri encoded IRI string
 * @param factory term factory used to create the resulting term
 */
function decodeTerm(
    iri: ElementIri | ElementTypeIri | LinkTypeIri | PropertyTypeIri,
    factory: Rdf.DataFactory
): Rdf.NamedNode | Rdf.BlankNode {
    const isEncodedBlank = iri.startsWith(BLANK_PREFIX);
    return isEncodedBlank
        ? factory.blankNode(iri.slice(BLANK_PREFIX.length))
        : factory.namedNode(iri);
}