/*
 * @comake/skl-js-engine
 * Version: (not specified)
 * Standard Knowledge Language JavaScript Engine
 * 428 lines (397 loc) • 17.5 kB
 * text/typescript
 */
/* eslint-disable capitalized-comments */
/* eslint-disable @typescript-eslint/prefer-nullish-coalescing */
/* eslint-disable indent */
/* eslint-disable @typescript-eslint/naming-convention */
import type { OrArray } from '@comake/rmlmapper-js';
import type { GraphObject, NodeObject } from 'jsonld';
import type { Frame } from 'jsonld/jsonld-spec';
import type {
AggregateExpression,
ConstructQuery,
Pattern,
Triple,
Variable
} from 'sparqljs';
import { Logger } from '../../../logger';
import { PerformanceLogger } from '../../../util/PerformanceLogger';
import {
createSparqlBasicGraphPattern, createSparqlCountSelectQuery, createSparqlGraphPattern,
createSparqlSelectGroup,
createSparqlSelectQuery,
createValuesPatternsForVariables,
creteSparqlAskQuery, entityGraphTriple, entityVariable, getEntityVariableValuesFromVariables,
getRdfTypeVariableValuesFromVariables,
groupSelectQueryResultsByKey,
rdfTypeNamedNode,
rdfTypesVariable,
rdfTypeVariable,
selectQueryResultsAsJSValues
} from '../../../util/SparqlUtil';
import {
triplesToJsonld,
triplesToJsonldWithFrame
} from '../../../util/TripleUtil';
import type { Entity } from '../../../util/Types';
import { ensureArray } from '../../../util/Util';
import type {
FindAllOptions, FindCountOptions,
FindExistsOptions, FindOneOptions, FindOptionsWhere
} from '../../FindOptionsTypes';
import type { GroupByOptions, GroupByResponse, GroupResult } from '../../GroupOptionTypes';
import type { QueryAdapter, RawQueryResult } from '../QueryAdapter';
import { InMemorySparqlQueryExecutor } from './query-executor/InMemorySparqlQueryExecutor';
import { SparqlEndpointQueryExecutor } from './query-executor/SparqlEndpointQueryExecutor';
import type { QueryExecutor } from './query-executor/SparqlQueryExecutor';
import type { SparqlQueryAdapterOptions } from './SparqlQueryAdapterOptions';
import { SparqlQueryBuilder } from './SparqlQueryBuilder';
import { SparqlUpdateBuilder } from './SparqlUpdateBuilder';
/**
 * A {@link QueryAdapter} that reads and writes entities by issuing SPARQL
 * queries through a {@link QueryExecutor}.
 *
 * The executor is chosen from `options.type`: `'memory'` uses an
 * {@link InMemorySparqlQueryExecutor}, `'sparql'` uses a
 * {@link SparqlEndpointQueryExecutor} configured with the given options.
 */
export class SparqlQueryAdapter implements QueryAdapter {
  protected readonly queryExecutor: QueryExecutor;
  private readonly setTimestamps: boolean;
  private readonly logger: Logger;

  /**
   * @param options - Adapter configuration. `setTimestamps` defaults to `false`.
   * @throws Error when `options.type` is neither `'memory'` nor `'sparql'`.
   */
  public constructor(options: SparqlQueryAdapterOptions) {
    this.setTimestamps = options.setTimestamps ?? false;
    switch (options.type) {
      case 'memory':
        this.queryExecutor = new InMemorySparqlQueryExecutor();
        break;
      case 'sparql':
        this.queryExecutor = new SparqlEndpointQueryExecutor(options);
        break;
      default:
        // The cast is needed because `options` may be narrowed to `never` here.
        throw new Error(
          `Unsupported SparqlQueryAdapter type: ${String((options as { type?: unknown }).type)}`
        );
    }
    this.logger = Logger.getInstance();
  }

  /**
   * Runs a raw SPARQL SELECT query string and converts the bindings to JS values.
   *
   * @param query - The SPARQL query text.
   * @returns One converted record per result row; an empty array when there are no rows.
   */
  public async executeRawQuery<T extends RawQueryResult>(query: string): Promise<T[]> {
    const response =
      await this.queryExecutor.executeSparqlSelectAndGetDataRaw(query);
    if (response.length === 0) {
      return [] as T[];
    }
    return selectQueryResultsAsJSValues<T>(response);
  }

  /**
   * Runs a raw SPARQL CONSTRUCT query string and converts the triples to JSON-LD.
   *
   * @param query - The SPARQL query text.
   * @param frame - Optional frame forwarded to the JSON-LD conversion.
   * @returns The converted graph, or `{ '@graph': [] }` when no triples are returned.
   */
  public async executeRawConstructQuery(query: string, frame?: Frame): Promise<GraphObject> {
    const response = await this.queryExecutor.executeSparqlConstructAndGetDataRaw(query);
    if (response.length === 0) {
      return { '@graph': []};
    }
    return await triplesToJsonldWithFrame(response, frame);
  }

  /**
   * Runs a raw SPARQL update string.
   *
   * @param query - The SPARQL update text.
   */
  public async executeRawUpdate(
    query: string
  ): Promise<void> {
    await this.queryExecutor.executeRawSparqlUpdate(query);
  }

  /**
   * Finds a single entity by running the find-all pipeline with `limit: 1`.
   *
   * @param options - Find options; `limit` is always overridden with 1.
   * @returns `null` when framing is enabled and nothing matched, the single
   * matched entity when exactly one came back, otherwise the raw JSON-LD result.
   */
  public async find(options?: FindOneOptions): Promise<Entity | null> {
    return PerformanceLogger.withSpan('Adapter.find', async() => {
      const jsonld = await this.findAllAsJsonLd({ ...options, limit: 1 });
      if (Array.isArray(jsonld) && !options?.skipFraming) {
        if (jsonld.length === 0) {
          return null;
        }
        if (jsonld.length === 1) {
          return jsonld[0] as Entity;
        }
      }
      return jsonld as Entity;
    }, { options });
  }

  /**
   * Shorthand for {@link find} with only a `where` clause.
   *
   * @param where - The conditions the entity must match.
   */
  public async findBy(where: FindOptionsWhere): Promise<Entity | null> {
    return PerformanceLogger.withSpan('Adapter.findBy', async() => this.find({ where }), { where });
  }

  /**
   * Finds all entities matching the options.
   *
   * @param options - Find options.
   * @returns The matches as an array; a single non-array result is wrapped in one.
   */
  public async findAll(options?: FindAllOptions): Promise<Entity[]> {
    return PerformanceLogger.withSpan('Adapter.findAll', async() => {
      const jsonld = await this.findAllAsJsonLd(options);
      if (Array.isArray(jsonld)) {
        return jsonld as Entity[];
      }
      return [ jsonld ] as Entity[];
    }, { options });
  }

  /**
   * Builds the CONSTRUCT query for the options, executes it and converts the
   * triples to JSON-LD.
   *
   * @param options - Find options.
   * @returns The JSON-LD result, or an empty array when the entity pre-selection
   * ran and matched nothing.
   */
  private async findAllAsJsonLd(options?: FindAllOptions): Promise<OrArray<NodeObject>> {
    const queryBuilder = new SparqlQueryBuilder();
    const { where, selectionTriples, entityOrder, rdfTypes } = await this.buildFindAllQueryData(queryBuilder, options);
    // An explicitly empty order means the pre-selection found no entities,
    // so the CONSTRUCT query does not need to run.
    if (entityOrder && entityOrder.length === 0) {
      return [];
    }
    const queryData = queryBuilder.buildEntitySelectPatternsFromOptions(entityVariable, options);
    const query = queryBuilder.buildConstructFromEntitySelectQuery(
      where,
      selectionTriples,
      options?.select,
      queryData.selectVariables
    );
    return await this.executeEntitySelectQuery(query, options, entityOrder, rdfTypes);
  }

  /**
   * Prepares the WHERE patterns and selection triples for the CONSTRUCT query,
   * running an entity pre-selection SELECT query first when it is needed.
   *
   * The pre-selection is only built when the relation-less patterns contain at
   * least one `where` pattern. It is executed when:
   * - results are ordered and `limit` is not 1: the matched entity ids are
   *   injected as a VALUES filter and returned as `entityOrder`;
   * - results are ordered with `limit` 1 and there are relations or a type
   *   constraint: the matched entity ids are returned as `entityOrder`;
   * - results are unordered and there are relations or a type constraint: the
   *   distinct rdf types found are returned as `rdfTypes`.
   * In the last two cases, and whenever the pre-selection exists but is not
   * executed, it is embedded at the front of the CONSTRUCT WHERE as a sub-select.
   *
   * @param queryBuilder - Builder used to translate the options into patterns.
   * @param options - Find options.
   * @returns The patterns and triples for the CONSTRUCT query plus optional
   * `entityOrder` and `rdfTypes`. An empty `entityOrder` array signals that no
   * entity matched.
   */
  private async buildFindAllQueryData(
    queryBuilder: SparqlQueryBuilder,
    options?: FindAllOptions
  ): Promise<{ where: Pattern[]; selectionTriples: Triple[]; entityOrder?: string[]; rdfTypes?: string[] }> {
    const queryData = queryBuilder.buildEntitySelectPatternsFromOptions(entityVariable, options);
    // Same options without relations: used for the entity pre-selection only.
    const selectQueryData = queryBuilder.buildEntitySelectPatternsFromOptions(entityVariable, {
      ...options,
      relations: undefined
    });
    let rdfTypes: string[] | undefined;
    const wherePatterns: Pattern[] = [ ...selectQueryData.where, ...selectQueryData.graphWhere ];
    // Bind the rdf:type of every selected entity so the types can be aggregated.
    wherePatterns.push({
      type: 'bgp',
      triples: [
        {
          subject: entityVariable,
          predicate: rdfTypeNamedNode,
          object: rdfTypeVariable
        }
      ]
    });
    const entitySelectVariable = options?.entitySelectVariable ?? entityVariable;
    const groupBy = ensureArray(selectQueryData?.group ?? options?.group ?? []);
    groupBy.push(entitySelectVariable);
    // All non-aggregated variables in SELECT must be in GROUP BY
    for (const selectVariable of selectQueryData.selectVariables ?? []) {
      const { expression } = selectVariable;
      if (!expression) {
        // A variable projected without an expression is non-aggregated by
        // definition, so it has to be grouped as well. The `in` check below
        // would throw on a missing expression.
        groupBy.push(selectVariable.variable as Variable);
        continue;
      }
      if (!('aggregation' in (expression as AggregateExpression)) && expression.constructor.name === 'Variable') {
        groupBy.push(expression as Variable);
      }
    }
    const entitySelectQuery = selectQueryData.where.length > 0
      ? createSparqlSelectQuery(
        [
          entitySelectVariable,
          // (GROUP_CONCAT(DISTINCT str(?rdfType); SEPARATOR = " | ") AS ?rdfTypes)
          {
            expression: {
              type: 'aggregate',
              aggregation: 'group_concat',
              separator: ' | ',
              distinct: true,
              expression: {
                type: 'operation',
                operator: 'STR',
                args: [ rdfTypeVariable ]
              }
            },
            variable: rdfTypesVariable
          },
          ...(selectQueryData.selectVariables?.map(({ variable, expression }) => {
            if (!expression) return variable;
            return {
              variable,
              expression
            };
          }) ?? [])
        ],
        wherePatterns,
        selectQueryData.orders,
        groupBy,
        options?.limit,
        options?.offset
      )
      : undefined;
    let entityOrder: string[] | undefined;
    /* If relations are present add them to where */
    if ((queryData?.relationsQueryData?.unionPatterns ?? []).length > 0) {
      queryData?.relationsQueryData?.unionPatterns.push(
        createSparqlGraphPattern(entityVariable, [ createSparqlBasicGraphPattern([ entityGraphTriple ]) ])
      );
    }
    if (queryData.orders.length > 0 && options?.limit !== 1 && entitySelectQuery) {
      const entitySelectResponse =
        await this.queryExecutor.executeSparqlSelectAndGetData(entitySelectQuery);
      const valuesByVariable = groupSelectQueryResultsByKey(entitySelectResponse);
      entityOrder = getEntityVariableValuesFromVariables(valuesByVariable);
      if (entityOrder.length === 0) {
        return {
          where: queryData.where,
          selectionTriples: queryData.graphSelectionTriples,
          entityOrder: []
        };
      }
      // Restrict the CONSTRUCT query to the entities that were pre-selected.
      const variableValueFilters = createValuesPatternsForVariables({
        [entityVariable.value]: valuesByVariable[entityVariable.value]
      });
      queryData.graphWhere = [ ...variableValueFilters, ...queryData.graphWhere ];
    } else if (entitySelectQuery) {
      // We need entity IDs for framing when:
      // 1. There are relations (to distinguish root entities from related entities)
      // 2. There's a type constraint (to handle subclass matching where SPARQL finds
      //    subclasses but JSON-LD needs exact types)
      const hasRelations = (queryData?.relationsQueryData?.unionPatterns ?? []).length > 0;
      const hasTypeConstraint = options?.where?.type !== undefined;
      const needsLookup = hasRelations || hasTypeConstraint;
      if (needsLookup && queryData.orders.length > 0) {
        // Ordered with limit 1: keep the matched entity ids for framing.
        const entitySelectResponse =
          await this.queryExecutor.executeSparqlSelectAndGetData(entitySelectQuery);
        const valuesByVariable = groupSelectQueryResultsByKey(entitySelectResponse);
        entityOrder = getEntityVariableValuesFromVariables(valuesByVariable);
        if (entityOrder.length === 0) {
          return {
            where: queryData.where,
            selectionTriples: queryData.graphSelectionTriples,
            entityOrder: []
          };
        }
      } else if (needsLookup) {
        // Unordered: only the distinct rdf types of the matches are collected.
        const entitySelectResponse = await this.queryExecutor.executeSparqlSelectAndGetData(entitySelectQuery);
        const groupedResults = groupSelectQueryResultsByKey(entitySelectResponse);
        const valuesByVariable = getRdfTypeVariableValuesFromVariables(groupedResults);
        rdfTypes = [ ...new Set(valuesByVariable) ];
      }
      // Always add the select group query to the CONSTRUCT
      const entitySelectGroupQuery = createSparqlSelectGroup([ entitySelectQuery ]);
      queryData.graphWhere.unshift(entitySelectGroupQuery);
    }
    return {
      where: queryData.graphWhere,
      selectionTriples: queryData.graphSelectionTriples,
      entityOrder,
      rdfTypes
    };
  }

  /**
   * Executes the CONSTRUCT query and converts the resulting triples to JSON-LD.
   *
   * @param query - The CONSTRUCT query to run.
   * @param options - Find options; `skipFraming`, `relations` and `where` are
   * forwarded to the conversion.
   * @param entityOrder - Entity ids forwarded to the conversion.
   * @param rdfTypes - Rdf types forwarded to the conversion.
   */
  private async executeEntitySelectQuery(
    query: ConstructQuery,
    options?: FindAllOptions,
    entityOrder?: string[],
    rdfTypes?: string[]
  ): Promise<OrArray<NodeObject>> {
    const responseTriples = await this.queryExecutor.executeSparqlSelectAndGetData(query);
    return await triplesToJsonld(
      responseTriples,
      options?.skipFraming,
      options?.relations,
      options?.where,
      entityOrder,
      rdfTypes
    );
  }

  /**
   * Shorthand for {@link findAll} with only a `where` clause.
   *
   * @param where - The conditions the entities must match.
   */
  public async findAllBy(where: FindOptionsWhere): Promise<Entity[]> {
    return PerformanceLogger.withSpan('Adapter.findAllBy', async() => this.findAll({ where }), { where });
  }

  /**
   * Checks for a match with an ASK query built from the VALUES patterns of the
   * graph WHERE plus the regular WHERE patterns.
   *
   * @param options - The conditions to test.
   * @returns The response of the ASK query.
   */
  public async exists(options: FindExistsOptions): Promise<boolean> {
    return PerformanceLogger.withSpan('Adapter.exists', async() => {
      const queryBuilder = new SparqlQueryBuilder();
      const queryData = queryBuilder.buildEntitySelectPatternsFromOptions(entityVariable, options);
      const values = queryData.graphWhere.filter((pattern): boolean => pattern.type === 'values');
      const query = creteSparqlAskQuery([ ...values, ...queryData.where ]);
      return await this.queryExecutor.executeAskQueryAndGetResponse(query);
    }, { options });
  }

  /**
   * Counts entities with a count SELECT query built from the VALUES patterns
   * of the graph WHERE plus the regular WHERE patterns.
   *
   * @param options - The conditions to count; `offset` is forwarded to the query.
   * @returns The count reported by the executor.
   */
  public async count(options: FindCountOptions): Promise<number> {
    return PerformanceLogger.withSpan('Adapter.count', async() => {
      const queryBuilder = new SparqlQueryBuilder();
      const queryData = queryBuilder.buildEntitySelectPatternsFromOptions(entityVariable, options);
      const values = queryData.graphWhere.filter((pattern): boolean => pattern.type === 'values');
      const query = createSparqlCountSelectQuery(
        entityVariable,
        [ ...values, ...queryData.where ],
        queryData.orders,
        options.offset
      );
      return await this.queryExecutor.executeSelectCountAndGetResponse(query);
    }, { options });
  }

  /**
   * Persists one or more entities with a SPARQL update.
   *
   * @param entityOrEntities - The entity or entities to save.
   * @returns The same value that was passed in.
   */
  public async save(entity: Entity): Promise<Entity>;
  public async save(entities: Entity[]): Promise<Entity[]>;
  public async save(entityOrEntities: Entity | Entity[]): Promise<Entity | Entity[]> {
    return PerformanceLogger.withSpan('Adapter.save', async() => {
      const queryBuilder = new SparqlUpdateBuilder({ setTimestamps: this.setTimestamps });
      const query = queryBuilder.buildUpdate(entityOrEntities);
      await this.queryExecutor.executeSparqlUpdate(query);
      return entityOrEntities;
    }, { entityCount: Array.isArray(entityOrEntities) ? entityOrEntities.length : 1 });
  }

  /**
   * Runs a group-by query and reshapes each result row into a {@link GroupResult}.
   *
   * @param options - Grouping options (`groupBy` paths, optional `dateGrouping`
   * and `dateRange`).
   * @returns The grouped results plus metadata; `totalCount` is the sum of the
   * per-group counts.
   * @throws Error when a result row needs a variable (a group path, `dateGroup`,
   * `count` or `entityIds`) that has no entry in the builder's variable mapping.
   */
  public async groupBy(options: GroupByOptions): Promise<GroupByResponse> {
    return PerformanceLogger.withSpan('Adapter.groupBy', async() => {
      const queryBuilder = new SparqlQueryBuilder();
      const { query: selectQuery, variableMapping } = await queryBuilder.buildGroupByQuery(options);
      const results = await this.queryExecutor.executeSparqlSelectAndGetData(
        selectQuery
      );
      // Create reverse mapping from path to variable name. A Map keeps
      // arbitrary path strings from colliding with Object.prototype keys.
      const variableByPath = new Map<string, string>();
      for (const [ varName, path ] of Object.entries(variableMapping)) {
        variableByPath.set(path, varName);
      }
      const variableFor = (path: string): string => {
        const varName = variableByPath.get(path);
        if (!varName) {
          throw new Error(`No variable mapping found for path: ${path}`);
        }
        return varName;
      };
      // Convert numeric-looking values to numbers. Blank strings are kept
      // as-is because Number('') is 0, which would misreport an empty group key.
      const toGroupValue = (value: string): string | number => {
        if (value.trim() === '') {
          return value;
        }
        const numeric = Number(value);
        return Number.isNaN(numeric) ? value : numeric;
      };
      // Transform results
      const groupResults: GroupResult[] = results.map(result => {
        const group: Record<string, string | number> = {};
        options.groupBy?.forEach(path => {
          const { value } = result[variableFor(path)];
          group[path] = toGroupValue(value);
        });
        if (options.dateGrouping) {
          group.dateGroup = result[variableFor('dateGroup')].value;
        }
        const countVarName = variableFor('count');
        const entityIdsVarName = variableFor('entityIds');
        return {
          group,
          count: Number.parseInt(result[countVarName].value, 10),
          // ''.split(' ') yields [''], so drop empty segments.
          entityIds: result[entityIdsVarName].value
            .split(' ')
            .filter((id: string): boolean => id !== '')
        };
      });
      return {
        results: groupResults,
        meta: {
          totalCount: groupResults.reduce((sum, curr) => sum + curr.count, 0),
          dateRange: options.dateRange,
          groupings: options.groupBy || []
        }
      };
    }, { options });
  }

  /**
   * Applies a partial update to one or more entities identified by id.
   *
   * @param idOrIds - The id or ids of the entities to update.
   * @param attributes - The attributes to pass to the partial update.
   */
  public async update(id: string, attributes: Partial<Entity>): Promise<void>;
  public async update(ids: string[], attributes: Partial<Entity>): Promise<void>;
  public async update(idOrIds: string | string[], attributes: Partial<Entity>): Promise<void> {
    return PerformanceLogger.withSpan('Adapter.update', async() => {
      const queryBuilder = new SparqlUpdateBuilder({ setTimestamps: this.setTimestamps });
      const query = queryBuilder.buildPartialUpdate(idOrIds, attributes);
      await this.queryExecutor.executeSparqlUpdate(query);
    }, { idCount: Array.isArray(idOrIds) ? idOrIds.length : 1 });
  }

  /**
   * Deletes one or more entities identified by id.
   *
   * @param idOrIds - The id or ids of the entities to delete.
   */
  public async delete(id: string): Promise<void>;
  public async delete(ids: string[]): Promise<void>;
  public async delete(idOrIds: string | string[]): Promise<void> {
    return PerformanceLogger.withSpan('Adapter.delete', async() => {
      const queryBuilder = new SparqlUpdateBuilder();
      const query = queryBuilder.buildDeleteById(idOrIds);
      await this.queryExecutor.executeSparqlUpdate(query);
    }, { idCount: Array.isArray(idOrIds) ? idOrIds.length : 1 });
  }

  /**
   * Deletes the given entity or entities.
   *
   * @param entityOrEntities - The entity or entities to delete.
   * @returns The same value that was passed in.
   */
  public async destroy(entity: Entity): Promise<Entity>;
  public async destroy(entities: Entity[]): Promise<Entity[]>;
  public async destroy(entityOrEntities: Entity | Entity[]): Promise<Entity | Entity[]> {
    return PerformanceLogger.withSpan('Adapter.destroy', async() => {
      const queryBuilder = new SparqlUpdateBuilder();
      const query = queryBuilder.buildDelete(entityOrEntities);
      await this.queryExecutor.executeSparqlUpdate(query);
      return entityOrEntities;
    }, { entityCount: Array.isArray(entityOrEntities) ? entityOrEntities.length : 1 });
  }

  /**
   * Executes the update produced by the builder's `buildDeleteAll`.
   */
  public async destroyAll(): Promise<void> {
    return PerformanceLogger.withSpan('Adapter.destroyAll', async() => {
      const queryBuilder = new SparqlUpdateBuilder();
      const query = queryBuilder.buildDeleteAll();
      await this.queryExecutor.executeSparqlUpdate(query);
    });
  }
}