UNPKG

@comake/skl-js-engine

Version:

Standard Knowledge Language Javascript Engine

428 lines (397 loc) 17.5 kB
/* eslint-disable capitalized-comments */
/* eslint-disable @typescript-eslint/prefer-nullish-coalescing */
/* eslint-disable indent */
/* eslint-disable @typescript-eslint/naming-convention */
import type { OrArray } from '@comake/rmlmapper-js';
import type { GraphObject, NodeObject } from 'jsonld';
import type { Frame } from 'jsonld/jsonld-spec';
import type { AggregateExpression, ConstructQuery, Pattern, Triple, Variable } from 'sparqljs';
import { Logger } from '../../../logger';
import { PerformanceLogger } from '../../../util/PerformanceLogger';
import {
  createSparqlBasicGraphPattern,
  createSparqlCountSelectQuery,
  createSparqlGraphPattern,
  createSparqlSelectGroup,
  createSparqlSelectQuery,
  createValuesPatternsForVariables,
  creteSparqlAskQuery,
  entityGraphTriple,
  entityVariable,
  getEntityVariableValuesFromVariables,
  getRdfTypeVariableValuesFromVariables,
  groupSelectQueryResultsByKey,
  rdfTypeNamedNode,
  rdfTypesVariable,
  rdfTypeVariable,
  selectQueryResultsAsJSValues
} from '../../../util/SparqlUtil';
import { triplesToJsonld, triplesToJsonldWithFrame } from '../../../util/TripleUtil';
import type { Entity } from '../../../util/Types';
import { ensureArray } from '../../../util/Util';
import type {
  FindAllOptions,
  FindCountOptions,
  FindExistsOptions,
  FindOneOptions,
  FindOptionsWhere
} from '../../FindOptionsTypes';
import type { GroupByOptions, GroupByResponse, GroupResult } from '../../GroupOptionTypes';
import type { QueryAdapter, RawQueryResult } from '../QueryAdapter';
import { InMemorySparqlQueryExecutor } from './query-executor/InMemorySparqlQueryExecutor';
import { SparqlEndpointQueryExecutor } from './query-executor/SparqlEndpointQueryExecutor';
import type { QueryExecutor } from './query-executor/SparqlQueryExecutor';
import type { SparqlQueryAdapterOptions } from './SparqlQueryAdapterOptions';
import { SparqlQueryBuilder } from './SparqlQueryBuilder';
import { SparqlUpdateBuilder } from './SparqlUpdateBuilder';

/**
 * Converts a group key value to a number when it is numeric, otherwise returns it unchanged.
 *
 * Blank strings are kept as strings: `Number('')` and `Number('  ')` evaluate to `0`,
 * so a plain `Number.isNaN(Number(value))` check would silently turn an empty
 * group key into the number zero.
 *
 * @param value - The raw binding value of a grouped variable.
 * @returns The numeric value when `value` is a non-blank numeric string, else `value`.
 */
function toNumberIfNumeric(value: string): string | number {
  if (value.trim() === '') {
    return value;
  }
  const numeric = Number(value);
  return Number.isNaN(numeric) ? value : numeric;
}

/**
 * Splits a space separated list of entity ids into an array.
 *
 * Empty segments are dropped because `''.split(' ')` yields `['']`,
 * which would otherwise be reported as a single empty id.
 *
 * @param value - The space separated ids.
 * @returns The non-empty ids, in their original order.
 */
function splitEntityIds(value: string): string[] {
  return value.split(' ').filter((id): boolean => id.length > 0);
}

/**
 * A {@link QueryAdapter} that stores data through SPARQL. Queries are run by an
 * in-memory executor (`type: 'memory'`) or by a SPARQL endpoint executor
 * (`type: 'sparql'`), depending on the options given to the constructor.
 */
export class SparqlQueryAdapter implements QueryAdapter {
  protected readonly queryExecutor: QueryExecutor;
  private readonly setTimestamps: boolean;
  private readonly logger: Logger;

  /**
   * @param options - Adapter configuration. `options.type` selects the query executor and
   *   `options.setTimestamps` (default `false`) is forwarded to the update builders
   *   used by {@link save} and {@link update}.
   * @throws Error when `options.type` is neither `'memory'` nor `'sparql'`.
   */
  public constructor(options: SparqlQueryAdapterOptions) {
    this.setTimestamps = options.setTimestamps ?? false;
    switch (options.type) {
      case 'memory':
        this.queryExecutor = new InMemorySparqlQueryExecutor();
        break;
      case 'sparql':
        this.queryExecutor = new SparqlEndpointQueryExecutor(options);
        break;
      default:
        // NOTE(review): this message does not describe an unsupported adapter type. It is
        // kept unchanged because callers or tests may match on the text.
        throw new Error('No schema source found in setSchema args.');
    }
    this.logger = Logger.getInstance();
  }

  /**
   * Executes a raw SPARQL SELECT query string.
   *
   * @param query - The SPARQL query text.
   * @returns The rows converted by `selectQueryResultsAsJSValues`, or an empty array
   *   when the executor returns no rows.
   */
  public async executeRawQuery<T extends RawQueryResult>(query: string): Promise<T[]> {
    const response = await this.queryExecutor.executeSparqlSelectAndGetDataRaw(query);
    if (response.length === 0) {
      return [] as T[];
    }
    return selectQueryResultsAsJSValues<T>(response);
  }

  /**
   * Executes a raw SPARQL CONSTRUCT query string.
   *
   * @param query - The SPARQL query text.
   * @param frame - Optional JSON-LD frame passed to `triplesToJsonldWithFrame`.
   * @returns The resulting graph, or `{ '@graph': [] }` when no triples are returned.
   */
  public async executeRawConstructQuery(query: string, frame?: Frame): Promise<GraphObject> {
    const response = await this.queryExecutor.executeSparqlConstructAndGetDataRaw(query);
    if (response.length === 0) {
      return { '@graph': []};
    }
    return await triplesToJsonldWithFrame(response, frame);
  }

  /**
   * Executes a raw SPARQL update string.
   *
   * @param query - The SPARQL update text.
   */
  public async executeRawUpdate(query: string): Promise<void> {
    await this.queryExecutor.executeRawSparqlUpdate(query);
  }

  /**
   * Finds a single entity. Runs the find-all pipeline with `limit` forced to 1.
   *
   * @param options - Find options.
   * @returns `null` when framing is enabled and nothing matched, the single match when
   *   exactly one framed node is returned, otherwise the unmodified JSON-LD result.
   */
  public async find(options?: FindOneOptions): Promise<Entity | null> {
    return PerformanceLogger.withSpan('Adapter.find', async() => {
      const jsonld = await this.findAllAsJsonLd({ ...options, limit: 1 });
      if (Array.isArray(jsonld) && !options?.skipFraming) {
        if (jsonld.length === 0) {
          return null;
        }
        if (jsonld.length === 1) {
          return jsonld[0] as Entity;
        }
      }
      return jsonld as Entity;
    }, { options });
  }

  /**
   * Shorthand for {@link find} with only a `where` clause.
   *
   * @param where - The conditions to match.
   */
  public async findBy(where: FindOptionsWhere): Promise<Entity | null> {
    return PerformanceLogger.withSpan('Adapter.findBy', async() => this.find({ where }), { where });
  }

  /**
   * Finds all entities matching the options.
   *
   * @param options - Find options.
   * @returns The matches; a single non-array JSON-LD result is wrapped in an array.
   */
  public async findAll(options?: FindAllOptions): Promise<Entity[]> {
    return PerformanceLogger.withSpan('Adapter.findAll', async() => {
      const jsonld = await this.findAllAsJsonLd(options);
      if (Array.isArray(jsonld)) {
        return jsonld as Entity[];
      }
      return [ jsonld ] as Entity[];
    }, { options });
  }

  /**
   * Builds and executes the CONSTRUCT query for a find operation.
   *
   * @param options - Find options.
   * @returns The JSON-LD produced from the returned triples, or an empty array when the
   *   entity pre-selection ran and matched nothing.
   */
  private async findAllAsJsonLd(options?: FindAllOptions): Promise<OrArray<NodeObject>> {
    const queryBuilder = new SparqlQueryBuilder();
    const { where, selectionTriples, entityOrder, rdfTypes } = await this.buildFindAllQueryData(queryBuilder, options);
    // An explicitly empty order means the pre-selection found no entities, so skip the CONSTRUCT.
    if (entityOrder && entityOrder.length === 0) {
      return [];
    }
    const queryData = queryBuilder.buildEntitySelectPatternsFromOptions(entityVariable, options);
    const query = queryBuilder.buildConstructFromEntitySelectQuery(
      where,
      selectionTriples,
      options?.select,
      queryData.selectVariables
    );
    return await this.executeEntitySelectQuery(query, options, entityOrder, rdfTypes);
  }

  /**
   * Prepares the WHERE patterns and selection triples of the CONSTRUCT query. Depending on
   * ordering, limit, relations and type constraints it may first run an entity SELECT query
   * to learn the entity order and/or the rdf types of the matched entities.
   *
   * @param queryBuilder - The builder used to translate options into patterns.
   * @param options - Find options.
   * @returns The patterns and triples for the CONSTRUCT query, plus `entityOrder` and
   *   `rdfTypes` when they were determined. `entityOrder: []` signals that no entity matched.
   */
  private async buildFindAllQueryData(
    queryBuilder: SparqlQueryBuilder,
    options?: FindAllOptions
  ): Promise<{ where: Pattern[]; selectionTriples: Triple[]; entityOrder?: string[]; rdfTypes?: string[] }> {
    const queryData = queryBuilder.buildEntitySelectPatternsFromOptions(entityVariable, options);
    // The entity pre-selection is built without relations.
    const selectQueryData = queryBuilder.buildEntitySelectPatternsFromOptions(
      entityVariable,
      { ...options, relations: undefined }
    );
    let rdfTypes: string[] | undefined;

    const wherePatterns: Pattern[] = [ ...selectQueryData.where, ...selectQueryData.graphWhere ];
    wherePatterns.push({
      type: 'bgp',
      triples: [
        { subject: entityVariable, predicate: rdfTypeNamedNode, object: rdfTypeVariable }
      ]
    });

    const entitySelectVariable = options?.entitySelectVariable ?? entityVariable;
    // Copy before pushing so a group array owned by the caller (or by the builder) is not mutated.
    const groupBy = [ ...ensureArray(selectQueryData?.group ?? options?.group ?? []) ];
    groupBy.push(entitySelectVariable);
    // All non-aggregated variables in SELECT must be in GROUP BY
    for (const selectVariable of selectQueryData.selectVariables ?? []) {
      const { expression } = selectVariable;
      if (!expression) {
        // A bare variable is projected as-is below, so it has to be grouped as well.
        // Guarding here also avoids the TypeError `in` throws on an undefined operand.
        groupBy.push(selectVariable.variable);
        continue;
      }
      if (!('aggregation' in (expression as AggregateExpression)) && expression.constructor.name === 'Variable') {
        groupBy.push(expression as Variable);
      }
    }

    const entitySelectQuery = selectQueryData.where.length > 0 ?
      createSparqlSelectQuery(
        [
          entitySelectVariable,
          // (GROUP_CONCAT(DISTINCT str(?rdfType); SEPARATOR = " | ") AS ?rdfTypes)
          {
            expression: {
              type: "aggregate",
              aggregation: "group_concat",
              separator: " | ",
              distinct: true,
              expression: {
                type: "operation",
                operator: "STR",
                args: [ rdfTypeVariable ]
              }
            },
            variable: rdfTypesVariable
          },
          ...selectQueryData.selectVariables?.map(({ variable, expression }) => {
            if (!expression) return variable;
            return { variable, expression };
          }) ?? []
        ],
        wherePatterns,
        selectQueryData.orders,
        groupBy,
        options?.limit,
        options?.offset
      ) :
      undefined;

    let entityOrder: string[] | undefined;
    /* If relations are present add them to where */
    if ((queryData?.relationsQueryData?.unionPatterns ?? []).length > 0) {
      queryData?.relationsQueryData?.unionPatterns.push(
        createSparqlGraphPattern(entityVariable, [ createSparqlBasicGraphPattern([ entityGraphTriple ]) ])
      );
    }

    if (queryData.orders.length > 0 && options?.limit !== 1 && entitySelectQuery) {
      // Ordered, multi-result query: resolve the ordered entity ids first and restrict the
      // CONSTRUCT to those ids through a VALUES pattern.
      const entitySelectResponse = await this.queryExecutor.executeSparqlSelectAndGetData(entitySelectQuery);
      const valuesByVariable = groupSelectQueryResultsByKey(entitySelectResponse);
      entityOrder = getEntityVariableValuesFromVariables(valuesByVariable);
      if (entityOrder.length === 0) {
        return {
          where: queryData.where,
          selectionTriples: queryData.graphSelectionTriples,
          entityOrder: []
        };
      }
      const variableValueFilters = createValuesPatternsForVariables({
        [entityVariable.value]: valuesByVariable[entityVariable.value]
      });
      queryData.graphWhere = [ ...variableValueFilters, ...queryData.graphWhere ];
    } else if (entitySelectQuery) {
      // We need entity IDs for framing when:
      // 1. There are relations (to distinguish root entities from related entities)
      // 2. There's a type constraint (to handle subclass matching where SPARQL finds
      //    subclasses but JSON-LD needs exact types)
      const hasRelations = (queryData?.relationsQueryData?.unionPatterns ?? []).length > 0;
      const hasTypeConstraint = options?.where?.type !== undefined;

      if ((hasRelations || hasTypeConstraint) && queryData.orders.length > 0) {
        // Only reachable with orders present and limit === 1 (see the branch above).
        const entitySelectResponse = await this.queryExecutor.executeSparqlSelectAndGetData(entitySelectQuery);
        const valuesByVariable = groupSelectQueryResultsByKey(entitySelectResponse);
        entityOrder = getEntityVariableValuesFromVariables(valuesByVariable);
        if (entityOrder.length === 0) {
          return {
            where: queryData.where,
            selectionTriples: queryData.graphSelectionTriples,
            entityOrder: []
          };
        }
      } else if (hasRelations || hasTypeConstraint) {
        const entitySelectResponse = await this.queryExecutor.executeSparqlSelectAndGetData(entitySelectQuery);
        const groupedResults = groupSelectQueryResultsByKey(entitySelectResponse);
        const valuesByVariable = getRdfTypeVariableValuesFromVariables(groupedResults);
        rdfTypes = [ ...new Set(valuesByVariable) ];
        // Also get entity IDs for framing to distinguish root entities from related ones
        // entityOrder = getEntityVariableValuesFromVariables(groupedResults);
        // if (entityOrder.length === 0) {
        //   return {
        //     where: queryData.where,
        //     selectionTriples: queryData.graphSelectionTriples,
        //     entityOrder: [],
        //     rdfTypes
        //   };
        // }
      }

      // Always add the select group query to the CONSTRUCT
      const entitySelectGroupQuery = createSparqlSelectGroup([ entitySelectQuery ]);
      queryData.graphWhere.unshift(entitySelectGroupQuery);
      // queryData.graphWhere = [ ...queryData.where, ...queryData.graphWhere ];
    }

    return {
      where: queryData.graphWhere,
      selectionTriples: queryData.graphSelectionTriples,
      entityOrder,
      rdfTypes
    };
  }

  /**
   * Executes the CONSTRUCT query and converts the returned triples to JSON-LD.
   *
   * @param query - The CONSTRUCT query to run.
   * @param options - Find options; `skipFraming`, `relations` and `where` are forwarded
   *   to `triplesToJsonld`.
   * @param entityOrder - Entity ids determined by the pre-selection, if any.
   * @param rdfTypes - Rdf types determined by the pre-selection, if any.
   */
  private async executeEntitySelectQuery(
    query: ConstructQuery,
    options?: FindAllOptions,
    entityOrder?: string[],
    rdfTypes?: string[]
  ): Promise<OrArray<NodeObject>> {
    const responseTriples = await this.queryExecutor.executeSparqlSelectAndGetData(query);
    return await triplesToJsonld(
      responseTriples,
      options?.skipFraming,
      options?.relations,
      options?.where,
      entityOrder,
      rdfTypes
    );
  }

  /**
   * Shorthand for {@link findAll} with only a `where` clause.
   *
   * @param where - The conditions to match.
   */
  public async findAllBy(where: FindOptionsWhere): Promise<Entity[]> {
    return PerformanceLogger.withSpan('Adapter.findAllBy', async() => this.findAll({ where }), { where });
  }

  /**
   * Checks whether any entity matches the options, using an ASK query built from the
   * `values` patterns of the graph WHERE plus the entity WHERE patterns.
   *
   * @param options - The conditions to match.
   */
  public async exists(options: FindExistsOptions): Promise<boolean> {
    return PerformanceLogger.withSpan('Adapter.exists', async() => {
      const queryBuilder = new SparqlQueryBuilder();
      const queryData = queryBuilder.buildEntitySelectPatternsFromOptions(entityVariable, options);
      const values = queryData.graphWhere.filter((pattern): boolean => pattern.type === 'values');
      const query = creteSparqlAskQuery([ ...values, ...queryData.where ]);
      return await this.queryExecutor.executeAskQueryAndGetResponse(query);
    }, { options });
  }

  /**
   * Counts the entities matching the options, using a count SELECT query built from the
   * `values` patterns of the graph WHERE plus the entity WHERE patterns.
   *
   * @param options - The conditions to match; `offset` is forwarded to the count query.
   */
  public async count(options: FindCountOptions): Promise<number> {
    return PerformanceLogger.withSpan('Adapter.count', async() => {
      const queryBuilder = new SparqlQueryBuilder();
      const queryData = queryBuilder.buildEntitySelectPatternsFromOptions(entityVariable, options);
      const values = queryData.graphWhere.filter((pattern): boolean => pattern.type === 'values');
      const query = createSparqlCountSelectQuery(
        entityVariable,
        [ ...values, ...queryData.where ],
        queryData.orders,
        options?.offset
      );
      return await this.queryExecutor.executeSelectCountAndGetResponse(query);
    }, { options });
  }

  /**
   * Saves one or more entities through an update built by `SparqlUpdateBuilder.buildUpdate`.
   *
   * @param entityOrEntities - The entity or entities to save.
   * @returns The same value that was passed in.
   */
  public async save(entity: Entity): Promise<Entity>;
  public async save(entities: Entity[]): Promise<Entity[]>;
  public async save(entityOrEntities: Entity | Entity[]): Promise<Entity | Entity[]> {
    return PerformanceLogger.withSpan('Adapter.save', async() => {
      const queryBuilder = new SparqlUpdateBuilder({ setTimestamps: this.setTimestamps });
      const query = queryBuilder.buildUpdate(entityOrEntities);
      await this.queryExecutor.executeSparqlUpdate(query);
      return entityOrEntities;
    }, { entityCount: Array.isArray(entityOrEntities) ? entityOrEntities.length : 1 });
  }

  /**
   * Runs a group-by query and reshapes each result row into a {@link GroupResult}.
   *
   * @param options - Group-by options. `groupBy` lists the paths to group on and
   *   `dateGrouping` additionally reads the `dateGroup` binding of every row.
   * @returns The groups with their counts and entity ids, plus aggregate metadata.
   * @throws Error when a path in `options.groupBy` has no variable in the builder's mapping.
   */
  public async groupBy(options: GroupByOptions): Promise<GroupByResponse> {
    return PerformanceLogger.withSpan('Adapter.groupBy', async() => {
      const queryBuilder = new SparqlQueryBuilder();
      const { query: selectQuery, variableMapping } = await queryBuilder.buildGroupByQuery(options);
      const results = await this.queryExecutor.executeSparqlSelectAndGetData(
        selectQuery
      );

      // Create reverse mapping from path to variable name
      const reverseMapping = Object.entries(variableMapping).reduce<Record<string, string>>((acc, [ varName, path ]) => {
        acc[path] = varName;
        return acc;
      }, {});

      // Transform results
      const groupResults: GroupResult[] = results.map(result => {
        const group: Record<string, string | number> = {};
        for (const path of options.groupBy ?? []) {
          const varName = reverseMapping[path];
          if (!varName) {
            throw new Error(`No variable mapping found for path: ${path}`);
          }
          const { value } = result[varName];
          // Convert to number when numeric; blank strings stay strings
          group[path] = toNumberIfNumeric(value);
        }
        if (options.dateGrouping) {
          const dateGroupVarName = reverseMapping.dateGroup;
          group.dateGroup = result[dateGroupVarName].value;
        }
        const countVarName = reverseMapping.count;
        const entityIdsVarName = reverseMapping.entityIds;
        return {
          group,
          count: Number.parseInt(result[countVarName].value, 10),
          entityIds: splitEntityIds(result[entityIdsVarName].value)
        };
      });

      return {
        results: groupResults,
        meta: {
          totalCount: groupResults.reduce((sum, curr) => sum + curr.count, 0),
          dateRange: options.dateRange,
          groupings: options.groupBy ?? []
        }
      };
    }, { options });
  }

  /**
   * Partially updates one or more entities by id.
   *
   * @param idOrIds - The id or ids of the entities to update.
   * @param attributes - The attributes passed to `SparqlUpdateBuilder.buildPartialUpdate`.
   */
  public async update(id: string, attributes: Partial<Entity>): Promise<void>;
  public async update(ids: string[], attributes: Partial<Entity>): Promise<void>;
  public async update(idOrIds: string | string[], attributes: Partial<Entity>): Promise<void> {
    return PerformanceLogger.withSpan('Adapter.update', async() => {
      const queryBuilder = new SparqlUpdateBuilder({ setTimestamps: this.setTimestamps });
      const query = queryBuilder.buildPartialUpdate(idOrIds, attributes);
      await this.queryExecutor.executeSparqlUpdate(query);
    }, { idCount: Array.isArray(idOrIds) ? idOrIds.length : 1 });
  }

  /**
   * Deletes one or more entities by id.
   *
   * @param idOrIds - The id or ids of the entities to delete.
   */
  public async delete(id: string): Promise<void>;
  public async delete(ids: string[]): Promise<void>;
  public async delete(idOrIds: string | string[]): Promise<void> {
    return PerformanceLogger.withSpan('Adapter.delete', async() => {
      const queryBuilder = new SparqlUpdateBuilder();
      const query = queryBuilder.buildDeleteById(idOrIds);
      await this.queryExecutor.executeSparqlUpdate(query);
    }, { idCount: Array.isArray(idOrIds) ? idOrIds.length : 1 });
  }

  /**
   * Deletes the given entity or entities.
   *
   * @param entityOrEntities - The entity or entities to delete.
   * @returns The same value that was passed in.
   */
  public async destroy(entity: Entity): Promise<Entity>;
  public async destroy(entities: Entity[]): Promise<Entity[]>;
  public async destroy(entityOrEntities: Entity | Entity[]): Promise<Entity | Entity[]> {
    return PerformanceLogger.withSpan('Adapter.destroy', async() => {
      const queryBuilder = new SparqlUpdateBuilder();
      const query = queryBuilder.buildDelete(entityOrEntities);
      await this.queryExecutor.executeSparqlUpdate(query);
      return entityOrEntities;
    }, { entityCount: Array.isArray(entityOrEntities) ? entityOrEntities.length : 1 });
  }

  /**
   * Executes the update produced by `SparqlUpdateBuilder.buildDeleteAll`.
   */
  public async destroyAll(): Promise<void> {
    return PerformanceLogger.withSpan('Adapter.destroyAll', async() => {
      const queryBuilder = new SparqlUpdateBuilder();
      const query = queryBuilder.buildDeleteAll();
      await this.queryExecutor.executeSparqlUpdate(query);
    });
  }
}