@jahed/sparql-engine

// SPDX-License-Identifier: MIT
import { intersectionWith, isUndefined, sum, zip } from "lodash-es";
import type { VariableTerm } from "sparqljs";
import type { EngineTripleValue } from "../../types.ts";
import {
  asJS,
  createFloat,
  literalIsNumeric,
  termIsLiteral,
} from "../../utils/rdf.ts";

type Term = EngineTripleValue;
type TermRows = { [key: string]: Term[] };

/**
 * Counts the terms of `expected` that also occur in `predicted`,
 * comparing terms with their own `equals` method.
 */
function countShared(expected: Term[], predicted: Term[]): number {
  return intersectionWith(expected, predicted, (x, y) => x.equals(y)).length;
}

/**
 * Divides `numerator` by `denominator`, returning 0 instead of NaN/Infinity
 * when the denominator is 0 (e.g. an empty list of terms).
 */
function safeRatio(numerator: number, denominator: number): number {
  return denominator === 0 ? 0 : numerator / denominator;
}

/**
 * Precision: size of the intersection of both lists divided by the number of
 * predicted terms. Returns 0 when `predicted` is empty.
 */
function precision(expected: Term[], predicted: Term[]): number {
  return safeRatio(countShared(expected, predicted), predicted.length);
}

/**
 * Recall: size of the intersection of both lists divided by the number of
 * expected terms. Returns 0 when `expected` is empty.
 */
function recall(expected: Term[], predicted: Term[]): number {
  return safeRatio(countShared(expected, predicted), expected.length);
}

/**
 * Computes the squared difference between the values bound to `a` (expected)
 * and `b` (predicted), pairing them position by position.
 *
 * A pair in which either side is undefined contributes 0.
 *
 * @throws SyntaxError when both sides of a pair are defined but are not both
 * numeric literals.
 */
function squaredErrors(a: string, b: string, rows: TermRows): number[] {
  return zip(rows[a], rows[b]).map((v) => {
    const expected = v[0];
    const predicted = v[1];
    if (isUndefined(predicted) || isUndefined(expected)) {
      return 0;
    } else if (
      termIsLiteral(predicted) &&
      termIsLiteral(expected) &&
      literalIsNumeric(predicted) &&
      literalIsNumeric(expected)
    ) {
      return Math.pow(
        asJS(expected.value, expected.datatype.value) -
          asJS(predicted.value, predicted.datatype.value),
        2
      );
    }
    throw new SyntaxError(
      `SPARQL aggregation error: cannot compute mean square error between RDF Terms ${expected} and ${predicted}, as they are not numbers`
    );
  });
}

/**
 * Implementation of non-standard SPARQL aggregations offered by the framework.
 * All arguments are pre-compiled from string to JS terms.
 *
 * Each aggregate receives the variable(s) to aggregate and `rows`, which maps
 * a variable name to the list of terms bound to it in the current group.
 */
export default {
  /*
    Accuracy metrics, often used in machine learning
  */

  // Accuracy: computes the fraction of times two variables have the same value.
  // Pairs in which either side is undefined count as a mismatch.
  // In regular SPARQL, equivalent to sum(if(?a = ?b, 1, 0)) / count(*)
  // NOTE: when there are no pairs at all, the value passed to createFloat is NaN (0 / 0).
  "https://callidon.github.io/sparql-engine/aggregates#accuracy": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    const tests = zip(rows[a], rows[b]).map((v) => {
      if (isUndefined(v[0]) || isUndefined(v[1])) {
        return 0;
      }
      return v[0].equals(v[1]) ? 1 : 0;
    });
    return createFloat(sum(tests) / tests.length);
  },

  // Geometric mean (https://en.wikipedia.org/wiki/Geometric_mean)
  // "The geometric mean is a mean or average, which indicates the central tendency or typical value of a set of
  // numbers by using the product of their values (as opposed to the arithmetic mean which uses their sum)."
  // Terms that are not numeric literals contribute a factor of 1 to the product,
  // but are still counted when taking the n-th root.
  // Throws a SyntaxError when the variable is not a key of `rows`.
  "https://callidon.github.io/sparql-engine/aggregates#gmean": function (
    { value: variable }: VariableTerm,
    rows: TermRows
  ): Term {
    if (variable in rows) {
      const count = rows[variable].length;
      const product = rows[variable]
        .map((term) => {
          if (termIsLiteral(term) && literalIsNumeric(term)) {
            return asJS(term.value, term.datatype.value);
          }
          return 1;
        })
        .reduce((acc, value) => acc * value, 1);
      return createFloat(Math.pow(product, 1 / count));
    }
    // List the known variable names: interpolating `rows` itself would only
    // print "[object Object]".
    throw new SyntaxError(
      `SPARQL aggregation error: the variable ${variable} cannot be found in the groups ${Object.keys(
        rows
      ).join(", ")}`
    );
  },

  // Mean square error: computes the average of the squares of the errors, that is
  // the average squared difference between the estimated values and the actual value.
  // In regular SPARQL, equivalent to sum((?a - ?b) * (?a - ?b)) / count(*)
  // NOTE: when there are no pairs at all, the value passed to createFloat is NaN.
  "https://callidon.github.io/sparql-engine/aggregates#mse": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    const values = squaredErrors(a, b, rows);
    return createFloat((1 / values.length) * sum(values));
  },

  // Root mean square error: computes the root of the average of the squares of the errors
  // In regular SPARQL, equivalent to sqrt(sum((?a - ?b) * (?a - ?b)) / count(*))
  // NOTE: when there are no pairs at all, the value passed to createFloat is NaN.
  "https://callidon.github.io/sparql-engine/aggregates#rmse": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    const values = squaredErrors(a, b, rows);
    return createFloat(Math.sqrt((1 / values.length) * sum(values)));
  },

  // Precision: the fraction of retrieved values that are relevant to the query
  // (?a holds the expected values, ?b the predicted ones).
  // Yields 0 when either variable is missing from the group.
  "https://callidon.github.io/sparql-engine/aggregates#precision": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    if (!(a in rows) || !(b in rows)) {
      return createFloat(0);
    }
    return createFloat(precision(rows[a], rows[b]));
  },

  // Recall: the fraction of relevant values that are successfully retrieved
  // (?a holds the expected values, ?b the predicted ones).
  // Yields 0 when either variable is missing from the group.
  "https://callidon.github.io/sparql-engine/aggregates#recall": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    if (!(a in rows) || !(b in rows)) {
      return createFloat(0);
    }
    return createFloat(recall(rows[a], rows[b]));
  },

  // F1 score: The F1 score can be interpreted as a weighted average of the precision and recall,
  // where an F1 score reaches its best value at 1 and worst score at 0.
  // Yields 0 when either variable is missing from the group, or when precision
  // and recall are both 0 (the formula would otherwise evaluate 0 / 0).
  "https://callidon.github.io/sparql-engine/aggregates#f1": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    if (!(a in rows) || !(b in rows)) {
      return createFloat(0);
    }
    const prec = precision(rows[a], rows[b]);
    const rec = recall(rows[a], rows[b]);
    const denominator = prec + rec;
    if (denominator === 0) {
      return createFloat(0);
    }
    return createFloat((2 * (prec * rec)) / denominator);
  },
};