@jahed/sparql-engine
Version:
SPARQL query engine for servers and web browsers.
180 lines (167 loc) • 6.23 kB
text/typescript
// SPDX-License-Identifier: MIT
import { intersectionWith, isUndefined, sum, zip } from "lodash-es";
import type { VariableTerm } from "sparqljs";
import type { EngineTripleValue } from "../../types.ts";
import {
asJS,
createFloat,
literalIsNumeric,
termIsLiteral,
} from "../../utils/rdf.ts";
type Term = EngineTripleValue;
type TermRows = { [key: string]: Term[] };
/**
 * Precision of a prediction: the share of predicted terms that also appear
 * among the expected terms.
 *
 * Terms are matched with their own `equals` method rather than by reference.
 *
 * @param expected - Reference terms.
 * @param predicted - Terms produced by the prediction.
 * @returns The number of matching terms divided by `predicted.length`, or `0`
 * when `predicted` is empty (the ratio would otherwise be `0 / 0`, i.e. `NaN`).
 */
function precision(expected: Term[], predicted: Term[]): number {
  // Nothing was predicted: define the precision as 0 instead of dividing by zero.
  if (predicted.length === 0) {
    return 0;
  }
  const intersection = intersectionWith(expected, predicted, (x, y) =>
    x.equals(y)
  );
  return intersection.length / predicted.length;
}
/**
 * Recall of a prediction: the share of expected terms that were actually
 * predicted.
 *
 * Terms are matched with their own `equals` method rather than by reference.
 *
 * @param expected - Reference terms.
 * @param predicted - Terms produced by the prediction.
 * @returns The number of matching terms divided by `expected.length`, or `0`
 * when `expected` is empty (the ratio would otherwise be `0 / 0`, i.e. `NaN`).
 */
function recall(expected: Term[], predicted: Term[]): number {
  // Nothing was expected: define the recall as 0 instead of dividing by zero.
  if (expected.length === 0) {
    return 0;
  }
  const intersection = intersectionWith(expected, predicted, (x, y) =>
    x.equals(y)
  );
  return intersection.length / expected.length;
}
/**
 * Squared error of every (expected, predicted) pair of two grouped variables.
 * Shared by the `mse` and `rmse` aggregates.
 *
 * @param a - Name of the variable holding the expected values.
 * @param b - Name of the variable holding the predicted values.
 * @param rows - Grouped terms, indexed by variable name.
 * @returns One squared difference per pair; a pair in which either side is
 * undefined contributes `0`.
 * @throws SyntaxError if both sides of a pair are defined but are not both
 * numeric literals.
 */
function squaredErrors(a: string, b: string, rows: TermRows): number[] {
  return zip(rows[a], rows[b]).map((v) => {
    const expected = v[0];
    const predicted = v[1];
    if (isUndefined(predicted) || isUndefined(expected)) {
      return 0;
    }
    if (
      termIsLiteral(predicted) &&
      termIsLiteral(expected) &&
      literalIsNumeric(predicted) &&
      literalIsNumeric(expected)
    ) {
      return Math.pow(
        asJS(expected.value, expected.datatype.value) -
          asJS(predicted.value, predicted.datatype.value),
        2
      );
    }
    throw new SyntaxError(
      `SPARQL aggregation error: cannot compute mean square error between RDF Terms ${expected} and ${predicted}, as they are not numbers`
    );
  });
}

/**
 * Implementation of non-standard SPARQL aggregations offered by the framework.
 * All arguments are pre-compiled from string to js terms.
 *
 * Each aggregate receives the variable term(s) it operates on, followed by the
 * grouped rows (variable name -> list of terms), and returns a float term.
 */
export default {
  /*
    Accuracy metrics, often used in machine learning
  */

  // Accuracy: computes the fraction of rows in which two variables have equal values
  // In regular SPARQL, equivalent to sum(if(?a = ?b, 1, 0)) / count(*)
  "https://callidon.github.io/sparql-engine/aggregates#accuracy": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    const tests = zip(rows[a], rows[b]).map((v) => {
      // A pair with a missing side counts as a mismatch
      if (isUndefined(v[0]) || isUndefined(v[1])) {
        return 0;
      }
      return v[0].equals(v[1]) ? 1 : 0;
    });
    return createFloat(sum(tests) / tests.length);
  },

  // Geometric mean (https://en.wikipedia.org/wiki/Geometric_mean)
  // "The geometric mean is a mean or average, which indicates the central tendency or typical value of a set of
  // numbers by using the product of their values (as opposed to the arithmetic mean which uses their sum)."
  // Throws a SyntaxError when the variable is not part of the groups.
  "https://callidon.github.io/sparql-engine/aggregates#gmean": function (
    { value: variable }: VariableTerm,
    rows: TermRows
  ): Term {
    if (variable in rows) {
      const count = rows[variable].length;
      const product = rows[variable]
        .map((term) => {
          if (termIsLiteral(term) && literalIsNumeric(term)) {
            return asJS(term.value, term.datatype.value);
          }
          // Non-numeric terms are neutral in the product, but are still
          // included in `count` (the degree of the root)
          return 1;
        })
        .reduce((acc, value) => acc * value, 1);
      return createFloat(Math.pow(product, 1 / count));
    }
    // List the known variable names: interpolating `rows` itself would only
    // print "[object Object]"
    throw new SyntaxError(
      `SPARQL aggregation error: the variable ${variable} cannot be found in the groups ${Object.keys(
        rows
      ).join(", ")}`
    );
  },

  // Mean Square error: computes the average of the squares of the errors, that is
  // the average squared difference between the estimated values and the actual value.
  // In regular SPARQL, equivalent to sum((?a - ?b) * (?a - ?b)) / count(*)
  "https://callidon.github.io/sparql-engine/aggregates#mse": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    const values = squaredErrors(a, b, rows);
    return createFloat((1 / values.length) * sum(values));
  },

  // Root mean Square error: computes the root of the average of the squares of the errors
  // In regular SPARQL, equivalent to sqrt(sum((?a - ?b) * (?a - ?b)) / count(*))
  "https://callidon.github.io/sparql-engine/aggregates#rmse": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    const values = squaredErrors(a, b, rows);
    return createFloat(Math.sqrt((1 / values.length) * sum(values)));
  },

  // Precision: the fraction of retrieved (predicted) values that are relevant (expected)
  // ?a holds the expected values, ?b the predicted ones; yields 0 if either variable is missing
  "https://callidon.github.io/sparql-engine/aggregates#precision": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    if (!(a in rows) || !(b in rows)) {
      return createFloat(0);
    }
    return createFloat(precision(rows[a], rows[b]));
  },

  // Recall: the fraction of relevant (expected) values that are successfully retrieved
  // ?a holds the expected values, ?b the predicted ones; yields 0 if either variable is missing
  "https://callidon.github.io/sparql-engine/aggregates#recall": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    if (!(a in rows) || !(b in rows)) {
      return createFloat(0);
    }
    return createFloat(recall(rows[a], rows[b]));
  },

  // F1 score: The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0.
  // Yields 0 if either variable is missing, or if precision + recall is not a positive number
  "https://callidon.github.io/sparql-engine/aggregates#f1": function (
    { value: a }: VariableTerm,
    { value: b }: VariableTerm,
    rows: TermRows
  ): Term {
    if (!(a in rows) || !(b in rows)) {
      return createFloat(0);
    }
    const prec = precision(rows[a], rows[b]);
    const rec = recall(rows[a], rows[b]);
    const denominator = prec + rec;
    // No overlap between expected and predicted values makes the denominator 0
    // (or NaN); report the worst score instead of dividing by it
    if (!(denominator > 0)) {
      return createFloat(0);
    }
    return createFloat((2 * (prec * rec)) / denominator);
  },
};