rdflib
Version:
an RDF library for node.js. Suitable for client and server side.
518 lines (492 loc) • 15.9 kB
JavaScript
// Converting between SPARQL queries and the $rdf query API
/*
function SQuery () {
this.terms = []
return this
}
STerm.prototype.toString = STerm.val
SQuery.prototype.add = function (str) {this.terms.push()}*/
import log from './log'
import { Query } from './query'
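/**
 * Converts SPARQL text into a query object, which is returned.
 * @param SPARQL - The SPARQL text to convert.
 * @param testMode - Testing flag. Prevents loading of sources.
 * @param kb - Store used to create the variables, symbols, literals, blank nodes and formulas of the query.
 */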
export default function SPARQLToQuery (SPARQL, testMode, kb) {
// AJAR_ClearTable()
// Variables created so far, keyed by variable name. A prototype-less object is
// used (rather than an array) so that names such as "length" or "constructor"
// behave like any other key.
var variableHash = Object.create(null)
/**
 * Returns the variable called `name`, creating it with kb.variable on first
 * use so that repeated mentions of one name share a single variable object.
 * @param name - Variable name without its leading "?" or "$".
 * @returns The cached or newly created variable.
 */
function makeVar (name) {
  var known = variableHash[name]
  if (known) {
    return known
  }
  var created = kb.variable(name)
  variableHash[name] = created
  return created
}
// term type functions
/**
 * Tests whether `term` is a string holding at least one character other than
 * a space, newline or tab.
 * @returns The regex match (truthy) when such a character exists, null for a
 * blank string, false when `term` is not a string.
 */
function isRealText (term) {
  if (typeof term !== 'string') {
    return false
  }
  return term.match(/[^ \n\t]/)
}
/**
 * Tests whether `term` is a string starting with "?" or "$".
 * @returns The regex match (truthy) on success, null for other strings,
 * false when `term` is not a string.
 */
function isVar (term) {
  if (typeof term !== 'string') {
    return false
  }
  return term.match(/^[\?\$]/)
}
/**
 * Turns an HTML-escaped opening "&lt;" at the start of a string and an escaped
 * closing "&gt;" at its end back into real angle brackets. Non-strings are
 * returned unchanged.
 *
 * NOTE(review): the previous body replaced "<" with "<" and ">" with ">",
 * which did nothing; the entity names look like they were lost when the file
 * was HTML-decoded. Confirm against the upstream source.
 * @param term - Token to repair.
 * @returns The repaired string, or `term` itself when it is not a string.
 */
function fixSymbolBrackets (term) {
  if (typeof term !== 'string') {
    return term
  }
  return term.replace(/^&lt;/, '<').replace(/&gt;$/, '>')
}
/**
 * Tests whether `term` is a string of the form "<...>" with no ">" before the
 * final character.
 * @returns The regex match (truthy) on success, null for other strings,
 * false when `term` is not a string.
 */
function isSymbol (term) {
  if (typeof term !== 'string') {
    return false
  }
  return term.match(/^<[^>]*>$/)
}
/**
 * Tests whether `term` is a string that starts with "_:" or is empty.
 * @returns The regex match (truthy) on success, null for other strings,
 * false when `term` is not a string.
 */
function isBnode (term) {
  if (typeof term !== 'string') {
    return false
  }
  var labelled = term.match(/^_:/)
  if (labelled) {
    return labelled
  }
  return term.match(/^$/)
}
/**
 * Tests whether `term` is a string ending in ":".
 * @returns The regex match (truthy) on success, null for other strings,
 * false when `term` is not a string.
 */
function isPrefix (term) {
  if (typeof term !== 'string') {
    return false
  }
  return term.match(/:$/)
}
/**
 * Tests whether `term` is a string that either starts with ":" or contains a
 * ":" and does not start with "_".
 * @returns The regex match (truthy) on success, null for other strings,
 * false when `term` is not a string.
 */
function isPrefixedSymbol (term) {
  if (typeof term !== 'string') {
    return false
  }
  return term.match(/^:|^[^_][^:]*:/)
}
/**
 * Returns the part of `term` that precedes its first ":" (the whole string
 * when there is no colon).
 * @param term - String to split.
 */
function getPrefix (term) {
  const [beforeColon] = term.split(':')
  return beforeColon
}
/**
 * Returns everything after the first ":" of `term`.
 *
 * The whole remainder is kept, so a local name that itself contains a colon
 * (e.g. "ex:a:b") is no longer cut off at the second colon.
 * @param term - String to split.
 * @returns The text after the first colon, or undefined when `term` has no colon.
 */
function getSuffix (term) {
  var colonAt = term.indexOf(':')
  if (colonAt === -1) {
    return undefined
  }
  return term.slice(colonAt + 1)
}
/**
 * Strips the surrounding "<" and ">" from a term that isSymbol accepts;
 * anything else is returned unchanged.
 * @param term - Token to unwrap.
 */
function removeBrackets (term) {
  return isSymbol(term) ? term.slice(1, -1) : term
}
// takes a string and returns an array of strings and Literals in the place of literals
/**
 * Splits `str` around its quoted literals.
 *
 * The result alternates plain text and literals built with kb.literal:
 * [textBefore, literal, textAfter...], where the text after a literal is parsed
 * recursively. A literal may be followed by "^^datatype" or "@lang"; that
 * suffix runs up to the next whitespace character (or the end of the string)
 * and the whitespace itself stays in the following text piece.
 *
 * Compared with the previous version:
 *  - a quote at index 0 is recognised (it used to be treated as "no quote");
 *  - an unterminated literal yields [str] instead of a bare string, so callers
 *    always receive an array;
 *  - a datatype or language tag that ends the string, or is followed by a
 *    tab/newline rather than a space, is sliced correctly.
 *
 * Limitation: escaped quotes inside a literal are not understood; the literal
 * ends at the next occurrence of its opening quote character.
 * @param str - Raw SPARQL text.
 * @returns Array of strings and literal terms.
 */
function parseLiterals (str) {
  var sin = str.indexOf("'")
  var doub = str.indexOf('"')
  if (sin === -1 && doub === -1) {
    return [str]
  }
  // The literal opens at whichever quote character appears first.
  var useDouble = sin === -1 || (doub !== -1 && doub < sin)
  var br = useDouble ? '"' : "'"
  var ind = useDouble ? doub : sin
  var close = str.indexOf(br, ind + 1)
  if (close === -1) {
    log.error('SPARQL parsing error: no matching quote in literal ' + str)
    return [str]
  }
  var value = str.slice(ind + 1, close)
  var rest = str.slice(close + 1)
  var res = [str.slice(0, ind)]
  var isTyped = /^\^\^/.test(rest)
  var isTagged = !isTyped && /^@/.test(rest)
  if (isTyped || isTagged) {
    // The datatype / language tag extends to the next whitespace, if any.
    var end2 = rest.search(/[ \t\n\r]/)
    if (end2 === -1) {
      end2 = rest.length
    }
    if (isTyped) {
      res.push(kb.literal(value, kb.sym(removeBrackets(rest.slice(2, end2)))))
    } else {
      res.push(kb.literal(value, rest.slice(1, end2), null))
    }
    rest = rest.slice(end2)
  } else {
    res.push(kb.literal(value))
    log.info('Literal found: ' + res[1])
  }
  return res.concat(parseLiterals(rest)) // finds any other literals
}
/**
 * Pads brackets, braces, angle brackets and the separators ";", "." and ","
 * (when followed by a space) with spaces, turns tabs and line breaks into
 * spaces, then splits on spaces and keeps only the pieces isRealText accepts.
 * @param str - Text containing no literals.
 * @returns Array of token strings.
 */
function spaceDelimit (str) {
  // Applied in order; each entry is [pattern, replacement].
  var substitutions = [
    [/\(/g, ' ( '],
    [/\)/g, ' ) '],
    [/</g, ' <'],
    [/>/g, '> '],
    [/{/g, ' { '],
    [/}/g, ' } '],
    [/[\t\n\r]/g, ' '],
    [/; /g, ' ; '],
    [/\. /g, ' . '],
    [/, /g, ' , ']
  ]
  var spaced = str
  substitutions.forEach(function (entry) {
    spaced = spaced.replace(entry[0], entry[1])
  })
  log.info('New str into spaceDelimit: \n' + spaced)
  return spaced.split(' ').filter(function (piece) {
    return isRealText(piece)
  })
}
/**
 * Rewrites keyword tokens in place and returns the same array:
 *  - "a" becomes the rdf:type symbol;
 *  - "is", X, "of" drops the two keywords and swaps the tokens on either side
 *    of X, so that "S is P of O" reads "O P S".
 * @param input - Token array; it is modified.
 * @returns `input` itself.
 */
function replaceKeywords (input) {
  var RDF_TYPE = '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>'
  var tokens = input
  var i = 0
  while (i < tokens.length) {
    if (tokens[i] === 'a') {
      tokens[i] = RDF_TYPE
    }
    var isInverse = tokens[i] === 'is' && tokens[i + 2] === 'of'
    if (isInverse) {
      tokens.splice(i, 1) // drop "is"; the predicate now sits at i
      tokens.splice(i + 1, 1) // drop "of"
      var left = tokens[i - 1]
      tokens[i - 1] = tokens[i + 1]
      tokens[i + 1] = left
    }
    i += 1
  }
  return tokens
}
/**
 * Converts string tokens into terms, position by position.
 *
 * Non-string entries are copied through. Each string is first passed through
 * fixSymbolBrackets (the result is written back into `input`) and then
 * classified, in this order: variable, blank node, bracketed symbol, prefixed
 * symbol. A prefixed symbol whose prefix is known is expanded (the expanded
 * IRI is also written back into `input`); an unknown prefix is logged as an
 * error and the token is kept as a string. Any other string is kept as is.
 * @param input - Token array; string entries may be rewritten.
 * @returns New array of terms and leftover strings.
 */
function toTerms (input) {
  var res = []
  for (var i = 0; i < input.length; i++) {
    var token = input[i]
    if (typeof token !== 'string') {
      res[i] = token
      continue
    }
    token = input[i] = fixSymbolBrackets(token)
    if (isVar(token)) {
      res[i] = makeVar(token.slice(1))
      continue
    }
    if (isBnode(token)) {
      log.info(token + ' was identified as a bnode.')
      res[i] = kb.bnode()
      continue
    }
    if (isSymbol(token)) {
      log.info(token + ' was identified as a symbol.')
      res[i] = kb.sym(removeBrackets(token))
      continue
    }
    if (!isPrefixedSymbol(token)) {
      res[i] = token
      continue
    }
    log.info(token + ' was identified as a prefixed symbol')
    var prefix = getPrefix(token)
    if (prefixes[prefix]) {
      input[i] = prefixes[prefix] + getSuffix(token)
      res[i] = kb.sym(input[i])
    } else {
      log.error('SPARQL error: ' + token + ' with prefix ' +
        prefix + ' does not have a correct prefix entry.')
      res[i] = token
    }
  }
  return res
}
/**
 * Turns SPARQL text into a flat token array: literals are extracted first
 * (parseLiterals), every remaining text piece is split into tokens
 * (spaceDelimit), and keyword tokens are then rewritten (replaceKeywords).
 * @param str - SPARQL text.
 * @returns Array of token strings and literal terms.
 */
function tokenize (str) {
  var pieces = parseLiterals(str)
  var tokens = Object.keys(pieces).reduce(function (soFar, key) {
    var piece = pieces[key]
    var addition = typeof piece === 'string' ? spaceDelimit(piece) : piece
    return soFar.concat(addition)
  }, [])
  tokens = replaceKeywords(tokens)
  log.info('SPARQL Tokens: ' + tokens)
  return tokens
}
// CASE-INSENSITIVE
/**
 * Finds the first string entry of `arr` equal to `str`, ignoring case.
 * Entries that are not strings are skipped.
 * @param str - Text to look for.
 * @param arr - Array to search.
 * @returns The index of the first match, or null (not -1) when there is none.
 */
function arrayIndexOf (str, arr) {
  var found = arr.findIndex(function (entry) {
    return typeof entry === 'string' && entry.toLowerCase() === str.toLowerCase()
  })
  return found === -1 ? null : found
}
// CASE-INSENSITIVE
/**
 * Collects the index of every string entry of `arr` equal to `str`, ignoring
 * case. Entries that are not strings are skipped.
 * @param str - Text to look for.
 * @param arr - Array to search.
 * @returns Array of matching indices in ascending order (empty when none).
 */
function arrayIndicesOf (str, arr) {
  var hits = []
  arr.forEach(function (entry, position) {
    if (typeof entry === 'string' && entry.toLowerCase() === str.toLowerCase()) {
      hits.push(position)
    }
  })
  return hits
}
/**
 * Registers the selected variables on `query`.
 *
 * Every token that isVar accepts is turned into a variable (makeVar, without
 * the leading marker), pushed onto query.vars and labelled with its name. Any
 * other token only produces a warning.
 * @param input - Tokens found between SELECT and WHERE.
 * @param query - Query whose `vars` array is extended.
 */
function setVars (input, query) {
  log.info('SPARQL vars: ' + input)
  Object.keys(input).forEach(function (key) {
    var token = input[key]
    if (!isVar(token)) {
      log.warn('Incorrect SPARQL variable in SELECT: ' + token)
      return
    }
    log.info('Added ' + token + ' to query variables from SPARQL')
    var name = token.slice(1)
    var variable = makeVar(name)
    query.vars.push(variable)
    variable.label = name
  })
}
/**
 * Collects the PREFIX declarations found in the token list.
 *
 * For every "PREFIX" token (case-insensitive) the next two tokens must be a
 * prefix ending in ":" and a bracketed symbol; invalid pairs are logged and
 * skipped.
 *
 * The result is a prototype-less object rather than an array, so a prefix
 * called "length" no longer throws, and looking up an undeclared prefix such
 * as "map" or "constructor" yields undefined instead of an inherited member.
 * @param input - Token array of the whole query.
 * @returns Map from prefix (without the colon) to namespace IRI (without brackets).
 */
function getPrefixDeclarations (input) {
  var res = Object.create(null)
  arrayIndicesOf('PREFIX', input).forEach(function (at) {
    var a = input[at + 1]
    var b = input[at + 2]
    if (!isPrefix(a)) {
      log.error('Invalid SPARQL prefix: ' + a)
      return
    }
    if (!isSymbol(b)) {
      log.error('Invalid SPARQL symbol: ' + b)
      return
    }
    log.info('Prefix found: ' + a + ' -> ' + b)
    res[getPrefix(a)] = removeBrackets(b)
  })
  return res
}
/**
 * Finds the bracket that closes a group whose opening bracket has already been
 * consumed: the first position at which `close` tokens outnumber `open` tokens.
 * @param arr - Tokens following the opening bracket.
 * @param open - Opening bracket token.
 * @param close - Closing bracket token.
 * @returns Index of the closing bracket. When there is none an error is logged
 * and 0 is returned, which is the same value as a match at index 0, so the
 * return value alone cannot tell the two cases apart.
 */
function getMatchingBracket (arr, open, close) {
  log.info('Looking for a close bracket of type ' + close + ' in ' + arr)
  var depth = 0
  var closingAt = arr.findIndex(function (token) {
    if (token === open) {
      depth += 1
    }
    if (token === close) {
      depth -= 1
    }
    return depth < 0
  })
  if (closingAt !== -1) {
    return closingAt
  }
  log.error('Statement had no close parenthesis in SPARQL query')
  return 0
}
/**
 * Constructor for a "greater than `value`" constraint (call with `new`).
 *
 * describe(varstr) renders the constraint as text using value.toNT().
 * test(term) compares numerically, via parseFloat of term.value and of
 * `value`, when term.value contains a number-like substring; otherwise it
 * compares the toNT() strings of the two terms.
 * @param value - Term to compare against.
 */
function constraintGreaterThan (value) {
  var NUMBER_LIKE = /[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?/
  this.describe = (varstr) => varstr + ' > ' + value.toNT()
  this.test = (term) => {
    if (!term.value.match(NUMBER_LIKE)) {
      return (term.toNT() > value.toNT())
    }
    return (parseFloat(term.value) > parseFloat(value))
  }
  return this
}
/**
 * Constructor for a "less than `value`" constraint (call with `new`).
 * Not the recommended usage: it should only work on literal, numeric and
 * dateTime values.
 *
 * describe(varstr) renders the constraint as text using value.toNT().
 * test(term) compares numerically, via parseFloat of term.value and of
 * `value`, when term.value contains a number-like substring; otherwise it
 * compares the toNT() strings of the two terms.
 * @param value - Term to compare against.
 */
function constraintLessThan (value) {
  var self = this
  self.describe = function (varstr) {
    return varstr + ' < ' + value.toNT()
  }
  self.test = function (term) {
    var numberLike = term.value.match(/[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?/)
    return numberLike
      ? (parseFloat(term.value) < parseFloat(value))
      : (term.toNT() < value.toNT())
  }
  return self
}
// This should only work on literals but doesn't.
/**
 * Constructor for an "equal to `value`" constraint (call with `new`).
 *
 * describe(varstr) renders the constraint as text using value.toNT().
 * test(term) delegates to value.equals(term).
 * @param value - Term to compare against.
 */
function ConstraintEqualTo (value) {
  this.describe = (varstr) => varstr + ' = ' + value.toNT()
  this.test = (term) => value.equals(term)
  return this
}
// value must be a literal
/**
 * Constructor for a regular-expression constraint (call with `new`).
 *
 * describe(varstr) renders the constraint as "REGEXP( 'value' , var )".
 * test(term) builds a RegExp from `value` on every call and applies it to
 * term.value; a term without a truthy value never matches.
 * @param value - Regular-expression source text.
 */
function ConstraintRegexp (value) {
  this.describe = (varstr) => "REGEXP( '" + value + "' , " + varstr + ' )'
  this.test = (term) => {
    var pattern = new RegExp(value)
    return term.value ? pattern.test(term.value) : false
  }
}
/**
 * Records the constraint described by a FILTER body on `pat.constraints`,
 * keyed by the constrained variable.
 *
 * Two shapes are understood:
 *  - [variable, op, term] with op one of "=", ">", "<" and term a NamedNode or
 *    Literal. Each operator now gets its own constraint; previously ">" and
 *    "<" were both registered as equality constraints.
 *  - ["REGEXP", "(", literal, ",", variable, ")"] (keyword case-insensitive).
 * Anything else is logged as a warning and ignored.
 * @param input - Terms and tokens found between the FILTER brackets.
 * @param pat - Pattern whose `constraints` map is updated.
 */
function setConstraint (input, pat) {
  var isComparison = input.length === 3 && input[0].termType === 'Variable' &&
    (input[2].termType === 'NamedNode' || input[2].termType === 'Literal')
  if (isComparison) {
    var constraint
    if (input[1] === '=') {
      constraint = new ConstraintEqualTo(input[2])
    } else if (input[1] === '>') {
      constraint = new constraintGreaterThan(input[2])
    } else if (input[1] === '<') {
      constraint = new constraintLessThan(input[2])
    } else {
      log.warn("I don't know how to handle the constraint: " + input)
      return
    }
    log.debug('Constraint added: ' + input)
    pat.constraints[input[0]] = constraint
    return
  }
  var isRegexp = input.length === 6 && typeof input[0] === 'string' &&
    input[0].toLowerCase() === 'regexp' &&
    input[1] === '(' && input[5] === ')' && input[3] === ',' &&
    input[4].termType === 'Variable' && input[2].termType === 'Literal'
  if (isRegexp) {
    log.debug('Constraint added: ' + input)
    pat.constraints[input[4]] = new ConstraintRegexp(input[2].value)
    return
  }
  log.warn("I don't know how to handle the constraint: " + input)
}
/**
 * Builds the pattern for one OPTIONAL group: a fresh formula from kb is filled
 * by setWhere from `terms` and then appended to pat.optional.
 * @param terms - Tokens found between the OPTIONAL braces.
 * @param pat - Enclosing pattern whose `optional` list is extended.
 */
function setOptional (terms, pat) {
  log.debug('Optional query: ' + terms + ' not yet implemented.')
  var optionalPattern = kb.formula()
  setWhere(terms, optionalPattern)
  pat.optional.push(optionalPattern)
}
/**
 * Fills `pat` from the tokens of a WHERE (or OPTIONAL) group.
 *
 * The tokens are converted to terms, every OPTIONAL { ... } group is handed to
 * setOptional and every FILTER ( ... ) group to setConstraint, each group being
 * removed from the term list afterwards. What is left is passed to
 * extractStatements.
 *
 * Compared with the previous version:
 *  - a keyword found at index 0 is processed (arrayIndexOf reports "absent" as
 *    null, so the result is compared with null instead of tested for truth);
 *  - an unmatched bracket is detected by checking the token at the reported
 *    position, since getMatchingBracket reports failure as 0. The keyword (and
 *    its opening bracket, if present) is then dropped so the loop always
 *    terminates, and no empty group is registered.
 * @param input - Tokens of the group body.
 * @param pat - Pattern (formula) to fill.
 */
function setWhere (input, pat) {
  var terms = toTerms(input)
  log.debug('WHERE: ' + terms)

  // Hands every "<keyword> <open> ... <close>" group to `handle` and removes it from `terms`.
  function consumeGroups (keyword, label, open, close, handle) {
    var at
    while ((at = arrayIndexOf(keyword, terms)) !== null) {
      log.debug(keyword + ': ' + at + ' ' + terms[at] + ' in ' + terms)
      var hasOpening = terms[at + 1] === open
      if (!hasOpening) {
        log.warn('Bad ' + label + ' opening bracket in word ' + at)
      }
      var end = getMatchingBracket(terms.slice(at + 2), open, close)
      if (terms[at + 2 + end] !== close) {
        log.error('No matching bracket in word ' + at)
        terms.splice(at, hasOpening ? 2 : 1)
        continue
      }
      handle(terms.slice(at + 2, at + 2 + end))
      terms.splice(at, end + 3) // keyword, opening bracket, body, closing bracket
    }
  }

  consumeGroups('OPTIONAL', 'optional', '{', '}', function (group) {
    setOptional(group, pat)
  })
  log.debug('WHERE after optionals: ' + terms)
  consumeGroups('FILTER', 'filter', '(', ')', function (group) {
    setConstraint(group, pat)
  })
  log.debug('WHERE after filters and optionals: ' + terms)
  extractStatements(terms, pat)
}
/**
 * Adds the triples described by `terms` to `formula`.
 *
 * The terms are split into statements at ".", each statement into
 * predicate-object lists at ";" and each list into objects at ",". The first
 * term of a statement is the subject, the first term of a predicate-object
 * list the predicate, and the first term of each comma segment an object.
 *
 * Compared with the previous version:
 *  - the last statement is kept even when it has no trailing "." (it used to
 *    be dropped silently);
 *  - empty segments (e.g. a dangling ";" or a predicate with no object) are
 *    skipped instead of adding triples with undefined parts;
 *  - each nesting level has its own loop variable, so the log lines show the
 *    segment actually being processed.
 * @param terms - Terms and separator tokens of a group body.
 * @param formula - Object whose add(subject, predicate, object) receives each triple.
 */
function extractStatements (terms, formula) {
  // Splits `tokens` at every occurrence of `separator`; the separators themselves are dropped.
  function splitOn (separator, tokens) {
    var parts = []
    var start = 0
    arrayIndicesOf(separator, tokens).concat(tokens.length).forEach(function (stop) {
      parts.push(tokens.slice(start, stop))
      start = stop + 1
    })
    return parts
  }

  splitOn('.', terms).forEach(function (statement, s) {
    if (statement.length === 0) {
      return
    }
    log.info('s+p+o ' + s + ' = ' + statement)
    var subj = statement[0]
    splitOn(';', statement.slice(1)).forEach(function (predicateObjects, p) {
      if (predicateObjects.length === 0) {
        return
      }
      log.info('p+o ' + p + ' = ' + predicateObjects)
      var pred = predicateObjects[0]
      splitOn(',', predicateObjects.slice(1)).forEach(function (objects) {
        if (objects.length === 0) {
          return
        }
        var obj = objects[0]
        log.info('Subj=' + subj + ' Pred=' + pred + ' Obj=' + obj)
        formula.add(subj, pred, obj)
      })
    })
  })
}
// ******************************* Body of SPARQLToQuery ***************************//
log.info('SPARQL input: \n' + SPARQL)
var q = new Query()
var sp = tokenize(SPARQL) // first tokenize everything
var prefixes = getPrefixDeclarations(sp)
if (!prefixes.rdf) {
prefixes.rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
}
if (!prefixes.rdfs) {
prefixes.rdfs = 'http://www.w3.org/2000/01/rdf-schema#'
}
var selectLoc = arrayIndexOf('SELECT', sp)
var whereLoc = arrayIndexOf('WHERE', sp)
if (selectLoc < 0 || whereLoc < 0 || selectLoc > whereLoc) {
log.error('Invalid or nonexistent SELECT and WHERE tags in SPARQL query')
return false
}
setVars(sp.slice(selectLoc + 1, whereLoc), q)
setWhere(sp.slice(whereLoc + 2, sp.length - 1), q.pat)
if (testMode) {
return q
}
for (var x in q.pat.statements) {
var st = q.pat.statements[x]
if (st.subject.termType === 'NamedNode') {
if (kb.fetcher) {
kb.fetcher.lookUpThing(st.subject, 'sparql:' + st.subject)
}
}
if (st.object.termType === 'NamedNode') {
if (kb.fetcher) {
kb.fetcher.lookUpThing(st.object, 'sparql:' + st.object)
}
}
}
// alert(q.pat)
return q
// checkVars()
// *******************************************************************//
}