UNPKG

wikibase-cli

Version:

A command-line interface to Wikibase

141 lines (127 loc) 4.94 kB
import { grey } from 'tiny-chalk'
import {
  simplifySparqlResults,
  minimizeSimplifiedSparqlResults,
  buildBlazeGraphSparqlQueryUrl,
  buildQLeverSparqlQueryUrl,
} from 'wikibase-sdk'
import errors_ from '#lib/errors'
import { exitOnMissingSparqlEndpoint } from '#lib/exit_on_missing'
import { outputFactory } from '#lib/output'
import program from '#lib/program'
import { customGet, post } from '#lib/request'

/**
 * Run a SPARQL query against `program.sparqlEndpoint` and return the parsed results.
 *
 * The `table` and `inline` formats are requested as JSON.
 * The request goes through the QLever code path when the endpoint is detected as
 * a QLever instance (see getCustomSparqlEngine), and through the BlazeGraph code path otherwise.
 * Any error raised while requesting or parsing is handed over to `errors_.exit`.
 *
 * @param {string} sparql - the SPARQL query
 * @param {string} [format='json'] - the requested results format
 * @returns {Promise<*>} the results, as returned by parseResults
 */
export async function makeSparqlQuery (sparql, format = 'json') {
  // Read at call time rather than at module load, to be sure
  // program.process was already called and program.sparqlEndpoint is set
  const { sparqlEndpoint } = program
  exitOnMissingSparqlEndpoint(sparqlEndpoint)
  if (localFormatsDerivedFromJSON.includes(format)) format = 'json'
  const engine = getCustomSparqlEngine(sparqlEndpoint)
  try {
    if (engine === 'qlever') {
      return await makeQleverSparqlQuery(sparqlEndpoint, sparql, format)
    } else {
      return await makeBlazeGraphSparqlQuery(sparql, format)
    }
  } catch (err) {
    errors_.exit(err)
  }
}

/**
 * Build the BlazeGraph query URL for `program.sparqlEndpoint`, request it, and parse the response.
 *
 * @param {string} sparql - the SPARQL query
 * @param {string} format - one of the keys of formatAcceptHeader
 * @returns {Promise<*>} the results, as returned by parseResults
 */
async function makeBlazeGraphSparqlQuery (sparql, format) {
  const { sparqlEndpoint } = program
  const url = buildBlazeGraphSparqlQueryUrl(sparqlEndpoint, sparql, format)
  const results = await makeBlazeGraphRequest(url, format)
  return parseResults(results, format)
}

/**
 * Request a BlazeGraph query URL, with a GET when the URL is short enough, with a POST otherwise.
 *
 * @param {string} url - the full query URL (endpoint + `?` + urlencoded parameters)
 * @param {string} format - one of the keys of formatAcceptHeader
 * @returns {Promise<*>} the response, as returned by customGet or post
 * @throws {Error} when `format` has no known Accept header
 */
async function makeBlazeGraphRequest (url, format) {
  const output = outputFactory(program)
  const acceptHeader = formatAcceptHeader[format]
  if (!acceptHeader) throw new Error(`unknown format: ${format}`)
  const parseResponseAsJson = format === 'json'
  // Avoid making a POST request when not necessary as those aren't cached
  // see https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#SPARQL_endpoint
  if (url.length < 5000) {
    output(`${grey('Generated get query:')} ${url}`, true, true)
    return customGet({
      url,
      headers: {
        Accept: acceptHeader,
      },
      parseResponseAsJson,
    })
  } else {
    const [ postUrl, urlencodedSparql ] = url.split('?')
    output(`${grey('Generated post body:')} ${urlencodedSparql}`, true, true)
    return post({
      url: postUrl,
      // The body is the decoded SPARQL query itself, as announced by the Content-type header
      body: new URLSearchParams(urlencodedSparql).get('query'),
      headers: {
        'Content-type': 'application/sparql-query',
        Accept: acceptHeader,
      },
      parseResponseAsJson,
    })
  }
}

// Formats that are requested as JSON (see makeSparqlQuery)
const localFormatsDerivedFromJSON = [
  'table',
  'inline',
]

// Source: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Supported_formats
const formatAcceptHeader = {
  xml: 'application/sparql-results+xml',
  json: 'application/sparql-results+json',
  tsv: 'text/tab-separated-values',
  csv: 'text/csv',
  binrdf: 'application/x-binary-rdf-results-table',
}

/**
 * Post-process the response of a SPARQL request.
 *
 * - `json`, unless `program.raw` is set: results are simplified and minimized,
 *   then indexed by `program.index` when that option is set
 * - `csv`: CRLF line endings are converted to LF
 * - any other case: results are returned untouched
 *
 * @param {*} results - the response, as returned by the request helpers
 * @param {string} format - the requested results format
 * @returns {*} the processed results
 * @throws {Error} a descriptive error when processing fails with a SyntaxError on an HTML response;
 *   otherwise the original error, with the results attached as `err.context.results`
 */
function parseResults (results, format) {
  const simplifyOption = !program.raw
  const { index: indexAttribute, sparqlEndpoint } = program
  try {
    if (format === 'json' && simplifyOption) {
      results = minimizeSimplifiedSparqlResults(simplifySparqlResults(results))
      if (indexAttribute) results = indexBy(results, indexAttribute)
    }
    if (format === 'csv') {
      results = results.replace(/\r\n/g, '\n')
    }
    return results
  } catch (err) {
    // Presumably happens when the endpoint replied with an HTML page instead of JSON,
    // and that page reached the JSON processing above as a string
    if (err.name === 'SyntaxError' && looksLikeHtml(results)) {
      if (sparqlEndpoint === 'https://commons-query.wikimedia.org/sparql') {
        throw new Error(`${sparqlEndpoint} returned an invalid response (HTML), probably due to its restrictive access policy, see https://commons.wikimedia.org/wiki/Commons:SPARQL_query_service/API_endpoint`)
      } else {
        // NOTE(review): message text and whitespace kept verbatim; confirm the intended line layout
        throw new Error(`Expected a JSON response, got HTML: Are you sure you provided a valid SPARQL endpoint? 
Example of a valid SPARQL endpoint: https://query.wikidata.org/sparql Provided SPARQL endpoint: ${sparqlEndpoint} `)
      }
    } else {
      err.context = { results }
      throw err
    }
  }
}

/**
 * Group objects by the value of one of their attributes.
 *
 * The attribute is removed from each object (the passed objects are mutated).
 * When the attribute value is an object with a truthy `value`, that `value` is used as key.
 *
 * @param {object[]} array - the objects to index
 * @param {string} attribute - the name of the attribute to index by
 * @returns {Object<string, object[]>} for each key, the array of objects sharing that key
 */
const indexBy = (array, attribute) => {
  // Accumulating in a Map rather than in a plain object, so that keys colliding with
  // Object.prototype members ('constructor', 'toString', '__proto__', etc) are handled like any other key
  const groups = new Map()
  for (const obj of array) {
    let key = obj[attribute]
    delete obj[attribute]
    // typeof null === 'object', thus the explicit null check
    if (key !== null && typeof key === 'object' && key.value) key = key.value
    // Same string coercion as a plain object property key would get
    key = String(key)
    // Not setting the obj as direct value, as several obj might share the same key
    const group = groups.get(key)
    if (group) group.push(obj)
    else groups.set(key, [ obj ])
  }
  // Object.fromEntries defines own data properties, even for a '__proto__' key
  return Object.fromEntries(groups)
}

/**
 * @param {*} results
 * @returns {boolean} true when `results` is a string containing an HTML doctype or an opening html tag
 */
function looksLikeHtml (results) {
  return typeof results === 'string' && /(DOCTYPE html|<html)/.test(results)
}

/**
 * Detect endpoints that should not be queried as BlazeGraph endpoints.
 *
 * Heuristic: the endpoint is considered to be a QLever instance when its host
 * contains 'qlever' or when its path starts with '/api/'.
 *
 * @param {string} sparqlEndpoint - the SPARQL endpoint URL
 * @returns {string|undefined} 'qlever', or undefined when no custom engine was detected
 */
function getCustomSparqlEngine (sparqlEndpoint) {
  const { host, pathname } = new URL(sparqlEndpoint)
  if (host.includes('qlever') || pathname.startsWith('/api/')) {
    return 'qlever'
  }
}

/**
 * Build the QLever query URL, request it with a GET, and parse the response.
 *
 * @param {string} sparqlEndpoint - the SPARQL endpoint URL
 * @param {string} sparql - the SPARQL query
 * @param {string} format - the requested results format
 * @returns {Promise<*>} the results, as returned by parseResults
 */
async function makeQleverSparqlQuery (sparqlEndpoint, sparql, format) {
  const output = outputFactory(program)
  const url = buildQLeverSparqlQueryUrl(sparqlEndpoint, sparql, format)
  output(`${grey('Generated get query:')} ${url}`, true, true)
  const results = await customGet({ url, parseResponseAsJson: format === 'json' })
  return parseResults(results, format)
}