UNPKG

freebase

Version:

a very masculine way to work with data from freebase.com

619 lines (567 loc) 22.6 kB
//By Spencer Kelly (@spencermountain)
//https://github.com/spencermountain/Freebase-nodejs

//if nodejs, load these modules, otherwise assume they're included
if (typeof module !== 'undefined' && module.exports) {
  var fns = require('./helpers/helpers');
}

/**
 * Freebase API client.
 *
 * Every public method takes (q, options, callback). Argument normalisation is
 * delegated to the project helper `fns.settle_params`, which is not visible in
 * this file; from its use here it yields an object with `q`, `options`,
 * `callback`, `valid`, `array`, `is_id` and `url` fields.
 * NOTE(review): the second argument handed to `fns.settle_params` is presumably
 * the method that `fns.doit_async` re-invokes for each element of an array
 * input -- confirm against helpers/helpers.js.
 */
var freebase = (function() {
  "use strict";
  var freebase = {};

  ////////////
  /// to use mqlwrite, generate a access token by running 'node ./mqlwrite/create_access_token.js', and paste it in here
  //////////
  freebase.access_token = "";
  ///////////

  freebase.globals = {
    host: 'https://www.googleapis.com/freebase/v1/',
    image_host: "https://www.googleapis.com/freebase/v1/image",
    geosearch: 'http://api.freebase.com/api/service/geosearch',
    wikipedia_host: 'http://en.wikipedia.org/w/api.php',
    generic_query: {
      id: null,
      name: null,
      mid: null,
      type: []
    }
  };

  // Attach the self-documentation strings that freebase.documentation() prints.
  // Guarded so that a method used detached (e.g. passed to Array#map, where
  // `this` is undefined in strict mode) does not throw.
  var annotate = function(self, doc, reference) {
    if (!self) {
      return;
    }
    self.doc = doc;
    if (reference) {
      self.reference = reference;
    }
  };

  // Hand `value` to `callback`, node-style (null, value) when options.nodeCallback is set.
  var respond = function(options, callback, value) {
    return (options && options.nodeCallback) ? callback(null, value) : callback(value);
  };

  // Shallow copy of `options` minus nodeCallback. Used for internal mqlread
  // calls: their handlers take a single argument, so a forwarded nodeCallback
  // would make them receive `null` in place of the response. Copying also
  // keeps mqlread's cursor/uniqueness_failure defaults off the caller's object.
  var without_node_callback = function(options) {
    var copy = {};
    Object.keys(options || {}).forEach(function(k) {
      if (k !== "nodeCallback") {
        copy[k] = options[k];
      }
    });
    return copy;
  };

  /**
   * Run an MQL read query.
   * @param {Object|Array} query - MQL query; a falsy query yields {} without a request.
   * @param {Object|Function} [options] - extra url parameters (debug, cursor, nodeCallback, ...), or the callback.
   * @param {Function} [callback] - receives the response; defaults to console.log.
   */
  freebase.mqlread = function(query, options, callback) {
    annotate(this, "interface to freebase's mql api", "http://wiki.freebase.com/wiki/MQL");
    callback = callback || console.log;
    if (typeof options == "function") {
      callback = options;
      options = {};
    } //flexible parameters
    // default options before first use, so an empty query with no options cannot throw
    options = options || {};
    if (!query) {
      return respond(options, callback, {});
    }
    options.uniqueness_failure = options.uniqueness_failure || "soft";
    options.cursor = options.cursor || "";
    // the query JSON is url-encoded, matching how url_lookup encodes its mql_output
    var url = freebase.globals.host + 'mqlread?query=' + encodeURIComponent(JSON.stringify(query)) + "&cursor=" + options.cursor;
    //options object contains some cruft, but we can still splat it onto the url
    var params = fns.set_params(options);
    url += "&" + params;
    if (options.debug) {
      console.log(url);
    }
    fns.http(url, options, function(result) {
      if (result && result.error) {
        console.log(JSON.stringify(result.error, null, 2));
      }
      return respond(options, callback, result);
    });
  };
  // freebase.mqlread([{id:"/en/radiohead",name:null}])

  /**
   * Free-text search. Calls back with an array of results ([] when nothing matched).
   * Ids are resolved through lookup_id and urls through url_lookup.
   */
  freebase.search = function(q, options, callback) {
    annotate(this, "regular search api", "http://wiki.freebase.com/wiki/ApiSearch");
    var ps = fns.settle_params(arguments, freebase.search, {});
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    //if its an id
    if (ps.is_id) {
      return freebase.lookup_id(ps.q, ps.options, function(r) {
        ps.callback([r]);
      });
    }
    //if its a url
    if (ps.url) {
      // url_lookup calls back with a single topic (not the raw response), so wrap it
      return freebase.url_lookup(ps.q, ps.options, function(topic) {
        if (topic && (topic.id || topic.mid)) {
          return ps.callback([topic]);
        }
        return ps.callback([]);
      });
    }
    ps.options.query = encodeURIComponent(ps.q);
    //the options object has some cruft to remove
    delete ps.options.property;
    delete ps.options.strict;
    var params = fns.set_params(ps.options);
    var url = freebase.globals.host + 'search/?' + params;
    if (ps.options.type == "/type/type" || ps.options.type == "/type/property") {
      url += "&scoring=schema&stemmed=true";
    }
    if (ps.options.debug) {
      console.log(url);
    }
    fns.http(url, ps.options, function(result) {
      if (!result || !result.result || !result.result[0]) {
        if (result && result.error) {
          console.log(JSON.stringify(result.error, null, 2));
        }
        return ps.callback([]);
      }
      return ps.callback(result.result);
    });
  };
  // freebase.search("bill murray")
  // freebase.search("/m/01sh40")
  // freebase.search("/en/radiohead")

  /**
   * Search that only answers when there is one confident, unambiguous hit.
   * Calls back with the top result, or {} when the match is weak, ambiguous,
   * or of a type on the kill list.
   */
  freebase.lookup = function(q, options, callback) {
    annotate(this, "freebase search with filters to ensure only a confident, unambiguous result", "http://wiki.freebase.com/wiki/ApiSearch");
    var ps = fns.settle_params(arguments, freebase.lookup, {
      type: "/common/topic",
      strict: true
    });
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    //if its a url
    if (ps.url) {
      return freebase.url_lookup(ps.q, ps.options, ps.callback);
    }
    //if its an id
    if (ps.is_id) {
      ps.options.limit = 1;
      return freebase.lookup_id(ps.q, ps.options, ps.callback);
    }
    //craft the url
    var strength = ps.options.strength || "full";
    if (!ps.options.strict) {
      strength = "word";
    }
    var filter = encodeURIComponent('(any name{' + strength + '}:"' + ps.q + '" alias{' + strength + '}:"' + ps.q + '")');
    var output = ps.options.output || "(type description:wikipedia)";
    var url = freebase.globals.host + 'search?limit=2&lang=en&type=' + ps.options.type + '&filter=' + filter + '&output=' + output;
    if (ps.options.type == "/type/type" || ps.options.type == "/type/property") {
      url += "&scoring=schema&stemmed=true";
    }
    if (ps.options.debug) {
      console.log(url);
    }
    return fns.http(url, ps.options, function(result) {
      if (!result || !result.result || !result.result[0]) {
        return ps.callback({});
      }
      //filter-out poor results
      var top = result.result[0];
      var runner_up = result.result[1] || {};
      //kill low-relevance (a missing score counts as low)
      if (!top.score || top.score < 30) {
        return ps.callback({});
      }
      //kill if 2nd result is also good
      if ((top.score * 0.7) < (runner_up.score || 0)) {
        return ps.callback({});
      }
      //kill if the top hit has an unwanted type; `output` is absent when a custom output was requested
      var types = (((top.output || {}).type || {})["/type/object/type"] || []).map(function(o) {
        return o.id;
      });
      var kill_list = ["/music/track", "/music/release_track", "/tv/tv_episode", "/music/recording", "/book/book_edition"];
      for (var i = 0; i < types.length; i++) {
        if (fns.isin(types[i], kill_list)) {
          return ps.callback({});
        }
      }
      return ps.callback(top);
    });
  };
  // freebase.lookup(["/en/radiohead", "http://myspace.com/u2"])
  // freebase.lookup("/m/01sh40")
  //freebase.search("/en/radiohead")
  // freebase.lookup("pulp fiction")
  // freebase.lookup('australia',{type:"/location/location", debug:true})

  /**
   * Fetch id, name, mid and types for a known id via mqlread.
   * Calls back with the row, or {} when nothing came back.
   */
  freebase.lookup_id = function(q, options, callback) {
    annotate(this, "generic info for an id");
    var ps = fns.settle_params(arguments, freebase.lookup_id, {
      type: "/common/topic"
    });
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    var query = fns.clone(freebase.globals.generic_query);
    query.id = ps.q;
    // use the settled options: the raw `options` argument may be the caller's
    // callback, which mqlread would then invoke directly, bypassing this handler
    freebase.mqlread([query], without_node_callback(ps.options), function(r) {
      var rows = (r && r.result) || [];
      return ps.callback(rows[0] || {});
    });
  };
  // freebase.lookup_id('/en/radiohead')
  // freebase.lookup_id('/m/07jnt')

  //*************
  //slightly different lookup when its a url
  /**
   * Resolve a url to a topic through the search api.
   * Calls back with the first hit, or {} when there is none.
   */
  freebase.url_lookup = function(q, options, callback) {
    annotate(this, "freebase search tuned for looking up a url", "http://wiki.freebase.com/wiki/ApiSearch");
    var ps = fns.settle_params(arguments, freebase.url_lookup, {
      type: "/common/topic",
      strict: true
    });
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    var output = fns.clone(freebase.globals.generic_query);
    var url = freebase.globals.host + 'search?type=/common/topic&limit=1&query=' + encodeURIComponent(ps.q);
    url += "&mql_output=" + encodeURIComponent(JSON.stringify(output));
    if (ps.options.debug) {
      console.log(url);
    }
    fns.http(url, ps.options, function(result) {
      var hits = (result && result.result) || [];
      // an empty hit list yields {} rather than undefined
      return ps.callback(hits[0] || {});
    });
  };
  // freebase.url_lookup("http://myspace.com/u2")

  /**
   * Like lookup, but content with just an id: input that already looks like
   * an id is echoed back as {id: q} without a request.
   */
  freebase.get_id = function(q, options, callback) {
    annotate(this, "like freebase.lookup but satisfied with an id", "http://wiki.freebase.com/wiki/ApiSearch");
    var ps = fns.settle_params(arguments, freebase.get_id, {
      type: "/common/topic"
    });
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    //is an id
    if (!ps.q || (ps.q.match(/\/.{1,32}\/.{3}/) != null)) {
      return ps.callback({
        id: ps.q
      });
    }
    //is a normal search
    freebase.lookup(ps.q, ps.options, function(result) {
      if (!result) {
        return ps.callback({});
      }
      if (ps.options.type == "/type/type") {
        result.mid = result.id;
        return ps.callback(result);
      }
      if (result.mid) {
        result.id = result.id || result.mid;
        return ps.callback(result);
      }
      return ps.callback({});
    });
  };
  //freebase.get_id("/en/radiohead")

  /**
   * Topic api: resolve q to an id, then fetch its topic document.
   * options.filter defaults to 'all'. Calls back with the raw response, or {}.
   */
  freebase.topic = function(q, options, callback) {
    annotate(this, "topic api", "http://wiki.freebase.com/wiki/Topic_API");
    var ps = fns.settle_params(arguments, freebase.topic, {});
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    freebase.get_id(ps.q, ps.options, function(topic) {
      var id = topic && topic.id;
      if (!id) {
        return ps.callback({});
      }
      ps.options.filter = ps.options.filter || 'all';
      var url = freebase.globals.host + 'topic' + id + '?' + fns.set_params(ps.options);
      if (ps.options.debug) {
        console.log(url);
      }
      fns.http(url, ps.options, function(result) {
        return ps.callback(result);
      });
    });
  };
  // freebase.topic("toronto", {filter:"allproperties"})

  /**
   * Repeat an mqlread, following cursors, and collect every page of results.
   * Stops when the cursor runs out or at least options.max rows were gathered.
   * @param {Object|Array} query - MQL query.
   * @param {Object|Function} [options] - mqlread options plus `max`; or the callback.
   * @param {Function} [callback] - receives the concatenated rows; defaults to console.log.
   */
  freebase.paginate = function(query, options, callback) {
    annotate(this, "get all of the results to your query", "http://wiki.freebase.com/wiki/MQL");
    if (typeof options == "function") {
      callback = options;
      options = {};
    } //flexible parameter
    options = options || {};
    callback = callback || console.log;
    options.max = options.max || 99999999;
    var all = [];
    // private copy: keeps nodeCallback away from the single-argument handler
    // below and keeps the moving cursor off the caller's object
    var read_options = without_node_callback(options);
    //recursive mqlread until cursor is false, or maximum reached
    var iterate = function(cursor) {
      read_options.cursor = cursor || "";
      freebase.mqlread(query, read_options, function(result) {
        if (!result || !result.result) {
          return respond(options, callback, all);
        }
        all = all.concat(result.result);
        if (result.cursor && all.length < options.max) {
          return iterate(result.cursor);
        }
        return respond(options, callback, all);
      });
    };
    iterate('');
  };
  // freebase.paginate([{"type":"/astronomy/moon","name":null, limit:2}],{max:13})

  /**
   * Build the english wikipedia url for a topic; calls back with '' when unknown.
   */
  freebase.wikipedia_page = function(q, options, callback) {
    annotate(this, "get a url for wikipedia based on this topic");
    var ps = fns.settle_params(arguments, freebase.wikipedia_page, {});
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    freebase.get_id(ps.q, ps.options, function(topic) {
      if (!topic || !topic.id) {
        return ps.callback("");
      }
      var query = [{
        "id": topic.id,
        "name": null,
        "key": {
          "namespace": "/wikipedia/en_title",
          "value": null
        }
      }];
      freebase.mqlread(query, without_node_callback(ps.options), function(result) {
        var row = result && result.result && result.result[0];
        if (!row || !row.key || !row.key.value) {
          return ps.callback('');
        }
        return ps.callback('http://en.wikipedia.org/wiki/' + fns.mql_unencode(row.key.value));
      });
    });
  };
  // freebase.wikipedia_page('toronto')

  /**
   * Build the dbpedia html/json urls for a topic, derived from its english
   * wikipedia key. Honors options.nodeCallback.
   */
  freebase.dbpedia_page = function(q, options, callback) {
    annotate(this, "get a url for dbpedia based on this topic");
    var ps = fns.settle_params(arguments, freebase.dbpedia_page, {});
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    freebase.get_id(ps.q, ps.options, function(topic) {
      // read nodeCallback from the settled options: the raw `options` argument
      // may be undefined or the callback itself
      if (!topic || !topic.id) {
        return respond(ps.options, ps.callback, "");
      }
      var query = [{
        "id": topic.id,
        "name": null,
        "key": {
          "namespace": "/wikipedia/en_title",
          "value": null
        }
      }];
      freebase.mqlread(query, without_node_callback(ps.options), function(result) {
        var row = result && result.result && result.result[0];
        if (!row || !row.key || !row.key.value) {
          return respond(ps.options, ps.callback, {});
        }
        var key = fns.mql_unencode(row.key.value);
        var obj = {
          html: 'http://dbpedia.org/page/' + key,
          json: 'http://dbpedia.org/data/' + key + '.json'
        };
        return respond(ps.options, ps.callback, obj);
      });
    });
  };
  // freebase.dbpedia_page('toronto')

  /**
   * Quote a string so it is a valid MQL /type/key/value.
   * Spaces become underscores; any other character outside [A-Za-z0-9_-]
   * (and a leading '-' or '_') becomes '$' followed by its 4-digit uppercase
   * hex UTF-16 code unit.
   * @param {string} s
   * @returns {string} the encoded key, or '' for falsy input.
   */
  freebase.mql_encode = function(s) {
    annotate(this, "quote a unicode string to turn it into a valid mql /type/key/value");
    if (!s) {
      return '';
    }
    // collapse runs of spaces; the original replaced one space with one space, a no-op
    // NOTE(review): collapsing is the presumed intent -- confirm against upstream
    s = s.replace(/ {2,}/g, ' ');
    // trim both ends (without the g flag only the first match was removed)
    s = s.replace(/^\s+|\s+$/g, '');
    s = s.replace(/ /g, '_');
    var mqlkey_start = 'A-Za-z0-9';
    var mqlkey_char = 'A-Za-z0-9_-';
    var MQLKEY_VALID = new RegExp('^[' + mqlkey_start + '][' + mqlkey_char + ']*$');
    var MQLKEY_CHAR_MUSTQUOTE = new RegExp('([^' + mqlkey_char + '])', 'g');
    if (MQLKEY_VALID.exec(s)) { // fastpath
      return s;
    }
    var convert = function(a, b) {
      var hex = b.charCodeAt(0).toString(16).toUpperCase();
      // left-pad to four digits, including 1-digit codes such as tab
      while (hex.length < 4) {
        hex = '0' + hex;
      }
      return '$' + hex;
    };
    var x = s.replace(MQLKEY_CHAR_MUSTQUOTE, convert);
    if (x.charAt(0) == '-' || x.charAt(0) == '_') {
      x = convert(x, x.charAt(0)) + x.substr(1);
    }
    return x;
  };

  /**
   * RDF api: resolve q to an id and fetch its rdf text through fns.softget.
   * Honors options.nodeCallback.
   */
  freebase.rdf = function(q, options, callback) {
    annotate(this, "RDF api", "http://wiki.freebase.com/wiki/RDF");
    var ps = fns.settle_params(arguments, freebase.rdf, {});
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return respond(ps.options, ps.callback, {});
    }
    freebase.get_id(ps.q, ps.options, function(topic) {
      var id = topic && topic.id;
      if (!id) {
        return respond(ps.options, ps.callback, {});
      }
      ps.options.filter = ps.options.filter || 'all';
      var url = freebase.globals.host + "rdf" + id;
      fns.softget(url, ps.options, function(result) {
        return respond(ps.options, ps.callback, result || '');
      });
    });
  };
  // freebase.rdf("toronto")

  /**
   * Fetch the text blurb for a topic; calls back with '' when there is none.
   */
  freebase.description = function(q, options, callback) {
    annotate(this, "get a text blurb from freebase", "http://wiki.freebase.com/wiki/ApiText");
    var ps = fns.settle_params(arguments, freebase.description, {});
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    freebase.get_id(ps.q, ps.options, function(topic) {
      if (!topic || !topic.id) {
        return ps.callback("");
      }
      var url = freebase.globals.host + 'text' + topic.id;
      if (ps.options.debug) {
        console.log(url);
      }
      fns.http(url, ps.options, function(result) {
        if (!result || !result.result) {
          return ps.callback('');
        }
        return ps.callback(result.result);
      });
    });
  };
  // freebase.description("tunisia")
  // freebase.description("mike myers")

  /**
   * Build an image url for a topic's first /common/topic/image.
   * Remaining options (maxheight, maxwidth, errorid, ...) are splatted onto
   * the url. Calls back with '' when the topic has no image.
   */
  freebase.image = function(q, options, callback) {
    annotate(this, "get a url for image href of on this topic", "http://wiki.freebase.com/wiki/ApiImage");
    var ps = fns.settle_params(arguments, freebase.image, {
      maxheight: 250,
      maxwidth: 250,
      errorid: "/m/0djw4wd"
    });
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return ps.callback({});
    }
    freebase.get_id(ps.q, ps.options, function(topic) {
      if (!topic || !topic.id) {
        return ps.callback("");
      }
      var query = [{
        "id": topic.id,
        "name": null,
        "/common/topic/image": [{
          "id": null
        }]
      }];
      freebase.mqlread(query, without_node_callback(ps.options), function(result) {
        var row = result && result.result && result.result[0];
        var images = (row && row["/common/topic/image"]) || [];
        if (!images[0]) {
          return ps.callback('');
        }
        var url = freebase.globals.image_host + images[0].id;
        // drop query-only cruft before splatting the options onto the image url
        delete ps.options.strict;
        delete ps.options.cursor;
        delete ps.options.uniqueness_failure;
        var params = fns.set_params(ps.options);
        url += '?' + params;
        return ps.callback(url);
      });
    });
  };
  // freebase.image('toronto',{type:"/location/citytown"})

  /**
   * Fetch a topic's first notable type, with `name` copied from its `text`.
   * Honors options.nodeCallback; yields {} when there is none.
   */
  freebase.notable = function(q, options, callback) {
    annotate(this, "get a topic's notable type");
    var ps = fns.settle_params(arguments, freebase.notable, {});
    if (ps.array) {
      return fns.doit_async(ps);
    }
    if (!ps.valid) {
      return respond(ps.options, ps.callback, {});
    }
    freebase.topic(ps.q, {
      filter: "/common/topic/notable_types"
    }, function(result) {
      var notable = result && result.property && result.property['/common/topic/notable_types'];
      var values = (notable && notable.values) || [];
      if (!values[0]) {
        return respond(ps.options, ps.callback, {});
      }
      values[0].name = values[0].text;
      return respond(ps.options, ps.callback, values[0]);
    });
  };
  // freebase.notable("toronto maple leafs", {nodeCallback:true})

  /**
   * Print one line of documentation per method by instantiating it with empty
   * input and reading the `doc` string it sets on itself.
   */
  freebase.documentation = function(f, options, callback) {
    Object.keys(freebase).filter(function(v) {
      // methods called as freebase.x() leave string `doc`/`reference` keys on
      // the object itself; only functions can be instantiated below
      return typeof freebase[v] === "function" && v != "documentation" && v != "access_token" && v != "test_writes" && v != "globals";
    }).forEach(function(k) {
      var x = new freebase[k]("", {}, function() {});
      console.log("* **" + k + "**\n -" + x.doc);
    });
  };
  // freebase.documentation()

  // alternative names for each method; entries whose method is absent are skipped
  var aliases = {
    mqlread: ["query", "mql_read"],
    topic: ["topic_api", "all_data", "data", "everything", "get_data"],
    paginate: ["continue", "all", "each"],
    same_as_links: ["sameas", "sameaslinks", "links", "external_links", "weblinks"],
    translate: ["translate_to", "i8n", "translation"],
    image: ["picture", "get_image"],
    description: ["blurb", "get_blurb", "blurb_api", "text"],
    notable: ["notable_type", "notable_for", "notable_as", "main_type", "type"],
    place_data: ["place", "place_info", "location_info", "location"],
    incoming: ["incoming_links"],
    outgoing: ["outgoing_links"],
    related: ["related_topics", "similar"],
    geolocation: ["geo", "geocoordinates", "geo_location", "lat_lng", "location"],
    nearby: ["near", "close_to"],
    inside: ["inside_of", "within", "contained_by", "contains"],
    mql_encode: ["encode", "escape"]
  };
  Object.keys(aliases).forEach(function(k) {
    if (freebase[k]) {
      aliases[k].forEach(function(a) {
        freebase[a] = freebase[k];
      });
    }
  });

  // export for Node.js
  if (typeof module !== 'undefined' && module.exports) {
    module.exports = freebase;
  }

  return freebase;

})();

// q=[{id:"/en/radiohead",name:null}]
// freebase.mqlread(q, {cost:true, html_escape:false, debug:true,dinosaur:"yessir"}, function(r){console.log(r)})
// freebase.search("tony hawk", {debug:true, dinosaur:"yessir"}, function(r){console.log(r)})