freebase
Version:
a very masculine way to work with data from freebase.com
1,350 lines (1,285 loc) • 42.5 kB
JavaScript
// Bootstrap for the sugar methods.
// Under CommonJS (Node.js) the core client, the shared helper functions and the
// data tables are loaded from sibling modules. The three names are declared up
// front with `var`, so they exist (possibly undefined) in every environment.
var freebase;
var fns;
var data;
if (typeof module !== 'undefined' && module.exports) {
  freebase = require("./core");
  fns = require('./helpers/helpers');
  data = require('./helpers/data').data;
}
// Make sure we have the core methods: when they are missing, warn and fall back
// to an empty object so the method assignments that follow have a target.
if (typeof freebase === 'undefined' || !freebase || !freebase.search) {
  console.warn('freebase sugar methods loaded without freebase core methods');
  freebase = {};
}
//
// Summarise the topics of a type: which other types they carry, and what share
// of them have an alias, an image and an English wikipedia key.
//   q        - a type id or a (possibly plural) type name; names are singularized
//   options  - passed through fns.settle_params (default limit: 1000);
//              options.extend is merged into the MQL clause
//   callback - receives the summary object, or [] when the type cannot be
//              resolved or no topics come back
freebase.drilldown = function(q, options, callback) {
  this.doc = "get insight into the breakdown of the topics in this type, by type and quality"
  var ps = fns.settle_params(arguments, freebase.drilldown, {
    limit: 1000
  });
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback([]);
  }
  //singularize it if its not an id
  if (!ps.q.match(/\/.{1,12}\/.{3}/)) {
    ps.q = singularize(ps.q);
  }
  //get its id
  freebase.get_id(ps.q, {
    type: "/type/type"
  }, function(topic) {
    if (!topic || !topic.id) {
      return ps.callback([])
    }
    var query = [{
      "s:type": topic.id,
      "type": [],
      "name": null,
      "id": null,
      "limit": 150,
      "estimate-count": null,
      "/common/topic/image": [{
        "id": null,
        "limit": 1,
        "optional": true
      }],
      "key": [{
        "namespace": "/wikipedia/en",
        "limit": 1,
        "value": null,
        "optional": true
      }],
      "/common/topic/alias": [{
        "value": null,
        "limit": 1,
        "optional": true
      }]
    }]
    //read the extension from the settled options: the raw `options` argument may
    //be undefined or the callback itself when the flexible signature is used
    var extend = ps.options.extend;
    if (extend) {
      for (var key in extend) {
        query[0][key] = extend[key]
      }
    }
    freebase.paginate(query, ps.options, function(result) {
      //nothing came back: report it the same way as an unresolvable type
      //instead of throwing on result[0]
      if (!result || !result.length) {
        return ps.callback([])
      }
      var types = fns.flatten(result.map(function(v) {
        return v.type
      }));
      //ignore the generic ".../topic" types
      types = types.filter(function(v) {
        return !v.match(/\/topic$/)
      })
      var topk = fns.topk(types, result.length);
      var aliases = result.filter(function(r) {
        return r["/common/topic/alias"].length > 0
      })
      var images = result.filter(function(r) {
        return r["/common/topic/image"].length > 0
      })
      var wikipedia = result.filter(function(r) {
        return r["key"].length > 0
      })
      var obj = {
        types: topk,
        alias_percent: fns.percentage(aliases.length, result.length),
        image_percent: fns.percentage(images.length, result.length),
        wikipedia_percent: fns.percentage(wikipedia.length, result.length),
        subset: result.length,
        "estimate-count": result[0]["estimate-count"]
      }
      ps.callback(obj)
    })
  })
}
// freebase.drilldown("/chemistry/chemical_compound", {
// max: 100
// }, console.log)
// Look up the schema data of one property id and its semantic predicates.
//   q        - a property id (an array with more than one entry is fanned out
//              through fns.doit_async)
//   options  - optional; may be omitted or replaced by the callback
//   callback - receives {name, id, type, description, unique, reverse_property,
//              expected_type, meta}, or {} when nothing is found;
//              defaults to console.log
freebase.property_introspection = function(q, options, callback) {
  this.doc = "common lookups for freebase property data"
  callback = callback || console.log;
  if (typeof options == "function") {
    callback = options;
    options = {};
  } //flexible parameter
  //default the options before reading from them, so that calls without an
  //options argument do not throw
  options = options || {};
  if (options.nodeCallback) {
    callback = callback.bind(undefined, null)
    options.nodeCallback = false
  }
  if (!q) {
    return callback({})
  }
  //handle an array
  if (fns.isarray(q) && q.length > 1) {
    var ps = fns.settle_params(arguments, freebase.property_introspection);
    return fns.doit_async(ps)
  }
  var query = [{
    "id": q,
    "mid": null,
    "name": null,
    "type": "/type/property",
    "reverse_property": [{
      "id": null,
      "name": null,
      "optional": true
    }],
    "expected_type": [{
      "id": null,
      "name": null,
      "optional": true,
      "/freebase/type_hints/mediator": null
    }],
    "unique": null,
    "schema": {
      "id": null,
      "name": null,
      "/freebase/type_profile/instance_count": null,
      "/freebase/type_hints/mediator": null
    },
    "/common/topic/description": null
  }]
  freebase.mqlread(query, options, function(r) {
    var obj = {}
    if (!r || !r.result || !r.result[0]) {
      return callback(obj)
    }
    r = r.result[0]
    obj.name = r.name
    obj.id = r.id
    obj.type = r.schema
    obj.description = r["/common/topic/description"]
    obj.unique = r.unique || false;
    obj.reverse_property = r.reverse_property
    obj.expected_type = r.expected_type
    //get its metaschema
    var meta_query = [{
      "name": null,
      "type": "/base/fbontology/semantic_predicate",
      "paths": {
        "a:properties": q,
        "b:properties": [{
          "id": null
        }]
      }
    }]
    freebase.mqlread(meta_query, options, function(meta) {
      //a failed second read still reports the property data gathered so far
      obj.meta = (meta && meta.result) || []
      return callback(obj)
    })
  })
}
// freebase.property_introspection("/government/politician/party")
// Describe a type, or fall back to describing a property when no type matches.
//   q        - a type/property name or id (an array with more than one entry is
//              fanned out through fns.doit_async)
//   options  - optional; may be omitted or replaced by the callback
//   callback - receives the type summary, the result of
//              freebase.property_introspection, or {} when nothing is found;
//              defaults to console.log
freebase.schema = function(q, options, callback) {
  this.doc = "common lookups for types and properties"
  callback = callback || console.log;
  if (typeof options == "function") {
    callback = options;
    options = {};
  } //flexible parameter
  //default the options before reading from them, so that calls such as
  //freebase.schema("politician") do not throw
  options = options || {};
  if (options.nodeCallback) {
    callback = callback.bind(undefined, null)
    options.nodeCallback = false;
  }
  if (!q) {
    return callback({})
  }
  //handle an array
  if (fns.isarray(q) && q.length > 1) {
    var ps = fns.settle_params(arguments, freebase.schema, {});
    return fns.doit_async(ps)
  }
  //see if its a type
  //NOTE(review): this writes `type` onto the caller's options object, and the
  //same object is later handed to mqlread and property_lookup - confirm that
  //property_lookup is meant to see type "/type/type"
  options.type = "/type/type"
  freebase.search(q, options, function(r) {
    if (r && r[0] && r[0].id) {
      r = r[0]
      var query = [{
        "id": r.id,
        "mid": null,
        "name": null,
        "properties": [{
          "id": null,
          "name": null,
          "optional": true,
          "/type/property/reverse_property": [{
            "id": null,
            "name": null,
            "optional": true
          }]
        }],
        "/freebase/type_hints/mediator": null,
        "/freebase/type_hints/included_types": [{
          "id": null,
          "name": null,
          "optional": true
        }],
        "/freebase/type_profile/published": null,
        "/type/type/expected_by": [{
          "id": null,
          "name": null,
          "optional": true
        }],
        "/freebase/type_profile/instance_count": null,
        "/freebase/type_profile/property_count": null,
        "domain": {
          "id": null,
          "name": null
        },
        "/freebase/type_profile/equivalent_topic": [{
          "id": null,
          "name": null,
          "optional": true
        }],
        "type": "/type/type"
      }]
      freebase.mqlread(query, options, function(r) {
        if (!r || !r.result || !r.result[0]) {
          return callback({})
        }
        r = r.result[0]
        var obj = {}
        obj.domain = r.domain
        obj.id = r.id
        obj.name = r.name
        obj.included_types = r["/freebase/type_hints/included_types"]
        obj.incoming_properties = r["/type/type/expected_by"]
        obj.is_compound_value = r["/freebase/type_hints/mediator"] || false
        obj.is_commons = r["/freebase/type_profile/published"] || false
        obj.equivalent_topic = r["/freebase/type_profile/equivalent_topic"]
        obj.topic_count = r["/freebase/type_profile/instance_count"] || 0
        obj.property_count = r["/freebase/type_profile/property_count"] || 0;
        //types that include this one
        var included_by_query = [{
          "id": null,
          "name": null,
          "s:name": {
            "value": null,
            "lang": "/lang/en",
            "optional": "required"
          },
          "/freebase/type_hints/included_types": [{
            "id": obj.id
          }]
        }]
        freebase.mqlread(included_by_query, options, function(r) {
          if (!r || !r.result) {
            return callback(obj)
          }
          obj.included_by = r.result.map(function(v) {
            return {
              id: v.id,
              name: v.name
            }
          })
          return callback(obj)
        })
      })
    } else {
      //not a type: try it as a property
      freebase.property_lookup(q, options, function(r) {
        if (!r || !r[0] || !r[0].id) {
          return callback({})
        }
        return freebase.property_introspection(r[0].id, {}, callback)
      })
    }
  })
}
// freebase.schema("politician")
// Work out how to refer to a topic in a sentence.
//   q        - a topic name or id, resolved through freebase.get_id
//   options  - passed through fns.settle_params
//   callback - receives {plural, gender, article, pronoun, copula}, or {} when
//              the topic cannot be resolved
// A person (real or fictional) gets he/she from its gender, or "they" when no
// gender is recorded; a recorded gender other than /en/male or /en/female keeps
// the default pronoun "it". Non-persons are checked against data.plural_types
// and data.definate_articles.
freebase.grammar = function(q, options, callback) {
  this.doc = "get the proper pronoun to use for a topic eg. he/she/they/it"
  var ps = fns.settle_params(arguments, freebase.grammar, {});
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback({});
  }
  freebase.get_id(ps.q, ps.options, function(topic) {
    if (!topic || !topic.id) {
      return ps.callback({})
    }
    var query = [{
      "id": topic.id,
      "name": null,
      "type": [],
      "/people/person/gender": [{
        "id": null,
        "optional": true
      }],
      "/fictional_universe/fictional_character/gender": [{
        "id": null,
        "optional": true
      }]
    }]
    //use the settled options: the raw `options` argument may be undefined or the
    //callback itself when the flexible signature is used
    freebase.mqlread(query, ps.options, function(result) {
      if (!result || !result.result || !result.result[0]) {
        return ps.callback({})
      }
      result = result.result[0];
      var grammar = {
        plural: false,
        gender: null,
        article: "a",
        pronoun: "it",
        copula: "is"
      }
      //people grammar
      if (fns.isin('/people/person', result.type) || fns.isin('/fictional_universe/fictional_character', result.type)) {
        var real_gender = result["/people/person/gender"] || [];
        var fictional_gender = result["/fictional_universe/fictional_character/gender"] || [];
        var gender = real_gender[0] || fictional_gender[0];
        if (gender) {
          if (gender.id == "/en/male") { //male
            grammar.gender = "male";
            grammar.pronoun = "he";
          } else if (gender.id == "/en/female") { //female
            grammar.gender = "female";
            grammar.pronoun = "she";
          }
        } else { //no gender person
          grammar.gender = "unknown";
          grammar.pronoun = "they";
        }
      } else { //not a person
        //plural topics
        if (fns.intersection(data.plural_types, result.type).length > 0) {
          grammar.plural = true;
          grammar.pronoun = "they";
          grammar.copula = "are"
        }
        //categories that need a 'the' instead of 'a'
        if (fns.intersection(data.definate_articles, result.type).length > 0) {
          grammar.article = "the";
        }
      }
      return ps.callback(grammar);
    })
  })
}
// freebase.grammar("toronto maple leafs")
// Search for the best /common/topic match of `q`, then collect the webpages
// the topic api lists for it.
//   callback receives {topic, links}; {} when the query is invalid or the
//   search finds nothing; [] when the topic api returns nothing.
freebase.same_as_links = function(q, options, callback) {
  this.doc = "turns a url into a freebase topic and list its same:as links"
  var ps = fns.settle_params(arguments, freebase.same_as_links, {});
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback({});
  }
  var search_url = freebase.globals.host + 'search?type=/common/topic&limit=1&query=' + encodeURIComponent(ps.q);
  if (ps.options.debug) {
    console.log(search_url)
  }
  fns.http(search_url, ps.options, function(found) {
    var best = found && found.result && found.result[0];
    if (!best) {
      return ps.callback({})
    }
    //get its formatted links from the topic api
    freebase.topic(best.mid, ps.options, function(all) {
      if (fns.isempty(all)) {
        return ps.callback([]);
      }
      var links = [];
      //same-as ones: titled with the url authority as-is
      var equivalent = all.property && all.property['/common/topic/topic_equivalent_webpage'];
      if (equivalent) {
        links = equivalent.values.map(function(page) {
          return {
            href: page.value,
            title: fns.parseurl(page.value).authority
          }
        })
      }
      //webpage ones: titled with the authority minus a leading "www."
      var topical = all.property && all.property['/common/topic/topical_webpage'];
      if (topical) {
        var topical_links = topical.values.map(function(page) {
          var authority = fns.parseurl(page.value).authority || ''
          return {
            href: page.value,
            title: authority.replace(/^www\./, '')
          }
        })
        links = links.concat(topical_links)
      }
      return ps.callback({
        topic: best,
        links: links
      })
    })
  })
}
// freebase.same_as_links("toronto maple leafs")
// Fetch the name of a topic in another language.
//   q        - a topic name or id, resolved through freebase.get_id
//   options  - options.lang is a language code ("fr", the default) or a full
//              language id ("/lang/ja"); bare codes are prefixed with "/lang/"
//   callback - receives the translated name, '' when the topic or the name is
//              not found, or {} when the query is invalid
freebase.translate = function(q, options, callback) {
  this.doc = "return specific language title for a topic"
  this.reference = "http://wiki.freebase.com/wiki/I18n"
  var ps = fns.settle_params(arguments, freebase.translate, {
    lang: "fr"
  });
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback({});
  }
  if (!ps.options.lang.match(/\/lang\//)) {
    ps.options.lang = '/lang/' + ps.options.lang
  }
  freebase.get_id(ps.q, ps.options, function(topic) {
    if (!topic || !topic.id) {
      return ps.callback("")
    }
    var query = [{
      "id": topic.id,
      "name": [{
        "lang": ps.options.lang,
        "value": null
      }]
    }]
    freebase.mqlread(query, {}, function(result) {
      if (!result || !result.result || !result.result[0]) {
        return ps.callback('')
      }
      //an empty `name` array is truthy, so guard its first entry separately
      var names = result.result[0].name || []
      var first = names[0] || {}
      return ps.callback(first.value || '')
    })
  })
}
// freebase.translate("toronto maple leafs", {lang:"/lang/ja"})
// Fetch a topic description and cut it down to its first sentence.
//   callback receives the sentence with parenthesised asides removed, '' when
//   there is no description, or {} when the query is invalid.
freebase.sentence = function(q, options, callback) {
  this.doc = "get the first sentence of a topic description"
  this.reference = "http://wiki.freebase.com/wiki/APIText"
  var ps = fns.settle_params(arguments, freebase.sentence, {});
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback({});
  }
  freebase.description(ps.q, ps.options, function(desc) {
    if (!desc) {
      return ps.callback("")
    }
    desc = sentenceparser(desc) || []
    desc = desc[0] || ''
    desc = desc.replace(/\(.*?\)/g, '') //remove birthdates
    //removing a parenthetical leaves two spaces side by side: collapse them
    desc = desc.replace(/ {2,}/g, ' ')
    return ps.callback(desc)
  })
}
// freebase.sentence('john malkovich', {}, console.log)
// freebase.sentence(['radiohead', 'john malkovich'], {}, console.log)
// List the topics of a type, page by page.
//   q        - a type id or a (possibly plural) type name; names are singularized
//   options  - passed through fns.settle_params (default limit: 2000);
//              options.extend is merged into the MQL clause
//   callback - handed to freebase.paginate; receives [] when the query is
//              invalid or the type cannot be resolved
freebase.list = function(q, options, callback) {
  this.doc = "get a list of topics in a type"
  var ps = fns.settle_params(arguments, freebase.list, {
    limit: 2000
  });
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback([]);
  }
  //anything that does not look like an id is treated as a plural name
  var looks_like_id = ps.q.match(/\/.{1,12}\/.{3}/);
  if (!looks_like_id) {
    ps.q = singularize(ps.q);
  }
  //resolve the type, then page through its instances
  freebase.get_id(ps.q, {
    type: "/type/type"
  }, function(topic) {
    if (!(topic && topic.id)) {
      return ps.callback([])
    }
    var clause = {
      "type": topic.id,
      "name": null,
      "id": null,
      "mid": null,
      "limit": 100
    }
    if (ps.options.extend) {
      for (var key in ps.options.extend) {
        clause[key] = ps.options.extend[key]
      }
    }
    freebase.paginate([clause], ps.options, ps.callback)
  })
}
// freebase.list("hurricanes",{}, function(r){console.log(r)})
// freebase.list("moons",{}, function(r){console.log(r)})
// freebase.list("planets",{}, function(r){console.log(r)})
// Find the closest town to a coordinate and the places that contain it.
//   geo      - {lat, lng, radius}; radius is in feet and defaults to 999000
//              (an array with more than one entry is fanned out through
//              fns.doit_async)
//   options  - optional; may be omitted or replaced by the callback
//   callback - receives {city, country, province, timezone} (unknown parts stay
//              null), or {} when no town is found; defaults to console.log
freebase.place_data = function(geo, options, callback) {
  this.doc = "from a geo-coordinate and area radius (in feet), get the town, province, country, and timezone for it"
  callback = callback || console.log;
  if (typeof options == "function") {
    callback = options;
    options = {};
  } //flexible parameter
  //default the options before reading from them, so that calls without an
  //options argument do not throw
  options = options || {};
  if (options.nodeCallback) {
    callback = callback.bind(undefined, null)
    options.nodeCallback = false;
  }
  if (!geo) {
    return callback({})
  }
  //handle an array
  if (fns.isarray(geo) && geo.length > 1) {
    var ps = fns.settle_params(arguments, freebase.place_data, {});
    return fns.doit_async(ps)
  }
  var CONTAINED_BY = "/location/location/containedby"
  var TIME_ZONES = "/location/location/time_zones"

  //does this location carry the given type id?
  function has_type(place, type_id) {
    return (place.type || []).indexOf(type_id) !== -1
  }

  //the time zone of a location, but only when it is unambiguous
  function single_timezone(place) {
    var zones = place[TIME_ZONES] || []
    return (zones.length == 1 && zones[0]) ? zones[0] : null
  }

  function summary(place) {
    return {
      id: place.id,
      name: place.name
    }
  }
  //999000ft is roughly 300km; kept in a local so the caller's geo is not modified
  var radius = geo.radius || 999000
  var filter = '(all type:/location/citytown (within radius:' + radius + 'ft lon:' + geo.lng + ' lat:' + geo.lat + '))'
  //the filter contains spaces, so it is escaped like any other query parameter
  var url = freebase.globals.host + 'search?filter=' + encodeURIComponent(filter) + '&limit=200'
  if (options.debug) {
    console.log(url)
  }
  fns.http(url, options, function(r) {
    var all = {
      city: null,
      country: null,
      province: null,
      timezone: null
    }
    if (!r || !r.result || !r.result[0]) {
      return callback({})
    }
    all.city = r.result[0];
    var query = [{
      "name": null,
      "id": r.result[0].mid,
      "/location/location/containedby": [{
        "id": null,
        "name": null,
        "type": [],
        "optional": true,
        "/location/location/time_zones": [{
          "/time/time_zone/offset_from_uct": null,
          "id": null,
          "name": null,
          "optional": true
        }],
        "/location/location/containedby": [{
          "id": null,
          "name": null,
          "type": [],
          "optional": true,
          "/location/location/time_zones": [{
            "/time/time_zone/offset_from_uct": null,
            "id": null,
            "name": null,
            "optional": true
          }]
        }]
      }]
    }]
    freebase.mqlread(query, {}, function(r) {
      //a failed read still reports the town that was found
      if (!r || !r.result || !r.result[0]) {
        return callback(all)
      }
      //hunt for the most appropriate topics in 2 layers
      var parents = r.result[0][CONTAINED_BY] || []
      for (var i = 0; i < parents.length; i++) {
        var parent = parents[i]
        if (has_type(parent, "/location/country")) {
          all.country = summary(parent)
        } else if (has_type(parent, "/location/administrative_division")) {
          all.province = summary(parent)
        }
        var zone = single_timezone(parent)
        if (zone) {
          all.timezone = zone
        }
        //stop as soon as a country is known
        if (all.country) {
          return callback(all)
        }
        //second layer looks good too, but never overrides a first-layer
        //province or timezone
        var grandparents = parent[CONTAINED_BY] || []
        grandparents.forEach(function(o) {
          if (has_type(o, "/location/country")) {
            all.country = summary(o)
          } else if (!all.province && has_type(o, "/location/administrative_division")) {
            all.province = summary(o)
          }
          if (!all.timezone) {
            all.timezone = single_timezone(o)
          }
        })
      }
      return callback(all)
    })
  })
}
// freebase.place_data({lat:51.545414293637286,lng:-0.07589578628540039}, {}, console.log)
// Collect the identifying facts of a topic: the properties listed in
// data.is_a, followed by its types (minus any whose label contains "Topic").
//   callback receives that list, or {} when the query is invalid or the topic
//   api returns nothing.
freebase.is_a = function(q, options, callback) {
  this.doc = "get a list of identifiers for a topic"
  var ps = fns.settle_params(arguments, freebase.is_a, {
    max: 25
  });
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback({});
  }
  freebase.topic(ps.q, ps.options, function(r) {
    if (fns.isempty(r)) {
      return ps.callback({});
    }
    //the topic's types, reshaped like the other identifiers
    var type_property = r.property["/type/object/type"] || {}
    var type_values = type_property.values || []
    var types = []
    type_values.forEach(function(v) {
      if (v.text.match(/Topic/)) {
        return
      }
      types.push({
        name: v.text,
        id: v.id,
        property: "/type/object/type"
      })
    })
    //the remaining properties, narrowed to the identifying ones
    var identifiers = fns.parse_topic_api(r.property).filter(function(v) {
      return fns.isin(v.property, data.is_a)
    })
    return ps.callback(identifiers.concat(types))
  })
}
// freebase.is_a("toronto")
// freebase.is_a("george clooney")
// Find candidate properties for a loose property name.
// Search results (default type "/type/property") are extended with the offline
// aliases in data.properties whose `n` equals the normalised name or its
// singular form; queries that already look like an id skip the offline lookup.
//   callback receives the list of candidates, or {} when the query is invalid.
freebase.property_lookup = function(q, options, callback) {
  this.doc = "lookup soft property matches, like 'birthday' vs 'date of birth'"
  var ps = fns.settle_params(arguments, freebase.property_lookup, {
    type: "/type/property"
  });
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback({});
  }
  freebase.search(ps.q, ps.options, function(candidate_properties) {
    //a failed search must not stop the offline lookup
    if (!fns.isarray(candidate_properties)) {
      candidate_properties = []
    }
    //look up offline for property aliases
    var term = ps.q;
    if (!term.match(/\/.*?\/.*?\//)) {
      term = term.toLowerCase();
      term = term.replace(/\s+/g, ' '); //collapse runs of whitespace
      term = term.replace(/^\s+|\s+$/g, ''); //trim both ends
      var property_singular = singularize(term);
      candidate_properties = candidate_properties.concat(data.properties.filter(function(v) {
        return v.n == term || v.n == property_singular
      }))
    }
    return ps.callback(candidate_properties)
  })
}
// freebase.property_lookup("albums")
// Answer "what is the <property> of <topic>?".
//   q        - a topic name or id
//   options  - options.property is required: a property id, a metaschema
//              predicate known to fns.metaschema_lookup, or a loose property name
//   callback - receives the list of values; {} when the query is invalid, the
//              property is missing or the topic api returns nothing
// Three strategies, in order: a property id is read straight off the topic api;
// a metaschema predicate becomes a search filter; anything else is resolved
// with freebase.property_lookup and read off the topic api.
freebase.question = function(q, options, callback) {
  this.doc = "give a topic and a property, and get a list of results"
  var ps = fns.settle_params(arguments, freebase.question, {
    max: 25
  });
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid || !ps.options.property) {
    return ps.callback({});
  }
  var property = ps.options.property
  var type = ps.options.property.match(/\/.*?\/.*?\//)
  //straight-up id search
  if (property.match(/^\/.{1,12}\/.{3}/)) {
    return freebase.topic(ps.q, {}, function(r) {
      if (!r || !r.property || !r.property[property]) {
        return ps.callback([])
      }
      return ps.callback(r.property[property].values)
    })
  }
  var candidate_metaschema = fns.metaschema_lookup(property);
  if (candidate_metaschema) {
    ps.options.filter = '(all ' + candidate_metaschema + ':"' + ps.q + '")'
    //search with the settled options, which is where the filter was just set;
    //the raw `options` argument may be undefined or the callback itself
    freebase.search('', ps.options, function(result) {
      return ps.callback(result)
    })
  } else {
    freebase.property_lookup(property, {}, function(candidate_properties) {
      if (!candidate_properties || candidate_properties.length === 0) {
        return ps.callback([])
      }
      //NOTE(review): `type` is the raw result of String.match (an array or
      //null), not a string - confirm what the topic api expects as its filter
      ps.options.filter = type;
      //look for these properties in the topic api
      freebase.topic(ps.q, ps.options, function(result) {
        if (fns.isempty(result)) {
          return ps.callback({});
        }
        var all = [];
        candidate_properties.forEach(function(p) {
          if (result.property[p.id]) {
            all = all.concat(result.property[p.id].values)
          }
        })
        all = fns.json_unique(all, "id")
        return ps.callback(all)
      })
    })
  }
}
// freebase.question("keanu reeves", {property:"children"})
//freebase.question("thom yorke", "produced") //******
// freebase.question("pulp fiction", {property:"/film/film/initial_release_date"})
// freebase.question("keanu reeves", {property:"films"}) //******
// Read the wordnet synsets of a word from the /base/wordnet schema.
//   callback receives the `result` of the MQL read ({} when the read returns
//   nothing), or {} when the query is invalid.
freebase.wordnet = function(q, options, callback) {
  this.doc = "query wordnet via freebase"
  var ps = fns.settle_params(arguments, freebase.wordnet, {});
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback({});
  }
  //one clause per synset that has a sense of the word
  var synset = {
    "id": null,
    "type": "/base/wordnet/synset",
    "gloss": null,
    "syntactic_category": null,
    "sort": ["syntactic_category", "word.sense_number", "a:word.word_number"],
    //the sense that matches the queried word
    "word": {
      "sense_number": null,
      "derivationally_related_forms": [{
        "sense": {
          "name": null,
          "id": null
        },
        "optional": true
      }],
      "word": {
        "word": ps.q
      }
    },
    //every word of the synset
    "a:word": [{
      "word_number": null,
      "word": {
        "word": null
      }
    }]
  }
  if (ps.options.limit) {
    synset.limit = ps.options.limit;
  }
  freebase.mqlread([synset], ps.options, function(response) {
    var settled = response || {
      result: {}
    }
    return ps.callback(settled.result)
  })
}
// freebase.wordnet("charming")
// Follow one property outwards from a topic, up to three levels deep.
//   q        - a topic name or id
//   options  - options.property is the property to follow; options.max
//              (default 25) caps how many results of a level are expanded
//   callback - receives everything found so far, de-duplicated by id with
//              fns.json_unique once a second level is reached; {} when the
//              query is invalid
freebase.dig = function(q, options, callback) {
  this.doc = "transitive query on a specific property, maximum 3-ply"
  //register dig itself (not property_lookup) as the method for array input
  var ps = fns.settle_params(arguments, freebase.dig, {
    max: 25
  });
  if (ps.array) {
    return fns.doit_async(ps);
  }
  if (!ps.valid) {
    return ps.callback({});
  }
  var all = [];

  //true unless `list` is a non-empty array
  function is_empty(list) {
    return !list || !fns.isarray(list) || list.length === 0
  }

  //the ids to expand at the next level, capped at options.max
  function next_ids(list) {
    return list.slice(0, ps.options.max).map(function(v) {
      return v && v.id
    }).filter(function(id) {
      return !!id
    })
  }

  //ask the question for every id and hand the flattened answers to `done`;
  //finishes the whole dig when there is nothing to expand or nothing comes back
  function expand(ids, done) {
    if (ids.length === 0) {
      return ps.callback(all)
    }
    return fns.doit_async({
      q: ids,
      options: ps.options,
      method: freebase.question,
      callback: function(big) {
        if (is_empty(big)) {
          return ps.callback(all)
        }
        var level = fns.flatten(big, 'shallow')
        all = fns.json_unique(all.concat(level), "id")
        return done(level)
      }
    })
  }
  //first level
  freebase.question(ps.q, ps.options, function(r) {
    if (is_empty(r)) {
      return ps.callback(all)
    }
    all = all.concat(r);
    //second level
    return expand(next_ids(r), function(second) {
      //third level: expand what the second level found, rather than asking the
      //second level's questions again
      return expand(next_ids(second), function() {
        return ps.callback(all)
      })
    })
  })
}
// freebase.dig('/en/bovid', {
// property: '/biology/organism_classification/lower_classifications'
// }, function(r) {
// console.log(r)
// })
// freebase.dig('/en/toronto', {
// property: '/location/location/contains'
// }, function(r) {
// console.log(r)
// })
//originally by david huynh 2010, http://www.freebase.com/appeditor/#!path=//cubed.dfhuynh.user.dev/index
//Algorithm is adopted from
//http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html
// Turn an English plural noun into its singular form.
//   text - a word or phrase; for a phrase only the last space-separated word is
//          inflected, and for "x-<preposition>-y" only the part before the
//          preposition is
//   returns the singular form, or `text` itself when no rule applies; a result
//          taken from one of the word tables is capitalised when `text` is
// Rules, in order: user-defined nouns, irregular nouns (Table A.1), uninflected
// suffixes and nouns (Table A.2), the inflection categories (Tables A.10-A.25),
// hyphenated prepositions, then plain "-xes"/"-ses"/"-s" stripping.
// NOTE(review): the final "-ses" rule strips two letters, so "classes" gives
// "class" but "horses" gives "hors" - kept as it was.
function singularize(text) {
  if (text.match(' ')) { //multiple words
    var words = text.split(' ');
    var last = words[words.length - 1];
    var firsts = words.slice(0, -1);
    return firsts.join(" ") + ' ' + singularize(last);
  }
  var prepositions = [
    "about", "above", "across", "after", "against", "along", "among", "around", "at",
    "before", "behind", "below", "beneath", "beside", "between", "beyond", "but", "by",
    "despite", "down", "during", "except", "for", "from", "in", "inside", "into", "like",
    "near", "of", "off", "on", "onto", "out", "outside", "over", "past", "since",
    "through", "throughout", "till", "to", "toward", "under", "underneath", "until",
    "up", "upon", "with", "within", "without"
  ];
  var userDefinedNouns = [{
    "p": "people",
    "s": "person"
  }, {
    "p": "tornadoes",
    "s": "tornado"
  }, {
    "p": "churches",
    "s": "church"
  }, {
    "p": "countries",
    "s": "country"
  }, {
    "p": "cities",
    "s": "city"
  }, {
    "p": "companies",
    "s": "company"
  }, {
    "p": "monkies",
    "s": "monkey"
  }, {
    "p": "donkies",
    "s": "donkey"
  }, {
    "p": "mysteries",
    "s": "mystery"
  }, {
    "p": "authors",
    "s": "author"
  }];
  // Table A.1
  var irregularNouns = {
    "beef": {
      anglicized: "beefs",
      classical: "beeves"
    },
    "brother": {
      anglicized: "brothers",
      classical: "brethren"
    },
    "child": {
      anglicized: null,
      classical: "children"
    },
    "cow": {
      anglicized: null,
      classical: "kine"
    },
    "ephemeris": {
      anglicized: null,
      classical: "ephemerides"
    },
    "genie": {
      anglicized: null,
      classical: "genii"
    },
    "money": {
      anglicized: "moneys",
      classical: "monies"
    },
    "mongoose": {
      anglicized: "mongooses",
      classical: null
    },
    "mythos": {
      anglicized: null,
      classical: "mythoi"
    },
    "octopus": {
      anglicized: "octopuses",
      classical: "octopodes"
    },
    "ox": {
      anglicized: null,
      classical: "oxen"
    },
    "soliloquy": {
      anglicized: "soliloquies",
      classical: null
    },
    "trilby": {
      anglicized: "trilbys",
      classical: null
    }
  };
  var uninflectedSuffixes = ["fish", "ois", "sheep", "deer", "pox", "itis"];
  // Table A.2
  var uninflectedNouns = {
    "bison": 1,
    "flounder": 1,
    "pliers": 1,
    "bream": 1,
    "gallows": 1,
    "proceedings": 1,
    "breeches": 1,
    "graffiti": 1,
    "rabies": 1,
    "britches": 1,
    "headquarters": 1,
    "salmon": 1,
    "carp": 1,
    "herpes": 1,
    "scissors": 1,
    "chassis": 1,
    "high-jinks": 1,
    "sea-bass": 1,
    "seabass": 1,
    "clippers": 1,
    "homework": 1,
    "series": 1,
    "cod": 1,
    "innings": 1,
    "shears": 1,
    "contretemps": 1,
    "jackanapes": 1,
    "species": 1,
    "corps": 1,
    "mackerel": 1,
    "swine": 1,
    "debris": 1,
    "measles": 1,
    "trout": 1,
    "diabetes": 1,
    "mews": 1,
    "tuna": 1,
    "djinn": 1,
    "mumps": 1,
    "whiting": 1,
    "eland": 1,
    "news": 1,
    "wildebeest": 1,
    "elk": 1,
    "pincers": 1,
    "moose": 1,
    "shrimp": 1,
    "hoi polloi": 1,
    "riffraff": 1,
    "rabble": 1
  };
  //each category maps a singular ending (`from`) to one plural ending (`to`)
  //or to two (`anglicized` and `classical`), for the listed words only
  var inflectionCategories = [{ // Table A.10
    from: "a",
    to: "ae",
    words: ["alumna", "alga", "vertebra"]
  }, {
    // Table A.11
    from: "a",
    anglicized: "as",
    classical: "ae",
    words: ["abscissa", "amoeba", "antenna", "aurora", "formula", "hydra", "hyperbola", "lacuna", "medusa", "nebula", "nova", "parabola"]
  }, {
    // Table A.12
    from: "a",
    anglicized: "as",
    classical: "ata",
    words: ["anathema", "bema", "carcinoma", "charisma", "diploma", "dogma", "drama", "edema", "enema", "enigma", "gumma", "lemma", "lymphoma", "magma", "melisma", "miasma", "oedema", "sarcoma", "schema", "soma", "stigma", "stoma", "trauma"]
  }, {
    // Table A.13
    from: "en",
    anglicized: "ens",
    classical: "ina",
    words: ["stamen", "foramen", "lumen"]
  }, {
    // Table A.14
    from: "ex",
    to: "ices",
    words: ["codex", "murex", "silex"]
  }, {
    // Table A.15
    from: "ex",
    anglicized: "exes",
    classical: "ices",
    words: ["apex", "cortex", "index", "latex", "pontifex", "simplex", "vertex", "vortex"]
  }, {
    // Table A.16
    from: "is",
    anglicized: "ises",
    classical: "ides",
    words: ["iris", "clitoris"]
  }, {
    // Table A.17
    from: "o",
    to: "os",
    words: ["albino", "archipelago", "armadillo", "commando", "ditto", "dynamo", "embryo", "fiasco", "generalissimo", "ghetto", "guano", "inferno", "jumbo", "lingo", "lumbago", "magneto", "manifesto", "medico", "octavo", "photo", "pro", "quarto", "rhino", "stylo"]
  }, {
    // Table A.18
    from: "o",
    anglicized: "os",
    classical: "i",
    words: ["alto", "basso", "canto", "contralto", "crescendo", "solo", "soprano", "tempo"]
  }, {
    // Table A.19
    from: "on",
    to: "a",
    words: ["aphelion", "asyndeton", "criterion", "hyperbaton", "noumenon", "organon", "perihelion", "phenomenon", "prolegomenon"]
  }, {
    // Table A.20
    from: "um",
    to: "a",
    words: ["agendum", "bacterium", "candelabrum", "datum", "desideratum", "erratum", "extremum", "stratum", "ovum"]
  }, {
    // Table A.21
    from: "um",
    anglicized: "ums",
    classical: "a",
    words: ["aquarium", "compendium", "consortium", "cranium", "curriculum", "dictum", "emporium", "encomium", "gymnasium", "honorarium", "interregnum", "lustrum", "maximum", "medium", "memorandum", "millennium", "millenium", "minimum", "momentum", "optimum", "phylum", "quantum", "rostrum", "spectrum", "speculum", "stadium", "trapezium", "ultimatum", "vacuum", "velum"]
  }, {
    // Table A.22
    from: "us",
    anglicized: "uses",
    classical: "i",
    words: ["focus", "fungus", "genius", "incubus", "nimbus", "nucleolus", "radius", "stylus", "succubus", "torus", "umbilicus", "uterus"]
  }, {
    // Table A.23
    from: "us",
    anglicized: "uses",
    classical: "us",
    words: ["apparatus", "cantus", "coitus", "hiatus", "impetus", "nexus", "plexus", "prospectus", "sinus", "status"]
  }, {
    // Table A.24
    from: "",
    to: "i",
    words: ["afreet", "afrit", "efreet"]
  }, {
    // Table A.25
    from: "",
    to: "im",
    words: ["cherub", "goy", "seraph"]
  }];

  //does `word` end with `ending`?
  function suffix(word, ending) {
    return word.length >= ending.length && word.substring(word.length - ending.length) == ending;
  }

  //capitalise `word` when `model` starts with a capital letter
  function capIfCap(word, model) {
    var isCap = model.charAt(0).toLowerCase() != model.charAt(0);
    return isCap ? (word.charAt(0).toUpperCase() + word.substr(1)) : word;
  }
  //the tables are lower-case, so all table lookups use the lower-cased text
  var lower = text.toLowerCase();
  var i;
  for (i = 0; i < userDefinedNouns.length; i++) {
    if (userDefinedNouns[i].p == lower) {
      return capIfCap(userDefinedNouns[i].s, text);
    }
  }
  for (var singular in irregularNouns) {
    var irregular = irregularNouns[singular];
    if (irregular.anglicized === lower || irregular.classical === lower) {
      return capIfCap(singular, text);
    }
  }
  for (i = 0; i < uninflectedSuffixes.length; i++) {
    if (suffix(lower, uninflectedSuffixes[i])) {
      return text;
    }
  }
  if (Object.prototype.hasOwnProperty.call(uninflectedNouns, lower)) {
    return text;
  }

  //swap a plural ending for the category's singular ending, and accept the
  //outcome only when it is one of the category's words
  function fromCategory(category, pluralEnding) {
    if (typeof pluralEnding != "string" || !suffix(lower, pluralEnding)) {
      return null;
    }
    var candidate = lower.substring(0, lower.length - pluralEnding.length) + category.from;
    if (category.words.indexOf(candidate) === -1) {
      return null;
    }
    return capIfCap(candidate, text);
  }
  for (i = 0; i < inflectionCategories.length; i++) {
    var category = inflectionCategories[i];
    var found = fromCategory(category, category.to) || fromCategory(category, category.anglicized) || fromCategory(category, category.classical);
    if (found) {
      return found;
    }
  }
  //"sisters-in-law": inflect what comes before the preposition
  for (i = 0; i < prepositions.length; i++) {
    var marker = "-" + prepositions[i] + "-";
    var at = text.indexOf(marker);
    if (at > 0) {
      return singularize(text.substring(0, at)) + marker + text.substring(at + marker.length);
    }
  }
  if (suffix(text, "xes") || suffix(text, "ses")) {
    return text.substring(0, text.length - 2);
  }
  if (suffix(text, "s") && !suffix(text, "ss")) {
    return text.substring(0, text.length - 1);
  }
  return text;
}
//console.log(singularize("george soros"));
//console.log(singularize("mama cass"));
//by spencer kelly (@spencermountain)
// Split a text into sentences.
//   text - the text to split
//   returns an array of trimmed, non-empty sentences
// The text is cut after every period that is followed by whitespace or the end
// of the text. A piece that ends in a common abbreviation ("Dr.", "Inc.") or
// in a single letter ("J.") is not a sentence end, so it is joined to the
// piece that follows.
function sentenceparser(text) {
  var pieces = text.split(/(\S.+?[.])(?=\s+|$)/g);
  var abbreviation = /(^| )(mr|dr|llb|md|bl|phd|ma|ba|mrs|miss|misses|mister|sir|esq|mstr|jr|sr|st|lit|inc|fl|ex|eg|jan|feb|mar|apr|jun|aug|sept?|oct|nov|dec)\. ?$/i;
  var initial = /[ |\.][a-z]\.?$/i;
  var sentences = [];
  //text held back because it ended in an abbreviation
  var pending = '';
  for (var i = 0; i < pieces.length; i++) {
    var piece = pending ? pending + ' ' + pieces[i] : pieces[i];
    pending = '';
    if (!piece) {
      continue;
    }
    piece = piece.replace(/^\s+|\s+$/g, ''); //trim extra whitespace
    var isLast = i === pieces.length - 1;
    //join common abbreviations + acronyms, unless there is nothing left to
    //join with: the final piece is always kept
    if (!isLast && (abbreviation.test(piece) || initial.test(piece))) {
      pending = piece;
    } else if (piece) {
      sentences.push(piece);
    }
  }
  return sentences;
}
//console.log(fns.sentenceparser('Dr. calm is me. lkj'))
// Publish the extended freebase object to CommonJS consumers (Node.js).
// Where there is no `module.exports` this does nothing.
if (typeof module !== 'undefined') {
  if (module.exports) {
    module.exports = freebase;
  }
}