UNPKG

factbook

Version:

Serves as an independent data scraping module, complete with ontology and full scraping ability for the CIA World Factbook site

77 lines (76 loc) 4.81 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var funktologies_1 = require("funktologies");
var getUuid = require("uuid-by-string");
var constants_1 = require("../constants/constants");
var globalStore_1 = require("../constants/globalStore");

/**
 * Returns an object whose HAS_LOCATION key holds the location entity for
 * `geoId`, reusing the entity cached in `store.locations` when one exists and
 * otherwise creating it via `entityMaker` and caching it.
 *
 * @param {string} geoId - Identifier of the location entity.
 * @param {string} label - Label passed to `entityMaker` for a new entity.
 * @param {string} description - Text stored under DT_LOCATION_DESCRIPTION on a
 *     newly created entity. An already cached entity is left untouched.
 * @returns {Object} Object keyed by HAS_LOCATION that holds the entity.
 */
function findOrCreateLocation(geoId, label, description) {
    var ontology = constants_1.consts.ONTOLOGY;
    var objectProp = {};
    var cached = globalStore_1.store.locations[geoId];
    if (cached) {
        objectProp[ontology.HAS_LOCATION] = cached;
        return objectProp;
    }
    objectProp = funktologies_1.entityMaker(ontology.HAS_LOCATION, ontology.ONT_GEO_LOCATION, geoId, label);
    globalStore_1.store.locations[geoId] = objectProp[ontology.HAS_LOCATION];
    var datatypeProp = {};
    datatypeProp[ontology.DT_LOCATION_DESCRIPTION] = description;
    objectProp[ontology.HAS_LOCATION].datatypeProperties = datatypeProp;
    return objectProp;
}

/**
 * Handles a location field that carries a single block of text: records the
 * text on the country and links the country to one location entity.
 *
 * @param {Object} cheerio - Cheerio selection wrapping the location field.
 * @param {string} country - Country name; used for the entity id and label.
 * @param {string} countryId - Key of the country in `store.countries`.
 */
function parseSingleLocation(cheerio, country, countryId) {
    var ontology = constants_1.consts.ONTOLOGY;
    var content = cheerio.find('div.category_data.subfield.text').text().trim();
    var countryEntity = globalStore_1.store.countries[countryId];
    countryEntity.datatypeProperties[ontology.DT_LOCATION_REF_DESCRIPTION] = content;
    var geoId = ontology.INST_GEO_LOCATION + getUuid(country);
    var objectProp = findOrCreateLocation(geoId, "Geographic Location for " + country, content);
    countryEntity.objectProperties.push(funktologies_1.entityRefMaker(ontology.HAS_LOCATION, objectProp));
}

/**
 * Handles a location field split into several <p> paragraphs. A paragraph
 * with a <strong> label becomes its own location entity (unless the country
 * already links to it); a paragraph without a label is stored as the
 * country's location reference description.
 *
 * @param {Function} cheerioElem - Cheerio function used to wrap elements.
 * @param {string} country - Country name; used for entity ids and labels.
 * @param {string} countryId - Key of the country in `store.countries`.
 * @param {Object} scope - Element whose descendant <p> tags are scanned.
 */
function parseMultipleLocations(cheerioElem, country, countryId, scope) {
    var ontology = constants_1.consts.ONTOLOGY;
    cheerioElem(scope).find('p').each(function (index, element) {
        var paragraph = cheerioElem(element);
        var content = paragraph.text().trim();
        // The final character of the label is dropped (presumably a trailing
        // colon such as "Region:" -- confirm against the scraped markup).
        var strongTag = paragraph.find('strong').text().trim().slice(0, -1);
        var countryEntity = globalStore_1.store.countries[countryId];
        if (!strongTag) {
            // No label: the paragraph itself is the description. The previous
            // code took content.substring(0, content.indexOf('')), which is
            // always '' and therefore blanked the description.
            countryEntity.datatypeProperties[ontology.DT_LOCATION_REF_DESCRIPTION] = content;
            return;
        }
        var geoId = ontology.INST_GEO_LOCATION + getUuid(country) + getUuid(strongTag);
        var alreadyLinked = countryEntity.objectProperties.some(function (objProp) {
            var loc = objProp[ontology.HAS_LOCATION];
            return loc && loc['@id'] === geoId;
        });
        if (alreadyLinked) {
            return;
        }
        var objectProp = findOrCreateLocation(geoId, "Geographic Location for " + country + " - " + strongTag, content);
        countryEntity.objectProperties.push(funktologies_1.entityRefMaker(ontology.HAS_LOCATION, objectProp));
    });
}

/**
 * Extracts geography data for one country from its parsed page: the location
 * field(s) under `#field-location` and the map reference text under
 * `#field-map-references`. Results are written into the global store.
 *
 * @param {Function} cheerioElem - Cheerio function bound to the country page.
 * @param {string} country - Country name.
 * @param {string} countryId - Key of the country in `store.countries`.
 */
function getGeography(cheerioElem, country, countryId) {
    cheerioElem('#field-location').each(function (index, element) {
        var hasMultLocations = cheerioElem(element).find('div.category_data.subfield.text > p');
        // Multiple p tags suggests the nation has multiple locations in different parts of the world.
        // This means distinct description and geographic coordinates. Each must be handled separately.
        if (hasMultLocations.length) {
            parseMultipleLocations(cheerioElem, country, countryId, element);
        }
        else {
            parseSingleLocation(cheerioElem(element), country, countryId);
        }
    });
    cheerioElem('#field-map-references').each(function (index, element) {
        var mapRef = cheerioElem(element).find('div.category_data.subfield.text').text().trim();
        // Only record a map reference when the field actually has text.
        if (mapRef) {
            globalStore_1.store.countries[countryId].datatypeProperties[constants_1.consts.ONTOLOGY.DT_MAP_REFERENCES] = mapRef;
        }
    });
}
exports.getGeography = getGeography;