UNPKG

factbook

Version:

Serves as an independent data scraping module, complete with ontology and full scraping ability for the CIA World Factbook site

98 lines (97 loc) 6.93 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); var funktologies_1 = require("funktologies"); var getUuid = require("uuid-by-string"); var constants_1 = require("../constants/constants"); var globalStore_1 = require("../constants/globalStore"); function getBorders(cheerioElem, country, countryId) { var objectProperties = globalStore_1.store.countries[countryId].objectProperties; var brdMap = funktologies_1.getRelation(objectProperties, constants_1.consts.ONTOLOGY.HAS_BORDER); var brdId = constants_1.consts.ONTOLOGY.INST_BORDER + getUuid(country); var objectProp = {}; if (!brdMap) { if (globalStore_1.store.borders[brdId]) { objectProp[constants_1.consts.ONTOLOGY.HAS_BORDER] = globalStore_1.store.borders[brdId]; } else { objectProp = funktologies_1.entityMaker(constants_1.consts.ONTOLOGY.HAS_BORDER, constants_1.consts.ONTOLOGY.ONT_BORDER, brdId, "Border of " + country); globalStore_1.store.borders[brdId] = objectProp[constants_1.consts.ONTOLOGY.HAS_BORDER]; } brdMap = objectProp[constants_1.consts.ONTOLOGY.HAS_BORDER]; globalStore_1.store.countries[countryId].objectProperties.push(funktologies_1.entityRefMaker(constants_1.consts.ONTOLOGY.HAS_BORDER, objectProp)); } cheerioElem('#field-land-boundaries').each(function (indexOut, elementOuter) { cheerioElem(elementOuter).find('div.category_data.subfield.numeric').each(function (indexIn, elementInner) { var bordGrd = cheerioElem(elementInner).find('span.subfield-number').text().trim(); if (bordGrd) { brdMap.datatypeProperties[constants_1.consts.ONTOLOGY.DT_TOTAL_BORDER] = Number(bordGrd.replace(/,|[a-z]/g, '').trim()) || null; } brdMap.datatypeProperties[constants_1.consts.ONTOLOGY.DT_UNIT] = 'km'; }); var numBrdGrd = cheerioElem(elementOuter).find('div.category_data.subfield.text > span.subfield-name').text(); if (numBrdGrd) { var openParam = numBrdGrd.indexOf('('); var closeParam = numBrdGrd.indexOf(')'); var num = (openParam > -1 && -1 < closeParam) ? numBrdGrd.substring(openParam + 1, closeParam) : 0; try { brdMap.datatypeProperties[constants_1.consts.ONTOLOGY.DT_TOTAL_BORDER_COUNTRIES] = Number(num); } catch (err) { globalStore_1.store.LOG_STREAM.error(new Date().toISOString() + '\n\n' + err.toString() + '\n\n'); } } var brderContrs = cheerioElem(elementOuter).find('div.category_data.subfield.text').text().trim(); if (brderContrs) { var existBrdContrs_1 = objectProperties.filter(function (rel) { return rel[constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY]; }); brderContrs = brderContrs.substring(brderContrs.indexOf(':') + 1).replace(/\\n/g, '').trim(); var brdrContrsArr = brderContrs.split(',').map(function (bc) { return bc.trim(); }); brdrContrsArr.forEach(function (bc) { var endingNameIndex = bc.search(/[\d]/g); var borderCountry = bc.substring(0, endingNameIndex).trim(); var endingDistIndex = bc.substring(endingNameIndex).search(/[^\d]/g); var distance = bc.substring(endingNameIndex, endingNameIndex + endingDistIndex).trim(); var orderedContrs = [country, borderCountry].sort(); var bcId = constants_1.consts.ONTOLOGY.INST_BORDER_COUNTRY + getUuid(orderedContrs[0] + "-" + orderedContrs[1]); if (!existBrdContrs_1.some(function (brco) { return brco[constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY]['@id'].includes(bcId); })) { var objProp = {}; if (globalStore_1.store.borderCountries[bcId]) { objProp[constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY] = globalStore_1.store.borderCountries[bcId]; } else { objProp = funktologies_1.entityMaker(constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY, constants_1.consts.ONTOLOGY.ONT_BORDER_COUNTRY, bcId, "Border Country Pair of " + orderedContrs[0] + " and " + orderedContrs[1]); objProp[constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY] .datatypeProperties[constants_1.consts.ONTOLOGY.DT_BORDER_LENGTH] = Number(distance) || null; objProp[constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY] .datatypeProperties[constants_1.consts.ONTOLOGY.DT_UNIT] = 'km'; var borderCountryObj1 = {}; borderCountryObj1[constants_1.consts.ONTOLOGY.HAS_COUNTRY] = { '@id': globalStore_1.store.countries[countryId]['@id'], '@type': globalStore_1.store.countries[countryId]['@type'] }; borderCountryObj1[constants_1.consts.ONTOLOGY.HAS_COUNTRY][constants_1.consts.RDFS.label] = globalStore_1.store.countries[countryId][constants_1.consts.RDFS.label]; objProp[constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY].objectProperties.push(borderCountryObj1); //////// Bail out if counterpart id is not in the system var foundBorderCountry = Object.values(globalStore_1.store.countries) .find(function (c) { return c['http://www.w3.org/2000/01/rdf-schema#label'].toLowerCase() === borderCountry.toLowerCase(); }); var borderCountryId = foundBorderCountry && foundBorderCountry['@id']; if (!borderCountryId) { return; } //////// var borderCountryObj2 = {}; borderCountryObj2[constants_1.consts.ONTOLOGY.HAS_COUNTRY] = { '@id': borderCountryId, '@type': constants_1.consts.ONTOLOGY.INST_COUNTRY }; borderCountryObj2[constants_1.consts.ONTOLOGY.HAS_COUNTRY][constants_1.consts.RDFS.label] = borderCountry; objProp[constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY].objectProperties.push(borderCountryObj2); globalStore_1.store.borderCountries[bcId] = objProp[constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY]; } globalStore_1.store.countries[countryId].objectProperties.push(funktologies_1.entityRefMaker(constants_1.consts.ONTOLOGY.HAS_BORDER_COUNTRY, objProp)); } }); } }); } exports.getBorders = getBorders; ;