UNPKG

cia-world-leaders

Version:

Serves as an independent data scraping module, complete with ontology and full scraping ability for the CIA World Leaders site

119 lines (118 loc) 6.28 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const fs = require("graceful-fs");
const constants_1 = require("../constants/constants");
const globalStore_1 = require("../constants/globalStore");

// IRIs used while flattening entities and serialising them as N-Triples.
const RDFS_LABEL_IRI = 'http://www.w3.org/2000/01/rdf-schema#label';
const RDF_TYPE_IRI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
const XSD_NS = 'http://www.w3.org/2001/XMLSchema#';

// Keys of a flattened entity that are emitted up front rather than as ordinary properties.
const HEADER_KEYS = ['@id', '@type', RDFS_LABEL_IRI];

/**
 * Writes one slot of the global store to disk in three formats:
 * `dist/json/<fileName>.json`, `dist/jsonld/<fileName>.schema.jsonld` and
 * `dist/n-triples/<fileName>.schema.nt`.
 *
 * Side effects on the global store: `store[storeName]` is reset to `{}` once its
 * values have been copied out, `store.jsonLD` is rebuilt and then drained by the
 * N-Triples conversion, and `store.jsonNT` is reset to `''` after it is written.
 *
 * @param {string} fileName - Base name (no extension) of the output files.
 * @param {string} storeName - Key of the global store whose entities are saved.
 * @param {*} context - Accepted for interface compatibility; not used. The
 *     JSON-LD file is written as a bare array of entities.
 */
function saveFile(fileName, storeName, context) {
    const store = globalStore_1.store;

    store.debugLogger(`--- Saving ${storeName} in ${fileName}.json`);
    // Normal JSON file.
    fs.writeFileSync(`dist/json/${fileName}.json`, JSON.stringify(store[storeName]));
    store.debugLogger(`+++ Saved ${storeName} in ${fileName}.json`);

    // JSON-LD file construction.
    store.jsonLD = [];
    store.debugLogger(`--- Saving ${storeName} in ${fileName}.schema.jsonld`);
    const pending = Object.values(store[storeName]);
    store[storeName] = {};
    store.debugLogger("--- Making JsonLD List");
    // Entities are taken from the end, so the JSON-LD list is in reverse store order.
    while (pending.length > 0) {
        const entity = pending.pop();
        if (!entity) {
            continue;
        }
        store.jsonLD.push(flattenEntity(entity));
    }
    fs.writeFileSync(`dist/jsonld/${fileName}.schema.jsonld`, JSON.stringify(store.jsonLD));
    store.debugLogger(`+++ Saved ${storeName} in ${fileName}.schema.jsonld`);

    store.debugLogger("~~~ Converting jsonld to n-triples");
    convertJsonldToNTriples();
    store.debugLogger(`~~~ Saving ${storeName} to ${fileName}.schema.nt`);
    fs.writeFileSync(`dist/n-triples/${fileName}.schema.nt`, store.jsonNT);
    store.jsonNT = '';
}
exports.saveFile = saveFile;

/**
 * Builds the flat JSON-LD form of a stored entity: `@id`, `@type` and the RDFS
 * label first, then every datatype property, then every object property.
 * An object-property key that occurs more than once is collected into an array.
 * Entities without `datatypeProperties` or `objectProperties` are tolerated.
 *
 * @param {Object} entity - Stored entity.
 * @returns {Object} The flattened entity.
 */
function flattenEntity(entity) {
    const flat = {
        '@id': entity['@id'],
        '@type': entity['@type'],
        [RDFS_LABEL_IRI]: entity[constants_1.consts.RDFS.label]
    };

    // Pull datatype properties out of their singleton object and make them direct props.
    const dataProps = entity.datatypeProperties || {};
    Object.keys(dataProps).forEach((key) => {
        flat[key] = dataProps[key];
    });

    // Pull out object properties, grouping repeated keys into arrays.
    const objectProps = entity.objectProperties || [];
    objectProps.forEach((objP) => {
        if (!objP) {
            return;
        }
        // Should be one key per object; an empty wrapper carries nothing to copy.
        const key = Object.keys(objP)[0];
        if (key === undefined) {
            return;
        }
        if (!flat[key]) {
            flat[key] = objP[key];
        } else if (Array.isArray(flat[key])) {
            flat[key].push(objP[key]);
        } else {
            flat[key] = [flat[key], objP[key]];
        }
    });

    return flat;
}

/**
 * Drains `store.jsonLD` (last entity first) and appends the N-Triples
 * serialisation of every entity to `store.jsonNT`.
 *
 * For each entity the label and type triples come first, followed by one group
 * of triples per remaining property in property order. Array values contribute
 * one group per item.
 */
function convertJsonldToNTriples() {
    const store = globalStore_1.store;
    const lines = [];

    while (store.jsonLD.length > 0) {
        const entity = store.jsonLD.pop();
        if (!entity) {
            continue;
        }
        const subject = entity['@id'];
        appendLabelAndType(lines, subject, entity[RDFS_LABEL_IRI], entity['@type']);
        Object.entries(entity).forEach(([predicate, value]) => {
            if (HEADER_KEYS.includes(predicate)) {
                // Taken care of already.
                return;
            }
            const items = Array.isArray(value) ? value : [value];
            items.forEach((item) => appendValue(lines, subject, predicate, item));
        });
    }

    store.jsonNT = (store.jsonNT || '') + lines.join('');
}

/**
 * Appends the `rdfs:label` and `rdf:type` triples for `subject`.
 * A missing (null/undefined) label or type is skipped instead of being written
 * as the text `undefined`, which is not a valid N-Triples term.
 */
function appendLabelAndType(lines, subject, label, type) {
    if (label !== undefined && label !== null) {
        // JSON string escapes are also valid N-Triples string escapes.
        lines.push(`<${subject}> <${RDFS_LABEL_IRI}> ${JSON.stringify(String(label))} .\n`);
    }
    if (type !== undefined && type !== null) {
        lines.push(`<${subject}> <${RDF_TYPE_IRI}> <${type}> .\n`);
    }
}

/**
 * Appends the triples for a single property value: a link plus the linked
 * entity's label/type when the value is an object, otherwise a typed literal.
 * Values with no literal form (null, undefined, functions, ...) are skipped.
 */
function appendValue(lines, subject, predicate, value) {
    if (value && typeof value === 'object') {
        const objectId = value['@id'];
        lines.push(`<${subject}> <${predicate}> <${objectId}> .\n`);
        appendLabelAndType(lines, objectId, value[RDFS_LABEL_IRI], value['@type']);
        return;
    }
    const literal = toTypedLiteral(value);
    if (literal !== null) {
        lines.push(`<${subject}> <${predicate}> ${literal} .\n`);
    }
}

/**
 * Converts a primitive to an N-Triples typed literal chosen by its JavaScript
 * type (string, boolean, integer or double).
 *
 * @param {*} value - Candidate literal value.
 * @returns {string|null} The literal text, or null when there is nothing to emit.
 */
function toTypedLiteral(value) {
    switch (typeof value) {
        case 'string':
            return `${JSON.stringify(value)}^^<${XSD_NS}string>`;
        case 'boolean':
            return `"${value}"^^<${XSD_NS}boolean>`;
        case 'number': {
            const text = String(value);
            // Only plain digit strings are valid xsd:integer lexical forms;
            // exponent notation (e.g. 1e+21) must be typed as a double.
            if (Number.isInteger(value) && /^-?\d+$/.test(text)) {
                return `"${text}"^^<${XSD_NS}integer>`;
            }
            if (Number.isNaN(value)) {
                return `"NaN"^^<${XSD_NS}double>`;
            }
            if (!Number.isFinite(value)) {
                // xsd:double spells the infinities INF and -INF.
                return `"${value > 0 ? 'INF' : '-INF'}"^^<${XSD_NS}double>`;
            }
            return `"${text}"^^<${XSD_NS}double>`;
        }
        default:
            return null;
    }
}