UNPKG

factbook

Version:

Serves as an independent data scraping module, complete with ontology and full scraping ability for the CIA World Factbook site

60 lines (59 loc) 3.63 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var funktologies_1 = require("funktologies");
var htmlToText = require("html-to-text");
var getUuid = require("uuid-by-string");
var constants_1 = require("../constants/constants");
var globalStore_1 = require("../constants/globalStore");
/**
 * Collects the supplemental photos found on a country page.
 *
 * For every `div.item.photo-all` element, an image id and an absolute URL are
 * derived from the nested <img> `src`. When the country does not already
 * reference that image, the function:
 *   - pushes an entity reference onto `store.countries[countryId].objectProperties`,
 *   - registers the image in `store.images` (reusing an existing entry if present),
 *   - fills the image's `datatypeProperties`,
 *   - queues a download descriptor on `store.IMAGES_TO_SCRAPE`.
 *
 * @param {Function} cheerioElem - Selector function for the loaded page; it is
 *   called as `cheerioElem(selector)` and `cheerioElem(node)` (presumably the
 *   Cheerio root returned by `cheerio.load` - confirm against the caller).
 * @param {string} country - Country name, used only in the image label text.
 * @param {string} countryId - Key of the country inside `store.countries`.
 * @returns {void} Results are written to the global store.
 */
function getSupplementalImages(cheerioElem, country, countryId) {
    var ontology = constants_1.consts.ONTOLOGY;
    var objectProperties = globalStore_1.store.countries[countryId].objectProperties;
    cheerioElem('div.item.photo-all').each(function (index, element) {
        var photo = cheerioElem(element);
        // Re-filtered on every pass: `objectProperties` is the live store array,
        // so images pushed by earlier iterations are seen by the duplicate check.
        var suppImages = objectProperties.filter(function (rel) { return rel[ontology.HAS_SUPPLEMENTAL_IMG]; });
        var imgSrc = photo.find('img').attr('src');
        var imgAlt = photo.find('img').attr('alt');
        var infoNodes = photo.find(cheerioElem('div.carousel-photo-info .photoInfo .flag_description_text'));
        var imageProps = [];
        // Read each info node's own text. Reading the outer photo container here
        // would push the same whole-container text once per node, making the
        // "dimensions" and "size" entries identical.
        infoNodes.each(function (infoIndex, infoElem) {
            imageProps.push(cheerioElem(infoElem).text().trim());
        });
        imgAlt = imgAlt && htmlToText.fromString(imgAlt);
        var imgId;
        var suppImgUrl;
        if (imgSrc && imgSrc.replace('../', '')) {
            // Only the first '../' is stripped, matching the relative paths this
            // scraper has always produced ids and URLs from.
            var cleanSrc = imgSrc.replace('../', '');
            imgId = ontology.INST_IMAGE + getUuid(cleanSrc);
            suppImgUrl = constants_1.consts.BASE.URL_BASE + cleanSrc;
        }
        var alreadyLinked = suppImages.some(function (img) {
            return img[ontology.HAS_SUPPLEMENTAL_IMG]['@id'].includes(imgId);
        });
        if (suppImgUrl && !alreadyLinked) {
            var objectProp = {};
            if (globalStore_1.store.images[imgId]) {
                // Image already known globally: reuse the stored entity.
                objectProp[ontology.HAS_SUPPLEMENTAL_IMG] = globalStore_1.store.images[imgId];
            }
            else {
                // The result is read back under the HAS_SUPPLEMENTAL_IMG key below.
                objectProp = funktologies_1.entityMaker(ontology.HAS_SUPPLEMENTAL_IMG, ontology.ONT_IMAGE, imgId, "Supplemental Image for " + country);
                globalStore_1.store.images[imgId] = objectProp[ontology.HAS_SUPPLEMENTAL_IMG];
            }
            globalStore_1.store.countries[countryId].objectProperties.push(funktologies_1.entityRefMaker(ontology.HAS_SUPPLEMENTAL_IMG, objectProp));
            var datatypeProp = {};
            datatypeProp[ontology.DT_LOCATOR_URI] = suppImgUrl;
            datatypeProp[ontology.DT_CONTENT_DESCRIPTION] = imgAlt || null;
            datatypeProp[ontology.DT_IMAGE_DIMENSIONS] = imageProps[0] || 'N/A';
            datatypeProp[ontology.DT_IMAGE_SIZE] = imageProps[1] || 'N/A';
            // Attached by reference, so the properties added below are included.
            objectProp[ontology.HAS_SUPPLEMENTAL_IMG].datatypeProperties = datatypeProp;
            var pathSplit = suppImgUrl.split('/');
            // Last path segment without any query string, lower-cased.
            var fileName = pathSplit[pathSplit.length - 1].split('?')[0].toLowerCase();
            // Use the text after the LAST dot so names such as "a.b.jpg" yield
            // "jpg" rather than "b"; stays undefined when there is no dot.
            var dotIndex = fileName.lastIndexOf('.');
            datatypeProp[ontology.DT_MIME_TYPE] = dotIndex >= 0 ? fileName.slice(dotIndex + 1) : undefined;
            datatypeProp[ontology.DT_COLLECTION_TIMESTAMP] = (new Date()).toISOString();
            datatypeProp[ontology.DT_CONTENTS] = fileName;
            var options = {
                dest: "dist/images/" + fileName,
                timeout: constants_1.consts.BASE.DATA_REQUEST_TIMEOUT,
                url: suppImgUrl
            };
            globalStore_1.store.IMAGES_TO_SCRAPE.push({ fileName: fileName, options: options });
        }
    });
}
exports.getSupplementalImages = getSupplementalImages;