epha-robot
Version:
Fetching, cleaning, and transforming pharmaceutical data from public resources
166 lines (128 loc) • 4.86 kB
JavaScript
var fs = require("fs");
var pathResolve = require("path").resolve;
var splitter = require('xml-splitter');
var defaultLog = require("../").log;
var repairSubstances = require("./repairSubstances");
var repairAuthHolder = require('./repairAuthHolder');
var repairHTML = require('./repairHTML');
/**
 * Splits the Kompendium XML into one record per `medicalInformation` element
 * and produces three outputs:
 *
 *  - a JSON array of record summaries, streamed to `cfg.release.catalog`;
 *  - one HTML file per (language, type, authorization number) below
 *    `cfg.release.dir`, named `<lang>/<type>/<authNr>.htm`;
 *  - the search index the returned promise resolves with.
 *
 * The promise resolves only after the catalog stream has finished AND every
 * HTML file has been written. It rejects on any read, parse or write error.
 *
 * @param {object} cfg - reads `cfg.release.catalog`, `cfg.release.dir` and
 *   `cfg.download.zipFiles[0].dest` (path of the XML input).
 * @param {{doing: Function, debug: Function}} [log] - falls back to the
 *   package-level default logger when omitted.
 * @returns {Promise} resolves with `{documents, _searchterms, version}`.
 */
function parseKompendium(cfg, log) {
  log = log || defaultLog;

  return new Promise(function (resolve, reject) {
    var xs = new splitter('/medicalInformations/medicalInformation');
    var items = fs.createWriteStream(cfg.release.catalog);
    var itemsWritten = 0; // number of catalog entries, drives the "," separator
    var done = 0; // number of HTML files written, used for progress logging
    var writtenFiles = {};
    var filesPerAuthNr = {};
    var pendingWrites = [];

    items.on("error", reject);
    items.write("[");

    // Starts an asynchronous file write and tracks it, so that the outer
    // promise neither resolves early nor loses a write error.
    function writeHtmlFile(fullFile, html) {
      var pending = new Promise(function (written, failed) {
        fs.writeFile(fullFile, html, function (err) {
          if (err) {
            failed(err);
          } else {
            written();
          }
        });
      });
      // Attach a handler right away: a failure must reject the outer promise
      // even if it happens long before the "end" event collects the writes.
      pending.catch(reject);
      pendingWrites.push(pending);
    }

    // Handles one parsed <medicalInformation> record.
    function writePart(data) {
      var item = {
        title: data.title.$t,
        type: data.type,
        version: data.version,
        lang: data.lang,
        safetyRelevant: data.safetyRelevant, // "false" | "true"
        authHolder: data.authHolder.$t,
        substances: repairSubstances(data)
      };

      if (data.atcCode) {
        item.atcCode = data.atcCode.$t;
      }

      if (data.sections && data.sections.section && data.sections.section.length) {
        item.sections = data.sections.section.map(function (element) {
          return { id: element.id, label: element.title.$t };
        });
      }

      repairAuthHolder(data).forEach(function (element) {
        var file = item.lang + "/" + item.type + "/" + element + ".htm";
        var fullFile = cfg.release.dir + "/" + file;

        item.authNrs = (item.authNrs) ? item.authNrs + " " + element : element;

        // NOTE(review): the key is the auth-number string accumulated SO FAR,
        // so a record with several numbers registers each file under a
        // different prefix key and only the last file ends up under the final
        // key used for grouping. Behaviour kept as is - confirm whether all
        // files of the record should be listed instead.
        if (!Array.isArray(filesPerAuthNr[item.authNrs])) {
          filesPerAuthNr[item.authNrs] = [];
        }
        filesPerAuthNr[item.authNrs].push(file);

        if (!writtenFiles[fullFile]) {
          log.doing("Kompendium", "Files #", done++);
          writeHtmlFile(fullFile, repairHTML(data));
          writtenFiles[fullFile] = true;
        }
      });

      // Separate entries by the number of entries already written, not by the
      // number of files: a record without auth numbers writes no file.
      if (itemsWritten > 0) {
        items.write(",");
      }
      items.write(JSON.stringify(item));
      itemsWritten++;
    }

    xs.on('data', writePart);
    xs.on("error", reject);
    xs.on('end', function (counter) {
      items.on("finish", function () {
        Promise.all(pendingWrites)
          .then(function () {
            // Re-read the catalog from disk instead of keeping every record
            // in memory while parsing.
            var liste = JSON.parse(fs.readFileSync(cfg.release.catalog, "utf8"));
            var kompendium = buildKompendiumIndex(liste, filesPerAuthNr);

            log.debug("Kompendium", "Files in weird xml", { count: counter });
            return kompendium;
          })
          // Anything thrown above (e.g. a corrupt catalog) becomes a rejection.
          .then(resolve, reject);
      });
      items.write("]");
      items.end();
    });

    // start: 3 skips the first three bytes of the input - presumably a UTF-8
    // byte order mark; confirm against the downloaded file.
    var streamIn = fs.createReadStream(cfg.download.zipFiles[0].dest, { start: 3 });
    streamIn.on("error", reject);
    xs.parseStream(streamIn);
  });
}

/**
 * Returns the per-language entry that supplies a group's display fields:
 * the first of "de", "fr", "it" that holds a "fi" or "pi" document, or null.
 *
 * @param {object} entry - map of language code to `{fi?, pi?}`
 * @returns {object|null}
 */
function pickPrimaryLanguage(entry) {
  var langs = ["de", "fr", "it"];

  for (var i = 0; i < langs.length; i++) {
    var perLang = entry[langs[i]];
    if (perLang && (perLang.fi || perLang.pi)) {
      return perLang;
    }
  }
  return null;
}

/**
 * Groups catalog entries by their authorization-number string and flattens
 * each group into one search document.
 *
 * @param {Array<object>} liste - parsed content of the catalog file
 * @param {object} filesPerAuthNr - map of auth-number string to file list
 * @returns {{documents: Array<object>, _searchterms: Array<string>, version: number}}
 */
function buildKompendiumIndex(liste, filesPerAuthNr) {
  // authNrs -> language -> type -> display fields
  var zulassungen = Object.create(null);

  liste.forEach(function (item) {
    var entry = zulassungen[item.authNrs] || Object.create(null);
    zulassungen[item.authNrs] = entry;

    entry[item.lang] = entry[item.lang] || Object.create(null);
    entry[item.lang][item.type] = {
      "produkt": item.title,
      "substanz": item.substances,
      "hersteller": item.authHolder,
      "atc": item.atcCode
    };
  });

  var kompendium = {
    "documents": [],
    "_searchterms": ["produkt", "substanz", "hersteller", "zulassung", "atc"],
    "version": new Date().getTime()
  };

  Object.keys(zulassungen).forEach(function (key) {
    var entry = zulassungen[key];
    // German is preferred; other languages are only a fallback so that a
    // group without a German document no longer throws.
    var primary = pickPrimaryLanguage(entry) || {};
    // The "fi" document wins over the "pi" document.
    var doc = primary.fi || primary.pi || {};
    var group = Object.create(null);

    group.zulassung = key;
    group.lang = ["de", "fr", "it"].filter(function (lang) {
      return entry[lang];
    }).join(" ");
    group.type = ["fi", "pi"].filter(function (type) {
      return primary[type];
    }).join(" ");
    group.produkt = doc.produkt;
    group.substanz = doc.substanz;
    group.hersteller = doc.hersteller;
    group.atc = doc.atc;
    group.files = filesPerAuthNr[key];

    kompendium.documents.push(group);
  });

  return kompendium;
}
// Expose parseKompendium as the value exported by this module.
module.exports = parseKompendium;
;