// festivals-importer
// Version:
// 259 lines (211 loc) • 5.96 kB
// JavaScript
var fs = require('fs');
var async = require('async');
var request = require('request');
var cheerio = require('cheerio');
var moment = require('moment-timezone');
// Polish weekday name -> calendar date (YYYY-MM-DD) that startAtParser
// substitutes for it before handing the text to moment().
var startAtMapper = {
  'Piątek': '2015-09-25',    // Friday
  'Sobota': '2015-09-26',    // Saturday
  'Niedziela': '2015-09-27'  // Sunday
};
// Room name -> name of the programme block (category) that categoriesParser
// substitutes for it. Keys are applied in declaration order.
var categoryMapper = {
  'Dunwich': 'Blok RPG',
  'Salem': 'Blok literacki',
  'Ikebukuro': 'Blok mangi i anime',
  'Carcosa': 'Blok LARP',
  'Niewidoczny Uniwersytet': 'Blok popularnonaukowy',
  'Transylvania': 'Blok popkulturowy',
  'Whitechapel': 'Blok popkulturowy',
  'Ishimura': 'Blok komputerowy',
  'Pracownia Pickmana': 'Sala Kreatywna',
  'Sala turniejowa': 'Blok turniejowy'
};
// Room name -> display label (room name plus its number or location in
// parentheses) that placesParser substitutes for it. Keys are applied in
// declaration order.
var placesMapper = {
  'Dunwich': 'Dunwich (113)',
  'Salem': 'Salem (226)',
  'Ikebukuro': 'Ikebukuro (127)',
  'Carcosa': 'Carcosa (Piwnica)',
  'Niewidoczny Uniwersytet': 'Niewidoczny Uniwersytet (213)',
  'Transylvania': 'Transylvania (211)',
  'Whitechapel': 'Whitechapel (212)',
  'Ishimura': 'Ishimura (118)',
  'Pracownia Pickmana': 'Pracownia Pickmana (66)',
  'Laboratorium Frankensteina': 'Laboratorium Frankensteina (64)',
  'Sala turniejowa': 'Sala turniejowa (68)',
  'Blok dziecięcy': 'Sala dziecięca (132)',
  'Elizjum': 'Elizjum (Aula)',
  'Leng': 'Leng (Dziedziniec)'
};
/**
 * Removes every `<...>` run (anything that looks like a markup tag) from a
 * string. A lone `<` with no closing `>` is left untouched.
 *
 * @param {?string} val - Markup to clean. `null`/`undefined` are treated as
 *   empty text, since cheerio's `.html()` returns `null` for an empty
 *   selection and that value is passed straight in here.
 * @returns {string} The input with all tags removed.
 */
var removeTags = function removeTags(val) {
  if (val === null || val === undefined) {
    return '';
  }
  // Same matches as the former /(<([^>]+)>)/ig: the capture groups were never
  // read and the `i` flag is meaningless for a pattern without letters.
  return String(val).replace(/<[^>]+>/g, '');
};
/**
 * Turns the inner HTML of an event description into trimmed plain text.
 *
 * @param {?string} html - Inner HTML of the description element, or
 *   `null`/`undefined` when the event has no description element (cheerio's
 *   `.html()` returns `null` for an empty selection).
 * @returns {string} Tag-free, trimmed text; `''` when there is no description.
 */
var descriptionParser = function descriptionParser(html) {
  // An event without a description must not abort the whole import.
  if (html === null || html === undefined) {
    return '';
  }
  return removeTags(html).trim();
};
/**
 * Converts a heading text into an ISO-8601 timestamp.
 *
 * Every key of `startAtMapper` found in the text is replaced by its mapped
 * date (first occurrence of each key), the first comma is dropped, and the
 * result is parsed with `moment()`.
 *
 * NOTE(review): `moment(text)` is called without an explicit zone even though
 * `moment-timezone` is imported — confirm whether the festival's zone should
 * be passed explicitly.
 *
 * @param {?string} val - Heading text; presumably of the form
 *   "<weekday>, <time>" — TODO confirm against the scraped page.
 * @returns {?string} `moment(...).toISOString()` of the rewritten text, or
 *   `null` when the input (or the rewritten text) is falsy.
 */
var startAtParser = function startAtParser(val) {
  if (!val) {
    return null;
  }

  var text = Object.keys(startAtMapper).reduce(function (acc, dayName) {
    return acc.replace(dayName, startAtMapper[dayName]);
  }, val);

  if (!text) {
    return null;
  }

  return moment(text.replace(',', '')).toISOString();
};
/**
 * Builds a start/finish pair for an event of fixed length.
 *
 * @param {?string} val - Heading text, forwarded to `startAtParser`.
 * @param {number} minutes - Length of the event in minutes.
 * @returns {{startAt: ?string, finishAt: ?string}} ISO strings produced by
 *   moment's `toISOString()` for the start and for start + `minutes`.
 */
var durationParser = function durationParser(val, minutes) {
  const begin = moment(startAtParser(val));
  // moment#add mutates its receiver, so the finish time is computed on a copy.
  const finish = moment(begin).add(minutes, 'minutes');

  return {
    startAt: begin.toISOString(),
    finishAt: finish.toISOString()
  };
};
/**
 * Normalises raw page markup before it is parsed: drops `<p>`/`<span>`
 * wrappers and rewrites a small set of HTML entities and typographic
 * characters to plain equivalents.
 *
 * Previously the entity patterns were literal characters, so four of the
 * replacements were no-ops and no entity was ever decoded; in addition the
 * case-insensitive `ó` rule lower-cased a literal `Ó`. Both the entity form
 * and the literal character are now handled, and case is preserved.
 *
 * NOTE(review): the entity list is inferred from the characters this function
 * already normalised — confirm against the real page markup.
 * NOTE(review): this runs on the whole document before parsing, so decoding
 * `&quot;` also affects text inside attribute values — confirm acceptable.
 *
 * @param {?string} val - Raw markup; falsy input is treated as `''`.
 * @returns {string} Normalised, trimmed markup.
 */
var entitiesParser = function entitiesParser(val) {
  val = val || '';
  val = val.replace(/(<p>|<\/p>|<span.+?>|<\/span>|<p.+?\">)/g, '');
  val = val.replace(/&nbsp;|\u00a0/g, ' ');
  val = val.replace(/&oacute;/g, 'ó');
  val = val.replace(/&Oacute;/g, 'Ó');
  val = val.replace(/&#0*39;|&apos;/g, "'");
  val = val.replace(/&hellip;|…/g, '...');
  val = val.replace(/&ndash;|&mdash;|–|—/g, '-');
  val = val.replace(/&quot;|&ldquo;|&rdquo;|&bdquo;|“|”|„/g, '"');
  // `&amp;` goes last so that an escaped entity such as `&amp;nbsp;` is
  // unescaped exactly once instead of being decoded twice.
  val = val.replace(/&amp;/g, '&');
  return val.trim();
};
/**
 * Derives the category list of an event from its `.event-line` text.
 *
 * The text is trimmed, its first "(" and first ")" are removed, and the first
 * occurrence of every `categoryMapper` key is replaced by the mapped block
 * name.
 *
 * @param {?string} val - Raw text; falsy input is treated as `''`.
 * @returns {Array<{parent: null, name: string}>} A single-element list, or an
 *   empty list when nothing is left after cleaning.
 */
var categoriesParser = function categoriesParser(val) {
  var cleaned = (val || '').trim().replace('(', '').replace(')', '');

  var label = Object.keys(categoryMapper).reduce(function (text, room) {
    return text.replace(room, categoryMapper[room]);
  }, cleaned);

  if (!label) {
    return [];
  }
  return [{parent: null, name: label}];
};
/**
 * Derives the place list of an event from a room text.
 *
 * The text is trimmed, the first "Sala: " prefix, first "(" and first ")" are
 * removed, it is trimmed again, and the first occurrence of every
 * `placesMapper` key is replaced by the mapped label.
 *
 * @param {?string} val - Raw text; falsy input is treated as `''`.
 * @returns {Array<{parent: null, name: string, openingTimes: Array}>} A
 *   single-element list, or an empty list when nothing is left after cleaning.
 */
var placesParser = function placesParser(val) {
  var cleaned = (val || '')
    .trim()
    .replace('Sala: ', '')
    .replace('(', '')
    .replace(')', '')
    .trim();

  var label = Object.keys(placesMapper).reduce(function (text, room) {
    return text.replace(room, placesMapper[room]);
  }, cleaned);

  if (!label) {
    return [];
  }
  return [{parent: null, name: label, openingTimes: []}];
};
/**
 * Derives the author list of an event from its `.authors` text.
 *
 * The text is trimmed, the first "Prowadzący: " prefix is removed, and the
 * remainder is trimmed again; the whole remainder becomes one author name.
 *
 * @param {?string} val - Raw text; falsy input is treated as `''`.
 * @returns {Array<{name: string, organization: null}>} A single-element list,
 *   or an empty list when nothing is left after cleaning.
 */
var authorsParser = function authorsParser(val) {
  var who = (val || '').trim().replace('Prowadzący: ', '').trim();

  if (!who) {
    return [];
  }
  return [{name: who, organization: null}];
};
/**
 * Parses the programme page and collects one record per `div.event` found
 * inside `div#events`.
 *
 * The markup is first passed through `entitiesParser`, then loaded into
 * cheerio with entity decoding switched off.
 *
 * @param {string} body - Raw HTML of the programme page.
 * @param {function(?Error, Array<Object>)} callback - Called synchronously
 *   with `null` and the list of records produced by `parseEvent`.
 * @returns {*} Whatever `callback` returns.
 */
var parseEvents = function parseEvents(body, callback) {
  var $ = cheerio.load(entitiesParser(body), {decodeEntities: false});
  var container = $('div#events');
  var collected = [];

  $('div.event', container).each(function (index, node) {
    var parsed = parseEvent($, node);
    if (!parsed) {
      return;
    }
    collected.push(parsed);
  });

  return callback(null, collected);
};
/**
 * Builds one event record from a `div.event` node.
 *
 * @param {Function} $ - cheerio instance bound to the loaded page.
 * @param {Object} elem - The event node.
 * @returns {Object} Record with `name`, `description`, `duration`, `authors`,
 *   `places`, `categories`, `metadata`, `images` and `tags` (in that key
 *   order). `images` and `tags` are always empty; `metadata` holds the node's
 *   `id` attribute when it has one.
 */
var parseEvent = function parseEvent($, elem) {
  var node = $(elem);

  var title = $('.event-name', elem).text().trim();

  // The start time comes from the <h3> that precedes one of the node's
  // ancestors; every event is given a fixed length of 50 minutes.
  var heading = node.parents().prev('h3').text();
  var duration = heading ? durationParser(heading, 50) : null;

  // Category and room are both derived from the `.event-line` text (a
  // `.room`-based lookup for the room is currently disabled).
  var categories = categoriesParser($('.event-line', elem).text());
  var places = placesParser($('.event-line', elem).text());

  var description = descriptionParser($('.event-description', elem).html());
  var authors = authorsParser($('.authors', elem).text());

  var id = node.attr('id');
  var metadata = id ? [id] : [];

  return {
    name: title,
    description: description,
    duration: duration,
    authors: authors,
    places: places,
    categories: categories,
    metadata: metadata,
    images: [],
    tags: []
  };
};
/**
 * Downloads the festival programme page and hands its HTML to `parseEvents`.
 *
 * The callback is now always invoked: previously a transport error or a
 * non-200 response was silently dropped, leaving the caller waiting forever.
 *
 * @param {function(?Error, Array<Object>=)} callback - Receives the transport
 *   error, an `Error` describing an unexpected HTTP status, or `null` plus the
 *   parsed events.
 */
var getEventsContent = function getEventsContent(callback) {
  request('http://kapitularz.pl/program/pelen-program-festiwalu/', function (error, response, body) {
    if (error) {
      return callback(error);
    }
    if (!response || response.statusCode !== 200) {
      var status = response ? response.statusCode : 'unknown';
      return callback(new Error('Unexpected HTTP status ' + status + ' while fetching the festival programme'));
    }
    return parseEvents(body, callback);
  });
};
/**
 * Entry point for importing events; delegates directly to `getEventsContent`.
 *
 * @param {Function} callback - Passed through unchanged to `getEventsContent`.
 * @returns {*} Whatever `getEventsContent` returns.
 */
var importEvents = function importEvents(callback) {
return getEventsContent(callback);
};
/**
 * Runs the import and wraps the result as `{events: [...]}`.
 *
 * An error reported by `importEvents` is now forwarded to the caller;
 * previously it was discarded and success was always reported.
 *
 * @param {*} data - Not used by this function.
 * @param {function(?Error, {events: Array<Object>}=)} callback - Receives the
 *   import error, or `null` and the wrapped events.
 */
var handle = function handle(data, callback) {
  importEvents(function (err, results) {
    if (err) {
      return callback(err);
    }
    return callback(null, {events: results});
  });
};
module.exports = {
parseEvent: parseEvent,
parseEvents: parseEvents,
getEventsContent: getEventsContent,
importEvents: importEvents,
handle: handle
};
//
//importEvents(function (err, results) {
// fs.writeFile('kapitularz2015.json', JSON.stringify(results), function (err) {
// if (err) {
// throw err;
// }
// //
// // console.log('It\'s saved!');
// //console.log(err, results);
// console.dir(results, {depth: null});
// //console.dir(results[0], {depth: null});
// });
//
//});