// festivals-importer
// Version:
// 223 lines (176 loc) • 4.47 kB
// JavaScript
var async = require('async');
var request = require('request');
var cheerio = require('cheerio');
var moment = require('moment-timezone');
/**
 * Lookup table from the (Polish) field labels read off an event page to the
 * property names stored on the parsed event record.
 */
const dataMapper = {
  'termin': 'startAt',
  'czas trwania': 'minutes',
  'blok': 'categories',
  'miejsce': 'places'
};
/**
 * Calendar dates that replace the day abbreviations (czw, pt, sob, nd) found
 * in a raw start-time value; consumed by startAtParser.
 */
const startAtMapper = {
  'czw': '2015-08-20',
  'pt': '2015-08-21',
  'sob': '2015-08-22',
  'nd': '2015-08-23'
};
/**
 * Converts a raw start-time string into an ISO-8601 timestamp.
 *
 * Each day abbreviation listed in startAtMapper is swapped for its calendar
 * date, and the resulting string is handed to moment() for parsing.
 *
 * NOTE(review): moment() is called without an explicit time zone, so the value
 * is presumably interpreted in the host's local zone — confirm this is intended.
 *
 * @param {?string} val - raw value scraped from the page; may be null or empty.
 * @returns {?string} whatever moment(...).toISOString() yields, or null when
 *   val is empty.
 */
var startAtParser = function startAtParser(val) {
  if (!val) {
    return null;
  }
  // Normalise every non-breaking space (U+00A0) to a plain space. A string
  // pattern passed to replace() only touches the first match, so a regex with
  // the g flag is needed to catch them all.
  var normalized = val.replace(/\u00a0/g, ' ');
  Object.keys(startAtMapper).forEach(function (day) {
    normalized = normalized.replace(day, startAtMapper[day]);
  });
  return moment(normalized).toISOString();
};
/**
 * Converts a raw duration string into a number of minutes.
 *
 * The value is parsed by moment() with the 'HH:mm min' format and the parsed
 * hours and minutes are folded into a single minute count.
 *
 * @param {?string} val - raw duration text; may be null or empty.
 * @returns {?number} total minutes, or null when val is empty or cannot be
 *   parsed (instead of leaking NaN into the event record).
 */
var minutesParser = function minutesParser(val) {
  if (!val) {
    return null;
  }
  var time = moment(val, 'HH:mm min');
  var total = time.hours() * 60 + time.minutes();
  // An unparseable value makes the getters yield NaN; report that as "no value".
  return Number.isNaN(total) ? null : total;
};
/**
 * Builds the start/finish pair for an event.
 *
 * @param {string} startAt - start time accepted by moment().
 * @param {number} minutes - length of the event in minutes.
 * @returns {{startAt: string, finishAt: string}} both ends as ISO strings;
 *   finishAt is startAt plus the given number of minutes.
 */
var durationParser = function durationParser(startAt, minutes) {
  var begin = moment(startAt);
  // Work on a copy so that adding the minutes does not shift the start itself.
  var finish = begin.clone().add(minutes, 'minutes');
  var range = {};
  range.startAt = begin.toISOString();
  range.finishAt = finish.toISOString();
  return range;
};
/**
 * Wraps a category name in the list-of-categories shape used on event records.
 *
 * @param {?string} val - category name; may be null or empty.
 * @returns {Array<{parent: null, name: string}>} a one-element list, or an
 *   empty list when no name was given (mirrors placesParser, and avoids
 *   emitting a category whose name is null).
 */
var categoriesParser = function categoriesParser(val) {
  if (!val) {
    return [];
  }
  return [{
    parent: null,
    name: val
  }];
};
/**
 * Wraps a place name in the list-of-places shape used on event records.
 *
 * @param {?string} val - place name; may be null or empty.
 * @returns {Array<{parent: null, name: string, openingTimes: Array}>} a
 *   one-element list for a non-empty name, otherwise an empty list.
 */
var placesParser = function placesParser(val) {
  if (!val) {
    return [];
  }
  return [{ parent: null, name: val, openingTimes: [] }];
};
/**
 * Splits a free-text author line into individual author records.
 *
 * Authors are separated by ", ", " i " or " oraz ". An entry of the form
 * "Organization - Name" is split on " - " into its organization and name.
 *
 * @param {?string} val - raw author text; may be null or empty.
 * @returns {Array<{name: string, organization: ?string}>} one record per
 *   author, in source order. Entries whose name is empty after trimming (for
 *   example after a trailing separator) are dropped.
 */
var authorsParser = function authorsParser(val) {
  if (!val) {
    return [];
  }
  // A non-capturing split keeps the separators out of the result, so no
  // follow-up filtering of separator tokens is needed.
  return val
    .split(/, | i | oraz /)
    .map(function (entry) {
      var parts = entry.split(' - ');
      var hasOrganization = parts.length > 1;
      return {
        name: (hasOrganization ? parts[1] : entry).trim(),
        organization: hasOrganization ? parts[0].trim() : null
      };
    })
    .filter(function (author) {
      return author.name !== '';
    });
};
/**
 * Parses the HTML of one event page into an event record and hands it to the
 * callback.
 *
 * What is read from the page (all inside `div.content`):
 *  - the `h1` text becomes `name`;
 *  - the `strong` text is split into `authors` by authorsParser;
 *  - every child element of a `p` is treated as a field label; when the label
 *    (with its first ':' removed) is listed in dataMapper, the text node that
 *    directly follows the element is parsed and stored under the mapped key;
 *  - text found in the (up to) three sibling nodes preceding the last of those
 *    child elements is concatenated into `description`;
 *  - `duration` is filled in when both `startAt` and `minutes` were found.
 *
 * @param {number|string} id - event id; stored (stringified) as the only tag.
 * @param {string} body - HTML of the event page.
 * @param {function(?Error, Object=)} callback - receives the parsed record, or
 *   the error if parsing threw.
 */
var parseEvent = function parseEvent(id, body, callback) {
  var data;
  try {
    var $ = cheerio.load(body);
    data = {
      name: $('h1', 'div.content').text(),
      description: '',
      duration: null,
      authors: authorsParser($('strong', 'div.content').text()),
      places: [],
      categories: [],
      metadata: [],
      images: [],
      tags: [id + '']
    };

    var parsers = {
      startAt: startAtParser,
      minutes: minutesParser,
      categories: categoriesParser,
      places: placesParser
    };

    var children = $('p', 'div.content').children();
    children.each(function () {
      var label = $(this).text().replace(':', '').trim();
      // Ignore elements whose text is not a known label; otherwise the value
      // would be written under the literal key "undefined".
      if (!Object.prototype.hasOwnProperty.call(dataMapper, label)) {
        return;
      }
      var key = dataMapper[label];
      // The field value is the text node immediately after the label element.
      var sibling = this.next;
      var raw = sibling && typeof sibling.data === 'string' ? sibling.data.trim() : '';
      var val = raw || null;
      data[key] = parsers[key] ? parsers[key](val) : val;
    });

    // Walk back over at most three siblings before the last child element,
    // stopping early if the chain ends, and keep their text in document order.
    var lastChild = children.last()[0];
    var node = lastChild ? lastChild.prev : null;
    var fragments = [];
    for (var hops = 0; hops < 3 && node; hops += 1) {
      if (node.data) {
        fragments.unshift(node.data.trim());
      }
      node = node.prev;
    }
    data.description = fragments.join('');

    if (data.startAt && data.minutes) {
      data.duration = durationParser(data.startAt, data.minutes);
    }
  } catch (err) {
    // Report parsing failures through the callback instead of throwing from
    // inside the caller's asynchronous context.
    return callback(err);
  }
  return callback(null, data);
};
/**
 * Downloads one event page and parses it with parseEvent.
 *
 * @param {number|string} id - event id placed in the `id` query parameter.
 * @param {function(?Error, Object=)} callback - receives the parsed event, the
 *   request error, or an Error describing a non-200 response. It is always
 *   invoked, so callers such as async.map cannot stall on a failed request.
 */
var getEventContent = function getEventContent(id, callback) {
  var url = 'http://polcon2015.org/index.php?go2=event&id=' + encodeURIComponent(id);
  request(url, function (error, response, body) {
    if (error) {
      return callback(error);
    }
    if (response.statusCode !== 200) {
      return callback(new Error(
        'Unexpected HTTP status ' + response.statusCode + ' for event ' + id
      ));
    }
    return parseEvent(id, body, callback);
  });
};
/**
 * Fetches and parses every event in `ids`.
 *
 * @param {Array<number|string>} ids - event ids to import.
 * @param {function(?Error, Array<Object>=)} callback - receives the parsed
 *   events in the same order as `ids`, or the first error encountered.
 */
var importEvents = function importEvents(ids, callback) {
  // Cap the number of in-flight requests so a long id list does not open one
  // connection per event against the same host at once.
  var MAX_PARALLEL_REQUESTS = 10;
  async.mapLimit(ids, MAX_PARALLEL_REQUESTS, getEventContent, callback);
};
/**
 * Builds an ascending list of numbers from `a` up to at most `b`.
 *
 * The start value is always included (even when it is greater than `b`);
 * further values are added in increments of `step` while they do not exceed
 * `b`.
 *
 * @param {number} a - first value of the range.
 * @param {number} b - inclusive upper bound.
 * @param {number} [step=1] - increment; a missing or zero step means 1.
 * @returns {number[]} the generated values.
 * @throws {RangeError} when `step` is negative, which would otherwise never
 *   reach the upper bound and loop forever.
 */
Array.range = function (a, b, step) {
  var increment = step || 1;
  if (increment < 0) {
    throw new RangeError('Array.range: step must be positive, got ' + increment);
  }
  var values = [a];
  for (var next = a + increment; next <= b; next += increment) {
    values.push(next);
  }
  return values;
};
/**
 * Entry point of the importer: imports the events with ids 1 through 522.
 *
 * @param {*} data - not used by this implementation.
 * @param {function(?Error, {events: Array<Object>}=)} callback - receives
 *   `{events: [...]}` on success, or the import error on failure.
 */
var handle = function handle(data, callback) {
  importEvents(Array.range(1, 522), function (err, results) {
    // Surface import failures instead of reporting success with partial data.
    if (err) {
      return callback(err);
    }
    return callback(null, {events: results});
  });
};
module.exports = {
parseEvent: parseEvent,
getEventContent: getEventContent,
importEvents: importEvents,
handle: handle
};