@slash-tmp/rgaa-scraper
Version:
Scrapes RGAA's website and fetches topics, criteria and tests into a JSON format.
57 lines • 2.21 kB
JavaScript
/**
 * Runs a generator function as an asynchronous routine and returns a promise
 * for its final value. Each value the generator yields is waited on as a
 * promise; the settled value (or rejection reason) is fed back into the
 * generator before it continues.
 *
 * An already-defined `this.__awaiter` is reused when one exists.
 * NOTE(review): this looks like the helper the TypeScript compiler emits for
 * down-levelled async functions — confirm before editing by hand.
 *
 * @param {*} thisArg - `this` value the generator function is applied with.
 * @param {*} _arguments - Arguments the generator function is applied with
 *   (an empty array is used when this is falsy).
 * @param {Function} P - Promise constructor to use; falls back to `Promise`.
 * @param {Function} generator - Generator function to run.
 * @returns {Promise} Settles with the generator's return value, or rejects
 *   with whatever error escapes from it.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in a P unless it already is an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a fulfilled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw a rejection reason into the generator so it can handle it with try/catch.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finish when the generator is done; otherwise wait for the yielded value and resume.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (replacing the function reference with its iterator) and start it.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
import Crawler from 'crawler';
import RgaaResultWrapper from './RgaaResultWrapper';
import { parseCriteriaArticle, parseTestLi, parseTopicA } from './parser';
// Address of the RGAA criteria page that scrapeRgaa() downloads and parses.
const RGAA_URL = 'https://www.numerique.gouv.fr/publications/rgaa-accessibilite/methode-rgaa/criteres/';
/**
 * Issues a single request through `crawler.direct` and exposes the outcome of
 * its node-style callback as a promise.
 *
 * @param {string} uri - Address to request.
 * @param {{ direct: Function }} crawler - Object whose `direct` method accepts
 *   `{ uri, callback }` and eventually invokes `callback(error, response)`.
 * @returns {Promise<*>} Resolves with the `response` handed to the callback,
 *   or rejects with the `error` when one is reported.
 */
function queueAsPromise(uri, crawler) {
  return new Promise((resolve, reject) => {
    crawler.direct({
      uri,
      callback(error, response) {
        if (error) {
          reject(error);
          // Stop here: without this return the success path below would also
          // run on failure.
          return;
        }
        resolve(response);
      },
    });
  });
}
/**
 * Extracts criteria, tests and topics from a fetched RGAA page.
 *
 * @param {{ $: Function }} page - Fetched page; only its `$` selector function
 *   is used (presumably a cheerio-style handle supplied by the crawler —
 *   verify against the caller).
 * @returns {{ criteria: Array, tests: Array, topics: Array }} One array per
 *   category, each element produced by the matching parser.
 */
function parseRgaaPage({ $ }) {
  // Select nodes, wrap each one again with `$`, and hand it to the parser.
  const collect = (selector, parse) => {
    const nodes = $(selector).toArray();
    return nodes.map(node => parse($(node)));
  };

  const criteria = collect('#criteres article', wrapped => parseCriteriaArticle(wrapped));
  const tests = collect('#criteres li[id*="test"]', wrapped => parseTestLi(wrapped));
  const topics = collect('ol#topics-list li a', wrapped => parseTopicA(wrapped));

  return { criteria, tests, topics };
}
/**
 * Downloads the RGAA criteria page and parses it.
 *
 * @returns {Promise<RgaaResultWrapper>} Promise for a wrapper around the
 *   parsed topics, criteria and tests.
 */
export default function scrapeRgaa() {
  return __awaiter(this, void 0, void 0, function* () {
    // A fresh crawler with default options is created for every call.
    const client = new Crawler({});
    // Download the RGAA page.
    const page = yield queueAsPromise(RGAA_URL, client);
    // Extract criteria, tests and topics, then wrap them for the caller.
    return new RgaaResultWrapper(parseRgaaPage(page));
  });
}
//# sourceMappingURL=scrape.js.map