UNPKG

@slash-tmp/rgaa-scraper

Version:

Scrapes the RGAA website and fetches topics, criteria and tests as JSON.

57 lines 2.21 kB
// TypeScript-emitted helper that drives a generator as an async function.
// Kept as generated so the module stays compatible with its original
// (pre-async/await) compile target.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import Crawler from 'crawler';
import RgaaResultWrapper from './RgaaResultWrapper';
import { parseCriteriaArticle, parseTestLi, parseTopicA } from './parser';

const RGAA_URL = 'https://www.numerique.gouv.fr/publications/rgaa-accessibilite/methode-rgaa/criteres/';

/**
 * Adapts the callback-based `crawler.direct()` call to a promise.
 *
 * @param {string} uri - Address passed to `crawler.direct()` as `uri`.
 * @param {object} crawler - Object exposing a `direct({ uri, callback })` method.
 * @returns {Promise<*>} Resolves with the `response` handed to the callback,
 *   or rejects with the `error` handed to the callback.
 */
function queueAsPromise(uri, crawler) {
    return new Promise((resolve, reject) => {
        crawler.direct({
            uri,
            callback(error, response) {
                if (error) {
                    reject(error);
                    // Stop here so the success path is never reached on failure.
                    return;
                }
                resolve(response);
            },
        });
    });
}

/**
 * Extracts criteria, tests and topics from a fetched page.
 *
 * @param {object} page - Object carrying a `$` selector function
 *   (presumably the cheerio instance the crawler attaches to its response;
 *   confirm against the crawler configuration).
 * @returns {{criteria: Array, tests: Array, topics: Array}} The outputs of
 *   `parseCriteriaArticle`, `parseTestLi` and `parseTopicA` applied to each
 *   element matched by the corresponding selector.
 */
function parseRgaaPage({ $ }) {
    const criteria = $('#criteres article')
        .toArray()
        .map(el => parseCriteriaArticle($(el)));
    const tests = $('#criteres li[id*="test"]')
        .toArray()
        .map(el => parseTestLi($(el)));
    const topics = $('ol#topics-list li a')
        .toArray()
        .map(el => parseTopicA($(el)));
    return {
        criteria,
        tests,
        topics,
    };
}

/**
 * Scrapes the RGAA website and returns a promise to an object representing
 * topics, criteria and tests.
 *
 * @returns {Promise<RgaaResultWrapper>} Resolves with a `RgaaResultWrapper`
 *   built from the parsed page; rejects if the fetch or the parsing fails.
 */
export default function scrapeRgaa() {
    return __awaiter(this, void 0, void 0, function* () {
        const crawler = new Crawler({});
        // fetch RGAA page
        const res = yield queueAsPromise(RGAA_URL, crawler);
        // parse the page for criteria and tests
        const data = parseRgaaPage(res);
        return new RgaaResultWrapper(data);
    });
}
//# sourceMappingURL=scrape.js.map