UNPKG

@slash-tmp/rgaa-scraper

Version:

Scrapes RGAA's website and fetches topics, criteria and tests into a JSON format.

63 lines 2.5 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const crawler_1 = __importDefault(require("crawler")); const RgaaResultWrapper_1 = __importDefault(require("./RgaaResultWrapper")); const parser_1 = require("./parser"); const RGAA_URL = 'https://www.numerique.gouv.fr/publications/rgaa-accessibilite/methode-rgaa/criteres/'; function queueAsPromise(uri, crawler) { return new Promise((resolve, reject) => { crawler.direct({ uri, callback(error, response) { if (error) { reject(error); } resolve(response); }, }); }); } function parseRgaaPage({ $ }) { const criteria = $('#criteres article') .toArray() .map(el => parser_1.parseCriteriaArticle($(el))); const tests = $('#criteres li[id*="test"]') .toArray() .map(el => parser_1.parseTestLi($(el))); const topics = $('ol#topics-list li a') .toArray() .map(el => parser_1.parseTopicA($(el))); return { criteria, tests, topics, }; } /** * Scrapes the RGAA website and returns a promise to an object representing * topics, criteria and tests. 
*/ function scrapeRgaa() { return __awaiter(this, void 0, void 0, function* () { const crawler = new crawler_1.default({}); // fetch RGAA page const res = yield queueAsPromise(RGAA_URL, crawler); // parse the page for criteria and tests const data = parseRgaaPage(res); return new RgaaResultWrapper_1.default(data); }); } exports.default = scrapeRgaa; //# sourceMappingURL=scrape.js.map