UNPKG

pageviews

Version:

A lightweight JavaScript client library, for Node.js and the browser, for the Wikimedia Pageviews API covering Wikipedia and several of its sister projects.

652 lines (620 loc) 22.3 kB
/**
 * @license
 * Copyright 2017 Thomas Steiner (@tomayac). All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
var request;
var USER_AGENT = 'pageviews.js';

// Dynamically adapt to the runtime environment. A missing `window` alone does
// not prove we are in Node.js (Web Workers and ES modules have neither
// `window` nor `require`), so CommonJS must be available as well; everything
// else falls back to the XMLHttpRequest transport.
var environment = (typeof window === 'undefined' &&
    typeof require === 'function' &&
    typeof module !== 'undefined') ? 'node' : 'browser';

if (environment === 'node') {
  // Node.js
  request = require('request');
  var packageJson = require('./package.json');
  // The user agent to use
  USER_AGENT = 'pageviews.js-v' + packageJson.version + ' (' +
      packageJson.repository.url + ')';
} else {
  // Browser: minimal stand-in for the subset of `request` used below. It
  // calls back with (error) or with (null, {statusCode}, bodyText).
  request = function(options, callback) {
    var xhr = new XMLHttpRequest();
    xhr.addEventListener('load', function() {
      return callback(null, {statusCode: xhr.status}, xhr.responseText);
    });
    xhr.addEventListener('error', function() {
      // The listener receives a ProgressEvent, not an Error; hand a real
      // Error to the callback so that the failure rejects the promise.
      return callback(new Error('Network request failed.'));
    });
    xhr.open('GET', options.url);
    xhr.send();
  };
}

var pageviews = (function() {
  // The Pageviews base URL
  var BASE_URL = 'https://wikimedia.org/api/rest_v1';

  var _access = {
    default: 'all-access',
    allowed: ['all-access', 'desktop', 'mobile-web', 'mobile-app']
  };

  var _accessSite = {
    default: 'all-sites',
    allowed: ['all-sites', 'desktop-site', 'mobile-site', 'all-access']
  };

  var _agent = {
    default: 'all-agents',
    allowed: ['all-agents', 'user', 'spider', 'bot']
  };

  var _granularityAggregated = {
    default: 'hourly',
    allowed: ['daily', 'hourly', 'monthly']
  };

  var _granularityPerArticle = {
    default: 'daily',
    allowed: ['daily', 'monthly']
  };

  var _granularityUniques = {
    default: 'daily',
    allowed: ['daily', 'monthly']
  };

  // Methods that accept a list of projects via `params.projects`
  var MULTI_PROJECT_CALLERS = [
    'getAggregatedPageviews',
    'getAggregatedLegacyPagecounts',
    'getTopPageviews',
    'getTopPageviewsByCountry'
  ];

  // YYYYMMDD, optionally separated by "-", " ", "/" or "."
  var DAY_PATTERN =
      /^(?:19|20)\d\d[- /.]?(?:0[1-9]|1[012])[- /.]?(?:0[1-9]|[12][0-9]|3[01])$/;
  // YYYYMMDDHH with the same optional separators; hours are 00-23
  var HOUR_PATTERN =
      /^(?:19|20)\d\d[- /.]?(?:0[1-9]|1[012])[- /.]?(?:0[1-9]|[12][0-9]|3[01])[- /.]?(?:[01][0-9]|2[0-3])$/;

  /**
   * Left-pads a number below 10 with a zero and returns it as a string.
   */
  var _pad = function(d) {
    return d < 10 ? '0' + d : d.toString();
  };

  /**
   * Zero-pads a single-digit number or numeric string ('5' -> '05'); any
   * other value (e.g. 12, '05' or 'all-days') is returned as a string as is.
   */
  var _twoDigits = function(value) {
    var text = String(value);
    return /^\d$/.test(text) ? '0' + text : text;
  };

  /**
   * Formats a Date as a UTC YYYYMMDD (or YYYYMMDDHH) timestamp string.
   */
  var _formatDate = function(date, withHours) {
    var stamp = date.getUTCFullYear() + _pad(date.getUTCMonth() + 1) +
        _pad(date.getUTCDate());
    return withHours ? stamp + _pad(date.getUTCHours()) : stamp;
  };

  /**
   * Returns a shallow copy of the own enumerable properties of an object.
   */
  var _shallowCopy = function(source) {
    var copy = {};
    Object.keys(source).forEach(function(key) {
      copy[key] = source[key];
    });
    return copy;
  };

  /**
   * Tells whether a value is an acceptable project name: "all-projects",
   * "wikidata", or any string that contains a dot.
   */
  var _isValidProject = function(project) {
    return typeof project === 'string' && (project === 'all-projects' ||
        project === 'wikidata' || project.indexOf('.') !== -1);
  };

  /**
   * Checks the input parameters for validity.
   *
   * The caller's object is never modified: validation and normalization
   * (Date -> timestamp string, date separators stripped, `date` ->
   * year/month/day, `projects: 'all-projects'` -> `project`) happen on a
   * shallow copy.
   *
   * @param {Object} input The parameters as passed by the user.
   * @param {string} caller Name of the public method being validated for.
   * @return {Object|Error} The normalized copy, or an Error describing the
   *     first problem found.
   */
  var _checkParams = function(input, caller) {
    if (!input) {
      return new Error('Required parameters missing.');
    }
    var params = _shallowCopy(input);
    var multiProject = MULTI_PROJECT_CALLERS.indexOf(caller) !== -1;

    // Required: project or projects (`projects` only counts for the methods
    // that actually support it)
    if (!params.project && !(multiProject && params.projects)) {
      return new Error(multiProject ?
          'Required parameter "project" or "projects" missing.' :
          'Required parameter "project" missing.');
    }
    if (params.project && !_isValidProject(params.project)) {
      return new Error('Required parameter "project" invalid.');
    }
    if (multiProject && params.projects) {
      if (params.projects === 'all-projects') {
        // Shorthand for the single pseudo-project "all-projects"
        delete params.projects;
        params.project = 'all-projects';
      } else if (!Array.isArray(params.projects) ||
          !params.projects.length ||
          !params.projects.every(_isValidProject)) {
        return new Error('Required parameter "projects" invalid.');
      }
    }

    // Required: article or articles
    if (caller === 'getPerArticlePageviews') {
      if (!params.article && !params.articles) {
        return new Error(
            'Required parameter "article" or "articles" missing.');
      }
      if (params.articles &&
          (!Array.isArray(params.articles) || !params.articles.length)) {
        return new Error('Required parameter "articles" invalid.');
      }
    }

    // Required: start and end, with day or hour resolution
    var hourly = caller === 'getAggregatedPageviews' ||
        caller === 'getAggregatedLegacyPagecounts';
    var daily = caller === 'getPerArticlePageviews' ||
        caller === 'getUniqueDevices';
    if (hourly || daily) {
      var pattern = hourly ? HOUR_PATTERN : DAY_PATTERN;
      var bounds = ['start', 'end'];
      for (var i = 0; i < bounds.length; i++) {
        var bound = bounds[i];
        if (!params[bound]) {
          return new Error('Required parameter "' + bound + '" missing.');
        }
        if (typeof params[bound] === 'object') {
          params[bound] = _formatDate(params[bound], hourly);
        }
        if (!pattern.test(params[bound])) {
          return new Error('Required parameter "' + bound + '"' +
              (hourly ? ' missing or invalid.' : ' invalid.'));
        }
        // The value becomes a URL path segment, so drop the optional
        // separators the pattern tolerates
        params[bound] = String(params[bound]).replace(/[- /.]/g, '');
      }
    }

    if (caller === 'getTopPageviews' && params.date) {
      // `date` (a Date or a YYYYMMDD string) overrides year, month and day
      var text = String(params.date);
      var date = typeof params.date === 'object' ? params.date : new Date(
          text.substr(0, 4) + '-' + text.substr(4, 2) + '-' +
          text.substr(6, 2));
      params.date = date;
      params.year = date.getUTCFullYear();
      params.month = _pad(date.getUTCMonth() + 1);
      params.day = _pad(date.getUTCDate());
    }

    if (caller === 'getTopPageviews' ||
        caller === 'getTopPageviewsByCountry') {
      // Required: year
      if (!params.year || !/^(?:19|20)\d\d$/.test(params.year)) {
        return new Error('Required parameter "year" missing or invalid.');
      }
      // Required: month
      if (!params.month || !/^(?:0?[1-9]|1[012])$/.test(params.month)) {
        return new Error('Required parameter "month" missing or invalid.');
      }
    }

    if (caller === 'getTopPageviews') {
      // Required: day
      if (!params.day ||
          !/^(?:0?[1-9]|[12][0-9]|3[01]|all-days)$/.test(params.day)) {
        return new Error('Required parameter "day" missing or invalid.');
      }
      // Optional: limit, an integer from 1 to 1000
      if (params.limit && !(/^\d+$/.test(params.limit) &&
          Number(params.limit) > 0 && Number(params.limit) <= 1000)) {
        return new Error('Invalid optional parameter "limit".');
      }
    }

    // Optional: access
    if (params.access && _access.allowed.indexOf(params.access) === -1) {
      return new Error('Invalid optional parameter "access".');
    }
    // Optional: accessSite
    if (params.accessSite &&
        _accessSite.allowed.indexOf(params.accessSite) === -1) {
      return new Error('Invalid optional parameter "accessSite".');
    }
    // Optional: agent
    if (params.agent && _agent.allowed.indexOf(params.agent) === -1) {
      return new Error('Invalid optional parameter "agent".');
    }
    // Optional: granularity
    if (params.granularity) {
      var granularities = null;
      if (hourly) {
        granularities = _granularityAggregated;
      } else if (caller === 'getPerArticlePageviews') {
        granularities = _granularityPerArticle;
      } else if (caller === 'getUniqueDevices') {
        granularities = _granularityUniques;
      }
      if (granularities &&
          granularities.allowed.indexOf(params.granularity) === -1) {
        return new Error('Invalid optional parameter "granularity".');
      }
    }
    return params;
  };

  /**
   * Checks the results for validity, in case of success returns the parsed
   * data, else returns the error details (always as an Error instance).
   */
  var _checkResult = function(error, response, body) {
    var data;
    if (error) {
      return error instanceof Error ? error : new Error(String(error));
    }
    if (response.statusCode !== 200) {
      if (response.statusCode === 404) {
        try {
          data = JSON.parse(body);
          return new Error(data.detail || data.title);
        } catch (e) {
          return new Error(e);
        }
      }
      return new Error('Status code ' + response.statusCode);
    }
    try {
      data = JSON.parse(body);
    } catch (e) {
      return new Error(e);
    }
    return data;
  };

  /**
   * Issues one request per entry of `params[listKey]`, each with its own
   * copy of the parameters in which the list is replaced by the single
   * entry under `itemKey`. Resolves to the results in list order.
   */
  var _fanOut = function(params, listKey, itemKey, fetchOne) {
    return Promise.all(params[listKey].map(function(item) {
      var single = _shallowCopy(params);
      delete single[listKey];
      single[itemKey] = item;
      return fetchOne(single);
    }));
  };

  /**
   * GETs BASE_URL + path and settles the promise with the checked result.
   * The optional `transform` post-processes a successful result.
   */
  var _send = function(path, resolve, reject, transform) {
    var options = {
      url: BASE_URL + path,
      headers: {
        'User-Agent': USER_AGENT
      }
    };
    request(options, function(error, response, body) {
      var result = _checkResult(error, response, body);
      if (result instanceof Error) {
        return reject(result);
      }
      return resolve(transform ? transform(result) : result);
    });
  };

  var _getPerArticlePageviews = function(params) {
    return new Promise(function(resolve, reject) {
      var checked = _checkParams(params, 'getPerArticlePageviews');
      if (checked instanceof Error) {
        return reject(checked);
      }
      // Call yourself recursively in case of multiple articles
      if (checked.articles) {
        return resolve(_fanOut(checked, 'articles', 'article',
            _getPerArticlePageviews));
      }
      _send('/metrics/pageviews/per-article/' + [
        checked.project,
        checked.access || _access.default,
        checked.agent || _agent.default,
        encodeURIComponent(checked.article.replace(/\s/g, '_')),
        checked.granularity || _granularityPerArticle.default,
        checked.start,
        checked.end
      ].join('/'), resolve, reject);
    });
  };

  var _getAggregatedPageviews = function(params) {
    return new Promise(function(resolve, reject) {
      var checked = _checkParams(params, 'getAggregatedPageviews');
      if (checked instanceof Error) {
        return reject(checked);
      }
      // Call yourself recursively in case of multiple projects
      if (checked.projects) {
        return resolve(_fanOut(checked, 'projects', 'project',
            _getAggregatedPageviews));
      }
      _send('/metrics/pageviews/aggregate/' + [
        checked.project,
        checked.access || _access.default,
        checked.agent || _agent.default,
        checked.granularity || _granularityAggregated.default,
        checked.start,
        checked.end
      ].join('/'), resolve, reject);
    });
  };

  var _getAggregatedLegacyPagecounts = function(params) {
    return new Promise(function(resolve, reject) {
      var checked = _checkParams(params, 'getAggregatedLegacyPagecounts');
      if (checked instanceof Error) {
        return reject(checked);
      }
      // Call yourself recursively in case of multiple projects
      if (checked.projects) {
        return resolve(_fanOut(checked, 'projects', 'project',
            _getAggregatedLegacyPagecounts));
      }
      _send('/metrics/legacy/pagecounts/aggregate/' + [
        checked.project,
        checked.accessSite || _accessSite.default,
        checked.granularity || _granularityAggregated.default,
        checked.start,
        checked.end
      ].join('/'), resolve, reject);
    });
  };

  var _getTopPageviews = function(params) {
    return new Promise(function(resolve, reject) {
      var checked = _checkParams(params, 'getTopPageviews');
      if (checked instanceof Error) {
        return reject(checked);
      }
      // Call yourself recursively in case of multiple projects
      if (checked.projects) {
        return resolve(_fanOut(checked, 'projects', 'project',
            _getTopPageviews));
      }
      var limit = checked.limit ? Number(checked.limit) : 0;
      _send('/metrics/pageviews/top/' + [
        checked.project,
        checked.access || _access.default,
        checked.year,
        _twoDigits(checked.month),
        _twoDigits(checked.day)
      ].join('/'), resolve, reject, function(result) {
        // The API has no limit parameter, so trim the list client-side
        var first = limit && result && result.items && result.items[0];
        if (first && Array.isArray(first.articles)) {
          first.articles = first.articles.slice(0, limit);
        }
        return result;
      });
    });
  };

  var _getTopPageviewsByCountry = function(params) {
    return new Promise(function(resolve, reject) {
      var checked = _checkParams(params, 'getTopPageviewsByCountry');
      if (checked instanceof Error) {
        return reject(checked);
      }
      // Call yourself recursively in case of multiple projects
      if (checked.projects) {
        return resolve(_fanOut(checked, 'projects', 'project',
            _getTopPageviewsByCountry));
      }
      _send('/metrics/pageviews/top-by-country/' + [
        checked.project,
        checked.access || _access.default,
        checked.year,
        _twoDigits(checked.month)
      ].join('/'), resolve, reject);
    });
  };

  var _getPageviewsDimensions = function() {
    return new Promise(function(resolve, reject) {
      _send('/metrics/pageviews/', resolve, reject);
    });
  };

  var _getUniqueDevices = function(params) {
    return new Promise(function(resolve, reject) {
      var checked = _checkParams(params, 'getUniqueDevices');
      if (checked instanceof Error) {
        return reject(checked);
      }
      _send('/metrics/unique-devices/' + [
        checked.project,
        checked.accessSite || _accessSite.default,
        checked.granularity || _granularityUniques.default,
        checked.start,
        checked.end
      ].join('/'), resolve, reject);
    });
  };

  return {
    /**
     * This is the root of all pageview data endpoints. The list of paths that
     * this returns includes ways to query by article, project, top articles,
     * etc. If browsing the interactive documentation, see the specifics for
     * each endpoint below.
     */
    getPageviewsDimensions: _getPageviewsDimensions,

    /**
     * Given a Mediawiki article and a date range, returns a daily timeseries of
     * its pageview counts. You can also filter by access method and/or agent
     * type.
     */
    getPerArticlePageviews: _getPerArticlePageviews,

    /**
     * Given a date range, returns a timeseries of pageview counts. You can
     * filter by project, access method and/or agent type. You can choose
     * between daily and hourly granularity as well.
     */
    getAggregatedPageviews: _getAggregatedPageviews,

    /**
     * Given a date range between December 2007 and August 2016,
     * returns a timeseries of pageview counts. You can filter by
     * project and access method. You can choose between daily,
     * hourly and monthly granularity as well.
     */
    getAggregatedLegacyPagecounts: _getAggregatedLegacyPagecounts,

    /**
     * Lists the 1000 most viewed articles for a given project and timespan
     * (year, month or day). You can filter by access method.
     */
    getTopPageviews: _getTopPageviews,

    /**
     * Lists the pageviews to this project, split by country of origin for a
     * given month. Because of privacy reasons, pageviews are given in a
     * bucketed format, and countries with less than 100 views do not get
     * reported.
     */
    getTopPageviewsByCountry: _getTopPageviewsByCountry,

    /**
     * Given a project and a date range, returns a timeseries of unique devices
     * counts. You can filter by access site and choose between daily and
     * monthly granularity.
     */
    getUniqueDevices: _getUniqueDevices
  };
})();

if (environment === 'node') {
  module.exports = pageviews;
}