UNPKG

pouchdb-mapreduce

Version:

PouchDB's map/reduce query API as a plugin.

933 lines (848 loc) 27.3 kB
import { flatten, guardedConsole, nextTick } from 'pouchdb-utils'; import { base64StringToBlobOrBuffer as b64ToBluffer } from 'pouchdb-binary-utils'; import { collate, toIndexableString, normalizeKey, parseIndexableString } from 'pouchdb-collate'; import TaskQueue from './taskqueue'; import createView from './createView'; import evalFunction from './evalFunction'; import { callbackify, sequentialize, uniq, fin, promisedCallback, mapToKeysArray } from 'pouchdb-mapreduce-utils'; import { QueryParseError, NotFoundError, BuiltInError } from './errors'; import { Map, Set } from 'pouchdb-collections'; import Promise from 'pouchdb-promise'; import sum from './sum'; var persistentQueues = {}; var tempViewQueue = new TaskQueue(); var CHANGES_BATCH_SIZE = 50; function parseViewName(name) { // can be either 'ddocname/viewname' or just 'viewname' // (where the ddoc name is the same) return name.indexOf('/') === -1 ? [name, name] : name.split('/'); } function isGenOne(changes) { // only return true if the current change is 1- // and there are no other leafs return changes.length === 1 && /^1-/.test(changes[0].rev); } function emitError(db, e) { try { db.emit('error', e); } catch (err) { guardedConsole('error', 'The user\'s map/reduce function threw an uncaught error.\n' + 'You can debug this error by doing:\n' + 'myDatabase.on(\'error\', function (err) { debugger; });\n' + 'Please double-check your map/reduce function.'); guardedConsole('error', e); } } function tryMap(db, fun, doc) { // emit an event if there was an error thrown by a map function. // putting try/catches in a single function also avoids deoptimizations. try { fun(doc); } catch (e) { emitError(db, e); } } function tryReduce(db, fun, keys, values, rereduce) { // same as above, but returning the result or an error. there are two separate // functions to avoid extra memory allocations since the tryCode() case is used // for custom map functions (common) vs this function, which is only used for // custom reduce functions (rare) try { return {output : fun(keys, values, rereduce)}; } catch (e) { emitError(db, e); return {error: e}; } } function sortByKeyThenValue(x, y) { var keyCompare = collate(x.key, y.key); return keyCompare !== 0 ? keyCompare : collate(x.value, y.value); } function sliceResults(results, limit, skip) { skip = skip || 0; if (typeof limit === 'number') { return results.slice(skip, limit + skip); } else if (skip > 0) { return results.slice(skip); } return results; } function rowToDocId(row) { var val = row.value; // Users can explicitly specify a joined doc _id, or it // defaults to the doc _id that emitted the key/value. var docId = (val && typeof val === 'object' && val._id) || row.id; return docId; } function readAttachmentsAsBlobOrBuffer(res) { res.rows.forEach(function (row) { var atts = row.doc && row.doc._attachments; if (!atts) { return; } Object.keys(atts).forEach(function (filename) { var att = atts[filename]; atts[filename].data = b64ToBluffer(att.data, att.content_type); }); }); } function postprocessAttachments(opts) { return function (res) { if (opts.include_docs && opts.attachments && opts.binary) { readAttachmentsAsBlobOrBuffer(res); } return res; }; } var builtInReduce = { _sum: function (keys, values) { return sum(values); }, _count: function (keys, values) { return values.length; }, _stats: function (keys, values) { // no need to implement rereduce=true, because Pouch // will never call it function sumsqr(values) { var _sumsqr = 0; for (var i = 0, len = values.length; i < len; i++) { var num = values[i]; _sumsqr += (num * num); } return _sumsqr; } return { sum : sum(values), min : Math.min.apply(null, values), max : Math.max.apply(null, values), count : values.length, sumsqr : sumsqr(values) }; } }; function addHttpParam(paramName, opts, params, asJson) { // add an http param from opts to params, optionally json-encoded var val = opts[paramName]; if (typeof val !== 'undefined') { if (asJson) { val = encodeURIComponent(JSON.stringify(val)); } params.push(paramName + '=' + val); } } function coerceInteger(integerCandidate) { if (typeof integerCandidate !== 'undefined') { var asNumber = Number(integerCandidate); // prevents e.g. '1foo' or '1.1' being coerced to 1 if (!isNaN(asNumber) && asNumber === parseInt(integerCandidate, 10)) { return asNumber; } else { return integerCandidate; } } } function coerceOptions(opts) { opts.group_level = coerceInteger(opts.group_level); opts.limit = coerceInteger(opts.limit); opts.skip = coerceInteger(opts.skip); return opts; } function checkPositiveInteger(number) { if (number) { if (typeof number !== 'number') { return new QueryParseError('Invalid value for integer: "' + number + '"'); } if (number < 0) { return new QueryParseError('Invalid value for positive integer: ' + '"' + number + '"'); } } } function checkQueryParseError(options, fun) { var startkeyName = options.descending ? 'endkey' : 'startkey'; var endkeyName = options.descending ? 'startkey' : 'endkey'; if (typeof options[startkeyName] !== 'undefined' && typeof options[endkeyName] !== 'undefined' && collate(options[startkeyName], options[endkeyName]) > 0) { throw new QueryParseError('No rows can match your key range, ' + 'reverse your start_key and end_key or set {descending : true}'); } else if (fun.reduce && options.reduce !== false) { if (options.include_docs) { throw new QueryParseError('{include_docs:true} is invalid for reduce'); } else if (options.keys && options.keys.length > 1 && !options.group && !options.group_level) { throw new QueryParseError('Multi-key fetches for reduce views must use ' + '{group: true}'); } } ['group_level', 'limit', 'skip'].forEach(function (optionName) { var error = checkPositiveInteger(options[optionName]); if (error) { throw error; } }); } function httpQuery(db, fun, opts) { // List of parameters to add to the PUT request var params = []; var body; var method = 'GET'; // If opts.reduce exists and is defined, then add it to the list // of parameters. // If reduce=false then the results are that of only the map function // not the final result of map and reduce. addHttpParam('reduce', opts, params); addHttpParam('include_docs', opts, params); addHttpParam('attachments', opts, params); addHttpParam('limit', opts, params); addHttpParam('descending', opts, params); addHttpParam('group', opts, params); addHttpParam('group_level', opts, params); addHttpParam('skip', opts, params); addHttpParam('stale', opts, params); addHttpParam('conflicts', opts, params); addHttpParam('startkey', opts, params, true); addHttpParam('start_key', opts, params, true); addHttpParam('endkey', opts, params, true); addHttpParam('end_key', opts, params, true); addHttpParam('inclusive_end', opts, params); addHttpParam('key', opts, params, true); // Format the list of parameters into a valid URI query string params = params.join('&'); params = params === '' ? '' : '?' + params; // If keys are supplied, issue a POST to circumvent GET query string limits // see http://wiki.apache.org/couchdb/HTTP_view_API#Querying_Options if (typeof opts.keys !== 'undefined') { var MAX_URL_LENGTH = 2000; // according to http://stackoverflow.com/a/417184/680742, // the de facto URL length limit is 2000 characters var keysAsString = 'keys=' + encodeURIComponent(JSON.stringify(opts.keys)); if (keysAsString.length + params.length + 1 <= MAX_URL_LENGTH) { // If the keys are short enough, do a GET. we do this to work around // Safari not understanding 304s on POSTs (see pouchdb/pouchdb#1239) params += (params[0] === '?' ? '&' : '?') + keysAsString; } else { method = 'POST'; if (typeof fun === 'string') { body = {keys: opts.keys}; } else { // fun is {map : mapfun}, so append to this fun.keys = opts.keys; } } } // We are referencing a query defined in the design doc if (typeof fun === 'string') { var parts = parseViewName(fun); return db.request({ method: method, url: '_design/' + parts[0] + '/_view/' + parts[1] + params, body: body }).then(postprocessAttachments(opts)); } // We are using a temporary view, terrible for performance, good for testing body = body || {}; Object.keys(fun).forEach(function (key) { if (Array.isArray(fun[key])) { body[key] = fun[key]; } else { body[key] = fun[key].toString(); } }); return db.request({ method: 'POST', url: '_temp_view' + params, body: body }).then(postprocessAttachments(opts)); } // custom adapters can define their own api._query // and override the default behavior /* istanbul ignore next */ function customQuery(db, fun, opts) { return new Promise(function (resolve, reject) { db._query(fun, opts, function (err, res) { if (err) { return reject(err); } resolve(res); }); }); } // custom adapters can define their own api._viewCleanup // and override the default behavior /* istanbul ignore next */ function customViewCleanup(db) { return new Promise(function (resolve, reject) { db._viewCleanup(function (err, res) { if (err) { return reject(err); } resolve(res); }); }); } function defaultsTo(value) { return function (reason) { /* istanbul ignore else */ if (reason.status === 404) { return value; } else { throw reason; } }; } // returns a promise for a list of docs to update, based on the input docId. // the order doesn't matter, because post-3.2.0, bulkDocs // is an atomic operation in all three adapters. function getDocsToPersist(docId, view, docIdsToChangesAndEmits) { var metaDocId = '_local/doc_' + docId; var defaultMetaDoc = {_id: metaDocId, keys: []}; var docData = docIdsToChangesAndEmits.get(docId); var indexableKeysToKeyValues = docData[0]; var changes = docData[1]; function getMetaDoc() { if (isGenOne(changes)) { // generation 1, so we can safely assume initial state // for performance reasons (avoids unnecessary GETs) return Promise.resolve(defaultMetaDoc); } return view.db.get(metaDocId).catch(defaultsTo(defaultMetaDoc)); } function getKeyValueDocs(metaDoc) { if (!metaDoc.keys.length) { // no keys, no need for a lookup return Promise.resolve({rows: []}); } return view.db.allDocs({ keys: metaDoc.keys, include_docs: true }); } function processKeyValueDocs(metaDoc, kvDocsRes) { var kvDocs = []; var oldKeys = new Set(); for (var i = 0, len = kvDocsRes.rows.length; i < len; i++) { var row = kvDocsRes.rows[i]; var doc = row.doc; if (!doc) { // deleted continue; } kvDocs.push(doc); oldKeys.add(doc._id); doc._deleted = !indexableKeysToKeyValues.has(doc._id); if (!doc._deleted) { var keyValue = indexableKeysToKeyValues.get(doc._id); if ('value' in keyValue) { doc.value = keyValue.value; } } } var newKeys = mapToKeysArray(indexableKeysToKeyValues); newKeys.forEach(function (key) { if (!oldKeys.has(key)) { // new doc var kvDoc = { _id: key }; var keyValue = indexableKeysToKeyValues.get(key); if ('value' in keyValue) { kvDoc.value = keyValue.value; } kvDocs.push(kvDoc); } }); metaDoc.keys = uniq(newKeys.concat(metaDoc.keys)); kvDocs.push(metaDoc); return kvDocs; } return getMetaDoc().then(function (metaDoc) { return getKeyValueDocs(metaDoc).then(function (kvDocsRes) { return processKeyValueDocs(metaDoc, kvDocsRes); }); }); } // updates all emitted key/value docs and metaDocs in the mrview database // for the given batch of documents from the source database function saveKeyValues(view, docIdsToChangesAndEmits, seq) { var seqDocId = '_local/lastSeq'; return view.db.get(seqDocId) .catch(defaultsTo({_id: seqDocId, seq: 0})) .then(function (lastSeqDoc) { var docIds = mapToKeysArray(docIdsToChangesAndEmits); return Promise.all(docIds.map(function (docId) { return getDocsToPersist(docId, view, docIdsToChangesAndEmits); })).then(function (listOfDocsToPersist) { var docsToPersist = flatten(listOfDocsToPersist); lastSeqDoc.seq = seq; docsToPersist.push(lastSeqDoc); // write all docs in a single operation, update the seq once return view.db.bulkDocs({docs : docsToPersist}); }); }); } function getQueue(view) { var viewName = typeof view === 'string' ? view : view.name; var queue = persistentQueues[viewName]; if (!queue) { queue = persistentQueues[viewName] = new TaskQueue(); } return queue; } function updateView(view) { return sequentialize(getQueue(view), function () { return updateViewInQueue(view); })(); } function updateViewInQueue(view) { // bind the emit function once var mapResults; var doc; function emit(key, value) { var output = {id: doc._id, key: normalizeKey(key)}; // Don't explicitly store the value unless it's defined and non-null. // This saves on storage space, because often people don't use it. if (typeof value !== 'undefined' && value !== null) { output.value = normalizeKey(value); } mapResults.push(output); } var mapFun; // for temp_views one can use emit(doc, emit), see #38 if (typeof view.mapFun === "function" && view.mapFun.length === 2) { var origMap = view.mapFun; mapFun = function (doc) { return origMap(doc, emit); }; } else { mapFun = evalFunction(view.mapFun.toString(), emit); } var currentSeq = view.seq || 0; function processChange(docIdsToChangesAndEmits, seq) { return function () { return saveKeyValues(view, docIdsToChangesAndEmits, seq); }; } var queue = new TaskQueue(); function processNextBatch() { return view.sourceDB.changes({ conflicts: true, include_docs: true, style: 'all_docs', since: currentSeq, limit: CHANGES_BATCH_SIZE }).then(processBatch); } function processBatch(response) { var results = response.results; if (!results.length) { return; } var docIdsToChangesAndEmits = createDocIdsToChangesAndEmits(results); queue.add(processChange(docIdsToChangesAndEmits, currentSeq)); if (results.length < CHANGES_BATCH_SIZE) { return; } return processNextBatch(); } function createDocIdsToChangesAndEmits(results) { var docIdsToChangesAndEmits = new Map(); for (var i = 0, len = results.length; i < len; i++) { var change = results[i]; if (change.doc._id[0] !== '_') { mapResults = []; doc = change.doc; if (!doc._deleted) { tryMap(view.sourceDB, mapFun, doc); } mapResults.sort(sortByKeyThenValue); var indexableKeysToKeyValues = createIndexableKeysToKeyValues(mapResults); docIdsToChangesAndEmits.set(change.doc._id, [ indexableKeysToKeyValues, change.changes ]); } currentSeq = change.seq; } return docIdsToChangesAndEmits; } function createIndexableKeysToKeyValues(mapResults) { var indexableKeysToKeyValues = new Map(); var lastKey; for (var i = 0, len = mapResults.length; i < len; i++) { var emittedKeyValue = mapResults[i]; var complexKey = [emittedKeyValue.key, emittedKeyValue.id]; if (i > 0 && collate(emittedKeyValue.key, lastKey) === 0) { complexKey.push(i); // dup key+id, so make it unique } indexableKeysToKeyValues.set(toIndexableString(complexKey), emittedKeyValue); lastKey = emittedKeyValue.key; } return indexableKeysToKeyValues; } return processNextBatch().then(function () { return queue.finish(); }).then(function () { view.seq = currentSeq; }); } function reduceView(view, results, options) { if (options.group_level === 0) { delete options.group_level; } var shouldGroup = options.group || options.group_level; var reduceFun; if (builtInReduce[view.reduceFun]) { reduceFun = builtInReduce[view.reduceFun]; } else { reduceFun = evalFunction(view.reduceFun.toString()); } var groups = []; var lvl = isNaN(options.group_level) ? Number.POSITIVE_INFINITY : options.group_level; results.forEach(function (e) { var last = groups[groups.length - 1]; var groupKey = shouldGroup ? e.key : null; // only set group_level for array keys if (shouldGroup && Array.isArray(groupKey)) { groupKey = groupKey.slice(0, lvl); } if (last && collate(last.groupKey, groupKey) === 0) { last.keys.push([e.key, e.id]); last.values.push(e.value); return; } groups.push({ keys: [[e.key, e.id]], values: [e.value], groupKey: groupKey }); }); results = []; for (var i = 0, len = groups.length; i < len; i++) { var e = groups[i]; var reduceTry = tryReduce(view.sourceDB, reduceFun, e.keys, e.values, false); if (reduceTry.error && reduceTry.error instanceof BuiltInError) { // CouchDB returns an error if a built-in errors out throw reduceTry.error; } results.push({ // CouchDB just sets the value to null if a non-built-in errors out value: reduceTry.error ? null : reduceTry.output, key: e.groupKey }); } // no total_rows/offset when reducing return {rows: sliceResults(results, options.limit, options.skip)}; } function queryView(view, opts) { return sequentialize(getQueue(view), function () { return queryViewInQueue(view, opts); })(); } function queryViewInQueue(view, opts) { var totalRows; var shouldReduce = view.reduceFun && opts.reduce !== false; var skip = opts.skip || 0; if (typeof opts.keys !== 'undefined' && !opts.keys.length) { // equivalent query opts.limit = 0; delete opts.keys; } function fetchFromView(viewOpts) { viewOpts.include_docs = true; return view.db.allDocs(viewOpts).then(function (res) { totalRows = res.total_rows; return res.rows.map(function (result) { // implicit migration - in older versions of PouchDB, // we explicitly stored the doc as {id: ..., key: ..., value: ...} // this is tested in a migration test /* istanbul ignore next */ if ('value' in result.doc && typeof result.doc.value === 'object' && result.doc.value !== null) { var keys = Object.keys(result.doc.value).sort(); // this detection method is not perfect, but it's unlikely the user // emitted a value which was an object with these 3 exact keys var expectedKeys = ['id', 'key', 'value']; if (!(keys < expectedKeys || keys > expectedKeys)) { return result.doc.value; } } var parsedKeyAndDocId = parseIndexableString(result.doc._id); return { key: parsedKeyAndDocId[0], id: parsedKeyAndDocId[1], value: ('value' in result.doc ? result.doc.value : null) }; }); }); } function onMapResultsReady(rows) { var finalResults; if (shouldReduce) { finalResults = reduceView(view, rows, opts); } else { finalResults = { total_rows: totalRows, offset: skip, rows: rows }; } if (opts.include_docs) { var docIds = uniq(rows.map(rowToDocId)); return view.sourceDB.allDocs({ keys: docIds, include_docs: true, conflicts: opts.conflicts, attachments: opts.attachments, binary: opts.binary }).then(function (allDocsRes) { var docIdsToDocs = new Map(); allDocsRes.rows.forEach(function (row) { docIdsToDocs.set(row.id, row.doc); }); rows.forEach(function (row) { var docId = rowToDocId(row); var doc = docIdsToDocs.get(docId); if (doc) { row.doc = doc; } }); return finalResults; }); } else { return finalResults; } } if (typeof opts.keys !== 'undefined') { var keys = opts.keys; var fetchPromises = keys.map(function (key) { var viewOpts = { startkey : toIndexableString([key]), endkey : toIndexableString([key, {}]) }; return fetchFromView(viewOpts); }); return Promise.all(fetchPromises).then(flatten).then(onMapResultsReady); } else { // normal query, no 'keys' var viewOpts = { descending : opts.descending }; if (opts.start_key) { opts.startkey = opts.start_key; } if (opts.end_key) { opts.endkey = opts.end_key; } if (typeof opts.startkey !== 'undefined') { viewOpts.startkey = opts.descending ? toIndexableString([opts.startkey, {}]) : toIndexableString([opts.startkey]); } if (typeof opts.endkey !== 'undefined') { var inclusiveEnd = opts.inclusive_end !== false; if (opts.descending) { inclusiveEnd = !inclusiveEnd; } viewOpts.endkey = toIndexableString( inclusiveEnd ? [opts.endkey, {}] : [opts.endkey]); } if (typeof opts.key !== 'undefined') { var keyStart = toIndexableString([opts.key]); var keyEnd = toIndexableString([opts.key, {}]); if (viewOpts.descending) { viewOpts.endkey = keyStart; viewOpts.startkey = keyEnd; } else { viewOpts.startkey = keyStart; viewOpts.endkey = keyEnd; } } if (!shouldReduce) { if (typeof opts.limit === 'number') { viewOpts.limit = opts.limit; } viewOpts.skip = skip; } return fetchFromView(viewOpts).then(onMapResultsReady); } } function httpViewCleanup(db) { return db.request({ method: 'POST', url: '_view_cleanup' }); } function localViewCleanup(db) { return db.get('_local/mrviews').then(function (metaDoc) { var docsToViews = new Map(); Object.keys(metaDoc.views).forEach(function (fullViewName) { var parts = parseViewName(fullViewName); var designDocName = '_design/' + parts[0]; var viewName = parts[1]; var views = docsToViews.get(designDocName); if (!views) { views = new Set(); docsToViews.set(designDocName, views); } views.add(viewName); }); var opts = { keys : mapToKeysArray(docsToViews), include_docs : true }; return db.allDocs(opts).then(function (res) { var viewsToStatus = {}; res.rows.forEach(function (row) { var ddocName = row.key.substring(8); // cuts off '_design/' docsToViews.get(row.key).forEach(function (viewName) { var fullViewName = ddocName + '/' + viewName; /* istanbul ignore if */ if (!metaDoc.views[fullViewName]) { // new format, without slashes, to support PouchDB 2.2.0 // migration test in pouchdb's browser.migration.js verifies this fullViewName = viewName; } var viewDBNames = Object.keys(metaDoc.views[fullViewName]); // design doc deleted, or view function nonexistent var statusIsGood = row.doc && row.doc.views && row.doc.views[viewName]; viewDBNames.forEach(function (viewDBName) { viewsToStatus[viewDBName] = viewsToStatus[viewDBName] || statusIsGood; }); }); }); var dbsToDelete = Object.keys(viewsToStatus).filter( function (viewDBName) { return !viewsToStatus[viewDBName]; }); var destroyPromises = dbsToDelete.map(function (viewDBName) { return sequentialize(getQueue(viewDBName), function () { return new db.constructor(viewDBName, db.__opts).destroy(); })(); }); return Promise.all(destroyPromises).then(function () { return {ok: true}; }); }); }, defaultsTo({ok: true})); } var viewCleanup = callbackify(function () { var db = this; if (db.type() === 'http') { return httpViewCleanup(db); } /* istanbul ignore next */ if (typeof db._viewCleanup === 'function') { return customViewCleanup(db); } return localViewCleanup(db); }); function queryPromised(db, fun, opts) { if (db.type() === 'http') { return httpQuery(db, fun, opts); } /* istanbul ignore next */ if (typeof db._query === 'function') { return customQuery(db, fun, opts); } if (typeof fun !== 'string') { // temp_view checkQueryParseError(opts, fun); var createViewOpts = { db : db, viewName : 'temp_view/temp_view', map : fun.map, reduce : fun.reduce, temporary : true }; tempViewQueue.add(function () { return createView(createViewOpts).then(function (view) { function cleanup() { return view.db.destroy(); } return fin(updateView(view).then(function () { return queryView(view, opts); }), cleanup); }); }); return tempViewQueue.finish(); } else { // persistent view var fullViewName = fun; var parts = parseViewName(fullViewName); var designDocName = parts[0]; var viewName = parts[1]; return db.get('_design/' + designDocName).then(function (doc) { var fun = doc.views && doc.views[viewName]; if (!fun || typeof fun.map !== 'string') { throw new NotFoundError('ddoc ' + designDocName + ' has no view named ' + viewName); } checkQueryParseError(opts, fun); var createViewOpts = { db : db, viewName : fullViewName, map : fun.map, reduce : fun.reduce }; return createView(createViewOpts).then(function (view) { if (opts.stale === 'ok' || opts.stale === 'update_after') { if (opts.stale === 'update_after') { nextTick(function () { updateView(view); }); } return queryView(view, opts); } else { // stale not ok return updateView(view).then(function () { return queryView(view, opts); }); } }); }); } } var query = function (fun, opts, callback) { if (typeof opts === 'function') { callback = opts; opts = {}; } opts = opts ? coerceOptions(opts) : {}; if (typeof fun === 'function') { fun = {map : fun}; } var db = this; var promise = Promise.resolve().then(function () { return queryPromised(db, fun, opts); }); promisedCallback(promise, callback); return promise; }; export default { query: query, viewCleanup: viewCleanup };