// blackhighlighter
// Version:
// Client and server for widget implementing secure and committed web redaction
// 594 lines (449 loc) • 17.5 kB
// JavaScript
;
//
// blackhighlighter.js
// Black Highlighter main Node.JS **server-side** routines
// Copyright (C) 2012-2014 HostileFork.com
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// See http://blackhighlighter.hostilefork.com for documentation.
//
//
// CONFIGURE REQUIREJS
//
// Explanation and griping about requirejs and JS modularization in general:
//
// http://blog.hostilefork.com/sharing-code-client-server-nodejs/
//
var requirejs = require('requirejs');
requirejs.config({
    // Resolve module ids against the directory that holds this file, using
    // node's special __dirname variable.  Handy for a library that is used
    // in node but does not require the use of node outside.
    // https://github.com/jrburke/requirejs/issues/150
    baseUrl: __dirname,

    // Hand requirejs the top-level main.js/index.js require function, so
    // that node modules get loaded relative to the top-level JS file.
    nodeRequire: require,

    // Module id -> path table.  Leave the '.js' suffix off these paths!
    paths: {
        // http://stackoverflow.com/q/22471822
        'jquery': 'jquery-fake',

        'jquery-blackhighlighter':
            'jquery-blackhighlighter/jquery-blackhighlighter'
    }
});
//
// MONGODB DATABASE CONFIGURATION
//
// Mongodb interface from
// http://blog.mongodb.org/post/6587009156/cloudfoundry-mongodb-and-nodejs
//
// Best reference for Node.js driver
// http://mongodb.github.com/node-mongodb-native/
//
var mongodb = require('mongodb');
var MongoClient = mongodb.MongoClient;
//
// COMMON ROUTINES BETWEEN CLIENT AND SERVER
//
// In order to reduce the total number of files that clients need to use, the
// common code between the client and server lives inside the jquery widget.
// Since the server doesn't use jQuery, a "fake" jQuery is used instead.
//
// Load the code shared with the client through requirejs.  The requirejs
// configuration in this file maps the 'jquery' module id to 'jquery-fake'.
var common = requirejs('jquery-blackhighlighter');
// Re-export the shared ClientError so users of this module can reach it.
// http://blog.hostilefork.com/error-handling-internal-badrequest-node/
// NOTE(review): this statement only attaches ClientError to exports; it does
// not declare a module-level identifier named ClientError.
exports.ClientError = common.ClientError;
//
// UTILITY LIBRARIES
//
// http://blog.hostilefork.com/underscore-use-with-node-jquery/
var _ = require('underscore')._;
// Q Promises library
//
// https://github.com/kriskowal/q
// http://stackoverflow.com/questions/22138759/
var Q = require('q');
//
// CONFIGURATION
//
// Should be improved with something like the jQuery $.extend mechanism.
// Not sure what things besides the database will wind up going in here.
//
var configuration = {
/* MONGO_CONNECT_URI: ... */
};
exports.configure = function (config) {
configuration = config;
}
//
// DIRECTORY FOR STATIC FILES JQUERY-BLACKHIGHLIGHTER
//
exports.pathForJqueryBlackhighlighter = function () {
return __dirname + '/jquery-blackhighlighter';
}
//
// COMMITTING
//
// Validates the shape of one client-supplied commit before it is stored.
//
// A well-formed commit is an object whose only key is "spans"; spans must be
// an array, and each span is either a string or an object holding exactly a
// numeric display_length and a string sha256.
//
// commit: the untrusted value to check.
//
// Returns nothing.  Throws a ClientError describing the first violation.
//
// REVIEW: This seems pretty tedious, but what else can we do when
// storing JSON from a potentially hostile/hacked client?
function throwIfCommitIsMalformed (commit) {
    // No identifier named ClientError is declared at module level in this
    // file, so take the constructor from the shared code.
    var ClientError = common.ClientError;

    // Must be an object
    if (!_.isObject(commit)) {
        throw ClientError('commit must be an object');
    }

    // Verify it doesn't have more than just "spans"
    var commitKeys = _.keys(commit).sort();
    if (!_.isEqual(commitKeys, ["spans"])) {
        // Log the offending keys.  Calling commit.toString() is not safe
        // here: the client controls the keys, so "toString" may be one of
        // them and not be callable.
        console.log('malformed commit, keys: ' + JSON.stringify(commitKeys));
        throw ClientError('commit should have a .spans key, only');
    }

    // _.each would happily walk a plain object or a string, so insist on a
    // real array before looking at the individual spans.
    if (!_.isArray(commit.spans)) {
        throw ClientError('commit spans must be an array');
    }

    // Spans can be either strings or objects with 2 keys
    _.each(commit.spans, function (commitSpan) {
        if (_.isString(commitSpan)) {
            return;
        }
        if (!_.isObject(commitSpan)) {
            throw ClientError('commit spans must be string or object');
        }
        if (!_.isEqual(
            _.keys(commitSpan).sort(), ["display_length", "sha256"])
        ) {
            throw ClientError(
                'span objects can only have sha256 and display_length'
            );
        }
        if (!_.isNumber(commitSpan.display_length)) {
            throw ClientError('display_length must be a number');
        }
        if (!_.isString(commitSpan.sha256)) {
            throw ClientError('sha256 of span must be string');
        }
    });
}
exports.makeCommitments = function (commit_array, callback) {
var requestTime = new Date();
if (
!_.isArray(commit_array) || !commit_array.length
) {
throw ClientError(
'commit_array should be a non-empty array'
);
}
_.each(commit_array, function (commit) {
// We don't want to allow clients to slip "extra junk" into the MongoDB
// database, as it will just store whatever JSON blobs we ask to
// put in it (no schema).
throwIfCommitIsMalformed(commit);
// Calculate commit_id from the content hashed with the timestamp.
commit.commit_date = requestTime;
commit.commit_id = common.commitIdFromCommit(commit);
});
// Okay, the written content itself may be junk, but at least it's
// all "in-band" junk. Start the database work...
var result = null;
const client = new MongoClient(configuration.mongoConnectURI);
const db = client.db();
const collection = db.collection('commits');
// Add commits to collection
// There is no transactionality in MongoDB, so when we insert an array
// of JS objects it doesn't guarantee us all will succeed or fail.
// We can ask to stop on the first failure, though.
//
// https://github.com/hostilefork/blackhighlighter/issues/52
Q(collection.insertMany(commit_array)) // safe: true ?
.then(function (result) {
// Success (via .then), so give commit_ids and times to the client
// We know the async insertion actually succeeded due to {safe: true}
var commit_id_and_date_array = [];
_.each(commit_array, function (commit) {
// Note: MongoDB stuck its own _id on there, and the client
// doesn't need to know about that; it's not a hash so not suitable
// for our ID purposes.
// We guarantee this output will be in the same order as the array
// used for input, so it can be lined up and verified by client
commit_id_and_date_array.push({
'commit_id': commit.commit_id,
'commit_date': commit.commit_date
});
});
callback(null, {
'commit_id_and_date_array': commit_id_and_date_array
});
})
.catch(function (err) {
callback(err);
})
.finally(function () {
// add general cleanup code here if necessary
})
.done();
}
//
// READING
//
exports.generateHtmlFromCommitAndReveals = function (commit, reveal_array) {
return common.generateHtmlFromCommitAndReveals(commit, reveal_array);
}
exports.getCommitsWithReveals = function (commit_id_array, callback) {
const client = new MongoClient(configuration.mongoConnectURI);
const db = client.db();
const commitsCollection = db.collection('commits');
const revealsCollection = db.collection('reveals');
// Query for specific commits and reveals objects in parallel
// We want to batch these up, so use $or:
// http://mongodb.github.io/node-mongodb-native/markdown-docs/queries.html
var orList = [];
_.each(commit_id_array, function (commit_id) {
orList.push({'commit_id': commit_id});
});
Q.all([
commitsCollection.find(
{$or: orList}
, null
, {sort:[['_id', 'ascending']]}
)
, revealsCollection.find(
{$or: orList}
, null
, {sort:[['sha256', 'ascending']]}
)
])
.spread(function (commitsCursor, revealsCursor) {
// Convert the result cursors to arrays
return [
Q(commitsCursor.toArray())
, Q(revealsCursor.toArray())
];
})
.spread(function (commitsArray, revealsArray) {
// Check the arrays for validity and formulate results
// REVIEW: is the length the only thing we need to check?
if (commitsArray.length < commit_id_array.length) {
throw ClientError("Request for non-existent commit_id.");
} else if (commitsArray.length > commit_id_array.length) {
throw Error("Multiple commits with same _id found.");
}
// The result we return is an array of objects, which have a
// "commit" and "reveals" field. If there were no reveals, then
// the reveals will be an empty array.
var revealsByCommitId = _.groupBy(revealsArray, 'commit_id');
var commits_and_reveals = [];
_.each(commitsArray, function(commit) {
// http://stackoverflow.com/questions/4035232/
var revealsForCommit = revealsByCommitId[commit.commit_id];
_.each(revealsForCommit, function(reveal) {
// https://github.com/hostilefork/blackhighlighter/issues/48
if (!('commit_id' in reveal)) {
throw Error('commit_id expected in server-side reveal');
}
delete reveal['commit_id'];
if (!('_id' in reveal)) {
throw Error('_id expected in server-side reveal');
}
delete reveal['_id'];
});
commits_and_reveals.push({
'commit': commit,
'reveals': revealsForCommit != null
? revealsForCommit
: []
});
});
callback(null, commits_and_reveals);
})
.catch(function (err) {
callback(err);
})
.finally(function () {
// add general cleanup code here if necessary
})
.done();
};
//
// REVEALING
//
// Validates one client-supplied reveal and checks its claimed hash.
//
// A well-formed reveal is an object with exactly the string fields salt,
// sha256 and value, where sha256 matches what common.hashOfReveal computes
// for the reveal.
//
// reveal: the untrusted value to check.
//
// Returns nothing.  Throws a ClientError describing the first violation.
//
// REVIEW: This seems pretty tedious, but what else can we do when
// storing JSON from a potentially hostile/hacked client?
function throwIfRevealIsMalformedOrLying (reveal) {
    // No identifier named ClientError is declared at module level in this
    // file, so take the constructor from the shared code.
    var ClientError = common.ClientError;

    if (!_.isObject(reveal)) {
        throw ClientError('all reveals must be objects');
    }

    if (!_.isEqual(_.keys(reveal).sort(), ["salt", "sha256", "value"])) {
        throw ClientError('reveal has extra or missing keys');
    }

    if (!_.isString(reveal.salt)) {
        throw ClientError('reveal salt should be a string');
    }
    if (!_.isString(reveal.sha256)) {
        throw ClientError('reveal sha256 should be a string');
    }
    if (!_.isString(reveal.value)) {
        throw ClientError('reveal value should be a string');
    }

    // Now make sure the reveal isn't lying about its contents hash.
    // NOTE(review): the loose comparison is kept on purpose, because the
    // return type of common.hashOfReveal is not visible from this file.
    var actualHash = common.hashOfReveal(reveal);
    if (actualHash != reveal.sha256) {
        throw ClientError(
            'Actual redaction hash is ' + actualHash
            + ' while claimed hash is ' + reveal.sha256
        );
    }
}
exports.revealSecrets = function (commit_id_with_reveals_array, callback) {
var requestTime = new Date();
// We don't want to put "extra junk" in the MongoDB database, as it
// will just store whatever objects we put in it (no schema).
if (
!_.isArray(commit_id_with_reveals_array)
|| !commit_id_with_reveals_array.length
) {
throw ClientError(
'commit_id_with_reveals_array should be a non-empty array'
);
}
_.each(commit_id_with_reveals_array, function (commit_id_with_reveals) {
if (!_.isEqual(_.keys(commit_id_with_reveals).sort(),
["commit_id", "reveal_array"])
) {
throw ClientError(
'commit_id_with_reveals has extra or missing keys'
);
}
if (!_.isString(commit_id_with_reveals.commit_id)) {
throw ClientError('commit_id should be a string');
}
if (
!_.isArray(commit_id_with_reveals.reveal_array)
|| !commit_id_with_reveals.reveal_array.length
) {
throw ClientError('reveal_array should be a non-empty array');
}
_.each(commit_id_with_reveals.reveal_array, function (reveal) {
// Check the reveal contents, including that it hashes to
// what the claim is.
throwIfRevealIsMalformedOrLying(reveal);
});
});
// Okay the certificate is "well-formed". For more we have to start
// talking to the database...
const client = new MongoClient(configuration.mongoConnectURI);
const db = client.db();
const commitsCollection = db.collection('commits');
const revealsCollection = db.collection('reveals');
// Query for specific commit and reveals objects in parallel
// We want to batch these up, so use $or:
// http://mongodb.github.io/node-mongodb-native/markdown-docs/queries.html
var orList = [];
_.each(commit_id_with_reveals_array, function (commit_id_with_reveals) {
orList.push({'commit_id': commit_id_with_reveals.commit_id});
});
// REVIEW: necessary to use ObjectID conversion?
// http://stackoverflow.com/questions/4902569/
Q.all([
commitsCollection.find(
{$or: orList}
, null
, {limit: 1, sort:[['_id', 'ascending']]}
)
, revealsCollection.find(
{$or: orList}
, null
, {sort:[['sha256', 'ascending']]}
)
])
.spread(function (commitsCursor, oldRevealsCursor) {
// Convert the result cursors to arrays
return [
Q(commitsCursor.toArray())
, Q(oldRevealsCursor.toArray())
];
})
.spread(function (commitsArray, oldRevealsArray) {
// Add new reveals if they pass verification
// Make sure we got as many commits back as we asked for. Note that
// this will not be true if you used the same commit_id twice in the
// same request rather than grouping those reveals under a single
// commit_id...which might be useful, but this check may help make
// sure the client meant to do that.
if (commitsArray.length < commit_id_with_reveals_array.length) {
throw ClientError("Not all commit_ids found (or not all unique)");
}
else if (commitsArray.length > commit_id_with_reveals_array.length) {
throw Error("Multiple commits with same _id.");
}
var commitsById = _.groupBy(commitsArray, function(commit) {
return commit.commit_id;
});
_.each(commitsById, function(value) {
if (value.length !== 1) {
throw Error("More than one commit with same ID on server.");
}
});
// Now verify the reveals for the redacted portions against the hash
// values we stored at the time of commit.
var newRevealsArray = [];
_.each(commit_id_with_reveals_array, function (commit_id_with_reveals) {
_.each(commit_id_with_reveals.reveal_array, function(reveal) {
// Ensure the redaction hasn't *already* been revealed
_.each(oldRevealsArray, function(oldReveal) {
if (reveal.sha256 == oldReveal.sha256) {
throw ClientError(
"Redaction " + reveal.sha256 + " was already published."
);
}
});
// Make sure the hash matches an existing hash in the commit
// Note: Some spans are strings! .sha256 is not defined for them.
var matchedSpan = null;
var commit = commitsById[commit_id_with_reveals.commit_id][0];
_.every(commit.spans, function(span) {
if (span.sha256 == reveal.sha256) {
matchedSpan = span;
// http://stackoverflow.com/a/8779920/211160
return false;
}
return true;
});
if (!matchedSpan) {
throw ClientError("A reveal hash matched no span in commit.");
}
// we need to poke the commit_id into the reveal so that the
// database can connect them to the commit in our query
reveal.commit_id = commit.commit_id;
reveal.reveal_date = requestTime;
newRevealsArray.push(reveal);
});
});
return Q(revealsCollection.insertMany(newRevealsArray));
})
.then(function (result) {
// Respond with reveal's insertion date
// We know asynchronous insert actually succeeded due to {safe: true}
callback(null, {
reveal_date: result.insertedIds[0].reveal_date
});
})
.catch(function (err) {
callback(err);
})
.finally(function () {
// add general cleanup code here if necessary
})
.done();
};