UNPKG

oboe

Version:

Oboe.js reads json, giving you the objects as they are found without waiting for the stream to finish

jimhigson/oboe.js

1,663 lines (1,380 loc) • 74.1 kB

JavaScript

// this file is the concatenation of several js files. See http://github.com/jimhigson/oboe.js // for the unconcatenated source module.exports = (function () { // v1.14.4-17-gea92b18 /* Copyright (c) 2013, Jim Higson All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * Partially complete a function. * * var add3 = partialComplete( function add(a,b){return a+b}, 3 ); * * add3(4) // gives 7 * * function wrap(left, right, cen){return left + " " + cen + " " + right;} * * var pirateGreeting = partialComplete( wrap , "I'm", ", a mighty pirate!" ); * * pirateGreeting("Guybrush Threepwood"); * // gives "I'm Guybrush Threepwood, a mighty pirate!" 
*/ var partialComplete = varArgs(function( fn, args ) { // this isn't the shortest way to write this but it does // avoid creating a new array each time to pass to fn.apply, // otherwise could just call boundArgs.concat(callArgs) var numBoundArgs = args.length; return varArgs(function( callArgs ) { for (var i = 0; i < callArgs.length; i++) { args[numBoundArgs + i] = callArgs[i]; } args.length = numBoundArgs + callArgs.length; return fn.apply(this, args); }); }), /** * Compose zero or more functions: * * compose(f1, f2, f3)(x) = f1(f2(f3(x)))) * * The last (inner-most) function may take more than one parameter: * * compose(f1, f2, f3)(x,y) = f1(f2(f3(x,y)))) */ compose = varArgs(function(fns) { var fnsList = arrayAsList(fns); function next(params, curFn) { return [apply(params, curFn)]; } return varArgs(function(startParams){ return foldR(next, startParams, fnsList)[0]; }); }); /** * A more optimised version of compose that takes exactly two functions * @param f1 * @param f2 */ function compose2(f1, f2){ return function(){ return f1.call(this,f2.apply(this,arguments)); } } /** * Generic form for a function to get a property from an object * * var o = { * foo:'bar' * } * * var getFoo = attr('foo') * * fetFoo(o) // returns 'bar' * * @param {String} key the property name */ function attr(key) { return function(o) { return o[key]; }; } /** * Call a list of functions with the same args until one returns a * truthy result. Similar to the || operator. * * So: * lazyUnion([f1,f2,f3 ... fn])( p1, p2 ... pn ) * * Is equivalent to: * apply([p1, p2 ... pn], f1) || * apply([p1, p2 ... pn], f2) || * apply([p1, p2 ... pn], f3) ... apply(fn, [p1, p2 ... pn]) * * @returns the first return value that is given that is truthy. 
*/ var lazyUnion = varArgs(function(fns) { return varArgs(function(params){ var maybeValue; for (var i = 0; i < len(fns); i++) { maybeValue = apply(params, fns[i]); if( maybeValue ) { return maybeValue; } } }); }); /** * This file declares various pieces of functional programming. * * This isn't a general purpose functional library, to keep things small it * has just the parts useful for Oboe.js. */ /** * Call a single function with the given arguments array. * Basically, a functional-style version of the OO-style Function#apply for * when we don't care about the context ('this') of the call. * * The order of arguments allows partial completion of the arguments array */ function apply(args, fn) { return fn.apply(undefined, args); } /** * Define variable argument functions but cut out all that tedious messing about * with the arguments object. Delivers the variable-length part of the arguments * list as an array. * * Eg: * * var myFunction = varArgs( * function( fixedArgument, otherFixedArgument, variableNumberOfArguments ){ * console.log( variableNumberOfArguments ); * } * ) * * myFunction('a', 'b', 1, 2, 3); // logs [1,2,3] * * var myOtherFunction = varArgs(function( variableNumberOfArguments ){ * console.log( variableNumberOfArguments ); * }) * * myFunction(1, 2, 3); // logs [1,2,3] * */ function varArgs(fn){ var numberOfFixedArguments = fn.length -1, slice = Array.prototype.slice; if( numberOfFixedArguments == 0 ) { // an optimised case for when there are no fixed args: return function(){ return fn.call(this, slice.call(arguments)); } } else if( numberOfFixedArguments == 1 ) { // an optimised case for when there are is one fixed args: return function(){ return fn.call(this, arguments[0], slice.call(arguments, 1)); } } // general case // we know how many arguments fn will always take. 
Create a // fixed-size array to hold that many, to be re-used on // every call to the returned function var argsHolder = Array(fn.length); return function(){ for (var i = 0; i < numberOfFixedArguments; i++) { argsHolder[i] = arguments[i]; } argsHolder[numberOfFixedArguments] = slice.call(arguments, numberOfFixedArguments); return fn.apply( this, argsHolder); } } /** * Swap the order of parameters to a binary function * * A bit like this flip: http://zvon.org/other/haskell/Outputprelude/flip_f.html */ function flip(fn){ return function(a, b){ return fn(b,a); } } /** * Create a function which is the intersection of two other functions. * * Like the && operator, if the first is truthy, the second is never called, * otherwise the return value from the second is returned. */ function lazyIntersection(fn1, fn2) { return function (param) { return fn1(param) && fn2(param); }; } /** * A function which does nothing */ function noop(){} /** * A function which is always happy */ function always(){return true} /** * Create a function which always returns the same * value * * var return3 = functor(3); * * return3() // gives 3 * return3() // still gives 3 * return3() // will always give 3 */ function functor(val){ return function(){ return val; } } /** * This file defines some loosely associated syntactic sugar for * Javascript programming */ /** * Returns true if the given candidate is of type T */ function isOfType(T, maybeSomething){ return maybeSomething && maybeSomething.constructor === T; } var len = attr('length'), isString = partialComplete(isOfType, String); /** * I don't like saying this: * * foo !=== undefined * * because of the double-negative. I find this: * * defined(foo) * * easier to read. */ function defined( value ) { return value !== undefined; } /** * Returns true if object o has a key named like every property in * the properties array. Will give false if any are missing, or if o * is not an object. 
 */
function hasAllProperties(fieldList, o) {

   return      (o instanceof Object)
            &&
               all(function (field) {
                  return (field in o);
               }, fieldList);
}


/**
 * Like cons in Lisp: builds a list cell whose head is x and whose
 * tail is xs.
 */
function cons(x, xs) {

   /* Internally lists are linked 2-element Javascript arrays.

      Ideally the return here would be Object.freeze([x,xs])
      so that bugs related to mutation are found fast.
      However, cons is right on the critical path for
      performance and this slows oboe-mark down by
      ~25%. Under theoretical future JS engines that freeze more
      efficiently (possibly even use immutability to
      run faster) this should be considered for
      restoration.
   */

   return [x,xs];
}

/**
 * The empty list
 */
var emptyList = null,

/**
 * Get the head of a list.
 *
 * Ie, head(cons(a,b)) = a
 */
    head = attr(0),

/**
 * Get the tail of a list.
 *
 * Ie, tail(cons(a,b)) = b
 */
    tail = attr(1);


/**
 * Converts an array to a list
 *
 *    arrayAsList([a,b,c])
 *
 * is equivalent to:
 *
 *    cons(a, cons(b, cons(c, emptyList)))
 **/
function arrayAsList(inputArray){

   // reduce with flip(cons) builds the list back to front, so it is
   // reversed afterwards to restore the array's order
   return reverseList(
      inputArray.reduce(
         flip(cons),
         emptyList
      )
   );
}

/**
 * A varargs version of arrayAsList. Works a bit like list
 * in LISP.
 *
 *    list(a,b,c)
 *
 * is equivalent to:
 *
 *    cons(a, cons(b, cons(c, emptyList)))
 */
var list = varArgs(arrayAsList);

/**
 * Convert a list back to a js native array
 */
function listAsArray(list){

   // foldR visits the last item first; unshifting each item in turn
   // therefore leaves the array in list order
   return foldR( function(arraySoFar, listItem){

      arraySoFar.unshift(listItem);
      return arraySoFar;

   }, [], list );

}

/**
 * Map a function over a list, giving a new list of the results
 */
function map(fn, list) {

   return list
            ? cons(fn(head(list)), map(fn,tail(list)))
            : emptyList
            ;
}

/**
 * foldR implementation. Reduce a list down to a single value.
 *
 * @param {Function} fn     (rightEval, curVal) -> result
 * @param startValue        the result for the empty list
 * @param list              the list to reduce
 */
function foldR(fn, startValue, list) {

   return list
            ? fn(foldR(fn, startValue, tail(list)), head(list))
            : startValue
            ;
}

/**
 * foldR1 implementation. Like foldR but takes no start value: the last
 * item of the list is used as the initial value instead. head and tail
 * are read from the list unconditionally, so the list given must not be
 * empty.
 *
 * @param {Function} fn     (rightEval, curVal) -> result
 */
function foldR1(fn, list) {

   return tail(list)
            ? fn(foldR1(fn, tail(list)), head(list))
            : head(list)
            ;
}


/**
 * Return a list like the one given but with the first item for which
 * test gives a truthy result removed.
 *
 * @param list                   the list to look through
 * @param {Function} test        called with each item in turn
 * @param {Function} [removedFn] called with the item that is removed;
 *                               defaults to noop
 */
function without(list, test, removedFn) {

   return withoutInner(list, removedFn || noop);

   function withoutInner(subList, removedFn) {
      return subList
         ?  ( test(head(subList))
                  // match: report it and drop it by returning only the tail
                  ? (removedFn(head(subList)), tail(subList))
                  : cons(head(subList), withoutInner(tail(subList), removedFn))
            )
         : emptyList
         ;
   }
}

/**
 * Returns true if the given function holds for every item in
 * the list (including when the list is empty). Otherwise gives the
 * falsy value that fn returned for the first failing item.
 */
function all(fn, list) {

   return !list ||
          ( fn(head(list)) && all(fn, tail(list)) );
}

/**
 * Call every function in a list of functions with the same arguments
 *
 * This doesn't make any sense if we're doing pure functional because
 * it doesn't return anything. Hence, this is only really useful if the
 * functions being called have side-effects.
 *
 * @param fnList        list of functions
 * @param {Array} args  passed to each function via Function#apply
 */
function applyEach(fnList, args) {

   if( fnList ) {
      head(fnList).apply(null, args);

      applyEach(tail(fnList), args);
   }
}

/**
 * Reverse the order of a list
 */
function reverseList(list){

   // js re-implementation of 3rd solution from:
   //    http://www.haskell.org/haskellwiki/99_questions/Solutions/5
   function reverseInner( list, reversedAlready ) {
      if( !list ) {
         return reversedAlready;
      }

      return reverseInner(tail(list), cons(head(list), reversedAlready))
   }

   return reverseInner(list, emptyList);
}

/**
 * Find the first item in the list for which test gives a truthy result.
 * If there is none, the falsy end-of-list value is what gets returned.
 */
function first(test, list) {
   return   list &&
               (test(head(list))
                  ? head(list)
                  : first(test,tail(list)));
}

/*
   This is a slightly hacked-up browser only version of clarinet

      *  some features removed to help keep browser Oboe under
         the 5k micro-library limit
      *  plug directly into event bus

   For the original go here:
      https://github.com/dscape/clarinet
 */

/**
 * Character-by-character JSON parser. It subscribes write() to
 * STREAM_DATA and end() to STREAM_END on the given bus, and reports what
 * it finds by emitting the SAX_* events (or FAIL_EVENT on error) on that
 * same bus. All parse state lives in the closure variables declared
 * below, so one call to clarinet() parses one document.
 *
 * @param {Function} eventBus called with an event name to get an object
 *    with at least .emit and .on (that is all this function uses)
 */
function clarinet(eventBus) {
  "use strict";

  var
      // shortcut some events on the bus
      emitSaxOpenObject    = eventBus(SAX_OPEN_OBJECT).emit,
      emitSaxCloseObject   = eventBus(SAX_CLOSE_OBJECT).emit,
      emitSaxOpenArray     = eventBus(SAX_OPEN_ARRAY).emit,
      emitSaxCloseArray    = eventBus(SAX_CLOSE_ARRAY).emit,
      emitSaxKey           = eventBus(SAX_KEY).emit,
      emitSaxValue         = eventBus(SAX_VALUE).emit,
      emitFail             = eventBus(FAIL_EVENT).emit,

      MAX_BUFFER_LENGTH = 64 * 1024
  ,   stringTokenPattern = /[\\"\n]/g
  ,   _n = 0

      // states. Each is a distinct integer handed out by _n++. For the
      // literal states the trailing comment is the character that state
      // is waiting for.
  ,   BEGIN                = _n++
  ,   VALUE                = _n++ // general stuff
  ,   OPEN_OBJECT          = _n++ // {
  ,   CLOSE_OBJECT         = _n++ // }
  ,   OPEN_ARRAY           = _n++ // [
  ,   CLOSE_ARRAY          = _n++ // ]
  ,   STRING               = _n++ // ""
  ,   OPEN_KEY             = _n++ // , "a"
  ,   CLOSE_KEY            = _n++ // :
  ,   TRUE                 = _n++ // r
  ,   TRUE2                = _n++ // u
  ,   TRUE3                = _n++ // e
  ,   FALSE                = _n++ // a
  ,   FALSE2               = _n++ // l
  ,   FALSE3               = _n++ // s
  ,   FALSE4               = _n++ // e
  ,   NULL                 = _n++ // u
  ,   NULL2                = _n++ // l
  ,   NULL3                = _n++ // l
      // NOTE(review): nothing in this function ever assigns
      // NUMBER_DECIMAL_POINT to state, so its case below looks unreachable
  ,   NUMBER_DECIMAL_POINT = _n++ // .
  ,   NUMBER_DIGIT         = _n   // [0-9]

      // setup initial parser values
  ,   bufferCheckPosition  = MAX_BUFFER_LENGTH
  ,   latestError          // set by emitError; once set, write() does nothing
  ,   c                    // the character currently being examined
  ,   p                    // the character examined before c
  ,   textNode             = ""
  ,   numberNode           = ""
  ,   slashed              = false
  ,   closed               = false
  ,   state                = BEGIN
  ,   stack                = []
  ,   unicodeS             = null
  ,   unicodeI             = 0
  ,   depth                = 0
  ,   position             = 0
  ,   column               = 0  //mostly for error reporting
  ,   line                 = 1
  ;

  /**
   * Fail the parse if either accumulation buffer has grown beyond
   * MAX_BUFFER_LENGTH, then work out the position at which the check
   * next needs to run.
   */
  function checkBufferLength () {

    var maxActual = 0;

    if (textNode.length > MAX_BUFFER_LENGTH) {
      emitError("Max buffer length exceeded: textNode");
      maxActual = Math.max(maxActual, textNode.length);
    }
    if (numberNode.length > MAX_BUFFER_LENGTH) {
      emitError("Max buffer length exceeded: numberNode");
      maxActual = Math.max(maxActual, numberNode.length);
    }

    bufferCheckPosition = (MAX_BUFFER_LENGTH - maxActual)
                               + position;
  }

  eventBus(STREAM_DATA).on(write);

   /* At the end of the http content close the clarinet
    This will provide an error if the total content provided was not
    valid json, ie if not all arrays, objects and Strings closed properly */
  eventBus(STREAM_END).on(end);

  /**
   * Flush any pending text as a value, remember the error (which makes
   * later writes no-ops) and emit it on FAIL_EVENT. The message has the
   * current line, column and character appended.
   */
  function emitError (errorString) {
     if (textNode) {
        emitSaxValue(textNode);
        textNode = "";
     }

     latestError = Error(errorString + "\nLn: "+line+
                                       "\nCol: "+column+
                                       "\nChr: "+c);

     emitFail(errorReport(undefined, undefined, latestError));
  }

  /**
   * Handler for STREAM_END: emits an error unless the parser is back in
   * the VALUE state at depth zero, flushes any pending text and marks the
   * parser as closed.
   */
  function end() {
    if (state !== VALUE || depth !== 0)
      emitError("Unexpected end");

    if (textNode) {
      emitSaxValue(textNode);
      textNode = "";
    }

    closed = true;
  }

  /** True for carriage return, newline, space and tab */
  function whitespace(c){
     return c == '\r' || c == '\n' || c == ' ' || c == '\t';
  }

  /**
   * Handler for STREAM_DATA: runs the state machine over every character
   * of chunk. Parse state is kept between calls, so a token may be split
   * across two chunks.
   *
   * @param {String} chunk the next piece of the JSON text
   */
  function write (chunk) {

    // this used to throw the error but inside Oboe we will have already
    // gotten the error when it was emitted. The important thing is to
    // not continue with the parse.
    if (latestError)
      return;

    if (closed) {
       return emitError("Cannot write after close");
    }

    var i = 0;
    c = chunk[0];

    while (c) {
      p = c;
      c = chunk[i++];
      if(!c) break;

      position ++;
      if (c == "\n") {
        line ++;
        column = 0;
      } else column ++;
      switch (state) {

        case BEGIN:
          if (c === "{") state = OPEN_OBJECT;
          else if (c === "[") state = OPEN_ARRAY;
          else if (!whitespace(c))
            return emitError("Non-whitespace before {[.");
        continue;

        case OPEN_KEY:
        case OPEN_OBJECT:
          if (whitespace(c)) continue;
          if(state === OPEN_KEY) stack.push(CLOSE_KEY);
          else {
            if(c === '}') {
              // empty object: opened and closed in one go
              emitSaxOpenObject();
              emitSaxCloseObject();
              state = stack.pop() || VALUE;
              continue;
            } else  stack.push(CLOSE_OBJECT);
          }
          if(c === '"')
             state = STRING;
          else
             return emitError("Malformed object key should start with \" ");
        continue;

        case CLOSE_KEY:
        case CLOSE_OBJECT:
          if (whitespace(c)) continue;

          if(c===':') {
            if(state === CLOSE_OBJECT) {
              stack.push(CLOSE_OBJECT);

               // NOTE(review): this is a truthiness test, so when the
               // first key of an object is the empty string neither the
               // open-object nor the key event is emitted here - confirm
               // whether that is intended
               if (textNode) {
                  // was previously (in upstream Clarinet) one event
                  //  - object open came with the text of the first
                  emitSaxOpenObject();
                  emitSaxKey(textNode);
                  textNode = "";
               }
               depth++;
            } else {
               if (textNode) {
                  emitSaxKey(textNode);
                  textNode = "";
               }
            }
             state  = VALUE;
          } else if (c==='}') {
             if (textNode) {
                emitSaxValue(textNode);
                textNode = "";
             }
             emitSaxCloseObject();
            depth--;
            state = stack.pop() || VALUE;
          } else if(c===',') {
            if(state === CLOSE_OBJECT)
              stack.push(CLOSE_OBJECT);
             if (textNode) {
                emitSaxValue(textNode);
                textNode = "";
             }
             state  = OPEN_KEY;
          } else
             return emitError('Bad object');
        continue;

        case OPEN_ARRAY: // after an array there always a value
        case VALUE:
          if (whitespace(c)) continue;
          if(state===OPEN_ARRAY) {
            emitSaxOpenArray();
            depth++;
            state = VALUE;
            if(c === ']') {
              // empty array
              emitSaxCloseArray();
              depth--;
              state = stack.pop() || VALUE;
              continue;
            } else {
              stack.push(CLOSE_ARRAY);
            }
          }
               if(c === '"') state = STRING;
          else if(c === '{') state = OPEN_OBJECT;
          else if(c === '[') state = OPEN_ARRAY;
          else if(c === 't') state = TRUE;
          else if(c === 'f') state = FALSE;
          else if(c === 'n') state = NULL;
          else if(c === '-') { // keep and continue
            numberNode += c;
          } else if(c==='0') {
            numberNode += c;
            state = NUMBER_DIGIT;
          } else if('123456789'.indexOf(c) !== -1) {
            numberNode += c;
            state = NUMBER_DIGIT;
          } else
            return emitError("Bad value");
        continue;

        case CLOSE_ARRAY:
          if(c===',') {
            stack.push(CLOSE_ARRAY);
             if (textNode) {
                emitSaxValue(textNode);
                textNode = "";
             }
             state  = VALUE;
          } else if (c===']') {
             if (textNode) {
                emitSaxValue(textNode);
                textNode = "";
             }
             emitSaxCloseArray();
            depth--;
            state = stack.pop() || VALUE;
          } else if (whitespace(c))
              continue;
          else
             return emitError('Bad array');
        continue;

        case STRING:
          // thanks thejh, this is an about 50% performance improvement.
          // starti marks where the not-yet-copied run of plain characters
          // begins within chunk
          var starti              = i-1;

          STRING_BIGLOOP: while (true) {

            // zero means "no unicode active". 1-4 mean "parse some more". end after 4.
            while (unicodeI > 0) {
              unicodeS += c;
              c = chunk.charAt(i++);
              if (unicodeI === 4) {
                // TODO this might be slow? well, probably not used too often anyway
                textNode += String.fromCharCode(parseInt(unicodeS, 16));
                unicodeI = 0;
                starti = i-1;
              } else {
                unicodeI++;
              }
              // we can just break here: no stuff we skipped that still has to be sliced out or so
              if (!c) break STRING_BIGLOOP;
            }
            if (c === '"' && !slashed) {
              // closing quote: go back to whatever state was waiting
              state = stack.pop() || VALUE;
              textNode += chunk.substring(starti, i-1);
              // an empty string would never pass the later
              // `if (textNode)` checks, so it is emitted straight away
              if(!textNode) {
                 emitSaxValue("");
              }
              break;
            }
            if (c === '\\' && !slashed) {
              slashed = true;
              textNode += chunk.substring(starti, i-1);
               c = chunk.charAt(i++);
              if (!c) break;
            }
            if (slashed) {
              slashed = false;
                   if (c === 'n') { textNode += '\n'; }
              else if (c === 'r') { textNode += '\r'; }
              else if (c === 't') { textNode += '\t'; }
              else if (c === 'f') { textNode += '\f'; }
              else if (c === 'b') { textNode += '\b'; }
              else if (c === 'u') {
                // \uxxxx. meh!
                unicodeI = 1;
                unicodeS = '';
              } else {
                textNode += c;
              }
              c = chunk.charAt(i++);
              starti = i-1;
              if (!c) break;
              else continue;
            }

            // jump ahead to the next backslash, quote or newline
            stringTokenPattern.lastIndex = i;
            var reResult = stringTokenPattern.exec(chunk);
            if (!reResult) {
              // none in the rest of this chunk: keep it all and wait
              // for the next write
              i = chunk.length+1;
              textNode += chunk.substring(starti, i-1);
              break;
            }
            i = reResult.index+1;
            c = chunk.charAt(reResult.index);
            if (!c) {
              textNode += chunk.substring(starti, i-1);
              break;
            }
          }
        continue;

        case TRUE:
          if (!c)  continue; // strange buffers
          if (c==='r') state = TRUE2;
          else
             return emitError( 'Invalid true started with t'+ c);
        continue;

        case TRUE2:
          if (!c)  continue;
          if (c==='u') state = TRUE3;
          else
             return emitError('Invalid true started with tr'+ c);
        continue;

        case TRUE3:
          if (!c) continue;
          if(c==='e') {
            emitSaxValue(true);
            state = stack.pop() || VALUE;
          } else
             return emitError('Invalid true started with tru'+ c);
        continue;

        case FALSE:
          if (!c)  continue;
          if (c==='a') state = FALSE2;
          else
             return emitError('Invalid false started with f'+ c);
        continue;

        case FALSE2:
          if (!c)  continue;
          if (c==='l') state = FALSE3;
          else
             return emitError('Invalid false started with fa'+ c);
        continue;

        case FALSE3:
          if (!c)  continue;
          if (c==='s') state = FALSE4;
          else
             return emitError('Invalid false started with fal'+ c);
        continue;

        case FALSE4:
          if (!c)  continue;
          if (c==='e') {
            emitSaxValue(false);
            state = stack.pop() || VALUE;
          } else
             return emitError('Invalid false started with fals'+ c);
        continue;

        case NULL:
          if (!c)  continue;
          if (c==='u') state = NULL2;
          else
             return emitError('Invalid null started with n'+ c);
        continue;

        case NULL2:
          if (!c)  continue;
          if (c==='l') state = NULL3;
          else
             return emitError('Invalid null started with nu'+ c);
        continue;

        case NULL3:
          if (!c) continue;
          if(c==='l') {
            emitSaxValue(null);
            state = stack.pop() || VALUE;
          } else
             return emitError('Invalid null started with nul'+ c);
        continue;

        case NUMBER_DECIMAL_POINT:
          if(c==='.') {
            numberNode += c;
            state       = NUMBER_DIGIT;
          } else
             return emitError('Leading zero not followed by .');
        continue;

        case NUMBER_DIGIT:
          if('0123456789'.indexOf(c) !== -1) numberNode += c;
          else if (c==='.') {
            if(numberNode.indexOf('.')!==-1)
               return emitError('Invalid number has two dots');
            numberNode += c;
          } else if (c==='e' || c==='E') {
            if(numberNode.indexOf('e')!==-1 ||
               numberNode.indexOf('E')!==-1 )
               return emitError('Invalid number has two exponential');
            numberNode += c;
          } else if (c==="+" || c==="-") {
            // a sign is only accepted directly after the exponent marker
            if(!(p==='e' || p==='E'))
               return emitError('Invalid symbol in number');
            numberNode += c;
          } else {
            // c is not part of the number: emit what was collected and
            // have the character looked at again in the restored state
            if (numberNode) {
              emitSaxValue(parseFloat(numberNode));
              numberNode = "";
            }
            i--; // go back one
            state = stack.pop() || VALUE;
          }
        continue;

        default:
          return emitError("Unknown state: " + state);
      }
    }
    // the buffer sizes are checked once per chunk, after the loop
    if (position >= bufferCheckPosition)
      checkBufferLength();
  }
}


/**
 * A bridge used to assign stateless functions to listen to clarinet.
 *
 * As well as the parameter from clarinet, each callback will also be passed
 * the result of the last callback.
 *
 * This may also be used to clear all listeners by assigning zero handlers:
 *
 *    ascentManager( clarinet, {} )
 *
 * @param {Function} oboeBus  the event bus to listen on
 * @param {Object} handlers   map of event name to function(state, param);
 *                            whatever a handler returns becomes the state
 *                            given to the next handler that is called
 */
function ascentManager(oboeBus, handlers){
   "use strict";

   var // a unique object, used to register the listeners and later to
       // remove exactly those listeners again
       id = {},
       state;

   function nextState(handler) {
      return function(param){
         state = handler( state, param);
      }
   }

   for( var i in handlers ) {

      oboeBus(i).on(nextState(handlers[i]), id);
   }

   // on abort, remove every listener that was registered above
   oboeBus(ABORTING).on(function(){

      for( var i in handlers ) {
         oboeBus(i).un(id);
      }
   });
}

/**
 * Get the transport to make requests with: Node's http module.
 */
function httpTransport(){
   return require('http');
}

/**
 * A wrapper around the browser XmlHttpRequest object that raises an
 * event whenever a new part of the response is available.
 *
 * In older browsers progressive reading is impossible so all the
 * content is given in a single call. For newer ones several events
 * should be raised, allowing progressive interpretation of the response.
 *
 * @param {Function} oboeBus an event bus local to this Oboe instance
 * @param {XMLHttpRequest} http the http implementation to use as the transport.
Under normal * operation, will have been created using httpTransport() above * and therefore be Node's http * but for tests a stub may be provided instead. * @param {String} method one of 'GET' 'POST' 'PUT' 'PATCH' 'DELETE' * @param {String} contentSource the url to make a request to, or a stream to read from * @param {String|Null} data some content to be sent with the request. * Only valid if method is POST or PUT. * @param {Object} [headers] the http request headers to send */ function streamingHttp(oboeBus, http, method, contentSource, data, headers) { "use strict"; function readStreamToEventBus(readableStream) { // use stream in flowing mode readableStream.on('data', function (chunk) { oboeBus(STREAM_DATA).emit( chunk.toString() ); }); readableStream.on('end', function() { oboeBus( STREAM_END ).emit(); }); } function readStreamToEnd(readableStream, callback){ var content = ''; readableStream.on('data', function (chunk) { content += chunk.toString(); }); readableStream.on('end', function() { callback( content ); }); } function fetchHttpUrl( url ) { if( !contentSource.match(/http:\/\//) ) { contentSource = 'http://' + contentSource; } var parsedUrl = require('url').parse(contentSource); var req = http.request({ hostname: parsedUrl.hostname, port: parsedUrl.port, path: parsedUrl.path, method: method, headers: headers }); req.on('response', function(res){ var statusCode = res.statusCode, sucessful = String(statusCode)[0] == 2; oboeBus(HTTP_START).emit( res.statusCode, res.headers); if( sucessful ) { readStreamToEventBus(res) } else { readStreamToEnd(res, function(errorBody){ oboeBus(FAIL_EVENT).emit( errorReport( statusCode, errorBody ) ); }); } }); req.on('error', function(e) { oboeBus(FAIL_EVENT).emit( errorReport(undefined, undefined, e ) ); }); oboeBus(ABORTING).on( function(){ req.abort(); }); if( data ) { req.write(data); } req.end(); } if( isString(contentSource) ) { fetchHttpUrl(contentSource); } else { // contentsource is a stream 
readStreamToEventBus(contentSource); } } var jsonPathSyntax = (function() { var /** * Export a regular expression as a simple function by exposing just * the Regex#exec. This allows regex tests to be used under the same * interface as differently implemented tests, or for a user of the * tests to not concern themselves with their implementation as regular * expressions. * * This could also be expressed point-free as: * Function.prototype.bind.bind(RegExp.prototype.exec), * * But that's far too confusing! (and not even smaller once minified * and gzipped) */ regexDescriptor = function regexDescriptor(regex) { return regex.exec.bind(regex); } /** * Join several regular expressions and express as a function. * This allows the token patterns to reuse component regular expressions * instead of being expressed in full using huge and confusing regular * expressions. */ , jsonPathClause = varArgs(function( componentRegexes ) { // The regular expressions all start with ^ because we // only want to find matches at the start of the // JSONPath fragment we are inspecting componentRegexes.unshift(/^/); return regexDescriptor( RegExp( componentRegexes.map(attr('source')).join('') ) ); }) , possiblyCapturing = /(\$?)/ , namedNode = /([\w-_]+|\*)/ , namePlaceholder = /()/ , nodeInArrayNotation = /\["([^"]+)"\]/ , numberedNodeInArrayNotation = /\[(\d+|\*)\]/ , fieldList = /{([\w ]*?)}/ , optionalFieldList = /(?:{([\w ]*?)})?/ // foo or * , jsonPathNamedNodeInObjectNotation = jsonPathClause( possiblyCapturing, namedNode, optionalFieldList ) // ["foo"] , jsonPathNamedNodeInArrayNotation = jsonPathClause( possiblyCapturing, nodeInArrayNotation, optionalFieldList ) // [2] or [*] , jsonPathNumberedNodeInArrayNotation = jsonPathClause( possiblyCapturing, numberedNodeInArrayNotation, optionalFieldList ) // {a b c} , jsonPathPureDuckTyping = jsonPathClause( possiblyCapturing, namePlaceholder, fieldList ) // .. , jsonPathDoubleDot = jsonPathClause(/\.\./) // . 
, jsonPathDot = jsonPathClause(/\./) // ! , jsonPathBang = jsonPathClause( possiblyCapturing, /!/ ) // nada! , emptyString = jsonPathClause(/$/) ; /* We export only a single function. When called, this function injects into another function the descriptors from above. */ return function (fn){ return fn( lazyUnion( jsonPathNamedNodeInObjectNotation , jsonPathNamedNodeInArrayNotation , jsonPathNumberedNodeInArrayNotation , jsonPathPureDuckTyping ) , jsonPathDoubleDot , jsonPathDot , jsonPathBang , emptyString ); }; }()); /** * Get a new key->node mapping * * @param {String|Number} key * @param {Object|Array|String|Number|null} node a value found in the json */ function namedNode(key, node) { return {key:key, node:node}; } /** get the key of a namedNode */ var keyOf = attr('key'); /** get the node from a namedNode */ var nodeOf = attr('node'); /** * This file provides various listeners which can be used to build up * a changing ascent based on the callbacks provided by Clarinet. It listens * to the low-level events from Clarinet and emits higher-level ones. * * The building up is stateless so to track a JSON file * ascentManager.js is required to store the ascent state * between calls. */ /** * A special value to use in the path list to represent the path 'to' a root * object (which doesn't really have any path). This prevents the need for * special-casing detection of the root object and allows it to be treated * like any other object. We might think of this as being similar to the * 'unnamed root' domain ".", eg if I go to * http://en.wikipedia.org./wiki/En/Main_page the dot after 'org' deliminates * the unnamed root of the DNS. * * This is kept as an object to take advantage that in Javascript's OO objects * are guaranteed to be distinct, therefore no other object can possibly clash * with this one. Strings, numbers etc provide no such guarantee. **/ var ROOT_PATH = {}; /** * Create a new set of handlers for clarinet's events, bound to the emit * function given. 
*/ function incrementalContentBuilder( oboeBus ) { var emitNodeOpened = oboeBus(NODE_OPENED).emit, emitNodeClosed = oboeBus(NODE_CLOSED).emit, emitRootOpened = oboeBus(ROOT_PATH_FOUND).emit, emitRootClosed = oboeBus(ROOT_NODE_FOUND).emit; function arrayIndicesAreKeys( possiblyInconsistentAscent, newDeepestNode) { /* for values in arrays we aren't pre-warned of the coming paths (Clarinet gives no call to onkey like it does for values in objects) so if we are in an array we need to create this path ourselves. The key will be len(parentNode) because array keys are always sequential numbers. */ var parentNode = nodeOf( head( possiblyInconsistentAscent)); return isOfType( Array, parentNode) ? keyFound( possiblyInconsistentAscent, len(parentNode), newDeepestNode ) : // nothing needed, return unchanged possiblyInconsistentAscent ; } function nodeOpened( ascent, newDeepestNode ) { if( !ascent ) { // we discovered the root node, emitRootOpened( newDeepestNode); return keyFound( ascent, ROOT_PATH, newDeepestNode); } // we discovered a non-root node var arrayConsistentAscent = arrayIndicesAreKeys( ascent, newDeepestNode), ancestorBranches = tail( arrayConsistentAscent), previouslyUnmappedName = keyOf( head( arrayConsistentAscent)); appendBuiltContent( ancestorBranches, previouslyUnmappedName, newDeepestNode ); return cons( namedNode( previouslyUnmappedName, newDeepestNode ), ancestorBranches ); } /** * Add a new value to the object we are building up to represent the * parsed JSON */ function appendBuiltContent( ancestorBranches, key, node ){ nodeOf( head( ancestorBranches))[key] = node; } /** * For when we find a new key in the json. * * @param {String|Number|Object} newDeepestName the key. If we are in an * array will be a number, otherwise a string. May take the special * value ROOT_PATH if the root node has just been found * * @param {String|Number|Object|Array|Null|undefined} [maybeNewDeepestNode] * usually this won't be known so can be undefined. 
Can't use null * to represent unknown because null is a valid value in JSON **/ function keyFound(ascent, newDeepestName, maybeNewDeepestNode) { if( ascent ) { // if not root // If we have the key but (unless adding to an array) no known value // yet. Put that key in the output but against no defined value: appendBuiltContent( ascent, newDeepestName, maybeNewDeepestNode ); } var ascentWithNewPath = cons( namedNode( newDeepestName, maybeNewDeepestNode), ascent ); emitNodeOpened( ascentWithNewPath); return ascentWithNewPath; } /** * For when the current node ends */ function nodeClosed( ascent ) { emitNodeClosed( ascent); // pop the complete node and its path off the list. If we have // nothing left emit that the root closed return tail( ascent) || emitRootClosed(nodeOf(head(ascent))); } var contentBuilderHandlers = {}; contentBuilderHandlers[SAX_OPEN_OBJECT] = function (ascent) { return nodeOpened(ascent, {}); }; contentBuilderHandlers[SAX_OPEN_ARRAY] = function (ascent) { return nodeOpened(ascent, []); }; contentBuilderHandlers[SAX_KEY] = keyFound; contentBuilderHandlers[SAX_VALUE] = compose2( nodeClosed, nodeOpened ); contentBuilderHandlers[SAX_CLOSE_OBJECT] = nodeClosed; contentBuilderHandlers[SAX_CLOSE_ARRAY] = nodeClosed; return contentBuilderHandlers; } /** * The jsonPath evaluator compiler used for Oboe.js. * * One function is exposed. This function takes a String JSONPath spec and * returns a function to test candidate ascents for matches. * * String jsonPath -> (List ascent) -> Boolean|Object * * This file is coded in a pure functional style. That is, no function has * side effects, every function evaluates to the same value for the same * arguments and no variables are reassigned. 
*/ // the call to jsonPathSyntax injects the token syntaxes that are needed // inside the compiler var jsonPathCompiler = jsonPathSyntax(function (pathNodeSyntax, doubleDotSyntax, dotSyntax, bangSyntax, emptySyntax ) { var CAPTURING_INDEX = 1; var NAME_INDEX = 2; var FIELD_LIST_INDEX = 3; var headKey = compose2(keyOf, head), headNode = compose2(nodeOf, head); /** * Create an evaluator function for a named path node, expressed in the * JSONPath like: * foo * ["bar"] * [2] */ function nameClause(previousExpr, detection ) { var name = detection[NAME_INDEX], matchesName = ( !name || name == '*' ) ? always : function(ascent){return headKey(ascent) == name}; return lazyIntersection(matchesName, previousExpr); } /** * Create an evaluator function for a a duck-typed node, expressed like: * * {spin, taste, colour} * .particle{spin, taste, colour} * *{spin, taste, colour} */ function duckTypeClause(previousExpr, detection) { var fieldListStr = detection[FIELD_LIST_INDEX]; if (!fieldListStr) return previousExpr; // don't wrap at all, return given expr as-is var hasAllrequiredFields = partialComplete( hasAllProperties, arrayAsList(fieldListStr.split(/\W+/)) ), isMatch = compose2( hasAllrequiredFields, headNode ); return lazyIntersection(isMatch, previousExpr); } /** * Expression for $, returns the evaluator function */ function capture( previousExpr, detection ) { // extract meaning from the detection var capturing = !!detection[CAPTURING_INDEX]; if (!capturing) return previousExpr; // don't wrap at all, return given expr as-is return lazyIntersection(previousExpr, head); } /** * Create an evaluator function that moves onto the next item on the * lists. This function is the place where the logic to move up a * level in the ascent exists. * * Eg, for JSONPath ".foo" we need skip1(nameClause(always, [,'foo'])) */ function skip1(previousExpr) { if( previousExpr == always ) { /* If there is no previous expression this consume command is at the start of the jsonPath. 
Since JSONPath specifies what we'd like to find but not necessarily everything leading down to it, when running out of JSONPath to check against we default to true */ return always; } /** return true if the ascent we have contains only the JSON root, * false otherwise */ function notAtRoot(ascent){ return headKey(ascent) != ROOT_PATH; } return lazyIntersection( /* If we're already at the root but there are more expressions to satisfy, can't consume any more. No match. This check is why none of the other exprs have to be able to handle empty lists; skip1 is the only evaluator that moves onto the next token and it refuses to do so once it reaches the last item in the list. */ notAtRoot, /* We are not at the root of the ascent yet. Move to the next level of the ascent by handing only the tail to the previous expression */ compose2(previousExpr, tail) ); } /** * Create an evaluator function for the .. (double dot) token. Consumes * zero or more levels of the ascent, the fewest that are required to find * a match when given to previousExpr. */ function skipMany(previousExpr) { if( previousExpr == always ) { /* If there is no previous expression this consume command is at the start of the jsonPath. Since JSONPath specifies what we'd like to find but not necessarily everything leading down to it, when running out of JSONPath to check against we default to true */ return always; } var // In JSONPath .. is equivalent to !.. so if .. reaches the root // the match has succeeded. Ie, we might write ..foo or !..foo // and both should match identically. 
terminalCaseWhenArrivingAtRoot = rootExpr(), terminalCaseWhenPreviousExpressionIsSatisfied = previousExpr, recursiveCase = skip1(skipManyInner), cases = lazyUnion( terminalCaseWhenArrivingAtRoot , terminalCaseWhenPreviousExpressionIsSatisfied , recursiveCase ); function skipManyInner(ascent) { if( !ascent ) { // have gone past the start, not a match: return false; } return cases(ascent); } return skipManyInner; } /** * Generate an evaluator for ! - matches only the root element of the json * and ignores any previous expressions since nothing may precede !. */ function rootExpr() { return function(ascent){ return headKey(ascent) == ROOT_PATH; }; } /** * Generate a statement wrapper to sit around the outermost * clause evaluator. * * Handles the case where the capturing is implicit because the JSONPath * did not contain a '$' by returning the last node. */ function statementExpr(lastClause) { return function(ascent) { // kick off the evaluation by passing through to the last clause var exprMatch = lastClause(ascent); return exprMatch === true ? head(ascent) : exprMatch; }; } /** * For when a token has been found in the JSONPath input. * Compiles the parser for that token and returns in combination with the * parser already generated. 
* * @param {Function} exprs a list of the clause evaluator generators for * the token that was found * @param {Function} parserGeneratedSoFar the parser already found * @param {Array} detection the match given by the regex engine when * the feature was found */ function expressionsReader( exprs, parserGeneratedSoFar, detection ) { // if exprs is zero-length foldR will pass back the // parserGeneratedSoFar as-is so we don't need to treat // this as a special case return foldR( function( parserGeneratedSoFar, expr ){ return expr(parserGeneratedSoFar, detection); }, parserGeneratedSoFar, exprs ); } /** * If jsonPath matches the given detector function, creates a function which * evaluates against every clause in the clauseEvaluatorGenerators. The * created function is propagated to the onSuccess function, along with *