UNPKG

@knod/prose-stepper

Version:

Navigate through the words and sentences of prose text, stepping backward and forward sequentially

github.com/knod/prose-stepper

knod/prose-stepper

651 lines (516 loc) • 24.4 kB

JavaScript

/* prose-stepper.js
 *
 * Step back and forth through sentences and words.
 * Also splits up the words into fragments if needed.
 */

(function (root, stepFactory) {  // root is usually `window`
    if (typeof define === 'function' && define.amd) {  // amd if possible
        // AMD. Register as an anonymous module.
        define( ['@knod/hyphenaxe'], function (hyphenaxe) {
            return ( root.ProseStepper = stepFactory(hyphenaxe) );
        });
    } else if (typeof module === 'object' && module.exports) {  // Node-ish next
        // Node. Does not work with strict CommonJS, but only CommonJS-like
        // environments that support module.exports, like Node.
        module.exports = stepFactory( require('@knod/hyphenaxe') );
    } else {  // Global if nothing else
        // Browser globals
        root.ProseStepper = stepFactory( root.hyphenaxe );
    }
}(
    // A bare top-level `this` is `undefined` inside ES modules (and some
    // strict bundler wrappers), which made the "browser globals" branch
    // throw. Resolve the global object explicitly, keeping `this` as the
    // last resort for very old environments.
    typeof globalThis !== 'undefined' ? globalThis
        : typeof self !== 'undefined' ? self
        : this,
    function ( split ) {
    /* ( func ) -> ProseStepper
    *
    * `split` is the word splitter (hyphenaxe). It is called as
    * `split( word, maxNumCharacters [, options] )` and is expected to
    * return an array of strings.
    */

    "use strict";

    var ProseStepper = function ( state ) {
    /* ( {} ) -> ProseStepper
    *
    * `state` is a reference to a never-destroyed object containing
    * options for the word splitter, as well as for the ProseStepper
    * instance itself.
    *
    * Provides commands for getting the sentences/words passed into
    * its `.process()`.
    *
    * For more info, see the README.
    */
        var pst = {};

        // =====================================
        // EXTERNAL
        // =====================================
        // TODO: Discuss external availability of these values
        pst.index    = 0;             // Index of current word in the flat `positions` list
        pst.position = [ 0, 0, 0 ];   // [ sentence index, word index, fragment index ]
        pst.rawWord  = null;          // Current un-split word
        // `pst.fragments` isn't just a string. It's not from the sentence/word
        // array, it's a word once it has been fragmented into a list of strings
        pst.fragments = null;  // [ Str ]


        // =====================================
        // INTERNAL
        // =====================================
        // `sentences` is [[Str]]; `positions` is a flat list of
        // [ sentence index, word index ] pairs, one per word.
        var sentences, positions;

        // Just the required stuff. Other stuff can be undefined.
        var defaults = {
            maxNumCharacters:      13,
            minLengthForSeparator: 3
        };

        var oldState,               // Snapshot of relevant props, for change detection
            stateChanged  = false,  // Sticky flag, see `handleStateChange()`
            relevantProps = [ 'maxNumCharacters', 'minLengthForSeparator', 'separator', 'fractionOfMax', 'redistribute' ];


        // =====================================
        // SET UP NEW DATA
        // =====================================

        pst.process = function ( sentenceArray ) {
        /* ( [[Str]] ) -> ProseStepper
        *
        * Creates maps/arrays that will be used to jump around, then
        * restarts at the first word.
        * Throws ReferenceError/TypeError if `sentenceArray` doesn't
        * look like an array of arrays of strings.
        */
            notArrayOfArraysOfStringsErrors( sentenceArray );

            // Store (shallow) clone of array. Can't handle changes atm.
            sentences = pst._sentences = sentenceArray.slice(0);

            // Empty in place so `pst._positions` keeps pointing at the same array
            positions.splice( 0, positions.length );

            for ( let senti = 0; senti < sentences.length; senti++ ) {
                let sentence = sentences[ senti ];
                for ( let wordi = 0; wordi < sentence.length; wordi++ ) {
                    positions.push([ senti, wordi ]);
                }
            }

            pst.restart();

            return pst;
        };  // End pst.process()


        // =====================================
        // GETS
        // =====================================

        pst.getProgressions = function () {
        /* ( None ) -> [ {}, {}, {} ]
        *
        * Returns an array containing 3 objects (sentence, word,
        * fragment), each with the `total` number of items in their
        * group and their own `index` location within that group.
        *
        * TODO: Object containing 3 objects instead? Nested objects?
        * Nested arrays?
        * TODO: Include relative progress calculations too?
        */
            var pos = pst.position;

            return [
                { total: sentences.length,            index: pos[0] },
                { total: sentences[ pos[0] ].length,  index: pos[1] },
                { total: pst.fragments.length,        index: pos[2] }
            ];
        };  // End pst.getProgressions()


        pst.getRelativeProgress = function () {
        /* ( None ) -> [ Float ]
        *
        * Returns an array containing
        * 1. sentence index/num sentences
        * 2. word index/num words in sentence
        * 3. fragment index/num fragments in word
        * (each index is counted from 1, so the last item gives 1)
        */
            var progs = pst.getProgressions();

            var sentPr = (progs[0].index + 1) / progs[0].total,  // sentence/num sentences
                wordPr = (progs[1].index + 1) / progs[1].total,  // word/num sentence words
                fragPr = (progs[2].index + 1) / progs[2].total;  // fragment/num word fragments

            return [ sentPr, wordPr, fragPr ];
        };  // End pst.getRelativeProgress()


        pst.getProgress = function () {
        /* ( None ) -> Float
        *
        * Should start at some fraction and reach 1 at the last
        * fragment. Uses word progress and fragment progress to make
        * sure we get 1 at the last fragment, not just the last word
        */
            // +1 so that one-fragment words don't have 0 fragment progress
            // Can't do length - 1 in case the length is 1 (0 denominator)
            var fragProgress = (pst.position[2] + 1) / pst.fragments.length,
                progress     = (pst.index + fragProgress) / positions.length;

            return progress;
        };  // End pst.getProgress()

        pst.getLength = function () { return positions.length; };
        pst.getIndex  = function () { return pst.index; };  // For consistency


        // =====================================
        // RUNTIME
        // =====================================
        // Traveling the words/sentences (for external use)

        pst.setState = function ( newState ) {
        /* ( {} ) -> ProseStepper
        *
        * Be careful, this is not where most of the errors will
        * happen. Some won't happen till you try to run stuff
        * (some happen in the splitter).
        */
            handleStateChange( newState );
            return pst;
        };  // End pst.setState()


        pst.restart = function () {
        /* ( None ) -> ProseStepper
        *
        * Resets values and makes calculations based on those values.
        * TODO: Discuss returning first fragment instead
        */
            pst.index    = 0;
            pst.position = [ 0, 0, 0 ];
            pst.rawWord  = pst._stepWord( pst.index );
            // NOTE(review): unlike `._getTheSplit()`, this split doesn't pass
            // the separator/fractionOfMax/redistribute options, so `.fragments`
            // right after a restart may differ from what the next
            // `.getFragment()` computes - confirm whether that's intended.
            pst.fragments = pst._split( pst.rawWord, getStateProp( pst._state, 'maxNumCharacters' ) );

            return pst;
        };  // End pst.restart()


        pst.getFragment = function ( changesOrIndex ) {
        /* ( [int, int, int] or int ) -> Str
        *
        * A number is an absolute word index (negative counts back from
        * the end). An array is [ sentence change, word change,
        * fragment change ]; only one of the ints can be something
        * other than 0 - the first non-zero one wins. Missing trailing
        * entries count as 0. [0, 0, 0] returns the current fragment.
        * TODO: Is another method ever needed? User testing needed.
        *
        * This thing is complicated. See the README.
        */
            notArrayOfIntsOrIntErrors( changesOrIndex );  // Throw errors if needed

            var pos       = pst.position,
                fragIndex = 0;

            // If state is changed, store new value internally,
            // re-fragment word and start at the beginning of word
            // Will set stateChanged to `true` if necessary
            stateChanged = handleStateChange( pst._state );

            // Plain index change/jump
            if ( typeof changesOrIndex === 'number' ) {
                pst.rawWord = pst._indexJump( changesOrIndex );

            // !!! CAN ONLY CHANGE ONE POSITION AT A TIME !!! \\
            } else {
                // Validation guarantees every present entry is an int, so
                // `|| 0` only turns missing entries (short arrays) into
                // "no change". Previously those leaked `undefined` into the
                // arithmetic and corrupted `.index`/`.position` with NaN.
                var sentChange = changesOrIndex[0] || 0,
                    wordChange = changesOrIndex[1] || 0,
                    fragChange = changesOrIndex[2] || 0;

                if ( sentChange !== 0 ) {  // sentence change
                    pst.rawWord = pst._stepSentence( sentChange );

                } else if ( wordChange !== 0 ) {  // word change
                    pst.rawWord = pst._stepWord( pst.index + wordChange );

                } else if ( fragChange !== 0 ) {  // fragment change
                    // This is confusing because it invisibly changes `.rawWord` sometimes
                    fragIndex = pst._stepFragment( fragChange );

                // If no change, get whatever's current
                } else {  // currently, [0,0,0] === get current fragment
                    fragIndex = pos[2];
                }
            }  // end if index or which position changed

            // Get the array of strings that is the split word
            pst.fragments = pst._getTheSplit();

            // If any state property was changed, the old fragment
            // position's should be reset. Things have changed!
            if ( stateChanged ) {
                pos[2] = 0;
                stateChanged = false;  // Reset for next time
            } else {
                pos[2] = fragIndex;
            }

            return pst.fragments[ pos[2] ];
        };  // End pst.getFragment()


        // =====================================
        // TRAVEL INTERNAL HELPERS
        // =====================================

        pst._getTheSplit = function () {
        /* ( None ) -> [ Str ]
        *
        * Uses all internal stuff to split the `.rawWord` and return
        * it as an array of strings.
        */
            // Values for splitter
            var sep      = getStateProp( pst._state, 'separator' );
            var maxChars = getStateProp( pst._state, 'maxNumCharacters' );

            // Fragments this short don't get a separator at all
            if ( maxChars < getStateProp( pst._state, 'minLengthForSeparator' ) ) {
                sep = '';
            }

            var toPass = {
                separator:     sep,
                fractionOfMax: getStateProp( pst._state, 'fractionOfMax' ),
                redistribute:  getStateProp( pst._state, 'redistribute' )
            };

            return pst._split( pst.rawWord, maxChars, toPass );
        };  // End pst._getTheSplit()


        pst._indexJump = function ( index ) {
        /* ( int ) -> Str
        *
        * Return a word at that "index" as if the collection were flat.
        * Circle to the end of the collection if the index is negative.
        * Don't ever go past the end of the collection (behavior being
        * debated).
        */
            // act like array indexes - negative numbers come back from the end
            if ( index < 0 ) { index = pst.getLength() + index; }
            return pst._stepWord( index );
        };  // End pst._indexJump()


        pst._stepFragment = function ( fragChange ) {
        /* ( int ) -> Int
        *
        * NOTE: This returns an index number for the fragment position
        * (`.position[2]`), not a new word (unlike other steps/jumps)
        * May also change the `.rawWord` value.
        *
        * Progresses forward or backward through fragments, but with a
        * maximum change of one word, no matter how big the change.
        * Returns the new fragment position, which will be 0 if there's
        * a word change.
        */
            var pos         = pst.position,
                fragi       = pos[2] + fragChange,
                returnIndex = 0;

            // if current fragment starts new word
            // Note: doesn't skip more than one word at a time and starts
            // at the beginning of the next word, no matter the step value
            if ( fragi >= pst.fragments.length ) {

                // If at last word, stay at/go to end?
                var wordProgress = (pst.getIndex() + 1) / pst.getLength();
                // Had already reached last word (100% of words)
                if ( wordProgress === 1 ) {
                    // Since we were already in the last word, we have the right .fragments
                    returnIndex = pst.fragments.length - 1;
                    // If maxChars changed, this will be changed into 0 anyway, so
                    // no worries about wrong indexes because of that change

                // Otherwise start new word
                } else {
                    pst.rawWord = pst._stepWord( pst.index + 1 );
                }

            } else if ( fragi < 0 ) {
                // Back past the start of this word: start of the previous word
                pst.rawWord = pst._stepWord( pst.index - 1 );

            } else {
                // Don't change index or current word, just current fragment position
                // The only place where pos[2] doesn't end up at 0
                returnIndex = fragi;
            }

            return returnIndex;
        };  // End pst._stepFragment()


        pst._stepWord = function ( index ) {
        /* ( int ) -> Str
        *
        * Return a string that is the word at that `index` position
        * (clamped to the collection). Updates `.index` and the
        * sentence/word parts of `.position`.
        * A ton of things use this.
        * ??: Name? jumpWord, getWord, other?
        */
            pst.index = pst.normalizeIndex( index );

            var pos = positions[ pst.index ];
            // Mutate in place - `.getFragment()` holds a reference to `.position`
            pst.position[0] = pos[0];
            pst.position[1] = pos[1];

            return sentences[ pst.position[0] ][ pst.position[1] ];
        };  // End pst._stepWord()


        pst._stepSentence = function ( sentenceChange ) {
        /* ( int ) -> Str
        *
        * Return a string that is the word reached with that
        * `sentenceChange`. Some more details are in the README
        *
        * NOTE(review): a `sentenceChange` of 0 returns the number 0,
        * not a word. `.getFragment()` never passes 0, so this is only
        * visible to direct callers - confirm before relying on it.
        */
            if ( sentenceChange === 0 ) { return 0; }

            var pos   = [ pst.position[0], pst.position[1] ],
                senti = pos[0],
                wordi = pos[1];

            // If in the last sentence, go to the last word
            if ( (sentenceChange + senti) > (sentences.length - 1) ) {
                senti = sentences.length - 1;
                wordi = sentences[ senti ].length - 1;

            } else {
                // If we're in the middle of a sentence and we're only going
                // back one step, go back to the beginning of the sentence
                // (no change to sentence), otherwise change sentence
                var toStartOfCurrent = ( sentenceChange === -1 && wordi > 0 );
                if ( !toStartOfCurrent ) { senti += sentenceChange; }
                // Either way, word is first word of sentence
                wordi = 0;
            }  // end if at last sentence

            pos[1] = wordi;
            pos[0] = pst.normalizeSentencePos( senti );

            var newIndex = pst._sentenceChangeToIndex( sentenceChange, pos );
            // Target position not found: stay where we are
            if ( newIndex === null ) { newIndex = pst.index; }

            return pst._stepWord( newIndex );
        };  // End pst._stepSentence()


        pst._sentenceChangeToIndex = function ( sentenceChange, newPos ) {
        /* ( int, [int, int] ) -> Int or null
        *
        * Given the direction of change and the position desired, find the
        * index of the new position by walking `positions` from the
        * current index in that direction. `null` if it isn't found.
        * Only used for sentence changes. If we need something else,
        * we'll see about that then. Just trying to speed up the search.
        */
            if ( sentenceChange === 0 ) { return 0; }  // signOf shouldn't return NaN now

            var incrementor = signOf( sentenceChange ),  // 1 or -1
                tempi       = pst.index,
                found       = false;

            // Until we find the position or there are no more positions left
            while ( !found && positions[ tempi ] ) {
                // Test out positions
                var pos = positions[ tempi ];
                if ( pos[0] === newPos[0] && pos[1] === newPos[1] ) {
                    found = true;
                }
                // If not found, keep going until there are no more positions left in the list
                if ( !found ) { tempi += incrementor; }
            }

            // If we went through all the list we could and didn't find anything, say so
            // Not quite sure why that would happen, though
            if ( !positions[ tempi ] ) { tempi = null; }

            return tempi;
        };  // End pst._sentenceChangeToIndex()


        // =====================================
        // UTILITIES
        // =====================================

        var signOf = function ( num ) {
        /* ( Num ) -> 1, -1, 0, or NaN
        *
        * Sign of `num`. 0 for 0, NaN for NaN or anything that isn't
        * a number (no coercion, unlike a bare `Math.sign()`).
        */
            return typeof num === 'number' ? Math.sign( num ) : NaN;
        };

        var isInt = function ( arg ) {
        /* ( Any ) -> Bool, `true` only for finite whole numbers */
            return Number.isInteger( arg );
        };

        pst.normalizeIndex = function ( index ) {
        /* ( int ) -> Int, don't go out of the `positions` array */
            index = Math.min( index, positions.length - 1 );  // max
            return Math.max( index, 0 );  // min
        };

        pst.normalizeSentencePos = function ( senti ) {
        /* ( int ) -> Int, don't go out of the `sentences` array */
            senti = Math.min( senti, (sentences.length - 1) );
            return Math.max( senti, 0 );
        };


        var handleStateChange = function ( testState ) {
        /* ( {} ) -> Bool
        *
        * If `testState` has different values for relevant properties
        * than the current state, change our semi-internal `._state`, our
        * internal `oldState`, and return `true`, otherwise return `false`.
        *
        * The flag is the instance-wide `stateChanged`, not a local, on
        * purpose: a change registered through `.setState()` stays
        * "pending" until the next `.getFragment()` resets it.
        */
            for ( let propi = 0; propi < relevantProps.length; propi++ ) {

                let prop = relevantProps[ propi ];
                if ( oldState && testState && oldState[ prop ] !== testState[ prop ] ) {
                    stateChanged = true;
                    // Trigger any errors if necessary
                    getStateProp( testState, prop );
                    // ??: Do this here, or after it's all over?
                    // Will changing some of them mess stuff up if a later
                    // one throws an error?
                }
            }

            if ( stateChanged ) {
                pst._state = testState;  // May just equal itself most times
                setOldState( pst._state );  // Reset for next time
            }

            return stateChanged;
        };  // End handleStateChange()


        var setOldState = function ( newState ) {
        /* ( {} ) -> other {}
        *
        * Create and return a new `oldState` object. Future state
        * changes will be compared to this old version. Can't just
        * do `oldState` = `newState`, because mutability - if the
        * `newState`s properties change, so will `oldState`s and no
        * change will be detected.
        */
            oldState = {};  // "global" in ProseStepper

            for ( let propi = 0; propi < relevantProps.length; propi++ ) {
                let prop = relevantProps[ propi ];
                if ( newState ) { oldState[ prop ] = newState[ prop ]; }
            }

            return oldState;
        };  // End setOldState()


        // =====================================
        // ERRORS/GETTER HELPERS
        // =====================================

        var getStateProp = function ( state, propName ) {
        /* ( {}, str ) -> Various
        *
        * Either get a property from `state` (run through its
        * `_getValid_<propName>` function), return a default
        * value when there's no `state`, or throw an error. `null` if
        * there's a `state` but no validator for `propName`.
        * We'll trigger any errors we can, but some of these only
        * error in the splitter.
        */
            var val = null;

            if ( state ) {
                var funcName = '_getValid_' + propName;
                // In case there are properties on state that aren't relevant
                if ( pst[ funcName ] ) {
                    val = pst[ funcName ]( state[ propName ] );
                }
            } else {
                val = defaults[ propName ];
            }

            return val;
        };  // End getStateProp()


        // ---- Non-splitter values ----
        pst._getValid_minLengthForSeparator = function ( arg ) {
        /* ( int or undefined ) -> Int or throw error
        *
        * Returns the default for `undefined`, throws a TypeError for
        * anything else that isn't an integer, otherwise returns `arg`.
        */
            if ( arg === undefined ) { return defaults.minLengthForSeparator; }

            if ( !isInt( arg ) ) {
                // Message used to (wrongly) ask for an array of integers
                throw new TypeError( 'Was expecting an integer. Recieved: ' + arg + ', an ' + Object.prototype.toString.call( arg ) );
            }

            // Otherwise, we're cool
            return arg;
        };  // End pst._getValid_minLengthForSeparator()


        // ---- Splitter values ----
        // TODO: Some way to use splitter's error checking?
        // All defaults are valid values for the splitter
        // Otherwise, invalid values will be handled by the splitter
        pst._getValid_maxNumCharacters = function ( val ) { return val || defaults.maxNumCharacters; };
        pst._getValid_redistribute     = function ( val ) { return val || defaults.redistribute; };
        pst._getValid_fractionOfMax    = function ( val ) { return val || defaults.fractionOfMax; };
        pst._getValid_separator = function ( val ) {
            // We want to allow empty string, but it's falsy. Strict check so
            // that only a real '' gets through (`0 == ''` and `false == ''`
            // are both true with loose equality).
            if ( val === '' ) { return val; }
            return val || defaults.separator;
        };


        var notArrayOfArraysOfStringsErrors = function ( arg ) {
        /* ( [[str]] ) -> True or throw error
        *
        * Will throw necessary errors for bad references or things that
        * aren't arrays of arrays of strings. Otherwise, will return `true`
        * Only the first word of the first sentence is inspected.
        */
            var msg = 'Was expecting an array of array of strings. Recieved: ' + arg + ', an ' + Object.prototype.toString.call( arg );

            if ( arg === undefined ) { throw new ReferenceError( msg ); }

            var first, second;
            try {
                first  = arg[0];    // string would pass (which would be wrong)
                second = first[0];  // string would pass (which would be wrong)
            } catch (err) {
                // `null`, or nothing in the first slot
                throw new TypeError( msg );
            }

            // In case a string passed those earlier tests
            var wasStr      = typeof arg === 'string' || typeof first === 'string',
                wordIsStr   = typeof second === 'string' || second instanceof String;
            if ( wasStr || !wordIsStr ) { throw new TypeError( msg ); }

            return true;
        };  // End notArrayOfArraysOfStringsErrors()


        var notArrayOfIntsOrIntErrors = function ( arg ) {
        /* ( int || [ int ] ) -> True or throw error
        *
        * Will throw necessary errors for bad references or things that
        * aren't ints or arrays of ints. Otherwise, will return `true`
        */
            var msg = 'Was expecting an array of integers. Recieved: ' + arg + ', an ' + Object.prototype.toString.call( arg );

            // Can be positive or negative
            if ( isInt( arg ) ) { return true; }  // Can be an index position

            // Otherwise must be an array of ints
            if ( arg === undefined ) { throw new ReferenceError( msg ); }

            try { arg[0]; }  // `null` throws here
            catch (err) { throw new TypeError( msg ); }

            // Otherwise, things like `true` pass. Can't just if( arg[0] )
            // because then [0, 0, 0] wouldn't get through
            if ( !isInt( arg[0] ) ) { throw new TypeError( msg ); }

            for ( var itemi = 0; itemi < arg.length; itemi++ ) {
                if ( !isInt( arg[ itemi ] ) ) { throw new TypeError( msg ); }
            }

            // Otherwise, we're cool
            return true;
        };  // End notArrayOfIntsOrIntErrors()


        // =====================================
        // INITIALIZE
        // =====================================

        pst.init = function ( state ) {
        /* ( {} ) -> ProseStepper
        *
        * Now that functions exist, use them to check and set
        * values
        */
            pst._split = split;  // func
            pst._state = state;
            setOldState( state );

            sentences = pst._sentences = null;
            positions = pst._positions = [];

            return pst;
        };  // End pst.init()


        // =====================================
        // DONE
        // =====================================

        pst.init( state );

        return pst;
    };  // End ProseStepper() -> {}

    return ProseStepper;
}));