text-machine
Version:
A state machine for text processing.
599 lines (471 loc) • 20.3 kB
JavaScript
/*
Text Machine
Copyright (c) 2018 - 2022 Cédric Ronvel
The MIT License (MIT)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
"use strict" ;
/*
	Tell whether a value is a non-null, non-array object.
	Always returns a strict boolean: falsy inputs (null, undefined, 0, '') yield `false` instead of being
	returned as is.
	Note that despite its name, any non-array object is accepted (e.g. a Set or a RegExp instance would pass).
*/
const isPlainObject = v => v !== null && typeof v === 'object' && ! Array.isArray( v ) ;
/*
Optimization ideas:
* save states at different points in the buffer
* when something changes, resume the TextMachine at the closest previous saved state
* if the state machine reaches a checkpoint where the saved state has not changed, it can be stopped there
*/
/*
	TextMachine constructor.

	* options `object` where:
		* program: the program definition run by this machine (its `embedded`, `config`, `states`, `styles`
		  properties are read by the machine)
		* api: (optional) object holding the host callbacks invoked by actions, defaults to an empty object

	One child TextMachine is created for each entry of `program.embedded`, using the same options except
	for `program`, which is replaced by the embedded program.
	The program is then sanitized and the machine is reset to its initial state.
*/
function TextMachine( options ) {
	this.program = options.program ;
	this.embeddedTextMachines = {} ;
	this.api = options.api || {} ;
	this.stateStack = null ;
	this.index = 0 ;
	this.store = {} ;

	// TODO: keep a stack of saved states (see the optimization ideas), e.g.:
	//this.savedStateStack = [] ;

	const embeddedPrograms = this.program.embedded ;

	if ( embeddedPrograms ) {
		for ( const embeddedName in embeddedPrograms ) {
			// Same options as the host, only the program differs
			const embeddedOptions = Object.assign( {} , options , { program: embeddedPrograms[ embeddedName ] } ) ;
			this.embeddedTextMachines[ embeddedName ] = new TextMachine( embeddedOptions ) ;
		}
	}

	this.sanitize() ;
	this.reset() ;
}
// The constructor is the module's single export
module.exports = TextMachine ;
// Kinds of branch matcher, stored in branchProgram.matchType by .sanitizeBranchProgram()
// and dispatched on by .branchMatch()
// `match` is true or undefined: the branch always matches
const MATCH_ALWAYS = 0 ;
// `match` is a string: strict equality with the event
const MATCH_STRING = 1 ;
// `match` is a Set (arrays are converted to a Set): membership test
const MATCH_SET = 2 ;
// `match` is a RegExp: tested against the event
const MATCH_REGEXP = 3 ;
// `match` is a function: called with the event, its result is coerced to a boolean
const MATCH_FUNCTION = 4 ;
/*
	Normalize the program in place:
	* `config`, `hostConfig`, `styles` and `states` are replaced by an empty object when they are not objects
	* every branch of every state, and every span-branch of those branches, goes through .sanitizeBranchProgram()
*/
TextMachine.prototype.sanitize = function() {
	const program = this.program ;

	for ( const section of [ 'config' , 'hostConfig' , 'styles' , 'states' ] ) {
		if ( ! isPlainObject( program[ section ] ) ) { program[ section ] = {} ; }
	}

	for ( const stateName in program.states ) {
		const { branches } = program.states[ stateName ] ;

		// States without branches have nothing to sanitize
		if ( ! branches ) { continue ; }

		for ( const branch of branches ) {
			this.sanitizeBranchProgram( branch ) ;

			for ( const spanBranch of branch.spanBranches || [] ) {
				this.sanitizeBranchProgram( spanBranch ) ;
			}
		}
	}
} ;
/*
	Precompute `branchProgram.matchType` from the kind of value found in `branchProgram.match`:
	* true or undefined: MATCH_ALWAYS
	* string: MATCH_STRING
	* array or Set: MATCH_SET (an array is converted to a Set in place)
	* RegExp: MATCH_REGEXP
	* function: MATCH_FUNCTION
	Any other kind of value leaves the branch untouched.
*/
TextMachine.prototype.sanitizeBranchProgram = function( branchProgram ) {
	const { match } = branchProgram ;

	const matchType =
		match === true || match === undefined ? MATCH_ALWAYS :
		typeof match === 'string' ? MATCH_STRING :
		Array.isArray( match ) || match instanceof Set ? MATCH_SET :
		match instanceof RegExp ? MATCH_REGEXP :
		typeof match === 'function' ? MATCH_FUNCTION :
		null ;

	// Unsupported matcher: nothing is written
	if ( matchType === null ) { return ; }

	// Lists are turned into a Set, so that matching is a simple .has()
	if ( Array.isArray( match ) ) { branchProgram.match = new Set( match ) ; }

	branchProgram.matchType = matchType ;
} ;
/*
	Put the machine back in its starting condition: the event index is zeroed, the store is emptied,
	and the state stack only contains a fresh root state named after `program.config.initState`
	(or 'init' when that option is not set).
*/
TextMachine.prototype.reset = function() {
	this.index = 0 ;
	this.store = {} ;

	const rootStateName = this.program.config.initState || 'init' ;
	const rootState = {
		name: rootStateName ,
		parent: null ,
		embedded: null ,
		microState: {} ,
		span: {}
	} ;

	this.stateStack = [ rootState ] ;
} ;
/*
	Process one event.

	Steps:
	* if the current state has an embedded machine, the event is forwarded to it first
	* the first matching branch of the current state is selected (possibly refined by one of its span-branches)
	  and its features are applied: spans, micro-states, embedded machine switch, store, then its action
	* the state change is performed: return from a sub-state, continue in the same state, enter a sub-state,
	  or switch to another state
	* the features of the resulting state are applied, then its action (see the transition/delay rules below)
	* the whole sequence is repeated on the resulting state while a state change has occurred and the selected
	  branch has `propagate` set
	* finally `this.index` is incremented

	Arguments:
	* event: the value matched against the branch matchers and appended to span contents
	  (presumably a string such as a single character -- TODO confirm against the callers)
	* context: opaque value, stored on states and spans and handed back to the `api` callbacks by the actions

	Throws an Error when a branch references an unknown embedded program, or when the target state does not exist
	and `program.devMode.fallbackState` is not set.
*/
TextMachine.prototype.pushEvent = function( event , context ) {
var initialState , state ,
initialStateProgram , stateProgram , nextStateName , branchProgram ,
isDelayed = false ,
isTransition = false ,
stateHasSwitched = false ;
// NOTE(review): the three flags above are initialized once per event and never cleared between two
// iterations of the propagation loop, so once set they stay set until the end of this call
//console.error( "\n>>> PUSH: '" + event + "'" ) ;
// Get the current state
initialState = state = this.stateStack[ this.stateStack.length - 1 ] ;
// Active state program
initialStateProgram = stateProgram = this.program.states[ state.name ] ;
if ( state.embedded ) {
// If there is an embedded textMachine, run it now!
// The host textMachine will have to overwrite with its own actions.
this.embeddedTextMachines[ state.embedded ].pushEvent( event , context ) ;
}
do {
// First we select the branch and apply its feats
branchProgram = this.branchMatch( stateProgram.branches , event , state ) ;
if ( branchProgram ) {
if ( branchProgram.spanBranches && branchProgram.branchOn && state.span[ branchProgram.branchOn ] ) {
// Try to match a span branch: the matching is done on the content of the span named by `branchOn`,
// and the original branch is kept when no span branch matches
//console.error( "Branch on:" , branchProgram.branchOn , state.span[ branchProgram.branchOn ].content ) ;
branchProgram = this.branchMatch( branchProgram.spanBranches , state.span[ branchProgram.branchOn ].content , state )
|| branchProgram ;
}
if ( branchProgram.expandSpan ) { this.manageSpans( branchProgram.expandSpan , event , context , state , EXPAND_SPAN ) ; }
if ( branchProgram.span ) { this.manageSpans( branchProgram.span , event , context , state , CONTINUE_SPAN ) ; }
if ( branchProgram.startSpan ) { this.manageSpans( branchProgram.startSpan , event , context , state , START_SPAN ) ; }
if ( branchProgram.copySpan ) { this.copySpans( branchProgram.copySpan , state ) ; }
if ( branchProgram.microState ) { this.setMicroState( branchProgram.microState , state ) ; }
// `transition` takes precedence over `delay`, both change which state action is executed below
if ( branchProgram.transition ) { isTransition = true ; }
else if ( branchProgram.delay ) { isDelayed = true ; }
if ( branchProgram.embedded ) {
// The name of the embedded program is either given as is, or as a dynamic value path (array)
let embedded = Array.isArray( branchProgram.embedded ) ? this.getDynamicValue( branchProgram.embedded , state ) : branchProgram.embedded ;
let embeddedTextMachine = this.embeddedTextMachines[ embedded ] ;
if ( ! embeddedTextMachine ) { throw new Error( "Embedded program not found: " + embedded + " from " + state.name ) ; }
embeddedTextMachine.reset() ;
state.embedded = embedded ;
}
else if ( branchProgram.embedded === null ) {
// An explicit null turns the embedded machine off
state.embedded = null ;
}
if ( branchProgram.store ) {
// `store` is a [ key , dynamic value path ] pair: truthy values are accumulated in a Set per key
let key = branchProgram.store[ 0 ] ,
value = this.getDynamicValue( branchProgram.store[ 1 ] , state ) ;
if ( value ) {
if ( ! this.store[ key ] ) { this.store[ key ] = new Set() ; }
this.store[ key ].add( value ) ;
}
}
// Exec the branch action on the current state now, if any...
if ( branchProgram.return && this.useReturnErrorAction( branchProgram.return , state ) ) {
// This is a return error (mostly parenthesis/brace/bracket parse errors)
//console.error( ">>> APPLY branch returnErrorAction" ) ;
this.execActions( branchProgram.return.errorAction , state , context ) ;
}
else if ( branchProgram.action ) {
this.execActions( branchProgram.action , state , context ) ;
}
}
// Now it depends on which branching mode occured
if ( this.stateStack.length > 1 && ( stateProgram.return || branchProgram?.return ) ) {
// Returning from sub-state (recursion): pop the sub-state, the parent state becomes the current state again
//console.error( "RETURN" , state.name , '-->' , state.openingState , '(wanted: ' + ( stateProgram.return || branchProgram?.return ) + ')' , '-->-->' , state.parent?.returnState ) ;
stateHasSwitched = true ;
this.stateStack.length -- ;
state = this.stateStack[ this.stateStack.length - 1 ] ;
stateProgram = this.program.states[ state.name ] ;
if ( state.returnState ) {
// The parent asked to switch to another state once the sub-state returns:
// overwrite the old state with the new one
nextStateName = state.returnState ;
stateProgram = this.program.states[ nextStateName ] ;
if ( ! stateProgram ) {
let errorMessage = "State not found: " + state.returnState + " from " + state.name ;
if ( this.program.devMode?.fallbackState ) {
// Dev mode: log the error and carry on with the fallback state instead of throwing
nextStateName = this.program.devMode.fallbackState ;
stateProgram = this.program.states[ nextStateName ] ;
console.error( errorMessage , stateProgram ) ;
}
else {
throw new Error( errorMessage ) ;
}
}
// The replacement state shares the micro-states, spans and opening data of the state it replaces
state = this.stateStack[ this.stateStack.length - 1 ] = {
name: nextStateName ,
parent: state.parent ,
embedded: state.embedded ,
context: context ,
previousName: state.name ,
previousContext: state.context ,
microState: state.microState ,
span: state.span ,
openingContext: state.openingContext ,
openingState: state.openingState ,
startingStateContext: context
} ;
}
}
else if ( ! branchProgram || (
( ! branchProgram.state || branchProgram.state === state.name )
&& ! branchProgram.subState
&& ( ! branchProgram.return || this.stateStack.length <= 1 )
) ) {
// Continue / No state change: only the context bookkeeping of the current state is updated
//console.error( "CONTINUE" , state.name ) ;
state.previousName = state.name ;
state.previousContext = state.context ;
state.context = context ;
}
else if ( branchProgram.subState ) {
// Entering sub-state (recursion)
//console.error( "ENTERING SUB STATE" , state.name , '-->' , branchProgram.subState ) ;
stateHasSwitched = true ;
nextStateName = branchProgram.subState ;
stateProgram = this.program.states[ nextStateName ] ;
if ( ! stateProgram ) {
let errorMessage = "State not found: " + branchProgram.subState + " from " + state.name ;
if ( this.program.devMode?.fallbackState ) {
// Dev mode: log the error and carry on with the fallback state instead of throwing
nextStateName = this.program.devMode.fallbackState ;
stateProgram = this.program.states[ nextStateName ] ;
console.error( errorMessage , stateProgram ) ;
}
else {
throw new Error( errorMessage ) ;
}
}
// Remember on the parent which state to switch to once the sub-state returns (may be undefined)
state.returnState = branchProgram.state ;
// Create a new state, with its own micro-states and spans, and push it at the end of the stack
state = this.stateStack[ this.stateStack.length ] = {
name: nextStateName ,
parent: state ,
embedded: state.embedded ,
context: context ,
microState: {} ,
span: {} ,
openingContext: context ,
openingState: branchProgram.subState ,
startingStateContext: context
} ;
}
else {
// Switch to state
//console.error( "SWITCH" , state.name , '-->' , branchProgram.state ) ;
stateHasSwitched = true ;
// Now change the state
nextStateName = branchProgram.state ;
stateProgram = this.program.states[ nextStateName ] ;
if ( ! stateProgram ) {
let errorMessage = "State not found: " + branchProgram.state + " from " + state.name ;
if ( this.program.devMode?.fallbackState ) {
// Dev mode: log the error and carry on with the fallback state instead of throwing
nextStateName = this.program.devMode.fallbackState ;
stateProgram = this.program.states[ nextStateName ] ;
console.error( errorMessage , stateProgram ) ;
}
else {
throw new Error( errorMessage ) ;
}
}
// The new state replaces the current one on the stack, sharing its micro-states, spans and opening data
state = this.stateStack[ this.stateStack.length - 1 ] = {
name: nextStateName ,
parent: state.parent ,
embedded: state.embedded ,
context: context ,
previousName: state.name ,
previousContext: state.context ,
microState: state.microState ,
span: state.span ,
openingContext: state.openingContext ,
openingState: state.openingState ,
startingStateContext: context
} ;
}
// Finally apply state feats
if ( stateProgram.expandSpan ) { this.manageSpans( stateProgram.expandSpan , event , context , state , EXPAND_SPAN ) ; }
if ( stateProgram.span ) { this.manageSpans( stateProgram.span , event , context , state , CONTINUE_SPAN ) ; }
if ( stateProgram.startSpan ) { this.manageSpans( stateProgram.startSpan , event , context , state , START_SPAN ) ; }
if ( stateProgram.copySpan ) { this.copySpans( stateProgram.copySpan , state ) ; }
if ( stateProgram.microState ) { this.setMicroState( stateProgram.microState , state ) ; }
// Exec the action for the state, if any...
// * transition: no state action is executed for this event
// * delay: the action of the state that received the event is executed, instead of the new state's action
if ( isTransition || isDelayed ) {
if ( ! isTransition && initialStateProgram.action ) {
this.execActions( initialStateProgram.action , initialState , context ) ;
}
}
else if ( stateProgram.return && this.useReturnErrorAction( stateProgram.return , state ) ) {
// This is a return error (mostly parenthesis/brace/bracket parse errors)
//console.error( ">>> APPLY state returnErrorAction" , this.stateStack.length , stateProgram.return , state.openingState ) ;
this.execActions( stateProgram.return.errorAction , state , context ) ;
}
else if ( stateProgram.action ) {
this.execActions( stateProgram.action , state , context ) ;
}
// Propagate the event to the next state now?
} while ( stateHasSwitched && branchProgram?.propagate ) ;
this.index ++ ;
} ;
/*
	Tell whether the `errorAction` of a `return` program should be used instead of the regular action.

	The result is truthy only when `returnProgram` is an object having an `errorAction`, and at least one of
	those conditions is met:
	* there is no sub-state to return from (the stack only holds one state)
	* `matchState` is set and is not the state that opened the current sub-state
	* `matchMicroState` is set and is not satisfied by the parent state

	The result is meant to be used as a condition, it is not always a strict boolean.
*/
TextMachine.prototype.useReturnErrorAction = function( returnProgram , state ) {
	if ( typeof returnProgram !== 'object' ) { return false ; }

	const { errorAction , matchState , matchMicroState } = returnProgram ;

	// Without error action, there is nothing to use
	if ( ! errorAction ) { return errorAction ; }

	// Returning while not being inside a sub-state
	if ( this.stateStack.length <= 1 ) { return true ; }

	// Returning from a sub-state opened by another state than the expected one
	if ( matchState && matchState !== state.openingState ) { return true ; }

	// Returning while the parent state has not the expected micro-states
	return matchMicroState && ! this.isMicroStateEqual( matchMicroState , state , state.parent ) ;
} ;
/*
	Get the first matching branchProgram, or undefined when there is none (or when `branches` is not an array).

	* branches: array of sanitized branch programs (see .sanitizeBranchProgram())
	* str: the value to match, passed to the matcher of each branch
	* state: the state used to check the `matchMicroState` condition of the branch, if any

	A branch is selected when its matcher result, reversed if `inverse` is set, is positive, and when its
	`matchMicroState` condition (if any) is satisfied by the state.
	Branches whose `matchType` is unknown never match on their own.
*/
TextMachine.prototype.branchMatch = function( branches , str , state ) {
	if ( ! Array.isArray( branches ) ) { return ; }

	for ( let branchProgram of branches ) {
		let isMatching ;

		switch ( branchProgram.matchType ) {
			case MATCH_ALWAYS :
				isMatching = true ;
				break ;
			case MATCH_STRING :
				isMatching = branchProgram.match === str ;
				break ;
			case MATCH_SET :
				isMatching = branchProgram.match.has( str ) ;
				break ;
			case MATCH_REGEXP : {
				let regexp = branchProgram.match ;
				// .test() is stateful for global and sticky regexp: it starts searching at .lastIndex and moves
				// it after each match, so without this reset the outcome would depend on the previous events
				if ( regexp.global || regexp.sticky ) { regexp.lastIndex = 0 ; }
				isMatching = regexp.test( str ) ;
				break ;
			}
			case MATCH_FUNCTION :
				isMatching = !! branchProgram.match( str ) ;
				break ;
			default :
				isMatching = false ;
		}

		if ( isMatching === ! branchProgram.inverse ) {
			if ( ! branchProgram.matchMicroState || this.isMicroStateEqual( branchProgram.matchMicroState , state ) ) {
				return branchProgram ;
			}
		}
	}
} ;
// Kinds of span operation, passed as the `type` argument of .manageSpans() and .manageSpan()
// Extend the existing span, unless it ended before the previous index: in that case a new span is started
const CONTINUE_SPAN = 0 ;
// Always start a new span
const START_SPAN = 1 ;
// Extend the existing span, even when it ended before the previous index
const EXPAND_SPAN = 2 ;
/*
	Apply .manageSpan() to one span name, or to each span name of an array.

	* spanProgram: a span name, or an array of span names
	* event, context, state, type: forwarded as is to .manageSpan()
*/
TextMachine.prototype.manageSpans = function( spanProgram , event , context , state , type = CONTINUE_SPAN ) {
	const spanNames = Array.isArray( spanProgram ) ? spanProgram : [ spanProgram ] ;

	spanNames.forEach( spanName => {
		this.manageSpan( spanName , event , context , state , type ) ;
	} ) ;
} ;
/*
	Start or extend the span named `spanProgram` of a state, for the event at the current index.

	A new span is started when:
	* type is START_SPAN
	* or there is no span of that name yet
	* or the existing span ended before the previous index, unless type is EXPAND_SPAN
	Otherwise the existing span is extended up to the current index, and the event is appended to its content.

	Nothing is done when the span was already started or extended at the current index, which happens when
	the event is propagated.

	The span content is read by .getDynamicValue(), by the `branchOn` feature and by the 'hint' action.
*/
TextMachine.prototype.manageSpan = function( spanProgram , event , context , state , type = CONTINUE_SPAN ) {
	const index = this.index ;
	const current = state.span[ spanProgram ] ;

	// A gap exists when at least one index was skipped since the end of the span
	const hasGap = !! current && current.end < index - 1 ;
	const mustStart = type === START_SPAN || ! current || ( hasGap && type !== EXPAND_SPAN ) ;

	if ( mustStart ) {
		// No need to create a new object when a span has already been started here (propagate case)
		if ( ! current || current.start !== index ) {
			state.span[ spanProgram ] = {
				startingContext: context ,
				endingContext: context ,
				start: index ,
				end: index ,
				content: event
			} ;
		}

		return ;
	}

	// The span could have been already extended to the current index (propagate case)
	if ( current.end === index ) { return ; }

	current.end = index ;
	current.endingContext = context ;
	current.content += event ;
} ;
/*
	Make span names of a state refer to existing spans of that state.

	* spanProgram: either a single [ sourceName , targetName ] pair, or an array of such pairs
	* state: the state whose spans are updated

	The span object is shared between both names, it is not cloned.
*/
TextMachine.prototype.copySpans = function( spanProgram , state ) {
	// A single pair is handled as a list of one pair
	const pairs = Array.isArray( spanProgram[ 0 ] ) ? spanProgram : [ spanProgram ] ;

	pairs.forEach( pair => {
		state.span[ pair[ 1 ] ] = state.span[ pair[ 0 ] ] ;
	} ) ;
} ;
/*
	Update the micro-states of a state.

	* microStateProgram: object mapping micro-state names to:
		* a string or a number: stored as is (empty string and zero included)
		* any other falsy value: the micro-state is deleted
		* an array: a dynamic value path, resolved with .getDynamicValue() against the state
		* anything else: the micro-state is set to true
	* state: the state whose `microState` object is updated in place

	Throws a TypeError when `microStateProgram` is not an object.
*/
TextMachine.prototype.setMicroState = function( microStateProgram , state ) {
	const isObject = !! microStateProgram && typeof microStateProgram === 'object' ;
	if ( ! isObject ) { throw new TypeError( "'microState' should be an object" ) ; }

	for ( const microStateName in microStateProgram ) {
		const wanted = microStateProgram[ microStateName ] ;
		const wantedType = typeof wanted ;

		if ( wantedType === 'string' || wantedType === 'number' ) {
			state.microState[ microStateName ] = wanted ;
			continue ;
		}

		if ( ! wanted ) {
			delete state.microState[ microStateName ] ;
			continue ;
		}

		state.microState[ microStateName ] = Array.isArray( wanted ) ? this.getDynamicValue( wanted , state ) : true ;
	}
} ;
/*
	Check the micro-states of a state against a micro-state program.

	* microStateProgram: either:
		* an array of names: every named micro-state should be truthy
		* an object: each named micro-state should be strictly equal to the given value, where false, null and
		  undefined all mean "should not be set", and where an array is a dynamic value path resolved with
		  .getDynamicValue() against `contextState`
		* anything else: used as a single name, that micro-state should be truthy
	* contextState: the state used to resolve dynamic values
	* toState: the state whose micro-states are checked, defaults to `contextState`

	Returns a boolean.
*/
TextMachine.prototype.isMicroStateEqual = function( microStateProgram , contextState , toState = contextState ) {
	// Single name
	if ( ! microStateProgram || typeof microStateProgram !== 'object' ) {
		return !! toState.microState[ microStateProgram ] ;
	}

	// List of names
	if ( Array.isArray( microStateProgram ) ) {
		return microStateProgram.every( microStateName => toState.microState[ microStateName ] ) ;
	}

	// Name to expected value
	for ( const microStateName in microStateProgram ) {
		const declared = microStateProgram[ microStateName ] ;
		let expected ;

		if ( declared === false || declared === null || declared === undefined ) {
			// Unset micro-states are compared to undefined
			expected = undefined ;
		}
		else if ( Array.isArray( declared ) ) {
			expected = this.getDynamicValue( declared , contextState ) ;
		}
		else {
			expected = declared ;
		}

		if ( toState.microState[ microStateName ] !== expected ) { return false ; }
	}

	return true ;
} ;
/*
	Resolve a dynamic value path against a state.

	* path: array made of any number of leading 'parent' parts (each one moving to the parent state), followed by
	  either 'microState' or 'span', followed by a name
	* state: the state where the resolution starts

	Returns the value of the named micro-state, or the content of the named span.
	Returns undefined when an ancestor or the span does not exist, or when the path is not of a known kind.
*/
TextMachine.prototype.getDynamicValue = function( path , state ) {
	let target = state ;
	let depth = 0 ;

	// Climb one ancestor per leading 'parent'
	for ( ; path[ depth ] === 'parent' ; depth ++ ) {
		target = target.parent ;
		if ( ! target ) { return undefined ; }
	}

	const kind = path[ depth ] ;
	const key = path[ depth + 1 ] ;

	switch ( kind ) {
		case 'microState' :
			return target.microState[ key ] ;
		case 'span' :
			return target.span[ key ]?.content ;
		default :
			return undefined ;
	}
} ;
/*
	Execute one action, or a list of actions, using .execAction().

	* actions: either a single action (an array whose first element is not an array), or an array of actions
	* state, context: forwarded as is to .execAction()

	Nothing is done when `actions` is falsy or empty.
	For a single action, the result of .execAction() is returned.
*/
TextMachine.prototype.execActions = function( actions , state , context ) {
	if ( ! actions?.length ) { return ; }

	const isList = Array.isArray( actions[ 0 ] ) ;

	if ( isList ) {
		for ( const singleAction of actions ) {
			this.execAction( singleAction , state , context ) ;
		}

		return ;
	}

	return this.execAction( actions , state , context ) ;
} ;
/*
	Execute a single action, by calling the matching method of the host `api`.

	* action: array whose first element is the action name, followed by its arguments:
		* [ 'style' , styleName ]: api.style() on the current context
		* [ 'starterStyle' , styleName ]: api.style() on the context where the state has started
		* [ 'openerStyle' , styleName ]: api.style() on the context that opened the state
		* [ 'streakStyle' , styleName ]: api.blockStyle() from the context where the state has started to the current context
		* [ 'spanStyle' , spanName , styleName ]: api.blockStyle() on the whole span of the state, if it exists
		* [ 'returnSpanStyle' , spanName , styleName ]: same, but using the span of the parent state, if any
		* [ 'hint' , spanName , arg ]: api.hint() with the current context, the content of the span (if it exists), and `arg`
	  Style names are resolved using the `styles` of the program. Unknown action names are ignored.
	* state: the state the action is executed for
	* context: the context of the current event
*/
TextMachine.prototype.execAction = function( action , state , context ) {
	const styles = this.program.styles ;

	// Apply the style named by the second action argument to a whole span, missing spans are ignored
	const styleSpan = span => {
		if ( span ) { this.api.blockStyle( span.startingContext , span.endingContext , styles[ action[ 2 ] ] ) ; }
	} ;

	switch ( action[ 0 ] ) {
		case 'style' :
			this.api.style( context , styles[ action[ 1 ] ] ) ;
			return ;

		case 'starterStyle' :
			this.api.style( state.startingStateContext , styles[ action[ 1 ] ] ) ;
			return ;

		case 'openerStyle' :
			this.api.style( state.openingContext , styles[ action[ 1 ] ] ) ;
			return ;

		case 'streakStyle' :
			this.api.blockStyle( state.startingStateContext , context , styles[ action[ 1 ] ] ) ;
			return ;

		case 'spanStyle' :
			styleSpan( state.span[ action[ 1 ] ] ) ;
			return ;

		case 'returnSpanStyle' :
			if ( state.parent ) { styleSpan( state.parent.span[ action[ 1 ] ] ) ; }
			return ;

		case 'hint' : {
			// /!\ Should be refactored...
			const hintSpan = state.span[ action[ 1 ] ] ;
			if ( hintSpan ) { this.api.hint( context , hintSpan.content , action[ 2 ] ) ; }
			return ;
		}
	}
} ;