@ibgib/helper-gib
Version:
common helper/utils/etc used in ibgib libs. Node v19+ needed for heavily-used isomorphic webcrypto hashing consumed in both node and browsers.
1,357 lines (1,314 loc) • 63.5 kB
text/typescript
import { HELPER_LOG_A_LOT } from '../constants.mjs';
import { Ssml } from './ssml-helper.mjs';
// Module-wide verbose-logging switch (from the shared helper constants). When truthy, the Lex methods below emit "starting..."/"complete." console traces.
const logalot = HELPER_LOG_A_LOT;
/**
 * Language/locale codes that lex data and lookups can be tagged with.
 */
export type LanguageCode = "en" | "de" | "en-US" | "en-GB" | "en-CA" | "en-IN" | "de-DE";
/**
 * Value lookup for {@link LanguageCode}, keyed by identifier-safe names
 * (e.g. `LanguageCode.enUS` is "en-US").
 */
export const LanguageCode: {
    en: LanguageCode;
    de: LanguageCode;
    enUS: LanguageCode;
    enGB: LanguageCode;
    enCA: LanguageCode;
    enIN: LanguageCode;
    deDE: LanguageCode;
} = {
    en: "en",
    de: "de",
    enUS: "en-US",
    enGB: "en-GB",
    enCA: "en-CA",
    enIN: "en-IN",
    deDE: "de-DE",
};
/**
* A single record in the lex data. Records are stored in arrays under a
* (non-unique) string id; every record under the same id is an alternative for
* the same message.
*
* When specifying `TProps`, be sure it is a flat key: value object. (I think...) Definitely not a circular reference complex object, so maybe any non-circular POCO will work.
*/
export interface LexDatum<TProps extends PropsData = PropsData> {
/**
* Language of the lexical datum.
*/
language?: LanguageCode;
/**
* Optional identifier that tells apart alternatives sharing the same id.
*
* Lexical items with the same id are considered alternatives for
* the equivalent message, e.g. "Hello" and "Howdy". If you have multiple
* LexDatums with the same id, then the specifier can differentiate among
* them to get the _exact_ record.
*
* Basically this is an optional unique id, but the data overall
* is indexed by a non-unique "id" string. :nose:? maybe...
*/
specifier?: string;
/**
* Relative chance of this datum being selected among its alternatives.
*
* Lexical items with the same id are considered alternatives for
* the equivalent message, e.g. "Hello" and "Howdy". If you have multiple
* LexDatums with the same id, then this defines the weighting of chance
* when selecting one of those alternatives.
*
* Must be non-zero. Default weighting is 1.
*
* @see Lex.pickDatum
*/
weighting?: number;
/**
* Keywords relating to the datum, used for keyword filtering.
*
* @see LexGetOptions.keywords
*/
keywords?: string[];
/**
* The lines of plain text.
*
* There should be no ssml tags in here.
*/
texts?: string[];
/**
* The ssml equivalent of `texts`.
*
* Do NOT include any <speak> tags in here.
*/
ssmls?: string[];
/**
* Additional dynamic properties to filter/match against.
*
* Use Case:
*
* Currently, I'm doing x=123 in keywords, and this is meant to
* improve upon that.
*/
props?: TProps;
}
/**
* Options accepted by `Lex.get` (and its aliases) that control which datum is
* selected and how its lines are rendered. Every member is optional; `Lex.get`
* falls back to the instance defaults for language, keywordMode, lineConcat,
* lineConcatDelim, capitalize and propsMode.
*/
export interface LexGetOptions<TProps extends PropsData = PropsData> {
/**
* Language of datum to get. Defaults to the Lex instance's `requestLanguage`.
*/
language?: LanguageCode;
/**
* Specifier of datum to get.
*/
specifier?: string;
/**
* Keywords of datum to get.
*/
keywords?: string[];
/**
* Determines how the `LexGetOptions.keywords` match with filtering
* data. Defaults to the Lex instance's `defaultKeywordMode`.
* @see {KeywordMode}
*/
keywordMode?: KeywordMode;
/**
* Index (0-based) into texts/ssmls of the LexDatum to get.
* If not provided, will return all lines.
* Basically, use this if you just want a single string from the
* texts/ssmls array but don't want to duplicate that in a separate
* data entry.
*
* @example If you have texts of [ "a", "b", "c" ], but you just
* want the "b" line, you would pass in a lineIndex of 1.
*/
lineIndex?: number;
/**
* If provided, and if there are multiple lines in datum's
* texts/ssmls, then this determines how they are concatenated.
* Defaults to the Lex instance's `defaultLineConcat`.
*
* @see LexLineConcat
*/
lineConcat?: LexLineConcat;
/**
* If lineConcat is "delim", then it will concat using this
* string as a delimiter. Defaults to the Lex instance's `defaultDelim`.
*
* @see Lex.get
*/
lineConcatDelim?: string;
/**
* How to capitalize the lex item text output when concatenating
* multiple lines in texts/ssmls. Defaults to the Lex instance's
* `defaultCapitalize`.
*
* NOTE:
* I'm not sure if I should make it capitalize the ssml, but
* my hunch is to NOT do this, since that is supposed to be spoken
* text.
*
* @see LexCapitalize for individual options.
*/
capitalize?: LexCapitalize;
/**
* Template placeholder arguments. Each template in Lex can either
* be a reference to another Lex datum, e.g. $(hi), or it can be
* a template placeholder, e.g. $name, $age, $0, $1, etc. It simply
* has to start with a $ and be followed by one or more word
* characters ([a-zA-Z_0-9]+, i.e. \w+).
*
* So this javascript object is in the format of
* `"placeholder": "replacement"`, e.g. `{ username: "Cotter" }`.
*
* NOTE: If you want different variable replacements for texts and
* ssmls, then use this config option for the texts and use
* ssmlVars for the ssmls.
*
* @example
* In Lex data:
* "Welcome back, $username!"
* Calling code:
* `lex._('greeting', { vars: { username } })`
* If the text is "Welcome back, $username!" and this is called
* with `{ vars: { username } }`, then the output
* would be e.g. "Welcome back, Cotter!"
* (equivalent to `Welcome back, ${username}!`)
*/
vars?: { [key: string]: string; };
/**
* Same as vars option, but will only replace template variables
* in the `ssmls`. When omitted, `Lex.get` uses `vars` for the ssmls too.
*/
ssmlVars?: {
[key: string]: string;
};
/**
* Additional dynamic properties to filter/match against.
*
* Use Case:
*
* Currently, I'm doing x=123 in keywords, and this is meant to
* improve upon that.
*
* ## notes
*
* These property filters require lambda functions, and as such,
* these cannot be used from within template references in
* lex data. see {@link Lex.replaceTemplateRefs}
*/
props?: PropsFilter<TProps>;
/**
* Determines how the props predicate functions, i.e. per individual
* property ("prop") or against the whole props object ("props").
* Defaults to the Lex instance's `defaultPropsMode`.
* @see PropertyPredicateLevel
*/
propsMode?: PropsFilterMode;
/**
* "Catchall" predicate filter that acts on the entire lex datum.
*
* So if you don't have a specific filter option in the params list, use this as
* a backup to just filter against the entire datum in a custom way.
*
* ## driving use case
*
* I want to reverse get where the `LexDatum.texts` property is exactly a
* certain value and get the reverse mapping to the "semantic id". So a user
* says some natural language, and I map that back to the possibility(s) for the
* semantic id of that user word/phrase.
*/
fnDatumPredicate?: LexDatumPredicate<TProps>,
}
/**
* Contains properties per datum that allow for more complex filtering with
* two strategies:
* 1. per property name via a lambda
* 2. per entire props object via a lambda
*
* This is the type in the LexDatum.props property.
*
* NOTE: atow this must be just a flat dictionary, i.e. cannot contain nested
* objects. However, if you want to use FilterPerProps filtering, I believe you
* can just `any` cast this and do your filtering predicate against the entire
* props object as you like.
* */
export type PropsData = { [propName: string]: any; };
/** This is the type used in `LexGetOptions.props`: a filter either per property or per the entire props data object. */
export type PropsFilter<TProps> = FilterPerProp | FilterPerProps<TProps>;
/**
* Individual property predicate.
*
* NOTE(review): the parameter is named `propName`, but the example on
* `PropertyPredicateLevel.prop` (`id: x => x === "id2"`) compares it against a
* property value — confirm against the filtering code which one is passed.
*/
export type PropertyPredicate = (propName: string) => boolean;
/** Filter on individual property level: a dictionary of property name to the predicate for that property. */
export type FilterPerProp = { [propName: string]: PropertyPredicate; };
/** Filter at the entire props object level (`LexDatum.props`): a single predicate that receives the whole props object. */
export type FilterPerProps<TProps> = (props: TProps) => boolean;
/**
* Options for `Lex.find` (reverse lookup from datum contents to lex data ids).
*/
export interface LexFindOptions<TProps extends PropsData = PropsData> {
/**
* Predicate run against every datum. Declared optional, but `Lex.find`
* throws when it is missing, as it is the only criterion implemented atow.
*/
fnDatumPredicate?: LexDatumPredicate<TProps>,
}
/**
* Result of `Lex.find`: for each lex data id that had at least one match, the
* matching datums under that id.
*/
export type LexFindResults<TProps extends PropsData = PropsData> = { [id: string]: LexDatum<TProps>[] }
/**
* Filter against the entire lex datum entry. See `LexGetOptions.fnDatumPredicate`.
*/
export type LexDatumPredicate<TProps extends PropsData> = (value: LexDatum<TProps>) => boolean;
/**
 * Keywords are used to filter lex items; this sets how the requested
 * keywords are interpreted against a datum's keywords.
 */
export type KeywordMode = "any" | "all" | "none";
/**
 * Value lookup for the {@link KeywordMode} options.
 */
export const KeywordMode: Record<KeywordMode, KeywordMode> = {
    /** Any of the keywords must match to return a lex result. */
    any: "any",
    /** All of the keywords must match to return a lex result. */
    all: "all",
    /** Only return results that do NOT include any of the keywords. */
    none: "none",
};
/**
 * Level at which a props filter is applied: per individual property ("prop")
 * or against the whole props object ("props").
 */
export type PropsFilterMode = "prop" | "props";
/**
 * Value lookup for the {@link PropsFilterMode} options.
 */
export const PropertyPredicateLevel: Record<PropsFilterMode, PropsFilterMode> = {
    /**
     * Predicate acts upon individual properties.
     * So you'll pass an object in with prop key and a predicate(s).
     *
     * Check out advanced unit tests for more concrete examples.
     *
     * @example props: { id: x => x === "id2", color: x => x === "orange" }
     */
    prop: "prop",
    /**
     * Predicate acts upon the datum's entire props object.
     * So you'll pass a single predicate that receives the entire props
     * object, i.e. datum[props].
     *
     * Check out advanced unit tests for more concrete examples.
     *
     * @example { props: (p: PropsData) => { return p && p.id && p.id === "id1"; }, propsMode: "props" }
     */
    props: "props",
};
// export type LexResultType = "text" | "ssml" | "obj";
/**
* Result object when using `Lex._` / `Lex.get`: the rendered text and ssml of
* the one datum that was picked, plus the raw data behind it.
*
* @see Lex
* @see Lex.get
*/
export interface LexResultObj<TProps extends PropsData = PropsData> {
/**
* The text output of the single datum that was picked.
*/
text: string;
/**
* The ssml output of the single datum that was picked.
*/
ssml: string;
/**
* The single raw datum that was picked.
*/
datum: LexDatum<TProps>;
/**
* All of the data that matched the given params (specifier,
* keywords, language, etc.), i.e. the candidates `datum` was picked from.
*/
rawData: LexDatum<TProps>[];
}
/**
 * Capitalization action applied to lines when getting texts/ssmls.
 */
export type LexCapitalize = "upperfirst" | "uppereach" | "lowerfirst" | "lowereach" | "none";
/**
 * Value lookup for the {@link LexCapitalize} options.
 */
export const LexCapitalize: Record<LexCapitalize, LexCapitalize> = {
    /** Uppercase the first letter of only the first line in texts/ssmls. */
    upperfirst: "upperfirst",
    /** Uppercase the first letter of each line in texts/ssmls. */
    uppereach: "uppereach",
    /** Lowercase the first letter of only the first line in texts/ssmls. */
    lowerfirst: "lowerfirst",
    /** Lowercase the first letter of each line in texts/ssmls. */
    lowereach: "lowereach",
    /** Leave the casing as-is for texts/ssmls. */
    none: "none",
};
/**
 * Strategy for combining a datum's multiple lines into a single string.
 */
export type LexLineConcat = "paragraph" | "sentence" | "newline" | "delim";
/**
 * Value lookup for the {@link LexLineConcat} options. The one-letter keys
 * (`p`, `s`, `n`) are shorthands for the option documented next to them.
 */
export const LexLineConcat: Record<
    "p" | "paragraph" | "s" | "sentence" | "n" | "newline" | "delim",
    LexLineConcat
> = {
    /**
     * Each line will be combined into a single string of paragraphs.
     * @example
     * ["Line 1.", "Line 2."] will become
     * if text: "Line 1.\n\nLine 2."
     * if ssml: "<p>Line 1.</p><p>Line 2.</p>"
     */
    p: "paragraph",
    /** Long form of `p`. */
    paragraph: "paragraph",
    /**
     * Each line will be combined into a single string of sentences.
     * @example
     * ["Line 1.", "Line 2."] will become
     * if text: "Line 1. Line 2."
     * if ssml: "<s>Line 1</s><s>Line 2</s>"
     */
    s: "sentence",
    /** Long form of `s`. */
    sentence: "sentence",
    /**
     * Each line will be combined into a single string with new
     * line feeds between each line.
     *
     * Note: For Ssml, this is the same as "paragraph".
     *
     * @example
     * ["Line 1.", "Line 2."] will become
     * if text: "Line 1.\nLine 2."
     * if ssml: "<p>Line 1.</p><p>Line 2.</p>"
     */
    n: "newline",
    /** Long form of `n`. */
    newline: "newline",
    /**
     * Each line will be combined into a single string with each
     * line delimited by the delimiter specified in the function.
     *
     * @example
     * ["Line 1.", "Line 2."] with delim | will become
     * if text: "Line 1.|Line 2."
     * if ssml: "Line 1.|Line 2."
     */
    delim: "delim",
};
/**
* The full lex data set: a dictionary from a (non-unique) message id to the
* list of alternative datums stored under that id.
*/
export type LexData<TProps extends PropsData = PropsData> = {
[key: string]: LexDatum<TProps>[];
};
/**
* These options control mostly the default behavior for filtering lex results
* when consuming via the `Lex._(someIdentifier, opts)` call. When you don't
* specify in the `opts` how to filter, these values will be used.
*
* These options are in the constructor of the {@link Lex} class. Every member
* is optional; the constructor supplies the fallback noted on each member.
*/
export interface LexCtorOpts {
/**
* This is the language that your data will default to.
*
* This means that entries defined in the Lex data that do not
* have an explicit 'language' set will be interpreted as this
* language.
*
* So basically, if you're an American with American data, leave
* this as en-US. If you're a German speaker writing a skill
* that is primarily targeted at a German-speaking audience,
* then set this to de-DE and you don't need to explicitly
* set each entry to this.
*
* Then, when you go to translate into other languages, you can
* add on the explicit language markers in data. The overall
* mechanism allows you to skip this for the first language
* you write the skill in.
*
* Constructor fallback: "en-US".
*
* @see requestLanguage
*/
defaultLanguage?: LanguageCode;
/**
* This is the language that is coming in from the request. `Lex.get` uses
* it when no `language` option is passed.
*
* Constructor fallback: "en-US".
*
* @see defaultLanguage
*/
requestLanguage?: LanguageCode;
/**
* Default setting when concatenating lines. This will depend on how most of
* your data is structured. For example, it's designed so that you input
* your data separated by paragraphs, so the concat would be "paragraph".
* But if you already have data with <p> tags in your ssml, then you may
* want to set this to "delim" and do your own interpretation of using the
* multiple strings for the data.
*
* Defaults to delim & "" because most of the time, I find I
* just have a single line and want the single thing returned.
* This helps with templating, chunking, etc.
*/
defaultLineConcat?: LexLineConcat;
/**
* Default delimiter used when no `lineConcatDelim` option is passed.
*
* Defaults to delim & "" because most of the time, I find I just have a
* single line and want the single thing returned. This helps with
* templating, chunking, etc.
*/
defaultDelim?: string;
/**
* Default capitalization action when getting texts/ssmls.
*
* Constructor fallback: "none".
*/
defaultCapitalize?: LexCapitalize;
/**
* When using keyword filtering, this is the default mode to be used
* when not explicitly set in the lex consumer.
*
* Constructor fallback: "any".
*/
defaultKeywordMode?: KeywordMode;
/**
* When using props filtering, this is the default mode to be used
* when not explicitly set in the lex consumer.
*
* Constructor fallback: "prop".
*/
defaultPropsMode?: PropsFilterMode;
}
// export * from "./types";
// import { LanguageCode, KeywordMode, LexData, LexDatum, LexGetOptions, LexResultObj, LexCapitalize, LexLineConcat, PropsFilterMode, PropsFilter, PropsData } from './types';
/**
* Imports helper that has logging, among other things.
*/
// import * as help from 'helper-gib';
/**
* Lex is a helper for your lexical data, i.e. the things that you get
* Alexa to say. This can be used for i18n, but really it's a broader
* helper to create more dynamic speech/text for Alexa to say and
* present via cards.
*
* I am making this after learning my lessons with creating dynamic,
* alternative-laden text and/or ssml generation for use with both
* Alexa's speech, as well as outputting plain text to
* cards. I'm designing it to be (actually) simple to use, but with
* robustness allowed the more you become comfortable with it.
*
* Simple Usage
*
* To use it, you simply init what you want her to be able to say.
* Then, when you want to create her speech, you call `text` or `ssml`
* and pass in your options, the primary one being the `id`.
*
* For example, you could define the following data:
```
const data: LexData = {
'hi': [
{ texts: [ "Hi" ]}
]
}
```
* To access this, you would call `Lex._('hi').text` to simply get the
* plain text entry for "hi".
*
* Alternatives
*
* But there are a LOT of ways to say "hi", and this is the primary
* reason for using Lex: Alternatives. With Lex, multiple items with
* the same id are considered alternatives.
*
```
const data: LexData = {
'hi': [
{ texts: [ "Hi" ] },
{ texts: [ "Hello" ] },
{ texts: [ "Howdy" ] }
]
}
```
* Again, to access this, you would call the same line:
* `Lex._('hi').text`
*
* So by using the _same_ calling code, you could get any one of these
* texts as _alternatives_ for the "hi" lex datum. This is a huge
* difference between natural voice interaction and computer UI as we
* have known it up to now.
*
* If you want to get really fancy (looking forward to AI/ML), you can
* weight the various alternatives, for example if you want to only
* say "Howdy" a small percentage of the time. You could define this as follows:
*
```
const data: LexData = {
'hi': [
{ texts: [ "Hi" ] },
{ texts: [ "Hello" ] },
{ texts: [ "Howdy" ], weighting: 0.2 }
]
}
```
* Again, there is _no_ change to the calling code. This really allows
* for a wonderful layer of dynamicism, and is easy to do.
*
* Internationalization (i18n)
*
* You can have your text be localized, but not worry about it to start
* off with. It's _implicit_ i18n. So the above examples are actually
* not really attached to any language, even though I'm writing in
* English (en-US). This is because the i18n aspect relies on both the
* data and the retrieval of the data via the `language` param option.
*
```
const data: LexData = {
'hi': [
{ texts: [ "Hi" ] },
{ texts: [ "Hello" ] },
{ texts: [ "Howdy" ], weighting: 0.2 },
{ texts: [ "Cheers" ], language: "en-GB" },
{ texts: [ "Guten Tag" ], language: "de-DE" }
]
}
```
/**
* Lex is a helper for your lexical data, i.e. the things that you get
* Alexa to say. This can be used for i18n, but really it's a broader
* helper to create more dynamic speech/text for Alexa to say and
* present via cards.
*
* I am making this after learning my lessons with creating dynamic,
* alternative-laden text and/or ssml generation for use with both
* Alexa's speech, as well as outputting plain text to
* cards. I'm designing it to be (actually) simple to use, but with
* robustness allowed the more you become comfortable with it.
*
* Simple Usage
*
* To use it, you simply init what you want her to be able to say.
* Then, when you want to create her speech, you call `text` or `ssml`
* and pass in your options, the primary one being the `id`.
*
* For example, you could define the following data:
```
const data: LexData = {
'hi': [
{ texts: [ "Hi" ]}
]
}
```
* To access this, you would call `Lex._('hi').text` to simply get the
* plain text entry for "hi".
*
* Alternatives
*
* But there are a LOT of ways to say "hi", and this is the primary
* reason for using Lex: Alternatives. With Lex, multiple items with
* the same id are considered alternatives.
*
```
const data: LexData = {
'hi': [
{ texts: [ "Hi" ] },
{ texts: [ "Hello" ] },
{ texts: [ "Howdy" ] }
]
}
```
* Again, to access this, you would call the same line:
* `Lex._('hi').text`
*
* So by using the _same_ calling code, you could get any one of these
* texts as _alternatives_ for the "hi" lex datum. This is a huge
* difference between natural voice interaction and computer UI as we
* have known it up to now.
*
* If you want to get really fancy (looking forward to AI/ML), you can
* weight the various alternatives, for example if you want to only
* say "Howdy" a small percentage of the time. You could define this as follows:
*
```
const data: LexData = {
'hi': [
{ texts: [ "Hi" ] },
{ texts: [ "Hello" ] },
{ texts: [ "Howdy" ], weighting: 0.2 }
]
}
```
* Again, there is _no_ change to the calling code. This really allows
* for a wonderful layer of dynamicism, and is easy to do.
*
* Internationalization (i18n)
*
* You can have your text be localized, but not worry about it to start
* off with. It's _implicit_ i18n. So the above examples are actually
* not really attached to any language, even though I'm writing in
* English (en-US). This is because the i18n aspect relies on both the
* data and the retrieval of the data via the `language` param option.
*
```
const data: LexData = {
'hi': [
{ texts: [ "Hi" ] },
{ texts: [ "Hello" ] },
{ texts: [ "Howdy" ], weighting: 0.2 },
{ texts: [ "Cheers" ], language: "en-GB" },
{ texts: [ "Guten Tag" ], language: "de-DE" }
]
}
```
*
* You can get at these languages multiple ways:
*
* 1) Choose the language when instantiating Lex.
* `let lex = new Lex(data, { requestLanguage: "de-DE" });`
* Now, when you call `lex._(...)`, you will only return German
* data.
* 2) Override the default language upon calling for data:
* `lex._('hi', { language: "en-US" }).text;`
*/
export class Lex<TProps extends PropsData = PropsData> {
/** Log context prefix (the class name in brackets) prepended to console output of this class. */
protected lc: string = `[${Lex.name}]`;
/** The lexical data this instance serves, keyed by message id. Assigned from the constructor argument. */
data: LexData<TProps>;
/**
* Language that data entries without an explicit `language` are interpreted as.
*
* @see requestLanguage
*/
defaultLanguage: LanguageCode;
/** Line concatenation strategy `get` uses when no `lineConcat` option is passed. */
defaultLineConcat: LexLineConcat;
/**
* Delimiter `get` uses when no `lineConcatDelim` option is passed.
*
* Defaults to delim & "" because most of the time, I find I
* just have a single line and want the single thing returned.
* This helps with templating, chunking, etc.
*
* BREAKING CHANGE: This formerly defaulted to paragraphs, as
* I thought lines would mean paragraphs. No longer the case.
*/
defaultDelim: string;
/** Capitalization action `get` uses when no `capitalize` option is passed. */
defaultCapitalize: LexCapitalize;
/**
* This is the language that is coming in from the request. `get` uses it
* when no `language` option is passed.
*
* @see defaultLanguage
*/
requestLanguage: LanguageCode;
/** Keyword matching mode `get` uses when no `keywordMode` option is passed. */
defaultKeywordMode: KeywordMode;
/** Props filtering mode `get` uses when no `propsMode` option is passed. */
defaultPropsMode: PropsFilterMode;
constructor(
    /**
     * This is the initial lexical data that you want Alexa to be able to
     * say. You can always change this dynamically at runtime as well.
     */
    data: LexData<TProps>,
    /**
     * Optional defaults for this instance. The whole object may be omitted
     * (`new Lex(data)`), in which case every default below applies.
     */
    {
        /**
         * This is the language that your data will default to.
         * If a language isn't specified in `get`, `text`, or `ssml`, then this is used.
         *
         * This means that entries defined in the Lex data that do not
         * have an explicit 'language' set will be interpreted as this
         * language.
         *
         * So basically, if you're an American with American data, leave
         * this as en-US. If you're a German speaker writing a skill
         * that is primarily targeted at a German-speaking audience,
         * then set this to de-DE and you don't need to explicitly
         * set each entry to this.
         *
         * Then, when you go to translate into other languages, you can
         * add on the explicit language markers in data. The overall
         * mechanism allows you to skip this for the first language
         * you write the skill in.
         *
         * @see requestLanguage
         */
        defaultLanguage = "en-US",
        /** Language coming in from the request; used by `get` when no language option is passed. */
        requestLanguage = "en-US",
        /**
         * Defaults to delim & "" because most of the time, I find I
         * just have a single line and want the single thing returned.
         * This helps with templating, chunking, etc.
         *
         * BREAKING CHANGE: This formerly defaulted to paragraphs, as
         * I thought lines would mean paragraphs. No longer the case.
         */
        defaultLineConcat = LexLineConcat.delim,
        /** Delimiter paired with the "delim" line concat default; see `defaultLineConcat`. */
        defaultDelim = "",
        defaultCapitalize = "none",
        defaultKeywordMode = "any",
        defaultPropsMode = "prop",
    }: LexCtorOpts = {}
) {
    if (!data) { throw new Error(`data required (E: 2f8db30fa9d71d76db616ab110392c22)`); }
    this.data = data;
    // Destructuring defaults only cover `undefined`; the `??` fallbacks
    // additionally cover an explicit `null` coming from untyped callers.
    this.defaultLanguage = defaultLanguage ?? "en-US";
    this.defaultLineConcat = defaultLineConcat ?? LexLineConcat.delim;
    this.defaultDelim = defaultDelim ?? "";
    this.defaultCapitalize = defaultCapitalize ?? "none";
    this.requestLanguage = requestLanguage ?? "en-US";
    this.defaultKeywordMode = defaultKeywordMode ?? "any";
    this.defaultPropsMode = defaultPropsMode ?? "prop";
}
/**
 * Looks up the data stored under `id`, filters it by the given options,
 * picks one of the remaining alternatives and renders it as both plain text
 * and ssml (template refs expanded, template vars replaced, capitalized,
 * lines concatenated).
 *
 * NOTE: You'll probably want to actually use the `text` or `ssml`
 * functions instead of this one.
 *
 * @param id Lexical items with the same id are considered alternatives for the equivalent message, e.g. "Hello" and "Howdy".
 * @returns the rendered result, or `null` when no datum under `id` survives the filtering.
 * @throws if there is no data stored under `id`.
 * @see LexGetOptions
 */
get(
    id: string,
    {
        language = this.requestLanguage,
        specifier,
        keywords, keywordMode = this.defaultKeywordMode,
        lineIndex,
        lineConcat = this.defaultLineConcat, lineConcatDelim = this.defaultDelim,
        capitalize = this.defaultCapitalize,
        vars, ssmlVars,
        props, propsMode = this.defaultPropsMode,
        fnDatumPredicate,
    }: LexGetOptions<TProps> = {
        // simplest case default options
        language: this.requestLanguage,
        keywordMode: this.defaultKeywordMode,
        lineConcat: this.defaultLineConcat,
        lineConcatDelim: this.defaultDelim,
        capitalize: this.defaultCapitalize,
        propsMode: this.defaultPropsMode,
    }): LexResultObj<TProps> | null {
    const lc = `${this.lc}[${this.get.name}]`;
    try {
        if (logalot) { console.log(`${lc} starting... (I: e4e76a6bb301010fc13cf4456efcdc22)`); }

        const candidates = this.data[id];
        if (!candidates) { throw new Error(`Data id not found: ${id} (E: 4a79897167714dd5a34df77953072aaf)`); }

        const rawData = this.filterLexData({
            lexData: candidates,
            language, specifier,
            keywords, keywordMode,
            props: props as PropsFilter<TProps>, propsMode,
            fnDatumPredicate,
        });
        // Nothing matched the filters: report that with null rather than an error.
        if (rawData.length === 0) { return null; }

        const datum = this.pickDatum(rawData)!;

        // Shared rendering pipeline for both output kinds. Vars are replaced
        // after refs, so all text is fully expanded first.
        const render = (resultAs: "text" | "ssml", templateVars: typeof vars) => {
            const extracted = this.extractLines({ lexDatum: datum, resultAs, lineIndex });
            const expanded = this.replaceTemplateRefs({ lines: extracted, resultAs });
            const substituted = this.replaceTemplateVars({ lines: expanded, vars: templateVars });
            const cased = this.capitalizeLines({ lines: substituted, resultAs, capitalize });
            return this.concatLines({ lines: cased, lineType: resultAs, lineConcat, lineConcatDelim });
        };

        const text = render("text", vars);
        // ssml falls back to the text vars when no ssml-specific vars are given.
        const ssml = render("ssml", ssmlVars || vars);

        return { text, ssml, datum, rawData };
    } catch (error) {
        console.error(`${lc} ${error.message}`);
        throw error;
    } finally {
        if (logalot) { console.log(`${lc} complete.`); }
    }
}
/**
 * Does a reverse lookup for lex data ids that correspond to the find
 * criteria.
 *
 * @returns a map from each data id that has at least one matching LexDatum
 * to those matching datums, or `null` when nothing matches at all.
 * @throws if `fnDatumPredicate` is not provided (the only criterion implemented atow).
 */
find({ fnDatumPredicate }: LexFindOptions<TProps>): LexFindResults<TProps> | null {
    const lc = `${this.lc}[${this.find.name}]`;
    try {
        if (logalot) { console.log(`${lc} starting... (I: 35e26d234ca2d0e987b39ee939f29c22)`); }
        if (!fnDatumPredicate) { throw new Error(`only fnDatumPredicate implemented atow (E: 8c8babd2fa6fc4cb223b7b8c10ad5c22)`); }

        const results: LexFindResults<TProps> = {};
        for (const id of Object.keys(this.data)) {
            const hits = this.data[id].filter(datum => fnDatumPredicate(datum));
            if (hits.length === 0) { continue; }
            results[id] = hits;
        }

        const hasAnyMatch = Object.keys(results).length > 0;
        return hasAnyMatch ? results : null;
    } catch (error) {
        console.error(`${lc} ${error.message}`);
        throw error;
    } finally {
        if (logalot) { console.log(`${lc} complete.`); }
    }
}
/**
 * This is the original single function. It is just for backwards compatibility at this point
 * and simply forwards to {@link Lex.get}. Probably not needed...
 *
 * @param id lex data id to look up.
 * @param opts optional filtering/rendering options; when omitted, `get` applies the instance defaults.
 * @returns the result of {@link Lex.get} (`null` when no datum matches the filtering).
 * @deprecated
 */
_(id: string, opts?: LexGetOptions<TProps>): LexResultObj<TProps> | null {
    return this.get(id, opts);
}
// #region syntactic sugar calls for `get`
/**
 * Just syntactic sugar for {@link Lex.get}.
 *
 * @param id lex data id to look up.
 * @param opts optional filtering/rendering options; when omitted, `get` applies the instance defaults.
 * @returns the result of {@link Lex.get} (`null` when no datum matches the filtering).
 */
getVariant(id: string, opts?: LexGetOptions<TProps>): LexResultObj<TProps> | null {
    return this.get(id, opts);
}
/**
 * Just syntactic sugar for {@link Lex.get}.
 *
 * @param id lex data id to look up.
 * @param opts optional filtering/rendering options; when omitted, `get` applies the instance defaults.
 * @returns the result of {@link Lex.get} (`null` when no datum matches the filtering).
 */
variant(id: string, opts?: LexGetOptions<TProps>): LexResultObj<TProps> | null {
    return this.get(id, opts);
}
/**
 * Just syntactic sugar for {@link Lex.get}.
 *
 * @param id lex data id to look up.
 * @param opts optional filtering/rendering options; when omitted, `get` applies the instance defaults.
 * @returns the result of {@link Lex.get} (`null` when no datum matches the filtering).
 */
getTranslation(id: string, opts?: LexGetOptions<TProps>): LexResultObj<TProps> | null {
    return this.get(id, opts);
}
/**
 * Just syntactic sugar for {@link Lex.get}.
 *
 * @param id lex data id to look up.
 * @param opts optional filtering/rendering options; when omitted, `get` applies the instance defaults.
 * @returns the result of {@link Lex.get} (`null` when no datum matches the filtering).
 */
translate(id: string, opts?: LexGetOptions<TProps>): LexResultObj<TProps> | null {
    return this.get(id, opts);
}
/**
 * Just syntactic sugar for {@link Lex.get}.
 *
 * @param id lex data id to look up.
 * @param opts optional filtering/rendering options; when omitted, `get` applies the instance defaults.
 * @returns the result of {@link Lex.get} (`null` when no datum matches the filtering).
 */
getSynonym(id: string, opts?: LexGetOptions<TProps>): LexResultObj<TProps> | null {
    return this.get(id, opts);
}
/**
 * Just syntactic sugar for {@link Lex.get}.
 *
 * @param id lex data id to look up.
 * @param opts optional filtering/rendering options; when omitted, `get` applies the instance defaults.
 * @returns the result of {@link Lex.get} (`null` when no datum matches the filtering).
 */
synonym(id: string, opts?: LexGetOptions<TProps>): LexResultObj<TProps> | null {
    return this.get(id, opts);
}
/**
 * Just syntactic sugar for {@link Lex.get}.
 *
 * @param id lex data id to look up.
 * @param opts optional filtering/rendering options; when omitted, `get` applies the instance defaults.
 * @returns the result of {@link Lex.get} (`null` when no datum matches the filtering).
 */
getI18n(id: string, opts?: LexGetOptions<TProps>): LexResultObj<TProps> | null {
    return this.get(id, opts);
}
/**
 * Just syntactic sugar for {@link Lex.get}.
 *
 * @param id lex data id to look up.
 * @param opts optional filtering/rendering options; when omitted, `get` applies the instance defaults.
 * @returns the result of {@link Lex.get} (`null` when no datum matches the filtering).
 */
i18n(id: string, opts?: LexGetOptions<TProps>): LexResultObj<TProps> | null {
    return this.get(id, opts);
}
// #endregion syntactic sugar calls for `get`
/**
 * Pulls the lines out of a datum in the requested form, falling back to the
 * other form when the requested one is not defined in the data:
 *
 * - text wanted, texts defined: the texts as-is.
 * - text wanted, only ssmls defined: the ssmls with the ssml stripped (a warning is logged).
 * - ssml wanted, ssmls defined: the ssmls as-is.
 * - ssml wanted, only texts defined: the texts as-is.
 *
 * If you only want a single line out of multiple strings in the
 * texts/ssmls array, then use lineIndex; the result is then a one-element array.
 *
 * @param param0 the datum, the wanted form and the optional 0-based line index.
 * @returns the selected lines. When no lineIndex is given and no stripping is
 * needed, this is the datum's own array (not a copy).
 * @throws if the datum has neither texts nor ssmls.
 */
private extractLines({
    lexDatum,
    resultAs,
    lineIndex
}: {
    lexDatum: LexDatum<TProps>,
    resultAs: "text" | "ssml",
    lineIndex: number | undefined,
}): string[] {
    const lc = `${this.lc}[${this.extractLines.name}]`;
    try {
        if (logalot) { console.log(`${lc} starting... (I: 0c21a7ed7c1b56251f39342380c47922)`); }

        const hasTexts = !!lexDatum.texts && lexDatum.texts.length > 0;
        const hasSsmls = !!lexDatum.ssmls && lexDatum.ssmls.length > 0;
        // at least one of texts/ssmls must be defined in data
        if (!hasTexts && !hasSsmls) {
            throw new Error(`Invalid lexDatum. Datum texts and ssmls are both undefined. lexDatum: ${JSON.stringify(lexDatum)}.`);
        }

        // 0 is a valid index, so it must not be treated as "not provided".
        const wantsSingleLine = lineIndex === 0 || !!lineIndex;

        if (resultAs === "text") {
            if (hasTexts) {
                // text wanted, text defined in data
                return wantsSingleLine ? [lexDatum.texts![lineIndex!]] : lexDatum.texts!;
            }
            // text wanted, but no text defined in data
            console.warn(`building text lines from ssml (W: cfb59a05efda475ab7511acc659a5ee3)`);
            if (wantsSingleLine) {
                return [Ssml.stripSsml(lexDatum.ssmls![lineIndex!])];
            }
            return lexDatum.ssmls!.map(ssml => Ssml.stripSsml(ssml));
        }

        // ssml wanted: use the ssmls when defined, otherwise the texts as they are
        const source = hasSsmls ? lexDatum.ssmls! : lexDatum.texts!;
        return wantsSingleLine ? [source[lineIndex!]] : source;
    } catch (error) {
        console.error(`${lc} ${error.message}`);
        throw error;
    } finally {
        if (logalot) { console.log(`${lc} complete.`); }
    }
}
/**
 * Replaces any embedded template variables, e.g. $name, $0, etc.
 * Note the format is "$" followed by one or more word characters
 * ([a-zA-Z0-9_]).
 *
 * This is different than template references.
 *
 * Every occurrence of `$<varName>` is replaced with `vars[varName]`, inserted
 * literally: `$`-sequences inside a value (e.g. `$&`, `$$`) are NOT
 * interpreted as replacement patterns. Longer variable names are applied
 * first, so `$name` cannot clobber the beginning of `$namespace` when both
 * are supplied.
 *
 * @param param0 `lines` to process and the `vars` dictionary of placeholder name to replacement.
 * @returns the processed lines; the very same `lines` array when `vars` is not provided.
 * @see {replaceTemplateRefs}
 */
private replaceTemplateVars({
    lines,
    vars,
}: {
    lines: string[],
    vars?: { [key: string]: string; },
}): string[] {
    const lc = `${this.lc}[${this.replaceTemplateVars.name}]`;
    try {
        if (logalot) { console.log(`${lc} starting... (I: a756e7603d03d427c60c202cb4f6dd22)`); }
        if (!vars) { return lines; }
        const values = vars;

        // Longest names first: a shorter name that is a prefix of a longer one
        // must not get to consume the longer placeholder's head.
        const varNames = Object.keys(values).sort((a, b) => b.length - a.length);
        if (logalot) { console.log(`${lc} varNames: ${JSON.stringify(varNames)} (I: d9351cf582420b35eb3eceb923e64f22)`); }

        // Build each pattern once (not once per line); the name is escaped so
        // it is always matched literally.
        const replacers = varNames.map(varName => ({
            varName,
            pattern: new RegExp('\\$' + varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), "g"),
        }));

        return lines.map(line => replacers.reduce((l, { varName, pattern }) => {
            if (logalot) { console.log(`${lc} varName: ${varName} (I: 8c0e4261972945d2bb4624efad14870b)`); }
            // A replacer function keeps the value verbatim, whereas a
            // replacement string would have its `$` sequences interpreted.
            return l.replace(pattern, () => values[varName]);
        }, line));
    } catch (error) {
        console.error(`${lc} ${error.message}`);
        throw error;
    } finally {
        if (logalot) { console.log(`${lc} complete.`); }
    }
}
/**
 * Replaces any embedded template references, e.g. $(hi). Note the
 * parenthesis around "hi". This means it is a reference to another lex
 * datum. A reference may carry JSON get-options after a pipe, i.e.
 * `$(id|{...options})`; everything after the first pipe is parsed as the
 * options, so the JSON itself may contain `|` (e.g. as a delimiter).
 *
 * This is different than template variables, e.g. $name, $0, etc.
 *
 * The template refs can be recursive, i.e. datum A can include a ref to
 * datum B which includes a template to datum C. But these cannot be
 * self-referencing, i.e. C cannot then include a reference back to A.
 *
 * Template refs CANNOT work with props for filtering, as these require
 * lambda functions.
 *
 * When a reference specifies no `lineConcat`, the referenced lines are joined
 * with the "delim" strategy and an empty delimiter.
 *
 * @param param0 the source `lines` and the form (`resultAs`) the referenced data is rendered in.
 * @returns the lines with every template reference expanded.
 * @throws if a referenced id does not exist, its options are not valid JSON,
 * or no datum under the referenced id matches the options.
 * @see {replaceTemplateVars}
 */
private replaceTemplateRefs({
    lines,
    resultAs,
}: {
    /**
     * source lines from data
     */
    lines: string[],
    /**
     * how do you want the lines back?
     */
    resultAs: "ssml" | "text",
}): string[] {
    const lc = `${this.lc}[${this.replaceTemplateRefs.name}]`;
    try {
        if (logalot) { console.log(`${lc} starting... (I: 548949b651bbec84de5b25f3c01cf422)`); }
        // `$(`, an id, then an optional `|options` tail, then `)`. The tail is
        // `*` (not `+`) so that single-character ids such as `$(a)` match too.
        const regex = /\$\([\w-]+\|?[\w-|\{\}:'"\s,\[\].,<>]*\)/;
        const replaceRefsSingleLine = (line: string): string => {
            let expanded = line;
            let match = regex.exec(expanded);
            // Expand the left-most reference until none remain in the line.
            while (match) {
                // strip the $()
                const template = match[0].substring(2, match[0].length - 1);
                // id|options — split on the FIRST pipe only, so that option
                // values containing `|` are kept intact.
                const iPipe = template.indexOf('|');
                const id = iPipe === -1 ? template : template.substring(0, iPipe);
                const options: LexGetOptions<TProps> = iPipe === -1 ?
                    {} :
                    JSON.parse(template.substring(iPipe + 1)) as LexGetOptions<TProps>;
                if (!options.lineConcat) {
                    options.lineConcat = LexLineConcat.delim;
                    options.lineConcatDelim = "";
                }
                const replacementResult = this.get(id, options);
                if (!replacementResult) {
                    // `get` returns null when filtering leaves no datum; fail
                    // with a descriptive error instead of a null dereference.
                    throw new Error(`template ref $(${template}) did not resolve to any lex datum. id: ${id} (E: 6c0d1f5a9b7e4c33a1d2e8f4b7a95c22)`);
                }
                const replacement = resultAs === "text" ?
                    replacementResult.text :
                    replacementResult.ssml;
                // Replacer function: insert the text verbatim, without
                // interpreting `$` sequences it may contain (e.g. `$1`, `$&`).
                expanded = expanded.replace(regex, () => replacement);
                match = regex.exec(expanded);
            }
            return expanded;
        };
        return lines.map(line => replaceRefsSingleLine(line));
    } catch (error) {
        console.error(`${lc} ${error.message}`);
        throw error;
    } finally {
        if (logalot) { console.log(`${lc} complete.`); }
    }
}
/**
* Capitalizes the given lines depending on the given
* capitalize options.
*
* @param param0
*/
private capitalizeLines({
    lines,
    resultAs,
    capitalize,
}: {
    lines: string[],
    resultAs: "ssml" | "text",
    capitalize: LexCapitalize,
}): string[] {
    // Changes the case of the first character of the first line or of every
    // line, per `capitalize`. The incoming `lines` array is never mutated;
    // use the returned array.
    const lc = `${this.lc}[${this.capitalizeLines.name}]`;
    try {
        if (logalot) { console.log(`${lc} starting... (I: b0ec8263fd94c68ee53fdf61534c8322)`); }

        /**
         * Applies `fnCase` to the single character to be re-cased and
         * splices the result back in.
         *
         * For text that is the first character. For ssml lines starting
         * with a tag it is the character right after the first ">".
         */
        const recaseFirstChar = (line: string, fnCase: (ch: string) => string): string => {
            const iTarget = resultAs !== "text" && line.charAt(0) === "<" ?
                line.indexOf(">") + 1 :
                0;
            // Empty line, or an ssml line with nothing after its first tag.
            if (iTarget >= line.length) { return line; }
            // Replace exactly ONE source character: a case mapping can change
            // length (e.g. "ß" -> "SS") and must not swallow what follows.
            return line.substring(0, iTarget) +
                fnCase(line.charAt(iTarget)) +
                line.substring(iTarget + 1);
        };
        const upper = (line: string) => recaseFirstChar(line, ch => ch.toUpperCase());
        const lower = (line: string) => recaseFirstChar(line, ch => ch.toLowerCase());

        /** Re-cases only the first line, returning a new array. */
        const recaseFirstLine = (fn: (line: string) => string): string[] =>
            lines.length === 0 ? [] : [fn(lines[0]), ...lines.slice(1)];

        switch (capitalize) {
            case LexCapitalize.upperfirst:
                return recaseFirstLine(upper);
            case LexCapitalize.uppereach:
                return lines.map(upper);
            case LexCapitalize.lowerfirst:
                return recaseFirstLine(lower);
            case LexCapitalize.lowereach:
                return lines.map(lower);
            case LexCapitalize.none:
                return lines;
            default:
                throw new Error(`Unknown LexCapitalize: ${capitalize}`);
        }
    } catch (error) {
        console.error(`${lc} ${error.message}`);
        throw error;
    } finally {
        if (logalot) { console.log(`${lc} complete.`); }
    }
}
/**
* Concatenates lines depending on given params.
*
* @param param0
*/
private concatLines({
    lines,
    lineType,
    lineConcat,
    lineConcatDelim
}: {
    lines: string[],
    lineType: "ssml" | "text",
    lineConcat: LexLineConcat,
    lineConcatDelim: string,
}): string {
    // Joins `lines` into one string. How they are joined depends on
    // `lineConcat` and on whether the lines are text or ssml.
    // An empty `lines` array yields "" in every mode.
    const lc = `${this.lc}[${this.concatLines.name}]`;
    try {
        if (logalot) { console.log(`${lc} starting... (I: 59e0a7b89457283751b376295b61bb22)`); }

        /**
         * Ssml concat shared by the p, n and s modes: wraps every line in
         * <tag>...</tag>, unless the first line already starts with the
         * opening tag (case-insensitive). In that case all lines are assumed
         * to be wrapped by the user and are simply joined.
         */
        const concatSsmlWrapped = (tagName: "p" | "s"): string => {
            const openTag = `<${tagName}>`;
            // Guard against an empty array instead of dereferencing undefined.
            const firstLine = lines.length > 0 ? lines[0] : "";
            const alreadyWrapped =
                firstLine.substring(0, openTag.length).toLowerCase() === openTag;
            return alreadyWrapped ?
                lines.join('') :
                lines.map(l => `<${tagName}>${l}</${tagName}>`).join('');
        };

        switch (lineConcat) {
            case LexLineConcat.p:
                return lineType === "text" ?
                    lines.join("\n\n") :
                    concatSsmlWrapped("p");
            case LexLineConcat.s:
                if (lineType === "text") {
                    // Append period if not in data.
                    // e.g. Data may just be "hello" and we want to
                    // make it a sentence by appending "."
                    return lines.map(l => {
                        const lastChar = l.substring(l.length - 1);
                        return [".", "!", "?"].includes(lastChar) ?
                            l :
                            l + ".";
                    }).join(' ');
                }
                return concatSsmlWrapped("s");
            case LexLineConcat.n:
                // ssml has no newline concat, so it falls back to paragraphs
                return lineType === "text" ?
                    lines.join('\n') :
                    concatSsmlWrapped("p");
            case LexLineConcat.delim:
                return lines.join(lineConcatDelim);
            default:
                throw new Error(`Unknown LexLineConcat: ${lineConcat} (E: e19f9c44217f4eb5999e2085d3f77b5c)`);
        }
    } catch (error) {
        console.error(`${lc} ${error.message}`);
        throw error;
    } finally {
        if (logalot) { console.log(`${lc} complete.`); }
    }
}
/**
* Filters the given lexData per the language, specifier,
* and keywords.
*
* @param param0 Filter params
* @returns filtered datum array
*/
private filterLexData({
    lexData,
    language,
    specifier,
    keywords, keywordMode,
    props, propsMode,
    fnDatumPredicate,
}: {
    lexData: LexDatum<TProps>[],
    language: LanguageCode,
    specifier: string | undefined,
    keywords: string[] | undefined,
    keywordMode: KeywordMode,
    props: PropsFilter<TProps> | undefined,
    propsMode: PropsFilterMode,
    fnDatumPredicate: LexDatumPredicate<TProps> | undefined,
}): LexDatum<TProps>[] {
    // Each filter is applied only when its argument is given, in this order:
    // language, specifier, keywords, props, custom predicate.
    const lc = `${this.lc}[${this.filterLexData.name}]`;
    try {
        if (logalot) { console.log(`${lc} starting... (I: 1091755e98f390954d296575d1096722)`); }

        let result = lexData.concat(); // makes a copy

        if (language) {
            result = this.filterLanguage(result, language);
        }

        if (specifier) {
            // a datum without a specifier can never equal a truthy specifier
            result = result.filter(d => d.specifier === specifier);
        }

        if (keywords && keywords.length > 0) {
            // Keyword matching is case-insensitive in EVERY mode: both the
            // given keywords and the datum's keywords are lower-cased.
            const kwArgs = keywords.map(kw => kw.toLocaleLowerCase());
            const lowerKeywordsOf = (d: LexDatum<TProps>): string[] =>
                (d.keywords || []).map(x => x.toLocaleLowerCase());

            switch (keywordMode) {
                case "any":
                    // at least one given keyword is on the datum
                    result = result.filter(d => {
                        const dKeywords = lowerKeywordsOf(d);
                        return kwArgs.some(kwArg => dKeywords.includes(kwArg));
                    });
                    break;
                case "all":
                    // every given keyword is on the datum
                    result = result.filter(d => {
                        const dKeywords = lowerKeywordsOf(d);
                        return kwArgs.every(kwArg => dKeywords.includes(kwArg));
                    });
                    break;
                case "none":
                    // no given keyword is on the datum
                    result = result.filter(d => {
                        const dKeywords = lowerKeywordsOf(d);
                        return kwArgs.every(kwArg => !dKeywords.includes(kwArg));
                    });
                    break;
                default:
                    // best effort: log and skip keyword filtering
                    console.error(`${lc} Unknown keywordMode: ${keywordMode}`);
                    break;
            }
        }

        if (props) {
            result = this.filterProps({ result, props, propsMode });
        }

        if (fnDatumPredicate) {
            result = result.filter(x => fnDatumPredicate(x));
        }

        return result;
    } catch (error) {
        console.error(`${lc} ${error.message}`);
        throw error;
    } finally {
        if (logalot) { console.log(`${lc} complete.`); }
    }
}
/**
* executes the property filter on the given (intermediate) result.
*/
private filterProps({
result,
props,
propsMode
}: {
result: LexDatum<TProps>[],
/**
* Filters either per prop or per the entire props object.
*
* Here are ho