// @accordproject/concerto-util
// Version:
// Utilities for Concerto Modeling Language
// 63 lines (62 loc) • 2.74 kB
// JavaScript
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
;
// Flags the exports object with a non-writable `__esModule: true` property
// (presumably the interop marker emitted by the compiler that produced this file).
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder so the export key exists; the actual pattern is assigned below.
exports.ID_REGEX = void 0;
// Function declarations are hoisted, so the export may precede the definition.
exports.normalizeIdentifier = normalizeIdentifier;
// Conforms to Concerto Spec for identifiers
//
// The pattern is anchored at both ends, so the whole string must match and the
// empty string never does. The `u` flag is required for the `\p{...}` classes.
//  - First character: a letter (Lu, Ll, Lt, Lm, Lo), a letter-number (Nl),
//    `$`, `_`, or a literal backslash-u escape followed by four hex digits.
//  - Remaining characters: any of the above, plus combining marks (Mn, Mc),
//    decimal digits (Nd), connector punctuation (Pc), U+200C and U+200D.
exports.ID_REGEX = /^(\p{Lu}|\p{Ll}|\p{Lt}|\p{Lm}|\p{Lo}|\p{Nl}|\$|_|\\u[0-9A-Fa-f]{4})(?:\p{Lu}|\p{Ll}|\p{Lt}|\p{Lm}|\p{Lo}|\p{Nl}|\$|_|\\u[0-9A-Fa-f]{4}|\p{Mn}|\p{Mc}|\p{Nd}|\p{Pc}|\u200C|\u200D)*$/u;
/**
 * Function that attempts to normalize arbitrary strings
 * into valid Concerto identifiers.
 *
 * The input is transformed in order:
 *  1. A leading character that is only legal in a non-leading position
 *     (decimal digit, combining mark, or connector punctuation other than
 *     `_`) is prefixed with an underscore.
 *  2. Whitespace, joiners and a fixed set of separator characters become `_`.
 *  3. Every remaining character that is not allowed in an identifier, and
 *     every UTF-16 surrogate code unit, is replaced by `_` followed by its
 *     lowercase hexadecimal code.
 *  4. The result is optionally truncated.
 *
 * @param {string|null|undefined} identifier - the input value; `null` and
 *   `undefined` are converted to the strings 'null' and 'undefined'
 * @param {number} [truncateLength=-1] - Length (in UTF-16 code units) at which
 *   to truncate the identifier; values that are not greater than 0 disable
 *   truncation
 * @returns {string} - An identifier that meets the Concerto specification
 * @throws {Error} if the input is neither a string nor null/undefined, or if
 *   the normalized value still fails `ID_REGEX` (for example the empty string)
 */
function normalizeIdentifier(identifier, truncateLength = -1) {
    // Escapes each code point of the captured text as `_<hex>`.
    const replacer = (_match, group1) => {
        let escapedChar = '';
        // Loop through characters with multiple code points
        for (const codePoint of group1) {
            escapedChar += `_${codePoint.codePointAt(0).toString(16)}`;
        }
        return escapedChar;
    };
    // Stringify null & undefined values
    let result = identifier !== null && identifier !== void 0 ? identifier : String(identifier);
    if (typeof result !== 'string') {
        throw new Error(`Unsupported identifier type, '${typeof result}'.`);
    }
    result = result
        // 1. If the identifier begins with a character that may only appear
        //    after the first position (digit, combining mark, or connector
        //    punctuation other than '_'), add a leading underscore. Step 3a
        //    deliberately leaves these characters alone, so without the prefix
        //    the final validity check would reject the result.
        .replace(/^(?!_)[\p{Nd}\p{Mn}\p{Mc}\p{Pc}]/u, '_$&')
        // 2. Substitute Whitespace, and joiners
        .replace(/[-‐−@#:;><|/\\\u200c\u200d]/g, '_')
        .replace(/\s/g, '_')
        // 3a. Replace Invalid Characters
        .replace(/(?!\p{Lu}|\p{Ll}|\p{Lt}|\p{Lm}|\p{Lo}|\p{Nl}|\$|_|\p{Mn}|\p{Mc}|\p{Nd}|\p{Pc}|\u200C|\u200D|\\u[0-9A-Fa-f]{4})(.)/gu, replacer)
        // 3b. Escape Surrogate Pairs (no `u` flag: each code unit is matched
        //     and escaped individually)
        .replace(/([\uD800-\uDFFF])/g, replacer);
    // 4. Optionally truncate the identifier
    if (truncateLength > 0) {
        result = result.substring(0, truncateLength);
    }
    // Check validity
    if (!exports.ID_REGEX.test(result)) {
        throw new Error(`Unexpected error. Not able to escape identifier '${result}'.`);
    }
    return result;
}