@makakwastaken/ts-edifact
Version:
Edifact parser library
238 lines • 10 kB
JavaScript
/**
* @author Stefan Partheymüller
* @copyright 2021 Stefan Partheymüller
* @license Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { UNECEPageParser } from './unecePageParser';
/**
 * States of the structure page parser's state machine.
 * Every key maps to a string value identical to the key itself.
 */
const State = {
    initial: 'initial',
    messageStructureStart: 'messageStructureStart',
    messageStructureEnd: 'messageStructureEnd',
    headerSection: 'headerSection',
    detailSection: 'detailSection',
    summarySection: 'summarySection',
    beforeDetailSection: 'beforeDetailSection',
    segmentPosition: 'segmentPosition',
    segmentGroup: 'segmentGroup',
    segmentName: 'segmentName',
    segmentDescription: 'segmentDescription',
};
/**
 * State machine definition handed to the base parser: the initial state plus
 * the list of permitted `from` -> `to` transitions. Each pair below is
 * expanded into a `{ from, to }` object, in the order listed.
 */
const SM_DEFINITION = {
    initial: State.initial,
    transitions: [
        [State.initial, State.messageStructureStart],
        [State.messageStructureStart, State.headerSection],
        [State.messageStructureStart, State.segmentPosition],
        [State.headerSection, State.segmentPosition],
        [State.detailSection, State.segmentPosition],
        [State.summarySection, State.segmentPosition],
        [State.segmentPosition, State.segmentGroup],
        [State.segmentPosition, State.messageStructureEnd],
        [State.segmentGroup, State.segmentName],
        [State.segmentGroup, State.segmentPosition],
        [State.segmentName, State.segmentDescription],
        [State.segmentDescription, State.segmentPosition],
        [State.segmentDescription, State.detailSection],
        [State.segmentDescription, State.summarySection],
    ].map(([from, to]) => ({ from, to })),
};
/**
 * This class is capable of parsing legacy UN/EDIFACT message type
 * specification pages from UNECE up to version D99A.
 *
 * The text nodes of the page are fed through a state machine that fills the
 * message structure definition of the given spec and records the names of
 * the segments it encounters in `segmentNames`.
 */
export class UNECEStructurePageParser extends UNECEPageParser {
    /**
     * Distinct segment names in order of first occurrence.
     * UNH, UNS and UNT are never recorded (see `addSegmentName`).
     */
    segmentNames;
    /**
     * @param spec The specification object whose `messageStructureDefinition`
     * receives the parsed structure.
     */
    constructor(spec) {
        super(SM_DEFINITION);
        // NOTE(review): `setupHandler` reads `this.spec`; presumably the base
        // class exposes `_spec` through an accessor of that name - confirm.
        this._spec = spec;
        this.segmentNames = [];
    }
    /**
     * Extends the handler returned by the base class with `onText` and
     * `onOpenTag` callbacks that drive the state machine.
     *
     * @returns The handler object obtained from `super.setupHandler()`.
     */
    setupHandler() {
        const helper = super.setupHandler();
        // `stack[index]` is always the content array that receives the next
        // parsed item; `stack[0]` is the top-level message structure.
        let index = 0;
        const stack = [];
        // Drops every nested segment group so that items are appended to the
        // top-level structure again.
        const resetStack = () => {
            for (; index > 0; index--) {
                stack.pop();
            }
        };
        let isSegmentGroupEnd = false;
        let section;
        let name;
        stack.push(this.spec.messageStructureDefinition);
        helper.onText = (text) => {
            switch (this.sm.state) {
                case State.initial:
                    if (text.includes('Message structure')) {
                        this.sm.transition(State.messageStructureStart);
                    }
                    break;
                case State.messageStructureStart:
                    if (text.includes('HEADER SECTION')) {
                        this.sm.transition(State.headerSection);
                        section = 'header';
                        this.sm.transition(State.segmentPosition);
                    }
                    break;
                case State.segmentPosition:
                    // A position is recognised by four consecutive digits;
                    // any other text ends the message structure.
                    if (/[0-9]{4}/.test(text)) {
                        this.sm.transition(State.segmentGroup);
                    }
                    else {
                        this.sm.transition(State.messageStructureEnd);
                    }
                    break;
                case State.segmentGroup:
                    if (text.includes('Segment group')) {
                        isSegmentGroupEnd = false;
                        const group = this.parseSegmentGroup(section, text);
                        const level = this.parseSegmentGroupLevel(text);
                        const delta = level - index;
                        if (delta <= 0) {
                            // The new group is not nested deeper than the
                            // current one: close groups until the stack top
                            // is the parent of the new group.
                            for (let i = 0; i <= -delta; i++) {
                                stack.pop();
                                index--;
                            }
                        }
                        stack[index].push(group);
                        stack.push(group.content);
                        index++;
                        // reset section assignment after first segment group
                        section = undefined;
                        this.sm.transition(State.segmentPosition);
                    }
                    else {
                        // If the previous segment group has ended and there
                        // is no new segment group => reset the stack.
                        if (isSegmentGroupEnd) {
                            resetStack();
                        }
                        this.sm.transition(State.segmentName);
                    }
                    break;
                case State.segmentName:
                    name = text;
                    this.addSegmentName(name);
                    this.sm.transition(State.segmentDescription);
                    break;
                case State.segmentDescription: {
                    const item = this.parseSegment(name, section, text);
                    stack[index].push(item);
                    isSegmentGroupEnd = this.isSegmentGroupEnd(text);
                    const detailSection = text.includes('DETAIL SECTION');
                    const summarySection = text.includes('SUMMARY SECTION');
                    if (detailSection || summarySection) {
                        // reset the stack if a new section begins
                        resetStack();
                        if (detailSection) {
                            section = 'detail';
                            this.sm.transition(State.detailSection);
                        }
                        else {
                            section = 'summary';
                            this.sm.transition(State.summarySection);
                        }
                    }
                    this.sm.transition(State.segmentPosition);
                    break;
                }
                case State.messageStructureEnd:
                    break;
                default:
                    this.throwInvalidParserState(this.sm.state);
            }
        };
        helper.onOpenTag = (name) => {
            // Without a 'HEADER SECTION' text, the first anchor tag after
            // the structure start moves on to the segment positions.
            if (this.sm.state === State.messageStructureStart && name === 'a') {
                this.sm.transition(State.segmentPosition);
            }
        };
        return helper;
    }
    /**
     * Records a segment name unless it is UNH, UNS or UNT or has already
     * been recorded.
     *
     * @param name The segment name (e.g. BGM)
     */
    addSegmentName(name) {
        const excludeSegmentNames = ['UNH', 'UNS', 'UNT'];
        if (excludeSegmentNames.includes(name) ||
            this.segmentNames.includes(name)) {
            return;
        }
        this.segmentNames.push(name);
    }
    /**
     * Parses a segment group line.
     *
     * @param section The section label to attach to the group ('header',
     * 'detail' or 'summary' as assigned by `setupHandler`), or undefined
     * @param descriptionString The text containing "Segment group <n>"
     * followed by the status flag (M or C) and the repetition count
     * @returns The parsed group with an empty `content` array
     * @throws Error if the text does not look like a segment group line
     */
    parseSegmentGroup(section, descriptionString) {
        // `[MC]` instead of `[M|C]`: inside a character class `|` is a
        // literal, so a pipe later on the line was captured as the status.
        const regex = /^.* (Segment group \d*).*\s*([MC])\s*(\d*).*/;
        const matches = regex.exec(descriptionString);
        if (!matches) {
            throw new Error('Invalid segment description string');
        }
        const [, name, mandatoryString, repetitionString] = matches;
        return {
            name,
            content: [],
            mandatory: mandatoryString === 'M',
            repetition: Number.parseInt(repetitionString, 10),
            data: undefined,
            section: section || undefined,
        };
    }
    /**
     * Parses a segment (e.g. UNH, UNT, UNS)
     * @param name The name of the segment (e.g. UNH)
     * @param section The section label to attach to the segment ('header',
     * 'detail' or 'summary' as assigned by `setupHandler`), or undefined
     * @param descriptionString The description of the segment, followed by
     * the status flag (M or C) and the repetition count
     * @returns The parsed segment
     * @throws Error if the description does not contain a status flag
     */
    parseSegment(name, section, descriptionString) {
        // `[MC]` instead of `[M|C]`: a literal pipe is not a status flag.
        const regex = /^([a-zA-Z /\\-]*)\s*?([MC])\s*?([0-9]*?)([^0-9]*)$/;
        const matches = regex.exec(descriptionString);
        if (!matches) {
            throw new Error(`${name}: Invalid segment description string: ${descriptionString}`);
        }
        const mandatoryString = matches[2];
        const repetitionString = matches[3];
        return {
            content: name,
            mandatory: mandatoryString === 'M',
            repetition: Number.parseInt(repetitionString, 10),
            data: undefined,
            section: section || undefined,
        };
    }
    /**
     * Determines the nesting level of a segment group from the characters
     * that follow the last digit of its description line.
     *
     * @param descriptionString The segment group line including its line
     * ending
     * @returns The position of the 'Ŀ' marker counted from the end of the
     * line, not counting the line ending (the marker being the last
     * character yields 1)
     * @throws Error if the text contains no digit
     */
    parseSegmentGroupLevel(descriptionString) {
        const regex = /^.*[0-9]+([^0-9]*)$/;
        const matches = regex.exec(descriptionString);
        if (!matches) {
            throw new Error('Invalid segment description string');
        }
        // Create the "level string" by reversing the segment group description.
        const levelString = Array.from(matches[1]).reverse().join('');
        let normalization = 0;
        // In some message type specifications the segment group description
        // ends with LF and sometimes with CRLF.
        // Make sure both cases are covered:
        if (levelString.charCodeAt(0) !== 10) {
            console.warn(`Unrecognized character in level string: ${levelString[0]} (${levelString.charCodeAt(0)})`);
        }
        else if (levelString.charCodeAt(1) === 13) {
            // Skip the CR so that LF and CRLF endings give the same level.
            normalization = 1;
        }
        return levelString.indexOf('Ŀ') - normalization;
    }
    /**
     * Tells whether a segment description carries the end-of-group marker,
     * i.e. one or more digits directly followed by the marker character.
     *
     * @param descriptionString The segment description text
     * @returns true if the marker is present
     */
    isSegmentGroupEnd(descriptionString) {
        return /\d+�+/.test(descriptionString);
    }
}
//# sourceMappingURL=uneceStructurePageParser.js.map