UNPKG

@makakwastaken/ts-edifact

Version:
477 lines 21.8 kB
/**
 * @author Roman Vottner
 * @copyright 2020 Roman Vottner
 * @license Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import { DomHandler, Parser } from 'htmlparser2';
import { HttpClient } from '../httpClient';
import { isDefined } from '../util';
import { Dictionary, } from '../validator';

/**
 * Container for everything the parser extracts for one message type:
 * the header values plus the segment, element and code-value tables and
 * the (nested) message structure definition.
 */
export class EdifactMessageSpecificationImpl {
    messageType;
    version;
    release;
    controllingAgency;
    segmentTable = new Dictionary();
    elementTable = new Dictionary();
    componentValueTable = new Dictionary();
    messageStructureDefinition = [];

    /**
     * @param {string} messageType - Value read after "Message Type :" on the message page.
     * @param {string} version - Value read after "Version :".
     * @param {string} release - Value read after "Release :".
     * @param {string} controllingAgency - Value read after "Contr. Agency :".
     */
    constructor(messageType, version, release, controllingAgency) {
        this.messageType = messageType;
        this.version = version;
        this.release = release;
        this.controllingAgency = controllingAgency;
    }

    /**
     * @returns {string} `<version><release>_<messageType>`.
     */
    type() {
        return `${this.version + this.release}_${this.messageType}`;
    }

    /**
     * @returns {string} Version and release concatenated.
     */
    versionAbbr() {
        return this.version + this.release;
    }
}

/**
 * States of the message-page state machine used by `parseMessagePage`.
 * Module-private; only the forward (name -> number) mapping is used.
 */
const Part = Object.freeze({
    BeforeStructureDef: 0,
    RefLink: 1,
    Pos: 2,
    Tag: 3,
    Deprecated: 4,
    Name: 5,
    AfterStructureDef: 6,
});

/**
 * States used while scanning segment and component definition pages.
 * Module-private; only the forward (name -> number) mapping is used.
 */
const SegmentPart = Object.freeze({
    BeforeStructureDef: 0,
    Data: 1,
    AfterStructureDef: 2,
});

/**
 * Downloads the UNECE message definition page for a given directory
 * version and message type and turns it, together with the segment and
 * code-list pages it links to, into an `EdifactMessageSpecificationImpl`.
 */
export class UNECEMessageStructureParser {
    version;
    type;
    httpClient;

    /**
     * @param {string} version - Directory version; lower-cased before use in the URL.
     * @param {string} type - Message type; lower-cased before use in the URL.
     */
    constructor(version, type) {
        this.version = version.toLowerCase();
        this.type = type.toLowerCase();
        const baseUrl = `https://service.unece.org/trade/untdid/${this.version}/trmd/${this.type}_c.htm`;
        this.httpClient = new HttpClient(baseUrl);
    }

    /**
     * Runs `regex` once against `text` and returns one entry of the match.
     *
     * @param {string} text - Text to search.
     * @param {RegExp} regex - Expression to execute.
     * @param {number} [index=0] - Index into the match array (0 = whole match).
     * @returns {string} The selected entry, or `''` when `isDefined` rejects the match result.
     */
    extractTextValue(text, regex, index = 0) {
        const arr = regex.exec(text);
        if (isDefined(arr)) {
            return arr[index];
        }
        return '';
    }

    /**
     * Fetches a page through the HTTP client.
     *
     * @param {string} page - Path handed to `httpClient.get`
     *   (presumably resolved against the base URL — verify in HttpClient).
     * @returns {Promise<string>} Whatever `httpClient.get` resolves with.
     */
    async loadPage(page) {
        return this.httpClient.get(page);
    }

    /**
     * Converts a human readable element name into a camelCase identifier.
     * `/`, `&`, `,` and `-` are treated as word separators in addition to spaces.
     *
     * @param {string|undefined} name - Raw element name.
     * @returns {string|undefined} camelCase name, or `undefined` for an empty/missing name.
     */
    formatComponentName(name) {
        if (!name) {
            return undefined;
        }
        // String#split always yields at least one entry, so index 0 is safe.
        const words = name.replace(/\/|&|,|-/g, ' ').split(' ');
        const capitalised = words.map((part) => part.charAt(0).toUpperCase() + part.slice(1).toLowerCase());
        return capitalised[0].toLowerCase() + capitalised.slice(1).join('');
    }

    /**
     * Extracts the coded values listed on a data element page and stores
     * them in `definition.componentValueTable` under `component`.
     *
     * When the page only refers to another element's code list
     * ("Use code list for data element NNNN"), that page is loaded and
     * parsed instead, still registering the values under `component`.
     *
     * @param {string} component - Id the values are stored under.
     * @param {string} page - Content of the data element page.
     * @param {EdifactMessageSpecificationImpl} definition - Specification to extend.
     * @returns {Promise<EdifactMessageSpecificationImpl>} The same `definition` object.
     */
    async parseComponentDefinitionPage(component, page, definition) {
        // Already present means this component has been handled before.
        if (definition.componentValueTable.contains(component)) {
            return definition;
        }
        if (page.includes('Use code list for data element')) {
            const regexp = /Use code list for data element ([0-9]*)/gm;
            const arr = regexp.exec(page);
            if (isDefined(arr)) {
                const referencedComponent = arr[1];
                const referencedComponentPage = await this.loadPage(`../tred/tred${referencedComponent}.htm`);
                return this.parseComponentDefinitionPage(component, referencedComponentPage, definition);
            }
        }
        // Pages without a "Code Values:" section carry no possible values.
        if (!page.includes('Code Values:')) {
            return definition;
        }
        let state = SegmentPart.BeforeStructureDef;
        const values = {};
        // Entries are separated by blank lines; each chunk is one code value.
        for (let line of page.split(/\n\s*\n/)) {
            line = line.trimEnd();
            const lowered = line.toLowerCase();
            if (state === SegmentPart.BeforeStructureDef && lowered.includes('<h3>')) {
                state = SegmentPart.Data;
            }
            else if (state === SegmentPart.Data && !lowered.includes('<p>')) {
                // Fresh literal per chunk: the expression carries the g flag and
                // would otherwise keep its lastIndex between exec calls.
                const regexp = /^(\S)?\s*([A-Z0-9]{1,3}) +(.*)\n*([\w\W]*)/gm;
                const arr = regexp.exec(line);
                if (isDefined(arr)) {
                    const deprecated = arr[1] === 'X' ? true : undefined;
                    const componentKey = arr[2];
                    const componentValue = arr[3];
                    const componentDescription = arr[4]
                        .replace(/ {2,}/gm, ' ') // Convert all instances of multiple spaces to a single space
                        .replace(/[\r\n]/gm, '') // Remove all newlines
                        .trim(); // Remove excess whitespace
                    values[componentKey] = {
                        id: componentKey,
                        value: componentValue,
                        description: componentDescription,
                        ...(deprecated && { deprecated }), // Only add if deprecated is true
                    };
                }
            }
            else if (state === SegmentPart.Data && lowered.includes('<p>')) {
                state = SegmentPart.AfterStructureDef;
                break;
            }
        }
        if (component !== '' && Object.keys(values).length) {
            definition.componentValueTable.add(component, values);
        }
        return definition;
    }

    /**
     * Parses a segment definition page into a segment entry
     * (`{ requires, elements }`) and stores it in `definition.segmentTable`.
     * Code-list pages linked from the segment page are loaded and parsed
     * along the way via `parseComponentDefinitionPage`.
     *
     * @param {string} segment - Segment tag the entry is stored under; `''` skips storing.
     * @param {string} page - Content of the segment definition page.
     * @param {EdifactMessageSpecificationImpl} definition - Specification to extend.
     * @returns {Promise<EdifactMessageSpecificationImpl>} The same `definition` object.
     */
    async parseSegmentDefinitionPage(segment, page, definition) {
        if (definition.segmentTable.contains(segment)) {
            return definition;
        }
        const segEntry = { requires: 0, elements: [] };
        let state = SegmentPart.BeforeStructureDef;
        // only relevant for legacy UNECE segment specification pages:
        let dataSection = false;
        let skipAddingElement = false;
        // Holds a line whose definition continues on the following line.
        let overflowLine = null;
        // Composite element currently being collected; pushed lazily once the
        // next positioned line (or the end of the page) is reached.
        let complexEleId = null;
        let complexEleEntry = null;
        for (let line of page.split('\n')) {
            line = line.trimEnd();
            if (overflowLine !== null) {
                line = `${overflowLine.trimStart()} ${line.trim()}`;
                overflowLine = null;
            }
            if (state === SegmentPart.BeforeStructureDef && line.includes('<HR>')) {
                dataSection = true;
            }
            else if (state === SegmentPart.BeforeStructureDef &&
                // checking dataSection and <B> tag only relevant for legacy
                // UNECE segment specification pages:
                (line.includes('<H3>') || (dataSection && line.includes('<B>')))) {
                state = SegmentPart.Data;
            }
            else if (state === SegmentPart.Data && !line.includes('<P>')) {
                // Groups: 1 leading number, 2 marker, 3 link target (optional),
                // 4 id, 5 name, 6 M/C status, 7 number (unused), 8 format.
                const regexp = /^\s*?([\d]*)\s*?([X|\\*]?)\s*<A(?:.*HREF.*"([\w /.]*)")?>([\w]*)<\/A>([\w ,\-\\/&]*)\W+([M|C])(?:\W|$)\s*([\d]*)\s*([\w\\.]*).*$/g;
                const arr = regexp.exec(line);
                if (isDefined(arr)) {
                    const segGroupId = arr[1] === '' ? undefined : arr[1];
                    const href = arr[3];
                    const id = arr[4];
                    const mandatory = arr[6] === 'M';
                    const elementDef = arr[8] === '' ? undefined : arr[8];
                    const componentName = this.formatComponentName(arr[5]?.trim());
                    // Check if possibility for coded values. The link group is
                    // optional in the expression, so href may be undefined.
                    if (href?.includes('/tred/') && componentName?.includes('Code')) {
                        // Check if already exists
                        if (!definition.componentValueTable.contains(id)) {
                            await this.parseComponentDefinitionPage(id, await this.loadPage(href), definition);
                        }
                    }
                    const similarComponents = complexEleEntry?.components.filter((component) => component.id === id)?.length || 0;
                    const component = id && componentName && elementDef
                        ? {
                            id,
                            name: componentName +
                                (similarComponents >= 1
                                    ? (similarComponents + 1).toString()
                                    : ''),
                            format: elementDef,
                        }
                        : undefined;
                    const similarElements = segEntry.elements.filter((ele) => ele.id === id).length;
                    const eleEntry = {
                        id,
                        name: (componentName || '') +
                            (similarElements >= 1
                                ? (similarElements + 1).toString()
                                : ''),
                        requires: 0,
                        components: [],
                    };
                    if (segGroupId) {
                        if (id === '') {
                            console.warn(`Could not determine element ID based on line ${line}`);
                            continue;
                        }
                        skipAddingElement = false;
                        if (mandatory) {
                            segEntry.requires = segEntry.requires + 1;
                        }
                        if (component) {
                            if (complexEleEntry !== null && complexEleId !== null) {
                                segEntry.elements.push(complexEleEntry);
                            }
                            complexEleId = null;
                            complexEleEntry = null;
                            if (segEntry.elements.some((element) => element?.id === id)) {
                                continue;
                            }
                            if (mandatory) {
                                eleEntry.requires = eleEntry.requires + 1;
                            }
                            eleEntry.components.push(component);
                            segEntry.elements.push(eleEntry);
                        }
                        else {
                            if (complexEleEntry !== null && complexEleId !== null) {
                                segEntry.elements.push(complexEleEntry);
                            }
                            // If the element already exists
                            if (segEntry.elements.some((element) => element.id === id)) {
                                skipAddingElement = true;
                                continue;
                            }
                            complexEleId = id;
                            complexEleEntry = eleEntry;
                        }
                    }
                    else {
                        if (!skipAddingElement) {
                            if (complexEleEntry !== null && component) {
                                complexEleEntry.components.push(component);
                                complexEleEntry.requires = mandatory
                                    ? complexEleEntry.requires + 1
                                    : complexEleEntry.requires;
                            }
                            else {
                                // simple element definition
                                if (segEntry.elements.some((element) => element.id === id)) {
                                    continue;
                                }
                                if (mandatory) {
                                    eleEntry.requires = eleEntry.requires + 1;
                                }
                                if (component) {
                                    eleEntry.components.push(component);
                                }
                                eleEntry.id = id;
                                segEntry.elements.push(eleEntry);
                            }
                        }
                    }
                }
                else {
                    // The line starts like a definition but did not match the
                    // full expression: remember it and merge it with the next line.
                    const regexpAlt = /^\s*([\d]*)\s*([X|\\*]?)\s*<A.*>([a-zA-Z0-9]*)<\/A>\s*([a-zA-Z0-9 \\-\\/&]*)/g;
                    const arrAlt = regexpAlt.exec(line);
                    if (isDefined(arrAlt)) {
                        overflowLine = line;
                    }
                }
            }
            else if (state === SegmentPart.Data && line.includes('<P>')) {
                state = SegmentPart.AfterStructureDef;
                break;
            }
        }
        if (complexEleEntry !== null && complexEleId !== null) {
            segEntry.elements.push(complexEleEntry);
        }
        if (segment !== '') {
            definition.segmentTable.add(segment, segEntry);
        }
        return definition;
    }

    /**
     * Parses the message definition page: reads the header values, builds
     * the nested message structure and starts one lookup per linked segment
     * definition page (UNH, UNS and UNT links are skipped).
     *
     * @param {string} page - HTML of the message definition page.
     * @returns {Promise<{specObj: EdifactMessageSpecificationImpl, promises: Promise[]}>}
     *   The specification and the still pending segment lookups.
     * @throws {Error} (as rejection) when no header with message type,
     *   version and release was found on the page.
     */
    async parseMessagePage(page) {
        let definition;
        const handler = new DomHandler();
        let state = Part.BeforeStructureDef;
        let section = 'header';
        // Stack of arrays currently being filled; the last entry is the
        // innermost open segment group (or the root structure).
        const segStack = [];
        const lookupSegmentPromises = [];
        const nextState = () => {
            if (state === Part.RefLink) {
                state = Part.Pos;
            }
            else if (state === Part.Pos) {
                state = Part.Deprecated;
            }
            else if (state === Part.Deprecated) {
                state = Part.Tag;
            }
            else if (state === Part.Tag) {
                state = Part.Name;
            }
            else if (state === Part.Name) {
                state = Part.RefLink;
            }
        };
        handler.ontext = (text) => {
            if (text.includes('Message Type') &&
                text.includes('Version') &&
                text.includes('Release')) {
                const messageType = this.extractTextValue(text, /Message Type\s*: ([A-Z]*)\s/g, 1);
                const version = this.extractTextValue(text, /Version\s*: ([A-Z]*)\s/g, 1);
                const release = this.extractTextValue(text, /Release\s*: ([0-9A-Z]*)\s/g, 1);
                const controllingAgency = this.extractTextValue(text, /Contr. Agency\s*: ([0-9A-Z]*)\s/g, 1);
                definition = new EdifactMessageSpecificationImpl(messageType, version, release, controllingAgency);
                segStack.push(definition.messageStructureDefinition);
            }
            else if (text.includes('Message structure')) {
                state = Part.RefLink;
            }
            else if (state !== Part.BeforeStructureDef &&
                state !== Part.AfterStructureDef) {
                if (state === Part.RefLink) {
                    // ignored
                    console.debug(`RefLink: ${text}`);
                }
                else if (state === Part.Pos) {
                    // position text carries no information that is used
                }
                else if (state === Part.Deprecated) {
                    if (text.includes('- Segment group')) {
                        const regex = /^[\s*+-]*-* (Segment group \d*)\s*-*\s*([M|C])\s*(\d*)([-|\\+|\\|]*).*/g;
                        const arr = regex.exec(text);
                        if (isDefined(arr)) {
                            const groupArray = [];
                            const group = {
                                content: groupArray,
                                mandatory: arr[2] === 'M',
                                repetition: Number.parseInt(arr[3], 10),
                                name: arr[1],
                                section: isDefined(section) ? section : undefined,
                            };
                            section = null;
                            // The stack is empty when no header was found; skip
                            // instead of failing so the explicit rejection at the
                            // end of this method is reached.
                            const parent = segStack[segStack.length - 1];
                            if (parent) {
                                // add the group to the end of the current top segments
                                parent.push(group);
                                // push the array managed by this group to the end of the stack to fill it down the road
                                segStack.push(groupArray);
                            }
                        }
                        // no further tags available, continue on the next line with the RefLink
                        state = Part.RefLink;
                    }
                    else {
                        nextState();
                    }
                }
                else if (state === Part.Tag) {
                    const segmentSection = section !== null ? section : undefined;
                    let segmentData;
                    if (definition) {
                        segmentData =
                            text === 'UNH'
                                ? [definition.versionAbbr(), definition.messageType]
                                : undefined;
                    }
                    const segment = {
                        content: text,
                        mandatory: false,
                        repetition: 0,
                        data: segmentData,
                        section: segmentSection,
                    };
                    if (definition) {
                        segStack[segStack.length - 1].push(segment);
                    }
                    section = null;
                }
                else if (state === Part.Name) {
                    const regex = /^([a-zA-Z /\\-]*)\s*?([M|C])\s*?([0-9]*?)([^0-9]*)$/g;
                    const arr = regex.exec(text);
                    if (isDefined(arr)) {
                        const sMandatory = arr[2];
                        const sRepetition = arr[3];
                        const remainder = arr[4];
                        // update the last element on the top-most stack with the respective data
                        const segArr = segStack[segStack.length - 1];
                        const segData = segArr?.[segArr.length - 1];
                        // No entry exists when nothing has been pushed yet.
                        if (segData) {
                            segData.mandatory = sMandatory === 'M';
                            segData.repetition = Number.parseInt(sRepetition, 10);
                        }
                        // check whether the remainder contains a closing hint for a subgroup: -...-++
                        if (remainder.includes('-') && remainder.includes('+')) {
                            // one '+' per group that is closed on this line
                            const closedGroups = remainder.split('+').length - 1;
                            for (let i = 0; i < closedGroups; i++) {
                                segStack.pop();
                            }
                        }
                        nextState();
                    }
                    if (text.includes('DETAIL SECTION')) {
                        section = 'detail';
                    }
                    else if (text.includes('SUMMARY SECTION')) {
                        section = 'summary';
                    }
                }
                else {
                    console.warn(`Unknown part: ${text}`);
                }
            }
        };
        handler.onopentag = (name, attribs) => {
            // A paragraph tag after the structure started marks its end.
            if (name === 'p' &&
                state !== Part.BeforeStructureDef &&
                state !== Part.AfterStructureDef) {
                state = Part.AfterStructureDef;
            }
            if (state === Part.Tag && attribs.href !== undefined) {
                if (definition) {
                    // The segment tag is taken from the three characters in
                    // front of ".htm" in the link target.
                    const end = attribs.href.indexOf('.htm');
                    const curSeg = attribs.href
                        .substring(end - 3, end)
                        .toUpperCase();
                    // skip segments that do not point to the right segment definition page
                    if (curSeg !== 'UNH' && curSeg !== 'UNS' && curSeg !== 'UNT') {
                        const def = definition;
                        lookupSegmentPromises.push(this.loadPage(attribs.href).then((result) => this.parseSegmentDefinitionPage(curSeg, result, def)));
                    }
                }
            }
        };
        handler.onclosetag = () => {
            nextState();
        };
        const parser = new Parser(handler);
        parser.write(page);
        parser.end();
        if (definition) {
            return {
                specObj: definition,
                promises: lookupSegmentPromises,
            };
        }
        throw new Error('Could not extract values from read page successfully');
    }

    /**
     * Loads and parses the message definition page for the configured type
     * and waits for every linked segment lookup to finish.
     *
     * A failing segment lookup does not fail the whole operation: it is
     * reported via `console.warn` and the specification is returned with
     * whatever could be collected. All lookups are awaited (settled) before
     * returning so the specification is no longer modified afterwards.
     *
     * @returns {Promise<EdifactMessageSpecificationImpl>} The assembled specification.
     */
    async loadTypeSpec() {
        const url = `./${this.type}_c.htm`;
        const page = await this.loadPage(url);
        const result = await this.parseMessagePage(page);
        const outcomes = await Promise.allSettled(result.promises);
        for (const outcome of outcomes) {
            if (outcome.status === 'rejected') {
                console.warn(`Error while processing segment definition promises: Reason ${outcome.reason?.message ?? outcome.reason}`);
            }
        }
        return result.specObj;
    }
}
//# sourceMappingURL=messageStructureParser.js.map