UNPKG

sec-edgar-api

Version:

Fetch and parse SEC earnings reports and other filings. Useful for financial analysis.

132 lines (131 loc) 5.73 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); var XMLParser = /** @class */ (function () { function XMLParser(params) { this.selfEnclosingTags = new Set([ '!doctype', '?xml', 'xml', 'hr', 'br', 'img', 'input', 'meta', 'filename', 'description', ]); this.tagsToIgnore = new Set(['script', '?xml']); var _a = params !== null && params !== void 0 ? params : {}, _b = _a.textSelectStrategy, textSelectStrategy = _b === void 0 ? 'useFirst' : _b, _c = _a.textConcatDivider, textConcatDivider = _c === void 0 ? '<>' : _c; this.textSelectStrategy = textSelectStrategy; this.textConcatDivider = textConcatDivider; } XMLParser.prototype.mapAttributes = function (attributes) { var attributesMap = new Map(); attributes.forEach(function (attr) { var _a = attr.split('='), key = _a[0], value = _a[1]; if (!value) return; attributesMap.set(key, value.replace(/"/g, '').trim()); }); return attributesMap; }; // eslint-disable-next-line @typescript-eslint/no-explicit-any XMLParser.prototype.parse = function (xml) { var _this = this; var currentObj = {}; var objPath = [currentObj]; this.iterateXML({ xml: xml, onOpenTag: function (tagName, attributes, isSelfEnclosing) { var newObj = {}; var obj = currentObj; var isComment = tagName.startsWith('!--'); if (isComment || _this.tagsToIgnore.has(tagName.toLowerCase())) return; if (obj[tagName] === undefined) { obj[tagName] = newObj; } else if (Array.isArray(obj[tagName])) { ; obj[tagName].push(newObj); } else if (typeof obj[tagName] === 'object') { obj[tagName] = [obj[tagName], newObj]; } _this.mapAttributes(attributes).forEach(function (value, att) { newObj["@_".concat(att)] = value; }); if (!isSelfEnclosing) { objPath.push(newObj); currentObj = newObj; } }, onInnerText: function (text) { var textTrimmed = text.trim(); if (!textTrimmed) return; var obj = currentObj; switch (_this.textSelectStrategy) { case 'useFirst': if (obj['#text']) return; obj['#text'] = textTrimmed; break; case 'useLast': obj['#text'] = textTrimmed; break; case 'concatenate': obj['#text'] = obj['#text'] ? "".concat(obj['#text'], " ").concat(_this.textConcatDivider, " ").concat(textTrimmed) : textTrimmed; break; } }, onCloseTag: function (tagName) { if (objPath.length === 1 || _this.tagsToIgnore.has(tagName.toLowerCase())) return; objPath.pop(); currentObj = objPath[objPath.length - 1]; }, }); return objPath[0]; }; XMLParser.prototype.iterateXML = function (params) { var onCloseTag = params.onCloseTag, onInnerText = params.onInnerText, onOpenTag = params.onOpenTag, xml = params.xml; for (var i = 0; i < xml.length; i++) { if (xml[i] === '<' && xml[i + 1] !== '/') { i++; var tagEndIndex = xml.indexOf('>', i); var currentTagStr = xml.substring(i, tagEndIndex); var tagName = currentTagStr.split(' ', 1)[0].trim(); var attributes = currentTagStr.split(' ').slice(1); var lastAttribute = attributes[attributes.length - 1]; if (lastAttribute === null || lastAttribute === void 0 ? void 0 : lastAttribute.endsWith('/')) { attributes[attributes.length - 1] = lastAttribute.substring(0, lastAttribute.length - 1); } if (!lastAttribute || !lastAttribute.includes('=')) { attributes.pop(); } i = tagEndIndex; var isSelfEnclosing = xml[tagEndIndex - 1] === '/' || this.selfEnclosingTags.has(tagName.toLowerCase()); onOpenTag === null || onOpenTag === void 0 ? void 0 : onOpenTag(tagName, attributes, isSelfEnclosing); } else if (xml[i] === '<' && xml[i + 1] === '/') { i += 2; var tagEndIndex = xml.indexOf('>', i); var currentTagStr = xml.substring(i, tagEndIndex); i = tagEndIndex; onCloseTag === null || onCloseTag === void 0 ? void 0 : onCloseTag(currentTagStr); } else { var nextOpenTagIndex = xml.indexOf('<', i); var nextIndex = nextOpenTagIndex === -1 ? xml.length : nextOpenTagIndex; var text = xml.substring(i, nextIndex); onInnerText === null || onInnerText === void 0 ? void 0 : onInnerText(text); i = nextIndex - 1; } } }; return XMLParser; }()); exports.default = XMLParser;