sec-edgar-api
Version:
Fetch and parse SEC earnings reports and other filings. Useful for financial analysis.
132 lines (131 loc) • 5.73 kB
JavaScript
// NOTE(review): the lone `;` below is an empty statement; presumably a leading
// directive (e.g. "use strict") was stripped from this copy — TODO confirm.
;
// Flag the exports object with `__esModule: true` — the marker transpilers
// conventionally use so importers treat `exports.default` as the default export.
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Minimal, forgiving tag scanner that turns XML/SGML/HTML-like markup into
 * nested plain objects.
 *
 * Shape produced by `parse`:
 *  - every element becomes an object stored on its parent under the tag name;
 *  - repeated sibling tag names are collected into an array;
 *  - attributes are stored under `@_<attributeName>` keys;
 *  - text content is stored under the `#text` key.
 */
var XMLParser = /** @class */ (function () {
    /**
     * @param {Object} [params]
     * @param {('useFirst'|'useLast'|'concatenate')} [params.textSelectStrategy='useFirst']
     *     Which text node wins when an element receives several text chunks.
     * @param {string} [params.textConcatDivider='<>']
     *     Divider placed (surrounded by single spaces) between chunks when the
     *     strategy is 'concatenate'.
     */
    function XMLParser(params) {
        // Lower-case tag names reported as self-enclosing even without a trailing `/`.
        this.selfEnclosingTags = new Set([
            '!doctype',
            '?xml',
            'xml',
            'hr',
            'br',
            'img',
            'input',
            'meta',
            'filename',
            'description',
        ]);
        // Lower-case tag names whose open/close tags `parse` skips entirely.
        this.tagsToIgnore = new Set(['script', '?xml']);
        var options = params !== null && params !== undefined ? params : {};
        this.textSelectStrategy =
            options.textSelectStrategy === undefined ? 'useFirst' : options.textSelectStrategy;
        this.textConcatDivider =
            options.textConcatDivider === undefined ? '<>' : options.textConcatDivider;
    }

    /**
     * Converts raw `key=value` attribute tokens into a Map of key -> value.
     *
     * Tokens without `=` or with an empty value are skipped. The value is
     * everything after the FIRST `=`, so values that themselves contain `=`
     * (query strings, base64 padding, ...) are kept intact. A value wrapped in
     * single quotes has that wrapping removed; otherwise all double quotes are
     * removed. The result is trimmed.
     *
     * @param {string[]} attributes Raw attribute tokens, e.g. `['id="1"']`.
     * @returns {Map<string, string>}
     */
    XMLParser.prototype.mapAttributes = function (attributes) {
        var attributesMap = new Map();
        attributes.forEach(function (attr) {
            var separatorIndex = attr.indexOf('=');
            if (separatorIndex === -1) {
                return;
            }
            var key = attr.substring(0, separatorIndex);
            var value = attr.substring(separatorIndex + 1);
            if (!value) {
                return;
            }
            var trimmed = value.trim();
            var isSingleQuoted = trimmed.length >= 2 &&
                trimmed[0] === "'" &&
                trimmed[trimmed.length - 1] === "'";
            var unquoted = isSingleQuoted ? trimmed.slice(1, -1) : value.replace(/"/g, '');
            attributesMap.set(key, unquoted.trim());
        });
        return attributesMap;
    };

    /**
     * Parses markup into a tree of plain objects (see the class description for
     * the output shape). Comments and tags listed in `tagsToIgnore` are skipped.
     *
     * @param {string} xml Markup to parse.
     * @returns {Object} Root container holding the top-level elements.
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    XMLParser.prototype.parse = function (xml) {
        var _this = this;
        var currentObj = {};
        // Stack of open elements; index 0 is the root container and is never popped.
        var objPath = [currentObj];
        this.iterateXML({
            xml: xml,
            onOpenTag: function (tagName, attributes, isSelfEnclosing) {
                var isComment = tagName.startsWith('!--');
                if (isComment || _this.tagsToIgnore.has(tagName.toLowerCase())) {
                    return;
                }
                var newObj = {};
                var obj = currentObj;
                if (obj[tagName] === undefined) {
                    obj[tagName] = newObj;
                }
                else if (Array.isArray(obj[tagName])) {
                    obj[tagName].push(newObj);
                }
                else if (typeof obj[tagName] === 'object') {
                    // Second occurrence of this tag name: promote to an array.
                    obj[tagName] = [obj[tagName], newObj];
                }
                _this.mapAttributes(attributes).forEach(function (value, att) {
                    newObj["@_".concat(att)] = value;
                });
                if (!isSelfEnclosing) {
                    objPath.push(newObj);
                    currentObj = newObj;
                }
            },
            onInnerText: function (text) {
                var textTrimmed = text.trim();
                if (!textTrimmed) {
                    return;
                }
                var obj = currentObj;
                switch (_this.textSelectStrategy) {
                    case 'useFirst':
                        if (obj['#text']) {
                            return;
                        }
                        obj['#text'] = textTrimmed;
                        break;
                    case 'useLast':
                        obj['#text'] = textTrimmed;
                        break;
                    case 'concatenate':
                        obj['#text'] = obj['#text']
                            ? "".concat(obj['#text'], " ").concat(_this.textConcatDivider, " ").concat(textTrimmed)
                            : textTrimmed;
                        break;
                }
            },
            onCloseTag: function (tagName) {
                var lowerName = tagName.toLowerCase();
                // Ignored tags and tags from `selfEnclosingTags` were never pushed by
                // onOpenTag, so their close tags must not pop the parent element.
                if (objPath.length === 1 ||
                    _this.tagsToIgnore.has(lowerName) ||
                    _this.selfEnclosingTags.has(lowerName)) {
                    return;
                }
                objPath.pop();
                currentObj = objPath[objPath.length - 1];
            },
        });
        return objPath[0];
    };

    /**
     * Scans `xml` left to right and reports open tags, close tags and text runs
     * through the supplied callbacks (each callback is optional).
     *
     * - `onOpenTag(tagName, attributes, isSelfEnclosing)`: `tagName` is the tag
     *   content up to the first whitespace character (a trailing `/` is not part
     *   of it); `attributes` are raw whitespace-separated tokens in which quoted
     *   sections may contain whitespace; a trailing token without `=` is dropped.
     *   Comments and processing instructions are reported here as well, with
     *   names starting with `!--` / `?`.
     * - `onCloseTag(tagName)`: name of a `</...>` tag, trimmed.
     * - `onInnerText(text)`: raw (untrimmed) text between tags.
     *
     * Scanning stops at a tag that has no closing `>`; the unterminated
     * remainder is not reported.
     *
     * @param {Object} params
     * @param {string} params.xml
     * @param {Function} [params.onOpenTag]
     * @param {Function} [params.onCloseTag]
     * @param {Function} [params.onInnerText]
     * @returns {void}
     */
    XMLParser.prototype.iterateXML = function (params) {
        var onCloseTag = params.onCloseTag;
        var onInnerText = params.onInnerText;
        var onOpenTag = params.onOpenTag;
        var xml = params.xml;
        // One token = a run of non-whitespace characters in which complete quoted
        // sections ("..." or '...') may contain whitespace. A stray, unbalanced
        // quote is treated as an ordinary character.
        var attributeTokenPattern = /(?:"[^"]*"|'[^']*'|\S)+/g;
        for (var i = 0; i < xml.length; i++) {
            if (xml[i] !== '<') {
                var nextOpenTagIndex = xml.indexOf('<', i);
                var nextIndex = nextOpenTagIndex === -1 ? xml.length : nextOpenTagIndex;
                if (typeof onInnerText === 'function') {
                    onInnerText(xml.substring(i, nextIndex));
                }
                // The loop's i++ moves onto the '<' (or past the end of the input).
                i = nextIndex - 1;
                continue;
            }
            var isClosingTag = xml[i + 1] === '/';
            var contentStart = i + (isClosingTag ? 2 : 1);
            var tagEndIndex = xml.indexOf('>', contentStart);
            if (tagEndIndex === -1) {
                // Unterminated tag: continuing with index -1 would restart the scan
                // from the beginning of the input and never finish.
                break;
            }
            var tagContent = xml.substring(contentStart, tagEndIndex);
            i = tagEndIndex;
            if (isClosingTag) {
                if (typeof onCloseTag === 'function') {
                    onCloseTag(tagContent.trim());
                }
                continue;
            }
            // Remove the self-closing slash first so it ends up neither in the tag
            // name (`<b/>`) nor in the last attribute (`<b x="1"/>`).
            var isSlashTerminated = tagContent.endsWith('/');
            var tagBody = isSlashTerminated ? tagContent.slice(0, -1) : tagContent;
            var firstWhitespace = tagBody.search(/\s/);
            var tagName = firstWhitespace === -1 ? tagBody : tagBody.substring(0, firstWhitespace);
            var attributes = tagBody.substring(tagName.length).match(attributeTokenPattern) || [];
            if (attributes.length > 0 && !attributes[attributes.length - 1].includes('=')) {
                attributes.pop();
            }
            var isSelfEnclosing = isSlashTerminated || this.selfEnclosingTags.has(tagName.toLowerCase());
            if (typeof onOpenTag === 'function') {
                onOpenTag(tagName, attributes, isSelfEnclosing);
            }
        }
    };
    return XMLParser;
}());
// Expose the XMLParser constructor as this module's default export.
exports.default = XMLParser;