UNPKG

sec-edgar-api

Version:

Fetch and parse SEC earnings reports and other filings. Useful for financial analysis.

309 lines (308 loc) 14.9 kB
"use strict"; var __assign = (this && this.__assign) || function () { __assign = Object.assign || function(t) { for (var s, i = 1, n = arguments.length; i < n; i++) { s = arguments[i]; for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p]; } return t; }; return __assign.apply(this, arguments); }; Object.defineProperty(exports, "__esModule", { value: true }); var ColNode_1 = require("./XMLNode/ColNode"); var DocumentNode_1 = require("./XMLNode/DocumentNode"); var HRNode_1 = require("./XMLNode/HRNode"); var NonTableNode_1 = require("./XMLNode/NonTableNode"); var RowNode_1 = require("./XMLNode/RowNode"); var TableNode_1 = require("./XMLNode/TableNode"); /** * @deprecated use XMLParser */ var XMLParserLegacy = /** @class */ (function () { function XMLParserLegacy() { } XMLParserLegacy.prototype.iterateXML = function (params) { var _a, _b, _c; var xml = params.xml, onCharacter = params.onCharacter, onCloseTag = params.onCloseTag, onOpenTag = params.onOpenTag; var selfEnclosingTags = new Set([ 'filename', 'description', 'br', 'meta', 'link', 'img', 'input', 'hr', 'area', 'base', 'col', 'command', 'embed', 'keygen', 'param', 'source', 'track', 'wbr', ]); var spaceChars = new Set(['\n', '\r', '\t', ' ']); var pathOccurrenceCountMap = new Map(); var curPath = ''; var curTag = ''; var curAttributes = ''; var didStart = false; var pathsArr = []; for (var i = 0; i < xml.length; i++) { var char = xml[i]; var isOpenTag = char === '<' && xml[i + 1] !== '/' && xml[i + 1] !== '?' && xml[i + 1] !== '!'; var isCloseTag = char === '<' && xml[i + 1] === '/'; var onCharacterData = { char: char, index: i, path: curPath, pathOccurrenceCount: (_a = pathOccurrenceCountMap.get(curPath)) !== null && _a !== void 0 ? _a : 0, attributesStr: curAttributes, }; if (isOpenTag) { var didEndTagName = false; var j = 0; didStart = true; i++; while (xml[i] !== '>') { didEndTagName = didEndTagName || spaceChars.has(xml[i]) || xml[i] === '/'; if (!didEndTagName) { curTag += xml[i].toLowerCase(); } else if (xml[i] !== '/') { curAttributes += xml[i]; } i++; j++; if (j > 1000000) { throw new Error('too many iterations'); } } var pathNew = "".concat(curPath).concat(curPath.length > 0 ? '.' : '').concat(curTag).toLowerCase(); var countBefore = (_b = pathOccurrenceCountMap.get(pathNew)) !== null && _b !== void 0 ? _b : 0; var pathOccurrenceCount = (_c = pathOccurrenceCountMap.set(pathNew, countBefore + 1).get(pathNew)) !== null && _c !== void 0 ? _c : 0; onCharacterData.path = pathNew; onCharacterData.pathOccurrenceCount = pathOccurrenceCount; onCharacterData.attributesStr = curAttributes; pathsArr.push(pathNew); onOpenTag === null || onOpenTag === void 0 ? void 0 : onOpenTag(onCharacterData); if (selfEnclosingTags.has(curTag)) { onCloseTag === null || onCloseTag === void 0 ? void 0 : onCloseTag(onCharacterData); } else { curPath = pathNew; } curTag = ''; } else if (isCloseTag) { while (xml[i] !== '>') { i++; } onCloseTag === null || onCloseTag === void 0 ? void 0 : onCloseTag(onCharacterData); curPath = curPath.slice(0, curPath.lastIndexOf('.')); curAttributes = ''; } else if (didStart) { onCharacter === null || onCharacter === void 0 ? void 0 : onCharacter(onCharacterData); } } return pathsArr; }; /** * Returns text in each table cell mapped by `${table}.${row}.${col}` */ XMLParserLegacy.prototype.getTableTextMap = function (params) { var xml = params.xml, parentPath = params.parentPath, onCharacter = params.onCharacter, onCloseTag = params.onCloseTag, onOpenTag = params.onOpenTag, _a = params.trimSpaces, trimSpaces = _a === void 0 ? true : _a; var rowPaths = new Set([ "".concat(parentPath, ".table.tbody.tr"), "".concat(parentPath, ".table.thead.tr"), "".concat(parentPath, ".table.tfoot.tr"), "".concat(parentPath, ".table.tr"), ]); var colPaths = new Set([ "".concat(parentPath, ".table.tbody.tr.td"), "".concat(parentPath, ".table.thead.tr.td"), "".concat(parentPath, ".table.tfoot.tr.td"), "".concat(parentPath, ".table.tr.td"), "".concat(parentPath, ".table.tbody.tr.th"), "".concat(parentPath, ".table.thead.tr.th"), "".concat(parentPath, ".table.tfoot.tr.th"), "".concat(parentPath, ".table.tr.th"), ]); var table = 0; var row = 0; var col = 0; var textByColKey = new Map(); var spaceChars = new Set(['\n', '\r', '\t']); this.iterateXML({ xml: xml, onOpenTag: function (data) { var _a; var path = data.path; var colKey = "".concat(table, ".").concat(row, ".").concat(col); var textCur = (_a = textByColKey.get(colKey)) !== null && _a !== void 0 ? _a : ''; var pathLower = path.toLowerCase(); if (textCur.trim().length === 0 && col === 0) { textByColKey.delete(colKey); } var isTable = parentPath ? pathLower === "".concat(parentPath, ".table") : pathLower.endsWith('table'); var isRow = parentPath ? rowPaths.has(pathLower) : pathLower.endsWith('tr'); var isCol = parentPath ? colPaths.has(pathLower) : pathLower.endsWith('td') || pathLower.endsWith('th'); if (isTable) { table++; col = 0; row = 0; } else if (isRow) { row++; col = 0; } else if (isCol) { col++; } onOpenTag === null || onOpenTag === void 0 ? void 0 : onOpenTag(__assign(__assign({}, data), { textMap: textByColKey })); }, onCharacter: function (data) { var _a; var char = spaceChars.has(data.char) ? ' ' : data.char; var colKey = "".concat(table, ".").concat(row, ".").concat(col); var textCur = (_a = textByColKey.get(colKey)) !== null && _a !== void 0 ? _a : ''; if (!(trimSpaces && char === ' ' && textCur.endsWith(' '))) { textByColKey.set(colKey, "".concat(textCur).concat(char)); } onCharacter === null || onCharacter === void 0 ? void 0 : onCharacter(__assign(__assign({}, data), { textMap: textByColKey })); }, onCloseTag: function (data) { var _a; var colKey = "".concat(table, ".").concat(row, ".").concat(col); var textCur = (_a = textByColKey.get(colKey)) !== null && _a !== void 0 ? _a : ''; if (textCur.trim().length === 0 && col === 0) { textByColKey.delete(colKey); } else if (!textCur.endsWith(' ')) { textByColKey.set(colKey, "".concat(textCur, " ")); } onCloseTag === null || onCloseTag === void 0 ? void 0 : onCloseTag(__assign(__assign({}, data), { textMap: textByColKey })); }, }); return textByColKey; }; XMLParserLegacy.prototype.getDocumentNode = function (params) { var xml = params.xml; var rowsArr = []; var colsArr = []; var documentNode = new DocumentNode_1.DocumentNode(); var curNode = null; var prevRowCols = []; var curRowCols = []; var isBold = false; var boldPath = null; var pushColToRow = function (col) { var _a; var colIndex = curRowCols.length; col.setIndex(colIndex); var colSpan = col.getColSpan(); Array.from({ length: colSpan }).forEach(function () { return curRowCols.push(col); }); var topSibling = (_a = prevRowCols[colIndex]) !== null && _a !== void 0 ? _a : null; topSibling === null || topSibling === void 0 ? void 0 : topSibling.addBottomSibling(col); }; this.iterateXML({ xml: xml, onCloseTag: function () { var _a; if ((curNode === null || curNode === void 0 ? void 0 : curNode.getPath()) === boldPath) { curNode === null || curNode === void 0 ? void 0 : curNode.setText("".concat((_a = curNode === null || curNode === void 0 ? void 0 : curNode.getText()) !== null && _a !== void 0 ? _a : '', "}}")); boldPath = null; } }, onCharacter: function (_a) { var _b; var char = _a.char; curNode === null || curNode === void 0 ? void 0 : curNode.setText(((_b = curNode === null || curNode === void 0 ? void 0 : curNode.getText()) !== null && _b !== void 0 ? _b : '') + char); }, onOpenTag: function (_a) { var _b; var path = _a.path, attributesStr = _a.attributesStr; // skip nested tables if (path.split('.').reduce(function (acc, cur) { return (cur === 'table' ? acc + 1 : acc); }, 0) > 1) return; var tag = path.split('.').pop(); var isInTable = path.includes('table'); var topLevelNodes = documentNode.getChildren(); var prevTopLevelNode = topLevelNodes[topLevelNodes.length - 1]; var wasHorizontalLine = prevTopLevelNode instanceof HRNode_1.HRNode; var wasNonTableNode = prevTopLevelNode instanceof NonTableNode_1.NonTableNode; var wasBold = isBold; var attributesLower = attributesStr.toLowerCase().replace(/\s/g, ''); isBold = tag === 'b' || tag === 'strong' || attributesLower.includes('font-weight:bold') || attributesLower.includes('font-weight:700') || attributesLower.includes('font-weight:800') || attributesLower.includes('font-weight:900'); if (!isInTable) { prevRowCols = []; curRowCols = []; } if (tag === 'hr' && !isInTable) { var hr = new HRNode_1.HRNode({ attributesStr: attributesStr, path: path }); hr.setPreviousSibling(prevTopLevelNode !== null && prevTopLevelNode !== void 0 ? prevTopLevelNode : null); topLevelNodes.push(hr); curNode = hr; } else if (tag === 'table') { var table = new TableNode_1.TableNode({ attributesStr: attributesStr, path: path }); table.setPreviousSibling(prevTopLevelNode !== null && prevTopLevelNode !== void 0 ? prevTopLevelNode : null); topLevelNodes.push(table); curNode = table; } else if (tag === 'tr') { var row = new RowNode_1.RowNode({ attributesStr: attributesStr, path: path }); var prevRow = rowsArr[rowsArr.length - 1]; row.setParent(prevTopLevelNode); row.setPreviousSibling((prevRow === null || prevRow === void 0 ? void 0 : prevRow.getParent()) === row.getParent() ? prevRow : null); rowsArr.push(row); prevRowCols = curRowCols; curRowCols = []; curNode = row; } else if (tag === 'td' || tag === 'th') { var col = new ColNode_1.ColNode({ attributesStr: attributesStr, path: path }); var prevCol = colsArr[colsArr.length - 1]; col.setParent(rowsArr[rowsArr.length - 1]); col.setPreviousSibling((prevCol === null || prevCol === void 0 ? void 0 : prevCol.getParent()) === col.getParent() ? prevCol : null); colsArr.push(col); pushColToRow(col); curNode = col; } else if ((!isInTable && !wasNonTableNode) || (wasHorizontalLine && tag !== 'hr')) { var node = new NonTableNode_1.NonTableNode({ attributesStr: attributesStr, path: path }); node.setPreviousSibling(prevTopLevelNode !== null && prevTopLevelNode !== void 0 ? prevTopLevelNode : null); topLevelNodes.push(node); curNode = node; } else if (curNode && !curNode.getText().endsWith('\n')) { curNode.setText("".concat(curNode.getText().trim(), "\n")); } if (isBold && !wasBold && !(curNode === null || curNode === void 0 ? void 0 : curNode.getText().endsWith('{{'))) { curNode === null || curNode === void 0 ? void 0 : curNode.setText("".concat(curNode === null || curNode === void 0 ? void 0 : curNode.getText().trim(), "{{")); } if (isBold) { boldPath = (_b = curNode === null || curNode === void 0 ? void 0 : curNode.getPath()) !== null && _b !== void 0 ? _b : null; } }, }); documentNode.setText(xml); return documentNode; }; return XMLParserLegacy; }()); exports.default = XMLParserLegacy;