UNPKG

@sugarcube/plugin-sec

Version:
111 lines (89 loc) 3.48 kB
"use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.default = void 0;

const { chunk, compact, curry, flow, get, merge, nth, replace, size, split } = require("lodash/fp");
const { collectP2, flatmapP2, flowP, retryP, tapP } = require("dashp");
const core = require("@sugarcube/core");
const request = _interopRequireDefault(require("request-promise")).default;
const { URL } = require("url");
const cheerio = _interopRequireDefault(require("cheerio")).default;
const moment = _interopRequireDefault(require("moment")).default;

/**
 * Babel interop helper: wraps a plain CommonJS export so that it can be read
 * through a `.default` property, and passes ES-module-shaped exports through.
 */
function _interopRequireDefault(obj) {
  return obj && obj.__esModule ? obj : { default: obj };
}

const searchEndpoint = "http://searchwww.sec.gov/EDGARFSClient/jsp/EDGAR_MainAccess.jsp";
const querySource = "sec_search";

/**
 * Request the search result pages for a single term.
 *
 * Curried over three arguments, so `search(numResults, count)` yields a
 * function that still expects the term.
 *
 * @param {number} numResults - Number of results requested per page.
 * @param {number} count - Total number of results wanted; determines how many
 *   pages are requested (`Math.ceil(count / numResults)`).
 * @param {string} term - The search term.
 * @returns {Promise} The result of `collectP2` over one request per page.
 */
const search = curry((numResults, count, term) =>
  collectP2(
    (page) => {
      const params = {
        sort: "Date",
        formType: "FormSD",
        isAdv: "true",
        stemming: "true",
        // The first page starts at document 0, every later page one past the
        // end of the previous page.
        startDoc: page === 0 ? 0 : page * numResults + 1,
        numResults
      };
      const requestUrl = new URL(searchEndpoint);

      Object.entries(params).forEach(([key, value]) => {
        requestUrl.searchParams.append(key, value);
      });
      // A regex with the `g` flag is required here: a string pattern only
      // replaces the first space, leaving multi-word terms half converted.
      // NOTE(review): `searchParams.append` form-encodes the value again, so
      // the inserted plus signs are sent percent-encoded -- confirm that this
      // is what the search endpoint expects.
      requestUrl.searchParams.append("search_text", term.replace(/ /g, "+"));

      return request(requestUrl.toString());
    },
    [...Array(Math.ceil(count / numResults)).keys()]
  )
);

/**
 * Turn one search result page into a list of filing units.
 *
 * The rows of the result table (header row excluded) are grouped in chunks of
 * four; the first two rows of every chunk are read for the filing date, link,
 * name, CIK and SIC. The linked filing is downloaded and its body text is
 * stored on the unit.
 *
 * @param {string} html - The HTML of a search result page.
 * @returns {Promise} The result of `collectP2` over the row chunks, one unit
 *   per chunk.
 */
const scrape = (html) => {
  const $ = cheerio.load(html);
  const rows = $("#ifrm2 table:nth-child(2) tr:not(:first-child)").toArray();

  // Takes the result of matching the `opennew(...)` href, picks the captured
  // argument list, strips the quotes and splits it at the commas.
  const parseOpenNewArgs = flow([compact, nth(1), replace(/["']/g, ""), split(",")]);

  return collectP2(async ([first, second]) => {
    // NOTE(review): the date is parsed as day/month/year -- confirm that the
    // result page does not print dates in month/day/year order.
    const dateFiled = moment.utc($("td:first-child i", first).text(), "DD/MM/YYYY").toDate();
    const filing = $("td:nth-child(2) a", first).attr("href");
    const [filingLink, name] = parseOpenNewArgs(filing.match(/opennew\((.*)\);$/i));
    const cik = $("#cikSearch", second).text();
    const sic = $("#sicSearch", second).text();
    const media = [{ type: "url", term: filingLink }];

    // NOTE(review): `retryP` receives an already started request promise --
    // confirm that dashp is able to re-issue the request in this form.
    const $filing = cheerio.load(await retryP(request(filingLink)));

    return {
      date_filed: dateFiled,
      name,
      filing_link: filingLink,
      filing: $filing("body").text(),
      cik,
      // An empty SIC and the placeholder "0000" are both stored as null.
      sic: !sic || sic === "0000" ? null : sic,
      _sc_id_fields: ["cik"],
      _sc_relations: media,
      _sc_media: media
    };
  }, chunk(4, rows));
};

/**
 * Sugarcube plugin: search the SEC for every `sec_search` query in the
 * envelope and add the scraped filings to the envelope data.
 *
 * @param {Object} envelope - The envelope holding the queries and the data.
 * @param {Object} context - The plugin context.
 * @param {Object} context.log - Logger; only `log.info` is used.
 * @param {Object} context.cfg - Configuration; `sec.results` sets the number
 *   of results to fetch per query.
 * @returns {Promise} Resolves to the result of `envelope.concatData` with the
 *   fetched units.
 */
const plugin = (envelope, { log, cfg }) => {
  const pageCount = 100;
  const total = get("sec.results", cfg);
  const queries = core.envelope.queriesByType(querySource, envelope);

  const doSearch = (term) => {
    log.info(`Searching the SEC for '${term}'`);

    return flowP(
      [
        flowP([search(pageCount, total), flatmapP2(scrape)]),
        tapP((rs) => log.info(`Fetched ${size(rs)} results.`)),
        // Record on every unit which query produced it.
        collectP2((unit) => merge(unit, { _sc_queries: [{ type: querySource, term }] }))
      ],
      term
    );
  };

  return flatmapP2((q) => retryP(doSearch(q)), queries).then((rs) =>
    core.envelope.concatData(rs, envelope)
  );
};

plugin.desc = "Search the SEC for EDGAR filings.";
plugin.argv = {
  "sec.results": {
    nargs: 1,
    desc: "The number of results to fetch.",
    default: 500
  }
};

exports.default = plugin;