UNPKG

@sugarcube/plugin-http

Version:
132 lines (102 loc) 3.75 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.default = exports.hypercubeImport = exports.basicImport = exports.urlContentType = exports.wget = exports.download = exports.assertDir = void 0; var _fs = _interopRequireDefault(require("fs")); var _fp = require("lodash/fp"); var _request = _interopRequireDefault(require("request")); var _pluginFs = require("@sugarcube/plugin-fs"); var _nodeFetch = _interopRequireDefault(require("node-fetch")); var _contentType = _interopRequireDefault(require("content-type")); var _utils = require("@sugarcube/utils"); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } const assertDir = (envelope, { cfg }) => { const dir = cfg.http.data_dir; return (0, _pluginFs.mkdirP)(dir).then(() => envelope); }; exports.assertDir = assertDir; const download = (0, _fp.curry)((from, to) => // eslint-disable-next-line promise/avoid-new new Promise((resolve, reject) => (0, _request.default)(from).on("end", resolve).on("error", reject).on("response", res => res.pipe(_fs.default.createWriteStream(to))))); exports.download = download; const wget = (0, _fp.curry)((cmd, target, term) => { const args = ["-q", "--no-check-certificate", "-e", "robots=off", "--page-requisites", "--adjust-extension", "--convert-links", "--no-clobber", "--directory-prefix", target, term]; return (0, _utils.runCmd)(cmd, args); }); exports.wget = wget; const urlContentType = async url => { const resp = await (0, _nodeFetch.default)(url, { method: "HEAD" }); if (!resp.ok) { // throw new Error(`${resp.status}: ${resp.statusText}`); return null; } const header = resp.headers.get("Content-Type"); if (header == null) return null; // The content type parser throws on content types of the form 'image/gif;' if // nothing follows the last semi colon. let type; try { const parseContent = _contentType.default.parse(header.replace(/;$/, "")); // eslint-disable-next-line prefer-destructuring type = parseContent.type; } catch (e) { return null; } if (type.startsWith("text")) return "url"; if (type.startsWith("image")) return "image"; if (type.startsWith("video")) return "video"; return "document"; }; exports.urlContentType = urlContentType; const basicImport = async location => { const contents = await (0, _utils.extract)(location); const { text, meta } = contents; return { body: text == null || text === "" ? null : text.trim(), ...(0, _utils.tikaMetaFields)(meta) }; }; exports.basicImport = basicImport; const hypercubeImport = async (browse, target, location) => { const images = []; let content; await browse(async ({ goto, page }) => { // We capture requests for images and add them to _sc_media. page.on("response", response => { const headers = response.headers(); if (headers["content-type"] == null) return; let type; try { const parseContent = _contentType.default.parse(headers["content-type"].replace(/;$/, "")); // eslint-disable-next-line prefer-destructuring type = parseContent.type; } catch (e) { return; } if (["image/png", "image/jpeg", "image/jpg"].includes(type) && response.url().startsWith("http")) images.push({ type: "image", term: response.url() }); }); await goto(location); await page.waitFor(1 * 1000); content = await page.content(); }); _fs.default.writeFileSync(target, content); const unit = await basicImport(target); return [unit, images]; }; exports.hypercubeImport = hypercubeImport; var _default = { assertDir, download, wget }; exports.default = _default;