@sugarcube/plugin-http
Version:
HTTP related plugins for sugarcube.
196 lines (165 loc) • 6.23 kB
JavaScript
;
// Mark this CommonJS module as transpiled from an ES module. Interop helpers
// such as `_interopRequireDefault` below check this flag and, when it is set,
// return the module object unchanged instead of wrapping it.
Object.defineProperty(exports, "__esModule", {
value: true
});
// Declare the default export before any `require` call runs; the actual
// plugin function is assigned to it at the bottom of the file.
exports.default = void 0;
var _fs = _interopRequireDefault(require("fs"));
var _os = _interopRequireDefault(require("os"));
var _path = _interopRequireDefault(require("path"));
var _util = require("util");
var _fp = require("lodash/fp");
var _dashp = _interopRequireWildcard(require("dashp"));
var _core = require("@sugarcube/core");
var _utils = require("@sugarcube/utils");
var _pluginFs = require("@sugarcube/plugin-fs");
var _utils2 = require("../utils");
var _browser = _interopRequireDefault(require("../browser"));
/**
 * Return the WeakMap used to memoise namespace objects built by
 * `_interopRequireWildcard`.
 *
 * Two separate caches exist, one per interop mode. On the first call the
 * caches are created and this function replaces itself with a closure over
 * them, so every later call hands back the same two WeakMap instances.
 *
 * @param {boolean} [nodeInterop] - Select the node-interop cache when truthy.
 * @returns {WeakMap|null} The matching cache, or `null` without WeakMap support.
 */
function _getRequireWildcardCache(nodeInterop) {
  if (typeof WeakMap !== "function") {
    return null;
  }
  const babelCache = new WeakMap();
  const nodeCache = new WeakMap();
  // Self-replacement: subsequent calls skip the allocation above.
  _getRequireWildcardCache = function (useNodeInterop) {
    if (useNodeInterop) {
      return nodeCache;
    }
    return babelCache;
  };
  return _getRequireWildcardCache(nodeInterop);
}
/**
 * Build a namespace-style object for a required module (`import * as x`).
 *
 * - A module flagged `__esModule` is returned unchanged unless `nodeInterop`
 *   is truthy.
 * - `null` and primitives are wrapped as `{ default: value }`.
 * - Otherwise every own enumerable property except `default` is copied onto
 *   a fresh object (accessors are copied as accessors), `default` is set to
 *   the module itself, and the result is memoised per module object.
 *
 * @param {*} obj - The value returned by `require`.
 * @param {boolean} [nodeInterop] - Selects the interop mode and its cache.
 * @returns {*} The module itself or a namespace wrapper around it.
 */
function _interopRequireWildcard(obj, nodeInterop) {
  if (!nodeInterop && obj && obj.__esModule) {
    return obj;
  }
  const isObjectLike = obj !== null && (typeof obj === "object" || typeof obj === "function");
  if (!isObjectLike) {
    return { default: obj };
  }
  const cache = _getRequireWildcardCache(nodeInterop);
  if (cache && cache.has(obj)) {
    return cache.get(obj);
  }
  const namespace = {};
  const canUseDescriptors = Object.defineProperty && Object.getOwnPropertyDescriptor;
  for (const key in obj) {
    // Skip inherited keys and the original `default`; the latter is replaced below.
    if (key === "default" || !Object.prototype.hasOwnProperty.call(obj, key)) {
      continue;
    }
    const descriptor = canUseDescriptors ? Object.getOwnPropertyDescriptor(obj, key) : null;
    const isAccessor = descriptor && (descriptor.get || descriptor.set);
    if (isAccessor) {
      // Keep getters/setters live instead of snapshotting their current value.
      Object.defineProperty(namespace, key, descriptor);
    } else {
      namespace[key] = obj[key];
    }
  }
  namespace.default = obj;
  if (cache) {
    cache.set(obj, namespace);
  }
  return namespace;
}
/**
 * Normalise a required module so its default export is always reachable
 * as `.default`.
 *
 * @param {*} obj - The value returned by `require`.
 * @returns {*} `obj` itself when it is flagged `__esModule`, otherwise
 *   `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
// Promise-returning variant of `fs.mkdtemp`; the plugin uses it to create the
// temporary directory that holds downloads during an import run.
const mkdtemp = (0, _util.promisify)(_fs.default.mkdtemp);
// Query type this plugin selects from the envelope; it is also recorded in the
// `_sc_queries` field of every unit the plugin produces.
const querySource = "http_url";
/**
 * Translate the configured `http.import_parallel` value into the suffix of
 * the dashp `flatmapP*` function to use: `""` selects `flatmapP` (one import
 * at a time), a number `n` selects `flatmapP<n>`.
 *
 * Missing, non-numeric and fractional values are normalised so the returned
 * suffix is always `""` or an integer from 2 to 8.
 *
 * @param {*} parallel - The raw configuration value.
 * @param {{warn: Function, info: Function}} log - Logger for the decision.
 * @returns {string|number} The suffix to append to `flatmapP`.
 */
const concurrencySuffix = (parallel, log) => {
  // Math.trunc keeps NaN/Infinity as they are, so both are handled below.
  const requested = Math.trunc(Number(parallel));
  if (Number.isNaN(requested) || requested < 1) {
    log.warn(`--http.import_parallel must be between 1 and 8. Setting to 1.`);
    return "";
  }
  if (requested === 1) {
    log.info(`Run a single import at a time.`);
    return "";
  }
  if (requested > 8) {
    log.warn(`--http.import_parallel must be between 1 and 8. Setting to 8.`);
    return 8;
  }
  log.info(`Run ${requested} imports concurrently.`);
  return requested;
};

/**
 * Copy every own enumerable field of `unit` whose value is neither `null`
 * nor `undefined`, so fields that could not be extracted are left out.
 *
 * @param {Object} unit - The raw import result.
 * @returns {Object} A new object holding only the populated fields.
 */
const definedFields = unit => {
  const fields = {};
  for (const key of Object.keys(unit)) {
    if (unit[key] != null) fields[key] = unit[key];
  }
  return fields;
};

/**
 * Import every `http_url` query of the envelope as a unit.
 *
 * Each URL is probed for its content type. URLs of type "url" are imported
 * with `hypercubeImport` (which downloads into a temporary directory), all
 * other types with `basicImport`. URLs that fail to import are reported via
 * `stats.fail` and skipped. The temporary directory and the browser are
 * released even when the import run rejects.
 *
 * @param {Object} envelope - The envelope holding data and queries.
 * @param {Object} context - Plugin context.
 * @param {Object} context.log - Logger (`warn`, `info`, `debug`).
 * @param {Object} context.cfg - Configuration; reads `http.import_parallel`.
 * @param {Object} context.stats - Stats collector (`count`, `fail`).
 * @returns {Promise<Object>} The envelope with the imported units added.
 */
const plugin = async (envelope, {
  log,
  cfg,
  stats
}) => {
  const queries = _core.envelope.queriesByType(querySource, envelope);
  const mod = concurrencySuffix((0, _fp.get)("http.import_parallel", cfg), log);
  const mapper = _dashp.default[`flatmapP${mod}`];
  const {
    browse,
    dispose
  } = await (0, _browser.default)();
  let tmpdir;
  let data;
  try {
    tmpdir = await mkdtemp(_path.default.join(_os.default.tmpdir(), "sugarcube-"));
    await (0, _pluginFs.mkdirP)(tmpdir);
    // Progress is measured against the URLs being imported, not against the
    // units that already sit in the envelope.
    const logCounter = (0, _utils.counter)(queries.length, ({
      cnt,
      total,
      percent
    }) => log.debug(`Progress: ${cnt}/${total} units (${percent}%).`));
    const decisions = (0, _core.createFeatureDecisions)();

    // Import a single URL; resolves to the new unit or `null` when skipped.
    const importUrl = async url => {
      stats.count("total");
      let unit;
      let media = [];
      let mediaType;
      try {
        mediaType = await (0, _utils2.urlContentType)(url);
        if (mediaType === "url") {
          // Import URLs using the hypercube model. See the readme for a
          // link to the referenced paper. Provide a location for a temporary
          // download.
          const target = _path.default.join(tmpdir, `${_core.crypto.uid(url)}.html`);
          [unit, media] = await (0, _utils2.hypercubeImport)(browse, target, url);
        } else {
          // Images, videos and documents go through the basic import; the
          // URL itself is recorded as a media item of the detected type.
          unit = await (0, _utils2.basicImport)(url);
          media.push({
            type: mediaType,
            term: url
          });
        }
      } catch (e) {
        stats.fail({
          type: "http_import",
          term: url,
          reason: e.message
        });
        return null;
      }
      if (unit == null) return null;
      log.info(`Imported url ${url} as media type "${mediaType}".`);
      stats.count("success");
      logCounter();

      const scMedia = [{
        type: "url",
        term: url
      }].concat(media);
      const scQueries = [{
        type: querySource,
        term: url
      }];

      // Test whether the new Ncube data format is enabled.
      if (decisions.canNcube()) {
        return {
          _sc_id: url,
          _sc_entity: "website",
          _sc_id_fields: ["_sc_id"],
          _sc_media: scMedia,
          _sc_queries: scQueries,
          _sc_href: url,
          ...(0, _utils.tikaToEntity)(unit),
          _sc_data: {
            location: url,
            // Fields that couldn't be extracted are not added to the unit.
            ...definedFields(unit)
          }
        };
      }

      // Use the old data format.
      return {
        _sc_id_fields: ["location"],
        _sc_media: scMedia,
        _sc_queries: scQueries,
        _sc_href: url,
        ...(0, _utils.tikaToEntity)(unit),
        location: url,
        // Fields that couldn't be extracted are not added to the unit.
        ...definedFields(unit)
      };
    };

    data = await (0, _dashp.flowP)([mapper(importUrl), rs => rs.filter(r => r !== null)], queries);
  } finally {
    // Release resources on success and on failure alike; the nested finally
    // makes sure the browser is disposed even if removing the directory fails.
    try {
      if (tmpdir != null) await (0, _pluginFs.cleanUp)(tmpdir);
    } finally {
      if (dispose != null) await dispose();
    }
  }
  return _core.envelope.concatData(data, envelope);
};
// Option spec for the single command line flag this plugin understands.
const importParallelOption = {
  type: "number",
  nargs: 1,
  desc: "The number of parallel HTTP imports. Can be between 1 and 8.",
  default: 1
};

// Attach the plugin metadata (options and description) to the plugin function.
plugin.argv = {
  "http.import_parallel": importParallelOption
};
plugin.desc = "Import HTTP URI's as Sugarcube units.";

// Publish the plugin as the module's default export.
exports.default = plugin;