@sugarcube/plugin-tika
Version:
Parse files and metadata using Tika.
57 lines (45 loc) • 1.76 kB
JavaScript
;
// Flag this module as a transpiled ES module. _interopRequireDefault in this
// file tests the same `__esModule` property on the modules it is given.
Object.defineProperty(exports, "__esModule", {
value: true
});
// Initialise the three exported names to undefined; the real values are
// assigned further down, right after each definition.
exports.entity = exports.safeExtract = exports.extract = void 0;
var _fp = require("lodash/fp");
var _moment = _interopRequireDefault(require("moment"));
var _utils = require("@sugarcube/utils");
var _contentTypes = _interopRequireDefault(require("./content-types"));
/**
 * Normalise a required module so its default export is always reachable as
 * `.default`.
 *
 * @param {*} obj - The value returned by `require`.
 * @returns {*} `obj` itself when it is truthy and has a truthy `__esModule`
 *   property, otherwise a new object `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
// Lookup table from a Tika "Content-Type" value to the matching transform
// exported by ./content-types. `entity` calls the selected value as a function
// on the merged unit; content types not listed here are left untransformed.
const contentTypes = (({ imageJpeg, applicationPdf }) => ({
  "image/jpeg": imageJpeg,
  "application/pdf": applicationPdf
}))(_contentTypes.default);
// Re-export `extract` from @sugarcube/utils unchanged.
const { extract } = _utils;
exports.extract = extract;

/**
 * Variant of `extract` that never rejects.
 *
 * Useful when fetching links that would otherwise fail with unhelpful errors,
 * e.g. LinkedIn answers with error 999 and `extract` rejects on that.
 *
 * @param {*} location - Passed straight through to `extract`.
 * @returns {Promise} Whatever `extract` resolves to, or the placeholder pair
 *   `["", {}]` when `extract` rejects (the error itself is discarded).
 */
const safeExtract = location => {
  // Build a fresh placeholder on every failure so callers never share state.
  const emptyResult = () => ["", {}];
  return extract(location).catch(emptyResult);
};
exports.safeExtract = safeExtract;
/**
 * Build an enriched unit from a unit, its extracted text and Tika metadata.
 *
 * Curried via lodash/fp `curry`, so the three arguments may be supplied one
 * at a time.
 *
 * @param {Object} unit - Base unit; `_sc_id_fields` and `_sc_content_fields`
 *   are read from it and default to an empty list when falsy.
 * @param {*} text - Extracted text, stored under the `text` key.
 * @param {Object} meta - Metadata; `date`, `Content-Type` and `title` are the
 *   keys inspected here.
 * @returns {*} The merge of `unit`, `meta`, `{ text }` and the computed
 *   `_sc_*` fields (in that order), passed through the transform registered
 *   for `meta["Content-Type"]` in `contentTypes`, or returned untransformed
 *   when no transform is registered.
 */
const entity = (0, _fp.curry)((unit, text, meta) => {
  // Append every candidate key that is present on `meta` to the given list.
  const withPresentFields = (existing, candidates) =>
    candidates.reduce(
      (acc, field) => ((0, _fp.has)(field, meta) ? (0, _fp.concat)(acc, field) : acc),
      existing || []
    );

  // NOTE(review): when `meta.date` is missing, moment presumably falls back to
  // the current time — confirm that this is the intended publication date.
  const tikaDate = (0, _moment.default)(meta.date).utc().toDate();

  const idFields = withPresentFields(unit._sc_id_fields, ["Content-Type"]);
  const contentFields = withPresentFields(unit._sc_content_fields, ["title"]);

  // Pick the content-type specific transform, defaulting to a no-op.
  const mimeType = meta["Content-Type"];
  let transform = _fp.identity;
  if ((0, _fp.has)(mimeType, contentTypes)) {
    transform = contentTypes[mimeType];
  }

  const computed = {
    _sc_pubdates: { tika: tikaDate },
    _sc_id_fields: idFields,
    _sc_content_fields: contentFields
  };
  const merged = (0, _fp.mergeAll)([unit, meta, { text }, computed]);
  return transform(merged);
});
exports.entity = entity;