@mitodl/course-search-utils
Version:
JS utils for interacting with MIT Open Course search
517 lines (516 loc) • 20.2 kB
JavaScript
var __assign = (this && this.__assign) || function () {
__assign = Object.assign || function(t) {
for (var s, i = 1, n = arguments.length; i < n; i++) {
s = arguments[i];
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
t[p] = s[p];
}
return t;
};
return __assign.apply(this, arguments);
};
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
if (ar || !(i in from)) {
if (!ar) ar = Array.prototype.slice.call(from, 0, i);
ar[i] = from[i];
}
}
return to.concat(ar || Array.prototype.slice.call(from));
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.buildDefaultSort = exports.buildSuggestQuery = exports.buildOrQuery = exports.buildFacetSubQuery = exports.buildLearnQuery = exports.buildChannelQuery = exports.buildSearchQuery = exports.emptyOrNil = exports.normalizeDoubleQuotes = exports.isDoubleQuoted = exports.searchFields = exports.channelField = exports.SEARCH_FILTER_PROFILE = exports.SEARCH_FILTER_COMMENT = exports.SEARCH_FILTER_POST = exports.RESOURCEFILE_QUERY_FIELDS = exports.RESOURCE_QUERY_NESTED_FIELDS = exports.LEARN_SUGGEST_FIELDS = void 0;
var bodybuilder_1 = __importDefault(require("bodybuilder"));
var ramda_1 = require("ramda");
var constants_1 = require("./constants");
var PODCAST_QUERY_FIELDS = [
"title.english^3",
"short_description.english^2",
"full_description.english",
"topics"
];
exports.LEARN_SUGGEST_FIELDS = [
"title.trigram",
"short_description.trigram"
];
var CHANNEL_SUGGEST_FIELDS = ["suggest_field1", "suggest_field2"];
var PODCAST_EPISODE_QUERY_FIELDS = [
"title.english^3",
"short_description.english^2",
"full_description.english",
"topics",
"series_title^2"
];
var COURSE_QUERY_FIELDS = [
"title.english^3",
"short_description.english^2",
"full_description.english",
"topics",
"platform",
"course_id",
"offered_by",
"department_name",
"course_feature_tags"
];
var VIDEO_QUERY_FIELDS = [
"title.english^3",
"short_description.english^2",
"full_description.english",
"transcript.english^2",
"topics",
"platform",
"video_id",
"offered_by"
];
var LIST_QUERY_FIELDS = [
"title.english^3",
"short_description.english^2",
"topics"
];
var POST_QUERY_FIELDS = [
"text.english",
"post_title.english",
"plain_text.english"
];
var COMMENT_QUERY_FIELDS = ["text.english"];
var PROFILE_QUERY_FIELDS = [
"author_headline.english",
"author_bio.english",
"author_name.english"
];
exports.RESOURCE_QUERY_NESTED_FIELDS = [
"runs.year",
"runs.semester",
"runs.level",
"runs.instructors^5",
"department_name"
];
exports.RESOURCEFILE_QUERY_FIELDS = [
"content",
"title.english^3",
"short_description.english^2",
"department_name",
"resource_type"
];
var OBJECT_TYPE = "type";
var POST_CHANNEL_FIELD = "channel_name";
var COMMENT_CHANNEL_FIELD = "channel_name";
var PROFILE_CHANNEL_FIELD = "author_channel_membership";
exports.SEARCH_FILTER_POST = "post";
exports.SEARCH_FILTER_COMMENT = "comment";
exports.SEARCH_FILTER_PROFILE = "profile";
var channelField = function (type) {
if (type === constants_1.LearningResourceType.Post) {
return POST_CHANNEL_FIELD;
}
else if (type === constants_1.LearningResourceType.Comment) {
return COMMENT_CHANNEL_FIELD;
}
else if (type === constants_1.LearningResourceType.Profile) {
return PROFILE_CHANNEL_FIELD;
}
else {
throw new Error("Missing type");
}
};
exports.channelField = channelField;
var searchFields = function (type) {
switch (type) {
case constants_1.LearningResourceType.Course:
return COURSE_QUERY_FIELDS;
case constants_1.LearningResourceType.Video:
return VIDEO_QUERY_FIELDS;
case constants_1.LearningResourceType.Podcast:
return PODCAST_QUERY_FIELDS;
case constants_1.LearningResourceType.PodcastEpisode:
return PODCAST_EPISODE_QUERY_FIELDS;
case constants_1.LearningResourceType.ResourceFile:
return exports.RESOURCEFILE_QUERY_FIELDS;
case constants_1.LearningResourceType.Comment:
return COMMENT_QUERY_FIELDS;
case constants_1.LearningResourceType.Post:
return POST_QUERY_FIELDS;
case constants_1.LearningResourceType.Profile:
return PROFILE_QUERY_FIELDS;
case constants_1.LearningResourceType.Program:
case constants_1.LearningResourceType.Userlist:
case constants_1.LearningResourceType.LearningPath:
return LIST_QUERY_FIELDS;
default:
return (0, ramda_1.uniq)(__spreadArray(__spreadArray(__spreadArray([], POST_QUERY_FIELDS, true), COMMENT_QUERY_FIELDS, true), PROFILE_QUERY_FIELDS, true));
}
};
exports.searchFields = searchFields;
var isDoubleQuoted = function (str) {
return /^".+"$/.test((0, exports.normalizeDoubleQuotes)(str) || "");
};
exports.isDoubleQuoted = isDoubleQuoted;
var normalizeDoubleQuotes = function (text) { return (text || "").replace(/[\u201C\u201D]/g, '"'); };
exports.normalizeDoubleQuotes = normalizeDoubleQuotes;
exports.emptyOrNil = (0, ramda_1.either)(ramda_1.isEmpty, ramda_1.isNil);
var getTypes = function (activeFacets) {
if (activeFacets === null || activeFacets === void 0 ? void 0 : activeFacets.type) {
return activeFacets.type;
}
else {
return [exports.SEARCH_FILTER_COMMENT, exports.SEARCH_FILTER_POST, exports.SEARCH_FILTER_PROFILE];
}
};
/**
Generates an elasticsearch query object with nested string parameters from inputs of type SearchQueryParams.
*/
var buildSearchQuery = function (_a) {
var _b, _c;
var text = _a.text, from = _a.from, size = _a.size, sort = _a.sort, activeFacets = _a.activeFacets, channelName = _a.channelName, resourceTypes = _a.resourceTypes, aggregations = _a.aggregations;
var builder = (0, bodybuilder_1.default)();
if (!(0, ramda_1.isNil)(from)) {
builder = builder.from(from);
}
if (!(0, ramda_1.isNil)(size)) {
builder = builder.size(size);
}
if (sort &&
activeFacets &&
!((_b = activeFacets.type) !== null && _b !== void 0 ? _b : []).includes(constants_1.LearningResourceType.ResourceFile)) {
var field = sort.field, option = sort.option;
var fieldPieces = field.split(".");
var sortQuery = {
order: option,
nested: {
path: fieldPieces[0]
}
};
if (field === constants_1.COURSENUM_SORT_FIELD) {
if (((_c = activeFacets.department_name) !== null && _c !== void 0 ? _c : []).length === 0) {
sortQuery["nested"]["filter"] = {
term: {
"department_course_numbers.primary": true
}
};
}
else {
var filterClause = [];
addFacetClauseToArray(filterClause, "department_course_numbers.department", activeFacets.department_name || [], constants_1.LearningResourceType.Course);
sortQuery["nested"]["filter"] = filterClause[0];
}
}
builder.sort(field, sortQuery);
}
var types = resourceTypes !== null && resourceTypes !== void 0 ? resourceTypes : getTypes(activeFacets);
var searchText = (0, exports.normalizeDoubleQuotes)(text);
return (0, exports.emptyOrNil)((0, ramda_1.intersection)(__spreadArray(__spreadArray([], constants_1.LR_TYPE_ALL, true), [constants_1.LearningResourceType.ResourceFile], false), types)) ?
(0, exports.buildChannelQuery)(builder, searchText, types, channelName) :
(0, exports.buildLearnQuery)(builder, searchText, types, activeFacets, aggregations);
};
exports.buildSearchQuery = buildSearchQuery;
var buildChannelQuery = function (builder, text, types, channelName) {
var _a;
for (var _i = 0, types_1 = types; _i < types_1.length; _i++) {
var type = types_1[_i];
var textQuery = (0, exports.emptyOrNil)(text) ?
{} :
{
should: [
{
multi_match: {
query: text,
fields: (0, exports.searchFields)(type)
}
}
].filter(function (clause) { return clause !== null; })
};
// If channelName is present add a filter for the type
var channelClauses = channelName ?
[
{
term: (_a = {},
_a[(0, exports.channelField)(type)] = channelName,
_a)
}
] :
[];
builder = (0, exports.buildOrQuery)(builder, type, textQuery, channelClauses);
}
if (!(0, exports.emptyOrNil)(text)) {
builder = builder.rawOption("suggest",
// @ts-expect-error
(0, exports.buildSuggestQuery)(text, CHANNEL_SUGGEST_FIELDS));
}
return builder.build();
};
exports.buildChannelQuery = buildChannelQuery;
var buildLearnQuery = function (builder, text, types, facets, aggregations) {
var _a, _b, _c;
for (var _i = 0, types_2 = types; _i < types_2.length; _i++) {
var type = types_2[_i];
var queryType = (0, exports.isDoubleQuoted)(text) ? "query_string" : "multi_match";
var textQuery = (0, exports.emptyOrNil)(text) ?
{} :
{
should: [
(_a = {},
_a[queryType] = {
query: text,
fields: (0, exports.searchFields)(type)
},
_a),
{
wildcard: {
coursenum: {
value: "".concat((text || "").toUpperCase(), "*"),
boost: 100.0,
rewrite: "constant_score"
}
}
},
[
constants_1.LearningResourceType.Course,
constants_1.LearningResourceType.Program
].includes(type) ?
{
nested: {
path: "runs",
query: (_b = {},
_b[queryType] = {
query: text,
fields: exports.RESOURCE_QUERY_NESTED_FIELDS
},
_b)
}
} :
null,
type === constants_1.LearningResourceType.Course ?
{
has_child: {
type: "resourcefile",
query: (_c = {},
_c[queryType] = {
query: text,
fields: exports.RESOURCEFILE_QUERY_FIELDS
},
_c),
score_mode: "avg"
}
} :
null
]
.flat()
.filter(function (clause) { return clause !== null; })
};
// Add filters for facets if necessary
var facetClauses = (0, exports.buildFacetSubQuery)(facets, builder, type, aggregations);
builder = (0, exports.buildOrQuery)(builder, type, textQuery, []);
builder = builder.rawOption("post_filter", {
bool: {
must: __spreadArray([], facetClauses, true)
}
});
// Include suggest if search test is not null/empty
if (!(0, exports.emptyOrNil)(text)) {
builder = builder.rawOption("suggest",
// @ts-expect-error
(0, exports.buildSuggestQuery)(text, exports.LEARN_SUGGEST_FIELDS));
}
else if (facetClauses.length === 0 && (0, ramda_1.equals)(types, constants_1.LR_TYPE_ALL)) {
builder = builder.rawOption("sort", (0, exports.buildDefaultSort)());
}
}
return builder.build();
};
exports.buildLearnQuery = buildLearnQuery;
var buildLevelQuery = function (_builder, values, facetClauses) {
if (values && values.length > 0) {
var facetFilter = values.map(function (value) { return ({
nested: {
path: "runs",
query: {
match: {
"runs.level": value
}
}
}
}); });
facetClauses.push({
bool: {
should: facetFilter
}
});
}
};
var buildFacetSubQuery = function (facets, builder, objectType, aggregations) {
var facetClauses = [];
if (facets) {
Object.entries(facets).forEach(function (_a) {
var key = _a[0], values = _a[1];
var facetClausesForFacet = [];
if (values && values.length > 0) {
if (key === "level") {
buildLevelQuery(builder, values, facetClauses);
}
else {
addFacetClauseToArray(facetClauses, key, values, objectType);
}
}
if (aggregations && aggregations.includes(key)) {
// $FlowFixMe: we check for null facets earlier
Object.entries(facets).forEach(function (_a) {
var otherKey = _a[0], otherValues = _a[1];
if (otherKey !== key && otherValues && otherValues.length > 0) {
if (otherKey === "level") {
buildLevelQuery(builder, otherValues, facetClausesForFacet);
}
else {
addFacetClauseToArray(facetClausesForFacet, otherKey, otherValues, objectType);
}
}
});
if (facetClausesForFacet.length > 0) {
var filter_1 = {
filter: {
bool: {
must: __spreadArray([], facetClausesForFacet, true)
}
}
};
if (key === "level") {
// this is done seperately b/c it's a nested field
builder.agg("filter", key, function (aggr) {
return aggr
.orFilter("bool", filter_1)
.agg("nested", { path: "runs" }, "level", function (aggr) {
return aggr.agg("terms", "runs.level", { size: 10000 }, "level", function (aggr) {
return aggr.agg("reverse_nested", null, {}, "courses");
});
});
});
}
else {
builder.agg("filter", key, function (aggregation) {
return aggregation
.orFilter("bool", filter_1)
.agg("terms", key === OBJECT_TYPE ? "object_type.keyword" : key, { size: 10000 }, key);
});
}
}
else {
if (key === "level") {
// this is done seperately b/c it's a nested field
builder.agg("nested", { path: "runs" }, "level", function (aggr) {
return aggr.agg("terms", "runs.level", {
size: 10000
}, "level", function (aggr) { return aggr.agg("reverse_nested", null, {}, "courses"); });
});
}
else {
builder.agg("terms", key === OBJECT_TYPE ? "object_type.keyword" : key, { size: 10000 }, key);
}
}
}
});
}
return facetClauses;
};
exports.buildFacetSubQuery = buildFacetSubQuery;
var buildOrQuery = function (builder, searchType, textQuery, extraClauses) {
var textFilter = (0, exports.emptyOrNil)(textQuery) ? [] : [{ bool: textQuery }];
builder = builder.orQuery("bool", __assign({ filter: {
bool: {
must: __spreadArray(__spreadArray([
{
term: {
object_type: searchType
}
}
], extraClauses, true), textFilter, true)
}
} }, textQuery));
return builder;
};
exports.buildOrQuery = buildOrQuery;
var addFacetClauseToArray = function (facetClauses, facet, values, type) {
if (facet === OBJECT_TYPE &&
values.toString() === exports.buildSearchQuery.toString()) {
return;
}
var filterKey = facet === OBJECT_TYPE ? "object_type.keyword" : facet;
var valueClauses;
// Apply standard facet clause unless this is an offered_by facet for resources.
if (facet !== "offered_by" || type !== constants_1.LearningResourceType.ResourceFile) {
valueClauses = values.map(function (value) {
var _a;
return ({
term: (_a = {},
_a[filterKey] = value,
_a)
});
});
}
else {
// offered_by facet should apply to parent doc of resource
valueClauses = [
{
has_parent: {
parent_type: "resource",
query: {
bool: {
should: values.map(function (value) {
var _a;
return ({
term: (_a = {},
_a[filterKey] = value,
_a)
});
})
}
}
}
}
];
}
facetClauses.push({
bool: {
should: valueClauses
}
});
};
var buildSuggestQuery = function (text, suggestFields) {
var suggest = {
text: text
};
suggestFields.forEach(function (field) {
return (suggest[field] = {
phrase: {
field: "".concat(field),
size: 5,
gram_size: 1,
confidence: 0.0001,
max_errors: 3,
collate: {
query: {
source: {
match_phrase: {
"{{field_name}}": "{{suggestion}}"
}
}
},
params: { field_name: "".concat(field) },
prune: true
}
}
});
});
return suggest;
};
exports.buildSuggestQuery = buildSuggestQuery;
var buildDefaultSort = function () {
return [
{ minimum_price: { order: "asc" } },
{ default_search_priority: { order: "desc" } },
{ created: { order: "desc" } }
];
};
exports.buildDefaultSort = buildDefaultSort;
;