// @stackbit/sdk
// Version:
// 1,224 lines • 49.2 kB
// JavaScript
"use strict";
// Interop helper for default imports: a module flagged with `__esModule` is returned unchanged,
// any other (truthy or falsy) module value is wrapped as `{ default: mod }` so that call sites
// can always read `.default`. An already-defined `this.__importDefault` is reused when present.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateSchema = void 0;
const path_1 = __importDefault(require("path"));
const lodash_1 = __importDefault(require("lodash"));
const micromatch_1 = __importDefault(require("micromatch"));
const moment_1 = __importDefault(require("moment"));
const utils_1 = require("@stackbit/utils");
const file_browser_1 = require("./file-browser");
const consts_1 = require("../consts");
const analyze_schema_types_1 = require("./analyze-schema-types");
// Minimum similarity values two inferred models must reach before they are merged into one.
// Threshold for merging page models that live in the same sub-folder (the lowest page threshold).
const SAME_FOLDER_PAGE_DSC_COEFFICIENT = 0.6;
// Threshold for merging page models located directly in the lowest common ancestor folder.
const ROOT_FOLDER_PAGE_DSC_COEFFICIENT = 0.7;
// Threshold for the final pass that merges page models coming from all folders.
const DIFF_FOLDER_PAGE_DSC_COEFFICIENT = 0.8;
// Threshold passed to mergeSimilarFields (with the 'dsc' measure) when merging data models.
const DATA_MODEL_DSC_COEFFICIENT = 0.8;
// Threshold passed to consolidateObjectFieldsListWithOverlap when merging objects of the same list;
// despite the name, that code path compares fields with the overlap coefficient, not with DSC.
const LIST_OBJECT_DSC_COEFFICIENT = 0.8;
/**
 * Infers a content schema (page, data and object models) from the files exposed by a file browser.
 *
 * @param {Object} options
 * @param {Object} [options.ssgMatchResult] optional detection result; `ssgDir`, `pagesDir`, `dataDir`
 *   and `pageTypeKey` are read here, and the whole object is forwarded to listContentFiles.
 *   Every other option is forwarded to getFileBrowserFromOptions.
 * @returns {Promise<{models: Array, pagesDir: (string|undefined), dataDir: (string|undefined)}>}
 */
async function generateSchema({ ssgMatchResult, ...fileBrowserOptions }) {
    const fileBrowser = (0, file_browser_1.getFileBrowserFromOptions)(fileBrowserOptions);
    await fileBrowser.listFiles();

    const ssgDir = ssgMatchResult?.ssgDir ?? '';
    let pagesDir = ssgMatchResult?.pagesDir;
    let dataDir = ssgMatchResult?.dataDir;

    const pageListing = await listContentFiles({
        fileBrowser,
        contentDir: pagesDir,
        ssgMatchResult,
        excludedFiles: consts_1.EXCLUDED_MARKDOWN_FILES,
        allowedExtensions: consts_1.MARKDOWN_FILE_EXTENSIONS
    });
    const dataListing = await listContentFiles({
        fileBrowser,
        contentDir: dataDir,
        ssgMatchResult,
        excludedFiles: consts_1.EXCLUDED_DATA_FILES,
        allowedExtensions: consts_1.DATA_FILE_EXTENSIONS,
        excludedFilesInSSGDir: ['config.*', '_config.*']
    });

    // page models are generated first; the object models they produce seed the data model pass
    const pageModelsResults = await generatePageModelsForFiles({
        filePaths: pageListing.filePaths,
        dirPathFromRoot: pageListing.contentDirFromRoot,
        fileBrowser,
        pageTypeKey: ssgMatchResult?.pageTypeKey,
        objectModels: []
    });
    const dataModelsResults = await generateDataModelsForFiles({
        filePaths: dataListing.filePaths,
        dirPathFromRoot: dataListing.contentDirFromRoot,
        fileBrowser,
        objectModels: pageModelsResults.objectModels
    });

    let pageModels = analyzePageFileMatchingProperties(pageModelsResults.pageModels);
    let dataModels = analyzeDataFileMatchingProperties(dataModelsResults.dataModels);

    // when a directory was not provided, derive it from the lowest common ancestor of the models
    if (pagesDir === undefined && pageModels.length > 0) {
        const commonPagesDir = getLowestCommonAncestorFolderFromModels(pageModels);
        pagesDir = getDir(ssgDir, commonPagesDir);
        if (commonPagesDir !== '') {
            pageModels = adjustModelsWithLowestCommonAncestor(pageModels, commonPagesDir);
        }
    }
    if (dataDir === undefined && dataModels.length > 0) {
        const commonDataDir = getLowestCommonAncestorFolderFromModels(dataModels);
        dataDir = getDir(ssgDir, commonDataDir);
        if (commonDataDir !== '') {
            dataModels = adjustModelsWithLowestCommonAncestor(dataModels, commonDataDir);
        }
    }

    // object models keep their generated name; only the label is derived from the position
    const toObjectModel = ({ name, fields }, index) => ({
        type: 'object',
        name,
        label: lodash_1.default.startCase(`object_${index + 1}`),
        fields: removeUnknownTypesFromFields(fields)
    });
    const objectModels = lodash_1.default.map(dataModelsResults.objectModels, toObjectModel);

    return {
        models: lodash_1.default.concat(pageModels, dataModels, objectModels),
        pagesDir,
        dataDir
    };
}
exports.generateSchema = generateSchema;
/**
 * Joins the SSG directory with a content directory.
 *
 * @param {string} ssgDir
 * @param {string} contentDir
 * @returns {string} the joined path, or an empty string when the join resolves to '.'
 */
function getDir(ssgDir, contentDir) {
    const joined = path_1.default.join(ssgDir, contentDir);
    if (joined === '.') {
        return '';
    }
    return joined;
}
/**
 * Lists content files either from one explicit content directory or from every directory in
 * `ssgMatchResult.contentDirs`.
 *
 * A single directory is scanned when `contentDir` is defined or when there are no `contentDirs`;
 * otherwise every content dir is scanned and the file paths are returned relative to the SSG dir.
 *
 * @returns {Promise<{contentDirFromRoot: string, filePaths: Array}>}
 */
async function listContentFiles({ fileBrowser, contentDir, ssgMatchResult, excludedFiles, allowedExtensions, excludedFilesInSSGDir }) {
    const ssgDir = ssgMatchResult?.ssgDir ?? '';
    const contentDirs = ssgMatchResult?.contentDirs ?? [];
    const sharedOptions = { fileBrowser, ssgMatchResult, excludedFiles, allowedExtensions, excludedFilesInSSGDir };

    const scanSingleDir = contentDir !== undefined || contentDirs.length === 0;
    if (scanSingleDir) {
        const contentDirFromRoot = getDir(ssgDir, contentDir ?? '');
        // TODO: in some projects, pages can be defined as JSON files as well
        const filePaths = await readDirRecursivelyWithFilter({ ...sharedOptions, contentDir });
        return { contentDirFromRoot, filePaths };
    }

    // several content dirs: scan them one after another and collect paths relative to the SSG dir
    const filePaths = await (0, utils_1.reducePromise)(contentDirs, async (collected, dir) => {
        const dirFiles = await readDirRecursivelyWithFilter({
            ...sharedOptions,
            contentDir: dir,
            filesRelativeToSSGDir: true
        });
        return collected.concat(dirFiles);
    }, []);
    return { contentDirFromRoot: ssgDir, filePaths };
}
/**
 * Reads files below a content directory, skipping excluded globs and files whose extension is not
 * in `options.allowedExtensions`.
 *
 * @param {Object} options `fileBrowser`, `contentDir`, `ssgMatchResult`, `excludedFiles`,
 *   `allowedExtensions`, and optionally `excludedFilesInSSGDir` and `filesRelativeToSSGDir`.
 * @returns {Promise<Array>} the file paths reported by the file browser; when
 *   `filesRelativeToSSGDir` is set, each one is prefixed with `contentDir`.
 */
async function readDirRecursivelyWithFilter(options) {
    const { contentDir, ssgMatchResult } = options;
    const ignoredGlobs = [
        ...consts_1.EXCLUDED_COMMON_FILES,
        ...options.excludedFiles,
        ...getExcludedFiles(contentDir, options.excludedFilesInSSGDir, ssgMatchResult)
    ];
    const scanRoot = getDir(ssgMatchResult?.ssgDir ?? '', contentDir ?? '');

    const shouldVisit = (entry) => {
        if (micromatch_1.default.isMatch(entry.filePath, ignoredGlobs)) {
            return false;
        }
        // directories that are not excluded are always accepted; files must have an allowed extension
        return entry.isDirectory
            ? true
            : options.allowedExtensions.includes(path_1.default.extname(entry.filePath).substring(1));
    };

    const filePaths = options.fileBrowser.readFilesRecursively(scanRoot, { filter: shouldVisit });
    if (!options.filesRelativeToSSGDir) {
        return filePaths;
    }
    return lodash_1.default.map(filePaths, (filePath) => path_1.default.join(contentDir ?? '', filePath));
}
/**
 * Computes extra exclusion globs that apply only when the scan starts at the SSG directory itself,
 * i.e. when `contentDir` is undefined or an empty string.
 *
 * @param {string|undefined} contentDir
 * @param {Array<string>|undefined} excludedFilesInSSGDir
 * @param {Object|undefined} ssgMatchResult `publishDir` and `staticDir` are read when present
 * @returns {Array<string>}
 */
function getExcludedFiles(contentDir, excludedFilesInSSGDir, ssgMatchResult) {
    const scansSsgDir = contentDir === undefined || contentDir === '';
    if (!scansSsgDir) {
        return [];
    }
    const globs = excludedFilesInSSGDir ? [...excludedFilesInSSGDir] : [];
    // if contentDir (pagesDir or dataDir) wasn't specifically set to empty string,
    // ignore content files in the root folder
    if (contentDir === undefined) {
        globs.push('*.*');
    }
    for (const dir of [ssgMatchResult?.publishDir, ssgMatchResult?.staticDir]) {
        if (dir) {
            globs.push(dir);
        }
    }
    return globs;
}
/**
 * Generates one page model per readable page file and then merges similar models in three passes:
 * per sub-folder, inside the lowest common ancestor (LCA) folder, and finally across all folders.
 *
 * @returns {Promise<{pageModels: Array, objectModels: Array}>} page models still carry their
 *   `filePaths`; models that end up without any known-type field are dropped.
 */
async function generatePageModelsForFiles({ filePaths, dirPathFromRoot, fileBrowser, pageTypeKey, objectModels }) {
    const lodash = lodash_1.default;
    const pageModels = [];
    let currentObjectModels = objectModels;
    let nextModelIndex = 1;

    for (const filePath of filePaths) {
        const filePathFromRoot = path_1.default.join(dirPathFromRoot, filePath);
        const { dir, ext } = path_1.default.parse(filePathFromRoot);
        const fileData = await fileBrowser.getFileData(filePathFromRoot);
        const isMarkdownFile = consts_1.MARKDOWN_FILE_EXTENSIONS.includes(ext.substring(1));
        // don't load plain files from root dir, even though we ignore files such as README.md when reading files,
        // there still might be plain markdown files we don't want to include
        if (dir === '' && isMarkdownFile && lodash.get(fileData, 'frontmatter') === null) {
            continue;
        }
        // markdown files: fold the markdown body into the frontmatter object as `markdown_content`
        const data = lodash.has(fileData, 'frontmatter') && lodash.has(fileData, 'markdown')
            ? lodash.assign(fileData.frontmatter, { markdown_content: fileData.markdown })
            : fileData;
        if (!lodash.isPlainObject(data)) {
            continue;
        }
        const modelName = `page_${nextModelIndex++}`;
        const generated = generateObjectFields(data, [modelName], currentObjectModels);
        if (!generated) {
            continue;
        }
        const pageLayout = pageTypeKey && typeof data[pageTypeKey] === 'string' ? data[pageTypeKey] : undefined;
        currentObjectModels = generated.objectModels;
        pageModels.push({
            type: 'page',
            name: modelName,
            ...(pageLayout && { layout: pageLayout }),
            fields: generated.fields,
            filePaths: [filePath]
        });
    }

    if (pageModels.length === 0) {
        return { pageModels: [], objectModels: currentObjectModels };
    }

    // group models by folders, models from LCA folder put in a separate array and handle later with merged folder
    const lcaFolder = findLowestCommonAncestorFolder(lodash.flatten(lodash.map(pageModels, 'filePaths')));
    const lcaFolderModels = [];
    const modelsByFolder = {};
    for (const pageModel of pageModels) {
        const folder = path_1.default.parse(pageModel.filePaths[0]).dir;
        if (folder === lcaFolder) {
            lcaFolderModels.push(pageModel);
        }
        else {
            (0, utils_1.append)(modelsByFolder, folder, pageModel);
        }
    }

    // merge page models from same sub-folders (excluding LCA folder) with lowest similarity coefficient
    let mergedPageModels = [];
    for (const folderModels of Object.values(modelsByFolder)) {
        const folderMerge = mergeSimilarPageModels(folderModels, currentObjectModels, SAME_FOLDER_PAGE_DSC_COEFFICIENT);
        mergedPageModels = mergedPageModels.concat(folderMerge.pageModels);
        currentObjectModels = folderMerge.objectModels;
    }

    // merge page models from LCA folder with medium similarity coefficient
    const lcaMerge = mergeSimilarPageModels(lcaFolderModels, currentObjectModels, ROOT_FOLDER_PAGE_DSC_COEFFICIENT);
    mergedPageModels = mergedPageModels.concat(lcaMerge.pageModels);

    // merge all page models from all folders with high similarity coefficient
    const finalMerge = mergeSimilarPageModels(mergedPageModels, lcaMerge.objectModels, DIFF_FOLDER_PAGE_DSC_COEFFICIENT);

    // remove 'unknown' field type, dropping models that have no fields left
    const pageModelsWithFilePaths = [];
    for (const pageModel of finalMerge.pageModels) {
        const fields = removeUnknownTypesFromFields(pageModel.fields);
        if (!lodash.isEmpty(fields)) {
            pageModelsWithFilePaths.push(Object.assign(pageModel, { fields }));
        }
    }
    return {
        pageModels: pageModelsWithFilePaths,
        objectModels: finalMerge.objectModels
    };
}
/**
 * Generates data models from data files. Files holding a plain object become models with `fields`
 * and are merged with previously generated, similar object models; files holding an array become
 * list models (`isList: true` with `items`). Files with any other content are ignored.
 *
 * @param {Object} options
 * @param {Array<string>} options.filePaths paths relative to `dirPathFromRoot`
 * @param {string} options.dirPathFromRoot
 * @param {Object} options.fileBrowser must provide `getFileData(path)`
 * @param {Array} options.objectModels object models generated so far
 * @returns {Promise<{dataModels: Array, objectModels: Array}>}
 */
async function generateDataModelsForFiles({ filePaths, dirPathFromRoot, fileBrowser, objectModels }) {
    const dataModels = [];
    let modelNameCounter = 1;
    for (const filePath of filePaths) {
        const data = await fileBrowser.getFileData(path_1.default.join(dirPathFromRoot, filePath));
        // the counter advances for every file, even for files that do not produce a model
        const modelName = `data_${modelNameCounter++}`;
        if (lodash_1.default.isPlainObject(data)) {
            const result = generateObjectFields(data, [modelName], objectModels);
            // generally, pages can be defined as JSON files as well.
            if (!result) {
                continue;
            }
            objectModels = result.objectModels;
            // Only models with `fields` are merge candidates (list models have `items` instead).
            // Remember the position of every candidate inside `dataModels`, because the indexes
            // returned by mergeSimilarFields refer to the filtered candidate list.
            const candidateIndexes = [];
            const dataFieldsList = [];
            dataModels.forEach((dataModel, index) => {
                if (dataModel.fields) {
                    candidateIndexes.push(index);
                    dataFieldsList.push(dataModel.fields);
                }
            });
            const mergeResult = mergeSimilarFields(result.fields, dataFieldsList, [modelName], objectModels, 'dsc', DATA_MODEL_DSC_COEFFICIENT);
            objectModels = mergeResult.objectModels;
            // translate candidate indexes back to `dataModels` indexes before removing merged models
            const mergedModelIndexes = mergeResult.mergedIndexes.map((candidateIndex) => candidateIndexes[candidateIndex]);
            const mergedDataModels = lodash_1.default.pullAt(dataModels, mergedModelIndexes);
            const mergedFilePaths = lodash_1.default.flatten(mergedDataModels.map((dataModel) => dataModel.filePaths));
            dataModels.push({
                type: 'data',
                name: modelName,
                fields: mergeResult.mergedFields,
                filePaths: [filePath].concat(mergedFilePaths)
            });
        }
        else if (lodash_1.default.isArray(data)) {
            const result = generateListField(data, [modelName], objectModels);
            if (result) {
                objectModels = result.objectModels;
                dataModels.push({
                    type: 'data',
                    name: modelName,
                    isList: true,
                    items: result.field.items,
                    filePaths: [filePath]
                });
            }
        }
    }
    return {
        dataModels,
        objectModels
    };
}
/**
 * Generates a field for every entry of `value`.
 *
 * @param {Object} value object whose entries are turned into fields
 * @param {Array<string>} fieldPath path of the object; the entry name is appended for each field
 * @param {Array} objectModels object models generated so far
 * @returns {{fields: Array, objectModels: Array}|null} null when `value` is empty or when no entry
 *   produced a field
 */
function generateObjectFields(value, fieldPath, objectModels) {
    if (lodash_1.default.isEmpty(value)) {
        return null;
    }
    const fields = [];
    let currentObjectModels = objectModels;
    lodash_1.default.forEach(value, (fieldValue, fieldName) => {
        const generated = generateField(fieldValue, fieldName, fieldPath.concat(fieldName), currentObjectModels);
        if (generated.field) {
            fields.push(generated.field);
        }
        // object models are threaded through even when the entry itself produced no field
        currentObjectModels = generated.objectModels;
    });
    if (fields.length === 0) {
        return null;
    }
    return { fields, objectModels: currentObjectModels };
}
/**
 * Infers a single field definition from a value.
 *
 * @param {*} fieldValue
 * @param {string} fieldName
 * @param {Array<string>} fieldPath path of this field, forwarded to nested object/list generation
 * @param {Array} objectModels object models generated so far
 * @returns {{field: (Object|null), objectModels: Array}} `field` is null when no field could be
 *   inferred (unsupported value, or an object/list whose generation returned nothing)
 */
function generateField(fieldValue, fieldName, fieldPath, objectModels) {
    const lodash = lodash_1.default;
    // common shape of most fields: type, name and a label derived from the name
    const labeledField = (type) => ({ type, name: fieldName, label: lodash.startCase(fieldName) });

    if (fieldName === 'markdown_content') {
        return { field: { type: 'markdown', name: fieldName, label: 'Content' }, objectModels };
    }
    if (fieldValue === null) {
        // we don't know what is the type of the field
        return { field: labeledField('unknown'), objectModels };
    }
    if (lodash.isString(fieldValue)) {
        const field = {
            ...fieldFromStringValue(fieldValue),
            name: fieldName,
            label: lodash.startCase(fieldName)
        };
        return { field, objectModels };
    }
    if (lodash.isDate(fieldValue)) {
        // @iarna/toml returns date objects
        return { field: labeledField('datetime'), objectModels };
    }
    if (lodash.isNumber(fieldValue)) {
        const subtype = lodash.isInteger(fieldValue) ? 'int' : 'float';
        return { field: { ...labeledField('number'), subtype }, objectModels };
    }
    if (lodash.isBoolean(fieldValue)) {
        return { field: labeledField('boolean'), objectModels };
    }
    if (lodash.isPlainObject(fieldValue)) {
        const nested = generateObjectFields(fieldValue, fieldPath, objectModels);
        if (!nested) {
            return { field: null, objectModels };
        }
        return {
            field: { ...labeledField('object'), fields: nested.fields },
            objectModels: nested.objectModels
        };
    }
    if (lodash.isArray(fieldValue)) {
        const list = generateListField(fieldValue, fieldPath, objectModels);
        if (!list) {
            return { field: null, objectModels };
        }
        return {
            field: { ...labeledField(list.field.type), items: list.field.items },
            objectModels: list.objectModels
        };
    }
    // any other value type yields no field
    return { field: null, objectModels };
}
/**
 * Infers a list field from an array value.
 *
 * @param {Array} value
 * @param {Array<string>} fieldPath
 * @param {Array} objectModels object models generated so far
 * @returns {{field: Object, objectModels: Array}|null} null when the array contains arrays or when
 *   its item types cannot be consolidated into one item definition
 */
function generateListField(value, fieldPath, objectModels) {
    // When no item type can be inferred we still report that there is an array.
    // This fact will help us when we will try to consolidate this array with another array
    const listOfUnknownItems = () => ({
        field: listFieldWithUnknownItems(),
        objectModels: objectModels
    });
    if (lodash_1.default.isEmpty(value)) {
        return listOfUnknownItems();
    }

    const itemDefinitions = [];
    let currentObjectModels = objectModels;
    for (const listItem of value) {
        if (lodash_1.default.isArray(listItem)) {
            // array of arrays are not supported
            return null;
        }
        const generated = generateFieldListItems(listItem, fieldPath, currentObjectModels);
        if (generated !== null) {
            currentObjectModels = generated.objectModels;
            itemDefinitions.push(generated.items);
        }
    }
    if (itemDefinitions.length === 0) {
        // none of the items had a recognizable type
        return listOfUnknownItems();
    }

    const consolidated = consolidateListItems(itemDefinitions, fieldPath, currentObjectModels);
    if (consolidated === null) {
        return null;
    }
    return {
        field: { type: 'list', items: consolidated.items },
        objectModels: consolidated.objectModels
    };
}
/**
 * Creates a list field definition whose item type could not be determined.
 *
 * @returns {{type: string, items: {type: string}}} a new object on every call
 */
function listFieldWithUnknownItems() {
    const items = { type: 'unknown' };
    return { type: 'list', items };
}
/**
 * Infers the item definition for a single list item.
 *
 * @param {*} value the list item
 * @param {Array<string>} fieldPath
 * @param {Array} objectModels object models generated so far
 * @returns {{items: Object, objectModels: Array}|null} null for type-less items (null values,
 *   unsupported types, objects without fields); such items are ignored by the caller
 * @throws {Error} when the item is itself an array
 */
function generateFieldListItems(value, fieldPath, objectModels) {
    const lodash = lodash_1.default;
    if (value === null) {
        // type-less value, return null to ignore. If array doesn't have any other items with types then items of that
        // array will be marked as 'unknown'
        return null;
    }
    if (lodash.isString(value)) {
        return { items: fieldFromStringValue(value), objectModels };
    }
    if (lodash.isDate(value)) {
        // @iarna/toml returns date objects
        return { items: { type: 'datetime' }, objectModels };
    }
    if (lodash.isNumber(value)) {
        const subtype = lodash.isInteger(value) ? 'int' : 'float';
        return { items: { type: 'number', subtype }, objectModels };
    }
    if (lodash.isBoolean(value)) {
        return { items: { type: 'boolean' }, objectModels };
    }
    if (lodash.isPlainObject(value)) {
        const nested = generateObjectFields(value, fieldPath, objectModels);
        if (!nested) {
            return null;
        }
        return {
            items: { type: 'object', fields: nested.fields },
            objectModels: nested.objectModels
        };
    }
    if (lodash.isArray(value)) {
        // we don't support array of arrays
        throw new Error('nested arrays are not supported');
    }
    return null;
}
// Hex color: '#' followed by exactly 3 or 6 hexadecimal digits, e.g. "#fff" or "#A1b2C3".
const COLOR_PATTERN = /^#(?:[A-Fa-f0-9]{3}){1,2}$/;
// Self-closing, opening or closing HTML tag without attributes, e.g. "<br/>", "<p>", "</p>".
// Intentionally has no `g` flag: a global regex keeps `lastIndex` between `.test()` calls, which
// makes repeated presence checks against different strings unreliable.
const HTML_PATTERN = /<[a-zA-Z]+\s*\/>|<\/?[a-zA-Z]+>/;
// Common markdown markers: headings, block quotes, list bullets at the start of a line, bold text
// and code fences.
const MARKDOWN_PATTERN = /^#+\s|^>\s|^-\s|^\*\s|^\+\s|\*\*[\s\S]+\*\*|__[\s\S]+__|```/m;
// String starting with a YYYY-MM-DD date (month and day may omit the leading zero).
const DATE_PATTERN = /^([12]\d{3}-(0?[1-9]|1[0-2])-(0?[1-9]|[12]\d|3[01]))/;
// Field types that can be inferred from a string value.
const STRING_TYPES = ['string', 'text', 'markdown', 'image', 'color', 'date', 'datetime'];
/**
 * Infers the most specific field type for a string value. Checks run in this order: color,
 * markdown (HTML or markdown markers), multi-line text, image path, date/datetime; anything else
 * is a plain 'string'.
 *
 * @param {string} value
 * @returns {{type: string}} object holding the inferred field type
 */
function fieldFromStringValue(value) {
    let fieldType = 'string';
    const fieldProps = {};
    // `search` is used for the presence checks because, unlike `RegExp.prototype.test`, it ignores
    // the `g` flag and `lastIndex`; with a global pattern `test` is stateful and could miss a match
    // in one value right after matching the previous one.
    const containsHtml = value.search(HTML_PATTERN) !== -1;
    if (value.match(COLOR_PATTERN)) {
        fieldType = 'color';
    }
    else if (containsHtml || value.search(MARKDOWN_PATTERN) !== -1) {
        // TODO separate between `markdown` and `html` fields
        fieldType = 'markdown';
    }
    else if (value.trim().includes('\n')) {
        fieldType = 'text';
    }
    else if (/\.(?:svg|png|jpg|jpeg)$/.test(value)) {
        fieldType = 'image';
        // TODO: handle asset referenceTypes
    }
    else if ((0, moment_1.default)(value, moment_1.default.ISO_8601).isValid() || value.match(DATE_PATTERN)) {
        // `moment.utc(value)` below may receive a non-ISO string matched only by DATE_PATTERN,
        // so moment's deprecation warnings are silenced before calling it
        moment_1.default.suppressDeprecationWarnings = true;
        // a value that resolves to midnight UTC is treated as a date without time
        fieldType = lodash_1.default.endsWith(moment_1.default.utc(value).toISOString(), '00:00:00.000Z') ? 'date' : 'datetime';
    }
    return {
        type: fieldType,
        ...fieldProps
    };
}
/**
 * Consolidates the item definitions inferred for every element of a list (or for several lists
 * that are being merged) into a single `items` definition.
 *
 * @param {Array<Object>} listItemModels item definitions; each has a `type` and, depending on the
 *   type, `subtype`, `fields` or `models`
 * @param {Array<string>} fieldPath path of the list field
 * @param {Array} objectModels object models generated so far
 * @returns {{items: Object, objectModels: Array}|null} null when the item types cannot be
 *   consolidated
 */
function consolidateListItems(listItemModels, fieldPath, objectModels) {
    let itemTypes = lodash_1.default.uniq(lodash_1.default.map(listItemModels, 'type'));
    // if list items have multiple types and one of them 'unknown', then we assume that 'unknown' item type is actually
    // one of the other item types. So we remove it in favor of other types.
    if (itemTypes.length > 1 && itemTypes.includes('unknown')) {
        itemTypes = lodash_1.default.without(itemTypes, 'unknown');
    }
    // wraps every fields array into a new object model with a random name
    const createObjectModels = (fieldsList) => fieldsList.map((fields) => ({
        type: 'object',
        name: generateRandomModelName(),
        fields: fields
    }));
    if (itemTypes.length === 1) {
        const type = itemTypes[0];
        // handle fields with extra properties
        switch (type) {
            case 'unknown': {
                return {
                    items: { type: 'unknown' },
                    objectModels
                };
            }
            case 'number': {
                const subtypes = lodash_1.default.compact(lodash_1.default.uniq(lodash_1.default.map(listItemModels, 'subtype')));
                // mixed (or missing) subtypes fall back to 'float'
                const subtype = subtypes.length === 1 ? subtypes[0] : 'float';
                return {
                    items: {
                        type: 'number',
                        ...(subtype && { subtype })
                    },
                    objectModels
                };
            }
            case 'object': {
                // 'unknown' items (removed from itemTypes above) carry no `fields`; drop them so they
                // do not end up as an extra, field-less object model
                const fieldsList = lodash_1.default.compact(lodash_1.default.map(listItemModels, (itemModel) => itemModel.fields));
                const result = consolidateObjectFieldsListWithOverlap(fieldsList, fieldPath, LIST_OBJECT_DSC_COEFFICIENT, objectModels);
                if (result.fieldsList.length === 1) {
                    return {
                        items: {
                            type: 'object',
                            fields: result.fieldsList[0]
                        },
                        objectModels: result.objectModels
                    };
                }
                // objects that could not be merged into one become separate object models
                const models = createObjectModels(result.fieldsList);
                return {
                    items: {
                        type: 'model',
                        models: lodash_1.default.map(models, 'name')
                    },
                    objectModels: result.objectModels.concat(models)
                };
            }
            case 'model': {
                const modelNames = lodash_1.default.compact(lodash_1.default.uniq(lodash_1.default.flatten(lodash_1.default.map(listItemModels, 'models'))));
                return {
                    items: {
                        type: 'model',
                        models: modelNames
                    },
                    objectModels
                };
            }
            case 'enum':
            case 'reference':
            case 'cross-reference':
                // these cases cannot happen because we don't generate these fields,
                return null;
            default:
                return {
                    items: { type },
                    objectModels
                };
        }
    }
    if (lodash_1.default.every(itemTypes, (itemsType) => ['object', 'model'].includes(itemsType))) {
        // mix of inline objects and model references: keep the referenced models and turn the inline
        // objects into new object models
        const modelListItems = lodash_1.default.filter(listItemModels, analyze_schema_types_1.isModelListItemsWithUnknown);
        const modelNames = lodash_1.default.compact(lodash_1.default.uniq(lodash_1.default.flatten(lodash_1.default.map(modelListItems, 'models'))));
        const objectListItems = lodash_1.default.filter(listItemModels, analyze_schema_types_1.isObjectListItemsWithUnknown);
        // items without `fields` are dropped for the same reason as in the 'object' case above
        const fieldsList = lodash_1.default.compact(lodash_1.default.map(objectListItems, (listItems) => listItems.fields));
        const result = consolidateObjectFieldsListWithOverlap(fieldsList, fieldPath, LIST_OBJECT_DSC_COEFFICIENT, objectModels);
        const models = createObjectModels(result.fieldsList);
        return {
            items: {
                type: 'model',
                models: modelNames.concat(lodash_1.default.map(models, 'name'))
            },
            objectModels: result.objectModels.concat(models)
        };
    }
    const fieldType = coerceSimpleFieldTypes(itemTypes);
    return fieldType
        ? {
            items: { type: fieldType },
            objectModels: objectModels
        }
        : null;
}
/**
 * Repeatedly merges field sets whose overlap coefficient reaches `minCoefficient`, until every
 * remaining field set has been either merged into another one or kept on its own.
 *
 * @param {Array<Array>} fieldsList field sets to consolidate (not modified)
 * @param {Array<string>} fieldPath
 * @param {number} minCoefficient
 * @param {Array} objectModels object models generated so far
 * @returns {{fieldsList: Array<Array>, objectModels: Array}}
 */
function consolidateObjectFieldsListWithOverlap(fieldsList, fieldPath, minCoefficient, objectModels) {
    // to reduce fragmentation sort fields arrays by length to merge the larger objects with smaller objects first
    let pending = lodash_1.default.orderBy(fieldsList.slice(), ['length'], ['desc']);
    const consolidated = [];
    let currentObjectModels = objectModels;
    while (pending.length > 0) {
        const [largest, ...others] = pending;
        const merge = mergeSimilarFields(largest, others, fieldPath, currentObjectModels, 'overlap', minCoefficient);
        consolidated.push(merge.mergedFields);
        // only the field sets that were not merged take part in the next round
        pending = merge.unmergedFieldsList;
        currentObjectModels = merge.objectModels;
    }
    return {
        fieldsList: consolidated,
        objectModels: currentObjectModels
    };
}
/**
 * Merges `fields` with every field set of `fieldsList` that is similar enough. The comparison is
 * always made against the fields merged so far, so each merge can affect later comparisons.
 *
 * @param {Array} fields the field set to start from
 * @param {Array<Array>} fieldsList candidate field sets
 * @param {Array<string>} fieldPath
 * @param {Array} objectModels object models generated so far
 * @param {string} type 'dsc' selects computeDSC, anything else selects computeOverlap
 * @param {number} minCoefficient minimal similarity required to attempt a merge
 * @returns {{mergedFields: Array, unmergedFieldsList: Array<Array>, mergedIndexes: Array<number>,
 *   unmergedIndexes: Array<number>, objectModels: Array}} the indexes refer to `fieldsList`
 */
function mergeSimilarFields(fields, fieldsList, fieldPath, objectModels, type, minCoefficient) {
    const measureSimilarity = type === 'dsc' ? computeDSC : computeOverlap;
    const mergedIndexes = [];
    const unmergedIndexes = [];
    const unmergedFieldsList = [];
    let mergedFields = fields;
    let currentObjectModels = objectModels;
    for (const [index, candidate] of fieldsList.entries()) {
        // TODO: check if intersected fields have same types, otherwise don't try to merge
        const isSimilar = measureSimilarity(mergedFields, candidate) >= minCoefficient;
        const merge = isSimilar
            ? mergeObjectFieldsList([mergedFields, candidate], fieldPath, currentObjectModels)
            : null;
        if (merge) {
            mergedIndexes.push(index);
            mergedFields = merge.fields;
            currentObjectModels = merge.objectModels;
            continue;
        }
        // not similar enough, or the field types could not be consolidated
        unmergedIndexes.push(index);
        unmergedFieldsList.push(candidate);
    }
    return {
        mergedFields,
        unmergedFieldsList,
        mergedIndexes,
        unmergedIndexes,
        objectModels: currentObjectModels
    };
}
/**
 * Computes the Sørensen–Dice coefficient of two field lists, compared by field name:
 * twice the number of shared names divided by the total number of names in both lists.
 * https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
 *
 * The Sørensen–Dice coefficient gives better values than Jaccard index when
 * more elements in the two sets match in relation to sets sizes
 *
 * @param fieldsA
 * @param fieldsB
 * @returns {number} NaN when both lists are empty
 */
function computeDSC(fieldsA, fieldsB) {
    const namesA = getFieldsSet(fieldsA);
    const namesB = getFieldsSet(fieldsB);
    const sharedCount = lodash_1.default.intersection(namesA, namesB).length;
    const totalCount = namesA.length + namesB.length;
    return (2 * sharedCount) / totalCount;
}
/**
 * Computes the overlap coefficient of two field lists, compared by field name:
 * the number of shared names divided by the size of the smaller list.
 * https://en.wikipedia.org/wiki/Overlap_coefficient
 *
 * The Overlap coefficient gives higher values than the Sørensen–Dice coefficient
 * if one set is a subset of another
 *
 * @param fieldsA
 * @param fieldsB
 * @returns {number} NaN when either list is empty
 */
function computeOverlap(fieldsA, fieldsB) {
    const namesA = getFieldsSet(fieldsA);
    const namesB = getFieldsSet(fieldsB);
    const sharedCount = lodash_1.default.intersection(namesA, namesB).length;
    const smallerSize = Math.min(namesA.length, namesB.length);
    return sharedCount / smallerSize;
}
/**
 * Maps a list of fields to the list of their names, which is what the similarity functions compare.
 *
 * @param {Array<{name: string}>} fields
 * @returns {Array<string>}
 */
function getFieldsSet(fields) {
    // TODO: 'unknown' field type can be anything
    // TODO: do we even need to include type when computing similarity?
    const toName = ({ name }) => name;
    return lodash_1.default.map(fields, toName);
}
/**
 * Merges several field lists into one by consolidating the fields that share the same name.
 *
 * @param {Array<Array>} fieldsList field lists to merge
 * @param {Array<string>} fieldPath path of the object owning the fields
 * @param {Array} objectModels object models generated so far
 * @returns {{fields: Array, objectModels: Array}|null} null as soon as one group of same-named
 *   fields cannot be consolidated
 */
function mergeObjectFieldsList(fieldsList, fieldPath, objectModels) {
    const fieldsByName = lodash_1.default.groupBy(lodash_1.default.flatten(fieldsList), 'name');
    const consolidatedFields = [];
    let currentObjectModels = objectModels;
    for (const [fieldName, sameNameFields] of Object.entries(fieldsByName)) {
        const consolidated = consolidateFields(sameNameFields, fieldName, fieldPath.concat(fieldName), currentObjectModels);
        // if one of the fields cannot be consolidated, then the object cannot be consolidated as well
        if (!consolidated) {
            return null;
        }
        currentObjectModels = consolidated.objectModels;
        // type, name and label come first; remaining properties are taken from the consolidated field
        const baseProps = {
            type: consolidated.field.type,
            name: fieldName,
            label: lodash_1.default.startCase(fieldName)
        };
        consolidatedFields.push(lodash_1.default.defaults(baseProps, consolidated.field));
    }
    return {
        fields: consolidatedFields,
        objectModels: currentObjectModels
    };
}
/**
 * Consolidates several same-named fields into a single field definition.
 *
 * @param {Array<Object>} fields fields sharing the name `fieldName`
 * @param {string} fieldName
 * @param {Array<string>} fieldPath path of the field, forwarded to nested consolidation
 * @param {Array} objectModels object models generated so far
 * @returns {{field: Object, objectModels: Array}|null} null when the fields cannot be consolidated;
 *   a single field is returned as is
 */
function consolidateFields(fields, fieldName, fieldPath, objectModels) {
    if (fields.length === 1) {
        return { field: fields[0], objectModels };
    }
    const lodash = lodash_1.default;
    const distinctTypes = lodash.uniq(lodash.map(fields, 'type'));
    // if field types have multiple types and one of them 'unknown', then we assume that 'unknown' type is actually
    // one of the other field types. So we remove it in favor of other types.
    const fieldTypes = distinctTypes.length > 1 && distinctTypes.includes('unknown')
        ? lodash.without(distinctTypes, 'unknown')
        : distinctTypes;

    if (fieldTypes.length !== 1) {
        // several different types: succeed only if they can be coerced into one simple type
        const coercedType = coerceSimpleFieldTypes(fieldTypes);
        return coercedType
            ? { field: { type: coercedType, name: fieldName }, objectModels }
            : null;
    }

    const [type] = fieldTypes;
    // 'enum', 'reference' and 'cross-reference' cannot happen because we don't generate these fields;
    // we don't produce 'model' fields as direct child of 'object' fields, only as list items
    if (['enum', 'model', 'reference', 'cross-reference'].includes(type)) {
        return null;
    }
    // handle fields with extra properties
    if (type === 'number') {
        const subtypes = lodash.compact(lodash.uniq(lodash.map(fields, 'subtype')));
        const subtype = subtypes.length === 1 ? subtypes[0] : 'float';
        return {
            field: {
                type: 'number',
                name: fieldName,
                ...(subtype && { subtype })
            },
            objectModels
        };
    }
    if (type === 'object') {
        const objectFields = lodash.filter(fields, analyze_schema_types_1.isObjectWithUnknownField);
        const nestedFieldsList = lodash.compact(lodash.map(objectFields, (field) => field.fields));
        const merged = mergeObjectFieldsList(nestedFieldsList, fieldPath, objectModels);
        if (!merged) {
            return null;
        }
        return {
            field: { type: 'object', name: fieldName, fields: merged.fields },
            objectModels: merged.objectModels
        };
    }
    if (type === 'list') {
        const listFields = lodash.filter(fields, analyze_schema_types_1.isListWithUnknownField);
        const itemsList = lodash.compact(lodash.map(listFields, (field) => field.items));
        const consolidatedItems = consolidateListItems(itemsList, fieldPath, objectModels);
        if (!consolidatedItems) {
            return null;
        }
        return {
            field: { type: 'list', name: fieldName, items: consolidatedItems.items },
            objectModels: consolidatedItems.objectModels
        };
    }
    // 'unknown' and every remaining simple type carry no extra properties
    return {
        field: { type, name: fieldName },
        objectModels
    };
}
/**
 * Picks a single field type that can represent all of the given simple field types.
 *
 * @param {Array<string>} fieldTypes
 * @returns {string|null} 'datetime', 'markdown', 'text' or 'string'; null when the types cannot be
 *   represented by one string-derived type
 */
function coerceSimpleFieldTypes(fieldTypes) {
    const consistsOnlyOf = (allowedTypes) => lodash_1.default.isEmpty(lodash_1.default.difference(fieldTypes, allowedTypes));
    if (consistsOnlyOf(['date', 'datetime'])) {
        return 'datetime';
    }
    // use markdown as the most specific type
    if (fieldTypes.includes('markdown') && consistsOnlyOf(['string', 'text', 'markdown', 'unknown'])) {
        return 'markdown';
    }
    // use text as the most specific type
    if (fieldTypes.includes('text') && consistsOnlyOf(['string', 'text', 'unknown'])) {
        return 'text';
    }
    // use string if all other types can be derived from it
    const isStringDerived = (fieldType) => STRING_TYPES.concat('unknown').includes(fieldType);
    return lodash_1.default.every(fieldTypes, isStringDerived) ? 'string' : null;
}
/**
 * Generates a model name made of the 'object_' prefix and random lowercase letters and digits.
 *
 * @param {number} [length=10] number of random characters after the prefix
 * @returns {string}
 */
function generateRandomModelName(length = 10) {
    const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789';
    let suffix = '';
    for (let position = 0; position < length; position++) {
        const randomIndex = Math.floor(Math.random() * alphabet.length);
        suffix += alphabet.charAt(randomIndex);
    }
    return 'object_' + suffix;
}
/**
 * Merges page models that are similar to each other. Every model is first merged with the models
 * settled so far and then with the models still waiting to be processed.
 *
 * @param {Array} pageModels models to merge (the array itself is not modified)
 * @param {Array} objectModels object models generated so far
 * @param {number} similarityCoefficient minimal similarity required to merge two models
 * @returns {{pageModels: Array, objectModels: Array}}
 */
function mergeSimilarPageModels(pageModels, objectModels, similarityCoefficient) {
    let pending = pageModels.slice();
    let settled = [];
    let currentObjectModels = objectModels;
    while (pending.length > 0) {
        const [nextModel, ...remaining] = pending;
        // merge with merged models
        const againstSettled = mergePageModelWithSimilarPageModels(nextModel, settled, currentObjectModels, similarityCoefficient);
        // merge with unmerged models
        const againstPending = mergePageModelWithSimilarPageModels(againstSettled.pageModel, remaining, againstSettled.objectModels, similarityCoefficient);
        // settled models that were not absorbed stay settled, followed by the model just processed;
        // they must not go back to the pending list, otherwise the loop would never finish
        settled = againstSettled.unmergedPageModels;
        settled.push(againstPending.pageModel);
        pending = againstPending.unmergedPageModels;
        currentObjectModels = againstPending.objectModels;
    }
    return {
        pageModels: settled,
        objectModels: currentObjectModels
    };
}
/**
 * Merges `pageModel` with every model of `pageModels` that qualifies:
 * - when both models have a layout, they are merged only if the layouts are equal (field
 *   similarity is not checked);
 * - otherwise they are merged when the DSC of their fields reaches `similarityCoefficient`.
 * A model whose fields cannot be consolidated is left unmerged.
 *
 * Note: `pageModel` itself is updated in place (`fields`, `filePaths` and possibly `layout`).
 *
 * @returns {{pageModel: Object, objectModels: Array, unmergedPageModels: Array}}
 */
function mergePageModelWithSimilarPageModels(pageModel, pageModels, objectModels, similarityCoefficient) {
    const outcome = {
        pageModel,
        objectModels,
        unmergedPageModels: []
    };
    pageModels.forEach((candidate) => {
        const target = outcome.pageModel;
        const bothHaveLayout = Boolean(target.layout && candidate.layout);
        // do not merge page models with different layouts
        const eligible = bothHaveLayout
            ? target.layout === candidate.layout
            : computeDSC(target.fields, candidate.fields) >= similarityCoefficient;
        const merged = eligible
            ? mergeObjectFieldsList([target.fields, candidate.fields], [target.name], outcome.objectModels)
            : null;
        if (!merged) {
            outcome.unmergedPageModels.push(candidate);
            return;
        }
        outcome.objectModels = merged.objectModels;
        target.fields = merged.fields;
        target.filePaths = target.filePaths.concat(candidate.filePaths);
        // when only the candidate has a layout, the merged model takes it over
        if (!bothHaveLayout && candidate.layout) {
            target.layout = candidate.layout;
        }
    });
    return outcome;
}
/**
 * Turns page models that carry `filePaths` into page models described by file matching properties
 * (`folder`, `match`, `exclude`), and gives every model a unique name.
 *
 * @param {Array} partialPageModels page models with `fields` and `filePaths`
 * @returns {Array} page models with `type`, `name`, `label`, optional `folder`, `match`,
 *   optional `exclude` and `fields`
 */
function analyzePageFileMatchingProperties(partialPageModels) {
    const lodash = lodash_1.default;
    let fallbackNameIndex = 1;
    // first pass: derive the folder, a candidate name and a default glob for every model
    const candidates = lodash.map(partialPageModels, ({ filePaths, fields }) => {
        const folder = findLowestCommonAncestorFolder(filePaths);
        const sameFolder = allFilePathInSameFolder(filePaths);
        const name = folder !== '' ? getModelNameFromFilePath(folder) : `page_${fallbackNameIndex++}`;
        return {
            type: 'page',
            name,
            folder,
            match: sameFolder ? '*' : '**/*',
            fields,
            filePaths
        };
    });

    // second pass: make names unique and make sure a model's glob does not claim files of other models
    const pageModels = [];
    candidates.forEach((model, index) => {
        const folderPrefix = model.folder ? model.folder + '/' : '';
        const glob = folderPrefix + model.match;
        const otherModels = candidates.filter((_model, otherIndex) => otherIndex !== index);
        const otherFiles = lodash.flatten(lodash.map(otherModels, 'filePaths'));
        const modelName = getUniqueName(model.name, lodash.map(pageModels, 'name'));
        const filesOfOtherModels = micromatch_1.default.match(otherFiles, glob);
        const toFolderRelative = (filePath) => path_1.default.relative(model.folder || '', filePath);

        let match = model.match;
        let exclude = [];
        if (filesOfOtherModels.length > 1) {
            // the glob claims several foreign files: list this model's own files explicitly instead
            match = lodash.map(model.filePaths, toFolderRelative);
        }
        else if (filesOfOtherModels.length === 1) {
            // a single foreign file is simply excluded
            exclude = lodash.map(filesOfOtherModels, toFolderRelative);
        }
        pageModels.push({
            type: 'page',
            name: modelName,
            label: lodash.startCase(modelName),
            ...(model.folder && { folder: model.folder }),
            match: match,
            ...(!lodash.isEmpty(exclude) && { exclude }),
            fields: model.fields
        });
    });
    return pageModels;
}
/**
 * Turns partial data models (fields/items + file paths) into data models
 * carrying their location: `file` for a model backed by a single file,
 * `folder` for a model backed by several files.
 *
 * Single-file models are named after the file name; multi-file models after
 * the lowest common ancestor folder of their files, or `data_<n>` when there
 * is no common folder. Names are made unique against the models already
 * emitted. Models left with no fields or items once 'unknown' types are
 * removed are dropped.
 *
 * @param {Array<{filePaths: string[], fields?: Array, isList?: boolean, items?: Object}>} partialDataModels
 * @returns {Array<Object>} data models with `type`, `name`, `label`,
 *   `file` or `folder`, and either `fields` or `isList` + `items`.
 */
function analyzeDataFileMatchingProperties(partialDataModels) {
    const dataModels = [];
    let dataCount = 1;
    for (const partialDataModel of partialDataModels) {
        const { filePaths } = partialDataModel;
        let baseName;
        let location;
        if (filePaths.length === 1) {
            const [filePath] = filePaths;
            baseName = lodash_1.default.snakeCase(path_1.default.parse(filePath).name);
            location = { file: filePath };
        }
        else {
            const folder = findLowestCommonAncestorFolder(filePaths);
            // The counter advances even if this model is dropped below,
            // matching the numbering of the previous implementation.
            baseName = folder !== '' ? getModelNameFromFilePath(folder) : `data_${dataCount++}`;
            location = { folder };
        }
        const modelName = getUniqueName(baseName, lodash_1.default.map(dataModels, 'name'));
        const itemsOrFields = removeUnknownTypesFromDataModel(partialDataModel);
        if (!itemsOrFields) {
            continue;
        }
        dataModels.push({
            type: 'data',
            name: modelName,
            label: lodash_1.default.startCase(modelName),
            ...location,
            ...itemsOrFields
        });
    }
    return dataModels;
}
/**
 * Strips 'unknown' typed parts from a partial data model.
 *
 * @param {{isList?: boolean, items?: Object, fields?: Array}} partialDataModel
 * @returns {{isList: true, items: Object} | {fields: Array} | null}
 *   `{ isList, items }` for a list model whose items survive the cleanup,
 *   `{ fields }` for a model with at least one surviving field,
 *   `null` when nothing is left.
 */
function removeUnknownTypesFromDataModel(partialDataModel) {
    const isListModel = partialDataModel.isList && partialDataModel.items;
    if (!isListModel) {
        const cleanedFields = removeUnknownTypesFromFields(partialDataModel.fields);
        return lodash_1.default.isEmpty(cleanedFields) ? null : { fields: cleanedFields };
    }
    const cleanedItems = removeUnknownTypesFromListItem(partialDataModel.items);
    return cleanedItems ? { isList: true, items: cleanedItems } : null;
}
/**
 * Returns a new list of fields with every 'unknown' typed field removed,
 * recursing into 'object' fields and 'list' items.
 *
 * An 'object' field left with no nested fields, and a 'list' field with
 * missing or fully removed items, are dropped as well.
 *
 * The input field objects are not modified: fields whose nested content
 * changes are returned as shallow copies, so 'object' and 'list' fields are
 * handled the same way.
 *
 * @param {Array<Object>} fields field descriptors, each with a `type`.
 * @returns {Array<Object>} the cleaned fields (possibly empty).
 */
function removeUnknownTypesFromFields(fields) {
    return lodash_1.default.reduce(fields, (accum, field) => {
        switch (field.type) {
            case 'unknown': {
                return accum;
            }
            case 'object': {
                const nestedFields = removeUnknownTypesFromFields(field.fields);
                if (lodash_1.default.isEmpty(nestedFields)) {
                    return accum;
                }
                // Copy instead of Object.assign(field, ...) so the caller's
                // field object is left untouched.
                return accum.concat({ ...field, fields: nestedFields });
            }
            case 'list': {
                const { items: itemsWithUnknown } = field;
                if (!itemsWithUnknown) {
                    return accum;
                }
                const items = removeUnknownTypesFromListItem(itemsWithUnknown);
                if (!items) {
                    return accum;
                }
                return accum.concat({ ...field, items });
            }
            default: {
                return accum.concat(field);
            }
        }
    }, []);
}
/**
 * Strips 'unknown' types from a list's `items` descriptor.
 *
 * The input descriptor is not modified; an 'object' descriptor whose fields
 * were cleaned is returned as a shallow copy.
 *
 * @param {Object} items list item descriptor with a `type`.
 * @returns {Object|null} `null` when the item type is 'unknown' or when an
 *   'object' item has no fields left; a copy with the cleaned `fields` for
 *   other 'object' items; the descriptor itself for any other type.
 */
function removeUnknownTypesFromListItem(items) {
    if (items.type === 'unknown') {
        return null;
    }
    if (items.type !== 'object') {
        return items;
    }
    const fields = removeUnknownTypesFromFields(items.fields);
    if (lodash_1.default.isEmpty(fields)) {
        return null;
    }
    // Copy instead of Object.assign(items, ...) so the caller's descriptor
    // is left untouched.
    return { ...items, fields };
}
/**
 * Computes the deepest folder shared by all the given models.
 *
 * A model contributes the directory of its `file` if it has one, otherwise
 * its `folder`, otherwise the empty path.
 *
 * @param {Array<{file?: string, folder?: string}>} models
 * @returns {string} the common folder joined with the platform separator,
 *   or '' when there are no models or no common folder.
 */
function getLowestCommonAncestorFolderFromModels(models) {
    const { sep } = path_1.default;
    let sharedParts = null;
    for (const model of models) {
        const modelDir = model.file ? path_1.default.parse(model.file).dir : model.folder || '';
        const modelParts = modelDir.split(sep);
        if (sharedParts === null) {
            sharedParts = modelParts;
        }
        else {
            // Keep only the leading segments both paths agree on.
            const divergeAt = sharedParts.findIndex((part, position) => position >= modelParts.length || part !== modelParts[position]);
            if (divergeAt !== -1) {
                sharedParts = sharedParts.slice(0, divergeAt);
            }
        }
        // Nothing in common any more, so later models cannot change the result.
        const nothingShared = sharedParts.length === 0 || (sharedParts.length === 1 && sharedParts[0] === '');
        if (nothingShared) {
            break;
        }
    }
    return sharedParts === null ? '' : sharedParts.join(sep);
}
/**
 * Rewrites each model's `file` or `folder` so it is relative to the given
 * common ancestor directory.
 *
 * - A model with a `file` gets the file path made relative.
 * - A model with a `folder` gets the folder made relative; when the result
 *   is empty (the folder is the ancestor itself) `folder` is removed.
 * - A model with neither is returned as-is, without a `folder` key.
 *
 * The input models are never modified; every returned model is a shallow
 * copy that keeps the original key order.
 *
 * @param {Array<Object>} models models with an optional `file` or `folder`.
 * @param {string} lowestCommonAncestorDir directory the paths are made relative to.
 * @returns {Array<Object>} the adjusted copies, in the same order.
 */
function adjustModelsWithLowestCommonAncestor(models, lowestCommonAncestorDir) {
    return (models ?? []).map((model) => {
        if (model.file) {
            return {
                ...model,
                file: path_1.default.relative(lowestCommonAncestorDir, model.file)
            };
        }
        const { folder, ...modelWithoutFolder } = model;
        // path.relative() throws on a non-string argument, so a model with
        // no folder at all has nothing to adjust.
        if (folder == null) {
            return modelWithoutFolder;
        }
        const relativeFolder = path_1.default.relative(lowestCommonAncestorDir, folder);
        return relativeFolder ? { ...model, folder: relativeFolder } : modelWithoutFolder;
    });
}
/**
 * Finds the deepest folder that contains all the given files.
 *
 * @param {string[]} filePaths non-empty list of file paths.
 * @returns {string} the common folder, or '' as soon as any file has no
 *   directory part or the files share no leading folder.
 * @throws {Error} when `filePaths` is empty.
 */
function findLowestCommonAncestorFolder(filePaths) {
    if (filePaths.length === 0) {
        throw new Error('findLowestCommonAncestorFolder can not be called with empty array');
    }
    const { sep } = path_1.default;
    const directoryOf = (filePath) => path_1.default.parse(filePath).dir;
    const [firstPath, ...remainingPaths] = filePaths;
    let ancestor = directoryOf(firstPath);
    for (const filePath of remainingPaths) {
        // Once nothing is shared, no further file can change the answer.
        if (ancestor === '') {
            return '';
        }
        const directory = directoryOf(filePath);
        if (directory === '') {
            return '';
        }
        const directoryParts = directory.split(sep);
        // Keep the leading segments both directories agree on.
        const sharedParts = lodash_1.default.takeWhile(ancestor.split(sep), (part, position) => position < directoryParts.length && part === directoryParts[position]);
        ancestor = sharedParts.join(sep);
    }
    return ancestor;
}
/**
 * Tells whether every given file sits directly in the same directory.
 *
 * @param {string[]} filePaths non-empty list of file paths.
 * @returns {boolean} true when all files share the directory of the first one.
 */
function allFilePathInSameFolder(filePaths) {
    const directoryOf = (filePath) => path_1.default.parse(filePath).dir;
    const [firstPath, ...otherPaths] = filePaths;
    const referenceDirectory = directoryOf(firstPath);
    return lodash_1.default.every(otherPaths, (filePath) => directoryOf(filePath) === referenceDirectory);
}
/**
 * Returns `name` if it is not already taken, otherwise the first of
 * `name_1`, `name_2`, ... that is not in `otherNames`.
 *
 * @param {string} name preferred name.
 * @param {string[]} otherNames names already in use.
 * @returns {string} a name not present in `otherNames`.
 */
function getUniqueName(name, otherNames) {
    const isTaken = (candidate) => otherNames.includes(candidate);
    if (!isTaken(name)) {
        return name;
    }
    for (let suffix = 1;; suffix++) {
        const candidate = `${name}_${suffix}`;
        if (!isTaken(candidate)) {
            return candidate;
        }
    }
}
/**
 * Derives a snake_case model name from the last segment of a folder path,
 * dropping one trailing 's' as a simple singularization
 * (e.g. a last segment of `posts` becomes `post`).
 *
 * A segment that is just 's' is kept as-is, since stripping it would yield
 * an empty model name.
 *
 * @param {string} filePath folder path using the platform separator.
 * @returns {string} the snake_case model name.
 */
function getModelNameFromFilePath(filePath) {
    const lastPathPart = lodash_1.default.last(filePath.split(path_1.default.sep));
    const hasPluralSuffix = lastPathPart.length > 1 && lastPathPart.endsWith('s');
    const singularName = hasPluralSuffix ? lastPathPart.slice(0, -1) : lastPathPart;
    return lodash_1.default.snakeCase(singularName);
}
//# sourceMappingURL=schema-generator.js.map