terriajs
Version:
Geospatial data visualization platform.
442 lines (385 loc) • 14.3 kB
text/typescript
import Bottleneck from "bottleneck";
import { shuffle } from "lodash-es";
import fs from "node:fs";
import { join, parse, dirname } from "node:path";
import TerriaError from "../lib/Core/TerriaError";
import filterOutUndefined from "../lib/Core/filterOutUndefined";
import timeout from "../lib/Core/timeout";
import CatalogMemberMixin, {
getName
} from "../lib/ModelMixins/CatalogMemberMixin";
import GroupMixin from "../lib/ModelMixins/GroupMixin";
import MappableMixin from "../lib/ModelMixins/MappableMixin";
import ReferenceMixin from "../lib/ModelMixins/ReferenceMixin";
import CatalogGroup from "../lib/Models/Catalog/CatalogGroup";
import CkanItemReference from "../lib/Models/Catalog/Ckan/CkanItemReference";
import registerCatalogMembers from "../lib/Models/Catalog/registerCatalogMembers";
import { BaseModel } from "../lib/Models/Definition/Model";
import hasTraits from "../lib/Models/Definition/hasTraits";
import { CatalogIndexFile } from "../lib/Models/SearchProviders/CatalogIndex";
import registerSearchProviders from "../lib/Models/SearchProviders/registerSearchProviders";
import Terria from "../lib/Models/Terria";
import CatalogMemberReferenceTraits from "../lib/Traits/TraitsClasses/CatalogMemberReferenceTraits";
import patchNetworkRequests from "./patchNetworkRequests";
import { program } from "commander";
const writeFileSync = (...args: Parameters<typeof fs.writeFileSync>) => {
const path = args[0];
const dir = dirname(path.toString());
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true });
}
fs.writeFileSync(...args);
};
/** Add model to index */
function indexModel(
terria: Terria,
index: CatalogIndexFile,
member: CatalogMemberMixin.Instance | GroupMixin.Instance
) {
if (
member.uniqueId &&
member.uniqueId !== "/" &&
member.uniqueId !== "__User-Added_Data__"
) {
const name = getName(member);
const nameInCatalog = CatalogMemberMixin.isMixedInto(member)
? member.nameInCatalog
: undefined;
// eslint-disable-next-line prefer-const
let description = "";
// Remove description from CatalogIndex - as it makes files too large
// if (CatalogMemberMixin.isMixedInto(member)) {
// description =
// member.description +
// "\n" +
// member.info
// .map(i => i.content)
// .filter(c => c)
// .join("\n");
// }
const shareKeys = terria.modelIdShareKeysMap.get(member.uniqueId);
// If model isn't already in index - create it
if (!index[member.uniqueId]) {
index[member.uniqueId] = {
name,
nameInCatalog: nameInCatalog !== name ? nameInCatalog : undefined,
description: description || undefined,
memberKnownContainerUniqueIds: [...member.knownContainerUniqueIds], // clone array
isGroup: GroupMixin.isMixedInto(member) ? true : undefined,
isMappable: MappableMixin.isMixedInto(member) ? true : undefined,
shareKeys:
shareKeys && shareKeys.length > 0 ? [...shareKeys] : undefined // clone array
};
// If model IS already in index - see if more info can be added
// Merge shareKeys and memberKnownContainerUniqueIds
//
} else {
const mergedShareKeys = Array.from(
new Set([
...(shareKeys ?? []),
...(index[member.uniqueId].shareKeys ?? [])
])
);
index[member.uniqueId].shareKeys =
mergedShareKeys && mergedShareKeys.length > 0
? mergedShareKeys
: undefined;
const mergedContainerIds = Array.from(
new Set([
...member.knownContainerUniqueIds,
...(index[member.uniqueId].memberKnownContainerUniqueIds ?? [])
])
);
index[member.uniqueId].memberKnownContainerUniqueIds = mergedContainerIds;
}
}
}
/** Gets full path of member */
function getPath(terria: Terria, member: BaseModel | undefined): string {
return filterOutUndefined([
...[
member?.knownContainerUniqueIds.map((id) =>
getPath(terria, terria.getModelById(BaseModel, id))
)
].reverse(),
member?.uniqueId
]).join("/");
}
async function loadGroup(
terria: Terria,
group: GroupMixin.Instance,
errors: TerriaError[]
) {
const name = getName(group);
const path = getPath(terria, group);
console.log(`Loading Group ${name} (${path})`);
const result = await group.loadMembers();
result.logError(`FAILED to load GROUP ${name} (${path})`);
result.pushErrorTo(errors, `FAILED to load GROUP ${name} (${path})`);
result.catchError((e) => console.error(e.toError().message));
}
async function loadReference(
terria: Terria,
reference: ReferenceMixin.Instance,
errors: TerriaError[]
) {
const name = getName(reference);
const path = getPath(terria, reference);
console.log(`Loading Reference ${name} (${path})`);
const result = await reference.loadReference();
result.logError(`FAILED to load Reference ${name} (${path})`);
result.pushErrorTo(errors, `FAILED to load Reference ${name} (${path})`);
result.catchError((e) => console.error(e.toError().message));
}
export default async function generateCatalogIndex(
configUrl: string,
baseUrl: string,
outPath: string | undefined,
speedString: string | undefined,
excludeIds: string[] | undefined,
basicAuth: string | undefined,
timeoutMs: number
) {
const debug = false;
let speed = speedString ? parseFloat(speedString) : 1;
if (speed < 1) speed = 1;
// Make sure baseURL has trailing slash
baseUrl = baseUrl.endsWith("/") ? baseUrl : `${baseUrl}/`;
patchNetworkRequests(baseUrl, basicAuth, true);
console.log(`Config URL: ${configUrl}`);
// Limit load reference calls to 10 per second (on speed = 1)
const loadLimiter = new Bottleneck({
maxConcurrent: 10 * speed,
minTime: 100 / speed
});
let totalJobs = 0;
let completedJobs = 0;
const printStatus = () => {
console.log(
"\x1b[44m\x1b[37m%s\x1b[0m",
`${((completedJobs * 100) / (totalJobs || 1)).toPrecision(
3
)}% DONE - (${completedJobs}/${totalJobs || 1})`
);
};
const index: CatalogIndexFile = {};
const errors: TerriaError[] = [];
/** Recursively load all references and groups.
* This will add members to `index` after loading
*/
async function loadAndIndexMember(terria: Terria, member: BaseModel) {
let name = getName(member);
const path = getPath(terria, member);
if (member.uniqueId && excludeIds && excludeIds.includes(member.uniqueId)) {
console.log(`Excluding model \`${member.uniqueId}\`:"${name}" (${path}`);
return;
}
totalJobs++;
// Random priority between 3 and 9 for references/members
// Random priority between 0 and 5 for groups
// This is so we slightly priorities loading groups before references
// But we don't want to load all groups at once - as they can be very expensive network requests
const memberPriority = Math.round(Math.random() * 6) + 3;
const groupPriority = Math.round(Math.random() * 5);
// Load reference - this also handles nested references
while (ReferenceMixin.isMixedInto(member)) {
// We immediately de-reference CkanItemReferences with `_ckanDataset` - as they don't make any XHR
if (member instanceof CkanItemReference && member._ckanDataset) {
await loadReference(terria, member, errors);
}
// All other references are de-referenced through the queue
else {
try {
const priority =
hasTraits(member, CatalogMemberReferenceTraits, "isGroup") &&
member.isGroup
? groupPriority
: memberPriority;
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
debug
? console.log(
"\x1b[32m%s\x1b[0m",
`Adding Reference ${name} (${path})`
)
: null;
await timeout(Math.random() * 1000);
await loadLimiter.schedule(
{ expiration: timeoutMs, priority },
loadReference,
terria,
member,
errors
);
} catch (_timeout) {
errors.push(
TerriaError.from(
`TIMEOUT FAILED to load Reference ${name} (${path})`
)
);
console.error(`TIMEOUT FAILED to load Reference ${name}`);
}
}
if (member.target) {
member = member.target;
} else {
// Something has gone wrong
// After loading reference we don't have a target
// So break from while loop
break;
}
name = getName(member);
}
if (GroupMixin.isMixedInto(member)) {
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
debug
? console.log("\x1b[36m%s\x1b[0m", `Adding Group ${name} (${path})`)
: null;
// CatalogGroup can be loaded immediately
// Even though CatalogGroup doesn't have anything to load
// This needs to be called so GroupMixin.refreshKnownContainerUniqueIds is called
if (member instanceof CatalogGroup) {
await loadGroup(terria, member, errors);
} else {
try {
await timeout(Math.random() * 1000);
await loadLimiter.schedule(
{ expiration: timeoutMs, priority: groupPriority },
loadGroup,
terria,
member,
errors
);
} catch (_timeout) {
errors.push(
TerriaError.from(
`TIMEOUT FAILED to load GROUP ${name} (${path}) = ${groupPriority}`
)
);
console.error(`TIMEOUT FAILED to load GROUP ${name} (${path})`);
}
}
// Add catalog group to index (if it isn't empty)
// Note: this needs to happen after recursively loading group members - as after each member is loaded, the reference is removed.
// This would result in memberModels being empty!
if (member.memberModels.length > 0) indexModel(terria, index, member);
// Recursively load group members
await Promise.all(
shuffle(member.memberModels).map((child) => {
return loadAndIndexMember(terria, child);
})
);
} else if (CatalogMemberMixin.isMixedInto(member)) {
// Add catalog member to index
indexModel(terria, index, member);
}
// Remove model after it has been indexed
terria.removeModelReferences(member);
completedJobs++;
printStatus();
}
// Terria initialisation
const terriaOptions = {
baseUrl: "build/TerriaJS"
};
const terria = new Terria(terriaOptions);
registerCatalogMembers();
registerSearchProviders();
try {
terria.configParameters.serverConfigUrl = `${baseUrl}serverconfig`;
terria.configParameters.corsProxyBaseUrl = `${baseUrl}proxy/`;
await terria.start({ configUrl });
await terria.loadInitSources();
} catch (e) {
TerriaError.from(e, `Failed to initialise Terria`).log();
}
// Load group and references
// rootId can be set to change root group that is loaded for testing purposes
// If undefined, then terria root catalog group will be used
const rootId: string | undefined = undefined;
const model = rootId
? terria.getModelById(BaseModel, rootId)
: terria.catalog.group;
if (!model) throw new Error("No model to load");
await loadAndIndexMember(terria, model);
const outPathResolved = parse(outPath ?? "catalog-index.json");
// Sort index by ID (so we can compare them easier)
const sortedIndex = Object.keys(index)
.sort((a, b) => a.localeCompare(b))
.reduce<CatalogIndexFile>((acc, currentKey) => {
acc[currentKey] = index[currentKey];
return acc;
}, {});
// Save index to file
writeFileSync(outPath ?? "catalog-index.json", JSON.stringify(sortedIndex));
// Save errors to file
const terriaError = TerriaError.combine(errors, "Errors")?.toError();
if (terriaError?.stack) {
writeFileSync(
join(outPathResolved.dir, outPathResolved.name + "errors.json"),
terriaError.message
);
writeFileSync(
join(outPathResolved.dir, outPathResolved.name + "errors-stack.json"),
terriaError.stack
);
} else {
writeFileSync(
join(outPathResolved.dir, outPathResolved.name + "errors.json"),
"No errors"
);
writeFileSync(
join(outPathResolved.dir, outPathResolved.name + "errors-stack.json"),
"No errors"
);
}
}
program
.name("generateCatalogIndex")
.description(
`Generate catalog index (**experimental**)
This will "crawl" a terria JS catalog, load all groups and references and then create an "index" file which contains fully resolved tree of models.
Example usage
- node ./build/generateCatalogIndex.js -c http://localhost:3001/config.json -b http://localhost:3001/
`
)
.requiredOption("-c, --configUrl <configUrl>", "configUrl URL to map-config")
.requiredOption(
"-b, --baseUrl <baseUrl>",
"baseUrl will be used as:\n- `origin` property for CORS\n- URL for `serverConfig`\n- URL for `proxy`"
)
.option(
"-o, --outPath [outPath]",
"catalog-index JSON file path",
"catalog-index.json"
)
.option(
"--basicAuth [basicAuth]",
"basic auth token to add to requests which include `baseUrl` (or `proxy/`)"
)
.option(
"--excludeIds [ids...]",
'CSV of model IDs to exclude from catalog index (eg "some-id-1 some-id-2")'
)
.option(
"-s, --speed [speed]",
"speed will control number of concurrently loaded catalog groups/references:\n- default value is 1 (which is around 10 loads per second)\n- minimum value is 1\n- If speed = 10 - then expect around 100 loads per second\n- Note: loads are somewhat randomised across catalog, so you don't hit one server with many requests\n- Also note: one load may not equal one request. some groups/references do not make network requests",
parseFloat,
1
)
.option(
"-t, --timeout [timeout]",
"Network request timeout (in ms)",
parseFloat,
30000
);
program.parse();
const options = program.opts();
generateCatalogIndex(
options.configUrl,
options.baseUrl,
options.outPath,
options.speed,
options.excludeIds,
options.basicAuth,
options.timeout
);