@seasketch/geoprocessing
Version:
Geoprocessing and reporting framework for SeaSketch 2.0
274 lines (252 loc) • 9.15 kB
text/typescript
import { readDatasources } from "./datasources.js";
import {
Metric,
Datasource,
Geography,
VectorDatasource,
} from "../../../src/types/index.js";
import {
firstMatching,
isRasterDatasource,
isVectorDatasource,
} from "../../../src/datasources/index.js";
import ProjectClientBase from "../../../src/project/ProjectClientBase.js";
import { readGeographies } from "../geographies/geographies.js";
import { createOrUpdatePrecalcMetrics } from "./precalc.js";
import { precalcVectorDatasource } from "./precalcVectorDatasource.js";
import { precalcRasterDatasource } from "./precalcRasterDatasource.js";
import cloneDeep from "lodash/cloneDeep.js";
/** Optional overrides accepted by precalcDatasources and precalcMetrics */
export interface PrecalcDatasourceOptions {
/** Alternative path to read datasources from, instead of the default */
newDatasourcePath?: string;
/** Alternative path to read geographies from, instead of the default. Useful for testing */
newGeographyPath?: string;
/** Alternative path to store precalc data. Useful for testing */
newPrecalcPath?: string;
/** Alternative dist path. Useful for testing */
newDstPath?: string;
/** Array of datasource IDs to precalc, for all geographies. Defaults to ["*"], which matches all datasources */
datasourceMatcher?: string[];
/** Array of geography IDs to precalc, for all datasources. Defaults to ["*"], which matches all geographies */
geographyMatcher?: string[];
/** Alternative port to fetch data from */
port?: number;
}
/**
 * Precalc one or more datasources for a project, for one or more defined geographies,
 * one datasource/geography combination at a time, writing metrics out as each completes.
 *
 * Two passes are made:
 *  1. every datasource selected by `datasourceMatcher` is run against all geographies
 *  2. every geography selected by `geographyMatcher` is run against all datasources
 * Combinations that already succeeded are not repeated, and combinations where either
 * the datasource or the geography has `precalc` set to false are skipped. A failing
 * combination is logged and counted, and processing moves on to the next one.
 *
 * @param projectClient - project client handed through to the precalc functions
 * @param extraOptions - path overrides and datasource/geography matchers
 * @returns the metrics of every combination that completed successfully
 * @throws Error when `geographyMatcher` names specific IDs and none of them match a geography
 */
export async function precalcDatasources<C extends ProjectClientBase>(
  projectClient: C,
  extraOptions: PrecalcDatasourceOptions = {},
): Promise<Metric[]> {
  const {
    newDatasourcePath,
    newGeographyPath,
    datasourceMatcher = ["*"],
    geographyMatcher = ["*"],
  } = extraOptions;

  const allGeographies = await readGeographies(newGeographyPath);

  // Start with no geographies to precalc. Matcher can specify all (*) or some
  let matchingGeographies: Geography[] = [];
  if (geographyMatcher && geographyMatcher.length > 0) {
    if (geographyMatcher.includes("*")) {
      matchingGeographies = cloneDeep(allGeographies);
    } else {
      matchingGeographies = cloneDeep(allGeographies).filter((geog) =>
        geographyMatcher.includes(geog.geographyId),
      );
      if (matchingGeographies.length === 0)
        throw new Error(
          `No matching geographies found for ${geographyMatcher}, exiting`,
        );
    }
  }

  // Read the datasources once. The result is awaited BEFORE filtering so this works
  // whether readDatasources returns an array or a promise of one.
  const loadedDatasources = await readDatasources(newDatasourcePath);
  // Vector datasources are the lookup pool for the datasource backing each geography
  const vectorDatasources = loadedDatasources.filter((ds) =>
    isVectorDatasource(ds),
  ) as VectorDatasource[];
  // Work on a deep copy so the lookup pool above stays independent of the work list
  const datasources = cloneDeep(loadedDatasources);

  // Start with no datasources to precalc. Matcher can specify all (*) or some
  let matchingDatasources: Datasource[] = [];
  if (datasourceMatcher && datasourceMatcher.length > 0) {
    if (datasourceMatcher.includes("*")) {
      matchingDatasources = cloneDeep(datasources);
    } else {
      matchingDatasources = cloneDeep(datasources).filter((ds) =>
        datasourceMatcher.includes(ds.datasourceId),
      );
    }
  }

  // Unambiguous key for a combination, even when the IDs themselves contain hyphens
  const comboKey = (datasourceId: string, geographyId: string): string =>
    JSON.stringify([datasourceId, geographyId]);

  // Process one at a time
  let failed = 0;
  let successful = 0;
  let finalMetrics: Metric[] = [];
  // Combinations that completed successfully, so the second pass does not repeat them
  const processed = new Set<string>();
  // Combinations skipped because precalc is disabled. A set, so a combination seen in
  // both passes is only counted once
  const skipped = new Set<string>();

  // Run precalc on matching subset of datasources for all geographies
  for (const ds of matchingDatasources) {
    for (const geog of allGeographies) {
      const key = comboKey(ds.datasourceId, geog.geographyId);
      // Skip if either datasource or geography has precalc set to false
      if (geog.precalc === false || ds.precalc === false) {
        skipped.add(key);
        continue;
      }
      // Skip if already processed
      if (processed.has(key)) {
        continue;
      }
      try {
        console.log(
          `Precalculating datasource ${ds.datasourceId} for geography ${geog.geographyId}`,
        );
        const geogDatasource = firstMatching(
          vectorDatasources,
          (item) => item.datasourceId === geog.datasourceId,
        );
        const metrics = await precalcMetrics(
          projectClient,
          ds,
          geog,
          geogDatasource,
          extraOptions,
        );
        finalMetrics = finalMetrics.concat(metrics);
        console.log(" ");
        successful += 1;
        processed.add(key);
      } catch (error: unknown) {
        if (error instanceof Error) {
          console.log(error.message);
          console.log(error.stack);
        } else {
          // Non-Error throwables must still be reported and counted
          console.log(String(error));
        }
        console.log(
          `Updating precalc metrics for ${ds.datasourceId} failed, moving to next`,
        );
        failed += 1;
      }
    }
  }

  // Also run precalc on matching subset of geographies for all datasources, for completeness
  for (const geog of matchingGeographies) {
    for (const ds of datasources) {
      const key = comboKey(ds.datasourceId, geog.geographyId);
      if (geog.precalc === false || ds.precalc === false) {
        skipped.add(key);
        continue;
      }
      // Skip if already processed
      if (processed.has(key)) {
        continue;
      }
      try {
        console.log(
          `Precalculating datasource ${ds.datasourceId} + geography ${geog.geographyId}`,
        );
        const geogDatasource = firstMatching(
          vectorDatasources,
          (item) => item.datasourceId === geog.datasourceId,
        );
        const metrics = await precalcMetrics(
          projectClient,
          ds,
          geog,
          geogDatasource,
          extraOptions,
        );
        finalMetrics = finalMetrics.concat(metrics);
        console.log(`${ds.datasourceId} precalc complete`);
        console.log(" ");
        successful += 1;
        processed.add(key);
      } catch (error: unknown) {
        if (error instanceof Error) {
          console.log(error.stack);
        } else {
          // Non-Error throwables must still be reported and counted
          console.log(String(error));
        }
        console.log(
          `Precalculating metrics for datasource ${ds.datasourceId} + geography ${geog.geographyId} failed, moving to next`,
        );
        failed += 1;
      }
    }
  }

  // Summary. Each line reports its own counter
  if (successful > 0)
    console.log(
      `${successful} datasource/geography combinations precalculated successfully`,
    );
  if (skipped.size > 0)
    console.log(
      `${skipped.size} datasource/geography combinations skipped due to precalc disabled`,
    );
  // Only claim nothing was found when nothing was attempted at all
  if (successful === 0 && skipped.size === 0 && failed === 0) {
    console.log(`No datasources or geographies found to precalculate`);
  }
  if (failed > 0) {
    console.log(
      `${failed} datasources failed to precalculate. Fix them and try again`,
    );
  }
  return finalMetrics;
}
/**
 * Precalculate metrics for one datasource within one geography, then pass them to
 * createOrUpdatePrecalcMetrics together with a matcher built for this
 * datasource/geography pair and the optional precalc file path.
 *
 * Datasources that pass neither the vector nor the raster check are not precalculated:
 * a message is logged and an empty metric list is used instead.
 *
 * @param projectClient - project client handed to the vector/raster precalc function
 * @param ds - datasource to precalculate
 * @param geog - geography to precalculate the datasource for
 * @param geogDs - vector datasource handed to the precalc function alongside the geography
 * @param extraOptions - supplies `newPrecalcPath`, `newDstPath` and `port`
 * @returns the metrics calculated for this pair (empty when precalc is not supported)
 */
export const precalcMetrics = async (
  projectClient: ProjectClientBase,
  ds: Datasource,
  geog: Geography,
  geogDs: VectorDatasource,
  extraOptions: PrecalcDatasourceOptions,
): Promise<Metric[]> => {
  // Only these two options are forwarded to the type-specific precalc functions
  const precalcOptions = {
    newDstPath: extraOptions.newDstPath,
    port: extraOptions.port,
  };

  let curMetrics: Metric[];
  if (isVectorDatasource(ds) && ds.geo_type === "vector") {
    curMetrics = await precalcVectorDatasource(
      projectClient,
      ds,
      geog,
      geogDs,
      precalcOptions,
    );
  } else if (isRasterDatasource(ds) && ds.geo_type === "raster") {
    curMetrics = await precalcRasterDatasource(
      projectClient,
      ds,
      geog,
      geogDs,
      precalcOptions,
    );
  } else {
    console.log(`Skipping ${ds.datasourceId}, precalc not supported`);
    curMetrics = [];
  }

  createOrUpdatePrecalcMetrics(curMetrics, {
    matcher: staleMetricsFilterFnFactory(ds.datasourceId, geog.geographyId),
    filePath: extraOptions.newPrecalcPath,
  });

  return curMetrics;
};
/**
 * Builds a metric predicate for one datasource/geography pair.
 *
 * The returned function yields true when the metric has a classId that does not start
 * with `<datasourceId>-`, or has a geographyId different from the given one. It yields
 * false otherwise, including when both fields are missing or empty.
 *
 * @param datasourceId - datasource whose `<datasourceId>-` classId prefix is checked
 * @param geographyId - geography ID the metric's geographyId is compared against
 * @returns predicate over a single Metric
 */
export const staleMetricsFilterFnFactory = (
  datasourceId: string,
  geographyId: string,
) => {
  const classPrefix = `${datasourceId}-`;
  return (m: Metric): boolean => {
    // A classId that is present but belongs to some other datasource
    if (m.classId && !m.classId.startsWith(classPrefix)) {
      return true;
    }
    // A geographyId that is present but names some other geography
    if (m.geographyId && m.geographyId !== geographyId) {
      return true;
    }
    return false;
  };
};