UNPKG

@seasketch/geoprocessing

Version:

Geoprocessing and reporting framework for SeaSketch 2.0

622 lines (579 loc) • 19 kB
import {
  FeatureCollection,
  Feature,
  Polygon,
  MultiPolygon,
  BBox,
} from "../types/index.js";
import Flatbush from "flatbush";
import Pbf from "pbf";
import geobuf from "geobuf";
import rbushDefault from "rbush";
import mnemonist from "mnemonist";
import { bbox, featureCollection as fc } from "@turf/turf";
import isHostedOnLambda from "./isHostedOnLambda.js";
import { union } from "union-subdivided-polygons";
import { defaultImport } from "default-import";

const RBush = await defaultImport(rbushDefault);

// import { recombineTree } from "./recombine";

/** Polygon or MultiPolygon feature whose bounding box is always populated. */
export interface VectorFeature extends Feature<Polygon | MultiPolygon> {
  // always set by VectorDataSource
  bbox: BBox;
}

/** Returns the feature's own bbox when present, otherwise computes one. */
const getBBox = (feature: Feature) => {
  return feature.bbox || bbox(feature);
};

/** Record of a constructed VectorDataSource: its url and resolved options. */
export interface VectorDataSourceDetails {
  options: VectorDataSourceOptions;
  url: string;
}

// Module-level registry; every VectorDataSource constructor appends to it.
let sources: VectorDataSourceDetails[] = [];

export interface VectorDataSourceOptions {
  /**
   * Max number of feature bundles to keep in memory.
   * Calls to .fetch() will not return more than the contents of these
   * bundles, so this acts as an effective limit on subsequent analysis.
   * @type {number}
   * @default 250
   * @memberof VectorDataSourceOptions
   */
  cacheSize: number;
  /**
   * Source will only preload bundles when the bounding box provided to hint()
   * contains no more than hintPrefetchLimit bundles.
   * @type {number}
   * @default 8
   * @memberof VectorDataSourceOptions
   */
  hintPrefetchLimit: number;
  /**
   * When features are requested by fetch, bundled features with matching
   * union_id will be dissolved into a single feature. This dissolved feature is
   * expensive to create and so may be cached. A cache may contain more bundles
   * than needed, and this variable sets a cap on that number.
   *
   * @type {number}
   * @default 3
   * @memberof VectorDataSourceOptions
   */
  dissolvedFeatureCacheExcessLimit: number;
}

/** Option values used for anything the caller does not override. */
export const DEFAULTS: VectorDataSourceOptions = {
  cacheSize: 250,
  hintPrefetchLimit: 8,
  dissolvedFeatureCacheExcessLimit: 3,
};

/** Shape of the `metadata.json` document read from the datasource root. */
interface DataSourceMetadata {
  name: string;
  project: string;
  homepage: string;
  version: number;
  description: string;
  index: IndexSource;
  compositeIndexes: CompositeIndexSource[];
}

interface IndexSource {
  length: number;
  bytes: number;
  location: string;
  rootDir: string;
}

interface CompositeIndexSource extends IndexSource {
  bbox: BBox;
  offset: number;
}

/** Bookkeeping for a bundle download that has not settled yet. */
interface PendingRequest {
  promise: Promise<any>;
  abortController: AbortController;
  priority: "low" | "high";
}

interface DissolvedFeatureCache {
  feature: Feature<Polygon | MultiPolygon>;
  bundleIds: { [key: string]: boolean };
}

export interface FeatureTree {
  fid: number;
  root: Node;
}

export interface Node {
  nodeId: number;
  leaf?: VectorFeature;
  ancestors: number[];
  cutline?: number;
  children?: Node[];
}

/** RBush index keyed on each feature's `bbox` ([minX, minY, maxX, maxY]). */
class RBushIndex extends RBush<Feature> {
  toBBox(feature: Feature) {
    const [minX, minY, maxX, maxY] = feature.bbox!;
    return { minX, minY, maxX, maxY };
  }
  compareMinX(a: Feature, b: Feature) {
    return a.bbox![0] - b.bbox![0];
  }
  compareMinY(a: Feature, b: Feature) {
    return a.bbox![1] - b.bbox![1];
  }
}

export class VectorDataSource<T extends Feature<Polygon | MultiPolygon>> {
  options: VectorDataSourceOptions;
  metadata?: DataSourceMetadata;
  private url: string;
  // In-flight metadata + index load, shared by concurrent callers
  private initPromise?: Promise<void>;
  private initError?: Error;
  private bundleIndex?: Flatbush;
  private pendingRequests: Map<string, PendingRequest>;
  private cache: mnemonist.LRUCache<number, FeatureCollection>;
  private tree: RBushIndex;
  private dissolvedFeatureCache?: DissolvedFeatureCache;
  private needsRewinding?: boolean;
  // Set once metadata and the bundle index have both loaded successfully
  private metadataFetched: boolean = false;

  /**
   * VectorDataSource aids client-side or lambda based geoprocessing tools fetch
   * data from binned static vector sources generated by @seasketch/datasources
   * commands.
   *
   * Construction performs no network requests; it only records the url (with
   * any single trailing slash removed), merges `options` over DEFAULTS and
   * registers the source in the module-level registry.
   *
   * @param {string} url
   * @param {VectorDataSourceOptions} options
   * @memberof VectorDataSource
   */
  constructor(url: string, options: Partial<VectorDataSourceOptions> = {}) {
    this.options = { ...DEFAULTS, ...options };
    this.url = url.replace(/\/$/, "");
    this.pendingRequests = new Map();
    this.cache = new mnemonist.LRUCache(
      Uint32Array,
      Array,
      this.options.cacheSize,
    );
    this.tree = new RBushIndex();
    sources.push({
      url: this.url,
      options: this.options,
    });
  }

  /** Empties the module-level registry of constructed sources. */
  static clearRegisteredSources() {
    sources = [];
  }

  /** Returns the module-level registry of constructed sources. */
  static getRegisteredSources() {
    return sources;
  }

  /**
   * Ensures metadata.json and the bundle index are loaded.
   *
   * Never rejects: failures are recorded in `initError` so they can be raised
   * at the point of use (see identifyBundles). Concurrent callers share a
   * single in-flight load instead of each issuing their own requests. A call
   * made after a failed load clears `initError` and tries again.
   */
  private async fetchMetadata(): Promise<void> {
    if (this.metadata && this.bundleIndex) {
      return;
    }
    if (this.initPromise) {
      return this.initPromise;
    }
    delete this.initError;
    const pending = this.loadMetadata(this.url + "/metadata.json");
    this.initPromise = pending;
    try {
      await pending;
    } finally {
      // Allow a later call to retry if this load failed
      if (this.initPromise === pending) {
        this.initPromise = undefined;
      }
    }
  }

  /** Downloads metadata.json then the bundle index; records any failure in initError. */
  private async loadMetadata(metadataUrl: string): Promise<void> {
    try {
      const response = await fetch(metadataUrl);
      if (!response.ok) {
        throw new Error(
          `Unexpected response status ${response.status} for ${metadataUrl}`,
        );
      }
      this.metadata = (await response.json()) as DataSourceMetadata;
      await this.fetchBundleIndex();
      this.metadataFetched = true;
    } catch (error) {
      // It's easier to deal with these errors at the point of use later,
      // rather than as a side-effect of instantiation. Otherwise it's easy
      // to run into unhandled promise exceptions or rejections
      // The identifyBundles method will check for initError
      console.error(error);
      this.initError = new Error(
        `Problem fetching VectorDataSource manifest from ${metadataUrl}`,
      );
    }
  }

  /**
   * Loads (once) the Flatbush index described by `metadata.index`.
   *
   * @returns the cached or newly loaded index
   * @throws if metadata has not been loaded, the manifest has no index entry,
   * or the index cannot be downloaded or parsed
   */
  private async fetchBundleIndex(): Promise<Flatbush> {
    // for now, prefer the entire index
    if (this.bundleIndex) {
      return this.bundleIndex;
    }
    if (!this.metadata) {
      throw new Error("Metadata not yet fetched");
    }
    const i = this.metadata.index;
    if (!i) {
      throw new Error(`Expected "entire" index not found in manifest`);
    }
    let index: Flatbush;
    try {
      const response = await fetch(this.url + i.location);
      if (!response.ok) {
        throw new Error(`Unexpected response status ${response.status}`);
      }
      index = Flatbush.from(await response.arrayBuffer());
    } catch (error) {
      console.error(error);
      throw new Error(
        `Problem fetching or parsing index data at ${i.location}`,
      );
    }
    this.bundleIndex = index;
    return index;
  }

  /**
   * Returns the ids of the bundles the index reports for `bbox`.
   *
   * @throws the recorded initialization error if metadata or the index could
   * not be loaded
   */
  private async identifyBundles(bbox: BBox) {
    await this.fetchMetadata();
    // It's easier to deal with these errors at the point of use, rather than
    // as a side-effect of instantiation. Otherwise it's easy to run into
    // unhandled promise exceptions or rejections
    if (this.initError) {
      throw this.initError;
    }
    if (!this.bundleIndex) {
      throw new Error("Bundle index not loaded");
    }
    // this will have to be more complex to accommodate nested indices
    return this.bundleIndex.search(bbox[0], bbox[1], bbox[2], bbox[3]);
  }

  /**
   * Fetches one bundle, decodes it and adds its features to the cache and the
   * spatial index. Cached bundles are returned directly and an in-flight
   * request for the same bundle is shared rather than duplicated.
   *
   * NOTE: if the request is aborted (see clear() and hint()) the returned
   * promise resolves to `undefined` rather than rejecting.
   *
   * @param id bundle id as returned by the bundle index
   * @param priority "low" requests may be cancelled by later calls to
   * hint(), prefetch() or fetch() for a different extent; "high" may not
   * @throws if metadata cannot be loaded or the bundle response is not ok
   */
  async fetchBundle(
    id: number,
    priority: "low" | "high" = "low",
  ): Promise<FeatureCollection> {
    if (!this.metadata) {
      // The bundle url depends on metadata.index.rootDir
      await this.fetchMetadata();
      if (this.initError) {
        throw this.initError;
      }
    }
    const metadata = this.metadata;
    if (!metadata) {
      throw new Error("Metadata not yet fetched");
    }

    const cached = this.cache.get(id);
    if (cached) {
      // debug(`Found bundle ${id}.proto in cache`);
      return cached;
    }

    const key = id.toString();
    const existingRequest = this.pendingRequests.get(key);
    if (existingRequest) {
      // debug(`Found bundle ${id}.proto request in progress`);
      if (priority === "high") {
        // A caller now depends on this bundle, so it must no longer be
        // eligible for cancellation as a low priority request
        existingRequest.priority = "high";
      }
      return existingRequest.promise;
    }

    // start fetching and processing
    const url = `${this.url}${metadata.index.rootDir}/${id}.pbf`;
    // debug(`Fetching bundle ${url}`);
    const abortController = new AbortController();
    const { signal } = abortController;
    const throwIfAborted = () => {
      if (signal.aborted) {
        throw new DOMException("Aborted", "AbortError");
      }
    };
    // Started outside the async wrapper so that a synchronous failure
    // surfaces before anything is registered as pending
    const responsePromise = fetch(url, { signal });
    const promise: Promise<any> = (async () => {
      try {
        const response = await responsePromise;
        throwIfAborted();
        if (!response.ok) {
          throw new Error(`Problem fetching datasource bundle at ${url}`);
        }
        const arrayBuffer = await response.arrayBuffer();
        throwIfAborted();
        const geojson = geobuf.decode(
          new Pbf(arrayBuffer),
        ) as FeatureCollection;
        // if (this.needsRewinding === undefined) {
        //   let ring: Position[];
        //   if (geojson.features[0].geometry.type === "MultiPolygon") {
        //     ring = geojson.features[0].geometry.coordinates[0][0];
        //   } else if (geojson.features[0].geometry.type === "Polygon") {
        //     ring = geojson.features[0].geometry.coordinates[0];
        //   }
        //   this.needsRewinding = geojsonArea.ring(ring!) >= 0;
        // }
        this.storeBundle(id, url, geojson);
        return geojson;
      } catch (error) {
        const aborted =
          typeof error === "object" &&
          error !== null &&
          (error as { name?: unknown }).name === "AbortError";
        if (aborted) {
          // do nothing. fetch aborted
          return undefined;
        }
        throw error;
      } finally {
        // Only remove our own entry: after an abort a newer request for the
        // same bundle may already be registered under this key
        if (
          this.pendingRequests.get(key)?.abortController === abortController
        ) {
          this.pendingRequests.delete(key);
        }
      }
    })();
    this.pendingRequests.set(key, {
      abortController,
      promise,
      priority,
    });
    return promise;
  }

  /** Adds a decoded bundle to the LRU cache and its features to the spatial index. */
  private storeBundle(id: number, url: string, geojson: FeatureCollection) {
    // add to bundle cache
    const popped = this.cache.setpop(id, geojson);
    if (popped && popped.evicted) {
      // debug(`Evicting ${popped.key}.proto from cache.`);
      this.removeFeaturesFromIndex(popped.value.features);
    }
    // add individual features to spatial index
    // debug(`Adding features from ${key}.proto to spatial index`);
    for (const feature of geojson.features) {
      if (!feature.bbox) {
        feature.bbox = getBBox(feature);
      }
      feature.properties = feature.properties || {};
      // Record which bundle url the feature was loaded from
      feature.properties._url = url;
      this.tree.insert(feature);
    }
  }

  /** Removes the given features from the spatial index. */
  private removeFeaturesFromIndex(features: Array<Feature>) {
    for (const feature of features) {
      this.tree.remove(feature);
    }
  }

  /**
   * Empties the spatial index and bundle cache and aborts every pending
   * bundle request.
   */
  async clear() {
    this.tree.clear();
    for (const { abortController } of this.pendingRequests.values()) {
      abortController.abort();
    }
    this.pendingRequests.clear();
    this.cache.clear();
  }

  /**
   * Aborts pending "low" priority requests for bundles not listed in `ignore`.
   *
   * @param ignore ids of bundles whose requests must be left running
   */
  private cancelLowPriorityRequests(ignore: Array<number>) {
    const keep = new Set(ignore);
    for (const [key, request] of this.pendingRequests) {
      if (request.priority === "low" && !keep.has(Number.parseInt(key, 10))) {
        // debug(`Cancelling request for ${key}.proto`);
        request.abortController.abort();
        this.pendingRequests.delete(key);
      }
    }
  }

  /**
   * Triggers downloading of indexes and bundles for the defined extent. Bundle
   * data will only be downloaded if the number of bundles within the extent
   * does not exceed options.hintPrefetchLimit.
   *
   * An ideal use-case for this method is to update the datasource whenever a
   * user pans a web map in anticipation of using this source.
   *
   * Pending low priority requests for bundles outside the extent are
   * cancelled.
   *
   * @param bbox extent of interest; positions 0-3 are read as minX, minY,
   * maxX, maxY
   * @returns {Promise<void>} Resolves when all requests are complete
   * @memberof VectorDataSource
   */
  async hint(bbox: BBox): Promise<void> {
    // TODO: fetch any indexes needed if using nested indexes
    // this.prefetchIndicies(xmin, ymin, xmax, ymax);
    const bundleIds = await this.identifyBundles(bbox);
    this.cancelLowPriorityRequests(bundleIds);
    if (bundleIds.length <= this.options.hintPrefetchLimit) {
      // debug(`hint() identified ${bundleIds.length} bundles`);
      await Promise.all(bundleIds.map((id) => this.fetchBundle(id)));
    }
    // else: too many bundles in the extent, nothing is prefetched
  }

  /**
   * Prefetch bundles for the given extent. If a Feature is provided, those
   * bundles that overlap will be prioritized for download first.
   *
   * This operation is *not* effected by `hintPrefetchLimit`. It's best used in
   * situations where the datasource will be used for analysis in the immediate
   * future. For example, when a user has started to draw a feature of interest
   * which will be overlaid.
   *
   * At most options.cacheSize bundles are requested.
   *
   * @param bbox extent of interest; positions 0-3 are read as minX, minY,
   * maxX, maxY
   * @param {Feature} [feature]
   * @returns {Promise<void>}
   * @memberof VectorDataSource
   */
  async prefetch(bbox: BBox, feature?: Feature): Promise<void> {
    // TODO: fetch any indexes needed if using nested indexes
    // this.prefetchIndicies(xmin, ymin, xmax, ymax);
    let bundleIds = await this.identifyBundles(bbox);
    if (feature) {
      // Start with overlapping bundles, then ids of all other bundles in the
      // extent
      const ordered = await this.identifyBundles(getBBox(feature));
      const seen = new Set(ordered);
      for (const id of bundleIds) {
        if (!seen.has(id)) {
          seen.add(id);
          ordered.push(id);
        }
      }
      bundleIds = ordered;
    }
    this.cancelLowPriorityRequests(bundleIds);
    await Promise.all(
      bundleIds
        .slice(0, this.options.cacheSize)
        .map((id) => this.fetchBundle(id)),
    );
    // const features = this.tree.search({
    //   minX: bbox[0],
    //   minY: bbox[1],
    //   maxX: bbox[2],
    //   maxY: bbox[3],
    // });
    // this.preprocess(features);
  }

  /**
   * Fetches bundles of features within bbox
   *
   * Bundles are requested at "high" priority (at most options.cacheSize of
   * them), then the spatial index is searched with the same bbox.
   *
   * @param bbox extent to query; positions 0-3 are read as minX, minY, maxX,
   * maxY
   * @returns the indexed features whose bounding box overlaps `bbox`
   */
  async fetch(bbox: BBox): Promise<T[]> {
    const bundleIds = await this.identifyBundles(bbox);
    this.cancelLowPriorityRequests(bundleIds);
    const timerLabel = `Fetch ${bundleIds.length} bundles from ${this.url}`;
    if (isHostedOnLambda) {
      console.time(timerLabel);
    }
    await Promise.all(
      bundleIds
        .slice(0, this.options.cacheSize)
        .map((id) => this.fetchBundle(id, "high")),
    );
    if (isHostedOnLambda) {
      console.timeEnd(timerLabel);
    }
    // console.time("retrieval and processing");
    // debug(`Searching index`, bbox);
    const features = this.tree.search({
      minX: bbox[0],
      minY: bbox[1],
      maxX: bbox[2],
      maxY: bbox[3],
    }) as unknown as T[];
    // remove extra with overlap test since bundles sometimes aren't entirely well packed
    const a = bbox;
    return features.filter((feature) => {
      // Compare against the feature's own bbox, not the query bbox
      const b = getBBox(feature);
      return a[2] >= b[0] && b[2] >= a[0] && a[3] >= b[1] && b[3] >= a[1];
    });
  }

  /**
   * Fetches bundles of subdivided Polygon or MultiPolygon features within bbox and merges
   * them back into their original features. Merge performance is faster if passed an
   * additional unionProperty, a property that exists in each subdivided feature.
   *
   * @param bbox extent to query
   * @param unionProperty optional property name passed through to the union
   * operation
   * @returns the merged features, or an empty collection when nothing is
   * found
   */
  async fetchUnion(
    bbox: BBox,
    unionProperty?: string,
  ): Promise<FeatureCollection<T["geometry"], T["properties"]>> {
    if (!this.metadataFetched) {
      await this.fetchMetadata();
    }
    const features = await this.fetch(bbox);
    if (features.length === 0) {
      return fc([]);
    } else {
      return union(
        fc(features as unknown as Feature<Polygon | MultiPolygon>[]),
        unionProperty || undefined,
      );
    }
  }

  /**
   * Builds one tree per `_id` from features carrying `_id` and `_ancestors`
   * properties. Features missing either property (or with a falsy value for
   * it) are skipped.
   *
   * `_ancestors` is split on commas, parsed as numbers and reversed to form
   * each leaf node's ancestor list.
   */
  private buildTrees(features: Feature[]): FeatureTree[] {
    // console.time("buildTrees");
    const trees: FeatureTree[] = [];
    const featuresById: { [key: number]: Feature[] } = {};
    // Group features by _id. Each subdivided feature needs its own tree
    for (const feature of features) {
      if (
        feature.properties &&
        feature.properties._ancestors &&
        feature.properties._id
      ) {
        if (!(feature.properties._id in featuresById)) {
          featuresById[feature.properties._id] = [];
        }
        featuresById[feature.properties._id].push(feature);
      }
    }
    let nodeId = 0;
    for (const _id in featuresById) {
      const features = featuresById[_id];
      const nodes: Node[] = features.map((f) => {
        return {
          nodeId: nodeId++,
          leaf: f as VectorFeature,
          ancestors: (f.properties ? f.properties._ancestors || "" : "")
            .split(",")
            .map((a: string) => Number.parseFloat(a))
            .reverse(),
        };
      });
      trees.push({
        fid: Number.parseInt(_id),
        root: this.createAncestors(nodes).children![0],
      });
    }
    // console.timeEnd("buildTrees");
    return trees;
  }

  /**
   * Arranges leaf nodes under a synthetic root by grouping them on successive
   * entries of their `ancestors` lists, then collapses interior nodes that
   * have exactly one child.
   *
   * NOTE: sorts `nodes` in place, rewrites each node's `ancestors`, and reads
   * `nodes[0]`, so callers must pass a non-empty array.
   */
  private createAncestors(nodes: Node[]): Node {
    let nodeId = 0;
    // Get node ancestors and sort by deepness
    nodes.sort((a, b) => b.ancestors.length - a.ancestors.length).reverse();

    const populateChildren = (node: Node, children: Node[]) => {
      if (children.length === 0) {
        return node;
      }
      children.sort((a, b) => a.ancestors[0] - b.ancestors[0]);
      // group children by their next ancestor
      const groups: { [key: string]: Node[] } = {};
      for (const child of children) {
        const key = (child.ancestors[0] || "").toString();
        if (!groups[key]) {
          groups[key] = [];
        }
        groups[key].push(child);
      }
      // for each group, push a new node onto the node's children
      for (const key in groups) {
        const cutline = groups[key][0].ancestors[0];
        for (const n of groups[key]) n.ancestors = n.ancestors.slice(1);
        if (cutline) {
          (node.children as Node[]).push(
            populateChildren(
              {
                nodeId: nodeId++,
                cutline,
                ancestors: [...node.ancestors, cutline],
                children: [],
              },
              groups[key],
            ),
          );
        } else {
          // No further ancestor: these are leaves directly under `node`
          (node.children as Node[]) = groups[key].map((n) => ({
            nodeId: nodeId++,
            leaf: n.leaf,
            ancestors: node.ancestors,
          }));
        }
      }
      return node;
    };

    const rootNode = {
      cutline: nodes[0].ancestors[0],
      children: [],
      ancestors: [],
      nodeId: nodeId++,
    };
    populateChildren(rootNode, nodes);

    const pruneSingleNodedChildren = (root: Node) => {
      for (const child of root.children || []) {
        pruneSingleNodedChildren(child);
      }
      if (root.children && root.children.length === 1) {
        root.cutline = root.children[0].cutline;
        root.children = root.children[0].children;
      }
    };
    for (const child of rootNode.children) {
      pruneSingleNodedChildren(child);
    }
    // pruneSingleNodedChildren(rootNode);
    return rootNode;
  }

  /** Fetches the features overlapping the bounding box of `feature`. */
  async fetchOverlapping(feature: Feature) {
    return this.fetch(getBBox(feature));
  }
}