@uwdata/mosaic-sql
Version:
SQL query construction and analysis.
120 lines (113 loc) • 4.16 kB
text/typescript
import type { ExprValue } from '../types.js';
import { Query, type SelectQuery } from '../ast/query.js';
import { count, max, sum } from '../functions/aggregate.js';
import { int32 } from '../functions/cast.js';
import { abs, floor, greatest, round, sign } from '../functions/numeric.js';
import { add, div, gt, isNull, lt, lte, mul, or, sub } from '../functions/operators.js';
import { asc } from '../functions/order-by.js';
import { sql } from '../functions/sql-template-tag.js';
import { lead } from '../functions/window.js';
import { over } from '../util/ast.js';
/**
 * Compute line segment densities over a gridded 2D domain. The returned
 * query uses multiple subqueries (CTEs) to identify line segment end point
 * pairs, perform line rasterization in-database, normalize arc lengths,
 * and then sum results for all line series to produce a density map.
 * Based on Moritz and Fisher's work: https://arxiv.org/abs/1808.06019
 *
 * Note: `select` is invoked directly on the input query `q` to add the
 * binned `x` and `y` columns and its return value is discarded, so this
 * function relies on `q` being updated in place.
 *
 * @param q The base query over the data.
 * @param x Bin expression for x dimension.
 *  Provides gridded x coordinates, potentially with a fractional component.
 * @param y Bin expression for y dimension.
 *  Provides gridded y coordinates, potentially with a fractional component.
 * @param z Group by columns that segment data into individual line
 *  series. An empty array indicates there is only a single line series.
 * @param xn The number of grid bins for the x dimension.
 * @param yn The number of grid bins for the y dimension.
 * @param groupby Additional group by expressions. Separate
 *  line density maps are created for each of these groups.
 * @param normalize Flag toggling approximate arc-length normalization to
 *  improve accuracy and reduce artifacts (default `true`).
 * @returns A query producing, for each `groupby` group, one row per
 *  occupied grid cell with columns `index` (computed as `x + y * xn`)
 *  and `density`.
 */
export function lineDensity(
  q: SelectQuery,
  x: ExprValue,
  y: ExprValue,
  z: string[],
  xn: number,
  yn: number,
  groupby: string[] = [],
  normalize: boolean = true
) {
  // select x, y points binned to the grid
  q.select({
    x: int32(floor(x)),
    y: int32(floor(y))
  });

  // select line segment end point pairs
  // retain only segments that have at least one end point inside the
  // grid region along each axis
  const groups = groupby.concat(z);
  const pairs = Query
    .from(q)
    .select(groups, {
      x0: 'x',
      y0: 'y',
      // deltas to the next point of the same series, ordered by x
      dx: sub(lead('x').over('sw'), 'x'),
      dy: sub(lead('y').over('sw'), 'y')
    })
    .window({
      sw: over().partitionby(groups).orderby(asc('x'))
    })
    .qualify([
      or(lt('x0', xn), lt(add('x0', 'dx'), xn)),
      or(lt('y0', yn), lt(add('y0', 'dy'), yn)),
      // The lower bounds are inclusive: cell 0 is part of the grid (the
      // `points` filter below keeps 0 <= x and 0 <= y). Strict `> 0` tests
      // here would drop segments lying entirely in the first row or column.
      or(lte(0, 'x0'), lte(0, add('x0', 'dx'))),
      or(lte(0, 'y0'), lte(0, add('y0', 'dy')))
    ]);

  // create indices to join against for rasterization
  // generate the maximum number of indices needed
  const num = Query
    .select({ x: greatest(max(abs('dx')), max(abs('dy'))) })
    .from('pairs');
  const indices = Query.select({
    i: int32(sql`UNNEST(range((${num})))`)
  });

  // rasterize line segments
  const raster = Query.unionAll(
    // segments with |dy| <= |dx|: step one cell at a time along x
    Query
      .select(groups, {
        x: add('x0', 'i'),
        y: add('y0', int32(round(div(mul('i', 'dy'), 'dx'))))
      })
      .from('pairs', 'indices')
      .where([lte(abs('dy'), abs('dx')), lt('i', abs('dx'))]),
    // segments with |dy| > |dx|: step one cell at a time along y,
    // in the direction given by the sign of dy
    Query
      .select(groups, {
        x: add('x0', int32(round(div(mul(mul(sign('dy'), 'i'), 'dx'), 'dy')))),
        y: add('y0', mul(sign('dy'), 'i'))
      })
      .from('pairs', 'indices')
      .where([gt(abs('dy'), abs('dx')), lt('i', abs('dy'))]),
    // rows with a null dx (no following point in the series window):
    // emit the point itself
    Query
      .select(groups, { x: 'x0', y: 'y0' })
      .from('pairs')
      .where(isNull('dx'))
  );

  // filter raster, normalize columns for each series
  // when normalizing, each raster row is weighted by the reciprocal of the
  // number of raster rows sharing the same x and group values
  const points = Query
    .from('raster')
    .select(groups, 'x', 'y',
      normalize
        ? { w: div(1, count().partitionby(['x'].concat(groups))) }
        : null
    )
    .where([lte(0, 'x'), lt('x', xn), lte(0, 'y'), lt('y', yn)]);

  // sum normalized, rasterized series into output grids
  return Query
    .with({ pairs, indices, raster, points })
    .from('points')
    .select(groupby, {
      index: add('x', mul('y', int32(xn))),
      density: normalize ? sum('w') : count()
    })
    .groupby('index', groupby);
}