dbscan_gps
Version:
A Node.js package for clustering GPS coordinates using the DBSCAN algorithm
265 lines (221 loc) • 6.6 kB
JavaScript
// Great-circle (haversine) distance helpers for GPS coordinates.
// Conversion factor applied to a distance in kilometres to express it in miles.
const one_km_in_miles=0.62137;
/**
 * @param {double} degrees: an angle measured in degrees
 * @return {double}: the same angle measured in radians
 */
function degreesToRadians(degrees) {
  // Multiply first, then divide, so rounding matches `degrees * Math.PI / 180`.
  const scaled = degrees * Math.PI;
  return scaled / 180;
}
/**
 * Great-circle distance between two GPS points, computed with the haversine
 * formula on a sphere of radius 6371 km.
 *
 * @param {double} lat1: latitude of first GPS point, in degrees
 * @param {double} lon1: longitude of first GPS point, in degrees
 * @param {double} lat2: latitude of second GPS point, in degrees
 * @param {double} lon2: longitude of second GPS point, in degrees
 * @return {double}: return distance between two GPS points in KM
 */
function distanceInKmBetweenEarthCoordinates(lat1, lon1, lat2, lon2) {
  const earthRadiusKm = 6371;
  const dLat = degreesToRadians(lat2 - lat1);
  const dLon = degreesToRadians(lon2 - lon1);
  // Use fresh names instead of overwriting the degree-valued parameters.
  const lat1Rad = degreesToRadians(lat1);
  const lat2Rad = degreesToRadians(lat2);
  const sinHalfDLat = Math.sin(dLat / 2);
  const sinHalfDLon = Math.sin(dLon / 2);
  const haversine = sinHalfDLat * sinHalfDLat +
    sinHalfDLon * sinHalfDLon * Math.cos(lat1Rad) * Math.cos(lat2Rad);
  // Rounding can push the haversine term marginally outside [0, 1] for
  // (near-)antipodal points, which would make Math.sqrt(1 - a) NaN.
  // NaN inputs still propagate as NaN through Math.min/Math.max.
  const a = Math.min(1, Math.max(0, haversine));
  const c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
  return earthRadiusKm * c;
}
/**
 * Same measurement as distanceInKmBetweenEarthCoordinates, converted to miles.
 *
 * @param {double} lat1: latitude of first GPS point
 * @param {double} lon1: longitude of first GPS point
 * @param {double} lat2: latitude of second GPS point
 * @param {double} lon2: longitude of second GPS point
 * @return {double}: return distance between two GPS points in miles
 */
function distanceInMilesBetweenEarthCoordinates(lat1, lon1, lat2, lon2) {
  // Measure in kilometres first, then scale by the km-to-miles factor.
  const kilometres = distanceInKmBetweenEarthCoordinates(lat1, lon1, lat2, lon2);
  return kilometres * one_km_in_miles;
}
/**
 * Builds the cache key for an unordered pair of point indexes.
 *
 * @param {integer} i: the index of first point
 * @param {integer} j: the index of the second point
 * @return {string}: "<smaller>-<bigger>", so (i, j) and (j, i) map to the same key
 */
function createDistanceMapIndex(i, j)
{
  if (i > j) {
    return j + "-" + i;
  }
  return i + "-" + j;
}
/**
 * @param {*} item: any value
 * @return {boolean}: true when item is null or undefined, false otherwise
 */
function isNull(item)
{
  // Loose equality with null matches exactly null and undefined.
  return item == null;
}
/**
* @param {array} dataset: an array of points, each point is shaped as [lat lon];
* @param {integer} eps: the radius of the cluster
* @param {string} eps_unit: in either "km" or "mi"
* @param {integer} minPts: the minimal number of points in a cluster
*/
module.exports=function(dataset, eps, eps_unit="km", minPts)
{
var external={};
var internal={};
var distance_map={
};
var labeled_points={
}
var noise_points={
};
var dist=null;
internal.calculateDistance=function(point1, point2)
{
if(isNull(point1) || isNull(point2))
{
throw "missing one of the points";
}
var index=createDistanceMapIndex(point1, point2);
if(isNull(distance_map[index]))
{
var distance=dist(dataset[point1][1], dataset[point1][0],dataset[point2][1], dataset[point2][0] );
distance_map[index]=distance;
return distance;
}
return distance_map[index];
}
internal.queryCluster=function(point, excluded)
{
var cluster=[];
for(var i=0;i<dataset.length;i++)
{
if(point==i || i==excluded)
{
continue;
}
else {
if(internal.calculateDistance(point, i)<=eps)
{
cluster.push(i);
}
}
}
return cluster;
}
internal.expandCluster=function(center, cluster, cluster_index)
{
for(var i=0;i<cluster.length;i++)
{
if(noise_points[cluster[i]])
{
delete noise_points[cluster[i]]
}
if(labeled_points[cluster[i]])
{
continue;
}
else {
labeled_points[cluster[i]]=cluster_index;
}
var subCluster=internal.queryCluster(cluster[i], center);
if(subCluster.length>=minPts)
{
for(var z=0;z<subCluster.length;z++)
{
var found=false;
for(var j=0;j<cluster.length;j++)
{
if(cluster[j]==subCluster[z])
{
found=true;
break;
}
}
if(!found)
{
cluster.push(subCluster[z]);
}
}
}
}
}
/**
* @callback {function} callback: (error, clusters):
error is a string describing the error,
clusters is an array of clusters; each cluster is another array of points with index coorresponding to the index in the input dataset
*/
external.fit=function(callback)
{
if(!callback)
{
throw "no callback function found...";
}
if(!Number.isInteger(minPts))
{
callback("minPts is not a valid integer.", null);
return;
}
if(minPts==0)
{
callback("minPts must be bigger than zero.", null);
return;
}
if(isNaN(eps) || eps<=0)
{
callback("Invalid eps", null);
return;
}
if(dataset.length==0)
{
callback("Need at least one point in the dataset", null);
return;
}
if(dataset.length<minPts)
{
callback("not enough points in the dataset to form a cluster", null);
return;
}
if(!eps_unit)
{
eps_unit="km"
}
eps_unit=eps_unit.toLowerCase().trim();
if(eps_unit!="km" && eps_unit!="mi")
{
callback("Invalid eps unit; it must be either 'km' or 'mi'. ", null);
return;
}
if(eps_unit=="km")
{
dist=distanceInKmBetweenEarthCoordinates;
}
else {
dist=distanceInMilesBetweenEarthCoordinates;
}
var cluster_index=0;
for(var i=0;i<dataset.length;i++)
{
if(!isNull(noise_points[i]) || !isNull(labeled_points[i])) // the point is already labeled as noise or has been labeled in another cluster
{
continue;
}
var cluster=internal.queryCluster(i, i);
if(cluster.length<minPts)
{
noise_points[i]=true;
continue;
}
else {
labeled_points[i]=cluster_index;
internal.expandCluster(i, cluster, cluster_index);
cluster_index++;
}
}
var clusters=[]
for (var data_point in labeled_points) {
if (labeled_points.hasOwnProperty(data_point)) {
if(isNull(clusters[ labeled_points[data_point] ]))
{
clusters[ labeled_points[data_point] ]=[Number(data_point)];
}
else {
clusters[ labeled_points[data_point] ].push(Number(data_point));
}
}
}
callback(null, clusters);
}
return external;
}