diff --git a/Cargo.toml b/Cargo.toml index 6b96969..2c5ad2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hdbscan" -version = "0.10.0" +version = "0.10.1" edition = "2021" authors = [ "Tom Whitehead ", ] description = "HDBSCAN clustering in pure Rust. A huge improvement on DBSCAN, capable of identifying clusters of varying densities." diff --git a/src/centers.rs b/src/centers.rs index 021490d..61446cb 100644 --- a/src/centers.rs +++ b/src/centers.rs @@ -98,27 +98,32 @@ impl Center { .filter(|&&label| label != -1) .collect::>() .len(); - let mut centers = vec![vec![T::zero(), T::zero()]; n_clusters]; + let mut centers = vec![vec![T::zero(), T::zero(), T::zero()]; n_clusters]; let mut counts = vec![T::zero(); n_clusters]; for (point, &label) in data.iter().zip(labels.iter()) { if label != -1 { let cluster_index = label as usize; - centers[cluster_index][0] = centers[cluster_index][0] + point[0].to_radians(); - centers[cluster_index][1] = centers[cluster_index][1] + point[1].to_radians(); + + let lat = point[0].to_radians(); + let lon = point[1].to_radians(); + + let x = lon.cos() * lat.cos(); + let y = lon.sin() * lat.cos(); + let z = lat.sin(); + + centers[cluster_index][0] = centers[cluster_index][0] + x; + centers[cluster_index][1] = centers[cluster_index][1] + y; + centers[cluster_index][2] = centers[cluster_index][2] + z; counts[cluster_index] = counts[cluster_index] + T::one(); } } - // Calculate final geo centroid for each cluster for (center, &count) in centers.iter_mut().zip(counts.iter()) { if count > T::zero() { - let avg_lat = center[0] / count; - let avg_lon = center[1] / count; - - let x = avg_lon.cos() * avg_lat.cos(); - let y = avg_lon.sin() * avg_lat.cos(); - let z = avg_lat.sin(); + let x = center[0] / count; + let y = center[1] / count; + let z = center[2] / count; let lon = y.atan2(x); let hyp = (x * x + y * y).sqrt(); @@ -130,7 +135,7 @@ impl Center { } } - centers + centers.iter().map(|c| vec![c[0], c[1]]).collect() } fn calc_medoids T>( diff --git a/tests/tests.rs b/tests/tests.rs index c3fe40e..355d588 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -276,6 +276,46 @@ fn test_nyc_landmarks_haversine() { assert_eq!(result[9], -1); } +#[test] +fn geo_cluster_across_180th_meridian() { + let data = vec![ + vec![-16.8410, 179.9813], // Taveuni, Fiji + vec![-16.7480, -179.9670], // Qamea, Fiji + vec![51.5085, -0.1257], // London - noise + ]; + + let hyper_params = HdbscanHyperParams::builder() + .dist_metric(DistanceMetric::Haversine) + .allow_single_cluster(true) + .min_cluster_size(2) + .min_samples(1) + .build(); + + let clusterer = Hdbscan::new(&data, hyper_params); + let labels = clusterer.cluster().unwrap(); + + // There is only one cluster + assert_eq!( + 1, + labels + .iter() + .filter(|&&x| x != -1) + .collect::>() + .len() + ); + // The last point is noise + assert_eq!(-1, labels[2]); + + let centroids = clusterer + .calc_centers(Center::GeoCentroid, &labels) + .unwrap(); + let cluster_longitude = centroids[0][1]; + + // The cluster centroid is not impacted by the longitudes being either side + // of the 180th meridian + assert!(cluster_longitude > 179.0 || cluster_longitude < -179.0); +} + #[test] fn test_cylindrical_hsv_colours() { // HSV colours re-ordered to SHV