From 661d5938f859f28c579b9161ae5fe4aed3aacfe5 Mon Sep 17 00:00:00 2001 From: tom-whitehead Date: Mon, 23 Jun 2025 20:58:58 +0100 Subject: [PATCH 1/4] fix: 180th meridian haversine bug --- src/centers.rs | 39 ++++++++++++++++++++++----------------- tests/tests.rs | 31 +++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/centers.rs b/src/centers.rs index 021490d..5a74e45 100644 --- a/src/centers.rs +++ b/src/centers.rs @@ -98,41 +98,46 @@ impl Center { .filter(|&&label| label != -1) .collect::>() .len(); - let mut centers = vec![vec![T::zero(), T::zero()]; n_clusters]; + let mut centers = vec![vec![T::zero(), T::zero(), T::zero()]; n_clusters]; let mut counts = vec![T::zero(); n_clusters]; - + for (point, &label) in data.iter().zip(labels.iter()) { if label != -1 { let cluster_index = label as usize; - centers[cluster_index][0] = centers[cluster_index][0] + point[0].to_radians(); - centers[cluster_index][1] = centers[cluster_index][1] + point[1].to_radians(); + + let lat = point[0].to_radians(); + let lon = point[1].to_radians(); + + let x = lon.cos() * lat.cos(); + let y = lon.sin() * lat.cos(); + let z = lat.sin(); + + centers[cluster_index][0] = centers[cluster_index][0] + x; + centers[cluster_index][1] = centers[cluster_index][1] + y; + centers[cluster_index][2] = centers[cluster_index][2] + z; counts[cluster_index] = counts[cluster_index] + T::one(); } } - - // Calculate final geo centroid for each cluster + for (center, &count) in centers.iter_mut().zip(counts.iter()) { if count > T::zero() { - let avg_lat = center[0] / count; - let avg_lon = center[1] / count; - - let x = avg_lon.cos() * avg_lat.cos(); - let y = avg_lon.sin() * avg_lat.cos(); - let z = avg_lat.sin(); - + let x = center[0] / count; + let y = center[1] / count; + let z = center[2] / count; + let lon = y.atan2(x); let hyp = (x * x + y * y).sqrt(); let lat = z.atan2(hyp); - + // Convert back to degrees center[0] = lat.to_degrees(); center[1] = lon.to_degrees(); } } - - centers + + centers.iter().map(|c| vec![c[0], c[1]]).collect() } - + fn calc_medoids T>( &self, data: &[Vec], diff --git a/tests/tests.rs b/tests/tests.rs index c3fe40e..7d9ff1b 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -276,6 +276,37 @@ fn test_nyc_landmarks_haversine() { assert_eq!(result[9], -1); } +#[test] +fn test_180th_meridian() { + let data = vec![ + vec![-16.8410, 179.9813], // Taveuni, Fiji + vec![-16.7480, -179.9670], // Qamea, Fiji + vec![51.5085, -0.1257], // London - noise + ]; + + let hyper_params = HdbscanHyperParams::builder() + .dist_metric(DistanceMetric::Haversine) + .allow_single_cluster(true) + .min_cluster_size(2) + .min_samples(1) + .build(); + + let clusterer = Hdbscan::new(&data, hyper_params); + let labels = clusterer.cluster().unwrap(); + + // There is only one cluster + assert_eq!(1, labels.iter().filter(|&&x| x != -1).collect::>().len()); + // The last point is noise + assert_eq!(-1, labels[2]); + + let centroids = clusterer.calc_centers(Center::GeoCentroid, &labels).unwrap(); + let cluster_longitude = centroids[0][1]; + + // The cluster centroid is not impacted by the longitudes being either side + // of the 180th meridian + assert!(cluster_longitude > 179.0 || cluster_longitude < -179.0); +} + #[test] fn test_cylindrical_hsv_colours() { // HSV colours re-ordered to SHV From f8dfce3dafb04aed2ea7939ee3193b391871a8e1 Mon Sep 17 00:00:00 2001 From: tom-whitehead Date: Mon, 23 Jun 2025 21:03:08 +0100 Subject: [PATCH 2/4] test name --- tests/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests.rs b/tests/tests.rs index 7d9ff1b..bfbe949 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -277,7 +277,7 @@ fn test_nyc_landmarks_haversine() { } #[test] -fn test_180th_meridian() { +fn geo_cluster_across_180th_meridian() { let data = vec![ vec![-16.8410, 179.9813], // Taveuni, Fiji vec![-16.7480, -179.9670], // Qamea, Fiji From dfea4fcd25c0b01de582c8e7677fe0729c54b099 Mon Sep 17 00:00:00 2001 From: tom-whitehead Date: Mon, 23 Jun 2025 21:05:08 +0100 Subject: [PATCH 3/4] cargo fmt --- src/centers.rs | 18 +++++++++--------- tests/tests.rs | 27 ++++++++++++++++++--------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/centers.rs b/src/centers.rs index 5a74e45..61446cb 100644 --- a/src/centers.rs +++ b/src/centers.rs @@ -100,44 +100,44 @@ impl Center { .len(); let mut centers = vec![vec![T::zero(), T::zero(), T::zero()]; n_clusters]; let mut counts = vec![T::zero(); n_clusters]; - + for (point, &label) in data.iter().zip(labels.iter()) { if label != -1 { let cluster_index = label as usize; - + let lat = point[0].to_radians(); let lon = point[1].to_radians(); - + let x = lon.cos() * lat.cos(); let y = lon.sin() * lat.cos(); let z = lat.sin(); - + centers[cluster_index][0] = centers[cluster_index][0] + x; centers[cluster_index][1] = centers[cluster_index][1] + y; centers[cluster_index][2] = centers[cluster_index][2] + z; counts[cluster_index] = counts[cluster_index] + T::one(); } } - + for (center, &count) in centers.iter_mut().zip(counts.iter()) { if count > T::zero() { let x = center[0] / count; let y = center[1] / count; let z = center[2] / count; - + let lon = y.atan2(x); let hyp = (x * x + y * y).sqrt(); let lat = z.atan2(hyp); - + // Convert back to degrees center[0] = lat.to_degrees(); center[1] = lon.to_degrees(); } } - + centers.iter().map(|c| vec![c[0], c[1]]).collect() } - + fn calc_medoids T>( &self, data: &[Vec], diff --git a/tests/tests.rs b/tests/tests.rs index bfbe949..355d588 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -280,29 +280,38 @@ fn test_nyc_landmarks_haversine() { fn geo_cluster_across_180th_meridian() { let data = vec![ vec![-16.8410, 179.9813], // Taveuni, Fiji - vec![-16.7480, -179.9670], // Qamea, Fiji - vec![51.5085, -0.1257], // London - noise + vec![-16.7480, -179.9670], // Qamea, Fiji + vec![51.5085, -0.1257], // London - noise ]; - + let hyper_params = HdbscanHyperParams::builder() .dist_metric(DistanceMetric::Haversine) .allow_single_cluster(true) .min_cluster_size(2) .min_samples(1) .build(); - + let clusterer = Hdbscan::new(&data, hyper_params); let labels = clusterer.cluster().unwrap(); - + // There is only one cluster - assert_eq!(1, labels.iter().filter(|&&x| x != -1).collect::>().len()); + assert_eq!( + 1, + labels + .iter() + .filter(|&&x| x != -1) + .collect::>() + .len() + ); // The last point is noise assert_eq!(-1, labels[2]); - - let centroids = clusterer.calc_centers(Center::GeoCentroid, &labels).unwrap(); + + let centroids = clusterer + .calc_centers(Center::GeoCentroid, &labels) + .unwrap(); let cluster_longitude = centroids[0][1]; - // The cluster centroid is not impacted by the longitudes being either side + // The cluster centroid is not impacted by the longitudes being either side // of the 180th meridian assert!(cluster_longitude > 179.0 || cluster_longitude < -179.0); } From 4c8c7703ea3cd35dc4dff490c645b25b47c069d2 Mon Sep 17 00:00:00 2001 From: tom-whitehead Date: Mon, 23 Jun 2025 21:07:21 +0100 Subject: [PATCH 4/4] version bump --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6b96969..2c5ad2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hdbscan" -version = "0.10.0" +version = "0.10.1" edition = "2021" authors = [ "Tom Whitehead ", ] description = "HDBSCAN clustering in pure Rust. A huge improvement on DBSCAN, capable of identifying clusters of varying densities."