Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "hdbscan"
version = "0.10.0"
version = "0.10.1"
edition = "2021"
authors = [ "Tom Whitehead <t.j.whitehead21@gmail.com>", ]
description = "HDBSCAN clustering in pure Rust. A huge improvement on DBSCAN, capable of identifying clusters of varying densities."
Expand Down
27 changes: 16 additions & 11 deletions src/centers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,27 +98,32 @@ impl Center {
.filter(|&&label| label != -1)
.collect::<HashSet<_>>()
.len();
let mut centers = vec![vec![T::zero(), T::zero()]; n_clusters];
let mut centers = vec![vec![T::zero(), T::zero(), T::zero()]; n_clusters];
let mut counts = vec![T::zero(); n_clusters];

for (point, &label) in data.iter().zip(labels.iter()) {
if label != -1 {
let cluster_index = label as usize;
centers[cluster_index][0] = centers[cluster_index][0] + point[0].to_radians();
centers[cluster_index][1] = centers[cluster_index][1] + point[1].to_radians();

let lat = point[0].to_radians();
let lon = point[1].to_radians();

let x = lon.cos() * lat.cos();
let y = lon.sin() * lat.cos();
let z = lat.sin();

centers[cluster_index][0] = centers[cluster_index][0] + x;
centers[cluster_index][1] = centers[cluster_index][1] + y;
centers[cluster_index][2] = centers[cluster_index][2] + z;
counts[cluster_index] = counts[cluster_index] + T::one();
}
}

// Calculate final geo centroid for each cluster
for (center, &count) in centers.iter_mut().zip(counts.iter()) {
if count > T::zero() {
let avg_lat = center[0] / count;
let avg_lon = center[1] / count;

let x = avg_lon.cos() * avg_lat.cos();
let y = avg_lon.sin() * avg_lat.cos();
let z = avg_lat.sin();
let x = center[0] / count;
let y = center[1] / count;
let z = center[2] / count;

let lon = y.atan2(x);
let hyp = (x * x + y * y).sqrt();
Expand All @@ -130,7 +135,7 @@ impl Center {
}
}

centers
centers.iter().map(|c| vec![c[0], c[1]]).collect()
}

fn calc_medoids<T: Float, F: Fn(&[T], &[T]) -> T>(
Expand Down
40 changes: 40 additions & 0 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,46 @@ fn test_nyc_landmarks_haversine() {
assert_eq!(result[9], -1);
}

/// Two points on opposite sides of the 180th meridian must be grouped into a
/// single cluster, and the geo centroid of that cluster must itself sit next
/// to the meridian instead of being dragged towards longitude 0.
#[test]
fn geo_cluster_across_180th_meridian() {
    // Each row is [latitude, longitude] in degrees.
    let data = vec![
        vec![-16.8410, 179.9813],  // Taveuni, Fiji
        vec![-16.7480, -179.9670], // Qamea, Fiji
        vec![51.5085, -0.1257],    // London - noise
    ];

    let hyper_params = HdbscanHyperParams::builder()
        .dist_metric(DistanceMetric::Haversine)
        .allow_single_cluster(true)
        .min_cluster_size(2)
        .min_samples(1)
        .build();

    let clusterer = Hdbscan::new(&data, hyper_params);
    let labels = clusterer.cluster().unwrap();

    // Exactly one distinct non-noise label is expected.
    let distinct_clusters: HashSet<_> = labels.iter().filter(|&&label| label != -1).collect();
    assert_eq!(1, distinct_clusters.len());

    // London (the third point) must be classed as noise.
    assert_eq!(-1, labels[2]);

    let centroids = clusterer
        .calc_centers(Center::GeoCentroid, &labels)
        .unwrap();
    // Index 1 of a centroid holds its longitude.
    let cluster_longitude = centroids[0][1];

    // The centroid longitude has to land within a degree of the 180th
    // meridian, on either side of it.
    assert!(cluster_longitude > 179.0 || cluster_longitude < -179.0);
}

#[test]
fn test_cylindrical_hsv_colours() {
// HSV colours re-ordered to SHV
Expand Down