Skip to content

Bug: Nani calculations with other metrics #79

@kzsigmond

Description

@kzsigmond

Issue

I was trying to run NANI calculations with MDANCE so that I could compare the results to the C++ implementation. However, I ran into a bug when I tried to run calculations with metrics other than MSD.

Code that reproduces the bug

This is the screen_nani.py file from the scripts folder. The only change is that the metric is JT instead of MSD.

import os

import numpy as np

from mdance.cluster.nani import KmeansNANI, compute_scores
from mdance import data
from mdance.tools.bts import extended_comparison


# System info
input_traj_numpy = data.sim_traj_numpy                              # Passed to `np.load` in the main section below
N_atoms = 50                                                        # Forwarded as `N_atoms` to KmeansNANI and extended_comparison
sieve = 1                                                           # Stride used when slicing the loaded trajectory (`[::sieve]`)

# NANI parameters
output_dir = 'outputs'                                              # Created if missing; the summary CSV is written here
init_type = 'comp_sim'                                              # Default
metric = 'JT'                                                       # Changed from MSD; this is the setting that triggers the reported error
start_n_clusters = 5                                                # At least 2 clusters
end_n_clusters = 30                                                 # Maximum number of clusters


if __name__ == '__main__':
    # Initialisation schemes whose initiators are computed once, for the
    # largest cluster count, and then sliced (`initiators[:k]`) for each k.
    precomputed_inits = ('comp_sim', 'div_select', 'strat_all', 'strat_reduced')
    # Schemes for which the scheme name itself is passed as `initiators`.
    named_inits = ('k-means++', 'random')
    # Scheme whose initiators are recomputed for every cluster count.
    per_k_inits = ('vanilla_kmeans++',)

    # Fail early with a clear message instead of a NameError further down.
    if init_type not in precomputed_inits + named_inits + per_k_inits:
        raise ValueError(f'Unsupported init_type: {init_type!r}')

    # `exist_ok=True` avoids the check-then-create race of
    # `os.path.exists` followed by `os.makedirs`.
    os.makedirs(output_dir, exist_ok=True)

    traj_numpy = np.load(input_traj_numpy)[::sieve]
    if init_type in precomputed_inits:
        # These schemes are run only once to get the initiators.
        percentage = 10
        mod = KmeansNANI(data=traj_numpy, n_clusters=end_n_clusters, metric=metric,
                         N_atoms=N_atoms, init_type=init_type, percentage=percentage)
        # NOTE: with metric='JT' the reported IndexError is raised from
        # inside this call (see the traceback in this report).
        initiators = mod.initiate_kmeans()
    else:
        # No percentage applies; the empty string keeps the output file name
        # and header free of a numeric prefix.
        percentage = ''

    all_scores = []
    for n_clusters in range(start_n_clusters, end_n_clusters + 1):
        # Run k-means clustering
        if init_type in precomputed_inits:
            mod = KmeansNANI(data=traj_numpy, n_clusters=n_clusters, metric=metric,
                             N_atoms=N_atoms, init_type=init_type,
                             percentage=percentage)
            print("metric: ", metric)
            labels, centers, n_iter = mod.kmeans_clustering(initiators[:n_clusters])
        elif init_type in named_inits:
            mod = KmeansNANI(data=traj_numpy, n_clusters=n_clusters, metric=metric,
                             N_atoms=N_atoms, init_type=init_type)
            labels, centers, n_iter = mod.kmeans_clustering(initiators=init_type)
        else:  # 'vanilla_kmeans++'
            mod = KmeansNANI(data=traj_numpy, n_clusters=n_clusters, metric=metric,
                             N_atoms=N_atoms, init_type=init_type)
            initiators = mod.initiate_kmeans()
            labels, centers, n_iter = mod.kmeans_clustering(initiators=initiators)

        # Compute scores
        ch_score, db_score = compute_scores(data=traj_numpy, labels=labels)

        # Sum the extended comparison (using the selected `metric`) over the
        # frames of each cluster, then average over the number of clusters.
        total = 0
        for cluster_id in range(n_clusters):
            cluster_frames = traj_numpy[np.where(labels == cluster_id)[0]]
            total += extended_comparison(cluster_frames, traj_numpy_type='full',
                                         metric=metric, N_atoms=N_atoms)
        all_scores.append((n_clusters, n_iter, ch_score, db_score,
                           total / n_clusters))

    all_scores = np.array(all_scores)
    header = f'init_type: {init_type}, percentage: {percentage}, metric: {metric}, sieve: {sieve}\n'
    # NOTE: the last column is labelled 'Average MSD' regardless of `metric`;
    # the label is kept unchanged so existing output files stay comparable.
    header += 'Number of clusters, Number of iterations, Calinski-Harabasz score, Davies-Bouldin score, Average MSD'
    np.savetxt(f'{output_dir}/{percentage}{init_type}_summary.csv', all_scores,
               delimiter=',', header=header, fmt='%s')

Traceback error

Traceback (most recent call last):
  File "/Users/krisztina/Documents/work/research/code/CPP-MDANCE/deps/MDANCE/scripts/nani/screen_nani.py", line 36, in <module>
    initiators = mod.initiate_kmeans()
                 ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/krisztina/Documents/work/research/code/CPP-MDANCE/deps/MDANCE/src/mdance/cluster/nani.py", line 121, in initiate_kmeans
    initiator_idxs = diversity_selection(top_cc_data, 100, self.metric, self.N_atoms, 
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/krisztina/Documents/work/research/code/CPP-MDANCE/deps/MDANCE/src/mdance/tools/bts.py", line 481, in diversity_selection
    selected_condensed += matrix[new_index_n]
                          ~~~~~~^^^^^^^^^^^^^
IndexError: index 601 is out of bounds for axis 0 with size 600

Package versions

  • python: 3.11.4
  • numpy: 2.3.3
  • mdance: 0.3.8

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions