From 28b13003a4897dc93d5700a35c6cdac844616b6d Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 18 Feb 2026 10:46:41 +0000 Subject: [PATCH 1/2] Add logs and metadata directories - Created logs/ directory for all log files: - output.log (Tee class) - results.log (blech_init.py) - ram_usage.log (ram_monitor.py) - blech_process.log (blech_process.py) - umap_results.log (umap_plots.py) - execution_log.json (pipeline_graph_check) - Created metadata/ directory for all metadata files: - electrode_layout_frame.csv - trial_info_frame.csv - dig_in_frame.csv - *.info file - *.params file - aggregated_characteristics.csv - unit_descriptor.csv - unit_profile.csv - waveform_classifier_recommendations.csv - Added backward compatibility to check old locations if files don't exist in new directories - Added helper functions: - ensure_dir(data_dir, subdir) - get_metadata_dir(data_dir) - get_logs_dir(data_dir) - find_file_in_dir(data_dir, filename_pattern) Co-authored-by: openhands --- blech_exp_info.py | 10 +- blech_init.py | 6 +- blech_make_arrays.py | 5 +- blech_post_process.py | 5 +- blech_process.py | 4 +- blech_units_characteristics.py | 5 +- emg/emg_freq_setup.py | 21 +++- tests/test_logs_metadata_dirs.py | 155 ++++++++++++++++++++++++++++++ utils/blech_data_summary.py | 17 +++- utils/blech_process_utils.py | 5 +- utils/blech_utils.py | 83 ++++++++++++++-- utils/ephys_data/ephys_data.py | 21 +++- utils/qa_utils/elbo_drift.py | 8 +- utils/ram_monitor.py | 4 +- utils/read_file.py | 23 +++-- utils/umap_plotting/umap_plots.py | 5 +- 16 files changed, 336 insertions(+), 41 deletions(-) create mode 100644 tests/test_logs_metadata_dirs.py diff --git a/blech_exp_info.py b/blech_exp_info.py index 9b4f6fd3..5f2cd56c 100644 --- a/blech_exp_info.py +++ b/blech_exp_info.py @@ -868,8 +868,11 @@ def process_electrode_layout(dir_path, dir_name, electrode_files, ports, electro Returns: Tuple containing layout_dict, fin_emg_port, orig_emg_electrodes, emg_muscle_str """ + # Use metadata directory for layout file + metadata_dir = os.path.join(dir_path, 'metadata') + os.makedirs(metadata_dir, exist_ok=True) layout_file_path = os.path.join( - dir_path, dir_name + "_electrode_layout.csv") + metadata_dir, dir_name + "_electrode_layout.csv") def yn_check(x): return x in ['y', 'yes', 'n', 'no', ''] @@ -930,6 +933,11 @@ def confirm_check(x): exit() # Read and process the layout file + # Check both metadata directory and old location for backward compatibility + if not os.path.exists(layout_file_path): + old_layout_path = os.path.join(dir_path, dir_name + "_electrode_layout.csv") + if os.path.exists(old_layout_path): + layout_file_path = old_layout_path layout_frame_filled = pd.read_csv(layout_file_path) if not args.programmatic: diff --git a/blech_init.py b/blech_init.py index 41a8f54f..f51f4462 100644 --- a/blech_init.py +++ b/blech_init.py @@ -170,7 +170,7 @@ def generate_processing_scripts(dir_name, blech_clust_dir, electrode_layout_fram f.write(f'DIR={dir_name} \n') print(f"parallel -k -j {job_count} --noswap --load 100% --progress " + "--memfree 4G --ungroup --retry-failed " + - f"--joblog $DIR/results.log " + + f"--joblog $DIR/logs/results.log " + "bash $DIR/temp/blech_process_single.sh " + f"::: {' '.join([str(x) for x in bash_electrode_list])}", file=f) @@ -204,7 +204,9 @@ def generate_processing_scripts(dir_name, blech_clust_dir, electrode_layout_fram this_pipeline_check.write_to_log(script_path, 'attempted') # If info_dict present but execution log is not # just create the execution log with blech_exp_info marked - if 'info_dict' in dir(metadata_handler) and not os.path.exists(metadata_handler.dir_name + '/execution_log.json'): + execution_log_path_new = os.path.join(metadata_handler.dir_name, 'logs', 'execution_log.json') + execution_log_path_old = os.path.join(metadata_handler.dir_name, 'execution_log.json') + if 'info_dict' in dir(metadata_handler) and not os.path.exists(execution_log_path_new) and not os.path.exists(execution_log_path_old): blech_exp_info_path = os.path.join( blech_clust_dir, 'blech_exp_info.py') this_pipeline_check.write_to_log(blech_exp_info_path, 'attempted') diff --git a/blech_make_arrays.py b/blech_make_arrays.py index 14fba2d1..9360944c 100644 --- a/blech_make_arrays.py +++ b/blech_make_arrays.py @@ -21,7 +21,7 @@ from tqdm import tqdm from blech_clust.utils.clustering import get_filtered_electrode from blech_clust.utils.blech_process_utils import return_cutoff_values -from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check +from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check, get_metadata_dir from blech_clust.utils.read_file import DigInHandler from ast import literal_eval import matplotlib.pyplot as plt @@ -376,7 +376,8 @@ def create_emg_trials_for_digin( trial_info_frame.to_hdf(metadata_handler.hdf5_name, 'trial_info_frame', mode='a') - csv_path = os.path.join(metadata_handler.dir_name, 'trial_info_frame.csv') + metadata_dir = get_metadata_dir(metadata_handler.dir_name) + csv_path = os.path.join(metadata_dir, 'trial_info_frame.csv') trial_info_frame.to_csv(csv_path, index=False) # Get list of units under the sorted_units group. diff --git a/blech_post_process.py b/blech_post_process.py index 411597c4..562ac919 100644 --- a/blech_post_process.py +++ b/blech_post_process.py @@ -87,7 +87,7 @@ os.environ['OPENBLAS_NUM_THREADS'] = '1' # noqa import blech_clust.utils.blech_post_process_utils as post_utils # noqa -from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check # noqa +from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check, get_metadata_dir # noqa from utils import blech_waveforms_datashader # noqa from multiprocessing import Pool, cpu_count # noqa from functools import partial # noqa @@ -588,8 +588,9 @@ print('==== Unit Table ====\n') print(current_unit_table) # Also write to disk +metadata_dir = get_metadata_dir(metadata_handler.dir_name) current_unit_table.to_csv( - os.path.join(metadata_handler.dir_name, 'unit_descriptor.csv'), + os.path.join(metadata_dir, 'unit_descriptor.csv'), index=False, ) diff --git a/blech_process.py b/blech_process.py index 8c80e897..294c3aa3 100644 --- a/blech_process.py +++ b/blech_process.py @@ -98,7 +98,9 @@ print(f'Processing electrode {electrode_num}') # Initialize or load processing log -log_path = pathlib.Path(metadata_handler.dir_name) / 'blech_process.log' +log_dir = os.path.join(metadata_handler.dir_name, 'logs') +os.makedirs(log_dir, exist_ok=True) +log_path = pathlib.Path(log_dir) / 'blech_process.log' if log_path.exists(): with open(log_path) as f: process_log = json.load(f) diff --git a/blech_units_characteristics.py b/blech_units_characteristics.py index 28ab153d..8179c114 100644 --- a/blech_units_characteristics.py +++ b/blech_units_characteristics.py @@ -26,7 +26,7 @@ import json import glob import itertools -from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check +from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check, get_metadata_dir from blech_clust.utils.ephys_data import ephys_data from blech_clust.utils.ephys_data import visualize as vz import pandas as pd @@ -765,7 +765,8 @@ def palatability_corr(x): [this_resp_neurons, taste_bin_pval_frame, this_pal_sig_frame]) out_frame.drop(columns=['p-unc'], inplace=True) -out_frame.to_csv(os.path.join(dir_name, 'aggregated_characteristics.csv'), +metadata_dir = get_metadata_dir(dir_name) +out_frame.to_csv(os.path.join(metadata_dir, 'aggregated_characteristics.csv'), index=False) out_frame.to_hdf( metadata_handler.hdf5_name, diff --git a/emg/emg_freq_setup.py b/emg/emg_freq_setup.py index ff8cafb2..24698722 100644 --- a/emg/emg_freq_setup.py +++ b/emg/emg_freq_setup.py @@ -240,6 +240,12 @@ # Also delete log print('Deleting results.log') +logs_dir = os.path.join(data_dir, 'logs') +os.makedirs(logs_dir, exist_ok=True) +results_log_path = os.path.join(logs_dir, 'results.log') +if os.path.exists(results_log_path): + os.remove(results_log_path) +# Also check old location for backward compatibility if os.path.exists('results.log'): os.remove('results.log') @@ -255,7 +261,7 @@ data_dir, len(emg_env_df)-1) print( - "parallel -k -j {:d} --noswap --load 100% --progress --ungroup --joblog {:s}/results.log bash blech_emg_jetstream_parallel1.sh ::: {{0..{:d}}}".format( + "parallel -k -j {:d} --noswap --load 100% --progress --ungroup --joblog {:s}/logs/results.log bash blech_emg_jetstream_parallel1.sh ::: {{0..{:d}}}".format( *format_args), file=f) f.close() @@ -281,8 +287,17 @@ ############################################################ # Merge the emg_env_df with trial_info_df ############################################################ -# Also get trial_info_frame -trial_info_frame = pd.read_csv(os.path.join(data_dir, 'trial_info_frame.csv')) +# Also get trial_info_frame (check both metadata dir and old location) +metadata_dir = os.path.join(data_dir, 'metadata') +trial_info_path_new = os.path.join(metadata_dir, 'trial_info_frame.csv') +trial_info_path_old = os.path.join(data_dir, 'trial_info_frame.csv') + +if os.path.exists(trial_info_path_new): + trial_info_frame = pd.read_csv(trial_info_path_new) +elif os.path.exists(trial_info_path_old): + trial_info_frame = pd.read_csv(trial_info_path_old) +else: + raise FileNotFoundError("trial_info_frame.csv not found in metadata/ or data directory") merge_frame = pd.merge(emg_env_df, trial_info_frame, left_on=['dig_in', 'trial_inds'], diff --git a/tests/test_logs_metadata_dirs.py b/tests/test_logs_metadata_dirs.py new file mode 100644 index 00000000..a6b8f0a1 --- /dev/null +++ b/tests/test_logs_metadata_dirs.py @@ -0,0 +1,155 @@ +""" +Tests for verifying that logs and metadata are placed in the correct directories. +These tests verify the source code changes directly without importing. +""" + + +def test_ensure_dir_function(): + """Test the ensure_dir function logic directly""" + # Read the function source + with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: + source = f.read() + + # Check that ensure_dir is defined + assert 'def ensure_dir(data_dir, subdir):' in source + + # Check that get_metadata_dir is defined + assert 'def get_metadata_dir(data_dir):' in source + + # Check that get_logs_dir is defined + assert 'def get_logs_dir(data_dir):' in source + + print("✓ All required functions are defined in blech_utils.py") + + +def test_logs_directory_creation_in_tee(): + """Test that Tee class uses logs directory""" + with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: + source = f.read() + + # Check that Tee uses logs directory + assert "logs_dir = ensure_dir(data_dir, 'logs')" in source + assert "self.log_path = os.path.join(logs_dir, name)" in source + print("✓ Tee class correctly uses logs directory") + + +def test_logs_directory_in_blech_process(): + """Test that blech_process.py uses logs directory""" + with open('/workspace/project/blech_clust/blech_process.py', 'r') as f: + source = f.read() + + # Check that it creates logs directory + assert "log_dir = os.path.join(metadata_handler.dir_name, 'logs')" in source + assert "os.makedirs(log_dir, exist_ok=True)" in source + print("✓ blech_process.py correctly uses logs directory") + + +def test_logs_directory_in_blech_init(): + """Test that blech_init.py uses logs directory""" + with open('/workspace/project/blech_clust/blech_init.py', 'r') as f: + source = f.read() + + # Check that it uses logs directory for results.log + assert "logs/results.log" in source + print("✓ blech_init.py correctly uses logs directory for results.log") + + +def test_logs_directory_in_ram_monitor(): + """Test that ram_monitor.py uses logs directory""" + with open('/workspace/project/blech_clust/utils/ram_monitor.py', 'r') as f: + source = f.read() + + # Check that it uses logs directory + assert "logs_dir = os.path.join(output_dir, 'logs')" in source + assert "ram_usage.log" in source + print("✓ ram_monitor.py correctly uses logs directory") + + +def test_metadata_directory_in_read_file(): + """Test that read_file.py uses metadata directory""" + with open('/workspace/project/blech_clust/utils/read_file.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir + assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source + assert "metadata_dir = get_metadata_dir(self.data_dir)" in source + print("✓ read_file.py correctly uses metadata directory") + + +def test_metadata_directory_in_ephys_data(): + """Test that ephys_data.py uses metadata directory""" + with open('/workspace/project/blech_clust/utils/ephys_data/ephys_data.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir + assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source + print("✓ ephys_data.py correctly imports get_metadata_dir") + + +def test_metadata_directory_in_blech_units_characteristics(): + """Test that blech_units_characteristics.py uses metadata directory""" + with open('/workspace/project/blech_clust/blech_units_characteristics.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir for aggregated_characteristics.csv + assert "get_metadata_dir" in source + assert "aggregated_characteristics.csv" in source + print("✓ blech_units_characteristics.py correctly uses metadata directory") + + +def test_metadata_directory_in_blech_post_process(): + """Test that blech_post_process.py uses metadata directory""" + with open('/workspace/project/blech_clust/blech_post_process.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir for unit_descriptor.csv + assert "get_metadata_dir" in source + assert "unit_descriptor.csv" in source + print("✓ blech_post_process.py correctly uses metadata directory") + + +def test_metadata_directory_in_blech_make_arrays(): + """Test that blech_make_arrays.py uses metadata directory""" + with open('/workspace/project/blech_clust/blech_make_arrays.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir for trial_info_frame.csv + assert "get_metadata_dir" in source + assert "trial_info_frame.csv" in source + print("✓ blech_make_arrays.py correctly uses metadata directory") + + +def test_backward_compatibility_check(): + """Test that backward compatibility is maintained""" + # Check execution_log.json backward compatibility + with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: + source = f.read() + + assert "old_log_path = os.path.join(self.data_dir, 'execution_log.json')" in source + assert "if not os.path.exists(self.log_path) and os.path.exists(old_log_path):" in source + print("✓ Backward compatibility for execution_log.json is maintained") + + +def test_find_file_in_dir_function(): + """Test that find_file_in_dir function exists for backward compatibility""" + with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: + source = f.read() + + assert "def find_file_in_dir(data_dir, filename_pattern):" in source + print("✓ find_file_in_dir function exists for backward compatibility") + + +if __name__ == '__main__': + test_ensure_dir_function() + test_logs_directory_creation_in_tee() + test_logs_directory_in_blech_process() + test_logs_directory_in_blech_init() + test_logs_directory_in_ram_monitor() + test_metadata_directory_in_read_file() + test_metadata_directory_in_ephys_data() + test_metadata_directory_in_blech_units_characteristics() + test_metadata_directory_in_blech_post_process() + test_metadata_directory_in_blech_make_arrays() + test_backward_compatibility_check() + test_find_file_in_dir_function() + print("\nAll tests passed!") diff --git a/utils/blech_data_summary.py b/utils/blech_data_summary.py index ae4f4dd4..098693ee 100644 --- a/utils/blech_data_summary.py +++ b/utils/blech_data_summary.py @@ -59,7 +59,7 @@ blech_clust_dir = os.path.dirname(os.path.dirname(script_path)) sys.path.append(blech_clust_dir) from blech_clust.utils.qa_utils.channel_corr import get_all_channels, intra_corr # noqa -from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check # noqa +from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check, get_metadata_dir # noqa def extract_unit_characteristics(dir_name): @@ -152,8 +152,12 @@ def extract_unit_characteristics(dir_name): print("Falling back to aggregated_characteristics.csv...") # Fallback: read from aggregated_characteristics.csv - characteristics_path = os.path.join( - dir_name, 'aggregated_characteristics.csv') + metadata_dir = get_metadata_dir(dir_name) + characteristics_path = os.path.join(metadata_dir, 'aggregated_characteristics.csv') + # Check old location for backward compatibility + old_path = os.path.join(dir_name, 'aggregated_characteristics.csv') + if not os.path.exists(characteristics_path) and os.path.exists(old_path): + characteristics_path = old_path if not os.path.exists(characteristics_path): print( @@ -385,7 +389,12 @@ def extract_laser_conditions(dir_name): Returns: Dictionary with laser condition information """ - trial_info_path = os.path.join(dir_name, 'trial_info_frame.csv') + metadata_dir = get_metadata_dir(dir_name) + trial_info_path = os.path.join(metadata_dir, 'trial_info_frame.csv') + # Check old location for backward compatibility + old_path = os.path.join(dir_name, 'trial_info_frame.csv') + if not os.path.exists(trial_info_path) and os.path.exists(old_path): + trial_info_path = old_path if not os.path.exists(trial_info_path): print(f"Warning: {trial_info_path} not found.") diff --git a/utils/blech_process_utils.py b/utils/blech_process_utils.py index c89fcfb3..d6926565 100644 --- a/utils/blech_process_utils.py +++ b/utils/blech_process_utils.py @@ -52,7 +52,7 @@ # Set environment variables to limit the number of threads used by various libraries # Do it at the start of the script to ensure it applies to all imported libraries -from blech_clust.utils.blech_utils import ifisdir_rmdir +from blech_clust.utils.blech_utils import ifisdir_rmdir, get_metadata_dir import os # noqa os.environ['OMP_NUM_THREADS'] = '1' # noqa os.environ['MKL_NUM_THREADS'] = '1' # noqa @@ -761,7 +761,8 @@ def write_out_recommendations(self): Write out electrode number, count, mean prediction probability, and 5,95th percentiles """ waveform_thresh = self.classifier_params['min_suggestion_count'] - out_file_path = self.data_dir + '/waveform_classifier_recommendations.csv' + metadata_dir = get_metadata_dir(self.data_dir) + out_file_path = os.path.join(metadata_dir, 'waveform_classifier_recommendations.csv') count = len(self.pos_spike_dict['waveforms']) if count > waveform_thresh: percentile_5 = np.percentile(self.pos_spike_dict['prob'], 5) diff --git a/utils/blech_utils.py b/utils/blech_utils.py index f0a71c93..cb4739bc 100644 --- a/utils/blech_utils.py +++ b/utils/blech_utils.py @@ -48,11 +48,50 @@ import shutil +def ensure_dir(data_dir, subdir): + """Ensure a subdirectory exists in data_dir and return its path.""" + dir_path = os.path.join(data_dir, subdir) + os.makedirs(dir_path, exist_ok=True) + return dir_path + + +def get_metadata_dir(data_dir): + """Return the metadata directory path, creating it if necessary.""" + return ensure_dir(data_dir, 'metadata') + + +def get_logs_dir(data_dir): + """Return the logs directory path, creating it if necessary.""" + return ensure_dir(data_dir, 'logs') + + +def find_file_in_dir(data_dir, filename_pattern): + """ + Find a file matching the pattern in data_dir or its subdirectories. + Searches in metadata/ first, then falls back to data_dir root for backward compatibility. + """ + # First check in metadata directory + metadata_dir = os.path.join(data_dir, 'metadata') + if os.path.exists(metadata_dir): + matches = glob.glob(os.path.join(metadata_dir, filename_pattern)) + if matches: + return matches[0] + + # Fall back to data_dir root + matches = glob.glob(os.path.join(data_dir, filename_pattern)) + if matches: + return matches[0] + + return None + + class Tee: """Tee output to both stdout/stderr and a log file""" def __init__(self, data_dir, name='output.log'): - self.log_path = os.path.join(data_dir, name) + # Create logs directory if it doesn't exist + logs_dir = ensure_dir(data_dir, 'logs') + self.log_path = os.path.join(logs_dir, name) self.file = open(self.log_path, 'a') self.stdout = sys.stdout self.stderr = sys.stderr @@ -217,7 +256,11 @@ def check_previous(self, script_path): if type(parent_script) != list: parent_script = [parent_script] # Check that parent script is present in log - self.log_path = os.path.join(self.data_dir, 'execution_log.json') + self.log_path = os.path.join(ensure_dir(self.data_dir, 'logs'), 'execution_log.json') + # Check old location for backward compatibility + old_log_path = os.path.join(self.data_dir, 'execution_log.json') + if not os.path.exists(self.log_path) and os.path.exists(old_log_path): + self.log_path = old_log_path if os.path.exists(self.log_path): with open(self.log_path, 'r') as log_file_connect: log_dict = json.load(log_file_connect) @@ -240,7 +283,11 @@ def write_to_log(self, script_path, type='attempted'): """ if not hasattr(self, 'git_str'): raise ValueError('Run get_git_info() first') - self.log_path = os.path.join(self.data_dir, 'execution_log.json') + self.log_path = os.path.join(ensure_dir(self.data_dir, 'logs'), 'execution_log.json') + # Check old location for backward compatibility + old_log_path = os.path.join(self.data_dir, 'execution_log.json') + if not os.path.exists(self.log_path) and os.path.exists(old_log_path): + self.log_path = old_log_path current_datetime = datetime.now().strftime('%Y-%m-%d %H:%M:%S') if os.path.exists(self.log_path): with open(self.log_path, 'r') as log_file_connect: @@ -592,18 +639,30 @@ def get_hdf5_name(self,): print('No HDF5 file found') def get_params_path(self,): - file_list = glob.glob(os.path.join(self.dir_name, '**.params')) + # Check metadata directory first, then fall back to data_dir root + metadata_dir = os.path.join(self.dir_name, 'metadata') + file_list = glob.glob(os.path.join(metadata_dir, '**.params')) if len(file_list) > 0: self.params_file_path = file_list[0] else: - print('No PARAMS file found') + file_list = glob.glob(os.path.join(self.dir_name, '**.params')) + if len(file_list) > 0: + self.params_file_path = file_list[0] + else: + print('No PARAMS file found') def get_layout_path(self,): - file_list = glob.glob(os.path.join(self.dir_name, '**layout.csv')) + # Check metadata directory first, then fall back to data_dir root + metadata_dir = os.path.join(self.dir_name, 'metadata') + file_list = glob.glob(os.path.join(metadata_dir, '**layout.csv')) if len(file_list) > 0: self.layout_file_path = file_list[0] else: - print('No LAYOUT file found') + file_list = glob.glob(os.path.join(self.dir_name, '**layout.csv')) + if len(file_list) > 0: + self.layout_file_path = file_list[0] + else: + print('No LAYOUT file found') def load_params(self,): self.get_params_path() @@ -612,11 +671,17 @@ def load_params(self,): self.params_dict = json.load(params_file_connect) def get_info_path(self,): - file_list = glob.glob(os.path.join(self.dir_name, '**.info')) + # Check metadata directory first, then fall back to data_dir root + metadata_dir = os.path.join(self.dir_name, 'metadata') + file_list = glob.glob(os.path.join(metadata_dir, '**.info')) if len(file_list) > 0: self.info_file_path = file_list[0] else: - print('No INFO file found') + file_list = glob.glob(os.path.join(self.dir_name, '**.info')) + if len(file_list) > 0: + self.info_file_path = file_list[0] + else: + print('No INFO file found') def load_info(self,): self.get_info_path() diff --git a/utils/ephys_data/ephys_data.py b/utils/ephys_data/ephys_data.py index c339bace..c8f90278 100755 --- a/utils/ephys_data/ephys_data.py +++ b/utils/ephys_data/ephys_data.py @@ -217,6 +217,7 @@ import numpy as np import pandas as pd import pingouin as pg +from blech_clust.utils.blech_utils import get_metadata_dir # ______ _ _____ _ # | ____| | | | __ \ | | @@ -1624,8 +1625,17 @@ def get_trial_info_frame(self): - start_taste_ms: Trial start time in ms - end_taste_ms: Trial end time in ms """ - self.trial_info_frame = pd.read_csv( - os.path.join(self.data_dir, 'trial_info_frame.csv')) + # Check new location (metadata dir) first, then fall back to old location + metadata_dir = get_metadata_dir(self.data_dir) + new_path = os.path.join(metadata_dir, 'trial_info_frame.csv') + old_path = os.path.join(self.data_dir, 'trial_info_frame.csv') + + if os.path.exists(new_path): + self.trial_info_frame = pd.read_csv(new_path) + elif os.path.exists(old_path): + self.trial_info_frame = pd.read_csv(old_path) + else: + raise FileNotFoundError("trial_info_frame.csv not found in metadata/ or data directory") def sequester_trial_inds(self): """Organize trial indices by taste and laser condition @@ -2088,7 +2098,12 @@ def profile_units(self, save_to_file=True, alpha=0.05, recalculate=False): print("="*40) print("Unit Profiling") print("="*40) - csv_path = os.path.join(self.data_dir, 'unit_profile.csv') + metadata_dir = get_metadata_dir(self.data_dir) + csv_path = os.path.join(metadata_dir, 'unit_profile.csv') + # Check old location for backward compatibility + old_path = os.path.join(self.data_dir, 'unit_profile.csv') + if not os.path.exists(csv_path) and os.path.exists(old_path): + csv_path = old_path if os.path.exists(csv_path) and not recalculate: print(f"Loading existing unit profile from {csv_path}") print("***To recalculate, set recalculate=True***") diff --git a/utils/qa_utils/elbo_drift.py b/utils/qa_utils/elbo_drift.py index ad086735..31f6a1d1 100644 --- a/utils/qa_utils/elbo_drift.py +++ b/utils/qa_utils/elbo_drift.py @@ -24,6 +24,7 @@ import sys from tqdm import tqdm, trange import argparse +from blech_clust.utils.blech_utils import get_metadata_dir parser = argparse.ArgumentParser() parser.add_argument('dir_name', type=str, help='Directory containing data') parser.add_argument('--force', action='store_true', help='Force re-fitting') @@ -152,7 +153,12 @@ def ridge_plot( # Load Data ############################################################ # Load trial times from trial_info_frame.csv -trial_info_path = os.path.join(dir_name, 'trial_info_frame.csv') +metadata_dir = get_metadata_dir(dir_name) +trial_info_path = os.path.join(metadata_dir, 'trial_info_frame.csv') +# Check old location for backward compatibility +old_path = os.path.join(dir_name, 'trial_info_frame.csv') +if not os.path.exists(trial_info_path) and os.path.exists(old_path): + trial_info_path = old_path trial_info_frame = pd.read_csv(trial_info_path) # Open the hdf5 file diff --git a/utils/ram_monitor.py b/utils/ram_monitor.py index 1ae5640a..d72f46fd 100644 --- a/utils/ram_monitor.py +++ b/utils/ram_monitor.py @@ -14,7 +14,9 @@ def monitor_ram(output_dir): """Monitor RAM usage and write to a log file""" - log_file = os.path.join(output_dir, "ram_usage.log") + logs_dir = os.path.join(output_dir, 'logs') + os.makedirs(logs_dir, exist_ok=True) + log_file = os.path.join(logs_dir, "ram_usage.log") with open(log_file, 'a') as f: f.write("Timestamp,RAM_Used_GB,RAM_Total_GB,RAM_Percent\n") diff --git a/utils/read_file.py b/utils/read_file.py index 28e2fb51..9889522a 100644 --- a/utils/read_file.py +++ b/utils/read_file.py @@ -27,6 +27,7 @@ # https://github.com/Intan-Technologies/load-rhd-notebook-python from blech_clust.utils.importrhdutilities import load_file, read_header +from blech_clust.utils.blech_utils import get_metadata_dir class DigInHandler: @@ -224,16 +225,24 @@ def get_trial_data(self): self.dig_in_frame = dig_in_frame def write_out_frame(self): - # Write out the dig-in frame + # Write out the dig-in frame to metadata directory + metadata_dir = get_metadata_dir(self.data_dir) self.dig_in_frame.to_csv(os.path.join( - self.data_dir, 'dig_in_frame.csv')) - print('Dig-in frame written out to dig_in_frame.csv') + metadata_dir, 'dig_in_frame.csv')) + print('Dig-in frame written out to metadata/dig_in_frame.csv') def load_dig_in_frame(self): - # Load the dig-in frame - self.dig_in_frame = pd.read_csv(os.path.join(self.data_dir, 'dig_in_frame.csv'), - index_col=0) - print('Dig-in frame loaded from dig_in_frame.csv') + # Load the dig-in frame, checking both metadata dir and old location + metadata_dir = os.path.join(self.data_dir, 'metadata') + old_path = os.path.join(self.data_dir, 'dig_in_frame.csv') + new_path = os.path.join(metadata_dir, 'dig_in_frame.csv') + + if os.path.exists(new_path): + self.dig_in_frame = pd.read_csv(new_path, index_col=0) + print('Dig-in frame loaded from metadata/dig_in_frame.csv') + elif os.path.exists(old_path): + self.dig_in_frame = pd.read_csv(old_path, index_col=0) + print('Dig-in frame loaded from dig_in_frame.csv') def read_traditional_intan( diff --git a/utils/umap_plotting/umap_plots.py b/utils/umap_plotting/umap_plots.py index 931eb799..398621f2 100644 --- a/utils/umap_plotting/umap_plots.py +++ b/utils/umap_plotting/umap_plots.py @@ -255,9 +255,12 @@ def run_pipeline(this_row, dir_name, zero_ind): print(f'Creating {bash_file}') job_count = cpu_count()-2 f = open(runner_path, 'w') + # Create logs directory if it doesn't exist + logs_dir = os.path.join(dir_name, 'logs') + os.makedirs(logs_dir, exist_ok=True) print(f"parallel -k -j {job_count} --noswap --load 100% --progress " + "--memfree 4G --retry-failed " + - f"--joblog {dir_name}/umap_results.log " + + f"--joblog {logs_dir}/umap_results.log " + f"python umap_plots.py " + f"::: {' '.join([str(x) for x in range(len(path_frame))])}", file=f) From 650a9701f04216335ea37d1ef84bbc2b6b161a1c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 18 Feb 2026 10:48:35 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- blech_exp_info.py | 3 ++- blech_init.py | 6 ++++-- emg/emg_freq_setup.py | 3 ++- tests/test_logs_metadata_dirs.py | 30 +++++++++++++++--------------- utils/blech_data_summary.py | 3 ++- utils/blech_process_utils.py | 3 ++- utils/blech_utils.py | 10 ++++++---- utils/ephys_data/ephys_data.py | 5 +++-- utils/read_file.py | 2 +- 9 files changed, 37 insertions(+), 28 deletions(-) diff --git a/blech_exp_info.py b/blech_exp_info.py index 5f2cd56c..f88f3682 100644 --- a/blech_exp_info.py +++ b/blech_exp_info.py @@ -935,7 +935,8 @@ def confirm_check(x): # Read and process the layout file # Check both metadata directory and old location for backward compatibility if not os.path.exists(layout_file_path): - old_layout_path = os.path.join(dir_path, dir_name + "_electrode_layout.csv") + old_layout_path = os.path.join( + dir_path, dir_name + "_electrode_layout.csv") if os.path.exists(old_layout_path): layout_file_path = old_layout_path layout_frame_filled = pd.read_csv(layout_file_path) diff --git a/blech_init.py b/blech_init.py index f51f4462..5fd3df5b 100644 --- a/blech_init.py +++ b/blech_init.py @@ -204,8 +204,10 @@ def generate_processing_scripts(dir_name, blech_clust_dir, electrode_layout_fram this_pipeline_check.write_to_log(script_path, 'attempted') # If info_dict present but execution log is not # just create the execution log with blech_exp_info marked - execution_log_path_new = os.path.join(metadata_handler.dir_name, 'logs', 'execution_log.json') - execution_log_path_old = os.path.join(metadata_handler.dir_name, 'execution_log.json') + execution_log_path_new = os.path.join( + metadata_handler.dir_name, 'logs', 'execution_log.json') + execution_log_path_old = os.path.join( + metadata_handler.dir_name, 'execution_log.json') if 'info_dict' in dir(metadata_handler) and not os.path.exists(execution_log_path_new) and not os.path.exists(execution_log_path_old): blech_exp_info_path = os.path.join( blech_clust_dir, 'blech_exp_info.py') diff --git a/emg/emg_freq_setup.py b/emg/emg_freq_setup.py index 24698722..f57a65f9 100644 --- a/emg/emg_freq_setup.py +++ b/emg/emg_freq_setup.py @@ -297,7 +297,8 @@ elif os.path.exists(trial_info_path_old): trial_info_frame = pd.read_csv(trial_info_path_old) else: - raise FileNotFoundError("trial_info_frame.csv not found in metadata/ or data directory") + raise FileNotFoundError( + "trial_info_frame.csv not found in metadata/ or data directory") merge_frame = pd.merge(emg_env_df, trial_info_frame, left_on=['dig_in', 'trial_inds'], diff --git a/tests/test_logs_metadata_dirs.py b/tests/test_logs_metadata_dirs.py index a6b8f0a1..47040d68 100644 --- a/tests/test_logs_metadata_dirs.py +++ b/tests/test_logs_metadata_dirs.py @@ -9,16 +9,16 @@ def test_ensure_dir_function(): # Read the function source with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: source = f.read() - + # Check that ensure_dir is defined assert 'def ensure_dir(data_dir, subdir):' in source - + # Check that get_metadata_dir is defined assert 'def get_metadata_dir(data_dir):' in source - + # Check that get_logs_dir is defined assert 'def get_logs_dir(data_dir):' in source - + print("✓ All required functions are defined in blech_utils.py") @@ -26,7 +26,7 @@ def test_logs_directory_creation_in_tee(): """Test that Tee class uses logs directory""" with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: source = f.read() - + # Check that Tee uses logs directory assert "logs_dir = ensure_dir(data_dir, 'logs')" in source assert "self.log_path = os.path.join(logs_dir, name)" in source @@ -37,7 +37,7 @@ def test_logs_directory_in_blech_process(): """Test that blech_process.py uses logs directory""" with open('/workspace/project/blech_clust/blech_process.py', 'r') as f: source = f.read() - + # Check that it creates logs directory assert "log_dir = os.path.join(metadata_handler.dir_name, 'logs')" in source assert "os.makedirs(log_dir, exist_ok=True)" in source @@ -48,7 +48,7 @@ def test_logs_directory_in_blech_init(): """Test that blech_init.py uses logs directory""" with open('/workspace/project/blech_clust/blech_init.py', 'r') as f: source = f.read() - + # Check that it uses logs directory for results.log assert "logs/results.log" in source print("✓ blech_init.py correctly uses logs directory for results.log") @@ -58,7 +58,7 @@ def test_logs_directory_in_ram_monitor(): """Test that ram_monitor.py uses logs directory""" with open('/workspace/project/blech_clust/utils/ram_monitor.py', 'r') as f: source = f.read() - + # Check that it uses logs directory assert "logs_dir = os.path.join(output_dir, 'logs')" in source assert "ram_usage.log" in source @@ -69,7 +69,7 @@ def test_metadata_directory_in_read_file(): """Test that read_file.py uses metadata directory""" with open('/workspace/project/blech_clust/utils/read_file.py', 'r') as f: source = f.read() - + # Check that it uses get_metadata_dir assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source assert "metadata_dir = get_metadata_dir(self.data_dir)" in source @@ -80,7 +80,7 @@ def test_metadata_directory_in_ephys_data(): """Test that ephys_data.py uses metadata directory""" with open('/workspace/project/blech_clust/utils/ephys_data/ephys_data.py', 'r') as f: source = f.read() - + # Check that it uses get_metadata_dir assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source print("✓ ephys_data.py correctly imports get_metadata_dir") @@ -90,7 +90,7 @@ def test_metadata_directory_in_blech_units_characteristics(): """Test that blech_units_characteristics.py uses metadata directory""" with open('/workspace/project/blech_clust/blech_units_characteristics.py', 'r') as f: source = f.read() - + # Check that it uses get_metadata_dir for aggregated_characteristics.csv assert "get_metadata_dir" in source assert "aggregated_characteristics.csv" in source @@ -101,7 +101,7 @@ def test_metadata_directory_in_blech_post_process(): """Test that blech_post_process.py uses metadata directory""" with open('/workspace/project/blech_clust/blech_post_process.py', 'r') as f: source = f.read() - + # Check that it uses get_metadata_dir for unit_descriptor.csv assert "get_metadata_dir" in source assert "unit_descriptor.csv" in source @@ -112,7 +112,7 @@ def test_metadata_directory_in_blech_make_arrays(): """Test that blech_make_arrays.py uses metadata directory""" with open('/workspace/project/blech_clust/blech_make_arrays.py', 'r') as f: source = f.read() - + # Check that it uses get_metadata_dir for trial_info_frame.csv assert "get_metadata_dir" in source assert "trial_info_frame.csv" in source @@ -124,7 +124,7 @@ def test_backward_compatibility_check(): # Check execution_log.json backward compatibility with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: source = f.read() - + assert "old_log_path = os.path.join(self.data_dir, 'execution_log.json')" in source assert "if not os.path.exists(self.log_path) and os.path.exists(old_log_path):" in source print("✓ Backward compatibility for execution_log.json is maintained") @@ -134,7 +134,7 @@ def test_find_file_in_dir_function(): """Test that find_file_in_dir function exists for backward compatibility""" with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: source = f.read() - + assert "def find_file_in_dir(data_dir, filename_pattern):" in source print("✓ find_file_in_dir function exists for backward compatibility") diff --git a/utils/blech_data_summary.py b/utils/blech_data_summary.py index 098693ee..25a0a21e 100644 --- a/utils/blech_data_summary.py +++ b/utils/blech_data_summary.py @@ -153,7 +153,8 @@ def extract_unit_characteristics(dir_name): # Fallback: read from aggregated_characteristics.csv metadata_dir = get_metadata_dir(dir_name) - characteristics_path = os.path.join(metadata_dir, 'aggregated_characteristics.csv') + characteristics_path = os.path.join( + metadata_dir, 'aggregated_characteristics.csv') # Check old location for backward compatibility old_path = os.path.join(dir_name, 'aggregated_characteristics.csv') if not os.path.exists(characteristics_path) and os.path.exists(old_path): diff --git a/utils/blech_process_utils.py b/utils/blech_process_utils.py index d6926565..5ba4bd0d 100644 --- a/utils/blech_process_utils.py +++ b/utils/blech_process_utils.py @@ -762,7 +762,8 @@ def write_out_recommendations(self): """ waveform_thresh = self.classifier_params['min_suggestion_count'] metadata_dir = get_metadata_dir(self.data_dir) - out_file_path = os.path.join(metadata_dir, 'waveform_classifier_recommendations.csv') + out_file_path = os.path.join( + metadata_dir, 'waveform_classifier_recommendations.csv') count = len(self.pos_spike_dict['waveforms']) if count > waveform_thresh: percentile_5 = np.percentile(self.pos_spike_dict['prob'], 5) diff --git a/utils/blech_utils.py b/utils/blech_utils.py index cb4739bc..b533bd18 100644 --- a/utils/blech_utils.py +++ b/utils/blech_utils.py @@ -76,12 +76,12 @@ def find_file_in_dir(data_dir, filename_pattern): matches = glob.glob(os.path.join(metadata_dir, filename_pattern)) if matches: return matches[0] - + # Fall back to data_dir root matches = glob.glob(os.path.join(data_dir, filename_pattern)) if matches: return matches[0] - + return None @@ -256,7 +256,8 @@ def check_previous(self, script_path): if type(parent_script) != list: parent_script = [parent_script] # Check that parent script is present in log - self.log_path = os.path.join(ensure_dir(self.data_dir, 'logs'), 'execution_log.json') + self.log_path = os.path.join(ensure_dir( + self.data_dir, 'logs'), 'execution_log.json') # Check old location for backward compatibility old_log_path = os.path.join(self.data_dir, 'execution_log.json') if not os.path.exists(self.log_path) and os.path.exists(old_log_path): @@ -283,7 +284,8 @@ def write_to_log(self, script_path, type='attempted'): """ if not hasattr(self, 'git_str'): raise ValueError('Run get_git_info() first') - self.log_path = os.path.join(ensure_dir(self.data_dir, 'logs'), 'execution_log.json') + self.log_path = os.path.join(ensure_dir( + self.data_dir, 'logs'), 'execution_log.json') # Check old location for backward compatibility old_log_path = os.path.join(self.data_dir, 'execution_log.json') if not os.path.exists(self.log_path) and os.path.exists(old_log_path): diff --git a/utils/ephys_data/ephys_data.py b/utils/ephys_data/ephys_data.py index c8f90278..d1c6904f 100755 --- a/utils/ephys_data/ephys_data.py +++ b/utils/ephys_data/ephys_data.py @@ -1629,13 +1629,14 @@ def get_trial_info_frame(self): metadata_dir = get_metadata_dir(self.data_dir) new_path = os.path.join(metadata_dir, 'trial_info_frame.csv') old_path = os.path.join(self.data_dir, 'trial_info_frame.csv') - + if os.path.exists(new_path): self.trial_info_frame = pd.read_csv(new_path) elif os.path.exists(old_path): self.trial_info_frame = pd.read_csv(old_path) else: - raise FileNotFoundError("trial_info_frame.csv not found in metadata/ or data directory") + raise FileNotFoundError( + "trial_info_frame.csv not found in metadata/ or data directory") def sequester_trial_inds(self): """Organize trial indices by taste and laser condition diff --git a/utils/read_file.py b/utils/read_file.py index 9889522a..8070cce4 100644 --- a/utils/read_file.py +++ b/utils/read_file.py @@ -236,7 +236,7 @@ def load_dig_in_frame(self): metadata_dir = os.path.join(self.data_dir, 'metadata') old_path = os.path.join(self.data_dir, 'dig_in_frame.csv') new_path = os.path.join(metadata_dir, 'dig_in_frame.csv') - + if os.path.exists(new_path): self.dig_in_frame = pd.read_csv(new_path, index_col=0) print('Dig-in frame loaded from metadata/dig_in_frame.csv')