diff --git a/blech_exp_info.py b/blech_exp_info.py index 9b4f6fd3..f88f3682 100644 --- a/blech_exp_info.py +++ b/blech_exp_info.py @@ -868,8 +868,11 @@ def process_electrode_layout(dir_path, dir_name, electrode_files, ports, electro Returns: Tuple containing layout_dict, fin_emg_port, orig_emg_electrodes, emg_muscle_str """ + # Use metadata directory for layout file + metadata_dir = os.path.join(dir_path, 'metadata') + os.makedirs(metadata_dir, exist_ok=True) layout_file_path = os.path.join( - dir_path, dir_name + "_electrode_layout.csv") + metadata_dir, dir_name + "_electrode_layout.csv") def yn_check(x): return x in ['y', 'yes', 'n', 'no', ''] @@ -930,6 +933,12 @@ def confirm_check(x): exit() # Read and process the layout file + # Check both metadata directory and old location for backward compatibility + if not os.path.exists(layout_file_path): + old_layout_path = os.path.join( + dir_path, dir_name + "_electrode_layout.csv") + if os.path.exists(old_layout_path): + layout_file_path = old_layout_path layout_frame_filled = pd.read_csv(layout_file_path) if not args.programmatic: diff --git a/blech_init.py b/blech_init.py index 41a8f54f..5fd3df5b 100644 --- a/blech_init.py +++ b/blech_init.py @@ -170,7 +170,7 @@ def generate_processing_scripts(dir_name, blech_clust_dir, electrode_layout_fram f.write(f'DIR={dir_name} \n') print(f"parallel -k -j {job_count} --noswap --load 100% --progress " + "--memfree 4G --ungroup --retry-failed " + - f"--joblog $DIR/results.log " + + f"--joblog $DIR/logs/results.log " + "bash $DIR/temp/blech_process_single.sh " + f"::: {' '.join([str(x) for x in bash_electrode_list])}", file=f) @@ -204,7 +204,11 @@ def generate_processing_scripts(dir_name, blech_clust_dir, electrode_layout_fram this_pipeline_check.write_to_log(script_path, 'attempted') # If info_dict present but execution log is not # just create the execution log with blech_exp_info marked - if 'info_dict' in dir(metadata_handler) and not os.path.exists(metadata_handler.dir_name + '/execution_log.json'): + execution_log_path_new = os.path.join( + metadata_handler.dir_name, 'logs', 'execution_log.json') + execution_log_path_old = os.path.join( + metadata_handler.dir_name, 'execution_log.json') + if 'info_dict' in dir(metadata_handler) and not os.path.exists(execution_log_path_new) and not os.path.exists(execution_log_path_old): blech_exp_info_path = os.path.join( blech_clust_dir, 'blech_exp_info.py') this_pipeline_check.write_to_log(blech_exp_info_path, 'attempted') diff --git a/blech_make_arrays.py b/blech_make_arrays.py index 14fba2d1..9360944c 100644 --- a/blech_make_arrays.py +++ b/blech_make_arrays.py @@ -21,7 +21,7 @@ from tqdm import tqdm from blech_clust.utils.clustering import get_filtered_electrode from blech_clust.utils.blech_process_utils import return_cutoff_values -from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check +from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check, get_metadata_dir from blech_clust.utils.read_file import DigInHandler from ast import literal_eval import matplotlib.pyplot as plt @@ -376,7 +376,8 @@ def create_emg_trials_for_digin( trial_info_frame.to_hdf(metadata_handler.hdf5_name, 'trial_info_frame', mode='a') - csv_path = os.path.join(metadata_handler.dir_name, 'trial_info_frame.csv') + metadata_dir = get_metadata_dir(metadata_handler.dir_name) + csv_path = os.path.join(metadata_dir, 'trial_info_frame.csv') trial_info_frame.to_csv(csv_path, index=False) # Get list of units under the sorted_units group. diff --git a/blech_post_process.py b/blech_post_process.py index 411597c4..562ac919 100644 --- a/blech_post_process.py +++ b/blech_post_process.py @@ -87,7 +87,7 @@ os.environ['OPENBLAS_NUM_THREADS'] = '1' # noqa import blech_clust.utils.blech_post_process_utils as post_utils # noqa -from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check # noqa +from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check, get_metadata_dir # noqa from utils import blech_waveforms_datashader # noqa from multiprocessing import Pool, cpu_count # noqa from functools import partial # noqa @@ -588,8 +588,9 @@ print('==== Unit Table ====\n') print(current_unit_table) # Also write to disk +metadata_dir = get_metadata_dir(metadata_handler.dir_name) current_unit_table.to_csv( - os.path.join(metadata_handler.dir_name, 'unit_descriptor.csv'), + os.path.join(metadata_dir, 'unit_descriptor.csv'), index=False, ) diff --git a/blech_process.py b/blech_process.py index 8c80e897..294c3aa3 100644 --- a/blech_process.py +++ b/blech_process.py @@ -98,7 +98,9 @@ print(f'Processing electrode {electrode_num}') # Initialize or load processing log -log_path = pathlib.Path(metadata_handler.dir_name) / 'blech_process.log' +log_dir = os.path.join(metadata_handler.dir_name, 'logs') +os.makedirs(log_dir, exist_ok=True) +log_path = pathlib.Path(log_dir) / 'blech_process.log' if log_path.exists(): with open(log_path) as f: process_log = json.load(f) diff --git a/blech_units_characteristics.py b/blech_units_characteristics.py index 28ab153d..8179c114 100644 --- a/blech_units_characteristics.py +++ b/blech_units_characteristics.py @@ -26,7 +26,7 @@ import json import glob import itertools -from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check +from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check, get_metadata_dir from blech_clust.utils.ephys_data import ephys_data from blech_clust.utils.ephys_data import visualize as vz import pandas as pd @@ -765,7 +765,8 @@ def palatability_corr(x): [this_resp_neurons, taste_bin_pval_frame, this_pal_sig_frame]) out_frame.drop(columns=['p-unc'], inplace=True) -out_frame.to_csv(os.path.join(dir_name, 'aggregated_characteristics.csv'), +metadata_dir = get_metadata_dir(dir_name) +out_frame.to_csv(os.path.join(metadata_dir, 'aggregated_characteristics.csv'), index=False) out_frame.to_hdf( metadata_handler.hdf5_name, diff --git a/emg/emg_freq_setup.py b/emg/emg_freq_setup.py index ff8cafb2..f57a65f9 100644 --- a/emg/emg_freq_setup.py +++ b/emg/emg_freq_setup.py @@ -240,6 +240,12 @@ # Also delete log print('Deleting results.log') +logs_dir = os.path.join(data_dir, 'logs') +os.makedirs(logs_dir, exist_ok=True) +results_log_path = os.path.join(logs_dir, 'results.log') +if os.path.exists(results_log_path): + os.remove(results_log_path) +# Also check old location for backward compatibility if os.path.exists('results.log'): os.remove('results.log') @@ -255,7 +261,7 @@ data_dir, len(emg_env_df)-1) print( - "parallel -k -j {:d} --noswap --load 100% --progress --ungroup --joblog {:s}/results.log bash blech_emg_jetstream_parallel1.sh ::: {{0..{:d}}}".format( + "parallel -k -j {:d} --noswap --load 100% --progress --ungroup --joblog {:s}/logs/results.log bash blech_emg_jetstream_parallel1.sh ::: {{0..{:d}}}".format( *format_args), file=f) f.close() @@ -281,8 +287,18 @@ ############################################################ # Merge the emg_env_df with trial_info_df ############################################################ -# Also get trial_info_frame -trial_info_frame = pd.read_csv(os.path.join(data_dir, 'trial_info_frame.csv')) +# Also get trial_info_frame (check both metadata dir and old location) +metadata_dir = os.path.join(data_dir, 'metadata') +trial_info_path_new = os.path.join(metadata_dir, 'trial_info_frame.csv') +trial_info_path_old = os.path.join(data_dir, 'trial_info_frame.csv') + +if os.path.exists(trial_info_path_new): + trial_info_frame = pd.read_csv(trial_info_path_new) +elif os.path.exists(trial_info_path_old): + trial_info_frame = pd.read_csv(trial_info_path_old) +else: + raise FileNotFoundError( + "trial_info_frame.csv not found in metadata/ or data directory") merge_frame = pd.merge(emg_env_df, trial_info_frame, left_on=['dig_in', 'trial_inds'], diff --git a/tests/test_logs_metadata_dirs.py b/tests/test_logs_metadata_dirs.py new file mode 100644 index 00000000..47040d68 --- /dev/null +++ b/tests/test_logs_metadata_dirs.py @@ -0,0 +1,155 @@ +""" +Tests for verifying that logs and metadata are placed in the correct directories. +These tests verify the source code changes directly without importing. +""" + + +def test_ensure_dir_function(): + """Test the ensure_dir function logic directly""" + # Read the function source + with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: + source = f.read() + + # Check that ensure_dir is defined + assert 'def ensure_dir(data_dir, subdir):' in source + + # Check that get_metadata_dir is defined + assert 'def get_metadata_dir(data_dir):' in source + + # Check that get_logs_dir is defined + assert 'def get_logs_dir(data_dir):' in source + + print("✓ All required functions are defined in blech_utils.py") + + +def test_logs_directory_creation_in_tee(): + """Test that Tee class uses logs directory""" + with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: + source = f.read() + + # Check that Tee uses logs directory + assert "logs_dir = ensure_dir(data_dir, 'logs')" in source + assert "self.log_path = os.path.join(logs_dir, name)" in source + print("✓ Tee class correctly uses logs directory") + + +def test_logs_directory_in_blech_process(): + """Test that blech_process.py uses logs directory""" + with open('/workspace/project/blech_clust/blech_process.py', 'r') as f: + source = f.read() + + # Check that it creates logs directory + assert "log_dir = os.path.join(metadata_handler.dir_name, 'logs')" in source + assert "os.makedirs(log_dir, exist_ok=True)" in source + print("✓ blech_process.py correctly uses logs directory") + + +def test_logs_directory_in_blech_init(): + """Test that blech_init.py uses logs directory""" + with open('/workspace/project/blech_clust/blech_init.py', 'r') as f: + source = f.read() + + # Check that it uses logs directory for results.log + assert "logs/results.log" in source + print("✓ blech_init.py correctly uses logs directory for results.log") + + +def test_logs_directory_in_ram_monitor(): + """Test that ram_monitor.py uses logs directory""" + with open('/workspace/project/blech_clust/utils/ram_monitor.py', 'r') as f: + source = f.read() + + # Check that it uses logs directory + assert "logs_dir = os.path.join(output_dir, 'logs')" in source + assert "ram_usage.log" in source + print("✓ ram_monitor.py correctly uses logs directory") + + +def test_metadata_directory_in_read_file(): + """Test that read_file.py uses metadata directory""" + with open('/workspace/project/blech_clust/utils/read_file.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir + assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source + assert "metadata_dir = get_metadata_dir(self.data_dir)" in source + print("✓ read_file.py correctly uses metadata directory") + + +def test_metadata_directory_in_ephys_data(): + """Test that ephys_data.py uses metadata directory""" + with open('/workspace/project/blech_clust/utils/ephys_data/ephys_data.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir + assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source + print("✓ ephys_data.py correctly imports get_metadata_dir") + + +def test_metadata_directory_in_blech_units_characteristics(): + """Test that blech_units_characteristics.py uses metadata directory""" + with open('/workspace/project/blech_clust/blech_units_characteristics.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir for aggregated_characteristics.csv + assert "get_metadata_dir" in source + assert "aggregated_characteristics.csv" in source + print("✓ blech_units_characteristics.py correctly uses metadata directory") + + +def test_metadata_directory_in_blech_post_process(): + """Test that blech_post_process.py uses metadata directory""" + with open('/workspace/project/blech_clust/blech_post_process.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir for unit_descriptor.csv + assert "get_metadata_dir" in source + assert "unit_descriptor.csv" in source + print("✓ blech_post_process.py correctly uses metadata directory") + + +def test_metadata_directory_in_blech_make_arrays(): + """Test that blech_make_arrays.py uses metadata directory""" + with open('/workspace/project/blech_clust/blech_make_arrays.py', 'r') as f: + source = f.read() + + # Check that it uses get_metadata_dir for trial_info_frame.csv + assert "get_metadata_dir" in source + assert "trial_info_frame.csv" in source + print("✓ blech_make_arrays.py correctly uses metadata directory") + + +def test_backward_compatibility_check(): + """Test that backward compatibility is maintained""" + # Check execution_log.json backward compatibility + with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: + source = f.read() + + assert "old_log_path = os.path.join(self.data_dir, 'execution_log.json')" in source + assert "if not os.path.exists(self.log_path) and os.path.exists(old_log_path):" in source + print("✓ Backward compatibility for execution_log.json is maintained") + + +def test_find_file_in_dir_function(): + """Test that find_file_in_dir function exists for backward compatibility""" + with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f: + source = f.read() + + assert "def find_file_in_dir(data_dir, filename_pattern):" in source + print("✓ find_file_in_dir function exists for backward compatibility") + + +if __name__ == '__main__': + test_ensure_dir_function() + test_logs_directory_creation_in_tee() + test_logs_directory_in_blech_process() + test_logs_directory_in_blech_init() + test_logs_directory_in_ram_monitor() + test_metadata_directory_in_read_file() + test_metadata_directory_in_ephys_data() + test_metadata_directory_in_blech_units_characteristics() + test_metadata_directory_in_blech_post_process() + test_metadata_directory_in_blech_make_arrays() + test_backward_compatibility_check() + test_find_file_in_dir_function() + print("\nAll tests passed!") diff --git a/utils/blech_data_summary.py b/utils/blech_data_summary.py index ae4f4dd4..25a0a21e 100644 --- a/utils/blech_data_summary.py +++ b/utils/blech_data_summary.py @@ -59,7 +59,7 @@ blech_clust_dir = os.path.dirname(os.path.dirname(script_path)) sys.path.append(blech_clust_dir) from blech_clust.utils.qa_utils.channel_corr import get_all_channels, intra_corr # noqa -from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check # noqa +from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check, get_metadata_dir # noqa def extract_unit_characteristics(dir_name): @@ -152,8 +152,13 @@ def extract_unit_characteristics(dir_name): print("Falling back to aggregated_characteristics.csv...") # Fallback: read from aggregated_characteristics.csv + metadata_dir = get_metadata_dir(dir_name) characteristics_path = os.path.join( - dir_name, 'aggregated_characteristics.csv') + metadata_dir, 'aggregated_characteristics.csv') + # Check old location for backward compatibility + old_path = os.path.join(dir_name, 'aggregated_characteristics.csv') + if not os.path.exists(characteristics_path) and os.path.exists(old_path): + characteristics_path = old_path if not os.path.exists(characteristics_path): print( @@ -385,7 +390,12 @@ def extract_laser_conditions(dir_name): Returns: Dictionary with laser condition information """ - trial_info_path = os.path.join(dir_name, 'trial_info_frame.csv') + metadata_dir = get_metadata_dir(dir_name) + trial_info_path = os.path.join(metadata_dir, 'trial_info_frame.csv') + # Check old location for backward compatibility + old_path = os.path.join(dir_name, 'trial_info_frame.csv') + if not os.path.exists(trial_info_path) and os.path.exists(old_path): + trial_info_path = old_path if not os.path.exists(trial_info_path): print(f"Warning: {trial_info_path} not found.") diff --git a/utils/blech_process_utils.py b/utils/blech_process_utils.py index c89fcfb3..5ba4bd0d 100644 --- a/utils/blech_process_utils.py +++ b/utils/blech_process_utils.py @@ -52,7 +52,7 @@ # Set environment variables to limit the number of threads used by various libraries # Do it at the start of the script to ensure it applies to all imported libraries -from blech_clust.utils.blech_utils import ifisdir_rmdir +from blech_clust.utils.blech_utils import ifisdir_rmdir, get_metadata_dir import os # noqa os.environ['OMP_NUM_THREADS'] = '1' # noqa os.environ['MKL_NUM_THREADS'] = '1' # noqa @@ -761,7 +761,9 @@ def write_out_recommendations(self): Write out electrode number, count, mean prediction probability, and 5,95th percentiles """ waveform_thresh = self.classifier_params['min_suggestion_count'] - out_file_path = self.data_dir + '/waveform_classifier_recommendations.csv' + metadata_dir = get_metadata_dir(self.data_dir) + out_file_path = os.path.join( + metadata_dir, 'waveform_classifier_recommendations.csv') count = len(self.pos_spike_dict['waveforms']) if count > waveform_thresh: percentile_5 = np.percentile(self.pos_spike_dict['prob'], 5) diff --git a/utils/blech_utils.py b/utils/blech_utils.py index f0a71c93..b533bd18 100644 --- a/utils/blech_utils.py +++ b/utils/blech_utils.py @@ -48,11 +48,50 @@ import shutil +def ensure_dir(data_dir, subdir): + """Ensure a subdirectory exists in data_dir and return its path.""" + dir_path = os.path.join(data_dir, subdir) + os.makedirs(dir_path, exist_ok=True) + return dir_path + + +def get_metadata_dir(data_dir): + """Return the metadata directory path, creating it if necessary.""" + return ensure_dir(data_dir, 'metadata') + + +def get_logs_dir(data_dir): + """Return the logs directory path, creating it if necessary.""" + return ensure_dir(data_dir, 'logs') + + +def find_file_in_dir(data_dir, filename_pattern): + """ + Find a file matching the pattern in data_dir or its subdirectories. + Searches in metadata/ first, then falls back to data_dir root for backward compatibility. + """ + # First check in metadata directory + metadata_dir = os.path.join(data_dir, 'metadata') + if os.path.exists(metadata_dir): + matches = glob.glob(os.path.join(metadata_dir, filename_pattern)) + if matches: + return matches[0] + + # Fall back to data_dir root + matches = glob.glob(os.path.join(data_dir, filename_pattern)) + if matches: + return matches[0] + + return None + + class Tee: """Tee output to both stdout/stderr and a log file""" def __init__(self, data_dir, name='output.log'): - self.log_path = os.path.join(data_dir, name) + # Create logs directory if it doesn't exist + logs_dir = ensure_dir(data_dir, 'logs') + self.log_path = os.path.join(logs_dir, name) self.file = open(self.log_path, 'a') self.stdout = sys.stdout self.stderr = sys.stderr @@ -217,7 +256,12 @@ def check_previous(self, script_path): if type(parent_script) != list: parent_script = [parent_script] # Check that parent script is present in log - self.log_path = os.path.join(self.data_dir, 'execution_log.json') + self.log_path = os.path.join(ensure_dir( + self.data_dir, 'logs'), 'execution_log.json') + # Check old location for backward compatibility + old_log_path = os.path.join(self.data_dir, 'execution_log.json') + if not os.path.exists(self.log_path) and os.path.exists(old_log_path): + self.log_path = old_log_path if os.path.exists(self.log_path): with open(self.log_path, 'r') as log_file_connect: log_dict = json.load(log_file_connect) @@ -240,7 +284,12 @@ def write_to_log(self, script_path, type='attempted'): """ if not hasattr(self, 'git_str'): raise ValueError('Run get_git_info() first') - self.log_path = os.path.join(self.data_dir, 'execution_log.json') + self.log_path = os.path.join(ensure_dir( + self.data_dir, 'logs'), 'execution_log.json') + # Check old location for backward compatibility + old_log_path = os.path.join(self.data_dir, 'execution_log.json') + if not os.path.exists(self.log_path) and os.path.exists(old_log_path): + self.log_path = old_log_path current_datetime = datetime.now().strftime('%Y-%m-%d %H:%M:%S') if os.path.exists(self.log_path): with open(self.log_path, 'r') as log_file_connect: @@ -592,18 +641,30 @@ def get_hdf5_name(self,): print('No HDF5 file found') def get_params_path(self,): - file_list = glob.glob(os.path.join(self.dir_name, '**.params')) + # Check metadata directory first, then fall back to data_dir root + metadata_dir = os.path.join(self.dir_name, 'metadata') + file_list = glob.glob(os.path.join(metadata_dir, '**.params')) if len(file_list) > 0: self.params_file_path = file_list[0] else: - print('No PARAMS file found') + file_list = glob.glob(os.path.join(self.dir_name, '**.params')) + if len(file_list) > 0: + self.params_file_path = file_list[0] + else: + print('No PARAMS file found') def get_layout_path(self,): - file_list = glob.glob(os.path.join(self.dir_name, '**layout.csv')) + # Check metadata directory first, then fall back to data_dir root + metadata_dir = os.path.join(self.dir_name, 'metadata') + file_list = glob.glob(os.path.join(metadata_dir, '**layout.csv')) if len(file_list) > 0: self.layout_file_path = file_list[0] else: - print('No LAYOUT file found') + file_list = glob.glob(os.path.join(self.dir_name, '**layout.csv')) + if len(file_list) > 0: + self.layout_file_path = file_list[0] + else: + print('No LAYOUT file found') def load_params(self,): self.get_params_path() @@ -612,11 +673,17 @@ def load_params(self,): self.params_dict = json.load(params_file_connect) def get_info_path(self,): - file_list = glob.glob(os.path.join(self.dir_name, '**.info')) + # Check metadata directory first, then fall back to data_dir root + metadata_dir = os.path.join(self.dir_name, 'metadata') + file_list = glob.glob(os.path.join(metadata_dir, '**.info')) if len(file_list) > 0: self.info_file_path = file_list[0] else: - print('No INFO file found') + file_list = glob.glob(os.path.join(self.dir_name, '**.info')) + if len(file_list) > 0: + self.info_file_path = file_list[0] + else: + print('No INFO file found') def load_info(self,): self.get_info_path() diff --git a/utils/ephys_data/ephys_data.py b/utils/ephys_data/ephys_data.py index c339bace..d1c6904f 100755 --- a/utils/ephys_data/ephys_data.py +++ b/utils/ephys_data/ephys_data.py @@ -217,6 +217,7 @@ import numpy as np import pandas as pd import pingouin as pg +from blech_clust.utils.blech_utils import get_metadata_dir # ______ _ _____ _ # | ____| | | | __ \ | | @@ -1624,8 +1625,18 @@ def get_trial_info_frame(self): - start_taste_ms: Trial start time in ms - end_taste_ms: Trial end time in ms """ - self.trial_info_frame = pd.read_csv( - os.path.join(self.data_dir, 'trial_info_frame.csv')) + # Check new location (metadata dir) first, then fall back to old location + metadata_dir = get_metadata_dir(self.data_dir) + new_path = os.path.join(metadata_dir, 'trial_info_frame.csv') + old_path = os.path.join(self.data_dir, 'trial_info_frame.csv') + + if os.path.exists(new_path): + self.trial_info_frame = pd.read_csv(new_path) + elif os.path.exists(old_path): + self.trial_info_frame = pd.read_csv(old_path) + else: + raise FileNotFoundError( + "trial_info_frame.csv not found in metadata/ or data directory") def sequester_trial_inds(self): """Organize trial indices by taste and laser condition @@ -2088,7 +2099,12 @@ def profile_units(self, save_to_file=True, alpha=0.05, recalculate=False): print("="*40) print("Unit Profiling") print("="*40) - csv_path = os.path.join(self.data_dir, 'unit_profile.csv') + metadata_dir = get_metadata_dir(self.data_dir) + csv_path = os.path.join(metadata_dir, 'unit_profile.csv') + # Check old location for backward compatibility + old_path = os.path.join(self.data_dir, 'unit_profile.csv') + if not os.path.exists(csv_path) and os.path.exists(old_path): + csv_path = old_path if os.path.exists(csv_path) and not recalculate: print(f"Loading existing unit profile from {csv_path}") print("***To recalculate, set recalculate=True***") diff --git a/utils/qa_utils/elbo_drift.py b/utils/qa_utils/elbo_drift.py index ad086735..31f6a1d1 100644 --- a/utils/qa_utils/elbo_drift.py +++ b/utils/qa_utils/elbo_drift.py @@ -24,6 +24,7 @@ import sys from tqdm import tqdm, trange import argparse +from blech_clust.utils.blech_utils import get_metadata_dir parser = argparse.ArgumentParser() parser.add_argument('dir_name', type=str, help='Directory containing data') parser.add_argument('--force', action='store_true', help='Force re-fitting') @@ -152,7 +153,12 @@ def ridge_plot( # Load Data ############################################################ # Load trial times from trial_info_frame.csv -trial_info_path = os.path.join(dir_name, 'trial_info_frame.csv') +metadata_dir = get_metadata_dir(dir_name) +trial_info_path = os.path.join(metadata_dir, 'trial_info_frame.csv') +# Check old location for backward compatibility +old_path = os.path.join(dir_name, 'trial_info_frame.csv') +if not os.path.exists(trial_info_path) and os.path.exists(old_path): + trial_info_path = old_path trial_info_frame = pd.read_csv(trial_info_path) # Open the hdf5 file diff --git a/utils/ram_monitor.py b/utils/ram_monitor.py index 1ae5640a..d72f46fd 100644 --- a/utils/ram_monitor.py +++ b/utils/ram_monitor.py @@ -14,7 +14,9 @@ def monitor_ram(output_dir): """Monitor RAM usage and write to a log file""" - log_file = os.path.join(output_dir, "ram_usage.log") + logs_dir = os.path.join(output_dir, 'logs') + os.makedirs(logs_dir, exist_ok=True) + log_file = os.path.join(logs_dir, "ram_usage.log") with open(log_file, 'a') as f: f.write("Timestamp,RAM_Used_GB,RAM_Total_GB,RAM_Percent\n") diff --git a/utils/read_file.py b/utils/read_file.py index 28e2fb51..8070cce4 100644 --- a/utils/read_file.py +++ b/utils/read_file.py @@ -27,6 +27,7 @@ # https://github.com/Intan-Technologies/load-rhd-notebook-python from blech_clust.utils.importrhdutilities import load_file, read_header +from blech_clust.utils.blech_utils import get_metadata_dir class DigInHandler: @@ -224,16 +225,24 @@ def get_trial_data(self): self.dig_in_frame = dig_in_frame def write_out_frame(self): - # Write out the dig-in frame + # Write out the dig-in frame to metadata directory + metadata_dir = get_metadata_dir(self.data_dir) self.dig_in_frame.to_csv(os.path.join( - self.data_dir, 'dig_in_frame.csv')) - print('Dig-in frame written out to dig_in_frame.csv') + metadata_dir, 'dig_in_frame.csv')) + print('Dig-in frame written out to metadata/dig_in_frame.csv') def load_dig_in_frame(self): - # Load the dig-in frame - self.dig_in_frame = pd.read_csv(os.path.join(self.data_dir, 'dig_in_frame.csv'), - index_col=0) - print('Dig-in frame loaded from dig_in_frame.csv') + # Load the dig-in frame, checking both metadata dir and old location + metadata_dir = os.path.join(self.data_dir, 'metadata') + old_path = os.path.join(self.data_dir, 'dig_in_frame.csv') + new_path = os.path.join(metadata_dir, 'dig_in_frame.csv') + + if os.path.exists(new_path): + self.dig_in_frame = pd.read_csv(new_path, index_col=0) + print('Dig-in frame loaded from metadata/dig_in_frame.csv') + elif os.path.exists(old_path): + self.dig_in_frame = pd.read_csv(old_path, index_col=0) + print('Dig-in frame loaded from dig_in_frame.csv') def read_traditional_intan( diff --git a/utils/umap_plotting/umap_plots.py b/utils/umap_plotting/umap_plots.py index 931eb799..398621f2 100644 --- a/utils/umap_plotting/umap_plots.py +++ b/utils/umap_plotting/umap_plots.py @@ -255,9 +255,12 @@ def run_pipeline(this_row, dir_name, zero_ind): print(f'Creating {bash_file}') job_count = cpu_count()-2 f = open(runner_path, 'w') + # Create logs directory if it doesn't exist + logs_dir = os.path.join(dir_name, 'logs') + os.makedirs(logs_dir, exist_ok=True) print(f"parallel -k -j {job_count} --noswap --load 100% --progress " + "--memfree 4G --retry-failed " + - f"--joblog {dir_name}/umap_results.log " + + f"--joblog {logs_dir}/umap_results.log " + f"python umap_plots.py " + f"::: {' '.join([str(x) for x in range(len(path_frame))])}", file=f)