From 28b13003a4897dc93d5700a35c6cdac844616b6d Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Wed, 18 Feb 2026 10:46:41 +0000
Subject: [PATCH 1/2] Add logs and metadata directories

- Created logs/ directory for all log files:
  - output.log (Tee class)
  - results.log (blech_init.py)
  - ram_usage.log (ram_monitor.py)
  - blech_process.log (blech_process.py)
  - umap_results.log (umap_plots.py)
  - execution_log.json (pipeline_graph_check)

- Created metadata/ directory for all metadata files:
  - electrode_layout_frame.csv
  - trial_info_frame.csv
  - dig_in_frame.csv
  - *.info file
  - *.params file
  - aggregated_characteristics.csv
  - unit_descriptor.csv
  - unit_profile.csv
  - waveform_classifier_recommendations.csv

- Added backward compatibility to check old locations if files
  don't exist in new directories

- Added helper functions:
  - ensure_dir(data_dir, subdir)
  - get_metadata_dir(data_dir)
  - get_logs_dir(data_dir)
  - find_file_in_dir(data_dir, filename_pattern)

Co-authored-by: openhands <openhands@all-hands.dev>
---
 blech_exp_info.py                 |  10 +-
 blech_init.py                     |   6 +-
 blech_make_arrays.py              |   5 +-
 blech_post_process.py             |   5 +-
 blech_process.py                  |   4 +-
 blech_units_characteristics.py    |   5 +-
 emg/emg_freq_setup.py             |  21 +++-
 tests/test_logs_metadata_dirs.py  | 155 ++++++++++++++++++++++++++++++
 utils/blech_data_summary.py       |  17 +++-
 utils/blech_process_utils.py      |   5 +-
 utils/blech_utils.py              |  83 ++++++++++++++--
 utils/ephys_data/ephys_data.py    |  21 +++-
 utils/qa_utils/elbo_drift.py      |   8 +-
 utils/ram_monitor.py              |   4 +-
 utils/read_file.py                |  23 +++--
 utils/umap_plotting/umap_plots.py |   5 +-
 16 files changed, 336 insertions(+), 41 deletions(-)
 create mode 100644 tests/test_logs_metadata_dirs.py

diff --git a/blech_exp_info.py b/blech_exp_info.py
index 9b4f6fd3..5f2cd56c 100644
--- a/blech_exp_info.py
+++ b/blech_exp_info.py
@@ -868,8 +868,11 @@ def process_electrode_layout(dir_path, dir_name, electrode_files, ports, electro
     Returns:
         Tuple containing layout_dict, fin_emg_port, orig_emg_electrodes, emg_muscle_str
     """
+    # Use metadata directory for layout file
+    metadata_dir = os.path.join(dir_path, 'metadata')
+    os.makedirs(metadata_dir, exist_ok=True)
     layout_file_path = os.path.join(
-        dir_path, dir_name + "_electrode_layout.csv")
+        metadata_dir, dir_name + "_electrode_layout.csv")
 
     def yn_check(x):
         return x in ['y', 'yes', 'n', 'no', '']
@@ -930,6 +933,11 @@ def confirm_check(x):
                 exit()
 
     # Read and process the layout file
+    # Check both metadata directory and old location for backward compatibility
+    if not os.path.exists(layout_file_path):
+        old_layout_path = os.path.join(dir_path, dir_name + "_electrode_layout.csv")
+        if os.path.exists(old_layout_path):
+            layout_file_path = old_layout_path
     layout_frame_filled = pd.read_csv(layout_file_path)
 
     if not args.programmatic:
diff --git a/blech_init.py b/blech_init.py
index 41a8f54f..f51f4462 100644
--- a/blech_init.py
+++ b/blech_init.py
@@ -170,7 +170,7 @@ def generate_processing_scripts(dir_name, blech_clust_dir, electrode_layout_fram
         f.write(f'DIR={dir_name} \n')
         print(f"parallel -k -j {job_count} --noswap --load 100% --progress " +
               "--memfree 4G --ungroup --retry-failed " +
-              f"--joblog $DIR/results.log " +
+              f"--joblog $DIR/logs/results.log " +
               "bash $DIR/temp/blech_process_single.sh " +
               f"::: {' '.join([str(x) for x in bash_electrode_list])}",
               file=f)
@@ -204,7 +204,9 @@ def generate_processing_scripts(dir_name, blech_clust_dir, electrode_layout_fram
     this_pipeline_check.write_to_log(script_path, 'attempted')
     # If info_dict present but execution log is not
     # just create the execution log with blech_exp_info marked
-    if 'info_dict' in dir(metadata_handler) and not os.path.exists(metadata_handler.dir_name + '/execution_log.json'):
+    execution_log_path_new = os.path.join(metadata_handler.dir_name, 'logs', 'execution_log.json')
+    execution_log_path_old = os.path.join(metadata_handler.dir_name, 'execution_log.json')
+    if 'info_dict' in dir(metadata_handler) and not os.path.exists(execution_log_path_new) and not os.path.exists(execution_log_path_old):
         blech_exp_info_path = os.path.join(
             blech_clust_dir, 'blech_exp_info.py')
         this_pipeline_check.write_to_log(blech_exp_info_path, 'attempted')
diff --git a/blech_make_arrays.py b/blech_make_arrays.py
index 14fba2d1..9360944c 100644
--- a/blech_make_arrays.py
+++ b/blech_make_arrays.py
@@ -21,7 +21,7 @@
 from tqdm import tqdm
 from blech_clust.utils.clustering import get_filtered_electrode
 from blech_clust.utils.blech_process_utils import return_cutoff_values
-from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check
+from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check, get_metadata_dir
 from blech_clust.utils.read_file import DigInHandler
 from ast import literal_eval
 import matplotlib.pyplot as plt
@@ -376,7 +376,8 @@ def create_emg_trials_for_digin(
 
     trial_info_frame.to_hdf(metadata_handler.hdf5_name,
                             'trial_info_frame', mode='a')
-    csv_path = os.path.join(metadata_handler.dir_name, 'trial_info_frame.csv')
+    metadata_dir = get_metadata_dir(metadata_handler.dir_name)
+    csv_path = os.path.join(metadata_dir, 'trial_info_frame.csv')
     trial_info_frame.to_csv(csv_path, index=False)
 
     # Get list of units under the sorted_units group.
diff --git a/blech_post_process.py b/blech_post_process.py
index 411597c4..562ac919 100644
--- a/blech_post_process.py
+++ b/blech_post_process.py
@@ -87,7 +87,7 @@
 os.environ['OPENBLAS_NUM_THREADS'] = '1'  # noqa
 
 import blech_clust.utils.blech_post_process_utils as post_utils  # noqa
-from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check  # noqa
+from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check, get_metadata_dir  # noqa
 from utils import blech_waveforms_datashader  # noqa
 from multiprocessing import Pool, cpu_count  # noqa
 from functools import partial  # noqa
@@ -588,8 +588,9 @@
 print('==== Unit Table ====\n')
 print(current_unit_table)
 # Also write to disk
+metadata_dir = get_metadata_dir(metadata_handler.dir_name)
 current_unit_table.to_csv(
-    os.path.join(metadata_handler.dir_name, 'unit_descriptor.csv'),
+    os.path.join(metadata_dir, 'unit_descriptor.csv'),
     index=False,
 )
 
diff --git a/blech_process.py b/blech_process.py
index 8c80e897..294c3aa3 100644
--- a/blech_process.py
+++ b/blech_process.py
@@ -98,7 +98,9 @@
 print(f'Processing electrode {electrode_num}')
 
 # Initialize or load processing log
-log_path = pathlib.Path(metadata_handler.dir_name) / 'blech_process.log'
+log_dir = os.path.join(metadata_handler.dir_name, 'logs')
+os.makedirs(log_dir, exist_ok=True)
+log_path = pathlib.Path(log_dir) / 'blech_process.log'
 if log_path.exists():
     with open(log_path) as f:
         process_log = json.load(f)
diff --git a/blech_units_characteristics.py b/blech_units_characteristics.py
index 28ab153d..8179c114 100644
--- a/blech_units_characteristics.py
+++ b/blech_units_characteristics.py
@@ -26,7 +26,7 @@
 import json
 import glob
 import itertools
-from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check
+from blech_clust.utils.blech_utils import entry_checker, imp_metadata, pipeline_graph_check, get_metadata_dir
 from blech_clust.utils.ephys_data import ephys_data
 from blech_clust.utils.ephys_data import visualize as vz
 import pandas as pd
@@ -765,7 +765,8 @@ def palatability_corr(x):
     [this_resp_neurons, taste_bin_pval_frame, this_pal_sig_frame])
 out_frame.drop(columns=['p-unc'], inplace=True)
 
-out_frame.to_csv(os.path.join(dir_name, 'aggregated_characteristics.csv'),
+metadata_dir = get_metadata_dir(dir_name)
+out_frame.to_csv(os.path.join(metadata_dir, 'aggregated_characteristics.csv'),
                  index=False)
 out_frame.to_hdf(
     metadata_handler.hdf5_name,
diff --git a/emg/emg_freq_setup.py b/emg/emg_freq_setup.py
index ff8cafb2..24698722 100644
--- a/emg/emg_freq_setup.py
+++ b/emg/emg_freq_setup.py
@@ -240,6 +240,12 @@
 
 # Also delete log
 print('Deleting results.log')
+logs_dir = os.path.join(data_dir, 'logs')
+os.makedirs(logs_dir, exist_ok=True)
+results_log_path = os.path.join(logs_dir, 'results.log')
+if os.path.exists(results_log_path):
+    os.remove(results_log_path)
+# Also check old location for backward compatibility
 if os.path.exists('results.log'):
     os.remove('results.log')
 
@@ -255,7 +261,7 @@
     data_dir,
     len(emg_env_df)-1)
 print(
-    "parallel -k -j {:d} --noswap --load 100% --progress --ungroup --joblog {:s}/results.log bash blech_emg_jetstream_parallel1.sh ::: {{0..{:d}}}".format(
+    "parallel -k -j {:d} --noswap --load 100% --progress --ungroup --joblog {:s}/logs/results.log bash blech_emg_jetstream_parallel1.sh ::: {{0..{:d}}}".format(
         *format_args),
     file=f)
 f.close()
@@ -281,8 +287,17 @@
 ############################################################
 # Merge the emg_env_df with trial_info_df
 ############################################################
-# Also get trial_info_frame
-trial_info_frame = pd.read_csv(os.path.join(data_dir, 'trial_info_frame.csv'))
+# Also get trial_info_frame (check both metadata dir and old location)
+metadata_dir = os.path.join(data_dir, 'metadata')
+trial_info_path_new = os.path.join(metadata_dir, 'trial_info_frame.csv')
+trial_info_path_old = os.path.join(data_dir, 'trial_info_frame.csv')
+
+if os.path.exists(trial_info_path_new):
+    trial_info_frame = pd.read_csv(trial_info_path_new)
+elif os.path.exists(trial_info_path_old):
+    trial_info_frame = pd.read_csv(trial_info_path_old)
+else:
+    raise FileNotFoundError("trial_info_frame.csv not found in metadata/ or data directory")
 
 merge_frame = pd.merge(emg_env_df, trial_info_frame,
                        left_on=['dig_in', 'trial_inds'],
diff --git a/tests/test_logs_metadata_dirs.py b/tests/test_logs_metadata_dirs.py
new file mode 100644
index 00000000..a6b8f0a1
--- /dev/null
+++ b/tests/test_logs_metadata_dirs.py
@@ -0,0 +1,155 @@
+"""
+Tests for verifying that logs and metadata are placed in the correct directories.
+These tests verify the source code changes directly without importing.
+"""
+
+
+def test_ensure_dir_function():
+    """Test the ensure_dir function logic directly"""
+    # Read the function source
+    with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f:
+        source = f.read()
+    
+    # Check that ensure_dir is defined
+    assert 'def ensure_dir(data_dir, subdir):' in source
+    
+    # Check that get_metadata_dir is defined
+    assert 'def get_metadata_dir(data_dir):' in source
+    
+    # Check that get_logs_dir is defined
+    assert 'def get_logs_dir(data_dir):' in source
+    
+    print("✓ All required functions are defined in blech_utils.py")
+
+
+def test_logs_directory_creation_in_tee():
+    """Test that Tee class uses logs directory"""
+    with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f:
+        source = f.read()
+    
+    # Check that Tee uses logs directory
+    assert "logs_dir = ensure_dir(data_dir, 'logs')" in source
+    assert "self.log_path = os.path.join(logs_dir, name)" in source
+    print("✓ Tee class correctly uses logs directory")
+
+
+def test_logs_directory_in_blech_process():
+    """Test that blech_process.py uses logs directory"""
+    with open('/workspace/project/blech_clust/blech_process.py', 'r') as f:
+        source = f.read()
+    
+    # Check that it creates logs directory
+    assert "log_dir = os.path.join(metadata_handler.dir_name, 'logs')" in source
+    assert "os.makedirs(log_dir, exist_ok=True)" in source
+    print("✓ blech_process.py correctly uses logs directory")
+
+
+def test_logs_directory_in_blech_init():
+    """Test that blech_init.py uses logs directory"""
+    with open('/workspace/project/blech_clust/blech_init.py', 'r') as f:
+        source = f.read()
+    
+    # Check that it uses logs directory for results.log
+    assert "logs/results.log" in source
+    print("✓ blech_init.py correctly uses logs directory for results.log")
+
+
+def test_logs_directory_in_ram_monitor():
+    """Test that ram_monitor.py uses logs directory"""
+    with open('/workspace/project/blech_clust/utils/ram_monitor.py', 'r') as f:
+        source = f.read()
+    
+    # Check that it uses logs directory
+    assert "logs_dir = os.path.join(output_dir, 'logs')" in source
+    assert "ram_usage.log" in source
+    print("✓ ram_monitor.py correctly uses logs directory")
+
+
+def test_metadata_directory_in_read_file():
+    """Test that read_file.py uses metadata directory"""
+    with open('/workspace/project/blech_clust/utils/read_file.py', 'r') as f:
+        source = f.read()
+    
+    # Check that it uses get_metadata_dir
+    assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source
+    assert "metadata_dir = get_metadata_dir(self.data_dir)" in source
+    print("✓ read_file.py correctly uses metadata directory")
+
+
+def test_metadata_directory_in_ephys_data():
+    """Test that ephys_data.py uses metadata directory"""
+    with open('/workspace/project/blech_clust/utils/ephys_data/ephys_data.py', 'r') as f:
+        source = f.read()
+    
+    # Check that it uses get_metadata_dir
+    assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source
+    print("✓ ephys_data.py correctly imports get_metadata_dir")
+
+
+def test_metadata_directory_in_blech_units_characteristics():
+    """Test that blech_units_characteristics.py uses metadata directory"""
+    with open('/workspace/project/blech_clust/blech_units_characteristics.py', 'r') as f:
+        source = f.read()
+    
+    # Check that it uses get_metadata_dir for aggregated_characteristics.csv
+    assert "get_metadata_dir" in source
+    assert "aggregated_characteristics.csv" in source
+    print("✓ blech_units_characteristics.py correctly uses metadata directory")
+
+
+def test_metadata_directory_in_blech_post_process():
+    """Test that blech_post_process.py uses metadata directory"""
+    with open('/workspace/project/blech_clust/blech_post_process.py', 'r') as f:
+        source = f.read()
+    
+    # Check that it uses get_metadata_dir for unit_descriptor.csv
+    assert "get_metadata_dir" in source
+    assert "unit_descriptor.csv" in source
+    print("✓ blech_post_process.py correctly uses metadata directory")
+
+
+def test_metadata_directory_in_blech_make_arrays():
+    """Test that blech_make_arrays.py uses metadata directory"""
+    with open('/workspace/project/blech_clust/blech_make_arrays.py', 'r') as f:
+        source = f.read()
+    
+    # Check that it uses get_metadata_dir for trial_info_frame.csv
+    assert "get_metadata_dir" in source
+    assert "trial_info_frame.csv" in source
+    print("✓ blech_make_arrays.py correctly uses metadata directory")
+
+
+def test_backward_compatibility_check():
+    """Test that backward compatibility is maintained"""
+    # Check execution_log.json backward compatibility
+    with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f:
+        source = f.read()
+    
+    assert "old_log_path = os.path.join(self.data_dir, 'execution_log.json')" in source
+    assert "if not os.path.exists(self.log_path) and os.path.exists(old_log_path):" in source
+    print("✓ Backward compatibility for execution_log.json is maintained")
+
+
+def test_find_file_in_dir_function():
+    """Test that find_file_in_dir function exists for backward compatibility"""
+    with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f:
+        source = f.read()
+    
+    assert "def find_file_in_dir(data_dir, filename_pattern):" in source
+    print("✓ find_file_in_dir function exists for backward compatibility")
+
+
+if __name__ == '__main__':
+    test_ensure_dir_function()
+    test_logs_directory_creation_in_tee()
+    test_logs_directory_in_blech_process()
+    test_logs_directory_in_blech_init()
+    test_logs_directory_in_ram_monitor()
+    test_metadata_directory_in_read_file()
+    test_metadata_directory_in_ephys_data()
+    test_metadata_directory_in_blech_units_characteristics()
+    test_metadata_directory_in_blech_post_process()
+    test_metadata_directory_in_blech_make_arrays()
+    test_backward_compatibility_check()
+    test_find_file_in_dir_function()
+    print("\nAll tests passed!")
diff --git a/utils/blech_data_summary.py b/utils/blech_data_summary.py
index ae4f4dd4..098693ee 100644
--- a/utils/blech_data_summary.py
+++ b/utils/blech_data_summary.py
@@ -59,7 +59,7 @@
 blech_clust_dir = os.path.dirname(os.path.dirname(script_path))
 sys.path.append(blech_clust_dir)
 from blech_clust.utils.qa_utils.channel_corr import get_all_channels, intra_corr  # noqa
-from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check  # noqa
+from blech_clust.utils.blech_utils import imp_metadata, pipeline_graph_check, get_metadata_dir  # noqa
 
 
 def extract_unit_characteristics(dir_name):
@@ -152,8 +152,12 @@ def extract_unit_characteristics(dir_name):
         print("Falling back to aggregated_characteristics.csv...")
 
     # Fallback: read from aggregated_characteristics.csv
-    characteristics_path = os.path.join(
-        dir_name, 'aggregated_characteristics.csv')
+    metadata_dir = get_metadata_dir(dir_name)
+    characteristics_path = os.path.join(metadata_dir, 'aggregated_characteristics.csv')
+    # Check old location for backward compatibility
+    old_path = os.path.join(dir_name, 'aggregated_characteristics.csv')
+    if not os.path.exists(characteristics_path) and os.path.exists(old_path):
+        characteristics_path = old_path
 
     if not os.path.exists(characteristics_path):
         print(
@@ -385,7 +389,12 @@ def extract_laser_conditions(dir_name):
     Returns:
         Dictionary with laser condition information
     """
-    trial_info_path = os.path.join(dir_name, 'trial_info_frame.csv')
+    metadata_dir = get_metadata_dir(dir_name)
+    trial_info_path = os.path.join(metadata_dir, 'trial_info_frame.csv')
+    # Check old location for backward compatibility
+    old_path = os.path.join(dir_name, 'trial_info_frame.csv')
+    if not os.path.exists(trial_info_path) and os.path.exists(old_path):
+        trial_info_path = old_path
 
     if not os.path.exists(trial_info_path):
         print(f"Warning: {trial_info_path} not found.")
diff --git a/utils/blech_process_utils.py b/utils/blech_process_utils.py
index c89fcfb3..d6926565 100644
--- a/utils/blech_process_utils.py
+++ b/utils/blech_process_utils.py
@@ -52,7 +52,7 @@
 
 # Set environment variables to limit the number of threads used by various libraries
 # Do it at the start of the script to ensure it applies to all imported libraries
-from blech_clust.utils.blech_utils import ifisdir_rmdir
+from blech_clust.utils.blech_utils import ifisdir_rmdir, get_metadata_dir
 import os  # noqa
 os.environ['OMP_NUM_THREADS'] = '1'  # noqa
 os.environ['MKL_NUM_THREADS'] = '1'  # noqa
@@ -761,7 +761,8 @@ def write_out_recommendations(self):
         Write out electrode number, count, mean prediction probability, and 5,95th percentiles
         """
         waveform_thresh = self.classifier_params['min_suggestion_count']
-        out_file_path = self.data_dir + '/waveform_classifier_recommendations.csv'
+        metadata_dir = get_metadata_dir(self.data_dir)
+        out_file_path = os.path.join(metadata_dir, 'waveform_classifier_recommendations.csv')
         count = len(self.pos_spike_dict['waveforms'])
         if count > waveform_thresh:
             percentile_5 = np.percentile(self.pos_spike_dict['prob'], 5)
diff --git a/utils/blech_utils.py b/utils/blech_utils.py
index f0a71c93..cb4739bc 100644
--- a/utils/blech_utils.py
+++ b/utils/blech_utils.py
@@ -48,11 +48,50 @@
 import shutil
 
 
+def ensure_dir(data_dir, subdir):
+    """Ensure a subdirectory exists in data_dir and return its path."""
+    dir_path = os.path.join(data_dir, subdir)
+    os.makedirs(dir_path, exist_ok=True)
+    return dir_path
+
+
+def get_metadata_dir(data_dir):
+    """Return the metadata directory path, creating it if necessary."""
+    return ensure_dir(data_dir, 'metadata')
+
+
+def get_logs_dir(data_dir):
+    """Return the logs directory path, creating it if necessary."""
+    return ensure_dir(data_dir, 'logs')
+
+
+def find_file_in_dir(data_dir, filename_pattern):
+    """
+    Find a file matching the pattern in data_dir or its subdirectories.
+    Searches in metadata/ first, then falls back to data_dir root for backward compatibility.
+    """
+    # First check in metadata directory
+    metadata_dir = os.path.join(data_dir, 'metadata')
+    if os.path.exists(metadata_dir):
+        matches = glob.glob(os.path.join(metadata_dir, filename_pattern))
+        if matches:
+            return matches[0]
+    
+    # Fall back to data_dir root
+    matches = glob.glob(os.path.join(data_dir, filename_pattern))
+    if matches:
+        return matches[0]
+    
+    return None
+
+
 class Tee:
     """Tee output to both stdout/stderr and a log file"""
 
     def __init__(self, data_dir, name='output.log'):
-        self.log_path = os.path.join(data_dir, name)
+        # Create logs directory if it doesn't exist
+        logs_dir = ensure_dir(data_dir, 'logs')
+        self.log_path = os.path.join(logs_dir, name)
         self.file = open(self.log_path, 'a')
         self.stdout = sys.stdout
         self.stderr = sys.stderr
@@ -217,7 +256,11 @@ def check_previous(self, script_path):
             if type(parent_script) != list:
                 parent_script = [parent_script]
             # Check that parent script is present in log
-            self.log_path = os.path.join(self.data_dir, 'execution_log.json')
+            self.log_path = os.path.join(ensure_dir(self.data_dir, 'logs'), 'execution_log.json')
+            # Check old location for backward compatibility
+            old_log_path = os.path.join(self.data_dir, 'execution_log.json')
+            if not os.path.exists(self.log_path) and os.path.exists(old_log_path):
+                self.log_path = old_log_path
             if os.path.exists(self.log_path):
                 with open(self.log_path, 'r') as log_file_connect:
                     log_dict = json.load(log_file_connect)
@@ -240,7 +283,11 @@ def write_to_log(self, script_path, type='attempted'):
         """
         if not hasattr(self, 'git_str'):
             raise ValueError('Run get_git_info() first')
-        self.log_path = os.path.join(self.data_dir, 'execution_log.json')
+        self.log_path = os.path.join(ensure_dir(self.data_dir, 'logs'), 'execution_log.json')
+        # Check old location for backward compatibility
+        old_log_path = os.path.join(self.data_dir, 'execution_log.json')
+        if not os.path.exists(self.log_path) and os.path.exists(old_log_path):
+            self.log_path = old_log_path
         current_datetime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         if os.path.exists(self.log_path):
             with open(self.log_path, 'r') as log_file_connect:
@@ -592,18 +639,30 @@ def get_hdf5_name(self,):
             print('No HDF5 file found')
 
     def get_params_path(self,):
-        file_list = glob.glob(os.path.join(self.dir_name, '**.params'))
+        # Check metadata directory first, then fall back to data_dir root
+        metadata_dir = os.path.join(self.dir_name, 'metadata')
+        file_list = glob.glob(os.path.join(metadata_dir, '**.params'))
         if len(file_list) > 0:
             self.params_file_path = file_list[0]
         else:
-            print('No PARAMS file found')
+            file_list = glob.glob(os.path.join(self.dir_name, '**.params'))
+            if len(file_list) > 0:
+                self.params_file_path = file_list[0]
+            else:
+                print('No PARAMS file found')
 
     def get_layout_path(self,):
-        file_list = glob.glob(os.path.join(self.dir_name, '**layout.csv'))
+        # Check metadata directory first, then fall back to data_dir root
+        metadata_dir = os.path.join(self.dir_name, 'metadata')
+        file_list = glob.glob(os.path.join(metadata_dir, '**layout.csv'))
         if len(file_list) > 0:
             self.layout_file_path = file_list[0]
         else:
-            print('No LAYOUT file found')
+            file_list = glob.glob(os.path.join(self.dir_name, '**layout.csv'))
+            if len(file_list) > 0:
+                self.layout_file_path = file_list[0]
+            else:
+                print('No LAYOUT file found')
 
     def load_params(self,):
         self.get_params_path()
@@ -612,11 +671,17 @@ def load_params(self,):
                 self.params_dict = json.load(params_file_connect)
 
     def get_info_path(self,):
-        file_list = glob.glob(os.path.join(self.dir_name, '**.info'))
+        # Check metadata directory first, then fall back to data_dir root
+        metadata_dir = os.path.join(self.dir_name, 'metadata')
+        file_list = glob.glob(os.path.join(metadata_dir, '**.info'))
         if len(file_list) > 0:
             self.info_file_path = file_list[0]
         else:
-            print('No INFO file found')
+            file_list = glob.glob(os.path.join(self.dir_name, '**.info'))
+            if len(file_list) > 0:
+                self.info_file_path = file_list[0]
+            else:
+                print('No INFO file found')
 
     def load_info(self,):
         self.get_info_path()
diff --git a/utils/ephys_data/ephys_data.py b/utils/ephys_data/ephys_data.py
index c339bace..c8f90278 100755
--- a/utils/ephys_data/ephys_data.py
+++ b/utils/ephys_data/ephys_data.py
@@ -217,6 +217,7 @@
 import numpy as np
 import pandas as pd
 import pingouin as pg
+from blech_clust.utils.blech_utils import get_metadata_dir
 
 #  ______       _                      _____        _
 # |  ____|     | |                    |  __ \      | |
@@ -1624,8 +1625,17 @@ def get_trial_info_frame(self):
                 - start_taste_ms: Trial start time in ms
                 - end_taste_ms: Trial end time in ms
         """
-        self.trial_info_frame = pd.read_csv(
-            os.path.join(self.data_dir, 'trial_info_frame.csv'))
+        # Check new location (metadata dir) first, then fall back to old location
+        metadata_dir = get_metadata_dir(self.data_dir)
+        new_path = os.path.join(metadata_dir, 'trial_info_frame.csv')
+        old_path = os.path.join(self.data_dir, 'trial_info_frame.csv')
+        
+        if os.path.exists(new_path):
+            self.trial_info_frame = pd.read_csv(new_path)
+        elif os.path.exists(old_path):
+            self.trial_info_frame = pd.read_csv(old_path)
+        else:
+            raise FileNotFoundError("trial_info_frame.csv not found in metadata/ or data directory")
 
     def sequester_trial_inds(self):
         """Organize trial indices by taste and laser condition
@@ -2088,7 +2098,12 @@ def profile_units(self, save_to_file=True, alpha=0.05, recalculate=False):
         print("="*40)
         print("Unit Profiling")
         print("="*40)
-        csv_path = os.path.join(self.data_dir, 'unit_profile.csv')
+        metadata_dir = get_metadata_dir(self.data_dir)
+        csv_path = os.path.join(metadata_dir, 'unit_profile.csv')
+        # Check old location for backward compatibility
+        old_path = os.path.join(self.data_dir, 'unit_profile.csv')
+        if not os.path.exists(csv_path) and os.path.exists(old_path):
+            csv_path = old_path
         if os.path.exists(csv_path) and not recalculate:
             print(f"Loading existing unit profile from {csv_path}")
             print("***To recalculate, set recalculate=True***")
diff --git a/utils/qa_utils/elbo_drift.py b/utils/qa_utils/elbo_drift.py
index ad086735..31f6a1d1 100644
--- a/utils/qa_utils/elbo_drift.py
+++ b/utils/qa_utils/elbo_drift.py
@@ -24,6 +24,7 @@
 import sys
 from tqdm import tqdm, trange
 import argparse
+from blech_clust.utils.blech_utils import get_metadata_dir
 parser = argparse.ArgumentParser()
 parser.add_argument('dir_name', type=str, help='Directory containing data')
 parser.add_argument('--force', action='store_true', help='Force re-fitting')
@@ -152,7 +153,12 @@ def ridge_plot(
 # Load Data
 ############################################################
 # Load trial times from trial_info_frame.csv
-trial_info_path = os.path.join(dir_name, 'trial_info_frame.csv')
+metadata_dir = get_metadata_dir(dir_name)
+trial_info_path = os.path.join(metadata_dir, 'trial_info_frame.csv')
+# Check old location for backward compatibility
+old_path = os.path.join(dir_name, 'trial_info_frame.csv')
+if not os.path.exists(trial_info_path) and os.path.exists(old_path):
+    trial_info_path = old_path
 trial_info_frame = pd.read_csv(trial_info_path)
 
 # Open the hdf5 file
diff --git a/utils/ram_monitor.py b/utils/ram_monitor.py
index 1ae5640a..d72f46fd 100644
--- a/utils/ram_monitor.py
+++ b/utils/ram_monitor.py
@@ -14,7 +14,9 @@
 
 def monitor_ram(output_dir):
     """Monitor RAM usage and write to a log file"""
-    log_file = os.path.join(output_dir, "ram_usage.log")
+    logs_dir = os.path.join(output_dir, 'logs')
+    os.makedirs(logs_dir, exist_ok=True)
+    log_file = os.path.join(logs_dir, "ram_usage.log")
 
     with open(log_file, 'a') as f:
         f.write("Timestamp,RAM_Used_GB,RAM_Total_GB,RAM_Percent\n")
diff --git a/utils/read_file.py b/utils/read_file.py
index 28e2fb51..9889522a 100644
--- a/utils/read_file.py
+++ b/utils/read_file.py
@@ -27,6 +27,7 @@
 # https://github.com/Intan-Technologies/load-rhd-notebook-python
 
 from blech_clust.utils.importrhdutilities import load_file, read_header
+from blech_clust.utils.blech_utils import get_metadata_dir
 
 
 class DigInHandler:
@@ -224,16 +225,24 @@ def get_trial_data(self):
         self.dig_in_frame = dig_in_frame
 
     def write_out_frame(self):
-        # Write out the dig-in frame
+        # Write out the dig-in frame to metadata directory
+        metadata_dir = get_metadata_dir(self.data_dir)
         self.dig_in_frame.to_csv(os.path.join(
-            self.data_dir, 'dig_in_frame.csv'))
-        print('Dig-in frame written out to dig_in_frame.csv')
+            metadata_dir, 'dig_in_frame.csv'))
+        print('Dig-in frame written out to metadata/dig_in_frame.csv')
 
     def load_dig_in_frame(self):
-        # Load the dig-in frame
-        self.dig_in_frame = pd.read_csv(os.path.join(self.data_dir, 'dig_in_frame.csv'),
-                                        index_col=0)
-        print('Dig-in frame loaded from dig_in_frame.csv')
+        # Load the dig-in frame, checking both metadata dir and old location
+        metadata_dir = os.path.join(self.data_dir, 'metadata')
+        old_path = os.path.join(self.data_dir, 'dig_in_frame.csv')
+        new_path = os.path.join(metadata_dir, 'dig_in_frame.csv')
+        
+        if os.path.exists(new_path):
+            self.dig_in_frame = pd.read_csv(new_path, index_col=0)
+            print('Dig-in frame loaded from metadata/dig_in_frame.csv')
+        elif os.path.exists(old_path):
+            self.dig_in_frame = pd.read_csv(old_path, index_col=0)
+            print('Dig-in frame loaded from dig_in_frame.csv')
 
 
 def read_traditional_intan(
diff --git a/utils/umap_plotting/umap_plots.py b/utils/umap_plotting/umap_plots.py
index 931eb799..398621f2 100644
--- a/utils/umap_plotting/umap_plots.py
+++ b/utils/umap_plotting/umap_plots.py
@@ -255,9 +255,12 @@ def run_pipeline(this_row, dir_name, zero_ind):
         print(f'Creating {bash_file}')
         job_count = cpu_count()-2
         f = open(runner_path, 'w')
+        # Create logs directory if it doesn't exist
+        logs_dir = os.path.join(dir_name, 'logs')
+        os.makedirs(logs_dir, exist_ok=True)
         print(f"parallel -k -j {job_count} --noswap --load 100% --progress " +
               "--memfree 4G --retry-failed " +
-              f"--joblog {dir_name}/umap_results.log " +
+              f"--joblog {logs_dir}/umap_results.log " +
               f"python umap_plots.py " +
               f"::: {' '.join([str(x) for x in range(len(path_frame))])}",
               file=f)

From 650a9701f04216335ea37d1ef84bbc2b6b161a1c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Feb 2026 10:48:35 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 blech_exp_info.py                |  3 ++-
 blech_init.py                    |  6 ++++--
 emg/emg_freq_setup.py            |  3 ++-
 tests/test_logs_metadata_dirs.py | 30 +++++++++++++++---------------
 utils/blech_data_summary.py      |  3 ++-
 utils/blech_process_utils.py     |  3 ++-
 utils/blech_utils.py             | 10 ++++++----
 utils/ephys_data/ephys_data.py   |  5 +++--
 utils/read_file.py               |  2 +-
 9 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/blech_exp_info.py b/blech_exp_info.py
index 5f2cd56c..f88f3682 100644
--- a/blech_exp_info.py
+++ b/blech_exp_info.py
@@ -935,7 +935,8 @@ def confirm_check(x):
     # Read and process the layout file
     # Check both metadata directory and old location for backward compatibility
     if not os.path.exists(layout_file_path):
-        old_layout_path = os.path.join(dir_path, dir_name + "_electrode_layout.csv")
+        old_layout_path = os.path.join(
+            dir_path, dir_name + "_electrode_layout.csv")
         if os.path.exists(old_layout_path):
             layout_file_path = old_layout_path
     layout_frame_filled = pd.read_csv(layout_file_path)
diff --git a/blech_init.py b/blech_init.py
index f51f4462..5fd3df5b 100644
--- a/blech_init.py
+++ b/blech_init.py
@@ -204,8 +204,10 @@ def generate_processing_scripts(dir_name, blech_clust_dir, electrode_layout_fram
     this_pipeline_check.write_to_log(script_path, 'attempted')
     # If info_dict present but execution log is not
     # just create the execution log with blech_exp_info marked
-    execution_log_path_new = os.path.join(metadata_handler.dir_name, 'logs', 'execution_log.json')
-    execution_log_path_old = os.path.join(metadata_handler.dir_name, 'execution_log.json')
+    execution_log_path_new = os.path.join(
+        metadata_handler.dir_name, 'logs', 'execution_log.json')
+    execution_log_path_old = os.path.join(
+        metadata_handler.dir_name, 'execution_log.json')
     if 'info_dict' in dir(metadata_handler) and not os.path.exists(execution_log_path_new) and not os.path.exists(execution_log_path_old):
         blech_exp_info_path = os.path.join(
             blech_clust_dir, 'blech_exp_info.py')
diff --git a/emg/emg_freq_setup.py b/emg/emg_freq_setup.py
index 24698722..f57a65f9 100644
--- a/emg/emg_freq_setup.py
+++ b/emg/emg_freq_setup.py
@@ -297,7 +297,8 @@
 elif os.path.exists(trial_info_path_old):
     trial_info_frame = pd.read_csv(trial_info_path_old)
 else:
-    raise FileNotFoundError("trial_info_frame.csv not found in metadata/ or data directory")
+    raise FileNotFoundError(
+        "trial_info_frame.csv not found in metadata/ or data directory")
 
 merge_frame = pd.merge(emg_env_df, trial_info_frame,
                        left_on=['dig_in', 'trial_inds'],
diff --git a/tests/test_logs_metadata_dirs.py b/tests/test_logs_metadata_dirs.py
index a6b8f0a1..47040d68 100644
--- a/tests/test_logs_metadata_dirs.py
+++ b/tests/test_logs_metadata_dirs.py
@@ -9,16 +9,16 @@ def test_ensure_dir_function():
     # Read the function source
     with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f:
         source = f.read()
-    
+
     # Check that ensure_dir is defined
     assert 'def ensure_dir(data_dir, subdir):' in source
-    
+
     # Check that get_metadata_dir is defined
     assert 'def get_metadata_dir(data_dir):' in source
-    
+
     # Check that get_logs_dir is defined
     assert 'def get_logs_dir(data_dir):' in source
-    
+
     print("✓ All required functions are defined in blech_utils.py")
 
 
@@ -26,7 +26,7 @@ def test_logs_directory_creation_in_tee():
     """Test that Tee class uses logs directory"""
     with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f:
         source = f.read()
-    
+
     # Check that Tee uses logs directory
     assert "logs_dir = ensure_dir(data_dir, 'logs')" in source
     assert "self.log_path = os.path.join(logs_dir, name)" in source
@@ -37,7 +37,7 @@ def test_logs_directory_in_blech_process():
     """Test that blech_process.py uses logs directory"""
     with open('/workspace/project/blech_clust/blech_process.py', 'r') as f:
         source = f.read()
-    
+
     # Check that it creates logs directory
     assert "log_dir = os.path.join(metadata_handler.dir_name, 'logs')" in source
     assert "os.makedirs(log_dir, exist_ok=True)" in source
@@ -48,7 +48,7 @@ def test_logs_directory_in_blech_init():
     """Test that blech_init.py uses logs directory"""
     with open('/workspace/project/blech_clust/blech_init.py', 'r') as f:
         source = f.read()
-    
+
     # Check that it uses logs directory for results.log
     assert "logs/results.log" in source
     print("✓ blech_init.py correctly uses logs directory for results.log")
@@ -58,7 +58,7 @@ def test_logs_directory_in_ram_monitor():
     """Test that ram_monitor.py uses logs directory"""
     with open('/workspace/project/blech_clust/utils/ram_monitor.py', 'r') as f:
         source = f.read()
-    
+
     # Check that it uses logs directory
     assert "logs_dir = os.path.join(output_dir, 'logs')" in source
     assert "ram_usage.log" in source
@@ -69,7 +69,7 @@ def test_metadata_directory_in_read_file():
     """Test that read_file.py uses metadata directory"""
     with open('/workspace/project/blech_clust/utils/read_file.py', 'r') as f:
         source = f.read()
-    
+
     # Check that it uses get_metadata_dir
     assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source
     assert "metadata_dir = get_metadata_dir(self.data_dir)" in source
@@ -80,7 +80,7 @@ def test_metadata_directory_in_ephys_data():
     """Test that ephys_data.py uses metadata directory"""
     with open('/workspace/project/blech_clust/utils/ephys_data/ephys_data.py', 'r') as f:
         source = f.read()
-    
+
     # Check that it uses get_metadata_dir
     assert "from blech_clust.utils.blech_utils import get_metadata_dir" in source
     print("✓ ephys_data.py correctly imports get_metadata_dir")
@@ -90,7 +90,7 @@ def test_metadata_directory_in_blech_units_characteristics():
     """Test that blech_units_characteristics.py uses metadata directory"""
     with open('/workspace/project/blech_clust/blech_units_characteristics.py', 'r') as f:
         source = f.read()
-    
+
     # Check that it uses get_metadata_dir for aggregated_characteristics.csv
     assert "get_metadata_dir" in source
     assert "aggregated_characteristics.csv" in source
@@ -101,7 +101,7 @@ def test_metadata_directory_in_blech_post_process():
     """Test that blech_post_process.py uses metadata directory"""
     with open('/workspace/project/blech_clust/blech_post_process.py', 'r') as f:
         source = f.read()
-    
+
     # Check that it uses get_metadata_dir for unit_descriptor.csv
     assert "get_metadata_dir" in source
     assert "unit_descriptor.csv" in source
@@ -112,7 +112,7 @@ def test_metadata_directory_in_blech_make_arrays():
     """Test that blech_make_arrays.py uses metadata directory"""
     with open('/workspace/project/blech_clust/blech_make_arrays.py', 'r') as f:
         source = f.read()
-    
+
     # Check that it uses get_metadata_dir for trial_info_frame.csv
     assert "get_metadata_dir" in source
     assert "trial_info_frame.csv" in source
@@ -124,7 +124,7 @@ def test_backward_compatibility_check():
     # Check execution_log.json backward compatibility
     with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f:
         source = f.read()
-    
+
     assert "old_log_path = os.path.join(self.data_dir, 'execution_log.json')" in source
     assert "if not os.path.exists(self.log_path) and os.path.exists(old_log_path):" in source
     print("✓ Backward compatibility for execution_log.json is maintained")
@@ -134,7 +134,7 @@ def test_find_file_in_dir_function():
     """Test that find_file_in_dir function exists for backward compatibility"""
     with open('/workspace/project/blech_clust/utils/blech_utils.py', 'r') as f:
         source = f.read()
-    
+
     assert "def find_file_in_dir(data_dir, filename_pattern):" in source
     print("✓ find_file_in_dir function exists for backward compatibility")
 
diff --git a/utils/blech_data_summary.py b/utils/blech_data_summary.py
index 098693ee..25a0a21e 100644
--- a/utils/blech_data_summary.py
+++ b/utils/blech_data_summary.py
@@ -153,7 +153,8 @@ def extract_unit_characteristics(dir_name):
 
     # Fallback: read from aggregated_characteristics.csv
     metadata_dir = get_metadata_dir(dir_name)
-    characteristics_path = os.path.join(metadata_dir, 'aggregated_characteristics.csv')
+    characteristics_path = os.path.join(
+        metadata_dir, 'aggregated_characteristics.csv')
     # Check old location for backward compatibility
     old_path = os.path.join(dir_name, 'aggregated_characteristics.csv')
     if not os.path.exists(characteristics_path) and os.path.exists(old_path):
diff --git a/utils/blech_process_utils.py b/utils/blech_process_utils.py
index d6926565..5ba4bd0d 100644
--- a/utils/blech_process_utils.py
+++ b/utils/blech_process_utils.py
@@ -762,7 +762,8 @@ def write_out_recommendations(self):
         """
         waveform_thresh = self.classifier_params['min_suggestion_count']
         metadata_dir = get_metadata_dir(self.data_dir)
-        out_file_path = os.path.join(metadata_dir, 'waveform_classifier_recommendations.csv')
+        out_file_path = os.path.join(
+            metadata_dir, 'waveform_classifier_recommendations.csv')
         count = len(self.pos_spike_dict['waveforms'])
         if count > waveform_thresh:
             percentile_5 = np.percentile(self.pos_spike_dict['prob'], 5)
diff --git a/utils/blech_utils.py b/utils/blech_utils.py
index cb4739bc..b533bd18 100644
--- a/utils/blech_utils.py
+++ b/utils/blech_utils.py
@@ -76,12 +76,12 @@ def find_file_in_dir(data_dir, filename_pattern):
         matches = glob.glob(os.path.join(metadata_dir, filename_pattern))
         if matches:
             return matches[0]
-    
+
     # Fall back to data_dir root
     matches = glob.glob(os.path.join(data_dir, filename_pattern))
     if matches:
         return matches[0]
-    
+
     return None
 
 
@@ -256,7 +256,8 @@ def check_previous(self, script_path):
             if type(parent_script) != list:
                 parent_script = [parent_script]
             # Check that parent script is present in log
-            self.log_path = os.path.join(ensure_dir(self.data_dir, 'logs'), 'execution_log.json')
+            self.log_path = os.path.join(ensure_dir(
+                self.data_dir, 'logs'), 'execution_log.json')
             # Check old location for backward compatibility
             old_log_path = os.path.join(self.data_dir, 'execution_log.json')
             if not os.path.exists(self.log_path) and os.path.exists(old_log_path):
@@ -283,7 +284,8 @@ def write_to_log(self, script_path, type='attempted'):
         """
         if not hasattr(self, 'git_str'):
             raise ValueError('Run get_git_info() first')
-        self.log_path = os.path.join(ensure_dir(self.data_dir, 'logs'), 'execution_log.json')
+        self.log_path = os.path.join(ensure_dir(
+            self.data_dir, 'logs'), 'execution_log.json')
         # Check old location for backward compatibility
         old_log_path = os.path.join(self.data_dir, 'execution_log.json')
         if not os.path.exists(self.log_path) and os.path.exists(old_log_path):
diff --git a/utils/ephys_data/ephys_data.py b/utils/ephys_data/ephys_data.py
index c8f90278..d1c6904f 100755
--- a/utils/ephys_data/ephys_data.py
+++ b/utils/ephys_data/ephys_data.py
@@ -1629,13 +1629,14 @@ def get_trial_info_frame(self):
         metadata_dir = get_metadata_dir(self.data_dir)
         new_path = os.path.join(metadata_dir, 'trial_info_frame.csv')
         old_path = os.path.join(self.data_dir, 'trial_info_frame.csv')
-        
+
         if os.path.exists(new_path):
             self.trial_info_frame = pd.read_csv(new_path)
         elif os.path.exists(old_path):
             self.trial_info_frame = pd.read_csv(old_path)
         else:
-            raise FileNotFoundError("trial_info_frame.csv not found in metadata/ or data directory")
+            raise FileNotFoundError(
+                "trial_info_frame.csv not found in metadata/ or data directory")
 
     def sequester_trial_inds(self):
         """Organize trial indices by taste and laser condition
diff --git a/utils/read_file.py b/utils/read_file.py
index 9889522a..8070cce4 100644
--- a/utils/read_file.py
+++ b/utils/read_file.py
@@ -236,7 +236,7 @@ def load_dig_in_frame(self):
         metadata_dir = os.path.join(self.data_dir, 'metadata')
         old_path = os.path.join(self.data_dir, 'dig_in_frame.csv')
         new_path = os.path.join(metadata_dir, 'dig_in_frame.csv')
-        
+
         if os.path.exists(new_path):
             self.dig_in_frame = pd.read_csv(new_path, index_col=0)
             print('Dig-in frame loaded from metadata/dig_in_frame.csv')