diff --git a/.gitignore b/.gitignore
index f3c63fd9..f046e67f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,6 @@ b1_ws/build/
 b1_ws/install/
 b1_ws/log/
 b1_ws/src/ros2
+
+# Virtual environments.
+*.venv*
diff --git a/01_dsp/dspml_pipeline/dspml_pipeline/data/frame_loader.py b/01_dsp/dspml_pipeline/dspml_pipeline/data/frame_loader.py
index 6bb6d86f..6cfe8c20 100644
--- a/01_dsp/dspml_pipeline/dspml_pipeline/data/frame_loader.py
+++ b/01_dsp/dspml_pipeline/dspml_pipeline/data/frame_loader.py
@@ -1,3 +1,21 @@
+"""
+File:
+    frame_loader.py
+
+Description:
+    Tools for loading datasets from the GOPHERS pipeline.
+
+Authors:
+    jLab
+    Eric Vetha
+    nubby
+
+Date:
+    24 Feb 2026
+
+Version:
+    1.0.9
+"""
 import logging
 logger = logging.getLogger(__name__)
 
@@ -5,12 +23,15 @@
 from pathlib import Path
 from ..setup_logging import setup_logging
 import json
+import os
 import pandas as pd
 import sys
 from scipy import signal
 
+
 THRESHOLD = 50 # For anomoly removal
 
+
 class FrameLoader:
     """
     FrameLoader class for processing radar data into standardized input (X) and output (y) matrices for regression tasks.
@@ -38,10 +59,15 @@ class FrameLoader:
         y (np.ndarray):         Corresponding labels (targets).
     """
 
-    def __init__(self, dataset_dirs:list, target_dir:str,
-                 data_log:str = "data-log.csv", 
-                 folder_name:str = "Sample #", label_name:str = "Bulk Density (g/cm^3)", 
-                 verbose:bool = False):
+    def __init__(self,
+                 target_dir: str,
+                 data_log: str = "data-log.csv", 
+                 dataset: np.ndarray = None,
+                 dataset_dirs: list = [],
+                 folder_name: str = "Sample #",
+                 label_name: str = "Bulk Density (g/cm^3)", 
+                 labels: np.ndarray = None,
+                 verbose: bool = False):
         """
         Initializes the FrameLoader instance based on the provided directories.
 
@@ -57,21 +83,252 @@ def __init__(self, dataset_dirs:list, target_dir:str,
         self.dataset_dirs = dataset_dirs
         self.target_dir = target_dir
         self.data_log = data_log
-        self.X = None
-        self.y = None
         self.label_name = label_name
         self.folder_name = folder_name
 
-        # Validate dataset directory
-        for i in self.dataset_dirs:
-            if not Path(i).exists():
-                logger.error(f"Dataset {i} does not exist.")
-            data_log_i = Path(i) / data_log
-            if not data_log_i.exists():
-                logger.error(f"Data log file {data_log_i} does not exist.")
+        # Import data from the dataset directories if provided.
+        if dataset_dirs:
+            # No input datastreams given.
+            self.X = None
+            self.y = None
+
+            # Validate dataset directory
+            for i in self.dataset_dirs:
+                if not Path(i).exists():
+                    logger.error(f"Dataset {i} does not exist.")
+                if not os.path.isdir(i):
+                    logger.error(f"Path {i} does not point to a dataset directory.")
+                data_log_i = Path(i) / data_log
+                if not data_log_i.exists():
+                    logger.warning(f"Data log file {data_log_i} does not exist; "
+                                   f"checking for preprocessed dataset...")
+                    if not self._is_dataset_preprocessed(i):
+                        logger.warning(f"Dataset {i} is invalid.")
+                        sys.exit(1)
+                    else:
+                        logger.info(f"Dataset {i} initialized.")
+        # Directly import tuples of dataset and labels if given.
+        elif ((len(dataset) > 0 and len(labels) > 0) and (len(labels) == len(dataset))):
+            self.X = dataset
+            self.y = labels
+        else:
+            print(f"Cannot load dataset.")
+            sys.exit(1)
+
+    def _is_dataset_preprocessed(self, path: str) -> bool:
+        """
+        Check for the existence of X.npy, y.npy, features.csv, and results.csv files in the path provided.
+
+        Args:
+            path            (str):  Directory to inspect.
+
+        Returns:
+            preprocessed?   (bool): True when every required file is present; extra files are ignored.
+        """
+        required_files = {"X.npy", "y.npy", "features.csv", "results.csv"}
+        # Subset test rather than equality: unrelated extra files (hidden files, logs)
+        # must not invalidate an otherwise complete dataset.
+        current_files = set(os.listdir(path))
+        # TODO: Extract data here?
+        return required_files.issubset(current_files)
+
+    def _is_new_dataset_valid(self, dataset_dir: str) -> bool:
+        """
+        Confirm whether a dataset contains the required raw radar frames for processing.
+
+        Args:
+            dataset_dir     (str):  Relative path to dataset in question.
+
+        Returns:
+            valid           (bool): Is the dataset valid for extracting radar frame data?
+        """
+        dataset_path = Path(dataset_dir)
+
+        # First check that the data log is in the base directory. Use the configured
+        # log name so validation agrees with what extract_single_dataset() reads.
+        if not (dataset_path / self.data_log).is_file():
+            return False
+
+        # Next look for a number of raw radar scans greater than zero. The scans live in
+        # the non-hidden subdirectories; a single *.frames file anywhere is enough.
+        # (Counting the per-folder glob results instead would accept a dataset whose
+        # folders are all empty, since each folder contributes one list.)
+        subdirs = [d for d in dataset_path.iterdir()
+                if d.is_dir() and not d.name.startswith('.')]
+        for folder in subdirs:
+            if any(folder.glob("*.frames")):
+                return True
+
+        return False
+
+    def _is_preprocessed_dataset_valid(self, dataset_dir: str) -> bool:
+        """
+        Report whether the preprocessed radar arrays are present in a dataset directory.
+
+        Args:
+            dataset_dir     (str):  Relative path to dataset in question.
+
+        Returns:
+            valid           (bool): True when both X.npy and y.npy are listed in dataset_dir.
+
+        Todo:
+            * Check the contents of the numpy files for validity.
+        """
+        present = os.listdir(dataset_dir)
+        for needed in ("X.npy", "y.npy"):
+            if needed not in present:
+                return False
+        return True
+
+    def extract_single_dataset(self, dataset_dir: Path) -> tuple[list, list]:
+        """
+        Extracts the features (X) and labels (y) from a given directory.
+
+        Args:
+            dataset_dir (str | Path):   Relative path to dataset source directory.
+
+        Returns:
+            frame_data  (list):         One downconverted complex64 array per processed capture file.
+            labels      (list):         The data-log label of each capture's folder, in the same order.
+                                        Both lists are empty if the data log cannot be read.
+        """
+        new_frame_data = []
+        new_labels = []
+
+        logger.info(f"Extracting data from {dataset_dir}.")
+
+        dataset_dir = Path(dataset_dir)
+        subdirs = [d for d in dataset_dir.iterdir()
+                if d.is_dir() and not d.name.startswith('.')]
+        data_log = dataset_dir / self.data_log
+
+        # Get the labels from the data log.
+        try:
+            df = pd.read_csv(data_log)
+            df[self.folder_name] = df[self.folder_name].astype(str)
+            df[self.label_name] = df[self.label_name].astype(float)
+            logger.info(f"Loaded data log with {len(df)} samples")
+        except Exception as e:
+            logger.warning(f"Expected CSV format: columns include '{self.folder_name}' and '{self.label_name}'; "
+                           f"attempting to load preprocessed {self.folder_name} dataset...")
+            return [], []
+
+        # Defined before the loop so the parameter save below cannot hit an unbound
+        # name when the dataset has no subdirectories.
+        params = None
+
+        # In each subdirectory.
+        for folder in subdirs:
+            capture_files = sorted(folder.glob("*.frames"))
+
+            logger.info(f"Processing {len(capture_files)} files in {folder.name}")
+
+            if not capture_files:
+                logger.warning(f"No .frames files found in {folder.name}")
+                continue
+
+            # Find the row in df corresponding to this folder name (configured folder column).
+            sample_row = df[df[self.folder_name] == folder.name]
+            if sample_row.empty:
+                logger.error(f"No matching sample for folder {folder.name} in data log")
                 sys.exit(1)
+            else:
+                bulk_density = sample_row.iloc[0][self.label_name]
+
+            # Process each capture file
+            params = None
+            for capture_file in capture_files:
+                try:
+                    frame_data, params = process_frames(folder, capture_file.name)
+
+                    if frame_data is None:
+                        logger.warning(f"Failed to process: {capture_file.name}")
+                        continue
+
+                    # Anomaly removal. Replaces values that deviate from the median by more
+                    # than a threshold with the median. This has been done since the beginning
+                    # of the project because of odd spikes in the raw DAC output that causes
+                    # large deviations in the data.
+                    median = np.median(frame_data, axis=1, keepdims=True)
+                    mask = np.abs(frame_data - median) > THRESHOLD
+                    frame_data_clean = frame_data.copy()
+                    frame_data_clean[mask] = np.broadcast_to(median, frame_data.shape)[mask]
+
+                    # DDC
+                    ddc_frame_data = np.zeros_like(frame_data_clean, dtype=np.complex64)
+                    for col in range(frame_data_clean.shape[1]):
+                        ddc_frame_data[:, col] = novelda_digital_downconvert(frame_data_clean[:, col])
+
+                    new_frame_data.append(ddc_frame_data)
+                    new_labels.append(bulk_density)
+
+                # Outputs warning when problem occurs while processing, but continues processing other radar data.
+                except Exception as e:
+                    logger.warning(f"Error processing {capture_file.name}: {e}")
+
+        # Save radar parameters
+        if params and len(capture_files) > 0:
+            params_file = folder / "radar_params.json"
+            with open(params_file, 'w') as f:
+                json.dump(params, f)
+            logger.info(f"Saved parameters: {params_file.name}")
+
+        return new_frame_data, new_labels
+
+    def load_preprocessed_dataset(self, dataset_dir: str) -> tuple:
+        """
+        Load preprocessed datasets.
+        """
+        print(dataset_dir)
+        X_path = Path(dataset_dir) / "X.npy"
+        y_path = Path(dataset_dir) / "y.npy"
 
-    def extract_data(self):
+        # Load dataset if it has already been processed into .npy files.
+        X = np.load(X_path)
+        y = np.load(y_path)
+        
+        logger.info(f"Loaded from existing dataset: X={X.shape}, y={y.shape}")
+
+        return X.tolist(), y.tolist()
+
+    def load(self, new: bool) -> tuple:
+        """
+        Build the combined dataset from every directory in self.dataset_dirs.
+
+        Args:
+            new     (bool)  Load raw radar frames? If False, load .npy files if they exist.
+
+        Returns:
+            X, y    (tuple[np.ndarray, np.ndarray])
+        """
+        logger.info("Starting frame processing")
+
+        combined_X, combined_y = [], []
+        for dataset_dir in self.dataset_dirs:
+            # Raw scans are used only when requested and present.
+            use_raw = new and self._is_new_dataset_valid(dataset_dir=dataset_dir)
+            if use_raw:
+                X_new, y_new = self.extract_single_dataset(dataset_dir=dataset_dir)
+            # Otherwise fall back to a preprocessed dataset.
+            elif self._is_preprocessed_dataset_valid(dataset_dir):
+                X_new, y_new = self.load_preprocessed_dataset(dataset_dir)
+            else:
+                logger.error(f"Neither existing radar scans nor valid preprocessed "
+                             f"dataset were found for the following dataset:\r\n"
+                             f"\t+ Target:\t\t{self.target_dir}\r\n"
+                             f"\t+ Dataset dir:\t{dataset_dir}")
+                sys.exit(1)
+            # Grow the broader dataset with this directory's scans and labels.
+            combined_X.extend(X_new)
+            combined_y.extend(y_new)
+
+        # Stack the per-capture entries into arrays and keep them on the instance.
+        self.X = np.stack(combined_X)
+        self.y = np.stack(combined_y)
+
+        return self.X, self.y
+
+    def extract_data(self) -> tuple:
         """
         Extracts the features (X) and labels (y) from the provided directries.
 
@@ -87,6 +344,7 @@ def extract_data(self):
 
         # Iterate through each dataset dir
         for i in self.dataset_dirs:
+            logging.info(f"Extracting data from {i}.")
             dataset_dir = Path(i)
             subdirs = [d for d in dataset_dir.iterdir() 
                     if d.is_dir() and not d.name.startswith('.')]
@@ -99,8 +357,9 @@ def extract_data(self):
                 df[self.label_name] = df[self.label_name].astype(float)
                 logger.info(f"Loaded data log with {len(df)} samples")
             except Exception as e:
-                logger.error(f"Expected CSV format: columns include '{self.folder_name}' and '{self.label_name}'")
-                sys.exit(1)
+                logger.warning(f"Expected CSV format: columns include '{self.folder_name}' and '{self.label_name}'; "
+                               f"attempting to load preprocessed {self.folder_name} dataset...")
+                return [], []
             
             # In each subdirectory
             for i, folder in enumerate(subdirs):
@@ -185,13 +444,14 @@ def save_dataset(self):
         logger.info(f"Raw dataset saved as X.npy and y.npy")
         logger.info(f"Saved shapes: X={self.X.shape}, y={self.y.shape}")
 
-def load_dataset(dataset_dir:str):
+def load_dataset(dataset_dir: str, fl: FrameLoader):
     """
     Loads data that has already been processed. Assumes the features are named X.npy and the 
     labels are named y.npy.
 
     Args:
         dataset_dir:        Directory containing the capture file.
+        fl:                 FrameLoader object for given dataset.
 
     Returns:
         X (np.ndarray):     Processed radar data (features).
@@ -202,11 +462,22 @@ def load_dataset(dataset_dir:str):
     y_path = Path(dataset_dir) / "y.npy"
 
     if not X_path.exists() or not y_path.exists():
-        logger.error("X.npy and/or y.npy not found in the dataset directory")
-        sys.exit(1)
-
-    X = np.load(X_path)
-    y = np.load(y_path)
+        logger.warning("X.npy and/or y.npy not found in the dataset directory; generating...")
+        X, y = fl.extract_data()
+        if len(X) == 0 or len(y) == 0:
+            logger.error("X.npy and/or y.npy could not be generated.")
+            sys.exit(1)
+        fl.save_dataset()
+        # If the dataset still does not exist, exit.
+        if not X_path.exists() or not y_path.exists():
+            logger.error("X.npy and/or y.npy could not be generated.")
+            sys.exit(1)
+    else:
+        # Load dataset if it has already been processed into .npy files.
+        print(X_path)
+        print(y_path)
+        X = np.load(X_path)
+        y = np.load(y_path)
     
     logger.info(f"Loaded from existing dataset: X={X.shape}, y={y.shape}")
 
@@ -480,4 +751,4 @@ def novelda_digital_downconvert(raw_frame:np.ndarray):
     # Baseband signal using convolution (provides downcoverted, filtered analytic signal)
     baseband_signal = signal.convolve(mixed, window, mode='same')
 
-    return baseband_signal
\ No newline at end of file
+    return baseband_signal
diff --git a/01_dsp/dspml_pipeline/dspml_pipeline/end_to_end_estimation/transformer.py b/01_dsp/dspml_pipeline/dspml_pipeline/end_to_end_estimation/pt_transformer.py
similarity index 94%
rename from 01_dsp/dspml_pipeline/dspml_pipeline/end_to_end_estimation/transformer.py
rename to 01_dsp/dspml_pipeline/dspml_pipeline/end_to_end_estimation/pt_transformer.py
index 84b05fc7..0e209bea 100644
--- a/01_dsp/dspml_pipeline/dspml_pipeline/end_to_end_estimation/transformer.py
+++ b/01_dsp/dspml_pipeline/dspml_pipeline/end_to_end_estimation/pt_transformer.py
@@ -1,5 +1,19 @@
-# TODO: update docstrings
+"""
+pt_transformer.py
 
+Pretrained, lightweight vision Transformer architecture (MobileViT) repurposed to explore
+how well transfer learning can detect soil compaction from radargrams.
+
+Authors:
+    jLab
+    Eric Vetha
+    
+Date:
+    7 Mar 2026
+
+Version:
+    1.0.0
+"""
 import logging
 logger = logging.getLogger(__name__)
 
@@ -8,13 +22,17 @@
 from torch import nn
 from torch.utils.data import Dataset, DataLoader
 from sklearn.model_selection import KFold
-from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from sklearn.metrics import mean_squared_error, mean_absolute_error
 import time
 import os
 from PIL import Image
 
 from ..parameters import RANDOM_SEED, KFOLD_SPLITS, num2label
-from transformers import MobileViTFeatureExtractor, MobileViTForImageClassification
+try:
+    from transformers import MobileViTFeatureExtractor, MobileViTForImageClassification
+except ImportError:
+    from transformers import MobileViTImageProcessor, MobileViTForImageClassification
+
 
 # Set seeds for reproducibility
 torch.manual_seed(RANDOM_SEED)
@@ -129,7 +147,11 @@ def __init__(self, X, y, epochs=10, batch_size=4, verbose=False):
         # Move to device
         self.mobilevit.to(self.device)
 
-        self.feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/mobilevit-small")
+        try:
+            self.feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/mobilevit-small")
+        except:
+            self.feature_extractor = MobileViTImageProcessor.from_pretrained("apple/mobilevit-small")
+            
         
         # Freeze the backbone, only train the classifier
         for name, param in self.mobilevit.named_parameters():
@@ -413,4 +435,4 @@ def evaluate(model, dataloader, loss_fn, device):
             total_loss += loss.item()
             preds.extend(outputs.cpu().numpy())
             trues.extend(targets.cpu().numpy())
-    return total_loss / len(dataloader), preds, trues
\ No newline at end of file
+    return total_loss / len(dataloader), preds, trues
diff --git a/01_dsp/dspml_pipeline/dspml_pipeline/feature_estimation/eval_tools.py b/01_dsp/dspml_pipeline/dspml_pipeline/feature_estimation/eval_tools.py
index 8d2ebafc..d44364a6 100644
--- a/01_dsp/dspml_pipeline/dspml_pipeline/feature_estimation/eval_tools.py
+++ b/01_dsp/dspml_pipeline/dspml_pipeline/feature_estimation/eval_tools.py
@@ -1,3 +1,21 @@
+"""
+File:
+    eval_tools.py
+
+Description:
+    Tools for evaluating the pipeline's estimators and displaying result summaries.
+
+Authors:
+    jLab
+    Eric Vetha
+    nubby
+
+Date:
+    24 Feb 2026
+
+Version:
+    1.0.9
+"""
 from dspml_pipeline.feature_estimation.ridge_regression import RidgeRegression
 from dspml_pipeline.feature_estimation.random_forest import RandomForest
 from dspml_pipeline.feature_estimation.xgboost_tree import XGBoostTree
@@ -6,7 +24,7 @@
 from dspml_pipeline.feature_estimation.mlp import MLPRegression
 from dspml_pipeline.end_to_end_estimation.cnn import CNNEstimator
 from dspml_pipeline.end_to_end_estimation.lstm import LSTMEstimator
-from dspml_pipeline.end_to_end_estimation.transformer import TransformerEstimator
+from dspml_pipeline.end_to_end_estimation.pt_transformer import TransformerEstimator
 from dspml_pipeline.parameters import num2label
 
 from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score
@@ -263,4 +281,4 @@ def show_results_summary(feature_type: str, training_dir: str, validation_dir: s
     print(f"Validation Results for {feature_type}".center(40))
     print("="*40)
     results_df_amp = load_results(validation_dir)
-    display_feature_results(feature_type, results_df_amp)
\ No newline at end of file
+    display_feature_results(feature_type, results_df_amp)
diff --git a/01_dsp/dspml_pipeline/scripts/configs/f1-f2-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/f1-f2-config.yaml
new file mode 100644
index 00000000..1afc76ac
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/f1-f2-config.yaml
@@ -0,0 +1,93 @@
+# f1-f2-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# This configuration file trains on the field (1) dataset, then validates on
+# field-2. In-lab and Pie Ranch datasets are excluded.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/field-soil-compaction-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/f1_f2-pr-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/field-2-soil-compaction-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/f1_f2-pr-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/configs/f1-pr-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/f1-pr-config.yaml
new file mode 100644
index 00000000..cebb2840
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/f1-pr-config.yaml
@@ -0,0 +1,93 @@
+# f1-pr-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# This configuration file trains on the field (1) dataset, then validates on the
+# Pie Ranch dataset. In-lab and field-2 datasets are excluded.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/field-soil-compaction-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/f1-pr-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/pie-ranch-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/f1-pr-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/configs/f2-f1-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/f2-f1-config.yaml
new file mode 100644
index 00000000..878299ac
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/f2-f1-config.yaml
@@ -0,0 +1,93 @@
+# f2-f1-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# NOTE(review): as written this trains on field (1) and validates on field-2, the same
+# split as f1-f2-config.yaml; the f2-f1 name suggests the reverse. Confirm dataset_dirs.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/field-soil-compaction-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/f1-f2-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/field-2-soil-compaction-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/f1-f2-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/configs/f2-pr-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/f2-pr-config.yaml
new file mode 100644
index 00000000..312c1767
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/f2-pr-config.yaml
@@ -0,0 +1,93 @@
+# f2-pr-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# This configuration file trains on the field-2 dataset only, then validates on the
+# Pie Ranch dataset. The in-lab and field (1) datasets are excluded.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/field-2-soil-compaction-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/f2-pr-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/pie-ranch-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/f2-pr-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/configs/il-f1-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/il-f1-config.yaml
new file mode 100644
index 00000000..0ee79789
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/il-f1-config.yaml
@@ -0,0 +1,95 @@
+# il-f1-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# This configuration file trains on the in-lab ("wet*") datasets only, then validates
+# on field (1). The field-2 and Pie Ranch datasets are excluded.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/wet-0-soil-compaction-dataset"
+      - "../data/wet-1-soil-compaction-dataset" 
+      - "../data/wet-2-soil-compaction-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/il-f1-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/field-soil-compaction-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/il-f1-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/configs/il-f2-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/il-f2-config.yaml
new file mode 100644
index 00000000..a1ecacfd
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/il-f2-config.yaml
@@ -0,0 +1,95 @@
+# il-f2-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# This configuration file trains on the in-lab ("wet*") datasets only, then validates
+# on field-2. The field (1) and Pie Ranch datasets are excluded.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/wet-0-soil-compaction-dataset"
+      - "../data/wet-1-soil-compaction-dataset" 
+      - "../data/wet-2-soil-compaction-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/il-f2-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/field-2-soil-compaction-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/il-f2-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/configs/il-pr-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/il-pr-config.yaml
new file mode 100644
index 00000000..3af5e158
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/il-pr-config.yaml
@@ -0,0 +1,95 @@
+# il-pr-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# This configuration file trains on the in-lab ("wet*") datasets only, then validates
+# on the Pie Ranch dataset. The field (1) and field-2 datasets are excluded.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/wet-0-soil-compaction-dataset"
+      - "../data/wet-1-soil-compaction-dataset" 
+      - "../data/wet-2-soil-compaction-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/il-pr-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/pie-ranch-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/il-pr-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/configs/il_f1_f2-pr-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/il_f1_f2-pr-config.yaml
new file mode 100644
index 00000000..c74a66e3
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/il_f1_f2-pr-config.yaml
@@ -0,0 +1,97 @@
+# il_f1_f2-pr-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# This configuration file trains on the in-lab ("wet*"), field (1) and field-2 datasets,
+# then validates on the held-out Pie Ranch dataset.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/wet-0-soil-compaction-dataset"
+      - "../data/wet-1-soil-compaction-dataset" 
+      - "../data/wet-2-soil-compaction-dataset"
+      - "../data/field-soil-compaction-dataset"
+      - "../data/field-2-soil-compaction-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/il_f1_f2-pr-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/pie-ranch-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/il_f1_f2-pr-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/configs/pr-f1-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/pr-f1-config.yaml
new file mode 100644
index 00000000..d7da694a
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/pr-f1-config.yaml
@@ -0,0 +1,93 @@
+# pr-f1-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# This configuration file trains on the Pie Ranch dataset only, then validates on
+# field (1). The in-lab and field-2 datasets are excluded.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/pie-ranch-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/pr-f1-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/field-soil-compaction-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/pr-f1-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/configs/pr-f2-config.yaml b/01_dsp/dspml_pipeline/scripts/configs/pr-f2-config.yaml
new file mode 100644
index 00000000..ac7553c1
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/configs/pr-f2-config.yaml
@@ -0,0 +1,93 @@
+# pr-f2-config.yaml
+#
+# Dataset labels:
+#   il = In-lab, "wet*" datasets
+#   f1 = "field" dataset
+#   f2 = "field2" dataset
+#   pr = Dataset from Pie Ranch
+#
+#   '_' separates included datasets; '-' separates training from validation datasets.
+#
+# Data configuration.
+# 
+# This configuration file trains on the Pie Ranch dataset only, then validates on
+# field-2. The in-lab and field (1) datasets are excluded.
+data:       
+  label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
+  new_dataset: true                      # Set to true if this is a new dataset
+  # Training dataset 
+  training:       
+    # Raw datasets to combine (list of directories)
+    dataset_dirs: 
+      - "../data/pie-ranch-dataset"
+    # Target combined training dataset directory
+    target_dir: "../data/pr-f2-training-dataset"
+  # Validation dataset
+  validation:
+    # Raw validation datasets to combine
+    dataset_dirs:
+      - "../data/field-2-soil-compaction-dataset"
+    # Target combined validation dataset directory
+    target_dir: "../data/pr-f2-val-dataset"
+
+# For handcrafted features
+handcrafted:
+  enabled: true               # Enable or disable handcrafted features
+  new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
+  pruning_method: all      # Options: corr, mi, lasso, none
+  top_n: 16             # Only used if pruning_method is not none
+
+# For learned features
+learned:
+  n_features: 8               # Desired number of features
+  # PCA-based feature extraction
+  pca:
+    enabled: true             # Enable or disable PCA features
+  # Kernel-PCA-based feature extraction
+  kpca:
+    enabled: true             # Enable or disable kPCA features
+  # Autoencoder-based feature extraction
+  autoencoder:
+    enabled: true
+    epochs: 1000
+    batch_size: 256
+    verbose: true
+  # CNN-based feature extraction
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: true
+    
+# Classical model configuration for feature regression
+classical:
+  enabled: true               # Enable or disable the evaluation and validation of classical models (on all features)
+  tune_model_params: true    # Set to true to tune the models, or false to save time
+
+# Deep learning model configuration for feature regression
+deep_learning:
+  enabled: true
+
+# End-to-end model configurations for raw data regression
+end-to-end:
+  # LSTM-based end-to-end regression
+  lstm:
+    enabled: false
+    epochs: 50
+    batch_size: 32
+    verbose: false
+  # CNN-based end-to-end regression
+  cnn:
+    enabled: false
+    epochs: 20
+    batch_size: 32
+    verbose: false
+  # Transformer-based end-to-end regression
+  transformer:
+    enabled: true
+    batch_size: 4
+    epochs: 10
+    verbose: false
+
+advanced:
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/main.py b/01_dsp/dspml_pipeline/scripts/main.py
index 44a6f3a0..2f848f29 100644
--- a/01_dsp/dspml_pipeline/scripts/main.py
+++ b/01_dsp/dspml_pipeline/scripts/main.py
@@ -1,8 +1,29 @@
+"""
+File:
+    main.py
+
+Description:
+    Launch file for WADAR dspml_pipeline.
+
+Authors:
+    jLab
+    Eric Vetha
+    nubby
+
+Date:
+    6 Mar 2026
+
+Version:
+    1.0.11
+"""
 import logging
 logger = logging.getLogger(__name__)
 
+import argparse
 import os
+import random
 import sys
+import torch
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
 import numpy as np
@@ -17,44 +38,186 @@
 from dspml_pipeline.feature_extraction.learned.autoencoder import AutoencoderLearnedFeatures
 from dspml_pipeline.feature_extraction.learned.cnn import CNNLearnedFeatures
 from dspml_pipeline.end_to_end_estimation.cnn import CNNEstimator
-from dspml_pipeline.end_to_end_estimation.transformer import TransformerEstimator
+from dspml_pipeline.end_to_end_estimation.pt_transformer import TransformerEstimator
 from dspml_pipeline.end_to_end_estimation.lstm import LSTMEstimator
 
 from scipy import stats
 import matplotlib.pyplot as plt
 import yaml
 
-def main():
 
-    if len(sys.argv) < 2:
-        raise RuntimeError("Usage: python main.py <config_file.yaml>")
-    config_file = sys.argv[1]
+def load_config(path: str) -> dict:
+    """
+    load_config(path)
 
-    # Load configuration
-    with open(config_file, "r") as f:
-        params = yaml.safe_load(f)
+    Load a configuration file into a return dictionary.
+
+    Args:
+        path    (str)
 
+    Returns:
+        params  (dict)
+    """
+    with open(path, "r") as f:
+        params = yaml.safe_load(f)
+    return params
+
+def plant_seeds(seed: int = 42):
+    """
+    plant_seeds(seed)
+
+    Seed the random, NumPy and PyTorch (CPU + CUDA) RNGs and set cuDNN to deterministic mode.
+
+    Args:
+        seed    (int)   Random seed applied to every generator listed above.
+    """
+    logging.info(f"Configuring random seed of {seed}...")
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+    logging.info("DONE.")
+
+def split_dataset(ds: np.ndarray,
+                  labels: np.ndarray,
+                  train_split: float = 0.8,
+                  test_split: float = 0.2,
+                  random_seed: int = 42) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """
+    split_dataset(ds, labels, train_split, test_split, random_seed)
+
+    Divide a given dataset into a training set and testing set. In the event of an
+    imperfect split, the number of training data entries will be rounded up, while the
+    testing entries receive the remainder. Rows are assigned without replacement, so
+    the two sets never share an index.
+
+    Args:
+        ds              (np.ndarray) Dataset to split (indexed along its first axis).
+        labels          (np.ndarray) Labels to split; must be the same length as ds.
+        train_split     (float) Fraction of dataset to put into the new training dataset.
+        test_split      (float) Fraction of dataset to put into the new testing dataset.
+        random_seed     (int)   Random seed for assigning dataset splits.
+
+    Returns:
+        training_ds     (np.ndarray) New training dataset.
+        training_labels (np.ndarray) New training labels.
+        testing_ds      (np.ndarray) New testing dataset.
+        testing_labels  (np.ndarray) New testing labels.
+
+    Raises:
+        ValueError: If ds/labels lengths differ or the splits do not sum to 1.
+    """
+    full_ds_size = len(ds)
+    if len(labels) != full_ds_size:
+        raise ValueError(f"Got {full_ds_size} examples but {len(labels)} labels")
+    if not np.isclose(train_split + test_split, 1.0):
+        raise ValueError(f"Splits of {train_split} and {test_split} do not sum to 1")
+
+    # Round before ceil so float noise (e.g. 8.000000000000002) cannot add an extra row.
+    training_ds_size = int(np.ceil(np.round(train_split * full_ds_size, 9)))
+
+    # Shuffle with a generator built from random_seed so the split is reproducible
+    # regardless of global RNG state; testing indices are kept in ascending order.
+    shuffled = np.random.default_rng(random_seed).permutation(full_ds_size)
+    training_indices = shuffled[:training_ds_size]
+    testing_indices = np.sort(shuffled[training_ds_size:])
+
+    return (ds[training_indices], labels[training_indices],
+            ds[testing_indices], labels[testing_indices])
+
+def are_duplicate_examples_present(ds1: tuple, ds2: tuple) -> bool:
+    """
+    are_duplicate_examples_present(ds1, ds2)
+
+    Check whether any ds1 entry and ds2 entry share an equal scan at the same index (cross-dataset only).
+
+    Args:
+        ds1 (tuple) First dataset.
+        ds2 (tuple) Second dataset.
+
+    Returns:
+            (bool)  True if at least one duplicate was found, otherwise False.
+    """
+    dups = False
+    # The dimension of each scan is (512x160), and there are many scans.
+    for i, line1 in enumerate(ds1):
+        for j, line2 in enumerate(ds2):
+            if (len(line1) == len(line2)):
+                for scan1, scan2 in zip(line1, line2):
+                    if (len(scan1) == len(scan2)):
+                        if tuple(scan1) == tuple(scan2):
+                            print(f"Found duplicate at [{i},{j}]!")
+                            dups = True
+    return dups
+
+def main(config_path: str, cross_val: bool = False):
+    """
+    main(config_path, cross_val)
+
+    Run the main training/validation pipeline.
+
+    Args:
+        config_path (str)   Path to selected configuration .yaml file.
+        cross_val   (bool)  Perform cross-validation on training dataset specified.
+    """
+    # Load training parameters from config file.
+    params = load_config(path=config_path)
+
+    # Configure logging.
     setup_logging(verbose=params['advanced']['verbose'])
 
-    # Load data from training and validation datasets
-    trainingFrameLoader = FrameLoader(dataset_dirs=params['data']['training']['dataset_dirs'],
-                              target_dir=params['data']['training']['target_dir'],
-                              data_log="data-log.csv",
-                              label_name=params['data']['label_name'])
-    validationFrameLoader = FrameLoader(dataset_dirs=params['data']['validation']['dataset_dirs'],
-                              target_dir=params['data']['validation']['target_dir'],
-                              data_log="data-log.csv",
-                              label_name=params['data']['label_name'])
-
-    # If new dataset, extract data. Otherwise, load from saved file.
-    if params['data']['new_dataset']:
-        X_train, y_train = trainingFrameLoader.extract_data()
-        trainingFrameLoader.save_dataset()
-        X_val, y_val = validationFrameLoader.extract_data()
-        validationFrameLoader.save_dataset()
+    # Configure environment for consistent training/results.
+    seed = 42   # TODO: Import as config.
+    plant_seeds(seed=seed)
+
+    # Determine whether to split a single dataset into parts or validate on held-out datasets.
+    # Load only the training "dataset_dirs"  for cross validation testing.
+    if cross_val:
+        fullFrameLoader = FrameLoader(dataset_dirs=params['data']['training']['dataset_dirs'],
+                                  target_dir=params['data']['training']['target_dir'],
+                                  data_log="data-log.csv",
+                                  label_name=params['data']['label_name'])
+        X_full, y_full = fullFrameLoader.load(params['data']['new_dataset'])
+
+        # Divide the full dataset into training/testing splits.
+        X_train, y_train, X_val, y_val = split_dataset(ds=X_full, labels=y_full, random_seed=seed)
+
+        # NOTE: Currently, these frame loaders can only write/save each split.
+        trainingFrameLoader = FrameLoader(dataset=X_train,
+                                          data_log="data-log.csv",
+                                          label_name=params['data']['label_name'],
+                                          labels=y_train,
+                                          target_dir=params['data']['training']['target_dir'])
+        validationFrameLoader = FrameLoader(dataset=X_val,
+                                            data_log="data-log.csv",
+                                            label_name=params['data']['label_name'],
+                                            labels=y_val,
+                                            target_dir=params['data']['validation']['target_dir'])
+    # Load all datasets if not doing strict cross-validation.
     else:
-        X_train, y_train = load_dataset(dataset_dir=params['data']['training']['target_dir'])
-        X_val, y_val = load_dataset(dataset_dir=params['data']['validation']['target_dir'])
+        # Load data from training and validation datasets.
+        trainingFrameLoader = FrameLoader(dataset_dirs=params['data']['training']['dataset_dirs'],
+                                  target_dir=params['data']['training']['target_dir'],
+                                  data_log="data-log.csv",
+                                  label_name=params['data']['label_name'])
+        validationFrameLoader = FrameLoader(dataset_dirs=params['data']['validation']['dataset_dirs'],
+                                  target_dir=params['data']['validation']['target_dir'],
+                                  data_log="data-log.csv",
+                                  label_name=params['data']['label_name'])
+        X_train, y_train = trainingFrameLoader.load(params['data']['new_dataset'])
+        X_val, y_val = validationFrameLoader.load(params['data']['new_dataset'])
+
+    # Verify that there are no duplicate examples in dataset.
+    if (are_duplicate_examples_present(X_train, X_val)):
+        print("Found duplicates! Exiting.")
+        sys.exit(1)
+
+    # TODO: Only save dataset conditionally.
+    trainingFrameLoader.save_dataset()
+    validationFrameLoader.save_dataset()
+
 
     # ======== Handcrafted Features ========
     if params['handcrafted']['enabled']:
@@ -523,4 +686,19 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    # CLI entry point: read the config path and cross-validation flag.
+    parser = argparse.ArgumentParser(
+        description="Launch training/evaluation of GOPHERS datasets."
+    )
+    parser.add_argument(
+        "--config", "-c",
+        required=True, type=str,
+        help="Path to desired config path.",
+    )
+    parser.add_argument(
+        "--cross-validation", "-x",
+        action="store_true",
+        help="Run cross-validation on the specified dataset (specified as the 'training' dataset in the config)?",
+    )
+    args = parser.parse_args()
+    main(config_path=args.config, cross_val=args.cross_validation)
diff --git a/01_dsp/dspml_pipeline/scripts/config.yaml b/01_dsp/dspml_pipeline/scripts/template_config.yaml
similarity index 72%
rename from 01_dsp/dspml_pipeline/scripts/config.yaml
rename to 01_dsp/dspml_pipeline/scripts/template_config.yaml
index e64657ca..1672bef7 100644
--- a/01_dsp/dspml_pipeline/scripts/config.yaml
+++ b/01_dsp/dspml_pipeline/scripts/template_config.yaml
@@ -1,28 +1,33 @@
-# Data configuration
+# Data configuration - Sensys first submission configs.
+# 
+# Trains on all controlled data plus field (1) and validates on field-2; Pie Ranch
+# is excluded. NOTE(review): field-2 is also listed under training below - confirm.
 data:       
   label_name: "Bulk Density (g/cm^3)"     # Name of label used data_log.csv
-  new_dataset: false                      # Set to true if this is a new dataset
+  new_dataset: true                      # Set to true if this is a new dataset
   # Training dataset 
   training:       
     # Raw datasets to combine (list of directories)
-    dataset_dirs:       
+    dataset_dirs: 
       - "../data/wet-0-soil-compaction-dataset"
       - "../data/wet-1-soil-compaction-dataset" 
       - "../data/wet-2-soil-compaction-dataset"
       - "../data/field-soil-compaction-dataset"
+      - "../data/field-2-soil-compaction-dataset"
     # Target combined training dataset directory
-    target_dir: "../data/combined-training-dataset"
+    target_dir: "../data/test-training-dataset"
   # Validation dataset
   validation:
     # Raw validation datasets to combine
     dataset_dirs:
-      - "../data/field-pie-ranch-dataset"
+      #- "../data/pie-ranch-dataset"
+      - "../data/field-2-soil-compaction-dataset"
     # Target combined validation dataset directory
-    target_dir: "../data/pie-ranch-dataset"
+    target_dir: "../data/test-val-dataset"
 
 # For handcrafted features
 handcrafted:
-  enabled: false               # Enable or disable handcrafted features
+  enabled: true               # Enable or disable handcrafted features
   new_features: true         # Set to true if this is a new dataset or if features have not been generated yet
   pruning_method: all      # Options: corr, mi, lasso, none
   top_n: 16             # Only used if pruning_method is not none
@@ -32,10 +37,10 @@ learned:
   n_features: 8               # Desired number of features
   # PCA-based feature extraction
   pca:
-    enabled: false             # Enable or disable PCA features
+    enabled: true             # Enable or disable PCA features
   # Kernel-PCA-based feature extraction
   kpca:
-    enabled: false             # Enable or disable kPCA features
+    enabled: true             # Enable or disable kPCA features
   # Autoencoder-based feature extraction
   autoencoder:
     enabled: true
@@ -44,7 +49,7 @@ learned:
     verbose: true
   # CNN-based feature extraction
   cnn:
-    enabled: true
+    enabled: false
     epochs: 20
     batch_size: 32
     verbose: true
@@ -62,13 +67,13 @@ deep_learning:
 end-to-end:
   # LSTM-based end-to-end regression
   lstm:
-    enabled: true
+    enabled: false
     epochs: 50
     batch_size: 32
     verbose: false
   # CNN-based end-to-end regression
   cnn:
-    enabled: true
+    enabled: false
     epochs: 20
     batch_size: 32
     verbose: false
@@ -80,4 +85,4 @@ end-to-end:
     verbose: false
 
 advanced:
-  verbose: true               # Set to false to reduce logging output  
\ No newline at end of file
+  verbose: true               # Set to false to reduce logging output  
diff --git a/01_dsp/dspml_pipeline/scripts/view_data.py b/01_dsp/dspml_pipeline/scripts/view_data.py
new file mode 100644
index 00000000..5b975d5e
--- /dev/null
+++ b/01_dsp/dspml_pipeline/scripts/view_data.py
@@ -0,0 +1,69 @@
+"""
+File:
+    view_data.py
+
+Description:
+    View the contents of a saved numpy file.
+
+Authors:
+    jLab
+    nubby
+    Perplexity.AI
+
+Date:
+    24 Feb 2026
+
+Version:
+    1.0.0
+"""
+import argparse
+import numpy as np
+import os
+
+from typing import Union
+
+
+def _load_npy_file(path: str) -> Union[np.ndarray, None]:
+    """
+    _load_npy_file(path)
+
+    Load the contents of a saved .npy file if the path names an existing
+    file whose extension is "npy"; otherwise return None.
+
+    Args:
+        path    (str)   Path to file.
+
+    Returns:
+        data    (np.ndarray, None)  Loaded array, or None for a bad path.
+    """
+    # Validate with a plain conditional: `assert` statements are removed
+    # under `python -O`, which would let an invalid path reach np.load().
+    is_npy_file = os.path.isfile(path) and path.split(".")[-1] == "npy"
+    if not is_npy_file:
+        return None
+
+    return np.load(path)
+
+
+def view_data(path: str):
+    """Print the contents and shape of the .npy file at `path`."""
+    data = _load_npy_file(path=path)
+    # Bail out first so an invalid path no longer prints "Contents: None".
+    if data is None:
+        print(f"ERROR: File {path} invalid; check the path!")
+        return
+    print(f"Contents: {data}")
+    print(f"Shape: {data.shape}")
+
+
+if __name__ == "__main__":
+    # Parse the single required option, then display the requested file.
+    parser = argparse.ArgumentParser(
+        description="View the contents of an input .npy file."
+    )
+    parser.add_argument(
+        "--path", "-p",
+        required=True, type=str,
+        help="Path to the desired file.",
+    )
+    view_data(path=parser.parse_args().path)
diff --git a/01_dsp/dspml_pipeline/tests/test_end_to_end.py b/01_dsp/dspml_pipeline/tests/test_end_to_end.py
index a94df172..0a1cb2c0 100644
--- a/01_dsp/dspml_pipeline/tests/test_end_to_end.py
+++ b/01_dsp/dspml_pipeline/tests/test_end_to_end.py
@@ -13,7 +13,7 @@
 from dspml_pipeline.end_to_end_estimation.lstm import LSTMEstimator
 from dspml_pipeline.results import update_results
 from dspml_pipeline.end_to_end_estimation.cnn import CNNEstimator
-from dspml_pipeline.end_to_end_estimation.transformer import TransformerEstimator
+from dspml_pipeline.end_to_end_estimation.pt_transformer import TransformerEstimator
 
 from scipy import stats
 
@@ -59,4 +59,4 @@ def display_feature_importance(feature_array, feature_names, labels):
     X = np.abs(X)
     trans = TransformerEstimator(X, y, verbose=verbose)
     model, metrics = trans.full_monty()
-    # update_results(target_dir, "End-to-end", f"Transformer", metrics)
\ No newline at end of file
+    # update_results(target_dir, "End-to-end", f"Transformer", metrics)