cioos-siooc · JessyBarrette · May 26, 2026 · Jun 26, 2024 · Jun 26, 2024 · Jun 26, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -18,6 +18,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add SDN parameter/UOM URNs and names (TEMPSZ01, PSALSZ01, INFLTF01, UPAA, PSUX, ULPM, etc.) for TSG temperature, salinity, and flow rate vocabulary entries.
 - Add `pre-commit` configuration running `ruff` format and lint (mirrors `make lint`) on every commit, with a `make install-hooks` target to install it.
 - Add `ocean_data_parser_version` global attribute to all parsed datasets by routing the NMEA, Star-Oddi DAT, and Sunburst superCO2 notes parsers through `standardize_dataset`.
+- dfo.odf: add `drop_path_from_attributes` option to drop the directory path from the
+  ODF_HEADER FILE_SPECIFICATION and INSTRUMENT_HEADER DESCRIPTION attributes.
+  Defaults to True for BIO (`bio_odf`) and False for MLI/IML (`mli_odf`) and the generic `odf` parser.
 
 ### Fixed
 

diff --git a/ocean_data_parser/parsers/dfo/odf.py b/ocean_data_parser/parsers/dfo/odf.py
@@ -72,14 +72,19 @@
 
 
 def bio_odf(
-    path: str, global_attributes: dict = None, encoding="Windows-1252"
+    path: str,
+    global_attributes: dict = None,
+    encoding="Windows-1252",
+    drop_path_from_attributes=True,
 ) -> xarray.Dataset:
     """Bedford Institute of Ocean ODF format parser.
 
     Args:
         path (str): Path to the odf file to parse
         global_attributes (dict): file specific global attributes
         encoding (str): Encoding format of the file (default: Windows-1252)
+        drop_path_from_attributes (bool): Drop the path from the attributes
+            ODF_HEADER FILE_SPECIFICATION and INSTRUMENT_HEADER DESCRIPTION
 
     Returns:
         dataset (xarray dataset): Parsed xarray dataset
@@ -89,17 +94,23 @@ def bio_odf(
         vocabularies=["BIO", "GF3"],
         global_attributes={**bio_global_attributes, **(global_attributes or {})},
         encoding=encoding,
+        drop_path_from_attributes=drop_path_from_attributes,
     )
 
 
 def mli_odf(
-    path: str, global_attributes: dict = None, encoding="Windows-1252"
+    path: str,
+    global_attributes: dict = None,
+    encoding="Windows-1252",
+    drop_path_from_attributes=False,
 ) -> xarray.Dataset:
     """Maurice Lamontagne Institute ODF format parser.
 
     Args:
         path (str): Path to the odf file to parse
         global_attributes (dict): file specific global attributes
+        drop_path_from_attributes (bool): Drop the path from the attributes
+            ODF_HEADER FILE_SPECIFICATION and INSTRUMENT_HEADER DESCRIPTION
         encoding (str): Encoding format of the file (default: Windows-1252)
 
     Returns:
@@ -109,6 +120,7 @@ def mli_odf(
         path,
         vocabularies=["MLI", "GF3"],
         global_attributes={**mli_global_attributes, **(global_attributes or {})},
+        drop_path_from_attributes=drop_path_from_attributes,
         encoding=encoding,
     )
 
@@ -141,13 +153,17 @@ def odf(
     global_attributes: dict = None,
     encoding: str = "Windows-1252",
     filename_convention=FILE_NAME_CONVENTIONS,
+    drop_path_from_attributes: bool = False,
 ) -> xarray.Dataset:
     """ODF format parser.
 
     Args:
         path (str): Path to the odf file to parse
         vocabularies (str): Vocabulary list to use for the vocabulary mapping
         global_attributes (dict): file specific global attributes
+        drop_path_from_attributes (bool): Drop the path from the attributes
+            ODF_HEADER FILE_SPECIFICATION and INSTRUMENT_HEADER DESCRIPTION
+
         encoding (str): Encoding format of the file (default: Windows-1252)
         filename_convention (str): File name convention to extract attributes.
             Should be a regex expression.
@@ -159,6 +175,7 @@ def odf(
         path,
         vocabularies=vocabularies,
         global_attributes={**odf_global_attributes, **(global_attributes or {})},
+        drop_path_from_attributes=drop_path_from_attributes,
         encoding=encoding,
         filename_convention=filename_convention,
     )
diff --git a/ocean_data_parser/parsers/dfo/odf_source/process.py b/ocean_data_parser/parsers/dfo/odf_source/process.py
@@ -40,12 +40,40 @@
 ]
 
 
+def drop_path_from_header_attributes(header: dict) -> dict:
+    """Drop the directory part of the path-like header attributes.
+
+    Only ODF_HEADER FILE_SPECIFICATION and INSTRUMENT_HEADER DESCRIPTION are
+    reviewed. Both backslash and forward slash separators are handled.
+
+    The header is updated in place and the same object is returned. Missing
+    sections, missing attributes, empty values and non-string values are
+    left untouched.
+
+    Args:
+        header (dict): Header attributes
+
+    Returns:
+        dict: Header attributes without the path
+    """
+    attributes = [
+        ("ODF_HEADER", "FILE_SPECIFICATION"),
+        ("INSTRUMENT_HEADER", "DESCRIPTION"),
+    ]
+
+    for header_key, attribute_key in attributes:
+        section = header.get(header_key)
+        if not isinstance(section, dict):
+            continue
+        value = section.get(attribute_key)
+        # Guard against non-string values, which re.split cannot handle.
+        if isinstance(value, str) and value:
+            # Keep only what follows the last path separator.
+            # NOTE(review): any value containing a slash is treated as a path;
+            # confirm DESCRIPTION never holds free text with slashes.
+            section[attribute_key] = re.split(r"[\\/]", value)[-1]
+
+    return header
+
+
 def parse_odf(
     odf_path: str,
     global_attributes: dict = None,
     vocabularies: list = None,
     add_attributes_existing_variables: bool = True,
     generate_new_vocabulary_variables: bool = True,
+    drop_path_from_attributes: bool = False,
     encoding: str = "Windows-1252",
     filename_convention=FILE_NAME_CONVENTIONS,
 ) -> xr.Dataset:
@@ -61,6 +89,8 @@ def parse_odf(
             Defaults to True.
         generate_new_vocabulary_variables (bool, optional): Generate vocabulary variables.
             Defaults to True.
+        drop_path_from_attributes (bool): Drop the path from the attributes
+            ODF_HEADER FILE_SPECIFICATION and INSTRUMENT_HEADER DESCRIPTION
         encoding (str, optional): Encoding format of the file. Defaults to "Windows-1252".
         filename_convention (str, optional): File name convention to extract attributes.
             Should be a regex expression.
@@ -78,6 +108,9 @@ def parse_odf(
             metadata["EVENT_HEADER"]["DATA_TYPE"],
         )
 
+    if drop_path_from_attributes:
+        metadata = drop_path_from_header_attributes(metadata)
+
     # Write global and variable attributes
     file_name_attributes = (
         re.search(filename_convention, Path(odf_path).name)

diff --git a/tests/test_parsers.py b/tests/test_parsers.py
@@ -25,6 +25,9 @@
 )
 from ocean_data_parser.parsers.dfo.odf_source.attributes import _review_station
 from ocean_data_parser.parsers.dfo.odf_source.parser import _convert_odf_time
+from ocean_data_parser.parsers.dfo.odf_source.process import (
+    drop_path_from_header_attributes,
+)
 
 
 def search_caplog_records(caplog, message, levelname=None):
@@ -460,6 +463,50 @@ def test_odf_station_in_globals(self, global_attributes, original_header, statio
         )
         assert response == station, f"Failed to retrieve station={station}"
 
+    @pytest.mark.parametrize(
+        ("path", "expect"),
+        [
+            # Bare file name: nothing to strip
+            ("CTD_001.odf", "CTD_001.odf"),
+            # Forward slash separators
+            ("tests/parsers_test_files/dfo/odf/bio/CTD/CTD_001.odf", "CTD_001.odf"),
+            # Single backslash separators
+            (
+                r"tests\parsers_test_files\dfo\odf\bio\CTD\CTD_001.odf",
+                "CTD_001.odf",
+            ),
+            # Doubled backslashes (non-raw literal: each separator is two characters)
+            (
+                "tests\\\\parsers_test_files\\\\dfo\\\\odf\\\\bio\\\\CTD\\\\CTD_001.odf",
+                "CTD_001.odf",
+            ),
+            # Doubled backslashes with a leading separator
+            (
+                r"\\tests\\parsers_test_files\\dfo\\odf\\bio\\CTD\\CTD_001.odf",
+                "CTD_001.odf",
+            ),
+        ],
+    )
+    def test_odf_header_file_description_with_no_path(self, path, expect):
+        """ODF_HEADER FILE_SPECIFICATION keeps only the file name."""
+        result = drop_path_from_header_attributes(
+            {"ODF_HEADER": {"FILE_SPECIFICATION": path}}
+        )
+        assert result["ODF_HEADER"]["FILE_SPECIFICATION"] == expect, (
+            "Failed to drop path from header attributes"
+        )
+
+    @pytest.mark.parametrize(
+        ("path", "expect"),
+        [
+            # Bare file name: nothing to strip
+            ("CTD_001.odf", "CTD_001.odf"),
+            # Forward slash separators
+            ("tests/parsers_test_files/dfo/odf/bio/CTD/CTD_001.odf", "CTD_001.odf"),
+            # Single backslash separators
+            (
+                r"tests\parsers_test_files\dfo\odf\bio\CTD\CTD_001.odf",
+                "CTD_001.odf",
+            ),
+            # Doubled backslashes (non-raw literal: each separator is two characters)
+            (
+                "tests\\\\parsers_test_files\\\\dfo\\\\odf\\\\bio\\\\CTD\\\\CTD_001.odf",
+                "CTD_001.odf",
+            ),
+            # Doubled backslashes with a leading separator
+            (
+                r"\\tests\\parsers_test_files\\dfo\\odf\\bio\\CTD\\CTD_001.odf",
+                "CTD_001.odf",
+            ),
+        ],
+    )
+    def test_instrument_header_description_no_path(self, path, expect):
+        """INSTRUMENT_HEADER DESCRIPTION keeps only the file name."""
+        result = drop_path_from_header_attributes(
+            {"INSTRUMENT_HEADER": {"DESCRIPTION": path}}
+        )
+        assert result["INSTRUMENT_HEADER"]["DESCRIPTION"] == expect, (
+            "Failed to drop path from header attributes"
+        )
+
 
 class TestODFBIOParser:
     @pytest.mark.parametrize(

diff --git a/tests/test_reference_netcdf.py b/tests/test_reference_netcdf.py
@@ -131,6 +131,29 @@ def ignore_from_attr(attr, expression, placeholder):
     reference.attrs.pop("ocean_data_parser_version", None)
     test.attrs.pop("ocean_data_parser_version", None)
 
+    reference.attrs.pop("Conventions", None)
+    test.attrs.pop("Conventions", None)
+
+    # Normalize paths stripped by drop_path_from_header_attributes so older
+    # reference files (generated before path-stripping) still match.
+    def _basename(value):
+        if not isinstance(value, str):
+            return value
+        return re.split(r"\\|/", value)[-1]
+
+    for ds in (reference, test):
+        if "instrument_description" in ds.attrs:
+            ds.attrs["instrument_description"] = _basename(
+                ds.attrs["instrument_description"]
+            )
+        for attr in ("original_odf_header_json", "original_header"):
+            if attr in ds.attrs and isinstance(ds.attrs[attr], str):
+                ds.attrs[attr] = re.sub(
+                    r'("(?:DESCRIPTION|FILE_SPECIFICATION)"\s*[:=]\s*")([^"]*)"',
+                    lambda m: f'{m.group(1)}{_basename(m.group(2))}"',
+                    ds.attrs[attr],
+                )
+
     reference = _standardize_dataset(reference)
     test = _standardize_dataset(test)