diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 901b52d9..4f572f47 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -12,9 +12,9 @@ jobs: steps: - uses: actions/checkout@master - - name: Set up Python 3.12 - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 + with: python-version: 3.12 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 9280be8f..aa59b9c7 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -12,10 +12,7 @@ jobs: fail-fast: false matrix: os: [windows-latest, ubuntu-latest, macos-latest] - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - exclude: - - os: ubuntu-latest - python-version: '3.6' + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] env: OS: ${{ matrix.os }} @@ -35,8 +32,8 @@ jobs: - run: python -m pip install .[test,docs] - run: python -m pytest ./testing/ --cov=idelib --cov-report=xml -n auto - - run: sphinx-build -W -b html docs docs/html - - run: sphinx-build -W -b doctest docs docs/doctest + - run: sphinx-build -b html docs docs/html + - run: sphinx-build -b doctest docs docs/doctest - uses: codecov/codecov-action@v3 with: diff --git a/.gitignore b/.gitignore index a2245009..b0924167 100644 --- a/.gitignore +++ b/.gitignore @@ -70,6 +70,7 @@ docs/_build/ docs/html/ docs/latex/ docs/doctest/ +docs/doctrees/ # PyBuilder target/ diff --git a/README.md b/README.md index bae8bc06..98b7e598 100644 --- a/README.md +++ b/README.md @@ -3,17 +3,18 @@ # _idelib_ README -_idelib_ is a Python API for accessing [enDAQ's](http://endaq.com) IDE recordings. The IDE format is -an [EBML](http://matroska-org.github.io/libebml/specs.html) encoded file using a -custom schema. This library utilizes our -[ebmlite](https://github.com/MideTechnology/ebmlite) to parse the files, and -provides classes that make reading data simple. 
+_idelib_ is the core Python API for accessing the contents of [enDAQ](http://endaq.com) ``.IDE`` recording files. +It provides a means of easily accessing time series sensor data, with all necessary calibration applied, as +well as recording metadata. + +The package also contains the command-line utilities ``ideexport``, ``ideinfo``, and ``idesync`` for use outside of Python. + ## IDE File Basics ### What's an IDE file? -An IDE file is a read-only hierarchical data format that stores recording +An IDE file is an [EBML](http://matroska-org.github.io/libebml/specs.html)-based, read-only hierarchical data format that stores recording information generated by an [enDAQ sensor device](http://endaq.com/collections/endaq-sensors-shock-vibration-s-series). It contains both time-indexed data from several different kinds of sensors (like acceleration, pressure, temperature, etc.), as well as metadata about the recording device (like device diff --git a/docs/api_ref.rst b/docs/api_ref.rst index 0e1a49b8..e9f48e4b 100644 --- a/docs/api_ref.rst +++ b/docs/api_ref.rst @@ -6,3 +6,6 @@ :caption: Contents: idelib/dataset + idelib/importer + idelib/sync + idelib/util diff --git a/docs/conf.py b/docs/conf.py index 89e6cd83..4f5a53f1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,6 +13,10 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + import idelib # -- Project information ----------------------------------------------------- @@ -204,4 +208,4 @@ # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {'python': ('https://docs.python.org/2', None)} +intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} diff --git a/docs/ide_basics.rst b/docs/ide_basics.rst index 5fed9749..afb68e4c 100644 --- a/docs/ide_basics.rst +++ b/docs/ide_basics.rst @@ -4,7 +4,7 @@ IDE File Basics What's an IDE file? ------------------- -An IDE file is a read-only hierarchical data format that stores recording information generated by an enDAQ sensor device. It contains both time-indexed data from several different kinds of sensors (like acceleration, pressure, temperature, etc.), as well as metadata about the recording device (like device serial number, model number, device name, etc.) and recording settings. +An IDE file is a read-only, `EBML <http://matroska-org.github.io/libebml/specs.html>`_-based hierarchical data format that stores recording information generated by an enDAQ sensor device. It contains both time-indexed data from several different kinds of sensors (like acceleration, pressure, temperature, etc.), as well as metadata about the recording device (like device serial number, model number, device name, etc.) and recording settings. .. _dataset_desc: diff --git a/docs/idelib/dataset.rst b/docs/idelib/dataset.rst index 8d09f1e5..145e73a6 100644 --- a/docs/idelib/dataset.rst +++ b/docs/idelib/dataset.rst @@ -1,5 +1,5 @@ -dataset.py -============ +``idelib.dataset`` +================== .. automodule:: idelib.dataset diff --git a/docs/idelib/importer.rst b/docs/idelib/importer.rst new file mode 100644 index 00000000..9507ce70 --- /dev/null +++ b/docs/idelib/importer.rst @@ -0,0 +1,10 @@ +``idelib.importer`` +=================== + +.. automodule:: idelib.importer + +.. autofunction:: idelib.importer.importFile + +.. autofunction:: idelib.importer.openFile + +..
autofunction:: idelib.importer.readData diff --git a/docs/idelib/sync.rst b/docs/idelib/sync.rst new file mode 100644 index 00000000..524581f6 --- /dev/null +++ b/docs/idelib/sync.rst @@ -0,0 +1,6 @@ +``idelib.sync`` +=============== + +.. automodule:: idelib.sync + :members: + diff --git a/docs/idelib/util.rst b/docs/idelib/util.rst new file mode 100644 index 00000000..65e21b36 --- /dev/null +++ b/docs/idelib/util.rst @@ -0,0 +1,5 @@ +``idelib.util`` +=============== + +.. automodule:: idelib.util + :members: diff --git a/docs/index.rst b/docs/index.rst index 49eee2ec..d4a77f41 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,14 +1,18 @@ `idelib`: a Python Library for accessing enDAQ sensor recordings ================================================================ -`idelib` is a Python API for accessing `enDAQ `_'s IDE recordings. The IDE format is an `EBML `_ encoded file using a custom schema. This library utilizes our `ebmlite `_ to parse the files, and provides classes that make reading data simple. +`idelib` is the core Python API for accessing the contents of `enDAQ <http://endaq.com>`_ IDE recordings. +It provides a means of easily accessing time series sensor data, with all necessary calibration applied, as +well as recording metadata. +The package also contains the command-line utilities ``ideexport``, ``ideinfo``, and ``idesync`` for use outside of Python. .. toctree:: :maxdepth: 2 :caption: Contents: ide_basics + utilities api_ref diff --git a/docs/utilities.rst b/docs/utilities.rst new file mode 100644 index 00000000..1f1a5e9f --- /dev/null +++ b/docs/utilities.rst @@ -0,0 +1,132 @@ +IDE File Command-Line Utilities +=============================== + +``idelib`` provides some useful command-line utilities for converting IDE data to other formats, +and for viewing general information about an IDE file. These are automatically installed when +installing the ``idelib`` package.
+ +Getting Started +--------------- + +Installing +'''''''''' + +The utilities are part of the ``idelib`` package, so they are installed along with it. The common +way to install is via ``pip`` (which is typically bundled with Python):: + + pip install idelib + + +Running the scripts +''''''''''''''''''' + +These utilities can be run in two ways: via an executable script, or as a Python module. + +Running as a Python module +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can run the utilities by running Python with the ``-m`` argument, followed by the name of the +submodule and any arguments, e.g.: + +* ``python -m idelib.tools.ideexport --help`` +* ``python -m idelib.tools.ideinfo --help`` +* ``python -m idelib.tools.idesync --help`` + +While verbose, this is the most reliable way to run the utilities. + +Running the executable +~~~~~~~~~~~~~~~~~~~~~~ + +When ``idelib`` is installed, it will create an executable file for each utility. The location +of the scripts varies by operating system (Linux/Windows/MacOS/etc.) and by Python environment (e.g., standard Python from +`python.org `_ or `conda `_), but it should +be in your 'path', so running it is done by typing its name, followed by any arguments: + +* ``ideexport --help`` +* ``ideinfo --help`` +* ``idesync --help`` + +Unfortunately, there are many factors specific to your computer and Python setup that can interfere +with this. Most commonly, the executables are somewhere outside of your 'path' and could not be found, +or security/authorization issues prevented the executables' creation. In either case, the result is +the standard 'not found' error for your OS. + + +``ideexport`` +------------- + +``ideexport`` converts one or more IDE files into text (CSV, etc.) or Matlab MAT v5 files. + +.. code-block:: + + usage: ideexport.py [-h] [-o OUTPUT] [-t {csv,mat,txt}] [-c CHANNEL] [-m] [-u] [-n] [-r] [-d {comma,tab,pipe}] [-f] FILENAME.IDE [FILENAME.IDE ...] 
+ + Batch IDE Conversion Utility v3.4.0 - Copyright (c) 2026 Midé Technology + + positional arguments: + FILENAME.IDE The source .IDE file(s) to convert. Wildcards permitted. + + options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + The output path to which to save the exported files. Defaults to the same location as the source file. + -t {csv,mat,txt}, --type {csv,mat,txt} + The type of file to export. + -c CHANNEL, --channel CHANNEL + Export the specific channel. Can be used multiple times. If not used, all channels will export. + -m, --removemean Remove the mean from accelerometer data. + -u, --utc Write timestamps as UTC 'Unix epoch' time. + -n, --names Include channel names in exported filenames. + -s, --sync Apply recordings' synchronization data (if present in the file). + + Text Export Options (CSV, TXT, etc.): + -r, --headers Write 'header' information (column names) as the first row of text-based export. + -d {comma,tab,pipe}, --delimiter {comma,tab,pipe} + The delimiting character, for exporting non-CSV text-based files. + -f, --isoformat Write timestamps as ISO-formatted UTC. + +``idesync`` +----------- + +``idesync`` generates synchronization metadata, synchronizing one or more files to a 'reference' +recording. This information, which is inserted into the recording, can be applied when exporting +with ``ideexport --sync``. + +.. code-block:: + + usage: idesync.py [-h] [-g] [-i] FILENAME.IDE [FILENAME.IDE ...] + + Batch IDE Synchronization Utility v3.4.0b1 - Copyright (c) 2025 Midé Technology + + positional arguments: + FILENAME.IDE The recording to which to synchronize the others, and/or adjust its starting time if --gps is used. + FILENAME.IDE Recordings to sync to the reference. + + options: + -h, --help show this help message and exit + -g, --gps Update the reference recording's start time using its GPS/GNSS data. Cannot be used in conjunction with --inherit.
+ -i, --inherit If the reference recording has been synced to another, sync the other datasets to that (rather than the reference itself). Cannot be used in conjunction with --gps. + +Note: The applied synchronization is not currently displayed when viewing recordings in *enDAQ Lab* +(version 3.1.1 or earlier). + + +``ideinfo`` +----------- + +``ideinfo`` gathers information about an IDE file, and either displays the text on the console or +writes it to a file. + +.. code-block:: + + usage: ideinfo.py [-h] [-o OUTPUT] FILENAME.IDE [FILENAME.IDE ...] + + IDE Info Viewer v3.4.0 - Copyright (c) 2026 Midé Technology + + positional arguments: + FILENAME.IDE The source .IDE file(s) to convert. Wildcards permitted. + + options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + The output path to which to save the info text. diff --git a/idelib/__init__.py b/idelib/__init__.py index 0fd24a47..07b2053b 100644 --- a/idelib/__init__.py +++ b/idelib/__init__.py @@ -5,16 +5,18 @@ """ __author__ = "David Randall Stokes" -__copyright__ = "Copyright (c) 2024 Midé Technology" +__copyright__ = "Copyright (c) 2026 Midé Technology" __maintainer__ = "Midé Technology" __email__ = "help@mide.com" -__version__ = '3.3.0' +__version__ = '3.4.0' __status__ = "Production/Stable" -from .importer import importFile +import logging +logger = logging.getLogger(__name__) +logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s") # Add EBML schema path to ebmlite search paths import ebmlite @@ -22,3 +24,5 @@ SCHEMA_PATH = "{idelib}/schemata" if SCHEMA_PATH not in ebmlite.SCHEMA_PATH: ebmlite.SCHEMA_PATH.insert(0, SCHEMA_PATH) + +from .importer import importFile diff --git a/idelib/attributes.py b/idelib/attributes.py index 9ec75514..e3fe563f 100644 --- a/idelib/attributes.py +++ b/idelib/attributes.py @@ -5,29 +5,23 @@ """ import datetime -import sys import time +from typing import Any, Union from ebmlite import loadSchema -# Dictionaries in Python 3.7+ are
explicitly insert-ordered in all -# implementations. If older, continue to use `collections.OrderedDict`. -if sys.hexversion < 0x03070000: - from collections import OrderedDict as Dict -else: - Dict = dict # ============================================================================== # # ============================================================================== -def decode_attributes(data, withTypes=False): +def decode_attributes(data: list[dict], withTypes: bool = False) -> dict: """ Convert a set of Attributes (as a list of dictionaries containing an `AttributeName` and one of the Attribute value elements (`IntAttribute`, `FloatAttribute`, etc.) to a proper dictionary. Attributes are tagged as 'multiple,' so they become lists when the EBML is parsed. """ - result = Dict() + result = {} for atts in data: k = atts.pop('AttributeName', None) if k is None: @@ -41,7 +35,7 @@ def decode_attributes(data, withTypes=False): return result -def encode_attributes(data): +def encode_attributes(data: Union[dict, tuple[str, Any]]) -> list[dict]: """ Construct a set of `Attribute` dicts from a dictionary or list of tuples/lists. Each value should be either a simple data type or a tuple containing the value and the specific value element name @@ -86,7 +80,7 @@ def encode_attributes(data): return result -def build_attributes(data): +def build_attributes(data: Union[dict, tuple[str, Any]]) -> bytes: """ Construct `Attribute` EBML from dictionary or list of key/value pairs. 
Each value should be either a simple data type or a tuple containing the value and the specific value element name (`IntAttribute`, diff --git a/idelib/dataset.py b/idelib/dataset.py index bafb2be1..67a49966 100644 --- a/idelib/dataset.py +++ b/idelib/dataset.py @@ -47,10 +47,10 @@ 'SubChannel', 'WarningRange', 'Cascading', 'Transformable'] from collections.abc import Iterable, Sequence -from datetime import datetime +from datetime import datetime, timedelta from math import ceil -from threading import Lock -from typing import Any, Dict, Optional +from threading import RLock +from typing import Any, Callable, Dict, List, Optional, Union, Type import warnings import os.path @@ -73,8 +73,7 @@ #=============================================================================== import logging -logger = logging.getLogger('idelib') -logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) __DEBUG__ = str(os.environ.get('MIDE_DEV', 0)) == '1' @@ -91,6 +90,19 @@ def mapRange(x, in_min, in_max, out_min, out_max): return ((x - in_min + 0.0) * (out_max - out_min) / (in_max - in_min) + out_min) + +def greater(v1, v2): + """ Return the greater of two values. Faster than `max()` with only two. + """ + return v1 if v1 > v2 else v2 + + +def lesser(v1, v2): + """ Return the lesser of two values. Faster than `min()` with only two. + """ + return v1 if v1 < v2 else v2 + + #=============================================================================== # Mix-In Classes #=============================================================================== @@ -134,11 +146,11 @@ class Transformable(Cascading): etc.), making it easy to turn the transformation on or off. :ivar transform: The transformation function/object - :ivar raw: If `False`, the transform will not be applied to data. - Note: The object's `transform` attribute will not change; it will - just be ignored. 
""" + dataset: "Dataset" = None + children: List["Transformable"] = None + def setTransform(self, transform, update=True): """ Set the transforming function/object. This does not change the value of `raw`, however; the new transform will not be applied @@ -185,7 +197,7 @@ def getTransforms(self, id_=None, _tlist=None): """ _tlist = [] if _tlist is None else _tlist if getattr(self, "_transform", None) is not None: - if isinstance(self._transform, Iterable) and id_ is not None: + if isinstance(self._transform, (list, tuple, dict)) and id_ is not None: x = self._transform[id_] else: x = self._transform @@ -217,7 +229,6 @@ class Dataset(Cascading): A valid file will have at least one, even if there are no `Session` elements in the data. :ivar sensors: A dictionary of Sensors. - :ivar channels: A dictionary of individual Sensor channels. :ivar plots: A dictionary of individual Plots, the modified output of a Channel (or even another plot). :ivar transforms: A dictionary of functions (or function-like objects) @@ -229,11 +240,11 @@ def __init__(self, stream, name=None, quiet=True, attributes=None): functions in the `importer` module. :param stream: A file-like stream object containing EBML data. - :keyword name: An optional name for the Dataset. Defaults to the + :param name: An optional name for the Dataset. Defaults to the base name of the file (if applicable). - :keyword quiet: If `True`, non-fatal errors (e.g. schema/file + :param quiet: If `True`, non-fatal errors (e.g. schema/file version mismatches) are suppressed. - :keyword attributes: A dictionary of arbitrary attributes, e.g. + :param attributes: A dictionary of arbitrary attributes, e.g. ``Attribute`` elements parsed from the file. Typically used for diagnostic data. 
""" @@ -248,6 +259,7 @@ def __init__(self, stream, name=None, quiet=True, attributes=None): self.currentSession = None self.recorderInfo = {} self.recorderConfig = None + self.bandwidthLimits = {} # For future use self.attributes = attributes if attributes else {} @@ -262,11 +274,15 @@ def __init__(self, stream, name=None, quiet=True, attributes=None): # For keeping user-defined data self._userdata: Optional[Dict[str, Any]] = None self._userdataOffset: Optional[int] = None + self._userdataOriginal: Optional[Dict[str, Any]] = None self._filesize: Optional[int] = None - self._channelDataLock = Lock() + self._fingerprint = None + + self._channelDataLock = RLock() # Subsets: used when importing multiple files into the same dataset. + # Currently an experimental feature; see `idelib.multi_importer` self.subsets = [] if name is None: @@ -309,7 +325,21 @@ def channels(self): """ A dictionary of individual Sensor channels. """ # Only return channels with subchannels. If all analog subchannels are # disabled, the recording properties will still show the parent channel. - return {k:v for k,v in self._channels.items() if v.subchannels} + return {k: v for k, v in self._channels.items() if v.subchannels} + + + @property + def fingerprint(self) -> Optional[str]: + """ A simple hash for identifying this `Dataset`, so it can be + referenced elsewhere, even if moved/renamed. The fingerprint + hash is generated from the recording's metadata, so modifying or + truncating the sensor data will not affect it. As such, the + fingerprint should not be used to verify file integrity. 
+ """ + try: + return self._fingerprint.hexdigest() + except AttributeError: + return None def close(self): @@ -364,17 +394,20 @@ def endSession(self): """ cs = self.currentSession if cs is not None: - if cs.startTime is None: - cs.startTime = cs.firstTime - if cs.endTime is None: - cs.endTime = cs.lastTime - self.currentSession = None - def addSensor(self, sensorId=None, name=None, sensorClass=None, - traceData=None, transform=None, attributes=None, - bandwidthLimitId=None): + def addSensor(self, + sensorId: Optional[int] = None, + name: Optional[str] = None, + sensorClass: Optional[Type] = None, + sourceName: Optional[str] = None, + sourceId: Optional[str] = None, + relative: bool = False, + traceData: Optional[Dict[str, Any]] = None, + transform: Union[int, Transform, None] = None, + attributes: Optional[Dict[str, Any]] = None, + bandwidthLimitId: Optional[int] = None): """ Create a new Sensor object, and add it to the dataset, and return it. If the given sensor ID already exists, the existing sensor is returned instead. To modify a sensor or add a sensor object created @@ -383,9 +416,26 @@ def addSensor(self, sensorId=None, name=None, sensorClass=None, Note that the `sensorId` keyword argument is *not* optional. :param sensorId: The ID of the new sensor. - :keyword name: The new sensor's name - :keyword sensorClass: An alternate (sub)class of sensor. Defaults + :param name: The new sensor's name + :param sensorClass: An alternate (sub)class of sensor. Defaults to `None`, which creates a `Sensor`. + :param sourceName: Human-friendly source (authority, network, or + other source-of-truth) name for generic/virtual sensor types, + particularly 'time' sensors. + :param sourceId: Machine-readable, uniquely identifying hash + identifying source equivalency for comparing relative + sources, particularly 'time' sensors. 
+ :param relative: `False` if sensor values are Absolute (default), + `True` if values are Relative (have an unknown offset, e.g., + AC-coupled measurements or time values with an unknown + Epoch). + :param transform: A sensor-level data pre-processing function. + :param attributes: A dictionary of arbitrary attributes, e.g. + ``Attribute`` elements parsed from the file. + :param traceData: An optional dictionary of traceability data, + such as sensor serial number, etc. + :param bandwidthLimitId: The ID of the bandwidth limit (defined + in the ``BwLimitList`` (not currently used). :return: The new sensor. """ # `sensorId` is mandatory; it's a keyword argument to streamline import. @@ -397,8 +447,10 @@ def addSensor(self, sensorId=None, name=None, sensorClass=None, return self.sensors[sensorId] sensorClass = Sensor if sensorClass is None else sensorClass - sensor = sensorClass(self,sensorId,name=name, transform=transform, - traceData=traceData, attributes=attributes, + sensor = sensorClass(self, sensorId, name=name, + sourceName=sourceName, sourceId=sourceId, + relative=relative, traceData=traceData, + transform=transform, attributes=attributes, bandwidthLimitId=bandwidthLimitId) self.sensors[sensorId] = sensor return sensor @@ -409,9 +461,9 @@ def addChannel(self, channelId=None, parser=None, channelClass=None, """ Add a Channel to a Sensor. Note that the `channelId` and `parser` keyword arguments are *not* optional. - :keyword channelId: An unique ID number for the channel. - :keyword parser: The Channel's data parser - :keyword channelClass: An alternate (sub)class of channel. + :param channelId: An unique ID number for the channel. + :param parser: The Channel's data parser + :param channelClass: An alternate (sub)class of channel. Defaults to `None`, which creates a standard `Channel`. 
""" if channelId is None or parser is None: @@ -444,11 +496,11 @@ def addWarning(self, warningId=None, channelId=None, subchannelId=None, """ Add a `WarningRange` to the dataset, which indicates when a sensor is reporting values outside of a given range. - :keyword warningId: A unique numeric ID for the `WarningRange`. - :keyword channelId: The channel ID of the source being monitored. - :keyword subchannelId: The monitored source's subchannel ID. - :keyword low: The minimum value of the acceptable range. - :keyword high: The maximum value of the acceptable range. + :param warningId: A unique numeric ID for the `WarningRange`. + :param channelId: The channel ID of the source being monitored. + :param subchannelId: The monitored source's subchannel ID. + :param low: The minimum value of the acceptable range. + :param high: The maximum value of the acceptable range. :return: The new `WarningRange` instance. """ w = WarningRange(self, warningId=warningId, channelId=channelId, @@ -488,11 +540,11 @@ def getPlots(self, subchannels=True, plots=True, debug=True, sort=True, visibility=0): """ Get all plotable data sources: sensor SubChannels and/or Plots. - :keyword subchannels: Include subchannels if `True`. - :keyword plots: Include Plots if `True`. - :keyword debug: If `False`, exclude debugging/diagnostic channels. - :keyword sort: Sort the plots by name if `True`. - :keyword visibility: The plots' maximum level of visibility, + :param subchannels: Include subchannels if `True`. + :param plots: Include Plots if `True`. + :param debug: If `False`, exclude debugging/diagnostic channels. + :param sort: Sort the plots by name if `True`. + :param visibility: The plots' maximum level of visibility, for display purposes. Standard visibility ranges: * 0: Standard data sources. Always visible by default. * 10: Optionally visible, of interest but only in certain situations. 
@@ -542,41 +594,131 @@ def fillCaches(self): #=============================================================================== class Session(object): - """ A collection of data within a dataset, e.g. one test run. A Dataset is - expected to contain one or more Sessions. + """ + Information about a collection of data within a `Dataset`, e.g. one test run. + A `Dataset` is expected to contain at least one `Session`. """ def __init__(self, dataset, sessionId=0, startTime=None, endTime=None, utcStartTime=None): - """ Constructor. This should generally be done indirectly via - `Dataset.addSession()`. + """ Information about a collection of data within a :py:class:`Dataset`, + i.e., one run of a recorder. A :py:class:`Dataset` loaded from an + IDE file contains one `Session`. + + Instantiating a `Session` should generally be done indirectly + via :py:meth:`Dataset.addSession()` as part of the import process. :param dataset: The parent `Dataset` - :keyword sessionId: The Session's numeric ID. Typically + :param sessionId: The Session's numeric ID. Typically sequential, starting at 0. - :keyword startTime: The session's start time, in microseconds, + :param startTime: The session's start time, in microseconds, relative to the start of the recording. - :keyword endTime: The session's end time, in microseconds, + :param endTime: The session's end time, in microseconds, relative to the end of the recording. - :keyword utcStartTime: The session's start time, as an absolute + :param utcStartTime: The session's start time, as an absolute POSIX/epoch timestamp. 
""" - self.dataset = dataset - self.startTime = startTime - self.endTime = endTime - self.sessionId = sessionId - self.utcStartTime = utcStartTime or dataset.lastUtcTime - + self.dataset: Dataset = dataset + self.sessionId: int = sessionId + self.utcStartTimeOriginal: int = utcStartTime or dataset.lastUtcTime + self.utcStartTime: int = self.utcStartTimeOriginal + + self._data: dict[int, 'EventArray'] = None + self._offset: float = 0 + # firstTime and lastTime are the actual last event time. These will - # typically be the same as startTime and endTime, but not necessarily - # so. - self.firstTime = self.lastTime = None + # typically be the same as startTime and endTime, which are being + # deprecated. + self.firstTimeOriginal: float = startTime + self.firstTime: float = startTime + self.lastTimeOriginal: float = endTime + self.lastTime: float = endTime + self.syncInfo: Optional[Dict[str, Any]] = None + self.syncSensor: Optional[Sensor] = None + self.syncZero: float = None + + + @property + def startTime(self) -> float: + warnings.warn('Session.startTime is deprecated, and will be removed ' + 'in the near future. Use Session.firstTime instead. ', + PendingDeprecationWarning) + return self.firstTime + + + # noinspection PyDeprecation + @startTime.setter + def startTime(self, val: float): + warnings.warn('Session.startTime is deprecated, and will be removed ' + 'in the near future. Use Session.firstTime instead. ', + PendingDeprecationWarning) + self.firstTime = val + + + @property + def endTime(self) -> float: + warnings.warn('Session.endTime is deprecated, and will be removed ' + 'in the future. Use Session.lastTime instead. ', + PendingDeprecationWarning) + return self.lastTime + + # noinspection PyDeprecation + @endTime.setter + def endTime(self, val: float): + warnings.warn('Session.endTime is deprecated, and will be removed ' + 'in the future. Use Session.lastTime instead. 
', + PendingDeprecationWarning) + self.lastTime = val + + + @property + def data(self) -> Dict[int, 'EventArray']: + """ All the Channel-level `EventArray` instances in this `Session`, + keyed by channel ID. + """ + if not self._data or self.dataset.loading: + self._data = {} + for chid, ch in self.dataset.channels.items(): + if self.sessionId in ch.sessions: + self._data[chid] = ch.sessions[self.sessionId] + return self._data + + + @property + def offset(self) -> float: + """ A time offset (in microseconds) for all events, across all + Channels, in this `Session`. + """ + return self._offset + + + @offset.setter + def offset(self, offset: float): + """ A time offset (in microseconds) for all events, across all + Channels, in this `Session`. + """ + offset = offset or 0. + self._offset = offset + if self.firstTimeOriginal is not None: + self.firstTime = self.firstTimeOriginal + offset + if self.lastTimeOriginal is not None: + self.lastTime = self.lastTimeOriginal + offset + for d in self.data.values(): + # TODO: Exclude non-relative time channels? + d._setOffset(offset) + + + # noinspection PyDeprecation def __repr__(self): - return "<%s (id=%s) at 0x%08X>" % (self.__class__.__name__, - self.sessionId, id(self)) - + """ Return repr(self). 
""" + # TODO: change `utcfromtimestamp(t)` to `fromtimestamp(t, datetime.UTC)` + # after Python 3.10 reaches EoL (2026-10) + return "<%s %s (%s)>" % (self.__class__.__name__, + self.sessionId, + datetime.utcfromtimestamp(self.utcStartTime)) + def __eq__(self, other): """ x.__eq__(y) <==> x==y """ @@ -586,57 +728,81 @@ def __eq__(self, other): return False else: return self.dataset == other.dataset \ - and self.startTime == other.startTime \ - and self.endTime == other.endTime \ and self.sessionId == other.sessionId \ - and self.utcStartTime == other.utcStartTime \ - and self.firstTime == other.firstTime \ - and self.lastTime == other.lastTime + and self.utcStartTimeOriginal == other.utcStartTimeOriginal \ + and self.firstTimeOriginal == other.firstTimeOriginal \ + and self.lastTimeOriginal == other.lastTimeOriginal #=============================================================================== # #=============================================================================== + class Sensor(Cascading): """ One Sensor object. A Dataset contains at least one. """ - def __init__(self, dataset, sensorId, name=None, transform=None, - traceData=None, attributes=None, bandwidthLimitId=None): + def __init__(self, + dataset: Dataset, + sensorId: Optional[int] = None, + name: Optional[str] = None, + sourceName: Optional[str] = None, + sourceId: Optional[str] = None, + relative: bool = False, + traceData: Optional[Dict[str, Any]] = None, + transform: Union[int, Transform, None] = None, + attributes: Optional[Dict[str, Any]] = None, + bandwidthLimitId: Optional[int] = None): """ Constructor. This should generally be done indirectly via `Dataset.addSensor()`. - :param dataset: The parent `Dataset`. :param sensorId: The ID of the new sensor. - :keyword name: The new sensor's name. - :keyword transform: A sensor-level data pre-processing function. - :keyword traceData: Sensor traceability data. - :keyword attributes: A dictionary of arbitrary attributes, e.g. 
+ :param name: The new sensor's name + :param sourceName: Human-friendly source (authority, network, or + other source-of-truth) name for generic/virtual sensor types, + particularly 'time' sensors. + :param sourceId: Machine-readable, uniquely identifying hash + identifying source equivalency for comparing relative + sources, particularly 'time' sensors. + :param relative: `False` if sensor values are Absolute (default), + `True` if values are Relative (have an unknown offset), + e.g., AC-coupled measurements or time values with an unknown + Epoch. + :param transform: A sensor-level data pre-processing function. + :param attributes: A dictionary of arbitrary attributes, e.g. ``Attribute`` elements parsed from the file. + :param traceData: An optional dictionary of traceability data, + such as sensor serial number, etc. + :param bandwidthLimitId: The ID of the bandwidth limit (defined + in the ``BwLimitList`` (not currently used). + :return: The new sensor. """ if isinstance(name, bytes): name.decode() - self.name = "Sensor%02d" if name is None else name + self.name = f"Sensor{sensorId:02d}" if name is None else name self.dataset = dataset self.parent = dataset self.id = sensorId - self.channels = {} self.traceData = traceData self.attributes = attributes + self.sourceName = sourceName + self.sourceId = sourceId + self.relative = bool(relative) + + self._channels = None + self._hash = None + + # Not currently used: self.bandwidthLimitId = bandwidthLimitId self._bandwidthCutoff = None self._bandwidthRolloff = None + self._transform = transform - def __getitem__(self, idx): - return self.channels[idx] + def __repr__(self): + return f"<{type(self).__name__} {self.id} '{self.path()}' at 0x{id(self):08x}>" - @property - def children(self): - return list(self.channels.values()) - - @property def bandwidthCutoff(self): if self._bandwidthCutoff is None: @@ -664,6 +830,15 @@ def bandwidthRolloff(self): return self._bandwidthRolloff + def __hash__(self): + if self._hash is 
None: + self._hash = hash((self.name, self.id, + self.sourceName, self.sourceId, + repr(self.traceData), repr(self.attributes), + self.bandwidthLimitId)) + return self._hash + + def __eq__(self, other): if other is self: return True @@ -671,15 +846,27 @@ def __eq__(self, other): return False else: return self.name == other.name \ - and self.dataset == other.dataset \ - and self.parent == other.parent \ and self.id == other.id \ - and self.channels == other.channels \ and self.traceData == other.traceData \ + and self.sourceId == other.sourceId \ + and self.sourceName == other.sourceName \ and self.attributes == other.attributes \ and self.bandwidthLimitId == other.bandwidthLimitId + def getReferrers(self) -> List['SubChannel']: + """ Get all the `SubChannel` objects that reference this `Sensor`. + """ + if not self._channels or self.dataset.loading: + channels = [] + for ch in self.dataset.channels.values(): + for subc in ch.subchannels: + if subc.sensor == self or subc.sensor == self.id: + channels.append(subc) + self._channels = channels + return self._channels + + #=============================================================================== # Channels #=============================================================================== @@ -707,20 +894,20 @@ def __init__(self, dataset, channelId=None, parser=None, sensor=None, data from a single sensor. :param channelId: The channel's ID, unique within the file. :param parser: The channel's EBML data parser. - :keyword name: A custom name for this channel. - :keyword units: The units measured in this channel, used if units + :param name: A custom name for this channel. + :param units: The units measured in this channel, used if units are not explicitly indicated in the Channel's SubChannels. - :keyword transform: A Transform object for adjusting sensor + :param transform: A Transform object for adjusting sensor readings at the Channel level. 
- :keyword displayRange: A 'hint' to the minimum and maximum values + :param displayRange: A 'hint' to the minimum and maximum values of data in this channel. - :keyword cache: If `True`, this channel's data will be kept in + :param cache: If `True`, this channel's data will be kept in memory rather than lazy-loaded. - :keyword singleSample: A 'hint' that the data blocks for this + :param singleSample: A 'hint' that the data blocks for this channel each contain only a single sample (e.g. temperature/ pressure on an SSX). If `None`, this will be determined from the sample data. - :keyword attributes: A dictionary of arbitrary attributes, e.g. + :param attributes: A dictionary of arbitrary attributes, e.g. ``Attribute`` elements parsed from the file. """ self.id = channelId @@ -739,10 +926,7 @@ def __init__(self, dataset, channelId=None, parser=None, sensor=None, if isinstance(sensor, int): sensor = self.dataset.sensors.get(sensor, None) - if sensor is not None: - sensor.channels[channelId] = self - sensorname = sensor.name - + if name is None: sensorname = sensor.name if sensor is not None else "Unknown Sensor" name = "%s:%02d" % (sensorname, channelId) @@ -759,13 +943,13 @@ def __init__(self, dataset, channelId=None, parser=None, sensor=None, self.hasDisplayRange = displayRange is not None # Channels have 1 or more subchannels - self.subchannels = [None] * len(self.types) + self.subchannels: List['SubChannel'] = [None] * len(self.types) # A set of session EventLists. Populated dynamically with # each call to getSession(). 
self.sessions = {} - self.subsampleCount = [0,sys.maxsize] + self.subsampleCount = [0, sys.maxsize] self.setTransform(transform, update=False) @@ -778,7 +962,7 @@ def __init__(self, dataset, channelId=None, parser=None, sensor=None, @property - def children(self): + def children(self) -> List['SubChannel']: return list(iter(self)) @@ -832,8 +1016,7 @@ def addSubChannel(self, subchannelId=None, channelClass=None, **kwargs): if subchannelId >= len(self.subchannels): raise IndexError( - "Channel's parser only generates %d subchannels" % - len(self.subchannels)) + f"Channel's parser only generates {len(self.subchannels)} subchannels") else: channelClass = channelClass or SubChannel sc = self.subchannels[subchannelId] @@ -864,7 +1047,7 @@ def getSubChannel(self, subchannelId): def getSession(self, sessionId=None): """ Retrieve data recorded in a Session. - :keyword sessionId: The ID of the session to retrieve. + :param sessionId: The ID of the session to retrieve. :return: The recorded data. :rtype: `EventArray` """ @@ -886,10 +1069,10 @@ def parseBlock(self, block, start=None, end=None, step=1, subchannel=None): """ Parse subsamples out of a data block. Used internally. :param block: The data block from which to parse subsamples. - :keyword start: The first block index to retrieve. - :keyword end: The last block index to retrieve. - :keyword step: The number of steps between samples. - :keyword subchannel: If supplied, return only the values for a + :param start: The first block index to retrieve. + :param end: The last block index to retrieve. + :param step: The number of steps between samples. + :param subchannel: If supplied, return only the values for a specific subchannel (i.e. the method is being called by a SubChannel). :return: A list of tuples, one for each subsample. @@ -916,7 +1099,7 @@ def parseBlockByIndex(self, block, indices, subchannel=None): :param block: The data block element to parse. :param indices: A list of sample index numbers to retrieve. 
- :keyword subchannel: If supplied, return only the values for a + :param subchannel: If supplied, return only the values for a specific subchannel :return: A list of tuples, one for each subsample. """ @@ -966,12 +1149,16 @@ def __eq__(self, other): #=============================================================================== + +# noinspection PyMethodOverriding class SubChannel(Channel): """ Output from a sensor, derived from a channel containing multiple pieces of data (e.g. the Y from an accelerometer's XYZ). Looks like a 'real' channel. """ - + + + # noinspection PyMissingConstructor def __init__(self, parent, subchannelId, name=None, units=('', ''), transform=None, displayRange=None, sensorId=None, warningId=None, axisName=None, attributes=None, color=None, @@ -981,26 +1168,26 @@ def __init__(self, parent, subchannelId, name=None, units=('', ''), :param parent: The parent sensor. :param subchannelId: The channel's ID, unique within the file. - :keyword name: A custom name for this channel. - :keyword units: The units measured in this channel, used if units + :param name: A custom name for this channel. + :param units: The units measured in this channel, used if units are not explicitly indicated in the Channel's SubChannels. A tuple containing the 'axis name' (e.g. 'Acceleration') and the unit symbol ('g'). - :keyword transform: A Transform object for adjusting sensor + :param transform: A Transform object for adjusting sensor readings at the Channel level. - :keyword displayRange: A 'hint' to the minimum and maximum values + :param displayRange: A 'hint' to the minimum and maximum values of data in this channel. - :keyword sensorId: The ID of the sensor that generates this + :param sensorId: The ID of the sensor that generates this SubChannel's data. - :keyword warningId: The ID of the `WarningRange` that indicates + :param warningId: The ID of the `WarningRange` that indicates conditions that may adversely affect data recorded in this SubChannel. 
- :keyword axisName: The name of the axis this SubChannel represents. + :param axisName: The name of the axis this SubChannel represents. Use if the `name` contains additional text (e.g. "X" if the name is "Accelerometer X (low-g)"). - :keyword attributes: A dictionary of arbitrary attributes, e.g. + :param attributes: A dictionary of arbitrary attributes, e.g. ``Attribute`` elements parsed from the file. - :keyword visibility: The subchannel's level of visibility, for + :param visibility: The subchannel's level of visibility, for display purposes. The lower the value, the more 'visible' the subchannel. """ @@ -1048,16 +1235,12 @@ def __init__(self, parent, subchannelId, name=None, units=('', ''), else: self.displayName = self.name - if isinstance(sensorId, int): - self.sensor = self.dataset.sensors.get(sensorId, None) - elif sensorId is None: - if isinstance(parent.sensor, int): - self.sensor = self.dataset.sensors.get(parent.sensor, None) - else: - self.sensor = parent.sensor - else: + try: + sensorId = parent.sensor if sensorId is None else sensorId + self.sensor = self.dataset.sensors.get(sensorId, sensorId) + except TypeError: self.sensor = sensorId - + self.types = (parent.types[subchannelId], ) self._sessions = None @@ -1129,9 +1312,9 @@ def parseBlock(self, block, start=None, end=None, step=1): """ Parse subsamples out of a data block. Used internally. :param block: The data block from which to parse subsamples. - :keyword start: The first block index to retrieve. - :keyword end: The last block index to retrieve. - :keyword step: The number of steps between samples. + :param start: The first block index to retrieve. + :param end: The last block index to retrieve. + :param step: The number of steps between samples. 
""" return self.parent.parseBlock(block, start, end, step=step) @@ -1219,7 +1402,7 @@ def __init__(self, parentChannel, session=None, parentList=None): self.dataset = parentChannel.dataset self.hasSubchannels = not isinstance(self.parent, SubChannel) self._parentList = parentList - self._childLists = [] + self._childLists = [] # TODO: Remove _childLists (seems deprecated) self.noBivariates = False @@ -1229,29 +1412,37 @@ def __init__(self, parentChannel, session=None, parentList=None): self._singleSample = parentChannel.singleSample if self.hasSubchannels or not isinstance(parentChannel.parent, Channel): - # Cache of block start times and sample indices for faster search + # Cache of block start times and sample indices for faster search. + # Note: _blockTimes is not changed if a time offset is applied. self._blockTimes = [] self._blockIndices = [] + self._blockIndicesArray = np.array([], dtype=np.int64) + self._blockTimesArray = np.array([], dtype=np.float64) else: s = self.session.sessionId if session is not None else None ps = parentChannel.parent.getSession(s) self._blockTimes = ps._blockTimes self._blockIndices = ps._blockIndices + self._blockIndicesArray = ps._blockIndicesArray + self._blockTimesArray = ps._blockTimesArray self.noBivariates = ps.noBivariates - + if self.hasSubchannels: self.channelId = self.parent.id self.subchannelId = None + self.parseBlock = self.parent.parseBlock else: self.channelId = self.parent.parent.id self.subchannelId = self.parent.id - + self.parseBlock = self.parent.parent.parseBlock + self._hasSubsamples = False self.hasDisplayRange = self.parent.hasDisplayRange self.displayRange = self.parent.displayRange + self._mean = None self.removeMean = False self.hasMinMeanMax = True self._rollingMeanSpan = None @@ -1262,16 +1453,16 @@ def __init__(self, parentChannel, session=None, parentList=None): self.updateTransforms(recurse=False) self.allowMeanRemoval = parentChannel.allowMeanRemoval - if self.hasSubchannels: - self.parseBlock = 
self.parent.parseBlock - else: - self.parseBlock = self.parent.parent.parseBlock + self._npType = np.uint8 + self._makeDType() - self._blockIndicesArray = np.array([], dtype=np.float64) - self._blockTimesArray = np.array([], dtype=np.float64) + self._channelDataLock = parentChannel.dataset._channelDataLock + self._cacheArray = None + self._cacheBytes = None - self._mean = None + def _makeDType(self): + """ Construct the Numpy type for the channel's payload. """ _format = self.parent.parser.format if not _format: self._npType = np.uint8 @@ -1281,7 +1472,7 @@ def __init__(self, parentChannel, session=None, parentList=None): if isinstance(_format, bytes): _format = _format.decode() - if _format[0] in ['<', '>', '=']: + if _format[0] in '<>=': endian = _format[0] dtypes = [endian + ChannelDataBlock.TO_NP_TYPESTR[x] for x in _format[1:]] else: @@ -1289,15 +1480,18 @@ def __init__(self, parentChannel, session=None, parentList=None): self._npType = np.dtype([(str(i), dtype) for i, dtype in enumerate(dtypes)]) - self._channelDataLock = parentChannel.dataset._channelDataLock - self._cacheArray = None - self._cacheBytes = None - self._fullyCached = False - self._cacheStart = None - self._cacheEnd = None - self._cacheBlockStart = None - self._cacheBlockEnd = None - self._cacheLen = 0 + + def _setOffset(self, offset): + """ Apply an offset to the `EventArray` timestamps. Do not use + directly; use `Session.offset`. 
+ """ + for d in self._data: + d.startTime = d.startTimeOriginal + offset + d.endTime = d.endTimeOriginal + offset + + # skip if the dataset is loading (blockTimes still getting built) + if not self.dataset.loading: + self._blockTimesArray = np.array(self._blockTimes, dtype=np.float64) + offset @property @@ -1331,7 +1525,7 @@ def updateTransforms(self, recurse=True): sessionId = self.session.sessionId if self.session is not None else None children = [] dispX = [] - for x,sc in zip(xs,self.parent.subchannels): + for x, sc in zip(xs, self.parent.subchannels): if sessionId in sc.sessions: cl = sc.sessions[sessionId] if cl.transform is None: @@ -1386,11 +1580,6 @@ def copy(self, newParent=None): newList._channelDataLock = self._channelDataLock newList._cacheArray = self._cacheArray newList._cacheBytes = self._cacheBytes - newList._fullyCached = self._fullyCached - newList._cacheStart = self._cacheStart - newList._cacheEnd = self._cacheEnd - newList._cacheBlockStart = self._cacheBlockStart - newList._cacheBlockEnd = self._cacheBlockEnd return newList @@ -1406,17 +1595,33 @@ def append(self, block): if block.numSamples is None: block.numSamples = block.getNumSamples(self.parent.parser) + block.startTime = block.startTimeOriginal + self.session._offset + block.endTime = block.endTimeOriginal + self.session._offset + # Set the session first/last times if they aren't already set. # Possibly redundant if all sessions are 'closed.' 
- if self.session.firstTime is None: + try: + self.session.firstTimeOriginal = lesser(self.session.firstTimeOriginal, + block.startTimeOriginal) + self.session.firstTime = self.session.firstTimeOriginal + self.session._offset + except TypeError: + # session.firstTimeOriginal probably None + if self.session.firstTimeOriginal is not None: + raise + self.session.firstTimeOriginal = block.startTimeOriginal self.session.firstTime = block.startTime - else: - self.session.firstTime = min(self.session.firstTime, block.startTime) - if self.session.lastTime is None: + try: + self.session.lastTimeOriginal = greater(self.session.lastTimeOriginal, + block.endTimeOriginal) + self.session.lastTime = self.session.lastTimeOriginal + self.session._offset + except TypeError: + # session.lastTimeOriginal probably None + if self.session.lastTimeOriginal is not None: + raise + self.session.lastTimeOriginal = block.endTimeOriginal self.session.lastTime = block.endTime - else: - self.session.lastTime = max(self.session.lastTime, block.endTime) + # Check that the block actually contains at least one sample. if block.numSamples < 1: @@ -1446,7 +1651,7 @@ def append(self, block): # HACK (somewhat): Single-sample-per-block channels get min/mean/max # which is just the same as the value of the sample. Set the values, # but don't set hasMinMeanMax. - if self._singleSample is True:# and not self.hasMinMeanMax: + if self._singleSample is True and block.numSamples == 1: # and not self.hasMinMeanMax: block.minMeanMax = np.tile(block.payload, 3) mmmArr = np_recfunctions.structured_to_unstructured( block._minMeanMax.view(self._npType)) @@ -1458,10 +1663,10 @@ def append(self, block): block.min, block.mean, block.max = mmmArr self.hasMinMeanMax = True else: - # XXX: Attempt to calculate min/mean/max here instead of + # FUTURE: Attempt to calculate min/mean/max here instead of # in _computeMinMeanMax(). Causes issues with pressure for some # reason - it starts removing mean and won't plot. 
- vals = np_recfunctions.structured_to_unstructured(block.payload.view(self._npType)) + vals: np.array = np_recfunctions.structured_to_unstructured(block.payload.view(self._npType)) block.min = vals.min(axis=0) block.mean = vals.mean(axis=0) block.max = vals.max(axis=0) @@ -1469,7 +1674,7 @@ def append(self, block): # Cache the index range for faster searching self._blockIndices.append(oldLength) - self._blockTimes.append(block.startTime) + self._blockTimes.append(block.startTimeOriginal) self._hasSubsamples = self._hasSubsamples or block.numSamples > 1 @@ -1504,8 +1709,8 @@ def _getBlockIndexWithIndex(self, idx, start=0, stop=None): index. :param idx: The event index to find - :keyword start: The first block index to search - :keyword stop: The last block index to search + :param start: The first block index to search + :param stop: The last block index to search """ # TODO: profile & determine if this change is beneficial ''' @@ -1519,9 +1724,9 @@ def _getBlockIndexWithIndex(self, idx, start=0, stop=None): ''' if len(self._blockIndicesArray) != len(self._blockIndices): - self._blockIndicesArray = np.array(self._blockIndices) + self._blockIndicesArray = np.array(self._blockIndices, dtype=np.int64) - idxOffset = max(start, 1) + idxOffset = greater(start, 1) return idxOffset-1 + np.searchsorted( self._blockIndicesArray[idxOffset:stop], idx, side='right' ) @@ -1531,10 +1736,11 @@ def _getBlockIndexWithTime(self, t, start=0, stop=None): """ Get the index of a raw data block in which the given time occurs. :param t: The time to find - :keyword start: The first block index to search - :keyword stop: The last block index to search + :param start: The first block index to search + :param stop: The last block index to search """ - # TODO: profile & determine if this change is beneficial + # TODO: profile & determine if this change is beneficial. I'm not + # sure the new version was ever actually profiled vs. the old. 
''' if stop: blockIdx = bisect_right(self._blockTimes, t, start, stop) @@ -1545,9 +1751,14 @@ def _getBlockIndexWithTime(self, t, start=0, stop=None): return blockIdx ''' if len(self._blockTimesArray) != len(self._blockTimes): - self._blockTimesArray = np.array(self._blockTimes) + self._blockTimesArray = np.array(self._blockTimes, dtype=np.float64) + self.session._offset - idxOffset = max(start, 1) + if t is None or t < self._blockTimesArray[0]: + return 0 + elif t > self._blockTimesArray[-1]: + return len(self._blockTimesArray) + + idxOffset = greater(start, 1) return idxOffset-1 + np.searchsorted( self._blockTimesArray[idxOffset:stop], t, side='right' ) @@ -1570,8 +1781,8 @@ def _getBlockRollingMean_old(self, blockIdx, force=False): span = self.rollingMeanSpan if (block._rollingMean is not None - and block._rollingMeanSpan == span - and block._rollingMeanLen == len(self._data)): + and block._rollingMeanSpan == span + and block._rollingMeanLen == len(self._data)): return block._rollingMean self._computeMinMeanMax() @@ -1581,7 +1792,7 @@ def _getBlockRollingMean_old(self, blockIdx, force=False): stop=blockIdx) lastBlock = self._getBlockIndexWithTime(block.startTime + (span/2), start=blockIdx) - lastBlock = max(lastBlock+1, firstBlock+1) + lastBlock = greater(lastBlock+1, firstBlock+1) else: firstBlock = lastBlock = None @@ -1655,12 +1866,11 @@ def __getitem__(self, idx, display=False): if isinstance(idx, (int, np.integer)): - if idx >= len(self): raise IndexError("EventArray index out of range") if idx < 0: - idx = max(0, len(self) + idx) + idx = greater(0, len(self) + idx) return self.arraySlice(idx, idx + 1)[:, 0] @@ -1696,6 +1906,8 @@ def __len__(self): def __eq__(self, other): + """ Return self==value. + """ if other is self: return True elif not isinstance(other, self.__class__): @@ -1732,13 +1944,13 @@ def itervalues(self, start=None, end=None, step=1, subchannels=True, display=False): """ Iterate all values in the given index range (w/o times). 
- :keyword start: The first index in the range, or a slice. - :keyword end: The last index in the range. Not used if `start` is + :param start: The first index in the range, or a slice. + :param end: The last index in the range. Not used if `start` is a slice. - :keyword step: The step increment. Not used if `start` is a slice. - :keyword subchannels: A list of subchannel IDs or Boolean. `True` + :param step: The step increment. Not used if `start` is a slice. + :param subchannels: A list of subchannel IDs or Boolean. `True` will return all subchannels in native order. - :keyword display: If `True`, the `EventArray` transform (i.e. the + :param display: If `True`, the `EventArray` transform (i.e. the 'display' transform) will be applied to the data. :return: an iterable of structured array value blocks in the specified index range. @@ -1756,13 +1968,13 @@ def arrayValues(self, start=None, end=None, step=1, subchannels=True, display=False): """ Get all values in the given index range (w/o times). - :keyword start: The first index in the range, or a slice. - :keyword end: The last index in the range. Not used if `start` is + :param start: The first index in the range, or a slice. + :param end: The last index in the range. Not used if `start` is a slice. - :keyword step: The step increment. Not used if `start` is a slice. - :keyword subchannels: A list of subchannel IDs or Boolean. `True` + :param step: The step increment. Not used if `start` is a slice. + :param subchannels: A list of subchannel IDs or Boolean. `True` will return all subchannels in native order. - :keyword display: If `True`, the `EventArray` transform (i.e. the + :param display: If `True`, the `EventArray` transform (i.e. the 'display' transform) will be applied to the data. :return: a structured array of values in the specified index range. 
""" @@ -1786,9 +1998,11 @@ def arrayValues(self, start=None, end=None, step=1, subchannels=True, out = np.empty((len(rawData.dtype), len(rawData))) if isinstance(self.parent, SubChannel): - xform.polys[self.subchannelId].inplace(rawData, out=out, noBivariates=self.noBivariates) + xform.polys[self.subchannelId].inplace(rawData, out=out, + noBivariates=self.noBivariates) else: - xform.inplace(np_recfunctions.structured_to_unstructured(rawData).T, out=out, noBivariates=self.noBivariates) + xform.inplace(np_recfunctions.structured_to_unstructured(rawData).T, + out=out, noBivariates=self.noBivariates) if self.removeMean: out[1:] -= out[1:].mean(axis=1, keepdims=True) @@ -1802,11 +2016,11 @@ def arrayValues(self, start=None, end=None, step=1, subchannels=True, def iterSlice(self, start=None, end=None, step=1, display=False): """ Create an iterator producing events for a range of indices. - :keyword start: The first index in the range, or a slice. - :keyword end: The last index in the range. Not used if `start` is + :param start: The first index in the range, or a slice. + :param end: The last index in the range. Not used if `start` is a slice. - :keyword step: The step increment. Not used if `start` is a slice. - :keyword display: If `True`, the `EventArray` transform (i.e. the + :param step: The step increment. Not used if `start` is a slice. + :param display: If `True`, the `EventArray` transform (i.e. the 'display' transform) will be applied to the data. :return: an iterable of events in the specified index range. """ @@ -1822,11 +2036,11 @@ def iterSlice(self, start=None, end=None, step=1, display=False): def arraySlice(self, start=None, end=None, step=1, display=False): """ Create an array of events within a range of indices. - :keyword start: The first index in the range, or a slice. - :keyword end: The last index in the range. Not used if `start` is + :param start: The first index in the range, or a slice. + :param end: The last index in the range. 
Not used if `start` is a slice. - :keyword step: The step increment. Not used if `start` is a slice. - :keyword display: If `True`, the `EventArray` transform (i.e. the + :param step: The step increment. Not used if `start` is a slice. + :param display: If `True`, the `EventArray` transform (i.e. the 'display' transform) will be applied to the data. :return: a structured array of events in the specified index range. """ @@ -1852,9 +2066,11 @@ def arraySlice(self, start=None, end=None, step=1, display=False): self._inplaceTime(start, end, step, out=out[0]) if isinstance(self.parent, SubChannel): - xform.polys[self.subchannelId].inplace(rawData, out=out[1], timestamp=out[0], noBivariates=self.noBivariates) + xform.polys[self.subchannelId].inplace(rawData, out=out[1], timestamp=out[0], + noBivariates=self.noBivariates) else: - xform.inplace(np_recfunctions.structured_to_unstructured(rawData).T, out=out[1:], timestamp=out[0], noBivariates=self.noBivariates) + xform.inplace(np_recfunctions.structured_to_unstructured(rawData).T, + out=out[1:], timestamp=out[0], noBivariates=self.noBivariates) if self.removeMean: out[1:] -= out[1:].mean(axis=1, keepdims=True) @@ -1866,13 +2082,13 @@ def iterJitterySlice(self, start=None, end=None, step=1, jitter=0.5, display=False): """ Create an iterator producing events for a range of indices. - :keyword start: The first index in the range, or a slice. - :keyword end: The last index in the range. Not used if `start` is + :param start: The first index in the range, or a slice. + :param end: The last index in the range. Not used if `start` is a slice. - :keyword step: The step increment. Not used if `start` is a slice. - :keyword jitter: The amount by which to vary the sample time, as a + :param step: The step increment. Not used if `start` is a slice. + :param jitter: The amount by which to vary the sample time, as a normalized percentage of the regular time between samples. - :keyword display: If `True`, the `EventArray` transform (i.e. 
the + :param display: If `True`, the `EventArray` transform (i.e. the 'display' transform) will be applied to the data. :return: an iterable of events in the specified index range. """ @@ -1882,7 +2098,8 @@ def iterJitterySlice(self, start=None, end=None, step=1, jitter=0.5, self._computeMinMeanMax() - data = self.arrayJitterySlice(start=start, end=end, step=step, jitter=jitter, display=display) + data = self.arrayJitterySlice(start=start, end=end, step=step, + jitter=jitter, display=display) yield from data.T @@ -1892,13 +2109,13 @@ def arrayJitterySlice(self, start=None, end=None, step=1, jitter=0.5, display=False): """ Create an array of events within a range of indices. - :keyword start: The first index in the range, or a slice. - :keyword end: The last index in the range. Not used if `start` is + :param start: The first index in the range, or a slice. + :param end: The last index in the range. Not used if `start` is a slice. - :keyword step: The step increment. Not used if `start` is a slice. - :keyword jitter: The amount by which to vary the sample time, as a + :param step: The step increment. Not used if `start` is a slice. + :param jitter: The amount by which to vary the sample time, as a normalized percentage of the regular time between samples. - :keyword display: If `True`, the `EventArray` transform (i.e. the + :param display: If `True`, the `EventArray` transform (i.e. the 'display' transform) will be applied to the data. :return: a structured array of events in the specified index range. 
""" @@ -1954,10 +2171,12 @@ def arrayJitterySlice(self, start=None, end=None, step=1, jitter=0.5, noBivariates = self.noBivariates if isinstance(self.parent, SubChannel): - xform.polys[self.subchannelId].inplace(rawData, out=out[1], timestamp=out[0], noBivariates=noBivariates) + xform.polys[self.subchannelId].inplace(rawData, out=out[1], timestamp=out[0], + noBivariates=noBivariates) else: for i, (k, _) in enumerate(rawData.dtype.descr): - xform.polys[i].inplace(rawData[k], out=out[i + 1], timestamp=out[0], noBivariates=noBivariates) + xform.polys[i].inplace(rawData[k], out=out[i + 1], timestamp=out[0], + noBivariates=noBivariates) return out @@ -2028,9 +2247,9 @@ def getRangeIndices(self, startTime, endTime): """ Get the first and last event indices that fall within the specified interval. - :keyword startTime: The first time (in microseconds by default), + :param startTime: The first time (in microseconds by default), `None` to start at the beginning of the session. - :keyword endTime: The second time, or `None` to use the end of + :param endTime: The second time, or `None` to use the end of the session. """ if self.parent.singleSample: @@ -2062,32 +2281,38 @@ def getRangeIndices(self, startTime, endTime): endIdx = 0 else: endIdx = self.getEventIndexBefore(endTime)+1 - return max(0, startIdx), min(endIdx, len(self)) + return greater(0, startIdx), lesser(endIdx, len(self)) def iterRange(self, startTime=None, endTime=None, step=1, display=False): """ Get a set of data occurring in a given interval. - :keyword startTime: The first time (in microseconds by default), + :param startTime: The first time (in microseconds by default), `None` to start at the beginning of the session. - :keyword endTime: The second time, or `None` to use the end of + :param endTime: The second time, or `None` to use the end of the session. + :param step: The number of steps between samples. + :param display: If `True`, the final 'display' transform (e.g. 
+ unit conversion) will be applied to the results. """ warnings.warn(DeprecationWarning('iter methods should be expected to be ' 'removed in future versions of idelib')) startIdx, endIdx = self.getRangeIndices(startTime, endTime) - return self.iterSlice(startIdx,endIdx,step,display=display) + return self.iterSlice(startIdx, endIdx, step, display=display) def arrayRange(self, startTime=None, endTime=None, step=1, display=False): """ Get a set of data occurring in a given time interval. - :keyword startTime: The first time (in microseconds by default), + :param startTime: The first time (in microseconds by default), `None` to start at the beginning of the session. - :keyword endTime: The second time, or `None` to use the end of + :param endTime: The second time, or `None` to use the end of the session. + :param step: The number of steps between samples. + :param display: If `True`, the final 'display' transform (e.g. + unit conversion) will be applied to the results. :return: a structured array of events in the specified time interval. """ @@ -2100,27 +2325,30 @@ def getRange(self, startTime=None, endTime=None, display=False): """ Get a set of data occurring in a given time interval. (Currently an alias of `arrayRange`.) - :keyword startTime: The first time (in microseconds by default), + :param startTime: The first time (in microseconds by default), `None` to start at the beginning of the session. - :keyword endTime: The second time, or `None` to use the end of + :param endTime: The second time, or `None` to use the end of the session. + :param display: If `True`, the final 'display' transform (e.g. + unit conversion) will be applied to the results. :return: a collection of events in the specified time interval. 
""" return self.arrayRange(startTime, endTime, display=display) + # noinspection PyCallingNonCallable def iterMinMeanMax(self, startTime=None, endTime=None, padding=0, times=True, display=False): """ Get the minimum, mean, and maximum values for blocks within a specified interval. - :keyword startTime: The first time (in microseconds by default), + :param startTime: The first time (in microseconds by default), `None` to start at the beginning of the session. - :keyword endTime: The second time, or `None` to use the end of + :param endTime: The second time, or `None` to use the end of the session. - :keyword times: If `True` (default), the results include the + :param times: If `True` (default), the results include the block's starting time. - :keyword display: If `True`, the final 'display' transform (e.g. + :param display: If `True`, the final 'display' transform (e.g. unit conversion) will be applied to the results. :return: An iterator producing sets of three events (min, mean, and max, respectively). @@ -2140,8 +2368,7 @@ def iterMinMeanMax(self, startTime=None, endTime=None, padding=0, session = self.session removeMean = self.removeMean and self.allowMeanRemoval _getBlockRollingMean = self._getBlockRollingMean - if not hasSubchannels: - parent_id = self.subchannelId + parent_id = None if self.hasSubchannels else self.subchannelId if self.useAllTransforms: xform = self._fullXform @@ -2211,25 +2438,28 @@ def arrayMinMeanMax(self, startTime=None, endTime=None, padding=0, """ Get the minimum, mean, and maximum values for blocks within a specified interval. - :keyword startTime: The first time (in microseconds by default), + :param startTime: The first time (in microseconds by default), `None` to start at the beginning of the session. - :keyword endTime: The second time, or `None` to use the end of + :param endTime: The second time, or `None` to use the end of the session. 
- :keyword times: If `True` (default), the results include the + :param times: If `True` (default), the results include the block's starting time. - :keyword display: If `True`, the final 'display' transform (e.g. + :param display: If `True`, the final 'display' transform (e.g. unit conversion) will be applied to the results. + :param iterator: A function that iterates the output. Intended + for allowing iteration to be externally cancelled (e.g., in + a GUI). :return: A structured array of data block statistics (min, mean, and max, respectively). """ # TODO: Remember what `padding` was for, and either implement or # remove it completely. Related to plotting; see `plots`. - # TODO: Use `iterator`? It may have been removed accidentally. if not self._data: return None + iterator = iterator or iter startBlock, endBlock = self._getBlockRange(startTime, endTime) - shape = (3, max(1, len(self._npType)) + int(times), endBlock - startBlock) + shape = (3, greater(1, len(self._npType)) + int(times), endBlock - startBlock) scid = self.subchannelId isSubchannel = isinstance(self.parent, SubChannel) @@ -2240,11 +2470,11 @@ def arrayMinMeanMax(self, startTime=None, endTime=None, padding=0, else: xform = self._comboXform - noBivariates= self.noBivariates + noBivariates = self.noBivariates out = np.empty(shape) - for i, d in enumerate(self._data[startBlock:endBlock]): + for i, d in enumerate(iterator(self._data[startBlock:endBlock])): if isSubchannel: if times: out[:, 0, i] = d.startTime @@ -2300,14 +2530,17 @@ def getMinMeanMax(self, startTime=None, endTime=None, padding=0, """ Get the minimum, mean, and maximum values for blocks within a specified interval. (Currently an alias of `arrayMinMeanMax`.) - :keyword startTime: The first time (in microseconds by default), + :param startTime: The first time (in microseconds by default), `None` to start at the beginning of the session. 
- :keyword endTime: The second time, or `None` to use the end of + :param endTime: The second time, or `None` to use the end of the session. - :keyword times: If `True` (default), the results include the + :param times: If `True` (default), the results include the block's starting time. - :keyword display: If `True`, the final 'display' transform (e.g. + :param display: If `True`, the final 'display' transform (e.g. unit conversion) will be applied to the results. + :param iterator: A function that iterates the output. Intended + for allowing iteration to be externally cancelled (e.g., in + a GUI). :return: A structured array of data block statistics (min, mean, and max, respectively). """ @@ -2322,14 +2555,17 @@ def getRangeMinMeanMax(self, startTime=None, endTime=None, subchannel=None, specifying a subchannel number can produce meaningless data if the channels use different units or are on different scales. - :keyword startTime: The first time (in microseconds by default), + :param startTime: The first time (in microseconds by default), `None` to start at the beginning of the session. - :keyword endTime: The second time, or `None` to use the end of + :param endTime: The second time, or `None` to use the end of the session. - :keyword subchannel: The subchannel ID to retrieve, if the + :param subchannel: The subchannel ID to retrieve, if the EventArray's parent has subchannels. - :keyword display: If `True`, the final 'display' transform (e.g. + :param display: If `True`, the final 'display' transform (e.g. unit conversion) will be applied to the results. + :param iterator: A function that iterates the output. Intended + for allowing iteration to be externally cancelled (e.g., in + a GUI). :return: A namedtuple of aggregated event statistics (min, mean, and max, respectively). 
""" @@ -2359,14 +2595,14 @@ def _getBlockRange(self, startTime=None, endTime=None): startBlockIdx = 0 else: startBlockIdx = self._getBlockIndexWithTime(startTime) - startBlockIdx = max(startBlockIdx-1, 0) + startBlockIdx = greater(startBlockIdx-1, 0) if endTime is None: endBlockIdx = len(self._data) else: if endTime < 0: endTime += self._data[-1].endTime endBlockIdx = self._getBlockIndexWithTime(endTime, start=startBlockIdx) - endBlockIdx = min(len(self._data), max(startBlockIdx+1, endBlockIdx+1)) + endBlockIdx = lesser(len(self._data), greater(startBlockIdx+1, endBlockIdx+1)) return startBlockIdx, endBlockIdx @@ -2376,10 +2612,13 @@ def getMax(self, startTime=None, endTime=None, display=False, iterator=iter): time range. For Channels, returns the maximum among all Subchannels. - :keyword startTime: The starting time. Defaults to the start. - :keyword endTime: The ending time. Defaults to the end. - :keyword display: If `True`, the final 'display' transform (e.g. + :param startTime: The starting time. Defaults to the start. + :param endTime: The ending time. Defaults to the end. + :param display: If `True`, the final 'display' transform (e.g. unit conversion) will be applied to the results. + :param iterator: A function that iterates the output. Intended + for allowing iteration to be externally cancelled (e.g., in + a GUI). :return: The event with the maximum value. """ maxs = self.arrayMinMeanMax(startTime, endTime, times=False, @@ -2398,10 +2637,13 @@ def getMin(self, startTime=None, endTime=None, display=False, iterator=iter): time range. For Channels, returns the minimum among all Subchannels. - :keyword startTime: The starting time. Defaults to the start. - :keyword endTime: The ending time. Defaults to the end. - :keyword display: If `True`, the final 'display' transform (e.g. + :param startTime: The starting time. Defaults to the start. + :param endTime: The ending time. Defaults to the end. + :param display: If `True`, the final 'display' transform (e.g. 
unit conversion) will be applied to the results. + :param iterator: A function that iterates the output. Intended + for allowing iteration to be externally cancelled (e.g., in + a GUI). :return: The event with the minimum value. """ if not self.hasMinMeanMax: @@ -2421,7 +2663,7 @@ def getMin(self, startTime=None, endTime=None, display=False, iterator=iter): def _getBlockSampleTime(self, blockIdx=0): """ Get the time between samples within a given data block. - :keyword blockIdx: The index of the block to measure. Times + :param blockIdx: The index of the block to measure. Times within the same block are expected to be consistent, but can possibly vary from block to block. :return: The sample rate, as samples per second @@ -2471,7 +2713,7 @@ def _getBlockSampleRate(self, blockIdx=0): the channel definition or calculated from the actual data and cached. - :keyword blockIdx: The block to check. Optional, because in an + :param blockIdx: The block to check. Optional, because in an ideal world, all blocks would be the same. :return: The sample rate, as samples per second (float) """ @@ -2488,7 +2730,7 @@ def _getBlockSampleRate(self, blockIdx=0): def getSampleTime(self, idx=None): """ Get the time between samples. - :keyword idx: Because it is possible for sample rates to vary + :param idx: Because it is possible for sample rates to vary within a channel, an event index can be specified; the time between samples for that event and its siblings will be returned. @@ -2508,7 +2750,7 @@ def getSampleRate(self, idx=None): the channel definition or calculated from the actual data and cached. - :keyword idx: Because it is possible for sample rates to vary + :param idx: Because it is possible for sample rates to vary within a channel, an event index can be specified; the sample rate for that event and its siblings will be returned. 
:return: The sample rate, as samples per second (float) @@ -2526,9 +2768,11 @@ def getValueAt(self, at, outOfRange=False, display=False): existing events. :param at: The time at which to take the sample. - :keyword outOfRange: If `False`, times before the first sample + :param outOfRange: If `False`, times before the first sample or after the last will raise an `IndexError`. If `True`, the first or last time, respectively, is returned. + :param display: If `True`, export using the EventArray's 'display' + transform (e.g. unit conversion). """ # TODO: Optimize. This creates a bottleneck in the calibration. startIdx = self.getEventIndexBefore(at) @@ -2538,14 +2782,14 @@ def getValueAt(self, at, outOfRange=False, display=False): return first if outOfRange: return first - raise IndexError("Specified time occurs before first event (%d)" % first[0]) + raise IndexError("Specified time occurs before first event (%s)" % first[0]) elif startIdx >= len(self) - 1: last = self.__getitem__(-1, display=display) if last[0] == at: return last if outOfRange: return last - raise IndexError("Specified time occurs after last event (%d)" % last[0]) + raise IndexError("Specified time occurs after last event (%s)" % last[0]) startEvt = self.__getitem__(startIdx, display=display) endEvt = self.__getitem__(startIdx+1, display=display) @@ -2564,10 +2808,13 @@ def getMean(self, startTime=None, endTime=None, display=False, iterator=iter): time range. For Channels, returns the mean among all Subchannels. - :keyword startTime: The starting time. Defaults to the start. - :keyword endTime: The ending time. Defaults to the end. - :keyword display: If `True`, the final 'display' transform (e.g. + :param startTime: The starting time. Defaults to the start. + :param endTime: The ending time. Defaults to the end. + :param display: If `True`, the final 'display' transform (e.g. unit conversion) will be applied to the results. + :param iterator: A function that iterates the output. 
Intended + for allowing iteration to be externally cancelled (e.g., in + a GUI). :return: The event with the minimum value. """ if not self.hasMinMeanMax: @@ -2642,9 +2889,9 @@ def iterResampledRange(self, startTime, stopTime, maxPoints, padding=0, startIdx, stopIdx = self.getRangeIndices(startTime, stopTime) numPoints = (stopIdx - startIdx) - startIdx = max(startIdx-padding, 0) - stopIdx = min(stopIdx+padding, len(self)) - step = max(-int(-numPoints // maxPoints), 1) + startIdx = greater(startIdx-padding, 0) + stopIdx = lesser(stopIdx+padding, len(self)) + step = greater(-int(-numPoints // maxPoints), 1) if jitter != 0: return self.iterJitterySlice(startIdx, stopIdx, step, jitter, @@ -2662,9 +2909,9 @@ def arrayResampledRange(self, startTime, stopTime, maxPoints, padding=0, # particularly not with single-sample blocks. startIdx, stopIdx = self.getRangeIndices(startTime, stopTime) - startIdx = max(startIdx-padding, 0) - stopIdx = min(stopIdx+padding+1, len(self)) - step = max(int(ceil((stopIdx - startIdx) / maxPoints)), 1) + startIdx = greater(startIdx-padding, 0) + stopIdx = lesser(stopIdx+padding+1, len(self)) + step = greater(int(ceil((stopIdx - startIdx) / maxPoints)), 1) if jitter != 0: return self.arrayJitterySlice(startIdx, stopIdx, step, jitter, @@ -2672,22 +2919,36 @@ def arrayResampledRange(self, startTime, stopTime, maxPoints, padding=0, return self.arraySlice(startIdx, stopIdx, step, display=display) - def exportCsv(self, stream, start=None, stop=None, step=1, subchannels=True, - callback=None, callbackInterval=0.01, timeScalar=1, - raiseExceptions=False, dataFormat="%.6f", delimiter=", ", - useUtcTime=False, useIsoFormat=False, headers=False, - removeMean=None, meanSpan=None, display=False, - noBivariates=None): + # noinspection PyDeprecation + def exportCsv(self, + stream, + start: Optional[int] = None, + stop: Optional[int] = None, + step: int = 1, + subchannels: Optional[Iterable] = None, + callback: Optional[Callable] = None, + callbackInterval: 
float = 0.01, + timeScalar: float = 1, + raiseExceptions: bool = False, + dataFormat: str = "%.6f", + delimiter: str = ", ", + useUtcTime: bool = False, + useIsoFormat: bool = False, + headers: bool = False, + removeMean: Optional[bool] = None, + meanSpan: Optional[int] = None, + display: bool = False, + noBivariates: Optional[bool] = None) -> tuple[int, timedelta]: """ Export events as CSV to a stream (e.g. a file). :param stream: The stream object to which to write CSV data. - :keyword start: The first event index to export. - :keyword stop: The last event index to export. - :keyword step: The number of events between exported lines. - :keyword subchannels: A sequence of individual subchannel numbers + :param start: The first event index to export. + :param stop: The last event index to export. + :param step: The number of events between exported lines. + :param subchannels: A sequence of individual subchannel numbers to export. Only applicable to objects with subchannels. `True` (default) exports them all. - :keyword callback: A function (or function-like object) to notify + :param callback: A function (or function-like object) to notify as work is done. It should take four keyword arguments: `count` (the current line number), `total` (the total number of lines), `error` (an exception, if raised during the @@ -2695,29 +2956,36 @@ def exportCsv(self, stream, start=None, stop=None, step=1, subchannels=True, complete). If the callback object has a `cancelled` attribute that is `True`, the CSV export will be aborted. The default callback is `None` (nothing will be notified). - :keyword callbackInterval: The frequency of update, as a + :param callbackInterval: The frequency of update, as a normalized percent of the total lines to export. - :keyword timeScalar: A scaling factor for the event times. - The default is 1 (microseconds). 
- :keyword raiseExceptions: - :keyword dataFormat: The number of decimal places to use for the + :param timeScalar: A scaling factor for the event times. The + default is 1 (microseconds). Not applicable when exporting + with UTC timestamps, which are always seconds. + :param raiseExceptions: If `False`, all exceptions will be + handled quietly, passed along to the callback. + :param dataFormat: The number of decimal places to use for the data. This is the same format as used when formatting floats. - :keyword useUtcTime: If `True`, times are written as the UTC + :param delimiter: The characters separating columns in the output. + :param useUtcTime: If `True`, times are written as the UTC timestamp. If `False`, times are relative to the recording. - :keyword useIsoFormat: If `True`, the time column is written as + :param useIsoFormat: If `True`, the time column is written as the standard ISO date/time string. Only applies if `useUtcTime` is `True`. - :keyword headers: If `True`, the first line of the CSV will contain + :param headers: If `True`, the first line of the CSV will contain the names of each column. - :keyword removeMean: Overrides the EventArray's mean removal for the + :param removeMean: Overrides the EventArray's mean removal for the export. - :keyword meanSpan: The span of the mean removal for the export. + :param meanSpan: The span of the mean removal for the export. -1 removes the total mean. - :keyword display: If `True`, export using the EventArray's 'display' + :param display: If `True`, export using the EventArray's 'display' transform (e.g. unit conversion). + :param noBivariates: If `True`, do not apply the second value + in bivariate calibration polynomials (e.g., temperature + compensation). :return: Tuple: The number of rows exported and the elapsed time. 
""" - noCallback = callback is None + # TODO: change `utcfromtimestamp(t)` to `fromtimestamp(t, datetime.UTC)` + # after Python 3.10 reaches EoL (2026-10) _self = self.copy() if noBivariates is not None: @@ -2725,6 +2993,7 @@ def exportCsv(self, stream, start=None, stop=None, step=1, subchannels=True, # Create a function for formatting the event time. if useUtcTime and _self.session.utcStartTime: + timeScalar = 1e-06 if useIsoFormat: timeFormatter = lambda x: datetime.utcfromtimestamp(x[0] * timeScalar + _self.session.utcStartTime).isoformat() else: @@ -2765,18 +3034,16 @@ def exportCsv(self, stream, start=None, stop=None, step=1, subchannels=True, stream.write('"Time"%s%s\n' % (delimiter, delimiter.join(['"%s"' % n for n in names]))) - data = _self.arraySlice(start, stop, step) - if useUtcTime and _self.session.utcStartTime: - if useIsoFormat: - times = data[0] - data = data.astype([('time', ' low else: - self.valid = lambda x: x > low and x < high - + self.valid = lambda x: low < x < high + self._displayName = None @@ -3107,7 +3374,7 @@ def getValueAt(self, at, sessionId=None, source=None): return at, True source = self.getSessionSource(sessionId) if source is None else source - t = min(max(source[0][0], at), source[-1][0]) + t = lesser(max(source[0][0], at), source[-1][0]) val = source.getValueAt(t, outOfRange=True) return at, self.valid(val[-1]) diff --git a/idelib/importer.py b/idelib/importer.py index 374c0936..871c9b74 100644 --- a/idelib/importer.py +++ b/idelib/importer.py @@ -4,13 +4,13 @@ from collections import Counter from datetime import datetime +import hashlib import os.path import sys -from time import time as time_time -from time import sleep import warnings import struct + try: import tqdm.auto except ModuleNotFoundError: @@ -20,7 +20,6 @@ from .dataset import Dataset from . 
import parsers - #=============================================================================== # #=============================================================================== @@ -28,9 +27,8 @@ # from dataset import __DEBUG__ import logging -logger = logging.getLogger('idelib') -logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) #=============================================================================== # Defaults @@ -45,63 +43,63 @@ 0x00: {"name": "832M1 Accelerometer"}, 0x01: {"name": "MPL3115 Temperature/Pressure"} }, - + "channels": { - 0x00: {"name": "Accelerometer XYZ", - # "parser": struct.Struct("= self.cancelAt: - self.cancelled=True + self.cancelled = True if self.startTime is None: self.startTime = datetime.now() if starting: @@ -230,15 +233,15 @@ def __call__(self, count=0, total=None, percent=None, error=None, else: self.dump('\x0d%s samples read' % count) if percent is not None: - p = int(percent*100) + p = int(percent * 100) self.dump(' (%d%%)' % p) - if p > 0 and p < 100: - d = ((datetime.now() - self.startTime) / p) * (100-p) + if 0 < p < 100: + d = ((datetime.now() - self.startTime) / p) * (100 - p) self.dump(' - est. 
completion in %s' % d) if self.estSum is None: self.estSum = d else: - self.dump(' '*25) + self.dump(' ' * 25) sys.stdout.flush() @@ -250,31 +253,34 @@ class TQDMUpdater: _size = 100 + def __init__(self, fileLength=None): self.fileLength = fileLength pbarKwargs = { # 'ncols': 150, 'unit_scale': 1, - } + } if fileLength is None: self.pbar = tqdm.auto.tqdm(total=self._size, unit='%', **pbarKwargs) else: self.pbar = tqdm.auto.tqdm(total=fileLength, unit='B', **pbarKwargs) self._lastUpdate = 0 + def __call__(self, percent=0, done=False, **kwargs): if done: self.pbar.update(self.pbar.total - self.pbar.n) return if self.fileLength is None: - self.pbar.update(int(percent*self._size) - self._lastUpdate) - self._lastUpdate = int(percent*self._size) + self.pbar.update(int(percent * self._size) - self._lastUpdate) + self._lastUpdate = int(percent * self._size) else: filepos = kwargs.get('filepos', 1) self.pbar.update(filepos - self._lastUpdate) self._lastUpdate = kwargs.get('filepos') + def __del__(self): self.pbar.close() @@ -283,7 +289,7 @@ def __del__(self): def TQDMUpdater(): warnings.warn('TQDM was not imported properly') return nullUpdater() - + #=============================================================================== # @@ -327,10 +333,10 @@ def importFile(filename='', startTime=None, endTime=None, channels=None, # FUTURE: Remove `kwargs` and this conditional warning. if kwargs: warnings.warn( - 'Some importFile() updater-related arguments have been deprecated.' - ' Ignored arguments: {}'.format(', '.join(kwargs)), - DeprecationWarning, - stacklevel=2, + 'Some importFile() updater-related arguments have been deprecated.' 
+ ' Ignored arguments: {}'.format(', '.join(kwargs)), + DeprecationWarning, + stacklevel=2, ) defaults = defaults or DEFAULTS @@ -373,15 +379,16 @@ def openFile(stream, updater=None, parserTypes=None, defaults=None, name=None, if isinstance(stream, str): stream = open(stream, 'rb') - + doc = Dataset(stream, name=name, quiet=quiet) doc.addSession() if doc._parsers is None: doc._parsers = instantiateParsers(doc, parserTypes) - + + fingerprint = hashlib.md5() elementParsers = doc._parsers - + try: for r in doc.ebmldoc: if getattr(updater, "cancelled", False): @@ -392,8 +399,9 @@ def openFile(stream, updater=None, parserTypes=None, defaults=None, name=None, parser = elementParsers[r.name] if parser.makesData(): break - parser.parse(r) - + fingerprint.update(r.getRaw()) + parser.parse(r) + except IOError as e: if e.errno is None: # The EBML library raises an empty IOError if it hits EOF. @@ -412,7 +420,8 @@ def openFile(stream, updater=None, parserTypes=None, defaults=None, name=None, # Got data before the recording props; use defaults. if defaults is not None: createDefaultSensors(doc, defaults) - + + doc._fingerprint = fingerprint doc.updateTransforms() return doc @@ -464,10 +473,12 @@ def filterTime(doc, startTime=0, endTime=None, channels=None): blockEnd = blockEnd or blockStart if chId is None: - logger.warning("Extractor: {} missing subelement, skipping.".format(el)) + logger.warning( + "Extractor: {} missing subelement, skipping.".format(el)) continue if blockStart is None: - logger.warning("Extractor: {} missing subelement, skipping.".format(el)) + logger.warning( + "Extractor: {} missing subelement, skipping.".format(el)) continue if finished.setdefault(chId, False): @@ -566,17 +577,17 @@ def readData(doc, source=None, startTime=None, endTime=None, channels=None, # FUTURE: Remove `kwargs` and this conditional warning. if kwargs: warnings.warn( - 'Some importFile() updater-related arguments have been deprecated.' 
- ' Ignored arguments: {}'.format(', '.join(kwargs)), - DeprecationWarning, - stacklevel=2, + 'Some importFile() updater-related arguments have been deprecated.' + ' Ignored arguments: {}'.format(', '.join(kwargs)), + DeprecationWarning, + stacklevel=2, ) parserTypes = parserTypes or ELEMENT_PARSER_TYPES if doc._parsers is None: # Possibly redundant; is `doc._parsers` ever `None` at this point? doc._parsers = instantiateParsers(doc, parserTypes) - + elementParsers = doc._parsers elementCount = 0 @@ -626,7 +637,7 @@ def readData(doc, source=None, startTime=None, endTime=None, channels=None, if source != doc and el_name == "TimeBaseUTC": timeOffset = (el.value - doc.lastSession.utcStartTime) * 1000000.0 continue - + try: parser = elementParsers[el_name] @@ -635,7 +646,7 @@ def readData(doc, source=None, startTime=None, endTime=None, channels=None, added = parser.parse(el, timeOffset=timeOffset) if isinstance(added, int): numSamples += added - + except parsers.ParsingError as err: # TODO: Error messages? logger.error("Parsing error during import: %s" % err) @@ -650,7 +661,7 @@ def readData(doc, source=None, startTime=None, endTime=None, channels=None, doc.fileDamaged = True elif updater: updater(error=e, done=True) - + except TypeError: # This can occur if there is a bad element in the data # (typically the last) @@ -663,4 +674,3 @@ def readData(doc, source=None, startTime=None, endTime=None, channels=None, updater(done=True) return numSamples - diff --git a/idelib/matfile.py b/idelib/matfile.py index 659c999f..09bda636 100644 --- a/idelib/matfile.py +++ b/idelib/matfile.py @@ -1,16 +1,20 @@ -''' +""" MATLAB .MAT file exporting. 
-''' +""" -from datetime import datetime +from collections.abc import Iterable +from datetime import datetime, timedelta from glob import glob import os.path import string import struct +from typing import Any, Callable, Optional + +from .dataset import EventArray import logging -logger = logging.getLogger('idelib') -logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) + # NOTE: 64 bit Scipy is unstable; avoid using it for now (v0.13.2, 12/2014). # from scipy.io.matlab import mio5_params as MP @@ -627,44 +631,64 @@ def makeHeader(cls, doc, session=-1, prefix="MATLAB 5.0 MAT-file"): # #=============================================================================== -def exportMat(events, filename, start=0, stop=-1, step=1, subchannels=True, - callback=None, callbackInterval=0.01, timeScalar=1, - raiseExceptions=False, useUtcTime=False, headers=True, - removeMean=None, meanSpan=None, display=False, matArgs={}, - noBivariates=False, **kwargs): +def exportMat(events: EventArray, + filename: str, + start: int = 0, + stop: int = -1, + step: int = 1, + subchannels: Optional[Iterable] = None, + callback: Callable = None, + callbackInterval: float = 0.01, + timeScalar: float = 1, + raiseExceptions: bool = False, + useUtcTime: bool = False, + headers: bool = True, + removeMean: Optional[bool] = None, + meanSpan: Optional[int] = None, + display: bool = False, + matArgs: Optional[dict[str, Any]] = None, + noBivariates: bool = False, + **kwargs) -> tuple[int, timedelta]: """ Export a `dataset.EventList` as a Matlab .MAT file. Works in a manner similar to the standard `EventList.exportCsv()` method. + + Note: Keyword arguments not explicitly listed are ignored for the + sake of compatibility between exporters. :param events: an `EventList` from which to export. :param filename: The path/name of the .MAT file to write. - :keyword start: The first event index to export (defaults to first). 
- :keyword stop: The last event index to export (defaults to last). - :keyword step: The number of events between exported lines. - :keyword subchannels: A sequence of individual subchannel numbers - to export. Only applicable to objects with subchannels. - `True` (default) exports them all. - :keyword callback: A function (or function-like object) to notify - as work is done. It should take four keyword arguments: - `count` (the current line number), `total` (the total number - of lines), `error` (an exception, if raised during the - export), and `done` (will be `True` when the export is - complete). If the callback object has a `cancelled` - attribute that is `True`, the MAT export will be aborted. - The default callback is `None` (nothing will be notified). - :keyword callbackInterval: The frequency of update, as a - normalized percent of the total lines to export. - :keyword timeScalar: A scaling factor for the even times. - The default is 1 (microseconds). - :keyword raiseExceptions: If `False`, all exceptions will be handled + :param start: The first event index to export (defaults to first). + :param stop: The last event index to export (defaults to last). + :param step: The number of events between exported lines. + :param subchannels: A sequence of individual subchannel numbers to + export. Only applicable to objects with subchannels. + :param callback: A function (or function-like object) to notify as + work is done. It should take four keyword arguments: `count` + (the current line number), `total` (the total number of lines), + `error` (an exception, if raised during the export), and `done` + (will be `True` when the export is complete). If the callback + object has a `cancelled` attribute that is `True`, the MAT export + will be aborted. The default callback is `None` (nothing will be + notified). + :param callbackInterval: The frequency of update, as a normalized + percent of the total lines to export. 
+ :param timeScalar: A scaling factor for the event times. The default + is 1 (microseconds). Not applicable when exporting with UTC + timestamps, which are always seconds. + :param raiseExceptions: If `False`, all exceptions will be handled quietly, passed along to the callback. - :keyword useUtcTime: If `True`, times are written as the UTC - timestamp. If `False`, times are relative to the recording. - :keyword removeMean: If `True`, remove the mean from the output. - :keyword meanSpan: The span over which the mean is calculated. -1 - for the total mean. - :keyword display: If `True`, export using the EventList's 'display' + :param useUtcTime: If `True`, times are written as the UTC timestamp. + If `False`, times are relative to the recording. + :param headers: If `True`, write a list of column names to the file + as a separate array of strings. + :param removeMean: If `True`, remove the mean from the output. + :param meanSpan: The span over which the mean is calculated. -1 for + the total mean. + :param display: If `True`, export using the EventList's 'display' transform (e.g. unit conversion). - :keyword matArgs: A dictionary of keyword arguments supplied to the + :param noBivariates: If `True`, do not apply the second value in + bivariate calibration polynomials (e.g., temperature compensation). + :param matArgs: A dictionary of keyword arguments supplied to the `MatStream` constructor. :return: Tuple: The number of rows exported and the elapsed time. """ @@ -698,43 +722,48 @@ def exportMat(events, filename, start=0, stop=-1, step=1, subchannels=True, pass # Scale to increments used in the source. - createTime /= timeScalar + rowTimeScalar = 1e-06 + timeScalar = 1 + else: + rowTimeScalar = 1 # If specific subchannels are specified, export them in order. 
if events.hasSubchannels: - if subchannels is True: + if subchannels: + # Export specific channels, a subset and/or reordered + # Create a function instead of chewing the subchannels every time + numCols = len(subchannels) + formatter = eval(f'lambda x: ({",".join([f"x[{c}]" for c in subchannels])},)') + names = [events.parent.subchannels[x].name for x in subchannels] + else: numCols = len(events.parent.subchannels) formatter = None names = [x.name for x in events.parent.subchannels] - else: - numCols = len(subchannels) - # Create a function instead of chewing the subchannels every time - formatter = eval("lambda x: (%s,)" % \ - ",".join([("x[%d]" % c) for c in subchannels])) - names = [events.parent.subchannels[x].name for x in subchannels] else: numCols = 1 formatter = lambda x: (x,) names = [events.parent.name] totalSamples = totalLines * numCols - if headers is False: + if not headers: names = None - + + matArgs = matArgs or {} comments = MatStream.makeHeader(events.dataset, events.session.sessionId) - matfile = MatStream(filename, events.dataset, comments, + matfile = MatStream(filename, events.dataset, comments, timeScalar=timeScalar, **matArgs) matfile.startArray(events.parent.name, numCols, rows=totalLines, colNames=names, noTimes=False) - + + num = -1 try: for num, evt in enumerate(events.iterSlice(start, stop, step, display)): t, v = evt[0], tuple(evt[1:]) if formatter is not None: v = formatter(v) - matfile.writeRow((createTime + t,)+v) + matfile.writeRow((createTime + t * rowTimeScalar,) + v) if callback is not None: if getattr(callback, 'cancelled', False): @@ -755,8 +784,3 @@ def exportMat(events, filename, start=0, stop=-1, step=1, subchannels=True, matfile.close() return num + 1, datetime.now() - t0 - -#=============================================================================== -# -#=============================================================================== - diff --git a/idelib/parsers.py b/idelib/parsers.py index bf1a94f2..388af4db 100644 
--- a/idelib/parsers.py +++ b/idelib/parsers.py @@ -47,15 +47,8 @@ from . import transforms from .attributes import decode_attributes -# Dictionaries in Python 3.7+ are explicitly insert-ordered in all -# implementations. If older, continue to use `collections.OrderedDict`. -if sys.hexversion < 0x03070000: - from collections import OrderedDict as Dict -else: - Dict = dict - import logging -logger = logging.getLogger('idelib') +logger = logging.getLogger(__name__) logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s") #=============================================================================== @@ -91,11 +84,11 @@ def renameKeys(d, renamed, exclude=True, recurse=True, :param d: The source dictionary :param renamed: A dictionary of new names keyed by old names - :keyword exclude: If `True`, only keys appearing in `renamed` are + :param exclude: If `True`, only keys appearing in `renamed` are copied to the new dictionary. - :keyword recurse: If `True`, the renaming operates over all nested + :param recurse: If `True`, the renaming operates over all nested dictionaries. - :keyword mergeAttributes: If `True`, any `Attribute` elements are + :param mergeAttributes: If `True`, any `Attribute` elements are processed into a standard key/values and merged into the main dictionary. :return: A new dictionary, a deep copy of the original, with different @@ -108,7 +101,7 @@ def renameKeys(d, renamed, exclude=True, recurse=True, elif not isinstance(d, dict): return d - result = Dict() + result = {} for oldname, v in d.items(): if oldname == "Attribute": @@ -172,15 +165,16 @@ def parseAttribute(obj, element, multiple=True): """ Utility function to parse an `Attribute` element's data into a key/value pair and apply it to an object's `attribute` attribute (a dictionary). - + + :param obj: The object to which the attributes apply. :param element: The `Attribute` element to parse. 
- :keyword multiple: An object may have more than one Attribute element + :param multiple: An object may have more than one Attribute element with the same name. If `True`, the value corresponding to the name is a list which is appended to. If `False`, the value is that of the last `Attribute` element parsed. """ if not hasattr(obj, 'attributes'): - obj.attributes = Dict() + obj.attributes = {} k = v = None for ch in element.value: @@ -244,6 +238,8 @@ def getParserRanges(parser, useDefaults=True): Note that floating point values are typically reported as (-1.0,1.0). :param parser: A `struct.Struct`-like parser. + :param useDefaults: If `True`, use the data type min/max if no ranges + have been explicitly defined. :return: A tuple of (min, max) tuples. Non-numeric values will have a reported range of `None`. """ @@ -268,9 +264,9 @@ def getElementHandlers(module=None, subElements=False): """ Retrieve all EBML element handlers (parsers) from a module. Handlers are identified by being subclasses of `ElementHandler`. - :keyword module: The module from which to get the handlers. Defaults to + :param module: The module from which to get the handlers. Defaults to the current module (i.e. `idelib.parsers`). - :keyword subElements: `True` if the set of handlers should also + :param subElements: `True` if the set of handlers should also include non-root elements (e.g. the sub-elements of a `RecordingProperties` or `ChannelDataBlock`). :return: A list of element handler classes. @@ -403,9 +399,9 @@ class AccelerometerParser(object): :deprecated: Used only for really old recordings without recorder description data. - :cvar size: The size (in bytes) of one parsed sample. - :cvar format: The `struct.Struct` parsing format string used to parse. - :cvar ranges: A tuple containing the absolute min and max values. + :var size: The size (in bytes) of one parsed sample. + :var format: The `struct.Struct` parsing format string used to parse. 
+ :var ranges: A tuple containing the absolute min and max values. """ NAME = "LegacyAccelerometer" @@ -477,6 +473,7 @@ def parse(self, element, **kwargs): handler = self.childHandlers[el.name] result.append(handler.parse(el, **kwargs)) return result + return None def getElementName(self, element): @@ -556,7 +553,7 @@ def isValidLength(self, parser): """ Check if an element's payload data is evenly divisible by into a set of channels. - :param n: The size of the data in bytes. + :param parser: The block's parsing struct. :return: `True` if the number of bytes can be evenly distributed into subsamples. """ @@ -568,6 +565,10 @@ def payload(self): return self.element.value + def getHeader(self): + raise NotImplementedError + + #=============================================================================== # SimpleChannelDataBlock-related handlers #=============================================================================== @@ -615,11 +616,11 @@ def parseWith(self, parser, start=None, end=None, step=1, subchannel=None): `parser.parse()` for consistency's sake. :param parser: The DataParser to use - :keyword start: First subsample index to parse - :keyword end: Last subsample index to parse - :keyword step: The number of samples to skip, if the start and end + :param start: First subsample index to parse + :param end: Last subsample index to parse + :param step: The number of samples to skip, if the start and end cover more than one sample. - :keyword subchannel: The subchannel to get, if specified. + :param subchannel: The subchannel to get, if specified. """ # SimpleChannelDataBlock payloads contain header info; skip it. data = self.payload @@ -645,7 +646,7 @@ def parseByIndexWith(self, parser, indices, subchannel=None): :param parser: The DataParser to use :param indices: A list of indices into the block's data. - :keyword subchannel: The subchannel to get, if specified. + :param subchannel: The subchannel to get, if specified. 
""" # SimpleChannelDataBlock payloads contain header info; skip it. data = self.payload @@ -716,8 +717,9 @@ def parse(self, element, sessionId=None, timeOffset=0): """ Create a (Simple)ChannelDataBlock from the given EBML element. :param element: A sample-carrying EBML element. - :keyword sessionId: The session currently being read; defaults to + :param sessionId: The session currently being read; defaults to whatever the Dataset says is current. + :param timeOffset: A timestamp offset, in microseconds. :return: The number of subsamples read from the element's payload. """ try: @@ -731,15 +733,18 @@ def parse(self, element, sessionId=None, timeOffset=0): # TODO: Actually handle, instead of ignoring? logger.warning("XXX: bad attribute in element %s" % element) return 0 - - + block.startTime = timeOffset + int(self.fixOverflow(block, timestamp)) if block.endTime is not None: block.endTime = timeOffset + int(self.fixOverflow(block, block.endTime)) + block.startTimeOriginal = block.startTime + block.endTimeOriginal = block.endTime + if channel not in self.doc.channels: # Unknown channel; could be debugging info, so that might be okay. # FUTURE: Better handling of unknown channel types. Low priority. + logger.debug(f'Got ChannelDataBlock for unknown channel ID: {channel}') return 0 try: @@ -793,11 +798,11 @@ def __init__(self, element): parseAttribute(self, el) el.gc() elif el.name == "StartTimeCodeAbs": - # TODO: store indicator that the start timestamp is non-modulo? + # FUTURE: store indicator that the start timestamp is non-modulo? self.startTime = el.value self._timestamp = el.value elif el.name == "EndTimeCodeAbs": - # TODO: store indicator that the end timestamp is non-modulo? + # FUTURE: store indicator that the end timestamp is non-modulo? self.endTime = el.value elif el.name == "ChannelFlags": # FUTURE: Handle channel flag bits @@ -805,17 +810,23 @@ def __init__(self, element): # Add other child element handlers here. 
element.gc(recurse=False) - + # Single-sample blocks have a total time of 0. Old files did not write # the end timestamp; if it's missing, duplicate the starting time. if self.endTime is None: self.endTime = self.startTime - self._payload = None + # Original start/end times, so `startTime`/`endTime` can be modified + # for syncing and reverted. Also, some functions need the originals. + self.startTimeOriginal = self.startTime + self.endTimeOriginal = self.endTime + self._payload = None self._parser = None - self._streamDtype = None - self._commonDtype = None + + # TODO: These don't seem to be used. Remove? + # self._streamDtype = None + # self._commonDtype = None @property def payload(self): @@ -1008,7 +1019,10 @@ class SensorListParser(ElementHandler): "TraceabilityData": "traceData", "SensorSerialNumber": "serialNum", "Attribute": "attributes", -# "SensorBwLimitIDRef": "bandwidthLimitId" # FUTURE + "SourceName": "sourceName", + "SourceIdentifier": "sourceId", + "IsRelative": "relative", + "SensorBwLimitIDRef": "bandwidthLimitId" } def parse(self, element, **kwargs): @@ -1087,7 +1101,7 @@ def parse(self, element, **kwargs): if 'parser' in data: pname = data.pop('parser', '') parser = DATA_PARSERS.get(pname, DATA_PARSERS.get(pname.upper(), None)) - if parser: + if parser is not None: # A named parser; use the special-case parser function. data['parser'] = parser() else: @@ -1146,7 +1160,9 @@ class PlotListParser(ChannelParser): 'PlotChannelRef': 'channelId', 'PlotSubChannelRef': 'subchannelId' } - + + + # noinspection PyMissingConstructor,PyUnusedLocal def __init__(self, *args, **kwargs): pnames = super(PlotListParser, self).parameterNames.copy() self.parameterNames.update(pnames) @@ -1273,13 +1289,14 @@ class TimeBaseUTCParser(ElementHandler): """ Handle TimeBaseUTC elements, applying it as the UTC start time of the current Session. 
""" - elementName = "TimeBaseUTC" + elementName = ("TimeBaseUTC", "TimeBaseUTCFine") isHeader = True def parse(self, element, **kwargs): val = element.value self.doc.lastUtcTime = val self.doc.lastSession.utcStartTime = val + self.doc.lastSession.utcStartTimeOriginal = val class RecorderUserDataParser(ElementHandler): diff --git a/idelib/schemata/mide_ide.xml b/idelib/schemata/mide_ide.xml index 97265ea6..42d7ce4c 100644 --- a/idelib/schemata/mide_ide.xml +++ b/idelib/schemata/mide_ide.xml @@ -97,7 +97,9 @@ WiSpi API Level for network co-processor Network Co-Processor Application Firmware Version Indicates the version of the key table, 0 if not present - Present in the device's DEVINFO if it supports control via serial + Present in the device's DEVINFO if it supports control via serial + Bytes to be escaped during command packet encoding (in addition to the standard HDLC break and HDLC escape characters) + Element in the device's DEVINFO indicating it supports control via the COMMAND and RESPONSE files. 0 is unsupported, 1 is supported; assumed to be 1 if not present. @@ -129,8 +131,8 @@ The modulus at which modulo timestamps for this channel roll over. The samplerate for this channel, if known and fixed. String represents a valid numeric expression, such as integer, decimal or ratio, e.g. '32768/16262'. Element required if both starting and ending timecodes will ever be omitted for blocks in this channel. Master element for SensorSubChannels. - ID of this SubChannel. Currently, SubChannelIDs must be sequential, starting from 0, for use with Slam Stick Lab. A specific SubChannel's ID will change depending on what 'sibling' channels are enabled. - ID of this SubChannel as reported in the DEVINFO, which shows all 'sibling' SubChannels. These numbers do not change if channels are enabled or disabled. + ID of this SubChannel. Currently, SubChannelIDs must be sequential, starting from 0, for use with Slam Stick Lab. 
A specific SubChannel's ID will change depending on what 'sibling' channels are enabled. + ID of this SubChannel as reported in the DEVINFO, which shows all 'sibling' SubChannels. These numbers do not change if channels are enabled or disabled. Display name of the subchannel, typically the axis name for multiaxis measurements (e.g. "X", "Yaw", etc.). Typically omitted if no display name is needed beyond a measurement label and units. Reference to a Calibration in CalibrationList. Reference to a BwLimitList entry. If present, this discloses any bandwidth limitations imposed for the acquisition channel, e.g. antialias or other filter settings. Note that the effective bandwidth is the lesser of the Sensor and SubChannel bandwidth! @@ -173,8 +175,8 @@ Text name of the axis(?). Reference to an Axis. List of channels/subchannels used by this Plot - SubChannelID - SubChannelID + ChannelID + SubChannelID @@ -182,11 +184,13 @@ "Idiot Light" warnings for sensors that are inaccurate in certain conditions - Warning ID, referenced by Subchannels. - ChannelID - SubChannelID + Warning ID, referenced by Subchannels. + ChannelID + SubChannelID Minimum valid value. Note: this is in real, post-converted units, not raw channel units. Maximum valid value. Note: this is in real, post-converted units, not raw channel units. + A name or description of the warning. Primarily intended for use in `UserData`. + Application-specific data describing the visual representation of the warning. Primarily intended for use in `UserData`. @@ -397,11 +401,12 @@ --> Session time base value in Unix Time. If present, used as the base for all future timecodes in the session. + Session time base value in Unix Time, using fractional seconds. If present, used as the base for all future timecodes in the session. This element contains potentially-channelized instrumentation data in a minimalist format. 
Its mandatory, fixed-length header includes a 2-byte modulo timecode (scaled to the channel's TimecodeScale, default of 1/32768 sec) and a 1-byte integer ChannelID. This element contains child elements including instrumentation data associated to a channel. This is used for e.g. binding a timestamp(s) and/or metadata to a specific multi-sample block of sensor data, which may be written asynchronously with respect to other channels' data (i.e. multiplexed). - Child of ChannelDataBlock: the channel this data is associated with + Child of ChannelDataBlock: the channel this data is associated with Child of ChannelDataBlock: optional flags to indicate datablock features such as discontinuity Child of ChannelDataBlock: the actual channel data samples. If there are multiple subchannels, for each sample point, the sample for each subchannel will be written consecutively (i.e. [sc0 sc1 sc2] [sc0 sc1 sc2]). Absolute timecode as an offset from the session TimeBase. The timecode resolution is given by the channel's TimeCodeScale. @@ -414,20 +419,47 @@ + A block of user-supplied data, appended to a file after recording, used primarily to keep information about the recording's display. + User-added notes attached to the recording. An offset (in microseconds) for all sample times. - Application-specific data describing GUI settings, etc. - User-created highlights, marking particular spans and points in time + + + + User-created highlights, marking particular spans and points in time - The annotation's ID, arbitrary but unique to the file. - A name and/or notes about the annotation. - Annotation start time. - Annotation end time. If present, the annotation will cover a time span. - Application-specific data describing the visual representation of the annotation. + One user-created marker, indicating a point or range in time. + The annotation's ID, arbitrary but unique to the file. + A name and/or notes about the annotation. + Annotation start time. + Annotation end time. 
If present, the annotation will cover a time span. + Application-specific data describing the visual representation of the annotation. - - + + Information connecting this recording to another for the purpose of synchronizing data. + 1 (or no element) if this sync is applied, 0 if inactive. Only one SyncInfo should be marked as active; if multiple SyncInfo are marked as active, only the first is applied. + The Channel ID of the time sync reference. + The SubChannel ID of the time sync reference. + The SubChannel ID of the time sync reference. + Human-friendly source (authority, network, or other source-of-truth) name for generic/virtual 'time' sensors. + Machine-readable, uniquely identifying hash identifying source equivalency for comparing 'time' sensors. + The time reference value corresponding to this file's timestamp zero. + The time reference value corresponding to the reference file's timestamp zero. + The rate, in microseconds per microsecond, that the enDAQ timestamps drift from the sync reference time. + The file to which this file is synced. + The fingerprint hash of the reference file to which this file is synced (a hex string). + This file's original name. Used when transferring sync data. + This file's fingerprint hash (a hex string). Used when transferring sync data. + The reference's UTC starting time (fractional seconds), applied to the synced Dataset. + + + Application-specific data describing GUI settings, etc. + + + + + diff --git a/idelib/sync.py b/idelib/sync.py new file mode 100644 index 00000000..ac0d618b --- /dev/null +++ b/idelib/sync.py @@ -0,0 +1,656 @@ +""" +Functions to assist in syncing one file to another, and functions +to adjust files' timestamps based on the GPS/GNSS satellite time; two +separate but related operations. + +Syncing +------- +Syncing modifies the start time and timestamps of one or more +:py:class:`Dataset` objects' recording session to match a 'reference' +:py:class:`Dataset`.
Syncing is non-destructive; the sync can be repeatedly +changed (i.e., a :py:class:`Dataset` can be synced to a different reference) +or removed without any cumulative effect to the timing. + +Syncing can only be done with IDE files containing a common time reference +channel; currently, only Wi-Fi enabled devices (i.e., the enDAQ W series) +connected to the same Wi-Fi access point can create these. Note that the time +sync reference channels are not shown in *enDAQ Lab*, but can be seen in +:py:attr:`Dataset.channels`. + +GPS/GNSS Time Adjustment +------------------------ +Adjusting a recording's starting timestamp using GPS/GNSS time requires that +the file was recorded on a device with a GPS module (e.g., an enDAQ W series +recorder) and contains GPS/GNSS timing data. Please note that the +former does not guarantee the latter; the GPS timing data may be missing +if satellite reception was poor, or the recording ended before the +signal was acquired. + +Syncing and GPS/GNSS time adjustment can be used together by first +applying the GPS/GNSS adjustment to the 'reference' recording before +syncing other files to it. All recordings must have the same sync +time channel, but only the 'reference' recording needs the GPS/GNSS +data as well. + +Usage +----- +While this module implements several functions, the primary ones are +:py:func:`idelib.sync.sync()` and :py:func:`idelib.sync.applyGNSSTime()`. +:py:func:`idelib.sync.updateUserdata()` (in conjunction with +:py:func:`idelib.userdata.writeUserData()`) can be used to save a recording's +calculated time/sync info into itself for later use.
+""" + +from copy import deepcopy +import logging +from typing import Any, Dict, List, Optional, Union, Tuple, TYPE_CHECKING + +from idelib import userdata + +if TYPE_CHECKING: + # codecov:ignore:next + from idelib.dataset import Dataset, EventArray, Sensor, Session + + +logger = logging.getLogger(__name__) + + +# =========================================================================== +# +# =========================================================================== + +class SyncError(ValueError): + """ Exception raised when files cannot be synchronized. """ + + +# =========================================================================== +# +# =========================================================================== + +def getSyncSensors(dataset: "Dataset") -> List["Sensor"]: + """ Get all the 'sensors' in a `Dataset` that can be used for time + synchronization. + """ + return [s for s in dataset.sensors.values() + if s.name and 'Time' in s.name and not s.relative] + + +def getSyncSensor(dataset: "Dataset", + sourceId: Optional[str] = None, + sourceName: Optional[str] = None) -> "Sensor": + """ Get a specific time reference sensor from a `Dataset` by name and/or + identifier. + + :param dataset: The `Dataset` from which to get the sensor. + :param sourceId: The time reference sensor's unique ID (e.g., the MAC + address of a Wi-Fi access point generating TSF data). + :param sourceName: The name of the time reference sensor (e.g., the + name of a Wi-Fi access point generating TSF data). 
+ """ + if not (sourceId or sourceName): + raise SyncError('Must specify a sourceId, sourceName, or both') + for s in getSyncSensors(dataset): + if (sourceId is None or s.sourceId == sourceId) and \ + (sourceName is None or s.sourceName == sourceName): + return s + msgs = [] + if sourceId: + msgs.append(f'with ID {sourceId!r}') + if sourceName: + msgs.append(f'named {sourceName!r}') + raise SyncError(f'Dataset does not have a time reference {" ".join(msgs)}') + + +def getSyncSources(dataset: "Dataset") -> List["EventArray"]: + """ Get the `SubChannel` instances that can be used for time synchronization. + + :param dataset: The `Dataset` from which to get the sources. + """ + sources = [] + for s in getSyncSensors(dataset): + for ch in s.getReferrers(): + sources.append(ch.getSession()) + return sources + + +# =========================================================================== +# +# =========================================================================== + +def _getSession(data: Union["Dataset", "EventArray", "Session"] + ) -> Tuple["Dataset", "Session"]: + """ Helper function to get the `Dataset` and current session from a + `Dataset`, `EventArray`, or `Session` object. + """ + if hasattr(data, 'channels'): + # data is a Dataset + session = data.currentSession + dataset = data + elif hasattr(data, 'session'): + # data is an EventArray + session = data.session + dataset = data.dataset + elif hasattr(data, 'sessionId'): + # Data is a Session + session = data + dataset = data.dataset + else: + raise TypeError(f'Cannot get sync info from {type(data).__name__!r} object') + + return dataset, session + + +def getSyncTimeZero(data: Union["Dataset", "EventArray"], + start: Optional[int] = None, + end: Optional[int] = None, + sensorId: Optional[int] = None, + clear: bool = False) -> int: + """ Get the sync reference's time corresponding to the recording's + relative timestamp zero. + + :param data: The data from which to get the reference time. 
It can + be either a `Dataset` (in which case it uses either the first sync + source found or the one indicated by `sensorId`) or an `EventArray` + (to use a specific subchannel as the time reference). + :param start: The starting index into the time reference data, to use + a limited range to compute the zero offset. For use with large + and/or noisy Datasets. Defaults to the beginning of the data. + :param end: The ending index into the time reference data, to use + a limited range to compute the zero offset. For use with large + and/or noisy Datasets. Defaults to the end of the data. + :param sensorId: The ID of a specific time reference sensor. Only + used if `data` is a `Dataset`. If `None`, the first time + reference sensor found is used. + :param clear: If `False`, use cached zero offset (if present). + :returns: The sync reference time (in microseconds) corresponding to + the recording's relative timestamp zero. + """ + dataset, session = _getSession(data) + + if session.syncZero is not None and session.syncInfo and not clear: + return session.syncZero + + # HACK: An indirect means of detecting a `Dataset`, avoiding circular imports. 
+ if hasattr(data, 'channels'): # isinstance(data, Dataset): + sources = getSyncSources(data) + if not sources: + raise SyncError(f'{dataset} does not contain any sync time sources') + if sensorId is not None: + sources = [s for s in sources if s.parent.sensor.id == sensorId] + if not sources: + raise SyncError(f'{dataset} does not contain time reference sensor ID {sensorId}') + sync = sources[0] + else: + sync = data + + if len(sync) == 0: + raise SyncError(f'No sync reference data in {dataset} - was it not fully imported?') + + with sync.dataset._channelDataLock: + timestamp, synctime = sync.arraySlice(start, end).mean(axis=1) + + session.syncZero = int(synctime - timestamp) + session.syncSensor = sync.parent.sensor + + if session.syncInfo is None or clear is True: + session.syncInfo = {'SyncActive': False} + + session.syncInfo.update({ + 'SyncChannelIDRef': sync.parent.parent.id, + 'SyncSubChannelIDRef': sync.parent.id, + 'SyncSourceName': sync.parent.sensor.sourceName, + 'SyncSourceIdentifier': sync.parent.sensor.sourceId, + 'SyncZero': session.syncZero, + 'TimeBaseUTCFine': [float(session.utcStartTimeOriginal)], + 'SyncFilename': dataset.filename, + 'SyncFingerprint': dataset.fingerprint, + }) + + return session.syncZero + + +def getCommonSensorIds(*datasets: "Dataset") -> List[int]: + """ Get the Sensor ID of the time reference shared by two or more + recordings. This does *not* verify that any time data has been + recorded from it, only that the sensor exists. + """ + if len(datasets) < 2: + raise SyncError('At least two datasets are required') + + sources = set(getSyncSensors(datasets[0])) + for ds in datasets[1:]: + s = getSyncSensors(ds) + if not s: + raise SyncError(f'No usable time reference found in {ds}') + sources.intersection_update(s) + + if not sources: + raise SyncError('Recordings do not share a common time reference') + + # TODO: More tests? Check compatible times (similar UTCs)? 
+ return [s.id for s in sources] + + +# =========================================================================== +# +# =========================================================================== + +def sync(reference: "Dataset", *datasets: "Dataset", + sensorId: Optional[int] = None, + inherit: bool = True): + """ Synchronize one or more recordings with a canonical 'reference' + recording. The 'reference' is not modified. Synched recordings + will have their timestamps and UTC start time offset to match + the reference. + + Note that this function runs synchronously, and can block/be blocked + by other functions affecting the contents of the :py:class:`Dataset`. + + :param reference: The reference `Dataset` to which to synchronize + the others. + :param datasets: One or more `Dataset` objects to synchronize. + :param sensorId: The time reference's sensor ID. If `None`, the + first common time reference sensor detected is used. Use if the + recordings have multiple time references in common. + :param inherit: If `True` and the `reference` Dataset has been synced + to another recording, sync the `datasets` to the same recording + `reference` has been synced to. If `False`, clear existing sync + info from the `reference` and sync the `datasets` to the + reference's zero time. + """ + # NOTE: This function assumes there is only one session, only one + # reference time sensor, and only one subchannel for the reference time. 
+ + if len(datasets) < 1: + raise SyncError('At least one dataset to sync is required') + + allDatasets = (reference, *datasets) + + if sensorId is None: + ids = tuple(getCommonSensorIds(reference, *datasets)) + if len(ids) > 1: + raise SyncError(f'Recordings share multiple time references {ids}, ' + 'use sensorId parameter to select one') + sensorId = ids[0] + elif not all(sensorId in ds.sensors for ds in (reference, *datasets)): + raise SyncError(f'Not all recordings contain sensor ID {sensorId}') + + # Backup offsets/zero times, in case of failure + origOffsets = [ds.currentSession.offset for ds in allDatasets] + origZeros = [ds.currentSession.syncZero for ds in allDatasets] + origInfo = [deepcopy(ds.currentSession.syncInfo) for ds in allDatasets] + + refInfo = reference.currentSession.syncInfo or {} + clear = not inherit or 'SyncReferenceZero' not in refInfo + + try: + for ds in allDatasets if clear else datasets: + ch = ds.sensors[sensorId].getReferrers()[0] + ref = ch.getSession() + ds.currentSession.offset = 0 + ds.currentSession.syncZero = getSyncTimeZero(ref, start=0) + + if clear: + refzero = reference.currentSession.syncZero + refutc = float(reference.currentSession.utcStartTime) + refFilename = reference.filename + refFingerprint = reference.fingerprint + else: + refzero = refInfo.get('SyncReferenceZero', reference.currentSession.syncZero) + refutc = refInfo.get('SyncReferenceTimeBase', float(reference.currentSession.utcStartTime)) + refFilename = refInfo.get('SyncReferenceFilename') + refFingerprint = refInfo.get('SyncReferenceFingerprint') + + for ds in datasets: + with ds._channelDataLock: + offset = ds.currentSession.syncZero - refzero + ds.currentSession.offset = offset + ds.currentSession.utcStartTime = refutc + + # Update syncInfo set by getSyncTimeZero() + ds.currentSession.syncInfo.update({ + 'SyncActive': True, + 'SyncReferenceZero': refzero, + 'SyncReferenceFilename': refFilename, + 'SyncReferenceFingerprint': refFingerprint, + 
'SyncReferenceTimeBase': refutc + }) + + except Exception: + # Failure: Restore original pre-sync values. + # codecov:ignore:this + for ds, offset, zero, info in zip(allDatasets, origOffsets, origZeros, origInfo): + with ds._channelDataLock: + ds.currentSession.syncZero = zero + ds.currentSession.offset = offset + ds.currentSession.utcStartTime = ds.currentSession.utcStartTimeOriginal + ds.currentSession.syncInfo = info + raise + + +def removeSync(data: Union["Dataset", "EventArray", "Session"], + clean: bool = False): + """ Remove sync info from a :py:class:`Dataset` recording session. The + UTC start time and timestamp offsets will revert to those + originally in the file. + + Note that this function runs synchronously, and can block/be blocked + by other functions affecting the contents of the :py:class:`Dataset`. + + :param data: The data from which to remove the sync info. + :param clean: If `False`, the information syncing this recording to + another is removed, but metadata about the file itself is kept. + If `True`, all sync-related info is completely removed. + """ + if not isSynced(data) and not clean: + raise SyncError('Data is not synced') + + dataset, session = _getSession(data) + with dataset._channelDataLock: + if clean: + session.syncInfo = None + elif session.syncInfo: + session.syncInfo['SyncActive'] = False + for k in tuple(session.syncInfo.keys()): + if 'Reference' in k: + del session.syncInfo[k] + + session.offset = 0 + session.utcStartTime = session.utcStartTimeOriginal + session.firstTime = session.firstTimeOriginal + session.lastTime = session.lastTimeOriginal + + if clean: + session.syncSensor = None + + +def isSynced(data: Union["Dataset", "EventArray", "Session"]) -> bool: + """ Has the data been synchronized to another recording? 
+ """ + dataset, session = _getSession(data) + if session.syncZero is None or not session.offset: + return False + elif session.utcStartTime == session.utcStartTimeOriginal: + return False + return True + + +# =========================================================================== +# +# =========================================================================== + +def getGNSSTimebase(data: Union["Dataset", "EventArray"]) -> float: + """ Get the recording's fine-grained UTC start time from its GPS/GNSS + data. + + :param data: The data from which to get the timebase. It can be + either a `Dataset` (in which case it uses the first GNSS time + source found) or an `EventArray` (to get the timebase from a + specific subchannel). + :returns: The recording's updated UTC start time with fractional + seconds (UNIX epoch). + """ + events = None + if hasattr(data, 'channels'): + # data is a Dataset + timechannel = None + for ch in data.channels.values(): + if ch.name == 'GNSS Time': + timechannel = ch + events = ch.getSession() + break + + if timechannel is None: + raise SyncError(f'No GNSS time channel found in {data!r}') + + elif hasattr(data, 'session'): + # data is an EventArray + events = data + + else: + raise TypeError(f'Cannot get GNSS time from {type(data).__name__!r} object') + + try: + times = events[0] + return times[1] - times[0] / 10 ** 6 + except (IndexError, TypeError): + raise SyncError(f'No GNSS time data in {data!r} - was it not fully imported?') + + +def applyGNSSTime(data: "Dataset", clear=False) -> Tuple[float, float]: + """ Modify the recording's UTC start time using GPS/GNSS time data. Note + that recordings with GPS/GNSS time bases cannot be synchronized to + another recording; they should be the 'reference' recording to which + others are synchronized. + + :param data: The recording to modify. It must contain GPS/GNSS time data. + :param clear: If `True`, remove any previous synchronization before + updating the recording's timebase. 
def removeGNSSTime(data: Union["Dataset", "EventArray", "Session"]):
    """ Revert a recording's UTC start time to the original value stored
        on its session (``utcStartTimeOriginal``), discarding any GPS/GNSS
        based start time.

        If the dataset's ``lastUtcTime`` currently equals the session's
        start time, it is reverted as well. When the session has sync
        info, its ``TimeBaseUTCFine`` entry is overwritten with the
        original start time.

        :param data: The data from which to remove the GPS/GNSS time base.
    """
    dataset, session = _getSession(data)

    # Only follow the session's start time if the dataset was tracking it.
    tracksStart = dataset.lastUtcTime == session.utcStartTime
    if tracksStart:
        dataset.lastUtcTime = session.utcStartTimeOriginal

    session.utcStartTime = session.utcStartTimeOriginal

    if not session.syncInfo:
        return
    session.syncInfo['TimeBaseUTCFine'] = [float(session.utcStartTimeOriginal)]
def makeSyncReferenceInfo(info: Dict[str, Any]) -> Dict[str, Any]:
    """ Build a dictionary of sync 'reference' info from a dictionary of
        sync 'target' info. For use with `applySyncInfo()` when applying
        sync info copied from another recording, to sync to that recording.

        The input is deep-copied, not modified. In the copy, ``SyncZero``,
        ``SyncFilename``, ``SyncFingerprint`` and ``TimeBaseUTCFine`` are
        removed and their values stored under the corresponding
        ``SyncReference*`` keys; ``SyncActive`` is set to `True`.

        :param info: The sync info of the recording to use as reference.
        :returns: A new dictionary of sync reference info. Missing source
            keys yield `None` (or ``0.0`` for the time base).
    """
    reference = deepcopy(info)

    zero = reference.pop('SyncZero', None)
    filename = reference.pop('SyncFilename', None)
    fingerprint = reference.pop('SyncFingerprint', None)
    # The time base is a sequence; only its first item is used.
    timebase = float(reference.pop('TimeBaseUTCFine', [0])[0])

    reference['SyncActive'] = True
    reference['SyncReferenceZero'] = zero
    reference['SyncReferenceFilename'] = filename
    reference['SyncReferenceFingerprint'] = fingerprint
    reference['SyncReferenceTimeBase'] = timebase
    return reference
+ + :param dataset: The `Dataset` to which the sync info will be applied. + :param info: The dictionary of synchronization info to validate. The + keys match the names of ``SyncInfo`` child elements in the + ``mide_ide.xml`` EBML schema. + :param validate: If `True`, validate the sync info before applying. + """ + info = {k: v for k, v in info.items() if v is not None} + + if validate: + validateSyncInfo(dataset, info) + + with dataset._channelDataLock: + session = dataset.currentSession + + oldInfo = session.syncInfo + oldZero = session.syncZero + oldOffset = session.offset + oldSensor = session.syncSensor + oldStart = session.utcStartTime + + try: + session.syncInfo = info + session.syncSensor = getSyncSensor(dataset, + sourceId=info.get('SyncSourceIdentifier'), + sourceName=info.get('SyncSourceName')) + + if not info.get('SyncActive', True): + return + + zero = info.get('SyncZero') + + if (info.get('SyncFingerprint') == dataset.fingerprint + and zero is not None): + # Sync info is from this file (probably loaded from userdata); + # don't recalculate (which will fail if the file was opened + # but not fully imported) + session.syncZero = zero + else: + # Generate new sync info (requires loaded file) + getSyncTimeZero(dataset, sensorId=session.syncSensor.id) + + session.utcStartTime = info.get('SyncReferenceTimeBase', session.utcStartTime) + session.offset = session.syncZero - info.get('SyncReferenceZero', 0) + + except Exception: + session.syncInfo = oldInfo + session.syncZero = oldZero + session.offset = oldOffset + session.syncSensor = oldSensor + session.utcStartTime = oldStart + raise + + +def getSyncInfo(dataset: "Dataset") -> Dict[str, Any]: + """ Get a dictionary of sync info from a recording. 
def loadSyncInfo(dataset: "Dataset",
                 refresh: bool = False) -> bool:
    """ Read and apply sync info from a file's userdata.

        If the userdata has a ``SyncInfo`` entry, it is applied with
        `applySyncInfo()` (without validation). If that sync info contains
        ``SyncReferenceTimeBase``, the recording is synced to another and
        any top-level ``TimeBaseUTCFine`` is ignored. Otherwise, a
        top-level ``TimeBaseUTCFine`` becomes the current session's UTC
        start time.

        ``TimeBaseUTCFine`` may be either a sequence (its first item is
        used) or a bare number; `updateUserdata()` stores the latter in
        the cached userdata.

        :param dataset: The `Dataset` from which to load the sync info.
        :param refresh: If `True`, ignore any cached user data and reload
            from the file.
        :return: `True` if sync info was present, `False` otherwise. Errors
            in the sync data will raise exceptions (e.g., `SyncError`).
    """
    data = userdata.readUserData(dataset, refresh=refresh)
    if not data:
        return False

    changed = False
    sync = data.get('SyncInfo', None)
    if sync:
        applySyncInfo(dataset, sync, validate=False)
        changed = True
        if 'SyncReferenceTimeBase' in sync:
            # Recording synced to another, ignore other time base adjustment
            return changed

    if 'TimeBaseUTCFine' in data:
        timebase = data['TimeBaseUTCFine']
        if isinstance(timebase, (list, tuple)):
            timebase = timebase[0]
        # Otherwise it is already a scalar (e.g., cached userdata that was
        # changed by updateUserdata() and not re-read from the file).
        dataset.currentSession.utcStartTime = timebase
        changed = True

    return changed
+ """ + try: + data = userdata.readUserData(dataset) or {} + session = dataset.currentSession + + if session.utcStartTimeOriginal != session.utcStartTime: + data['TimeBaseUTCFine'] = float(session.utcStartTime) + else: + data.pop('TimeBaseUTCFine', None) + + data.pop('SyncInfo', None) + + if session.syncInfo: + try: + info = getSyncInfo(dataset) + except SyncError: + # No sync source (i.e., TSF data) + info = None + + if info: + data['SyncInfo'] = info + + if data is not dataset._userdata: + dataset._userdata = data + + except Exception: + dataset._userdata = deepcopy(dataset._userdataOriginal) + raise diff --git a/idelib/tools/ideexport.py b/idelib/tools/ideexport.py index 7ee38b1e..baa188c7 100644 --- a/idelib/tools/ideexport.py +++ b/idelib/tools/ideexport.py @@ -13,6 +13,8 @@ from idelib.matfile import exportMat from idelib.tools.ideinfo import showIdeInfo +import idelib.sync + # try: # import tqdm.auto # Updater = importer.TQDMUpdater @@ -74,7 +76,8 @@ def ideExport(ideFilename: str, useNames: bool = False, updater: Optional[Callable] = None, timeScalar: float = 1.0, - saveInfo: bool = True) -> int: + saveInfo: bool = True, + sync: bool = False) -> int: """ The main function that handles generating text files from an IDE file. :param ideFilename: The name of the source IDE file. @@ -109,6 +112,8 @@ def ideExport(ideFilename: str, seconds, etc. :param saveInfo: If `True`, save a text file with key recording metadata and summary info. + :param sync: Apply recordings' sychronization data (if present in the + file). 
""" b = os.path.basename(ideFilename) outputType = outputType.strip('.') @@ -119,12 +124,6 @@ def ideExport(ideFilename: str, outFilename = os.path.join(outFilename, os.path.splitext(b)[0]) doc = importer.openFile(ideFilename, updater=updater) - if saveInfo: - with open(f'{outFilename}_info.txt', 'wt') as f: - showIdeInfo(doc, out=f, extra={'headers': headers, - 'removeMean': removeMean, - 'useUtcTime': useUtcTime, - 'useIsoFormat': useIsoFormat}) if not channels: channels = [c.id for c in doc.channels.values() @@ -143,6 +142,20 @@ def ideExport(ideFilename: str, endTime=endTime, updater=updater) + if sync: + try: + idelib.sync.loadSyncInfo(doc) + except idelib.sync.SyncError: + # Probably does not have sync sources + pass + + if saveInfo: + with open(f'{outFilename}_info.txt', 'wt') as f: + showIdeInfo(doc, out=f, extra={'headers': headers, + 'removeMean': removeMean, + 'useUtcTime': useUtcTime, + 'useIsoFormat': useIsoFormat}) + exportChannels = [doc.channels[cid] for cid in channels if cid in doc.channels] @@ -253,6 +266,8 @@ def main(argv=None): help="Write timestamps as UTC 'Unix epoch' time.") argparser.add_argument('-n', '--names', action='store_true', help="Include channel names in exported filenames.") + argparser.add_argument('-s', '--sync', action='store_true', + help="Apply recordings' sychronization data (if present in the file).") txtargs = argparser.add_argument_group("Text Export Options (CSV, TXT, etc.)") txtargs.add_argument('-r', '--headers', action='store_true', @@ -288,7 +303,8 @@ def main(argv=None): useUtcTime=args.utc, useIsoFormat=args.isoformat, useNames=args.names, - updater=updater) + updater=updater, + sync=args.sync) numfiles = f'{len(sources)} file' + ('s' if len(sources) > 1 else '') tstr = str(tt).rstrip('0.') diff --git a/idelib/tools/idesync.py b/idelib/tools/idesync.py new file mode 100644 index 00000000..d60dfc2f --- /dev/null +++ b/idelib/tools/idesync.py @@ -0,0 +1,142 @@ +""" +Batch .IDE Synchronization Utility: Add sync info 
to IDE files. + +Note: This utility modifies the files its synchronizes, adding or changing +existing userdata appended to the end of the recordings. +""" + +from datetime import datetime, timezone +import os +from pathlib import Path +import sys +from typing import List, Tuple, Union + +from idelib import __version__, __copyright__ +from idelib import importer +from idelib import sync +from idelib import userdata + + +# =========================================================================== +# +# =========================================================================== + +def syncFiles(reference: Union[str, Path], *recordings: Union[str, Path], + inherit: bool = False, + gps: bool = False) -> Tuple[List[str], List[str]]: + """ Synchronize recording files, writing the sync info in their userdata + for later use. + + :param reference: The 'reference' IDE filename, to which the other + recordings will be synchronized. + :param recordings: The IDE filenames to sync to the reference. These + are optionl if `gps` is `True`. + :param inherit: If `True`, information in the reference syncing it + to another recording will be used to sync the other files, as + opposed to syncing them to the reference itself. + :param gps: If `True`, update the reference file's starting time + using the GPS/GNSS it contains. The new time base will be + written to the reference file's user data. + :returns: A tuple of two lists of filenames: successfully synced + and failures. Note that the utility doesn't really use the + return values; they're primarily for testing. 
def main(argv=None):
    """ Command-line entry point of the batch IDE synchronization utility.

        Parses the arguments, expands each ``recordings`` argument as a
        glob pattern (keeping only existing files), and calls
        `syncFiles()`.

        Exits with status 1 if ``--gps`` and ``--inherit`` are both given,
        if there is nothing to do (no recordings found and ``--gps`` not
        set), or if `syncFiles()` raises a `SyncError`. Exits with
        status 0 if interrupted from the keyboard.

        :param argv: The list of command-line arguments to parse. `None`
            is passed through to `argparse`, which then uses
            ``sys.argv``.
    """
    import argparse
    from glob import glob
    import locale

    locale.setlocale(locale.LC_ALL, '')

    argparser = argparse.ArgumentParser(
        description=f"Batch IDE Synchronization Utility v{__version__} - {__copyright__}")

    argparser.add_argument('-g', '--gps', action='store_true',
                           help=("Update the reference recording's start time using its GPS/GNSS data. "
                                 "Cannot be used in conjunction with --inherit."))
    argparser.add_argument('-i', '--inherit', action='store_true',
                           help=("If the reference recording has been synced to another, sync "
                                 "the other datasets to that (rather than the reference itself). "
                                 "Cannot be used in conjunction with --gps."))
    argparser.add_argument('reference', metavar="FILENAME.IDE",
                           help=("The recording to which to synchronize the others, and/or adjust "
                                 "its starting time if --gps is used."))
    argparser.add_argument('recordings', nargs='*', metavar="FILENAME.IDE",
                           help=("Recordings to sync to the reference."))

    args = argparser.parse_args(argv)

    # Note: sys.exit() is used throughout; the bare exit() builtin is
    # injected by the `site` module and is not guaranteed to exist.
    if args.gps and args.inherit:
        print("ERROR: --gps and --inherit cannot be used together",
              file=sys.stderr, flush=True)
        sys.exit(1)

    recordings = []
    for source in args.recordings:
        recordings.extend(s for s in glob(source) if os.path.isfile(s))

    if not recordings and not args.gps:
        print("ERROR: Nothing to do; no change to the reference file, no other files found.",
              file=sys.stderr, flush=True)
        sys.exit(1)

    try:
        syncFiles(args.reference, *recordings, gps=args.gps, inherit=args.inherit)

    except sync.SyncError as err:
        print(f"ERROR: {err}", file=sys.stderr, flush=True)
        sys.exit(1)

    except KeyboardInterrupt:
        print("\n*** Synchronization canceled!", file=sys.stdout, flush=True)
        sys.exit(0)
loaded. This should fix it. if getattr(self.dataset, 'loading', False): - logger.warning("%s occurred in combined polynomial %r"% + logger.warning("%s occurred in combined polynomial %r" % (err.__class__.__name__, self)) return None raise diff --git a/idelib/userdata.py b/idelib/userdata.py index 738494db..3fd4c1dc 100644 --- a/idelib/userdata.py +++ b/idelib/userdata.py @@ -4,12 +4,15 @@ the display of the `Dataset`. """ +import copy import errno import os.path import logging -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, Tuple, Union, TYPE_CHECKING -from .dataset import Dataset +if TYPE_CHECKING: + # codecov:ignore:next + from .dataset import Dataset #=============================================================================== # @@ -17,13 +20,14 @@ MIN_VOID_SIZE = 9 -logger = logging.getLogger('idelib') +logger = logging.getLogger(__name__) + #=============================================================================== # #=============================================================================== -def getUserDataPos(dataset: Dataset, +def getUserDataPos(dataset: "Dataset", refresh: bool = False) -> Tuple[bool, int, int]: """ Get the offset of the start of the user data. @@ -69,8 +73,9 @@ def getUserDataPos(dataset: Dataset, finally: fs.seek(oldpos, os.SEEK_SET) - dataset._userdataOffset = offset - dataset._filesize = filesize + dataset._userdataOffset = offset + dataset._filesize = filesize + return hasdata, offset, filesize @@ -78,12 +83,12 @@ def getUserDataPos(dataset: Dataset, # #=============================================================================== -def readUserData(dataset: Dataset, +def readUserData(dataset: "Dataset", refresh: bool = False) -> Union[Dict[str, Any], None]: """ Read application-specific user data from the end of an IDE file. :param dataset: The `Dataset` from which to read the user data. 
def saveUserData(dataset: "Dataset", refresh: bool = False):
    """ Write the user data currently attached to a `Dataset` (its
        ``_userdata`` attribute) to the file, by way of `writeUserData()`.

        :param dataset: The `Dataset` from which to write the user data.
        :param refresh: If `True`, ignore any cached values and find the
            position in the file to which to write.
    """
    pending = dataset._userdata
    writeUserData(dataset, pending, refresh=refresh)
a/testing/TSF2.IDE b/testing/TSF2.IDE new file mode 100644 index 00000000..36689a60 Binary files /dev/null and b/testing/TSF2.IDE differ diff --git a/testing/test_dataset.py b/testing/test_dataset.py index b52ff298..58cbd570 100644 --- a/testing/test_dataset.py +++ b/testing/test_dataset.py @@ -34,7 +34,7 @@ from idelib import importer from idelib import parsers -from testing.utils import nullcontext +from contextlib import nullcontext from .file_streams import makeStreamLike @@ -441,16 +441,11 @@ def testEndSession(self): def testAddSensor(self): """ Test that the sensors are being added correctly. """ sensor1 = Sensor(self.dataset, 0) - sensor2 = Sensor(self.dataset, 'q') # test that numeric ids work self.dataset.addSensor(0) self.assertEqual(sensor1, self.dataset.sensors[0]) - # test that string ids work - self.dataset.addSensor('q') - self.assertEqual(sensor2, self.dataset.sensors['q']) - def testAddChannel(self): """ Test that each channel is being added to the dataset correctly, and @@ -481,16 +476,12 @@ def testAddTransform(self): # set up new transforms xform1 = Transformable() xform1.id = 1 - xform2 = Transformable() - xform2.id = 'q' xform3 = Transformable() xform3.id = None # assert that transforms are being added correctly self.dataset.addTransform(xform1) - self.dataset.addTransform(xform2) self.assertEqual(self.dataset.transforms[1], xform1) - self.assertEqual(self.dataset.transforms['q'], xform2) # assert that transforms without an id will raise errors self.assertRaises(ValueError, self.dataset.addTransform, xform3) @@ -573,6 +564,7 @@ class TestSession(unittest.TestCase): def testInitAndEQ(self): self.dataset = importer.importFile('./testing/SSX70065.IDE') + # TODO: Change these Sessions, they aren't realistic. 
def test_getSyncTimeZero():
    """ Test getting the sync time zero, and the ways in which getting it
        can fail.
    """
    here = os.path.dirname(__file__)
    withSync = importer.importFile(os.path.join(here, 'TSF1.IDE'))  # Has sync reference
    withoutSync = importer.importFile(os.path.join(here, 'test3.IDE'))  # No sync reference
    headerOnly = importer.openFile(os.path.join(here, 'TSF2.IDE'))  # Has sync reference

    sync.getSyncTimeZero(withSync)
    assert withSync.currentSession.syncZero is not None

    # Not a Dataset or EventArray
    with pytest.raises(TypeError):
        sync.getSyncTimeZero('an invalid object')

    # Bad sensor ID
    with pytest.raises(sync.SyncError):
        sync.getSyncTimeZero(withSync, sensorId=254, clear=True)

    # No sync reference
    with pytest.raises(sync.SyncError):
        _ = sync.getSyncTimeZero(withoutSync)

    # Only header loaded, no reference data
    with pytest.raises(sync.SyncError):
        sync.getSyncTimeZero(headerOnly)
def test_sync_repeat():
    """ Test that syncing a pair of recordings a second time leaves the
        target's offset unchanged.
    """
    here = os.path.dirname(__file__)
    reference = importer.importFile(os.path.join(here, 'TSF1.IDE'))
    target = importer.importFile(os.path.join(here, 'TSF2.IDE'))

    targetAccel = target.channels[80].getSession()
    before = targetAccel.session.offset

    # First sync: the offset must change
    sync.sync(reference, target)
    after = targetAccel.session.offset
    assert after != before

    # Second sync: the offset must not stack or otherwise change
    sync.sync(reference, target)
    assert targetAccel.session.offset == after
def test_apply_sync():
    """ Test applying a dictionary of sync info. The info is passed
        through a JSON round trip first, as one use of applySyncInfo is
        copying sync info between IDEs in enDAQ Lab, which will
        (probably) use JSON and the clipboard (at least initially).
    """
    here = os.path.dirname(__file__)
    reference = importer.importFile(os.path.join(here, 'TSF1.IDE'))
    target = importer.importFile(os.path.join(here, 'TSF2.IDE'))

    # Neither file starts out synced
    assert not sync.isSynced(reference)
    assert not sync.isSynced(target)

    sync.sync(reference, target, inherit=False)

    # The reference's timing should not change; the target should now be
    # synced to the reference.
    assert not sync.isSynced(reference)
    assert sync.isSynced(target)
    assert target.currentSession.utcStartTime == reference.currentSession.utcStartTime

    # Keep what the sync produced, for comparison after re-applying it
    info = json.loads(json.dumps(target.currentSession.syncInfo))
    offset = target.currentSession.offset
    sensor = target.currentSession.syncSensor

    sync.removeSync(target, clean=True)
    assert not sync.isSynced(target)
    assert target.currentSession.offset == 0
    assert target.currentSession.utcStartTime != reference.currentSession.utcStartTime
    assert target.currentSession.syncSensor is None

    sync.applySyncInfo(target, info)
    assert target.currentSession.syncInfo == info
    assert target.currentSession.offset == offset
    assert target.currentSession.utcStartTime == reference.currentSession.utcStartTime
    assert target.currentSession.syncSensor == sensor
def test_sync_reference_info():
    """ Test a couple of functions related to sync reference info.
    """
    # hasSyncReferenceInfo(): only the complete info has a reference
    expectations = (
        (SYNC_INFO, True),
        (SYNC_INFO_NO_REFERENCE, False),
        ({}, False),
    )
    for candidate, expected in expectations:
        assert sync.hasSyncReferenceInfo(candidate) is expected

    # makeSyncReferenceInfo(): the result must count as reference info
    converted = sync.makeSyncReferenceInfo(SYNC_INFO_NO_REFERENCE)
    assert sync.hasSyncReferenceInfo(converted) is True
+ """ + cwd = os.path.dirname(__file__) + doc1 = importer.importFile(os.path.join(cwd, 'TSF1.IDE')) + + # This test modifies the IDE; make a working copy + sourcename = os.path.join(cwd, 'TSF2.IDE') + filename = tmp_path / os.path.basename(sourcename) + shutil.copyfile(sourcename, filename) + doc2 = importer.importFile(filename) + + sync.sync(doc1, doc2) + info = sync.getSyncInfo(doc2) + start = doc2.currentSession.utcStartTime + sync.updateUserdata(doc2) + userdata.saveUserData(doc2) + doc2.close() + + doc2a = importer.openFile(filename) + sync.loadSyncInfo(doc2a) + assert sync.getSyncInfo(doc2a) == info + assert doc2a.currentSession.utcStartTime == start + + importer.readData(doc2a) + assert doc2a.currentSession.utcStartTime == start + + +# =========================================================================== +# +# =========================================================================== + +def test_gnss_sync(): + """ Basic test: add and remove GPS/GNSS time base modification. + """ + cwd = os.path.dirname(__file__) + filename = os.path.join(cwd, 'GNSS1.IDE') + + with importer.importFile(filename) as doc: + oldTime = doc.currentSession.utcStartTime + sync.applyGNSSTime(doc) + assert doc.currentSession.utcStartTime != oldTime + + sync.removeGNSSTime(doc) + assert doc.currentSession.utcStartTime == oldTime + + +def test_gnss_sync_nodata(tmp_path): + """ Test failing to get/apply GNSS time base. 
+ """ + cwd = os.path.dirname(__file__) + + # File has no GNSS Time channel, obvious fail + filename = os.path.join(cwd, 'test3.IDE') + with importer.importFile(filename) as doc: + with pytest.raises(sync.SyncError): + sync.applyGNSSTime(doc) + + # File has a GNSS Time channel, but it has no data + filename = os.path.join(cwd, 'TSF1.IDE') + with importer.importFile(filename) as doc: + with pytest.raises(sync.SyncError): + sync.applyGNSSTime(doc) + + +def test_gnss_sync_userdata(tmp_path): + """ Test reading/writing GPS/GNSS time base info from a recording's + userdata. + """ + # This test modifies the IDE; make a working copy + cwd = os.path.dirname(__file__) + sourcename = os.path.join(cwd, 'GNSS1.IDE') + filename = tmp_path / os.path.basename(sourcename) + shutil.copyfile(sourcename, filename) + + with importer.importFile(filename) as doc: + # Write GNSS timebase to userdata + sync.applyGNSSTime(doc) + sync.updateUserdata(doc) + gnsstime = doc.currentSession.utcStartTime + assert doc._userdata['TimeBaseUTCFine'] == gnsstime + userdata.writeUserData(doc, doc._userdata) + + with importer.importFile(filename) as doc: + # Check saved info + assert 'TimeBaseUTCFine' not in (doc._userdata or {}) + assert doc.currentSession.utcStartTime == doc.currentSession.utcStartTimeOriginal + assert doc.currentSession.utcStartTime != gnsstime + sync.loadSyncInfo(doc) + assert 'TimeBaseUTCFine' in doc._userdata + assert doc.currentSession.utcStartTime != doc.currentSession.utcStartTimeOriginal + assert doc.currentSession.utcStartTime == gnsstime diff --git a/testing/test_tools.py b/testing/test_tools.py index 2e25d77a..ad623988 100644 --- a/testing/test_tools.py +++ b/testing/test_tools.py @@ -1,9 +1,16 @@ from glob import glob import os.path +import shutil -from idelib import importer -from idelib.tools import ideexport, ideinfo +import pytest +from idelib import importer, sync, userdata +from idelib.tools import ideexport, ideinfo, idesync + + +# 
=========================================================================== +# ideexport tests +# =========================================================================== def test_ideexport_basic(tmpdir_factory): """ Basic ideexport test, verify the correct number of files are created. @@ -41,6 +48,10 @@ def test_ideexport_csv(tmpdir_factory): assert lines == len(events) +# =========================================================================== +# ideinfo tests +# =========================================================================== + def test_ideinfo_basic(tmpdir_factory): """ Test that ideinfo creates an info file. """ @@ -50,3 +61,75 @@ def test_ideinfo_basic(tmpdir_factory): ideinfo.main(['--output', outfile, filename]) assert os.path.exists(outfile) + + +# =========================================================================== +# idesync tests +# =========================================================================== + +def test_idesync_basics(tmpdir_factory): + """ Test basic idesync functionality. Note that this skips the `main()` + function and calls `syncFiles()` directly in order to get exceptions + that `main()` catches. + """ + filenames = [] + cwd = os.path.dirname(__file__) + path = tmpdir_factory.mktemp('test_idesync_basics') + + for f in ('GNSS1.IDE', 'TSF1.IDE', 'TSF2.IDE'): + shutil.copy2(os.path.join(cwd, f), path / f) + filenames.append(str(path / f)) # Filenames will be strings when run from the CLI + + # One file: just set GPS/GNSS time base + successes, failures = idesync.syncFiles(filenames[0], gps=True) + assert len(successes) == 1 + assert len(failures) == 0 + + with importer.importFile(filenames[0]) as doc: + userdata.readUserData(doc) + assert doc._userdata['TimeBaseUTCFine'] != doc.currentSession.utcStartTimeOriginal + + +def test_idesync_fail(tmpdir_factory): + """ Test basic idesync failures. 
Note that this skips the `main()` + function and calls `syncFiles()` directly in order to get exceptions + that `main()` catches. + """ + filenames = [] + cwd = os.path.dirname(__file__) + path = tmpdir_factory.mktemp('test_idesync_fail') + for f in ('GNSS1.IDE', 'TSF1.IDE', 'TSF2.IDE', 'test3.IDE'): + shutil.copy2(os.path.join(cwd, f), path / f) + filenames.append(str(path / f)) # Filenames will be strings when run from the CLI + + # Fail: can't use `gps` and `inherit` together + with pytest.raises(sync.SyncError, match=r'.*mutually exclusive.*'): + idesync.syncFiles(filenames[1], filenames[2], gps=True, inherit=True) + + # Fail: No GPS/GNSS time data + with pytest.raises(sync.SyncError, match=r'No GPS/GNSS data.*'): + idesync.syncFiles(filenames[1], filenames[2], gps=True, inherit=False) + + successes, failures = idesync.syncFiles(filenames[0], filenames[1], filenames[2], gps=False) + assert len(successes) == 0 + assert len(failures) == 2 + + +def test_idesync_ideexport(tmpdir_factory): + """ Basic idesync/ideexport test, verify that synced files can be exported without failing. + """ + filenames = [] + cwd = os.path.dirname(__file__) + path = tmpdir_factory.mktemp('test_idesync_ideexport') + for f in ('TSF1.IDE', 'TSF2.IDE', 'GNSS1.IDE'): + shutil.copy2(os.path.join(cwd, f), path / f) + filenames.append(str(path / f)) # Filenames will be strings when run from the CLI + + idesync.syncFiles(filenames[0], filenames[1]) + idesync.syncFiles(filenames[2], gps=True) + + # Just see if the files can be exported with sync without failing + ideexport.main(['--output', str(path), '-s', filenames[1]]) + ideexport.main(['--output', str(path), '-s', filenames[2]]) + + # FUTURE: Check export content, even though test_sync tests data? 
diff --git a/testing/utils.py b/testing/utils.py deleted file mode 100644 index 4297468a..00000000 --- a/testing/utils.py +++ /dev/null @@ -1,9 +0,0 @@ -class nullcontext: - """ A replacement for `contextlib.nullcontext` for python versions before 3.7 - """ - - def __enter__(self): - pass - - def __exit__(self, exc_type, exc_val, exc_tb): - pass