From a3cd70125e85c8853c284c5058153b4d629604d0 Mon Sep 17 00:00:00 2001 From: Cees-Bart Breunesse Date: Tue, 18 Mar 2025 22:17:38 -0700 Subject: [PATCH 1/2] bye bye mmap, hello numpy >= 2 --- README.md | 4 ++ setup.cfg | 2 +- tests/test_parameter.py | 5 ++- trsfile/engine/trs.py | 79 ++++++++++++--------------------------- trsfile/traceparameter.py | 3 +- 5 files changed, 33 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index 0a41400..40b4daa 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +# YO! + +This is a fork and I (ceeesb@gmail.com) killed the mmap / mmap resize dependencies in `trs.py` so that you may use this library on a mac to create trs files. I also fixed the `numpy.bool8` issue so that installing trsfile doesn't pull back the numpy version into the stone age. + # Inspector Trace Set `.trs` file support in Python [![Build Status](https://app.travis-ci.com/Riscure/python-trsfile.svg?branch=master)](https://app.travis-ci.com/Riscure/python-trsfile) [![Documentation Status](https://readthedocs.org/projects/trsfile/badge/)](https://trsfile.readthedocs.io/) diff --git a/setup.cfg b/setup.cfg index 90cc2f5..9a58b6f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ packages = trsfile.engine trsfile.converters install_requires = - numpy>=1,<2 + numpy include_package_data = True [options.extras_require] diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 45636a9..c11abfa 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,7 +1,8 @@ from io import BytesIO from unittest import TestCase -from numpy import ndarray, int16, array, int32, int64, single, double, uint8, int8, uint16, bool8 +from numpy import ndarray, int16, array, int32, int64, single, double, uint8, int8, uint16 +from numpy import bool as bool8 from trsfile.traceparameter import BooleanArrayParameter, ByteArrayParameter, DoubleArrayParameter, FloatArrayParameter, \ IntegerArrayParameter, ShortArrayParameter, LongArrayParameter, StringParameter @@ -47,7 +48,7 @@ def test_byte_parameter(self): ByteArrayParameter([0, '1']) with self.assertRaises(TypeError): ByteArrayParameter([bytes([0, 1, 2, 3]), bytes([4, 5, 6, 7])]) - with self.assertRaises(TypeError): + with self.assertRaises(OverflowError): ByteArrayParameter(ndarray(shape=[16], dtype=int8, buffer=array([int8(val) for val in int_data]))) with self.assertRaises(TypeError): ByteArrayParameter(ndarray(shape=[16], dtype=uint16, buffer=array([uint16(val) for val in int_data]))) diff --git a/trsfile/engine/trs.py b/trsfile/engine/trs.py index 80e3b9f..6d1182e 100644 --- a/trsfile/engine/trs.py +++ b/trsfile/engine/trs.py @@ -1,5 +1,4 @@ import os -import mmap import struct from typing import List, Union, Dict, Any, Optional @@ -43,14 +42,12 @@ class TrsEngine(Engine): def __init__(self, path, mode='x', **options): self.path = path if type(path) is str else str(path) self.handle = None - self.file_handle = None + self.handle = None self.traceblock_offset = None self.sample_length = None self.trace_length = None - self.is_mmap_synched = False - # Initialize empty dictionaries self.headers = {} self.header_locations = {} @@ -75,8 +72,7 @@ def __init__(self, path, mode='x', **options): if not os.path.isfile(self.path): raise FileNotFoundError('No TRS file: \'{0:s}\''.format(self.path)) - self.file_handle = open(self.path, 'rb') - self.handle = mmap.mmap(self.file_handle.fileno(), 0, access=mmap.ACCESS_READ) + self.handle = open(self.path, 'r+b') self.read_only = True self.read_headers = True @@ -85,15 +81,7 @@ def __init__(self, path, mode='x', **options): if headers is not None and any(not isinstance(header, Header) for header in headers): raise TypeError('Creation of TRS files requires passing Headers to the constructor.') - # Sadly, to memory map we need a file with a minimum of length 1 - self.file_handle = open(self.path, 'wb') - self.file_handle.write(b'\x00') - self.file_handle.close() - - # Now we can open it properly - self.file_handle = open(self.path, 'r+b') - self.handle = mmap.mmap(self.file_handle.fileno(), 0, access=mmap.ACCESS_WRITE) - + self.handle = open(self.path, 'w+b') self.read_only = False self.read_headers = False @@ -105,13 +93,7 @@ def __init__(self, path, mode='x', **options): if os.path.isfile(self.path): raise FileExistsError('TRS file exists: \'{0:s}\''.format(self.path)) - # Sadly, to memory map we need a file with a minimum of length 1 - self.file_handle = open(self.path, 'wb') - self.file_handle.write(b'\x00') - self.file_handle.close() - - self.file_handle = open(self.path, 'r+b') - self.handle = mmap.mmap(self.file_handle.fileno(), 0, access=mmap.ACCESS_WRITE) + self.handle = open(self.path, 'w+b') self.read_only = False self.read_headers = False @@ -124,12 +106,6 @@ def __init__(self, path, mode='x', **options): else: self.read_headers = False - # We need to create an empty file - # Sadly, to memory map we need a file with a minimum of length 1 - self.file_handle = open(self.path, 'wb') - self.file_handle.write(b'\x00') - self.file_handle.close() - if self.read_headers and headers is not None: raise TypeError('Cannot change headers when reading TRS files.') elif not self.read_headers and headers is not None and any( @@ -137,8 +113,10 @@ def __init__(self, path, mode='x', **options): raise TypeError('Creation of TRS files requires passing instances of Headers to the constructor.') # NOTE: We are using r+b mode because we are essentially updating the file! - self.file_handle = open(self.path, 'r+b') - self.handle = mmap.mmap(self.file_handle.fileno(), 0, access=mmap.ACCESS_WRITE) + if self.read_headers: + self.handle = open(self.path, 'r+b') + else: + self.handle = open(self.path, 'w+b') self.read_only = False else: @@ -153,7 +131,8 @@ def is_closed(self): return self.handle is None or self.handle.closed def has_trace_data(self) -> bool: - return self.traceblock_offset is not None and self.handle.size() > self.traceblock_offset + # print(f"{os.path.getsize(self.path)} xxxx {self.traceblock_offset} xxx {self.traceblock_offset is not None and os.path.getsize(self.path) > self.traceblock_offset}") + return self.traceblock_offset is not None and os.path.getsize(self.path) > self.traceblock_offset def update_headers_with_traces_metadata(self, traces: List[Trace]) -> None: # Check if any of the following headers are NOT initialized: @@ -254,34 +233,33 @@ def set_traces(self, index: Union[slice, int], traces: List[Trace]) -> None: raise ValueError('Trace has a different length from the expected length and padding mode is NONE') # Seek to the beginning of the trace (this automatically enables us to overwrite) - self.file_handle.seek(self.traceblock_offset + i * self.trace_length) + self.handle.seek(self.traceblock_offset + i * self.trace_length) # Title and title padding title = trace.title.strip().encode('utf-8') if len(title) > self.headers[Header.TITLE_SPACE]: raise TypeError('Trace title is longer than available title space') - self.file_handle.write(title) + self.handle.write(title) if len(title) < self.headers[Header.TITLE_SPACE]: - self.file_handle.write(bytes(self.headers[Header.TITLE_SPACE] - len(title))) + self.handle.write(bytes(self.headers[Header.TITLE_SPACE] - len(title))) # Parameters - self.file_handle.write(trace.parameters.serialize()) + self.handle.write(trace.parameters.serialize()) # Automatic truncate - trace.samples[:self.headers[Header.NUMBER_SAMPLES]].tofile(self.file_handle) + trace.samples[:self.headers[Header.NUMBER_SAMPLES]].tofile(self.handle) # Add any required padding length = (self.headers[Header.NUMBER_SAMPLES] - len(trace.samples)) * self.headers[ Header.SAMPLE_CODING].size if length > 0: - self.file_handle.write(length * b'\x00') + self.handle.write(length * b'\x00') # Write the new total number of traces # If you want to have live update, you can give this flag and have this # automatically write to the file new_number_traces = max(self.headers[Header.NUMBER_TRACES], max(indexes) + 1) if self.headers[Header.NUMBER_TRACES] < new_number_traces: - self.is_mmap_synched = False self.live_update_count += len(traces) if self.live_update != 0 and self.live_update_count >= self.live_update: @@ -290,8 +268,8 @@ def set_traces(self, index: Union[slice, int], traces: List[Trace]) -> None: # Force flush self.handle.flush() - self.file_handle.flush() - os.fsync(self.file_handle.fileno()) + self.handle.flush() + os.fsync(self.handle.fileno()) else: self.headers[Header.NUMBER_TRACES] = new_number_traces @@ -311,14 +289,6 @@ def get_traces(self, index: Union[slice, int]) -> List[Trace]: indexes = range(index, index + 1) - # We need to resize the mmap if we added something directly on the file handle - # We do it here for optimization purposes, if you do not read, no resizing :) - if not self.is_mmap_synched and not self.read_only: - total_file_size = self.traceblock_offset + (self.length() + 1) * self.trace_length - if self.handle.size() < total_file_size: - self.handle.resize(total_file_size) - self.is_mmap_synched = True - # Now read in all traces traces = [] for i in indexes: @@ -367,11 +337,10 @@ def close(self): if not self.read_only: self.__write_headers({Header.NUMBER_TRACES: self.headers[Header.NUMBER_TRACES]}) - # Flush the mmap (according to docs this is important) and close self.handle.flush() self.handle.close() - if self.file_handle is not None and not self.file_handle.closed: - self.file_handle.close() + if self.handle is not None and not self.handle.closed: + self.handle.close() def update_headers(self, headers: Dict[Header, Any]): changed_headers = super().update_headers(headers) @@ -479,7 +448,9 @@ def __write_headers(self, headers: Optional[Dict[Header, Any]] = None): # Update the TLV value offset = self.header_locations[header][0] - self.handle[offset : offset + len(tag_value)] = tag_value + self.handle.seek(offset) + self.handle.write(tag_value) + self.handle.seek(0, os.SEEK_END) else: # Construct the TLV tag = [header.value] @@ -497,16 +468,12 @@ def __write_headers(self, headers: Optional[Dict[Header, Any]] = None): self.traceblock_offset = None # Store this index for future references - if self.handle.size() < self.handle.tell() + len(tag): - self.handle.resize(self.handle.tell() + len(tag)) self.handle.write(bytes(tag)) self.header_locations[header] = (self.handle.tell() - len(tag_value), tag_length) # Save the TRACE_BLOCK if not already saved if Header.TRACE_BLOCK not in self.header_locations: # Write the TRACE_BLOCK - if self.handle.size() < self.handle.tell() + len(TrsEngine._TRACE_BLOCK_START): - self.handle.resize(self.handle.tell() + len(TrsEngine._TRACE_BLOCK_START)) self.handle.write(TrsEngine._TRACE_BLOCK_START) # Calculate offset diff --git a/trsfile/traceparameter.py b/trsfile/traceparameter.py index 494115d..0e83f15 100644 --- a/trsfile/traceparameter.py +++ b/trsfile/traceparameter.py @@ -7,7 +7,8 @@ from io import BytesIO from typing import Any -from numpy import ndarray, integer, bool8, uint8, double, single +from numpy import ndarray, integer, uint8, double, single +from numpy import bool as bool8 from trsfile.utils import encode_as_short, read_short From 525b5d6584f02674a71c1f872918afbaa21f602c Mon Sep 17 00:00:00 2001 From: Cees-Bart Breunesse Date: Wed, 19 Mar 2025 09:38:15 -0700 Subject: [PATCH 2/2] cherry picked for the pull request --- README.md | 4 --- setup.cfg | 2 +- trsfile/engine/trs.py | 79 ++++++++++++++++++++++++++++++------------- 3 files changed, 57 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 40b4daa..0a41400 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,3 @@ -# YO! - -This is a fork and I (ceeesb@gmail.com) killed the mmap / mmap resize dependencies in `trs.py` so that you may use this library on a mac to create trs files. I also fixed the `numpy.bool8` issue so that installing trsfile doesn't pull back the numpy version into the stone age. - # Inspector Trace Set `.trs` file support in Python [![Build Status](https://app.travis-ci.com/Riscure/python-trsfile.svg?branch=master)](https://app.travis-ci.com/Riscure/python-trsfile) [![Documentation Status](https://readthedocs.org/projects/trsfile/badge/)](https://trsfile.readthedocs.io/) diff --git a/setup.cfg b/setup.cfg index 9a58b6f..dfd9804 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ packages = trsfile.engine trsfile.converters install_requires = - numpy + numpy>=2 include_package_data = True [options.extras_require] diff --git a/trsfile/engine/trs.py b/trsfile/engine/trs.py index 6d1182e..80e3b9f 100644 --- a/trsfile/engine/trs.py +++ b/trsfile/engine/trs.py @@ -1,4 +1,5 @@ import os +import mmap import struct from typing import List, Union, Dict, Any, Optional @@ -42,12 +43,14 @@ class TrsEngine(Engine): def __init__(self, path, mode='x', **options): self.path = path if type(path) is str else str(path) self.handle = None - self.handle = None + self.file_handle = None self.traceblock_offset = None self.sample_length = None self.trace_length = None + self.is_mmap_synched = False + # Initialize empty dictionaries self.headers = {} self.header_locations = {} @@ -72,7 +75,8 @@ def __init__(self, path, mode='x', **options): if not os.path.isfile(self.path): raise FileNotFoundError('No TRS file: \'{0:s}\''.format(self.path)) - self.handle = open(self.path, 'r+b') + self.file_handle = open(self.path, 'rb') + self.handle = mmap.mmap(self.file_handle.fileno(), 0, access=mmap.ACCESS_READ) self.read_only = True self.read_headers = True @@ -81,7 +85,15 @@ def __init__(self, path, mode='x', **options): if headers is not None and any(not isinstance(header, Header) for header in headers): raise TypeError('Creation of TRS files requires passing Headers to the constructor.') - self.handle = open(self.path, 'w+b') + # Sadly, to memory map we need a file with a minimum of length 1 + self.file_handle = open(self.path, 'wb') + self.file_handle.write(b'\x00') + self.file_handle.close() + + # Now we can open it properly + self.file_handle = open(self.path, 'r+b') + self.handle = mmap.mmap(self.file_handle.fileno(), 0, access=mmap.ACCESS_WRITE) + self.read_only = False self.read_headers = False @@ -93,7 +105,13 @@ def __init__(self, path, mode='x', **options): if os.path.isfile(self.path): raise FileExistsError('TRS file exists: \'{0:s}\''.format(self.path)) - self.handle = open(self.path, 'w+b') + # Sadly, to memory map we need a file with a minimum of length 1 + self.file_handle = open(self.path, 'wb') + self.file_handle.write(b'\x00') + self.file_handle.close() + + self.file_handle = open(self.path, 'r+b') + self.handle = mmap.mmap(self.file_handle.fileno(), 0, access=mmap.ACCESS_WRITE) self.read_only = False self.read_headers = False @@ -106,6 +124,12 @@ def __init__(self, path, mode='x', **options): else: self.read_headers = False + # We need to create an empty file + # Sadly, to memory map we need a file with a minimum of length 1 + self.file_handle = open(self.path, 'wb') + self.file_handle.write(b'\x00') + self.file_handle.close() + if self.read_headers and headers is not None: raise TypeError('Cannot change headers when reading TRS files.') elif not self.read_headers and headers is not None and any( @@ -113,10 +137,8 @@ def __init__(self, path, mode='x', **options): raise TypeError('Creation of TRS files requires passing instances of Headers to the constructor.') # NOTE: We are using r+b mode because we are essentially updating the file! - if self.read_headers: - self.handle = open(self.path, 'r+b') - else: - self.handle = open(self.path, 'w+b') + self.file_handle = open(self.path, 'r+b') + self.handle = mmap.mmap(self.file_handle.fileno(), 0, access=mmap.ACCESS_WRITE) self.read_only = False else: @@ -131,8 +153,7 @@ def is_closed(self): return self.handle is None or self.handle.closed def has_trace_data(self) -> bool: - # print(f"{os.path.getsize(self.path)} xxxx {self.traceblock_offset} xxx {self.traceblock_offset is not None and os.path.getsize(self.path) > self.traceblock_offset}") - return self.traceblock_offset is not None and os.path.getsize(self.path) > self.traceblock_offset + return self.traceblock_offset is not None and self.handle.size() > self.traceblock_offset def update_headers_with_traces_metadata(self, traces: List[Trace]) -> None: # Check if any of the following headers are NOT initialized: @@ -233,33 +254,34 @@ def set_traces(self, index: Union[slice, int], traces: List[Trace]) -> None: raise ValueError('Trace has a different length from the expected length and padding mode is NONE') # Seek to the beginning of the trace (this automatically enables us to overwrite) - self.handle.seek(self.traceblock_offset + i * self.trace_length) + self.file_handle.seek(self.traceblock_offset + i * self.trace_length) # Title and title padding title = trace.title.strip().encode('utf-8') if len(title) > self.headers[Header.TITLE_SPACE]: raise TypeError('Trace title is longer than available title space') - self.handle.write(title) + self.file_handle.write(title) if len(title) < self.headers[Header.TITLE_SPACE]: - self.handle.write(bytes(self.headers[Header.TITLE_SPACE] - len(title))) + self.file_handle.write(bytes(self.headers[Header.TITLE_SPACE] - len(title))) # Parameters - self.handle.write(trace.parameters.serialize()) + self.file_handle.write(trace.parameters.serialize()) # Automatic truncate - trace.samples[:self.headers[Header.NUMBER_SAMPLES]].tofile(self.handle) + trace.samples[:self.headers[Header.NUMBER_SAMPLES]].tofile(self.file_handle) # Add any required padding length = (self.headers[Header.NUMBER_SAMPLES] - len(trace.samples)) * self.headers[ Header.SAMPLE_CODING].size if length > 0: - self.handle.write(length * b'\x00') + self.file_handle.write(length * b'\x00') # Write the new total number of traces # If you want to have live update, you can give this flag and have this # automatically write to the file new_number_traces = max(self.headers[Header.NUMBER_TRACES], max(indexes) + 1) if self.headers[Header.NUMBER_TRACES] < new_number_traces: + self.is_mmap_synched = False self.live_update_count += len(traces) if self.live_update != 0 and self.live_update_count >= self.live_update: @@ -268,8 +290,8 @@ def set_traces(self, index: Union[slice, int], traces: List[Trace]) -> None: # Force flush self.handle.flush() - self.handle.flush() - os.fsync(self.handle.fileno()) + self.file_handle.flush() + os.fsync(self.file_handle.fileno()) else: self.headers[Header.NUMBER_TRACES] = new_number_traces @@ -289,6 +311,14 @@ def get_traces(self, index: Union[slice, int]) -> List[Trace]: indexes = range(index, index + 1) + # We need to resize the mmap if we added something directly on the file handle + # We do it here for optimization purposes, if you do not read, no resizing :) + if not self.is_mmap_synched and not self.read_only: + total_file_size = self.traceblock_offset + (self.length() + 1) * self.trace_length + if self.handle.size() < total_file_size: + self.handle.resize(total_file_size) + self.is_mmap_synched = True + # Now read in all traces traces = [] for i in indexes: @@ -337,10 +367,11 @@ def close(self): if not self.read_only: self.__write_headers({Header.NUMBER_TRACES: self.headers[Header.NUMBER_TRACES]}) + # Flush the mmap (according to docs this is important) and close self.handle.flush() self.handle.close() - if self.handle is not None and not self.handle.closed: - self.handle.close() + if self.file_handle is not None and not self.file_handle.closed: + self.file_handle.close() def update_headers(self, headers: Dict[Header, Any]): changed_headers = super().update_headers(headers) @@ -448,9 +479,7 @@ def __write_headers(self, headers: Optional[Dict[Header, Any]] = None): # Update the TLV value offset = self.header_locations[header][0] - self.handle.seek(offset) - self.handle.write(tag_value) - self.handle.seek(0, os.SEEK_END) + self.handle[offset : offset + len(tag_value)] = tag_value else: # Construct the TLV tag = [header.value] @@ -468,12 +497,16 @@ def __write_headers(self, headers: Optional[Dict[Header, Any]] = None): self.traceblock_offset = None # Store this index for future references + if self.handle.size() < self.handle.tell() + len(tag): + self.handle.resize(self.handle.tell() + len(tag)) self.handle.write(bytes(tag)) self.header_locations[header] = (self.handle.tell() - len(tag_value), tag_length) # Save the TRACE_BLOCK if not already saved if Header.TRACE_BLOCK not in self.header_locations: # Write the TRACE_BLOCK + if self.handle.size() < self.handle.tell() + len(TrsEngine._TRACE_BLOCK_START): + self.handle.resize(self.handle.tell() + len(TrsEngine._TRACE_BLOCK_START)) self.handle.write(TrsEngine._TRACE_BLOCK_START) # Calculate offset