diff --git a/doc/information.rst b/doc/information.rst index 38a1e3aa..d81ea7d7 100644 --- a/doc/information.rst +++ b/doc/information.rst @@ -83,8 +83,8 @@ HDF5 compression filters and compression libraries sources were obtained from: (commit `4bbe9df7e4bcb `_) using `SZ3 `_ and ZStd. * `HDF5-ZFP plugin `_ (v1.1.1) using ZFP. -* `HDF5Plugin-Zstandard `_ - (commit `d5afdb5 `_) using ZStd. +* `ZStd plugin `_ + (commit `5573db8 `_) using ZStd. Sources of compression libraries shared accross multiple filters were obtained from: diff --git a/lib/HDF5Plugin-Zstandard/.gitignore b/lib/HDF5Plugin-Zstandard/.gitignore deleted file mode 100644 index f805e810..00000000 --- a/lib/HDF5Plugin-Zstandard/.gitignore +++ /dev/null @@ -1,33 +0,0 @@ -# Object files -*.o -*.ko -*.obj -*.elf - -# Precompiled Headers -*.gch -*.pch - -# Libraries -*.lib -*.a -*.la -*.lo - -# Shared objects (inc. Windows DLLs) -*.dll -*.so -*.so.* -*.dylib - -# Executables -*.exe -*.out -*.app -*.i*86 -*.x86_64 -*.hex - -# Debug files -*.dSYM/ -*.su diff --git a/lib/HDF5Plugin-Zstandard/CMakeLists.txt b/lib/HDF5Plugin-Zstandard/CMakeLists.txt deleted file mode 100644 index f8a85d7a..00000000 --- a/lib/HDF5Plugin-Zstandard/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -cmake_minimum_required(VERSION 2.8.10) -project(zstd_hdf5) - -# options -set(PLUGIN_INSTALL_PATH "/usr/local/hdf5/lib/plugin" CACHE PATH - "Where to install the dynamic HDF5-plugin") - -# sources -set(SOURCES zstd_h5plugin.c) -set(PLUGIN_SOURCES zstd_h5plugin.c) - -# dependencies -if(MSVC) - # FindHDF5.cmake does not find Windows installations. Try to - # use an environment variable instead until the official "find" - # file can be updated for Windows. - # - # Note that you have to set this environment variable by hand. 
- file(TO_CMAKE_PATH "$ENV{HDF5_DIR}" HDF5_HINT) - set(HDF5_DIR ${HDF5_HINT} CACHE STRING "Path to HDF5 CMake config directory.") - find_package(HDF5 REQUIRED HINTS ${HDF5_DIR}) -else(MSVC) - find_package(HDF5 REQUIRED) -endif(MSVC) -include_directories(${HDF5_INCLUDE_DIRS}) - -# HDF5 plugin as shared library -add_library(zstd_h5_plugin_shared SHARED ${PLUGIN_SOURCES}) -set_target_properties(zstd_h5_plugin_shared PROPERTIES OUTPUT_NAME H5Zzstd) -target_link_libraries(zstd_h5_plugin_shared zstd ${HDF5_LIBRARIES}) -install(TARGETS zstd_h5_plugin_shared DESTINATION ${PLUGIN_INSTALL_PATH} COMPONENT HDF5_FILTER_DEV) diff --git a/lib/HDF5Plugin-Zstandard/README.md b/lib/HDF5Plugin-Zstandard/README.md deleted file mode 100644 index 43a8fcd2..00000000 --- a/lib/HDF5Plugin-Zstandard/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# HDF5Plugin-Zstandard - -*HDF5* is a data model, library, and file format for storing and -managing data. It supports an unlimited variety of datatypes, and is -designed for flexible and efficient I/O and for high volume and -complex data. HDF5 is portable and is extensible, allowing -applications to evolve in their use of HDF5. The HDF5 Technology suite -includes tools and applications for managing, manipulating, viewing, -and analyzing data in the HDF5 format. - -https://support.hdfgroup.org/HDF5/ - ---- - -*Zstandard* is a real-time compression algorithm, providing high -compression ratios. It offers a very wide range of compression/speed -trade-off, while being backed by a very fast decoder. - -http://www.zstd.net - ---- - -This repository provides an implementation of Zstandard compression -filter plugin for HDF5 with the assigned filter code 32015. - ---- - -## Build - -This plugin can be built with cmake and installed as shared library to `/usr/local/hdf5/lib/plugin` (or a custom path). - -```bash -cmake . 
-make -sudo make install -``` diff --git a/lib/HDF5Plugin-Zstandard/zstd_h5plugin.c b/lib/HDF5Plugin-Zstandard/zstd_h5plugin.c deleted file mode 100644 index 5a3dfd38..00000000 --- a/lib/HDF5Plugin-Zstandard/zstd_h5plugin.c +++ /dev/null @@ -1,81 +0,0 @@ -#include -#include "zstd_h5plugin.h" -#include "zstd.h" - -#define ZSTD_FILTER 32015 - -DLL_EXPORT size_t zstd_filter(unsigned int flags, size_t cd_nelmts, - const unsigned int cd_values[], size_t nbytes, - size_t *buf_size, void **buf) -{ - void *outbuf = NULL; /* Pointer to new output buffer */ - void *inbuf = NULL; /* Pointer to input buffer */ - inbuf = *buf; - - size_t ret_value; - size_t origSize = nbytes; /* Number of bytes for output (compressed) buffer */ - - if (flags & H5Z_FLAG_REVERSE) - { - size_t decompSize = ZSTD_getDecompressedSize(*buf, origSize); - if (NULL == (outbuf = malloc(decompSize))) - goto error; - - decompSize = ZSTD_decompress(outbuf, decompSize, inbuf, origSize); - - free(*buf); - *buf = outbuf; - outbuf = NULL; - ret_value = (size_t)decompSize; - } - else - { - int aggression; - if (cd_nelmts > 0) - aggression = (int)cd_values[0]; - else - aggression = ZSTD_CLEVEL_DEFAULT; - if (aggression < 1 /*ZSTD_minCLevel()*/) - aggression = 1 /*ZSTD_minCLevel()*/; - else if (aggression > ZSTD_maxCLevel()) - aggression = ZSTD_maxCLevel(); - - size_t compSize = ZSTD_compressBound(origSize); - if (NULL == (outbuf = malloc(compSize))) - goto error; - - compSize = ZSTD_compress(outbuf, compSize, inbuf, origSize, aggression); - - free(*buf); - *buf = outbuf; - *buf_size = compSize; - outbuf = NULL; - ret_value = compSize; - } - if (outbuf != NULL) - free(outbuf); - return ret_value; - -error: - return 0; -} - -const H5Z_class_t zstd_H5Filter = -{ - H5Z_CLASS_T_VERS, - (H5Z_filter_t)(ZSTD_FILTER), - 1, 1, - "Zstandard compression: http://www.zstd.net", - NULL, NULL, - (H5Z_func_t)(zstd_filter) -}; - -DLL_EXPORT H5PL_type_t H5PLget_plugin_type(void) -{ - return H5PL_TYPE_FILTER; -} - -DLL_EXPORT const 
void* H5PLget_plugin_info(void) -{ - return &zstd_H5Filter; -} diff --git a/lib/HDF5Plugin-Zstandard/zstd_h5plugin.h b/lib/HDF5Plugin-Zstandard/zstd_h5plugin.h deleted file mode 100644 index e29c0689..00000000 --- a/lib/HDF5Plugin-Zstandard/zstd_h5plugin.h +++ /dev/null @@ -1,22 +0,0 @@ -#include "hdf5.h" - -#if defined(_MSC_VER) - #define DLL_EXPORT __declspec(dllexport) -#else - #define DLL_EXPORT -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -DLL_EXPORT size_t zstd_filter(unsigned int flags, size_t cd_nelmts, - const unsigned int cd_values[], size_t nbytes, - size_t *buf_size, void **buf); - -DLL_EXPORT H5PL_type_t H5PLget_plugin_type(void); -DLL_EXPORT const void* H5PLget_plugin_info(void); - -#ifdef __cplusplus -} -#endif diff --git a/lib/HDF5Plugin-Zstandard/LICENSE b/lib/hdf5_plugins/ZSTD/Additional_Legal/LICENSE similarity index 100% rename from lib/HDF5Plugin-Zstandard/LICENSE rename to lib/hdf5_plugins/ZSTD/Additional_Legal/LICENSE diff --git a/lib/hdf5_plugins/ZSTD/Additional_Legal/PyTables_Copyrights_and_Licenses.txt b/lib/hdf5_plugins/ZSTD/Additional_Legal/PyTables_Copyrights_and_Licenses.txt new file mode 100644 index 00000000..558344cf --- /dev/null +++ b/lib/hdf5_plugins/ZSTD/Additional_Legal/PyTables_Copyrights_and_Licenses.txt @@ -0,0 +1,36 @@ +PyTables Copyright Statement +============================ + +Copyright Notice and Statement for PyTables Software Library and Utilities: + +Copyright (c) 2002, 2003, 2004 Francesc Altet +Copyright (c) 2005, 2006, 2007 Carabos Coop. V. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +a. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +b. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +c. Neither the name of the Carabos Coop. V. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/hdf5_plugins/ZSTD/README.txt b/lib/hdf5_plugins/ZSTD/README.txt new file mode 100644 index 00000000..0a78ae77 --- /dev/null +++ b/lib/hdf5_plugins/ZSTD/README.txt @@ -0,0 +1,22 @@ +Building this filter/example requires knowledge of the hdf5 and the zstd +installation. + +For autotools configure, you must supply these using + --with-hdf5 and --with-zstdlib. +Example (in the build folder): + ../configure --with-hdf5=/temp/hdf5 --with-zstdlib=/temp/zstd + +For CMake, you must supply the location of the cmake configuration files + in environment variables. + In addition, CMake options "H5PL_BUILD_EXAMPLES" and "H5PL_BUILD_TESTING" must + be set "ON" in order to build the example and run the tests. 
+Example: + set(ENV{HDF5_ROOT} "/temp/hdf5/") + set(ENV{ZSTD_ROOT} "/temp/zstd/") + set(ENV{LD_LIBRARY_PATH} "/temp/zstd/lib:/temp/hdf5/lib") + set(ADD_BUILD_OPTIONS "-DH5PL_BUILD_EXAMPLES:BOOL=ON -DH5PL_BUILD_TESTING:BOOL=ON") + + For non-cmake built hdf5 or zstd, use the location of the include/lib + folders: + set(ENV{HDF5_ROOT} "/temp/hdf5") + set(ENV{ZSTD_ROOT} "/temp/zstd") diff --git a/lib/hdf5_plugins/ZSTD/src/H5Zzstd.c b/lib/hdf5_plugins/ZSTD/src/H5Zzstd.c new file mode 100644 index 00000000..b96fa84c --- /dev/null +++ b/lib/hdf5_plugins/ZSTD/src/H5Zzstd.c @@ -0,0 +1,123 @@ +/* + * ZSTD HDF5 filter + * + * Author: Mark Rivers + * Created: 2019 + * + * + */ + +#include +#include +#include +#include +#include + +#include "H5PLextern.h" + +#include "zstd.h" + +static size_t H5Z_filter_zstd(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], + size_t nbytes, size_t *buf_size, void **buf); + +#define H5Z_FILTER_ZSTD 32015 + +#define PUSH_ERR(func, minor, str) \ + H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__, H5E_ERR_CLS, H5E_PLINE, minor, str) + +const H5Z_class2_t H5Z_ZSTD[1] = {{ + H5Z_CLASS_T_VERS, /* H5Z_class_t version */ + (H5Z_filter_t)H5Z_FILTER_ZSTD, /* Filter id number */ +#ifdef FILTER_DECODE_ONLY + 0, /* encoder_present flag (false is not available) */ +#else + 1, /* encoder_present flag (set to true) */ +#endif + 1, /* decoder_present flag (set to true) */ + "HDF5 zstd filter; see " + "https://github.com/HDFGroup/hdf5_plugins/blob/master/docs/RegisteredFilterPlugins.md", + /* Filter name for debugging */ + NULL, /* The "can apply" callback */ + NULL, /* The "set local" callback */ + (H5Z_func_t)H5Z_filter_zstd, /* The actual filter function */ +}}; + +H5PL_type_t +H5PLget_plugin_type(void) +{ + return H5PL_TYPE_FILTER; +} +const void * +H5PLget_plugin_info(void) +{ + return H5Z_ZSTD; +} + +static size_t +H5Z_filter_zstd(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], size_t nbytes, + size_t *buf_size, void 
**buf) +{ + size_t buf_size_out = 0; + size_t origSize = nbytes; /* Number of bytes for output (compressed) buffer */ + void *outbuf = NULL; + void *inbuf = NULL; /* Pointer to input buffer */ + inbuf = *buf; + + if (flags & H5Z_FLAG_REVERSE) { + /* We're decompressing */ + size_t decompSize = ZSTD_getFrameContentSize(*buf, origSize); + if (NULL == (outbuf = malloc(decompSize))) + goto error; + + decompSize = ZSTD_decompress(outbuf, decompSize, inbuf, origSize); + +#ifdef ZSTD_DEBUG + fprintf(stderr, " decompressing nbytes: %ld\n", decompSize); +#endif + + buf_size_out = decompSize; + } + else { + /* We're compressing */ + /* + * cd_values[0] = aggression + * + * As of Zstandard v1.5.7 + * ZSTD_minCLevel() == -1<<17 == -131072 + * ZSTD_maxCLevel() == 22 + * + * Negative compression levels are faster at the cost of compression + * aggression >= 20 require more memory + */ + int aggression; + if (cd_nelmts > 0) + aggression = (int)cd_values[0]; + else + aggression = ZSTD_CLEVEL_DEFAULT; + if (aggression < ZSTD_minCLevel()) + aggression = ZSTD_minCLevel(); + else if (aggression > ZSTD_maxCLevel()) + aggression = ZSTD_maxCLevel(); + + size_t compSize = ZSTD_compressBound(origSize); + if (NULL == (outbuf = malloc(compSize))) + goto error; + + compSize = ZSTD_compress(outbuf, compSize, inbuf, origSize, aggression); + +#ifdef ZSTD_DEBUG + fprintf(stderr, " compressing nbytes: %ld\n", compSize); +#endif + + buf_size_out = compSize; + } + free(*buf); + *buf = outbuf; + *buf_size = buf_size_out; + return buf_size_out; + +error: + if (outbuf) + free(outbuf); + return 0; +} diff --git a/setup.py b/setup.py index 79e9752b..e943ae51 100644 --- a/setup.py +++ b/setup.py @@ -1243,15 +1243,13 @@ def _get_blosc2_plugin(): PLUGIN_LIB_DEPENDENCIES["blosc2"] = "lz4", "zlib", "zstd" -def _get_zstandard_plugin(): - """HDF5Plugin-Zstandard plugin build config""" - zstandard_dir = "lib/HDF5Plugin-Zstandard" - +def _get_zstd_plugin(): + """Zstd plugin build config""" return 
def _to_uint32(value: int) -> int:
    """Reinterpret a signed 32-bit integer as the uint32 sharing the same bits.

    Useful to pass negative integers as hdf5 filter options (uint32).

    :param value: Integer that fits in a signed 32-bit integer.
    :returns: The corresponding non-negative Python int.
    :raises struct.error: If value does not fit in a signed 32-bit integer.
    """
    # "=" selects standard sizes, so "i" and "I" are exactly 4 bytes everywhere
    return int(struct.unpack("=I", struct.pack("=i", value))[0])


def _from_uint32(value: int) -> int:
    """Reinterpret a value stored as uint32 as a signed 32-bit integer.

    Inverse of :func:`_to_uint32`.

    :param value: Integer that fits in an unsigned 32-bit integer.
    :returns: The corresponding signed Python int.
    :raises struct.error: If value does not fit in an unsigned 32-bit integer.
    """
    # "=" selects standard sizes, so "I" and "i" are exactly 4 bytes everywhere
    return int(struct.unpack("=i", struct.pack("=I", value))[0])
def __init__(self, clevel: int = 3):
    """Set up the Zstd filter for the requested compression level.

    :param clevel: Compression level, between ``_ZSTD_MIN_CLEVEL`` and
        ``_ZSTD_MAX_CLEVEL`` inclusive. Default: 3.
    :raises ValueError: If clevel is outside the supported range.
    """
    in_range = self._ZSTD_MIN_CLEVEL <= clevel <= self._ZSTD_MAX_CLEVEL
    if not in_range:
        message = f"clevel must be in the range [{self._ZSTD_MIN_CLEVEL}, {self._ZSTD_MAX_CLEVEL}]"
        raise ValueError(message)

    # Filter options carry the level converted with _to_uint32;
    # the config keeps the signed value as given by the caller.
    options = (_to_uint32(clevel),)
    super().__init__(filter_options=options, config={"clevel": clevel})