From 64c50ae5872711d2bc16d524bf16099125450f10 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Fri, 15 May 2026 16:06:32 +0200 Subject: [PATCH 1/8] feature - implement std.compression one-shot codecs (#339, #548) --- crates/incan_core/src/lang/stdlib.rs | 79 +++++++ .../stdlib/compression/_auto.incn | 60 ++++++ .../stdlib/compression/_core.incn | 200 ++++++++++++++++++ .../incan_stdlib/stdlib/compression/bz2.incn | 56 +++++ .../stdlib/compression/deflate.incn | 57 +++++ .../incan_stdlib/stdlib/compression/gzip.incn | 57 +++++ .../incan_stdlib/stdlib/compression/lzma.incn | 55 +++++ .../stdlib/compression/prelude.incn | 9 + .../stdlib/compression/snappy.incn | 46 ++++ .../stdlib/compression/snappy/raw.incn | 43 ++++ .../incan_stdlib/stdlib/compression/zlib.incn | 57 +++++ .../incan_stdlib/stdlib/compression/zstd.incn | 52 +++++ crates/rust_inspect/src/cache.rs | 2 +- crates/rust_inspect/src/extractor.rs | 59 +++++- src/backend/ir/codegen.rs | 68 +++--- src/backend/ir/emit/decls/mod.rs | 21 +- src/backend/ir/emit/expressions/mod.rs | 41 +++- src/backend/ir/lower/decl/functions.rs | 2 +- src/backend/ir/lower/decl/mod.rs | 17 ++ src/frontend/typechecker/check_expr/access.rs | 9 +- .../check_expr/calls/rust_boundary.rs | 6 +- src/frontend/typechecker/mod.rs | 54 +++++ src/frontend/typechecker/tests.rs | 28 ++- tests/codegen_snapshot_tests.rs | 28 +++ .../valid/std_compression_surface.incn | 33 +++ tests/integration_tests.rs | 33 +++ ...gen_snapshot_tests__std_uuid_compiled.snap | 2 +- .../docs/RFCs/061_std_compression.md | 94 +++++++- 28 files changed, 1217 insertions(+), 51 deletions(-) create mode 100644 crates/incan_stdlib/stdlib/compression/_auto.incn create mode 100644 crates/incan_stdlib/stdlib/compression/_core.incn create mode 100644 crates/incan_stdlib/stdlib/compression/bz2.incn create mode 100644 crates/incan_stdlib/stdlib/compression/deflate.incn create mode 100644 crates/incan_stdlib/stdlib/compression/gzip.incn create mode 100644 
crates/incan_stdlib/stdlib/compression/lzma.incn create mode 100644 crates/incan_stdlib/stdlib/compression/prelude.incn create mode 100644 crates/incan_stdlib/stdlib/compression/snappy.incn create mode 100644 crates/incan_stdlib/stdlib/compression/snappy/raw.incn create mode 100644 crates/incan_stdlib/stdlib/compression/zlib.incn create mode 100644 crates/incan_stdlib/stdlib/compression/zstd.incn create mode 100644 tests/fixtures/valid/std_compression_surface.incn diff --git a/crates/incan_core/src/lang/stdlib.rs b/crates/incan_core/src/lang/stdlib.rs index 429fbe18e..269b16da4 100644 --- a/crates/incan_core/src/lang/stdlib.rs +++ b/crates/incan_core/src/lang/stdlib.rs @@ -350,6 +350,45 @@ pub const STDLIB_NAMESPACES: &[StdlibNamespace] = &[ submodules: &["_core", "_streaming", "prelude"], typechecker_only: false, }, + StdlibNamespace { + name: "compression", + feature: None, + extra_crate_deps: &[ + StdlibExtraCrateDep { + crate_name: "flate2", + source: StdlibExtraCrateSource::Version("1"), + }, + StdlibExtraCrateDep { + crate_name: "zstd", + source: StdlibExtraCrateSource::Version("0.13"), + }, + StdlibExtraCrateDep { + crate_name: "bzip2", + source: StdlibExtraCrateSource::Version("0.6"), + }, + StdlibExtraCrateDep { + crate_name: "xz2", + source: StdlibExtraCrateSource::Version("0.1"), + }, + StdlibExtraCrateDep { + crate_name: "snap", + source: StdlibExtraCrateSource::Version("1"), + }, + ], + submodules: &[ + "_core", + "_auto", + "gzip", + "zlib", + "deflate", + "zstd", + "bz2", + "lzma", + "snappy", + "snappy.raw", + ], + typechecker_only: false, + }, StdlibNamespace { name: "tempfile", feature: None, @@ -546,6 +585,11 @@ mod tests { assert!(is_known_stdlib_module(&segs(&["std", "encoding", "base58"]))); assert!(is_known_stdlib_module(&segs(&["std", "encoding", "bech32"]))); assert!(is_known_stdlib_module(&segs(&["std", "hash"]))); + assert!(is_known_stdlib_module(&segs(&["std", "compression"]))); + assert!(is_known_stdlib_module(&segs(&["std", 
"compression", "_core"]))); + assert!(is_known_stdlib_module(&segs(&["std", "compression", "_auto"]))); + assert!(is_known_stdlib_module(&segs(&["std", "compression", "gzip"]))); + assert!(is_known_stdlib_module(&segs(&["std", "compression", "snappy", "raw"]))); assert!(is_known_stdlib_module(&segs(&["std", "tempfile"]))); assert!(is_known_stdlib_module(&segs(&["std", "collections"]))); assert!(is_known_stdlib_module(&segs(&["std", "rust"]))); @@ -632,6 +676,26 @@ mod tests { stdlib_stub_path(&segs(&["std", "hash"])), Some("stdlib/hash/prelude.incn".to_string()) ); + assert_eq!( + stdlib_stub_path(&segs(&["std", "compression"])), + Some("stdlib/compression/prelude.incn".to_string()) + ); + assert_eq!( + stdlib_stub_path(&segs(&["std", "compression", "_core"])), + Some("stdlib/compression/_core.incn".to_string()) + ); + assert_eq!( + stdlib_stub_path(&segs(&["std", "compression", "_auto"])), + Some("stdlib/compression/_auto.incn".to_string()) + ); + assert_eq!( + stdlib_stub_path(&segs(&["std", "compression", "gzip"])), + Some("stdlib/compression/gzip.incn".to_string()) + ); + assert_eq!( + stdlib_stub_path(&segs(&["std", "compression", "snappy", "raw"])), + Some("stdlib/compression/snappy/raw.incn".to_string()) + ); assert_eq!( stdlib_stub_path(&segs(&["std", "tempfile"])), Some("stdlib/tempfile.incn".to_string()) @@ -736,6 +800,7 @@ mod tests { let hash_ns = find_namespace("hash"); let datetime_ns = find_namespace("datetime"); let collections_ns = find_namespace("collections"); + let compression_ns = find_namespace("compression"); assert_eq!(async_ns.and_then(|ns| ns.feature), Some("async")); assert_eq!(reflection_ns.map(|ns| ns.submodules.is_empty()), Some(true)); @@ -775,6 +840,20 @@ mod tests { assert_eq!(hash_ns.map(|ns| ns.submodules.contains(&"_core")), Some(true)); assert_eq!(hash_ns.map(|ns| ns.submodules.contains(&"_streaming")), Some(true)); assert_eq!(hash_ns.map(|ns| ns.typechecker_only), Some(false)); + assert_eq!(compression_ns.map(|ns| ns.feature), 
Some(None)); + assert_eq!(compression_ns.map(|ns| ns.submodules.contains(&"_core")), Some(true)); + assert_eq!(compression_ns.map(|ns| ns.submodules.contains(&"_auto")), Some(true)); + assert_eq!(compression_ns.map(|ns| ns.submodules.contains(&"gzip")), Some(true)); + assert_eq!( + compression_ns.map(|ns| ns.submodules.contains(&"snappy.raw")), + Some(true) + ); + assert_eq!( + compression_ns + .and_then(|ns| ns.extra_crate_deps.first()) + .map(|dep| dep.crate_name), + Some("flate2") + ); assert_eq!(datetime_ns.map(|ns| ns.feature), Some(None)); assert_eq!(datetime_ns.map(|ns| ns.extra_crate_deps.is_empty()), Some(true)); assert_eq!(datetime_ns.map(|ns| ns.submodules.contains(&"civil.naive")), Some(true)); diff --git a/crates/incan_stdlib/stdlib/compression/_auto.incn b/crates/incan_stdlib/stdlib/compression/_auto.incn new file mode 100644 index 000000000..3684dd8d1 --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/_auto.incn @@ -0,0 +1,60 @@ +""" +Autodetection helpers for `std.compression`. + +Autodetection is deliberately decompression-only and signature-driven. Raw deflate and raw Snappy are not guessed +because they lack reliable framing signatures. 
+""" + +from std.compression._core import ( + Codec, + CompressionError, + _allowed, + _looks_like_bz2, + _looks_like_gzip, + _looks_like_snappy_frame, + _looks_like_xz, + _looks_like_zlib, + _looks_like_zstd, + _validate_allowed, +) +from std.compression.gzip import decompress as _gzip_decompress +from std.compression.zlib import decompress as _zlib_decompress +from std.compression.zstd import decompress as _zstd_decompress +from std.compression.bz2 import decompress as _bz2_decompress +from std.compression.lzma import decompress as _lzma_decompress +from std.compression.snappy import decompress as _snappy_decompress + + +pub def decompress_auto(data: bytes, allowed: list[Codec] = Codec.all()) -> Result[tuple[Codec, bytes], CompressionError]: + """ + Decompress bytes by explicit signature-based autodetection. + + Args: + data: Compressed bytes with a detectable framing signature. + allowed: Candidate codecs to consider. The list is binding; codecs not present are not attempted. + + Returns: + `Ok((codec, plain))` when an allowed codec signature matches, or `Err(CompressionError)` when the filter is + empty, no signature matches, or the matched codec rejects the payload. + """ + _validate_allowed(allowed)? 
+ if _allowed(allowed.clone(), Codec.Gzip) and _looks_like_gzip(data): + return Ok((Codec.Gzip, _gzip_decompress(data)?)) + elif _allowed(allowed, Codec.Zstd) and _looks_like_zstd(data): + return Ok((Codec.Zstd, _zstd_decompress(data)?)) + elif _allowed(allowed, Codec.Bz2) and _looks_like_bz2(data): + return Ok((Codec.Bz2, _bz2_decompress(data)?)) + elif _allowed(allowed, Codec.Lzma) and _looks_like_xz(data): + return Ok((Codec.Lzma, _lzma_decompress(data)?)) + elif _allowed(allowed, Codec.Snappy) and _looks_like_snappy_frame(data): + return Ok((Codec.Snappy, _snappy_decompress(data)?)) + elif _allowed(allowed, Codec.Zlib) and _looks_like_zlib(data): + return Ok((Codec.Zlib, _zlib_decompress(data)?)) + return Err( + CompressionError( + kind="unsupported_codec", + codec=None, + operation="decompress_auto", + detail="no allowed compression codec matched the input signature", + ), + ) diff --git a/crates/incan_stdlib/stdlib/compression/_core.incn b/crates/incan_stdlib/stdlib/compression/_core.incn new file mode 100644 index 000000000..06c5edee6 --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/_core.incn @@ -0,0 +1,200 @@ +""" +Shared `std.compression` vocabulary and internal helpers. + +Public callers should import `Codec`, `CompressionError`, and codec modules from `std.compression`. The underscored +helpers in this module are implementation details shared by sibling codec modules and the autodetection module. +""" + +from std.traits.error import Error + + +pub enum Codec(str): + """ + Compression codec identity used by `std.compression` autodetection. + """ + + Gzip = "gzip" + Zlib = "zlib" + Deflate = "deflate" + Zstd = "zstd" + Bz2 = "bz2" + Lzma = "lzma" + Snappy = "snappy" + + @staticmethod + def all() -> list[Codec]: + """ + Return the stable codec order used by default autodetection filters. + + Raw Snappy block compression is intentionally absent because it has no framing signature. 
+ """ + return [Codec.Gzip, Codec.Zlib, Codec.Deflate, Codec.Zstd, Codec.Bz2, Codec.Lzma, Codec.Snappy] + + +@derive(Clone) +pub model CompressionError with Error: + """ + Stable compression failure boundary. + + `kind` is intended for branching on categories such as `invalid_data`, `truncated_input`, `unsupported_codec`, + `unsupported_option`, `invalid_level`, `invalid_chunk_size`, `ambiguous_autodetection`, `io`, and `backend`. + """ + + pub kind: str + pub codec: Option[Codec] + pub operation: str + pub detail: str + + def message(self) -> str: + """ + Return the human-readable error detail. + """ + return self.detail + + def source(self) -> Option[str]: + """ + Return a structured source error when one is available. + + Compression currently normalizes backend details into `detail`, so there is no nested source value. + """ + return None + + +pub def _level_or_default(codec: Codec, level: Option[int], default: int, minimum: int, maximum: int) -> Result[int, CompressionError]: + """ + Validate a portable integer compression-level request. + """ + match level: + Some(value) => + if value < minimum or value > maximum: + return Err( + CompressionError( + kind="invalid_level", + codec=Some(codec), + operation="compress", + detail=f"level for {codec.value()} must be between {minimum} and {maximum}", + ), + ) + return Ok(value) + None => return Ok(default) + + +pub def _reject_level(codec: Codec, level: Option[int]) -> Result[None, CompressionError]: + """ + Reject a level argument for codecs without level support. + """ + match level: + Some(_) => + return Err( + CompressionError( + kind="unsupported_option", + codec=Some(codec), + operation="compress", + detail=f"{codec.value()} does not support configurable compression levels", + ), + ) + None => return Ok(None) + + +pub def _codec_error(codec: Codec, operation: str, detail: str) -> CompressionError: + """ + Build a codec failure from backend error text. 
+ + Backend crates expose different error enums and messages. The first source pass keeps a stable Incan `kind` by + classifying the most common invalid/truncated markers and otherwise preserving the backend text as `backend`. + """ + mut kind = "backend" + if detail.contains("EOF") or detail.contains("eof") or detail.contains("early") or detail.contains("truncated"): + kind = "truncated_input".to_string() + elif detail.contains("invalid") or detail.contains("corrupt") or detail.contains("checksum"): + kind = "invalid_data".to_string() + return CompressionError(kind=kind, codec=Some(codec), operation=operation, detail=detail) + + +pub def _io_error(codec: Option[Codec], operation: str, detail: str) -> CompressionError: + """ + Build an I/O failure. + """ + return CompressionError(kind="io", codec=codec, operation=operation, detail=detail) + + +pub def _validate_allowed(allowed: list[Codec]) -> Result[None, CompressionError]: + """ + Reject an empty autodetection candidate set. + """ + if len(allowed) == 0: + return Err( + CompressionError( + kind="unsupported_codec", + codec=None, + operation="decompress_auto", + detail="allowed codec list must not be empty", + ), + ) + return Ok(None) + + +pub def _allowed(allowed: list[Codec], codec: Codec) -> bool: + """ + Return whether `codec` is present in an explicit autodetection filter. + """ + for candidate in allowed: + if candidate == codec: + return true + return false + + +pub def _byte_at(data: bytes, index: int) -> int: + """ + Read one byte as an integer for signature checks. + """ + return int(data[index]) + + +pub def _looks_like_gzip(data: bytes) -> bool: + """ + Return whether `data` starts with the gzip magic bytes. + """ + return len(data) >= 2 and _byte_at(data, 0) == 31 and _byte_at(data, 1) == 139 + + +pub def _looks_like_zstd(data: bytes) -> bool: + """ + Return whether `data` starts with the zstd frame magic bytes. 
+ """ + return len(data) >= 4 and _byte_at(data, 0) == 40 and _byte_at(data, 1) == 181 and _byte_at(data, 2) == 47 and _byte_at(data, 3) == 253 + + +pub def _looks_like_bz2(data: bytes) -> bool: + """ + Return whether `data` starts with the bzip2 stream signature. + """ + return len(data) >= 3 and _byte_at(data, 0) == 66 and _byte_at(data, 1) == 90 and _byte_at(data, 2) == 104 + + +pub def _looks_like_xz(data: bytes) -> bool: + """ + Return whether `data` starts with the XZ container magic bytes. + """ + return len(data) >= 6 and _byte_at(data, 0) == 253 and _byte_at(data, 1) == 55 and _byte_at(data, 2) == 122 and _byte_at(data, 3) == 88 and _byte_at(data, 4) == 90 and _byte_at(data, 5) == 0 + + +pub def _looks_like_snappy_frame(data: bytes) -> bool: + """ + Return whether `data` starts with the framed Snappy stream identifier. + """ + return len(data) >= 10 and _byte_at(data, 0) == 255 and _byte_at(data, 1) == 6 and _byte_at(data, 2) == 0 and _byte_at(data, 3) == 0 and _byte_at(data, 4) == 115 and _byte_at(data, 5) == 78 and _byte_at(data, 6) == 97 and _byte_at(data, 7) == 80 and _byte_at(data, 8) == 112 and _byte_at(data, 9) == 89 + + +pub def _looks_like_zlib(data: bytes) -> bool: + """ + Return whether `data` has a valid zlib header. + + Zlib's header does not have a fixed magic sequence. The RFC 1950 check below validates the compression method and + header checksum; decompression still owns the final data-validity decision. + """ + if len(data) < 2: + return false + cmf = _byte_at(data, 0) + flg = _byte_at(data, 1) + return cmf % 16 == 8 and ((cmf * 256 + flg) % 31 == 0) diff --git a/crates/incan_stdlib/stdlib/compression/bz2.incn b/crates/incan_stdlib/stdlib/compression/bz2.incn new file mode 100644 index 000000000..8c8f0802d --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/bz2.incn @@ -0,0 +1,56 @@ +""" +Bzip2 compression helpers. + +This module owns the byte-oriented bzip2 surface and translates portable levels into the Rust backend's level type. 
+""" + +from rust::std::io import Cursor, Read +from rust::bzip2 @ "0.6" import Compression as BzCompression +from rust::bzip2::read @ "0.6" import BzDecoder, BzEncoder +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default + + +pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: + """ + Compress bytes as a bzip2 stream. + + Args: + data: Plain input bytes. + level: Optional portable compression level from 1 through 9. + + Returns: + `Ok(bytes)` with a bzip2 stream, or `Err(CompressionError)` for invalid levels or codec failures. + """ + mut reader = BzEncoder.new(Cursor.new(data), _bz_level(level)?) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Bz2, "compress", err.to_string())) + + +pub def decompress(data: bytes) -> Result[bytes, CompressionError]: + """ + Decompress a bzip2 stream. + + Args: + data: Bzip2-compressed bytes. + + Returns: + `Ok(bytes)` with the decompressed payload, or `Err(CompressionError)` for invalid or truncated data. + """ + mut reader = BzDecoder.new(Cursor.new(data)) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Bz2, "decompress", err.to_string())) + + +def _bz_level(level: Option[int]) -> Result[BzCompression, CompressionError]: + """ + Convert a portable bzip2 level (1 through 9; bzip2 has no level 0) into the backend compression type. + """ + selected = _level_or_default(Codec.Bz2, level, 6, 1, 9)? 
+ maybe: Option[u32] = selected.try_resize() + match maybe: + Some(value) => return Ok(BzCompression.new(value)) + None => return Err(_codec_error(Codec.Bz2, "compress", "level does not fit u32")) diff --git a/crates/incan_stdlib/stdlib/compression/deflate.incn b/crates/incan_stdlib/stdlib/compression/deflate.incn new file mode 100644 index 000000000..681c8c513 --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/deflate.incn @@ -0,0 +1,57 @@ +""" +Raw deflate compression helpers. + +Raw deflate has no reliable framing signature, so this module supports explicit codec calls but is intentionally absent +from autodetection. +""" + +from rust::std::io import Cursor, Read +from rust::flate2 @ "1" import Compression as FlateCompression +from rust::flate2::read @ "1" import DeflateDecoder, DeflateEncoder +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default + + +pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: + """ + Compress bytes as a raw deflate stream. + + Args: + data: Plain input bytes. + level: Optional portable compression level from 0 through 9. + + Returns: + `Ok(bytes)` with raw deflate data, or `Err(CompressionError)` for invalid levels or codec failures. + """ + mut reader = DeflateEncoder.new(Cursor.new(data), _flate_level(level)?) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Deflate, "compress", err.to_string())) + + +pub def decompress(data: bytes) -> Result[bytes, CompressionError]: + """ + Decompress a raw deflate stream. + + Args: + data: Raw deflate bytes. + + Returns: + `Ok(bytes)` with the decompressed payload, or `Err(CompressionError)` for invalid or truncated data. 
+ """ + mut reader = DeflateDecoder.new(Cursor.new(data)) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Deflate, "decompress", err.to_string())) + + +def _flate_level(level: Option[int]) -> Result[FlateCompression, CompressionError]: + """ + Convert a portable deflate level into the `flate2` compression type. + """ + selected = _level_or_default(Codec.Deflate, level, 6, 0, 9)? + maybe: Option[u32] = selected.try_resize() + match maybe: + Some(value) => return Ok(FlateCompression.new(value)) + None => return Err(_codec_error(Codec.Deflate, "compress", "level does not fit u32")) diff --git a/crates/incan_stdlib/stdlib/compression/gzip.incn b/crates/incan_stdlib/stdlib/compression/gzip.incn new file mode 100644 index 000000000..423df742c --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/gzip.incn @@ -0,0 +1,57 @@ +""" +Gzip compression helpers. + +This module writes and reads gzip-wrapped deflate streams. It keeps the public surface byte-oriented while using the +Rust `flate2` reader adapters as the codec boundary. +""" + +from rust::std::io import Cursor, Read +from rust::flate2 @ "1" import Compression as FlateCompression +from rust::flate2::read @ "1" import GzDecoder, GzEncoder +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default + + +pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: + """ + Compress bytes as a gzip stream. + + Args: + data: Plain input bytes. + level: Optional portable compression level from 0 through 9. + + Returns: + `Ok(bytes)` with the complete gzip payload, or `Err(CompressionError)` for invalid levels or codec failures. + """ + mut reader = GzEncoder.new(Cursor.new(data), _flate_level(level)?) 
+ mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Gzip, "compress", err.to_string())) + + +pub def decompress(data: bytes) -> Result[bytes, CompressionError]: + """ + Decompress a gzip stream. + + Args: + data: Gzip-compressed bytes. + + Returns: + `Ok(bytes)` with the decompressed payload, or `Err(CompressionError)` for invalid or truncated data. + """ + mut reader = GzDecoder.new(Cursor.new(data)) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Gzip, "decompress", err.to_string())) + + +def _flate_level(level: Option[int]) -> Result[FlateCompression, CompressionError]: + """ + Convert a portable gzip level into the `flate2` compression type. + """ + selected = _level_or_default(Codec.Gzip, level, 6, 0, 9)? + maybe: Option[u32] = selected.try_resize() + match maybe: + Some(value) => return Ok(FlateCompression.new(value)) + None => return Err(_codec_error(Codec.Gzip, "compress", "level does not fit u32")) diff --git a/crates/incan_stdlib/stdlib/compression/lzma.incn b/crates/incan_stdlib/stdlib/compression/lzma.incn new file mode 100644 index 000000000..b4b1f32b4 --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/lzma.incn @@ -0,0 +1,55 @@ +""" +XZ/LZMA-family compression helpers. + +The public `std.compression.lzma` name exposes XZ-framed LZMA-family data through the `xz2` backend. +""" + +from rust::std::io import Cursor, Read +from rust::xz2::read @ "0.1" import XzDecoder, XzEncoder +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default + + +pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: + """ + Compress bytes as an XZ/LZMA-family stream. + + Args: + data: Plain input bytes. + level: Optional portable compression level from 0 through 9. 
+ + Returns: + `Ok(bytes)` with an XZ-framed payload, or `Err(CompressionError)` for invalid levels or codec failures. + """ + mut reader = XzEncoder.new(Cursor.new(data), _xz_level(level)?) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Lzma, "compress", err.to_string())) + + +pub def decompress(data: bytes) -> Result[bytes, CompressionError]: + """ + Decompress an XZ/LZMA-family stream. + + Args: + data: XZ/LZMA-family compressed bytes. + + Returns: + `Ok(bytes)` with the decompressed payload, or `Err(CompressionError)` for invalid or truncated data. + """ + mut reader = XzDecoder.new(Cursor.new(data)) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Lzma, "decompress", err.to_string())) + + +def _xz_level(level: Option[int]) -> Result[u32, CompressionError]: + """ + Convert a portable LZMA level into the `xz2` numeric level. + """ + selected = _level_or_default(Codec.Lzma, level, 6, 0, 9)? + maybe: Option[u32] = selected.try_resize() + match maybe: + Some(value) => return Ok(value) + None => return Err(_codec_error(Codec.Lzma, "compress", "level does not fit u32")) diff --git a/crates/incan_stdlib/stdlib/compression/prelude.incn b/crates/incan_stdlib/stdlib/compression/prelude.incn new file mode 100644 index 000000000..ed81bc8a8 --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/prelude.incn @@ -0,0 +1,9 @@ +""" +Codec-based compression and decompression. + +`std.compression` exposes explicit codec submodules for normal workflows and top-level autodetection helpers for +decompression when the caller deliberately opts in. 
+""" + +from std.compression._core import Codec, CompressionError +from std.compression._auto import decompress_auto diff --git a/crates/incan_stdlib/stdlib/compression/snappy.incn b/crates/incan_stdlib/stdlib/compression/snappy.incn new file mode 100644 index 000000000..148255237 --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/snappy.incn @@ -0,0 +1,46 @@ +""" +Framed Snappy compression helpers. + +Framed Snappy is the default `std.compression.snappy` surface because it carries framing bytes suitable for streams and +autodetection. Raw block helpers live under `std.compression.snappy.raw`. +""" + +from rust::std::io import Cursor, Read +from rust::snap::read @ "1" import FrameDecoder, FrameEncoder +from std.compression._core import Codec, CompressionError, _codec_error, _reject_level + + +pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: + """ + Compress bytes as a framed Snappy stream. + + Args: + data: Plain input bytes. + level: Must be `None`; Snappy does not expose a portable compression level. + + Returns: + `Ok(bytes)` with a framed Snappy stream, or `Err(CompressionError)` for unsupported options or codec failures. + """ + _reject_level(Codec.Snappy, level)? + mut reader = FrameEncoder.new(Cursor.new(data)) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Snappy, "compress", err.to_string())) + + +pub def decompress(data: bytes) -> Result[bytes, CompressionError]: + """ + Decompress a framed Snappy stream. + + Args: + data: Framed Snappy bytes. + + Returns: + `Ok(bytes)` with the decompressed payload, or `Err(CompressionError)` for invalid or truncated data. 
+ """ + mut reader = FrameDecoder.new(Cursor.new(data)) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Snappy, "decompress", err.to_string())) diff --git a/crates/incan_stdlib/stdlib/compression/snappy/raw.incn b/crates/incan_stdlib/stdlib/compression/snappy/raw.incn new file mode 100644 index 000000000..879e6f7e5 --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/snappy/raw.incn @@ -0,0 +1,43 @@ +""" +Raw Snappy block helpers. + +Raw Snappy is an advanced interop surface for systems that store individual Snappy blocks. It is intentionally excluded +from `std.compression` autodetection because raw blocks have no stable stream signature. +""" + +from rust::snap::raw @ "1" import Decoder as RawDecoder, Encoder as RawEncoder +from std.compression._core import Codec, CompressionError, _codec_error, _reject_level + + +pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: + """ + Compress bytes using raw Snappy block format. + + Args: + data: Plain input bytes. + level: Must be `None`; raw Snappy does not expose a portable compression level. + + Returns: + `Ok(bytes)` with a raw Snappy block, or `Err(CompressionError)` for unsupported options or codec failures. + """ + _reject_level(Codec.Snappy, level)? + mut encoder = RawEncoder.new() + match encoder.compress_vec(data.as_slice()): + Ok(out) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Snappy, "raw.compress", err)) + + +pub def decompress(data: bytes) -> Result[bytes, CompressionError]: + """ + Decompress bytes using raw Snappy block format. + + Args: + data: Raw Snappy block bytes. + + Returns: + `Ok(bytes)` with the decompressed payload, or `Err(CompressionError)` for invalid block data. 
+ """ + mut decoder = RawDecoder.new() + match decoder.decompress_vec(data.as_slice()): + Ok(out) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Snappy, "raw.decompress", err)) diff --git a/crates/incan_stdlib/stdlib/compression/zlib.incn b/crates/incan_stdlib/stdlib/compression/zlib.incn new file mode 100644 index 000000000..9be3ed42f --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/zlib.incn @@ -0,0 +1,57 @@ +""" +Zlib compression helpers. + +This module writes and reads zlib-wrapped deflate streams. The user-facing API stays byte-oriented and normalizes +backend errors into `CompressionError`. +""" + +from rust::std::io import Cursor, Read +from rust::flate2 @ "1" import Compression as FlateCompression +from rust::flate2::read @ "1" import ZlibDecoder, ZlibEncoder +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default + + +pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: + """ + Compress bytes as a zlib-wrapped deflate stream. + + Args: + data: Plain input bytes. + level: Optional portable compression level from 0 through 9. + + Returns: + `Ok(bytes)` with a complete zlib stream, or `Err(CompressionError)` for invalid levels or codec failures. + """ + mut reader = ZlibEncoder.new(Cursor.new(data), _flate_level(level)?) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Zlib, "compress", err.to_string())) + + +pub def decompress(data: bytes) -> Result[bytes, CompressionError]: + """ + Decompress a zlib-wrapped deflate stream. + + Args: + data: Zlib-compressed bytes. + + Returns: + `Ok(bytes)` with the decompressed payload, or `Err(CompressionError)` for invalid or truncated data. 
+ """ + mut reader = ZlibDecoder.new(Cursor.new(data)) + mut out: bytes = b"" + match reader.read_to_end(out): + Ok(_) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Zlib, "decompress", err.to_string())) + + +def _flate_level(level: Option[int]) -> Result[FlateCompression, CompressionError]: + """ + Convert a portable zlib level into the `flate2` compression type. + """ + selected = _level_or_default(Codec.Zlib, level, 6, 0, 9)? + maybe: Option[u32] = selected.try_resize() + match maybe: + Some(value) => return Ok(FlateCompression.new(value)) + None => return Err(_codec_error(Codec.Zlib, "compress", "level does not fit u32")) diff --git a/crates/incan_stdlib/stdlib/compression/zstd.incn b/crates/incan_stdlib/stdlib/compression/zstd.incn new file mode 100644 index 000000000..f016b22bb --- /dev/null +++ b/crates/incan_stdlib/stdlib/compression/zstd.incn @@ -0,0 +1,52 @@ +""" +Zstandard compression helpers. + +This module exposes zstd frames through one-shot byte helpers and keeps backend-specific error details behind +`CompressionError`. +""" + +from rust::std::io import Cursor +from rust::zstd::stream @ "0.13" import decode_all, encode_all +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default + + +pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: + """ + Compress bytes as a zstd frame. + + Args: + data: Plain input bytes. + level: Optional portable zstd level from -7 through 22. + + Returns: + `Ok(bytes)` with a zstd frame, or `Err(CompressionError)` for invalid levels or codec failures. + """ + match encode_all(Cursor.new(data), _zstd_level(level)?): + Ok(out) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Zstd, "compress", err)) + + +pub def decompress(data: bytes) -> Result[bytes, CompressionError]: + """ + Decompress a zstd frame. + + Args: + data: Zstd-compressed bytes. 
+ + Returns: + `Ok(bytes)` with the decompressed payload, or `Err(CompressionError)` for invalid or truncated data. + """ + match decode_all(Cursor.new(data)): + Ok(out) => return Ok(out) + Err(err) => return Err(_codec_error(Codec.Zstd, "decompress", err)) + + +def _zstd_level(level: Option[int]) -> Result[i32, CompressionError]: + """ + Convert a portable zstd level into the backend's `i32` level. + """ + selected = _level_or_default(Codec.Zstd, level, 0, -7, 22)? + maybe: Option[i32] = selected.try_resize() + match maybe: + Some(value) => return Ok(value) + None => return Err(_codec_error(Codec.Zstd, "compress", "level does not fit i32")) diff --git a/crates/rust_inspect/src/cache.rs b/crates/rust_inspect/src/cache.rs index 534ab6315..f5880a8e0 100644 --- a/crates/rust_inspect/src/cache.rs +++ b/crates/rust_inspect/src/cache.rs @@ -91,7 +91,7 @@ struct DiskCacheEnvelope { } // Bump when extracted metadata semantics change in a way that makes previously persisted items unsafe to reuse. -const DISK_CACHE_FORMAT: u32 = 5; +const DISK_CACHE_FORMAT: u32 = 6; const DISK_CACHE_FILE: &str = ".incan_rust_inspect_cache.json"; // Backward-compatibility read path for caches written before the crate/module rename. const LEGACY_DISK_CACHE_FILE: &str = ".incan_rust_metadata_cache.json"; diff --git a/crates/rust_inspect/src/extractor.rs b/crates/rust_inspect/src/extractor.rs index 8e08d26de..3dd324493 100644 --- a/crates/rust_inspect/src/extractor.rs +++ b/crates/rust_inspect/src/extractor.rs @@ -146,6 +146,44 @@ fn render_shape_display(shape: &RustTypeShape) -> String { } } +fn is_exact_numeric_display(text: &str) -> bool { + matches!( + text, + "f32" + | "f64" + | "i8" + | "i16" + | "i32" + | "i64" + | "i128" + | "isize" + | "u8" + | "u16" + | "u32" + | "u64" + | "u128" + | "usize" + ) +} + +/// Return the canonical Rust numeric display when `text` is exactly a primitive numeric type or reference. 
+fn exact_numeric_boundary_display(text: &str) -> Option { + let normalized = normalize_display_path(text) + .replace("'static ", "") + .replace("'_", "") + .replace(' ', ""); + if is_exact_numeric_display(normalized.as_str()) { + return Some(normalized); + } + if let Some(inner) = normalized.strip_prefix('&') { + let inner = inner.strip_prefix("mut").unwrap_or(inner).trim(); + if is_exact_numeric_display(inner) { + return Some(format!("&{inner}")); + } + } + None +} + fn resolve_source_path(text: &str, crate_name: &str, module: Module, db: &RootDatabase) -> Option { let text = text.trim().replace(' ', ""); if text.is_empty() { @@ -412,8 +450,12 @@ fn rust_type_shape(ty: &Type<'_>, db: &RootDatabase, dt: DisplayTarget) -> RustT } fn function_sig_type_display(ty: &Type<'_>, db: &RootDatabase, dt: DisplayTarget) -> String { + let raw = normalize_display_path(format_ty(ty, db, dt).as_str()); + if let Some(display) = exact_numeric_boundary_display(raw.as_str()) { + return display; + } match rust_type_shape(ty, db, dt) { - RustTypeShape::Unknown => normalize_display_path(format_ty(ty, db, dt).as_str()), + RustTypeShape::Unknown => raw, other => render_shape_display(&other), } } @@ -433,6 +475,9 @@ fn source_function_return_type_display(f: Function, db: &RootDatabase) -> Option .display_name(db) .map(|name| name.canonical_name().as_str().to_owned())?; let shape = source_type_shape(text.as_str(), crate_name.as_str(), module, db); + if let Some(display) = exact_numeric_boundary_display(text.as_str()) { + return Some(display); + } Some(match shape { RustTypeShape::Unknown => normalize_display_path(text.as_str()), other => render_shape_display(&other), @@ -564,6 +609,9 @@ fn source_function_param_type_display(f: Function, param: &ra_ap_hir::Param<'_>, .display_name(db) .map(|name| name.canonical_name().as_str().to_owned())?; let shape = source_type_shape(text.as_str(), crate_name.as_str(), module, db); + if let Some(display) = exact_numeric_boundary_display(text.as_str()) { 
+ return Some(display); + } if matches!(shape, RustTypeShape::TypeParam(_)) && let Some(imported_display) = canonicalize_imported_single_segment_type_display(text.as_str(), f, db) { @@ -952,7 +1000,14 @@ mod tests { use incan_core::interop::RustItemKind; - use super::{RustWorkspace, extract_rust_item}; + use super::{RustWorkspace, exact_numeric_boundary_display, extract_rust_item}; + + #[test] + fn exact_numeric_boundary_display_preserves_widths() { + assert_eq!(exact_numeric_boundary_display("u32").as_deref(), Some("u32")); + assert_eq!(exact_numeric_boundary_display("& i32").as_deref(), Some("&i32")); + assert_eq!(exact_numeric_boundary_display("String"), None); + } #[test] fn type_metadata_records_direct_trait_impls() -> Result<(), Box> { diff --git a/src/backend/ir/codegen.rs b/src/backend/ir/codegen.rs index f54356c5e..62f5daccf 100644 --- a/src/backend/ir/codegen.rs +++ b/src/backend/ir/codegen.rs @@ -105,37 +105,13 @@ fn generated_module_path_for_source_import(path: &ImportPath, current_module_pat /// True when a dependency module should keep its public API even if the main module does not import every item. fn should_preserve_dependency_public_items(module_path: &[String], preserve_non_stdlib_public_items: bool) -> bool { - if module_path_matches_any_std_root(module_path, &["derives", "collection"]) - || module_path_matches_any_std_root(module_path, &["result"]) - || module_path_matches_any_std_root(module_path, &["serde", "json"]) - || module_path_matches_any_std_root(module_path, &["logging"]) - { - return true; - } - if !preserve_non_stdlib_public_items { - return false; - } - !matches!( - module_path.first().map(String::as_str), - Some(stdlib::INCAN_STD_NAMESPACE) - ) -} - -/// Return whether a generated module path exactly matches a static path literal. 
-fn module_path_matches_any_std_root(module_path: &[String], tail: &[&str]) -> bool { - if module_path.len() != tail.len() + 1 { - return false; - } - if !matches!( + if matches!( module_path.first().map(String::as_str), - Some(stdlib::STDLIB_ROOT) | Some(stdlib::INCAN_STD_NAMESPACE) + Some(stdlib::STDLIB_ROOT | stdlib::INCAN_STD_NAMESPACE) ) { - return false; + return true; } - module_path[1..] - .iter() - .zip(tail.iter()) - .all(|(actual, expected)| actual == expected) + preserve_non_stdlib_public_items } /// Return whether a function carries the stdlib-backed web route decorator that lowers to a Rust proc-macro attribute. @@ -1684,6 +1660,42 @@ def main() -> None: assert_no_generated_unused_lint_allows(constants_code); } + #[test] + fn normal_codegen_preserves_stdlib_dependency_public_items_for_generated_projects() { + let gzip_module = parse_program( + r#" +pub def compress(data: bytes) -> bytes: + return data + +pub def decompress(data: bytes) -> bytes: + return data +"#, + ); + let main_module = parse_program( + r#" +from std.compression.gzip import decompress + +def main() -> None: + _ = decompress(b"data") +"#, + ); + let gzip_path = vec!["__incan_std".to_string(), "compression".to_string(), "gzip".to_string()]; + let mut codegen = IrCodegen::new(); + codegen.set_preserve_dependency_public_items(false); + codegen.add_module_with_path_segments("__incan_std_compression_gzip", &gzip_module, gzip_path.clone()); + + let (_main_code, rust_modules) = + must_ok(codegen.try_generate_multi_file_nested(&main_module, std::slice::from_ref(&gzip_path))); + let gzip_code = must_some( + rust_modules.get(&gzip_path), + "missing generated std.compression.gzip module", + ); + + assert!(gzip_code.contains("pub fn compress"), "{gzip_code}"); + assert!(gzip_code.contains("pub fn decompress"), "{gzip_code}"); + assert_no_generated_unused_lint_allows(gzip_code); + } + #[test] fn normal_codegen_can_preserve_dependency_public_items_for_library_mode() { let constants_module = 
parse_program( diff --git a/src/backend/ir/emit/decls/mod.rs b/src/backend/ir/emit/decls/mod.rs index 017ec0be4..bfbf5d2cc 100644 --- a/src/backend/ir/emit/decls/mod.rs +++ b/src/backend/ir/emit/decls/mod.rs @@ -370,11 +370,26 @@ impl<'a> IrEmitter<'a> { } else { false }; + let should_reexport_item = |item: &super::super::decl::IrImportItem| { + let binding = item.alias.as_ref().unwrap_or(&item.name); + if is_incan_source_stdlib && binding.starts_with('_') { + return false; + } + export_item_import + }; let item_stmts: Vec = items .iter() .filter(|item| { let binding = item.alias.as_ref().unwrap_or(&item.name); - export_item_import + let private_type_like_binding = binding + .trim_start_matches('_') + .chars() + .next() + .is_some_and(|ch| ch.is_ascii_uppercase()); + if is_incan_source_stdlib && binding.starts_with('_') && !private_type_like_binding { + return self.should_emit_extension_trait_import(binding); + } + should_reexport_item(item) || self.should_emit_import_binding(binding) || self.should_emit_extension_trait_import(binding) || (preserve_metadata_missing_trait_candidate @@ -388,7 +403,7 @@ impl<'a> IrEmitter<'a> { let absolute_path = matches!(qualifier, IrImportQualifier::None) && !is_pub_library_import; if let Some(alias) = &item.alias { let alias_ident = Self::rust_ident(alias); - if export_item_import { + if should_reexport_item(item) { if absolute_path { quote! { pub use :: #path_ts_clone :: #name_ident as #alias_ident; } } else { @@ -402,7 +417,7 @@ impl<'a> IrEmitter<'a> { } } } else { - if export_item_import { + if should_reexport_item(item) { if absolute_path { quote! { pub use :: #path_ts_clone :: #name_ident; } } else { diff --git a/src/backend/ir/emit/expressions/mod.rs b/src/backend/ir/emit/expressions/mod.rs index 09f21e63a..c6b9dfb15 100644 --- a/src/backend/ir/emit/expressions/mod.rs +++ b/src/backend/ir/emit/expressions/mod.rs @@ -1002,7 +1002,7 @@ impl<'a> IrEmitter<'a> { quote! 
{ (#inner) as #target } } incan_core::interop::CoercionPolicy::Borrow => match rust_target.as_str() { - "&str" | "&[u8]" => quote! { #inner }, + "&str" | "&[u8]" => quote! { &#inner }, "&String" | "&std::string::String" | "&alloc::string::String" => { quote! { &(#inner).to_string() } } @@ -1354,6 +1354,45 @@ mod tests { Ok(()) } + #[test] + fn interop_borrowed_bytes_slice_coercion_borrows_owned_bytes() -> Result<(), String> { + let registry = FunctionRegistry::new(); + let emitter = IrEmitter::new(®istry); + let expr = TypedExpr::new( + IrExprKind::InteropCoerce { + expr: Box::new(TypedExpr::new( + IrExprKind::Var { + name: "data".to_string(), + access: VarAccess::Read, + ref_kind: VarRefKind::Value, + }, + IrType::Bytes, + )), + from_ty: IrType::Bytes, + to_ty: IrType::Ref(Box::new(IrType::Bytes)), + kind: IrInteropCoercionKind::Builtin { + policy: incan_core::interop::CoercionPolicy::Borrow, + rust_target: "&[u8]".to_string(), + }, + }, + IrType::Ref(Box::new(IrType::Bytes)), + ); + + let emitted = emitter + .emit_expr(&expr) + .map_err(|err| format!("expected successful expression emission, got {err:?}"))?; + let rendered = emitted.to_string(); + assert!( + rendered.starts_with("&"), + "expected borrowed bytes slice, got `{rendered}`" + ); + assert!( + rendered.contains("data"), + "expected borrowed bytes coercion to preserve the source expression, got `{rendered}`" + ); + Ok(()) + } + #[test] fn non_string_method_call_join_stays_regular_method_call() -> Result<(), String> { let registry = FunctionRegistry::new(); diff --git a/src/backend/ir/lower/decl/functions.rs b/src/backend/ir/lower/decl/functions.rs index 549b080f1..1c6a6f829 100644 --- a/src/backend/ir/lower/decl/functions.rs +++ b/src/backend/ir/lower/decl/functions.rs @@ -45,7 +45,7 @@ impl AstLowering { &mut self, f: &ast::FunctionDecl, ) -> Result { - self.lower_function_named(f, f.name.clone(), Self::map_visibility(f.visibility)) + self.lower_function_named(f, f.name.clone(), 
self.map_callable_visibility(f.visibility)) } /// Lower a function declaration using an explicit emitted name and visibility. diff --git a/src/backend/ir/lower/decl/mod.rs b/src/backend/ir/lower/decl/mod.rs index 809ab8a8b..1d1e77c66 100644 --- a/src/backend/ir/lower/decl/mod.rs +++ b/src/backend/ir/lower/decl/mod.rs @@ -32,6 +32,23 @@ impl AstLowering { } } + /// Map callable visibility, keeping private source stdlib helpers visible across generated sibling modules. + pub(in crate::backend::ir::lower) fn map_callable_visibility( + &self, + vis: crate::frontend::ast::Visibility, + ) -> Visibility { + let mapped = Self::map_visibility(vis); + if mapped == Visibility::Private + && self + .current_source_module_name + .as_deref() + .is_some_and(|name| name.starts_with("__incan_std.") || name.starts_with("std.")) + { + return Visibility::Crate; + } + mapped + } + /// Lower a declaration to IR. /// /// # Parameters diff --git a/src/frontend/typechecker/check_expr/access.rs b/src/frontend/typechecker/check_expr/access.rs index 22a768e6b..fc4f46e33 100644 --- a/src/frontend/typechecker/check_expr/access.rs +++ b/src/frontend/typechecker/check_expr/access.rs @@ -1168,7 +1168,7 @@ impl TypeChecker { TypeInfo::Newtype(nt) if nt.is_rusttype => { if let ResolvedType::RustPath(path) = &nt.underlying { if let Some(sig) = self.rust_associated_function_signature(path, field) { - return Some(self.resolved_function_type_from_rust_sig(&sig, false)); + return Some(self.resolved_function_type_from_rust_sig_for_path(&sig, false, path)); } if let Some(meta) = self.rust_item_metadata_for_path(path) && let RustItemKind::Type(info) = &meta.kind @@ -1314,14 +1314,15 @@ impl TypeChecker { self.type_info.record_regular_method_arg_shape(receiver_span, method); } let callable_display = format!("rust::{rust_path}.{method}"); - Some(self.validate_rust_method_call( + let ret = self.validate_rust_method_call( callable_display.as_str(), &sig, args, arg_types, preserves_lookup_arg_shape, span, - )) + 
); + Some(Self::substitute_rust_self_type(ret, rust_path)) } RustItemKind::Unsupported { description } => { self.errors.push(errors::rust_item_shape_not_supported( @@ -2281,7 +2282,7 @@ impl TypeChecker { } RustItemKind::Type(_) => { if let Some(sig) = self.rust_associated_function_signature(path, field) { - return self.resolved_function_type_from_rust_sig(&sig, false); + return self.resolved_function_type_from_rust_sig_for_path(&sig, false, path); } if let RustItemKind::Type(info) = &meta.kind && let Some(rust_field) = info.fields.iter().find(|f| f.name == field) diff --git a/src/frontend/typechecker/check_expr/calls/rust_boundary.rs b/src/frontend/typechecker/check_expr/calls/rust_boundary.rs index b2356c752..a1de6873c 100644 --- a/src/frontend/typechecker/check_expr/calls/rust_boundary.rs +++ b/src/frontend/typechecker/check_expr/calls/rust_boundary.rs @@ -287,6 +287,7 @@ impl TypeChecker { self.type_info.record_call_site_callable_params(span, ¶ms); } + /// Return whether a lookup-style Rust method should preserve the probe argument's emitted shape. fn rust_lookup_probe_boundary_match(&self, arg_ty: &ResolvedType, target_ty: &ResolvedType) -> bool { let ResolvedType::Ref(inner) = target_ty else { return false; @@ -299,10 +300,7 @@ impl TypeChecker { ) } ResolvedType::Bytes | ResolvedType::FrozenBytes => { - matches!( - inner.as_ref(), - ResolvedType::Bytes | ResolvedType::RustPath(_) | ResolvedType::TypeVar(_) - ) + matches!(inner.as_ref(), ResolvedType::RustPath(_) | ResolvedType::TypeVar(_)) } _ => false, } diff --git a/src/frontend/typechecker/mod.rs b/src/frontend/typechecker/mod.rs index 73aafe7f9..25f1ebe89 100644 --- a/src/frontend/typechecker/mod.rs +++ b/src/frontend/typechecker/mod.rs @@ -671,6 +671,60 @@ impl TypeChecker { ResolvedType::Function(params, Box::new(ret)) } + /// Replace Rust metadata's `Self` placeholder with the concrete receiver path at a call site. 
+ fn substitute_rust_self_type(ty: ResolvedType, rust_path: &str) -> ResolvedType { + match ty { + ResolvedType::RustPath(path) if path == "Self" => ResolvedType::RustPath(rust_path.to_string()), + ResolvedType::Ref(inner) => ResolvedType::Ref(Box::new(Self::substitute_rust_self_type(*inner, rust_path))), + ResolvedType::RefMut(inner) => { + ResolvedType::RefMut(Box::new(Self::substitute_rust_self_type(*inner, rust_path))) + } + ResolvedType::Generic(name, args) => ResolvedType::Generic( + name, + args.into_iter() + .map(|arg| Self::substitute_rust_self_type(arg, rust_path)) + .collect(), + ), + ResolvedType::Tuple(items) => ResolvedType::Tuple( + items + .into_iter() + .map(|item| Self::substitute_rust_self_type(item, rust_path)) + .collect(), + ), + ResolvedType::FrozenList(inner) => { + ResolvedType::FrozenList(Box::new(Self::substitute_rust_self_type(*inner, rust_path))) + } + ResolvedType::FrozenSet(inner) => { + ResolvedType::FrozenSet(Box::new(Self::substitute_rust_self_type(*inner, rust_path))) + } + ResolvedType::FrozenDict(key, value) => ResolvedType::FrozenDict( + Box::new(Self::substitute_rust_self_type(*key, rust_path)), + Box::new(Self::substitute_rust_self_type(*value, rust_path)), + ), + ResolvedType::Function(params, ret) => ResolvedType::Function( + params + .into_iter() + .map(|param| CallableParam { + ty: Self::substitute_rust_self_type(param.ty, rust_path), + ..param + }) + .collect(), + Box::new(Self::substitute_rust_self_type(*ret, rust_path)), + ), + other => other, + } + } + + /// Build a Rust function type and resolve `Self` return or parameter positions against `rust_path`. + pub(crate) fn resolved_function_type_from_rust_sig_for_path( + &self, + sig: &RustFunctionSig, + drop_receiver: bool, + rust_path: &str, + ) -> ResolvedType { + Self::substitute_rust_self_type(self.resolved_function_type_from_rust_sig(sig, drop_receiver), rust_path) + } + /// Render `path` with generic arguments as `path` for embedding in [`ResolvedType::RustPath`]. 
/// /// When `args` is empty, returns `path` unchanged (no angle brackets). diff --git a/src/frontend/typechecker/tests.rs b/src/frontend/typechecker/tests.rs index cc3f01f38..0fca0bccd 100644 --- a/src/frontend/typechecker/tests.rs +++ b/src/frontend/typechecker/tests.rs @@ -895,6 +895,22 @@ def main() -> None: check_str(source).map_err(|errs| format!("{errs:?}")) } +#[test] +fn owned_value_does_not_satisfy_incan_shared_ref_parameter() { + let source = r#" +def borrowed(data: &bytes) -> None: + return + +def main(data: bytes) -> None: + borrowed(data) +"#; + let errs = check_str_err(source, "owned bytes should not satisfy an Incan &bytes parameter"); + assert!( + errs.iter().any(|err| err.message.contains("Type mismatch")), + "expected type mismatch, got {errs:?}" + ); +} + #[test] fn rfc009_binary_float_literals_are_checked_for_f32_targets() { let ok = r#" @@ -3098,11 +3114,13 @@ def f() -> None: })?; let info = checker.type_info(); assert!( - info.expressions - .expr_types - .values() - .any(|t| matches!(t, ResolvedType::Function(params, _) if params.is_empty())), - "expected associated function field access to resolve to a callable type, got {:?}", + info.expressions.expr_types.values().any(|t| matches!( + t, + ResolvedType::Function(params, ret) + if params.is_empty() + && matches!(ret.as_ref(), ResolvedType::RustPath(path) if path == "demo::Builder") + )), + "expected associated function field access to resolve to a callable type returning demo::Builder, got {:?}", info.expressions.expr_types ); Ok(()) diff --git a/tests/codegen_snapshot_tests.rs b/tests/codegen_snapshot_tests.rs index 1bc1332e1..0a3126cd6 100644 --- a/tests/codegen_snapshot_tests.rs +++ b/tests/codegen_snapshot_tests.rs @@ -2712,6 +2712,34 @@ fn test_std_graph_compiled_codegen() { insta::assert_snapshot!("std_graph_compiled", rust_code); } +/// RFC 061: compile the `std.compression` source modules. 
+#[test] +fn test_std_compression_modules_compile_codegen() -> Result<(), Box> { + let paths = [ + "crates/incan_stdlib/stdlib/compression/prelude.incn", + "crates/incan_stdlib/stdlib/compression/_core.incn", + "crates/incan_stdlib/stdlib/compression/_auto.incn", + "crates/incan_stdlib/stdlib/compression/gzip.incn", + "crates/incan_stdlib/stdlib/compression/zlib.incn", + "crates/incan_stdlib/stdlib/compression/deflate.incn", + "crates/incan_stdlib/stdlib/compression/zstd.incn", + "crates/incan_stdlib/stdlib/compression/bz2.incn", + "crates/incan_stdlib/stdlib/compression/lzma.incn", + "crates/incan_stdlib/stdlib/compression/snappy.incn", + "crates/incan_stdlib/stdlib/compression/snappy/raw.incn", + ]; + + for path in paths { + let source = fs::read_to_string(path)?; + let rust_code = generate_rust(&source); + assert!( + rust_code.contains("__incan"), + "expected {path} to compile into Incan-generated Rust, got:\n{rust_code}" + ); + } + Ok(()) +} + /// RFC 047: verify `std.graph` imports, direct constructors, DAGs, and multigraph edge ids lower to Rust. #[test] fn test_std_graph_import_codegen() { diff --git a/tests/fixtures/valid/std_compression_surface.incn b/tests/fixtures/valid/std_compression_surface.incn new file mode 100644 index 000000000..059314a77 --- /dev/null +++ b/tests/fixtures/valid/std_compression_surface.incn @@ -0,0 +1,33 @@ +from std.compression import Codec, CompressionError, bz2, deflate, decompress_auto, gzip, lzma, snappy, zlib, zstd +from std.compression.snappy.raw import compress as raw_snappy_compress, decompress as raw_snappy_decompress + + +def assert_round_trip(name: str, plain: bytes, decoded: bytes) -> None: + assert decoded == plain + println(f"{name} round trip ok") + + +def exercise_compression() -> Result[None, CompressionError]: + data = b"Incan compression dogfood" + + gzip_data = gzip.compress(data, None)? + assert_round_trip("gzip", data, gzip.decompress(gzip_data)?) + codec, decoded = decompress_auto(gzip_data, [Codec.Gzip])? 
+ assert codec == Codec.Gzip + assert decoded == data + + assert_round_trip("zlib", data, zlib.decompress(zlib.compress(data, None)?)?) + assert_round_trip("deflate", data, deflate.decompress(deflate.compress(data, None)?)?) + assert_round_trip("zstd", data, zstd.decompress(zstd.compress(data, None)?)?) + assert_round_trip("bz2", data, bz2.decompress(bz2.compress(data, None)?)?) + assert_round_trip("lzma", data, lzma.decompress(lzma.compress(data, None)?)?) + assert_round_trip("snappy", data, snappy.decompress(snappy.compress(data, None)?)?) + assert_round_trip("snappy.raw", data, raw_snappy_decompress(raw_snappy_compress(data, None)?)?) + + return Ok(None) + + +def main() -> None: + match exercise_compression(): + Ok(_) => return + Err(_) => assert false diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index e778c0e2c..bcabd4c81 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -6650,6 +6650,39 @@ def main() -> None: Ok(()) } + #[test] + fn test_std_compression_surface_runs_generated_project() -> Result<(), Box> { + let output = Command::new(incan_debug_binary()) + .args(["run", "tests/fixtures/valid/std_compression_surface.incn"]) + .env("CARGO_NET_OFFLINE", "true") + .output()?; + + assert!( + output.status.success(), + "std.compression surface run failed: status={:?} stderr={}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + let lines: Vec<&str> = stdout.lines().map(str::trim).filter(|line| !line.is_empty()).collect(); + assert_eq!( + lines, + vec![ + "gzip round trip ok", + "zlib round trip ok", + "deflate round trip ok", + "zstd round trip ok", + "bz2 round trip ok", + "lzma round trip ok", + "snappy round trip ok", + "snappy.raw round trip ok", + ], + "unexpected std.compression output: {stdout}" + ); + Ok(()) + } + #[test] fn test_rust_associated_call_in_elif_branch_uses_path_syntax() { let Ok(output) = 
Command::new(incan_debug_binary()) diff --git a/tests/snapshots/codegen_snapshot_tests__std_uuid_compiled.snap b/tests/snapshots/codegen_snapshot_tests__std_uuid_compiled.snap index 95ce75719..9de325162 100644 --- a/tests/snapshots/codegen_snapshot_tests__std_uuid_compiled.snap +++ b/tests/snapshots/codegen_snapshot_tests__std_uuid_compiled.snap @@ -22,7 +22,7 @@ pub use crate::__incan_std::hash::sha1 as hash_sha1; pub use crate::__incan_std::io::BytesIO; pub use crate::__incan_std::io::Endian; pub use crate::__incan_std::io::IoError; -pub use crate::__incan_std::io::_BytesIO; +use crate::__incan_std::io::_BytesIO; pub use crate::__incan_std::traits::error::Error; pub const NIL: UUID = UUID(0); pub const MAX: UUID = UUID(340282366920938463463374607431768211455); diff --git a/workspaces/docs-site/docs/RFCs/061_std_compression.md b/workspaces/docs-site/docs/RFCs/061_std_compression.md index 6710413ae..037dc6746 100644 --- a/workspaces/docs-site/docs/RFCs/061_std_compression.md +++ b/workspaces/docs-site/docs/RFCs/061_std_compression.md @@ -1,6 +1,6 @@ # RFC 061: `std.compression` — codec-based compression and decompression -- **Status:** Planned +- **Status:** In Progress - **Created:** 2026-04-14 - **Author(s):** Danny Meijer (@dannymeijer) - **Related:** @@ -297,6 +297,98 @@ This feature is additive. Existing Rust interop or third-party compression code - **Execution handoff**: implementations must preserve codec behavior, stream incrementally, and avoid backend API leakage. - **Docs / examples**: must standardize bytes, stream, compression-level, error, Snappy raw, and autodetection usage patterns. +## Implementation Plan + +### Phase 1: RFC lifecycle, registry, and source-defined surface + +- Move the RFC to `In Progress` and use this plan/checklist as the implementation source of truth. +- Register `std.compression` and its codec submodules in the stdlib namespace registry. 
+- Add source-defined Incan stdlib modules for `Codec`, `CompressionError`, per-codec APIs, one-shot autodetection helpers, and `snappy.raw`. +- Preserve the dogfooding constraint: implement the stdlib surface in `.incn` over normal Rust crate imports where needed, without new `@rust.extern` function or type implementation surfaces. + +### Phase 2: Codec behavior and dependency handoff + +- Add stdlib-managed crate dependencies for the codec backends needed by generated projects. +- Implement one-shot compression/decompression for `gzip`, `zlib`, `deflate`, `zstd`, `bz2`, `lzma`, framed `snappy`, and raw `snappy`. +- Implement stream helpers by chunking through `std.fs.File` and `std.io.BytesIO` read/write methods. +- Normalize codec, option, chunk-size, and I/O failures into `CompressionError`. + +### Phase 3: Autodetection, validation, and docs + +- Implement top-level decompression autodetection with explicit `allowed` filtering and signature/framing checks. +- Add typechecker/registry tests and generated-code snapshots for the new stdlib modules. +- Add integration tests for round trips, invalid options, chunk-size errors, and autodetection behavior. +- Update authored stdlib docs and release notes for the new user-visible module. +- Bump the active dev version by one dev increment once the implementation lands. + +## Progress Checklist + +Implementation note: the dogfooded `.incn` pass avoids new `@rust.extern` surfaces and builds as a generated Rust +project for one-shot compression, decompression, and explicit byte autodetection. Issue +[#548](https://github.com/dannys-code-corner/incan/issues/548) was resolved in this implementation loop by preserving +public stdlib dependency APIs in generated projects and fixing the Rust boundary cases surfaced by the RFC 061 fixture. 
+Streaming APIs are intentionally not exposed until they can satisfy the `std.fs.File | std.io.BytesIO` incremental +contract instead of pretending that whole-buffer wrappers are stream support. Authored docs, release notes, versioning, +and broader integration coverage remain open. + +### Spec / lifecycle + +- [x] RFC moved to `Planned` with settled design decisions and issue #339 relabeled as `feature`. +- [x] RFC moved to `In Progress` with implementation plan and progress checklist. +- [x] GitHub issue #339 has the In Progress traceability comment. + +### Stdlib registry / dependency handoff + +- [x] Register `std.compression` and codec submodules in `STDLIB_NAMESPACES`. +- [x] Add registry tests for known module paths and stub path resolution. +- [x] Add stdlib-managed crate dependencies for codec backends. +- [x] Verify generated projects receive the codec dependencies through the stdlib registry. + +### Source-defined stdlib surface + +- [x] Add `crates/incan_stdlib/stdlib/compression/prelude.incn`. +- [x] Add internal `_core` and `_auto` source modules for shared vocabulary and autodetection implementation. +- [x] Add `Codec` and `CompressionError` as Incan-authored public types. +- [x] Add per-codec submodules for `gzip`, `zlib`, `deflate`, `zstd`, `bz2`, `lzma`, and `snappy`. +- [x] Add `snappy.raw` advanced interop surface. +- [x] Avoid new `@rust.extern` function or type implementation surfaces. + +### Codec behavior + +- [x] Implement one-shot `compress` and `decompress` for every required codec at the source/typecheck layer. +- [x] Implement one-shot `compress` and `decompress` for every required codec in a Rust-buildable generated project. +- [ ] Implement stream `compress_stream` and `decompress_stream` for every required codec. +- [ ] Reject non-positive `chunk_size` values at the source/typecheck layer. +- [x] Reject unsupported compression levels through stable error categories at the source/typecheck layer. 
+- [x] Normalize invalid data, truncated input, I/O, and backend failures into `CompressionError` at the source/typecheck layer. + +### Autodetection + +- [x] Implement `decompress_auto` at the source/typecheck layer. +- [ ] Implement `decompress_auto_stream` at the source/typecheck layer. +- [x] Enforce the `allowed` codec filter exactly at the source/typecheck layer. +- [x] Reject empty `allowed` lists at the source/typecheck layer. +- [x] Exclude raw Snappy from autodetection. +- [x] Avoid extension/path/MIME guessing. + +### Tests + +- [x] Add typechecker tests for imports and public symbols. +- [x] Add codegen snapshot coverage for root and codec stdlib modules. +- [x] Add integration tests for bytes round trips. +- [ ] Add integration tests for stream round trips. +- [ ] Add integration tests for option/chunk-size errors. +- [ ] Add integration tests for autodetection. + +### Docs / release / version + +- [ ] Add authored stdlib reference docs for `std.compression`. +- [ ] Add release notes entry for RFC 061 / issue #339. +- [ ] Bump the workspace dev version when the implementation lands. +- [ ] Regenerate RFC snippets/index if lifecycle metadata changes require it. +- [ ] Run docs build. +- [ ] Run repository verification gate. + ## Design Decisions - `std.compression` is a dedicated codec module and is not folded into `std.io` or `std.fs`. 
From 8bf10f4a40f9a3f3462864f9e580081d7ab37993 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Mon, 18 May 2026 10:40:46 +0200 Subject: [PATCH 2/8] feature - complete std.compression RFC (#339, #548) --- Cargo.lock | 18 +- Cargo.toml | 2 +- .../stdlib/compression/_auto.incn | 286 ++++++++++++++++++ .../stdlib/compression/_core.incn | 44 ++- .../incan_stdlib/stdlib/compression/bz2.incn | 135 ++++++++- .../stdlib/compression/deflate.incn | 135 ++++++++- .../incan_stdlib/stdlib/compression/gzip.incn | 135 ++++++++- .../incan_stdlib/stdlib/compression/lzma.incn | 135 ++++++++- .../stdlib/compression/prelude.incn | 2 +- .../stdlib/compression/snappy.incn | 133 +++++++- .../incan_stdlib/stdlib/compression/zlib.incn | 135 ++++++++- .../incan_stdlib/stdlib/compression/zstd.incn | 150 ++++++++- crates/incan_stdlib/stdlib/io.incn | 2 +- .../valid/std_compression_surface.incn | 147 ++++++++- tests/integration_tests.rs | 4 + .../implemented}/061_std_compression.md | 40 ++- .../language/reference/stdlib/compression.md | 109 +++++++ .../docs/language/reference/stdlib/index.md | 1 + .../docs-site/docs/release_notes/0_3.md | 30 +- workspaces/docs-site/mkdocs.yml | 1 + 20 files changed, 1591 insertions(+), 53 deletions(-) rename workspaces/docs-site/docs/RFCs/{ => closed/implemented}/061_std_compression.md (93%) create mode 100644 workspaces/docs-site/docs/language/reference/stdlib/compression.md diff --git a/Cargo.lock b/Cargo.lock index fa433e5b7..b3d6af21c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1457,7 +1457,7 @@ dependencies = [ [[package]] name = "incan" -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" dependencies = [ "blake2", "blake3", @@ -1501,14 +1501,14 @@ dependencies = [ [[package]] name = "incan_core" -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" dependencies = [ "serde", ] [[package]] name = "incan_derive" -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" dependencies = [ "proc-macro2", "quote", @@ -1517,14 +1517,14 @@ dependencies = [ [[package]] 
name = "incan_semantics_core" -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" dependencies = [ "incan_core", ] [[package]] name = "incan_semantics_stdlib" -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" dependencies = [ "incan_core", "incan_semantics_core", @@ -1532,7 +1532,7 @@ dependencies = [ [[package]] name = "incan_stdlib" -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" dependencies = [ "axum", "incan_core", @@ -1545,7 +1545,7 @@ dependencies = [ [[package]] name = "incan_syntax" -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" dependencies = [ "incan_core", "incan_semantics_core", @@ -1566,7 +1566,7 @@ dependencies = [ [[package]] name = "incan_web_macros" -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" dependencies = [ "proc-macro2", "quote", @@ -3090,7 +3090,7 @@ dependencies = [ [[package]] name = "rust_inspect" -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" dependencies = [ "hex", "incan_core", diff --git a/Cargo.toml b/Cargo.toml index 89e1671d6..b144fe18d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ resolver = "2" ra_ap_proc_macro_api = { path = "crates/third_party/ra_ap_proc_macro_api" } [workspace.package] -version = "0.3.0-dev.46" +version = "0.3.0-dev.47" description = "The Incan programming language compiler" edition = "2024" rust-version = "1.92" diff --git a/crates/incan_stdlib/stdlib/compression/_auto.incn b/crates/incan_stdlib/stdlib/compression/_auto.incn index 3684dd8d1..5daf57c92 100644 --- a/crates/incan_stdlib/stdlib/compression/_auto.incn +++ b/crates/incan_stdlib/stdlib/compression/_auto.incn @@ -3,12 +3,24 @@ Autodetection helpers for `std.compression`. Autodetection is deliberately decompression-only and signature-driven. Raw deflate and raw Snappy are not guessed because they lack reliable framing signatures. + +Stream autodetection keeps concrete `BytesIO` and `File` branches instead of a generic Rust `Read` helper. 
Current +generic lowering adds bounds that Rust codec adapters do not satisfy, so the source keeps the duplication visible until +the generic boundary can express owned reader adapters directly. """ +from rust::std::io import Cursor, Read +from rust::bzip2::read @ "0.6" import BzDecoder +from rust::flate2::read @ "1" import GzDecoder, ZlibDecoder +from rust::snap::read @ "1" import FrameDecoder +from rust::xz2::read @ "0.1" import XzDecoder +from rust::zstd::stream::read @ "0.13" import Decoder as ZstdReadDecoder from std.compression._core import ( Codec, CompressionError, _allowed, + _codec_error, + _io_error, _looks_like_bz2, _looks_like_gzip, _looks_like_snappy_frame, @@ -16,6 +28,8 @@ from std.compression._core import ( _looks_like_zlib, _looks_like_zstd, _validate_allowed, + _validate_chunk_size, + _write_sink_bytes, ) from std.compression.gzip import decompress as _gzip_decompress from std.compression.zlib import decompress as _zlib_decompress @@ -23,6 +37,8 @@ from std.compression.zstd import decompress as _zstd_decompress from std.compression.bz2 import decompress as _bz2_decompress from std.compression.lzma import decompress as _lzma_decompress from std.compression.snappy import decompress as _snappy_decompress +from std.fs import File +from std.io import _BytesIO pub def decompress_auto(data: bytes, allowed: list[Codec] = Codec.all()) -> Result[tuple[Codec, bytes], CompressionError]: @@ -58,3 +74,273 @@ pub def decompress_auto(data: bytes, allowed: list[Codec] = Codec.all()) -> Resu detail="no allowed compression codec matched the input signature", ), ) + + +pub def decompress_auto_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + allowed: list[Codec] = Codec.all(), + chunk_size: int = 65536, +) -> Result[Codec, CompressionError]: + """ + Decompress a binary stream by explicit signature-based autodetection. + + Only the fixed signature prefix is buffered before dispatch. 
The buffered prefix is chained back onto the remaining + source stream so decompression still proceeds incrementally. + + Args: + source: `BytesIO` or `File` positioned at compressed input bytes. + target: `BytesIO` or `File` that receives plain bytes. + allowed: Candidate codecs to consider; codecs outside the list are not attempted. + chunk_size: Positive number of decompressed bytes to move per step. + + Returns: + `Ok(Codec)` with the detected codec after writing to `target`, or `Err(CompressionError)`. + """ + _validate_chunk_size(None, "decompress_auto_stream", chunk_size)? + _validate_allowed(allowed, "decompress_auto_stream")? + prefix = _read_prefix(source)? + if _allowed(allowed.clone(), Codec.Gzip) and _looks_like_gzip(prefix): + _decompress_gzip_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Gzip) + elif _allowed(allowed.clone(), Codec.Zstd) and _looks_like_zstd(prefix): + _decompress_zstd_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Zstd) + elif _allowed(allowed.clone(), Codec.Bz2) and _looks_like_bz2(prefix): + _decompress_bz2_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Bz2) + elif _allowed(allowed.clone(), Codec.Lzma) and _looks_like_xz(prefix): + _decompress_lzma_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Lzma) + elif _allowed(allowed.clone(), Codec.Snappy) and _looks_like_snappy_frame(prefix): + _decompress_snappy_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Snappy) + elif _allowed(allowed, Codec.Zlib) and _looks_like_zlib(prefix): + _decompress_zlib_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Zlib) + return Err( + CompressionError( + kind="unsupported_codec", + codec=None, + operation="decompress_auto_stream", + detail="no allowed compression codec matched the input signature", + ), + ) + + +def _read_prefix(source: Union[_BytesIO, File]) -> Result[bytes, CompressionError]: + """ + Read the maximum signature prefix used by autodetection. 
+ """ + if isinstance(source, _BytesIO): + match source.read_bytes(10): + Ok(prefix) => return Ok(prefix) + Err(err) => return Err(_io_error(None, "decompress_auto_stream", err.message())) + elif isinstance(source, File): + match source.read_bytes(10): + Ok(prefix) => return Ok(prefix) + Err(err) => return Err(_io_error(None, "decompress_auto_stream", err.message())) + + +def _decompress_gzip_stream( + prefix: bytes, + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress an autodetected gzip stream. + """ + if isinstance(source, _BytesIO): + mut handle = source.handle.borrow_mut() + mut reader = GzDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Gzip), "decompress_auto_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Gzip, "decompress_auto_stream", err.to_string())) + elif isinstance(source, File): + mut handle = source.handle.borrow_mut() + mut reader = GzDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Gzip), "decompress_auto_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Gzip, "decompress_auto_stream", err.to_string())) + + +def _decompress_zlib_stream( + prefix: bytes, + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress an autodetected zlib stream. 
+ """ + if isinstance(source, _BytesIO): + mut handle = source.handle.borrow_mut() + mut reader = ZlibDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zlib), "decompress_auto_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Zlib, "decompress_auto_stream", err.to_string())) + elif isinstance(source, File): + mut handle = source.handle.borrow_mut() + mut reader = ZlibDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zlib), "decompress_auto_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Zlib, "decompress_auto_stream", err.to_string())) + + +def _decompress_zstd_stream( + prefix: bytes, + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress an autodetected zstd stream. + """ + if isinstance(source, _BytesIO): + mut handle = source.handle.borrow_mut() + match ZstdReadDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))): + Ok(reader) => + mut adapter = reader + while true: + mut chunk: bytes = b"" + match adapter.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zstd), "decompress_auto_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Zstd, "decompress_auto_stream", err.to_string())) + Err(err) => return Err(_codec_error(Codec.Zstd, "decompress_auto_stream", err.to_string())) + elif isinstance(source, File): + mut handle = source.handle.borrow_mut() + match ZstdReadDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))): + Ok(reader) => + mut adapter = reader + while true: + mut chunk: bytes = b"" + match adapter.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zstd), "decompress_auto_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Zstd, "decompress_auto_stream", err.to_string())) + Err(err) => return Err(_codec_error(Codec.Zstd, "decompress_auto_stream", err.to_string())) + + +def _decompress_bz2_stream( + prefix: bytes, + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress an autodetected bzip2 stream. + """ + if isinstance(source, _BytesIO): + mut handle = source.handle.borrow_mut() + mut reader = BzDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Bz2), "decompress_auto_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Bz2, "decompress_auto_stream", err.to_string())) + elif isinstance(source, File): + mut handle = source.handle.borrow_mut() + mut reader = BzDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Bz2), "decompress_auto_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Bz2, "decompress_auto_stream", err.to_string())) + + +def _decompress_lzma_stream( + prefix: bytes, + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress an autodetected XZ/LZMA-family stream. + """ + if isinstance(source, _BytesIO): + mut handle = source.handle.borrow_mut() + mut reader = XzDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Lzma), "decompress_auto_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Lzma, "decompress_auto_stream", err.to_string())) + elif isinstance(source, File): + mut handle = source.handle.borrow_mut() + mut reader = XzDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Lzma), "decompress_auto_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Lzma, "decompress_auto_stream", err.to_string())) + + +def _decompress_snappy_stream( + prefix: bytes, + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress an autodetected framed Snappy stream. + """ + if isinstance(source, _BytesIO): + mut handle = source.handle.borrow_mut() + mut reader = FrameDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Snappy), "decompress_auto_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Snappy, "decompress_auto_stream", err.to_string())) + elif isinstance(source, File): + mut handle = source.handle.borrow_mut() + mut reader = FrameDecoder.new(Cursor.new(prefix).chain(Read.by_ref(handle))) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Snappy), "decompress_auto_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Snappy, "decompress_auto_stream", err.to_string())) diff --git a/crates/incan_stdlib/stdlib/compression/_core.incn b/crates/incan_stdlib/stdlib/compression/_core.incn index 06c5edee6..b71d3e215 100644 --- a/crates/incan_stdlib/stdlib/compression/_core.incn +++ b/crates/incan_stdlib/stdlib/compression/_core.incn @@ -5,6 +5,8 @@ Public callers should import `Codec`, `CompressionError`, and codec modules from helpers in this module are implementation details shared by sibling codec modules and the autodetection module. """ +from std.fs import File +from std.io import _BytesIO from std.traits.error import Error @@ -118,7 +120,45 @@ pub def _io_error(codec: Option[Codec], operation: str, detail: str) -> Compress return CompressionError(kind="io", codec=codec, operation=operation, detail=detail) -pub def _validate_allowed(allowed: list[Codec]) -> Result[None, CompressionError]: +pub def _validate_chunk_size(codec: Option[Codec], operation: str, chunk_size: int) -> Result[None, CompressionError]: + """ + Reject a non-positive stream chunk size before touching source or target streams. 
+ """ + if chunk_size <= 0: + return Err( + CompressionError( + kind="invalid_chunk_size", + codec=codec, + operation=operation, + detail="chunk_size must be positive", + ), + ) + return Ok(None) + + +pub def _write_sink_bytes( + codec: Option[Codec], + operation: str, + target: Union[_BytesIO, File], + data: bytes, +) -> Result[None, CompressionError]: + """ + Write one compressed or decompressed chunk to a `BytesIO` or `File` sink. + + Stream helpers keep codec adapters in their codec modules, but use this shared sink boundary so `IoError` + normalization stays identical across modules. + """ + if isinstance(target, _BytesIO): + match target.write_bytes(data): + Ok(_) => return Ok(None) + Err(err) => return Err(_io_error(codec, operation, err.message())) + elif isinstance(target, File): + match target.write_bytes(data): + Ok(_) => return Ok(None) + Err(err) => return Err(_io_error(codec, operation, err.message())) + + +pub def _validate_allowed(allowed: list[Codec], operation: str = "decompress_auto") -> Result[None, CompressionError]: """ Reject an empty autodetection candidate set. 
""" @@ -127,7 +167,7 @@ pub def _validate_allowed(allowed: list[Codec]) -> Result[None, CompressionError CompressionError( kind="unsupported_codec", codec=None, - operation="decompress_auto", + operation=operation, detail="allowed codec list must not be empty", ), ) diff --git a/crates/incan_stdlib/stdlib/compression/bz2.incn b/crates/incan_stdlib/stdlib/compression/bz2.incn index 8c8f0802d..70e3c41fb 100644 --- a/crates/incan_stdlib/stdlib/compression/bz2.incn +++ b/crates/incan_stdlib/stdlib/compression/bz2.incn @@ -7,7 +7,9 @@ This module owns the byte-oriented bzip2 surface and translates portable levels from rust::std::io import Cursor, Read from rust::bzip2 @ "0.6" import Compression as BzCompression from rust::bzip2::read @ "0.6" import BzDecoder, BzEncoder -from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default, _validate_chunk_size, _write_sink_bytes +from std.fs import File +from std.io import _BytesIO pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: @@ -45,6 +47,55 @@ pub def decompress(data: bytes) -> Result[bytes, CompressionError]: Err(err) => return Err(_codec_error(Codec.Bz2, "decompress", err.to_string())) +pub def compress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + level: Option[int] = None, + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Compress a binary stream as bzip2 bytes. + + Args: + source: `BytesIO` or `File` positioned at plain input bytes. + target: `BytesIO` or `File` that receives bzip2 bytes. + level: Optional portable compression level from 0 through 9. + chunk_size: Positive number of compressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the bzip2 payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Bz2), "compress_stream", chunk_size)? 
+ compression = _bz_level(level)? + if isinstance(source, _BytesIO): + return _compress_bytesio_stream(source, target, compression, chunk_size) + elif isinstance(source, File): + return _compress_file_stream(source, target, compression, chunk_size) + + +pub def decompress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Decompress a bzip2 binary stream. + + Args: + source: `BytesIO` or `File` positioned at bzip2 bytes. + target: `BytesIO` or `File` that receives plain bytes. + chunk_size: Positive number of decompressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Bz2), "decompress_stream", chunk_size)? + if isinstance(source, _BytesIO): + return _decompress_bytesio_stream(source, target, chunk_size) + elif isinstance(source, File): + return _decompress_file_stream(source, target, chunk_size) + + def _bz_level(level: Option[int]) -> Result[BzCompression, CompressionError]: """ Convert a portable bzip2 level into the backend compression type. @@ -54,3 +105,85 @@ def _bz_level(level: Option[int]) -> Result[BzCompression, CompressionError]: match maybe: Some(value) => return Ok(BzCompression.new(value)) None => return Err(_codec_error(Codec.Bz2, "compress", "level does not fit u32")) + + +def _compress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + compression: BzCompression, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = BzEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Bz2), "compress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Bz2, "compress_stream", err.to_string())) + + +def _compress_file_stream( + source: File, + target: Union[_BytesIO, File], + compression: BzCompression, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = BzEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Bz2), "compress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Bz2, "compress_stream", err.to_string())) + + +def _decompress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress bzip2 bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = BzDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Bz2), "decompress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Bz2, "decompress_stream", err.to_string())) + + +def _decompress_file_stream( + source: File, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress bzip2 bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = BzDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Bz2), "decompress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Bz2, "decompress_stream", err.to_string())) diff --git a/crates/incan_stdlib/stdlib/compression/deflate.incn b/crates/incan_stdlib/stdlib/compression/deflate.incn index 681c8c513..3701c7904 100644 --- a/crates/incan_stdlib/stdlib/compression/deflate.incn +++ b/crates/incan_stdlib/stdlib/compression/deflate.incn @@ -8,7 +8,9 @@ from autodetection. from rust::std::io import Cursor, Read from rust::flate2 @ "1" import Compression as FlateCompression from rust::flate2::read @ "1" import DeflateDecoder, DeflateEncoder -from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default, _validate_chunk_size, _write_sink_bytes +from std.fs import File +from std.io import _BytesIO pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: @@ -46,6 +48,55 @@ pub def decompress(data: bytes) -> Result[bytes, CompressionError]: Err(err) => return Err(_codec_error(Codec.Deflate, "decompress", err.to_string())) +pub def compress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + level: Option[int] = None, + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Compress a binary stream as raw deflate bytes. + + Args: + source: `BytesIO` or `File` positioned at plain input bytes. + target: `BytesIO` or `File` that receives raw deflate bytes. + level: Optional portable compression level from 0 through 9. + chunk_size: Positive number of compressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the raw deflate payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Deflate), "compress_stream", chunk_size)? + compression = _flate_level(level)? 
+ if isinstance(source, _BytesIO): + return _compress_bytesio_stream(source, target, compression, chunk_size) + elif isinstance(source, File): + return _compress_file_stream(source, target, compression, chunk_size) + + +pub def decompress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Decompress a raw deflate binary stream. + + Args: + source: `BytesIO` or `File` positioned at raw deflate bytes. + target: `BytesIO` or `File` that receives plain bytes. + chunk_size: Positive number of decompressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Deflate), "decompress_stream", chunk_size)? + if isinstance(source, _BytesIO): + return _decompress_bytesio_stream(source, target, chunk_size) + elif isinstance(source, File): + return _decompress_file_stream(source, target, chunk_size) + + def _flate_level(level: Option[int]) -> Result[FlateCompression, CompressionError]: """ Convert a portable deflate level into the `flate2` compression type. @@ -55,3 +106,85 @@ def _flate_level(level: Option[int]) -> Result[FlateCompression, CompressionErro match maybe: Some(value) => return Ok(FlateCompression.new(value)) None => return Err(_codec_error(Codec.Deflate, "compress", "level does not fit u32")) + + +def _compress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + compression: FlateCompression, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = DeflateEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Deflate), "compress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Deflate, "compress_stream", err.to_string())) + + +def _compress_file_stream( + source: File, + target: Union[_BytesIO, File], + compression: FlateCompression, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = DeflateEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Deflate), "compress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Deflate, "compress_stream", err.to_string())) + + +def _decompress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress raw deflate bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = DeflateDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Deflate), "decompress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Deflate, "decompress_stream", err.to_string())) + + +def _decompress_file_stream( + source: File, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress raw deflate bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = DeflateDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Deflate), "decompress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Deflate, "decompress_stream", err.to_string())) diff --git a/crates/incan_stdlib/stdlib/compression/gzip.incn b/crates/incan_stdlib/stdlib/compression/gzip.incn index 423df742c..15de2028c 100644 --- a/crates/incan_stdlib/stdlib/compression/gzip.incn +++ b/crates/incan_stdlib/stdlib/compression/gzip.incn @@ -8,7 +8,9 @@ Rust `flate2` reader adapters as the codec boundary. from rust::std::io import Cursor, Read from rust::flate2 @ "1" import Compression as FlateCompression from rust::flate2::read @ "1" import GzDecoder, GzEncoder -from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default, _validate_chunk_size, _write_sink_bytes +from std.fs import File +from std.io import _BytesIO pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: @@ -46,6 +48,55 @@ pub def decompress(data: bytes) -> Result[bytes, CompressionError]: Err(err) => return Err(_codec_error(Codec.Gzip, "decompress", err.to_string())) +pub def compress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + level: Option[int] = None, + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Compress a binary stream as gzip bytes. + + Args: + source: `BytesIO` or `File` positioned at plain input bytes. + target: `BytesIO` or `File` that receives gzip bytes. + level: Optional portable compression level from 0 through 9. + chunk_size: Positive number of compressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the gzip payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Gzip), "compress_stream", chunk_size)? + compression = _flate_level(level)? 
+ if isinstance(source, _BytesIO): + return _compress_bytesio_stream(source, target, compression, chunk_size) + elif isinstance(source, File): + return _compress_file_stream(source, target, compression, chunk_size) + + +pub def decompress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Decompress a gzip binary stream. + + Args: + source: `BytesIO` or `File` positioned at gzip bytes. + target: `BytesIO` or `File` that receives plain bytes. + chunk_size: Positive number of decompressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Gzip), "decompress_stream", chunk_size)? + if isinstance(source, _BytesIO): + return _decompress_bytesio_stream(source, target, chunk_size) + elif isinstance(source, File): + return _decompress_file_stream(source, target, chunk_size) + + def _flate_level(level: Option[int]) -> Result[FlateCompression, CompressionError]: """ Convert a portable gzip level into the `flate2` compression type. @@ -55,3 +106,85 @@ def _flate_level(level: Option[int]) -> Result[FlateCompression, CompressionErro match maybe: Some(value) => return Ok(FlateCompression.new(value)) None => return Err(_codec_error(Codec.Gzip, "compress", "level does not fit u32")) + + +def _compress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + compression: FlateCompression, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = GzEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Gzip), "compress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Gzip, "compress_stream", err.to_string())) + + +def _compress_file_stream( + source: File, + target: Union[_BytesIO, File], + compression: FlateCompression, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = GzEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Gzip), "compress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Gzip, "compress_stream", err.to_string())) + + +def _decompress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress gzip bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = GzDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Gzip), "decompress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Gzip, "decompress_stream", err.to_string())) + + +def _decompress_file_stream( + source: File, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress gzip bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = GzDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Gzip), "decompress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Gzip, "decompress_stream", err.to_string())) diff --git a/crates/incan_stdlib/stdlib/compression/lzma.incn b/crates/incan_stdlib/stdlib/compression/lzma.incn index b4b1f32b4..565a390b0 100644 --- a/crates/incan_stdlib/stdlib/compression/lzma.incn +++ b/crates/incan_stdlib/stdlib/compression/lzma.incn @@ -6,7 +6,9 @@ The public `std.compression.lzma` name exposes XZ-framed LZMA-family data throug from rust::std::io import Cursor, Read from rust::xz2::read @ "0.1" import XzDecoder, XzEncoder -from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default, _validate_chunk_size, _write_sink_bytes +from std.fs import File +from std.io import _BytesIO pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: @@ -44,6 +46,55 @@ pub def decompress(data: bytes) -> Result[bytes, CompressionError]: Err(err) => return Err(_codec_error(Codec.Lzma, "decompress", err.to_string())) +pub def compress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + level: Option[int] = None, + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Compress a binary stream as XZ/LZMA-family bytes. + + Args: + source: `BytesIO` or `File` positioned at plain input bytes. + target: `BytesIO` or `File` that receives XZ/LZMA-family bytes. + level: Optional portable compression level from 0 through 9. + chunk_size: Positive number of compressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the XZ/LZMA-family payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Lzma), "compress_stream", chunk_size)? + compression = _xz_level(level)? 
+ if isinstance(source, _BytesIO): + return _compress_bytesio_stream(source, target, compression, chunk_size) + elif isinstance(source, File): + return _compress_file_stream(source, target, compression, chunk_size) + + +pub def decompress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Decompress an XZ/LZMA-family binary stream. + + Args: + source: `BytesIO` or `File` positioned at XZ/LZMA-family bytes. + target: `BytesIO` or `File` that receives plain bytes. + chunk_size: Positive number of decompressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Lzma), "decompress_stream", chunk_size)? + if isinstance(source, _BytesIO): + return _decompress_bytesio_stream(source, target, chunk_size) + elif isinstance(source, File): + return _decompress_file_stream(source, target, chunk_size) + + def _xz_level(level: Option[int]) -> Result[u32, CompressionError]: """ Convert a portable LZMA level into the `xz2` numeric level. @@ -53,3 +104,85 @@ def _xz_level(level: Option[int]) -> Result[u32, CompressionError]: match maybe: Some(value) => return Ok(value) None => return Err(_codec_error(Codec.Lzma, "compress", "level does not fit u32")) + + +def _compress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + compression: u32, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = XzEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Lzma), "compress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Lzma, "compress_stream", err.to_string())) + + +def _compress_file_stream( + source: File, + target: Union[_BytesIO, File], + compression: u32, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = XzEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Lzma), "compress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Lzma, "compress_stream", err.to_string())) + + +def _decompress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress XZ/LZMA bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = XzDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Lzma), "decompress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Lzma, "decompress_stream", err.to_string())) + + +def _decompress_file_stream( + source: File, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress XZ/LZMA bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = XzDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Lzma), "decompress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Lzma, "decompress_stream", err.to_string())) diff --git a/crates/incan_stdlib/stdlib/compression/prelude.incn b/crates/incan_stdlib/stdlib/compression/prelude.incn index ed81bc8a8..82d806cc9 100644 --- a/crates/incan_stdlib/stdlib/compression/prelude.incn +++ b/crates/incan_stdlib/stdlib/compression/prelude.incn @@ -6,4 +6,4 @@ decompression when the caller deliberately opts in. """ from std.compression._core import Codec, CompressionError -from std.compression._auto import decompress_auto +from std.compression._auto import decompress_auto, decompress_auto_stream diff --git a/crates/incan_stdlib/stdlib/compression/snappy.incn b/crates/incan_stdlib/stdlib/compression/snappy.incn index 148255237..a34b52f71 100644 --- a/crates/incan_stdlib/stdlib/compression/snappy.incn +++ b/crates/incan_stdlib/stdlib/compression/snappy.incn @@ -7,7 +7,9 @@ autodetection. Raw block helpers live under `std.compression.snappy.raw`. from rust::std::io import Cursor, Read from rust::snap::read @ "1" import FrameDecoder, FrameEncoder -from std.compression._core import Codec, CompressionError, _codec_error, _reject_level +from std.compression._core import Codec, CompressionError, _codec_error, _reject_level, _validate_chunk_size, _write_sink_bytes +from std.fs import File +from std.io import _BytesIO pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: @@ -44,3 +46,132 @@ pub def decompress(data: bytes) -> Result[bytes, CompressionError]: match reader.read_to_end(out): Ok(_) => return Ok(out) Err(err) => return Err(_codec_error(Codec.Snappy, "decompress", err.to_string())) + + +pub def compress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + level: Option[int] = None, + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Compress a binary stream as framed Snappy bytes. + + Args: + source: `BytesIO` or `File` positioned at plain input bytes. 
+ target: `BytesIO` or `File` that receives framed Snappy bytes. + level: Must be `None`; Snappy does not expose a portable compression level. + chunk_size: Positive number of compressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the framed Snappy payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Snappy), "compress_stream", chunk_size)? + _reject_level(Codec.Snappy, level)? + if isinstance(source, _BytesIO): + return _compress_bytesio_stream(source, target, chunk_size) + elif isinstance(source, File): + return _compress_file_stream(source, target, chunk_size) + + +pub def decompress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Decompress a framed Snappy binary stream. + + Args: + source: `BytesIO` or `File` positioned at framed Snappy bytes. + target: `BytesIO` or `File` that receives plain bytes. + chunk_size: Positive number of decompressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Snappy), "decompress_stream", chunk_size)? + if isinstance(source, _BytesIO): + return _decompress_bytesio_stream(source, target, chunk_size) + elif isinstance(source, File): + return _decompress_file_stream(source, target, chunk_size) + + +def _compress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = FrameEncoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Snappy), "compress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Snappy, "compress_stream", err.to_string())) + + +def _compress_file_stream( + source: File, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = FrameEncoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Snappy), "compress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Snappy, "compress_stream", err.to_string())) + + +def _decompress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress framed Snappy bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = FrameDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Snappy), "decompress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Snappy, "decompress_stream", err.to_string())) + + +def _decompress_file_stream( + source: File, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress framed Snappy bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = FrameDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Snappy), "decompress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Snappy, "decompress_stream", err.to_string())) diff --git a/crates/incan_stdlib/stdlib/compression/zlib.incn b/crates/incan_stdlib/stdlib/compression/zlib.incn index 9be3ed42f..9940b91be 100644 --- a/crates/incan_stdlib/stdlib/compression/zlib.incn +++ b/crates/incan_stdlib/stdlib/compression/zlib.incn @@ -8,7 +8,9 @@ backend errors into `CompressionError`. from rust::std::io import Cursor, Read from rust::flate2 @ "1" import Compression as FlateCompression from rust::flate2::read @ "1" import ZlibDecoder, ZlibEncoder -from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default, _validate_chunk_size, _write_sink_bytes +from std.fs import File +from std.io import _BytesIO pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: @@ -46,6 +48,55 @@ pub def decompress(data: bytes) -> Result[bytes, CompressionError]: Err(err) => return Err(_codec_error(Codec.Zlib, "decompress", err.to_string())) +pub def compress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + level: Option[int] = None, + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Compress a binary stream as zlib-wrapped deflate bytes. + + Args: + source: `BytesIO` or `File` positioned at plain input bytes. + target: `BytesIO` or `File` that receives zlib bytes. + level: Optional portable compression level from 0 through 9. + chunk_size: Positive number of compressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the zlib payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Zlib), "compress_stream", chunk_size)? + compression = _flate_level(level)? 
+ if isinstance(source, _BytesIO): + return _compress_bytesio_stream(source, target, compression, chunk_size) + elif isinstance(source, File): + return _compress_file_stream(source, target, compression, chunk_size) + + +pub def decompress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Decompress a zlib-wrapped binary stream. + + Args: + source: `BytesIO` or `File` positioned at zlib bytes. + target: `BytesIO` or `File` that receives plain bytes. + chunk_size: Positive number of decompressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Zlib), "decompress_stream", chunk_size)? + if isinstance(source, _BytesIO): + return _decompress_bytesio_stream(source, target, chunk_size) + elif isinstance(source, File): + return _decompress_file_stream(source, target, chunk_size) + + def _flate_level(level: Option[int]) -> Result[FlateCompression, CompressionError]: """ Convert a portable zlib level into the `flate2` compression type. @@ -55,3 +106,85 @@ def _flate_level(level: Option[int]) -> Result[FlateCompression, CompressionErro match maybe: Some(value) => return Ok(FlateCompression.new(value)) None => return Err(_codec_error(Codec.Zlib, "compress", "level does not fit u32")) + + +def _compress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + compression: FlateCompression, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = ZlibEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zlib), "compress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Zlib, "compress_stream", err.to_string())) + + +def _compress_file_stream( + source: File, + target: Union[_BytesIO, File], + compression: FlateCompression, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = ZlibEncoder.new(Read.by_ref(handle), compression) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zlib), "compress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Zlib, "compress_stream", err.to_string())) + + +def _decompress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress zlib bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = ZlibDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zlib), "decompress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Zlib, "decompress_stream", err.to_string())) + + +def _decompress_file_stream( + source: File, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress zlib bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + mut reader = ZlibDecoder.new(Read.by_ref(handle)) + while true: + mut chunk: bytes = b"" + match reader.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zlib), "decompress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Zlib, "decompress_stream", err.to_string())) diff --git a/crates/incan_stdlib/stdlib/compression/zstd.incn b/crates/incan_stdlib/stdlib/compression/zstd.incn index f016b22bb..44bdcc46a 100644 --- a/crates/incan_stdlib/stdlib/compression/zstd.incn +++ b/crates/incan_stdlib/stdlib/compression/zstd.incn @@ -5,9 +5,12 @@ This module exposes zstd frames through one-shot byte helpers and keeps backend- `CompressionError`. """ -from rust::std::io import Cursor +from rust::std::io import Cursor, Read from rust::zstd::stream @ "0.13" import decode_all, encode_all -from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default +from rust::zstd::stream::read @ "0.13" import Decoder as ZstdReadDecoder, Encoder as ZstdReadEncoder +from std.compression._core import Codec, CompressionError, _codec_error, _level_or_default, _validate_chunk_size, _write_sink_bytes +from std.fs import File +from std.io import _BytesIO pub def compress(data: bytes, level: Option[int] = None) -> Result[bytes, CompressionError]: @@ -41,6 +44,55 @@ pub def decompress(data: bytes) -> Result[bytes, CompressionError]: Err(err) => return Err(_codec_error(Codec.Zstd, "decompress", err)) +pub def compress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + level: Option[int] = None, + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Compress a binary stream as zstd frame bytes. + + Args: + source: `BytesIO` or `File` positioned at plain input bytes. + target: `BytesIO` or `File` that receives zstd frame bytes. + level: Optional portable zstd level from -7 through 22. + chunk_size: Positive number of compressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the zstd payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Zstd), "compress_stream", chunk_size)? + compression = _zstd_level(level)? 
+ if isinstance(source, _BytesIO): + return _compress_bytesio_stream(source, target, compression, chunk_size) + elif isinstance(source, File): + return _compress_file_stream(source, target, compression, chunk_size) + + +pub def decompress_stream( + source: Union[_BytesIO, File], + target: Union[_BytesIO, File], + chunk_size: int = 65536, +) -> Result[None, CompressionError]: + """ + Decompress a zstd binary stream. + + Args: + source: `BytesIO` or `File` positioned at zstd bytes. + target: `BytesIO` or `File` that receives plain bytes. + chunk_size: Positive number of decompressed bytes to move per step. + + Returns: + `Ok(None)` after streaming the payload, or `Err(CompressionError)`. + """ + _validate_chunk_size(Some(Codec.Zstd), "decompress_stream", chunk_size)? + if isinstance(source, _BytesIO): + return _decompress_bytesio_stream(source, target, chunk_size) + elif isinstance(source, File): + return _decompress_file_stream(source, target, chunk_size) + + def _zstd_level(level: Option[int]) -> Result[i32, CompressionError]: """ Convert a portable zstd level into the backend's `i32` level. @@ -50,3 +102,97 @@ def _zstd_level(level: Option[int]) -> Result[i32, CompressionError]: match maybe: Some(value) => return Ok(value) None => return Err(_codec_error(Codec.Zstd, "compress", "level does not fit i32")) + + +def _compress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + compression: i32, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + match ZstdReadEncoder.new(Read.by_ref(handle), compression): + Ok(reader) => + mut adapter = reader + while true: + mut chunk: bytes = b"" + match adapter.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zstd), "compress_stream", target, chunk)? 
+ Err(err) => return Err(_codec_error(Codec.Zstd, "compress_stream", err.to_string())) + Err(err) => return Err(_codec_error(Codec.Zstd, "compress_stream", err.to_string())) + + +def _compress_file_stream( + source: File, + target: Union[_BytesIO, File], + compression: i32, + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Compress bytes from a file stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + match ZstdReadEncoder.new(Read.by_ref(handle), compression): + Ok(reader) => + mut adapter = reader + while true: + mut chunk: bytes = b"" + match adapter.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zstd), "compress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Zstd, "compress_stream", err.to_string())) + Err(err) => return Err(_codec_error(Codec.Zstd, "compress_stream", err.to_string())) + + +def _decompress_bytesio_stream( + source: _BytesIO, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress zstd bytes from an in-memory stream into a binary sink. + """ + mut handle = source.handle.borrow_mut() + match ZstdReadDecoder.new(Read.by_ref(handle)): + Ok(reader) => + mut adapter = reader + while true: + mut chunk: bytes = b"" + match adapter.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zstd), "decompress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Zstd, "decompress_stream", err.to_string())) + Err(err) => return Err(_codec_error(Codec.Zstd, "decompress_stream", err.to_string())) + + +def _decompress_file_stream( + source: File, + target: Union[_BytesIO, File], + chunk_size: int, +) -> Result[None, CompressionError]: + """ + Decompress zstd bytes from a file stream into a binary sink. 
+ """ + mut handle = source.handle.borrow_mut() + match ZstdReadDecoder.new(Read.by_ref(handle)): + Ok(reader) => + mut adapter = reader + while true: + mut chunk: bytes = b"" + match adapter.by_ref().take(chunk_size).read_to_end(chunk): + Ok(_) => + if len(chunk) == 0: + return Ok(None) + _write_sink_bytes(Some(Codec.Zstd), "decompress_stream", target, chunk)? + Err(err) => return Err(_codec_error(Codec.Zstd, "decompress_stream", err.to_string())) + Err(err) => return Err(_codec_error(Codec.Zstd, "decompress_stream", err.to_string())) diff --git a/crates/incan_stdlib/stdlib/io.incn b/crates/incan_stdlib/stdlib/io.incn index efb9fdeff..8648d5cf1 100644 --- a/crates/incan_stdlib/stdlib/io.incn +++ b/crates/incan_stdlib/stdlib/io.incn @@ -117,7 +117,7 @@ pub class _BytesIO with BinaryReader, BinaryRead[u8], BinaryRead[i8], BinaryRead ``` """ - handle: Rc[RefCell[Cursor[bytes]]] + pub handle: Rc[RefCell[Cursor[bytes]]] def read(self, size: int = -1) -> Result[bytes, IoError]: """ diff --git a/tests/fixtures/valid/std_compression_surface.incn b/tests/fixtures/valid/std_compression_surface.incn index 059314a77..b0d2c391c 100644 --- a/tests/fixtures/valid/std_compression_surface.incn +++ b/tests/fixtures/valid/std_compression_surface.incn @@ -1,5 +1,8 @@ -from std.compression import Codec, CompressionError, bz2, deflate, decompress_auto, gzip, lzma, snappy, zlib, zstd +from std.compression import Codec, CompressionError, bz2, deflate, decompress_auto, decompress_auto_stream, gzip, lzma, snappy, zlib, zstd from std.compression.snappy.raw import compress as raw_snappy_compress, decompress as raw_snappy_decompress +from std.fs import Path +from std.io import BytesIO, IoError +from std.tempfile import TemporaryDirectory def assert_round_trip(name: str, plain: bytes, decoded: bytes) -> None: @@ -7,23 +10,149 @@ def assert_round_trip(name: str, plain: bytes, decoded: bytes) -> None: println(f"{name} round trip ok") +def assert_auto(compressed: bytes, expected: Codec, plain: 
bytes) -> Result[None, CompressionError]: + codec, decoded = decompress_auto(compressed, [expected])? + assert codec == expected + assert decoded == plain + + target = BytesIO() + stream_codec = decompress_auto_stream(BytesIO(compressed), target, [expected], 2)? + assert stream_codec == expected + assert target.getvalue() == plain + return Ok(None) + + +def must_compression_none(result: Result[None, CompressionError]) -> None: + match result: + Ok(_) => return + Err(_) => assert false + + +def exercise_file_stream(data: bytes) -> Result[None, IoError]: + scratch = TemporaryDirectory.try_new()? + root: Path = scratch.path() + source_path = root.joinpath("plain.bin") + compressed_path = root.joinpath("plain.bin.gz") + output_path = root.joinpath("plain.out") + + source_path.write_bytes(data)? + source = source_path.open("rb")? + compressed = compressed_path.open("wb")? + must_compression_none(gzip.compress_stream(source, compressed, None, 4)) + compressed.flush()? + + compressed_source = compressed_path.open("rb")? + output = output_path.open("wb")? + must_compression_none(gzip.decompress_stream(compressed_source, output, 3)) + output.flush()? + + assert output_path.read_bytes()? == data + println("file stream round trip ok") + return Ok(None) + + def exercise_compression() -> Result[None, CompressionError]: data = b"Incan compression dogfood" gzip_data = gzip.compress(data, None)? assert_round_trip("gzip", data, gzip.decompress(gzip_data)?) - codec, decoded = decompress_auto(gzip_data, [Codec.Gzip])? - assert codec == Codec.Gzip - assert decoded == data + assert_auto(gzip_data, Codec.Gzip, data)? + + zlib_data = zlib.compress(data, None)? + zstd_data = zstd.compress(data, None)? + bz2_data = bz2.compress(data, None)? + lzma_data = lzma.compress(data, None)? + snappy_data = snappy.compress(data, None)? - assert_round_trip("zlib", data, zlib.decompress(zlib.compress(data, None)?)?) + assert_round_trip("zlib", data, zlib.decompress(zlib_data)?) 
assert_round_trip("deflate", data, deflate.decompress(deflate.compress(data, None)?)?) - assert_round_trip("zstd", data, zstd.decompress(zstd.compress(data, None)?)?) - assert_round_trip("bz2", data, bz2.decompress(bz2.compress(data, None)?)?) - assert_round_trip("lzma", data, lzma.decompress(lzma.compress(data, None)?)?) - assert_round_trip("snappy", data, snappy.decompress(snappy.compress(data, None)?)?) + assert_round_trip("zstd", data, zstd.decompress(zstd_data)?) + assert_round_trip("bz2", data, bz2.decompress(bz2_data)?) + assert_round_trip("lzma", data, lzma.decompress(lzma_data)?) + assert_round_trip("snappy", data, snappy.decompress(snappy_data)?) assert_round_trip("snappy.raw", data, raw_snappy_decompress(raw_snappy_compress(data, None)?)?) + assert_auto(zlib_data, Codec.Zlib, data)? + assert_auto(zstd_data, Codec.Zstd, data)? + assert_auto(bz2_data, Codec.Bz2, data)? + assert_auto(lzma_data, Codec.Lzma, data)? + assert_auto(snappy_data, Codec.Snappy, data)? + println("autodetection ok") + + gzip_stream = BytesIO() + gzip.compress_stream(BytesIO(data), gzip_stream, None, 3)? + gzip_plain = BytesIO() + gzip.decompress_stream(BytesIO(gzip_stream.getvalue()), gzip_plain, 2)? + assert gzip_plain.getvalue() == data + + zlib_stream = BytesIO() + zlib.compress_stream(BytesIO(data), zlib_stream, None, 3)? + zlib_plain = BytesIO() + zlib.decompress_stream(BytesIO(zlib_stream.getvalue()), zlib_plain, 2)? + assert zlib_plain.getvalue() == data + + deflate_stream = BytesIO() + deflate.compress_stream(BytesIO(data), deflate_stream, None, 3)? + deflate_plain = BytesIO() + deflate.decompress_stream(BytesIO(deflate_stream.getvalue()), deflate_plain, 2)? + assert deflate_plain.getvalue() == data + + zstd_stream = BytesIO() + zstd.compress_stream(BytesIO(data), zstd_stream, None, 3)? + zstd_plain = BytesIO() + zstd.decompress_stream(BytesIO(zstd_stream.getvalue()), zstd_plain, 2)? 
+ assert zstd_plain.getvalue() == data + + bz2_stream = BytesIO() + bz2.compress_stream(BytesIO(data), bz2_stream, None, 3)? + bz2_plain = BytesIO() + bz2.decompress_stream(BytesIO(bz2_stream.getvalue()), bz2_plain, 2)? + assert bz2_plain.getvalue() == data + + lzma_stream = BytesIO() + lzma.compress_stream(BytesIO(data), lzma_stream, None, 3)? + lzma_plain = BytesIO() + lzma.decompress_stream(BytesIO(lzma_stream.getvalue()), lzma_plain, 2)? + assert lzma_plain.getvalue() == data + + snappy_stream = BytesIO() + snappy.compress_stream(BytesIO(data), snappy_stream, None, 3)? + snappy_plain = BytesIO() + snappy.decompress_stream(BytesIO(snappy_stream.getvalue()), snappy_plain, 2)? + assert snappy_plain.getvalue() == data + println("stream round trips ok") + match exercise_file_stream(data): + Ok(_) => pass + Err(_) => assert false + + match gzip.compress_stream(BytesIO(data), BytesIO(), None, 0): + Ok(_) => assert false + Err(err) => assert err.kind == "invalid_chunk_size" + match gzip.decompress_stream(BytesIO(gzip_data), BytesIO(), 0): + Ok(_) => assert false + Err(err) => assert err.kind == "invalid_chunk_size" + match decompress_auto_stream(BytesIO(gzip_data), BytesIO(), [Codec.Gzip], 0): + Ok(_) => assert false + Err(err) => assert err.kind == "invalid_chunk_size" + match snappy.compress_stream(BytesIO(data), BytesIO(), Some(1), 3): + Ok(_) => assert false + Err(err) => assert err.kind == "unsupported_option" + match zstd.compress(data, Some(99)): + Ok(_) => assert false + Err(err) => assert err.kind == "invalid_level" + + allowed: list[Codec] = [] + match decompress_auto(gzip_data, allowed): + Ok(_) => assert false + Err(err) => assert err.kind == "unsupported_codec" + match decompress_auto(gzip_data, [Codec.Zstd]): + Ok(_) => assert false + Err(err) => assert err.kind == "unsupported_codec" + match decompress_auto_stream(BytesIO(gzip_data), BytesIO(), [Codec.Zstd], 2): + Ok(_) => assert false + Err(err) => assert err.kind == "unsupported_codec" + 
println("option and chunk errors ok") + return Ok(None) diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index bcabd4c81..6018be5fb 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -6677,6 +6677,10 @@ def main() -> None: "lzma round trip ok", "snappy round trip ok", "snappy.raw round trip ok", + "autodetection ok", + "stream round trips ok", + "file stream round trip ok", + "option and chunk errors ok", ], "unexpected std.compression output: {stdout}" ); diff --git a/workspaces/docs-site/docs/RFCs/061_std_compression.md b/workspaces/docs-site/docs/RFCs/closed/implemented/061_std_compression.md similarity index 93% rename from workspaces/docs-site/docs/RFCs/061_std_compression.md rename to workspaces/docs-site/docs/RFCs/closed/implemented/061_std_compression.md index 037dc6746..740928e8a 100644 --- a/workspaces/docs-site/docs/RFCs/061_std_compression.md +++ b/workspaces/docs-site/docs/RFCs/closed/implemented/061_std_compression.md @@ -1,6 +1,6 @@ # RFC 061: `std.compression` — codec-based compression and decompression -- **Status:** In Progress +- **Status:** Implemented - **Created:** 2026-04-14 - **Author(s):** Danny Meijer (@dannymeijer) - **Related:** @@ -11,7 +11,7 @@ - **Issue:** https://github.com/dannys-code-corner/incan/issues/339 - **RFC PR:** — - **Written against:** v0.2 -- **Shipped in:** — +- **Shipped in:** v0.3.0-dev.47 ## Summary @@ -323,13 +323,11 @@ This feature is additive. Existing Rust interop or third-party compression code ## Progress Checklist -Implementation note: the dogfooded `.incn` pass avoids new `@rust.extern` surfaces and builds as a generated Rust -project for one-shot compression, decompression, and explicit byte autodetection. Issue -[#548](https://github.com/dannys-code-corner/incan/issues/548) was resolved in this implementation loop by preserving -public stdlib dependency APIs in generated projects and fixing the Rust boundary cases surfaced by the RFC 061 fixture. 
-Streaming APIs are intentionally not exposed until they can satisfy the `std.fs.File | std.io.BytesIO` incremental -contract instead of pretending that whole-buffer wrappers are stream support. Authored docs, release notes, versioning, -and broader integration coverage remain open. +Implementation note: the dogfooded `.incn` implementation avoids new `@rust.extern` surfaces and builds as a generated +Rust project for one-shot compression, stream compression/decompression, explicit byte autodetection, and explicit +stream autodetection. Issue [#548](https://github.com/dannys-code-corner/incan/issues/548) was resolved in this +implementation loop by preserving public stdlib dependency APIs in generated projects and fixing the Rust boundary cases +surfaced by the RFC 061 fixture. ### Spec / lifecycle @@ -357,15 +355,15 @@ and broader integration coverage remain open. - [x] Implement one-shot `compress` and `decompress` for every required codec at the source/typecheck layer. - [x] Implement one-shot `compress` and `decompress` for every required codec in a Rust-buildable generated project. -- [ ] Implement stream `compress_stream` and `decompress_stream` for every required codec. -- [ ] Reject non-positive `chunk_size` values at the source/typecheck layer. +- [x] Implement stream `compress_stream` and `decompress_stream` for every required codec. +- [x] Reject non-positive `chunk_size` values at the source/typecheck layer. - [x] Reject unsupported compression levels through stable error categories at the source/typecheck layer. - [x] Normalize invalid data, truncated input, I/O, and backend failures into `CompressionError` at the source/typecheck layer. ### Autodetection - [x] Implement `decompress_auto` at the source/typecheck layer. -- [ ] Implement `decompress_auto_stream` at the source/typecheck layer. +- [x] Implement `decompress_auto_stream` at the source/typecheck layer. - [x] Enforce the `allowed` codec filter exactly at the source/typecheck layer. 
- [x] Reject empty `allowed` lists at the source/typecheck layer. - [x] Exclude raw Snappy from autodetection. @@ -376,18 +374,18 @@ and broader integration coverage remain open. - [x] Add typechecker tests for imports and public symbols. - [x] Add codegen snapshot coverage for root and codec stdlib modules. - [x] Add integration tests for bytes round trips. -- [ ] Add integration tests for stream round trips. -- [ ] Add integration tests for option/chunk-size errors. -- [ ] Add integration tests for autodetection. +- [x] Add integration tests for stream round trips. +- [x] Add integration tests for option/chunk-size errors. +- [x] Add integration tests for autodetection. ### Docs / release / version -- [ ] Add authored stdlib reference docs for `std.compression`. -- [ ] Add release notes entry for RFC 061 / issue #339. -- [ ] Bump the workspace dev version when the implementation lands. -- [ ] Regenerate RFC snippets/index if lifecycle metadata changes require it. -- [ ] Run docs build. -- [ ] Run repository verification gate. +- [x] Add authored stdlib reference docs for `std.compression`. +- [x] Add release notes entry for RFC 061 / issue #339. +- [x] Bump the workspace dev version when the implementation lands. +- [x] Regenerate RFC snippets/index if lifecycle metadata changes require it. +- [x] Run docs build. +- [x] Run repository verification gate. ## Design Decisions diff --git a/workspaces/docs-site/docs/language/reference/stdlib/compression.md b/workspaces/docs-site/docs/language/reference/stdlib/compression.md new file mode 100644 index 000000000..496215d4a --- /dev/null +++ b/workspaces/docs-site/docs/language/reference/stdlib/compression.md @@ -0,0 +1,109 @@ +# std.compression reference + +`std.compression` provides codec-based compression and decompression for byte payloads, `BytesIO` streams, and +`std.fs.File` handles. 
+ +```incan +from std.compression import gzip, decompress_auto, Codec +from std.io import BytesIO +``` + +Use explicit codec modules when the data format is known. Use autodetection only when the input may be one of several +framed formats and the caller is willing to handle a no-match error. + +## Codec Modules + +`std.compression` exposes these codec namespaces: + +| Namespace | Format | +| --------- | ------ | +| `gzip` | gzip-wrapped deflate | +| `zlib` | zlib-wrapped deflate | +| `deflate` | raw deflate | +| `zstd` | zstd frames | +| `bz2` | bzip2 streams | +| `lzma` | XZ/LZMA-family streams | +| `snappy` | framed Snappy | +| `snappy.raw` | raw Snappy blocks for advanced interop | + +Each top-level codec namespace exposes one-shot helpers: + +```incan +compressed = gzip.compress(payload, level=None)? +plain = gzip.decompress(compressed)? +``` + +The default `snappy` namespace uses framed Snappy. Raw Snappy is available under `std.compression.snappy.raw`, but it is +not part of autodetection because raw blocks do not carry a reliable frame signature. + +## Streaming + +Every required codec module exposes stream helpers over `std.io.BytesIO` and `std.fs.File`: + +```incan +from std.compression import zstd +from std.fs import Path + +source = Path("events.jsonl.zst").open("rb")? +target = Path("events.jsonl").open("wb")? +zstd.decompress_stream(source, target)? +target.flush()? +``` + +`compress_stream(source, target, level=None, chunk_size=65536)` reads plain bytes from `source` and writes compressed +bytes to `target`. `decompress_stream(source, target, chunk_size=65536)` reads compressed bytes and writes plain bytes. + +`chunk_size` must be positive. A non-positive value returns a `CompressionError` with `kind == "invalid_chunk_size"`. + +## Levels + +`level=None` selects the codec default. 
+ +| Codec | Supported levels | +| ----- | ---------------- | +| `gzip`, `zlib`, `deflate`, `bz2`, `lzma` | `0` through `9` | +| `zstd` | `-7` through `22` | +| `snappy`, `snappy.raw` | no configurable level | + +Codecs with numeric levels return `CompressionError(kind="invalid_level", ...)` for out-of-range values. Snappy returns +`CompressionError(kind="unsupported_option", ...)` when a level is supplied. + +## Autodetection + +Autodetection is explicit and decompression-only: + +```incan +codec, plain = decompress_auto(blob, allowed=[Codec.Gzip, Codec.Zstd])? +``` + +`decompress_auto(data, allowed=Codec.all())` returns `(Codec, bytes)`. `decompress_auto_stream(source, target, +allowed=Codec.all(), chunk_size=65536)` writes the decoded stream to `target` and returns the detected `Codec`. + +The `allowed` list is binding. An empty list or a payload whose signature does not match any allowed codec returns +`CompressionError(kind="unsupported_codec", ...)`. + +Autodetection uses signatures and framing bytes only. It does not inspect file extensions, paths, or MIME types. Raw +deflate and raw Snappy are not guessed because they do not have reliable framing signatures. + +## Errors + +Fallible helpers return `Result[..., CompressionError]`. + +| Field | Meaning | +| ----- | ------- | +| `kind` | Stable category such as `invalid_data`, `truncated_input`, `unsupported_codec`, `unsupported_option`, `invalid_level`, `invalid_chunk_size`, `io`, or `backend`. | +| `codec` | The codec involved in the failure, when known. | +| `operation` | The operation that failed, such as `compress`, `decompress_stream`, or `decompress_auto_stream`. | +| `detail` | Backend or stdlib validation detail for diagnostics. 
| + +```incan +match gzip.decompress(blob): + Ok(plain) => println(len(plain)) + Err(err) => println(err.kind) +``` + +## Boundaries + +`std.compression` does not provide archive containers such as ZIP or TAR, dictionary training APIs, authenticated +encryption, checksums, or password hashing. Those require separate APIs because their compatibility and security +contracts are different from byte compression. diff --git a/workspaces/docs-site/docs/language/reference/stdlib/index.md b/workspaces/docs-site/docs/language/reference/stdlib/index.md index b58c1da76..41a6c406f 100644 --- a/workspaces/docs-site/docs/language/reference/stdlib/index.md +++ b/workspaces/docs-site/docs/language/reference/stdlib/index.md @@ -8,6 +8,7 @@ Pages in this section are curated and checked into the repository. - [`std.async`](async.md) (curated) - [`std.collections`](collections.md) (curated) +- [`std.compression`](compression.md) (curated) - [`std.derives.*`](derives.md) (curated) - [`std.datetime`](datetime.md) (curated; see also [tutorial](../../tutorials/dates_and_times.md), [how-to](../../how-to/dates_and_times.md), and [model](../../explanation/datetime_model.md)) - [`std.encoding`](encoding.md) (curated) diff --git a/workspaces/docs-site/docs/release_notes/0_3.md b/workspaces/docs-site/docs/release_notes/0_3.md index 4446b1473..f99174551 100644 --- a/workspaces/docs-site/docs/release_notes/0_3.md +++ b/workspaces/docs-site/docs/release_notes/0_3.md @@ -2,7 +2,7 @@ Incan 0.3 is the current development release. It picks up after the `0.2` line, which made the language surface more explicit around stdlib imports, Rust interop, library manifests, module state, and call-site generics. -`0.3` now includes a larger numeric surface, a new control-flow surface, richer enum behavior, Rust trait adoption from Incan-owned wrappers, graph, collections, datetime, logging, encoding, and hashing stdlib surfaces, iterator adapter chains, Result combinators, and tighter tooling contracts. 
RFC 009 makes numeric annotations precise enough for Rust interop, wire formats, data schemas, and fixed-scale decimal values; RFC 016 adds `loop:` and `break <value>` so loops can produce values directly; RFC 030 adds `std.collections` for specialized collection semantics; RFC 047 adds `std.graph` for explicit in-memory dependency and plan graphs; RFC 064 adds `std.encoding` for strict-by-default binary-text transforms; RFC 065 adds `std.hash` for stable byte, file, reader, cryptographic, compatibility, and non-cryptographic hashing workflows; RFC 050 lets enums declare methods and adopt traits; RFC 043 starts Rust trait implementation authoring from Incan source with `with Trait`, method-level `for Trait`, and associated type declarations on newtypes and rusttypes; RFC 088 standardizes lazy iterator adapters and terminal consumers; RFC 070 adds Rust-shaped `Result[T, E]` composition with `map`, `map_err`, `and_then`, `or_else`, `inspect`, and `inspect_err`; RFC 053 tightens the formatter contract so output is less dependent on local heuristics and more predictable across CLI, editor, and library entry points; RFC 058 adds Rust-backed runtime timing plus source-defined civil temporal values, fixed UTC offsets, Python-shaped parsing/formatting, and interval arithmetic; and RFC 072 adds source-defined structured logging. +`0.3` now includes a larger numeric surface, a new control-flow surface, richer enum behavior, Rust trait adoption from Incan-owned wrappers, graph, collections, datetime, logging, encoding, hashing, and compression stdlib surfaces, iterator adapter chains, Result combinators, and tighter tooling contracts. 
RFC 009 makes numeric annotations precise enough for Rust interop, wire formats, data schemas, and fixed-scale decimal values; RFC 016 adds `loop:` and `break <value>` so loops can produce values directly; RFC 030 adds `std.collections` for specialized collection semantics; RFC 047 adds `std.graph` for explicit in-memory dependency and plan graphs; RFC 064 adds `std.encoding` for strict-by-default binary-text transforms; RFC 065 adds `std.hash` for stable byte, file, reader, cryptographic, compatibility, and non-cryptographic hashing workflows; RFC 061 adds `std.compression` for byte, stream, and explicit autodetected compression workflows; RFC 050 lets enums declare methods and adopt traits; RFC 043 starts Rust trait implementation authoring from Incan source with `with Trait`, method-level `for Trait`, and associated type declarations on newtypes and rusttypes; RFC 088 standardizes lazy iterator adapters and terminal consumers; RFC 070 adds Rust-shaped `Result[T, E]` composition with `map`, `map_err`, `and_then`, `or_else`, `inspect`, and `inspect_err`; RFC 053 tightens the formatter contract so output is less dependent on local heuristics and more predictable across CLI, editor, and library entry points; RFC 058 adds Rust-backed runtime timing plus source-defined civil temporal values, fixed UTC offsets, Python-shaped parsing/formatting, and interval arithmetic; and RFC 072 adds source-defined structured logging. This page will grow as more `0.3` work lands. If you are looking for the shipped `0.2` story, start with [Release 0.2](0_2.md). 
@@ -27,6 +27,7 @@ The release is still early, but the initial direction is already visible: - binary-text encoding should have explicit stdlib modules for strict and lenient value plus finite source/sink transforms, with variant choices visible in API names - specialized collection semantics should have explicit stdlib types instead of forcing every queue, multiset, ordered map, sorted set, layered map, or priority queue through bare builtin containers - byte, file, and reader hashing should have explicit algorithm namespaces, with cryptographic and compatibility digests separated from fast non-cryptographic integer helpers +- compression should be codec-explicit by default, with stream helpers and explicit autodetection rather than hidden format guessing - user-facing tooling behavior should match the docs closely enough that CI and editor integrations can rely on it - testing should feel like a first-class workflow, with inline unit tests, fixtures, parametrization, selection, scheduling, and machine-readable reports owned by Incan rather than delegated to ad hoc scripts - iterator pipelines should be lazy by default, with terminal consumers such as `.collect()`, `.count()`, `.any()`, `.all()`, `.find()`, and `.fold()` making realization or summarization explicit @@ -343,6 +344,30 @@ These are ordinary Incan stdlib APIs. The public surface is source-owned under ` See also: [std.encoding reference](../language/reference/stdlib/encoding.md), [RFC 064]. +### RFC 061 `std.compression` + +`std.compression` now provides codec-based compression and decompression for `gzip`, `zlib`, raw `deflate`, `zstd`, +`bz2`, XZ/LZMA-family streams, framed `snappy`, and advanced raw Snappy interop. + +```incan +from std.compression import gzip, decompress_auto, Codec + +compressed = gzip.compress(payload)? +codec, plain = decompress_auto(compressed, [Codec.Gzip])? +``` + +Every required codec exposes one-shot byte helpers and stream helpers over `std.io.BytesIO` and `std.fs.File`. 
+Autodetection is decompression-only and opt-in through `decompress_auto(...)` or `decompress_auto_stream(...)`; it uses +framing signatures, respects the caller's `allowed` filter exactly, and never guesses from file extensions or MIME +types. The public error boundary is `CompressionError`, with stable categories for invalid data, truncated input, +unsupported codecs/options, invalid levels, invalid chunk sizes, I/O failures, and backend failures. + +The implementation is dogfooded in Incan stdlib source using ordinary Rust crate imports for the codec boundary rather +than new `@rust.extern` function or type implementation surfaces. The generated-project regression fixture covers +one-shot, BytesIO stream, file stream, autodetection, option error, and chunk-size error behavior. + +See also: [std.compression reference](../language/reference/stdlib/compression.md), [RFC 061]. + ## Detailed inventory The sections above are the release story. The list below is the detailed inventory of language, compiler, runtime, tooling, and docs changes that have landed for `0.3`, grouped roughly by theme rather than by the order work happened to land. @@ -423,6 +448,7 @@ The sections above are the release story. The list below is the detailed invento - **Language/Stdlib**: `std.datetime` adds Rust `std::time`-backed `Duration`, `Instant`, and `SystemTime`, plus source-defined Incan civil values for dates, times, naive datetimes, fixed UTC offsets, fixed-offset datetimes, UTC civil clock factories, day/time intervals, year/month intervals, compound datetime intervals, ISO-style parsing/formatting, Python-shaped `strftime` / `strptime` with nanosecond `%f`, deterministic calendar arithmetic, and interval normalization (#292, RFC 058). - **Language/Stdlib**: `std.collections` adds explicit specialized collection types for double-ended queues, multisets, default-valued maps, ordered maps and sets, sorted maps and sets, layered maps, and priority queues. 
The namespace is registered as an ordinary source stdlib module with no feature gate, no extra Cargo dependencies, and no Rust-backed stdlib dispatch (#164, RFC 030). - **Language/Stdlib**: `std.encoding` adds strict-by-default binary-text transform modules for hex, base32, base64, base85, base58, and Bech32/Bech32m, with explicit variant function names, separately named lenient decoders, and source/sink helpers that compose with `std.fs.Path` and `std.io.BytesIO` (#342, RFC 064). +- **Language/Stdlib**: `std.compression` adds codec namespaces for gzip, zlib, raw deflate, zstd, bzip2, XZ/LZMA, framed Snappy, and raw Snappy interop, with source-defined one-shot helpers, stream helpers over `std.fs.File` and `std.io.BytesIO`, explicit decompression autodetection, stable `Codec` and `CompressionError` vocabulary, stdlib-managed generated-project dependencies, and generated-project regression coverage for issue #548 (#339, #548, RFC 061). - **Language/Compiler**: RFC 029 adds anonymous closed union annotations with canonical `Union[A, B, ...]` and `A | B` syntax. The compiler normalizes duplicates, nested unions, ordering, and `None`-containing unions, accepts member-to-union and union-to-union assignability, lowers ordinary unions to generated closed Rust enums, preserves `None` unions on the existing `Option[...]` path, and supports `isinstance` narrowing for true branches, else branches, wider unions, chained `elif` branches, and `Option[Union[...]]`, plus `is None` / `is not None` narrowing and exhaustive `match` type patterns (#163, RFC 029). - **Language/Stdlib**: RFC 028 expands `std.traits.ops` into the nominal operator capability vocabulary for custom types, including `FloorDiv`, `Pow`, shifts, bitwise operators, pipe operators, `MatMul`, unary `Not`, `GetItem` / `SetItem`, and explicit in-place compound-assignment traits for `+=`, `-=`, `*=`, `/=`, `//=`, `%=`, `@=`, `&=`, `|=`, `^=`, `<<=`, and `>>=` (#162, RFC 028). 
- **Language/Stdlib**: RFC 055 introduces `std.fs` as the path-centric filesystem module: `Path`, `File`, `OpenOptions`, directory entries, metadata, disk usage, structured `IoError`, whole-file byte/text helpers, chunked file handles, traversal, globbing, copy/move, recursive deletion, links, permissions, and explicit durability syncs (#286, RFC 055). @@ -476,6 +502,7 @@ The sections above are the release story. The list below is the detailed invento - **Compiler internals**: Typechecker lowering metadata is grouped into explicit semantic artifact families instead of one flat `TypeCheckInfo` payload, narrowing the backend-facing contract without changing language behavior (#283). - **Project metadata**: Workspace package metadata and Cargo lock entries now identify the development line as `0.3.0-dev.44`. - **Project metadata**: Workspace package metadata and Cargo lock entries now identify the development line as `0.3.0-dev.45`. +- **Project metadata**: Workspace package metadata and Cargo lock entries now identify the development line as `0.3.0-dev.47`. ## Known limitations (0.3) @@ -509,6 +536,7 @@ The sections above are the release story. 
The list below is the detailed invento - Path-centric filesystem APIs: [RFC 055] - `std.datetime` temporal values and intervals: [RFC 058] - UUID parsing, formatting, inspection, and generation: [RFC 060](../RFCs/closed/implemented/060_std_uuid.md) +- Codec-based compression and decompression: [RFC 061](../RFCs/closed/implemented/061_std_compression.md) - Binary-text encoding and decoding utilities: [RFC 064] - Byte, file, reader, cryptographic, compatibility, and non-cryptographic hashing: [RFC 065] - Targeted generated-Rust lint suppression: [RFC 057] diff --git a/workspaces/docs-site/mkdocs.yml b/workspaces/docs-site/mkdocs.yml index d60314b07..f8bfd1282 100644 --- a/workspaces/docs-site/mkdocs.yml +++ b/workspaces/docs-site/mkdocs.yml @@ -163,6 +163,7 @@ nav: - Overview: language/reference/stdlib/index.md - std.async: language/reference/stdlib/async.md - std.collections: language/reference/stdlib/collections.md + - std.compression: language/reference/stdlib/compression.md - std.derives.*: language/reference/stdlib/derives.md - std.datetime: language/reference/stdlib/datetime.md - std.encoding: language/reference/stdlib/encoding.md From 85e2692cb53de45e7ae530838338b6887baa9510 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Mon, 18 May 2026 10:46:38 +0200 Subject: [PATCH 3/8] docs - mark RFC 061 shipped in 0.3.0 (#339) --- .../docs/RFCs/closed/implemented/061_std_compression.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workspaces/docs-site/docs/RFCs/closed/implemented/061_std_compression.md b/workspaces/docs-site/docs/RFCs/closed/implemented/061_std_compression.md index 740928e8a..8bc865ca3 100644 --- a/workspaces/docs-site/docs/RFCs/closed/implemented/061_std_compression.md +++ b/workspaces/docs-site/docs/RFCs/closed/implemented/061_std_compression.md @@ -11,7 +11,7 @@ - **Issue:** https://github.com/dannys-code-corner/incan/issues/339 - **RFC PR:** — - **Written against:** v0.2 -- **Shipped in:** v0.3.0-dev.47 +- **Shipped in:** 0.3.0 
## Summary From 68fab2a1a774614040d8a62b1d55f01980c7a69e Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Mon, 18 May 2026 10:50:58 +0200 Subject: [PATCH 4/8] docs - add std.compression how-to (#339) --- .../docs/language/how-to/compression.md | 168 ++++++++++++++++++ .../language/reference/stdlib/compression.md | 7 + .../docs-site/docs/release_notes/0_3.md | 3 +- workspaces/docs-site/mkdocs.yml | 1 + 4 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 workspaces/docs-site/docs/language/how-to/compression.md diff --git a/workspaces/docs-site/docs/language/how-to/compression.md b/workspaces/docs-site/docs/language/how-to/compression.md new file mode 100644 index 000000000..16d9b837c --- /dev/null +++ b/workspaces/docs-site/docs/language/how-to/compression.md @@ -0,0 +1,168 @@ +# Compress and Decompress Data + +Use `std.compression` when a program needs to read or write compressed byte payloads, compressed files, or codec-framed +data from another system. + +```incan +from std.compression import Codec, CompressionError, decompress_auto, gzip, zstd +from std.io import BytesIO +``` + +Compression is not archive handling. Use `std.compression` for byte streams such as gzip, zstd, bzip2, XZ/LZMA, or +Snappy. Archive containers such as ZIP and TAR have entry names, permissions, directory traversal risks, and extraction +rules that belong in archive-specific APIs. + +## Choose the Codec at the Boundary + +Prefer an explicit codec whenever the format is known from a protocol, file extension, header, configuration value, or +caller contract. 
+ +| Situation | Prefer | +| --- | --- | +| HTTP-style payloads or `.gz` files | `gzip` | +| zlib-wrapped deflate from older protocols | `zlib` | +| raw deflate blocks from a protocol that says "deflate" explicitly | `deflate` | +| data pipelines and log files that want high compression and fast decode | `zstd` | +| existing `.bz2` files | `bz2` | +| existing `.xz` / LZMA-family files | `lzma` | +| framed Snappy streams | `snappy` | +| raw Snappy blocks required by a storage format | `snappy.raw` | + +Do not silently try codecs in a loop. If the format is ambiguous, use the explicit autodetection helpers so the policy is +visible at the call site. + +## Compress Bytes Already in Memory + +Use one-shot helpers when the payload is already in memory and small enough to keep there. + +```incan +from std.compression import CompressionError, gzip + +def encode_payload(payload: bytes) -> Result[bytes, CompressionError]: + return gzip.compress(payload, level=None) + +def decode_payload(payload: bytes) -> Result[bytes, CompressionError]: + return gzip.decompress(payload) +``` + +`level=None` uses the codec default. Pass a level only when the caller has a reason to trade compression speed for output +size. + +```incan +from std.compression import CompressionError, zstd + +def archive_payload(payload: bytes) -> Result[bytes, CompressionError]: + return zstd.compress(payload, level=Some(10)) +``` + +Keep the compressed value typed as `bytes`. If it needs to cross a text-only boundary, encode it afterwards with +`std.encoding`. + +## Stream Files Instead of Loading Them + +Use stream helpers for files and pipeline stages. They move bytes between `std.fs.File` and `std.io.BytesIO` without +requiring the complete input to be materialized first. + +```incan +from std.compression import zstd +from std.fs import Path + +source = Path("events.jsonl").open("rb")? +target = Path("events.jsonl.zst").open("wb")? +zstd.compress_stream(source, target, level=Some(3), chunk_size=65536)? 
+target.flush()? +``` + +Decompression is the same shape: + +```incan +from std.compression import zstd +from std.fs import Path + +source = Path("events.jsonl.zst").open("rb")? +target = Path("events.jsonl").open("wb")? +zstd.decompress_stream(source, target, chunk_size=65536)? +target.flush()? +``` + +Choose a positive `chunk_size`. The default works for normal file workflows. Smaller chunks are useful in tests and +latency-sensitive pipelines; larger chunks may reduce overhead for large local files. + +## Use BytesIO for In-Memory Pipelines + +`BytesIO` is useful when a pipeline step expects a stream but the caller starts with bytes. + +```incan +from std.compression import CompressionError, gzip +from std.io import BytesIO + +def stream_compress_for_response(payload: bytes) -> Result[bytes, CompressionError]: + target = BytesIO() + gzip.compress_stream(BytesIO(payload), target, level=None, chunk_size=8192)? + return Ok(target.getvalue()) +``` + +This keeps code shaped like the file-streaming path while still returning a byte payload to the caller. + +## Autodetect Only When the Input Is Genuinely Mixed + +Use `decompress_auto` when a boundary may receive several framed compression formats and the caller cannot know which one +ahead of time. + +```incan +from std.compression import Codec, CompressionError, decompress_auto + +def decode_upload(payload: bytes) -> Result[bytes, CompressionError]: + codec, plain = decompress_auto(payload, [Codec.Gzip, Codec.Zstd, Codec.Bz2])? + return Ok(plain) +``` + +Keep the `allowed` list narrow. It documents the formats the boundary accepts and prevents unexpected codec behavior. + +For streamed input, use `decompress_auto_stream`: + +```incan +from std.compression import Codec, decompress_auto_stream +from std.fs import Path + +source = Path("payload.bin").open("rb")? +target = Path("payload.out").open("wb")? +codec = decompress_auto_stream(source, target, [Codec.Gzip, Codec.Zstd], chunk_size=65536)? +target.flush()? 
+println(codec) +``` + +Autodetection uses signatures and framing bytes. It does not inspect file extensions, paths, or MIME types. Raw deflate +and raw Snappy are not guessed because they do not have reliable frame signatures. + +## Handle Compression Errors at the Same Boundary + +Compression helpers return `Result[..., CompressionError]`. Match the error when the caller can recover or report a +specific category. + +```incan +from std.compression import gzip + +match gzip.decompress(payload): + Ok(plain) => println(len(plain)) + Err(err) => println(err.kind) +``` + +Common categories include `invalid_data`, `truncated_input`, `unsupported_codec`, `unsupported_option`, `invalid_level`, +`invalid_chunk_size`, `io`, and `backend`. + +## Keep Compression Separate from Related Work + +- Use `std.encoding` after compression when bytes need a text-safe representation. +- Use hashing after compression only when the digest must cover the compressed bytes. Hash before compression when the + digest must cover the original payload. +- Keep password hashing and encryption separate from compression. They have different security contracts. +- Do not use raw Snappy unless another format specifically requires block-level Snappy behavior. 
+ +## See Also + +- [`std.compression` reference](../reference/stdlib/compression.md) +- [Binary-text encoding](binary_text_encoding.md) +- [File I/O](file_io.md) +- [`std.io` reference](../reference/stdlib/io.md) +- [`std.fs` reference](../reference/stdlib/fs.md) diff --git a/workspaces/docs-site/docs/language/reference/stdlib/compression.md b/workspaces/docs-site/docs/language/reference/stdlib/compression.md index 496215d4a..6353b7694 100644 --- a/workspaces/docs-site/docs/language/reference/stdlib/compression.md +++ b/workspaces/docs-site/docs/language/reference/stdlib/compression.md @@ -107,3 +107,10 @@ match gzip.decompress(blob): `std.compression` does not provide archive containers such as ZIP or TAR, dictionary training APIs, authenticated encryption, checksums, or password hashing. Those require separate APIs because their compatibility and security contracts are different from byte compression. + +## See Also + +- [Compress and decompress data](../../how-to/compression.md) +- [`std.io` reference](io.md) +- [`std.fs` reference](fs.md) +- [`std.encoding` reference](encoding.md) diff --git a/workspaces/docs-site/docs/release_notes/0_3.md b/workspaces/docs-site/docs/release_notes/0_3.md index f99174551..e12cf3a50 100644 --- a/workspaces/docs-site/docs/release_notes/0_3.md +++ b/workspaces/docs-site/docs/release_notes/0_3.md @@ -366,7 +366,8 @@ The implementation is dogfooded in Incan stdlib source using ordinary Rust crate than new `@rust.extern` function or type implementation surfaces. The generated-project regression fixture covers one-shot, BytesIO stream, file stream, autodetection, option error, and chunk-size error behavior. -See also: [std.compression reference](../language/reference/stdlib/compression.md), [RFC 061]. +See also: [compression how-to](../language/how-to/compression.md), +[std.compression reference](../language/reference/stdlib/compression.md), [RFC 061]. 
## Detailed inventory diff --git a/workspaces/docs-site/mkdocs.yml b/workspaces/docs-site/mkdocs.yml index f8bfd1282..6656a77c9 100644 --- a/workspaces/docs-site/mkdocs.yml +++ b/workspaces/docs-site/mkdocs.yml @@ -129,6 +129,7 @@ nav: - Error messages: language/how-to/error_messages.md - File I/O: language/how-to/file_io.md - Binary-text encoding: language/how-to/binary_text_encoding.md + - Compression: language/how-to/compression.md - Generators: language/how-to/generators.md - Imports and modules: language/how-to/imports_and_modules.md - Module state: language/how-to/module_state.md From b9761a964912d73c367ddb93442f14143cefc8cc Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Mon, 18 May 2026 10:59:37 +0200 Subject: [PATCH 5/8] docs - regenerate language reference for std.compression (#339) --- workspaces/docs-site/docs/language/reference/language.md | 1 + 1 file changed, 1 insertion(+) diff --git a/workspaces/docs-site/docs/language/reference/language.md b/workspaces/docs-site/docs/language/reference/language.md index d5e0a2d9c..2b96e4fc7 100644 --- a/workspaces/docs-site/docs/language/reference/language.md +++ b/workspaces/docs-site/docs/language/reference/language.md @@ -117,6 +117,7 @@ Soft keywords are only reserved when their activating `std.*` namespace is impor | `std.io` | - | - | - | | `std.encoding` | - | `std.encoding._shared`, `std.encoding.prelude`, `std.encoding.hex`, `std.encoding.base32`, `std.encoding.base64`, `std.encoding.base85`, `std.encoding.base58`, `std.encoding.bech32` | - | | `std.hash` | - | `std.hash._core`, `std.hash._streaming`, `std.hash.prelude` | - | +| `std.compression` | - | `std.compression._core`, `std.compression._auto`, `std.compression.gzip`, `std.compression.zlib`, `std.compression.deflate`, `std.compression.zstd`, `std.compression.bz2`, `std.compression.lzma`, `std.compression.snappy`, `std.compression.snappy.raw` | - | | `std.tempfile` | - | - | - | | `std.rust` | - | - | - | | `std.builtins` | - | - | - | From 
efe9993edb54230753ab5005aab774a746e9e303 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Mon, 18 May 2026 11:18:21 +0200 Subject: [PATCH 6/8] chore - simplify std.compression autodetection (#339) --- .../stdlib/compression/_auto.incn | 132 +++++++++++------- .../stdlib/compression/_core.incn | 26 ---- 2 files changed, 82 insertions(+), 76 deletions(-) diff --git a/crates/incan_stdlib/stdlib/compression/_auto.incn b/crates/incan_stdlib/stdlib/compression/_auto.incn index 5daf57c92..44886f1b9 100644 --- a/crates/incan_stdlib/stdlib/compression/_auto.incn +++ b/crates/incan_stdlib/stdlib/compression/_auto.incn @@ -18,7 +18,6 @@ from rust::zstd::stream::read @ "0.13" import Decoder as ZstdReadDecoder from std.compression._core import ( Codec, CompressionError, - _allowed, _codec_error, _io_error, _looks_like_bz2, @@ -27,7 +26,6 @@ from std.compression._core import ( _looks_like_xz, _looks_like_zlib, _looks_like_zstd, - _validate_allowed, _validate_chunk_size, _write_sink_bytes, ) @@ -41,6 +39,56 @@ from std.fs import File from std.io import _BytesIO +def _detect_codec(data: bytes, allowed: list[Codec], operation: str) -> Result[Option[Codec], CompressionError]: + """ + Return the first allowed codec whose reliable framing signature matches. 
+ """ + if len(allowed) == 0: + return Err( + CompressionError( + kind="unsupported_codec", + codec=None, + operation=operation, + detail="allowed codec list must not be empty", + ), + ) + + for candidate in allowed: + match candidate: + Codec.Gzip => + if _looks_like_gzip(data): + return Ok(Some(Codec.Gzip)) + Codec.Zlib => + if _looks_like_zlib(data): + return Ok(Some(Codec.Zlib)) + Codec.Deflate => pass + Codec.Zstd => + if _looks_like_zstd(data): + return Ok(Some(Codec.Zstd)) + Codec.Bz2 => + if _looks_like_bz2(data): + return Ok(Some(Codec.Bz2)) + Codec.Lzma => + if _looks_like_xz(data): + return Ok(Some(Codec.Lzma)) + Codec.Snappy => + if _looks_like_snappy_frame(data): + return Ok(Some(Codec.Snappy)) + return Ok(None) + + +def _unsupported_autodetect_error(operation: str) -> CompressionError: + """ + Build the stable no-signature-match autodetection error. + """ + return CompressionError( + kind="unsupported_codec", + codec=None, + operation=operation, + detail="no allowed compression codec matched the input signature", + ) + + pub def decompress_auto(data: bytes, allowed: list[Codec] = Codec.all()) -> Result[tuple[Codec, bytes], CompressionError]: """ Decompress bytes by explicit signature-based autodetection. @@ -53,27 +101,16 @@ pub def decompress_auto(data: bytes, allowed: list[Codec] = Codec.all()) -> Resu `Ok((codec, plain))` when an allowed codec signature matches, or `Err(CompressionError)` when the filter is empty, no signature matches, or the matched codec rejects the payload. """ - _validate_allowed(allowed)? 
- if _allowed(allowed.clone(), Codec.Gzip) and _looks_like_gzip(data): - return Ok((Codec.Gzip, _gzip_decompress(data)?)) - elif _allowed(allowed, Codec.Zstd) and _looks_like_zstd(data): - return Ok((Codec.Zstd, _zstd_decompress(data)?)) - elif _allowed(allowed, Codec.Bz2) and _looks_like_bz2(data): - return Ok((Codec.Bz2, _bz2_decompress(data)?)) - elif _allowed(allowed, Codec.Lzma) and _looks_like_xz(data): - return Ok((Codec.Lzma, _lzma_decompress(data)?)) - elif _allowed(allowed, Codec.Snappy) and _looks_like_snappy_frame(data): - return Ok((Codec.Snappy, _snappy_decompress(data)?)) - elif _allowed(allowed, Codec.Zlib) and _looks_like_zlib(data): - return Ok((Codec.Zlib, _zlib_decompress(data)?)) - return Err( - CompressionError( - kind="unsupported_codec", - codec=None, - operation="decompress_auto", - detail="no allowed compression codec matched the input signature", - ), - ) + match _detect_codec(data, allowed, "decompress_auto")?: + Some(Codec.Gzip) => return Ok((Codec.Gzip, _gzip_decompress(data)?)) + Some(Codec.Zlib) => return Ok((Codec.Zlib, _zlib_decompress(data)?)) + Some(Codec.Zstd) => return Ok((Codec.Zstd, _zstd_decompress(data)?)) + Some(Codec.Bz2) => return Ok((Codec.Bz2, _bz2_decompress(data)?)) + Some(Codec.Lzma) => return Ok((Codec.Lzma, _lzma_decompress(data)?)) + Some(Codec.Snappy) => return Ok((Codec.Snappy, _snappy_decompress(data)?)) + Some(Codec.Deflate) => pass + None => pass + return Err(_unsupported_autodetect_error("decompress_auto")) pub def decompress_auto_stream( @@ -98,34 +135,29 @@ pub def decompress_auto_stream( `Ok(Codec)` with the detected codec after writing to `target`, or `Err(CompressionError)`. """ _validate_chunk_size(None, "decompress_auto_stream", chunk_size)? - _validate_allowed(allowed, "decompress_auto_stream")? prefix = _read_prefix(source)? - if _allowed(allowed.clone(), Codec.Gzip) and _looks_like_gzip(prefix): - _decompress_gzip_stream(prefix, source, target, chunk_size)? 
- return Ok(Codec.Gzip) - elif _allowed(allowed.clone(), Codec.Zstd) and _looks_like_zstd(prefix): - _decompress_zstd_stream(prefix, source, target, chunk_size)? - return Ok(Codec.Zstd) - elif _allowed(allowed.clone(), Codec.Bz2) and _looks_like_bz2(prefix): - _decompress_bz2_stream(prefix, source, target, chunk_size)? - return Ok(Codec.Bz2) - elif _allowed(allowed.clone(), Codec.Lzma) and _looks_like_xz(prefix): - _decompress_lzma_stream(prefix, source, target, chunk_size)? - return Ok(Codec.Lzma) - elif _allowed(allowed.clone(), Codec.Snappy) and _looks_like_snappy_frame(prefix): - _decompress_snappy_stream(prefix, source, target, chunk_size)? - return Ok(Codec.Snappy) - elif _allowed(allowed, Codec.Zlib) and _looks_like_zlib(prefix): - _decompress_zlib_stream(prefix, source, target, chunk_size)? - return Ok(Codec.Zlib) - return Err( - CompressionError( - kind="unsupported_codec", - codec=None, - operation="decompress_auto_stream", - detail="no allowed compression codec matched the input signature", - ), - ) + match _detect_codec(prefix, allowed, "decompress_auto_stream")?: + Some(Codec.Gzip) => + _decompress_gzip_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Gzip) + Some(Codec.Zlib) => + _decompress_zlib_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Zlib) + Some(Codec.Zstd) => + _decompress_zstd_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Zstd) + Some(Codec.Bz2) => + _decompress_bz2_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Bz2) + Some(Codec.Lzma) => + _decompress_lzma_stream(prefix, source, target, chunk_size)? + return Ok(Codec.Lzma) + Some(Codec.Snappy) => + _decompress_snappy_stream(prefix, source, target, chunk_size)? 
+ return Ok(Codec.Snappy) + Some(Codec.Deflate) => pass + None => pass + return Err(_unsupported_autodetect_error("decompress_auto_stream")) def _read_prefix(source: Union[_BytesIO, File]) -> Result[bytes, CompressionError]: diff --git a/crates/incan_stdlib/stdlib/compression/_core.incn b/crates/incan_stdlib/stdlib/compression/_core.incn index b71d3e215..baf442817 100644 --- a/crates/incan_stdlib/stdlib/compression/_core.incn +++ b/crates/incan_stdlib/stdlib/compression/_core.incn @@ -158,32 +158,6 @@ pub def _write_sink_bytes( Err(err) => return Err(_io_error(codec, operation, err.message())) -pub def _validate_allowed(allowed: list[Codec], operation: str = "decompress_auto") -> Result[None, CompressionError]: - """ - Reject an empty autodetection candidate set. - """ - if len(allowed) == 0: - return Err( - CompressionError( - kind="unsupported_codec", - codec=None, - operation=operation, - detail="allowed codec list must not be empty", - ), - ) - return Ok(None) - - -pub def _allowed(allowed: list[Codec], codec: Codec) -> bool: - """ - Return whether `codec` is present in an explicit autodetection filter. - """ - for candidate in allowed: - if candidate == codec: - return true - return false - - pub def _byte_at(data: bytes, index: int) -> int: """ Read one byte as an integer for signature checks. 
From 167c652919f6d03386f3d175e91dd5a9dedd41fa Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Mon, 18 May 2026 12:11:23 +0200 Subject: [PATCH 7/8] chore - clarify std.compression autodetect misses (#339) --- .../stdlib/compression/_auto.incn | 55 ++++++++++++------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/crates/incan_stdlib/stdlib/compression/_auto.incn b/crates/incan_stdlib/stdlib/compression/_auto.incn index 44886f1b9..5327b9c0f 100644 --- a/crates/incan_stdlib/stdlib/compression/_auto.incn +++ b/crates/incan_stdlib/stdlib/compression/_auto.incn @@ -39,6 +39,38 @@ from std.fs import File from std.io import _BytesIO +def _matching_codec(candidate: Codec, data: bytes) -> Option[Codec]: + """ + Return `candidate` only when its reliable framing signature matches `data`. + """ + match candidate: + Codec.Gzip => + if _looks_like_gzip(data): + return Some(Codec.Gzip) + return None + Codec.Zlib => + if _looks_like_zlib(data): + return Some(Codec.Zlib) + return None + Codec.Deflate => return None + Codec.Zstd => + if _looks_like_zstd(data): + return Some(Codec.Zstd) + return None + Codec.Bz2 => + if _looks_like_bz2(data): + return Some(Codec.Bz2) + return None + Codec.Lzma => + if _looks_like_xz(data): + return Some(Codec.Lzma) + return None + Codec.Snappy => + if _looks_like_snappy_frame(data): + return Some(Codec.Snappy) + return None + + def _detect_codec(data: bytes, allowed: list[Codec], operation: str) -> Result[Option[Codec], CompressionError]: """ Return the first allowed codec whose reliable framing signature matches. 
@@ -54,26 +86,9 @@ def _detect_codec(data: bytes, allowed: list[Codec], operation: str) -> Result[O ) for candidate in allowed: - match candidate: - Codec.Gzip => - if _looks_like_gzip(data): - return Ok(Some(Codec.Gzip)) - Codec.Zlib => - if _looks_like_zlib(data): - return Ok(Some(Codec.Zlib)) - Codec.Deflate => pass - Codec.Zstd => - if _looks_like_zstd(data): - return Ok(Some(Codec.Zstd)) - Codec.Bz2 => - if _looks_like_bz2(data): - return Ok(Some(Codec.Bz2)) - Codec.Lzma => - if _looks_like_xz(data): - return Ok(Some(Codec.Lzma)) - Codec.Snappy => - if _looks_like_snappy_frame(data): - return Ok(Some(Codec.Snappy)) + match _matching_codec(candidate, data): + Some(codec) => return Ok(Some(codec)) + None => pass return Ok(None) From dca69cd8a8f59333c1d4bda07fba62675b242f97 Mon Sep 17 00:00:00 2001 From: Danny Meijer Date: Mon, 18 May 2026 12:38:31 +0200 Subject: [PATCH 8/8] chore - warm std.compression offline test deps (#339) --- Cargo.lock | 109 +++++++++++++++++++++++++++++++++++++ Cargo.toml | 6 ++ tests/integration_tests.rs | 26 +++++++++ 3 files changed, 141 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index b3d6af21c..126abb5b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -346,6 +346,15 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "bzip2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" +dependencies = [ + "libbz2-rs-sys", +] + [[package]] name = "camino" version = "1.2.2" @@ -464,6 +473,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -969,6 +980,16 @@ version = "0.5.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1462,7 +1483,9 @@ dependencies = [ "blake2", "blake3", "byteorder", + "bzip2", "clap", + "flate2", "hex", "incan_core", "incan_semantics_core", @@ -1484,6 +1507,7 @@ dependencies = [ "sha1", "sha2", "sha3", + "snap", "syn", "tempfile", "thiserror 2.0.18", @@ -1497,6 +1521,8 @@ dependencies = [ "wasmtime-wasi", "wat", "xxhash-rust", + "xz2", + "zstd", ] [[package]] @@ -1686,6 +1712,16 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "jod-thread" version = "1.0.0" @@ -1755,6 +1791,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +[[package]] +name = "libbz2-rs-sys" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34b357333733e8260735ba5894eb928c02ecc69c78715f01a8019e7fa7f2db4c" + [[package]] name = "libc" version = "0.2.183" @@ -1823,6 +1865,17 @@ dependencies = [ "url", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", 
+ "pkg-config", +] + [[package]] name = "mach2" version = "0.4.3" @@ -1930,6 +1983,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -2155,6 +2209,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + [[package]] name = "portable-atomic" version = "1.13.1" @@ -3426,6 +3486,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "similar" version = "2.7.0" @@ -3457,6 +3523,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + [[package]] name = "socket2" version = "0.6.3" @@ -4973,6 +5045,15 @@ version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.8.1" @@ -5075,3 +5156,31 @@ name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] 
+name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index b144fe18d..cb4f0e6de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -110,6 +110,12 @@ wasmtime-wasi = { version = "44.0.1", default-features = false, features = ["p1" [dev-dependencies] # Runtime crate used by std.io generated-project offline smoke tests. byteorder = "1" +# Runtime crates used by std.compression generated-project offline smoke tests. +flate2 = "1" +zstd = "0.13" +bzip2 = "0.6" +xz2 = "0.1" +snap = "1" # Runtime crates used by std.hash generated-project offline smoke tests. blake2 = "0.10" blake3 = "1" diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 6018be5fb..d963392a5 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -6652,6 +6652,32 @@ def main() -> None: #[test] fn test_std_compression_surface_runs_generated_project() -> Result<(), Box> { + // Keep std.compression's generated-project dependencies in the root Cargo graph so CI fetches them before this + // smoke runs the generated project under CARGO_NET_OFFLINE. 
+ use std::io::{Cursor, Read as _}; + + let sample = b"abc"; + let mut gzip = flate2::read::GzEncoder::new(Cursor::new(sample), flate2::Compression::new(6)); + let mut gzip_out = Vec::new(); + gzip.read_to_end(&mut gzip_out)?; + assert!(!gzip_out.is_empty()); + + let zstd_out = zstd::stream::encode_all(Cursor::new(sample), 0)?; + assert!(!zstd_out.is_empty()); + + let mut bz2 = bzip2::read::BzEncoder::new(Cursor::new(sample), bzip2::Compression::new(6)); + let mut bz2_out = Vec::new(); + bz2.read_to_end(&mut bz2_out)?; + assert!(!bz2_out.is_empty()); + + let mut lzma = xz2::read::XzEncoder::new(Cursor::new(sample), 6); + let mut lzma_out = Vec::new(); + lzma.read_to_end(&mut lzma_out)?; + assert!(!lzma_out.is_empty()); + + let mut snappy = snap::raw::Encoder::new(); + assert!(!snappy.compress_vec(sample)?.is_empty()); + let output = Command::new(incan_debug_binary()) .args(["run", "tests/fixtures/valid/std_compression_surface.incn"]) .env("CARGO_NET_OFFLINE", "true")