diff --git a/Cargo.toml b/Cargo.toml index 3afaa733..f6df4973 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,8 @@ license = "MIT OR Apache-2.0" keywords = ["tar", "tarfile", "encoding"] readme = "README.md" edition = "2021" -rust-version = "1.63" +# Primarily from zerocopy +rust-version = "1.86.0" exclude = ["tests/archives/*"] description = """ @@ -21,6 +22,7 @@ contents are never required to be entirely resident in memory all at once. [dependencies] filetime = "0.2.8" +tar-core = "0.1.0" [dev-dependencies] astral-tokio-tar = "0.5" diff --git a/src/archive.rs b/src/archive.rs index a3ae6f01..dff4b0f3 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -7,12 +7,14 @@ use std::io::{self, SeekFrom}; use std::marker; use std::path::Path; +use tar_core::parse::{Limits, ParseError, ParseEvent, Parser}; +use tar_core::SparseEntry as CoreSparseEntry; + use crate::entry::{EntryFields, EntryIo}; use crate::error::TarError; use crate::header::BLOCK_SIZE; use crate::other; -use crate::pax::*; -use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; +use crate::{Entry, Header}; /// A top-level representation of an archive file. /// @@ -48,6 +50,8 @@ struct EntriesFields<'a> { next: u64, done: bool, raw: bool, + parser: Parser, + buf: Vec, } impl Archive { @@ -223,6 +227,8 @@ impl Archive { done: false, next: 0, raw: false, + parser: new_parser(), + buf: Vec::new(), }) } @@ -292,15 +298,17 @@ impl<'a, R: Read> Iterator for Entries<'a, R> { } impl<'a> EntriesFields<'a> { - fn next_entry_raw( - &mut self, - pax_extensions: Option<&[u8]>, - ) -> io::Result>> { + /// Read a single raw entry from the archive without processing + /// extension headers (GNU long name/link, PAX). + fn next_entry_raw(&mut self) -> io::Result>> { let mut header = Header::new_old(); let mut header_pos = self.next; loop { // Seek to the start of the next header in the archive - let delta = self.next - self.archive.inner.pos.get(); + let delta = self + .next + .checked_sub(self.archive.inner.pos.get()) + .ok_or_else(|| other("archive position overflow"))?; self.skip(delta)?; // EOF is an indicator that we are at the end of the archive. @@ -334,27 +342,8 @@ impl<'a> EntriesFields<'a> { return Err(other("archive header checksum mismatch")); } - let mut pax_size: Option = None; - if let Some(pax_extensions_ref) = &pax_extensions { - pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE); - - if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) { - header.set_uid(pax_uid); - } - - if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) { - header.set_gid(pax_gid); - } - } - let file_pos = self.next; - let mut size = header.entry_size()?; - // If this exists, it must override the header size. Disagreement among - // parsers allows construction of malicious archives that appear different - // when parsed. - if let Some(pax_size) = pax_size { - size = pax_size; - } + let size = header.entry_size()?; let ret = EntryFields { size, header_pos, @@ -385,173 +374,192 @@ impl<'a> EntriesFields<'a> { Ok(Some(ret.into_entry())) } + /// Read header bytes into the buffer and feed them to the tar-core parser + /// until it emits an Entry or End event. fn next_entry(&mut self) -> io::Result>> { - if self.raw { - return self.next_entry_raw(None); - } + // Skip past any content from the previous entry that hasn't been + // consumed yet. + let delta = self + .next + .checked_sub(self.archive.inner.pos.get()) + .ok_or_else(|| other("archive position overflow"))?; + self.skip(delta)?; + + // Clear the header buffer for this round. + self.buf.clear(); - let mut gnu_longname = None; - let mut gnu_longlink = None; - let mut pax_extensions = None; - let mut processed = 0; loop { - processed += 1; - let entry = match self.next_entry_raw(pax_extensions.as_deref())? { - Some(entry) => entry, - None if processed > 1 => { - return Err(other( - "members found describing a future member \ - but no future member found", - )); + let event = self.parser.parse(&self.buf).map_err(parse_error_to_io)?; + + match event { + ParseEvent::NeedData { min_bytes } => { + let cur_len = self.buf.len(); + let new_bytes = min_bytes.checked_sub(cur_len).ok_or_else(|| { + other("parser requested fewer bytes than already buffered") + })?; + self.buf.resize(min_bytes, 0); + match try_read_all(&mut &self.archive.inner, &mut self.buf[cur_len..]) { + Ok(true) => { + self.next += new_bytes as u64; + } + Ok(false) => { + if cur_len == 0 || self.archive.inner.ignore_zeros { + return Ok(None); + } + return Err(other("unexpected EOF in archive")); + } + Err(e) => return Err(e), + } } - None => return Ok(None), - }; - - let is_recognized_header = - entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some(); - - if is_recognized_header && entry.header().entry_type().is_gnu_longname() { - if gnu_longname.is_some() { - return Err(other( - "two long name entries describing \ - the same member", - )); + + ParseEvent::End { consumed } => { + if self.archive.inner.ignore_zeros { + // Drain consumed zero blocks and reset the parser so + // it can parse the next concatenated archive (if any). + self.buf.drain(..consumed); + self.parser = new_parser(); + continue; + } + return Ok(None); } - gnu_longname = Some(EntryFields::from(entry).read_all()?); - continue; - } - if is_recognized_header && entry.header().entry_type().is_gnu_longlink() { - if gnu_longlink.is_some() { - return Err(other( - "two long name entries describing \ - the same member", - )); + ParseEvent::Entry { consumed, entry } => { + let meta = EntryMeta::from_parsed(consumed, entry, None); + return self.finish_entry(meta); } - gnu_longlink = Some(EntryFields::from(entry).read_all()?); - continue; - } - if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() { - if pax_extensions.is_some() { - return Err(other( - "two pax extensions entries describing \ - the same member", - )); + ParseEvent::SparseEntry { + consumed, + entry, + sparse_map, + real_size, + } => { + let meta = + EntryMeta::from_parsed(consumed, entry, Some((sparse_map, real_size))); + return self.finish_entry(meta); } - pax_extensions = Some(EntryFields::from(entry).read_all()?); - continue; - } - let mut fields = EntryFields::from(entry); - fields.long_pathname = gnu_longname; - fields.long_linkname = gnu_longlink; - fields.pax_extensions = pax_extensions; - self.parse_sparse_header(&mut fields)?; - return Ok(Some(fields.into_entry())); + ParseEvent::GlobalExtensions { consumed, .. } => { + // Global PAX headers set defaults for subsequent entries. + // tar-rs historically ignores them; consume and continue. + self.buf.drain(..consumed); + continue; + } + } } } - fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { - if !entry.header.entry_type().is_gnu_sparse() { - return Ok(()); - } - let gnu = match entry.header.as_gnu() { - Some(gnu) => gnu, - None => return Err(other("sparse entry type listed but not GNU header")), + /// Finish constructing an entry from its owned metadata. + /// + /// `EntryMeta::from_parsed` already consumed all borrowed data from + /// the `ParsedEntry`, so this method can freely borrow `&mut self`. + fn finish_entry(&mut self, meta: EntryMeta) -> io::Result>> { + let header_pos = self + .next + .checked_sub(meta.consumed as u64) + .ok_or_else(|| other("archive position overflow"))?; + let file_pos = self.next; + + // Build the I/O chain. + let (data, size) = if let Some((sparse_map, real_size)) = meta.sparse { + let data = Self::build_sparse_io( + &self.archive.inner, + &sparse_map, + real_size, + meta.content_size, + )?; + (data, real_size) + } else { + ( + vec![EntryIo::Data((&self.archive.inner).take(meta.content_size))], + meta.content_size, + ) }; - // Sparse files are represented internally as a list of blocks that are - // read. Blocks are either a bunch of 0's or they're data from the - // underlying archive. - // - // Blocks of a sparse file are described by the `GnuSparseHeader` - // structure, some of which are contained in `GnuHeader` but some of - // which may also be contained after the first header in further - // headers. - // - // We read off all the blocks here and use the `add_block` function to - // incrementally add them to the list of I/O block (in `entry.data`). - // The `add_block` function also validates that each chunk comes after - // the previous, we don't overrun the end of the file, and each block is - // aligned to a 512-byte boundary in the archive itself. - // - // At the end we verify that the sparse file size (`Header::size`) is - // the same as the current offset (described by the list of blocks) as - // well as the amount of data read equals the size of the entry - // (`Header::entry_size`). - entry.data.truncate(0); - - let mut cur = 0; - let mut remaining = entry.size; - { - let data = &mut entry.data; - let reader = &self.archive.inner; - let size = entry.size; - let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { - if block.is_empty() { - return Ok(()); - } - let off = block.offset()?; - let len = block.length()?; - if len != 0 && (size - remaining) % BLOCK_SIZE != 0 { - return Err(other( - "previous block in sparse file was not \ - aligned to 512-byte boundary", - )); - } else if off < cur { - return Err(other( - "out of order or overlapping sparse \ - blocks", - )); - } else if cur < off { - let block = io::repeat(0).take(off - cur); - data.push(EntryIo::Pad(block)); - } - cur = off - .checked_add(len) - .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?; - remaining = remaining.checked_sub(len).ok_or_else(|| { - other( - "sparse file consumed more data than the header \ - listed", - ) - })?; - data.push(EntryIo::Data(reader.take(len))); - Ok(()) - }; - for block in gnu.sparse.iter() { - add_block(block)? - } - if gnu.is_extended() { - let mut ext = GnuExtSparseHeader::new(); - ext.isextended[0] = 1; - while ext.is_extended() { - if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? { - return Err(other("failed to read extension")); - } + self.next = file_pos + .checked_add(meta.padded_content_size) + .ok_or_else(|| other("size overflow"))?; - self.next += BLOCK_SIZE; - for block in ext.sparse.iter() { - add_block(block)?; - } - } + let fields = EntryFields { + size, + header_pos, + file_pos, + data, + header: meta.header, + long_pathname: meta.long_pathname, + long_linkname: meta.long_linkname, + pax_extensions: meta.pax_extensions, + mask: self.archive.inner.mask, + unpack_xattrs: self.archive.inner.unpack_xattrs, + preserve_permissions: self.archive.inner.preserve_permissions, + preserve_mtime: self.archive.inner.preserve_mtime, + overwrite: self.archive.inner.overwrite, + preserve_ownerships: self.archive.inner.preserve_ownerships, + }; + + Ok(Some(fields.into_entry())) + } + + /// Build the sparse I/O chain from a tar-core sparse map. + /// + /// Interleaves zero-fill padding (`EntryIo::Pad`) for gaps and data + /// reads (`EntryIo::Data`) for sparse chunks, producing a reader that + /// yields the logical file content. + fn build_sparse_io( + reader: &'a ArchiveInner, + sparse_map: &[CoreSparseEntry], + real_size: u64, + on_disk_size: u64, + ) -> io::Result>> { + let mut data = Vec::new(); + let mut cur = 0u64; + let mut remaining = on_disk_size; + + for block in sparse_map { + let off = block.offset; + let len = block.length; + + if len != 0 && (on_disk_size - remaining) % BLOCK_SIZE != 0 { + return Err(other( + "previous block in sparse file was not \ + aligned to 512-byte boundary", + )); + } + if off < cur { + return Err(other( + "out of order or overlapping sparse \ + blocks", + )); } + if cur < off { + data.push(EntryIo::Pad(io::repeat(0).take(off - cur))); + } + cur = off + .checked_add(len) + .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?; + remaining = remaining.checked_sub(len).ok_or_else(|| { + other( + "sparse file consumed more data than the header \ + listed", + ) + })?; + data.push(EntryIo::Data(reader.take(len))); } - if cur != gnu.real_size()? { + + if cur != real_size { return Err(other( "mismatch in sparse file chunks and \ size in header", )); } - entry.size = cur; if remaining > 0 { return Err(other( "mismatch in sparse file chunks and \ entry size in header", )); } - Ok(()) + + Ok(data) } fn skip(&mut self, mut amt: u64) -> io::Result<()> { @@ -580,18 +588,22 @@ impl<'a> Iterator for EntriesFields<'a> { fn next(&mut self) -> Option>> { if self.done { - None + return None; + } + let result = if self.raw { + self.next_entry_raw() } else { - match self.next_entry() { - Ok(Some(e)) => Some(Ok(e)), - Ok(None) => { - self.done = true; - None - } - Err(e) => { - self.done = true; - Some(Err(e)) - } + self.next_entry() + }; + match result { + Ok(Some(e)) => Some(Ok(e)), + Ok(None) => { + self.done = true; + None + } + Err(e) => { + self.done = true; + Some(Err(e)) } } } @@ -613,6 +625,84 @@ impl Seek for &ArchiveInner { } } +/// Owned entry metadata extracted from a borrowed `ParsedEntry`. +/// +/// Consuming the `ParsedEntry` fields into owned data releases the borrow +/// on the parser's input buffer, letting `finish_entry` take `&mut self` +/// to build the I/O chain and update stream positions. +struct EntryMeta { + consumed: usize, + header: Header, + content_size: u64, + padded_content_size: u64, + long_pathname: Option>, + long_linkname: Option>, + pax_extensions: Option>, + sparse: Option<(Vec, u64)>, +} + +impl EntryMeta { + fn from_parsed( + consumed: usize, + entry: tar_core::parse::ParsedEntry<'_>, + sparse: Option<(Vec, u64)>, + ) -> Self { + let mut header = Header::new_old(); + header + .as_mut_bytes() + .copy_from_slice(entry.header.as_bytes()); + header.set_uid(entry.uid); + header.set_gid(entry.gid); + + // Extract sizes before moving fields out of entry. + let content_size = entry.size; + let padded_content_size = entry.padded_size(); + + let long_pathname = if entry.path.as_ref() != entry.header.path_bytes() { + Some(entry.path.into_owned()) + } else { + None + }; + + let long_linkname = entry.link_target.and_then(|lt| { + let header_link = entry.header.link_name_bytes(); + if lt.as_ref() != header_link { + Some(lt.into_owned()) + } else { + None + } + }); + + Self { + consumed, + header, + content_size, + padded_content_size, + long_pathname, + long_linkname, + pax_extensions: entry.pax.map(|b| b.to_vec()), + sparse, + } + } +} + +/// Create a new tar-core parser with default limits. +fn new_parser() -> Parser { + let mut parser = Parser::new(Limits::default()); + parser.set_allow_empty_path(true); + parser +} + +/// Map tar-core parse errors to io::Error with messages compatible with +/// existing tar-rs error strings. +fn parse_error_to_io(e: ParseError) -> io::Error { + let msg = match e { + ParseError::InvalidSize(_) => "size overflow".to_string(), + err => err.to_string(), + }; + io::Error::new(io::ErrorKind::InvalidData, msg) +} + /// Try to fill the buffer from the reader. /// /// If the reader reaches its end before filling the buffer at all, returns `false`. diff --git a/src/header.rs b/src/header.rs index 8413d10f..92667072 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,3 +1,13 @@ +//! Tar header types with I/O integration. +//! +//! This module provides the [`Header`] type and format-specific header types +//! ([`GnuHeader`], [`UstarHeader`], [`OldHeader`]) that wrap raw 512-byte +//! blocks with convenient accessor methods and filesystem metadata integration. +//! +//! The underlying sans-IO header parsing and format detection is provided by +//! the [`tar_core`] crate; this module adds I/O error handling, path +//! manipulation, and metadata filling on top of that foundation. + #[cfg(all(unix, not(target_arch = "wasm32")))] use std::os::unix::prelude::*; #[cfg(windows)] @@ -152,14 +162,10 @@ impl Header { /// extensions such as long path names, long link names, and setting the /// atime/ctime metadata attributes of files. pub fn new_gnu() -> Header { + let core = tar_core::Header::new_gnu(); let mut header = Header { - bytes: [0; BLOCK_SIZE as usize], + bytes: *core.as_bytes(), }; - unsafe { - let gnu = cast_mut::<_, GnuHeader>(&mut header); - gnu.magic = *b"ustar "; - gnu.version = *b" \0"; - } header.set_mtime(0); header } @@ -172,14 +178,10 @@ impl Header { /// /// UStar is also the basis used for pax archives. pub fn new_ustar() -> Header { + let core = tar_core::Header::new_ustar(); let mut header = Header { - bytes: [0; BLOCK_SIZE as usize], + bytes: *core.as_bytes(), }; - unsafe { - let gnu = cast_mut::<_, UstarHeader>(&mut header); - gnu.magic = *b"ustar\0"; - gnu.version = *b"00"; - } header.set_mtime(0); header } @@ -191,21 +193,27 @@ impl Header { /// format limits the path name limit and isn't able to contain extra /// metadata like atime/ctime. pub fn new_old() -> Header { + let core = tar_core::Header::new_old(); let mut header = Header { - bytes: [0; BLOCK_SIZE as usize], + bytes: *core.as_bytes(), }; header.set_mtime(0); header } fn is_ustar(&self) -> bool { - let ustar = unsafe { cast::<_, UstarHeader>(self) }; - ustar.magic[..] == b"ustar\0"[..] && ustar.version[..] == b"00"[..] + self.as_core().is_ustar() } fn is_gnu(&self) -> bool { - let ustar = unsafe { cast::<_, UstarHeader>(self) }; - ustar.magic[..] == b"ustar "[..] && ustar.version[..] == b" \0"[..] + self.as_core().is_gnu() + } + + /// Returns a reference to the underlying `tar_core::Header`. + /// + /// This is a zero-copy cast since both types are `[u8; 512]`. + fn as_core(&self) -> &tar_core::Header { + tar_core::Header::from_bytes(&self.bytes) } /// View this archive header as a raw "old" archive header. diff --git a/src/pax.rs b/src/pax.rs index c62e5f47..9942f623 100644 --- a/src/pax.rs +++ b/src/pax.rs @@ -1,4 +1,6 @@ -#![allow(dead_code)] +// Many PAX constants are kept for completeness even though they aren't +// currently referenced in tar-rs itself after the tar-core migration. +#![allow(dead_code, unused_imports)] use std::io; use std::io::Write; use std::slice; @@ -7,33 +9,35 @@ use std::str; use crate::other; // Keywords for PAX extended header records. +// The canonical definitions live in tar-core; we re-export or alias them here +// to keep this crate's internal references working. pub const PAX_NONE: &str = ""; // Indicates that no PAX key is suitable -pub const PAX_PATH: &str = "path"; -pub const PAX_LINKPATH: &str = "linkpath"; -pub const PAX_SIZE: &str = "size"; -pub const PAX_UID: &str = "uid"; -pub const PAX_GID: &str = "gid"; -pub const PAX_UNAME: &str = "uname"; -pub const PAX_GNAME: &str = "gname"; -pub const PAX_MTIME: &str = "mtime"; -pub const PAX_ATIME: &str = "atime"; -pub const PAX_CTIME: &str = "ctime"; // Removed from later revision of PAX spec, but was valid +pub use tar_core::PAX_ATIME; +pub use tar_core::PAX_CTIME; // Removed from later revision of PAX spec, but was valid +pub use tar_core::PAX_GID; +pub use tar_core::PAX_GNAME; +pub use tar_core::PAX_LINKPATH; +pub use tar_core::PAX_MTIME; +pub use tar_core::PAX_PATH; +pub use tar_core::PAX_SIZE; +pub use tar_core::PAX_UID; +pub use tar_core::PAX_UNAME; pub const PAX_CHARSET: &str = "charset"; // Currently unused pub const PAX_COMMENT: &str = "comment"; // Currently unused -pub const PAX_SCHILYXATTR: &str = "SCHILY.xattr."; +pub const PAX_SCHILYXATTR: &str = tar_core::PAX_SCHILY_XATTR; // Keywords for GNU sparse files in a PAX extended header. -pub const PAX_GNUSPARSE: &str = "GNU.sparse."; -pub const PAX_GNUSPARSENUMBLOCKS: &str = "GNU.sparse.numblocks"; -pub const PAX_GNUSPARSEOFFSET: &str = "GNU.sparse.offset"; -pub const PAX_GNUSPARSENUMBYTES: &str = "GNU.sparse.numbytes"; -pub const PAX_GNUSPARSEMAP: &str = "GNU.sparse.map"; -pub const PAX_GNUSPARSENAME: &str = "GNU.sparse.name"; -pub const PAX_GNUSPARSEMAJOR: &str = "GNU.sparse.major"; -pub const PAX_GNUSPARSEMINOR: &str = "GNU.sparse.minor"; -pub const PAX_GNUSPARSESIZE: &str = "GNU.sparse.size"; -pub const PAX_GNUSPARSEREALSIZE: &str = "GNU.sparse.realsize"; +pub const PAX_GNUSPARSE: &str = tar_core::PAX_GNU_SPARSE; +pub const PAX_GNUSPARSENUMBLOCKS: &str = tar_core::PAX_GNU_SPARSE_NUMBLOCKS; +pub const PAX_GNUSPARSEOFFSET: &str = tar_core::PAX_GNU_SPARSE_OFFSET; +pub const PAX_GNUSPARSENUMBYTES: &str = tar_core::PAX_GNU_SPARSE_NUMBYTES; +pub const PAX_GNUSPARSEMAP: &str = tar_core::PAX_GNU_SPARSE_MAP; +pub const PAX_GNUSPARSENAME: &str = tar_core::PAX_GNU_SPARSE_NAME; +pub const PAX_GNUSPARSEMAJOR: &str = tar_core::PAX_GNU_SPARSE_MAJOR; +pub const PAX_GNUSPARSEMINOR: &str = tar_core::PAX_GNU_SPARSE_MINOR; +pub const PAX_GNUSPARSESIZE: &str = tar_core::PAX_GNU_SPARSE_SIZE; +pub const PAX_GNUSPARSEREALSIZE: &str = tar_core::PAX_GNU_SPARSE_REALSIZE; /// An iterator over the pax extensions in an archive entry. /// diff --git a/tests/all.rs b/tests/all.rs index 39f0bac0..fdebeccc 100644 --- a/tests/all.rs +++ b/tests/all.rs @@ -1364,6 +1364,64 @@ fn sparse_with_trailing() { assert_eq!(&s[0x100_000..], "1MB through\n"); } +/// Test PAX sparse v1.0 format extraction. +/// +/// Verifies that: +/// - The file is extracted with its real name from GNU.sparse.name, not the +/// synthetic `GNUSparseFile.0/` path (issue #295) +/// - The file has the correct real size from GNU.sparse.realsize, not the +/// on-disk data size (issue #286) +/// - The sparse regions are correctly zero-filled +/// +/// Test archive from PR #298 by ncihnegn. +#[test] +fn pax_sparse() { + let rdr = Cursor::new(tar!("pax_sparse.tar")); + let mut ar = Archive::new(rdr); + let td = TempBuilder::new().prefix("tar-rs").tempdir().unwrap(); + ar.unpack(td.path()).unwrap(); + + // The file should be extracted as "sparse_begin.txt", NOT under + // "GNUSparseFile.0/sparse_begin.txt". + assert!( + !td.path().join("GNUSparseFile.0").exists(), + "GNUSparseFile.0 directory should not exist" + ); + + let mut s = String::new(); + File::open(td.path().join("sparse_begin.txt")) + .unwrap() + .read_to_string(&mut s) + .unwrap(); + + // Real size is 8096 bytes per GNU.sparse.realsize + assert_eq!(s.len(), 8096); + // First 5 bytes are "test\n" + assert_eq!(&s[..5], "test\n"); + // The rest is zero-filled (sparse hole) + assert!(s[5..].chars().all(|x| x == '\u{0}')); +} + +/// Test PAX sparse v1.0 format via the entries API. +/// +/// Verifies the entry-level path and size are correct without unpacking. +/// Test archive from PR #298 by ncihnegn. +#[test] +fn pax_sparse_entries() { + let rdr = Cursor::new(tar!("pax_sparse.tar")); + let mut ar = Archive::new(rdr); + let mut entries = ar.entries().unwrap(); + + let entry = entries.next().unwrap().unwrap(); + // Path should be the real name, not GNUSparseFile.0/sparse_begin.txt + assert_eq!(entry.path().unwrap().to_str().unwrap(), "sparse_begin.txt"); + // Size should be the real size (8096), not the on-disk data size + assert_eq!(entry.size(), 8096); + + // No more entries + assert!(entries.next().is_none()); +} + #[test] #[allow(clippy::option_map_unit_fn)] fn writing_sparse() { diff --git a/tests/archives/pax_sparse.tar b/tests/archives/pax_sparse.tar new file mode 100644 index 00000000..d74bef7b Binary files /dev/null and b/tests/archives/pax_sparse.tar differ