From 704b37e613dcf821244cf12a15601340e6efbbad Mon Sep 17 00:00:00 2001
From: Munif Tanjim
Date: Thu, 1 Jan 2026 17:59:57 +0600
Subject: [PATCH 1/3] feat(iter): add iter support

---
 iter.go   | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 volume.go |  18 +++--
 2 files changed, 242 insertions(+), 6 deletions(-)
 create mode 100644 iter.go

diff --git a/iter.go b/iter.go
new file mode 100644
index 0000000..6df555a
--- /dev/null
+++ b/iter.go
@@ -0,0 +1,230 @@
+package rardecode
+
+import "io"
+
+// Iter provides iteration over files in a RAR archive.
+// It follows the scanner pattern: call Next() in a loop, then check Err().
+type Iter struct {
+	f           archiveFile
+	header      *FileHeader
+	err         error
+	headersOnly bool
+	skipped     bool
+}
+
+// Next advances to the next file in the archive.
+// It returns true if there is a file to process, false if iteration
+// is complete or an error occurred. When Next returns false, call Err
+// to check for errors.
+//
+// If the previous file's content was not fully read (via Read, WriteTo,
+// or Skip), Next will automatically skip remaining content. For solid
+// archives, this requires decompressing the skipped data internally.
+//
+// Once Next has returned false, Header returns nil and Read, ReadByte,
+// WriteTo and Skip behave as if the content was already consumed.
+func (i *Iter) Next() bool {
+	if i.err != nil {
+		return false
+	}
+
+	if i.header != nil && !i.skipped {
+		if err := i.skipContent(); err != nil {
+			return i.stop(err)
+		}
+	}
+
+	blocks, err := i.f.nextFile()
+	if err != nil {
+		if err == io.EOF {
+			// A clean end of archive is not reported through Err.
+			err = nil
+		}
+		return i.stop(err)
+	}
+
+	// Prepare header: copy the first block's header and fold in the
+	// packed size of any further blocks collected for this file.
+	blocks.mu.RLock()
+	header := blocks.blocks[0].FileHeader
+	for _, block := range blocks.blocks[1:] {
+		header.PackedSize += block.PackedSize
+	}
+	blocks.mu.RUnlock()
+
+	if i.headersOnly {
+		// No content reader is set up; report the content as consumed.
+		i.header = &header
+		i.skipped = true
+		return true
+	}
+
+	// Assign to i.f only on success so a failure cannot leave the
+	// iterator holding whatever value was returned with the error.
+	f, err := i.f.newArchiveFile(blocks)
+	if err != nil {
+		return i.stop(err)
+	}
+	i.f = f
+	i.header = &header
+	i.skipped = false
+	return true
+}
+
+// stop ends the current iteration step: it records err (nil for a clean
+// end of archive), clears the current header and marks the content as
+// consumed. It always returns false so Next can return its result.
+func (i *Iter) stop(err error) bool {
+	i.err = err
+	i.header = nil
+	i.skipped = true
+	return false
+}
+
+// Header returns the FileHeader for the current file.
+// It returns nil if Next has not been called or returned false.
+func (i *Iter) Header() *FileHeader {
+	return i.header
+}
+
+// Read reads decompressed content from the current file.
+// It implements io.Reader.
+func (i *Iter) Read(p []byte) (int, error) {
+	if i.skipped {
+		return 0, io.EOF
+	}
+	n, err := i.f.Read(p)
+	if err == io.EOF {
+		i.skipped = true
+	}
+	return n, err
+}
+
+// ReadByte reads and returns a single byte from the current file.
+// It implements io.ByteReader.
+func (i *Iter) ReadByte() (byte, error) {
+	if i.skipped {
+		return 0, io.EOF
+	}
+	b, err := i.f.ReadByte()
+	if err == io.EOF {
+		i.skipped = true
+	}
+	return b, err
+}
+
+// WriteTo writes all remaining content of the current file to w.
+// It implements io.WriterTo for efficient copying.
+func (i *Iter) WriteTo(w io.Writer) (int64, error) {
+	if i.skipped {
+		return 0, nil
+	}
+	n, err := i.f.WriteTo(w)
+	if err == nil || err == io.EOF {
+		i.skipped = true
+		err = nil
+	}
+	return n, err
+}
+
+// Skip marks the current file's content as consumed without reading it.
+//
+// For non-solid archives, Skip avoids decompression:
+//   - Seekable readers (e.g., *os.File, *bytes.Reader): Uses Seek to skip packed data (most efficient)
+//   - Non-seekable readers (e.g., net.Conn, io.Pipe): Reads and discards packed bytes (no decompression)
+//
+// For solid archives:
+//   - Must decompress all content to maintain decoder state for subsequent files
+//
+// Skip is automatically called by Next() if content was not fully consumed.
+// Calling Skip explicitly is useful to document intent.
+func (i *Iter) Skip() error {
+	if i.skipped {
+		return nil
+	}
+	return i.skipContent()
+}
+
+func (i *Iter) skipContent() error {
+	// For non-solid files, we don't need to decompress.
+	// The next call to nextFile() will efficiently skip packed data
+	// via Seek (for file-based archives) or Discard (for streams).
+	// NOTE(review): this keys off the per-file Solid flag; confirm it is
+	// set for every file whose packed data later files depend on.
+	if i.header != nil && !i.header.Solid {
+		i.skipped = true
+		return nil
+	}
+
+	// For solid files, we must decompress to maintain decoder state
+	// (dictionary and decode tables carry over to subsequent files).
+	_, err := io.Copy(io.Discard, i.f)
+	if err == nil || err == io.EOF {
+		i.skipped = true
+		err = nil
+	}
+	return err
+}
+
+// Err returns the first error encountered during iteration.
+// If iteration completed successfully (io.EOF), Err returns nil.
+func (i *Iter) Err() error {
+	return i.err
+}
+
+func newIter(v volume, opts *options) Iter {
+	pr := newPackedFileReader(v, opts)
+	return Iter{
+		f:           pr,
+		headersOnly: opts.iterHeadersOnly,
+	}
+}
+
+// NewIter creates an Iter reading from r.
+// NewIter only supports single volume archives.
+// Multi-volume archives must use OpenIter.
+func NewIter(r io.Reader, opts ...Option) (*Iter, error) {
+	options := getOptions(opts)
+	v, err := newVolume(r, options, 0)
+	if err != nil {
+		return nil, err
+	}
+	iter := newIter(v, options)
+	return &iter, nil
+}
+
+// IterCloser is an Iter that must be closed when done.
+type IterCloser struct {
+	Iter
+	closer io.Closer
+	vm     *volumeManager
+}
+
+// Close closes the archive file.
+// It must be called when done with the iterator.
+func (ic *IterCloser) Close() error {
+	return ic.closer.Close()
+}
+
+// Volumes returns the volume filenames that have been used in decoding the archive
+// up to this point. This will include the current open volume if the archive is still
+// being processed.
+func (ic *IterCloser) Volumes() []string {
+	if ic.vm == nil {
+		return nil
+	}
+	return ic.vm.Files()
+}
+
+// OpenIter opens a RAR archive file and returns an IterCloser.
+// The caller must call Close when finished.
+func OpenIter(name string, opts ...Option) (*IterCloser, error) {
+	options := getOptions(opts)
+	v, err := openVolume(name, options)
+	if err != nil {
+		return nil, err
+	}
+	ic := &IterCloser{vm: v.vm, closer: v}
+	ic.Iter = newIter(v, options)
+	return ic, nil
+}
diff --git a/volume.go b/volume.go
index df60de0..335baaa 100644
--- a/volume.go
+++ b/volume.go
@@ -32,12 +32,13 @@ func (fs osFS) Open(name string) (fs.File, error) {
 }
 
 type options struct {
-	bsize       int     // size to be use for bufio.Reader
-	maxDictSize int64   // max dictionary size
-	fs          fs.FS   // filesystem to use to open files
-	pass        *string // password for encrypted volumes
-	skipCheck   bool
-	openCheck   bool
+	bsize           int     // size to be used for bufio.Reader
+	maxDictSize     int64   // max dictionary size
+	fs              fs.FS   // filesystem to use to open files
+	pass            *string // password for encrypted volumes
+	skipCheck       bool
+	openCheck       bool
+	iterHeadersOnly bool // skip file contents automatically (for iteration)
 }
 
 // An Option is used for optional archive extraction settings.
@@ -72,6 +73,11 @@ func SkipCheck(o *options) { o.skipCheck = true }
 // OpenFSCheck flags the archive files to be checked on Open or List.
 func OpenFSCheck(o *options) { o.openCheck = true }
 
+// IterHeadersOnly configures the iterator to skip file contents automatically.
+// This is more efficient than manually calling Skip() after each Next()
+// as it can avoid setting up decompression readers.
+func IterHeadersOnly(o *options) { o.iterHeadersOnly = true }
+
 func getOptions(opts []Option) *options {
 	opt := &options{
 		fs: defaultFS,

From 27719ca7e759a175b8a2a375e4e03bfb3af31d45 Mon Sep 17 00:00:00 2001
From: Munif Tanjim
Date: Fri, 2 Jan 2026 14:42:00 +0600
Subject: [PATCH 2/3] fix(reader): fix nextFile block iteration

---
 reader.go | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/reader.go b/reader.go
index f71450d..c8eaab9 100644
--- a/reader.go
+++ b/reader.go
@@ -256,14 +256,6 @@ func (f *packedFileReader) nextBlock() error {
 
 // next advances to the next packed file in the RAR archive.
 func (f *packedFileReader) nextFile() (*fileBlockList, error) {
-	// skip to last block in current file
-	var err error
-	for err == nil {
-		err = f.nextBlock()
-	}
-	if err != io.EOF {
-		return nil, err
-	}
 	h, err := f.v.nextBlock() // get next file block
 	if err != nil {
 		if err == errVolumeOrArchiveEnd {
@@ -279,6 +271,16 @@ func (f *packedFileReader) nextFile() (*fileBlockList, error) {
 	if err != nil {
 		return nil, err
 	}
+
+	for {
+		if err := f.nextBlock(); err != nil {
+			if err == io.EOF {
+				break
+			}
+			return nil, err
+		}
+	}
+
 	return blocks, nil
 }
 

From 6cb51fd28439cff6c2662a9d90c458ce017dde42 Mon Sep 17 00:00:00 2001
From: Munif Tanjim
Date: Sun, 4 Jan 2026 10:13:58 +0600
Subject: [PATCH 3/3] feat(iter): add split block support

---
 reader.go | 14 ++++++++------
 volume.go |  5 +++++
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/reader.go b/reader.go
index c8eaab9..fb83db5 100644
--- a/reader.go
+++ b/reader.go
@@ -263,7 +263,7 @@ func (f *packedFileReader) nextFile() (*fileBlockList, error) {
 		}
 		return nil, err
 	}
-	if !h.first {
+	if !h.first && !f.opt.iterSplitBlocks {
 		return nil, ErrInvalidFileBlock
 	}
 	blocks := newFileBlockList(h)
@@ -272,12 +272,14 @@ func (f *packedFileReader) nextFile() (*fileBlockList, error) {
 		return nil, err
 	}
 
-	for {
-		if err := f.nextBlock(); err != nil {
-			if err == io.EOF {
-				break
+	if !f.opt.iterSplitBlocks {
+		for {
+			if err := f.nextBlock(); err != nil {
+				if err == io.EOF {
+					break
+				}
+				return nil, err
 			}
-			return nil, err
 		}
 	}
 
diff --git a/volume.go b/volume.go
index 335baaa..4b15b49 100644
--- a/volume.go
+++ b/volume.go
@@ -39,6 +39,7 @@ type options struct {
 	skipCheck       bool
 	openCheck       bool
 	iterHeadersOnly bool // skip file contents automatically (for iteration)
+	iterSplitBlocks bool // return each block of a split file separately (for iteration)
 }
 
 // An Option is used for optional archive extraction settings.
@@ -78,6 +79,10 @@ func OpenFSCheck(o *options) { o.openCheck = true }
 // as it can avoid setting up decompression readers.
 func IterHeadersOnly(o *options) { o.iterHeadersOnly = true }
 
+// IterSplitBlocks configures the iterator to return each block of a split file
+// separately, rather than collecting all blocks into a single file entry.
+func IterSplitBlocks(o *options) { o.iterSplitBlocks = true }
+
 func getOptions(opts []Option) *options {
 	opt := &options{
 		fs: defaultFS,