From 277e57467af208710c50b2ef50b57857a4d48150 Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:31:46 +0900 Subject: [PATCH 01/13] Add FormatValidator protocol, ValidationResult, and ByteReader utility Foundation for the structural validation layer that checks image header integrity beyond magic byte detection. Co-Authored-By: Claude Opus 4.6 (1M context) --- Sources/XPBCCore/FormatValidator.swift | 10 +++++++++ Sources/XPBCCore/Validators/ByteReader.swift | 23 ++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 Sources/XPBCCore/FormatValidator.swift create mode 100644 Sources/XPBCCore/Validators/ByteReader.swift diff --git a/Sources/XPBCCore/FormatValidator.swift b/Sources/XPBCCore/FormatValidator.swift new file mode 100644 index 0000000..ca595f2 --- /dev/null +++ b/Sources/XPBCCore/FormatValidator.swift @@ -0,0 +1,10 @@ +import Foundation + +public enum ValidationResult: Equatable, Sendable { + case valid + case invalid(reason: String) +} + +protocol FormatValidator: Sendable { + func validate(_ data: Data) -> ValidationResult +} diff --git a/Sources/XPBCCore/Validators/ByteReader.swift b/Sources/XPBCCore/Validators/ByteReader.swift new file mode 100644 index 0000000..e356abb --- /dev/null +++ b/Sources/XPBCCore/Validators/ByteReader.swift @@ -0,0 +1,23 @@ +import Foundation + +func readBigEndianUInt32(_ data: Data, offset: Int) -> UInt32 { + let start = data.startIndex + offset + return UInt32(data[start]) << 24 + | UInt32(data[start + 1]) << 16 + | UInt32(data[start + 2]) << 8 + | UInt32(data[start + 3]) +} + +func readLittleEndianUInt32(_ data: Data, offset: Int) -> UInt32 { + let start = data.startIndex + offset + return UInt32(data[start]) + | UInt32(data[start + 1]) << 8 + | UInt32(data[start + 2]) << 16 + | UInt32(data[start + 3]) << 24 +} + +func readLittleEndianUInt16(_ data: Data, offset: Int) -> UInt16 { + let start = data.startIndex + offset + return UInt16(data[start]) + | UInt16(data[start + 1]) << 8 +} 
From b6a87c35b420203cf16c199925149951638ae788 Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:33:03 +0900 Subject: [PATCH 02/13] Add DataValidator engine and all format validators Structural validation layer that checks image header integrity beyond magic byte detection. Each validator performs lightweight checks: - PNG: IHDR chunk presence, width/height > 0 - JPEG: valid marker after SOI (0xC0-0xFE range) - GIF: Logical Screen Descriptor width/height > 0 - TIFF: IFD offset within valid range - BMP: DIB header size matches known valid values - WebP: VP8/VP8L/VP8X chunk header presence - HEIC/AVIF: ftyp box size sanity check - PDF: rejects files with /JS, /JavaScript, /OpenAction, /AA, /Launch Also adds validationFailed case to XPBCError. Co-Authored-By: Claude Opus 4.6 (1M context) --- Sources/XPBCCore/DataValidator.swift | 21 +++++++++++++++ .../XPBCCore/Validators/BMPValidator.swift | 19 +++++++++++++ .../XPBCCore/Validators/FtypValidator.swift | 21 +++++++++++++++ .../XPBCCore/Validators/GIFValidator.swift | 22 +++++++++++++++ .../XPBCCore/Validators/JPEGValidator.swift | 25 +++++++++++++++++ .../XPBCCore/Validators/PDFValidator.swift | 23 ++++++++++++++++ .../XPBCCore/Validators/PNGValidator.swift | 27 +++++++++++++++++++ .../XPBCCore/Validators/TIFFValidator.swift | 27 +++++++++++++++++++ .../XPBCCore/Validators/WebPValidator.swift | 24 +++++++++++++++++ Sources/XPBCCore/XPBCError.swift | 3 +++ 10 files changed, 212 insertions(+) create mode 100644 Sources/XPBCCore/DataValidator.swift create mode 100644 Sources/XPBCCore/Validators/BMPValidator.swift create mode 100644 Sources/XPBCCore/Validators/FtypValidator.swift create mode 100644 Sources/XPBCCore/Validators/GIFValidator.swift create mode 100644 Sources/XPBCCore/Validators/JPEGValidator.swift create mode 100644 Sources/XPBCCore/Validators/PDFValidator.swift create mode 100644 Sources/XPBCCore/Validators/PNGValidator.swift create mode 100644 
Sources/XPBCCore/Validators/TIFFValidator.swift create mode 100644 Sources/XPBCCore/Validators/WebPValidator.swift diff --git a/Sources/XPBCCore/DataValidator.swift b/Sources/XPBCCore/DataValidator.swift new file mode 100644 index 0000000..8b274b5 --- /dev/null +++ b/Sources/XPBCCore/DataValidator.swift @@ -0,0 +1,21 @@ +import Foundation + +public struct DataValidator: Sendable { + static let validators: [DataType: FormatValidator] = [ + .png: PNGValidator(), + .jpeg: JPEGValidator(), + .gif: GIFValidator(), + .tiff: TIFFValidator(), + .bmp: BMPValidator(), + .webp: WebPValidator(), + .heic: FtypValidator(), + .avif: FtypValidator(), + .pdf: PDFValidator(), + ] + + public static func validate(_ data: Data, as type: DataType) -> ValidationResult { + guard type != .text else { return .valid } + guard let validator = validators[type] else { return .valid } + return validator.validate(data) + } +} diff --git a/Sources/XPBCCore/Validators/BMPValidator.swift b/Sources/XPBCCore/Validators/BMPValidator.swift new file mode 100644 index 0000000..347f64a --- /dev/null +++ b/Sources/XPBCCore/Validators/BMPValidator.swift @@ -0,0 +1,19 @@ +import Foundation + +struct BMPValidator: FormatValidator { + private static let validDIBSizes: Set = [12, 40, 52, 56, 108, 124] + + func validate(_ data: Data) -> ValidationResult { + // BMP file header is 14 bytes, then DIB header starts with its size (LE u32) + guard data.count >= 18 else { + return .invalid(reason: "too short for DIB header size (need >= 18 bytes)") + } + + let dibSize = readLittleEndianUInt32(data, offset: 14) + guard Self.validDIBSizes.contains(dibSize) else { + return .invalid(reason: "invalid DIB header size \(dibSize)") + } + + return .valid + } +} diff --git a/Sources/XPBCCore/Validators/FtypValidator.swift b/Sources/XPBCCore/Validators/FtypValidator.swift new file mode 100644 index 0000000..9586335 --- /dev/null +++ b/Sources/XPBCCore/Validators/FtypValidator.swift @@ -0,0 +1,21 @@ +import Foundation + +struct 
FtypValidator: FormatValidator { + func validate(_ data: Data) -> ValidationResult { + // ftyp box: first 4 bytes = box size (big-endian UInt32) + guard data.count >= 8 else { + return .invalid(reason: "too short for ftyp box (need >= 8 bytes)") + } + + let boxSize = readBigEndianUInt32(data, offset: 0) + guard boxSize >= 8 else { + return .invalid(reason: "ftyp box size \(boxSize) is less than minimum (8)") + } + + guard boxSize <= data.count else { + return .invalid(reason: "ftyp box size \(boxSize) exceeds data size \(data.count)") + } + + return .valid + } +} diff --git a/Sources/XPBCCore/Validators/GIFValidator.swift b/Sources/XPBCCore/Validators/GIFValidator.swift new file mode 100644 index 0000000..4248ff4 --- /dev/null +++ b/Sources/XPBCCore/Validators/GIFValidator.swift @@ -0,0 +1,22 @@ +import Foundation + +struct GIFValidator: FormatValidator { + func validate(_ data: Data) -> ValidationResult { + // Logical Screen Descriptor at offset 6: width (LE u16) + height (LE u16) + guard data.count >= 10 else { + return .invalid(reason: "too short for Logical Screen Descriptor (need >= 10 bytes)") + } + + let width = readLittleEndianUInt16(data, offset: 6) + guard width > 0 else { + return .invalid(reason: "logical screen width is 0") + } + + let height = readLittleEndianUInt16(data, offset: 8) + guard height > 0 else { + return .invalid(reason: "logical screen height is 0") + } + + return .valid + } +} diff --git a/Sources/XPBCCore/Validators/JPEGValidator.swift b/Sources/XPBCCore/Validators/JPEGValidator.swift new file mode 100644 index 0000000..8a69a2f --- /dev/null +++ b/Sources/XPBCCore/Validators/JPEGValidator.swift @@ -0,0 +1,25 @@ +import Foundation + +struct JPEGValidator: FormatValidator { + func validate(_ data: Data) -> ValidationResult { + // After SOI (FF D8), next should be FF xx where xx in 0xC0...0xFE + guard data.count >= 4 else { + return .invalid(reason: "too short for marker after SOI (need >= 4 bytes)") + } + + guard data[data.startIndex + 
2] == 0xFF else { + return .invalid( + reason: "expected 0xFF at offset 2, got 0x\(String(format: "%02X", data[data.startIndex + 2]))" + ) + } + + let marker = data[data.startIndex + 3] + guard (0xC0...0xFE).contains(marker) else { + return .invalid( + reason: "invalid marker 0xFF\(String(format: "%02X", marker)) at offset 2" + ) + } + + return .valid + } +} diff --git a/Sources/XPBCCore/Validators/PDFValidator.swift b/Sources/XPBCCore/Validators/PDFValidator.swift new file mode 100644 index 0000000..da9272f --- /dev/null +++ b/Sources/XPBCCore/Validators/PDFValidator.swift @@ -0,0 +1,23 @@ +import Foundation + +struct PDFValidator: FormatValidator { + private static let dangerousKeywords: [String] = [ + "/JS", "/JavaScript", "/OpenAction", "/AA", "/Launch", + ] + + func validate(_ data: Data) -> ValidationResult { + guard let content = String(data: data, encoding: .ascii) + ?? String(data: data, encoding: .isoLatin1) + else { + return .valid + } + + for keyword in Self.dangerousKeywords { + if content.contains(keyword) { + return .invalid(reason: "contains potentially dangerous keyword '\(keyword)'") + } + } + + return .valid + } +} diff --git a/Sources/XPBCCore/Validators/PNGValidator.swift b/Sources/XPBCCore/Validators/PNGValidator.swift new file mode 100644 index 0000000..5ad24a2 --- /dev/null +++ b/Sources/XPBCCore/Validators/PNGValidator.swift @@ -0,0 +1,27 @@ +import Foundation + +struct PNGValidator: FormatValidator { + func validate(_ data: Data) -> ValidationResult { + // PNG: 8-byte signature + IHDR chunk (4-byte length + 4-byte "IHDR" + 13-byte data) + guard data.count >= 29 else { + return .invalid(reason: "too short for IHDR chunk (need >= 29 bytes, got \(data.count))") + } + + let ihdr: [UInt8] = [0x49, 0x48, 0x44, 0x52] + guard data[data.startIndex + 12.. 
0 else { + return .invalid(reason: "width is 0") + } + + let height = readBigEndianUInt32(data, offset: 20) + guard height > 0 else { + return .invalid(reason: "height is 0") + } + + return .valid + } +} diff --git a/Sources/XPBCCore/Validators/TIFFValidator.swift b/Sources/XPBCCore/Validators/TIFFValidator.swift new file mode 100644 index 0000000..2532f3b --- /dev/null +++ b/Sources/XPBCCore/Validators/TIFFValidator.swift @@ -0,0 +1,27 @@ +import Foundation + +struct TIFFValidator: FormatValidator { + func validate(_ data: Data) -> ValidationResult { + guard data.count >= 8 else { + return .invalid(reason: "too short for IFD offset (need >= 8 bytes)") + } + + let isLittleEndian = data[data.startIndex] == 0x49 // 'I' + let ifdOffset: UInt32 + if isLittleEndian { + ifdOffset = readLittleEndianUInt32(data, offset: 4) + } else { + ifdOffset = readBigEndianUInt32(data, offset: 4) + } + + guard ifdOffset >= 8 else { + return .invalid(reason: "IFD offset \(ifdOffset) is less than minimum (8)") + } + + guard ifdOffset < data.count else { + return .invalid(reason: "IFD offset \(ifdOffset) exceeds data size \(data.count)") + } + + return .valid + } +} diff --git a/Sources/XPBCCore/Validators/WebPValidator.swift b/Sources/XPBCCore/Validators/WebPValidator.swift new file mode 100644 index 0000000..7ace922 --- /dev/null +++ b/Sources/XPBCCore/Validators/WebPValidator.swift @@ -0,0 +1,24 @@ +import Foundation + +struct WebPValidator: FormatValidator { + private static let vp8: [UInt8] = [0x56, 0x50, 0x38, 0x20] // "VP8 " + private static let vp8l: [UInt8] = [0x56, 0x50, 0x38, 0x4C] // "VP8L" + private static let vp8x: [UInt8] = [0x56, 0x50, 0x38, 0x58] // "VP8X" + + func validate(_ data: Data) -> ValidationResult { + guard data.count >= 16 else { + return .invalid(reason: "too short for chunk header (need >= 16 bytes)") + } + + let chunkID = data[data.startIndex + 12.. 
Date: Sun, 22 Mar 2026 00:34:16 +0900 Subject: [PATCH 03/13] Add DataValidator tests (39 test cases) Covers all 8 format validators plus text passthrough: - PNG: valid, too short, missing IHDR, zero width/height - JPEG: valid, no marker prefix, invalid range, too short - GIF: valid, zero width/height, too short - TIFF: valid LE/BE, offset too small/exceeds data, too short - BMP: valid DIB sizes (40, 124), invalid size, too short - WebP: VP8/VP8L/VP8X valid, unknown chunk, too short - HEIC/AVIF: valid, box size too small/exceeds data - PDF: valid, /JS, /JavaScript, /OpenAction, /AA, /Launch - Text: always valid Co-Authored-By: Claude Opus 4.6 (1M context) --- Tests/XPBCCoreTests/DataValidatorTests.swift | 283 +++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 Tests/XPBCCoreTests/DataValidatorTests.swift diff --git a/Tests/XPBCCoreTests/DataValidatorTests.swift b/Tests/XPBCCoreTests/DataValidatorTests.swift new file mode 100644 index 0000000..3814bfa --- /dev/null +++ b/Tests/XPBCCoreTests/DataValidatorTests.swift @@ -0,0 +1,283 @@ +import Testing +import Foundation +@testable import XPBCCore + +struct DataValidatorTests { + // MARK: - PNG Validation + + @Test func validPNG_passes() { + var data = Data([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]) // signature + data.append(contentsOf: [0x00, 0x00, 0x00, 0x0D]) // IHDR length = 13 + data.append(contentsOf: [0x49, 0x48, 0x44, 0x52]) // "IHDR" + data.append(contentsOf: [0x00, 0x00, 0x00, 0x01]) // width = 1 + data.append(contentsOf: [0x00, 0x00, 0x00, 0x01]) // height = 1 + data.append(contentsOf: [0x08, 0x02, 0x00, 0x00, 0x00]) // bit depth, color type, etc. 
+ #expect(DataValidator.validate(data, as: .png) == .valid) + } + + @Test func png_tooShort_fails() { + let data = Data([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00]) + #expect(DataValidator.validate(data, as: .png) != .valid) + } + + @Test func png_missingIHDR_fails() { + var data = Data([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x0D]) + data.append(contentsOf: [0x74, 0x45, 0x58, 0x74]) // "tEXt" instead of "IHDR" + data.append(Data(repeating: 0x01, count: 13)) + #expect(DataValidator.validate(data, as: .png) != .valid) + } + + @Test func png_zeroWidth_fails() { + var data = Data([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x0D]) + data.append(contentsOf: [0x49, 0x48, 0x44, 0x52]) // "IHDR" + data.append(contentsOf: [0x00, 0x00, 0x00, 0x00]) // width = 0 + data.append(contentsOf: [0x00, 0x00, 0x00, 0x01]) // height = 1 + data.append(contentsOf: [0x08, 0x02, 0x00, 0x00, 0x00]) + #expect(DataValidator.validate(data, as: .png) != .valid) + } + + @Test func png_zeroHeight_fails() { + var data = Data([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x0D]) + data.append(contentsOf: [0x49, 0x48, 0x44, 0x52]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x01]) // width = 1 + data.append(contentsOf: [0x00, 0x00, 0x00, 0x00]) // height = 0 + data.append(contentsOf: [0x08, 0x02, 0x00, 0x00, 0x00]) + #expect(DataValidator.validate(data, as: .png) != .valid) + } + + // MARK: - JPEG Validation + + @Test func validJPEG_passes() { + let data = Data([0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10]) + #expect(DataValidator.validate(data, as: .jpeg) == .valid) + } + + @Test func jpeg_noMarkerPrefix_fails() { + let data = Data([0xFF, 0xD8, 0x00, 0xE0]) + #expect(DataValidator.validate(data, as: .jpeg) != .valid) + } + + @Test func jpeg_invalidMarkerRange_fails() { + let data = Data([0xFF, 0xD8, 0xFF, 0x00]) + 
#expect(DataValidator.validate(data, as: .jpeg) != .valid) + } + + @Test func jpeg_tooShort_fails() { + let data = Data([0xFF, 0xD8, 0xFF]) + #expect(DataValidator.validate(data, as: .jpeg) != .valid) + } + + // MARK: - GIF Validation + + @Test func validGIF89a_passes() { + let data = Data([0x47, 0x49, 0x46, 0x38, 0x39, 0x61, + 0x0A, 0x00, 0x0A, 0x00]) // width=10, height=10 + #expect(DataValidator.validate(data, as: .gif) == .valid) + } + + @Test func gif_zeroWidth_fails() { + let data = Data([0x47, 0x49, 0x46, 0x38, 0x39, 0x61, + 0x00, 0x00, 0x0A, 0x00]) + #expect(DataValidator.validate(data, as: .gif) != .valid) + } + + @Test func gif_zeroHeight_fails() { + let data = Data([0x47, 0x49, 0x46, 0x38, 0x39, 0x61, + 0x0A, 0x00, 0x00, 0x00]) + #expect(DataValidator.validate(data, as: .gif) != .valid) + } + + @Test func gif_tooShort_fails() { + let data = Data([0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x0A]) + #expect(DataValidator.validate(data, as: .gif) != .valid) + } + + // MARK: - TIFF Validation + + @Test func validTIFF_littleEndian_passes() { + // II (LE) + magic 42 + IFD offset = 8 + let data = Data([0x49, 0x49, 0x2A, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00]) + #expect(DataValidator.validate(data, as: .tiff) == .valid) + } + + @Test func validTIFF_bigEndian_passes() { + // MM (BE) + magic 42 + IFD offset = 8 + let data = Data([0x4D, 0x4D, 0x00, 0x2A, + 0x00, 0x00, 0x00, 0x08, 0x00]) + #expect(DataValidator.validate(data, as: .tiff) == .valid) + } + + @Test func tiff_ifdOffsetTooSmall_fails() { + let data = Data([0x49, 0x49, 0x2A, 0x00, + 0x04, 0x00, 0x00, 0x00]) // offset = 4 + #expect(DataValidator.validate(data, as: .tiff) != .valid) + } + + @Test func tiff_ifdOffsetExceedsData_fails() { + let data = Data([0x49, 0x49, 0x2A, 0x00, + 0xFF, 0x00, 0x00, 0x00]) // offset = 255 + #expect(DataValidator.validate(data, as: .tiff) != .valid) + } + + @Test func tiff_tooShort_fails() { + let data = Data([0x49, 0x49, 0x2A, 0x00, 0x08]) + #expect(DataValidator.validate(data, as: 
.tiff) != .valid) + } + + // MARK: - BMP Validation + + @Test func validBMP_dibSize40_passes() { + var data = Data([0x42, 0x4D]) // "BM" + data.append(Data(repeating: 0x00, count: 12)) // rest of file header + data.append(contentsOf: [0x28, 0x00, 0x00, 0x00]) // DIB size = 40 + #expect(DataValidator.validate(data, as: .bmp) == .valid) + } + + @Test func validBMP_dibSize124_passes() { + var data = Data([0x42, 0x4D]) + data.append(Data(repeating: 0x00, count: 12)) + data.append(contentsOf: [0x7C, 0x00, 0x00, 0x00]) // DIB size = 124 + #expect(DataValidator.validate(data, as: .bmp) == .valid) + } + + @Test func bmp_invalidDIBSize_fails() { + var data = Data([0x42, 0x4D]) + data.append(Data(repeating: 0x00, count: 12)) + data.append(contentsOf: [0x30, 0x00, 0x00, 0x00]) // DIB size = 48 (invalid) + #expect(DataValidator.validate(data, as: .bmp) != .valid) + } + + @Test func bmp_tooShort_fails() { + let data = Data([0x42, 0x4D, 0x00, 0x00]) + #expect(DataValidator.validate(data, as: .bmp) != .valid) + } + + // MARK: - WebP Validation + + @Test func validWebP_VP8_passes() { + var data = Data([0x52, 0x49, 0x46, 0x46]) // "RIFF" + data.append(contentsOf: [0x00, 0x00, 0x00, 0x00]) // size + data.append(contentsOf: [0x57, 0x45, 0x42, 0x50]) // "WEBP" + data.append(contentsOf: [0x56, 0x50, 0x38, 0x20]) // "VP8 " + #expect(DataValidator.validate(data, as: .webp) == .valid) + } + + @Test func validWebP_VP8L_passes() { + var data = Data([0x52, 0x49, 0x46, 0x46]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x00]) + data.append(contentsOf: [0x57, 0x45, 0x42, 0x50]) + data.append(contentsOf: [0x56, 0x50, 0x38, 0x4C]) // "VP8L" + #expect(DataValidator.validate(data, as: .webp) == .valid) + } + + @Test func validWebP_VP8X_passes() { + var data = Data([0x52, 0x49, 0x46, 0x46]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x00]) + data.append(contentsOf: [0x57, 0x45, 0x42, 0x50]) + data.append(contentsOf: [0x56, 0x50, 0x38, 0x58]) // "VP8X" + #expect(DataValidator.validate(data, as: 
.webp) == .valid) + } + + @Test func webp_unknownChunk_fails() { + var data = Data([0x52, 0x49, 0x46, 0x46]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x00]) + data.append(contentsOf: [0x57, 0x45, 0x42, 0x50]) + data.append(contentsOf: [0x41, 0x4E, 0x49, 0x4D]) // "ANIM" (not VP8*) + #expect(DataValidator.validate(data, as: .webp) != .valid) + } + + @Test func webp_tooShort_fails() { + var data = Data([0x52, 0x49, 0x46, 0x46]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x00]) + data.append(contentsOf: [0x57, 0x45, 0x42, 0x50]) + #expect(DataValidator.validate(data, as: .webp) != .valid) + } + + // MARK: - HEIC/AVIF Validation (FtypValidator) + + @Test func validHEIC_passes() { + // box size = 32 (0x20), ftyp, heic + var data = Data([0x00, 0x00, 0x00, 0x20]) + data.append(contentsOf: [0x66, 0x74, 0x79, 0x70]) // "ftyp" + data.append(contentsOf: [0x68, 0x65, 0x69, 0x63]) // "heic" + data.append(Data(repeating: 0x00, count: 20)) // pad to 32 bytes total + #expect(DataValidator.validate(data, as: .heic) == .valid) + } + + @Test func validAVIF_passes() { + var data = Data([0x00, 0x00, 0x00, 0x20]) + data.append(contentsOf: [0x66, 0x74, 0x79, 0x70]) + data.append(contentsOf: [0x61, 0x76, 0x69, 0x66]) // "avif" + data.append(Data(repeating: 0x00, count: 20)) + #expect(DataValidator.validate(data, as: .avif) == .valid) + } + + @Test func ftyp_boxSizeTooSmall_fails() { + var data = Data([0x00, 0x00, 0x00, 0x04]) // size = 4 (< 8) + data.append(contentsOf: [0x66, 0x74, 0x79, 0x70]) + data.append(contentsOf: [0x68, 0x65, 0x69, 0x63]) + #expect(DataValidator.validate(data, as: .heic) != .valid) + } + + @Test func ftyp_boxSizeExceedsData_fails() { + var data = Data([0x00, 0x00, 0x01, 0x00]) // size = 256 + data.append(contentsOf: [0x66, 0x74, 0x79, 0x70]) + data.append(contentsOf: [0x68, 0x65, 0x69, 0x63]) + // Only 12 bytes of data, box says 256 + #expect(DataValidator.validate(data, as: .heic) != .valid) + } + + // MARK: - PDF Validation + + @Test func 
validPDF_noDangerousKeywords_passes() { + let content = "%PDF-1.4\n1 0 obj\n<< /Type /Catalog >>\nendobj" + let data = content.data(using: .utf8)! + #expect(DataValidator.validate(data, as: .pdf) == .valid) + } + + @Test func pdf_withJS_fails() { + let content = "%PDF-1.4\n/JS (app.alert('xss'))" + let data = content.data(using: .utf8)! + #expect(DataValidator.validate(data, as: .pdf) != .valid) + } + + @Test func pdf_withJavaScript_fails() { + let content = "%PDF-1.4\n/JavaScript (malicious)" + let data = content.data(using: .utf8)! + #expect(DataValidator.validate(data, as: .pdf) != .valid) + } + + @Test func pdf_withOpenAction_fails() { + let content = "%PDF-1.4\n<< /OpenAction 1 0 R >>" + let data = content.data(using: .utf8)! + #expect(DataValidator.validate(data, as: .pdf) != .valid) + } + + @Test func pdf_withAA_fails() { + let content = "%PDF-1.4\n<< /AA << /O 1 0 R >> >>" + let data = content.data(using: .utf8)! + #expect(DataValidator.validate(data, as: .pdf) != .valid) + } + + @Test func pdf_withLaunch_fails() { + let content = "%PDF-1.4\n<< /Launch /Win >>" + let data = content.data(using: .utf8)! + #expect(DataValidator.validate(data, as: .pdf) != .valid) + } + + // MARK: - Text (no validation) + + @Test func text_alwaysValid() { + let data = "Hello, world!".data(using: .utf8)! + #expect(DataValidator.validate(data, as: .text) == .valid) + } + + @Test func text_emptyDataValid() { + let data = Data() + #expect(DataValidator.validate(data, as: .text) == .valid) + } +} From 07444334d895cd5abef7ff213504475ba494c45c Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:34:53 +0900 Subject: [PATCH 04/13] Add --no-validate flag and integrate structural validation into CLI Validation runs by default after format detection. If the image header is malformed, xpbc exits with error code 1 and a descriptive message. Use --no-validate to skip validation for trusted inputs. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Sources/xpbc/main.swift | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Sources/xpbc/main.swift b/Sources/xpbc/main.swift index 6379d67..7d6ceea 100644 --- a/Sources/xpbc/main.swift +++ b/Sources/xpbc/main.swift @@ -6,7 +6,7 @@ let version = "0.1.0" func printUsage() { let usage = """ - Usage: xpbc [-pboard {general|ruler|find|font}] [--help] [--version] + Usage: xpbc [-pboard {general|ruler|find|font}] [--no-validate] [--help] [--version] eXtended PasteBoard Copy - copies stdin to the macOS clipboard. Automatically detects image data (PNG, JPEG, GIF, TIFF, BMP, WebP, HEIC, AVIF, PDF) @@ -14,6 +14,7 @@ func printUsage() { Options: -pboard NAME Specify the pasteboard (default: general) + --no-validate Skip structural validation of image headers -h, --help Show this help message -v, --version Show version """ @@ -30,6 +31,7 @@ func printError(_ message: String) { func run() throws { var pasteboardName: NSPasteboard.Name = .general + var shouldValidate = true let args = CommandLine.arguments.dropFirst() var iterator = args.makeIterator() @@ -41,6 +43,8 @@ func run() throws { case "-v", "--version": printVersion() return + case "--no-validate": + shouldValidate = false case "-pboard": guard let name = iterator.next() else { throw XPBCError.invalidArgument("-pboard requires a value") @@ -53,6 +57,14 @@ func run() throws { let data = try StdinReader.read() let dataType = DataTypeDetector.detect(from: data) + + if shouldValidate { + let result = DataValidator.validate(data, as: dataType) + if case .invalid(let reason) = result { + throw XPBCError.validationFailed(format: "\(dataType)", reason: reason) + } + } + let writer = PasteboardWriter(pasteboardName: pasteboardName) try writer.write(data, as: dataType) } From 05207ca837a2e8fffc786584597d62f13caeeb62 Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:35:35 +0900 Subject: [PATCH 05/13] Update README with 
--no-validate flag and security limitations Adds Important limitations subsection to Security section explaining that structural validation cannot detect crafted exploit payloads, decompression bombs, or obfuscated PDF active content. Warns against piping untrusted data. Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7f84cd9..d34fdc8 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ make install PREFIX=~/.local ## Usage ``` -xpbc [-pboard {general|ruler|find|font}] [--help] [--version] +xpbc [-pboard {general|ruler|find|font}] [--no-validate] [--help] [--version] ``` Pipe any data into `xpbc` via stdin. It automatically detects the format and copies accordingly. @@ -90,7 +90,7 @@ Anything that doesn't match a known image signature is copied as text. | Code | Meaning | |------|---------| | 0 | Success | -| 1 | Known error (empty input, input too large, invalid argument, pasteboard write failure) | +| 1 | Known error (empty input, input too large, invalid argument, validation failure, pasteboard write failure) | | 2 | Unexpected error | ### Options @@ -98,6 +98,7 @@ Anything that doesn't match a known image signature is copied as text. 
| Flag | Description | |------|-------------| | `-pboard NAME` | Target pasteboard: `general` (default), `ruler`, `find`, or `font` | +| `--no-validate` | Skip structural validation of image headers | | `-h`, `--help` | Print usage | | `-v`, `--version` | Print version | @@ -118,6 +119,17 @@ make clean # Clean build artifacts - Input size is capped at 100 MB (read in 64 KB chunks to prevent OOM) - stdin-only input (no file path arguments, no path traversal risk) - Written in memory-safe Swift with no `Unsafe` pointer usage +- Structural validation of image headers is enabled by default (use `--no-validate` to skip) + +### Important limitations + +**xpbc does not guarantee the safety of clipboard contents.** While structural validation checks that image headers are well-formed, it cannot detect: + +- **Crafted exploit payloads** — A structurally valid image (valid headers, correct dimensions) can still contain malicious data that exploits vulnerabilities in the application where you paste it (e.g., heap overflows in image decoders like libwebp, ImageIO). +- **Decompression bombs** — An image with valid headers but compressed data that expands to an extreme size, causing the paste target to crash with out-of-memory. +- **PDF active content** — While xpbc blocks PDFs containing known dangerous keywords (`/JS`, `/JavaScript`, `/OpenAction`, `/AA`, `/Launch`), obfuscated or novel techniques may bypass this check. + +**Do not pipe untrusted data** (e.g., `curl | xpbc`) **without understanding the risk.** The clipboard contents will be processed by whatever application you paste into, and xpbc cannot protect against vulnerabilities in those applications. ## Architecture From 9b72ad0a1221ef242e82e615d6fdf92434bcbcbd Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:31:19 +0900 Subject: [PATCH 06/13] Add checksum verification and path validation to install.sh - Validate INSTALL_DIR is an absolute path and does not contain '..' 
- Download checksums.txt from release and verify SHA256 before extraction - Use curl -fsSL to fail on HTTP errors - Send error messages to stderr Co-Authored-By: Claude Opus 4.6 (1M context) --- Scripts/install.sh | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/Scripts/install.sh b/Scripts/install.sh index 5bb38ea..871ff59 100755 --- a/Scripts/install.sh +++ b/Scripts/install.sh @@ -4,17 +4,38 @@ set -euo pipefail REPO="chigichan24/xpbc" ASSET_NAME="xpbc-macos.artifactbundle.zip" ASSET_URL="https://github.com/$REPO/releases/latest/download/$ASSET_NAME" +CHECKSUM_URL="https://github.com/$REPO/releases/latest/download/checksums.txt" INSTALL_DIR="${1:-$HOME/.local/bin}" +# Validate install directory +case "$INSTALL_DIR" in + /*) ;; + *) echo "Error: install directory must be an absolute path: $INSTALL_DIR" >&2; exit 1 ;; +esac +case "$INSTALL_DIR" in + *..*) echo "Error: install directory must not contain '..': $INSTALL_DIR" >&2; exit 1 ;; +esac + # Download zip file echo "Downloading latest xpbc..." -curl -sL -o "$ASSET_NAME" "$ASSET_URL" +curl -fsSL -o "$ASSET_NAME" "$ASSET_URL" +curl -fsSL -o checksums.txt "$CHECKSUM_URL" + +# Verify checksum +echo "Verifying checksum..." +shasum -a 256 -c checksums.txt --ignore-missing || { + echo "Error: checksum verification failed!" >&2 + rm -f "$ASSET_NAME" checksums.txt + exit 1 +} +rm checksums.txt + unzip -qo "$ASSET_NAME" -d extracted_files rm "$ASSET_NAME" VERSION=$(ls ./extracted_files/xpbc.artifactbundle | sed -n 's/^xpbc-\([^-]*\)-macos$/\1/p' | head -n 1) if [ -z "$VERSION" ]; then - echo "Error: version not found in the artifact bundle." + echo "Error: version not found in the artifact bundle." 
>&2 rm -rf extracted_files exit 1 fi From a81b3fb8a42cb1fd35dc58457ff712c8072d0392 Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:31:31 +0900 Subject: [PATCH 07/13] Add version format validation and fix sed delimiter in release script - Validate VERSION_STRING matches semver format vX.Y.Z - Change sed delimiter from '/' to '|' to prevent injection via version string - Send error messages to stderr Co-Authored-By: Claude Opus 4.6 (1M context) --- Scripts/release-artifactbundle.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Scripts/release-artifactbundle.sh b/Scripts/release-artifactbundle.sh index b48f33c..90fd37e 100755 --- a/Scripts/release-artifactbundle.sh +++ b/Scripts/release-artifactbundle.sh @@ -3,9 +3,15 @@ set -euo pipefail VERSION_STRING="$1" +# Validate version format +if ! echo "$VERSION_STRING" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then + echo "Error: invalid version format '$VERSION_STRING' (expected vX.Y.Z)" >&2 + exit 1 +fi + mkdir -p "xpbc.artifactbundle/xpbc-$VERSION_STRING-macos/bin" -sed "s/__VERSION__/$VERSION_STRING/g" ./Scripts/info.json > "xpbc.artifactbundle/info.json" +sed "s|__VERSION__|$VERSION_STRING|g" ./Scripts/info.json > "xpbc.artifactbundle/info.json" cp -f "./.build/apple/Products/Release/xpbc" "xpbc.artifactbundle/xpbc-$VERSION_STRING-macos/bin" From 77a8e8a207ec5e43b469488e013de0c54aeda01e Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:31:42 +0900 Subject: [PATCH 08/13] Add checksum generation to release workflow and restrict permissions - Generate SHA256 checksums.txt and upload as release asset - Explicitly set packages, issues, pull-requests permissions to none Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/release.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 83bad4c..3f11806 100644 --- a/.github/workflows/release.yml +++ 
b/.github/workflows/release.yml @@ -6,6 +6,9 @@ on: permissions: contents: write + packages: none + issues: none + pull-requests: none jobs: build-and-release: @@ -28,8 +31,12 @@ jobs: - name: Create artifact bundle run: ./Scripts/release-artifactbundle.sh "${{ env.TAG_NAME }}" + - name: Generate checksums + run: shasum -a 256 xpbc-macos.artifactbundle.zip > checksums.txt + - name: Upload release assets uses: softprops/action-gh-release@v2 with: files: | xpbc-macos.artifactbundle.zip + checksums.txt From 05d4b465b12d7d5b567bf20ebd3c4a3b89a56132 Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:31:30 +0900 Subject: [PATCH 09/13] Filter control characters in Latin-1 fallback to prevent terminal escape injection When decodeText falls back to Latin-1 encoding, strip C0 control characters (except tab, newline, carriage return) and DEL (0x7F) to prevent potential terminal escape sequence injection from untrusted input. Co-Authored-By: Claude Opus 4.6 (1M context) --- Sources/XPBCCore/PasteboardWriter.swift | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Sources/XPBCCore/PasteboardWriter.swift b/Sources/XPBCCore/PasteboardWriter.swift index 8ac25da..7e31604 100644 --- a/Sources/XPBCCore/PasteboardWriter.swift +++ b/Sources/XPBCCore/PasteboardWriter.swift @@ -30,7 +30,15 @@ public struct PasteboardWriter: Sendable { Data("xpbc: warning: input is not valid UTF-8, falling back to Latin-1\n".utf8) ) // Latin-1 can decode any byte sequence, so this never returns nil - return String(data: data, encoding: .isoLatin1)! + let raw = String(data: data, encoding: .isoLatin1)! + // Strip control characters to prevent terminal escape sequence injection. + // Allow printable characters + tab (0x09), newline (0x0A), carriage return (0x0D). 
+ return raw.filter { ch in + let v = ch.unicodeScalars.first!.value + if v == 0x09 || v == 0x0A || v == 0x0D { return true } + if v < 0x20 || v == 0x7F { return false } + return true + } } } From bc38cdf710a83788a49749edf5bd9a037c1e0e79 Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:54:56 +0900 Subject: [PATCH 10/13] Address nirami code review findings Critical: - ByteReader: Convert to Data extension with Optional return and boundary checks to prevent out-of-bounds crashes Warning: - DataValidator: Replace Dictionary lookup with exhaustive switch so compiler catches missing validators when DataType grows - PDFValidator: Add boundary-aware keyword matching to reduce false positives (/JSActions, /AABattery no longer trigger), fail-close on decode failure - DataType: Add CustomStringConvertible for stable error messages - PasteboardWriter: Apply control character filter to UTF-8 text too (not just Latin-1 fallback), extract stripControlCharacters - install.sh: Add trap cleanup EXIT for intermediate file cleanup Suggestion: - FtypValidator: Add ISO BMFF comment for boxSize 0/1, explicit Int cast for UInt32/Int comparison - Tests: Add boundary value tests (PNG 29/28 bytes, ftyp boxSize==8), PDF false positive tests, total 69 test cases Co-Authored-By: Claude Opus 4.6 (1M context) --- Scripts/install.sh | 12 +++-- Sources/XPBCCore/DataValidator.swift | 35 +++++++------ Sources/XPBCCore/FormatDetector.swift | 17 ++++++- Sources/XPBCCore/PasteboardWriter.swift | 25 ++++++---- .../XPBCCore/Validators/BMPValidator.swift | 3 +- Sources/XPBCCore/Validators/ByteReader.swift | 41 ++++++++------- .../XPBCCore/Validators/FtypValidator.swift | 11 ++-- .../XPBCCore/Validators/GIFValidator.swift | 11 ++-- .../XPBCCore/Validators/PDFValidator.swift | 26 +++++++++- .../XPBCCore/Validators/PNGValidator.swift | 10 ++-- .../XPBCCore/Validators/TIFFValidator.swift | 18 ++++--- Tests/XPBCCoreTests/DataValidatorTests.swift | 50 +++++++++++++++++++ 12 files changed, 
182 insertions(+), 77 deletions(-) diff --git a/Scripts/install.sh b/Scripts/install.sh index 871ff59..095a32c 100755 --- a/Scripts/install.sh +++ b/Scripts/install.sh @@ -7,6 +7,13 @@ ASSET_URL="https://github.com/$REPO/releases/latest/download/$ASSET_NAME" CHECKSUM_URL="https://github.com/$REPO/releases/latest/download/checksums.txt" INSTALL_DIR="${1:-$HOME/.local/bin}" +# Clean up intermediate files on any exit +cleanup() { + rm -f "$ASSET_NAME" checksums.txt + rm -rf extracted_files +} +trap cleanup EXIT + # Validate install directory case "$INSTALL_DIR" in /*) ;; @@ -25,25 +32,20 @@ curl -fsSL -o checksums.txt "$CHECKSUM_URL" echo "Verifying checksum..." shasum -a 256 -c checksums.txt --ignore-missing || { echo "Error: checksum verification failed!" >&2 - rm -f "$ASSET_NAME" checksums.txt exit 1 } -rm checksums.txt unzip -qo "$ASSET_NAME" -d extracted_files -rm "$ASSET_NAME" VERSION=$(ls ./extracted_files/xpbc.artifactbundle | sed -n 's/^xpbc-\([^-]*\)-macos$/\1/p' | head -n 1) if [ -z "$VERSION" ]; then echo "Error: version not found in the artifact bundle." 
>&2 - rm -rf extracted_files exit 1 fi mkdir -p "$INSTALL_DIR" cp -f "./extracted_files/xpbc.artifactbundle/xpbc-$VERSION-macos/bin/xpbc" "$INSTALL_DIR/xpbc" chmod +x "$INSTALL_DIR/xpbc" -rm -rf extracted_files echo "Installed xpbc $VERSION to $INSTALL_DIR/xpbc" echo "Please make sure $INSTALL_DIR is in your \$PATH" diff --git a/Sources/XPBCCore/DataValidator.swift b/Sources/XPBCCore/DataValidator.swift index 8b274b5..911a60f 100644 --- a/Sources/XPBCCore/DataValidator.swift +++ b/Sources/XPBCCore/DataValidator.swift @@ -1,21 +1,26 @@ import Foundation public struct DataValidator: Sendable { - static let validators: [DataType: FormatValidator] = [ - .png: PNGValidator(), - .jpeg: JPEGValidator(), - .gif: GIFValidator(), - .tiff: TIFFValidator(), - .bmp: BMPValidator(), - .webp: WebPValidator(), - .heic: FtypValidator(), - .avif: FtypValidator(), - .pdf: PDFValidator(), - ] - public static func validate(_ data: Data, as type: DataType) -> ValidationResult { - guard type != .text else { return .valid } - guard let validator = validators[type] else { return .valid } - return validator.validate(data) + switch type { + case .text: + return .valid + case .png: + return PNGValidator().validate(data) + case .jpeg: + return JPEGValidator().validate(data) + case .gif: + return GIFValidator().validate(data) + case .tiff: + return TIFFValidator().validate(data) + case .bmp: + return BMPValidator().validate(data) + case .webp: + return WebPValidator().validate(data) + case .heic, .avif: + return FtypValidator().validate(data) + case .pdf: + return PDFValidator().validate(data) + } } } diff --git a/Sources/XPBCCore/FormatDetector.swift b/Sources/XPBCCore/FormatDetector.swift index 3347841..076b4e2 100644 --- a/Sources/XPBCCore/FormatDetector.swift +++ b/Sources/XPBCCore/FormatDetector.swift @@ -1,6 +1,6 @@ import Foundation -public enum DataType: Equatable, Sendable { +public enum DataType: Equatable, Sendable, CustomStringConvertible { case png case jpeg case gif @@ -11,6 
+11,21 @@ public enum DataType: Equatable, Sendable { case avif case pdf case text + + public var description: String { + switch self { + case .png: return "PNG" + case .jpeg: return "JPEG" + case .gif: return "GIF" + case .tiff: return "TIFF" + case .bmp: return "BMP" + case .webp: return "WebP" + case .heic: return "HEIC" + case .avif: return "AVIF" + case .pdf: return "PDF" + case .text: return "text" + } + } } protocol FormatDetector: Sendable { diff --git a/Sources/XPBCCore/PasteboardWriter.swift b/Sources/XPBCCore/PasteboardWriter.swift index 7e31604..a20ec50 100644 --- a/Sources/XPBCCore/PasteboardWriter.swift +++ b/Sources/XPBCCore/PasteboardWriter.swift @@ -23,22 +23,27 @@ public struct PasteboardWriter: Sendable { } private func decodeText(from data: Data) -> String { + let decoded: String if let utf8 = String(data: data, encoding: .utf8) { - return utf8 + decoded = utf8 } else { FileHandle.standardError.write( Data("xpbc: warning: input is not valid UTF-8, falling back to Latin-1\n".utf8) ) // Latin-1 can decode any byte sequence, so this never returns nil - let raw = String(data: data, encoding: .isoLatin1)! - // Strip control characters to prevent terminal escape sequence injection. - // Allow printable characters + tab (0x09), newline (0x0A), carriage return (0x0D). - return raw.filter { ch in - let v = ch.unicodeScalars.first!.value - if v == 0x09 || v == 0x0A || v == 0x0D { return true } - if v < 0x20 || v == 0x7F { return false } - return true - } + decoded = String(data: data, encoding: .isoLatin1)! + } + return stripControlCharacters(decoded) + } + + /// Strip C0 control characters (except tab, newline, carriage return) and DEL + /// to prevent terminal escape sequence injection. 
+ private func stripControlCharacters(_ text: String) -> String { + text.filter { ch in + let v = ch.unicodeScalars.first!.value + if v == 0x09 || v == 0x0A || v == 0x0D { return true } + if v < 0x20 || v == 0x7F { return false } + return true } } diff --git a/Sources/XPBCCore/Validators/BMPValidator.swift b/Sources/XPBCCore/Validators/BMPValidator.swift index 347f64a..cd3c6bd 100644 --- a/Sources/XPBCCore/Validators/BMPValidator.swift +++ b/Sources/XPBCCore/Validators/BMPValidator.swift @@ -5,11 +5,10 @@ struct BMPValidator: FormatValidator { func validate(_ data: Data) -> ValidationResult { // BMP file header is 14 bytes, then DIB header starts with its size (LE u32) - guard data.count >= 18 else { + guard let dibSize = data.readLittleEndianUInt32(at: 14) else { return .invalid(reason: "too short for DIB header size (need >= 18 bytes)") } - let dibSize = readLittleEndianUInt32(data, offset: 14) guard Self.validDIBSizes.contains(dibSize) else { return .invalid(reason: "invalid DIB header size \(dibSize)") } diff --git a/Sources/XPBCCore/Validators/ByteReader.swift b/Sources/XPBCCore/Validators/ByteReader.swift index e356abb..07f1c45 100644 --- a/Sources/XPBCCore/Validators/ByteReader.swift +++ b/Sources/XPBCCore/Validators/ByteReader.swift @@ -1,23 +1,28 @@ import Foundation -func readBigEndianUInt32(_ data: Data, offset: Int) -> UInt32 { - let start = data.startIndex + offset - return UInt32(data[start]) << 24 - | UInt32(data[start + 1]) << 16 - | UInt32(data[start + 2]) << 8 - | UInt32(data[start + 3]) -} +extension Data { + func readBigEndianUInt32(at offset: Int) -> UInt32? 
{ + guard offset >= 0, offset + 4 <= count else { return nil } + let start = startIndex + offset + return UInt32(self[start]) << 24 + | UInt32(self[start + 1]) << 16 + | UInt32(self[start + 2]) << 8 + | UInt32(self[start + 3]) + } -func readLittleEndianUInt32(_ data: Data, offset: Int) -> UInt32 { - let start = data.startIndex + offset - return UInt32(data[start]) - | UInt32(data[start + 1]) << 8 - | UInt32(data[start + 2]) << 16 - | UInt32(data[start + 3]) << 24 -} + func readLittleEndianUInt32(at offset: Int) -> UInt32? { + guard offset >= 0, offset + 4 <= count else { return nil } + let start = startIndex + offset + return UInt32(self[start]) + | UInt32(self[start + 1]) << 8 + | UInt32(self[start + 2]) << 16 + | UInt32(self[start + 3]) << 24 + } -func readLittleEndianUInt16(_ data: Data, offset: Int) -> UInt16 { - let start = data.startIndex + offset - return UInt16(data[start]) - | UInt16(data[start + 1]) << 8 + func readLittleEndianUInt16(at offset: Int) -> UInt16? { + guard offset >= 0, offset + 2 <= count else { return nil } + let start = startIndex + offset + return UInt16(self[start]) + | UInt16(self[start + 1]) << 8 + } } diff --git a/Sources/XPBCCore/Validators/FtypValidator.swift b/Sources/XPBCCore/Validators/FtypValidator.swift index 9586335..e9c0267 100644 --- a/Sources/XPBCCore/Validators/FtypValidator.swift +++ b/Sources/XPBCCore/Validators/FtypValidator.swift @@ -2,17 +2,18 @@ import Foundation struct FtypValidator: FormatValidator { func validate(_ data: Data) -> ValidationResult { - // ftyp box: first 4 bytes = box size (big-endian UInt32) - guard data.count >= 8 else { - return .invalid(reason: "too short for ftyp box (need >= 8 bytes)") + guard let boxSize = data.readBigEndianUInt32(at: 0) else { + return .invalid(reason: "too short for ftyp box (need >= 4 bytes)") } - let boxSize = readBigEndianUInt32(data, offset: 0) + // Per ISO BMFF, boxSize == 0 means "box extends to EOF" and boxSize == 1 means + // "64-bit extended size follows". 
Both are valid but rejected here for simplicity + // since typical ftyp boxes have a concrete small size. guard boxSize >= 8 else { return .invalid(reason: "ftyp box size \(boxSize) is less than minimum (8)") } - guard boxSize <= data.count else { + guard Int(boxSize) <= data.count else { return .invalid(reason: "ftyp box size \(boxSize) exceeds data size \(data.count)") } diff --git a/Sources/XPBCCore/Validators/GIFValidator.swift b/Sources/XPBCCore/Validators/GIFValidator.swift index 4248ff4..d5e9312 100644 --- a/Sources/XPBCCore/Validators/GIFValidator.swift +++ b/Sources/XPBCCore/Validators/GIFValidator.swift @@ -2,17 +2,16 @@ import Foundation struct GIFValidator: FormatValidator { func validate(_ data: Data) -> ValidationResult { - // Logical Screen Descriptor at offset 6: width (LE u16) + height (LE u16) - guard data.count >= 10 else { - return .invalid(reason: "too short for Logical Screen Descriptor (need >= 10 bytes)") + guard let width = data.readLittleEndianUInt16(at: 6) else { + return .invalid(reason: "too short for Logical Screen Descriptor (need >= 8 bytes)") } - - let width = readLittleEndianUInt16(data, offset: 6) guard width > 0 else { return .invalid(reason: "logical screen width is 0") } - let height = readLittleEndianUInt16(data, offset: 8) + guard let height = data.readLittleEndianUInt16(at: 8) else { + return .invalid(reason: "too short for Logical Screen Descriptor (need >= 10 bytes)") + } guard height > 0 else { return .invalid(reason: "logical screen height is 0") } diff --git a/Sources/XPBCCore/Validators/PDFValidator.swift b/Sources/XPBCCore/Validators/PDFValidator.swift index da9272f..0dd4b61 100644 --- a/Sources/XPBCCore/Validators/PDFValidator.swift +++ b/Sources/XPBCCore/Validators/PDFValidator.swift @@ -1,23 +1,45 @@ import Foundation struct PDFValidator: FormatValidator { + // PDF delimiter characters that follow a name object per ISO 32000. 
+ private static let pdfDelimiters: CharacterSet = CharacterSet(charactersIn: " \t\r\n<>()[]/%") + private static let dangerousKeywords: [String] = [ "/JS", "/JavaScript", "/OpenAction", "/AA", "/Launch", ] func validate(_ data: Data) -> ValidationResult { + // isoLatin1 can decode any byte sequence, so this guard is defensive only. guard let content = String(data: data, encoding: .ascii) ?? String(data: data, encoding: .isoLatin1) else { - return .valid + return .invalid(reason: "unable to decode PDF content for inspection") } for keyword in Self.dangerousKeywords { - if content.contains(keyword) { + if containsKeywordAtBoundary(content, keyword: keyword) { return .invalid(reason: "contains potentially dangerous keyword '\(keyword)'") } } return .valid } + + /// Check if the keyword appears in content followed by a PDF delimiter or at end of string. + /// This reduces false positives from names like "/JSActions" or "/AABattery". + private func containsKeywordAtBoundary(_ content: String, keyword: String) -> Bool { + var searchRange = content.startIndex.. 0 else { - return .invalid(reason: "width is 0") + guard let width = data.readBigEndianUInt32(at: 16), width > 0 else { + return .invalid(reason: "width is 0 or unreadable") } - let height = readBigEndianUInt32(data, offset: 20) - guard height > 0 else { - return .invalid(reason: "height is 0") + guard let height = data.readBigEndianUInt32(at: 20), height > 0 else { + return .invalid(reason: "height is 0 or unreadable") } return .valid diff --git a/Sources/XPBCCore/Validators/TIFFValidator.swift b/Sources/XPBCCore/Validators/TIFFValidator.swift index 2532f3b..3acd8bf 100644 --- a/Sources/XPBCCore/Validators/TIFFValidator.swift +++ b/Sources/XPBCCore/Validators/TIFFValidator.swift @@ -7,19 +7,23 @@ struct TIFFValidator: FormatValidator { } let isLittleEndian = data[data.startIndex] == 0x49 // 'I' - let ifdOffset: UInt32 + let ifdOffset: UInt32? 
if isLittleEndian { - ifdOffset = readLittleEndianUInt32(data, offset: 4) + ifdOffset = data.readLittleEndianUInt32(at: 4) } else { - ifdOffset = readBigEndianUInt32(data, offset: 4) + ifdOffset = data.readBigEndianUInt32(at: 4) } - guard ifdOffset >= 8 else { - return .invalid(reason: "IFD offset \(ifdOffset) is less than minimum (8)") + guard let offset = ifdOffset else { + return .invalid(reason: "unable to read IFD offset") } - guard ifdOffset < data.count else { - return .invalid(reason: "IFD offset \(ifdOffset) exceeds data size \(data.count)") + guard offset >= 8 else { + return .invalid(reason: "IFD offset \(offset) is less than minimum (8)") + } + + guard Int(offset) < data.count else { + return .invalid(reason: "IFD offset \(offset) exceeds data size \(data.count)") } return .valid diff --git a/Tests/XPBCCoreTests/DataValidatorTests.swift b/Tests/XPBCCoreTests/DataValidatorTests.swift index 3814bfa..2f90e24 100644 --- a/Tests/XPBCCoreTests/DataValidatorTests.swift +++ b/Tests/XPBCCoreTests/DataValidatorTests.swift @@ -269,6 +269,56 @@ struct DataValidatorTests { #expect(DataValidator.validate(data, as: .pdf) != .valid) } + @Test func pdf_keywordAsPrefix_noFalsePositive() { + // "/JSActions" should NOT trigger /JS detection (boundary check) + let content = "%PDF-1.4\n<< /JSActions 1 0 R >>" + let data = content.data(using: .utf8)! + #expect(DataValidator.validate(data, as: .pdf) == .valid) + } + + @Test func pdf_AAAsPrefix_noFalsePositive() { + // "/AABattery" should NOT trigger /AA detection + let content = "%PDF-1.4\n<< /AABattery 1 >>" + let data = content.data(using: .utf8)! + #expect(DataValidator.validate(data, as: .pdf) == .valid) + } + + @Test func pdf_keywordAtEndOfFile_fails() { + let content = "%PDF-1.4\n/JS" + let data = content.data(using: .utf8)! 
+ #expect(DataValidator.validate(data, as: .pdf) != .valid) + } + + // MARK: - Boundary value tests + + @Test func png_exactMinimumSize_passes() { + var data = Data([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x0D]) + data.append(contentsOf: [0x49, 0x48, 0x44, 0x52]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x01]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x01]) + data.append(contentsOf: [0x08, 0x02, 0x00, 0x00, 0x00]) + #expect(data.count == 29) + #expect(DataValidator.validate(data, as: .png) == .valid) + } + + @Test func png_oneByteBelowMinimum_fails() { + var data = Data([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x0D]) + data.append(contentsOf: [0x49, 0x48, 0x44, 0x52]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x01]) + data.append(contentsOf: [0x00, 0x00, 0x00, 0x01]) + data.append(contentsOf: [0x08, 0x02, 0x00, 0x00]) // 28 bytes + #expect(DataValidator.validate(data, as: .png) != .valid) + } + + @Test func ftyp_exactMinimumBoxSize_passes() { + // boxSize == 8, data.count == 8 + let data = Data([0x00, 0x00, 0x00, 0x08, + 0x66, 0x74, 0x79, 0x70]) + #expect(DataValidator.validate(data, as: .heic) == .valid) + } + // MARK: - Text (no validation) @Test func text_alwaysValid() { From b327a38ed880bd1b7a6a28164e7ad92a3e105c59 Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 00:58:53 +0900 Subject: [PATCH 11/13] Address second nirami review findings Warning fixes: - stripControlCharacters: Use allSatisfy over all Unicode scalars instead of first! 
to handle multi-scalar graphemes safely - PDFValidator: Add comment documenting hex escape bypass limitation - FtypValidator: Raise minimum ftyp box size from 8 to 12 per ISO 14496-12 (header 8 + major brand 4) - install.sh: Add VERSION format validation (semver pattern) Co-Authored-By: Claude Opus 4.6 (1M context) --- Scripts/install.sh | 4 ++++ Sources/XPBCCore/PasteboardWriter.swift | 11 +++++++---- Sources/XPBCCore/Validators/FtypValidator.swift | 6 ++++-- Sources/XPBCCore/Validators/PDFValidator.swift | 2 ++ Tests/XPBCCoreTests/DataValidatorTests.swift | 9 +++++---- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/Scripts/install.sh b/Scripts/install.sh index 095a32c..979b604 100755 --- a/Scripts/install.sh +++ b/Scripts/install.sh @@ -42,6 +42,10 @@ if [ -z "$VERSION" ]; then echo "Error: version not found in the artifact bundle." >&2 exit 1 fi +if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+$'; then + echo "Error: unexpected version format: $VERSION" >&2 + exit 1 +fi mkdir -p "$INSTALL_DIR" cp -f "./extracted_files/xpbc.artifactbundle/xpbc-$VERSION-macos/bin/xpbc" "$INSTALL_DIR/xpbc" diff --git a/Sources/XPBCCore/PasteboardWriter.swift b/Sources/XPBCCore/PasteboardWriter.swift index a20ec50..1290516 100644 --- a/Sources/XPBCCore/PasteboardWriter.swift +++ b/Sources/XPBCCore/PasteboardWriter.swift @@ -38,12 +38,15 @@ public struct PasteboardWriter: Sendable { /// Strip C0 control characters (except tab, newline, carriage return) and DEL /// to prevent terminal escape sequence injection. + /// Checks all Unicode scalars in each Character to handle multi-scalar graphemes. 
private func stripControlCharacters(_ text: String) -> String { text.filter { ch in - let v = ch.unicodeScalars.first!.value - if v == 0x09 || v == 0x0A || v == 0x0D { return true } - if v < 0x20 || v == 0x7F { return false } - return true + ch.unicodeScalars.allSatisfy { scalar in + let v = scalar.value + if v == 0x09 || v == 0x0A || v == 0x0D { return true } + if v < 0x20 || v == 0x7F { return false } + return true + } } } diff --git a/Sources/XPBCCore/Validators/FtypValidator.swift b/Sources/XPBCCore/Validators/FtypValidator.swift index e9c0267..3704c98 100644 --- a/Sources/XPBCCore/Validators/FtypValidator.swift +++ b/Sources/XPBCCore/Validators/FtypValidator.swift @@ -9,8 +9,10 @@ struct FtypValidator: FormatValidator { // Per ISO BMFF, boxSize == 0 means "box extends to EOF" and boxSize == 1 means // "64-bit extended size follows". Both are valid but rejected here for simplicity // since typical ftyp boxes have a concrete small size. - guard boxSize >= 8 else { - return .invalid(reason: "ftyp box size \(boxSize) is less than minimum (8)") + // Minimum 12: box header (8) + major brand (4). Full ftyp also has minor_version (4) + // but we check for 12 as the bare minimum for a recognizable ftyp box. + guard boxSize >= 12 else { + return .invalid(reason: "ftyp box size \(boxSize) is less than minimum (12)") } guard Int(boxSize) <= data.count else { diff --git a/Sources/XPBCCore/Validators/PDFValidator.swift b/Sources/XPBCCore/Validators/PDFValidator.swift index 0dd4b61..0adc7a9 100644 --- a/Sources/XPBCCore/Validators/PDFValidator.swift +++ b/Sources/XPBCCore/Validators/PDFValidator.swift @@ -8,6 +8,8 @@ struct PDFValidator: FormatValidator { "/JS", "/JavaScript", "/OpenAction", "/AA", "/Launch", ] + // NOTE: This check does not cover hex-encoded PDF name objects (e.g., /#4A#53 for /JS). + // Full coverage would require decoding PDF name hex escapes before matching. 
func validate(_ data: Data) -> ValidationResult { // isoLatin1 can decode any byte sequence, so this guard is defensive only. guard let content = String(data: data, encoding: .ascii) diff --git a/Tests/XPBCCoreTests/DataValidatorTests.swift b/Tests/XPBCCoreTests/DataValidatorTests.swift index 2f90e24..00a83d5 100644 --- a/Tests/XPBCCoreTests/DataValidatorTests.swift +++ b/Tests/XPBCCoreTests/DataValidatorTests.swift @@ -217,7 +217,7 @@ struct DataValidatorTests { } @Test func ftyp_boxSizeTooSmall_fails() { - var data = Data([0x00, 0x00, 0x00, 0x04]) // size = 4 (< 8) + var data = Data([0x00, 0x00, 0x00, 0x08]) // size = 8 (< 12 minimum) data.append(contentsOf: [0x66, 0x74, 0x79, 0x70]) data.append(contentsOf: [0x68, 0x65, 0x69, 0x63]) #expect(DataValidator.validate(data, as: .heic) != .valid) @@ -313,9 +313,10 @@ struct DataValidatorTests { } @Test func ftyp_exactMinimumBoxSize_passes() { - // boxSize == 8, data.count == 8 - let data = Data([0x00, 0x00, 0x00, 0x08, - 0x66, 0x74, 0x79, 0x70]) + // boxSize == 12, data.count == 12 (header 8 + major brand 4) + var data = Data([0x00, 0x00, 0x00, 0x0C]) // size = 12 + data.append(contentsOf: [0x66, 0x74, 0x79, 0x70]) // "ftyp" + data.append(contentsOf: [0x68, 0x65, 0x69, 0x63]) // "heic" (major brand) #expect(DataValidator.validate(data, as: .heic) == .valid) } From 0460eee80ed41ccc1e694aeb896b56f9bb0c4ccd Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 01:01:15 +0900 Subject: [PATCH 12/13] Address third nirami review findings - Add ftyp boundary value test for boxSize=11 (just below minimum 12) - Make stripControlCharacters internal for testability - Add stripControlCharacters tests: ESC removal, tab/newline/CR preservation, NUL removal, multi-scalar grapheme passthrough - Total: 74 test cases Co-Authored-By: Claude Opus 4.6 (1M context) --- Sources/XPBCCore/PasteboardWriter.swift | 2 +- Tests/XPBCCoreTests/DataValidatorTests.swift | 34 ++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 
deletion(-) diff --git a/Sources/XPBCCore/PasteboardWriter.swift b/Sources/XPBCCore/PasteboardWriter.swift index 1290516..4dbf14e 100644 --- a/Sources/XPBCCore/PasteboardWriter.swift +++ b/Sources/XPBCCore/PasteboardWriter.swift @@ -39,7 +39,7 @@ public struct PasteboardWriter: Sendable { /// Strip C0 control characters (except tab, newline, carriage return) and DEL /// to prevent terminal escape sequence injection. /// Checks all Unicode scalars in each Character to handle multi-scalar graphemes. - private func stripControlCharacters(_ text: String) -> String { + func stripControlCharacters(_ text: String) -> String { text.filter { ch in ch.unicodeScalars.allSatisfy { scalar in let v = scalar.value diff --git a/Tests/XPBCCoreTests/DataValidatorTests.swift b/Tests/XPBCCoreTests/DataValidatorTests.swift index 00a83d5..7e43661 100644 --- a/Tests/XPBCCoreTests/DataValidatorTests.swift +++ b/Tests/XPBCCoreTests/DataValidatorTests.swift @@ -312,6 +312,14 @@ struct DataValidatorTests { #expect(DataValidator.validate(data, as: .png) != .valid) } + @Test func ftyp_boxSizeJustBelowMinimum_fails() { + // boxSize == 11: one below the minimum of 12 + var data = Data([0x00, 0x00, 0x00, 0x0B]) // size = 11 + data.append(contentsOf: [0x66, 0x74, 0x79, 0x70]) // "ftyp" + data.append(contentsOf: [0x68, 0x65, 0x69]) // 3 bytes (total 11) + #expect(DataValidator.validate(data, as: .heic) != .valid) + } + @Test func ftyp_exactMinimumBoxSize_passes() { // boxSize == 12, data.count == 12 (header 8 + major brand 4) var data = Data([0x00, 0x00, 0x00, 0x0C]) // size = 12 @@ -320,6 +328,32 @@ struct DataValidatorTests { #expect(DataValidator.validate(data, as: .heic) == .valid) } + // MARK: - stripControlCharacters + + @Test func stripControlCharacters_removesESC() { + let writer = PasteboardWriter() + let result = writer.stripControlCharacters("hello\u{1B}[31mworld") + #expect(result == "hello[31mworld") + } + + @Test func stripControlCharacters_preservesTabNewlineCR() { + let writer = 
PasteboardWriter() + let input = "line1\tvalue\nline2\r\n" + #expect(writer.stripControlCharacters(input) == input) + } + + @Test func stripControlCharacters_removesNUL() { + let writer = PasteboardWriter() + #expect(writer.stripControlCharacters("a\u{0000}b") == "ab") + } + + @Test func stripControlCharacters_multiScalarGrapheme_passes() { + let writer = PasteboardWriter() + // "é" as e + combining acute accent (multi-scalar Character) + let input = "caf\u{0065}\u{0301}" + #expect(writer.stripControlCharacters(input) == input) + } + // MARK: - Text (no validation) @Test func text_alwaysValid() { From 988f4fe7ca6b5929f3b10862b54c21aaa2e06bd6 Mon Sep 17 00:00:00 2001 From: Kazuki Chigita Date: Sun, 22 Mar 2026 01:03:43 +0900 Subject: [PATCH 13/13] Update architecture.md to reflect structural validation layer - Add DataValidator and FormatValidator to data flow diagram - Document all 8 format validators and their specific checks - Document safe byte-reading Data extension (boundary-checked, Optional) - Document PDF boundary-aware keyword matching and hex escape limitation - Document control character stripping for terminal injection prevention - Update error handling table with validationFailed case - Update module boundaries with new public/internal types - Update "Adding a New Format" guide (now 7 steps including validator) - Update testing section: 74 tests across 2 suites with detailed breakdown Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/architecture.md | 103 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 15 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index ea128f6..2c3565c 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -6,7 +6,7 @@ xpbc is structured as two Swift Package Manager targets: ``` Package -├── XPBCCore (library) — format detection, pasteboard writing, stdin reading, error types +├── XPBCCore (library) — format detection, structural validation, pasteboard writing, 
stdin reading, error types └── xpbc (executable) — CLI entry point, argument parsing ``` @@ -15,15 +15,16 @@ The library/executable split enables unit testing of core logic via `@testable i ## Data Flow ``` -stdin ──→ StdinReader ──→ DataTypeDetector ──→ PasteboardWriter ──→ NSPasteboard - (raw bytes) (magic bytes) (raw passthrough) +stdin ──→ StdinReader ──→ DataTypeDetector ──→ DataValidator ──→ PasteboardWriter ──→ NSPasteboard + (raw bytes) (magic bytes) (header check) (raw passthrough) ``` 1. **StdinReader** reads stdin in 64 KB chunks, enforcing a 100 MB size limit 2. **DataTypeDetector** inspects the first few bytes to identify the format -3. **PasteboardWriter** writes the raw bytes to `NSPasteboard` with the appropriate UTI type +3. **DataValidator** performs structural validation of image headers (skipped with `--no-validate`) +4. **PasteboardWriter** writes the raw bytes to `NSPasteboard` with the appropriate UTI type -No image decoding occurs at any stage. The tool is a pure passthrough. +No image decoding occurs at any stage. The tool is a pure passthrough with header-level validation. ## Format Detection @@ -64,11 +65,60 @@ HEIC and AVIF both use the ISO Base Media File Format (ISOBMFF) container. Rathe A `precondition` enforces that the brand is exactly 4 bytes. Adding a new ftyp-based format requires only a new `FtypDetector` instance in the detectors array. +## Structural Validation + +After format detection, `DataValidator` performs lightweight structural validation using the `FormatValidator` protocol: + +```swift +protocol FormatValidator: Sendable { + func validate(_ data: Data) -> ValidationResult +} +``` + +`DataValidator.validate(_:as:)` uses an exhaustive `switch` on `DataType` to dispatch to the appropriate validator. This ensures the compiler catches missing validators when new formats are added. Text data (`.text`) is always valid and skips validation. 
+ +### Validator Checks + +| Format | Validator | Checks | +|--------|-----------|--------| +| PNG | PNGValidator | IHDR chunk present, width/height > 0 | +| JPEG | JPEGValidator | Valid marker after SOI (0xC0–0xFE range) | +| GIF | GIFValidator | Logical Screen Descriptor width/height > 0 | +| TIFF | TIFFValidator | IFD offset within valid range (≥ 8, < data size) | +| BMP | BMPValidator | DIB header size is a known valid value (12, 40, 52, 56, 108, 124) | +| WebP | WebPValidator | VP8/VP8L/VP8X chunk header present | +| HEIC/AVIF | FtypValidator | ftyp box size ≥ 12 (per ISO 14496-12) and ≤ data size | +| PDF | PDFValidator | Rejects files containing dangerous keywords at PDF name boundaries (`/JS`, `/JavaScript`, `/OpenAction`, `/AA`, `/Launch`) | + +### Byte Reading + +Validators use safe byte-reading methods defined as a `Data` extension in `ByteReader.swift`: + +```swift +extension Data { + func readBigEndianUInt32(at offset: Int) -> UInt32? + func readLittleEndianUInt32(at offset: Int) -> UInt32? + func readLittleEndianUInt16(at offset: Int) -> UInt16? +} +``` + +All methods perform boundary checks and return `nil` if the offset is out of range, preventing out-of-bounds crashes regardless of caller behavior. + +### PDF Validation + +`PDFValidator` scans for dangerous PDF keywords with boundary-aware matching: a keyword must be followed by a PDF delimiter character (whitespace, `<`, `>`, `(`, `)`, `[`, `]`, `/`, `%`) or appear at the end of the file. This reduces false positives from names like `/JSActions` or `/AABattery`. + +Known limitation: hex-encoded PDF name objects (e.g., `/#4A#53` for `/JS`) are not decoded before matching. This is documented in the source. + ## Pasteboard Writing `PasteboardWriter` maps `DataType` to `NSPasteboard.PasteboardType` (UTI strings) and writes raw bytes via `NSPasteboard.setData(_:forType:)`. For text, it decodes via UTF-8 with a Latin-1 fallback (which can decode any byte sequence) and uses `setString(_:forType:)`. 
-Key design decisions: +### Control Character Stripping + +All text (both UTF-8 and Latin-1 fallback) is sanitized by `stripControlCharacters` before being placed on the clipboard. This removes C0 control characters (U+0000–U+001F except tab, newline, carriage return) and DEL (U+007F) to prevent terminal escape sequence injection. The filter checks all Unicode scalars in each `Character` to correctly handle multi-scalar graphemes. + +### Key Design Decisions - **No image decoders**: `NSImage`, `CGImageSource`, and `NSBitmapImageRep` are never used. This avoids exposure to vulnerabilities in ImageIO/CoreGraphics (e.g., CVE-2021-30860 FORCEDENTRY, CVE-2023-41064 BLASTPASS). - **clearContents timing**: The pasteboard is cleared immediately before writing, after all validation and data preparation is complete. @@ -84,41 +134,64 @@ All errors are modeled as `XPBCError`, a `LocalizedError` enum: | `inputTooLarge(size:maxMB:)` | Input exceeds 100 MB limit | | `pasteboardWriteFailed` | `NSPasteboard.setData/setString` returned false | | `invalidArgument(String)` | Unrecognized CLI flag or pasteboard name | +| `validationFailed(format:reason:)` | Structural validation of the detected format failed | -The CLI distinguishes expected errors (`XPBCError` -> exit 1) from unexpected errors (exit 2) for easier debugging. +The CLI distinguishes expected errors (`XPBCError` -> exit 1) from unexpected errors (exit 2) for easier debugging. `DataType` conforms to `CustomStringConvertible` so that validation error messages display stable, human-readable format names (e.g., "PNG", "JPEG").
## Module Boundaries | Component | Access Level | Rationale | |-----------|-------------|-----------| | `DataType` | `public` | Used by both library and executable | -| `DataTypeDetector.detect(from:)` | `public` | Primary API | +| `DataTypeDetector.detect(from:)` | `public` | Primary detection API | +| `DataValidator.validate(_:as:)` | `public` | Primary validation API | +| `ValidationResult` | `public` | Returned by validation API | | `StdinReader.read()` | `public` | Called from executable | | `PasteboardWriter` | `public` | Called from executable | | `XPBCError` | `public` | Caught in executable | | `NSPasteboard.Name.from(userInput:)` | `public` | CLI argument parsing | | `FormatDetector` protocol | `internal` | Implementation detail | +| `FormatValidator` protocol | `internal` | Implementation detail | | `detectors` array | `internal` | Implementation detail | | `maxInputSize` / `maxInputSizeMB` | `internal` | Implementation detail | | All concrete detectors | `internal` | Implementation detail | +| All concrete validators | `internal` | Implementation detail | +| `Data` byte-reading extension | `internal` | Implementation detail | ## Adding a New Format -1. Create a new struct conforming to `FormatDetector` in `Sources/XPBCCore/Detectors/` (or use `FtypDetector` for ISOBMFF-based formats) -2. Add it to `DataTypeDetector.detectors` in the appropriate position by signature length -3. Add a case to `DataType` enum -4. Add UTI mapping in `PasteboardWriter.pasteboardType(for:)` and the case list in `write(_:as:)` -5. Add tests in `DataTypeDetectorTests` +1. Add a case to the `DataType` enum (also add a `description` in the `CustomStringConvertible` conformance) +2. Create a new struct conforming to `FormatDetector` in `Sources/XPBCCore/Detectors/` (or use `FtypDetector` for ISOBMFF-based formats) +3. Add it to `DataTypeDetector.detectors` in the appropriate position by signature length +4. 
Create a new struct conforming to `FormatValidator` in `Sources/XPBCCore/Validators/` +5. Add a case in `DataValidator.validate(_:as:)` for the new type +6. Add UTI mapping in `PasteboardWriter.pasteboardType(for:)` and the case list in `write(_:as:)` +7. Add detection tests in `DataTypeDetectorTests` and validation tests in `DataValidatorTests` -The compiler will guide steps 4 via exhaustive switch errors. +Once the new case exists, the compiler will flag the `description` switch from step 1 and the switches in steps 5 and 6 via exhaustive switch errors. ## Testing -Tests cover `DataTypeDetector.detect(from:)` with 24 test cases: +The tests are organized into two suites with 74 test cases in total: + +### DataTypeDetectorTests (24 tests) - **Format detection** (11): one per supported format, including both GIF versions and both TIFF endiannesses - **Text fallback** (2): ASCII and Japanese UTF-8 - **Edge cases** (6): empty data, single byte, partial headers, RIFF non-WebP, ftyp non-image, random binary - **Security** (5): oversized data with valid header, all-zero bytes, all-0xFF bytes, partial JPEG signatures +### DataValidatorTests (50 tests) + +- **PNG validation** (7): valid, too short, missing IHDR, zero width/height, exact minimum size (29 bytes), one byte below minimum +- **JPEG validation** (4): valid, no marker prefix, invalid marker range, too short +- **GIF validation** (4): valid, zero width/height, too short +- **TIFF validation** (5): valid LE/BE, IFD offset too small/exceeds data, too short +- **BMP validation** (4): valid DIB sizes (40, 124), invalid DIB size, too short +- **WebP validation** (5): VP8/VP8L/VP8X valid, unknown chunk, too short +- **HEIC/AVIF validation** (5): valid HEIC/AVIF, box size too small, exceeds data, just below minimum (11), exact minimum (12) +- **PDF validation** (8): valid, /JS, /JavaScript, /OpenAction, /AA, /Launch, false positive tests (/JSActions, /AABattery), keyword at end of file +- **Control character stripping** (4): ESC removal, tab/newline/CR preservation, NUL removal, multi-scalar grapheme passthrough +-
**Text passthrough** (2): always valid, empty data valid + Test data uses in-memory byte arrays (no fixture files needed).