From 30e5597024b046a79c1de18c90f431af6e5f294e Mon Sep 17 00:00:00 2001 From: Mike Wiacek Date: Tue, 12 Mar 2024 17:35:34 +0000 Subject: [PATCH] Support java class files Java class files and MachO FAT binaries have the same magic number '0xCAFEBABE'. It's important to inspect the 4 bytes that follow the magic number (the first 8 bytes of the file in total) to disambiguate the two. --- fixtures/sample.class | Bin 0 -> 179 bytes match_test.go | 2 ++ matchers/archive.go | 21 ++++++++++++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 fixtures/sample.class diff --git a/fixtures/sample.class b/fixtures/sample.class new file mode 100644 index 0000000000000000000000000000000000000000..dbb4aac0d87188ff3968abbf3a459aa8a95d8734 GIT binary patch literal 179 zcmX^0Z`VEs1_pBmUM>b^1}=66ZgvJ9Mg}&U%)HDJJ4Oa(4b3n{1{UZ1lvG9rexJ;| zRKL>Pq|~C2#H1Xc2v=}^X;E^jTPBFZnUz?Us0Si=7+4rs85tOz85smXeEponymWp4 zq^#8B5(WhZCZJUyzzD=Zg+Q7O$dU!pAQ4ur?F@_?!P4wNk_{{flHvgJm>4(#DViPk literal 0 HcmV?d00001 diff --git a/match_test.go b/match_test.go index 946bb25..6d38488 100644 --- a/match_test.go +++ b/match_test.go @@ -54,6 +54,7 @@ func TestMatchFile(t *testing.T) { {"zst"}, {"exr"}, {"avif"}, + {"class"}, } for _, test := range cases { @@ -77,6 +78,7 @@ func TestMatchReader(t *testing.T) { {bytes.NewBuffer([]byte{0xFF, 0xD8, 0xFF}), "jpg"}, {bytes.NewBuffer([]byte{0xFF, 0xD8, 0x00}), "unknown"}, {bytes.NewBuffer([]byte{0x89, 0x50, 0x4E, 0x47}), "png"}, + {bytes.NewBuffer([]byte{0xCA, 0xFE, 0xBA, 0xBE, 0x00, 0x00, 0x00, 0xff}), "class"}, } for _, test := range cases { diff --git a/matchers/archive.go b/matchers/archive.go index dd892ce..99c96d1 100644 --- a/matchers/archive.go +++ b/matchers/archive.go @@ -36,6 +36,7 @@ var ( TypeDcm = newType("dcm", "application/dicom") TypeIso = newType("iso", "application/x-iso9660-image") TypeMachO = newType("macho", "application/x-mach-binary") // Mach-O binaries have no common extension. 
+ TypeClass = newType("class", "application/java-vm") // Java class files ) var Archive = Map{ @@ -67,6 +68,7 @@ var Archive = Map{ TypeDcm: Dcm, TypeIso: Iso, TypeMachO: MachO, + TypeClass: Class, } var ( @@ -184,7 +186,24 @@ func MachO(buf []byte) bool { // Big endian versions below here... (buf[0] == 0xCF && buf[1] == 0xFA && buf[2] == 0xED && buf[3] == 0xFE) || (buf[0] == 0xCE && buf[1] == 0xFA && buf[2] == 0xED && buf[3] == 0xFE) || - (buf[0] == 0xCA && buf[1] == 0xFE && buf[2] == 0xBA && buf[3] == 0xBE)) + // Java class files also start with the 0xCAFEBABE magic number. Class can tell the two apart, + // so a buf that starts with 0xCAFEBABE but for which Class returns false is assumed to be a MachO. + (buf[0] == 0xCA && buf[1] == 0xFE && buf[2] == 0xBA && buf[3] == 0xBE && !Class(buf))) +} + +// Class reports whether buf looks like a Java class file: at least 8 bytes, the 0xCAFEBABE magic number, and the next 4 bytes read as a big-endian uint32 >= 44. +// The extra check is needed because some MachO FAT files have the same magic number. +// Reference: https://opensource.apple.com/source/file/file-80.40.2/file/magic/Magdir/cafebabe.auto.html +func Class(buf []byte) bool { + if len(buf) < 8 { + return false + } + + if !(buf[0] == 0xCA && buf[1] == 0xFE && buf[2] == 0xBA && buf[3] == 0xBE) { + return false + } + + return binary.BigEndian.Uint32(buf[4:8]) >= 44 // Java 1.0 is taken to be major version 44. NOTE(review): the JVM spec lists 45 for JDK 1.0.2, and buf[4:8] appears to span both the minor and major version fields; confirm the threshold. } // Zstandard compressed data is made of one or more frames.