@@ -3,7 +3,6 @@ package thorlog
33import (
44 "bytes"
55 "encoding/hex"
6- "encoding/json"
76 "fmt"
87 "regexp"
98 "strings"
@@ -13,57 +12,108 @@ import (
1312 "github.com/NextronSystems/jsonlog"
1413)
1514
16- type MatchData struct {
17- Data [] byte
18- FullHex bool
15+ type StringWithEncoding struct {
16+ Data string `json:"data"`
17+ Encoding StringEncoding `json:"encoding"`
1918}
2019
21- func (f MatchData ) MarshalJSON () ([]byte , error ) {
22- matchingString := f .String ()
23- return InvalidUnicodeString (matchingString ).MarshalJSON ()
// StringEncoding names the representation used for the data of a
// StringWithEncoding.
type StringEncoding string

// Known StringEncoding values.
const (
	// Plain marks data that is stored verbatim.
	Plain StringEncoding = "plain"
	// Hex marks data that is stored as a string of hexadecimal digits.
	Hex StringEncoding = "hex"
)
26+
27+ // Encode encodes the given data into a StringWithEncoding,
28+ // choosing the most appropriate encoding based on its content.
29+ func Encode (s []byte ) StringWithEncoding {
30+ if utf8 .Valid (s ) {
31+ return StringWithEncoding {
32+ Data : string (s ),
33+ Encoding : Plain ,
34+ }
35+ } else {
36+ return StringWithEncoding {
37+ Data : hex .EncodeToString (s ),
38+ Encoding : Hex ,
39+ }
40+ }
2441}
2542
26- func (f * MatchData ) UnmarshalJSON (data []byte ) error {
27- var matchingString string
28- err := json .Unmarshal (data , & matchingString )
29- if err != nil {
30- return err
43+ // EncodeString encodes the given data into a StringWithEncoding,
44+ // choosing the most appropriate encoding based on its content.
45+ func EncodeString (s string ) StringWithEncoding {
46+ if utf8 .ValidString (s ) {
47+ return StringWithEncoding {
48+ Data : s ,
49+ Encoding : Plain ,
50+ }
51+ } else {
52+ return StringWithEncoding {
53+ Data : hex .EncodeToString ([]byte (s )),
54+ Encoding : Hex ,
55+ }
3156 }
32- f .Data = []byte (matchingString )
33- return nil
3457}
3558
36- func (f MatchData ) JSONSchemaAlias () any {
37- return ""
59+ // Plaintext returns the raw byte sequence represented by the StringWithEncoding.
60+ func (s StringWithEncoding ) Plaintext () []byte {
61+ switch s .Encoding {
62+ case Plain :
63+ return []byte (s .Data )
64+ case Hex :
65+ data , err := hex .DecodeString (s .Data )
66+ if err != nil {
67+ return []byte ("<invalid hex data: " + err .Error () + ">" )
68+ }
69+ return data
70+ default :
71+ return []byte (fmt .Sprintf ("<unknown encoding %s>" , s .Encoding ))
72+ }
3873}
3974
// notOnlyASCII matches any run of characters other than printable ASCII
// (0x20-0x7E), carriage return, line feed and tab.
var notOnlyASCII = regexp.MustCompile(`[^\x20-\x7E\x0d\x0a\x09]+`)
4176
42- func (f MatchData ) String () string {
43- if f .FullHex {
44- return hex .EncodeToString (f .Data )
77+ // String returns a human-readable representation of the encoded string.
78+ // The representation is guaranteed to be valid UTF-8.
79+ func (s StringWithEncoding ) String () string {
80+ data := s .decode ()
81+ if needsQuoting .MatchString (data ) {
82+ return quote (data )
4583 }
46- data := f . Data
47- matchingString := string ( data ) // Try to directly convert
84+ return data
85+ }
4886
49- if ! f .FullHex && notOnlyASCII .MatchString (matchingString ) { // Check if any non-printable chars occur
50- var utf16Data = data
51- // Try UTF16 encoding
52- if len (utf16Data ) > 1 && utf16Data [0 ] == 0xFF && utf16Data [1 ] == 0xFE {
53- // Remove byte order mark
54- utf16Data = utf16Data [2 :]
55- }
56- if len (utf16Data ) > 0 && utf16Data [0 ] == 0 {
57- // Might be UTF16 shifted by one byte
58- utf16Data = utf16Data [1 :]
59- }
60- matchingString , _ = decodeUTF16 (utf16Data )
61- if notOnlyASCII .MatchString (matchingString ) || len (matchingString ) == 0 {
62- // Can't cleanly be rendered as UTF-16
63- matchingString = string (data )
64- }
87+ // decode returns the plain text, after decoding it from UTF-16, if applicable.
88+ func (s StringWithEncoding ) decode () string {
89+ plaintext := s .Plaintext ()
90+
91+ if decoded , ok := attemptDecodeUTF16 (plaintext ); ok {
92+ return decoded
6593 }
66- return matchingString
94+
95+ return string (plaintext )
96+ }
97+
98+ // attemptDecodeUTF16 tries to decode the given byte slice as UTF-16 and checks
99+ // whether the decoded string contains non-ASCII characters.
100+ // It returns the decoded string and a boolean indicating whether the decoding was successful.
101+ func attemptDecodeUTF16 (b []byte ) (string , bool ) {
102+ // Try UTF16 encoding
103+ if len (b ) > 1 && b [0 ] == 0xFF && b [1 ] == 0xFE {
104+ // Remove byte order mark
105+ b = b [2 :]
106+ }
107+ if len (b ) > 0 && b [0 ] == 0 {
108+ // Might be UTF16 shifted by one byte
109+ b = b [1 :]
110+ }
111+ decodedUtf16 , _ := decodeUTF16 (b )
112+ if ! notOnlyASCII .MatchString (decodedUtf16 ) && len (decodedUtf16 ) >= 0 {
113+ // Can cleanly be rendered as UTF-16
114+ return decodedUtf16 , true
115+ }
116+ return "" , false
67117}
68118
69119// https://gist.github.com/bradleypeabody/185b1d7ed6c0c2ab6cec
@@ -84,31 +134,30 @@ func decodeUTF16(b []byte) (string, error) {
84134 return ret .String (), nil
85135}
86136
87- func ( f MatchData ) QuotedString ( ) string {
88- matchingString := f . String ( )
89- matchingString = escaper . Replace ( matchingString )
90- var replacedString bytes. Buffer
91- for _ , char := range []byte (matchingString ) {
137+ func quote ( s string ) string {
138+ s = escaper . Replace ( s )
139+ var quotedString bytes. Buffer
140+ quotedString . WriteString ( `"` )
141+ for _ , char := range []byte (s ) {
92142 if char < 0x20 || char > 0x7E { // non ASCII
93- replacedString .WriteString ("\\ x" )
94- replacedString .WriteString (hex .EncodeToString ([]byte {char }))
143+ quotedString .WriteString ("\\ x" )
144+ quotedString .WriteString (hex .EncodeToString ([]byte {char }))
95145 } else {
96- replacedString .WriteByte (char )
146+ quotedString .WriteByte (char )
97147 }
98148 }
99- matchingString = replacedString .String ()
100- matchingString = fmt .Sprintf ("\" %s\" " , matchingString )
101- return matchingString
149+ quotedString .WriteString (`"` )
150+ return quotedString .String ()
102151}
103152
104153// MatchString describes a sequence of bytes in an object
105154// that was matched on by a signature.
106155type MatchString struct {
107156 // Match contains the bytes that were matched.
108- Match MatchData `json:"data"`
157+ Match StringWithEncoding `json:"data"`
109158 // Context contains the bytes surrounding the matched bytes.
110159 // This may be missing if no context is available.
111- Context * MatchData `json:"context,omitempty"`
160+ Context * StringWithEncoding `json:"context,omitempty"`
112161 // Offset contains the Match's offset within the Field
113162 // where the data was matched.
114163 Offset * uint64 `json:"offset,omitempty"`
@@ -120,26 +169,16 @@ type MatchString struct {
// needsQuoting matches any character outside 0x21 and 0x23-0x7E, that is
// anything other than printable ASCII without the space and the double quote.
var needsQuoting = regexp.MustCompile(`[^\x21\x23-\x7E]`)
121170
122171func (f MatchString ) String () string {
123- var matchString string
124- if needsQuoting .MatchString (f .Match .String ()) && ! f .Match .FullHex {
125- matchString += f .Match .QuotedString ()
126- } else {
127- matchString += f .Match .String ()
128- }
172+ matchString := f .Match .String ()
129173 if f .Context != nil {
130- matchString += " in "
131- if needsQuoting .MatchString (f .Context .String ()) && ! f .Context .FullHex {
132- matchString += f .Context .QuotedString ()
133- } else {
134- matchString += f .Context .String ()
135- }
174+ matchString += " in " + f .Context .String ()
136175 }
137176 if f .Offset != nil {
138177 // Only show the offset if this match does not encompass the full field and it's not explicitly hidden
139178 var showOffset = ! f .HideOffset
140179 if f .Field != nil && * f .Offset == 0 {
141180 if targetString , isString := f .Field .Value ().(string ); isString {
142- if targetString == string ( f .Match .Data ) {
181+ if targetString == f .Match .Data {
143182 showOffset = false
144183 }
145184 }
0 commit comments