@@ -144,7 +144,7 @@ def _custom_chunked_genops(
144144 pickle_file : BinaryIO ,
145145 chunk_range : Tuple [int , int ],
146146) -> Iterator [tuple [pickletools .OpcodeInfo , Any | None ]]:
147- """Generates string-declaring opcodes and arguments from a chunk of pickle data .
147+ """Generates string-declaring opcodes and arguments from a chunk.
148148
149149 This function reads a specific byte range (chunk) of the pickle bytecode
150150 and yields opcodes that are known to declare strings, along with their
@@ -232,7 +232,7 @@ def _process_chunk_for_generate_ops(
232232 chunk_range : Tuple [int , int ],
233233 is_shared_memory : bool = False ,
234234) -> Set [str ]:
235- """Helper function for generate_ops to process a chunk of pickle data."""
235+ """Helper function to process a chunk of pickle data."""
236236 chunked_operands = set ()
237237 try :
238238 if is_shared_memory :
@@ -266,7 +266,7 @@ def generate_ops_from_file(
266266 shm_name : Optional [str ] = None ,
267267 pickle_length : Optional [int ] = None ,
268268) -> Set [str ]:
269- """Returns opcodes that declare strings from a pickle file path or shared memory.
269+ """Returns opcodes that declare strings from a path or shared memory.
270270
271271 Args:
272272 pickle_file_path: The path to the pickle file.
@@ -344,7 +344,7 @@ def generate_ops_from_file(
344344
345345
346346def generate_ops (pickle_bytes : bytes ) -> Set [str ]:
347- """Returns opcodes that declare strings from a pickle file .
347+ """Returns string-declaring opcodes .
348348
349349 Args:
350350 pickle_bytes: The pickle bytecode to yield opcode information for.
@@ -393,6 +393,17 @@ def get_class_instantiations(pickle_bytes: bytes) -> tuple[io.StringIO, bool]:
393393 unsafe_modules = constants .UNSAFE_STRINGS ,
394394 )
395395 factory .default .unpickler = unpickler
396+
397+ # Monkey-patch load_build so that we don't miss
398+ # BUILD instructions due to differing Pickle implementations.
399+ original_load_build = unpickler .load_build
400+
401+ def fixed_load_build (* unused_args ):
402+ return original_load_build ()
403+
404+ unpickler .load_build = fixed_load_build
405+ unpickler .dispatch [pickle .BUILD [0 ]] = unpickler .load_build
406+
396407 unpickler .load ()
397408
398409 # These errors are expected and should not be raised.
@@ -410,17 +421,19 @@ def get_class_instantiations(pickle_bytes: bytes) -> tuple[io.StringIO, bool]:
410421 ):
411422 pass
412423
413- is_build_instr_blocked = getattr (
414- unpickler , "has_blocked_unsafe_build_instr" , False
415- )
424+ is_build_instr_blocked = False
425+ if unpickler :
426+ is_build_instr_blocked = getattr (
427+ unpickler , "has_blocked_unsafe_build_instr" , False
428+ )
416429 return picklemagic_output , is_build_instr_blocked
417430
418431
419432def categorize_strings (
420433 filtered_output : Set [str ] | io .StringIO ,
421434 use_picklemagic : bool = False ,
422435) -> ScanResults :
423- """Counts the relevant strings from the filtered output and categorizes them.
436+ """Counts strings from filtered output and categorizes them.
424437
425438 Args:
426439 filtered_output: The series of statements filtered by string declarations.
@@ -591,7 +604,7 @@ def categorize_strings(
591604
592605
593606def strict_security_scan (pickle_bytes : bytes ) -> bool :
594- """Strict security scan to detect malicious content in pickle files.
607+ """Strict security scan for malicious content in pickle files.
595608
596609 Args:
597610 pickle_bytes: Pickle bytecode to scan.
@@ -669,7 +682,7 @@ def is_unsafe(
669682def picklemagic_scan (
670683 pickle_bytes : bytes ,
671684) -> ScanResults :
672- """Picklemagic scan to detect malicious content in pickle files.
685+ """Picklemagic scan for malicious content in pickle files.
673686
674687 Args:
675688 pickle_bytes: Pickle bytecode to scan.
@@ -696,7 +709,7 @@ def genops_scan(
696709 pickle_file_path : Optional [str ] = None ,
697710 shm_name : Optional [str ] = None ,
698711) -> ScanResults :
699- """Genops scan to detect malicious content in pickle files.
712+ """Genops scan for malicious content in pickle files.
700713
701714 Args:
702715 pickle_bytes: Pickle bytecode to scan.
@@ -1075,98 +1088,67 @@ def _scan_and_load(
10751088 if not loader_mod :
10761089 loader_mod = pickle_copy
10771090
1078- if strict_check and allow_unsafe :
1079- error_string_illegal_combination = (
1080- "Strict scanning and allow_unsafe cannot be used together."
1081- )
1082- if report_only :
1083- logging .error (error_string_illegal_combination )
1084- return
1085- raise IllegalArgumentCombinationError (error_string_illegal_combination )
1086-
10871091 if is_load :
10881092 load_func = loader_mod .load
10891093 load_args = (pickle_file ,)
10901094 else :
10911095 load_func = loader_mod .loads
10921096 load_args = (data_bytes ,)
10931097
1094- if allow_unsafe :
1098+ if strict_check and allow_unsafe :
1099+ error_string_illegal_combination = (
1100+ "Strict scanning and allow_unsafe cannot be used together."
1101+ )
1102+ if report_only :
1103+ logging .error (error_string_illegal_combination )
1104+ else :
1105+ raise IllegalArgumentCombinationError (error_string_illegal_combination )
1106+ elif allow_unsafe :
10951107 if report_only :
10961108 logging .info ("Loading pickle file with allow_unsafe set to True." )
1097- try :
1098- return load_func (* load_args , * args , ** kwargs )
1099- except AttributeError as exc :
1100- logging .info ("Could not load an absent class: %s" , exc )
1101- return
1102-
1103- if strict_check :
1109+ elif strict_check :
11041110 if strict_security_scan (data_bytes ):
11051111 error_string_strict_check = "Pickle file failed strict security check."
11061112 if report_only :
11071113 logging .error (error_string_strict_check )
1108- return
1109- raise StrictCheckError (error_string_strict_check )
1110- try :
1111- return load_func (* load_args , * args , ** kwargs )
1112- except (AttributeError , pickle .UnpicklingError ) as exc :
1113- if "persistent load" in str (exc ):
1114- logging .info ("Persistent load error: %s" , exc )
1115- return
1116- elif "Can't get attribute" in str (exc ):
1117- logging .exception (
1118- "Could not load an absent class: %s" , exc , exc_info = True
1119- )
1120- raise UnsafePickleDetectedError (
1121- constants .ERROR_STRING .substitute (
1122- classification = Classification .SUSPICIOUS .value
1123- )
1124- ) from exc
1125- elif "underflow" in str (exc ):
1126- raise
1127- logging .exception ("Unknown error: %s" , exc , exc_info = True )
1128- return
1129-
1130- # If we get here, we are not in strict check or allow_unsafe mode.
1131- # We perform non-strict scanning as usual with force_scan if needed.
1132- scan_scores = security_scan (data_bytes , force_scan = force_scan )
1133- number_of_unsafe_results = scan_scores ["unsafe" ]
1134- number_of_suspicious_results = scan_scores ["suspicious" ]
1135- number_of_unknown_results = scan_scores ["unknown" ]
1136-
1137- if number_of_suspicious_results == 0 and number_of_unsafe_results == 0 :
1138- if report_only :
1139- logging .info ("Loading safe pickle file" )
1140- if number_of_unknown_results > 0 :
1141- logging .warning (
1142- "SaferPickle: File contains %d unknown items that were ignored." ,
1143- number_of_unknown_results ,
1144- )
1145- try :
1146- return load_func (* load_args , * args , ** kwargs )
1147- except (AttributeError , pickle .UnpicklingError ) as exc :
1148- if "persistent load" in str (exc ):
1149- logging .info ("Persistent load error: %s" , exc )
1150- return
1151- elif "Can't get attribute" in str (exc ):
1152- logging .exception (
1153- "Could not load an absent class: %s" , exc , exc_info = True
1154- )
1155- raise UnsafePickleDetectedError (
1156- constants .ERROR_STRING .substitute (
1157- classification = Classification .SUSPICIOUS .value
1158- )
1159- ) from exc
1160- elif "underflow" in str (exc ):
1161- raise
1162- logging .exception ("Unknown error: %s" , exc , exc_info = True )
1163- return
1164-
1165- elif number_of_unsafe_results > number_of_suspicious_results :
1166- _report_or_raise (Classification .UNSAFE , report_only , log_info )
1167- return
1114+ else :
1115+ raise StrictCheckError (error_string_strict_check )
11681116 else :
1169- _report_or_raise (Classification .SUSPICIOUS , report_only , log_info )
1117+ # Default scanning routines
1118+ scan_scores = security_scan (data_bytes , force_scan = force_scan )
1119+ number_of_unsafe_results = scan_scores ["unsafe" ]
1120+ number_of_suspicious_results = scan_scores ["suspicious" ]
1121+ number_of_unknown_results = scan_scores ["unknown" ]
1122+
1123+ if number_of_suspicious_results == 0 and number_of_unsafe_results == 0 :
1124+ if report_only :
1125+ logging .info ("Loading safe pickle file" )
1126+ if number_of_unknown_results > 0 :
1127+ logging .warning (
1128+ "SaferPickle: File contains %d unknown items that were ignored." ,
1129+ number_of_unknown_results ,
1130+ )
1131+ elif number_of_unsafe_results > number_of_suspicious_results :
1132+ _report_or_raise (Classification .UNSAFE , report_only , log_info )
1133+ else :
1134+ _report_or_raise (Classification .SUSPICIOUS , report_only , log_info )
1135+
1136+ # Attempt the load whenever nothing above raised (this includes report_only runs that only logged a failure).
1137+ try :
1138+ return load_func (* load_args , * args , ** kwargs )
1139+ except (AttributeError , pickle .UnpicklingError , ModuleNotFoundError ) as exc :
1140+ if "persistent load" in str (exc ):
1141+ logging .info ("Persistent load error: %s" , exc )
1142+ elif "Can't get attribute" in str (exc ):
1143+ logging .exception (
1144+ "Could not load an absent class: %s" , exc , exc_info = True
1145+ )
1146+ elif "underflow" in str (exc ):
1147+ logging .exception ("Unpickling underflow error: %s" , exc , exc_info = True )
1148+ elif "No module named" in str (exc ):
1149+ logging .exception ("Module was not found: %s" , exc , exc_info = True )
1150+ else :
1151+ logging .exception ("Unknown error during load: %s" , exc , exc_info = True )
11701152 return
11711153
11721154
0 commit comments