Skip to content

Commit badb529

Browse files
nier0x00 and copybara-github
authored and committed
No public description
PiperOrigin-RevId: 883325139
1 parent 36f55d3 commit badb529

2 files changed

Lines changed: 72 additions & 89 deletions

File tree

lib/constants.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,7 @@
115115
"load_module",
116116
"loads",
117117
"lzma.open",
118+
"malicious",
118119
"marshal",
119120
"msvcrt",
120121
"open",

saferpickle.py

Lines changed: 71 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ def _custom_chunked_genops(
144144
pickle_file: BinaryIO,
145145
chunk_range: Tuple[int, int],
146146
) -> Iterator[tuple[pickletools.OpcodeInfo, Any | None]]:
147-
"""Generates string-declaring opcodes and arguments from a chunk of pickle data.
147+
"""Generates string-declaring opcodes and arguments from a chunk.
148148
149149
This function reads a specific byte range (chunk) of the pickle bytecode
150150
and yields opcodes that are known to declare strings, along with their
@@ -232,7 +232,7 @@ def _process_chunk_for_generate_ops(
232232
chunk_range: Tuple[int, int],
233233
is_shared_memory: bool = False,
234234
) -> Set[str]:
235-
"""Helper function for generate_ops to process a chunk of pickle data."""
235+
"""Helper function to process a chunk of pickle data."""
236236
chunked_operands = set()
237237
try:
238238
if is_shared_memory:
@@ -266,7 +266,7 @@ def generate_ops_from_file(
266266
shm_name: Optional[str] = None,
267267
pickle_length: Optional[int] = None,
268268
) -> Set[str]:
269-
"""Returns opcodes that declare strings from a pickle file path or shared memory.
269+
"""Returns opcodes that declare strings from a path or shared memory.
270270
271271
Args:
272272
pickle_file_path: The path to the pickle file.
@@ -344,7 +344,7 @@ def generate_ops_from_file(
344344

345345

346346
def generate_ops(pickle_bytes: bytes) -> Set[str]:
347-
"""Returns opcodes that declare strings from a pickle file.
347+
"""Returns string-declaring opcodes.
348348
349349
Args:
350350
pickle_bytes: The pickle bytecode to yield opcode information for.
@@ -393,6 +393,17 @@ def get_class_instantiations(pickle_bytes: bytes) -> tuple[io.StringIO, bool]:
393393
unsafe_modules=constants.UNSAFE_STRINGS,
394394
)
395395
factory.default.unpickler = unpickler
396+
397+
# Monkey-patch load_build so that we don't miss
398+
# BUILD instructions due to differing Pickle implementations.
399+
original_load_build = unpickler.load_build
400+
401+
def fixed_load_build(*unused_args):
402+
return original_load_build()
403+
404+
unpickler.load_build = fixed_load_build
405+
unpickler.dispatch[pickle.BUILD[0]] = unpickler.load_build
406+
396407
unpickler.load()
397408

398409
# These errors are expected and should not be raised.
@@ -410,17 +421,19 @@ def get_class_instantiations(pickle_bytes: bytes) -> tuple[io.StringIO, bool]:
410421
):
411422
pass
412423

413-
is_build_instr_blocked = getattr(
414-
unpickler, "has_blocked_unsafe_build_instr", False
415-
)
424+
is_build_instr_blocked = False
425+
if unpickler:
426+
is_build_instr_blocked = getattr(
427+
unpickler, "has_blocked_unsafe_build_instr", False
428+
)
416429
return picklemagic_output, is_build_instr_blocked
417430

418431

419432
def categorize_strings(
420433
filtered_output: Set[str] | io.StringIO,
421434
use_picklemagic: bool = False,
422435
) -> ScanResults:
423-
"""Counts the relevant strings from the filtered output and categorizes them.
436+
"""Counts strings from filtered output and categorizes them.
424437
425438
Args:
426439
filtered_output: The series of statements filtered by string declarations.
@@ -591,7 +604,7 @@ def categorize_strings(
591604

592605

593606
def strict_security_scan(pickle_bytes: bytes) -> bool:
594-
"""Strict security scan to detect malicious content in pickle files.
607+
"""Strict security scan for malicious content in pickle files.
595608
596609
Args:
597610
pickle_bytes: Pickle bytecode to scan.
@@ -669,7 +682,7 @@ def is_unsafe(
669682
def picklemagic_scan(
670683
pickle_bytes: bytes,
671684
) -> ScanResults:
672-
"""Picklemagic scan to detect malicious content in pickle files.
685+
"""Picklemagic scan for malicious content in pickle files.
673686
674687
Args:
675688
pickle_bytes: Pickle bytecode to scan.
@@ -696,7 +709,7 @@ def genops_scan(
696709
pickle_file_path: Optional[str] = None,
697710
shm_name: Optional[str] = None,
698711
) -> ScanResults:
699-
"""Genops scan to detect malicious content in pickle files.
712+
"""Genops scan for malicious content in pickle files.
700713
701714
Args:
702715
pickle_bytes: Pickle bytecode to scan.
@@ -1075,98 +1088,67 @@ def _scan_and_load(
10751088
if not loader_mod:
10761089
loader_mod = pickle_copy
10771090

1078-
if strict_check and allow_unsafe:
1079-
error_string_illegal_combination = (
1080-
"Strict scanning and allow_unsafe cannot be used together."
1081-
)
1082-
if report_only:
1083-
logging.error(error_string_illegal_combination)
1084-
return
1085-
raise IllegalArgumentCombinationError(error_string_illegal_combination)
1086-
10871091
if is_load:
10881092
load_func = loader_mod.load
10891093
load_args = (pickle_file,)
10901094
else:
10911095
load_func = loader_mod.loads
10921096
load_args = (data_bytes,)
10931097

1094-
if allow_unsafe:
1098+
if strict_check and allow_unsafe:
1099+
error_string_illegal_combination = (
1100+
"Strict scanning and allow_unsafe cannot be used together."
1101+
)
1102+
if report_only:
1103+
logging.error(error_string_illegal_combination)
1104+
else:
1105+
raise IllegalArgumentCombinationError(error_string_illegal_combination)
1106+
elif allow_unsafe:
10951107
if report_only:
10961108
logging.info("Loading pickle file with allow_unsafe set to True.")
1097-
try:
1098-
return load_func(*load_args, *args, **kwargs)
1099-
except AttributeError as exc:
1100-
logging.info("Could not load an absent class: %s", exc)
1101-
return
1102-
1103-
if strict_check:
1109+
elif strict_check:
11041110
if strict_security_scan(data_bytes):
11051111
error_string_strict_check = "Pickle file failed strict security check."
11061112
if report_only:
11071113
logging.error(error_string_strict_check)
1108-
return
1109-
raise StrictCheckError(error_string_strict_check)
1110-
try:
1111-
return load_func(*load_args, *args, **kwargs)
1112-
except (AttributeError, pickle.UnpicklingError) as exc:
1113-
if "persistent load" in str(exc):
1114-
logging.info("Persistent load error: %s", exc)
1115-
return
1116-
elif "Can't get attribute" in str(exc):
1117-
logging.exception(
1118-
"Could not load an absent class: %s", exc, exc_info=True
1119-
)
1120-
raise UnsafePickleDetectedError(
1121-
constants.ERROR_STRING.substitute(
1122-
classification=Classification.SUSPICIOUS.value
1123-
)
1124-
) from exc
1125-
elif "underflow" in str(exc):
1126-
raise
1127-
logging.exception("Unknown error: %s", exc, exc_info=True)
1128-
return
1129-
1130-
# If we get here, we are not in strict check or allow_unsafe mode.
1131-
# We perform non-strict scanning as usual with force_scan if needed.
1132-
scan_scores = security_scan(data_bytes, force_scan=force_scan)
1133-
number_of_unsafe_results = scan_scores["unsafe"]
1134-
number_of_suspicious_results = scan_scores["suspicious"]
1135-
number_of_unknown_results = scan_scores["unknown"]
1136-
1137-
if number_of_suspicious_results == 0 and number_of_unsafe_results == 0:
1138-
if report_only:
1139-
logging.info("Loading safe pickle file")
1140-
if number_of_unknown_results > 0:
1141-
logging.warning(
1142-
"SaferPickle: File contains %d unknown items that were ignored.",
1143-
number_of_unknown_results,
1144-
)
1145-
try:
1146-
return load_func(*load_args, *args, **kwargs)
1147-
except (AttributeError, pickle.UnpicklingError) as exc:
1148-
if "persistent load" in str(exc):
1149-
logging.info("Persistent load error: %s", exc)
1150-
return
1151-
elif "Can't get attribute" in str(exc):
1152-
logging.exception(
1153-
"Could not load an absent class: %s", exc, exc_info=True
1154-
)
1155-
raise UnsafePickleDetectedError(
1156-
constants.ERROR_STRING.substitute(
1157-
classification=Classification.SUSPICIOUS.value
1158-
)
1159-
) from exc
1160-
elif "underflow" in str(exc):
1161-
raise
1162-
logging.exception("Unknown error: %s", exc, exc_info=True)
1163-
return
1164-
1165-
elif number_of_unsafe_results > number_of_suspicious_results:
1166-
_report_or_raise(Classification.UNSAFE, report_only, log_info)
1167-
return
1114+
else:
1115+
raise StrictCheckError(error_string_strict_check)
11681116
else:
1169-
_report_or_raise(Classification.SUSPICIOUS, report_only, log_info)
1117+
# Default scanning routines
1118+
scan_scores = security_scan(data_bytes, force_scan=force_scan)
1119+
number_of_unsafe_results = scan_scores["unsafe"]
1120+
number_of_suspicious_results = scan_scores["suspicious"]
1121+
number_of_unknown_results = scan_scores["unknown"]
1122+
1123+
if number_of_suspicious_results == 0 and number_of_unsafe_results == 0:
1124+
if report_only:
1125+
logging.info("Loading safe pickle file")
1126+
if number_of_unknown_results > 0:
1127+
logging.warning(
1128+
"SaferPickle: File contains %d unknown items that were ignored.",
1129+
number_of_unknown_results,
1130+
)
1131+
elif number_of_unsafe_results > number_of_suspicious_results:
1132+
_report_or_raise(Classification.UNSAFE, report_only, log_info)
1133+
else:
1134+
_report_or_raise(Classification.SUSPICIOUS, report_only, log_info)
1135+
1136+
# Load the pickle whenever no exception was raised above (the checks passed, or report_only downgraded the failure to a log message).
1137+
try:
1138+
return load_func(*load_args, *args, **kwargs)
1139+
except (AttributeError, pickle.UnpicklingError, ModuleNotFoundError) as exc:
1140+
if "persistent load" in str(exc):
1141+
logging.info("Persistent load error: %s", exc)
1142+
elif "Can't get attribute" in str(exc):
1143+
logging.exception(
1144+
"Could not load an absent class: %s", exc, exc_info=True
1145+
)
1146+
elif "underflow" in str(exc):
1147+
logging.exception("Unpickling underflow error: %s", exc, exc_info=True)
1148+
elif "No module named" in str(exc):
1149+
logging.exception("Module was not found: %s", exc, exc_info=True)
1150+
else:
1151+
logging.exception("Unknown error during load: %s", exc, exc_info=True)
11701152
return
11711153

11721154

0 commit comments

Comments
 (0)