From c76333b0a1a65347e2eb14175a03db2e6deccc07 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Tue, 24 Jun 2025 16:50:22 -0700 Subject: [PATCH 1/8] allow for conditional return instructions --- chb/app/BasicBlock.py | 4 ++ chb/app/CHVersion.py | 2 +- chb/app/Cfg.py | 24 +++++++++- chb/app/InstrXData.py | 42 ++++++++++++++++ chb/app/Instruction.py | 4 ++ chb/arm/opcodes/ARMBranchExchange.py | 71 ++++++++++++++++++++++++++-- 6 files changed, 141 insertions(+), 6 deletions(-) diff --git a/chb/app/BasicBlock.py b/chb/app/BasicBlock.py index 59f344f8..564f5bc2 100644 --- a/chb/app/BasicBlock.py +++ b/chb/app/BasicBlock.py @@ -95,6 +95,10 @@ def last_instruction(self) -> Instruction: def has_return(self) -> bool: return self.last_instruction.is_return_instruction + @property + def has_conditional_return(self) -> bool: + return self.last_instruction.is_conditional_return_instruction + @property @abstractmethod def instructions(self) -> Mapping[str, Instruction]: diff --git a/chb/app/CHVersion.py b/chb/app/CHVersion.py index e28791c6..24e0dd1e 100644 --- a/chb/app/CHVersion.py +++ b/chb/app/CHVersion.py @@ -1 +1 @@ -chbversion: str = "0.3.0-20250608" +chbversion: str = "0.3.0-20250624" diff --git a/chb/app/Cfg.py b/chb/app/Cfg.py index 827ce949..61358afa 100644 --- a/chb/app/Cfg.py +++ b/chb/app/Cfg.py @@ -682,10 +682,32 @@ def ast(self, for n in self.rpo_sorted_nodes: astblock = astfn.astblock(n) blocknode = astblock.ast(astree) - if astblock.has_return: + + if astblock.has_conditional_return: + succ = self.successors(n)[0] instr = astblock.last_instruction rv = instr.return_value() astexpr: Optional[AST.ASTExpr] = None + if rv is not None and not astree.returns_void(): + astexpr = XU.xxpr_to_ast_def_expr( + rv, instr.xdata, instr.iaddr, astree) + rtnstmt = astree.mk_return_stmt( + astexpr, instr.iaddr, instr.bytestring) + rvcondition = instr.ast_condition(astree) + if rvcondition is not None: + elsebr = astree.mk_instr_sequence([]) + brstmt = cast(AST.ASTBranch, astree.mk_branch( + rvcondition, rtnstmt, elsebr, succ)) + blockstmts[n] = [blocknode, brstmt] + else: + blockstmts[n] = [blocknode, rtnstmt] + else: + blockstmts[n] = [blocknode] + + elif astblock.has_return: + instr = astblock.last_instruction + rv = instr.return_value() + astexpr = None if rv is not None and not astree.returns_void(): astexpr = XU.xxpr_to_ast_def_expr( rv, instr.xdata, instr.iaddr, astree) diff --git a/chb/app/InstrXData.py b/chb/app/InstrXData.py index eeea1ffa..0bc90962 100644 --- a/chb/app/InstrXData.py +++ b/chb/app/InstrXData.py @@ -526,6 +526,26 @@ def get_instruction_condition(self) -> XXpr: else: raise UF.CHBError("No instruction condition index found") + def get_instruction_c_condition(self) -> XXpr: + for t in self.tags: + if t.startswith("icc:"): + index = int(t[4:]) + argval = self.args[index] + if argval == -2: + raise UF.CHBError( + "Error value in instruction c condition") + return self.xprdictionary.xpr(argval) + else: + raise UF.CHBError("No instruction c_condition index found") + + def has_valid_instruction_c_condition(self) -> bool: + for t in self.tags: + if t.startswith("icc:"): + index = int(t[4:]) + argval = self.args[index] + return argval >= 0 + return False + def has_condition_block_condition(self) -> bool: return "TF" in self.tags @@ -576,6 +596,17 @@ def has_return_cxpr(self) -> bool: else: return False + @property + def is_return_xpr_ok(self) -> bool: + if not self.has_return_xpr(): + return False + rvtag = next(t for t in self.tags if t.startswith("return:")) + rvix = int(rvtag[7:]) + rval = self.args[rvix] + if rval == -2: + return False + return True + def get_return_xpr(self) -> XXpr: rvtag = next(t for t in self.tags if t.startswith("return:")) rvix = int(rvtag[7:]) @@ -584,6 +615,17 @@ def get_return_xpr(self) -> XXpr: raise UF.CHBError("Unexpected error in return value") return self.xprdictionary.xpr(rval) + @property + def is_return_xxpr_ok(self) -> bool: + if not self.has_return_xpr(): + return False + rvtag = next(t for t in self.tags if t.startswith("return:")) + rvix = int(rvtag[7:]) + rval = self.args[rvix + 1] + if rval == -2: + return False + return True + def get_return_xxpr(self) -> XXpr: rvtag = next(t for t in self.tags if t.startswith("return:")) rvix = int(rvtag[7:]) diff --git a/chb/app/Instruction.py b/chb/app/Instruction.py index 8ce33b48..fd13c841 100644 --- a/chb/app/Instruction.py +++ b/chb/app/Instruction.py @@ -206,6 +206,10 @@ def is_store_instruction(self) -> bool: def is_return_instruction(self) -> bool: ... + @property + def is_conditional_return_instruction(self) -> bool: + return False + @property def is_nop_instruction(self) -> bool: return False diff --git a/chb/arm/opcodes/ARMBranchExchange.py b/chb/arm/opcodes/ARMBranchExchange.py index d45ab975..b21e35b0 100644 --- a/chb/arm/opcodes/ARMBranchExchange.py +++ b/chb/arm/opcodes/ARMBranchExchange.py @@ -38,7 +38,7 @@ from chb.astinterface.ASTInterface import ASTInterface from chb.invariants.XXpr import XXpr - +import chb.invariants.XXprUtil as XU import chb.util.fileutil as UF from chb.util.loggingutil import chklogger @@ -69,6 +69,14 @@ def xxtgt(self) -> "XXpr": def has_return_xpr(self) -> bool: return self.xdata.has_return_xpr() + @property + def is_return_xpr_ok(self) -> bool: + return self.xdata.is_return_xpr_ok + + @property + def is_return_xxpr_ok(self) -> bool: + return self.xdata.is_return_xxpr_ok + def returnval(self) -> "XXpr": return self.xdata.get_return_xpr() @@ -81,6 +89,18 @@ def has_creturnval(self) -> bool: def creturnval(self) -> "XXpr": return self.xdata.get_return_cxpr() + def has_instruction_condition(self) -> bool: + return self.xdata.has_instruction_condition() + + def get_instruction_condition(self) -> "XXpr": + return self.xdata.get_instruction_condition() + + def has_valid_instruction_c_condition(self) -> bool: + return self.xdata.has_valid_instruction_c_condition() + + def get_instruction_c_condition(self) -> "XXpr": + return self.xdata.get_instruction_c_condition() + @property def annotation(self) -> str: if self.xdata.is_bx_call: @@ -89,8 +109,10 @@ def annotation(self) -> str: cx = (" (C: " + (str(self.creturnval()) if self.has_creturnval() else "None") + ")") - if self.is_ok: + if self.is_return_xxpr_ok: return "return " + str(self.rreturnval()) + cx + elif self.is_return_xpr_ok: + return "return " + str(self.returnval()) + cx else: return "Error value" else: @@ -126,11 +148,20 @@ def is_return_instruction(self, xdata: InstrXData) -> bool: else: return False + def is_conditional_return_instruction(self, xdata: InstrXData) -> bool: + if self.is_return_instruction(xdata): + return xdata.has_instruction_condition() + return False + def return_value(self, xdata: InstrXData) -> Optional[XXpr]: xd = ARMBranchExchangeXData(xdata) - if xd.has_creturnval(): - if xd.is_ok: + if xd.has_return_xpr(): + if xd.has_creturnval(): return xd.creturnval() + elif xd.is_return_xxpr_ok: + return xd.rreturnval() + elif xd.is_return_xpr_ok: + return xd.returnval() else: chklogger.logger.warning( "Return value is an error value") @@ -174,6 +205,38 @@ def assembly_ast( """Need check for branch on LR, which should emit a return statement.""" return [] + def ast_condition_prov( + self, + astree: ASTInterface, + iaddr: str, + bytestring: str, + xdata: InstrXData, + reverse: bool + ) -> Tuple[Optional[AST.ASTExpr], Optional[AST.ASTExpr]]: + + ll_astcond = self.ast_cc_expr(astree) + + if xdata.has_instruction_condition(): + xd = ARMBranchExchangeXData(xdata) + if xd.has_valid_instruction_c_condition(): + pcond = xd.get_instruction_c_condition() + else: + pcond = xd.get_instruction_condition() + hl_astcond = XU.xxpr_to_ast_def_expr(pcond, xdata, iaddr, astree) + + astree.add_expr_mapping(hl_astcond, ll_astcond) + astree.add_expr_reachingdefs(hl_astcond, xdata.reachingdefs) + astree.add_flag_expr_reachingdefs(ll_astcond, xdata.flag_reachingdefs) + astree.add_condition_address(ll_astcond, [iaddr]) + + return (hl_astcond, ll_astcond) + + else: + chklogger.logger.error( + "No condition found at address %s", iaddr) + hl_astcond = astree.mk_temp_lval_expression() + return (hl_astcond, ll_astcond) + def ast_prov( self, astree: ASTInterface, From 6f6e1ea7e86e0f325159778cb6517787585b24a1 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Tue, 24 Jun 2025 16:51:56 -0700 Subject: [PATCH 2/8] support for conditional return instruction --- chb/arm/ARMInstruction.py | 4 ++++ chb/arm/ARMOpcode.py | 3 +++ chb/arm/opcodes/ARMLoadRegisterSignedByte.py | 4 ++++ .../opcodes/ARMLoadRegisterSignedHalfword.py | 1 + chb/arm/opcodes/ARMMove.py | 12 ++++++++++++ chb/arm/opcodes/ARMPop.py | 8 ++++++++ chb/arm/opcodes/opcodes_covered.json | 19 ++++++++++++++----- chb/astinterface/ASTInterfaceBasicBlock.py | 4 ++++ 8 files changed, 50 insertions(+), 5 deletions(-) diff --git a/chb/arm/ARMInstruction.py b/chb/arm/ARMInstruction.py index e48ff8b4..5ffa36af 100644 --- a/chb/arm/ARMInstruction.py +++ b/chb/arm/ARMInstruction.py @@ -171,6 +171,10 @@ def is_store_instruction(self) -> bool: def is_return_instruction(self) -> bool: return self.opcode.is_return_instruction(self.xdata) + @property + def is_conditional_return_instruction(self) -> bool: + return self.opcode.is_conditional_return_instruction(self.xdata) + @property def is_nop_instruction(self) -> bool: return self.opcode.is_nop_instruction(self.xdata) diff --git a/chb/arm/ARMOpcode.py b/chb/arm/ARMOpcode.py index 60ef415b..5984dbb7 100644 --- a/chb/arm/ARMOpcode.py +++ b/chb/arm/ARMOpcode.py @@ -522,6 +522,9 @@ def is_branch_instruction(self) -> bool: def is_return_instruction(self, xdata: InstrXData) -> bool: return False + def is_conditional_return_instruction(self, xdata: InstrXData) -> bool: + return False + def return_value(self, xdata: InstrXData) -> Optional[XXpr]: return None diff --git a/chb/arm/opcodes/ARMLoadRegisterSignedByte.py b/chb/arm/opcodes/ARMLoadRegisterSignedByte.py index 5e2939cf..39be28c7 100644 --- a/chb/arm/opcodes/ARMLoadRegisterSignedByte.py +++ b/chb/arm/opcodes/ARMLoadRegisterSignedByte.py @@ -126,6 +126,10 @@ def __init__(self, d: "ARMDictionary", ixval: IndexedTableValue) -> None: def operands(self) -> List[ARMOperand]: return [self.armd.arm_operand(self.args[i]) for i in [0, 3]] + @property + def opargs(self) -> List[ARMOperand]: + return [self.armd.arm_operand(self.args[i]) for i in [0, 1, 2, 3]] + def is_load_instruction(self, xdata: InstrXData) -> bool: return True diff --git a/chb/arm/opcodes/ARMLoadRegisterSignedHalfword.py b/chb/arm/opcodes/ARMLoadRegisterSignedHalfword.py index fb4d4b5d..569203d6 100644 --- a/chb/arm/opcodes/ARMLoadRegisterSignedHalfword.py +++ b/chb/arm/opcodes/ARMLoadRegisterSignedHalfword.py @@ -183,6 +183,7 @@ def ast_prov( rhs, xdata, iaddr, astree, memaddr=xaddr, size=2) elif xd.is_xrmem_unknown and xd.is_address_known: + lhs = xd.vrt xaddr = xd.xaddr hl_lhs = XU.xvariable_to_ast_lval(lhs, xdata, iaddr, astree) hl_rhs = XU.xmemory_dereference_lval_expr( diff --git a/chb/arm/opcodes/ARMMove.py b/chb/arm/opcodes/ARMMove.py index 8a6c773c..38e00bd7 100644 --- a/chb/arm/opcodes/ARMMove.py +++ b/chb/arm/opcodes/ARMMove.py @@ -90,6 +90,18 @@ def cresult(self) -> "XXpr": def is_cresult_ok(self) -> bool: return self.is_cxpr_ok(0) + def has_instruction_condition(self) -> bool: + return self.xdata.has_instruction_condition() + + def get_instruction_condition(self) -> "XXpr": + return self.xdata.get_instruction_condition() + + def has_valid_instruction_c_condition(self) -> bool: + return self.xdata.has_valid_instruction_c_condition() + + def get_instruction_c_condition(self) -> "XXpr": + return self.xdata.get_instruction_c_condition() + @property def annotation(self) -> str: if self.xdata.instruction_is_subsumed(): diff --git a/chb/arm/opcodes/ARMPop.py b/chb/arm/opcodes/ARMPop.py index f488c1ce..2786a7f8 100644 --- a/chb/arm/opcodes/ARMPop.py +++ b/chb/arm/opcodes/ARMPop.py @@ -196,6 +196,11 @@ def operandstring(self) -> str: def is_return_instruction(self, xdata: InstrXData) -> bool: return ARMPopXData(xdata).has_return_xpr() + def is_conditional_return_instruction(self, xdata: InstrXData) -> bool: + if self.is_return_instruction(xdata): + return xdata.has_instruction_condition() + return False + def return_value(self, xdata: InstrXData) -> Optional[XXpr]: xd = ARMPopXData(xdata) if xd.has_return_xpr(): @@ -222,10 +227,13 @@ def ast_condition_prov( ll_astcond = self.ast_cc_expr(astree) if xdata.has_instruction_condition(): + pcond = xdata.get_instruction_condition() + ''' if reverse: pcond = xdata.xprs[(2 * len(xdata.vars)) + 3] else: pcond = xdata.xprs[(2 * len(xdata.vars)) + 2] + ''' hl_astcond = XU.xxpr_to_ast_def_expr(pcond, xdata, iaddr, astree) astree.add_expr_mapping(hl_astcond, ll_astcond) diff --git a/chb/arm/opcodes/opcodes_covered.json b/chb/arm/opcodes/opcodes_covered.json index dc2edd79..46c5cda5 100644 --- a/chb/arm/opcodes/opcodes_covered.json +++ b/chb/arm/opcodes/opcodes_covered.json @@ -815,7 +815,8 @@ { "T": "A1", "E": "<6>PUDW1<11><-imm7>1", - "D": "Y" + "D": "Y", + "U": ["210b90ec"] }, { "T": "A2", @@ -843,7 +844,8 @@ { "T": "A1", "E": "<6>01D00<11><-imm7>1", - "D": "Y" + "D": "Y", + "U": ["210b80ec"] }, { "T": "A1-wb", @@ -895,7 +897,10 @@ "U": ["02a1f0ec"] }, { - "T": "A2" + "T": "A2", + "E": "<15><6>PUDW1<-imm8->", + "D": "Y", + "U": ["0181b0fc"] }, { "T": "T1", @@ -2662,7 +2667,9 @@ "all": [ { "T": "A2", - "E": "<15>110PU0W0<-imm8->" + "E": "<15>110PU0W0<-imm8->", + "D": "Y", + "U": ["0181a0fc"] }, { "T": "T2", @@ -2692,7 +2699,9 @@ "all": [ { "T": "A1", - "E": "110PU1W0<-imm8->" + "E": "110PU1W0<-imm8->", + "D": "Y", + "U": ["0201e0ec"] }, { "T": "T1", diff --git a/chb/astinterface/ASTInterfaceBasicBlock.py b/chb/astinterface/ASTInterfaceBasicBlock.py index 5ee07e7d..6d35298d 100644 --- a/chb/astinterface/ASTInterfaceBasicBlock.py +++ b/chb/astinterface/ASTInterfaceBasicBlock.py @@ -119,6 +119,10 @@ def trampoline_block_instructions( def has_return(self) -> bool: return self.basicblock.has_return + @property + def has_conditional_return(self) -> bool: + return self.basicblock.has_conditional_return + @property def last_instruction(self) -> ASTInterfaceInstruction: bb_lastinstr = self.basicblock.last_instruction From 23409cb2e0721e06015b302da4fac8ee2dc727b4 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 7 Jul 2025 13:25:00 -0700 Subject: [PATCH 3/8] ASTI: introduce basic block fragment for predicated instructions --- chb/app/BasicBlock.py | 174 +++++++++++++++++++- chb/app/Cfg.py | 13 +- chb/app/InstrXData.py | 16 ++ chb/app/Instruction.py | 24 ++- chb/astinterface/ASTInterfaceBasicBlock.py | 53 +++++- chb/astinterface/ASTInterfaceInstruction.py | 21 +++ 6 files changed, 291 insertions(+), 10 deletions(-) diff --git a/chb/app/BasicBlock.py b/chb/app/BasicBlock.py index 564f5bc2..933e8823 100644 --- a/chb/app/BasicBlock.py +++ b/chb/app/BasicBlock.py @@ -4,7 +4,7 @@ # ------------------------------------------------------------------------------ # The MIT License (MIT) # -# Copyright (c) 2021-2024 Aarno Labs LLC +# Copyright (c) 2021-2025 Aarno Labs LLC # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -52,12 +52,135 @@ import chb.app.Function +class BasicBlockFragment: + """Represents a basic block fragment without ast control flow. + + In ARM instructions may be predicated, e.g.: + + MOVEQ R0, R1 : if condition then R0 := R1 + + In the ocaml analyzer this additional control flow can be accomodated + directly in CHIF without the need to create a separate basic block + in the CFG (i.e. lightweight control flow). In decompilation, some of + these instructions may be accomodated without explicit control flow (e.g., + by the C ternary operation), but this is not always possible. + + However, even when predicated instructions cannot be converted into + expressions, it is not necessary to create top-level basic blocks + that are subject to the CFG-to-AST conversion. A more light-weight + solution is to embed the necessary control flow (notably limited + to branches and instruction sequences) within the block created for + the original basic block. + + The basic block is partitioned into a linear sequence of BasicBlock + Fragments, where each fragment can be one of the following: + - a (linear) instruction sequence statement + - a branch statement containing a condition and a single (if) branch + - a branch statement containing a condition and a then and an else + branch. + In case of the branch statement either branch can have one or more + instructions that have the same condition setter and the same + condition. + """ + + def __init__(self, instr: Instruction) -> None: + self._linear: List[Instruction] = [] + self._thenbranch: List[Instruction] = [] + self._elsebranch: List[Instruction] = [] + self._setter: Optional[str] = None + self._condition: Optional[str] = None + self.add_instr(instr) + + @property + def condition(self) -> Optional[str]: + return self._condition + + @property + def setter(self) -> Optional[str]: + return self._setter + + @property + def is_predicated(self) -> bool: + return self.condition is not None + + @property + def is_then_only(self) -> bool: + return self.is_predicated and len(self.elsebranch) == 0 + + @property + def linear(self) -> List[Instruction]: + return self._linear + + @property + def thenbranch(self) -> List[Instruction]: + return self._thenbranch + + @property + def elsebranch(self) -> List[Instruction]: + return self._elsebranch + + @property + def is_empty(self) -> bool: + return ( + len(self.linear) + len(self.thenbranch) + len(self.elsebranch) == 0) + + def add_predicated_instr(self, instr: Instruction) -> None: + if self.is_empty: + self._condition = instr.get_instruction_cc() + self._setter = instr.get_instruction_condition_setter() + self.thenbranch.append(instr) + elif self.is_predicated: + if self.condition == instr.get_instruction_cc(): + self.thenbranch.append(instr) + else: + self.elsebranch.append(instr) + else: + raise UF.CHBError("Cannot add predicated instruction to linear frag") + + def add_linear_instr(self, instr: Instruction) -> None: + if self.is_empty or (not self.is_predicated): + self.linear.append(instr) + else: + raise UF.CHBError( + "Cannot add unpredicated instruction to predicate fragment") + + def add_instr(self, instr: Instruction) -> None: + if instr.has_control_flow(): + self.add_predicated_instr(instr) + else: + self.add_linear_instr(instr) + + def __str__(self) -> str: + lines: List[str] = [] + if self.condition: + setter = " (" + self.setter + ")" if self.setter else "" + lines.append("condition: " + self.condition + setter) + if self.linear: + lines.append("linear") + for i in self.linear: + lines.append(" " + str(i)) + if self.thenbranch: + lines.append("then:") + for i in self.thenbranch: + lines.append(" " + str(i)) + if self.elsebranch: + lines.append("else:") + for i in self.elsebranch: + lines.append(" " + str(i)) + return "\n".join(lines) + + class BasicBlock(ABC): def __init__( self, xnode: ET.Element) -> None: self.xnode = xnode + self._partition: Dict[str, BasicBlockFragment] = {} + + @property + def partition(self) -> Dict[str, BasicBlockFragment]: + return self._partition @property def baddr(self) -> str: @@ -99,6 +222,55 @@ def has_return(self) -> bool: def has_conditional_return(self) -> bool: return self.last_instruction.is_conditional_return_instruction + def has_control_flow(self) -> bool: + """Returns true if this block contains predicated instructions that + are not otherwise covered in aggregates or by other conditions. + + The case of a block with a conditional return is already handled in + the Cfg, so it is excluded here. + + The case of a block with a conditional return and other conditional + instructions is not yet handled. + """ + + count = len([i for i in self.instructions.values() if i.has_control_flow()]) + if count == 1 and self.has_conditional_return: + return False + + return any(i.has_control_flow() for i in self.instructions.values()) + + def partition_control_flow(self) -> None: + curblock: Optional[BasicBlockFragment] = None + curaddr: Optional[str] = None + + for (a, i) in sorted(self.instructions.items()): + if curaddr is None or curblock is None: + curaddr = a + curblock = BasicBlockFragment(i) + else: + if i.has_control_flow(): + if curblock.is_predicated: + if i.get_instruction_condition_setter() == curblock.setter: + curblock.add_instr(i) + else: + self._partition[curaddr] = curblock + curblock = BasicBlockFragment(i) + curaddr = a + else: + self._partition[curaddr] = curblock + curblock = BasicBlockFragment(i) + curaddr = a + else: + if curblock.is_predicated: + self._partition[curaddr] = curblock + curblock = BasicBlockFragment(i) + curaddr = a + else: + curblock.add_instr(i) + + if curaddr is not None and curblock is not None: + self._partition[curaddr] = curblock + @property @abstractmethod def instructions(self) -> Mapping[str, Instruction]: diff --git a/chb/app/Cfg.py b/chb/app/Cfg.py index 61358afa..dacf3119 100644 --- a/chb/app/Cfg.py +++ b/chb/app/Cfg.py @@ -687,13 +687,14 @@ def ast(self, succ = self.successors(n)[0] instr = astblock.last_instruction rv = instr.return_value() + rvcondition = instr.ast_condition(astree) astexpr: Optional[AST.ASTExpr] = None if rv is not None and not astree.returns_void(): astexpr = XU.xxpr_to_ast_def_expr( rv, instr.xdata, instr.iaddr, astree) rtnstmt = astree.mk_return_stmt( astexpr, instr.iaddr, instr.bytestring) - rvcondition = instr.ast_condition(astree) + if rvcondition is not None: elsebr = astree.mk_instr_sequence([]) brstmt = cast(AST.ASTBranch, astree.mk_branch( @@ -702,7 +703,15 @@ def ast(self, else: blockstmts[n] = [blocknode, rtnstmt] else: - blockstmts[n] = [blocknode] + rtnstmt = astree.mk_return_stmt( + None, instr.iaddr, instr.bytestring) + if rvcondition is not None: + elsebr = astree.mk_instr_sequence([]) + brstmt = cast(AST.ASTBranch, astree.mk_branch( + rvcondition, rtnstmt, elsebr, succ)) + blockstmts[n] = [blocknode, brstmt] + else: + blockstmts[n] = [blocknode, rtnstmt] elif astblock.has_return: instr = astblock.last_instruction diff --git a/chb/app/InstrXData.py b/chb/app/InstrXData.py index 0bc90962..549abd1b 100644 --- a/chb/app/InstrXData.py +++ b/chb/app/InstrXData.py @@ -642,6 +642,22 @@ def get_return_cxpr(self) -> XXpr: raise UF.CHBError("Unexpected error in C return value") return self.xprdictionary.xpr(rval) + @property + def is_predicate_assignment(self) -> bool: + return "agg:predassign" in self.tags + + @property + def is_nondet_predicate_assignment(self) -> bool: + return "agg:predassign:nd" in self.tags + + @property + def is_ternary_assignment(self) -> bool: + return "agg:ternassign" in self.tags + + @property + def is_nondet_ternary_assignment(self) -> bool: + return "agg:ternassign:nd" in self.tags + @property def is_aggregate_jumptable(self) -> bool: return "agg-jt" in self.tags diff --git a/chb/app/Instruction.py b/chb/app/Instruction.py index fd13c841..97a4308d 100644 --- a/chb/app/Instruction.py +++ b/chb/app/Instruction.py @@ -4,7 +4,7 @@ # ------------------------------------------------------------------------------ # The MIT License (MIT) # -# Copyright (c) 2021-2024 Aarno Labs LLC +# Copyright (c) 2021-2025 Aarno Labs LLC # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -145,6 +145,18 @@ def rev_bytestring(self) -> str: revb = "".join(i+j for i, j in zip(b[:-1][::-2], b[::-2])) return revb + def has_control_flow(self) -> bool: + """Returns true if this instruction is predicated and not covered + by an enclosing aggregate or other condition.""" + + return self.xnode.get("brcc") is not None + + def get_instruction_cc(self) -> Optional[str]: + return self.xnode.get("brcc") + + def get_instruction_condition_setter(self) -> Optional[str]: + return self.xnode.get("brsetter") + def md5(self) -> str: m = hashlib.md5() m.update(self.bytestring.encode("utf-8")) @@ -292,10 +304,16 @@ def ast_prov(self, astree: ASTInterface) -> Tuple[ def is_condition_true(self) -> bool: return False - def ast_condition_prov(self, astree: ASTInterface, reverse: bool = False) -> Tuple[ - Optional[AST.ASTExpr], Optional[AST.ASTExpr]]: + def ast_condition_prov( + self, astree: ASTInterface, reverse: bool = False + ) -> Tuple[Optional[AST.ASTExpr], Optional[AST.ASTExpr]]: raise UF.CHBError("ast-condition-prov not defined") + def ast_cc_condition_prov( + self, astree: ASTInterface + ) -> Tuple[Optional[AST.ASTExpr], Optional[AST.ASTExpr]]: + raise UF.CHBError("ast-cc-codntiion-prov not defined") + def assembly_ast_condition( self, astree: ASTInterface, diff --git a/chb/astinterface/ASTInterfaceBasicBlock.py b/chb/astinterface/ASTInterfaceBasicBlock.py index 6d35298d..8d577726 100644 --- a/chb/astinterface/ASTInterfaceBasicBlock.py +++ b/chb/astinterface/ASTInterfaceBasicBlock.py @@ -4,7 +4,7 @@ # ------------------------------------------------------------------------------ # The MIT License (MIT) # -# Copyright (c) 2022-2024 Aarno Labs LLC +# Copyright (c) 2022-2025 Aarno Labs LLC # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -26,7 +26,7 @@ # ------------------------------------------------------------------------------ """Basic block in an abstract syntax tree.""" -from typing import cast, Dict, List, Optional, Set, TYPE_CHECKING +from typing import cast, Dict, List, Optional, Set, Tuple, TYPE_CHECKING import chb.ast.ASTNode as AST @@ -40,7 +40,7 @@ if TYPE_CHECKING: from chb.arm.ARMCfgBlock import ARMCfgBlock - from chb.app.BasicBlock import BasicBlock + from chb.app.BasicBlock import BasicBlock, BasicBlockFragment from chb.arm.ARMInstruction import ARMInstruction from chb.arm.opcodes.ARMLogicalShiftLeft import ARMLogicalShiftLeft from chb.arm.opcodes.ARMReverseSubtract import ARMReverseSubtract @@ -168,12 +168,57 @@ def assembly_ast(self, astree: "ASTInterface") -> AST.ASTStmt: instrs.extend(i.assembly_ast(astree)) return astree.mk_instr_sequence(instrs) + def ast_fragment( + self, astree: "ASTInterface", frag: "BasicBlockFragment") -> AST.ASTStmt: + if frag.is_predicated: + theninstrs = [ASTInterfaceInstruction(i) for i in frag.thenbranch] + elseinstrs = [ASTInterfaceInstruction(i) for i in frag.elsebranch] + thenstmt = self.linear_ast(astree, theninstrs) + elsestmt = self.linear_ast(astree, elseinstrs) + cinstr = theninstrs[0] + brcond = cinstr.ast_cc_condition(astree) + if brcond is None: + chklogger.logger.warning( + "No instruction predicate expression found at address %s", + cinstr.iaddr) + brcond = astree.mk_temp_lval_expression() + return astree.mk_branch(brcond, thenstmt, elsestmt, "0x0") + else: + instrs = [ASTInterfaceInstruction(i) for i in frag.linear] + return self.linear_ast(astree, instrs) + + def fragmented_ast(self, astree: "ASTInterface") -> AST.ASTStmt: + + if len(self.basicblock.partition) == 0: + raise UF.CHBError("Error in fragmented ast") + + stmts: List[AST.ASTStmt] = [] + + for (a, bf) in sorted(self.basicblock.partition.items()): + stmt = self.ast_fragment(astree, bf) + stmts.append(stmt) + + return astree.mk_block(stmts) + def ast(self, astree: "ASTInterface") -> AST.ASTStmt: if self.is_trampoline: return self.trampoline_ast(astree) + if self.basicblock.has_control_flow(): + self.basicblock.partition_control_flow() + return self.fragmented_ast(astree) + + else: + + return self.linear_ast( + astree, sorted(self.instructions.values(), key = lambda p:p.iaddr)) + + def linear_ast( + self, + astree: "ASTInterface", + instritems: List[ASTInterfaceInstruction]) -> AST.ASTStmt: instrs: List[AST.ASTInstruction] = [] - for (a, i) in sorted(self.instructions.items(), key=lambda p: p[0]): + for i in instritems: instrs.extend(i.ast(astree)) return astree.mk_instr_sequence(instrs) diff --git a/chb/astinterface/ASTInterfaceInstruction.py b/chb/astinterface/ASTInterfaceInstruction.py index 159a09d7..552ff052 100644 --- a/chb/astinterface/ASTInterfaceInstruction.py +++ b/chb/astinterface/ASTInterfaceInstruction.py @@ -48,6 +48,8 @@ def __init__(self, instr: "Instruction") -> None: self._ll_ast_instrs: List[AST.ASTInstruction] = [] self._hl_ast_condition: Optional[AST.ASTExpr] = None self._ll_ast_condition: Optional[AST.ASTExpr] = None + self._hl_ast_cc_condition: Optional[AST.ASTExpr] = None + self._ll_ast_cc_condition: Optional[AST.ASTExpr] = None self._hl_ast_switch_condition: Optional[AST.ASTExpr] = None self._ll_ast_swtich_condition: Optional[AST.ASTExpr] = None @@ -90,6 +92,14 @@ def hl_ast_condition(self) -> Optional[AST.ASTExpr]: def ll_ast_condition(self) -> Optional[AST.ASTExpr]: return self._ll_ast_condition + @property + def hl_ast_cc_condition(self) -> Optional[AST.ASTExpr]: + return self._hl_ast_cc_condition + + @property + def ll_ast_cc_condition(self) -> Optional[AST.ASTExpr]: + return self._ll_ast_cc_condition + @property def hl_ast_switch_condition(self) -> Optional[AST.ASTExpr]: return self._hl_ast_switch_condition @@ -136,6 +146,11 @@ def ast_condition( self.ast_condition_prov(astree, reverse=reverse) return self.hl_ast_condition + def ast_cc_condition(self, astree: "ASTInterface") -> Optional[AST.ASTExpr]: + if self.hl_ast_cc_condition is None: + self.ast_cc_condition_prov(astree) + return self.hl_ast_cc_condition + def ast_switch_condition( self, astree: "ASTInterface") -> Optional[AST.ASTExpr]: @@ -152,6 +167,12 @@ def ast_condition_prov( self._hl_ast_condition = hl self._ll_ast_condition = ll + def ast_cc_condition_prov(self, astree: "ASTInterface") -> None: + if self.hl_ast_cc_condition is None: + (hl, ll) = self.instruction.ast_cc_condition_prov(astree) + self._hl_ast_cc_condition = hl + self._ll_ast_cc_condition = ll + def ast_switch_condition_prov( self, astree: "ASTInterface") -> None: if self.hl_ast_switch_condition is None: From 1828a2d2b228a4d75a788db1872166bd280cde3b Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 7 Jul 2025 13:27:00 -0700 Subject: [PATCH 4/8] ARM: support for lifting of predicated instructions --- chb/arm/ARMOpcode.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/chb/arm/ARMOpcode.py b/chb/arm/ARMOpcode.py index 5984dbb7..e6aa7e40 100644 --- a/chb/arm/ARMOpcode.py +++ b/chb/arm/ARMOpcode.py @@ -186,6 +186,18 @@ def has_cxpr(self, index: int) -> bool: def is_cxpr_ok(self, index: int) -> bool: return self.xdata.is_cxpr_ok(index) + def has_instruction_condition(self) -> bool: + return self.xdata.has_instruction_condition() + + def get_instruction_condition(self) -> "XXpr": + return self.xdata.get_instruction_condition() + + def has_valid_instruction_c_condition(self) -> bool: + return self.xdata.has_valid_instruction_c_condition() + + def get_instruction_c_condition(self) -> "XXpr": + return self.xdata.get_instruction_c_condition() + def add_instruction_condition(self, s: str) -> str: if self.xdata.has_unknown_instruction_condition(): return "if ? then " + s @@ -330,6 +342,37 @@ def ast_condition_prov( expr = self.ast_condition(astree, iaddr, bytestring, xdata, reverse) return (expr, expr) + def ast_cc_condition_prov( + self, + astree: ASTInterface, + iaddr: str, + bytestring: str, + xdata: InstrXData + ) -> Tuple[Optional[AST.ASTExpr], Optional[AST.ASTExpr]]: + + ll_astcond = self.ast_cc_expr(astree) + + if xdata.has_instruction_condition(): + xd = ARMOpcodeXData(xdata) + if xd.has_valid_instruction_c_condition(): + pcond = xd.get_instruction_c_condition() + else: + pcond = xd.get_instruction_condition() + hl_astcond = XU.xxpr_to_ast_def_expr(pcond, xdata, iaddr, astree) + + astree.add_expr_mapping(hl_astcond, ll_astcond) + astree.add_expr_reachingdefs(hl_astcond, xdata.reachingdefs) + astree.add_flag_expr_reachingdefs(ll_astcond, xdata.flag_reachingdefs) + astree.add_condition_address(ll_astcond, [iaddr]) + + return (hl_astcond, ll_astcond) + + else: + chklogger.logger.error( + "No condition found at address %s", iaddr) + hl_astcond = astree.mk_temp_lval_expression() + return (hl_astcond, ll_astcond) + def ast_switch_condition_prov( self, astree: ASTInterface, From c6cb9c17a7d24f76b045e4f05f9262518d000ba0 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 7 Jul 2025 13:27:52 -0700 Subject: [PATCH 5/8] ARM: support for lifting of predicated instructions --- chb/arm/ARMInstruction.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/chb/arm/ARMInstruction.py b/chb/arm/ARMInstruction.py index 5ffa36af..5eadeb3c 100644 --- a/chb/arm/ARMInstruction.py +++ b/chb/arm/ARMInstruction.py @@ -98,6 +98,10 @@ def opcode(self) -> ARMOpcode: self._opcode = self.armdictionary.read_xml_arm_opcode(self.xnode) return self._opcode + def has_control_flow(self) -> bool: + brcc = self.xnode.get("brcc") + return brcc is not None + @property def xdata(self) -> InstrXData: if self._xdata is None: @@ -306,6 +310,20 @@ def ast_condition_prov( expr = astree.mk_integer_constant(0) return (expr, expr) + def ast_cc_condition_prov( + self, astree: ASTInterface + ) -> Tuple[Optional[ASTExpr], Optional[ASTExpr]]: + + try: + return self.opcode.ast_cc_condition_prov( + astree, self.iaddr, self.bytestring, self.xdata) + except Exception as e: + chklogger.logger.warning( + "Error in generating cc-condition at address %s: %s.", + self.iaddr, str(e)) + expr = astree.mk_temp_lval_expression() + return (expr, expr) + def ast_switch_condition_prov(self, astree: ASTInterface) -> Tuple[ Optional[ASTExpr], Optional[ASTExpr]]: From 4707cffe79792d4566cae1b790197cc81b29d243 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 7 Jul 2025 13:31:49 -0700 Subject: [PATCH 6/8] ARM: log error message for lifting indirect calls --- chb/arm/ARMCallOpcode.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/chb/arm/ARMCallOpcode.py b/chb/arm/ARMCallOpcode.py index ba52897c..142f7e16 100644 --- a/chb/arm/ARMCallOpcode.py +++ b/chb/arm/ARMCallOpcode.py @@ -286,6 +286,13 @@ def ast_call_prov( # construct high-level target finfo = xdata.function.finfo + + if finfo.has_call_target(iaddr): + calltarget = finfo.call_target(iaddr) + if calltarget.is_unknown: + chklogger.logger.error( + "BL: Indirect call not yet handled at address %s", iaddr) + if finfo.has_call_target_info(iaddr): ctinfo = finfo.call_target_info(iaddr) ftype = ctinfo.target_interface.bctype @@ -342,6 +349,10 @@ def ast_call_prov( str(hl_lhs), str(hl_tgt), iaddr) hl_lhs = None + else: + chklogger.logger.error( + "BL: Indirect call not yet handled at address %s", iaddr) + # argument data argcount = xd.argument_count From 419829f5d25804592a859fb5fdd7ea9a72d3a949 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 7 Jul 2025 13:32:55 -0700 Subject: [PATCH 7/8] ARM: support for predicate/ternary assignment --- chb/arm/opcodes/ARMMove.py | 254 ++++++++++++++++++++++++++++++++++++- 1 file changed, 251 insertions(+), 3 deletions(-) diff --git a/chb/arm/opcodes/ARMMove.py b/chb/arm/opcodes/ARMMove.py index 38e00bd7..2804feff 100644 --- a/chb/arm/opcodes/ARMMove.py +++ b/chb/arm/opcodes/ARMMove.py @@ -61,6 +61,33 @@ class ARMMoveXData(ARMOpcodeXData): - c expressions: 0: cresult + + Aggregates: + + - expressions with predicate / ternary assignments + + - nondet: + - predicate: none + - ternary: + 0: true assign (constant) + 1: false assign (constant) + + - predicate: + 0: p + 1: xp + + - ternary: + 0: p + 1: xp + 2: true assign (constant) + 3: false assign (constant) + + - c expressions with predicate /ternary assignments + + - nondet: none + + - predicate/ternary: + 0: p """ def __init__(self, xdata: InstrXData) -> None: @@ -70,6 +97,22 @@ def __init__(self, xdata: InstrXData) -> None: def vrd(self) -> "XVariable": return self.var(0, "vrd") + @property + def is_predicate_assign(self) -> bool: + return self.xdata.is_predicate_assignment + + @property + def is_nondet_predicate_assign(self) -> bool: + return self.xdata.is_nondet_predicate_assignment + + @property + def is_ternary_assign(self) -> bool: + return self.xdata.is_ternary_assignment + + @property + def is_nondet_ternary_assign(self) -> bool: + return self.xdata.is_nondet_ternary_assignment + @property def xrm(self) -> "XXpr": return self.xpr(0, "xrm") @@ -90,6 +133,37 @@ def cresult(self) -> "XXpr": def is_cresult_ok(self) -> bool: return self.is_cxpr_ok(0) + @property + def predicate(self) -> "XXpr": + # known to be valid + return self.xpr(0, "p") # known to be valid + + @property + def xpredicate(self) -> "XXpr": + return self.xpr(1, "xp") # known to be valid + + @property + def cpredicate(self) -> "XXpr": + return self.cxpr(0, "cxp") + + @property + def is_cpredicate_ok(self) -> bool: + return self.is_cxpr_ok(0) + + @property + def tern_assign_true(self) -> "XXpr": # known to be valid + if self.is_nondet_ternary_assign: + return self.xpr(1, "xp1") + else: + return self.xpr(3, "xp1") + + @property + def tern_assign_false(self) -> "XXpr": # known to be valid + if self.is_nondet_ternary_assign: + return self.xpr(0, "xp2") + else: + return self.xpr(2, "xp2") + def has_instruction_condition(self) -> bool: return self.xdata.has_instruction_condition() @@ -102,10 +176,37 @@ def has_valid_instruction_c_condition(self) -> bool: def get_instruction_c_condition(self) -> "XXpr": return self.xdata.get_instruction_c_condition() + def predicate_assignment_ann(self) -> str: + if self.is_nondet_predicate_assign: + rhs = "? (unknown predicate)" + else: + crhs = ( + " (C:" + + (str(self.cpredicate) if self.is_cpredicate_ok else "?") + + ")") + rhs = str(self.xpredicate) + crhs + return str(self.vrd) + " := " + rhs + + def ternary_assignment_ann(self) -> str: + rhs = str(self.tern_assign_true) + " : " + str(self.tern_assign_false) + if self.is_nondet_ternary_assign: + rhs = " ? " + rhs + else: + rhs = str(self.xpredicate) + " ? " + rhs + cpred = ( + " (C:" + + (str(self.cpredicate) if self.is_cpredicate_ok else "?") + + ")") + return str(self.vrd) + " := " + rhs + cpred + @property def annotation(self) -> str: if self.xdata.instruction_is_subsumed(): return "subsumed by " + self.xdata.subsumed_by() + if self.is_ternary_assign or self.is_nondet_ternary_assign: + return self.ternary_assignment_ann() + if self.is_predicate_assign or self.is_nondet_predicate_assign: + return self.predicate_assignment_ann() if self.xdata.instruction_subsumes(): return "subsumes " + ", ".join(self.xdata.subsumes()) cx = " (C: " + (str(self.cresult) if self.is_cresult_ok else "None") + ")" @@ -209,6 +310,141 @@ def ast_prov_subsumed( return ([], [ll_assign]) + def ast_prov_predicate_assign( + self, + astree: ASTInterface, + iaddr: str, + bytestring: str, + xdata: InstrXData + ) -> Tuple[List[AST.ASTInstruction], List[AST.ASTInstruction]]: + + annotations: List[str] = [iaddr, "MOV (predicate assign)"] + + # low-level assignment + + (ll_lhs, _, _) = self.opargs[0].ast_lvalue(astree) + (ll_rhs, _, _) = self.opargs[1].ast_rvalue(astree) + + ll_assign = astree.mk_assign( + ll_lhs, + ll_rhs, + iaddr=iaddr, + bytestring=bytestring, + annotations=annotations) + + # high-level assignment + + xd = ARMMoveXData(xdata) + + if xd.is_nondet_predicate_assign: + chklogger.logger.warning( + "Predicate assignment without associated predicate at " + + "address %s", iaddr) + return ([], [ll_assign]) + + rhs = xd.cpredicate if xd.is_cpredicate_ok else xd.xpredicate + lhs = xd.vrd + rdefs = xdata.reachingdefs + defuses = xdata.defuses + defuseshigh = xdata.defuseshigh + + hl_lhs = XU.xvariable_to_ast_lval(lhs, xdata, iaddr, astree, rhs=rhs) + hl_rhs = XU.xxpr_to_ast_def_expr(rhs, xdata, iaddr, astree) + + hl_assign = astree.mk_assign( + hl_lhs, + hl_rhs, + iaddr=iaddr, + bytestring=bytestring, + annotations=annotations) + + astree.add_instr_mapping(hl_assign, ll_assign) + astree.add_instr_address(hl_assign, [iaddr]) + astree.add_expr_mapping(hl_rhs, ll_rhs) + astree.add_lval_mapping(hl_lhs, ll_lhs) + astree.add_expr_reachingdefs(hl_rhs, rdefs[1:]) + astree.add_expr_reachingdefs(ll_rhs, [rdefs[0]]) + astree.add_lval_defuses(hl_lhs, defuses[0]) + astree.add_lval_defuses_high(hl_lhs, defuseshigh[0]) + + if astree.has_register_variable_intro(iaddr): + rvintro = astree.get_register_variable_intro(iaddr) + if rvintro.has_cast(): + astree.add_expose_instruction(hl_assign.instrid) + + return ([hl_assign], [ll_assign]) + + + def ast_prov_ternary_assign( + self, + astree: ASTInterface, + iaddr: str, + bytestring: str, + xdata: InstrXData + ) -> Tuple[List[AST.ASTInstruction], List[AST.ASTInstruction]]: + + annotations: List[str] = [iaddr, "MOV (ternary assign)"] + + # low-level assignment + + (ll_lhs, _, _) = self.opargs[0].ast_lvalue(astree) + (ll_rhs, _, _) = self.opargs[1].ast_rvalue(astree) + + ll_assign = astree.mk_assign( + ll_lhs, + ll_rhs, + iaddr=iaddr, + bytestring=bytestring, + annotations=annotations) + + # high-level assignment + + xd = ARMMoveXData(xdata) + + if xd.is_nondet_ternary_assign: + chklogger.logger.warning( + "Ternary assignment without associated predicate at address %s", + iaddr) + return ([], [ll_assign]) + + rhsp = xd.cpredicate if xd.is_cpredicate_ok else xd.xpredicate + rhs1 = xd.tern_assign_true + rhs2 = xd.tern_assign_false + lhs = xd.vrd + rdefs = xdata.reachingdefs + defuses = xdata.defuses + defuseshigh = xdata.defuseshigh + + hl_lhs = XU.xvariable_to_ast_lval(lhs, xdata, iaddr, astree) + hl_rhsp = XU.xxpr_to_ast_def_expr(rhsp, xdata, iaddr, astree) + hl_rhs1 = XU.xxpr_to_ast_def_expr(rhs1, xdata, iaddr, astree) + hl_rhs2 = XU.xxpr_to_ast_def_expr(rhs2, xdata, iaddr, astree) + hl_rhs = astree.mk_question(hl_rhsp, hl_rhs1, hl_rhs2) + + hl_assign = astree.mk_assign( + hl_lhs, + hl_rhs, + iaddr=iaddr, + bytestring=bytestring, + annotations=annotations) + + astree.add_instr_mapping(hl_assign, ll_assign) + astree.add_instr_address(hl_assign, [iaddr]) + astree.add_expr_mapping(hl_rhs, ll_rhs) + astree.add_lval_mapping(hl_lhs, ll_lhs) + astree.add_expr_reachingdefs(hl_rhs, rdefs[1:]) + astree.add_expr_reachingdefs(ll_rhs, [rdefs[0]]) + astree.add_lval_defuses(hl_lhs, defuses[0]) + astree.add_lval_defuses_high(hl_lhs, defuseshigh[0]) + + if astree.has_register_variable_intro(iaddr): + rvintro = astree.get_register_variable_intro(iaddr) + if rvintro.has_cast(): + astree.add_expose_instruction(hl_assign.instrid) + + return ([hl_assign], [ll_assign]) + + def ast_prov( self, astree: ASTInterface, @@ -232,10 +468,22 @@ def ast_prov( if xdata.instruction_is_subsumed(): return self.ast_prov_subsumed(astree, iaddr, bytestring, xdata) + xd = ARMMoveXData(xdata) + if xdata.instruction_subsumes(): - chklogger.logger.warning( - "MOV instruction at %s is part of an aggregate that is not yet supported", - iaddr) + if xd.is_predicate_assign or xd.is_nondet_predicate_assign: + return self.ast_prov_predicate_assign( + astree, iaddr, bytestring, xdata) + + elif xd.is_ternary_assign or xd.is_nondet_ternary_assign: + return self.ast_prov_ternary_assign( + astree, iaddr, bytestring, xdata) + + else: + chklogger.logger.warning( + "MOV instruction at %s is part of an aggregate that is " + + "not yet supported", + iaddr) return ([], []) # low-level assignment From 286bc46970542445830381ccd2ffe8e770ada6d3 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 7 Jul 2025 13:36:36 -0700 Subject: [PATCH 8/8] ARM: updates for result types / c expressions --- chb/arm/opcodes/ARMBitwiseAnd.py | 7 +- chb/arm/opcodes/ARMBranchExchange.py | 12 --- .../opcodes/ARMByteReversePackedHalfword.py | 43 +++++------ chb/arm/opcodes/ARMByteReverseWord.py | 38 ++++------ .../opcodes/ARMLoadMultipleIncrementAfter.py | 2 +- chb/arm/opcodes/ARMLoadRegister.py | 8 ++ chb/arm/opcodes/ARMLogicalShiftLeft.py | 76 ++++++++++++++----- chb/arm/opcodes/ARMLogicalShiftRight.py | 76 ++++++++++++++----- chb/arm/opcodes/ARMPop.py | 6 -- 9 files changed, 165 insertions(+), 103 deletions(-) diff --git a/chb/arm/opcodes/ARMBitwiseAnd.py b/chb/arm/opcodes/ARMBitwiseAnd.py index 7f07506c..000ae69c 100644 --- a/chb/arm/opcodes/ARMBitwiseAnd.py +++ b/chb/arm/opcodes/ARMBitwiseAnd.py @@ -160,10 +160,15 @@ def operands(self) -> List[ARMOperand]: def opargs(self) -> List[ARMOperand]: return [self.armd.arm_operand(i) for i in self.args[1:-1]] + @property + def writeback(self) -> bool: + return self.args[0] == 1 + def mnemonic_extension(self) -> str: + wb = "S" if self.writeback else "" cc = ARMOpcode.mnemonic_extension(self) wide = ".W" if self.args[4] == 1 else "" - return cc + wide + return wb + cc + wide def annotation(self, xdata: InstrXData) -> str: xd = ARMBitwiseAndXData(xdata) diff --git a/chb/arm/opcodes/ARMBranchExchange.py b/chb/arm/opcodes/ARMBranchExchange.py index b21e35b0..887cbd74 100644 --- a/chb/arm/opcodes/ARMBranchExchange.py +++ b/chb/arm/opcodes/ARMBranchExchange.py @@ -89,18 +89,6 @@ def has_creturnval(self) -> bool: def creturnval(self) -> "XXpr": return self.xdata.get_return_cxpr() - def has_instruction_condition(self) -> bool: - return self.xdata.has_instruction_condition() - - def get_instruction_condition(self) -> "XXpr": - return self.xdata.get_instruction_condition() - - def has_valid_instruction_c_condition(self) -> bool: - return self.xdata.has_valid_instruction_c_condition() - - def get_instruction_c_condition(self) -> "XXpr": - return self.xdata.get_instruction_c_condition() - @property def annotation(self) -> str: if self.xdata.is_bx_call: diff --git a/chb/arm/opcodes/ARMByteReversePackedHalfword.py b/chb/arm/opcodes/ARMByteReversePackedHalfword.py index de412416..3a5b10be 100644 --- a/chb/arm/opcodes/ARMByteReversePackedHalfword.py +++ b/chb/arm/opcodes/ARMByteReversePackedHalfword.py @@ -38,9 +38,11 @@ from chb.astinterface.ASTInterface import ASTInterface import chb.invariants.XXprUtil as XU -import chb.util.fileutil as UF +import chb.util.fileutil as UF from chb.util.IndexedTable import IndexedTableValue +from chb.util.loggingutil import chklogger + if TYPE_CHECKING: from chb.arm.ARMDictionary import ARMDictionary @@ -155,11 +157,7 @@ def ast_prov( annotations: List[str] = [iaddr, "REV16"] - lhs = xdata.vars[0] - rhs = xdata.xprs[0] - rdefs = xdata.reachingdefs - defuses = xdata.defuses - defuseshigh = xdata.defuseshigh + # low-level assignment (ll_lhs, _, _) = self.opargs[0].ast_lvalue(astree) (ll_rhs, _, _) = self.opargs[1].ast_rvalue(astree) @@ -171,30 +169,23 @@ def ast_prov( iaddr=iaddr, bytestring=bytestring) - lhsasts = XU.xvariable_to_ast_lvals(lhs, xdata, astree) - if len(lhsasts) == 0: - raise UF.CHBError( - "ByteReversePatckedHalfword (REV16): no lval found") + # high-level assignment - if len(lhsasts) > 1: - raise UF.CHBError( - "ByteReversePackedHalfword (REV16): multiple lvals in ast: " - + ", ".join(str(v) for v in lhsasts)) - - hl_lhs = lhsasts[0] + xd = ARMByteReversePackedHalfwordXData(xdata) + if not xd.is_ok: + chklogger.logger.error( + "Encountered error value at address %s", iaddr) + return ([], []) - rhsasts = XU.xxpr_to_ast_def_exprs(rhs, xdata, iaddr, astree) - if len(rhsasts) == 0: - raise UF.CHBError( - "ByteReversePackedHalfword (REV16): no argument value found") + lhs = xd.vrd + rhs = xd.xxrm - if len(rhsasts) > 1: - raise UF.CHBError( - "ByteReversePackedHalfword (REV16): " - + "multiple argument values in asts: " - + ", ".join(str(x) for x in rhsasts)) + rdefs = xdata.reachingdefs + defuses = xdata.defuses + defuseshigh = xdata.defuseshigh - hl_rhs = rhsasts[0] + hl_lhs = XU.xvariable_to_ast_lval(lhs, xdata, iaddr, astree) + hl_rhs = XU.xxpr_to_ast_def_expr(rhs, xdata, iaddr, astree) if astree.has_variable_intro(iaddr): vname = astree.get_variable_intro(iaddr) diff --git a/chb/arm/opcodes/ARMByteReverseWord.py b/chb/arm/opcodes/ARMByteReverseWord.py index 595dd878..1d6b2e36 100644 --- a/chb/arm/opcodes/ARMByteReverseWord.py +++ b/chb/arm/opcodes/ARMByteReverseWord.py @@ -39,8 +39,9 @@ import chb.invariants.XXprUtil as XU import chb.util.fileutil as UF - from chb.util.IndexedTable import IndexedTableValue +from chb.util.loggingutil import chklogger + if TYPE_CHECKING: from chb.arm.ARMDictionary import ARMDictionary @@ -157,11 +158,7 @@ def ast_prov( annotations: List[str] = [iaddr, "REV"] - lhs = xdata.vars[0] - rhs = xdata.xprs[0] - rdefs = xdata.reachingdefs - defuses = xdata.defuses - defuseshigh = xdata.defuseshigh + # low-level assignment (ll_lhs, _, _) = self.opargs[0].ast_lvalue(astree) (ll_rhs, _, _) = self.opargs[1].ast_rvalue(astree) @@ -173,27 +170,22 @@ def ast_prov( iaddr=iaddr, bytestring=bytestring) - lhsasts = XU.xvariable_to_ast_lvals(lhs, xdata, astree) - if len(lhsasts) == 0: - raise UF.CHBError("ByteReverseWord (REV): no lval found") + # high-level assignment - if len(lhsasts) > 1: - raise UF.CHBError( - "ByteReverseWord (REV): multiple lvals in ast: " - + ", ".join(str(v) for v in lhsasts)) - - hl_lhs = lhsasts[0] + xd = ARMByteReverseWordXData(xdata) + if not xd.is_ok: + chklogger.logger.error( + "REV: Encountered error value at address %s", iaddr) - rhsasts = XU.xxpr_to_ast_def_exprs(rhs, xdata, iaddr, astree) - if len(rhsasts) == 0: - raise UF.CHBError("ByteReverseWord (REV): no argument value found") + lhs = xd.vrd + rhs = xd.xxrn - if len(rhsasts) > 1: - raise UF.CHBError( - "ByteReverseWord (REV): multiple argument values in asts: " - + ", ".join(str(x) for x in rhsasts)) + rdefs = xdata.reachingdefs + defuses = xdata.defuses + defuseshigh = xdata.defuseshigh - hl_rhs = rhsasts[0] + hl_lhs = XU.xvariable_to_ast_lval(lhs, xdata, iaddr, astree) + hl_rhs = XU.xxpr_to_ast_def_expr(rhs, xdata, iaddr, astree) if astree.has_variable_intro(iaddr): vname = astree.get_variable_intro(iaddr) diff --git a/chb/arm/opcodes/ARMLoadMultipleIncrementAfter.py b/chb/arm/opcodes/ARMLoadMultipleIncrementAfter.py index 5300c7f5..92bde135 100644 --- a/chb/arm/opcodes/ARMLoadMultipleIncrementAfter.py +++ b/chb/arm/opcodes/ARMLoadMultipleIncrementAfter.py @@ -193,7 +193,7 @@ def annotation(self) -> str: else: pairs = [] if len(pairs) > 0: - assigns = "; ".join(str(v) + " := " + str(x) for (x, v) in pairs) + assigns = "; ".join(str(v) + " := " + str(x) for (v, x) in pairs) else: assigns = "unknown rhs memory" wbu = self.writeback_update() diff --git a/chb/arm/opcodes/ARMLoadRegister.py b/chb/arm/opcodes/ARMLoadRegister.py index 187160d4..8c979e96 100644 --- a/chb/arm/opcodes/ARMLoadRegister.py +++ b/chb/arm/opcodes/ARMLoadRegister.py @@ -152,6 +152,8 @@ def annotation(self) -> str: str(self.vrt) + " := *(" + str(self.xaddr) + ")" + cx + caddr) else: assignment = "Error value" + pcwarning = "**indirect call** " if str(self.vrt) == "PC" else "" + assignment = pcwarning + assignment return self.add_instruction_condition(assignment) + wbu @@ -251,6 +253,12 @@ def has_cast() -> bool: lhs = xd.vrt + if str(lhs) == "PC": + chklogger.logger.error( + "LDR: Indirect call via Load to PC not yet handled at %s", + iaddr) + return ([], (ll_pre + [ll_assign] + ll_post)) + if xd.is_ok: rhs = xd.cxrmem rhsval = None if has_cast() else xd.cxrmem diff --git a/chb/arm/opcodes/ARMLogicalShiftLeft.py b/chb/arm/opcodes/ARMLogicalShiftLeft.py index ea79560a..13248b00 100644 --- a/chb/arm/opcodes/ARMLogicalShiftLeft.py +++ b/chb/arm/opcodes/ARMLogicalShiftLeft.py @@ -49,6 +49,19 @@ class ARMLogicalShiftLeftXData(ARMOpcodeXData): + """Data format: + - variables: + 0: vrd + + - expressions: + 0: xrn + 1: xrm + 2: result + 3: rresult (result rewritten) + + - c expresions: + 0: cresult + """ def __init__(self, xdata: InstrXData) -> None: ARMOpcodeXData.__init__(self, xdata) @@ -69,18 +82,41 @@ def xrm(self) -> "XXpr": def result(self) -> "XXpr": return self.xpr(2, "result") + @property + def is_result_ok(self) -> bool: + return self.is_xpr_ok(2) + @property def rresult(self) -> "XXpr": return self.xpr(3, "rresult") + @property + def is_rresult_ok(self) -> bool: + return self.is_xpr_ok(3) + + @property + def cresult(self) -> "XXpr": + return self.cxpr(0, "cresult") + + @property + def is_cresult_ok(self) -> bool: + return self.is_cxpr_ok(0) + @property def result_simplified(self) -> str: - return simplify_result( - self.xdata.args[3], self.xdata.args[4], self.result, self.rresult) + if self.is_result_ok and self.is_rresult_ok: + return simplify_result( + self.xdata.args[3], self.xdata.args[4], self.result, self.rresult) + else: + return str(self.xrn) + " << " + str(self.xrm) @property def annotation(self) -> str: - assignment = str(self.vrd) + " := " + self.result_simplified + cresult = ( + " (C: " + + (str(self.cresult) if self.is_cresult_ok else "None") + + ")") + assignment = str(self.vrd) + " := " + self.result_simplified + cresult return self.add_instruction_condition(assignment) @@ -156,10 +192,7 @@ def lsl_xdata(self, xdata: InstrXData) -> ARMLogicalShiftLeftXData: def annotation(self, xdata: InstrXData) -> str: xd = ARMLogicalShiftLeftXData(xdata) - if xd.is_ok: - return xd.annotation - else: - return "Error value" + return xd.annotation def ast_prov( self, @@ -185,24 +218,35 @@ def ast_prov( bytestring=bytestring, annotations=annotations) + rdefs = xdata.reachingdefs + + astree.add_expr_reachingdefs(ll_rhs1, [rdefs[0]]) + astree.add_expr_reachingdefs(ll_rhs2, [rdefs[1]]) + # high-level assignment xd = ARMLogicalShiftLeftXData(xdata) - if not xd.is_ok: + + if xd.is_cresult_ok and xd.is_rresult_ok: + rhs = xd.cresult + + elif xd.is_rresult_ok: + rhs = xd.rresult + + elif xd.is_result_ok: + rhs = xd.result + + else: chklogger.logger.error( - "Encountered error value at address %s", iaddr) - return ([], []) + "LSL: Encountered error value for rhs address %s", iaddr) + return ([], [ll_assign]) lhs = xd.vrd - rhs1 = xd.xrn - rhs2 = xd.xrm - rresult = xd.rresult - rdefs = xdata.reachingdefs defuses = xdata.defuses defuseshigh = xdata.defuseshigh hl_lhs = XU.xvariable_to_ast_lval(lhs, xdata, iaddr, astree) - hl_rhs = XU.xxpr_to_ast_def_expr(rresult, xdata, iaddr, astree) + hl_rhs = XU.xxpr_to_ast_def_expr(rhs, xdata, iaddr, astree) hl_assign = astree.mk_assign( hl_lhs, @@ -216,8 +260,6 @@ def ast_prov( astree.add_expr_mapping(hl_rhs, ll_rhs) astree.add_lval_mapping(hl_lhs, ll_lhs) astree.add_expr_reachingdefs(ll_rhs, [rdefs[0], rdefs[1]]) - astree.add_expr_reachingdefs(ll_rhs1, [rdefs[0]]) - astree.add_expr_reachingdefs(ll_rhs2, [rdefs[1]]) astree.add_expr_reachingdefs(hl_rhs, rdefs[2:]) astree.add_lval_defuses(hl_lhs, defuses[0]) astree.add_lval_defuses_high(hl_lhs, defuseshigh[0]) diff --git a/chb/arm/opcodes/ARMLogicalShiftRight.py b/chb/arm/opcodes/ARMLogicalShiftRight.py index c59a8b5f..c1868f91 100644 --- a/chb/arm/opcodes/ARMLogicalShiftRight.py +++ b/chb/arm/opcodes/ARMLogicalShiftRight.py @@ -49,6 +49,19 @@ class ARMLogicalShiftRightXData(ARMOpcodeXData): + """Data format: + - variables: + 0: vrd + + - expressions: + 0: xrn + 1: xrm + 2: result + 3: rresult (result rewritten) + + - c expresions: + 0: cresult + """ def __init__(self, xdata: InstrXData) -> None: ARMOpcodeXData.__init__(self, xdata) @@ -69,18 +82,41 @@ def xrm(self) -> "XXpr": def result(self) -> "XXpr": return self.xpr(2, "result") + @property + def is_result_ok(self) -> bool: + return self.is_xpr_ok(2) + @property def rresult(self) -> "XXpr": return self.xpr(3, "rresult") + @property + def is_rresult_ok(self) -> bool: + return self.is_xpr_ok(3) + + @property + def cresult(self) -> "XXpr": + return self.cxpr(0, "cresult") + + @property + def is_cresult_ok(self) -> bool: + return self.is_cxpr_ok(0) + @property def result_simplified(self) -> str: - return simplify_result( - self.xdata.args[3], self.xdata.args[4], self.result, self.rresult) + if self.is_result_ok and self.is_rresult_ok: + return simplify_result( + self.xdata.args[3], self.xdata.args[4], self.result, self.rresult) + else: + return str(self.xrn) + " >> " + str(self.xrm) @property def annotation(self) -> str: - assignment = str(self.vrd) + " := " + self.result_simplified + cresult = ( + " (C: " + + (str(self.cresult) if self.is_cresult_ok else "None") + + ")") + assignment = str(self.vrd) + " := " + self.result_simplified + cresult return self.add_instruction_condition(assignment) @@ -145,10 +181,7 @@ def is_writeback(self) -> bool: def annotation(self, xdata: InstrXData) -> str: xd = ARMLogicalShiftRightXData(xdata) - if xd.is_ok: - return xd.annotation - else: - return "Error Value" + return xd.annotation def ast_prov( self, @@ -174,24 +207,35 @@ def ast_prov( bytestring=bytestring, annotations=annotations) + rdefs = xdata.reachingdefs + + astree.add_expr_reachingdefs(ll_rhs1, [rdefs[0]]) + astree.add_expr_reachingdefs(ll_rhs2, [rdefs[1]]) + # high-level assignment xd = ARMLogicalShiftRightXData(xdata) - if not xd.is_ok: + + if xd.is_cresult_ok and xd.is_rresult_ok: + rhs = xd.cresult + + elif xd.is_rresult_ok: + rhs = xd.rresult + + elif xd.is_result_ok: + rhs = xd.result + + else: chklogger.logger.error( - "Encountered error value at address %s", iaddr) - return ([], []) + "LSR: Encountered error value for rhs address %s", iaddr) + return ([], [ll_assign]) lhs = xd.vrd - rhs1 = xd.xrn - rhs2 = xd.xrm - rresult = xd.rresult - rdefs = xdata.reachingdefs defuses = xdata.defuses defuseshigh = xdata.defuseshigh hl_lhs = XU.xvariable_to_ast_lval(lhs, xdata, iaddr, astree) - hl_rhs = XU.xxpr_to_ast_def_expr(rresult, xdata, iaddr, astree) + hl_rhs = XU.xxpr_to_ast_def_expr(rhs, xdata, iaddr, astree) hl_assign = astree.mk_assign( hl_lhs, @@ -205,8 +249,6 @@ def ast_prov( astree.add_expr_mapping(hl_rhs, ll_rhs) astree.add_lval_mapping(hl_lhs, ll_lhs) astree.add_expr_reachingdefs(ll_rhs, [rdefs[0], rdefs[1]]) - astree.add_expr_reachingdefs(ll_rhs1, [rdefs[0]]) - astree.add_expr_reachingdefs(ll_rhs2, [rdefs[1]]) astree.add_expr_reachingdefs(hl_rhs, rdefs[2:]) astree.add_lval_defuses(hl_lhs, defuses[0]) astree.add_lval_defuses_high(hl_lhs, defuseshigh[0]) diff --git a/chb/arm/opcodes/ARMPop.py b/chb/arm/opcodes/ARMPop.py index 2786a7f8..32df70ca 100644 --- a/chb/arm/opcodes/ARMPop.py +++ b/chb/arm/opcodes/ARMPop.py @@ -228,12 +228,6 @@ def ast_condition_prov( if xdata.has_instruction_condition(): pcond = xdata.get_instruction_condition() - ''' - if reverse: - pcond = xdata.xprs[(2 * len(xdata.vars)) + 3] - else: - pcond = xdata.xprs[(2 * len(xdata.vars)) + 2] - ''' hl_astcond = XU.xxpr_to_ast_def_expr(pcond, xdata, iaddr, astree) astree.add_expr_mapping(hl_astcond, ll_astcond)