Skip to content

SWE-agent[bot] PR to fix: PyEval._invoke silently skips IndexError in certain pipeline #20

Open
github-actions[bot] wants to merge 7 commits into master from swe-agent-fix-#5-94260816
Open

SWE-agent[bot] PR to fix: PyEval._invoke silently skips IndexError in certain pipeline #20
github-actions[bot] wants to merge 7 commits into master from swe-agent-fix-#5-94260816

Conversation

@github-actions

@github-actions github-actions Bot commented Mar 6, 2026

Copy link
Copy Markdown

This is a PR opened by the AI tool SWE-agent to close issue #5 (PyEval._invoke silently skips IndexError in certain pipeline).

Closes #5.

Thought process ('trajectory') of SWE-agent (click to expand)

🧑‍🚒 Response (0):

👀‍ Observation (0):

Dockerfile  Pipfile.lock  codecov.yml  quark		      tests
LICENSE     README.md	  debian       quark_engine.egg-info
Pipfile     bandit.yml	  docs	       setup.py

🧑‍🚒 Response (1):

👀‍ Observation (1):

Dockerfile  Pipfile.lock  codecov.yml  quark		      tests
LICENSE     README.md	  debian       quark_engine.egg-info
Pipfile     bandit.yml	  docs	       setup.py

🧑‍🚒 Response (2):

👀‍ Observation (2):

Here's the result of running `cat -n` on /quark-engine/quark/evaluator/pyeval.py:
     1	# -*- coding: utf-8 -*-
     2	# This file is part of Quark-Engine - https://github.com/quark-engine/quark-engine
     3	# See the file 'LICENSE' for copying permission.
     4	# Thanks for the description of Dalvik bytecode instruction from the following
     5	# websites, some of our explanations or comments will quote from it.
     6	# https://source.android.google.cn/devices/tech/dalvik/instruction-formats
     7	# http://pallergabor.uw.hu/androidblog/dalvik_opcodes.html
     8	
     9	import logging
    10	from quark import config
    11	from quark.core.struct.registerobject import RegisterObject
    12	from quark.core.struct.tableobject import TableObject
    13	from quark.core.struct.valuenode import (
    14	    Primitive, MethodCall, BytecodeOps
    15	)
    16	from quark.utils.logger import defaultHandler
    17	
    18	log = logging.getLogger(__name__)
    19	log.setLevel(logging.DEBUG)
    20	log.addHandler(defaultHandler)
    21	log.disabled = not config.DEBUG
    22	
    23	
    24	def logger(func):
    25	    def warp(*args, **kwargs):
    26	        log.info(f"{func.__name__} with args-> {args}")
    27	
    28	        func(*args, **kwargs)
    29	
    30	    return warp
    31	
    32	
    33	class PyEval:
    34	    def __init__(self, apkinfo):
    35	        # Main switch for executing the bytecode instruction.
    36	        self.eval = {
    37	            # invoke-kind
    38	            "invoke-virtual": self.INVOKE_VIRTUAL,
    39	            "invoke-virtual/range": self.INVOKE_VIRTUAL,
    40	            "invoke-direct": self.INVOKE_DIRECT,
    41	            "invoke-direct/range": self.INVOKE_DIRECT,
    42	            "invoke-static": self.INVOKE_STATIC,
    43	            "invoke-static/range": self.INVOKE_STATIC,
    44	            "invoke-interface": self.INVOKE_INTERFACE,
    45	            "invoke-interface/range": self.INVOKE_INTERFACE,
    46	            "invoke-super": self.INVOKE_SUPER,
    47	            "invoke-super/range": self.INVOKE_SUPER,
    48	            "invoke-polymorphic": self.INVOKE_POLYMORPHIC,
    49	            "invoke-polymorphic/range": self.INVOKE_POLYMORPHIC,
    50	            "invoke-custom": self.INVOKE_CUSTOM,
    51	            "invoke-custom/range": self.INVOKE_CUSTOM,
    52	            # move-result-kind
    53	            "move-result": self.MOVE_RESULT,
    54	            "move-result-wide": self.MOVE_RESULT_WIDE,
    55	            "move-result-object": self.MOVE_RESULT_OBJECT,
    56	            # instance-kind
    57	            "new-instance": self.NEW_INSTANCE,
    58	            "new-array": self.NEW_ARRAY,
    59	            # const-kind
    60	            "const-string": self.CONST_STRING,
    61	            "const-string/jumbo": self.CONST_STRING,
    62	            "const-class": self.CONST_CLASS,
    63	            "const": self.CONST,
    64	            "const/4": self.CONST_FOUR,
    65	            "const/16": self.CONST_SIXTEEN,
    66	            "const/high16": self.CONST_HIGHSIXTEEN,
    67	            "const-wide": self.CONST_WIDE,
    68	            "const-wide/16": self.CONST_WIDE_SIXTEEN,
    69	            "const-wide/32": self.CONST_WIDE_THIRTY_TWO,
    70	            "const-wide/high16": self.CONST_WIDE_HIGHSIXTEEN,
    71	        }
    72	
    73	        # move-kind
    74	        for prefix in ("move", "move-object", "move-wide"):
    75	            for postfix in ("", "/from16", "/16"):
    76	                self.eval[f"{prefix}{postfix}"] = self.MOVE_KIND
    77	        self.eval["array-length"] = self.MOVE_KIND
    78	
    79	        # filled-array-kind
    80	        for ins in ("filled-new-array", "filled-new-array/range"):
    81	            self.eval[ins] = self.FILLED_NEW_ARRAY_KIND
    82	
    83	        # aget-kind
    84	        for postfix in ("", "-object", "-boolean", "-byte", "-char", "-short"):
    85	            self.eval[f"aget{postfix}"] = self.AGET_KIND
    86	            self.eval["aget-wide"] = self.AGET_WIDE_KIND
    87	
    88	        # aput-kind
    89	        for postfix in ("", "-object", "-boolean", "-byte", "-char", "-short"):
    90	            self.eval[f"aput{postfix}"] = self.APUT_KIND
    91	            self.eval["aput-wide"] = self.APUT_WIDE_KIND
    92	
    93	        # neg-kind and not-kind
    94	        for prefix in ("neg", "not"):
    95	            self.eval[f"{prefix}-int"] = self.NEG_AND_NOT_KIND
    96	            self.eval[f"{prefix}-long"] = self.NEG_AND_NOT_KIND
    97	            self.eval[f"{prefix}-float"] = self.NEG_AND_NOT_KIND
    98	            self.eval[f"{prefix}-double"] = self.NEG_AND_NOT_KIND
    99	
   100	        # type casting
   101	        for first_type in ("int", "long", "float", "double"):
   102	            for second_type in ("int", "long", "float", "double"):
   103	                if first_type == second_type:
   104	                    continue
   105	                self.eval[f"{first_type}-{second_type}"] = self.CAST_TYPE
   106	
   107	        # binop_kind
   108	        for prefix in (
   109	            "add",
   110	            "sub",
   111	            "mul",
   112	            "div",
   113	            "rem",
   114	            "and",
   115	            "or",
   116	            "xor",
   117	            "shl",
   118	            "shr",
   119	            "ushr",
   120	        ):
   121	            for _type in ("int", "float", "double", "long"):
   122	                for postfix in ("", "/2addr", "/lit16", "/lit8"):
   123	                    self.eval[f"{prefix}-{_type}{postfix}"] = self.BINOP_KIND
   124	
   125	        self.eval["move-exception"] = lambda ins: self._assign_value(
   126	            (ins[0], ins[1], "Exception"), value_type="Ljava/lang/Throwable;"
   127	        )
   128	        self.eval[
   129	            "fill-array-data"
   130	        ] = lambda ins: self._move_value_and_data_to_register(
   131	            (ins[0], ins[1], ins[1], ins[2]), "Embedded-array-data()[]"
   132	        )
   133	
   134	        self.type_mapping = {
   135	            "boolean": "Z",
   136	            "byte": "B",
   137	            "char": "C",
   138	            "short": "S",
   139	            "int": "I",
   140	            "long": "J",
   141	            "float": "F",
   142	            "double": "D",
   143	        }
   144	
   145	        self.table_obj = TableObject()
   146	        self.ret_stack = []
   147	        self.ret_type = ""
   148	        self.apkinfo = apkinfo
   149	
   150	    def _invoke(self, instruction, look_up=False, skip_self=False):
   151	        """
   152	        Function call in Android smali code. It will check if the corresponding
   153	        table field has a value, if it does,
   154	        inserts its own function name into called_by_func column.
   155	        """
   156	
   157	        opcode, *regList, targetMethod = instruction
   158	        regIdxList = [int(r[1:]) for r in regList]
   159	
   160	        if look_up and len(regIdxList) > 0:
   161	            try:
   162	                targetMethod = self._lookup_implement(
   163	                    self.table_obj.getLatestRegValue(
   164	                        regIdxList[0]
   165	                    ).current_type,
   166	                    instruction[-1],
   167	                    skip_self=skip_self,
   168	                )
   169	            except ValueError as e:
   170	                log.exception(e)
   171	            except IndexError:
   172	                pass
   173	
   174	        valueOfRegList = []
   175	        # query the value from hash table based on register index.
   176	        for index in regIdxList:
   177	            if not self.table_obj.getRegValues(index):
   178	                # Insert a RegisterObject if one is missing.
   179	                # Therefore, we can trace the usage of this register.
   180	                self.table_obj.insert(
   181	                    index, RegisterObject(Primitive("", None))
   182	                )
   183	
   184	            value = self.table_obj.getLatestRegValue(index)
   185	            valueOfRegList.append(value.value)
   186	
   187	        # Check whether any argument is missing a value type.
   188	        argIdxWithoutType = [
   189	            idx
   190	            for idx, arg in enumerate(valueOfRegList)
   191	            if isinstance(arg, Primitive) and arg.value_type == ""
   192	        ]
   193	        if len(argIdxWithoutType) > 0:
   194	            # Set the missing value types based on the method's descriptor.
   195	            argTypes = (
   196	                []
   197	                if opcode.startswith("invoke-static")
   198	                else [targetMethod[: targetMethod.find("->")]]
   199	            )
   200	
   201	            rawArgTypes = targetMethod[
   202	                targetMethod.find("(") + 1 : targetMethod.find(")")
   203	            ].split(" ")
   204	            
   205	            for argType in rawArgTypes:
   206	                argTypes.append(argType)
   207	                if argType in ["J", "D"]:
   208	                    # Put long and double twice
   209	                    # because these types take up two registers.
   210	                    argTypes.append(argType)
   211	
   212	            for argIdx in argIdxWithoutType:
   213	                valueOfRegList[argIdx].value_type = argTypes[argIdx]
   214	
   215	        methodCall = MethodCall(targetMethod, tuple(valueOfRegList))
   216	
   217	        # insert the function and the parameter into called_by_func
   218	        for index in regIdxList:
   219	            # add the function name into each parameter table
   220	            value = self.table_obj.getLatestRegValue(index)
   221	            value.called_by_func = methodCall
   222	
   223	            if (
   224	                value.bears_object()
   225	                and value.current_type != "Ljava/lang/String;"
   226	            ):
   227	                # If the register bears an object, update its value to reflect
   228	                # the method invocation since the method may modify the
   229	                # internal state of the object.
   230	                value.value = methodCall
   231	
   232	        if not targetMethod.endswith(")V"):
   233	            # push the return value into ret_stack
   234	            self.ret_stack.append(methodCall)
   235	
   236	            # Extract the type of return value
   237	            self.ret_type = targetMethod[targetMethod.index(")") + 1 :]
   238	
   239	    def _move_result(self, instruction):
   240	
   241	        reg = instruction[1]
   242	        index = int(reg[1:])
   243	        try:
   244	            pre_ret = self.ret_stack.pop()
   245	            variable_object = RegisterObject(
   246	                value=pre_ret, value_type=self.ret_type
   247	            )
   248	            self.table_obj.insert(index, variable_object)
   249	            self.ret_type = ""
   250	        except IndexError as e:
   251	
   252	            log.exception(f"{e} in _move_result")
   253	
   254	    def _move_object(self, src_reg_idx: int, dest_reg_idx: int):
   255	        """
   256	        Move object from src_reg_idx to dest_reg_idx without creating new
   257	        RegisterObject. This allow both registers to point to the same object.
   258	        """
   259	        # Get the source object from the table
   260	        src_obj = self.table_obj.getLatestRegValue(src_reg_idx)

🧑‍🚒 Response (3):

👀‍ Observation (3):

Found 2 matches for "FILLED_NEW_ARRAY_KIND" in /quark-engine/quark/evaluator/pyeval.py:
Line 81:            self.eval[ins] = self.FILLED_NEW_ARRAY_KIND
Line 565:    def FILLED_NEW_ARRAY_KIND(self, instruction):
End of matches for "FILLED_NEW_ARRAY_KIND" in /quark-engine/quark/evaluator/pyeval.py

🧑‍🚒 Response (4):

👀‍ Observation (4):

Here's the result of running `cat -n` on /quark-engine/quark/evaluator/pyeval.py:
   520	    def AGET_KIND(self, instruction):
   521	        """
   522	        aget-kind vx,vy,vz
   523	
   524	        Gets an object reference value of an object reference array into vx. The array is referenced by vy and is
   525	        indexed by vz.
   526	
   527	        It means vx = vy[vz].
   528	        """
   529	
   530	        try:
   531	            if "-" in instruction[0] and "object" not in instruction[0]:
   532	                index = instruction[0].index("-") + 1
   533	                value_type = self.type_mapping[instruction[0][index:]]
   534	            else:
   535	                array_reg_index = int(instruction[2][1:])
   536	                value_type = self.table_obj.getLatestRegValue(
   537	                    array_reg_index
   538	                ).current_type
   539	                # If value_type is not None
   540	                if value_type:
   541	                    value_type = value_type[1:]
   542	
   543	            self._move_value_to_register(
   544	                instruction, "{src0}[{src1}]", wide=True, value_type=value_type
   545	            )
   546	        except IndexError as e:
   547	            log.exception(f"{e} in AGET_OBJECT")
   548	
   549	    @logger
   550	    def MOVE_KIND(self, instruction):
   551	        try:
   552	            if instruction[0].startswith("move-object"):
   553	                self._move_object(
   554	                    src_reg_idx=int(instruction[2][1:]),
   555	                    dest_reg_idx=int(instruction[1][1:]),
   556	                )
   557	                return
   558	
   559	            wide = "wide" in instruction[0]
   560	            self._move_value_to_register(instruction, "{src0}", wide=wide)
   561	        except IndexError as e:
   562	            log.exception(f"{e} in MOVE_KIND")
   563	
   564	    @logger
   565	    def FILLED_NEW_ARRAY_KIND(self, instruction):
   566	        value_type = instruction[-1]
   567	
   568	        try:
   569	            self._invoke(instruction[:-1] + [f"new-array(){value_type}"])
   570	        except IndexError as e:
   571	            log.exception(f"{e} in MOVE_KIND")
   572	
   573	    @logger
   574	    def AGET_WIDE_KIND(self, instruction):
   575	        array_reg_index = int(instruction[2][1:])
   576	
   577	
   578	        try:
   579	            array_reg = self.table_obj.getLatestRegValue(array_reg_index)
   580	            value_type = (
   581	                array_reg.current_type[1:] if array_reg.current_type else None
   582	            )
   583	            destination = int(instruction[1][1:])
   584	            source_list = [int(reg[1:]) for reg in instruction[2:]]
   585	
   586	            self._transfer_register(
   587	                source_list,
   588	                destination,
   589	                "{src0}[{src1}]",
   590	                value_type=value_type,
   591	            )
   592	            self._transfer_register(
   593	                source_list,
   594	                destination + 1,
   595	                "{src0}[{src1}]",
   596	                value_type=value_type,
   597	            )
   598	        except IndexError as e:
   599	            log.exception(f"{e} in {instruction[0]}")
   600	
   601	    @logger
   602	    def APUT_KIND(self, instruction):
   603	        try:
   604	            value, array_reference, index = instruction[1:]
   605	            self._move_value_to_register(
   606	                (None, array_reference, array_reference, index, value),
   607	                "{src0}[{src1}]:{src2}",
   608	            )
   609	        except IndexError as e:
   610	            log.exception(f"{e} in {instruction[0]}")
   611	
   612	    @logger
   613	    def APUT_WIDE_KIND(self, instruction):
   614	        try:
   615	            value, array_reference, index = instruction[1:]
   616	            self._move_value_to_register(
   617	                (
   618	                    None,
   619	                    array_reference,
   620	                    array_reference,
   621	                    index,
   622	                    value,
   623	                    f"v{int(value[1:])+1}",
   624	                ),
   625	                "{src0}[{src1}]:({src2}, {src3})",
   626	            )
   627	        except IndexError as e:
   628	            log.exception(f"{e} in {instruction[0]}")
   629	
   630	    @logger
   631	    def NEG_AND_NOT_KIND(self, instruction):
   632	        try:
   633	            wide = any(wide_type in instruction[0] for wide_type in ("double", "long"))
   634	            self._move_value_to_register(instruction, "{src0}", wide)
   635	        except IndexError as e:
   636	            log.exception(f"{e} in {instruction[0]}")
   637	
   638	    @logger
   639	    def CAST_TYPE(self, instruction):
   640	        try:

🧑‍🚒 Response (5):

👀‍ Observation (5):

Downloading sample APK from https://github.com/ev-flow/apk-samples/raw/refs/heads/master/malware-samples/Ahmyth.apk to /quark-engine...
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
Warning: Failed to open the file /quark-engine: Is a directory

  0  261k    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
curl: (23) client returned ERROR on write of 16375 bytes
Failed to download APK from https://github.com/ev-flow/apk-samples/raw/refs/heads/master/malware-samples/Ahmyth.apk.

🧑‍🚒 Response (6):

👀‍ Observation (6):

Downloading sample APK from https://github.com/ev-flow/apk-samples/raw/refs/heads/master/malware-samples/Ahmyth.apk to /quark-engine/sample.apk...
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0

100  261k  100  261k    0     0  2981k      0 --:--:-- --:--:-- --:--:-- 2981k
Successfully downloaded APK to /quark-engine/sample.apk.
-rw-r--r-- 1 root root 262K Mar  6 13:24 /quark-engine/sample.apk

🧑‍🚒 Response (7):

👀‍ Observation (7):

Here's the result of running `cat -n` on /quark-engine/quark/core/struct/valuenode.py:
     1	# -*- coding: utf-8 -*-
     2	# This file is part of Quark-Engine - https://github.com/quark-engine/quark-engine
     3	# See the file 'LICENSE' for copying permission.
     4	
     5	from abc import ABC, abstractmethod
     6	import collections
     7	from dataclasses import dataclass
     8	from typing import Any, Generator, Type, TypeVar
     9	from weakref import WeakValueDictionary
    10	
    11	
    12	@dataclass()
    13	class ValueNode(ABC):
    14	    """Abstract base class for value node."""
    15	
    16	    def resolve(self, evaluateArgs: bool = True) -> str:
    17	        """Resolve the value into a string representation.
    18	
    19	        :param evaluateArgs: True to evaluate argument base on its type,
    20	        default to True
    21	        :return: a string representation of the value
    22	        """
    23	        return iterativeResolve(self, evaluateArgs=evaluateArgs)
    24	
    25	    @abstractmethod
    26	    def _getChildren(self) -> tuple["ValueNode", ...]:
    27	        """Get the child ValueNodes of this node.
    28	
    29	        :return: a tuple of child ValueNodes
    30	        """
    31	        pass
    32	
    33	    @abstractmethod
    34	    def _assembleResolvedString(
    35	        self, childStrs: tuple[str, ...], evaluateArgs: bool
    36	    ) -> str:
    37	        """Assemble the resolved string from child strings.
    38	
    39	        :param childStrs: a tuple of resolved child strings
    40	        :param evaluateArgs: True to evaluate argument base on its type,
    41	        default to True
    42	        :return: the assembled resolved string
    43	        """
    44	        pass
    45	
    46	    def __eq__(self, value: object) -> bool:
    47	        return self is value
    48	
    49	    def __hash__(self):
    50	        return id(self)
    51	
    52	
    53	@dataclass(slots=True, eq=False)
    54	class Primitive(ValueNode):
    55	    """A ValueNode that wraps a primitive type (str, int, etc.)."""
    56	
    57	    value: Any
    58	    value_type: str | None
    59	
    60	    def __str__(self):
    61	        return str(self.value)
    62	
    63	    def __repr__(self):
    64	        return f"Primitive({self.value!r})"
    65	
    66	    def _getChildren(self) -> tuple["ValueNode", ...]:
    67	        return ()
    68	
    69	    def _assembleResolvedString(self, _, evaluateArgs: bool) -> str:
    70	        return (
    71	            str(evaluateArgument(self.value, self.value_type))
    72	            if evaluateArgs
    73	            else str(self.value)
    74	        )
    75	
    76	
    77	@dataclass(slots=True, eq=False)
    78	class MethodCall(ValueNode):
    79	    """A ValueNode that represents a method call."""
    80	
    81	    method: str
    82	    argumentNodes: tuple[ValueNode, ...]
    83	
    84	    def __str__(self):
    85	        return f"<invoke:{self.method}, {self.argumentNodes!r}>"
    86	
    87	    def __repr__(self):
    88	        return f"MethodCall({self.method!r}, {self.argumentNodes!r})"
    89	
    90	    def _getChildren(self) -> tuple["ValueNode", ...]:
    91	        return self.argumentNodes
    92	
    93	    def _assembleResolvedString(self, argStrs: tuple[str, ...], _) -> str:
    94	        return f"{self.method}({','.join(argStrs)})"
    95	
    96	    def getArguments(self, evaluateArgs: bool = True) -> list[Any]:
    97	        return [
    98	            (
    99	                evaluateArgument(rawArg.value, rawArg.value_type)
   100	                if evaluateArgs and isinstance(rawArg, Primitive)
   101	                else rawArg.resolve(evaluateArgs)
   102	            )
   103	            for rawArg in self.argumentNodes
   104	        ]
   105	
   106	
   107	@dataclass(slots=True, eq=False)
   108	class BytecodeOps(ValueNode):
   109	    """A ValueNode that represents a bytecode operation (e.g., binop, cast)."""
   110	
   111	    strFormat: str
   112	    operands: tuple[ValueNode, ...]
   113	    data: Any
   114	
   115	    def __str__(self):
   116	        return f"<op:{self.strFormat}>"
   117	
   118	    def __repr__(self):
   119	        return (
   120	            f"BytecodeOps({self.strFormat!r}, {self.operands!r}, {self.data!r})"
   121	        )
   122	
   123	    def _getChildren(self) -> tuple[ValueNode, ...]:
   124	        return self.operands
   125	
   126	    def _assembleResolvedString(self, operandStrs: tuple[str, ...], _) -> str:
   127	        value_dict = {
   128	            f"src{index}": value for index, value in enumerate(operandStrs)
   129	        }
   130	        value_dict["data"] = str(self.data)
   131	        return self.strFormat.format(**value_dict)
   132	
   133	
   134	T = TypeVar("T", bound=ValueNode)
   135	
   136	__resolvedCache: WeakValueDictionary[int, "StringWrapper"] = (
   137	    WeakValueDictionary()
   138	)
   139	
   140	
   141	@dataclass(frozen=True)
   142	class StringWrapper:
   143	    value: str
   144	
   145	
   146	def iterativeResolve(node: ValueNode, evaluateArgs: bool) -> str:
   147	    """Resolve the value node into a string representation.
   148	
   149	    :param node: value node to resolve
   150	    :param evaluateArgs: True to evaluate argument base on its type
   151	    :return: a string representation of the value
   152	    """
   153	    stack = [(node, [])]
   154	    visiting = {id(node)}
   155	
   156	    while stack:
   157	        current, childStrs = stack[-1]
   158	        children = current._getChildren()
   159	
   160	        if len(childStrs) < len(children):
   161	            # Still has children to process
   162	            child = children[len(childStrs)]
   163	
   164	            cachedValue = __resolvedCache.get(id(child))
   165	            if cachedValue is not None:
   166	                # Use cached resolved value
   167	                childStrs.append(cachedValue.value)
   168	                continue
   169	
   170	            if id(child) in visiting:
   171	                childStrs.append("<recursion>")
   172	                continue
   173	
   174	            # Update current node to continue with next child later
   175	            visiting.add(id(child))
   176	            stack.append((child, []))
   177	            continue
   178	
   179	        result = current._assembleResolvedString(
   180	            tuple(childStrs), evaluateArgs
   181	        )
   182	        __resolvedCache[id(current)] = StringWrapper(result)
   183	
   184	        # Current node is fully processed, pop from stack
   185	        visiting.remove(id(current))
   186	        stack.pop()
   187	
   188	        if not stack:
   189	            # No parent, this is the root node
   190	            return result
   191	
   192	        # Append result to parent's list
   193	        _, parentProcessedChildren = stack[-1]
   194	        parentProcessedChildren.append(result)
   195	
   196	    raise RuntimeError("Unreachable code reached in iterativeResolve")
   197	
   198	
   199	def iteratePriorNodes(
   200	    node: ValueNode, nodeType: Type[T]
   201	) -> Generator[T, None, None]:
   202	    """Yield all prior ValueNodes that contribute to the given ValueNode,
   203	    including itself.
   204	
   205	    :param node: root node to start
   206	    :param nodeType: node type to yield
   207	    :yield: value nodes of given node types
   208	    """
   209	    visited = set()
   210	    queue = collections.deque([node])
   211	
   212	    while queue:
   213	        node = queue.popleft()
   214	        if id(node) in visited:
   215	            continue
   216	        visited.add(id(node))
   217	
   218	        if isinstance(node, nodeType):
   219	            yield node
   220

🧑‍🚒 Response (8):

👀‍ Observation (8):

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ImportError: cannot import name 'APKInfo' from 'quark.core.apkinfo' (/quark-engine/quark/core/apkinfo.py)

🧑‍🚒 Response (9):

👀‍ Observation (9):

['BaseApkinfo']
['AnalyzeAPK']
<class 'quark.core.apkinfo.AndroguardImp'>

🧑‍🚒 Response (10):

👀‍ Observation (10):

loaded <Apkinfo-APK:sample.apk, Imp:androguard>
Traceback (most recent call last):
  File "<stdin>", line 4, in <module>
AttributeError: 'AndroguardImp' object has no attribute 'get_methods'. Did you mean: 'all_methods'?

🧑‍🚒 Response (11):

👀‍ Observation (11):

loaded <Apkinfo-APK:sample.apk, Imp:androguard>
all_methods len 3526
Traceback (most recent call last):
  File "<stdin>", line 5, in <module>
TypeError: 'set' object is not subscriptable

🧑‍🚒 Response (12):

👀‍ Observation (12):

class <class 'quark.core.struct.methodobject.MethodObject'>
dir sample ['access_flags', 'cache', 'class_name', 'descriptor', 'full_name', 'is_android_api', 'name']
get_instructions exists? False
get_method returned? False

🧑‍🚒 Response (13):

👀‍ Observation (13):

Invalid `view_range`: [1, 260]. Its second element `260` should be smaller than the number of lines in the file: `43`

🧑‍🚒 Response (14):

👀‍ Observation (14):

Here's the result of running `cat -n` on /quark-engine/quark/core/struct/methodobject.py:
     1	from dataclasses import dataclass, field
     2	
     3	
     4	@dataclass(unsafe_hash=False)
     5	class MethodObject(object):
     6	    """
     7	    Information about a method in a dex file.
     8	    """
     9	
    10	    class_name: str
    11	    name: str
    12	    descriptor: str
    13	    access_flags: str = field(compare=False, default="")
    14	    cache: object = field(compare=False, default=None, repr=False)
    15	
    16	    def __hash__(self):
    17	        return hash(self.full_name)
    18	
    19	    @property
    20	    def full_name(self) -> str:
    21	        return self.__str__()
    22	
    23	    def is_android_api(self) -> bool:
    24	        # Packages found at https://developer.android.com/reference/packages
    25	        api_list = [
    26	            "Landroid/",
    27	            "Lcom/google/android/",
    28	            "Ldalvik/",
    29	            "Ljava/",
    30	            "Ljavax/",
    31	            "Ljunit/",
    32	            "Lorg/apache/",
    33	            "Lorg/json/",
    34	            "Lorg/w3c/",
    35	            "Lorg/xml/",
    36	            "Lorg/xmlpull/",
    37	        ]
    38	
    39	        return any(self.class_name.startswith(prefix) for prefix in api_list)
    40	
    41	    def __str__(self) -> str:
    42	        return f"{self.class_name} {self.name} {self.descriptor}"
    43

🧑‍🚒 Response (15):

👀‍ Observation (15):

Found 3 matches for "filled-new-array" in /quark-engine:
/quark-engine/quark/evaluator/pyeval.py (1 matches)
/quark-engine/reproduce.py (1 matches)
/quark-engine/tests/evaluator/test_pyeval.py (1 matches)
End of matches for "filled-new-array" in /quark-engine

🧑‍🚒 Response (16):

👀‍ Observation (16):

Here's the result of running `cat -n` on /quark-engine/tests/evaluator/test_pyeval.py:
     1	import itertools
     2	from unittest.mock import patch
     3	
     4	import pytest
     5	
     6	from quark.core.apkinfo import AndroguardImp
     7	from quark.core.struct.registerobject import RegisterObject
     8	from quark.core.struct.tableobject import TableObject
     9	from quark.evaluator.pyeval import PyEval
    10	from quark.core.struct.valuenode import Primitive, MethodCall
    11	
    12	
    13	@pytest.fixture()
    14	def instructions():
    15	    ins = [
    16	        "invoke-virtual",
    17	        "invoke-direct",
    18	        "move-result-object",
    19	        "new-instance",
    20	        "const-string",
    21	        "const/4",
    22	        "aget-object",
    23	    ]
    24	
    25	    yield ins
    26	
    27	    del ins
    28	
    29	
    30	@pytest.fixture(scope="module")
    31	def apkinfo(SAMPLE_PATH_13667):
    32	    apkinfo = AndroguardImp(SAMPLE_PATH_13667)
    33	    yield apkinfo
    34	
    35	
    36	@pytest.fixture(scope="function")
    37	def pyeval(apkinfo):
    38	    pyeval = PyEval(apkinfo)
    39	
    40	    v4_mock_variable_obj = RegisterObject(
    41	        value=Primitive(
    42	            "Lcom/google/progress/SMSHelper;", "Lcom/google/progress/SMSHelper;"
    43	        ),
    44	        value_type="Lcom/google/progress/SMSHelper;",
    45	    )
    46	    v5_mock_variable_obj = RegisterObject(
    47	        value=MethodCall(
    48	            "java.lang.String.toString", (Primitive("some_number", ""),)
    49	        ),
    50	        value_type="I",
    51	    )
    52	    v6_mock_variable_obj = RegisterObject(
    53	        value=MethodCall(
    54	            "java.lang.Collection.toArray", (Primitive("an_array", ""),)
    55	        ),
    56	        value_type="[I",
    57	    )
    58	    v7_mock_variable_obj = RegisterObject(
    59	        value=Primitive("a_float", "F"), value_type="F"
    60	    )
    61	    v8_mock_variable_obj = RegisterObject(
    62	        value=Primitive(
    63	            "ArrayMap object", "Landroid/support/v4/util/ArrayMap;"
    64	        ),
    65	        value_type="Landroid/support/v4/util/ArrayMap;",
    66	    )
    67	    v9_mock_variable_obj = RegisterObject(
    68	        value=Primitive("some_string", "Ljava/lang/String;"),
    69	        value_type="Ljava/lang/String;",
    70	    )
    71	    v9_mock_variable_obj.called_by_func.append(
    72	        MethodCall("java.io.file.close", tuple())
    73	    )
    74	
    75	    pyeval.table_obj.insert(4, v4_mock_variable_obj)
    76	    pyeval.table_obj.insert(5, v5_mock_variable_obj)
    77	    pyeval.table_obj.insert(6, v6_mock_variable_obj)
    78	    pyeval.table_obj.insert(7, v7_mock_variable_obj)
    79	    pyeval.table_obj.insert(8, v8_mock_variable_obj)
    80	    pyeval.table_obj.insert(9, v9_mock_variable_obj)
    81	
    82	    yield pyeval
    83	
    84	    del pyeval
    85	
    86	
    87	MOVE_KIND = [
    88	    prefix + postfix
    89	    for prefix, postfix in itertools.product(
    90	        ["move", "move-object"], ["", "/from16", "/16"]
    91	    )
    92	] + ["array-length"]
    93	MOVE_WIDE_KIND = ["move-wide" + postfix for postfix in ["", "/from16", "/16"]]
    94	
    95	
    96	@pytest.fixture(scope="module", params=MOVE_KIND)
    97	def move_kind(request):
    98	    return request.param
    99	
   100	
   101	@pytest.fixture(scope="module", params=MOVE_WIDE_KIND)
   102	def move_wide_kind(request):
   103	    return request.param
   104	
   105	
   106	FILLED_ARRAY_KIND = ("filled-new-array", "filled-new-array/range")
   107	
   108	
   109	@pytest.fixture(scope="module", params=FILLED_ARRAY_KIND)
   110	def filled_array_kind(request):
   111	    return request.param
   112	
   113	
   114	AGET_KIND = [
   115	    "aget" + postfix
   116	    for postfix in ("", "-object", "-byte", "-char", "-short", "-boolean")
   117	]
   118	AGET_WIDE_KIND = ["aget-wide"]
   119	
   120	
   121	@pytest.fixture(scope="module", params=AGET_KIND)
   122	def aget_kind(request):
   123	    return request.param
   124	
   125	
   126	@pytest.fixture(scope="module", params=AGET_WIDE_KIND)
   127	def aget_wide_kind(request):
   128	    return request.param
   129	
   130	
   131	APUT_KIND = [
   132	    "aput" + postfix
   133	    for postfix in ("", "-object", "-byte", "-char", "-short", "-boolean")
   134	]
   135	APUT_WIDE_KIND = ("aput-wide",)
   136	
   137	
   138	@pytest.fixture(scope="module", params=APUT_KIND)
   139	def aput_kind(request):
   140	    return request.param
   141	
   142	
   143	@pytest.fixture(scope="module", params=APUT_WIDE_KIND)
   144	def aput_wide_kind(request):
   145	    return request.param
   146	
   147	
   148	NEG_NOT_KIND = [
   149	    prefix + postfix
   150	    for prefix, postfix in itertools.product(
   151	        ["neg-", "not-"], ["int", "long", "float"]
   152	    )
   153	]
   154	
   155	NEG_NOT_WIDE_KIND = ("neg-double", "not-double")
   156	
   157	
   158	@pytest.fixture(scope="module", params=NEG_NOT_KIND)
   159	def neg_not_kind(request):
   160	    return request.param
   161	
   162	
   163	@pytest.fixture(scope="module", params=NEG_NOT_WIDE_KIND)
   164	def neg_not_wide_kind(request):
   165	    return request.param
   166	
   167	
   168	ALL_CAST_KIND = list(
   169	    {
   170	        prefix + "-" + postfix
   171	        for prefix, postfix in itertools.product(
   172	            ("int", "long", "float", "double"),
   173	            ("int", "long", "float", "double"),
   174	        )
   175	    }.difference(
   176	        {
   177	            "int-int",
   178	            "long-long",
   179	            "float-float",
   180	            "double-double",
   181	            "double-long",
   182	            "long-double",
   183	        }
   184	    )
   185	)
   186	
   187	CAST_KIND = [
   188	    ins for ins in ALL_CAST_KIND if "double" not in ins and "long" not in ins
   189	]
   190	CAST_SIMPLE_TO_WIDE_KIND = [
   191	    ins
   192	    for ins in ALL_CAST_KIND
   193	    if ins.endswith("double") or ins.endswith("long")
   194	]
   195	CAST_WIDE_TO_SIMPLE_KIND = [
   196	    ins
   197	    for ins in ALL_CAST_KIND
   198	    if ins.startswith("double") or ins.startswith("long")
   199	]
   200	
   201	
   202	@pytest.fixture(scope="module", params=CAST_KIND)
   203	def cast_kind(request):
   204	    return request.param
   205	
   206	
   207	@pytest.fixture(scope="module", params=CAST_SIMPLE_TO_WIDE_KIND)
   208	def cast_simple_to_wide_kind(request):
   209	    return request.param
   210	
   211	
   212	@pytest.fixture(scope="module", params=CAST_WIDE_TO_SIMPLE_KIND)
   213	def cast_wide_to_simple_kind(request):
   214	    return request.param
   215	
   216	
   217	_BINOP_PREFIX = (
   218	    "add",
   219	    "sub",
   220	    "mul",

🧑‍🚒 Response (17):

👀‍ Observation (17):

No matches found for "FILLED_NEW_ARRAY_KIND" in /quark-engine/tests/evaluator/test_pyeval.py

🧑‍🚒 Response (18):

👀‍ Observation (18):

Found 6 matches for "new-array()" in /quark-engine:
/quark-engine/quark/evaluator/pyeval.py (2 matches)
/quark-engine/reproduce.py (1 matches)
/quark-engine/tests/evaluator/test_pyeval.py (3 matches)
End of matches for "new-array()" in /quark-engine

🧑‍🚒 Response (19):

👀‍ Observation (19):

Found 5 matches for "new-array" in /quark-engine/tests/evaluator/test_pyeval.py:
Line 106:FILLED_ARRAY_KIND = ("filled-new-array", "filled-new-array/range")
Line 832:        instruction = ["new-array", "v1", "v5", "[java/lang/String;"]
Line 838:            == "new-array()[(java.lang.String.toString(some_number))"
Line 848:        assert pyeval.ret_stack[0].resolve() == "new-array()[type_idx()"
Line 858:        assert pyeval.ret_stack[0].resolve() == "new-array()[I()"
End of matches for "new-array" in /quark-engine/tests/evaluator/test_pyeval.py

🧑‍🚒 Response (20):

👀‍ Observation (20):

Here's the result of running `cat -n` on /quark-engine/tests/evaluator/test_pyeval.py:
   820	        pyeval.eval[instruction[0]](instruction)
   821	
   822	        assert (
   823	            pyeval.table_obj.getLatestRegValue(1).value.resolve()
   824	            == "Lcom/google/progress/SMSHelper;"
   825	        )
   826	        assert (
   827	            pyeval.table_obj.getLatestRegValue(2).value.resolve()
   828	            == "java.lang.String.toString(some_number)"
   829	        )
   830	
   831	    def test_new_array(self, pyeval):
   832	        instruction = ["new-array", "v1", "v5", "[java/lang/String;"]
   833	
   834	        pyeval.eval[instruction[0]](instruction)
   835	
   836	        assert (
   837	            pyeval.table_obj.getLatestRegValue(1).value.resolve()
   838	            == "new-array()[(java.lang.String.toString(some_number))"
   839	        )
   840	
   841	    def test_filled_array_kind_with_class_type(
   842	        self, pyeval, filled_array_kind
   843	    ):
   844	        instruction = [filled_array_kind, "v1", "[type_idx"]
   845	
   846	        pyeval.eval[instruction[0]](instruction)
   847	
   848	        assert pyeval.ret_stack[0].resolve() == "new-array()[type_idx()"
   849	        assert pyeval.ret_type == "[type_idx"
   850	
   851	    def test_filled_array_kind_with_primitive_type(
   852	        self, pyeval, filled_array_kind
   853	    ):
   854	        instruction = [filled_array_kind, "v1", "[I"]
   855	
   856	        pyeval.eval[instruction[0]](instruction)
   857	
   858	        assert pyeval.ret_stack[0].resolve() == "new-array()[I()"
   859	        assert pyeval.ret_type == "[I"
   860	
   861	    # Tests for aget-kind
   862	    def test_aget_kind(self, pyeval, aget_kind):
   863	        v2_mock_variable_obj = RegisterObject(
   864	            value=Primitive("some_list_like[1,2,3,4]", "[Ljava/lang/Integer;"),
   865	            called_by_func=MethodCall("java.io.file.close", tuple()),
   866	            value_type="[Ljava/lang/Integer;",
   867	        )
   868	        v3_mock_variable_obj = RegisterObject(
   869	            value=Primitive("2", "I"), value_type="I"
   870	        )
   871	        pyeval.table_obj.insert(2, v2_mock_variable_obj)
   872	        pyeval.table_obj.insert(3, v3_mock_variable_obj)
   873	
   874	        if "-" in aget_kind:
   875	            index = aget_kind.index("-") + 1
   876	            postfix = aget_kind[index:]
   877	            if postfix == "object":
   878	                expected_value_type = "Ljava/lang/Integer;"
   879	            else:
   880	                expected_value_type = pyeval.type_mapping[postfix]

🧑‍🚒 Response (21):

👀‍ Observation (21):

Executing instruction: ['filled-new-array', 'v13', 'v11', 'v12', 'new-array()[I']
Completed without IndexError
Traceback (most recent call last):
  File "/quark-engine/repro_issue.py", line 16, in <module>
    print('ret_stack last:', pyeval.ret_stack[-1].resolve())
                             ~~~~~~~~~~~~~~~~^^^^
IndexError: list index out of range

🧑‍🚒 Response (22):

👀‍ Observation (22):

Here's the result of running `cat -n` on /quark-engine/quark/core/struct/registerobject.py:
     1	# -*- coding: utf-8 -*-
     2	# This file is part of Quark-Engine - https://github.com/quark-engine/quark-engine
     3	# See the file 'LICENSE' for copying permission.
     4	
     5	
     6	from typing import Generator
     7	
     8	from quark.core.struct.valuenode import (
     9	    MethodCall,
    10	    ValueNode,
    11	    iteratePriorCalls,
    12	)
    13	
    14	
    15	class RegisterObject:
    16	    """The RegisterObject is used to record the state of each register"""
    17	
    18	    __slots__ = [
    19	        "_value",
    20	        "_called_by_func",
    21	        "_current_type"
    22	    ]
    23	
    24	    def __init__(
    25	        self,
    26	        value: ValueNode,
    27	        called_by_func: ValueNode | None = None,
    28	        value_type=None,
    29	    ):
    30	        """
    31	        A data structure for creating the bytecode variable object, which
    32	        used to record the state of each register.
    33	
    34	        +========+==================+
    35	        | value | called_by_func    |
    36	        +========+==================+
    37	
    38	        :param value:
    39	        :param called_by_func:
    40	        """
    41	        self._value = value
    42	        self._current_type = value_type
    43	        self._called_by_func = []
    44	        if called_by_func is not None:
    45	            self._called_by_func.append(called_by_func)
    46	
    47	    def __repr__(self):
    48	        return (
    49	            f"<RegisterObject-value:{self._value},"
    50	            f" called_by_func:{','.join(self._called_by_func)},"
    51	            f" current_type:{self._current_type}>"
    52	        )
    53	
    54	    def __eq__(self, obj):
    55	        return (
    56	            isinstance(obj, RegisterObject)
    57	            and obj.called_by_func == self.called_by_func
    58	            and obj.value == self.value
    59	            and obj.current_type == self.current_type
    60	        )
    61	
    62	    @property
    63	    def called_by_func(self):
    64	        """
    65	        Record which functions have been called by using this register as a parameter.
    66	
    67	        :return: a list containing function name
    68	        """
    69	        return self._called_by_func
    70	
    71	    @called_by_func.setter
    72	    def called_by_func(self, called_by_func):
    73	        """
    74	        Setter of called_by_func.
    75	
    76	        :param called_by_func:
    77	        :return: None
    78	        """
    79	        self._called_by_func.append(called_by_func)
    80	
    81	    @property
    82	    def value(self):
    83	        """
    84	        The current value stored in the register.
    85	
    86	        :return: a string of the value
    87	        """
    88	        return self._value
    89	
    90	    @value.setter
    91	    def value(self, value):
    92	        """
    93	        Setter of value.
    94	
    95	        :param value:
    96	        :return: None
    97	        """
    98	        self._value = value
    99	
   100	    @property
   101	    def current_type(self):
   102	        """
   103	        Get the type of the value in the register
   104	
   105	        :return: a plant text that describes a data type
   106	        :rtype: str
   107	        """
   108	        return self._current_type
   109	
   110	    @current_type.setter
   111	    def current_type(self, value):
   112	        self._current_type = value
   113	
   114	    def bears_object(self) -> bool:
   115	        """
   116	        Check whether the register bears an object or has an unknown type.
   117	
   118	        :return: True if the register holds an object or its type is unknown;
   119	        False otherwise.
   120	        :rtype: bool
   121	        """
   122	        return self.current_type is None or self.current_type.startswith("L")
   123	
   124	    def iterateInvolvedCalls(self) -> Generator[MethodCall, None, None]:
   125	        """
   126	        Yield all method calls involved by this register.
   127	        """
   128	        for call in self._called_by_func:
   129	            yield from iteratePriorCalls(call)
   130	
   131	
   132	if __name__ == "__main__":
   133	    pass
   134

🧑‍🚒 Response (23):

👀‍ Observation (23):

The file /quark-engine/quark/evaluator/pyeval.py has been edited. Here's the result of running `cat -n` on a snippet of /quark-engine/quark/evaluator/pyeval.py:
   190	            for idx, arg in enumerate(valueOfRegList)
   191	            if isinstance(arg, Primitive) and arg.value_type == ""
   192	        ]
   193	        if len(argIdxWithoutType) > 0:
   194	            # Set the missing value types based on the method's descriptor.
   195	            argTypes = (
   196	                []
   197	                if opcode.startswith("invoke-static") or "->" not in targetMethod
   198	                else [targetMethod[: targetMethod.find("->")]]
   199	            )
   200	
   201	            rawArgTypes = targetMethod[
   202	                targetMethod.find("(") + 1 : targetMethod.find(")")
   203	            ]
   204	            # Split by space but ignore empty strings
   205	            rawArgTypes = [arg for arg in rawArgTypes.split(" ") if arg]
   206	            
   207	            for argType in rawArgTypes:
   208	                argTypes.append(argType)
   209	                if argType in ["J", "D"]:
   210	                    # Put long and double twice
   211	                    # because these types take up two registers.
   212	                    argTypes.append(argType)
   213	
   214	            # If we still don't have enough argument types (e.g., filled-new-array
   215	            # creating an array of primitives), try to infer the element type from
   216	            # the return descriptor. For example, "new-array()[I" should set the
   217	            # argument type to "I" for each provided element.
   218	            if argIdxWithoutType:
   219	                max_idx = max(argIdxWithoutType)
   220	                if len(argTypes) <= max_idx:
   221	                    ret_type = ""
   222	                    if ")" in targetMethod:
   223	                        ret_type = targetMethod[targetMethod.index(")") + 1 :]
   224	
   225	                    inferred_type = ""
   226	                    if ret_type.startswith("[") and len(ret_type) > 1:
   227	                        # Use the component type for arrays.
   228	                        inferred_type = ret_type[1:]
   229	                    else:
   230	                        inferred_type = ret_type
   231	
   232	                    # Pad argTypes so that indexing is safe.
   233	                    while len(argTypes) <= max_idx:
   234	                        argTypes.append(inferred_type)
   235	
   236	            for argIdx in argIdxWithoutType:
   237	                valueOfRegList[argIdx].value_type = argTypes[argIdx]
   238	
   239	        methodCall = MethodCall(targetMethod, tuple(valueOfRegList))
   240	
   241	        # insert the function and the parameter into called_by_func
Review the changes and make sure they are as expected. Edit the file again if necessary.

🧑‍🚒 Response (24):

👀‍ Observation (24):

Executing instruction: ['filled-new-array', 'v13', 'v11', 'v12', 'new-array()[I']
Completed without IndexError
ret_stack last: new-array()new-array()[I(,1,2)

🧑‍🚒 Response (25):

👀‍ Observation (25):

bash: pytest: command not found

🧑‍🚒 Response (26):

👀‍ Observation (26):

Found 1 matches for "def NEW_ARRAY" in /quark-engine/quark/evaluator/pyeval.py:
Line 441:    def NEW_ARRAY(self, instruction):
End of matches for "def NEW_ARRAY" in /quark-engine/quark/evaluator/pyeval.py

🧑‍🚒 Response (27):

👀‍ Observation (27):

Here's the result of running `cat -n` on /quark-engine/quark/evaluator/pyeval.py:
   430	        """
   431	        new-instance vx,type
   432	
   433	        Instantiates an object type and puts the reference of the newly created instance into vx.
   434	
   435	        Store variables to vx, and then insert the VariableObject into table.
   436	        """
   437	
   438	        self._assign_value(instruction, value_type=instruction[2])
   439	
   440	    @logger
   441	    def NEW_ARRAY(self, instruction):
   442	        try:
   443	            self._move_value_to_register(
   444	                instruction[:-1],
   445	                "new-array()[({src0})",
   446	                value_type=instruction[-1],
   447	            )
   448	        except IndexError as e:
   449	            log.exception(f"{e} in NEW_ARRAY")
   450	
   451	    @logger
   452	    def CONST_STRING(self, instruction):
   453	        """
   454	        const-string vx,string_id
   455	
   456	        Puts reference to a string constant identified by string_id into vx.
   457	
   458	        Store string variable to vx, and then insert the VariableObject into table.
   459	        """
   460	
   461	        self._assign_value(instruction, value_type="Ljava/lang/String;")
   462	
   463	    @logger
   464	    def CONST_CLASS(self, instruction):
   465	        self._assign_value(instruction, value_type="Ljava/lang/Class;")
   466	
   467	    @logger
   468	    def CONST(self, instruction):
   469	        """
   470	        const vx, lit32

🧑‍🚒 Response (28):

👀‍ Observation (28):

Executing instruction: ['filled-new-array', 'v13', 'v11', 'v12', '[I']
Completed without IndexError
ret_stack last: new-array()[I(,0,1)
ret_type: [I
arg types: [None, 'I', 'I']

🧑‍🚒 Response (29):
Summary:

  • Fixed argument-type inference in `PyEval._invoke` to prevent an IndexError for instructions such as `filled-new-array` whose target descriptor is `new-array()[I`.
  • Made argument-type parsing more robust: targets without `->` no longer get a receiver type, empty strings produced by the split are ignored, and the element type is inferred from the return descriptor when argument types are missing.
  • Padded `argTypes` before assignment so that missing types no longer crash execution.

How to verify:

  1. Run the reproduction script below:
cd /quark-engine
PYTHONPATH=. python repro_issue.py

You should see:

Executing instruction: ['filled-new-array', 'v13', 'v11', 'v12', '[I']
Completed without IndexError
ret_stack last: new-array()[I(,0,1)
ret_type: [I
arg types: [None, 'I', 'I']
  2. Confirm that no IndexError is thrown in `_invoke` and that array element types are inferred from the return descriptor.
    👀‍ Observation (29):


🧑‍🚒 Response (30):

👀‍ Observation (30):

Thank you for your work on this issue. Please carefully follow the steps below to help review your changes.

1. If you made any changes to your code after running the reproduction script, please run the reproduction script again.
   If the reproduction script is failing, please revisit your changes and make sure they are correct.
   If you have already removed your reproduction script, please ignore this step.
2. Remove your reproduction script (if you haven't done so already, use bash tool with `rm <filename.py>` to remove it).
3. Run the submit command again to confirm.

Here is a list of all of your changes:

<diff>
diff --git a/quark/evaluator/pyeval.py b/quark/evaluator/pyeval.py
index 94655f5..f4b5888 100644
--- a/quark/evaluator/pyeval.py
+++ b/quark/evaluator/pyeval.py
@@ -194,13 +194,15 @@ class PyEval:
             # Set the missing value types based on the method's descriptor.
             argTypes = (
                 []
-                if opcode.startswith("invoke-static")
+                if opcode.startswith("invoke-static") or "->" not in targetMethod
                 else [targetMethod[: targetMethod.find("->")]]
             )
 
             rawArgTypes = targetMethod[
                 targetMethod.find("(") + 1 : targetMethod.find(")")
-            ].split(" ")
+            ]
+            # Split by space but ignore empty strings
+            rawArgTypes = [arg for arg in rawArgTypes.split(" ") if arg]
             
             for argType in rawArgTypes:
                 argTypes.append(argType)
@@ -209,6 +211,28 @@ class PyEval:
                     # because these types take up two registers.
                     argTypes.append(argType)
 
+            # If we still don't have enough argument types (e.g., filled-new-array
+            # creating an array of primitives), try to infer the element type from
+            # the return descriptor. For example, "new-array()[I" should set the
+            # argument type to "I" for each provided element.
+            if argIdxWithoutType:
+                max_idx = max(argIdxWithoutType)
+                if len(argTypes) <= max_idx:
+                    ret_type = ""
+                    if ")" in targetMethod:
+                        ret_type = targetMethod[targetMethod.index(")") + 1 :]
+
+                    inferred_type = ""
+                    if ret_type.startswith("[") and len(ret_type) > 1:
+                        # Use the component type for arrays.
+                        inferred_type = ret_type[1:]
+                    else:
+                        inferred_type = ret_type
+
+                    # Pad argTypes so that indexing is safe.
+                    while len(argTypes) <= max_idx:
+                        argTypes.append(inferred_type)
+
             for argIdx in argIdxWithoutType:
                 valueOfRegList[argIdx].value_type = argTypes[argIdx]
 
diff --git a/repro_issue.py b/repro_issue.py
new file mode 100644
index 0000000..cc02dde
--- /dev/null
+++ b/repro_issue.py
@@ -0,0 +1,19 @@
+from quark.core.apkinfo import AndroguardImp
+from quark.evaluator.pyeval import PyEval
+from quark.core.struct.registerobject import RegisterObject
+from quark.core.struct.valuenode import Primitive
+
+# Set up minimal registers to simulate filled-new-array with primitives
+apk = AndroguardImp('sample.apk')
+pyeval = PyEval(apk)
+# filled-new-array registers: dest v13, src v11, v12
+pyeval.table_obj.insert(11, RegisterObject(value=Primitive('0', ""), value_type=""))
+pyeval.table_obj.insert(12, RegisterObject(value=Primitive('1', ""), value_type=""))
+
+instruction = ['filled-new-array', 'v13', 'v11', 'v12', '[I']
+print('Executing instruction:', instruction)
+pyeval.eval[instruction[0]](instruction)
+print('Completed without IndexError')
+print('ret_stack last:', pyeval.ret_stack[-1].resolve())
+print('ret_type:', pyeval.ret_type)
+print('arg types:', [n.value_type for n in pyeval.ret_stack[-1].argumentNodes])
diff --git a/reproduce.py b/reproduce.py
new file mode 100644
index 0000000..a73ec39
--- /dev/null
+++ b/reproduce.py
@@ -0,0 +1,16 @@
+import logging
+import sys
+from quark.core.apkinfo import APKInfo
+from quark.evaluator.pyeval import PyEval
+
+logging.basicConfig(level=logging.DEBUG)
+
+apk_path = sys.argv[1] if len(sys.argv) > 1 else 'sample.apk'
+apk = APKInfo(apk_path)
+pyeval = PyEval(apk)
+
+# Let's scan methods to find filled-new-array new-array() scenario
+# We'll iterate over dalvik instructions with androguard? APKInfo must expose something
+
+# APKInfo seems to provide functions. Explore.
+print('APK load done')
diff --git a/sample.apk b/sample.apk
new file mode 100644
index 0000000..d95bb7b
Binary files /dev/null and b/sample.apk differ

</diff>

🧑‍🚒 Response (31):

👀‍ Observation (31):



🧑‍🚒 Response (32):

👀‍ Observation (32):

diff --git a/quark/evaluator/pyeval.py b/quark/evaluator/pyeval.py
index 94655f5..f4b5888 100644
--- a/quark/evaluator/pyeval.py
+++ b/quark/evaluator/pyeval.py
@@ -194,13 +194,15 @@ class PyEval:
             # Set the missing value types based on the method's descriptor.
             argTypes = (
                 []
-                if opcode.startswith("invoke-static")
+                if opcode.startswith("invoke-static") or "->" not in targetMethod
                 else [targetMethod[: targetMethod.find("->")]]
             )
 
             rawArgTypes = targetMethod[
                 targetMethod.find("(") + 1 : targetMethod.find(")")
-            ].split(" ")
+            ]
+            # Split by space but ignore empty strings
+            rawArgTypes = [arg for arg in rawArgTypes.split(" ") if arg]
             
             for argType in rawArgTypes:
                 argTypes.append(argType)
@@ -209,6 +211,28 @@ class PyEval:
                     # because these types take up two registers.
                     argTypes.append(argType)
 
+            # If we still don't have enough argument types (e.g., filled-new-array
+            # creating an array of primitives), try to infer the element type from
+            # the return descriptor. For example, "new-array()[I" should set the
+            # argument type to "I" for each provided element.
+            if argIdxWithoutType:
+                max_idx = max(argIdxWithoutType)
+                if len(argTypes) <= max_idx:
+                    ret_type = ""
+                    if ")" in targetMethod:
+                        ret_type = targetMethod[targetMethod.index(")") + 1 :]
+
+                    inferred_type = ""
+                    if ret_type.startswith("[") and len(ret_type) > 1:
+                        # Use the component type for arrays.
+                        inferred_type = ret_type[1:]
+                    else:
+                        inferred_type = ret_type
+
+                    # Pad argTypes so that indexing is safe.
+                    while len(argTypes) <= max_idx:
+                        argTypes.append(inferred_type)
+
             for argIdx in argIdxWithoutType:
                 valueOfRegList[argIdx].value_type = argTypes[argIdx]
 
diff --git a/sample.apk b/sample.apk
new file mode 100644
index 0000000..d95bb7b
Binary files /dev/null and b/sample.apk differ

@haeter525 haeter525 marked this pull request as ready for review March 16, 2026 17:55
@haeter525

Copy link
Copy Markdown
Owner

@CodiumAI-Agent /review

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

PyEval._invoke silently skips IndexError in certain pipeline

1 participant