PaddlePaddle · lijialin03 · Sep 5, 2025 · Sep 4, 2025 · Sep 4, 2025
diff --git a/padiff/comparison/actions.py b/padiff/comparison/actions.py
@@ -40,7 +40,7 @@ def get_action_by_name(self, name):
             raise ValueError(f"Action '{name}' not registered. Available: {list(self.pool.keys())}")
         return self.pool[name]
 
-    def find_actions(self, report_0, node_0, report_1, node_1, name=None):
+    def find_actions(self, report_0=None, node_0=None, report_1=None, node_1=None, name=None):
         if name is not None:
             return self.get_action_by_name(name)
 
@@ -85,11 +85,16 @@ def __call__(self, file_list_0, file_list_1, cfg):
         for info_0, info_1 in zip(file_list_0, file_list_1):
             tensor_0 = load_numpy(info_0["path"])
             tensor_1 = load_numpy(info_1["path"])
+
+            if "transpose" in cfg and cfg["transpose"]:
+                tensor_1 = np.transpose(tensor_1)
+
             if tensor_0.size == 0 or tensor_1.size == 0:
                 if tensor_0.size != tensor_1.size:
                     raise RuntimeError("size of tensors is not equal")
                 logger.warning("Found nparray.size == 0, compare skipped!")
                 continue
+
             assert_tensor_equal(tensor_0, tensor_1, cfg)
 
 
@@ -113,17 +118,37 @@ def __call__(self, file_list_0, file_list_1, cfg):
         for info_0, info_1 in zip(file_list_0[:min_len], file_list_1[:min_len]):
             tensor_0 = load_numpy(info_0["path"])
             tensor_1 = load_numpy(info_1["path"])
+
+            if cfg["transpose"]:
+                tensor_1 = np.transpose(tensor_1)
+
             if tensor_0.size == 0 or tensor_1.size == 0:
                 logger.debug("Found empty tensor, compare skipped!")
                 continue
+
             if tensor_0.shape != tensor_1.shape:
                 logger.debug(f"Shape of tensors are not equal: {tensor_0.shape}!={tensor_1.shape}")
                 if tensor_0.size == tensor_1.size:
                     logger.debug(f"Try to reshape them to {tensor_0.shape}")
                     tensor_1 = np.reshape(tensor_1, tensor_0.shape)
                 else:
                     continue
+
             assert_tensor_equal(tensor_0, tensor_1, cfg)
             num_success += 1
+
         if min_len != 0 and num_success == 0:
             raise RuntimeError("All outputs for the layer have different shape!")
+
+
+@global_actions.register("ignore")
+class IgnoreAction(Action):
+    """Action registered under the name "ignore" that compares nothing.
+
+    It matches every report/node pair and its call is a no-op, so any file
+    pair routed to it is accepted without being loaded or checked.
+    """
+
+    @property
+    def priority(self):
+        """Return this action's fixed priority value, 100."""
+        # NOTE(review): how priorities are ordered against other registered
+        # actions is decided by the action pool, which is not visible here.
+        return 100
+
+    def match(self, report_0, node_0, report_1, node_1):
+        """Accept any combination of reports and nodes unconditionally."""
+        return True
+
+    def __call__(self, file_list_0, file_list_1, cfg):
+        """Leave both file lists untouched and return None."""
+        return None
diff --git a/padiff/comparison/checker/base.py b/padiff/comparison/checker/base.py
@@ -16,37 +16,7 @@
 import torch
 from itertools import zip_longest
 import numpy as np
-from ...configs import global_yaml_loader
-from ...utils import load_numpy, struct_info_log, assert_tensor_equal, logger
-
-
-def process_each_param(process, node_lists, reports, compare_target, cfg):
-    for node_0, node_1 in zip_longest(node_lists[0], node_lists[1], fillvalue=None):
-        if node_0 is None or node_1 is None:
-            raise RuntimeError("Found model with difference number of sublayers. Check your model.")
-        for (param_name_0, param_path_0), (param_name_1, param_path_1) in zip(
-            node_0[compare_target].items(),
-            node_1[compare_target].items(),
-        ):
-            try:
-                settings = global_yaml_loader.get_weight_settings(
-                    (node_0["name"], node_1["name"]),
-                    (reports[0]["framework"], reports[1]["framework"]),
-                    (param_name_0, param_name_1),
-                )
-                settings.update(cfg)
-                param_0 = load_numpy(param_path_0)
-                param_1 = load_numpy(param_path_1)
-                process([node_0, node_1], [param_name_0, param_name_1], [param_0, param_1], settings)
-            except Exception as e:
-                err_str = f"{type(e).__name__ + ':  ' + str(e)}\n"
-                err_str += f"Error occured between:\n"
-                err_str += f"    (base_model):  {node_0['route'] + '.' + param_name_0}\n"
-                err_str += f"    (raw_model):   {node_1['route'] + '.' + param_name_1}\n\n"
-
-                err_str += struct_info_log(reports, (compare_target, compare_target), compare_target)
-
-                raise RuntimeError(err_str)
+from ...utils import assert_tensor_equal, logger
 
 
 def assert_shape(params, settings):

diff --git a/padiff/comparison/checker/params.py b/padiff/comparison/checker/params.py
@@ -12,10 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os.path as osp
+from itertools import zip_longest
 
 from ...utils import logger, build_file_name, get_all_valid_path, load_json, traversal_node
-from ...configs import parse_cfg
-from .base import assert_weight, assert_grad, process_each_param
+from ...configs import parse_cfg, global_yaml_loader
+from ..actions import get_action
 
 
 def check_params(report_path_0, report_path_1, cfg=None):
@@ -30,79 +32,83 @@ def check_params(report_path_0, report_path_1, cfg=None):
         node_lists = [traversal_node(rep["tree"], []) for rep in reports]
 
         logger.info(f"Checking params in {path_0} and {path_1}")
-        weight_rst = weight_rst and check_target(assert_weight, node_lists, reports, "weights", cfg)
-        grad_rst = grad_rst and check_target(assert_grad, node_lists, reports, "grads", cfg)
+        weight_rst = weight_rst and _check_params_impl(node_lists, reports, "weights", cfg)
+        grad_rst = grad_rst and _check_params_impl(node_lists, reports, "grads", cfg)
     return weight_rst and grad_rst
 
 
 def check_weights(report_path_0, report_path_1, cfg=None):
-    cfg = parse_cfg(cfg)
-    logger.info(f"Check weights cfg: {cfg}")
+    """Compare the dumped weights found under two report paths.
+
+    Thin wrapper: forwards both paths and ``cfg`` to ``_check_param_impl``
+    with the "weights" target and returns that helper's result.
+    """
+    return _check_param_impl(report_path_0, report_path_1, "weights", cfg)
 
-    weight_rst = True
-    all_ranks_path_0, all_ranks_path_1 = get_all_valid_path(report_path_0, report_path_1)
-    for path_0, path_1 in zip(all_ranks_path_0, all_ranks_path_1):
-        reports = [load_json(path_0, "weights.json"), load_json(path_1, "weights.json")]
-        node_lists = [traversal_node(rep["tree"], []) for rep in reports]
 
-        logger.info(f"Checking weights in {path_0} and {path_1}")
-        weight_rst = weight_rst and check_target(assert_weight, node_lists, reports, "weights", cfg)
-    return weight_rst
+def check_grads(report_path_0, report_path_1, cfg=None):
+    """Compare the dumped gradients found under two report paths.
+
+    Delegates to ``_check_param_impl`` with the "grads" target and hands back
+    whatever that helper returns.
+    """
+    target = "grads"
+    return _check_param_impl(report_path_0, report_path_1, target, cfg)
 
 
-def check_grads(report_path_0, report_path_1, cfg=None):
+def _check_param_impl(report_path_0, report_path_1, compare_target, cfg=None):
+    """Compare one kind of dumped parameter for every pair of rank paths.
+
+    For each pair of paths returned by ``get_all_valid_path`` the file
+    ``<compare_target>.json`` is loaded from both sides, each report's "tree"
+    is flattened with ``traversal_node``, and the resulting node lists are
+    handed to ``_check_params_impl``.
+
+    Args:
+        report_path_0: Report location of the first model.
+        report_path_1: Report location of the second model.
+        compare_target: Base name of the JSON report to load and the target
+            forwarded to ``_check_params_impl`` (the wrappers in this file
+            pass "weights" or "grads").
+        cfg: Raw configuration; it is normalised by ``parse_cfg`` first.
+
+    Returns:
+        The accumulated result: True only while every compared pair
+        returned a truthy value.
+    """
     cfg = parse_cfg(cfg)
-    logger.info(f"Check grads cfg: {cfg}")
+    logger.info(f"Check {compare_target} cfg: {cfg}")
 
-    grad_rst = True
+    param_rst = True
     all_ranks_path_0, all_ranks_path_1 = get_all_valid_path(report_path_0, report_path_1)
+    # zip pairs the paths positionally and stops at the shorter list.
     for path_0, path_1 in zip(all_ranks_path_0, all_ranks_path_1):
-        reports = [load_json(path_0, "grads.json"), load_json(path_1, "grads.json")]
+        reports = [load_json(path_0, f"{compare_target}.json"), load_json(path_1, f"{compare_target}.json")]
         node_lists = [traversal_node(rep["tree"], []) for rep in reports]
 
-        logger.info(f"Checking grads in {path_0} and {path_1}")
-        grad_rst = grad_rst and check_target(assert_grad, node_lists, reports, "grads", cfg)
-    return grad_rst
+        logger.info(f"Checking {compare_target} in {path_0} and {path_1}")
+        # NOTE(review): `and` short-circuits, so once one pair has failed the
+        # remaining pairs are loaded and logged but never actually compared.
+        param_rst = param_rst and _check_params_impl(node_lists, reports, compare_target, cfg)
+    return param_rst
 
 
-def check_target(fn, node_lists, reports, compare_target, cfg):
-    flag = True
+def _check_params_impl(node_lists, reports, compare_target, cfg):
+    """Compare every paired ``compare_target`` entry of two flattened models.
+
+    The two node lists are walked in lockstep. For each node pair, the items
+    of ``node[compare_target]`` are paired positionally and the selected
+    action is called on each pair of paths. Every failure is appended to a
+    diff report file and comparison continues with the next pair.
+
+    Args:
+        node_lists: Two sequences of node dicts; "name", "route" and
+            ``compare_target`` are read from each node.
+        reports: Two report dicts; "framework" is read from both and
+            ``reports[0]`` is used to build the report file name.
+        compare_target: Key of the per-node mapping whose items are
+            (parameter name, file path) pairs.
+        cfg: Mapping of options; "action_name" selects the action and all
+            entries are merged over the per-weight settings.
+
+    Returns:
+        False if any pair raised during comparison, True otherwise.
+
+    Raises:
+        RuntimeError: If the two node lists differ in length.
+    """
+    diff_found = False
     log_name = build_file_name(reports[0], compare_target + "_diff")
+    # Empty a report left over from an earlier run. Entries below are written
+    # in append mode, presumably to this same file -- confirm in logger.log_file.
+    if osp.exists(osp.join(logger.log_path, log_name)):
+        with open(osp.join(logger.log_path, log_name), "w") as f:
+            pass
+
+    action_name = cfg.get("action_name", None)
+    # Resolved once; the same action object handles every pair below.
+    act = get_action(name=action_name)
+
+    # zip_longest pads the shorter list with None, which is how a differing
+    # number of sublayers is detected.
+    for node_0, node_1 in zip_longest(node_lists[0], node_lists[1], fillvalue=None):
+        if node_0 is None or node_1 is None:
+            raise RuntimeError("Found model with difference number of sublayers. Check your model.")
+
+        # NOTE(review): plain zip stops at the shorter mapping, so surplus
+        # entries on one side are silently left uncompared.
+        for (param_name_0, param_path_0), (param_name_1, param_path_1) in zip(
+            node_0[compare_target].items(),
+            node_1[compare_target].items(),
+        ):
+            try:
+                # NOTE(review): assert statements are removed under `python -O`,
+                # in which case a None path would reach the action instead.
+                assert (
+                    param_path_0 is not None and param_path_1 is not None
+                ), f"{compare_target.capitalize()} for at least one of base or raw model is not found."
+
+                settings = global_yaml_loader.get_weight_settings(
+                    (node_0["name"], node_1["name"]),
+                    (reports[0]["framework"], reports[1]["framework"]),
+                    (param_name_0, param_name_1),
+                )
+                # Entries from cfg win over the per-weight settings on key clashes.
+                settings.update(cfg)
 
-    def checker(nodes, param_names, params, settings):
-        try:
-            fn(params, settings)
-        except Exception as e:
-            nonlocal flag
-            flag = False
-            info = (
-                "=" * 25 + "\n" + "{} value is different.\n"
-                "between base_model: {}\n"
-                "        raw_model:  {}\n\n"
-                "base_model param path:\n    {}\n"
-                "raw_model param path:\n    {}\n\n"
-                "{}\n\n".format(
-                    compare_target,
-                    nodes[0]["repr"],
-                    nodes[1]["repr"],
-                    nodes[0]["route"] + "." + param_names[0],
-                    nodes[1]["route"] + "." + param_names[1],
-                    type(e).__name__ + ":  " + str(e),
+                # The action is called with lists of {"path": ...} records, so
+                # the single pair is wrapped into one-element lists.
+                file_list_0 = [{"path": param_path_0}]
+                file_list_1 = [{"path": param_path_1}]
+
+                act(file_list_0, file_list_1, settings)
+
+            except Exception as e:
+                # Any exception raised above (missing path, settings lookup,
+                # or the action itself) is recorded as a mismatch.
+                diff_found = True
+                info = (
+                    f"=========================\n"
+                    f"FAILED!!! {compare_target.capitalize()} Mismatch:\n"
+                    f"   Layer: {node_0['name']}(base) vs {node_1['name']}(raw)\n"
+                    f"   Route: {node_0['route']}.{param_name_0}(base) vs {node_1['route']}.{param_name_1}(raw)\n"
+                    f"{e}\n\n"
                 )
-            )
-            logger.log_file(log_name, "a", info)
+                logger.log_file(log_name, "a", info)
 
-    try:
-        process_each_param(checker, node_lists, reports, compare_target, cfg)
-    except Exception as e:
-        logger.error("=" * 10 + f"Err occurs when compare {compare_target}!!!" + "=" * 10 + "\n" + str(e))
+    if diff_found:
+        logger.error(f"The {compare_target} comparing failed !!! Please check report '{logger.log_path}/{log_name}'.")
         return False
 
-    if flag == False:
-        logger.info(
-            f"Diff found when compare {compare_target}, please check report \n        {logger.log_path}/{log_name}"
-        )
-    else:
-        logger.info(f"{compare_target} compared.")
-
-    return flag
+    # NOTE(review): the wording "comparing compared" looks unintended -- confirm
+    # the desired success message.
+    logger.info(f"The {compare_target} comparing compared.")
+    return True
diff --git a/padiff/comparison/manual.py b/padiff/comparison/manual.py
@@ -23,53 +23,53 @@ def compare_dumps(dump_path1, dump_path2, cfg=None, diff_phase="both"):
     try:
         report_success = check_report(dump_path1, dump_path2, cfg=cfg, diff_phase=diff_phase)
         if report_success:
-            logger.info("✅ check_report: SUCCESS !!!")
+            logger.info("✅ check_report: SUCCESS !!!\n")
         else:
-            logger.warning("❌ check_report: FAILED !!!")
+            logger.error("❌ check_report: FAILED !!!\n")
     except Exception as e:
-        logger.error(f"❌ check_report: FAILED with error: {e}")
+        logger.error(f"❌ check_report: FAILED with error: {e}\n")
         report_success = False
 
     # check grads
     grads_success = None
     if os.path.exists(f"{dump_path1}/grads.json") and os.path.exists(f"{dump_path2}/grads.json"):
-        logger.info("\n🔍 Start comparison grads (check_grads)...")
+        logger.info("🔍 Start comparison grads (check_grads)...")
         try:
             grads_success = check_grads(dump_path1, dump_path2, cfg=cfg)
             if grads_success:
-                logger.info("✅ check_grads: SUCCESS !!!")
+                logger.info("✅ check_grads: SUCCESS !!!\n")
             else:
-                logger.warning("❌ check_grads: FAILED !!!")
+                logger.error("❌ check_grads: FAILED !!!\n")
         except Exception as e:
-            logger.error(f"❌ check_grads: FAILED with error: {e}")
+            logger.error(f"❌ check_grads: FAILED with error: {e}\n")
             grads_success = False
 
     # check weights
     weights_success = None
     if os.path.exists(f"{dump_path1}/weights.json") and os.path.exists(f"{dump_path2}/weights.json"):
-        logger.info("\n🔍 Start comparison weights (check_weights)...")
+        logger.info("🔍 Start comparison weights (check_weights)...")
         try:
             weights_success = check_weights(dump_path1, dump_path2, cfg=cfg)
             if weights_success:
-                logger.info("✅ check_weights: SUCCESS !!!")
+                logger.info("✅ check_weights: SUCCESS !!!\n")
             else:
-                logger.warning("❌ check_weights: FAILED !!!")
+                logger.error("❌ check_weights: FAILED !!!\n")
         except Exception as e:
-            logger.error(f"❌ check_weights: FAILED with error: {e}")
+            logger.error(f"❌ check_weights: FAILED with error: {e}\n")
             weights_success = False
 
     # check params
     params_success = None
     if os.path.exists(f"{dump_path1}/params.json") and os.path.exists(f"{dump_path2}/params.json"):
-        logger.info("\n🔍 Start comparison all parameters (check_params)...")
+        logger.info("🔍 Start comparison all parameters (check_params)...")
         try:
             params_success = check_params(dump_path1, dump_path2, cfg=cfg)
             if params_success:
-                logger.info("✅ check_params: SUCCESS !!!")
+                logger.info("✅ check_params: SUCCESS !!!\n")
             else:
-                logger.warning("❌ check_params: FAILED !!!")
+                logger.error("❌ check_params: FAILED !!!\n")
         except Exception as e:
-            logger.error(f"❌ check_params: FAILED with error: {e}")
+            logger.error(f"❌ check_params: FAILED with error: {e}\n")
             params_success = False
 
     # final result
@@ -80,7 +80,7 @@ def compare_dumps(dump_path1, dump_path2, cfg=None, diff_phase="both"):
     if success:
         logger.info(f"🎉 final comparison result: SUCCESS !!!")
     else:
-        logger.warning(f"❌ final comparison result: FAILED !!!")
+        logger.error(f"❌ final comparison result: FAILED !!!")
     return success
 
 

diff --git a/padiff/utils/log.py b/padiff/utils/log.py
@@ -107,13 +107,12 @@ def log_file(self, filename, mode, info):
 
 def print_report_info(nodes, reports, exc, stage, msg=None):
 
-    logger.error("FAILED !!!")
-    logger.error("DIFF DETAILS:")
-    logger.error(f"  '{stage}' Stage Mismatch")
-    logger.error(f"  Layer: {nodes[0]['name']} vs {nodes[1]['name']}")
-    logger.error(f"  Route: {nodes[0]['route']} vs {nodes[1]['route']} \n")
-
-    logger.error(f"Error({type(exc).__name__}): {str(exc)} \n")
+    logger.error(
+        f"FAILED !!! '{stage}' Stage Mismatch! \n"
+        f"  Layer: {nodes[0]['name']} vs {nodes[1]['name']} \n"
+        f"  Route: {nodes[0]['route']} vs {nodes[1]['route']} \n"
+        f"Error({type(exc).__name__}): {str(exc)} \n"
+    )
 
     if msg is not None:
         logger.warning("ADDITIONAL MESSAGE:")

diff --git a/padiff/utils/optim.py b/padiff/utils/optim.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+
+from ..tools import dump_grads
+
+
+def wrap_optimizer_step(optimizer):
+    """Patch ``optimizer.step`` in place so gradients are dumped after each step.
+
+    The replacement forwards every positional and keyword argument to the
+    original ``step`` (for example a ``closure``) and returns its result
+    unchanged. After the original step returns, if the optimizer has a
+    ``_padiff_proxy_model`` attribute that is not None,
+    ``dump_grads(proxy_model, proxy_model.dump_path)`` is called.
+
+    The original bound ``step`` is stored as ``optimizer._original_step``;
+    the presence of that attribute marks the optimizer as already wrapped,
+    so calling this function again is a no-op.
+
+    Args:
+        optimizer: Object exposing a callable ``step`` attribute.
+
+    Returns:
+        None. The optimizer is modified in place.
+    """
+    if hasattr(optimizer, "_original_step"):
+        return
+
+    original_step = optimizer.step
+
+    @functools.wraps(original_step)
+    def wrapped_step(*args, **kwargs):
+        # Forward the caller's arguments and keep the return value: a
+        # zero-argument wrapper would raise TypeError on step(closure) and
+        # would discard whatever the original step returns.
+        result = original_step(*args, **kwargs)
+
+        # Looked up on every call so a proxy model attached after wrapping
+        # is still picked up.
+        proxy_model = getattr(optimizer, "_padiff_proxy_model", None)
+        if proxy_model is not None:
+            dump_grads(proxy_model, proxy_model.dump_path)
+
+        return result
+
+    optimizer.step = wrapped_step
+    optimizer._original_step = original_step