From 71ef540369d121e425cdbd0b487c39dda3adc57c Mon Sep 17 00:00:00 2001
From: lijialin03 <lijialin03@baidu.com>
Date: Fri, 5 Sep 2025 06:38:34 +0000
Subject: [PATCH 1/2] fix:fix bugs and optimize log

---
 padiff/abstracts/hooks/base.py  |  2 +-
 padiff/abstracts/hooks/guard.py | 19 +++++----
 padiff/abstracts/hooks/hook.py  | 76 +++++++++++++++++++--------------
 padiff/comparison/actions.py    |  2 +-
 padiff/comparison/manual.py     | 69 +++++++++++++++++-------------
 padiff/utils/log.py             | 20 ++++++++-
 requirements.txt                |  1 +
 7 files changed, 114 insertions(+), 75 deletions(-)

diff --git a/padiff/abstracts/hooks/base.py b/padiff/abstracts/hooks/base.py
index a352c17..1cdad72 100644
--- a/padiff/abstracts/hooks/base.py
+++ b/padiff/abstracts/hooks/base.py
@@ -143,7 +143,7 @@ def find_base_report_node(net_id, step_idx):
         raise RuntimeError(f"Cannot find net_id={net_id} in base report.")
 
     node_list = _context.base[net_id]
-    if step_idx < 0 or step_idx >= len(node_list):
+    if step_idx >= len(node_list):
         raise RuntimeError(f"Index out of range: net_id={net_id}, step_idx={step_idx}, list length={len(node_list)}")
 
     return _context.base[net_id][step_idx]
diff --git a/padiff/abstracts/hooks/guard.py b/padiff/abstracts/hooks/guard.py
index f97c230..91e3709 100644
--- a/padiff/abstracts/hooks/guard.py
+++ b/padiff/abstracts/hooks/guard.py
@@ -280,15 +280,26 @@ def PaDiffGuard(
 
             yield model
 
     except _CallsComplete:
-        # dump
-        proxy_model.dump_report(proxy_model.dump_path)
-        proxy_model.dump_weights(proxy_model.dump_path)
-        if optimizer is None:
-            proxy_model.dump_grads(proxy_model.dump_path)
-
-        sys.exit(0)
+        # The call limit was reached: a clean stop. Fall through to the dump
+        # in ``finally`` and the ``sys.exit(0)`` that follows the try statement.
+        pass
 
-    except SystemExit as e:
-        logger.info("PaDiffGuard: SystemExit received, skipping dump_report.")
+    except SystemExit:
+        logger.info("PaDiffGuard: SystemExit received, dumping before exit.")
         raise
+
+    finally:
+        # Best-effort dump on every exit path; a failed dump is logged and must
+        # not replace whatever exception (or exit status) is already in flight.
+        try:
+            proxy_model.dump_report(proxy_model.dump_path)
+            proxy_model.dump_weights(proxy_model.dump_path)
+            if optimizer is None:
+                proxy_model.dump_grads(proxy_model.dump_path)
+        except Exception as e:
+            logger.error(f"Failed to dump: {e}")
+
+    # Reached only after normal completion or _CallsComplete. Exiting here rather
+    # than inside ``finally`` keeps a pending SystemExit code or error intact.
+    sys.exit(0)
diff --git a/padiff/abstracts/hooks/hook.py b/padiff/abstracts/hooks/hook.py
index 1b7af6c..a94516c 100644
--- a/padiff/abstracts/hooks/hook.py
+++ b/padiff/abstracts/hooks/hook.py
@@ -167,38 +167,7 @@ def info_hook(model, input, output, net_id):
     if single_step_state() == "forward" and net_id != -1:
         # two report_item with same id, the step_idx should be corresponded
         step_idx = len(list(filter(lambda x: x.type == "forward" and x.net_id == net_id, report.items))) - 1
-
-        try:
-            base_report_node = find_base_report_node(net_id, step_idx)
-        except (IndexError, RuntimeError) as e:
-            error_msg = str(e)
-            base_max_calls = "unknown"
-            if "list length=" in error_msg:
-                try:
-                    base_max_calls = int(error_msg.split("list length=")[1].split()[0])
-                except:
-                    pass
-            current_calls = step_idx + 1
-            route = getattr(model, "route", "unknown")
-            logger.error(
-                f"\n   ❌ Single-step alignment FAILED: Execution path mismatch!"
-                f"\n   📌 Layer '{route}' called {current_calls} times (current) vs {base_max_calls} times (base)."
-                f"\n   📌 Check the forward logic in both models around this layer."
-            )
-            sys.exit(1)
-
-        if base_report_node["name"] != _model.__class__.__name__:
-            warning_msg = (
-                f"\n   ⚠️ Single-step alignment FAILED: Layer with net_id={net_id} mismatch!"
-                f"\n   📌 Mismatch Layer: {base_report_node['name']}(base) vs {_model.__class__.__name__}(raw)"
-                f"\n   💡 Suggestion: Models have different architectures or initialization order. "
-                "Please check the model implementation or decrease 'align_depth' to reduce the alignment "
-                "granularity, or add layers that do not require alignment to the blacklist."
-            )
-            logger.warning(warning_msg)
-        else:
-            logger.debug(f"Single Step: {_model.__class__.__name__}(net_id={net_id})")
-
+        base_report_node = single_step_check(report, net_id, step_idx, _model.__class__.__name__, "forward")
         retval = map_structure(replace_forward_output(base_report_node), output)
         __in_info_hook__ = False
         return retval
@@ -297,3 +266,65 @@ def inner(input_):
             return input_
 
     return inner
+
+
+def single_step_check(report, net_id, step_idx, current_name, node_type, bwd_item=None):
+    """Look up the base report node for one single-step call and check its name.
+
+    Args:
+        report: Current report; only consulted (via ``report.stack``) to name
+            the failing layer when ``bwd_item`` does not provide a route.
+        net_id: Id of the net whose base record is looked up.
+        step_idx: Index passed to ``find_base_report_node`` for ``net_id``.
+        current_name: Name compared against the base node's ``"name"`` entry.
+        node_type: Phase label used in the log messages, e.g. ``"forward"``.
+        bwd_item: Optional item whose ``net.route`` names the layer in the
+            error message; it takes precedence over the report stack.
+
+    Returns:
+        The node returned by ``find_base_report_node``. A name mismatch is only
+        logged as a warning; the node is returned either way.
+
+    Raises:
+        SystemExit: With status 1 when the base node cannot be looked up.
+    """
+    # Keep the try body to the lookup itself so only its errors are treated as
+    # an execution-path mismatch.
+    try:
+        base_report_node = find_base_report_node(net_id, step_idx)
+    except (IndexError, RuntimeError) as e:
+        error_msg = str(e)
+        base_max_calls = "unknown"
+        if "list length=" in error_msg:
+            try:
+                base_max_calls = int(error_msg.split("list length=")[1].split()[0])
+            except (IndexError, ValueError):
+                # The count is informational only; fall back to "unknown".
+                pass
+        current_calls = step_idx + 1
+        route = "unknown"
+        if bwd_item and hasattr(bwd_item.net, "route"):
+            route = bwd_item.net.route
+        elif hasattr(report.stack._top().net, "route"):
+            route = report.stack._top().net.route
+
+        logger.error(
+            f"\n   ❌ Single-step alignment FAILED: Execution path mismatch in {node_type}!"
+            f"\n   📌 Layer '{route}' called {current_calls} times (current) vs {base_max_calls} times (base)."
+            f"\n   📌 Check the {node_type} logic in both models around this layer."
+        )
+        sys.exit(1)
+
+    if base_report_node["name"] != current_name:
+        warning_msg = (
+            f"\n   ⚠️ Single-step alignment FAILED: {node_type} with net_id={net_id} mismatch!\n"
+            f"   📌 Mismatch {node_type.capitalize()}: {base_report_node['name']}(base) vs {current_name}(raw)\n"
+            f"   💡 Suggestion: Models have different architectures or initialization order. "
+            "Please check the model implementation or decrease 'align_depth' to reduce the alignment "
+            "granularity, or add layers that do not require alignment to the blacklist."
+        )
+        logger.warning(warning_msg)
+    else:
+        logger.debug(f"Single Step: {current_name}(net_id={net_id})")
+
+    return base_report_node
diff --git a/padiff/comparison/actions.py b/padiff/comparison/actions.py
index 87d0d5a..7afb037 100644
--- a/padiff/comparison/actions.py
+++ b/padiff/comparison/actions.py
@@ -119,7 +119,7 @@ def __call__(self, file_list_0, file_list_1, cfg):
             tensor_0 = load_numpy(info_0["path"])
             tensor_1 = load_numpy(info_1["path"])
 
-            if cfg["transpose"]:
+            if "transpose" in cfg and cfg["transpose"]:
                 tensor_1 = np.transpose(tensor_1)
 
             if tensor_0.size == 0 or tensor_1.size == 0:
diff --git a/padiff/comparison/manual.py b/padiff/comparison/manual.py
index de54a4d..96c6de7 100644
--- a/padiff/comparison/manual.py
+++ b/padiff/comparison/manual.py
@@ -33,44 +33,53 @@ def compare_dumps(dump_path1, dump_path2, cfg=None, diff_phase="both"):
     # check grads
     grads_success = None
     if os.path.exists(f"{dump_path1}/grads.json") and os.path.exists(f"{dump_path2}/grads.json"):
-        logger.info("🔍 Start comparison grads (check_grads)...")
-        try:
-            grads_success = check_grads(dump_path1, dump_path2, cfg=cfg)
-            if grads_success:
-                logger.info("✅ check_grads: SUCCESS !!!\n")
-            else:
-                logger.error("❌ check_grads: FAILED !!!\n")
-        except Exception as e:
-            logger.error(f"❌ check_grads: FAILED with error: {e}\n")
-            grads_success = False
+        if len(os.listdir(f"{dump_path1}/grads")) == 0 or len(os.listdir(f"{dump_path2}/grads")) == 0:
+            logger.warning(f" ⚠️ Grads dir is empty of {dump_path1} or/and {dump_path2}\n")
+        else:
+            logger.info("🔍 Start comparison grads (check_grads)...")
+            try:
+                grads_success = check_grads(dump_path1, dump_path2, cfg=cfg)
+                if grads_success:
+                    logger.info("✅ check_grads: SUCCESS !!!\n")
+                else:
+                    logger.error("❌ check_grads: FAILED !!!\n")
+            except Exception as e:
+                logger.error(f"❌ check_grads: FAILED with error: {e}\n")
+                grads_success = False
 
     # check weights
     weights_success = None
     if os.path.exists(f"{dump_path1}/weights.json") and os.path.exists(f"{dump_path2}/weights.json"):
-        logger.info("🔍 Start comparison weights (check_weights)...")
-        try:
-            weights_success = check_weights(dump_path1, dump_path2, cfg=cfg)
-            if weights_success:
-                logger.info("✅ check_weights: SUCCESS !!!\n")
-            else:
-                logger.error("❌ check_weights: FAILED !!!\n")
-        except Exception as e:
-            logger.error(f"❌ check_weights: FAILED with error: {e}\n")
-            weights_success = False
+        if len(os.listdir(f"{dump_path1}/weights")) == 0 or len(os.listdir(f"{dump_path2}/weights")) == 0:
+            logger.warning(f" ⚠️ Weights dir is empty of {dump_path1} or/and {dump_path2}\n")
+        else:
+            logger.info("🔍 Start comparison weights (check_weights)...")
+            try:
+                weights_success = check_weights(dump_path1, dump_path2, cfg=cfg)
+                if weights_success:
+                    logger.info("✅ check_weights: SUCCESS !!!\n")
+                else:
+                    logger.error("❌ check_weights: FAILED !!!\n")
+            except Exception as e:
+                logger.error(f"❌ check_weights: FAILED with error: {e}\n")
+                weights_success = False
 
     # check params
     params_success = None
     if os.path.exists(f"{dump_path1}/params.json") and os.path.exists(f"{dump_path2}/params.json"):
-        logger.info("🔍 Start comparison all parameters (check_params)...")
-        try:
-            params_success = check_params(dump_path1, dump_path2, cfg=cfg)
-            if params_success:
-                logger.info("✅ check_params: SUCCESS !!!\n")
-            else:
-                logger.error("❌ check_params: FAILED !!!\n")
-        except Exception as e:
-            logger.error(f"❌ check_params: FAILED with error: {e}\n")
-            params_success = False
+        if len(os.listdir(f"{dump_path1}/params")) == 0 or len(os.listdir(f"{dump_path2}/params")) == 0:
+            logger.warning(f" ⚠️ Params dir is empty of {dump_path1} or/and {dump_path2}\n")
+        else:
+            logger.info("🔍 Start comparison all parameters (check_params)...")
+            try:
+                params_success = check_params(dump_path1, dump_path2, cfg=cfg)
+                if params_success:
+                    logger.info("✅ check_params: SUCCESS !!!\n")
+                else:
+                    logger.error("❌ check_params: FAILED !!!\n")
+            except Exception as e:
+                logger.error(f"❌ check_params: FAILED with error: {e}\n")
+                params_success = False
 
     # final result
     success = report_success
diff --git a/padiff/utils/log.py b/padiff/utils/log.py
index 2ca457c..29b7807 100644
--- a/padiff/utils/log.py
+++ b/padiff/utils/log.py
@@ -15,6 +15,15 @@
 import os
 import shutil
 import logging
+import colorlog
+
+
+log_config = {
+    "DEBUG": {"level": 10, "color": "cyan"},
+    "INFO": {"level": 20, "color": "green"},
+    "WARNING": {"level": 30, "color": "yellow"},
+    "ERROR": {"level": 40, "color": "red"},
+}
 
 
 class Logger:
@@ -23,6 +32,14 @@ def __init__(self):
         self._is_initialized = False
         self.log_path = "padiff_log"
 
+        for key, conf in log_config.items():
+            logging.addLevelName(conf["level"], key)
+
+        self.colored_formatter = colorlog.ColoredFormatter(
+            "%(log_color)s[AutoDiff] [%(levelname)s]%(reset)s %(message)s",
+            log_colors={key: conf["color"] for key, conf in log_config.items()},
+        )
+
     def setup(self, log_parent_dir):
         if self._is_initialized:
             return
@@ -50,8 +67,7 @@ def setup(self, log_parent_dir):
         file_handler.setFormatter(file_formatter)
 
         console_handler = logging.StreamHandler()
-        console_formatter = logging.Formatter("[AutoDiff] [%(levelname)s] %(message)s")
-        console_handler.setFormatter(console_formatter)
+        console_handler.setFormatter(self.colored_formatter)
 
         self._logger.addHandler(file_handler)
         self._logger.addHandler(console_handler)
diff --git a/requirements.txt b/requirements.txt
index 3716c53..c14b1a6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@ graphviz
 numpy
 coverage
 pyyaml
+colorlog

From 63e23a6016e17e055440420173c64180ed650f5e Mon Sep 17 00:00:00 2001
From: lijialin03 <lijialin03@baidu.com>
Date: Mon, 8 Sep 2025 09:01:35 +0000
Subject: [PATCH 2/2] fix: fix tests bugs

---
 Makefile                                      |  10 +-
 README.md                                     |  12 +-
 padiff/abstracts/hooks/guard.py               |   4 +-
 padiff/tools/load.py                          |  50 +++--
 padiff/utils/log.py                           |  15 +-
 tests/__init__.py                             |  13 ++
 tests/padiff_unittests.py                     |  78 +++++--
 tests/test_api_to_Layer.py                    |   4 -
 ...t_offline_compare.py => test_auto_diff.py} |   0
 tests/test_check_weight_grad.py               |  11 +-
 tests/test_cli_end_to_end.py                  | 204 ++++++++++++++++++
 tests/test_many_usages.py                     |  64 ++++--
 12 files changed, 387 insertions(+), 78 deletions(-)
 create mode 100644 tests/__init__.py
 rename tests/{test_offline_compare.py => test_auto_diff.py} (100%)
 create mode 100644 tests/test_cli_end_to_end.py

diff --git a/Makefile b/Makefile
index feca153..f8c8e89 100644
--- a/Makefile
+++ b/Makefile
@@ -34,11 +34,17 @@ lint-all:
 # # # # # # # # # # # # # # # Test Block # # # # # # # # # # # # # # # 
 
 .PHONY: test
-test: unit-test
+test: unit-test unit-test-special coverage-report
 
 unit-test:
 	@echo "Running unit tests with coverage..."
-	PYTHONPATH=. coverage run --source=. tests/padiff_unittests.py
+	PADIFF_SILENT=1 PYTHONPATH="$(shell pwd):$(PYTHONPATH)" coverage run --source=. tests/padiff_unittests.py
+
+unit-test-special:
+	@echo "Running test_api_to_Layer.py with PADIFF_API_CHECK=ON"
+	PADIFF_SILENT=1 PADIFF_API_CHECK=ON PYTHONPATH="$(shell pwd):$(PYTHONPATH)" coverage run --source=. --append tests/test_api_to_Layer.py
+
+coverage-report:
 	@echo ""
 	@echo "Coverage Report:"
 	coverage report -m
diff --git a/README.md b/README.md
index 700a1a8..435a0b9 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 **P**addle  **A**utomatically  **Diff**  precision toolkits.
 
 
-## 最近更新（latest 9.2）
+## 最近更新（latest 9.8）
 
 ### 使用单行命令对齐（支持前反向对齐）
 
@@ -54,9 +54,15 @@ compare_mode: "mean"
 action_name: "equal"
 ```
 
-### 开启 debug 模式（获取更多 log 信息）
+### log 设置
 
-设置环境变量 `export PADIFF_DEBUG=1`，或使用命令运行 `PADIFF_DEBUG=1 python -m padiff.cli ...`
+#### 开启 debug 模式
+
+为了获取更多 log 信息，可以设置环境变量 `export PADIFF_LOG_LEVEL=DEBUG`，或使用命令运行 `PADIFF_LOG_LEVEL=DEBUG python -m padiff.cli ...`
+
+#### 开启静默模式
+
+为了保持控制台信息简洁，可以设置环境变量 `PADIFF_SILENT=1`（也接受 `true` 或 `on`），此时仅保存 log 文件，不在控制台输出 log 信息
 
 
 ## 简介
diff --git a/padiff/abstracts/hooks/guard.py b/padiff/abstracts/hooks/guard.py
index 91e3709..2711494 100644
--- a/padiff/abstracts/hooks/guard.py
+++ b/padiff/abstracts/hooks/guard.py
@@ -235,12 +235,12 @@ def PaDiffGuard(
         # set max calls
         calls_context.set_limit(max_calls)
 
-        logger.info(f"PaDiffGuard: creating proxy model.")
         proxy_model = create_model(model, name=name, reset_dir=reset_flag)
         model._padiff_proxy = proxy_model
+        logger.debug(f"PaDiffGuard: creating proxy model.")
 
         if optimizer is not None and not hasattr(optimizer, "_padiff_proxy_model"):
-            logger.info(f"PaDiffGuard: wrapping optimizer.step().")
+            logger.debug(f"PaDiffGuard: wrapping optimizer.step().")
             optimizer._padiff_proxy_model = proxy_model
             wrap_optimizer_step(optimizer)
 
diff --git a/padiff/tools/load.py b/padiff/tools/load.py
index c60b3c5..54ba6de 100644
--- a/padiff/tools/load.py
+++ b/padiff/tools/load.py
@@ -47,6 +47,8 @@ def load_first_input_from_dump(report_path, tar_framework):
     args = []
     kwargs = {}
 
+    NATIVE_TYPES = (int, float, str, bool, type(None))
+
     for item in meta_info:
         file_path = os.path.join(input_dir, item["path"])
         key = item.get("key")
@@ -62,31 +64,45 @@ def load_first_input_from_dump(report_path, tar_framework):
                     tensor.requires_grad_(True)
                 else:
                     raise ValueError(f"Unsupported framework: {tar_framework}")
+                value = tensor
 
-                if key is None:
-                    args.append(tensor)
-                else:
-                    kwargs[key] = tensor
             else:
+                with open(file_path, "r") as f:
+                    full_item = json.load(f)
+
                 if item["type"] == "dict":
-                    reconstructed_dict = {}
-                    for k, v in item["data"].items():
-                        reconstructed_dict[k] = v
-                    value = reconstructed_dict
-                elif item["type"] in ["list", "tuple"]:
-                    reconstructed_list = [v for v in item["data"]]
-                    value = tuple(reconstructed_list) if item["type"] == "tuple" else reconstructed_list
+                    value = dict(full_item["data"])
+                elif item["type"] == "list":
+                    value = list(full_item["data"])
+                elif item["type"] == "tuple":
+                    value = tuple(full_item["data"])
+                elif item["type"] == "int":
+                    value = int(full_item["data"])
+                elif item["type"] == "float":
+                    value = float(full_item["data"])
+                elif item["type"] == "bool":
+                    value = str(full_item["data"]).lower() == "true"  # accepts JSON true/false and "True"/"true"
+                elif item["type"] == "NoneType":
+                    value = None
+                elif item["type"] == "str":
+                    value = full_item["data"]
                 else:
-                    value = item["data"]
+                    logger.warning(f"Skipping unsupported input type '{item['type']}' for input(key={key}).")
+                    continue
+
+            if key is None:
+                args.append(value)
+            else:
+                kwargs[key] = value
 
-                if key is None:
-                    args.append(value)
-                else:
-                    kwargs[key] = value
         except Exception as e:
-            logger.error(f"Error loading metadata file {file_path}: {e}")
+            logger.error(f"Error loading input(key={key}) in {file_path}: {e}")
             raise
 
+    if not args and not kwargs:
+        logger.warning("No valid inputs were loaded from the dump.")
+        return None
+
     return (args, kwargs)
 
 
diff --git a/padiff/utils/log.py b/padiff/utils/log.py
index 29b7807..d47226c 100644
--- a/padiff/utils/log.py
+++ b/padiff/utils/log.py
@@ -46,13 +46,11 @@ def setup(self, log_parent_dir):
 
         self._logger = logging.getLogger("padiff")
 
-        debug_flag = os.getenv("PADIFF_DEBUG")
+        silent_flag = os.getenv("PADIFF_SILENT")
         log_level_flag = os.getenv("PADIFF_LOG_LEVEL")
 
         if log_level_flag and log_level_flag.upper() in ("DEBUG", "INFO", "WARNING", "ERROR"):
             log_level = getattr(logging, log_level_flag.upper())
-        elif debug_flag and debug_flag.strip().lower() in ("1", "true", "on"):
-            log_level = logging.DEBUG
         else:
             log_level = logging.INFO
         self._logger.setLevel(log_level)
@@ -65,14 +63,15 @@ def setup(self, log_parent_dir):
         file_handler = logging.FileHandler(log_file_path, encoding="utf-8")
         file_formatter = logging.Formatter("[AutoDiff] [%(levelname)s] %(message)s")
         file_handler.setFormatter(file_formatter)
+        self._logger.addHandler(file_handler)
 
-        console_handler = logging.StreamHandler()
-        console_handler.setFormatter(self.colored_formatter)
+        if not silent_flag or silent_flag.strip().lower() not in ("1", "true", "on"):
+            console_handler = logging.StreamHandler()
+            console_handler.setFormatter(self.colored_formatter)
+            self._logger.addHandler(console_handler)
 
-        self._logger.addHandler(file_handler)
-        self._logger.addHandler(console_handler)
+            self._logger.info(f"Logging initialized. Log file: {log_file_path}")
 
-        self._logger.info(f"Logging initialized. Log file: {log_file_path}")
         self._is_initialized = True
         self.log_path = log_parent_dir
 
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..a9cc79c
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/padiff_unittests.py b/tests/padiff_unittests.py
index 9f88d8c..955861e 100644
--- a/tests/padiff_unittests.py
+++ b/tests/padiff_unittests.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,67 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+
+import unittest
 import os
 import sys
-import subprocess
-
-
-for root, dirs, files in os.walk("./"):
-    for fname in files:
-        if fname.endswith(".py") and fname.startswith("test_"):
-            fpath = root + "/" + fname
-            (status, output) = subprocess.getstatusoutput("python " + fpath)
-            if status != 0:
-                err_info = f"*** ===================== {fpath} ========================= ***\n"
-                err_info += f"{output}\n"
-                print(f"Failed on unittest {fname} with error message \n {err_info}.", end="\n", flush=True)
-                sys.exit(1)
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+EXCLUDE_FILES = {"test_api_to_Layer"}
+
+
+def discover_and_run_filtered_tests():
+    loader = unittest.TestLoader()
+    all_tests = loader.discover(start_dir=".", pattern="test_*.py")
+
+    filtered_suite = unittest.TestSuite()
+
+    def add_filtered_tests(test):
+        if isinstance(test, unittest.TestCase):
+            module = sys.modules.get(test.__class__.__module__)
+            if module and hasattr(module, "__file__"):
+                filename = os.path.basename(module.__file__)
+                filename_without_ext = os.path.splitext(filename)[0]
+                if filename_without_ext not in EXCLUDE_FILES:
+                    filtered_suite.addTest(test)
+                else:
+                    print(f"Excluding test from file: {filename}")
             else:
-                print(f"Succeed on unittest {fname}.", end="\n", flush=True)
-            os.system("rm -rf ./tests/padiff_dump ./tests/padiff_log")
+                filtered_suite.addTest(test)
+        elif isinstance(test, unittest.TestSuite):
+            for subtest in test:
+                add_filtered_tests(subtest)
+
+    for suite in all_tests:
+        add_filtered_tests(suite)
+
+    if filtered_suite.countTestCases() == 0:
+        print("No tests to run after filtering.")
+        return False
+
+    runner = unittest.TextTestRunner(verbosity=0)
+    result = runner.run(filtered_suite)
+
+    os.system("rm -rf ./tests/padiff_dump ./tests/padiff_log")
+
+    return result.wasSuccessful()
+
+
+def main():
+    try:
+        success = discover_and_run_filtered_tests()
+        if not success:
+            sys.exit(1)
+    except Exception as e:
+        print(f"An error occurred during test execution: {type(e).__name__}: {str(e)}")
+        import traceback
+
+        traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_api_to_Layer.py b/tests/test_api_to_Layer.py
index 2e35735..665769a 100644
--- a/tests/test_api_to_Layer.py
+++ b/tests/test_api_to_Layer.py
@@ -12,9 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
-
-os.environ["PADIFF_API_CHECK"] = "ON"
 
 import unittest
 
@@ -66,4 +63,3 @@ def test_api_to_Layer(self):
 
 if __name__ == "__main__":
     unittest.main()
-    os.environ["PADIFF_API_CHECK"] = "OFF"
diff --git a/tests/test_offline_compare.py b/tests/test_auto_diff.py
similarity index 100%
rename from tests/test_offline_compare.py
rename to tests/test_auto_diff.py
diff --git a/tests/test_check_weight_grad.py b/tests/test_check_weight_grad.py
index 5226d95..205784f 100644
--- a/tests/test_check_weight_grad.py
+++ b/tests/test_check_weight_grad.py
@@ -20,9 +20,9 @@
 
 
 from padiff import *
-from padiff.checker import check_grads, check_weights
-from padiff.dump_tools import dump_grads, dump_weights
-from padiff.interfaces.diff_utils import default_loss
+from padiff import check_grads, check_weights
+from padiff import dump_grads, dump_weights
+from padiff.comparison.auto import default_loss
 
 
 class SimpleLayer(paddle.nn.Layer):
@@ -63,18 +63,17 @@ def test_weight_grad_check_0(self):
         module = create_model(SimpleModule())
 
         inp = paddle.rand((100, 100)).numpy().astype("float32")
+        inp_modified = inp * 2
 
         assign_weight(layer, module)
         out = layer(paddle.to_tensor(inp))
         loss = default_loss(out, "paddle")
         layer.backward(loss)
 
-        out = module(torch.as_tensor(inp))
+        out = module(torch.as_tensor(inp_modified))
         loss = default_loss(out, "torch")
         module.backward(loss)
 
-        module.model.zero_grad()
-
         dump_weights(layer, layer.dump_path)
         dump_weights(module, module.dump_path)
 
diff --git a/tests/test_cli_end_to_end.py b/tests/test_cli_end_to_end.py
new file mode 100644
index 0000000..5536191
--- /dev/null
+++ b/tests/test_cli_end_to_end.py
@@ -0,0 +1,204 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import unittest
+import os
+import shutil
+import tempfile
+import sys
+from unittest.mock import patch
+from io import StringIO
+import numpy as np
+from padiff.cli import main as padiff_cli_main
+
+
+PADDLE_SCRIPT_TEMPLATE = """
+import paddle
+import numpy as np
+
+class SimpleLayer(paddle.nn.Layer):
+    def __init__(self):
+        super(SimpleLayer, self).__init__()
+        self.linear1 = paddle.nn.Linear({input_dim}, {hidden_dim})
+        self.linear2 = paddle.nn.Linear({hidden_dim}, {output_dim})
+        self.act = paddle.nn.ReLU()
+
+    def forward(self, x):
+        residual = x
+        x = self.linear1(x)
+        x = self.act(x)
+        x = x + residual
+        x = self.linear2(x)
+        return x
+
+def main():
+    inp = np.load("{input_file}")
+    inp = paddle.to_tensor(inp)
+
+    model = SimpleLayer()
+    optimizer = paddle.optimizer.Adam(parameters=model.parameters(), learning_rate=0.1)
+
+    out = model(inp)
+    loss = paddle.mean(out)
+    loss.backward()
+    optimizer.step()
+    optimizer.clear_grad()
+
+if __name__ == "__main__":
+    import os
+    os.makedirs("{dump_path}", exist_ok=True)
+    main()
+"""
+
+TORCH_SCRIPT_TEMPLATE = """
+import torch
+import numpy as np
+
+class SimpleModule(torch.nn.Module):
+    def __init__(self):
+        super(SimpleModule, self).__init__()
+        self.linear1 = torch.nn.Linear({input_dim}, {hidden_dim})
+        self.linear2 = torch.nn.Linear({hidden_dim}, {output_dim})
+        self.act = torch.nn.ReLU()
+
+    def forward(self, x):
+        residual = x
+        x = self.linear1(x)
+        x = self.act(x)
+        x = x + residual
+        x = self.linear2(x)
+        return x
+
+def main():
+    inp = np.load("{input_file}")
+    inp = torch.as_tensor(inp)
+
+    model = SimpleModule()
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
+
+    out = model(inp)
+    loss = torch.mean(out)
+    loss.backward()
+    optimizer.step()
+    optimizer.zero_grad()
+
+if __name__ == "__main__":
+    import os
+    os.makedirs("{dump_path}", exist_ok=True)
+    main()
+"""
+
+
+class TestCliEndToEnd(unittest.TestCase):
+    def setUp(self):
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.test_dir)
+
+        self.paddle_script_path = os.path.join(self.test_dir, "paddle_script.py")
+        self.torch_script_path = os.path.join(self.test_dir, "torch_script.py")
+        self.input_file = os.path.join(self.test_dir, "input.npy")
+        self.log_dir = os.path.join(self.test_dir, "padiff_logs")
+
+        os.makedirs(self.log_dir, exist_ok=True)
+
+        inp = np.random.rand(100, 100).astype("float32")
+        np.save(self.input_file, inp)
+        assert os.path.exists(self.input_file), f"Input file not created: {self.input_file}"
+
+        torch_script = TORCH_SCRIPT_TEMPLATE.format(
+            input_dim=100,
+            hidden_dim=100,
+            output_dim=10,
+            input_file=self.input_file,
+            model_name="model_torch",
+            dump_path=os.path.join(self.log_dir, "torch"),
+        )
+
+        paddle_script = PADDLE_SCRIPT_TEMPLATE.format(
+            input_dim=100,
+            hidden_dim=100,
+            output_dim=10,
+            input_file=self.input_file,
+            model_name="model_paddle",
+            dump_path=os.path.join(self.log_dir, "paddle"),
+        )
+
+        with open(self.torch_script_path, "w") as f:
+            f.write(torch_script)
+        with open(self.paddle_script_path, "w") as f:
+            f.write(paddle_script)
+
+        assert os.path.exists(self.torch_script_path), f"torch_script.py not created: {self.torch_script_path}"
+        assert os.path.exists(self.paddle_script_path), f"paddle_script.py not created: {self.paddle_script_path}"
+        assert os.path.getsize(self.torch_script_path) > 0, f"torch_script.py is empty: {self.torch_script_path}"
+        assert os.path.getsize(self.paddle_script_path) > 0, f"paddle_script.py is empty: {self.paddle_script_path}"
+
+    def _run_cli_test(self, extra_args):
+        test_args = [
+            "padiff",
+            "--pt_cmd",
+            f"python {self.torch_script_path}",
+            "--pd_cmd",
+            f"python {self.paddle_script_path}",
+            "--pt_model_name",
+            "model",
+            "--pd_model_name",
+            "model",
+            "--log_dir",
+            self.log_dir,
+        ]
+        test_args.extend(extra_args)
+
+        with patch.object(sys, "argv", test_args):
+            with patch("sys.stdout", new=StringIO()) as fake_out:
+                with patch("sys.stderr", new=StringIO()) as fake_err:
+                    try:
+                        padiff_cli_main()
+                    except SystemExit as e:
+                        if e.code != 0:
+                            self.fail(f"CLI failed with exit code {e.code}: {fake_err.getvalue()}")
+                    except Exception as e:
+                        self.fail(f"CLI raised an unexpected exception: {type(e).__name__}: {str(e)}")
+
+    def test_end_to_end_basic(self):
+        self._run_cli_test([])
+
+    def test_end_to_end_with_optimizer(self):
+        self._run_cli_test(["--pt_optim_name", "optimizer", "--pd_optim_name", "optimizer"])
+
+    def test_end_to_end_with_align_depth(self):
+        self._run_cli_test(["--align_depth", "0"])
+
+    def test_end_to_end_with_single_step(self):
+        self._run_cli_test(["--single_step_mode", "forward"])
+        self._run_cli_test(["--single_step_mode", "backward"])
+        self._run_cli_test(["--single_step_mode", "both"])
+
+    def test_end_to_end_with_black_list(self):
+        self._run_cli_test(["--black_list", "Linear"])
+
+    def test_end_to_end_with_different_atol_rtol(self):
+        self._run_cli_test(["--atol", "1e-3", "--rtol", "1e-4"])
+
+    def test_end_to_end_with_different_compare_mode(self):
+        self._run_cli_test(["--compare_mode", "strict"])
+        self._run_cli_test(["--compare_mode", "abs_mean"])
+
+    def test_end_to_end_with_different_action(self):
+        self._run_cli_test(["--action_name", "loose_equal"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_many_usages.py b/tests/test_many_usages.py
index 7eac23a..be799c2 100644
--- a/tests/test_many_usages.py
+++ b/tests/test_many_usages.py
@@ -16,11 +16,14 @@
 import unittest
 import paddle
 import os
+import json
 from paddle.distributed.fleet.utils import recompute
 
 default_path = get_dump_root_path()
 
 train_step = 10
+dump_freq = 2
+rank = paddle.distributed.get_rank() if paddle.distributed.is_initialized() else 0
 
 
 class SimpleLayer(paddle.nn.Layer):
@@ -54,13 +57,34 @@ def __len__(self):
         return self.num_samples
 
 
+def check_file_integrity(dump_path, step, rank):
+    """Assert that report.json and params.json were dumped for `step`/`rank`.
+
+    Raises AssertionError when a file is missing or its loaded JSON does not
+    contain "tree", and RuntimeError (chained to the cause) when loading fails.
+    """
+    step_dump_path = os.path.join(dump_path, f"step_{step}", f"rank_{rank}")
+    paths = {name: os.path.join(step_dump_path, name) for name in ("report.json", "params.json")}
+    for name, path in paths.items():
+        assert os.path.exists(path), f"{name} not found: {path}"
+    loaded = {}
+    for name, path in paths.items():
+        try:
+            with open(path, "r") as f:
+                loaded[name] = json.load(f)
+        except (OSError, ValueError) as e:
+            raise RuntimeError(f"Failed to load JSON file {path}: {e}") from e
+    for name, data in loaded.items():
+        assert "tree" in data, f"Invalid {name} format"
+
+
 class Test0SingleModelRun(unittest.TestCase):
     # single model run
     def test_single_model_run(self):
         print("Test for single model run.")
         layer = SimpleLayer()
         set_dump_root_path(os.path.join(default_path, "single_model_run"))
-        layer = create_model(layer, dump_freq=2)
+        layer = create_model(layer, dump_freq=dump_freq)
         inp = paddle.rand((100, 100)).numpy().astype("float32")
         opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=layer.model.parameters())
 
@@ -72,8 +96,8 @@ def test_single_model_run(self):
             opt.clear_grad()
             layer.try_dump()
 
-        assert check_report(layer.dump_path, layer.dump_path)
-        assert check_params(layer.dump_path, layer.dump_path)
+            if i % dump_freq == 0:
+                check_file_integrity(layer.dump_path, i, rank)
 
 
 class Test1DataloaderRun(unittest.TestCase):
@@ -83,12 +107,12 @@ def test_dataloader_run(self):
         print("Test for real dataloader.")
         layer = SimpleLayer()
         set_dump_root_path(os.path.join(default_path, "real_dataLoader"))
-        layer = create_model(layer, dump_freq=2)
+        layer = create_model(layer, dump_freq=dump_freq)
         opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=layer.model.parameters())
 
         dataset = RandomDataset(train_step)
         loader = paddle.io.DataLoader(dataset)
-        for inp in loader():
+        for step, inp in enumerate(loader()):
             out = layer(paddle.to_tensor(inp))
             loss = out.mean()
             layer.backward(loss)
@@ -96,8 +120,8 @@ def test_dataloader_run(self):
             opt.clear_grad()
             layer.try_dump()
 
-        assert check_report(layer.dump_path, layer.dump_path)
-        assert check_params(layer.dump_path, layer.dump_path)
+            if step % dump_freq == 0:
+                check_file_integrity(layer.dump_path, step, rank)
 
 
 class Test2WhiteLayerRun(unittest.TestCase):
@@ -108,13 +132,13 @@ def test_white_layer_class_run(self):
         print("Test for single model run.")
         layer = SimpleLayer()
         set_dump_root_path(os.path.join(default_path, "white_layer_class"))
-        layer = create_model(layer, dump_freq=2)
+        layer = create_model(layer, dump_freq=dump_freq)
         layer.update_white_list_with_class(paddle.nn.Linear, mode="all")
         opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=layer.model.parameters())
 
         dataset = RandomDataset(train_step)
         loader = paddle.io.DataLoader(dataset)
-        for inp in loader():
+        for step, inp in enumerate(loader()):
             out = layer(paddle.to_tensor(inp))
             loss = out.mean()
             layer.backward(loss)
@@ -122,8 +146,8 @@ def test_white_layer_class_run(self):
             opt.clear_grad()
             layer.try_dump()
 
-        assert check_report(layer.dump_path, layer.dump_path)
-        assert check_params(layer.dump_path, layer.dump_path)
+            if step % dump_freq == 0:
+                check_file_integrity(layer.dump_path, step, rank)
 
 
 class Test3GradAccumulationRun(unittest.TestCase):
@@ -135,7 +159,7 @@ def test_grad_accumulation_run(self):
         print("Test for gradient accumulation.")
         layer = SimpleLayer()
         set_dump_root_path(os.path.join(default_path, "grad_accumulation"))
-        layer = create_model(layer, dump_freq=2)
+        layer = create_model(layer, dump_freq=dump_freq)
         layer.update_white_list_with_class(paddle.nn.Linear, mode="all")
         opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=layer.model.parameters())
 
@@ -150,8 +174,8 @@ def test_grad_accumulation_run(self):
                 opt.clear_grad()
             layer.try_dump()
 
-        assert check_report(layer.dump_path, layer.dump_path)
-        assert check_params(layer.dump_path, layer.dump_path)
+            if step % dump_freq == 0:
+                check_file_integrity(layer.dump_path, step, rank)
 
 
 class Test4RecomputeRun(unittest.TestCase):
@@ -164,7 +188,7 @@ def test_recompute_run(self):
         print("Test for recompute.")
         layer = SimpleLayer()
         set_dump_root_path(os.path.join(default_path, "recompute"))
-        layer = create_model(layer, dump_freq=2)
+        layer = create_model(layer, dump_freq=dump_freq)
         layer.update_white_list_with_class(paddle.nn.Linear, mode="all")
         opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=layer.model.parameters())
 
@@ -181,8 +205,8 @@ def test_recompute_run(self):
                 opt.clear_grad()
             layer.try_dump()
 
-        assert check_report(layer.dump_path, layer.dump_path)
-        assert check_params(layer.dump_path, layer.dump_path)
+            if step % dump_freq == 0:
+                check_file_integrity(layer.dump_path, step, rank)
 
 
 class Test5AMPRun(unittest.TestCase):
@@ -197,7 +221,7 @@ def test_amp_run(self):
         layer = SimpleLayer()
         layer = paddle.amp.decorate(layer, level="O2")
         set_dump_root_path(os.path.join(default_path, "amp"))
-        layer = create_model(layer, dump_freq=2)
+        layer = create_model(layer, dump_freq=dump_freq)
         layer.update_white_list_with_class(paddle.nn.Linear, mode="all")
         opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=layer.model.parameters())
 
@@ -215,8 +239,8 @@ def test_amp_run(self):
                 opt.clear_grad()
             layer.try_dump()
 
-        assert check_report(layer.dump_path, layer.dump_path)
-        assert check_params(layer.dump_path, layer.dump_path)
+            if step % dump_freq == 0:
+                check_file_integrity(layer.dump_path, step, rank)
 
 
 if __name__ == "__main__":