From 21bc0a27cada31d1f08730104622a6209094b077 Mon Sep 17 00:00:00 2001
From: lijialin03 <lijialin03@baidu.com>
Date: Wed, 10 Sep 2025 07:05:54 +0000
Subject: [PATCH 1/2] fix:fix bug of single step mode and docs

---
 padiff/abstracts/hooks/hook.py | 17 +++++++------
 padiff/cli.py                  | 45 ++++++++++++++++++++--------------
 2 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/padiff/abstracts/hooks/hook.py b/padiff/abstracts/hooks/hook.py
index 1126dbd..58058e6 100644
--- a/padiff/abstracts/hooks/hook.py
+++ b/padiff/abstracts/hooks/hook.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
 import contextlib
 from functools import partial
 
@@ -175,7 +174,7 @@ def info_hook(model, input, output, net_id):
         t.register_hook(partial(tensor_hook, bwd_item=bwd_item, nth_tensor=i, net_id=net_id))
 
     # if under single step forward guard
-    if single_step_state() == "forward" and net_id != -1:
+    if single_step_state() in ("forward", "both") and net_id != -1:
         # two report_item with same id, the step_idx should be corresponded
         step_idx = len(list(filter(lambda x: x.type == "forward" and x.net_id == net_id, report.items))) - 1
         base_report_node = single_step_check(report, net_id, step_idx, _model.__class__.__name__, "forward")
@@ -196,12 +195,14 @@ def tensor_hook(x_grad, bwd_item, nth_tensor, net_id):
     new_grad = clone_tensors(x_grad)
     bwd_item.set_input_grads(nth_tensor, new_grad[0])
 
-    if single_step_state() == "backward" and net_id != -1:
+    if single_step_state() in ("backward", "both") and net_id != -1:
         report = current_report()
         step_idx = (
             list(filter(lambda x: x.type == "backward" and x.net_id == net_id, report.items)).index(bwd_item) - 1
         )
-        base_report_node = find_base_report_node(net_id, step_idx)
+        base_report_node = single_step_check(
+            report, net_id, step_idx, bwd_item.net.__class__.__name__, "backward", bwd_item=bwd_item
+        )
 
         value = np.load(base_report_node["bwd_grads"][nth_tensor]["path"])
         if isinstance(x_grad, paddle.Tensor):
@@ -285,7 +286,7 @@ def single_step_check(report, net_id, step_idx, current_name, node_type, bwd_ite
         base_report_node = find_base_report_node(net_id, step_idx)
         if base_report_node["name"] != current_name:
             warning_msg = (
-                f"\n   ⚠️ Single-step alignment FAILED: {node_type} with net_id={net_id} mismatch!\n"
+                f"\n   ⚠️ Single-step alignment WARNING: {node_type} with net_id={net_id} mismatch!\n"
                 f"   📌 Mismatch {node_type.capitalize()}: {base_report_node['name']}(base) vs {current_name}(raw)\n"
                 f"   💡 Suggestion: Models have different architectures or initialization order. "
                 "Please check the model implementation or decrease 'align_depth' to reduce the alignment "
@@ -313,10 +314,10 @@ def single_step_check(report, net_id, step_idx, current_name, node_type, bwd_ite
             route = report.stack._top().net.route
 
         logger.error(
-            f"\n   ❌ Single-step alignment FAILED: Execution path mismatch in {node_type}!"
+            f"\n   ❌ Single-step alignment FAILED: {node_type} with net_id={net_id} mismatch!"
             f"\n   📌 Layer '{route}' called {current_calls} times (current) vs {base_max_calls} times (base)."
-            f"\n   📌 Check the {node_type} logic in both models around this layer."
+            f"\n   📌 This indicates that the layer might be called but the call is not needed."
+            f"\n   📌 Please check the {node_type} logic in both models around this layer."
         )
-        sys.exit(1)
 
     return None
diff --git a/padiff/cli.py b/padiff/cli.py
index c2baeb0..e0f7551 100644
--- a/padiff/cli.py
+++ b/padiff/cli.py
@@ -90,7 +90,7 @@ def main():
                 pt_cmd: python torch_model.py
                 pd_cmd: python paddle_model.py
                 align_depth: inf
-           * 使用方式: padiff --config config.yaml
+           * 使用方式: python -m padiff.cli --config config.yaml
            * 命令行参数会覆盖配置文件中的同名参数。
 
         1. 命令参数 (--pt_cmd, --pd_cmd):
@@ -120,11 +120,21 @@ def main():
               那么您应该使用：
                 --pd_model_name net
 
+              如果您的 Paddle 脚本中有：
+                trainer = SFTTrainer(
+                    args=training_args,
+                    model="Qwen/Qwen2.5-0.5B-Instruct",
+                    train_dataset=dataset,
+                )
+                trainer.train()
+              那么您应该使用：
+                --pd_model_name trainer.model
+
         3. 优化器名参数 (--pt_optim_name, --pd_optim_name):
            这些参数指定您在脚本中创建优化器实例的**变量名**。
            * 它们不是类名，也不是文件名。
            * 它们是优化器实例化时 `=` 左边的标识符。
-           * 默认值: None (不传递优化器)
+           * 该参数为非必须参数，默认值: None (不传递优化器)
 
            示例：
               如果您的 PyTorch 脚本中有：
@@ -138,16 +148,13 @@ def main():
                 --pt_optim_name optim
 
               如果您的 Paddle 脚本中有：
-                optimizer = paddle.optimizer.Adam(
-                    parameters=transformer.parameters(),
-                    learning_rate=1.0,
-                    epsilon=1e-09,
-                    beta1=0.9,
-                    beta2=0.98,
-                    weight_decay=0.0,
+                trainer = SFTTrainer(
+                    args=training_args,
+                    model="Qwen/Qwen2.5-0.5B-Instruct",
+                    train_dataset=dataset,
                 )
-              那么您应该使用：
-                --pd_optim_name optimizer
+                trainer.train()
+              由于 trainer.train() 中通常已经包含了完整的前反向过程，因此不需要传递此参数
 
         4. 日志目录参数 (--log_dir):
            指定生成报告和日志的目录。
@@ -156,8 +163,8 @@ def main():
         5. 对齐深度参数 (--align_depth):
            控制对齐的粒度。通过指定一个深度值，可以忽略该深度以下的所有子模块。
            * 值为整数: 指定一个具体的深度。例如，--align_depth 1 会忽略深度为1及以下的所有子模块。
-           * 值为 'inf': (默认) 无限深度，会对齐到最细粒度的层（如 Linear, ReLU）。
-           * 值为整数，且数值超过模型最大迭代深度时，相当于 'inf'。
+           * 默认值: 'inf' ，即无限深度，会对齐到最细粒度的层（如 Linear, ReLU）。
+           * 值为整数，当数值超过模型最大迭代深度时，相当于 'inf'。
            * 示例：
               --align_depth 0  # 只对齐顶层模块
               --align_depth 1  # 对齐到第一层子模块
@@ -166,15 +173,15 @@ def main():
         6. 单步对齐模式参数 (--single_step_mode):
            启用逐层对齐模式。
            * 可选值: forward, backward, both
-           * 默认值: None (禁用)
+           * 默认值: None (不启用)
            * 当启用时，工具会从自动加载基准模型的输出，并用其替换对齐模型的相应层输出。
 
         7. 结果对比参数:
            控制模型输出结果的对比精度和模式。
            * --atol: 绝对误差容忍度 (default: 1e-6)
            * --rtol: 相对误差容忍度 (default: 1e-6)
-           * --compare_mode: 对比模式。可选值: mean, strict, abs_mean (default: mean)
-           * --action_name: 激活函数名称，可选值: equal, loose_equal, 用于特定的对比逻辑 (default: equal)
+           * --compare_mode: 对比模式，具体内容请看对应文档。可选值: mean, strict, abs_mean, 默认值: "mean"
+           * --action_name: 对比逻辑，具体内容请看对应文档。可选值: equal, loose_equal, 默认值: "equal"
            * 示例:
               --atol 1e-4 --rtol 1e-5 --compare_mode mean --action_name equal
 
@@ -216,13 +223,13 @@ def main():
         "--pt_optim_name",
         type=str,
         default=None,
-        help="The model name that appears in the pytorch script's code (default: 'model')",
+        help="The optimizer name that appears in the pytorch script's code (default: None)",
     )
     parser.add_argument(
         "--pd_optim_name",
         type=str,
         default=None,
-        help="The model name that appears in the paddle script's code (default: 'model')",
+        help="The optimizer name that appears in the paddle script's code (default: None)",
     )
     parser.add_argument(
         "--log_dir",
@@ -244,7 +251,7 @@ def main():
         help="List of layer names to add to the black list.",
     )
     parser.add_argument(
-        "--atol", type=float, default=1e-6, help="Absolute tolerance for result comparison (default: 1e-4)"
+        "--atol", type=float, default=1e-6, help="Absolute tolerance for result comparison (default: 1e-6)"
     )
     parser.add_argument(
         "--rtol", type=float, default=1e-6, help="Relative tolerance for result comparison (default: 1e-6)"

From 717caf33046792bc97e20f26a358e24375a693a4 Mon Sep 17 00:00:00 2001
From: lijialin03 <lijialin03@baidu.com>
Date: Wed, 10 Sep 2025 07:57:29 +0000
Subject: [PATCH 2/2] ci:separate document checks

---
 .github/workflows/docs-check.yml | 60 ++++++++++++++++++++++++++++++++
 .github/workflows/lint.yml       |  8 ++++-
 .github/workflows/tests.yml      |  8 ++++-
 3 files changed, 74 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/docs-check.yml

diff --git a/.github/workflows/docs-check.yml b/.github/workflows/docs-check.yml
new file mode 100644
index 0000000..f9851ab
--- /dev/null
+++ b/.github/workflows/docs-check.yml
@@ -0,0 +1,60 @@
+name: Docs Check
+
+on:
+  push:
+    paths:
+      - 'docs/**'
+      - '**.md'
+      - '**.rst'
+  pull_request:
+    paths:
+      - 'docs/**'
+      - '**.md'
+      - '**.rst'
+
+jobs:
+  link-check:
+    name: Check Links
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Check Markdown Links
+        uses: gaurav-nelson/github-action-markdown-link-check@v1
+        with:
+          use-quiet-mode: 'yes'
+          use-verbose-mode: 'no'
+
+  spell-check:
+    name: Spell Check
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Spell Check
+        uses: streetsidesoftware/cspell-action@v6
+        with:
+          incremental_files_only: true
+
+  format-check:
+    name: Format Check
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: '18'
+
+      - name: Install Prettier
+        run: npm install --global prettier
+
+      - name: Check Markdown Format
+        run: |
+          # 使用 prettier 检查格式，但不自动修改
+          npx prettier --check "docs/**/*.md" "**/*.md"
+          # 如果格式不正确，此命令会失败，CI 将标记为失败
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 6a89272..ad7265f 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -1,6 +1,12 @@
 name: Lint
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
 
 jobs:
   Lint:
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 35fec40..fc59fbe 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,6 +1,12 @@
 name: Test
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
 
 jobs:
   Test: