From 21bc0a27cada31d1f08730104622a6209094b077 Mon Sep 17 00:00:00 2001 From: lijialin03 Date: Wed, 10 Sep 2025 07:05:54 +0000 Subject: [PATCH 1/2] fix:fix bug of single step mode and docs --- padiff/abstracts/hooks/hook.py | 17 +++++++------ padiff/cli.py | 45 ++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/padiff/abstracts/hooks/hook.py b/padiff/abstracts/hooks/hook.py index 1126dbd..58058e6 100644 --- a/padiff/abstracts/hooks/hook.py +++ b/padiff/abstracts/hooks/hook.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import contextlib from functools import partial @@ -175,7 +174,7 @@ def info_hook(model, input, output, net_id): t.register_hook(partial(tensor_hook, bwd_item=bwd_item, nth_tensor=i, net_id=net_id)) # if under single step forward guard - if single_step_state() == "forward" and net_id != -1: + if single_step_state() in ("forward", "both") and net_id != -1: # two report_item with same id, the step_idx should be corresponded step_idx = len(list(filter(lambda x: x.type == "forward" and x.net_id == net_id, report.items))) - 1 base_report_node = single_step_check(report, net_id, step_idx, _model.__class__.__name__, "forward") @@ -196,12 +195,14 @@ def tensor_hook(x_grad, bwd_item, nth_tensor, net_id): new_grad = clone_tensors(x_grad) bwd_item.set_input_grads(nth_tensor, new_grad[0]) - if single_step_state() == "backward" and net_id != -1: + if single_step_state() in ("backward", "both") and net_id != -1: report = current_report() step_idx = ( list(filter(lambda x: x.type == "backward" and x.net_id == net_id, report.items)).index(bwd_item) - 1 ) - base_report_node = find_base_report_node(net_id, step_idx) + base_report_node = single_step_check( + report, net_id, step_idx, bwd_item.net.__class__.__name__, "backward", bwd_item=bwd_item + ) value = np.load(base_report_node["bwd_grads"][nth_tensor]["path"]) if isinstance(x_grad, paddle.Tensor): @@ -285,7 +286,7 @@ def single_step_check(report, net_id, step_idx, current_name, node_type, bwd_ite base_report_node = find_base_report_node(net_id, step_idx) if base_report_node["name"] != current_name: warning_msg = ( - f"\n ⚠️ Single-step alignment FAILED: {node_type} with net_id={net_id} mismatch!\n" + f"\n ⚠️ Single-step alignment WARNING: {node_type} with net_id={net_id} mismatch!\n" f" 📌 Mismatch {node_type.capitalize()}: {base_report_node['name']}(base) vs {current_name}(raw)\n" f" 💡 Suggestion: Models have different architectures or initialization order. " "Please check the model implementation or decrease 'align_depth' to reduce the alignment " @@ -313,10 +314,10 @@ def single_step_check(report, net_id, step_idx, current_name, node_type, bwd_ite route = report.stack._top().net.route logger.error( - f"\n ❌ Single-step alignment FAILED: Execution path mismatch in {node_type}!" + f"\n ❌ Single-step alignment FAILED: {node_type} with net_id={net_id} mismatch!" f"\n 📌 Layer '{route}' called {current_calls} times (current) vs {base_max_calls} times (base)." - f"\n 📌 Check the {node_type} logic in both models around this layer." + f"\n 📌 This indicates that the layer might be called but the call is not needed." + f"\n 📌 Please check the {node_type} logic in both models around this layer." ) - sys.exit(1) return None diff --git a/padiff/cli.py b/padiff/cli.py index c2baeb0..e0f7551 100644 --- a/padiff/cli.py +++ b/padiff/cli.py @@ -90,7 +90,7 @@ def main(): pt_cmd: python torch_model.py pd_cmd: python paddle_model.py align_depth: inf - * 使用方式: padiff --config config.yaml + * 使用方式: python -m padiff.cli --config config.yaml * 命令行参数会覆盖配置文件中的同名参数。 1. 命令参数 (--pt_cmd, --pd_cmd): @@ -120,11 +120,21 @@ def main(): 那么您应该使用: --pd_model_name net + 如果您的 Paddle 脚本中有: + trainer = SFTTrainer( + args=training_args, + model="Qwen/Qwen2.5-0.5B-Instruct", + train_dataset=dataset, + ) + trainer.train() + 那么您应该使用: + --pd_model_name trainer.model + 3. 优化器名参数 (--pt_optim_name, --pd_optim_name): 这些参数指定您在脚本中创建优化器实例的**变量名**。 * 它们不是类名,也不是文件名。 * 它们是优化器实例化时 `=` 左边的标识符。 - * 默认值: None (不传递优化器) + * 该参数为非必须参数,默认值: None (不传递优化器) 示例: 如果您的 PyTorch 脚本中有: @@ -138,16 +148,13 @@ def main(): --pt_optim_name optim 如果您的 Paddle 脚本中有: - optimizer = paddle.optimizer.Adam( - parameters=transformer.parameters(), - learning_rate=1.0, - epsilon=1e-09, - beta1=0.9, - beta2=0.98, - weight_decay=0.0, + trainer = SFTTrainer( + args=training_args, + model="Qwen/Qwen2.5-0.5B-Instruct", + train_dataset=dataset, ) - 那么您应该使用: - --pd_optim_name optimizer + trainer.train() + 由于 trainer.train() 中通常已经包含了完整的前反向过程,因此不需要传递此参数 4. 日志目录参数 (--log_dir): 指定生成报告和日志的目录。 @@ -156,8 +163,8 @@ def main(): 5. 对齐深度参数 (--align_depth): 控制对齐的粒度。通过指定一个深度值,可以忽略该深度以下的所有子模块。 * 值为整数: 指定一个具体的深度。例如,--align_depth 1 会忽略深度为1及以下的所有子模块。 - * 值为 'inf': (默认) 无限深度,会对齐到最细粒度的层(如 Linear, ReLU)。 - * 值为整数,且数值超过模型最大迭代深度时,相当于 'inf'。 + * 默认值: 'inf' ,即无限深度,会对齐到最细粒度的层(如 Linear, ReLU)。 + * 值为整数,当数值超过模型最大迭代深度时,相当于 'inf'。 * 示例: --align_depth 0 # 只对齐顶层模块 --align_depth 1 # 对齐到第一层子模块 @@ -166,15 +173,15 @@ def main(): 6. 单步对齐模式参数 (--single_step_mode): 启用逐层对齐模式。 * 可选值: forward, backward, both - * 默认值: None (禁用) + * 默认值: None (不启用) * 当启用时,工具会从自动加载基准模型的输出,并用其替换对齐模型的相应层输出。 7. 结果对比参数: 控制模型输出结果的对比精度和模式。 * --atol: 绝对误差容忍度 (default: 1e-6) * --rtol: 相对误差容忍度 (default: 1e-6) - * --compare_mode: 对比模式。可选值: mean, strict, abs_mean (default: mean) - * --action_name: 激活函数名称,可选值: equal, loose_equal, 用于特定的对比逻辑 (default: equal) + * --compare_mode: 对比模式,具体内容请看对应文档。可选值: mean, strict, abs_mean, 默认值: "mean" + * --action_name: 对比逻辑,具体内容请看对应文档。可选值: equal, loose_equal, 默认值: "equal" * 示例: --atol 1e-4 --rtol 1e-5 --compare_mode mean --action_name equal @@ -216,13 +223,13 @@ def main(): "--pt_optim_name", type=str, default=None, - help="The model name that appears in the pytorch script's code (default: 'model')", + help="The model name that appears in the pytorch script's code (default: None)", ) parser.add_argument( "--pd_optim_name", type=str, default=None, - help="The model name that appears in the paddle script's code (default: 'model')", + help="The model name that appears in the paddle script's code (default: None)", ) parser.add_argument( "--log_dir", @@ -244,7 +251,7 @@ def main(): help="List of layer names to add to the black list.", ) parser.add_argument( - "--atol", type=float, default=1e-6, help="Absolute tolerance for result comparison (default: 1e-4)" + "--atol", type=float, default=1e-6, help="Absolute tolerance for result comparison (default: 1e-6)" ) parser.add_argument( "--rtol", type=float, default=1e-6, help="Relative tolerance for result comparison (default: 1e-6)" From 717caf33046792bc97e20f26a358e24375a693a4 Mon Sep 17 00:00:00 2001 From: lijialin03 Date: Wed, 10 Sep 2025 07:57:29 +0000 Subject: [PATCH 2/2] ci:separate document checks --- .github/workflows/docs-check.yml | 60 ++++++++++++++++++++++++++++++++ .github/workflows/lint.yml | 8 ++++- .github/workflows/tests.yml | 8 ++++- 3 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/docs-check.yml diff --git a/.github/workflows/docs-check.yml b/.github/workflows/docs-check.yml new file mode 100644 index 0000000..f9851ab --- /dev/null +++ b/.github/workflows/docs-check.yml @@ -0,0 +1,60 @@ +name: Docs Check + +on: + push: + paths: + - 'docs/**' + - '**.md' + - '**.rst' + pull_request: + paths: + - 'docs/**' + - '**.md' + - '**.rst' + +jobs: + link-check: + name: Check Links + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Check Markdown Links + uses: gaurav0/markdown-link-check@v1 + with: + use-quiet-mode: 'yes' + use-verbose-mode: 'no' + + spell-check: + name: Spell Check + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Spell Check + uses: streetsidesoftware/action-spellcheck@v0 + with: + check-co-authored-commits: true + + format-check: + name: Format Check + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: '18' + + - name: Install Prettier + run: npm install --global prettier + + - name: Check Markdown Format + run: | + # 使用 prettier 检查格式,但不自动修改 + npx prettier --check "docs/**/*.md" "**/*.md" + # 如果格式不正确,此命令会失败,CI 将标记为失败 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 6a89272..ad7265f 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,6 +1,12 @@ name: Lint -on: [push, pull_request] +on: + push: + paths-ignore: + - 'docs/**' + pull_request: + paths-ignore: + - 'docs/**' jobs: Lint: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 35fec40..fc59fbe 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,6 +1,12 @@ name: Test -on: [push, pull_request] +on: + push: + paths-ignore: + - 'docs/**' + pull_request: + paths-ignore: + - 'docs/**' jobs: Test: