PaddlePaddle · lijialin03 · Sep 9, 2025 · Sep 9, 2025
diff --git a/padiff/abstracts/hooks/hook.py b/padiff/abstracts/hooks/hook.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -80,6 +80,17 @@ def init_weights_hook(model, input):
             if isinstance(param, (paddle.Tensor, torch.Tensor)):
                 np_array = get_numpy_from_tensor(param)
                 init_weights[name] = np_array
+                logger.debug(f"Register(init_weights_hook): '{name}'(param)")
+
+        for name, buffer in model.named_buffers():
+            if isinstance(buffer, (paddle.Tensor, torch.Tensor)) and name not in init_weights:
+                try:
+                    np_array = get_numpy_from_tensor(buffer)
+                    init_weights[name] = np_array
+                    logger.debug(f"Register(init_weights_hook): '{name}'(buffer)")
+                except Exception as e:  # best-effort: a buffer that cannot be converted is logged and skipped
+                    logger.warning(f"Skip(init_weights_hook): '{name}'(uninitialized buffer): {e}")
+
         report.init_weights = init_weights
         report.init_weights_saved = True
     return None

diff --git a/padiff/abstracts/marker.py b/padiff/abstracts/marker.py
@@ -107,10 +107,7 @@ def traversal_for_hook(self):
     def traversal_for_assign_weight(self):
         yield self.proxy_model
         for model in traversal_for_assign_weight(self.proxy_model, self):
-            if (
-                model.model not in self.unassigned_weights_list_recursively
-                and len(list(model.parameters(recursively=False))) == 0
-            ):
+            if model.model not in self.unassigned_weights_list_recursively and no_avaliable_params(model):
                 continue
             yield model
 
@@ -182,3 +179,11 @@ def traversal_for_hook(model, marker):
 
 def traversal_for_assign_weight(model, marker):
     yield from traversal_layers_assign_weight(model, marker)
+
+
+def no_avaliable_params(model):
+    """Return True when `model` itself (recursively=False) exposes neither parameters nor buffers."""
+    own_params = list(model.named_parameters(recursively=False))
+    if own_params:
+        return False
+    return not list(model.named_buffers(recursively=False))
diff --git a/padiff/abstracts/proxy/model.py b/padiff/abstracts/proxy/model.py
@@ -173,8 +173,6 @@ def update_black_list_with_name(self, class_names, mode="all"):
         if matched_layers:
             self.update_black_list(matched_layers, mode)
             logger.info(f"update blacklist: {len(matched_layers)} added with name(s) {class_names}")
-        else:
-            logger.warning(f"update blacklist: No layers matched for {class_names}")
 
     def set_layer_map(self, layers):
         self.marker.set_layer_map(layers)
@@ -229,6 +227,9 @@ def parameters(self, recursively):
     def named_parameters(self, recursively):
         raise NotImplementedError()
 
+    def named_buffers(self, recursively):  # abstract hook; concrete versions later in this file forward to self.model
+        raise NotImplementedError()
+
     # child sublayers, do not include self
     def children(self):
         raise NotImplementedError()
@@ -271,6 +272,10 @@ def named_parameters(self, recursively=True):
         origin_iter = self.model.named_parameters(include_sublayers=recursively)
         return deco_iter(origin_iter, ProxyParam.create_from)
 
+    def named_buffers(self, recursively=True):
+        # `recursively` is forwarded as the wrapped model's `include_sublayers`; items go through ProxyParam.create_from.
+        return deco_iter(self.model.named_buffers(include_sublayers=recursively), ProxyParam.create_from)
+
     def children(self):
         origin_iter = self.model.children()
         return deco_iter(origin_iter, ProxyModel.create_from)
@@ -337,6 +342,10 @@ def named_parameters(self, recursively=True):
         origin_iter = self.model.named_parameters(recurse=recursively)
         return deco_iter(origin_iter, ProxyParam.create_from)
 
+    def named_buffers(self, recursively=True):
+        # `recursively` is forwarded as the wrapped model's `recurse`; items go through ProxyParam.create_from.
+        return deco_iter(self.model.named_buffers(recurse=recursively), ProxyParam.create_from)
+
     def children(self):
         origin_iter = self.model.children()
         return deco_iter(origin_iter, ProxyModel.create_from)

diff --git a/padiff/abstracts/proxy/params.py b/padiff/abstracts/proxy/params.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
 
 import paddle
 import torch
+from ...utils import get_numpy_from_tensor
 
 
 class ProxyParam:
@@ -29,8 +30,10 @@ def create_from(param):
             return PaddleParam(param)
         elif isinstance(param, torch.nn.parameter.Parameter):
             return TorchParam(param)
+        elif isinstance(param, (torch.Tensor, paddle.Tensor)):  # plain tensors (e.g. buffers) not matched above
+            return ProxyTensor(param)
         else:
             raise RuntimeError(f"Can not create ProxyParam from {type(param)}")
 
     def numpy(self):
         raise NotImplementedError()
@@ -52,15 +55,8 @@ class PaddleParam(ProxyParam):
     def __init__(self, param):
         super().__init__(param, "paddle")
 
-    def _numpy(self, tensor):
-        if tensor.dtype == paddle.bfloat16:
-            np_array = tensor.astype("float32").numpy()
-        else:
-            np_array = tensor.numpy()
-        return np_array
-
     def numpy(self):
-        return self._numpy(self.param)
+        return get_numpy_from_tensor(self.param)
 
     def set_data(self, np_value):
         paddle.assign(paddle.to_tensor(np_value, dtype=self.param.dtype), self.param)
@@ -70,14 +66,14 @@ def shape(self):
 
     def grad(self):
         if self.param.grad is not None:
-            return self._numpy(self.param.grad)
+            return get_numpy_from_tensor(self.param.grad)
         else:
             return None
 
     def main_grad(self):
         if hasattr(self.param, "main_grad") and self.param.main_grad is not None:
             assert self.param.grad is None
-            return self._numpy(self.param.main_grad)
+            return get_numpy_from_tensor(self.param.main_grad)
 
         else:
             return None
@@ -87,15 +83,8 @@ class TorchParam(ProxyParam):
     def __init__(self, param):
         super().__init__(param, "torch")
 
-    def _numpy(self, tensor):
-        if tensor.dtype == torch.bfloat16:
-            np_array = tensor.cpu().detach().float().numpy()
-        else:
-            np_array = tensor.cpu().detach().numpy()
-        return np_array
-
     def numpy(self):
-        return self._numpy(self.param.data)
+        return get_numpy_from_tensor(self.param.data)
 
     def set_data(self, np_value):
         self.param.data = torch.as_tensor(np_value).type(self.param.dtype).to(self.param.device)
@@ -105,9 +94,23 @@ def shape(self):
 
     def grad(self):
         if self.param.grad is not None:
-            return self._numpy(self.param.grad.data)
+            return get_numpy_from_tensor(self.param.grad.data)
         else:
             return None
 
     def main_grad(self):
         return None
+
+
+class ProxyTensor(ProxyParam):  # wraps a torch/paddle tensor that is not a Parameter (create_from's tensor branch)
+    def __init__(self, param):
+        super().__init__(param, "tensor")  # second argument is the literal tag "tensor", not "paddle"/"torch"
+
+    def numpy(self):
+        return get_numpy_from_tensor(self.param)  # same converter PaddleParam/TorchParam.numpy call
+
+    def grad(self):
+        return None  # this wrapper never reports a gradient
+
+    def main_grad(self):
+        return None  # NOTE(review): set_data()/shape() are not overridden here; confirm no caller needs them
diff --git a/padiff/comparison/checker/params.py b/padiff/comparison/checker/params.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -54,7 +54,6 @@ def _check_param_impl(report_path_0, report_path_1, compare_target, cfg=None):
     for path_0, path_1 in zip(all_ranks_path_0, all_ranks_path_1):
         reports = [load_json(path_0, f"{compare_target}.json"), load_json(path_1, f"{compare_target}.json")]
         node_lists = [traversal_node(rep["tree"], []) for rep in reports]
-
         logger.info(f"Checking {compare_target} in {path_0} and {path_1}")
         param_rst = param_rst and _check_params_impl(node_lists, reports, compare_target, cfg)
     return param_rst
@@ -101,7 +100,7 @@ def _check_params_impl(node_lists, reports, compare_target, cfg):
                     f"=========================\n"
                     f"FAILED!!! {compare_target.capitalize()} Mismatch:\n"
                     f"   Layer: {node_0['name']}(base) vs {node_1['name']}(raw)\n"
-                    f"   Route: {node_0['route']}.{param_name_0}(base) vs {node_1['route']}.{param_name_1}(raw)\n"
+                    f"   Route.param: {node_0['route']}.{param_name_0}(base) vs {node_1['route']}.{param_name_1}(raw)\n"
                     f"{e}\n\n"
                 )
                 logger.log_file(log_name, "a", info)

diff --git a/padiff/tools/dump.py b/padiff/tools/dump.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -142,8 +142,16 @@ def dump_param_with_fn(model, fn, target_models):
         }
         if model.model in target_models:  # only record sublayers specified by marker
             param_info["available"] = True
+            params_found = set()
             for param_name, param in model.named_parameters(recursively=False):
                 fn(param_name, param, param_info)
+                params_found.add(param_name)
+            for buffer_name, buffer in model.named_buffers(recursively=False):
+                if buffer_name not in params_found:
+                    fn(buffer_name, buffer, param_info)
+        else:
+            logger.debug(f"Layer {model.class_name} ({model.route}) is NOT in target_models. Skipping.")
+
         for name, child in model.named_children():
             param_info["children"].append(dump_param_with_fn(child, fn, target_models))
         return param_info
@@ -174,8 +182,6 @@ def _dump(param_name, param, param_info):
         elif param.grad() is not None:
             file_name = grad_dumper(param.grad())
             param_info["grads"][param_name] = file_name
-        else:
-            param_info["grads"][param_name] = None
 
     dump_param_prototype(model, _dump, f"{path}/params.json")
 
@@ -201,7 +207,8 @@ def _dump(param_name, param, param_info):
             grad = param.param._collected_grad
             grad = get_numpy_from_tensor(grad) if grad is not None else None
 
-        param_info["grads"][param_name] = grad_dumper(grad) if grad is not None else None
+        if grad is not None:
+            param_info["grads"][param_name] = grad_dumper(grad)
 
     dump_param_prototype(model, _dump, f"{path}/grads.json")
 

diff --git a/padiff/tools/load.py b/padiff/tools/load.py
@@ -168,7 +168,7 @@ def load_init_weights_from_dump(
                     param_key = param_name
 
                 if param_key not in loaded_weights:
-                    logger.info(f"param {param_key}({param_name}) not found, skip it.")
+                    logger.warning(f"param {param_key}({param_name}) not found, skip it.")
                     continue
                 np_value = loaded_weights[param_key]
 

diff --git a/tests/test_auto_diff.py b/tests/test_auto_diff.py
@@ -50,7 +50,7 @@ def forward(self, x):
         return x
 
 
-class TestOfflineCompare(unittest.TestCase):
+class TestAutoDiff(unittest.TestCase):
     def test_check_success(self):
         layer = SimpleLayer()
         layer.eval()

diff --git a/tests/test_model_with_buffer.py b/tests/test_model_with_buffer.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import unittest
+from padiff import *
+import paddle
+import torch
+
+
+class LayerWithinitializedBuffer(paddle.nn.Layer):
+    """Paddle model: Linear -> ReLU -> add input and buffer -> Linear; the buffer is created in __init__."""
+
+    initialized_buffer: paddle.Tensor
+
+    def __init__(self):
+        super().__init__()
+        self.linear1 = paddle.nn.Linear(100, 100)
+        self.linear2 = paddle.nn.Linear(100, 10)
+        self.act = paddle.nn.ReLU()
+        self.register_buffer("initialized_buffer", paddle.zeros([1]))
+
+    def forward(self, x):
+        # Keep the summation order: activation output, then the input, then the buffer.
+        hidden = self.act(self.linear1(x))
+        shifted = hidden + x + self.initialized_buffer
+        return self.linear2(shifted)
+
+
+class ModuleWithinitializedBuffer(torch.nn.Module):
+    """Torch model: Linear -> ReLU -> add input and buffer -> Linear; the buffer is created in __init__."""
+
+    initialized_buffer: torch.Tensor
+
+    def __init__(self):
+        super().__init__()
+        self.linear1 = torch.nn.Linear(100, 100)
+        self.linear2 = torch.nn.Linear(100, 10)
+        self.act = torch.nn.ReLU()
+        self.register_buffer("initialized_buffer", torch.zeros([1]))
+
+    def forward(self, x):
+        # Keep the summation order: activation output, then the input, then the buffer.
+        hidden = self.act(self.linear1(x))
+        shifted = hidden + x + self.initialized_buffer
+        return self.linear2(shifted)
+
+
+class LayerWithUninitializedBuffer(paddle.nn.Layer):
+    """Paddle layer whose buffer is registered as None and only filled in by the first forward call."""
+    uninitialized_buffer: paddle.Tensor
+
+    def __init__(self):
+        super().__init__()
+        self.linear = paddle.nn.Linear(10, 10)
+        self.register_buffer("uninitialized_buffer", None)
+        self._first_forward = True
+
+    def forward(self, x):
+        if self._first_forward:
+            self.uninitialized_buffer, self._first_forward = paddle.zeros_like(x), False
+        return self.linear(x) + self.uninitialized_buffer
+
+
+class ModuleWithUninitializedBuffer(torch.nn.Module):
+    """Torch module whose buffer starts as an empty tensor and is replaced by the first forward call."""
+    uninitialized_buffer: torch.Tensor
+
+    def __init__(self):
+        super().__init__()
+        self.linear = torch.nn.Linear(10, 10)
+        self.register_buffer("uninitialized_buffer", torch.empty(0))
+        self._first_forward = True
+
+    def forward(self, x):
+        if self._first_forward:
+            self.uninitialized_buffer, self._first_forward = torch.zeros_like(x), False
+        return self.linear(x) + self.uninitialized_buffer
+
+
+class TestModelWithBuffer(unittest.TestCase):
+    """auto_diff must return True for paired torch/paddle models that register buffers."""
+
+    def test_initialized_buffer(self):
+        layer = create_model(LayerWithinitializedBuffer())
+        module = create_model(ModuleWithinitializedBuffer())
+        inp = paddle.rand((1, 100)).numpy().astype("float32")
+        inp = ({"x": torch.as_tensor(inp)}, {"x": paddle.to_tensor(inp)})
+        self.assertIs(auto_diff(module, layer, inp, atol=1e-4), True, "Failed. expected success.")
+
+    def test_uninitialized_buffer(self):
+        # unittest assertions are used because bare `assert` statements are stripped under `python -O`.
+        layer = create_model(LayerWithUninitializedBuffer())
+        module = create_model(ModuleWithUninitializedBuffer())
+        inp = paddle.rand((1, 10)).numpy().astype("float32")
+        inp = ({"x": torch.as_tensor(inp)}, {"x": paddle.to_tensor(inp)})
+        self.assertIs(auto_diff(module, layer, inp, atol=1e-4), True, "Failed. expected success.")
+
+
+if __name__ == "__main__":
+    unittest.main()