Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion padiff/abstracts/hooks/hook.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -80,6 +80,17 @@ def init_weights_hook(model, input):
if isinstance(param, (paddle.Tensor, torch.Tensor)):
np_array = get_numpy_from_tensor(param)
init_weights[name] = np_array
logger.debug(f"Register(init_weights_hook): '{name}'(param)")

for name, buffer in model.named_buffers():
if isinstance(buffer, (paddle.Tensor, torch.Tensor)) and name not in init_weights:
try:
np_array = get_numpy_from_tensor(buffer)
init_weights[name] = np_array
logger.debug(f"Register(init_weights_hook): '{name}'(buffer)")
except Exception as e:
logger.warning(f"Skip(init_weights_hook): '{name}'(unitialized buffer): {e}")

report.init_weights = init_weights
report.init_weights_saved = True
return None
Expand Down
13 changes: 9 additions & 4 deletions padiff/abstracts/marker.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,7 @@ def traversal_for_hook(self):
def traversal_for_assign_weight(self):
yield self.proxy_model
for model in traversal_for_assign_weight(self.proxy_model, self):
if (
model.model not in self.unassigned_weights_list_recursively
and len(list(model.parameters(recursively=False))) == 0
):
if model.model not in self.unassigned_weights_list_recursively and no_avaliable_params(model):
continue
yield model

Expand Down Expand Up @@ -182,3 +179,11 @@ def traversal_for_hook(model, marker):

def traversal_for_assign_weight(model, marker):
    """Yield everything produced by ``traversal_layers_assign_weight(model, marker)``.

    Thin module-level generator wrapper; both arguments are forwarded unchanged.
    """
    layer_iter = traversal_layers_assign_weight(model, marker)
    yield from layer_iter


def no_avaliable_params(model):
    """Return True when ``model`` itself owns neither parameters nor buffers.

    Both lookups are made with ``recursively=False``. Buffers are only
    queried when no parameter was found.

    Args:
        model: object exposing ``named_parameters(recursively=...)`` and
            ``named_buffers(recursively=...)``, each returning an iterable.

    Returns:
        bool: ``False`` as soon as one parameter or one buffer is seen,
        ``True`` otherwise.
    """
    # Stop at the first item instead of materializing the whole iterable:
    # only emptiness matters here, so wrapping every entry is wasted work.
    if any(True for _ in model.named_parameters(recursively=False)):
        return False
    return not any(True for _ in model.named_buffers(recursively=False))
13 changes: 11 additions & 2 deletions padiff/abstracts/proxy/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,6 @@ def update_black_list_with_name(self, class_names, mode="all"):
if matched_layers:
self.update_black_list(matched_layers, mode)
logger.info(f"update blacklist: {len(matched_layers)} added with name(s) {class_names}")
else:
logger.warning(f"update blacklist: No layers matched for {class_names}")

def set_layer_map(self, layers):
self.marker.set_layer_map(layers)
Expand Down Expand Up @@ -229,6 +227,9 @@ def parameters(self, recursively):
def named_parameters(self, recursively):
raise NotImplementedError()

def named_buffers(self, recursively):
raise NotImplementedError()

# child sublayers, do not include self
def children(self):
raise NotImplementedError()
Expand Down Expand Up @@ -271,6 +272,10 @@ def named_parameters(self, recursively=True):
origin_iter = self.model.named_parameters(include_sublayers=recursively)
return deco_iter(origin_iter, ProxyParam.create_from)

def named_buffers(self, recursively=True):
origin_iter = self.model.named_buffers(include_sublayers=recursively)
return deco_iter(origin_iter, ProxyParam.create_from)

def children(self):
origin_iter = self.model.children()
return deco_iter(origin_iter, ProxyModel.create_from)
Expand Down Expand Up @@ -337,6 +342,10 @@ def named_parameters(self, recursively=True):
origin_iter = self.model.named_parameters(recurse=recursively)
return deco_iter(origin_iter, ProxyParam.create_from)

def named_buffers(self, recursively=True):
origin_iter = self.model.named_buffers(recurse=recursively)
return deco_iter(origin_iter, ProxyParam.create_from)

def children(self):
origin_iter = self.model.children()
return deco_iter(origin_iter, ProxyModel.create_from)
Expand Down
45 changes: 24 additions & 21 deletions padiff/abstracts/proxy/params.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,7 @@

import paddle
import torch
from ...utils import get_numpy_from_tensor


class ProxyParam:
Expand All @@ -29,8 +30,10 @@ def create_from(param):
return PaddleParam(param)
elif isinstance(param, torch.nn.parameter.Parameter):
return TorchParam(param)
elif isinstance(param, (torch.Tensor, paddle.Tensor)):
return ProxyTensor(param)
else:
raise RuntimeError(f"Can not create ProxyParam from {type(param)}")
logger.error(f"Can not create ProxyParam from {type(param)}")

def numpy(self):
raise NotImplementedError()
Expand All @@ -52,15 +55,8 @@ class PaddleParam(ProxyParam):
def __init__(self, param):
super().__init__(param, "paddle")

def _numpy(self, tensor):
if tensor.dtype == paddle.bfloat16:
np_array = tensor.astype("float32").numpy()
else:
np_array = tensor.numpy()
return np_array

def numpy(self):
return self._numpy(self.param)
return get_numpy_from_tensor(self.param)

def set_data(self, np_value):
paddle.assign(paddle.to_tensor(np_value, dtype=self.param.dtype), self.param)
Expand All @@ -70,14 +66,14 @@ def shape(self):

def grad(self):
if self.param.grad is not None:
return self._numpy(self.param.grad)
return get_numpy_from_tensor(self.param.grad)
else:
return None

def main_grad(self):
if hasattr(self.param, "main_grad") and self.param.main_grad is not None:
assert self.param.grad is None
return self._numpy(self.param.main_grad)
return get_numpy_from_tensor(self.param.main_grad)

else:
return None
Expand All @@ -87,15 +83,8 @@ class TorchParam(ProxyParam):
def __init__(self, param):
super().__init__(param, "torch")

def _numpy(self, tensor):
if tensor.dtype == torch.bfloat16:
np_array = tensor.cpu().detach().float().numpy()
else:
np_array = tensor.cpu().detach().numpy()
return np_array

def numpy(self):
return self._numpy(self.param.data)
return get_numpy_from_tensor(self.param.data)

def set_data(self, np_value):
self.param.data = torch.as_tensor(np_value).type(self.param.dtype).to(self.param.device)
Expand All @@ -105,9 +94,23 @@ def shape(self):

def grad(self):
if self.param.grad is not None:
return self._numpy(self.param.grad.data)
return get_numpy_from_tensor(self.param.grad.data)
else:
return None

def main_grad(self):
return None


class ProxyTensor(ProxyParam):
    """Proxy around a plain framework tensor rather than a parameter object.

    ``ProxyParam.create_from`` builds this wrapper for ``torch.Tensor`` /
    ``paddle.Tensor`` inputs that no earlier branch matched.
    """

    # NOTE(review): PaddleParam and TorchParam also define set_data() and
    # shape(); this class does not - confirm the inherited behavior is intended.

    def __init__(self, param):
        # "tensor" is the tag passed to the ProxyParam initializer.
        super().__init__(param, "tensor")

    def numpy(self):
        """Return the wrapped tensor converted by ``get_numpy_from_tensor``."""
        wrapped = self.param
        return get_numpy_from_tensor(wrapped)

    def grad(self):
        """Always ``None``: no gradient is reported for a plain tensor."""
        return None

    def main_grad(self):
        """Always ``None``: no main gradient is reported for a plain tensor."""
        return None
5 changes: 2 additions & 3 deletions padiff/comparison/checker/params.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -54,7 +54,6 @@ def _check_param_impl(report_path_0, report_path_1, compare_target, cfg=None):
for path_0, path_1 in zip(all_ranks_path_0, all_ranks_path_1):
reports = [load_json(path_0, f"{compare_target}.json"), load_json(path_1, f"{compare_target}.json")]
node_lists = [traversal_node(rep["tree"], []) for rep in reports]

logger.info(f"Checking {compare_target} in {path_0} and {path_1}")
param_rst = param_rst and _check_params_impl(node_lists, reports, compare_target, cfg)
return param_rst
Expand Down Expand Up @@ -101,7 +100,7 @@ def _check_params_impl(node_lists, reports, compare_target, cfg):
f"=========================\n"
f"FAILED!!! {compare_target.capitalize()} Mismatch:\n"
f" Layer: {node_0['name']}(base) vs {node_1['name']}(raw)\n"
f" Route: {node_0['route']}.{param_name_0}(base) vs {node_1['route']}.{param_name_1}(raw)\n"
f" Route.param: {node_0['route']}.{param_name_0}(base) vs {node_1['route']}.{param_name_1}(raw)\n"
f"{e}\n\n"
)
logger.log_file(log_name, "a", info)
Expand Down
15 changes: 11 additions & 4 deletions padiff/tools/dump.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -142,8 +142,16 @@ def dump_param_with_fn(model, fn, target_models):
}
if model.model in target_models: # only record sublayers specified by marker
param_info["available"] = True
params_found = set()
for param_name, param in model.named_parameters(recursively=False):
fn(param_name, param, param_info)
params_found.add(param_name)
for buffer_name, buffer in model.named_buffers(recursively=False):
if buffer_name not in params_found:
fn(buffer_name, buffer, param_info)
else:
logger.debug(f"Layer {model.class_name} ({model.route}) is NOT in target_models. Skipping.")

for name, child in model.named_children():
param_info["children"].append(dump_param_with_fn(child, fn, target_models))
return param_info
Expand Down Expand Up @@ -174,8 +182,6 @@ def _dump(param_name, param, param_info):
elif param.grad() is not None:
file_name = grad_dumper(param.grad())
param_info["grads"][param_name] = file_name
else:
param_info["grads"][param_name] = None

dump_param_prototype(model, _dump, f"{path}/params.json")

Expand All @@ -201,7 +207,8 @@ def _dump(param_name, param, param_info):
grad = param.param._collected_grad
grad = get_numpy_from_tensor(grad) if grad is not None else None

param_info["grads"][param_name] = grad_dumper(grad) if grad is not None else None
if grad is not None:
param_info["grads"][param_name] = grad_dumper(grad)

dump_param_prototype(model, _dump, f"{path}/grads.json")

Expand Down
2 changes: 1 addition & 1 deletion padiff/tools/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def load_init_weights_from_dump(
param_key = param_name

if param_key not in loaded_weights:
logger.info(f"param {param_key}({param_name}) not found, skip it.")
logger.warning(f"param {param_key}({param_name}) not found, skip it.")
continue
np_value = loaded_weights[param_key]

Expand Down
2 changes: 1 addition & 1 deletion tests/test_auto_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def forward(self, x):
return x


class TestOfflineCompare(unittest.TestCase):
class TestAutoDiff(unittest.TestCase):
def test_check_success(self):
layer = SimpleLayer()
layer.eval()
Expand Down
112 changes: 112 additions & 0 deletions tests/test_model_with_buffer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import unittest
from padiff import *
import paddle
import torch


class LayerWithinitializedBuffer(paddle.nn.Layer):
    """Paddle layer with two linear sublayers and a buffer that has a value from construction."""

    initialized_buffer: paddle.Tensor

    def __init__(self):
        super().__init__()
        self.linear1 = paddle.nn.Linear(100, 100)
        self.linear2 = paddle.nn.Linear(100, 10)
        self.act = paddle.nn.ReLU()
        # The buffer is registered with a one-element zero tensor.
        self.register_buffer("initialized_buffer", paddle.zeros([1]))

    def forward(self, x):
        """Apply linear1 + ReLU, add the input and the buffer, then apply linear2."""
        hidden = self.act(self.linear1(x))
        # Addition order is kept: activation output, then the skip input, then the buffer.
        hidden = hidden + x + self.initialized_buffer
        return self.linear2(hidden)


class ModuleWithinitializedBuffer(torch.nn.Module):
    """Torch module with two linear layers and a buffer that has a value from construction."""

    initialized_buffer: torch.Tensor

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(100, 100)
        self.linear2 = torch.nn.Linear(100, 10)
        self.act = torch.nn.ReLU()
        # The buffer is registered with a one-element zero tensor.
        self.register_buffer("initialized_buffer", torch.zeros([1]))

    def forward(self, x):
        """Apply linear1 + ReLU, add the input and the buffer, then apply linear2."""
        hidden = self.act(self.linear1(x))
        # Addition order is kept: activation output, then the skip input, then the buffer.
        hidden = hidden + x + self.initialized_buffer
        return self.linear2(hidden)


class LayerWithUninitializedBuffer(paddle.nn.Layer):
    """Paddle layer whose buffer is registered as ``None`` and filled on the first forward call."""

    uninitialized_buffer: paddle.Tensor

    def __init__(self):
        super().__init__()
        self.linear = paddle.nn.Linear(10, 10)
        # Registered without a value; forward() assigns the real tensor later.
        self.register_buffer("uninitialized_buffer", None)
        self._first_forward = True

    def forward(self, x):
        """Return ``linear(x)`` plus the buffer, creating the buffer on the first call."""
        if self._first_forward:
            # The first call replaces the placeholder with zeros shaped like the input.
            self.uninitialized_buffer = paddle.zeros_like(x)
            self._first_forward = False
        projected = self.linear(x)
        return projected + self.uninitialized_buffer


class ModuleWithUninitializedBuffer(torch.nn.Module):
    """Torch module whose buffer starts as an empty tensor and is filled on the first forward call."""

    uninitialized_buffer: torch.Tensor

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(10, 10)
        # Zero-element placeholder; forward() assigns the real tensor later.
        self.register_buffer("uninitialized_buffer", torch.empty(0))
        self._first_forward = True

    def forward(self, x):
        """Return ``linear(x)`` plus the buffer, creating the buffer on the first call."""
        if self._first_forward:
            # The first call replaces the placeholder with zeros shaped like the input.
            self.uninitialized_buffer = torch.zeros_like(x)
            self._first_forward = False
        projected = self.linear(x)
        return projected + self.uninitialized_buffer


class TestModelWithBuffer(unittest.TestCase):
    """Run ``auto_diff`` on torch/paddle model pairs that register buffers.

    Each test wraps both models with ``create_model``, feeds them the same
    random float32 input and expects ``auto_diff`` to return ``True``.
    """

    def test_initialized_buffer(self):
        """Buffers that hold a value from construction time must compare equal."""
        layer = create_model(LayerWithinitializedBuffer())
        module = create_model(ModuleWithinitializedBuffer())

        data = paddle.rand((1, 100)).numpy().astype("float32")
        # Input order matches the model order passed to auto_diff: torch, then paddle.
        inp = ({"x": torch.as_tensor(data)}, {"x": paddle.to_tensor(data)})

        # unittest assertion instead of a bare assert, so the check survives `python -O`.
        self.assertIs(auto_diff(module, layer, inp, atol=1e-4), True, "Failed. expected success.")

    def test_uninitialized_buffer(self):
        """Buffers that only get their value during the first forward must not break the diff."""
        layer = create_model(LayerWithUninitializedBuffer())
        module = create_model(ModuleWithUninitializedBuffer())

        data = paddle.rand((1, 10)).numpy().astype("float32")
        # Input order matches the model order passed to auto_diff: torch, then paddle.
        inp = ({"x": torch.as_tensor(data)}, {"x": paddle.to_tensor(data)})

        # unittest assertion instead of a bare assert, so the check survives `python -O`.
        self.assertIs(auto_diff(module, layer, inp, atol=1e-4), True, "Failed. expected success.")


if __name__ == "__main__":
    # Run this file's tests through unittest when it is executed as a script.
    unittest.main()