The tolerance may need to be relaxed. The greatest relative difference is large, but the assertion output does not show the actual values at the failing indices, and pytest's random seed does not currently appear to be printed in CI, so the failure cannot easily be reproduced to inspect those values.
=================================== FAILURES ===================================
_________ test_qwen_scope_sae_huggingface_loader_with_mocked_download __________
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_qwen_scope_sae_huggingfac0')
monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7f1b973a0340>
def test_qwen_scope_sae_huggingface_loader_with_mocked_download(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
):
d_in = 2048
d_sae = 32 * 1024
# Qwen Scope stores W_enc as (d_sae, d_in) and W_dec as (d_in, d_sae).
# The loader transposes both to SAELens convention.
raw_W_enc = torch.randn(d_sae, d_in)
raw_W_dec = torch.randn(d_in, d_sae)
11.6723],
[ -2.7288, 34.1195, -67.8960, ..., 16.9614, -35.8170,
-54.5632]]])
expected = tensor([[[ -15.0251, -59.9843, -26.1955, ..., -35.3796, -26.8852,
29.8179],
[ 28.6777, 64...7,
11.6723],
[ -2.7288, 34.1195, -67.8960, ..., 16.9614, -35.8170,
-54.5632]]])
def assert_close(
actual: torch.Tensor,
expected: torch.Tensor,
*,
allow_subclasses: bool = True,
atol: float | None = 1e-8,
rtol: float | None = 1e-5,
equal_nan: bool = False,
check_device: bool = True,
check_dtype: bool = True,
check_layout: bool = True,
check_stride: bool = False,
msg: str | None = None,
) -> None:
"""
torch.testing.assert_close() with torch.allclose() defaults (atol=1e-8, rtol=1e-5).
Pass a message string to customize the error header instead of writing lambda functions.
"""
final_msg = msg and (lambda error_msg: f"{msg}\n\n{error_msg}")
> torch.testing.assert_close(
actual,
expected=expected,
allow_subclasses=allow_subclasses,
atol=atol,
rtol=rtol,
equal_nan=equal_nan,
check_device=check_device,
check_dtype=check_dtype,
check_layout=check_layout,
check_stride=check_stride,
msg=final_msg,
)
E AssertionError: Tensor-likes are not close!
E
E Mismatched elements: 10288 / 458752 (2.2%)
E Greatest absolute difference: 0.0001125335693359375 at index (0, 4, 20308) (up to 1e-08 allowed)
E Greatest relative difference: 0.03291536122560501 at index (1, 5, 26494) (up to 1e-05 allowed)
tests/helpers.py:672: AssertionError
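`test_qwen_scope_sae_huggingface_loader_with_mocked_download` failed randomly in CI (the content of PR #687 is likely unrelated): https://github.com/decoderesearch/SAELens/actions/runs/27042996849/job/79822998134.
The tolerance may need to be relaxed. The greatest relative difference (about 3.3%) is large, but the assertion output does not show the actual values at the failing indices, and pytest's random seed does not currently appear to be printed in CI, so the failure cannot easily be reproduced to inspect those values. Note, however, that the greatest absolute difference is only about 1.1e-4 while the printed tensor values are on the order of tens, so the 3.3% relative difference must come from an element whose expected magnitude is at most roughly 1.1e-4 / 0.033 ≈ 3.4e-3. With `atol=1e-8`, such near-zero elements are effectively judged by `rtol=1e-5` alone. This is consistent with accumulated floating-point rounding (presumably float32, the default dtype of `torch.randn`) rather than a logic error, which suggests raising `atol` (for example to around 1e-3) rather than loosening `rtol`.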