Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions tests/models/albef/test_albef.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ def albef_model_output(albef_model):
def test_albef_image_embeddings(albef_model_output):
expected = Tensor(
[
[[1.364883, -1.003092, -0.361791], [-0.634884, 1.411830, -0.776947]],
[[1.401580, -0.537510, -0.864071], [1.378901, -0.417473, -0.961429]],
[[1.337287, -0.270253, -1.067032], [1.414201, -0.705347, -0.708854]],
[[1.402044, -0.540827, -0.861216], [-1.410284, 0.613923, 0.796362]],
]
)
assert_expected(albef_model_output.image_embeddings, expected, rtol=0, atol=1e-4)
Expand All @@ -92,8 +92,8 @@ def test_albef_image_embeddings(albef_model_output):
def test_albef_image_embeddings_momentum(albef_model_output):
expected = Tensor(
[
[[1.364883, -1.003092, -0.361791], [-0.634884, 1.411830, -0.776947]],
[[1.401580, -0.537510, -0.864070], [1.378902, -0.417473, -0.961429]],
[[1.337286, -0.270253, -1.067033], [1.414201, -0.705347, -0.708854]],
[[1.402043, -0.540827, -0.861217], [-1.410284, 0.613922, 0.796362]],
]
)
assert_expected(albef_model_output.image_embeddings_m, expected, rtol=0, atol=1e-4)
Expand Down Expand Up @@ -122,8 +122,8 @@ def test_albef_text_embeddings_momentum(albef_model_output):
def test_albef_multimodal_embeddings(albef_model_output):
expected = Tensor(
[
[[-0.068738, 1.257666, -1.188928], [1.409873, -0.609056, -0.800817]],
[[-1.402520, 0.544084, 0.858435], [1.202279, -1.246038, 0.043760]],
[[1.228663, -0.007874, -1.220789], [1.401657, -0.863674, -0.537983]],
[[-1.021501, 1.357746, -0.336245], [1.409910, -0.800425, -0.609485]],
]
)
assert_expected(
Expand All @@ -134,8 +134,8 @@ def test_albef_multimodal_embeddings(albef_model_output):
def test_albef_multimodal_embeddings_momentum(albef_model_output):
expected = Tensor(
[
[[-0.068738, 1.257666, -1.188928], [1.409873, -0.609056, -0.800817]],
[[-1.402520, 0.544084, 0.858435], [1.202279, -1.246038, 0.043760]],
[[1.228662, -0.007872, -1.220790], [1.401657, -0.863674, -0.537983]],
[[-1.021501, 1.357746, -0.336245], [1.409910, -0.800426, -0.609485]],
]
)
assert_expected(
Expand Down
4 changes: 2 additions & 2 deletions tests/models/albef/test_image_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def test_vision_transformer(self):
output = vit(input)
expected = Tensor(
[
[1.399478, -0.875986, -0.523492],
[-0.869867, 1.400589, -0.530722],
[1.407929, -0.819281, -0.588648],
[-0.692709, 1.414116, -0.721407],
]
).unsqueeze(0)
assert_expected(output, expected, rtol=0, atol=1e-4)
Expand Down
18 changes: 9 additions & 9 deletions tests/models/test_omnivore.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,19 @@ def test_omnivore_swin_t_forward(omnivore_swin_t_model, device):

assert_expected(image_score.size(), torch.Size((1, 1000)))
assert_expected(
image_score.abs().sum(), torch.tensor(184.01417), rtol=1e-3, atol=1e-3
image_score.abs().sum(), torch.tensor(208.64470), rtol=1e-3, atol=1e-3
)

rgbd = torch.randn((1, 4, 1, 112, 112), device=device)
rgbd_score = model(rgbd, input_type="rgbd")
assert_expected(rgbd_score.size(), torch.Size((1, 19)))
assert_expected(rgbd_score.abs().sum(), torch.tensor(3.60813), rtol=1e-3, atol=1e-3)
assert_expected(rgbd_score.abs().sum(), torch.tensor(4.42536), rtol=1e-3, atol=1e-3)

video = torch.randn((1, 3, 4, 112, 112), device=device)
video_score = model(video, input_type="video")
assert_expected(video_score.size(), torch.Size((1, 400)))
assert_expected(
video_score.abs().sum(), torch.tensor(110.70048), rtol=1e-3, atol=1e-3
video_score.abs().sum(), torch.tensor(71.27126), rtol=1e-3, atol=1e-3
)


Expand All @@ -64,19 +64,19 @@ def test_omnivore_swin_s_forward(omnivore_swin_s_model, device):

assert_expected(image_score.size(), torch.Size((1, 1000)))
assert_expected(
image_score.abs().sum(), torch.tensor(239.73104), rtol=1e-3, atol=1e-3
image_score.abs().sum(), torch.tensor(215.43909), rtol=1e-3, atol=1e-3
)

rgbd = torch.randn((1, 4, 1, 112, 112), device=device)
rgbd_score = model(rgbd, input_type="rgbd")
assert_expected(rgbd_score.size(), torch.Size((1, 19)))
assert_expected(rgbd_score.abs().sum(), torch.tensor(5.80919), rtol=1e-3, atol=1e-3)
assert_expected(rgbd_score.abs().sum(), torch.tensor(4.23604), rtol=1e-3, atol=1e-3)

video = torch.randn((1, 3, 4, 112, 112), device=device)
video_score = model(video, input_type="video")
assert_expected(video_score.size(), torch.Size((1, 400)))
assert_expected(
video_score.abs().sum(), torch.tensor(136.49894), rtol=1e-3, atol=1e-3
video_score.abs().sum(), torch.tensor(136.16075), rtol=1e-3, atol=1e-3
)


Expand All @@ -88,19 +88,19 @@ def test_omnivore_swin_b_forward(omnivore_swin_b_model, device):

assert_expected(image_score.size(), torch.Size((1, 1000)))
assert_expected(
image_score.abs().sum(), torch.tensor(278.06488), rtol=1e-3, atol=1e-3
image_score.abs().sum(), torch.tensor(251.88954), rtol=1e-3, atol=1e-3
)

rgbd = torch.randn((1, 4, 1, 112, 112), device=device)
rgbd_score = model(rgbd, input_type="rgbd")
assert_expected(rgbd_score.size(), torch.Size((1, 19)))
assert_expected(rgbd_score.abs().sum(), torch.tensor(4.52186), rtol=1e-3, atol=1e-3)
assert_expected(rgbd_score.abs().sum(), torch.tensor(3.82680), rtol=1e-3, atol=1e-3)

video = torch.randn((1, 3, 4, 112, 112), device=device)
video_score = model(video, input_type="video")
assert_expected(video_score.size(), torch.Size((1, 400)))
assert_expected(
video_score.abs().sum(), torch.tensor(138.22859), rtol=1e-3, atol=1e-3
video_score.abs().sum(), torch.tensor(140.93663), rtol=1e-3, atol=1e-3
)


Expand Down
2 changes: 1 addition & 1 deletion tests/modules/encoders/test_swin_transformer_3d_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_swin_transformer_3d_encoder(self):

scores = self.encoder(image)
self.assertEqual(scores.size(), torch.Size([1, 768]))
self.assertAlmostEqual(scores.abs().sum().item(), 247.14674, 2)
self.assertAlmostEqual(scores.abs().sum().item(), 279.50031, 2)

def test_swin_transformer_3d_scripting(self):
torch.jit.script(self.encoder)
Loading