Skip to content

Commit 6409e4a

Browse files
committed
Added tests; reimplemented feed_threads to use larger batch sizes.
1 parent 4ff3386 commit 6409e4a

10 files changed

Lines changed: 2435 additions & 98 deletions

File tree

docs/examples/Variance to rotations of a CNN trained on MNIST with PyTorch.ipynb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
"\n",
3737
"torch.manual_seed(0)\n",
3838
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
39-
"results_path = Path(\"~/tm_example_pytorch/\").expanduser()\n",
39+
"results_path = Path(\"~/.tmeasures/\").expanduser()\n",
4040
"results_path.mkdir(parents=True, exist_ok=True)"
4141
]
4242
},
@@ -175,7 +175,8 @@
175175
"train_augmentation = [transforms.RandomRotation(degree_range)]\n",
176176
"train_transform = transforms.Compose(train_augmentation + base_preprocessing)\n",
177177
"measure_transform = transforms.Compose(base_preprocessing)\n",
178-
"path = results_path / 'mnist'\n",
178+
"path = Path('~/.datasets/mnist').expanduser()\n",
179+
"path.mkdir(exist_ok=True,parents=True)\n",
179180
"\n",
180181
"train_dataset = datasets.MNIST(path, train=True, download=True,\n",
181182
" transform=train_transform)\n",
@@ -353,7 +354,7 @@
353354
"\n",
354355
"Last step before computing the measure: we need to define a PyTorchMeasureOptions object to configure where and how the measure will be computed. The `batch_size` and `num_workers` keywords are analogous to the ones used in [PyTorch's DataLoader](https://pytorch.org/tutorials/beginner/basics/data_tutorial.html). \n",
355356
"\n",
356-
"The `data_device`, `model_device` and `measure_device` indicate, respectively, where the transformations and data preprocessing is performed, where the activations of the model are computed, and finally where the actual measure is computed. In simple cases, these devices could all be the same.\n",
357+
"The `data_device`, `model_device` and `measure_device` indicate, respectively, where the transformations and data preprocessing are performed, where the activations of the model are computed, and finally where the actual measure is computed. In most cases, using the same device for all three gives the best performance; however, it is sometimes necessary or desirable to perform data preprocessing on the `cpu` and model and measure computations on a `gpu` or other accelerator.\n",
357358
"\n",
358359
"Finally, we can `eval` the measure with the dataset, transformation, model and options, obtaining a `PyTorchMeasureResult`, which can be handily converted to a `numpy` version for easy visualization.\n"
359360
]

docs/examples/basic_example_pytorch.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def forward(self, x):
5656

5757
torch.manual_seed(0)
5858
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
59-
results_path = Path("~/tm_example_pytorch/").expanduser()
59+
results_path = Path("~/.tm_example_pytorch/").expanduser()
6060
results_path.mkdir(parents=True, exist_ok=True)
6161

6262
# DATASET
@@ -147,7 +147,7 @@ def __getitem__(self, index):
147147
for measure,model in measures:
148148
exp_id = f"rot{degree_range}_{measure}"
149149
result_filepath = results_path / f'{exp_id}_result.pickle'
150-
if os.path.exists(result_filepath) and False:
150+
if os.path.exists(result_filepath):
151151
print(f"Measure {measure} already evaluated, loading...")
152152
# Load result (optional, in case you don't want to run the above or your session died)
153153
with open(result_filepath, 'rb') as f:

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,15 @@ dependencies = [
1515
"scipy",
1616
"scikit-image>=0.25.0",
1717
"scikit-learn",
18-
"data-science-types",
1918
"statsmodels>=0.14.4",
2019
"tqdm>=4.67.1",
2120
]
2221

2322

2423
[dependency-groups]
2524
dev = [
25+
"torch>=2",
26+
"torchvision",
2627
"pandas-stubs>=2.2.3.250308",
2728
"scipy-stubs>=1.15.2.1",
2829
"microsoft-python-type-stubs",
@@ -32,6 +33,8 @@ dev = [
3233
"types-tqdm>=4.67.0.20250401",
3334
"pre-commit>=4.2.0",
3435
"poethepoet>=0.35.0",
36+
"data-science-types",
37+
"poutyne",
3538
]
3639
docs = [
3740
"sphinx>=8.2.3",

tests/pytorch/test_measure.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import torch
2+
import pytest
3+
import tmeasures as tm
4+
import numpy as np
5+
from numpy.testing import assert_allclose
6+
7+
8+
class ConstantModel(torch.nn.Module):
    """Module that ignores its input values and returns a fixed tensor,
    expanded along a new leading batch dimension taken from the input."""

    def __init__(self, value=None) -> None:
        """
        Args:
            value: the constant activation tensor to return for every sample.
                Defaults to an empty (size-0) tensor.
        """
        super().__init__()
        # Avoid a mutable default argument: a tensor default expression is
        # evaluated once and shared by every instance built without `value`.
        self.value = value if value is not None else torch.empty(0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        n = x.shape[0]
        # expand() returns a broadcasted view (no copy) of shape (n, *value.shape).
        return self.value.expand(n, *self.value.shape)
16+
class IdentityModel(torch.nn.Module):
    """Module whose output is exactly its input, unchanged."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x
21+
22+
class RandomModel(torch.nn.Module):
    """Module that ignores its input values and draws fresh normal samples.

    Every forward pass returns a tensor of shape (batch, *shape) sampled
    from N(mean, std**2); only the batch size is taken from the input.
    """

    def __init__(self, shape: tuple, mean=0.0, std=1.0) -> None:
        super().__init__()
        self.mean = mean
        self.std = std
        self.shape = shape

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        batch = x.shape[0]
        out_shape = (batch, *self.shape)
        return torch.normal(mean=self.mean, std=self.std, size=out_shape)
32+
33+
34+
35+
class ConstantDataset(torch.utils.data.Dataset):
    """Dataset whose every sample is the same constant tensor.

    `shape[0]` is the number of samples; the remaining dimensions give the
    shape of each individual sample.
    """

    def __init__(self, value=0, shape=(10, 10)):
        super().__init__()
        data = torch.ones(shape) * value
        self.dataset = torch.utils.data.TensorDataset(data)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        # TensorDataset wraps each item in a 1-tuple; unwrap it here.
        return self.dataset[index][0]
43+
44+
# Shared evaluation options for the tests below.
default_options = tm.pytorch.PyTorchMeasureOptions(batch_size=1024)
# Large batches and many workers stress the batched feeding implementation.
large_options = tm.pytorch.PyTorchMeasureOptions(num_workers=128, batch_size=2**14)
46+
47+
def assert_instance(measure, dataset, transformations, activations_model,
                    expected_result, atol=1e-5, options=default_options):
    """Evaluate `measure` on the given inputs and compare per-layer results.

    Args:
        measure: a tmeasures PyTorch measure with an `eval` method.
        dataset, transformations, activations_model: inputs for `measure.eval`.
        expected_result: list of arrays, one per activation/layer.
        atol: absolute tolerance for `assert_allclose`.
        options: PyTorchMeasureOptions controlling batching/devices.
    """
    # NOTE: removed a leftover debug `print(options.batch_size)`.
    result = measure.eval(dataset, transformations, activations_model, options)
    result = result.numpy()
    for name, layer, expected_layer in zip(result.layer_names, result.layers, expected_result):
        assert_allclose(layer, expected_layer, atol=atol,
                        err_msg=f"Error in {measure} for activation '{name}'")
53+
54+
55+
def test_constant_model_invariance():
    """A constant model: raw variance measures should be 0 everywhere and
    the normalized variance measure should be 1."""
    output_shape = (2, 2)
    output = torch.rand(output_shape)
    zeros = np.zeros(output.shape)
    ones = np.ones(output.shape)
    model = torch.nn.Sequential(ConstantModel(output))
    activations_model = tm.pytorch.AutoActivationsModule(model)
    transformations = tm.pytorch.transformations.IdentityTransformationSet()
    dataset = ConstantDataset(2, (100, 5))
    cases = [
        (tm.pytorch.SampleVarianceInvariance(), [zeros]),
        (tm.pytorch.TransformationVarianceInvariance(), [zeros]),
        (tm.pytorch.NormalizedVarianceInvariance(), [ones]),
    ]
    for measure, expected in cases:
        assert_instance(measure, dataset, transformations, activations_model, expected)
71+
72+
class RepeatedIdentitySet(tm.pytorch.transformations.PyTorchTransformationSet):
    """Transformation set of `transformations` copies of the identity."""

    def __init__(self, transformations=1):
        identity = tm.pytorch.transformations.IdentityTransformation()
        super().__init__([identity] * transformations)

    def valid_input(self):
        return True

    def copy(self):
        # The set is effectively immutable, so sharing one instance is fine.
        return self

    def id(self):
        return "Identity"
81+
82+
def test_random_model_invariance():
    """With activations drawn i.i.d. from N(mean, std**2), the raw measures
    are expected to equal `std` per activation and the normalized measure 1,
    up to sampling tolerance."""
    output_shape = (2, 2)
    mean, std = 2.0, 3
    # BUG FIX: pass mean/std instead of repeating the literals 2, 3, so the
    # expected values below stay in sync with the model's parameters.
    model = torch.nn.Sequential(RandomModel(output_shape, mean, std))
    expected_results = np.ones(output_shape) * std
    expected_results_normalized = np.ones(output_shape)
    measures_results = [
        (tm.pytorch.SampleVarianceInvariance(), [expected_results]),
        (tm.pytorch.TransformationVarianceInvariance(), [expected_results]),
        (tm.pytorch.NormalizedVarianceInvariance(), [expected_results_normalized]),
    ]
    sample_size_order = 2
    n = 10 ** sample_size_order
    # Tolerance shrinks with the sample size; for order 2 this equals the
    # previously hard-coded 1e-1 (the computed value was unused before).
    atol = 10 ** (-np.sqrt(sample_size_order // 2))
    transformations = RepeatedIdentitySet(n)
    dataset = ConstantDataset(2, (n, 2))
    activations_model = tm.pytorch.AutoActivationsModule(model)
    for measure, expected_result in measures_results:
        assert_instance(measure, dataset, transformations, activations_model,
                        expected_result, atol=atol, options=large_options)
100+
101+
102+
if __name__ == "__main__":
    # Allow running this test module directly, outside pytest.
    test_random_model_invariance()

tmeasures/measure.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import re
66
from .utils import get_all
77

8+
## todo change np.ndarray to something more general
89
ActivationsByLayer = List[np.ndarray]
910

1011
# TODO change `layer` for `activation` in variable/methods to unify vocabulary

tmeasures/pytorch/activations_iterator.py

Lines changed: 134 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
from collections.abc import Generator
2+
import typing
3+
4+
from tmeasures.pytorch.transformations import PyTorchTransformation
15
from .dataset2d import STDataset, Dataset2D
26
import torch
37
from torch.utils.data import DataLoader
@@ -25,12 +29,12 @@
2529
class ActivationsTransformer(abc.ABC):
2630

2731
@abc.abstractmethod
28-
def transform(self, activations: torch.Tensor, x: torch.Tensor, transformations: List[Transformation]) -> torch.Tensor:
32+
def transform(self, activations: torch.Tensor, x: torch.Tensor, transformations: List[PyTorchTransformation]) -> torch.Tensor:
2933
pass
3034

3135

3236
class IdentityActivationsTransformer(ActivationsTransformer):
33-
def transform(self, activations: torch.Tensor, x: torch.Tensor, transformations: List[Transformation]) -> torch.Tensor:
37+
def transform(self, activations: torch.Tensor, x: torch.Tensor, transformations: List[PyTorchTransformation]) -> torch.Tensor:
3438
return activations
3539

3640
from tmeasures import logger
@@ -44,6 +48,7 @@ def __init__(self,layers:list[str],rows:int,n_batch:int,stop=False) -> None:
4448
self.layers = layers
4549
self.qs = {l: IterableQueue(rows,maxsize=1,name=f"q({l})") for l in layers}
4650
self.row_qs = {l: IterableQueue(n_batch,maxsize=1,name=f"q({l}_row)") for l in layers}
51+
4752
@property
4853
def queues(self):
4954
return list(self.qs.values())+list(self.row_qs.values())
@@ -115,55 +120,136 @@ def check_finished(self,worker_futures,server_future,tm:ThreadsManager):
115120
if not e is None:
116121
logger.info(f"Worker exception, about to re raise from main thread\n{e}\n thread id {threading.get_ident()}\n")
117122
raise e
118-
123+
124+
def move_activations_to_measure_device(self, activations: list[torch.Tensor]) -> list[torch.Tensor]:
    """Move each layer's activations to the measure device.

    No-op when model and measure devices coincide. Mutates `activations`
    in place and returns the same list.
    """
    if self.o.model_device != self.o.measure_device:
        for i, layer_activations in enumerate(activations):
            # BUG FIX: `.to()` returns a new tensor; the result must be
            # stored back — tensors are not moved in place.
            activations[i] = layer_activations.to(self.o.measure_device, non_blocking=True)
    return activations

def transform_activations(self, activations: list[torch.Tensor], x_transformed, transformations) -> list[torch.Tensor]:
    """Apply the activations transformer to every layer, in place.

    Returns the same list so call sites may assign the return value.
    """
    for i, layer_activations in enumerate(activations):
        activations[i] = self.activations_transformer.transform(layer_activations, x_transformed, transformations)
    # BUG FIX: the caller assigns this method's result; without a return
    # the activations list was silently replaced by None.
    return activations

@torch.no_grad
def feed_threads2(self, tm: ThreadsManager):
    """Feed per-layer activation batches to the queues in `tm`, using one
    DataLoader over the whole 2D (sample × transformation) dataset so that
    batches can be larger than a single row (unlike `feed_threads`)."""
    layers = self.model.activation_names()
    rows, cols = self.dataset.len0, self.dataset.len1

    dataloader = DataLoader(self.dataset, batch_size=self.o.batch_size,
                            shuffle=False, num_workers=self.o.num_workers,
                            pin_memory=True)

    # Announce every row queue up front: a single batch may span rows.
    for row in range(rows):
        for k, q in tm.qs.items():
            logger.info(f"AI: putting row {row} dataloader for layer {k}")
            q.put(tm.row_qs[k])
        if tm.stop:
            logger.info("Server thread stopping, exception detected")
            return

    n_samples = rows * cols
    for batch_i, x_transformed in tqdm.tqdm(enumerate(dataloader), disable=not self.o.verbose, leave=False):
        sample_i_start = batch_i * self.o.batch_size
        # BUG FIX: clamp to the dataset size; the last batch may be smaller
        # than batch_size, and indexing past the end breaks d1tod2.
        sample_i_end = min(sample_i_start + self.o.batch_size, n_samples)
        i_samples = [self.dataset.d1tod2(i) for i in range(sample_i_start, sample_i_end)]
        i_rows, i_cols = typing.cast(tuple[list[int], list[int]], zip(*i_samples))
        i_rows, i_cols = list(i_rows), list(i_cols)

        x_transformed = x_transformed.to(self.o.model_device, non_blocking=True)
        activations = self.model.forward_activations(x_transformed)
        transformations = self.dataset.get_transformations(i_rows, i_cols)

        activations = self.move_activations_to_measure_device(activations)
        activations = self.transform_activations(activations, x_transformed, transformations)
        if tm.stop:
            logger.info("Server thread stopping, exception detected")
            return

        # Dispatch each row's slice of the batch to that row's queues.
        for row, row_activations in self.split_row_activations(activations, i_rows):
            for i, layer_activations in enumerate(row_activations):
                tm.row_qs[layers[i]].put(layer_activations)

def split_row_activations(self, activations: list[torch.Tensor], i_rows: list[int]) -> Generator[tuple[int, list[torch.Tensor]]]:
    """Split batched activations into contiguous per-row slices.

    `i_rows` holds the row index of each sample in the batch; assumes the
    indices are ascending and contiguous (as produced by d1tod2 over a
    sequential range — TODO confirm). Yields (row, per-layer slices).
    """
    start = 0
    first, last = min(i_rows), max(i_rows)
    for current_row in range(first, last + 1):
        if current_row == last:
            # BUG FIX: slice ends are exclusive; the final slice must end
            # at len(i_rows), not len(i_rows) + 1.
            end = len(i_rows)
        else:
            end = i_rows.index(current_row + 1)
        yield current_row, [a[start:end] for a in activations]
        # BUG FIX: the next slice starts where this one ended;
        # `start = end + 1` dropped one sample at every row boundary.
        start = end
197+
198+
@torch.no_grad
def feed_threads(self, tm: ThreadsManager):
    """Feed per-layer activation batches to the queues in `tm`, one row of
    the sample × transformation grid at a time (per-row DataLoaders, so
    batches never span rows)."""
    layers = self.model.activation_names()
    rows, cols = self.dataset.len0, self.dataset.len1

    for row in tqdm.trange(rows, disable=not self.o.verbose, leave=False):
        row_dataset = self.dataset.row_dataset(row)
        row_dataloader = DataLoader(row_dataset, batch_size=self.o.batch_size,
                                    shuffle=False, num_workers=0, pin_memory=True)

        # Hand each layer its queue for this row before producing batches.
        for k, q in tm.qs.items():
            logger.info(f"AI: putting row {row} dataloader for layer {k}")
            q.put(tm.row_qs[k])

        if tm.stop:
            logger.info("Server thread stopping, exception detected")
            return

        col = 0
        for batch_i, x_transformed in enumerate(row_dataloader):
            x_transformed = x_transformed.to(self.o.model_device, non_blocking=True)
            activations = self.model.forward_activations(x_transformed)

            n_batch = x_transformed.shape[0]
            col_to = col + n_batch
            i_rows = [row] * n_batch
            i_cols = list(range(col, col_to))
            transformations = self.dataset.get_transformations(i_rows, i_cols)

            for i, layer_activations in enumerate(activations):
                if self.o.model_device != self.o.measure_device:
                    layer_activations = layer_activations.to(self.o.measure_device, non_blocking=True)
                layer_activations = self.activations_transformer.transform(layer_activations, x_transformed, transformations)
                tm.row_qs[layers[i]].put(layer_activations)

            # Bail out promptly if a worker reported an exception.
            if tm.stop:
                logger.info("Server thread stopping, exception detected")
                return
            col = col_to
167253

168254

169255
def evaluate(self, m: PyTorchLayerMeasure):

0 commit comments

Comments
 (0)