From 93d11f7f6ff183811a9b2b060538da32f3ff5dbd Mon Sep 17 00:00:00 2001
From: qingyuan hou <1399717445@qq.com>
Date: Tue, 13 Aug 2024 17:58:57 -0400
Subject: [PATCH 1/9] use SGD to optimize the energy function and replace the
 recurrent loop

---
 main_cifar.py |  28 +++++++-----
 prednet.py    | 122 +++++++++++++++++++++++++++++++++++++++++++++++---
 utils.py      |  92 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 225 insertions(+), 17 deletions(-)

diff --git a/main_cifar.py b/main_cifar.py
index 2adfd23..bda1690 100644
--- a/main_cifar.py
+++ b/main_cifar.py
@@ -12,14 +12,15 @@
 from utils import progress_bar
 from torch.autograd import Variable
 
-def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False):
+def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False, train=True):
     use_cuda = True # torch.cuda.is_available()
     best_acc = 0  # best test accuracy
     start_epoch = 0  # start from epoch 0 or last checkpoint epoch
-    batchsize = 128
+    batchsize = 1
     root = './'
     rep = 1
     lr = 0.01
+    train_flag = train
     
     models = {'PredNetBpD':PredNetBpD}
     modelname = model+'_'+str(circles)+'CLS_'+str(nesterov)+'Nes_'+str(weightDecay)+'WD_'+str(Tied)+'TIED_'+str(rep)+'REP'
@@ -48,11 +49,11 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=
     trainset = torchvision.datasets.CIFAR100(root='../data', train=True, download=True, transform=transform_train)
     trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=2)
     testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test)
-    testloader = torch.utils.data.DataLoader(testset, batch_size=10, shuffle=False, num_workers=2)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=False, num_workers=2)
     
     # Model
     print('==> Building model..')
-    net = models[model](num_classes=100,cls=circles,Tied=Tied)
+    net = models[model](num_classes=100,cls=circles,Tied=Tied, solver='SGD')
        
     
     # Define objective function
@@ -150,13 +151,18 @@ def decrease_learning_rate():
         for param_group in optimizer.param_groups:
             param_group['lr'] /= 10
 
-    
-    for epoch in range(start_epoch, start_epoch+300):
+    if train_flag:
+        for epoch in range(start_epoch, start_epoch+300):
+            statfile = open(logpath+'training_stats_'+modelname+'.txt', 'a+')
+            if epoch==150 or epoch==225 or epoch == 262:
+                decrease_learning_rate()       
+            train(epoch)
+            test(epoch)
+    else:
         statfile = open(logpath+'training_stats_'+modelname+'.txt', 'a+')
-        if epoch==150 or epoch==225 or epoch == 262:
-            decrease_learning_rate()       
-        train(epoch)
-        test(epoch)
+        checkpoint = torch.load(checkpointpath + 'PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7')
+        net.load_state_dict(checkpoint['net'])
+        test(0)
 
 if __name__ == '__main__':
-    main_cifar()
+    main_cifar(train=False)
diff --git a/prednet.py b/prednet.py
index f639c6a..8b8776f 100644
--- a/prednet.py
+++ b/prednet.py
@@ -30,12 +30,115 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b
 
     def forward(self, x):
         y = self.relu(self.FFconv(x))
-        b0 = F.relu(self.b0[0]+1.0).expand_as(y)
         for _ in range(self.cls):
-            y = self.FFconv(self.relu(x - self.FBconv(y)))*b0 + y
+            y = self.FFconv(self.relu(x - self.FBconv(y))) + y
         y = y + self.bypass(x)
         return y
 
+    def la_sigmoid(self, x):
+        return 0.5+0.25*x-0.0212*x**3
+    
+class PcConvBp_SGD(nn.Module):
+    def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False):
+        super().__init__()
+        self.padding = padding
+        self.stride = stride
+        self.kernel_size = kernel_size
+        self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias)
+        self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias)
+        self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))])
+        self.relu = nn.ReLU(inplace=True)
+        self.cls = cls
+        self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False)
+
+    def forward(self, x):
+        y = self.relu(self.FFconv(x))
+        y, energies = self.find_optimal_r(x, y)
+        y = y + self.bypass(x)
+        return y
+
+    def find_optimal_r(self, x, y):
+        weight = self.FBconv.weight.data
+        expanded_weights = self.expand_weights_to_matrix(y.shape[1:], weight.permute(1, 0, 2, 3), stride=self.stride, padding=self.padding)
+        expanded_weights = expanded_weights.to(y.device)
+        flattened_x = x.view(1, -1).clone().detach()
+
+        """
+        Implement with SGD
+        """
+        # Initialize flattened_y as a tensor with requires_grad=True
+        num_iterations = 1
+        y = F.pad(y, (self.padding, self.padding, self.padding, self.padding))
+        flattened_y = y.view(1, -1).clone().detach().requires_grad_(True)
+        energy_record = []
+        optimizer = torch.optim.SGD([flattened_y], lr=0.01)
+        for _ in range(num_iterations):
+            optimizer.zero_grad()
+            energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y)
+            energy.backward()
+            optimizer.step()
+            energy_record.append(energy.item())
+
+        # Reshape the flattened_y to the original shape
+        _, C_in, H_in, W_in = y.shape
+        C_out, _, K, _ = weight.shape
+        H_out = (H_in - K + 2 * self.padding) // self.stride + 1
+        W_out = (W_in - K + 2 * self.padding) // self.stride + 1
+        optimal_y = flattened_y.view(-1, C_out, H_out, W_out)
+        # Cut off the padding area
+        optimal_y = optimal_y[:, :, self.padding:-self.padding, self.padding:-self.padding]
+        optimal_y = optimal_y.to(y.device)
+
+        return optimal_y.detach(), energy_record
+
+    def Energy_Function(self, x, W, y):
+        energy = -2* x @ W @ y.T + y @ (W.T @ W) @ y.T
+        return energy
+    
+    def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding=0):
+        """
+        Expand the convolution weights to a matrix suitable for multiplying with a flattened input vector.
+        
+        Args:
+        - input_shape (tuple): Shape of the input (C_in, H_in, W_in)
+        - weight_tensor (torch.Tensor): Convolution weights of shape (C_out, C_in, K, K)
+        - stride (int): Stride of the convolution
+        - padding (int): Padding size
+
+        Returns:
+        - expanded_weights (torch.Tensor): The expanded weight matrix for matrix multiplication
+        """
+        C_in, H_in, W_in = input_shape
+        C_out, _, K, _ = weight_tensor.shape
+        
+        # Compute output dimensions
+        H_out = (H_in + 2 * padding - K) // stride + 1
+        W_out = (W_in + 2 * padding - K) // stride + 1
+
+        # Initialize expanded weight matrix
+        expanded_weights = torch.zeros((C_out * H_out * W_out, C_in * (H_in + 2 * padding) * (W_in + 2 * padding)))
+
+        # Fill the expanded weight matrix
+        for c_out in range(C_out):
+            for h in range(H_out):
+                for w in range(W_out):
+                    # Calculate the starting index for each filter application
+                    start_h = h * stride
+                    start_w = w * stride
+                    # Flattened receptive field index
+                    filter_idx = c_out * H_out * W_out + h * W_out + w
+                    # Fill the appropriate section of the expanded weight matrix
+                    for c_in in range(C_in):
+                        for i in range(K):
+                            for j in range(K):
+                                # Calculate the input index considering padding
+                                input_idx = (c_in * (H_in + 2 * padding) + (start_h + i)) * (W_in + 2 * padding) + (start_w + j)
+                                # Assign the weight to the correct position
+                                expanded_weights[filter_idx, input_idx] = weight_tensor[c_out, c_in, i, j]
+
+        return expanded_weights
+
+
 ''' Architecture PredNetBpE '''
 class PredNetBpE(nn.Module):
     def __init__(self, num_classes=1000, cls=0, Tied = False):
@@ -77,7 +180,7 @@ def forward(self, x):
 
 ''' Architecture PredNetBpD '''
 class PredNetBpD(nn.Module):
-    def __init__(self, num_classes=10, cls=0, Tied = False):
+    def __init__(self, num_classes=10, cls=0, Tied = False, solver=None):
         super().__init__()
         self.ics = [3,  64, 64, 128, 128, 256, 256, 512] # input chanels
         self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels
@@ -88,9 +191,18 @@ def __init__(self, num_classes=10, cls=0, Tied = False):
         # construct PC layers
         # Unlike PCN v1, we do not have a tied version here. We may or may not incorporate a tied version in the future.
         if Tied == False:
-            self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
+            if solver is None:
+                print('No solver in used, still using convolution in recurrent layer')
+                self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
+            elif solver == 'SGD':
+                print(f'Solver {solver} is in use')
+                self.PcConvs = nn.ModuleList([PcConvBp_SGD(3, 64, cls=self.cls)])
+                self.PcConvs.extend([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(1, self.nlays)])
+            else:
+                print(f'Solver {solver} not supported')
         else:
             self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
+
         self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)])
         # Linear layer
         self.linear = nn.Linear(self.ocs[-1], num_classes)
@@ -111,7 +223,7 @@ def forward(self, x):
         out = self.linear(out)
         return out
 
-''' Architecture PredNetBpD '''
+''' Architecture PredNetBpC '''
 class PredNetBpC(nn.Module):
     def __init__(self, num_classes=10, cls=0, Tied = False):
         super().__init__()
diff --git a/utils.py b/utils.py
index 4c9b3f9..96c4388 100644
--- a/utils.py
+++ b/utils.py
@@ -7,7 +7,8 @@
 import sys
 import time
 import math
-
+import torch
+import numpy as np
 import torch.nn as nn
 import torch.nn.init as init
 
@@ -122,3 +123,92 @@ def format_time(seconds):
     if f == '':
         f = '0ms'
     return f
+
+class BRIMconfig:
+    def __init__(
+        self,
+        R=31e3,
+        C=49e-15,
+        temperature_start=0.5, # 2.4
+        temperature_end=0.5, #0.5
+        t_step=2.2e-11,
+        t_stop=10e-9,
+        h2v=True,
+        fixed_temperature=False
+    ) -> None:
+        self.R: float = R
+        self.C: float = C
+        if fixed_temperature:
+            self.temperature_start: float = temperature_start
+            self.temperature_end: float = temperature_start
+        else:
+            self.temperature_start: float = temperature_start
+            self.temperature_end: float = temperature_end
+        self.t_step: float = t_step
+        self.t_stop: float = t_stop
+        self.h2v: bool = h2v
+
+
+def absv(spin):
+    return torch.sign(spin)
+
+
+def make_equivalent_ising(couplings: torch.tensor,
+                          visible_bias: torch.tensor,
+                          hidden_bias: torch.tensor,
+                          visible_binary: torch.tensor,
+                          hidden_binary: torch.tensor):
+    
+    J_new = couplings / 4
+    a_new = couplings.sum(axis=1) / 4 + visible_bias / 2
+    b_new = couplings.sum(axis=0) / 4 + hidden_bias / 2
+    offset = (couplings / 4).sum() +\
+        visible_bias.sum() / 2 + hidden_bias.sum() / 2
+    visible_spins = 2 * visible_binary - 1
+    hidden_spins = 2 * hidden_binary - 1
+    
+    return J_new, a_new, b_new, offset, visible_spins, hidden_spins
+
+
+def format_check(W: torch.tensor, 
+                 visible: torch.tensor, 
+                 hidden: torch.tensor, 
+                 visible_bias: torch.tensor, 
+                 hidden_bias: torch.tensor):
+    offset = 0
+    convert_flag = 0
+    # convert to spin if input are binary
+    if (torch.all((visible == 0) | (visible == 1)) and torch.all((hidden == 0) | (hidden == 1))):
+        W, visible_bias, hidden_bias, offset, visible, hidden =\
+        make_equivalent_ising(W, visible_bias, hidden_bias, visible, hidden)
+        convert_flag = 1
+    # pass if input are spin
+    elif (torch.all((visible == -1) | (visible == 1)) and torch.all((hidden == -1) | (hidden == 1))):
+        convert_flag = 0
+    # not support other input
+    else:
+        raise ValueError("visible and hidden are not binary or spin")
+    
+    return W, visible, hidden, visible_bias, hidden_bias, offset, convert_flag
+
+
+def _sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+def calculate_logprob(W,  # (num_visible, num_hidden)
+                      visible_bias,  # (1, num_visible)
+                      hidden_bias,  # (1, num_hidden)
+                      logZ,
+                      data,  # (num_samples, num_visible)
+                      offset):
+    print(f'weight: {W.shape}, visible_bias: {visible_bias.shape}, hidden_bias: {hidden_bias.shape}')
+    hidden = (np.sum(_sigmoid((hidden_bias + np.matmul(data, W))), axis=0) / len(data))[None, :]
+    visible = (np.sum(data, axis=0) / len(data))[None, :]
+    expectation_energy = -(visible @ W @ hidden.T + visible @ visible_bias.T + hidden_bias @ hidden.T)
+    logprob = - expectation_energy - logZ
+    return logprob
+
+def normalize_and_quantize(spin, quantize):
+    normalized_spin = (spin - spin.min()) / (spin.max() - spin.min()) * 2 - 1
+    normalized_spin = ((normalized_spin + 1) / 2 * (quantize - 1)).round() / (quantize - 1) * 2 - 1
+    return normalized_spin
\ No newline at end of file

From 7afbc88ae60fbd97f9740419cb35f2f696825c49 Mon Sep 17 00:00:00 2001
From: qingyuan <1399717445@qq.com>
Date: Thu, 15 Aug 2024 22:31:29 -0400
Subject: [PATCH 2/9] use L2 norm instead of energy function

---
 cifar_test.py | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++
 main_cifar.py |  31 +++-----
 prednet.py    |   1 +
 3 files changed, 216 insertions(+), 19 deletions(-)
 create mode 100644 cifar_test.py

diff --git a/cifar_test.py b/cifar_test.py
new file mode 100644
index 0000000..7353879
--- /dev/null
+++ b/cifar_test.py
@@ -0,0 +1,203 @@
+'''Train CIFAR10 with PyTorch.'''
+from __future__ import print_function
+import os
+import torch.optim as optim
+import torch.backends.cudnn as cudnn
+import torchvision
+import torchvision.transforms as transforms
+import argparse
+from prednet import *
+from torch.autograd import Variable
+from torch.utils.data import Subset
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from tqdm import tqdm
+
+
+class PcConvBp_SGD(nn.Module):
+    def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False):
+        super().__init__()
+        self.padding = padding
+        self.stride = stride
+        self.kernel_size = kernel_size
+        self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias)
+        self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias)
+        self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))])
+        self.relu = nn.ReLU(inplace=True)
+        self.cls = cls
+        self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False)
+
+    def forward(self, x):
+        y = self.relu(self.FFconv(x))
+        y, energies = self.find_optimal_r(x, y)
+        y = y + self.bypass(x)
+        return y
+
+    def find_optimal_r(self, x, y):
+        weight = self.FBconv.weight.data
+        expanded_weights = self.expand_weights_to_matrix(y.shape[1:], weight.permute(1, 0, 2, 3), stride=self.stride, padding=self.padding)
+        expanded_weights = expanded_weights.to(y.device)
+        flattened_x = torch.flatten(x, start_dim=1).clone().detach()
+
+        """
+        Implement with SGD
+        """
+        # Initialize flattened_y as a tensor with requires_grad=True
+        num_iterations = 500
+        y = F.pad(y, (self.padding, self.padding, self.padding, self.padding))
+        flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True)
+        energy_record = []
+        optimizer = torch.optim.SGD([flattened_y], lr=0.001)
+        for _ in range(num_iterations):
+            optimizer.zero_grad()
+            energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2)
+            energy.backward()
+            optimizer.step()
+            energy_record.append(energy.item())
+
+        # Reshape the flattened_y to the original shape
+        _, C_in, H_in, W_in = y.shape
+        C_out, _, K, _ = weight.shape
+        H_out = (H_in - K + 2 * self.padding) // self.stride + 1
+        W_out = (W_in - K + 2 * self.padding) // self.stride + 1
+        optimal_y = flattened_y.view(-1, C_out, H_out, W_out)
+        # Cut off the padding area
+        optimal_y = optimal_y[:, :, self.padding:-self.padding, self.padding:-self.padding]
+        optimal_y = optimal_y.to(y.device)
+
+        return optimal_y.detach(), energy_record
+
+    def Energy_Function(self, x, W, y):
+        energy = -2* x @ W @ y.T + y @ (W.T @ W) @ y.T
+        return energy
+    
+    def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding=0):
+        """
+        Expand the convolution weights to a matrix suitable for multiplying with a flattened input vector.
+        
+        Args:
+        - input_shape (tuple): Shape of the input (C_in, H_in, W_in)
+        - weight_tensor (torch.Tensor): Convolution weights of shape (C_out, C_in, K, K)
+        - stride (int): Stride of the convolution
+        - padding (int): Padding size
+
+        Returns:
+        - expanded_weights (torch.Tensor): The expanded weight matrix for matrix multiplication
+        """
+        C_in, H_in, W_in = input_shape
+        C_out, _, K, _ = weight_tensor.shape
+        
+        # Compute output dimensions
+        H_out = (H_in + 2 * padding - K) // stride + 1
+        W_out = (W_in + 2 * padding - K) // stride + 1
+
+        # Initialize expanded weight matrix
+        expanded_weights = torch.zeros((C_out * H_out * W_out, C_in * (H_in + 2 * padding) * (W_in + 2 * padding)))
+
+        # Fill the expanded weight matrix
+        for c_out in range(C_out):
+            for h in range(H_out):
+                for w in range(W_out):
+                    # Calculate the starting index for each filter application
+                    start_h = h * stride
+                    start_w = w * stride
+                    # Flattened receptive field index
+                    filter_idx = c_out * H_out * W_out + h * W_out + w
+                    # Fill the appropriate section of the expanded weight matrix
+                    for c_in in range(C_in):
+                        for i in range(K):
+                            for j in range(K):
+                                # Calculate the input index considering padding
+                                input_idx = (c_in * (H_in + 2 * padding) + (start_h + i)) * (W_in + 2 * padding) + (start_w + j)
+                                # Assign the weight to the correct position
+                                expanded_weights[filter_idx, input_idx] = weight_tensor[c_out, c_in, i, j]
+
+        return expanded_weights
+    
+    
+    
+''' Architecture PredNetBpD '''
+from prednet import PcConvBp
+class PredNetBpD(nn.Module):
+    def __init__(self, num_classes=10, cls=0, Tied = False, solver=None):
+        super().__init__()
+        self.ics = [3,  64, 64, 128, 128, 256, 256, 512] # input chanels
+        self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels
+        self.maxpool = [False, False, True, False, True, False, False, False] # downsample flag
+        self.cls = cls # num of time steps
+        self.nlays = len(self.ics)
+
+        # construct PC layers
+        # Unlike PCN v1, we do not have a tied version here. We may or may not incorporate a tied version in the future.
+        if Tied == False:
+            if solver is None:
+                print('No solver in used, still using convolution in recurrent layer')
+                self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
+            elif solver == 'SGD':
+                print(f'Solver {solver} is in use')
+                self.PcConvs = nn.ModuleList([PcConvBp_SGD(3, 64, cls=self.cls)])
+                self.PcConvs.extend([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(1, self.nlays)])
+            else:
+                print(f'Solver {solver} not supported')
+        else:
+            self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
+
+        self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)])
+        # Linear layer
+        self.linear = nn.Linear(self.ocs[-1], num_classes)
+        self.maxpool2d = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.relu = nn.ReLU(inplace=True)
+        self.BNend = nn.BatchNorm2d(self.ocs[-1])
+
+    def forward(self, x):
+        for i in range(self.nlays):
+            x = self.BNs[i](x)
+            x = self.PcConvs[i](x)  # ReLU + Conv
+            if self.maxpool[i]:
+                x = self.maxpool2d(x)
+
+        # classifier                
+        out = F.avg_pool2d(self.relu(self.BNend(x)), x.size(-1))
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+if __name__ == '__main__':
+    batchsize = 500
+    test_ratio = 1
+    transform_test = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])
+    testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test)
+    num_samples = len(testset)
+    subset_size = int(test_ratio * num_samples)
+
+    # Create a subset of the test set
+    indices = list(range(num_samples))
+    subset_indices = indices[:subset_size]
+    test_subset = Subset(testset, subset_indices)
+
+    # Create a DataLoader for the subset
+    testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=True, num_workers=6)
+
+    # Create an instance of the PredNetBpD class
+    checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7')
+    prednet = PredNetBpD(num_classes=100, cls=5, Tied=False)
+    prednet = nn.DataParallel(prednet)
+    prednet.load_state_dict(checkpoint_weight['net'])
+    prednet = prednet.cuda()
+    prednet.eval()
+    total = 0
+    correct = 0
+    for batch_idx, (inputs, targets) in tqdm(enumerate(testloader), total=len(testloader)):
+        inputs, targets = inputs.cuda(), targets.cuda()
+        output_tensor = prednet(inputs)
+        # Get the predicted class
+        _, predicted = torch.max(output_tensor, 1)
+        total += targets.size(0)
+        correct += (predicted == targets).sum().item()
+
+    # Calculate the accuracy
+    accuracy = 100 * correct / total
+    print(f'Test Accuracy: {accuracy:.2f}%')
\ No newline at end of file
diff --git a/main_cifar.py b/main_cifar.py
index bda1690..6b67c56 100644
--- a/main_cifar.py
+++ b/main_cifar.py
@@ -12,15 +12,15 @@
 from utils import progress_bar
 from torch.autograd import Variable
 
-def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False, train=True):
+def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False):
     use_cuda = True # torch.cuda.is_available()
     best_acc = 0  # best test accuracy
     start_epoch = 0  # start from epoch 0 or last checkpoint epoch
-    batchsize = 1
+    batchsize = 256
     root = './'
     rep = 1
     lr = 0.01
-    train_flag = train
+    solver = None
     
     models = {'PredNetBpD':PredNetBpD}
     modelname = model+'_'+str(circles)+'CLS_'+str(nesterov)+'Nes_'+str(weightDecay)+'WD_'+str(Tied)+'TIED_'+str(rep)+'REP'
@@ -46,15 +46,14 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=
     transform_test = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])
-    trainset = torchvision.datasets.CIFAR100(root='../data', train=True, download=True, transform=transform_train)
+    trainset = torchvision.datasets.CIFAR10(root='../data', train=True, download=True, transform=transform_train)
     trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=2)
-    testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test)
+    testset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform_test)
     testloader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=False, num_workers=2)
     
     # Model
     print('==> Building model..')
-    net = models[model](num_classes=100,cls=circles,Tied=Tied, solver='SGD')
-       
+    net = models[model](num_classes=10,cls=circles,Tied=Tied, solver=solver)
     
     # Define objective function
     criterion = nn.CrossEntropyLoss()
@@ -151,18 +150,12 @@ def decrease_learning_rate():
         for param_group in optimizer.param_groups:
             param_group['lr'] /= 10
 
-    if train_flag:
-        for epoch in range(start_epoch, start_epoch+300):
-            statfile = open(logpath+'training_stats_'+modelname+'.txt', 'a+')
-            if epoch==150 or epoch==225 or epoch == 262:
-                decrease_learning_rate()       
-            train(epoch)
-            test(epoch)
-    else:
+    for epoch in range(start_epoch, start_epoch+300):
         statfile = open(logpath+'training_stats_'+modelname+'.txt', 'a+')
-        checkpoint = torch.load(checkpointpath + 'PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7')
-        net.load_state_dict(checkpoint['net'])
-        test(0)
+        if epoch==150 or epoch==225 or epoch == 262:
+            decrease_learning_rate()       
+        train(epoch)
+        test(epoch)
 
 if __name__ == '__main__':
-    main_cifar(train=False)
+    main_cifar()
diff --git a/prednet.py b/prednet.py
index 8b8776f..1055b59 100644
--- a/prednet.py
+++ b/prednet.py
@@ -25,6 +25,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b
         self.FBconv = nn.ConvTranspose2d(outchan, inchan, kernel_size, stride, padding, bias=bias)
         self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))])
         self.relu = nn.ReLU(inplace=True)
+        self.sigmoid = nn.Sigmoid()
         self.cls = cls
         self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False)
 

From 8439930a040f281bbf84a0e2bffcbaab8956d241 Mon Sep 17 00:00:00 2001
From: qingyuan <1399717445@qq.com>
Date: Sat, 17 Aug 2024 20:57:57 -0400
Subject: [PATCH 3/9] fix the energy function issue by adding sqrt and x*x.t

---
 cifar_test.py | 16 ++++-----
 main_cifar.py |  6 ++--
 utils.py      | 89 +--------------------------------------------------
 3 files changed, 12 insertions(+), 99 deletions(-)

diff --git a/cifar_test.py b/cifar_test.py
index 7353879..8255e08 100644
--- a/cifar_test.py
+++ b/cifar_test.py
@@ -38,20 +38,20 @@ def find_optimal_r(self, x, y):
         weight = self.FBconv.weight.data
         expanded_weights = self.expand_weights_to_matrix(y.shape[1:], weight.permute(1, 0, 2, 3), stride=self.stride, padding=self.padding)
         expanded_weights = expanded_weights.to(y.device)
-        flattened_x = torch.flatten(x, start_dim=1).clone().detach()
+        flattened_x = x.view(1, -1).clone().detach()
 
         """
         Implement with SGD
         """
         # Initialize flattened_y as a tensor with requires_grad=True
-        num_iterations = 500
+        num_iterations = 1
         y = F.pad(y, (self.padding, self.padding, self.padding, self.padding))
-        flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True)
+        flattened_y = y.view(1, -1).clone().detach().requires_grad_(True)
         energy_record = []
-        optimizer = torch.optim.SGD([flattened_y], lr=0.001)
+        optimizer = torch.optim.SGD([flattened_y], lr=0.01)
         for _ in range(num_iterations):
             optimizer.zero_grad()
-            energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2)
+            energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y)
             energy.backward()
             optimizer.step()
             energy_record.append(energy.item())
@@ -165,11 +165,11 @@ def forward(self, x):
 
 if __name__ == '__main__':
     batchsize = 500
-    test_ratio = 1
+    test_ratio = 0.1
     transform_test = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])
-    testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test)
+    testset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform_test)
     num_samples = len(testset)
     subset_size = int(test_ratio * num_samples)
 
@@ -183,7 +183,7 @@ def forward(self, x):
 
     # Create an instance of the PredNetBpD class
     checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7')
-    prednet = PredNetBpD(num_classes=100, cls=5, Tied=False)
+    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD')
     prednet = nn.DataParallel(prednet)
     prednet.load_state_dict(checkpoint_weight['net'])
     prednet = prednet.cuda()
diff --git a/main_cifar.py b/main_cifar.py
index 6b67c56..394ecb5 100644
--- a/main_cifar.py
+++ b/main_cifar.py
@@ -16,11 +16,11 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=
     use_cuda = True # torch.cuda.is_available()
     best_acc = 0  # best test accuracy
     start_epoch = 0  # start from epoch 0 or last checkpoint epoch
-    batchsize = 256
+    batchsize = 1
     root = './'
     rep = 1
     lr = 0.01
-    solver = None
+    solver = 'SGD'
     
     models = {'PredNetBpD':PredNetBpD}
     modelname = model+'_'+str(circles)+'CLS_'+str(nesterov)+'Nes_'+str(weightDecay)+'WD_'+str(Tied)+'TIED_'+str(rep)+'REP'
@@ -58,7 +58,7 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=
     # Define objective function
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.SGD(net.parameters(), momentum=0.9, lr=lr, weight_decay=weightDecay, nesterov=nesterov)
-      
+
     # Parallel computing
     if use_cuda:
         net.cuda()
diff --git a/utils.py b/utils.py
index 96c4388..42b454d 100644
--- a/utils.py
+++ b/utils.py
@@ -124,91 +124,4 @@ def format_time(seconds):
         f = '0ms'
     return f
 
-class BRIMconfig:
-    def __init__(
-        self,
-        R=31e3,
-        C=49e-15,
-        temperature_start=0.5, # 2.4
-        temperature_end=0.5, #0.5
-        t_step=2.2e-11,
-        t_stop=10e-9,
-        h2v=True,
-        fixed_temperature=False
-    ) -> None:
-        self.R: float = R
-        self.C: float = C
-        if fixed_temperature:
-            self.temperature_start: float = temperature_start
-            self.temperature_end: float = temperature_start
-        else:
-            self.temperature_start: float = temperature_start
-            self.temperature_end: float = temperature_end
-        self.t_step: float = t_step
-        self.t_stop: float = t_stop
-        self.h2v: bool = h2v
-
-
-def absv(spin):
-    return torch.sign(spin)
-
-
-def make_equivalent_ising(couplings: torch.tensor,
-                          visible_bias: torch.tensor,
-                          hidden_bias: torch.tensor,
-                          visible_binary: torch.tensor,
-                          hidden_binary: torch.tensor):
-    
-    J_new = couplings / 4
-    a_new = couplings.sum(axis=1) / 4 + visible_bias / 2
-    b_new = couplings.sum(axis=0) / 4 + hidden_bias / 2
-    offset = (couplings / 4).sum() +\
-        visible_bias.sum() / 2 + hidden_bias.sum() / 2
-    visible_spins = 2 * visible_binary - 1
-    hidden_spins = 2 * hidden_binary - 1
-    
-    return J_new, a_new, b_new, offset, visible_spins, hidden_spins
-
-
-def format_check(W: torch.tensor, 
-                 visible: torch.tensor, 
-                 hidden: torch.tensor, 
-                 visible_bias: torch.tensor, 
-                 hidden_bias: torch.tensor):
-    offset = 0
-    convert_flag = 0
-    # convert to spin if input are binary
-    if (torch.all((visible == 0) | (visible == 1)) and torch.all((hidden == 0) | (hidden == 1))):
-        W, visible_bias, hidden_bias, offset, visible, hidden =\
-        make_equivalent_ising(W, visible_bias, hidden_bias, visible, hidden)
-        convert_flag = 1
-    # pass if input are spin
-    elif (torch.all((visible == -1) | (visible == 1)) and torch.all((hidden == -1) | (hidden == 1))):
-        convert_flag = 0
-    # not support other input
-    else:
-        raise ValueError("visible and hidden are not binary or spin")
-    
-    return W, visible, hidden, visible_bias, hidden_bias, offset, convert_flag
-
-
-def _sigmoid(x):
-    return 1 / (1 + np.exp(-x))
-
-def calculate_logprob(W,  # (num_visible, num_hidden)
-                      visible_bias,  # (1, num_visible)
-                      hidden_bias,  # (1, num_hidden)
-                      logZ,
-                      data,  # (num_samples, num_visible)
-                      offset):
-    print(f'weight: {W.shape}, visible_bias: {visible_bias.shape}, hidden_bias: {hidden_bias.shape}')
-    hidden = (np.sum(_sigmoid((hidden_bias + np.matmul(data, W))), axis=0) / len(data))[None, :]
-    visible = (np.sum(data, axis=0) / len(data))[None, :]
-    expectation_energy = -(visible @ W @ hidden.T + visible @ visible_bias.T + hidden_bias @ hidden.T)
-    logprob = - expectation_energy - logZ
-    return logprob
-
-def normalize_and_quantize(spin, quantize):
-    normalized_spin = (spin - spin.min()) / (spin.max() - spin.min()) * 2 - 1
-    normalized_spin = ((normalized_spin + 1) / 2 * (quantize - 1)).round() / (quantize - 1) * 2 - 1
-    return normalized_spin
\ No newline at end of file
+

From 8a81162365822c12c77b6735ad0923427294a596 Mon Sep 17 00:00:00 2001
From: qingyuan <1399717445@qq.com>
Date: Sun, 18 Aug 2024 00:31:22 -0400
Subject: [PATCH 4/9] fix the problem that expanding weight takes too much time

---
 cifar_test.py | 99 ++++++++++++++++++++++++++-------------------------
 prednet.py    |  2 +-
 2 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/cifar_test.py b/cifar_test.py
index 8255e08..2778601 100644
--- a/cifar_test.py
+++ b/cifar_test.py
@@ -5,9 +5,6 @@
 import torch.backends.cudnn as cudnn
 import torchvision
 import torchvision.transforms as transforms
-import argparse
-from prednet import *
-from torch.autograd import Variable
 from torch.utils.data import Subset
 import torch
 import torch.nn as nn
@@ -16,11 +13,14 @@
 
 
 class PcConvBp_SGD(nn.Module):
-    def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False):
+    def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, num_iterations=5):
         super().__init__()
+        self.num_iterations = num_iterations
         self.padding = padding
         self.stride = stride
         self.kernel_size = kernel_size
+        self.C_in = inchan
+        self.C_out = outchan
         self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias)
         self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias)
         self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))])
@@ -28,99 +28,89 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b
         self.cls = cls
         self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False)
 
-    def forward(self, x):
+    def forward(self, x, layer_idx):
         y = self.relu(self.FFconv(x))
-        y, energies = self.find_optimal_r(x, y)
+        y, energies = self.find_optimal_r(x, y, layer_idx)
         y = y + self.bypass(x)
         return y
 
-    def find_optimal_r(self, x, y):
-        weight = self.FBconv.weight.data
-        expanded_weights = self.expand_weights_to_matrix(y.shape[1:], weight.permute(1, 0, 2, 3), stride=self.stride, padding=self.padding)
+    def find_optimal_r(self, x, y, layer_idx):
+        expanded_weights = torch.load(f'./expanded_weights/expanded_weights_{layer_idx}.pt')
         expanded_weights = expanded_weights.to(y.device)
-        flattened_x = x.view(1, -1).clone().detach()
+        flattened_x = torch.flatten(x, start_dim=1).clone().detach()
 
         """
         Implement with SGD
         """
         # Initialize flattened_y as a tensor with requires_grad=True
-        num_iterations = 1
         y = F.pad(y, (self.padding, self.padding, self.padding, self.padding))
-        flattened_y = y.view(1, -1).clone().detach().requires_grad_(True)
+        flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True)
         energy_record = []
         optimizer = torch.optim.SGD([flattened_y], lr=0.01)
-        for _ in range(num_iterations):
+        for _ in range(self.num_iterations):
             optimizer.zero_grad()
-            energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y)
+            # energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y)
+            energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2)
             energy.backward()
             optimizer.step()
             energy_record.append(energy.item())
 
         # Reshape the flattened_y to the original shape
         _, C_in, H_in, W_in = y.shape
-        C_out, _, K, _ = weight.shape
-        H_out = (H_in - K + 2 * self.padding) // self.stride + 1
-        W_out = (W_in - K + 2 * self.padding) // self.stride + 1
-        optimal_y = flattened_y.view(-1, C_out, H_out, W_out)
+        H_out = (H_in - self.kernel_size + 2 * self.padding) // self.stride + 1
+        W_out = (W_in - self.kernel_size + 2 * self.padding) // self.stride + 1
+        optimal_y = flattened_y.view(-1, self.C_out, H_out, W_out)
+        del flattened_y, flattened_x, energy, expanded_weights
         # Cut off the padding area
         optimal_y = optimal_y[:, :, self.padding:-self.padding, self.padding:-self.padding]
         optimal_y = optimal_y.to(y.device)
-
+        
         return optimal_y.detach(), energy_record
 
     def Energy_Function(self, x, W, y):
-        energy = -2* x @ W @ y.T + y @ (W.T @ W) @ y.T
+        energy = torch.sqrt(x @ x.T -2* x @ W @ y.T + y @ (W.T @ W) @ y.T)
         return energy
     
     def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding=0):
-        """
-        Expand the convolution weights to a matrix suitable for multiplying with a flattened input vector.
-        
-        Args:
-        - input_shape (tuple): Shape of the input (C_in, H_in, W_in)
-        - weight_tensor (torch.Tensor): Convolution weights of shape (C_out, C_in, K, K)
-        - stride (int): Stride of the convolution
-        - padding (int): Padding size
-
-        Returns:
-        - expanded_weights (torch.Tensor): The expanded weight matrix for matrix multiplication
-        """
         C_in, H_in, W_in = input_shape
         C_out, _, K, _ = weight_tensor.shape
-        
+
         # Compute output dimensions
         H_out = (H_in + 2 * padding - K) // stride + 1
         W_out = (W_in + 2 * padding - K) // stride + 1
 
-        # Initialize expanded weight matrix
-        expanded_weights = torch.zeros((C_out * H_out * W_out, C_in * (H_in + 2 * padding) * (W_in + 2 * padding)))
+        # List to store sparse indices and values
+        indices = []
+        values = []
 
-        # Fill the expanded weight matrix
         for c_out in range(C_out):
             for h in range(H_out):
                 for w in range(W_out):
-                    # Calculate the starting index for each filter application
                     start_h = h * stride
                     start_w = w * stride
-                    # Flattened receptive field index
                     filter_idx = c_out * H_out * W_out + h * W_out + w
-                    # Fill the appropriate section of the expanded weight matrix
                     for c_in in range(C_in):
                         for i in range(K):
                             for j in range(K):
-                                # Calculate the input index considering padding
                                 input_idx = (c_in * (H_in + 2 * padding) + (start_h + i)) * (W_in + 2 * padding) + (start_w + j)
-                                # Assign the weight to the correct position
-                                expanded_weights[filter_idx, input_idx] = weight_tensor[c_out, c_in, i, j]
+                                value = weight_tensor[c_out, c_in, i, j].item()
+                                if value != 0:
+                                    indices.append([filter_idx, input_idx])
+                                    values.append(value)
+
+        # Convert to sparse tensor
+        indices = torch.tensor(indices, dtype=torch.long).t()
+        values = torch.tensor(values, dtype=torch.float32)
+        size = (C_out * H_out * W_out, C_in * (H_in + 2 * padding) * (W_in + 2 * padding))
+        expanded_weights = torch.sparse_coo_tensor(indices, values, size=size)
 
         return expanded_weights
     
     
-    
 ''' Architecture PredNetBpD '''
 from prednet import PcConvBp
 class PredNetBpD(nn.Module):
-    def __init__(self, num_classes=10, cls=0, Tied = False, solver=None):
+    def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_number=None, num_iterations=None):
         super().__init__()
         self.ics = [3,  64, 64, 128, 128, 256, 256, 512] # input chanels
         self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels
@@ -133,11 +123,19 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None):
         if Tied == False:
             if solver is None:
                 print('No solver in used, still using convolution in recurrent layer')
+                assert layer_number is None, 'layer_number must be None if solver is None'
                 self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
             elif solver == 'SGD':
                 print(f'Solver {solver} is in use')
-                self.PcConvs = nn.ModuleList([PcConvBp_SGD(3, 64, cls=self.cls)])
-                self.PcConvs.extend([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(1, self.nlays)])
+                assert layer_number is not None, 'layer_number must be provided if solver is not None'
+                assert layer_number <= self.nlays, f'layer_number must be less than or equal to the number of layers: {self.nlays}'
+                self.PcConvs = nn.ModuleList()
+                for i in range(self.nlays):
+                    # if i <= (layer_number-1):
+                    if i == (layer_number-1):
+                        self.PcConvs.append(PcConvBp_SGD(self.ics[i], self.ocs[i], cls=self.cls, num_iterations=num_iterations))
+                    else:
+                        self.PcConvs.append(PcConvBp(self.ics[i], self.ocs[i], cls=self.cls))
             else:
                 print(f'Solver {solver} not supported')
         else:
@@ -153,7 +151,7 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None):
     def forward(self, x):
         for i in range(self.nlays):
             x = self.BNs[i](x)
-            x = self.PcConvs[i](x)  # ReLU + Conv
+            x = self.PcConvs[i](x, i)  # ReLU + Conv
             if self.maxpool[i]:
                 x = self.maxpool2d(x)
 
@@ -162,10 +160,12 @@ def forward(self, x):
         out = out.view(out.size(0), -1)
         out = self.linear(out)
         return out
+      
+
 
 if __name__ == '__main__':
     batchsize = 500
-    test_ratio = 0.1
+    test_ratio = 1
     transform_test = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])
@@ -183,7 +183,7 @@ def forward(self, x):
 
     # Create an instance of the PredNetBpD class
     checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7')
-    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD')
+    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD', layer_number=1, num_iterations=1)
     prednet = nn.DataParallel(prednet)
     prednet.load_state_dict(checkpoint_weight['net'])
     prednet = prednet.cuda()
@@ -197,6 +197,7 @@ def forward(self, x):
         _, predicted = torch.max(output_tensor, 1)
         total += targets.size(0)
         correct += (predicted == targets).sum().item()
+        print(f' Temporal Accuracy: {100 * correct / total:.2f}%')
 
     # Calculate the accuracy
     accuracy = 100 * correct / total
diff --git a/prednet.py b/prednet.py
index 1055b59..8f47a21 100644
--- a/prednet.py
+++ b/prednet.py
@@ -29,7 +29,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b
         self.cls = cls
         self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False)
 
-    def forward(self, x):
+    def forward(self, x, layer_idx):
         y = self.relu(self.FFconv(x))
         for _ in range(self.cls):
             y = self.FFconv(self.relu(x - self.FBconv(y))) + y

From d78b59acb957fbd6b18da3e65deb732a7cfd86af Mon Sep 17 00:00:00 2001
From: qingyuan <1399717445@qq.com>
Date: Thu, 22 Aug 2024 18:16:42 -0400
Subject: [PATCH 5/9] save a backup before adding LD

---
 cifar_test.py | 99 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 66 insertions(+), 33 deletions(-)

diff --git a/cifar_test.py b/cifar_test.py
index 2778601..1ec7d31 100644
--- a/cifar_test.py
+++ b/cifar_test.py
@@ -10,11 +10,15 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from tqdm import tqdm
+from scipy.optimize import dual_annealing
+import numpy as np
 
 
-class PcConvBp_SGD(nn.Module):
-    def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, num_iterations=5):
+class PcConvBp_DS(nn.Module):
+    def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, solver='SGD', num_iterations=5, train_weight=False):
         super().__init__()
+        self.solver = solver
+        self.train_weight = train_weight
         self.num_iterations = num_iterations
         self.padding = padding
         self.stride = stride
@@ -30,42 +34,72 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b
 
     def forward(self, x, layer_idx):
         y = self.relu(self.FFconv(x))
-        y, energies = self.find_optimal_r(x, y, layer_idx)
+        y = self.find_optimal_r(x, y, layer_idx, solver=self.solver)
         y = y + self.bypass(x)
         return y
 
-    def find_optimal_r(self, x, y, layer_idx):
-        expanded_weights = torch.load(f'./expanded_weights/expanded_weights_{layer_idx}.pt')
-        expanded_weights = expanded_weights.to(y.device)
+    def find_optimal_r(self, x, y, layer_idx, solver):
+        if self.train_weight:
+            expanded_weights = torch.load(f'./expanded_weights_train/expanded_weights_{layer_idx}.pt')
+            expanded_weights.clone().detach().requires_grad_(True)
+        else:
+            expanded_weights = torch.load(f'./expanded_weights/expanded_weights_{layer_idx}.pt')
         flattened_x = torch.flatten(x, start_dim=1).clone().detach()
-
-        """
-        Implement with SGD
-        """
-        # Initialize flattened_y as a tensor with requires_grad=True
         y = F.pad(y, (self.padding, self.padding, self.padding, self.padding))
-        flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True)
-        energy_record = []
-        optimizer = torch.optim.SGD([flattened_y], lr=0.01)
-        for _ in range(self.num_iterations):
-            optimizer.zero_grad()
-            # energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y)
-            energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2)
-            energy.backward()
-            optimizer.step()
-            energy_record.append(energy.item())
-
+        
+        if solver == 'SGD':
+            """ Implement with SGD """
+            # Initialize flattened_y as a tensor with requires_grad=True
+            expanded_weights = expanded_weights.to(y.device)
+            flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True)
+            energy = 0
+            optimizer_y = torch.optim.SGD([flattened_y], lr=0.01)
+            optimizer_w = torch.optim.SGD([expanded_weights], lr=0.01) if self.train_weight else None
+            for _ in range(self.num_iterations):
+                optimizer_y.zero_grad()
+                # energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y)
+                energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2)
+                energy.backward()
+                optimizer_y.step()
+
+            if self.train_weight:
+                for _ in range(5):
+                    optimizer_w.zero_grad()
+                    energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2)
+                    energy.backward()
+                    optimizer_w.step()
+                    torch.save(expanded_weights, f'./expanded_weights_train/expanded_weights_{layer_idx}.pt')
+
+        elif solver == 'SA':
+            flattened_x_np = flattened_x.cpu().numpy()
+            expanded_weights_np = expanded_weights.to_dense().numpy()
+            flattened_y_np = torch.flatten(y, start_dim=1).cpu().detach()
+            flattened_y_np = flattened_y_np.numpy()
+
+            def e_f(y, x, W):
+                energy = np.linalg.norm(x - y @ W.T, ord=2)
+                return energy.item()
+
+            # Define bounds for each element in flattened_y_np
+            bounds = [(-2.5, 2.5) for _ in range(flattened_y_np.size)]
+
+            result = dual_annealing(e_f, bounds, x0=np.squeeze(flattened_y_np), args=(flattened_x_np, expanded_weights_np), maxiter=self.num_iterations, maxfun=5)
+            flattened_y = torch.tensor(result.x, dtype=torch.float32)
+        
+        else:
+            raise ValueError(f'Solver {solver} not supported')
+        
         # Reshape the flattened_y to the original shape
         _, C_in, H_in, W_in = y.shape
         H_out = (H_in - self.kernel_size + 2 * self.padding) // self.stride + 1
         W_out = (W_in - self.kernel_size + 2 * self.padding) // self.stride + 1
         optimal_y = flattened_y.view(-1, self.C_out, H_out, W_out)
-        del flattened_y, flattened_x, energy, expanded_weights
+        del flattened_y, flattened_x, expanded_weights
         # Cut off the padding area
         optimal_y = optimal_y[:, :, self.padding:-self.padding, self.padding:-self.padding]
         optimal_y = optimal_y.to(y.device)
         
-        return optimal_y.detach(), energy_record
+        return optimal_y.detach()
 
     def Energy_Function(self, x, W, y):
         energy = torch.sqrt(x @ x.T -2* x @ W @ y.T + y @ (W.T @ W) @ y.T)
@@ -110,7 +144,7 @@ def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding
 ''' Architecture PredNetBpD '''
 from prednet import PcConvBp
 class PredNetBpD(nn.Module):
-    def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_number=None, num_iterations=None):
+    def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_number=None, num_iterations=None, train_weight=False):
         super().__init__()
         self.ics = [3,  64, 64, 128, 128, 256, 256, 512] # input chanels
         self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels
@@ -125,7 +159,7 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_numbe
                 print('No solver in used, still using convolution in recurrent layer')
                 assert layer_number is None, 'layer_number must be None if solver is None'
                 self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
-            elif solver == 'SGD':
+            elif solver in ['SGD', 'SA']:
                 print(f'Solver {solver} is in use')
                 assert layer_number is not None, 'layer_number must be provided if solver is not None'
                 assert layer_number <= self.nlays, f'layer_number must be less than or equal to the number of layers: {self.nlays}'
@@ -133,7 +167,7 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_numbe
                 for i in range(self.nlays):
                     # if i <= (layer_number-1):
                     if i == (layer_number-1):
-                        self.PcConvs.append(PcConvBp_SGD(self.ics[i], self.ocs[i], cls=self.cls, num_iterations=num_iterations))
+                        self.PcConvs.append(PcConvBp_DS(self.ics[i], self.ocs[i], cls=self.cls, solver=solver, num_iterations=num_iterations, train_weight=train_weight))
                     else:
                         self.PcConvs.append(PcConvBp(self.ics[i], self.ocs[i], cls=self.cls))
             else:
@@ -160,12 +194,11 @@ def forward(self, x):
         out = out.view(out.size(0), -1)
         out = self.linear(out)
         return out
-      
 
 
 if __name__ == '__main__':
-    batchsize = 500
-    test_ratio = 1
+    batchsize = 1
+    test_ratio = 0.001
     transform_test = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])
@@ -179,11 +212,11 @@ def forward(self, x):
     test_subset = Subset(testset, subset_indices)
 
     # Create a DataLoader for the subset
-    testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=True, num_workers=6)
+    testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=False, num_workers=6)
 
     # Create an instance of the PredNetBpD class
     checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7')
-    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD', layer_number=1, num_iterations=1)
+    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD', layer_number=1, num_iterations=50, train_weight=False)
     prednet = nn.DataParallel(prednet)
     prednet.load_state_dict(checkpoint_weight['net'])
     prednet = prednet.cuda()
@@ -197,7 +230,7 @@ def forward(self, x):
         _, predicted = torch.max(output_tensor, 1)
         total += targets.size(0)
         correct += (predicted == targets).sum().item()
-        print(f' Temporal Accuracy: {100 * correct / total:.2f}%')
+        print(f' Temperary Accuracy: {100 * correct / total:.2f}%')
 
     # Calculate the accuracy
     accuracy = 100 * correct / total

From 83acea0e9fcc9baef2092a8a184650f3f7f723b5 Mon Sep 17 00:00:00 2001
From: qingyuan <1399717445@qq.com>
Date: Fri, 23 Aug 2024 14:46:47 -0400
Subject: [PATCH 6/9] save as backup before adding noise

---
 cifar_test.py | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/cifar_test.py b/cifar_test.py
index 1ec7d31..04dafd2 100644
--- a/cifar_test.py
+++ b/cifar_test.py
@@ -85,7 +85,21 @@ def e_f(y, x, W):
 
             result = dual_annealing(e_f, bounds, x0=np.squeeze(flattened_y_np), args=(flattened_x_np, expanded_weights_np), maxiter=self.num_iterations, maxfun=5)
             flattened_y = torch.tensor(result.x, dtype=torch.float32)
-        
+            
+        elif solver == 'LD':
+            expanded_weights = expanded_weights.to(y.device)
+            c = -2 * torch.sparse.mm(flattened_x, expanded_weights)
+            def LD(W, c, r1, lr=0.001):
+                # Q is  W.T @ W
+                # c is  -2 * r0 @ W
+                x = r1.squeeze(0)
+                c = c.squeeze(0)
+                for i in range(self.num_iterations):
+                    # Perform sparse matrix multiplication instead of forming Q explicitly
+                    gradient = torch.sparse.mm(expanded_weights.T, torch.sparse.mm(expanded_weights, x.unsqueeze(1))).squeeze(1) + c
+                    x = x - lr * gradient
+                return x.view(1, -1)
+            flattened_y = LD(expanded_weights, c, torch.flatten(y, start_dim=1))
         else:
             raise ValueError(f'Solver {solver} not supported')
         
@@ -159,7 +173,7 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_numbe
                 print('No solver in used, still using convolution in recurrent layer')
                 assert layer_number is None, 'layer_number must be None if solver is None'
                 self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
-            elif solver in ['SGD', 'SA']:
+            elif solver in ['SGD', 'SA', 'LD']:
                 print(f'Solver {solver} is in use')
                 assert layer_number is not None, 'layer_number must be provided if solver is not None'
                 assert layer_number <= self.nlays, f'layer_number must be less than or equal to the number of layers: {self.nlays}'
@@ -197,8 +211,10 @@ def forward(self, x):
 
 
 if __name__ == '__main__':
-    batchsize = 1
-    test_ratio = 0.001
+    batchsize = 500
+    test_ratio = 1
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    print(f'Using device: {device}')
     transform_test = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])
@@ -215,16 +231,16 @@ def forward(self, x):
     testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=False, num_workers=6)
 
     # Create an instance of the PredNetBpD class
-    checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7')
-    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD', layer_number=1, num_iterations=50, train_weight=False)
+    checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7', map_location=device)
+    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver=None, layer_number=None, num_iterations=500, train_weight=False)
     prednet = nn.DataParallel(prednet)
     prednet.load_state_dict(checkpoint_weight['net'])
-    prednet = prednet.cuda()
+    prednet = prednet.to(device)
     prednet.eval()
     total = 0
     correct = 0
     for batch_idx, (inputs, targets) in tqdm(enumerate(testloader), total=len(testloader)):
-        inputs, targets = inputs.cuda(), targets.cuda()
+        inputs, targets = inputs.to(device), targets.to(device)
         output_tensor = prednet(inputs)
         # Get the predicted class
         _, predicted = torch.max(output_tensor, 1)

From a0602ea0681b747c2b50fee6e485df39646f2c0c Mon Sep 17 00:00:00 2001
From: qingyuan <1399717445@qq.com>
Date: Wed, 28 Aug 2024 16:30:10 -0400
Subject: [PATCH 7/9] apply solver for PCN_5, accuracy not good

---
 cifar_test.py | 56 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/cifar_test.py b/cifar_test.py
index 04dafd2..3b1c691 100644
--- a/cifar_test.py
+++ b/cifar_test.py
@@ -15,8 +15,10 @@
 
 
 class PcConvBp_DS(nn.Module):
-    def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, solver='SGD', num_iterations=5, train_weight=False):
+    def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, 
+                 solver='SGD', num_iterations=5, train_weight=False, noise_level=None):
         super().__init__()
+        self.noise_level = noise_level
         self.solver = solver
         self.train_weight = train_weight
         self.num_iterations = num_iterations
@@ -27,7 +29,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b
         self.C_out = outchan
         self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias)
         self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias)
-        self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))])
+        # self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))])
         self.relu = nn.ReLU(inplace=True)
         self.cls = cls
         self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False)
@@ -43,7 +45,12 @@ def find_optimal_r(self, x, y, layer_idx, solver):
             expanded_weights = torch.load(f'./expanded_weights_train/expanded_weights_{layer_idx}.pt')
             expanded_weights.clone().detach().requires_grad_(True)
         else:
-            expanded_weights = torch.load(f'./expanded_weights/expanded_weights_{layer_idx}.pt')
+            expanded_weights = torch.load(f'./expanded_weights/PCN_5/expanded_weights_{layer_idx}.pt')
+            if self.noise_level is not None:
+                noise = self.noise_level * torch.randn(expanded_weights.shape) * expanded_weights
+                noise = noise.to_sparse()
+                expanded_weights += noise
+
         flattened_x = torch.flatten(x, start_dim=1).clone().detach()
         y = F.pad(y, (self.padding, self.padding, self.padding, self.padding))
         
@@ -61,7 +68,7 @@ def find_optimal_r(self, x, y, layer_idx, solver):
                 energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2)
                 energy.backward()
                 optimizer_y.step()
-
+                
             if self.train_weight:
                 for _ in range(5):
                     optimizer_w.zero_grad()
@@ -69,7 +76,7 @@ def find_optimal_r(self, x, y, layer_idx, solver):
                     energy.backward()
                     optimizer_w.step()
                     torch.save(expanded_weights, f'./expanded_weights_train/expanded_weights_{layer_idx}.pt')
-
+            
         elif solver == 'SA':
             flattened_x_np = flattened_x.cpu().numpy()
             expanded_weights_np = expanded_weights.to_dense().numpy()
@@ -89,17 +96,20 @@ def e_f(y, x, W):
         elif solver == 'LD':
             expanded_weights = expanded_weights.to(y.device)
             c = -2 * torch.sparse.mm(flattened_x, expanded_weights)
-            def LD(W, c, r1, lr=0.001):
+            def LD(expanded_weights, c, r1, lr=0.001):
                 # Q is  W.T @ W
                 # c is  -2 * r0 @ W
-                x = r1.squeeze(0)
-                c = c.squeeze(0)
+                x = r1.squeeze(0).cpu()
+                c = c.squeeze(0).cpu()
                 for i in range(self.num_iterations):
                     # Perform sparse matrix multiplication instead of forming Q explicitly
                     gradient = torch.sparse.mm(expanded_weights.T, torch.sparse.mm(expanded_weights, x.unsqueeze(1))).squeeze(1) + c
                     x = x - lr * gradient
+                    del gradient
                 return x.view(1, -1)
+            expanded_weights = expanded_weights.cpu()
             flattened_y = LD(expanded_weights, c, torch.flatten(y, start_dim=1))
+            del c
         else:
             raise ValueError(f'Solver {solver} not supported')
         
@@ -158,11 +168,13 @@ def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding
 ''' Architecture PredNetBpD '''
 from prednet import PcConvBp
 class PredNetBpD(nn.Module):
-    def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_number=None, num_iterations=None, train_weight=False):
+    def __init__(self, num_classes=10, cls=0, Tied = False, 
+                 solver=None, layer_number=None, num_iterations=None, train_weight=False,
+                 noise_level=None):
         super().__init__()
-        self.ics = [3,  64, 64, 128, 128, 256, 256, 512] # input chanels
-        self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels
-        self.maxpool = [False, False, True, False, True, False, False, False] # downsample flag
+        self.ics = [ 3, 32, 64,  64, 128] # input chanels
+        self.ocs = [32, 64, 64, 128, 128] # output chanels
+        self.maxpool = [False, True, False, True, False] # downsample flag
         self.cls = cls # num of time steps
         self.nlays = len(self.ics)
 
@@ -181,14 +193,17 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_numbe
                 for i in range(self.nlays):
                     # if i <= (layer_number-1):
                     if i == (layer_number-1):
-                        self.PcConvs.append(PcConvBp_DS(self.ics[i], self.ocs[i], cls=self.cls, solver=solver, num_iterations=num_iterations, train_weight=train_weight))
+                        self.PcConvs.append(PcConvBp_DS(self.ics[i], self.ocs[i], cls=self.cls, 
+                                                        solver=solver, num_iterations=num_iterations, train_weight=train_weight,
+                                                        noise_level=noise_level))
                     else:
                         self.PcConvs.append(PcConvBp(self.ics[i], self.ocs[i], cls=self.cls))
             else:
                 print(f'Solver {solver} not supported')
         else:
             self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
-
+        if noise_level is not None:
+            print(f'Adding noise to the solver {solver} with noise level {noise_level}')
         self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)])
         # Linear layer
         self.linear = nn.Linear(self.ocs[-1], num_classes)
@@ -231,11 +246,18 @@ def forward(self, x):
     testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=False, num_workers=6)
 
     # Create an instance of the PredNetBpD class
-    checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7', map_location=device)
-    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver=None, layer_number=None, num_iterations=500, train_weight=False)
+    checkpoint_weight = torch.load('checkpoint/PCN_5.t7', map_location=device)
+    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, 
+                         solver='SGD', layer_number=1, num_iterations=50, train_weight=False,
+                         noise_level=None)
+    prednet = prednet.to(device)
     prednet = nn.DataParallel(prednet)
+    # new_state_dict = {}
+    # for key, value in checkpoint_weight['net'].items():
+    #     new_key = key.replace('module.', '')  # Remove 'module.' prefix
+    #     new_state_dict[new_key] = value
+    # prednet.load_state_dict(new_state_dict)
     prednet.load_state_dict(checkpoint_weight['net'])
-    prednet = prednet.to(device)
     prednet.eval()
     total = 0
     correct = 0

From bb7ef1d0666f38afc201e2ca3793a1e1ff38d7e7 Mon Sep 17 00:00:00 2001
From: qingyuan <1399717445@qq.com>
Date: Wed, 28 Aug 2024 16:31:13 -0400
Subject: [PATCH 8/9] comment out b0 so that PCN_5 weights can be loaded

---
 cifar_test.py | 2 +-
 prednet.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cifar_test.py b/cifar_test.py
index 3b1c691..f41d0a8 100644
--- a/cifar_test.py
+++ b/cifar_test.py
@@ -247,7 +247,7 @@ def forward(self, x):
 
     # Create an instance of the PredNetBpD class
     checkpoint_weight = torch.load('checkpoint/PCN_5.t7', map_location=device)
-    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, 
+    prednet = PredNetBpD(num_classes=10, cls=0, Tied=False, 
                          solver='SGD', layer_number=1, num_iterations=50, train_weight=False,
                          noise_level=None)
     prednet = prednet.to(device)
diff --git a/prednet.py b/prednet.py
index 8f47a21..d7c512e 100644
--- a/prednet.py
+++ b/prednet.py
@@ -23,7 +23,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b
         super().__init__()
         self.FFconv = nn.Conv2d(inchan, outchan, kernel_size, stride, padding, bias=bias)
         self.FBconv = nn.ConvTranspose2d(outchan, inchan, kernel_size, stride, padding, bias=bias)
-        self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))])
+        # self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))])
         self.relu = nn.ReLU(inplace=True)
         self.sigmoid = nn.Sigmoid()
         self.cls = cls

From 8e01e8405ae99671e7160194d8165ba91c296493 Mon Sep 17 00:00:00 2001
From: qingyuan <1399717445@qq.com>
Date: Wed, 4 Sep 2024 17:27:03 -0400
Subject: [PATCH 9/9] version with GD, SA and LD (basically GD)

---
 cifar_test.py | 32 +++++++++++-----------
 main_cifar.py |  9 +++---
 prednet.py    | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 95 insertions(+), 22 deletions(-)

diff --git a/cifar_test.py b/cifar_test.py
index f41d0a8..a8fe0d7 100644
--- a/cifar_test.py
+++ b/cifar_test.py
@@ -29,7 +29,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b
         self.C_out = outchan
         self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias)
         self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias)
-        # self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))])
+        self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))])
         self.relu = nn.ReLU(inplace=True)
         self.cls = cls
         self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False)
@@ -60,8 +60,8 @@ def find_optimal_r(self, x, y, layer_idx, solver):
             expanded_weights = expanded_weights.to(y.device)
             flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True)
             energy = 0
-            optimizer_y = torch.optim.SGD([flattened_y], lr=0.01)
-            optimizer_w = torch.optim.SGD([expanded_weights], lr=0.01) if self.train_weight else None
+            optimizer_y = torch.optim.SGD([flattened_y], lr=0.001)
+            optimizer_w = torch.optim.SGD([expanded_weights], lr=0.001) if self.train_weight else None
             for _ in range(self.num_iterations):
                 optimizer_y.zero_grad()
                 # energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y)
@@ -99,15 +99,14 @@ def e_f(y, x, W):
             def LD(expanded_weights, c, r1, lr=0.001):
                 # Q is  W.T @ W
                 # c is  -2 * r0 @ W
-                x = r1.squeeze(0).cpu()
-                c = c.squeeze(0).cpu()
+                x = r1.squeeze(0)
+                c = c.squeeze(0)
                 for i in range(self.num_iterations):
                     # Perform sparse matrix multiplication instead of forming Q explicitly
                     gradient = torch.sparse.mm(expanded_weights.T, torch.sparse.mm(expanded_weights, x.unsqueeze(1))).squeeze(1) + c
                     x = x - lr * gradient
                     del gradient
                 return x.view(1, -1)
-            expanded_weights = expanded_weights.cpu()
             flattened_y = LD(expanded_weights, c, torch.flatten(y, start_dim=1))
             del c
         else:
@@ -175,6 +174,12 @@ def __init__(self, num_classes=10, cls=0, Tied = False,
         self.ics = [ 3, 32, 64,  64, 128] # input chanels
         self.ocs = [32, 64, 64, 128, 128] # output chanels
         self.maxpool = [False, True, False, True, False] # downsample flag
+        # self.ics = [ 3, 32, 64] # input chanels
+        # self.ocs = [32, 64, 64] # output chanels
+        # self.maxpool = [False, True, False] # downsample flag
+        # self.ics = [3,  64, 64, 128, 128, 256, 256, 512] # input chanels
+        # self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels
+        # self.maxpool = [False, False, True, False, True, False, False, False] # downsample flag
         self.cls = cls # num of time steps
         self.nlays = len(self.ics)
 
@@ -226,7 +231,7 @@ def forward(self, x):
 
 
 if __name__ == '__main__':
-    batchsize = 500
+    batchsize = 128
     test_ratio = 1
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     print(f'Using device: {device}')
@@ -246,19 +251,14 @@ def forward(self, x):
     testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=False, num_workers=6)
 
     # Create an instance of the PredNetBpD class
-    checkpoint_weight = torch.load('checkpoint/PCN_5.t7', map_location=device)
-    prednet = PredNetBpD(num_classes=10, cls=0, Tied=False, 
-                         solver='SGD', layer_number=1, num_iterations=50, train_weight=False,
+    checkpoint_weight = torch.load('checkpoint/PredNetBpD_5_5CLS_FalseNes_0.001WD_FalseTIED_2REP_best_ckpt.t7', map_location=device)
+    prednet = PredNetBpD(num_classes=10, cls=5, Tied=False,
+                         solver='SGD', layer_number=5, num_iterations=5, train_weight=False,
                          noise_level=None)
     prednet = prednet.to(device)
     prednet = nn.DataParallel(prednet)
-    # new_state_dict = {}
-    # for key, value in checkpoint_weight['net'].items():
-    #     new_key = key.replace('module.', '')  # Remove 'module.' prefix
-    #     new_state_dict[new_key] = value
-    # prednet.load_state_dict(new_state_dict)
     prednet.load_state_dict(checkpoint_weight['net'])
-    prednet.eval()
+    # prednet.eval()
     total = 0
     correct = 0
     for batch_idx, (inputs, targets) in tqdm(enumerate(testloader), total=len(testloader)):
diff --git a/main_cifar.py b/main_cifar.py
index 394ecb5..b233f99 100644
--- a/main_cifar.py
+++ b/main_cifar.py
@@ -12,17 +12,16 @@
 from utils import progress_bar
 from torch.autograd import Variable
 
-def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False):
+def main_cifar(model='PredNetBpD_3', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False):
     use_cuda = True # torch.cuda.is_available()
     best_acc = 0  # best test accuracy
     start_epoch = 0  # start from epoch 0 or last checkpoint epoch
-    batchsize = 1
+    batchsize = 512
     root = './'
     rep = 1
     lr = 0.01
-    solver = 'SGD'
     
-    models = {'PredNetBpD':PredNetBpD}
+    models = {'PredNetBpD_3':PredNetBpD_3}
     modelname = model+'_'+str(circles)+'CLS_'+str(nesterov)+'Nes_'+str(weightDecay)+'WD_'+str(Tied)+'TIED_'+str(rep)+'REP'
     
     # clearn folder
@@ -53,7 +52,7 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=
     
     # Model
     print('==> Building model..')
-    net = models[model](num_classes=10,cls=circles,Tied=Tied, solver=solver)
+    net = models[model](num_classes=10,cls=circles,Tied=Tied)
     
     # Define objective function
     criterion = nn.CrossEntropyLoss()
diff --git a/prednet.py b/prednet.py
index d7c512e..56aa516 100644
--- a/prednet.py
+++ b/prednet.py
@@ -23,7 +23,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b
         super().__init__()
         self.FFconv = nn.Conv2d(inchan, outchan, kernel_size, stride, padding, bias=bias)
         self.FBconv = nn.ConvTranspose2d(outchan, inchan, kernel_size, stride, padding, bias=bias)
-        # self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))])
+        self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))])
         self.relu = nn.ReLU(inplace=True)
         self.sigmoid = nn.Sigmoid()
         self.cls = cls
@@ -38,7 +38,81 @@ def forward(self, x, layer_idx):
 
     def la_sigmoid(self, x):
         return 0.5+0.25*x-0.0212*x**3
+
+''' Architecture PredNetBpD_5 '''
+class PredNetBpD_5(nn.Module): # PredNet variant built from five PC layers followed by a linear classifier
+    def __init__(self, num_classes=10, cls=0, Tied = False):
+        super().__init__()
+        self.ics = [3,  32, 64, 64, 128] # input channels of each PC layer
+        self.ocs = [32, 64, 64, 128, 128] # output channels of each PC layer
+        self.maxpool = [False, True, False, True, False] # downsample flag: max-pool after layer i when True
+        self.cls = cls # num of time steps; passed to every PC layer as cls
+        self.nlays = len(self.ics)
+
+        # Construct PC layers. Unlike PCN v1, there may be no tied version here (one may or may not be added later).
+        # NOTE(review): the else branch needs PcConvBpTied, which is not defined in the visible code -- confirm before using Tied=True.
+        if Tied == False:
+            self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
+        else:
+            self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
+        self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)]) # one batch norm per layer, sized to that layer's input
+        # Linear layer: maps the last layer's output channels to class scores
+        self.linear = nn.Linear(self.ocs[-1], num_classes)
+        self.maxpool2d = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.relu = nn.ReLU(inplace=True)
+        self.BNend = nn.BatchNorm2d(self.ocs[-1]) # batch norm on the last layer's output, applied before the classifier
+
+    def forward(self, x):
+        for i in range(self.nlays):
+            x = self.BNs[i](x)
+            x = self.PcConvs[i](x, i)  # PC layer i, also given its own index (original note: ReLU + Conv)
+            if self.maxpool[i]:
+                x = self.maxpool2d(x)
+
+        # Classifier: BN -> ReLU -> average pool -> flatten -> linear.
+        out = F.avg_pool2d(self.relu(self.BNend(x)), x.size(-1)) # kernel = last dim of x, so a square map pools to 1x1
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+
+''' Architecture PredNetBpD_3 '''
+class PredNetBpD_3(nn.Module): # PredNet variant built from three PC layers followed by a linear classifier
+    def __init__(self, num_classes=10, cls=0, Tied = False):
+        super().__init__()
+        self.ics = [3,  32, 64] # input channels of each PC layer
+        self.ocs = [32, 64, 64] # output channels of each PC layer
+        self.maxpool = [False, True, False] # downsample flag: max-pool after layer i when True
+        self.cls = cls # num of time steps; passed to every PC layer as cls
+        self.nlays = len(self.ics)
+
+        # Construct PC layers. Unlike PCN v1, there may be no tied version here (one may or may not be added later).
+        # NOTE(review): the else branch needs PcConvBpTied, which is not defined in the visible code -- confirm before using Tied=True.
+        if Tied == False:
+            self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
+        else:
+            self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)])
+        self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)]) # one batch norm per layer, sized to that layer's input
+        # Linear layer: maps the last layer's output channels to class scores
+        self.linear = nn.Linear(self.ocs[-1], num_classes)
+        self.maxpool2d = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.relu = nn.ReLU(inplace=True)
+        self.BNend = nn.BatchNorm2d(self.ocs[-1]) # batch norm on the last layer's output, applied before the classifier
+
+    def forward(self, x):
+        for i in range(self.nlays):
+            x = self.BNs[i](x)
+            x = self.PcConvs[i](x, i)  # PC layer i, also given its own index (original note: ReLU + Conv)
+            if self.maxpool[i]:
+                x = self.maxpool2d(x)
+
+        # Classifier: BN -> ReLU -> average pool -> flatten -> linear.
+        out = F.avg_pool2d(self.relu(self.BNend(x)), x.size(-1)) # kernel = last dim of x, so a square map pools to 1x1
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
     
+
 class PcConvBp_SGD(nn.Module):
     def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False):
         super().__init__()