From 93d11f7f6ff183811a9b2b060538da32f3ff5dbd Mon Sep 17 00:00:00 2001 From: qingyuan hou <1399717445@qq.com> Date: Tue, 13 Aug 2024 17:58:57 -0400 Subject: [PATCH 1/9] usd SGD to optimize the energy function and replace the recurrent --- main_cifar.py | 28 +++++++----- prednet.py | 122 +++++++++++++++++++++++++++++++++++++++++++++++--- utils.py | 92 ++++++++++++++++++++++++++++++++++++- 3 files changed, 225 insertions(+), 17 deletions(-) diff --git a/main_cifar.py b/main_cifar.py index 2adfd23..bda1690 100644 --- a/main_cifar.py +++ b/main_cifar.py @@ -12,14 +12,15 @@ from utils import progress_bar from torch.autograd import Variable -def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False): +def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False, train=True): use_cuda = True # torch.cuda.is_available() best_acc = 0 # best test accuracy start_epoch = 0 # start from epoch 0 or last checkpoint epoch - batchsize = 128 + batchsize = 1 root = './' rep = 1 lr = 0.01 + train_flag = train models = {'PredNetBpD':PredNetBpD} modelname = model+'_'+str(circles)+'CLS_'+str(nesterov)+'Nes_'+str(weightDecay)+'WD_'+str(Tied)+'TIED_'+str(rep)+'REP' @@ -48,11 +49,11 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay= trainset = torchvision.datasets.CIFAR100(root='../data', train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=2) testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test) - testloader = torch.utils.data.DataLoader(testset, batch_size=10, shuffle=False, num_workers=2) + testloader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=False, num_workers=2) # Model print('==> Building model..') - net = models[model](num_classes=100,cls=circles,Tied=Tied) + net = models[model](num_classes=100,cls=circles,Tied=Tied, solver='SGD') # Define objective function @@ -150,13 +151,18 @@ def decrease_learning_rate(): for param_group in optimizer.param_groups: param_group['lr'] /= 10 - - for epoch in range(start_epoch, start_epoch+300): + if train_flag: + for epoch in range(start_epoch, start_epoch+300): + statfile = open(logpath+'training_stats_'+modelname+'.txt', 'a+') + if epoch==150 or epoch==225 or epoch == 262: + decrease_learning_rate() + train(epoch) + test(epoch) + else: statfile = open(logpath+'training_stats_'+modelname+'.txt', 'a+') - if epoch==150 or epoch==225 or epoch == 262: - decrease_learning_rate() - train(epoch) - test(epoch) + checkpoint = torch.load(checkpointpath + 'PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7') + net.load_state_dict(checkpoint['net']) + test(0) if __name__ == '__main__': - main_cifar() + main_cifar(train=False) diff --git a/prednet.py b/prednet.py index f639c6a..8b8776f 100644 --- a/prednet.py +++ b/prednet.py @@ -30,12 +30,115 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b def forward(self, x): y = self.relu(self.FFconv(x)) - b0 = F.relu(self.b0[0]+1.0).expand_as(y) for _ in range(self.cls): - y = self.FFconv(self.relu(x - self.FBconv(y)))*b0 + y + y = self.FFconv(self.relu(x - self.FBconv(y))) + y y = y + self.bypass(x) return y + def la_sigmoid(self, x): + return 0.5+0.25*x-0.0212*x**3 + +class PcConvBp_SGD(nn.Module): + def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False): + super().__init__() + self.padding = padding + self.stride = stride + self.kernel_size = kernel_size + self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias) + self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias) + self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))]) + self.relu = nn.ReLU(inplace=True) + self.cls = cls + self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False) + + def forward(self, x): + y = self.relu(self.FFconv(x)) + y, energies = self.find_optimal_r(x, y) + y = y + self.bypass(x) + return y + + def find_optimal_r(self, x, y): + weight = self.FBconv.weight.data + expanded_weights = self.expand_weights_to_matrix(y.shape[1:], weight.permute(1, 0, 2, 3), stride=self.stride, padding=self.padding) + expanded_weights = expanded_weights.to(y.device) + flattened_x = x.view(1, -1).clone().detach() + + """ + Implement with SGD + """ + # Initialize flattened_y as a tensor with requires_grad=True + num_iterations = 1 + y = F.pad(y, (self.padding, self.padding, self.padding, self.padding)) + flattened_y = y.view(1, -1).clone().detach().requires_grad_(True) + energy_record = [] + optimizer = torch.optim.SGD([flattened_y], lr=0.01) + for _ in range(num_iterations): + optimizer.zero_grad() + energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y) + energy.backward() + optimizer.step() + energy_record.append(energy.item()) + + # Reshape the flattened_y to the original shape + _, C_in, H_in, W_in = y.shape + C_out, _, K, _ = weight.shape + H_out = (H_in - K + 2 * self.padding) // self.stride + 1 + W_out = (W_in - K + 2 * self.padding) // self.stride + 1 + optimal_y = flattened_y.view(-1, C_out, H_out, W_out) + # Cut off the padding area + optimal_y = optimal_y[:, :, self.padding:-self.padding, self.padding:-self.padding] + optimal_y = optimal_y.to(y.device) + + return optimal_y.detach(), energy_record + + def Energy_Function(self, x, W, y): + energy = -2* x @ W @ y.T + y @ (W.T @ W) @ y.T + return energy + + def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding=0): + """ + Expand the convolution weights to a matrix suitable for multiplying with a flattened input vector. + + Args: + - input_shape (tuple): Shape of the input (C_in, H_in, W_in) + - weight_tensor (torch.Tensor): Convolution weights of shape (C_out, C_in, K, K) + - stride (int): Stride of the convolution + - padding (int): Padding size + + Returns: + - expanded_weights (torch.Tensor): The expanded weight matrix for matrix multiplication + """ + C_in, H_in, W_in = input_shape + C_out, _, K, _ = weight_tensor.shape + + # Compute output dimensions + H_out = (H_in + 2 * padding - K) // stride + 1 + W_out = (W_in + 2 * padding - K) // stride + 1 + + # Initialize expanded weight matrix + expanded_weights = torch.zeros((C_out * H_out * W_out, C_in * (H_in + 2 * padding) * (W_in + 2 * padding))) + + # Fill the expanded weight matrix + for c_out in range(C_out): + for h in range(H_out): + for w in range(W_out): + # Calculate the starting index for each filter application + start_h = h * stride + start_w = w * stride + # Flattened receptive field index + filter_idx = c_out * H_out * W_out + h * W_out + w + # Fill the appropriate section of the expanded weight matrix + for c_in in range(C_in): + for i in range(K): + for j in range(K): + # Calculate the input index considering padding + input_idx = (c_in * (H_in + 2 * padding) + (start_h + i)) * (W_in + 2 * padding) + (start_w + j) + # Assign the weight to the correct position + expanded_weights[filter_idx, input_idx] = weight_tensor[c_out, c_in, i, j] + + return expanded_weights + + ''' Architecture PredNetBpE ''' class PredNetBpE(nn.Module): def __init__(self, num_classes=1000, cls=0, Tied = False): @@ -77,7 +180,7 @@ def forward(self, x): ''' Architecture PredNetBpD ''' class PredNetBpD(nn.Module): - def __init__(self, num_classes=10, cls=0, Tied = False): + def __init__(self, num_classes=10, cls=0, Tied = False, solver=None): super().__init__() self.ics = [3, 64, 64, 128, 128, 256, 256, 512] # input chanels self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels @@ -88,9 +191,18 @@ def __init__(self, num_classes=10, cls=0, Tied = False): # construct PC layers # Unlike PCN v1, we do not have a tied version here. We may or may not incorporate a tied version in the future. if Tied == False: - self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) + if solver is None: + print('No solver in used, still using convolution in recurrent layer') + self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) + elif solver == 'SGD': + print(f'Solver {solver} is in use') + self.PcConvs = nn.ModuleList([PcConvBp_SGD(3, 64, cls=self.cls)]) + self.PcConvs.extend([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(1, self.nlays)]) + else: + print(f'Solver {solver} not supported') else: self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) + self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)]) # Linear layer self.linear = nn.Linear(self.ocs[-1], num_classes) @@ -111,7 +223,7 @@ def forward(self, x): out = self.linear(out) return out -''' Architecture PredNetBpD ''' +''' Architecture PredNetBpC ''' class PredNetBpC(nn.Module): def __init__(self, num_classes=10, cls=0, Tied = False): super().__init__() diff --git a/utils.py b/utils.py index 4c9b3f9..96c4388 100644 --- a/utils.py +++ b/utils.py @@ -7,7 +7,8 @@ import sys import time import math - +import torch +import numpy as np import torch.nn as nn import torch.nn.init as init @@ -122,3 +123,92 @@ def format_time(seconds): if f == '': f = '0ms' return f + +class BRIMconfig: + def __init__( + self, + R=31e3, + C=49e-15, + temperature_start=0.5, # 2.4 + temperature_end=0.5, #0.5 + t_step=2.2e-11, + t_stop=10e-9, + h2v=True, + fixed_temperature=False + ) -> None: + self.R: float = R + self.C: float = C + if fixed_temperature: + self.temperature_start: float = temperature_start + self.temperature_end: float = temperature_start + else: + self.temperature_start: float = temperature_start + self.temperature_end: float = temperature_end + self.t_step: float = t_step + self.t_stop: float = t_stop + self.h2v: bool = h2v + + +def absv(spin): + return torch.sign(spin) + + +def make_equivalent_ising(couplings: torch.tensor, + visible_bias: torch.tensor, + hidden_bias: torch.tensor, + visible_binary: torch.tensor, + hidden_binary: torch.tensor): + + J_new = couplings / 4 + a_new = couplings.sum(axis=1) / 4 + visible_bias / 2 + b_new = couplings.sum(axis=0) / 4 + hidden_bias / 2 + offset = (couplings / 4).sum() +\ + visible_bias.sum() / 2 + hidden_bias.sum() / 2 + visible_spins = 2 * visible_binary - 1 + hidden_spins = 2 * hidden_binary - 1 + + return J_new, a_new, b_new, offset, visible_spins, hidden_spins + + +def format_check(W: torch.tensor, + visible: torch.tensor, + hidden: torch.tensor, + visible_bias: torch.tensor, + hidden_bias: torch.tensor): + offset = 0 + convert_flag = 0 + # convert to spin if input are binary + if (torch.all((visible == 0) | (visible == 1)) and torch.all((hidden == 0) | (hidden == 1))): + W, visible_bias, hidden_bias, offset, visible, hidden =\ + make_equivalent_ising(W, visible_bias, hidden_bias, visible, hidden) + convert_flag = 1 + # pass if input are spin + elif (torch.all((visible == -1) | (visible == 1)) and torch.all((hidden == -1) | (hidden == 1))): + convert_flag = 0 + # not support other input + else: + raise ValueError("visible and hidden are not binary or spin") + + return W, visible, hidden, visible_bias, hidden_bias, offset, convert_flag + + +def _sigmoid(x): + return 1 / (1 + np.exp(-x)) + +def calculate_logprob(W, # (num_visible, num_hidden) + visible_bias, # (1, num_visible) + hidden_bias, # (1, num_hidden) + logZ, + data, # (num_samples, num_visible) + offset): + print(f'weight: {W.shape}, visible_bias: {visible_bias.shape}, hidden_bias: {hidden_bias.shape}') + hidden = (np.sum(_sigmoid((hidden_bias + np.matmul(data, W))), axis=0) / len(data))[None, :] + visible = (np.sum(data, axis=0) / len(data))[None, :] + expectation_energy = -(visible @ W @ hidden.T + visible @ visible_bias.T + hidden_bias @ hidden.T) + logprob = - expectation_energy - logZ + return logprob + +def normalize_and_quantize(spin, quantize): + normalized_spin = (spin - spin.min()) / (spin.max() - spin.min()) * 2 - 1 + normalized_spin = ((normalized_spin + 1) / 2 * (quantize - 1)).round() / (quantize - 1) * 2 - 1 + return normalized_spin \ No newline at end of file From 7afbc88ae60fbd97f9740419cb35f2f696825c49 Mon Sep 17 00:00:00 2001 From: qingyuan <1399717445@qq.com> Date: Thu, 15 Aug 2024 22:31:29 -0400 Subject: [PATCH 2/9] use L2 norm instead of energy function --- cifar_test.py | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++ main_cifar.py | 31 +++----- prednet.py | 1 + 3 files changed, 216 insertions(+), 19 deletions(-) create mode 100644 cifar_test.py diff --git a/cifar_test.py b/cifar_test.py new file mode 100644 index 0000000..7353879 --- /dev/null +++ b/cifar_test.py @@ -0,0 +1,203 @@ +'''Train CIFAR10 with PyTorch.''' +from __future__ import print_function +import os +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torchvision +import torchvision.transforms as transforms +import argparse +from prednet import * +from torch.autograd import Variable +from torch.utils.data import Subset +import torch +import torch.nn as nn +import torch.nn.functional as F +from tqdm import tqdm + + +class PcConvBp_SGD(nn.Module): + def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False): + super().__init__() + self.padding = padding + self.stride = stride + self.kernel_size = kernel_size + self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias) + self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias) + self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))]) + self.relu = nn.ReLU(inplace=True) + self.cls = cls + self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False) + + def forward(self, x): + y = self.relu(self.FFconv(x)) + y, energies = self.find_optimal_r(x, y) + y = y + self.bypass(x) + return y + + def find_optimal_r(self, x, y): + weight = self.FBconv.weight.data + expanded_weights = self.expand_weights_to_matrix(y.shape[1:], weight.permute(1, 0, 2, 3), stride=self.stride, padding=self.padding) + expanded_weights = expanded_weights.to(y.device) + flattened_x = torch.flatten(x, start_dim=1).clone().detach() + + """ + Implement with SGD + """ + # Initialize flattened_y as a tensor with requires_grad=True + num_iterations = 500 + y = F.pad(y, (self.padding, self.padding, self.padding, self.padding)) + flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True) + energy_record = [] + optimizer = torch.optim.SGD([flattened_y], lr=0.001) + for _ in range(num_iterations): + optimizer.zero_grad() + energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2) + energy.backward() + optimizer.step() + energy_record.append(energy.item()) + + # Reshape the flattened_y to the original shape + _, C_in, H_in, W_in = y.shape + C_out, _, K, _ = weight.shape + H_out = (H_in - K + 2 * self.padding) // self.stride + 1 + W_out = (W_in - K + 2 * self.padding) // self.stride + 1 + optimal_y = flattened_y.view(-1, C_out, H_out, W_out) + # Cut off the padding area + optimal_y = optimal_y[:, :, self.padding:-self.padding, self.padding:-self.padding] + optimal_y = optimal_y.to(y.device) + + return optimal_y.detach(), energy_record + + def Energy_Function(self, x, W, y): + energy = -2* x @ W @ y.T + y @ (W.T @ W) @ y.T + return energy + + def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding=0): + """ + Expand the convolution weights to a matrix suitable for multiplying with a flattened input vector. + + Args: + - input_shape (tuple): Shape of the input (C_in, H_in, W_in) + - weight_tensor (torch.Tensor): Convolution weights of shape (C_out, C_in, K, K) + - stride (int): Stride of the convolution + - padding (int): Padding size + + Returns: + - expanded_weights (torch.Tensor): The expanded weight matrix for matrix multiplication + """ + C_in, H_in, W_in = input_shape + C_out, _, K, _ = weight_tensor.shape + + # Compute output dimensions + H_out = (H_in + 2 * padding - K) // stride + 1 + W_out = (W_in + 2 * padding - K) // stride + 1 + + # Initialize expanded weight matrix + expanded_weights = torch.zeros((C_out * H_out * W_out, C_in * (H_in + 2 * padding) * (W_in + 2 * padding))) + + # Fill the expanded weight matrix + for c_out in range(C_out): + for h in range(H_out): + for w in range(W_out): + # Calculate the starting index for each filter application + start_h = h * stride + start_w = w * stride + # Flattened receptive field index + filter_idx = c_out * H_out * W_out + h * W_out + w + # Fill the appropriate section of the expanded weight matrix + for c_in in range(C_in): + for i in range(K): + for j in range(K): + # Calculate the input index considering padding + input_idx = (c_in * (H_in + 2 * padding) + (start_h + i)) * (W_in + 2 * padding) + (start_w + j) + # Assign the weight to the correct position + expanded_weights[filter_idx, input_idx] = weight_tensor[c_out, c_in, i, j] + + return expanded_weights + + + +''' Architecture PredNetBpD ''' +from prednet import PcConvBp +class PredNetBpD(nn.Module): + def __init__(self, num_classes=10, cls=0, Tied = False, solver=None): + super().__init__() + self.ics = [3, 64, 64, 128, 128, 256, 256, 512] # input chanels + self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels + self.maxpool = [False, False, True, False, True, False, False, False] # downsample flag + self.cls = cls # num of time steps + self.nlays = len(self.ics) + + # construct PC layers + # Unlike PCN v1, we do not have a tied version here. We may or may not incorporate a tied version in the future. + if Tied == False: + if solver is None: + print('No solver in used, still using convolution in recurrent layer') + self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) + elif solver == 'SGD': + print(f'Solver {solver} is in use') + self.PcConvs = nn.ModuleList([PcConvBp_SGD(3, 64, cls=self.cls)]) + self.PcConvs.extend([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(1, self.nlays)]) + else: + print(f'Solver {solver} not supported') + else: + self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) + + self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)]) + # Linear layer + self.linear = nn.Linear(self.ocs[-1], num_classes) + self.maxpool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.relu = nn.ReLU(inplace=True) + self.BNend = nn.BatchNorm2d(self.ocs[-1]) + + def forward(self, x): + for i in range(self.nlays): + x = self.BNs[i](x) + x = self.PcConvs[i](x) # ReLU + Conv + if self.maxpool[i]: + x = self.maxpool2d(x) + + # classifier + out = F.avg_pool2d(self.relu(self.BNend(x)), x.size(-1)) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + +if __name__ == '__main__': + batchsize = 500 + test_ratio = 1 + transform_test = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),]) + testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test) + num_samples = len(testset) + subset_size = int(test_ratio * num_samples) + + # Create a subset of the test set + indices = list(range(num_samples)) + subset_indices = indices[:subset_size] + test_subset = Subset(testset, subset_indices) + + # Create a DataLoader for the subset + testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=True, num_workers=6) + + # Create an instance of the PredNetBpD class + checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7') + prednet = PredNetBpD(num_classes=100, cls=5, Tied=False) + prednet = nn.DataParallel(prednet) + prednet.load_state_dict(checkpoint_weight['net']) + prednet = prednet.cuda() + prednet.eval() + total = 0 + correct = 0 + for batch_idx, (inputs, targets) in tqdm(enumerate(testloader), total=len(testloader)): + inputs, targets = inputs.cuda(), targets.cuda() + output_tensor = prednet(inputs) + # Get the predicted class + _, predicted = torch.max(output_tensor, 1) + total += targets.size(0) + correct += (predicted == targets).sum().item() + + # Calculate the accuracy + accuracy = 100 * correct / total + print(f'Test Accuracy: {accuracy:.2f}%') \ No newline at end of file diff --git a/main_cifar.py b/main_cifar.py index bda1690..6b67c56 100644 --- a/main_cifar.py +++ b/main_cifar.py @@ -12,15 +12,15 @@ from utils import progress_bar from torch.autograd import Variable -def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False, train=True): +def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False): use_cuda = True # torch.cuda.is_available() best_acc = 0 # best test accuracy start_epoch = 0 # start from epoch 0 or last checkpoint epoch - batchsize = 1 + batchsize = 256 root = './' rep = 1 lr = 0.01 - train_flag = train + solver = None models = {'PredNetBpD':PredNetBpD} modelname = model+'_'+str(circles)+'CLS_'+str(nesterov)+'Nes_'+str(weightDecay)+'WD_'+str(Tied)+'TIED_'+str(rep)+'REP' @@ -46,15 +46,14 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay= transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),]) - trainset = torchvision.datasets.CIFAR100(root='../data', train=True, download=True, transform=transform_train) + trainset = torchvision.datasets.CIFAR10(root='../data', train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=2) - testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test) + testset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=False, num_workers=2) # Model print('==> Building model..') - net = models[model](num_classes=100,cls=circles,Tied=Tied, solver='SGD') - + net = models[model](num_classes=10,cls=circles,Tied=Tied, solver=solver) # Define objective function criterion = nn.CrossEntropyLoss() @@ -151,18 +150,12 @@ def decrease_learning_rate(): for param_group in optimizer.param_groups: param_group['lr'] /= 10 - if train_flag: - for epoch in range(start_epoch, start_epoch+300): - statfile = open(logpath+'training_stats_'+modelname+'.txt', 'a+') - if epoch==150 or epoch==225 or epoch == 262: - decrease_learning_rate() - train(epoch) - test(epoch) - else: + for epoch in range(start_epoch, start_epoch+300): statfile = open(logpath+'training_stats_'+modelname+'.txt', 'a+') - checkpoint = torch.load(checkpointpath + 'PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7') - net.load_state_dict(checkpoint['net']) - test(0) + if epoch==150 or epoch==225 or epoch == 262: + decrease_learning_rate() + train(epoch) + test(epoch) if __name__ == '__main__': - main_cifar(train=False) + main_cifar() diff --git a/prednet.py b/prednet.py index 8b8776f..1055b59 100644 --- a/prednet.py +++ b/prednet.py @@ -25,6 +25,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b self.FBconv = nn.ConvTranspose2d(outchan, inchan, kernel_size, stride, padding, bias=bias) self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))]) self.relu = nn.ReLU(inplace=True) + self.sigmoid = nn.Sigmoid() self.cls = cls self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False) From 8439930a040f281bbf84a0e2bffcbaab8956d241 Mon Sep 17 00:00:00 2001 From: qingyuan <1399717445@qq.com> Date: Sat, 17 Aug 2024 20:57:57 -0400 Subject: [PATCH 3/9] fix the energy function issue with adding sqrt and x*x.t --- cifar_test.py | 16 ++++----- main_cifar.py | 6 ++-- utils.py | 89 +-------------------------------------------------- 3 files changed, 12 insertions(+), 99 deletions(-) diff --git a/cifar_test.py b/cifar_test.py index 7353879..8255e08 100644 --- a/cifar_test.py +++ b/cifar_test.py @@ -38,20 +38,20 @@ def find_optimal_r(self, x, y): weight = self.FBconv.weight.data expanded_weights = self.expand_weights_to_matrix(y.shape[1:], weight.permute(1, 0, 2, 3), stride=self.stride, padding=self.padding) expanded_weights = expanded_weights.to(y.device) - flattened_x = torch.flatten(x, start_dim=1).clone().detach() + flattened_x = x.view(1, -1).clone().detach() """ Implement with SGD """ # Initialize flattened_y as a tensor with requires_grad=True - num_iterations = 500 + num_iterations = 1 y = F.pad(y, (self.padding, self.padding, self.padding, self.padding)) - flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True) + flattened_y = y.view(1, -1).clone().detach().requires_grad_(True) energy_record = [] - optimizer = torch.optim.SGD([flattened_y], lr=0.001) + optimizer = torch.optim.SGD([flattened_y], lr=0.01) for _ in range(num_iterations): optimizer.zero_grad() - energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2) + energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y) energy.backward() optimizer.step() energy_record.append(energy.item()) @@ -165,11 +165,11 @@ def forward(self, x): if __name__ == '__main__': batchsize = 500 - test_ratio = 1 + test_ratio = 0.1 transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),]) - testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test) + testset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform_test) num_samples = len(testset) subset_size = int(test_ratio * num_samples) @@ -183,7 +183,7 @@ def forward(self, x): # Create an instance of the PredNetBpD class checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7') - prednet = PredNetBpD(num_classes=100, cls=5, Tied=False) + prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD') prednet = nn.DataParallel(prednet) prednet.load_state_dict(checkpoint_weight['net']) prednet = prednet.cuda() diff --git a/main_cifar.py b/main_cifar.py index 6b67c56..394ecb5 100644 --- a/main_cifar.py +++ b/main_cifar.py @@ -16,11 +16,11 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay= use_cuda = True # torch.cuda.is_available() best_acc = 0 # best test accuracy start_epoch = 0 # start from epoch 0 or last checkpoint epoch - batchsize = 256 + batchsize = 1 root = './' rep = 1 lr = 0.01 - solver = None + solver = 'SGD' models = {'PredNetBpD':PredNetBpD} modelname = model+'_'+str(circles)+'CLS_'+str(nesterov)+'Nes_'+str(weightDecay)+'WD_'+str(Tied)+'TIED_'+str(rep)+'REP' @@ -58,7 +58,7 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay= # Define objective function criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), momentum=0.9, lr=lr, weight_decay=weightDecay, nesterov=nesterov) - + # Parallel computing if use_cuda: net.cuda() diff --git a/utils.py b/utils.py index 96c4388..42b454d 100644 --- a/utils.py +++ b/utils.py @@ -124,91 +124,4 @@ def format_time(seconds): f = '0ms' return f -class BRIMconfig: - def __init__( - self, - R=31e3, - C=49e-15, - temperature_start=0.5, # 2.4 - temperature_end=0.5, #0.5 - t_step=2.2e-11, - t_stop=10e-9, - h2v=True, - fixed_temperature=False - ) -> None: - self.R: float = R - self.C: float = C - if fixed_temperature: - self.temperature_start: float = temperature_start - self.temperature_end: float = temperature_start - else: - self.temperature_start: float = temperature_start - self.temperature_end: float = temperature_end - self.t_step: float = t_step - self.t_stop: float = t_stop - self.h2v: bool = h2v - - -def absv(spin): - return torch.sign(spin) - - -def make_equivalent_ising(couplings: torch.tensor, - visible_bias: torch.tensor, - hidden_bias: torch.tensor, - visible_binary: torch.tensor, - hidden_binary: torch.tensor): - - J_new = couplings / 4 - a_new = couplings.sum(axis=1) / 4 + visible_bias / 2 - b_new = couplings.sum(axis=0) / 4 + hidden_bias / 2 - offset = (couplings / 4).sum() +\ - visible_bias.sum() / 2 + hidden_bias.sum() / 2 - visible_spins = 2 * visible_binary - 1 - hidden_spins = 2 * hidden_binary - 1 - - return J_new, a_new, b_new, offset, visible_spins, hidden_spins - - -def format_check(W: torch.tensor, - visible: torch.tensor, - hidden: torch.tensor, - visible_bias: torch.tensor, - hidden_bias: torch.tensor): - offset = 0 - convert_flag = 0 - # convert to spin if input are binary - if (torch.all((visible == 0) | (visible == 1)) and torch.all((hidden == 0) | (hidden == 1))): - W, visible_bias, hidden_bias, offset, visible, hidden =\ - make_equivalent_ising(W, visible_bias, hidden_bias, visible, hidden) - convert_flag = 1 - # pass if input are spin - elif (torch.all((visible == -1) | (visible == 1)) and torch.all((hidden == -1) | (hidden == 1))): - convert_flag = 0 - # not support other input - else: - raise ValueError("visible and hidden are not binary or spin") - - return W, visible, hidden, visible_bias, hidden_bias, offset, convert_flag - - -def _sigmoid(x): - return 1 / (1 + np.exp(-x)) - -def calculate_logprob(W, # (num_visible, num_hidden) - visible_bias, # (1, num_visible) - hidden_bias, # (1, num_hidden) - logZ, - data, # (num_samples, num_visible) - offset): - print(f'weight: {W.shape}, visible_bias: {visible_bias.shape}, hidden_bias: {hidden_bias.shape}') - hidden = (np.sum(_sigmoid((hidden_bias + np.matmul(data, W))), axis=0) / len(data))[None, :] - visible = (np.sum(data, axis=0) / len(data))[None, :] - expectation_energy = -(visible @ W @ hidden.T + visible @ visible_bias.T + hidden_bias @ hidden.T) - logprob = - expectation_energy - logZ - return logprob - -def normalize_and_quantize(spin, quantize): - normalized_spin = (spin - spin.min()) / (spin.max() - spin.min()) * 2 - 1 - normalized_spin = ((normalized_spin + 1) / 2 * (quantize - 1)).round() / (quantize - 1) * 2 - 1 - return normalized_spin \ No newline at end of file + From 8a81162365822c12c77b6735ad0923427294a596 Mon Sep 17 00:00:00 2001 From: qingyuan <1399717445@qq.com> Date: Sun, 18 Aug 2024 00:31:22 -0400 Subject: [PATCH 4/9] fix the problem that expanding weight takes too much time --- cifar_test.py | 99 ++++++++++++++++++++++++++------------------------- prednet.py | 2 +- 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/cifar_test.py b/cifar_test.py index 8255e08..2778601 100644 --- a/cifar_test.py +++ b/cifar_test.py @@ -5,9 +5,6 @@ import torch.backends.cudnn as cudnn import torchvision import torchvision.transforms as transforms -import argparse -from prednet import * -from torch.autograd import Variable from torch.utils.data import Subset import torch import torch.nn as nn @@ -16,11 +13,14 @@ class PcConvBp_SGD(nn.Module): - def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False): + def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, num_iterations=5): super().__init__() + self.num_iterations = num_iterations self.padding = padding self.stride = stride self.kernel_size = kernel_size + self.C_in = inchan + self.C_out = outchan self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias) self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias) self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))]) @@ -28,99 +28,89 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b self.cls = cls self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False) - def forward(self, x): + def forward(self, x, layer_idx): y = self.relu(self.FFconv(x)) - y, energies = self.find_optimal_r(x, y) + y, energies = self.find_optimal_r(x, y, layer_idx) y = y + self.bypass(x) return y - def find_optimal_r(self, x, y): - weight = self.FBconv.weight.data - expanded_weights = self.expand_weights_to_matrix(y.shape[1:], weight.permute(1, 0, 2, 3), stride=self.stride, padding=self.padding) + def find_optimal_r(self, x, y, layer_idx): + expanded_weights = torch.load(f'./expanded_weights/expanded_weights_{layer_idx}.pt') expanded_weights = expanded_weights.to(y.device) - flattened_x = x.view(1, -1).clone().detach() + flattened_x = torch.flatten(x, start_dim=1).clone().detach() """ Implement with SGD """ # Initialize flattened_y as a tensor with requires_grad=True - num_iterations = 1 y = F.pad(y, (self.padding, self.padding, self.padding, self.padding)) - flattened_y = y.view(1, -1).clone().detach().requires_grad_(True) + flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True) energy_record = [] optimizer = torch.optim.SGD([flattened_y], lr=0.01) - for _ in range(num_iterations): + for _ in range(self.num_iterations): optimizer.zero_grad() - energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y) + # energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y) + energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2) energy.backward() optimizer.step() energy_record.append(energy.item()) # Reshape the flattened_y to the original shape _, C_in, H_in, W_in = y.shape - C_out, _, K, _ = weight.shape - H_out = (H_in - K + 2 * self.padding) // self.stride + 1 - W_out = (W_in - K + 2 * self.padding) // self.stride + 1 - optimal_y = flattened_y.view(-1, C_out, H_out, W_out) + H_out = (H_in - self.kernel_size + 2 * self.padding) // self.stride + 1 + W_out = (W_in - self.kernel_size + 2 * self.padding) // self.stride + 1 + optimal_y = flattened_y.view(-1, self.C_out, H_out, W_out) + del flattened_y, flattened_x, energy, expanded_weights # Cut off the padding area optimal_y = optimal_y[:, :, self.padding:-self.padding, self.padding:-self.padding] optimal_y = optimal_y.to(y.device) - + return optimal_y.detach(), energy_record def Energy_Function(self, x, W, y): - energy = -2* x @ W @ y.T + y @ (W.T @ W) @ y.T + energy = torch.sqrt(x @ x.T -2* x @ W @ y.T + y @ (W.T @ W) @ y.T) return energy def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding=0): - """ - Expand the convolution weights to a matrix suitable for multiplying with a flattened input vector. - - Args: - - input_shape (tuple): Shape of the input (C_in, H_in, W_in) - - weight_tensor (torch.Tensor): Convolution weights of shape (C_out, C_in, K, K) - - stride (int): Stride of the convolution - - padding (int): Padding size - - Returns: - - expanded_weights (torch.Tensor): The expanded weight matrix for matrix multiplication - """ C_in, H_in, W_in = input_shape C_out, _, K, _ = weight_tensor.shape - + # Compute output dimensions H_out = (H_in + 2 * padding - K) // stride + 1 W_out = (W_in + 2 * padding - K) // stride + 1 - # Initialize expanded weight matrix - expanded_weights = torch.zeros((C_out * H_out * W_out, C_in * (H_in + 2 * padding) * (W_in + 2 * padding))) + # List to store sparse indices and values + indices = [] + values = [] - # Fill the expanded weight matrix for c_out in range(C_out): for h in range(H_out): for w in range(W_out): - # Calculate the starting index for each filter application start_h = h * stride start_w = w * stride - # Flattened receptive field index filter_idx = c_out * H_out * W_out + h * W_out + w - # Fill the appropriate section of the expanded weight matrix for c_in in range(C_in): for i in range(K): for j in range(K): - # Calculate the input index considering padding input_idx = (c_in * (H_in + 2 * padding) + (start_h + i)) * (W_in + 2 * padding) + (start_w + j) - # Assign the weight to the correct position - expanded_weights[filter_idx, input_idx] = weight_tensor[c_out, c_in, i, j] + value = weight_tensor[c_out, c_in, i, j].item() + if value != 0: + indices.append([filter_idx, input_idx]) + values.append(value) + + # Convert to sparse tensor + indices = torch.tensor(indices, dtype=torch.long).t() + values = torch.tensor(values, dtype=torch.float32) + size = (C_out * H_out * W_out, C_in * (H_in + 2 * padding) * (W_in + 2 * padding)) + expanded_weights = torch.sparse_coo_tensor(indices, values, size=size) return expanded_weights - ''' Architecture PredNetBpD ''' from prednet import PcConvBp class PredNetBpD(nn.Module): - def __init__(self, num_classes=10, cls=0, Tied = False, solver=None): + def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_number=None, num_iterations=None): super().__init__() self.ics = [3, 64, 64, 128, 128, 256, 256, 512] # input chanels self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels @@ -133,11 +123,19 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None): if Tied == False: if solver is None: print('No solver in used, still using convolution in recurrent layer') + assert layer_number is None, 'layer_number must be None if solver is None' self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) elif solver == 'SGD': print(f'Solver {solver} is in use') - self.PcConvs = nn.ModuleList([PcConvBp_SGD(3, 64, cls=self.cls)]) - self.PcConvs.extend([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(1, self.nlays)]) + assert layer_number is not None, 'layer_number must be provided if solver is not None' + assert layer_number <= self.nlays, f'layer_number must be less than or equal to the number of layers: {self.nlays}' + self.PcConvs = nn.ModuleList() + for i in range(self.nlays): + # if i <= (layer_number-1): + if i == (layer_number-1): + self.PcConvs.append(PcConvBp_SGD(self.ics[i], self.ocs[i], cls=self.cls, num_iterations=num_iterations)) + else: + self.PcConvs.append(PcConvBp(self.ics[i], self.ocs[i], cls=self.cls)) else: print(f'Solver {solver} not supported') else: @@ -153,7 +151,7 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None): def forward(self, x): for i in range(self.nlays): x = self.BNs[i](x) - x = self.PcConvs[i](x) # ReLU + Conv + x = self.PcConvs[i](x, i) # ReLU + Conv if self.maxpool[i]: x = self.maxpool2d(x) @@ -162,10 +160,12 @@ def forward(self, x): out = out.view(out.size(0), -1) out = self.linear(out) return out + + if __name__ == '__main__': batchsize = 500 - test_ratio = 0.1 + test_ratio = 1 transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),]) @@ -183,7 +183,7 @@ def forward(self, x): # Create an instance of the PredNetBpD class checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7') - prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD') + prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD', layer_number=1, num_iterations=1) prednet = nn.DataParallel(prednet) prednet.load_state_dict(checkpoint_weight['net']) prednet = prednet.cuda() @@ -197,6 +197,7 @@ def forward(self, x): _, predicted = torch.max(output_tensor, 1) total += targets.size(0) correct += (predicted == targets).sum().item() + print(f' Temporal Accuracy: {100 * correct / total:.2f}%') # Calculate the accuracy accuracy = 100 * correct / total diff --git a/prednet.py b/prednet.py index 1055b59..8f47a21 100644 --- a/prednet.py +++ b/prednet.py @@ -29,7 +29,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b self.cls = cls self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False) - def forward(self, x): + def forward(self, x, layer_idx): y = self.relu(self.FFconv(x)) for _ in range(self.cls): y = self.FFconv(self.relu(x - self.FBconv(y))) + y From d78b59acb957fbd6b18da3e65deb732a7cfd86af Mon Sep 17 00:00:00 2001 From: qingyuan <1399717445@qq.com> Date: Thu, 22 Aug 2024 18:16:42 -0400 Subject: [PATCH 5/9] save a backup before adding LD --- cifar_test.py | 99 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 33 deletions(-) diff --git a/cifar_test.py b/cifar_test.py index 2778601..1ec7d31 100644 --- a/cifar_test.py +++ b/cifar_test.py @@ -10,11 +10,15 @@ import torch.nn as nn import torch.nn.functional as F from tqdm import tqdm +from scipy.optimize import dual_annealing +import numpy as np -class PcConvBp_SGD(nn.Module): - def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, num_iterations=5): +class PcConvBp_DS(nn.Module): + def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, solver='SGD', num_iterations=5, train_weight=False): super().__init__() + self.solver = solver + self.train_weight = train_weight self.num_iterations = num_iterations self.padding = padding self.stride = stride @@ -30,42 +34,72 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b def forward(self, x, layer_idx): y = self.relu(self.FFconv(x)) - y, energies = self.find_optimal_r(x, y, layer_idx) + y = self.find_optimal_r(x, y, layer_idx, solver=self.solver) y = y + self.bypass(x) return y - def find_optimal_r(self, x, y, layer_idx): - expanded_weights = torch.load(f'./expanded_weights/expanded_weights_{layer_idx}.pt') - expanded_weights = expanded_weights.to(y.device) + def find_optimal_r(self, x, y, layer_idx, solver): + if self.train_weight: + expanded_weights = torch.load(f'./expanded_weights_train/expanded_weights_{layer_idx}.pt') + expanded_weights.clone().detach().requires_grad_(True) + else: + expanded_weights = torch.load(f'./expanded_weights/expanded_weights_{layer_idx}.pt') flattened_x = torch.flatten(x, start_dim=1).clone().detach() - - """ - Implement with SGD - """ - # Initialize flattened_y as a tensor with requires_grad=True y = F.pad(y, (self.padding, self.padding, self.padding, self.padding)) - flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True) - energy_record = [] - optimizer = torch.optim.SGD([flattened_y], lr=0.01) - for _ in range(self.num_iterations): - optimizer.zero_grad() - # energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y) - energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2) - energy.backward() - optimizer.step() - energy_record.append(energy.item()) - + + if solver == 'SGD': + """ Implement with SGD """ + # Initialize flattened_y as a tensor with requires_grad=True + expanded_weights = expanded_weights.to(y.device) + flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True) + energy = 0 + optimizer_y = torch.optim.SGD([flattened_y], lr=0.01) + optimizer_w = torch.optim.SGD([expanded_weights], lr=0.01) if self.train_weight else None + for _ in range(self.num_iterations): + optimizer_y.zero_grad() + # energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y) + energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2) + energy.backward() + optimizer_y.step() + + if self.train_weight: + for _ in range(5): + optimizer_w.zero_grad() + energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2) + energy.backward() + optimizer_w.step() + torch.save(expanded_weights, f'./expanded_weights_train/expanded_weights_{layer_idx}.pt') + + elif solver == 'SA': + flattened_x_np = flattened_x.cpu().numpy() + expanded_weights_np = expanded_weights.to_dense().numpy() + flattened_y_np = torch.flatten(y, start_dim=1).cpu().detach() + flattened_y_np = flattened_y_np.numpy() + + def e_f(y, x, W): + energy = np.linalg.norm(x - y @ W.T, ord=2) + return energy.item() + + # Define bounds for each element in flattened_y_np + bounds = [(-2.5, 2.5) for _ in range(flattened_y_np.size)] + + result = dual_annealing(e_f, bounds, x0=np.squeeze(flattened_y_np), args=(flattened_x_np, expanded_weights_np), maxiter=self.num_iterations, maxfun=5) + flattened_y = torch.tensor(result.x, dtype=torch.float32) + + else: + raise ValueError(f'Solver {solver} not supported') + # Reshape the flattened_y to the original shape _, C_in, H_in, W_in = y.shape H_out = (H_in - self.kernel_size + 2 * self.padding) // self.stride + 1 W_out = (W_in - self.kernel_size + 2 * self.padding) // self.stride + 1 optimal_y = flattened_y.view(-1, self.C_out, H_out, W_out) - del flattened_y, flattened_x, energy, expanded_weights + del flattened_y, flattened_x, expanded_weights # Cut off the padding area optimal_y = optimal_y[:, :, self.padding:-self.padding, self.padding:-self.padding] optimal_y = optimal_y.to(y.device) - return optimal_y.detach(), energy_record + return optimal_y.detach() def Energy_Function(self, x, W, y): energy = torch.sqrt(x @ x.T -2* x @ W @ y.T + y @ (W.T @ W) @ y.T) @@ -110,7 +144,7 @@ def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding ''' Architecture PredNetBpD ''' from prednet import PcConvBp class PredNetBpD(nn.Module): - def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_number=None, num_iterations=None): + def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_number=None, num_iterations=None, train_weight=False): super().__init__() self.ics = [3, 64, 64, 128, 128, 256, 256, 512] # input chanels self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels @@ -125,7 +159,7 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_numbe print('No solver in used, still using convolution in recurrent layer') assert layer_number is None, 'layer_number must be None if solver is None' self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) - elif solver == 'SGD': + elif solver in ['SGD', 'SA']: print(f'Solver {solver} is in use') assert layer_number is not None, 'layer_number must be provided if solver is not None' assert layer_number <= self.nlays, f'layer_number must be less than or equal to the number of layers: {self.nlays}' @@ -133,7 +167,7 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_numbe for i in range(self.nlays): # if i <= (layer_number-1): if i == (layer_number-1): - self.PcConvs.append(PcConvBp_SGD(self.ics[i], self.ocs[i], cls=self.cls, num_iterations=num_iterations)) + self.PcConvs.append(PcConvBp_DS(self.ics[i], self.ocs[i], cls=self.cls, solver=solver, num_iterations=num_iterations, train_weight=train_weight)) else: self.PcConvs.append(PcConvBp(self.ics[i], self.ocs[i], cls=self.cls)) else: @@ -160,12 +194,11 @@ def forward(self, x): out = out.view(out.size(0), -1) out = self.linear(out) return out - if __name__ == '__main__': - batchsize = 500 - test_ratio = 1 + batchsize = 1 + test_ratio = 0.001 transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),]) @@ -179,11 +212,11 @@ def forward(self, x): test_subset = Subset(testset, subset_indices) # Create a DataLoader for the subset - testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=True, num_workers=6) + testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=False, num_workers=6) # Create an instance of the PredNetBpD class checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7') - prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD', layer_number=1, num_iterations=1) + prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD', layer_number=1, num_iterations=50, train_weight=False) prednet = nn.DataParallel(prednet) prednet.load_state_dict(checkpoint_weight['net']) prednet = prednet.cuda() @@ -197,7 +230,7 @@ def forward(self, x): _, predicted = torch.max(output_tensor, 1) total += targets.size(0) correct += (predicted == targets).sum().item() - print(f' Temporal Accuracy: {100 * correct / total:.2f}%') + print(f' Temperary Accuracy: {100 * correct / total:.2f}%') # Calculate the accuracy accuracy = 100 * correct / total From 83acea0e9fcc9baef2092a8a184650f3f7f723b5 Mon Sep 17 00:00:00 2001 From: qingyuan <1399717445@qq.com> Date: Fri, 23 Aug 2024 14:46:47 -0400 Subject: [PATCH 6/9] save as backup before adding noise --- cifar_test.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/cifar_test.py b/cifar_test.py index 1ec7d31..04dafd2 100644 --- a/cifar_test.py +++ b/cifar_test.py @@ -85,7 +85,21 @@ def e_f(y, x, W): result = dual_annealing(e_f, bounds, x0=np.squeeze(flattened_y_np), args=(flattened_x_np, expanded_weights_np), maxiter=self.num_iterations, maxfun=5) flattened_y = torch.tensor(result.x, dtype=torch.float32) - + + elif solver == 'LD': + expanded_weights = expanded_weights.to(y.device) + c = -2 * torch.sparse.mm(flattened_x, expanded_weights) + def LD(W, c, r1, lr=0.001): + # Q is W.T @ W + # c is -2 * r0 @ W + x = r1.squeeze(0) + c = c.squeeze(0) + for i in range(self.num_iterations): + # Perform sparse matrix multiplication instead of forming Q explicitly + gradient = torch.sparse.mm(expanded_weights.T, torch.sparse.mm(expanded_weights, x.unsqueeze(1))).squeeze(1) + c + x = x - lr * gradient + return x.view(1, -1) + flattened_y = LD(expanded_weights, c, torch.flatten(y, start_dim=1)) else: raise ValueError(f'Solver {solver} not supported') @@ -159,7 +173,7 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_numbe print('No solver in used, still using convolution in recurrent layer') assert layer_number is None, 'layer_number must be None if solver is None' self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) - elif solver in ['SGD', 'SA']: + elif solver in ['SGD', 'SA', 'LD']: print(f'Solver {solver} is in use') assert layer_number is not None, 'layer_number must be provided if solver is not None' assert layer_number <= self.nlays, f'layer_number must be less than or equal to the number of layers: {self.nlays}' @@ -197,8 +211,10 @@ def forward(self, x): if __name__ == '__main__': - batchsize = 1 - test_ratio = 0.001 + batchsize = 500 + test_ratio = 1 + device = 'cuda' if torch.cuda.is_available() else 'cpu' + print(f'Using device: {device}') transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),]) @@ -215,16 +231,16 @@ def forward(self, x): testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=False, num_workers=6) # Create an instance of the PredNetBpD class - checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7') - prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver='SGD', layer_number=1, num_iterations=50, train_weight=False) + checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7', map_location=device) + prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver=None, layer_number=None, num_iterations=500, train_weight=False) prednet = nn.DataParallel(prednet) prednet.load_state_dict(checkpoint_weight['net']) - prednet = prednet.cuda() + prednet = prednet.to(device) prednet.eval() total = 0 correct = 0 for batch_idx, (inputs, targets) in tqdm(enumerate(testloader), total=len(testloader)): - inputs, targets = inputs.cuda(), targets.cuda() + inputs, targets = inputs.to(device), targets.to(device) output_tensor = prednet(inputs) # Get the predicted class _, predicted = torch.max(output_tensor, 1) From a0602ea0681b747c2b50fee6e485df39646f2c0c Mon Sep 17 00:00:00 2001 From: qingyuan <1399717445@qq.com> Date: Wed, 28 Aug 2024 16:30:10 -0400 Subject: [PATCH 7/9] apply solver for PCN_5, acuracy not good --- cifar_test.py | 56 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/cifar_test.py b/cifar_test.py index 04dafd2..3b1c691 100644 --- a/cifar_test.py +++ b/cifar_test.py @@ -15,8 +15,10 @@ class PcConvBp_DS(nn.Module): - def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, solver='SGD', num_iterations=5, train_weight=False): + def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False, + solver='SGD', num_iterations=5, train_weight=False, noise_level=None): super().__init__() + self.noise_level = noise_level self.solver = solver self.train_weight = train_weight self.num_iterations = num_iterations @@ -27,7 +29,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b self.C_out = outchan self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias) self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias) - self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))]) + # self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))]) self.relu = nn.ReLU(inplace=True) self.cls = cls self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False) @@ -43,7 +45,12 @@ def find_optimal_r(self, x, y, layer_idx, solver): expanded_weights = torch.load(f'./expanded_weights_train/expanded_weights_{layer_idx}.pt') expanded_weights.clone().detach().requires_grad_(True) else: - expanded_weights = torch.load(f'./expanded_weights/expanded_weights_{layer_idx}.pt') + expanded_weights = torch.load(f'./expanded_weights/PCN_5/expanded_weights_{layer_idx}.pt') + if self.noise_level is not None: + noise = self.noise_level * torch.randn(expanded_weights.shape) * expanded_weights + noise = noise.to_sparse() + expanded_weights += noise + flattened_x = torch.flatten(x, start_dim=1).clone().detach() y = F.pad(y, (self.padding, self.padding, self.padding, self.padding)) @@ -61,7 +68,7 @@ def find_optimal_r(self, x, y, layer_idx, solver): energy = torch.norm(flattened_x - flattened_y @ expanded_weights.T, p=2) energy.backward() optimizer_y.step() - + if self.train_weight: for _ in range(5): optimizer_w.zero_grad() @@ -69,7 +76,7 @@ def find_optimal_r(self, x, y, layer_idx, solver): energy.backward() optimizer_w.step() torch.save(expanded_weights, f'./expanded_weights_train/expanded_weights_{layer_idx}.pt') - + elif solver == 'SA': flattened_x_np = flattened_x.cpu().numpy() expanded_weights_np = expanded_weights.to_dense().numpy() @@ -89,17 +96,20 @@ def e_f(y, x, W): elif solver == 'LD': expanded_weights = expanded_weights.to(y.device) c = -2 * torch.sparse.mm(flattened_x, expanded_weights) - def LD(W, c, r1, lr=0.001): + def LD(expanded_weights, c, r1, lr=0.001): # Q is W.T @ W # c is -2 * r0 @ W - x = r1.squeeze(0) - c = c.squeeze(0) + x = r1.squeeze(0).cpu() + c = c.squeeze(0).cpu() for i in range(self.num_iterations): # Perform sparse matrix multiplication instead of forming Q explicitly gradient = torch.sparse.mm(expanded_weights.T, torch.sparse.mm(expanded_weights, x.unsqueeze(1))).squeeze(1) + c x = x - lr * gradient + del gradient return x.view(1, -1) + expanded_weights = expanded_weights.cpu() flattened_y = LD(expanded_weights, c, torch.flatten(y, start_dim=1)) + del c else: raise ValueError(f'Solver {solver} not supported') @@ -158,11 +168,13 @@ def expand_weights_to_matrix(self, input_shape, weight_tensor, stride=1, padding ''' Architecture PredNetBpD ''' from prednet import PcConvBp class PredNetBpD(nn.Module): - def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_number=None, num_iterations=None, train_weight=False): + def __init__(self, num_classes=10, cls=0, Tied = False, + solver=None, layer_number=None, num_iterations=None, train_weight=False, + noise_level=None): super().__init__() - self.ics = [3, 64, 64, 128, 128, 256, 256, 512] # input chanels - self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels - self.maxpool = [False, False, True, False, True, False, False, False] # downsample flag + self.ics = [ 3, 32, 64, 64, 128] # input chanels + self.ocs = [32, 64, 64, 128, 128] # output chanels + self.maxpool = [False, True, False, True, False] # downsample flag self.cls = cls # num of time steps self.nlays = len(self.ics) @@ -181,14 +193,17 @@ def __init__(self, num_classes=10, cls=0, Tied = False, solver=None, layer_numbe for i in range(self.nlays): # if i <= (layer_number-1): if i == (layer_number-1): - self.PcConvs.append(PcConvBp_DS(self.ics[i], self.ocs[i], cls=self.cls, solver=solver, num_iterations=num_iterations, train_weight=train_weight)) + self.PcConvs.append(PcConvBp_DS(self.ics[i], self.ocs[i], cls=self.cls, + solver=solver, num_iterations=num_iterations, train_weight=train_weight, + noise_level=noise_level)) else: self.PcConvs.append(PcConvBp(self.ics[i], self.ocs[i], cls=self.cls)) else: print(f'Solver {solver} not supported') else: self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) - + if noise_level is not None: + print(f'Adding noise to the solver {solver} with noise level {noise_level}') self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)]) # Linear layer self.linear = nn.Linear(self.ocs[-1], num_classes) @@ -231,11 +246,18 @@ def forward(self, x): testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=False, num_workers=6) # Create an instance of the PredNetBpD class - checkpoint_weight = torch.load('checkpoint/PredNetBpD_5CLS_FalseNes_0.001WD_FalseTIED_1REP_best_ckpt.t7', map_location=device) - prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, solver=None, layer_number=None, num_iterations=500, train_weight=False) + checkpoint_weight = torch.load('checkpoint/PCN_5.t7', map_location=device) + prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, + solver='SGD', layer_number=1, num_iterations=50, train_weight=False, + noise_level=None) + prednet = prednet.to(device) prednet = nn.DataParallel(prednet) + # new_state_dict = {} + # for key, value in checkpoint_weight['net'].items(): + # new_key = key.replace('module.', '') # Remove 'module.' prefix + # new_state_dict[new_key] = value + # prednet.load_state_dict(new_state_dict) prednet.load_state_dict(checkpoint_weight['net']) - prednet = prednet.to(device) prednet.eval() total = 0 correct = 0 From bb7ef1d0666f38afc201e2ca3793a1e1ff38d7e7 Mon Sep 17 00:00:00 2001 From: qingyuan <1399717445@qq.com> Date: Wed, 28 Aug 2024 16:31:13 -0400 Subject: [PATCH 8/9] comment the b0 so that PCN_5 weights can be loaded --- cifar_test.py | 2 +- prednet.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cifar_test.py b/cifar_test.py index 3b1c691..f41d0a8 100644 --- a/cifar_test.py +++ b/cifar_test.py @@ -247,7 +247,7 @@ def forward(self, x): # Create an instance of the PredNetBpD class checkpoint_weight = torch.load('checkpoint/PCN_5.t7', map_location=device) - prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, + prednet = PredNetBpD(num_classes=10, cls=0, Tied=False, solver='SGD', layer_number=1, num_iterations=50, train_weight=False, noise_level=None) prednet = prednet.to(device) diff --git a/prednet.py b/prednet.py index 8f47a21..d7c512e 100644 --- a/prednet.py +++ b/prednet.py @@ -23,7 +23,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b super().__init__() self.FFconv = nn.Conv2d(inchan, outchan, kernel_size, stride, padding, bias=bias) self.FBconv = nn.ConvTranspose2d(outchan, inchan, kernel_size, stride, padding, bias=bias) - self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))]) + # self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))]) self.relu = nn.ReLU(inplace=True) self.sigmoid = nn.Sigmoid() self.cls = cls From 8e01e8405ae99671e7160194d8165ba91c296493 Mon Sep 17 00:00:00 2001 From: qingyuan <1399717445@qq.com> Date: Wed, 4 Sep 2024 17:27:03 -0400 Subject: [PATCH 9/9] version with GD, SA and LD (basically GD) --- cifar_test.py | 32 +++++++++++----------- main_cifar.py | 9 +++--- prednet.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 95 insertions(+), 22 deletions(-) diff --git a/cifar_test.py b/cifar_test.py index f41d0a8..a8fe0d7 100644 --- a/cifar_test.py +++ b/cifar_test.py @@ -29,7 +29,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b self.C_out = outchan self.FFconv = nn.Conv2d(inchan, outchan, self.kernel_size, self.stride, self.padding, bias=bias) self.FBconv = nn.ConvTranspose2d(outchan, inchan, self.kernel_size, self.stride, self.padding, bias=bias) - # self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))]) + self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1, outchan, 1, 1))]) self.relu = nn.ReLU(inplace=True) self.cls = cls self.bypass = nn.Conv2d(inchan, outchan, kernel_size=1, stride=1, bias=False) @@ -60,8 +60,8 @@ def find_optimal_r(self, x, y, layer_idx, solver): expanded_weights = expanded_weights.to(y.device) flattened_y = torch.flatten(y, start_dim=1).clone().detach().requires_grad_(True) energy = 0 - optimizer_y = torch.optim.SGD([flattened_y], lr=0.01) - optimizer_w = torch.optim.SGD([expanded_weights], lr=0.01) if self.train_weight else None + optimizer_y = torch.optim.SGD([flattened_y], lr=0.001) + optimizer_w = torch.optim.SGD([expanded_weights], lr=0.001) if self.train_weight else None for _ in range(self.num_iterations): optimizer_y.zero_grad() # energy = self.Energy_Function(flattened_x, expanded_weights, flattened_y) @@ -99,15 +99,14 @@ def e_f(y, x, W): def LD(expanded_weights, c, r1, lr=0.001): # Q is W.T @ W # c is -2 * r0 @ W - x = r1.squeeze(0).cpu() - c = c.squeeze(0).cpu() + x = r1.squeeze(0) + c = c.squeeze(0) for i in range(self.num_iterations): # Perform sparse matrix multiplication instead of forming Q explicitly gradient = torch.sparse.mm(expanded_weights.T, torch.sparse.mm(expanded_weights, x.unsqueeze(1))).squeeze(1) + c x = x - lr * gradient del gradient return x.view(1, -1) - expanded_weights = expanded_weights.cpu() flattened_y = LD(expanded_weights, c, torch.flatten(y, start_dim=1)) del c else: @@ -175,6 +174,12 @@ def __init__(self, num_classes=10, cls=0, Tied = False, self.ics = [ 3, 32, 64, 64, 128] # input chanels self.ocs = [32, 64, 64, 128, 128] # output chanels self.maxpool = [False, True, False, True, False] # downsample flag + # self.ics = [ 3, 32, 64] # input chanels + # self.ocs = [32, 64, 64] # output chanels + # self.maxpool = [False, True, False] # downsample flag + # self.ics = [3, 64, 64, 128, 128, 256, 256, 512] # input chanels + # self.ocs = [64, 64, 128, 128, 256, 256, 512, 512] # output chanels + # self.maxpool = [False, False, True, False, True, False, False, False] # downsample flag self.cls = cls # num of time steps self.nlays = len(self.ics) @@ -226,7 +231,7 @@ def forward(self, x): if __name__ == '__main__': - batchsize = 500 + batchsize = 128 test_ratio = 1 device = 'cuda' if torch.cuda.is_available() else 'cpu' print(f'Using device: {device}') @@ -246,19 +251,14 @@ def forward(self, x): testloader = torch.utils.data.DataLoader(test_subset, batch_size=batchsize, shuffle=False, num_workers=6) # Create an instance of the PredNetBpD class - checkpoint_weight = torch.load('checkpoint/PCN_5.t7', map_location=device) - prednet = PredNetBpD(num_classes=10, cls=0, Tied=False, - solver='SGD', layer_number=1, num_iterations=50, train_weight=False, + checkpoint_weight = torch.load('checkpoint/PredNetBpD_5_5CLS_FalseNes_0.001WD_FalseTIED_2REP_best_ckpt.t7', map_location=device) + prednet = PredNetBpD(num_classes=10, cls=5, Tied=False, + solver='SGD', layer_number=5, num_iterations=5, train_weight=False, noise_level=None) prednet = prednet.to(device) prednet = nn.DataParallel(prednet) - # new_state_dict = {} - # for key, value in checkpoint_weight['net'].items(): - # new_key = key.replace('module.', '') # Remove 'module.' prefix - # new_state_dict[new_key] = value - # prednet.load_state_dict(new_state_dict) prednet.load_state_dict(checkpoint_weight['net']) - prednet.eval() + # prednet.eval() total = 0 correct = 0 for batch_idx, (inputs, targets) in tqdm(enumerate(testloader), total=len(testloader)): diff --git a/main_cifar.py b/main_cifar.py index 394ecb5..b233f99 100644 --- a/main_cifar.py +++ b/main_cifar.py @@ -12,17 +12,16 @@ from utils import progress_bar from torch.autograd import Variable -def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False): +def main_cifar(model='PredNetBpD_3', circles=5, gpunum=1, Tied=False, weightDecay=1e-3, nesterov=False): use_cuda = True # torch.cuda.is_available() best_acc = 0 # best test accuracy start_epoch = 0 # start from epoch 0 or last checkpoint epoch - batchsize = 1 + batchsize = 512 root = './' rep = 1 lr = 0.01 - solver = 'SGD' - models = {'PredNetBpD':PredNetBpD} + models = {'PredNetBpD_3':PredNetBpD_3} modelname = model+'_'+str(circles)+'CLS_'+str(nesterov)+'Nes_'+str(weightDecay)+'WD_'+str(Tied)+'TIED_'+str(rep)+'REP' # clearn folder @@ -53,7 +52,7 @@ def main_cifar(model='PredNetBpD', circles=5, gpunum=1, Tied=False, weightDecay= # Model print('==> Building model..') - net = models[model](num_classes=10,cls=circles,Tied=Tied, solver=solver) + net = models[model](num_classes=10,cls=circles,Tied=Tied) # Define objective function criterion = nn.CrossEntropyLoss() diff --git a/prednet.py b/prednet.py index d7c512e..56aa516 100644 --- a/prednet.py +++ b/prednet.py @@ -23,7 +23,7 @@ def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, b super().__init__() self.FFconv = nn.Conv2d(inchan, outchan, kernel_size, stride, padding, bias=bias) self.FBconv = nn.ConvTranspose2d(outchan, inchan, kernel_size, stride, padding, bias=bias) - # self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))]) + self.b0 = nn.ParameterList([nn.Parameter(torch.zeros(1,outchan,1,1))]) self.relu = nn.ReLU(inplace=True) self.sigmoid = nn.Sigmoid() self.cls = cls @@ -38,7 +38,81 @@ def forward(self, x, layer_idx): def la_sigmoid(self, x): return 0.5+0.25*x-0.0212*x**3 + +''' Architecture PredNetBpD_5 ''' +class PredNetBpD_5(nn.Module): + def __init__(self, num_classes=10, cls=0, Tied = False): + super().__init__() + self.ics = [3, 32, 64, 64, 128] # input chanels + self.ocs = [32, 64, 64, 128, 128] # output chanels + self.maxpool = [False, True, False, True, False] # downsample flag + self.cls = cls # num of time steps + self.nlays = len(self.ics) + + # construct PC layers + # Unlike PCN v1, we do not have a tied version here. We may or may not incorporate a tied version in the future. + if Tied == False: + self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) + else: + self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) + self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)]) + # Linear layer + self.linear = nn.Linear(self.ocs[-1], num_classes) + self.maxpool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.relu = nn.ReLU(inplace=True) + self.BNend = nn.BatchNorm2d(self.ocs[-1]) + + def forward(self, x): + for i in range(self.nlays): + x = self.BNs[i](x) + x = self.PcConvs[i](x, i) # ReLU + Conv + if self.maxpool[i]: + x = self.maxpool2d(x) + + # classifier + out = F.avg_pool2d(self.relu(self.BNend(x)), x.size(-1)) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +''' Architecture PredNetBpD ''' +class PredNetBpD_3(nn.Module): + def __init__(self, num_classes=10, cls=0, Tied = False): + super().__init__() + self.ics = [3, 32, 64] # input chanels + self.ocs = [32, 64, 64] # output chanels + self.maxpool = [False, True, False] # downsample flag + self.cls = cls # num of time steps + self.nlays = len(self.ics) + + # construct PC layers + # Unlike PCN v1, we do not have a tied version here. We may or may not incorporate a tied version in the future. + if Tied == False: + self.PcConvs = nn.ModuleList([PcConvBp(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) + else: + self.PcConvs = nn.ModuleList([PcConvBpTied(self.ics[i], self.ocs[i], cls=self.cls) for i in range(self.nlays)]) + self.BNs = nn.ModuleList([nn.BatchNorm2d(self.ics[i]) for i in range(self.nlays)]) + # Linear layer + self.linear = nn.Linear(self.ocs[-1], num_classes) + self.maxpool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.relu = nn.ReLU(inplace=True) + self.BNend = nn.BatchNorm2d(self.ocs[-1]) + + def forward(self, x): + for i in range(self.nlays): + x = self.BNs[i](x) + x = self.PcConvs[i](x, i) # ReLU + Conv + if self.maxpool[i]: + x = self.maxpool2d(x) + + # classifier + out = F.avg_pool2d(self.relu(self.BNend(x)), x.size(-1)) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + class PcConvBp_SGD(nn.Module): def __init__(self, inchan, outchan, kernel_size=3, stride=1, padding=1, cls=0, bias=False): super().__init__()