diff --git a/Assignment2 b/96752745_Amal/assignment2.cu similarity index 91% rename from Assignment2 rename to 96752745_Amal/assignment2.cu index aa25e10..9ace3b7 100644 --- a/Assignment2 +++ b/96752745_Amal/assignment2.cu @@ -1,57 +1,57 @@ -#include -using namespace std; - -__global__ void Add(float *d_a,float *d_b,float *d_c,int r,int c){ - - int i =blockIdx.x*blockDim.x+threadIdx.x; - int j =blockIdx.y*blockDim.y+threadIdx.y; - int k = i+j*c; - //i is defined for horizontal traversal - if(i>r>>c; - float h_a[r][c],h_b[r][c],h_c[r][c]; - for(i=0;i>>(d_a,d_b,d_c,r,c); - cudaMemcpy(h_c, d_c, (r*c)*sizeof(float), cudaMemcpyDeviceToHost); - - cout<<"Sum of the 2 matrices is:\n"; - for(i=0;i +using namespace std; + +__global__ void Add(float *d_a,float *d_b,float *d_c,int r,int c){ + + int i =blockIdx.x*blockDim.x+threadIdx.x; + int j =blockIdx.y*blockDim.y+threadIdx.y; + int k = i+j*c; + //i is defined for horizontal traversal + if(i>r>>c; + float h_a[r][c],h_b[r][c],h_c[r][c]; + for(i=0;i>>(d_a,d_b,d_c,r,c); + cudaMemcpy(h_c, d_c, (r*c)*sizeof(float), cudaMemcpyDeviceToHost); + + cout<<"Sum of the 2 matrices is:\n"; + for(i=0;i +using namespace std; + +__global__ void Transpose(int *d_a,int max){ + +int i = blockIdx.x*blockDim.x+threadIdx.x; +int j = blockIdx.y*blockDim.y+threadIdx.y; +int id1 = i+max*j; +int id2 = j+max*i; +__syncthreads(); + +if(i>r>>c; + max=r>c?r:c; + int h_a[max][max]={0}; + for(i=0;i>>(d_a,max); + cudaMemcpy(h_a, d_a, max*max*sizeof(int), cudaMemcpyDeviceToHost); + cout<<"The transpose matrix is:\n"; + for(i=0;i