Skip to content
This repository was archived by the owner on Mar 14, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 57 additions & 57 deletions Assignment2 → 96752745_Amal/assignment2.cu
Original file line number Diff line number Diff line change
@@ -1,57 +1,57 @@
#include<iostream>
using namespace std;

// Element-wise sum of two r x c matrices stored contiguously row by row:
// d_c[k] = d_a[k] + d_b[k].
// Each thread handles at most one element. The x launch dimension selects
// the column (bounded by c) and the y launch dimension selects the row
// (bounded by r).
__global__ void Add(float *d_a,float *d_b,float *d_c,int r,int c){
    const int col = blockIdx.x*blockDim.x+threadIdx.x;
    const int row = blockIdx.y*blockDim.y+threadIdx.y;

    // Threads that land outside the matrix have nothing to do.
    if(col>=c || row>=r){
        return;
    }

    // Flat offset of element (row, col).
    const int idx = col+row*c;
    d_c[idx]=d_a[idx]+d_b[idx];
}


// Reads the matrix dimensions from stdin, fills two r x c host matrices,
// adds them on the GPU with the Add kernel and prints the sum.
// Returns 0 on success and 1 on invalid input or on any CUDA failure.
int main()
{
    int r = 0, c = 0, i, j;
    cout<<"Enter the rows and columns\n";
    cin>>r>>c;
    // Unreadable or non-positive sizes would give zero/negative array
    // extents and an invalid launch configuration, so stop here.
    if(!cin || r<=0 || c<=0)
    {
        cerr<<"Rows and columns must be positive integers\n";
        return 1;
    }

    float h_a[r][c],h_b[r][c],h_c[r][c];
    for(i=0;i<r;i++)
    {
        for(j=0;j<c;j++)
        {
            h_a[i][j]=i+j+3;
            h_b[i][j]=i*j;
        }
    }

    // Device pointers start at 0 so the cudaFree calls below are harmless
    // even when an allocation was never made.
    float *d_a = 0,*d_b = 0,*d_c = 0;
    const size_t bytes = (size_t)r*c*sizeof(float);

    // Each step only runs when everything before it succeeded; the first
    // failure is kept in err and reported once at the end.
    cudaError_t err = cudaMalloc((void**)&d_a, bytes);
    if(err==cudaSuccess) err = cudaMalloc((void**)&d_b, bytes);
    if(err==cudaSuccess) err = cudaMalloc((void**)&d_c, bytes);
    if(err==cudaSuccess) err = cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice);
    if(err==cudaSuccess) err = cudaMemcpy(d_b, h_b, bytes, cudaMemcpyHostToDevice);

    if(err==cudaSuccess)
    {
        dim3 dimBlock(32, 32);
        // Add indexes columns with the x dimension (i < c) and rows with the
        // y dimension (j < r), so the grid must be sized from c in x and
        // from r in y. Integer ceil-division covers partial tiles.
        dim3 dimGrid((c+dimBlock.x-1)/dimBlock.x,(r+dimBlock.y-1)/dimBlock.y);
        Add<<<dimGrid,dimBlock>>>(d_a,d_b,d_c,r,c);
        // A kernel launch returns nothing; launch errors are read back here.
        err = cudaGetLastError();
    }
    // This blocking copy also surfaces errors raised while the kernel ran.
    if(err==cudaSuccess) err = cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost);

    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    if(err!=cudaSuccess)
    {
        // h_c holds no valid result in this case, so do not print it.
        cerr<<"CUDA error: "<<cudaGetErrorString(err)<<"\n";
        return 1;
    }

    cout<<"Sum of the 2 matrices is:\n";
    for(i=0;i<r;i++)
    {
        for(j=0;j<c;j++)
        {
            printf("%.2f ",h_c[i][j]);
        }
        cout<<"\n";
    }
    return 0;
}
#include<iostream>
using namespace std;
// Adds two r x c float matrices element by element and writes the result to
// d_c. All three buffers are indexed with the same flat offset x + y*c, where
// x comes from the launch's x dimension and y from its y dimension.
__global__ void Add(float *d_a,float *d_b,float *d_c,int r,int c){
    const int x = threadIdx.x + blockDim.x*blockIdx.x;
    const int y = threadIdx.y + blockDim.y*blockIdx.y;
    const int offset = x + y*c;
    // x walks along a row, so it is bounded by the column count c;
    // y is bounded by the row count r.
    const bool inside = (x<c) && (y<r);
    if(inside){
        d_c[offset] = d_a[offset] + d_b[offset];
    }
}
// Reads the matrix dimensions from stdin, builds two r x c matrices on the
// host, adds them on the GPU with the Add kernel and prints the result.
// Host matrices live on the heap as flat arrays (element (i, j) is at
// i*c + j) so that large sizes do not overflow the stack.
// Returns 0 on success and 1 on invalid input or on any CUDA failure.
int main()
{
    int r = 0, c = 0, i, j;
    cout<<"Enter the rows and columns\n";
    cin>>r>>c;
    // Unreadable or non-positive sizes cannot describe a matrix.
    if(!cin || r<=0 || c<=0)
    {
        cerr<<"Rows and columns must be positive integers\n";
        return 1;
    }

    const size_t count = (size_t)r*c;
    const size_t bytes = count*sizeof(float);

    float *h_a = new float[count];
    float *h_b = new float[count];
    float *h_c = new float[count];
    for(i=0;i<r;i++)
    {
        for(j=0;j<c;j++)
        {
            const size_t k = (size_t)i*c+j;
            h_a[k]=i+j+3;
            h_b[k]=i*j;
        }
    }

    // Device pointers start at 0 so the unconditional cudaFree calls at the
    // end are harmless when an allocation was never made.
    float *d_a = 0,*d_b = 0,*d_c = 0;

    // Each step only runs when everything before it succeeded; the first
    // failure is kept in err and reported once below.
    cudaError_t err = cudaMalloc((void**)&d_a, bytes);
    if(err==cudaSuccess) err = cudaMalloc((void**)&d_b, bytes);
    if(err==cudaSuccess) err = cudaMalloc((void**)&d_c, bytes);
    if(err==cudaSuccess) err = cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice);
    if(err==cudaSuccess) err = cudaMemcpy(d_b, h_b, bytes, cudaMemcpyHostToDevice);

    if(err==cudaSuccess)
    {
        dim3 dimBlock(32, 32);
        // x covers the c columns and y covers the r rows, matching the
        // bounds check inside Add. Integer ceil-division covers partial tiles.
        dim3 dimGrid((c+dimBlock.x-1)/dimBlock.x,(r+dimBlock.y-1)/dimBlock.y);
        Add<<<dimGrid,dimBlock>>>(d_a,d_b,d_c,r,c);
        // A kernel launch returns nothing; launch errors are read back here.
        err = cudaGetLastError();
    }
    // This blocking copy also surfaces errors raised while the kernel ran.
    if(err==cudaSuccess) err = cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost);

    if(err==cudaSuccess)
    {
        cout<<"Sum of the 2 matrices is:\n";
        for(i=0;i<r;i++)
        {
            for(j=0;j<c;j++)
            {
                printf("%.2f ",h_c[(size_t)i*c+j]);
            }
            cout<<"\n";
        }
    }
    else
    {
        // h_c holds no valid result in this case, so do not print it.
        cerr<<"CUDA error: "<<cudaGetErrorString(err)<<"\n";
    }

    // Single exit path: release device and host memory on success and failure.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    delete[] h_a;
    delete[] h_b;
    delete[] h_c;
    return err==cudaSuccess ? 0 : 1;
}
53 changes: 53 additions & 0 deletions 96752745_Amal/assignment3.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include<iostream>
using namespace std;

// In-place transpose of a max x max int matrix stored contiguously row by
// row in d_a.
//
// Launch layout: a 2D grid whose x and y extents each cover at least max
// threads. Thread (i, j) is responsible for the pair of elements at flat
// offsets i + max*j and j + max*i.
//
// Review note carried over from the pull request: only one half of the
// matrix may perform the swap. Elements in one half are exchanged with their
// mirror elements in the other half; if both thread (i, j) and thread (j, i)
// swapped the same pair, the two swaps would race and the result would be
// undefined. Restricting the work to i < j gives every pair exactly one
// owner. Diagonal elements (i == j) stay where they are.
__global__ void Transpose(int *d_a,int max){
    int i = blockIdx.x*blockDim.x+threadIdx.x;
    int j = blockIdx.y*blockDim.y+threadIdx.y;

    // i < j together with j < max also guarantees i < max, so this single
    // test is both the bounds check and the "one half only" check.
    // No __syncthreads() is needed: each element is read and written by one
    // thread only, and a barrier inside this divergent branch would be
    // undefined behaviour because not every thread of the block reaches it.
    if(i<j && j<max)
    {
        int id1 = i+max*j;
        int id2 = j+max*i;
        int t = d_a[id1];
        d_a[id1]=d_a[id2];
        d_a[id2]=t;
    }
}

// Reads r and c from stdin, embeds an r x c matrix (element (i, j) = 2*i + j)
// in the top-left corner of a zero-filled max x max square, where
// max = larger of r and c, transposes the square in place on the GPU with
// the Transpose kernel, and prints the top-left c x r part of the result.
// Returns 0 on success and 1 on invalid input or on any CUDA failure.
int main()
{
    int r = 0, c = 0, i, j, max;
    cout<<"Enter the number of rows and columns:\n";
    cin>>r>>c;
    // Unreadable or non-positive sizes cannot describe a matrix.
    if(!cin || r<=0 || c<=0)
    {
        cerr<<"Rows and columns must be positive integers\n";
        return 1;
    }
    max=r>c?r:c;

    const size_t count = (size_t)max*max;
    const size_t bytes = count*sizeof(int);

    // Heap storage avoids overflowing the stack for large sizes; the
    // trailing () value-initializes every element to 0, which provides the
    // padding around the r x c data.
    int *h_a = new int[count]();
    for(i=0;i<r;i++)
    {
        for(j=0;j<c;j++)
            h_a[(size_t)i*max+j]=2*i+j;
    }

    // Starts at 0 so the unconditional cudaFree below is harmless when the
    // allocation was never made.
    int *d_a = 0;

    // Each step only runs when everything before it succeeded; the first
    // failure is kept in err and reported once below.
    cudaError_t err = cudaMalloc((void**)&d_a, bytes);
    if(err==cudaSuccess) err = cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice);

    if(err==cudaSuccess)
    {
        dim3 dimBlock(32, 32);
        // Integer ceil-division so partial tiles at the edges are covered.
        dim3 dimGrid((max+dimBlock.x-1)/dimBlock.x, (max+dimBlock.y-1)/dimBlock.y);
        Transpose<<<dimGrid,dimBlock>>>(d_a,max);
        // A kernel launch returns nothing; launch errors are read back here.
        err = cudaGetLastError();
    }
    // This blocking copy also surfaces errors raised while the kernel ran.
    if(err==cudaSuccess) err = cudaMemcpy(h_a, d_a, bytes, cudaMemcpyDeviceToHost);

    if(err==cudaSuccess)
    {
        cout<<"The transpose matrix is:\n";
        // The transposed data has c rows and r columns.
        for(i=0;i<c;i++)
        {
            for(j=0;j<r;j++)
                cout<<h_a[(size_t)i*max+j]<<" ";
            cout<<"\n";
        }
    }
    else
    {
        cerr<<"CUDA error: "<<cudaGetErrorString(err)<<"\n";
    }

    // Single exit path: release device and host memory on success and failure.
    cudaFree(d_a);
    delete[] h_a;
    return err==cudaSuccess ? 0 : 1;
}