-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnbodyProblem.cu
More file actions
113 lines (83 loc) · 2.88 KB
/
nbodyProblem.cu
File metadata and controls
113 lines (83 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "timer.h"
#include "files.h"
#define SOFTENING 1e-9f
/*
 * One simulated body: a 3D position followed by a 3D velocity.
 * The six floats are stored in the order x, y, z, vx, vy, vz.
 */
typedef struct {
  float x;   /* position, x component */
  float y;   /* position, y component */
  float z;   /* position, z component */
  float vx;  /* velocity, x component */
  float vy;  /* velocity, y component */
  float vz;  /* velocity, z component */
} Body;
/*
 * Accumulate the softened inverse-square pull of all n bodies on body i and
 * apply the result to body i's velocity over one time step dt.
 *
 * Each thread handles the single body whose index equals its flat 1D global
 * thread index; threads whose index is >= n do nothing. This kernel reads
 * positions and writes only the velocity fields (vx, vy, vz).
 */
__global__ void bodyForce(Body *p, float dt, int n) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= n) return;

  // Positions are not modified in this kernel, so body i's position can be
  // read once up front instead of on every loop iteration.
  const float xi = p[i].x;
  const float yi = p[i].y;
  const float zi = p[i].z;

  float ax = 0.0f;
  float ay = 0.0f;
  float az = 0.0f;

  for (int j = 0; j < n; ++j) {
    const float dx = p[j].x - xi;
    const float dy = p[j].y - yi;
    const float dz = p[j].z - zi;

    // SOFTENING keeps the squared distance non-zero when j == i
    // (dx = dy = dz = 0), so rsqrtf never sees a zero argument.
    const float distSqr = dx*dx + dy*dy + dz*dz + SOFTENING;
    const float invDist = rsqrtf(distSqr);
    const float invDist3 = invDist * invDist * invDist;

    ax += dx * invDist3;
    ay += dy * invDist3;
    az += dz * invDist3;
  }

  p[i].vx += ax * dt;
  p[i].vy += ay * dt;
  p[i].vz += az * dt;
}
/*
 * Advance every body's position by its current velocity multiplied by dt.
 *
 * Each thread updates the single body whose index equals its flat 1D global
 * thread index; threads whose index is >= n do nothing.
 */
__global__ void integrate_positions(Body *p, float dt, int n) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= n) return;

  Body *body = p + idx;
  body->x += body->vx * dt;
  body->y += body->vy * dt;
  body->z += body->vz * dt;
}
/*
 * Print a diagnostic and terminate the program if a CUDA runtime call did not
 * return cudaSuccess. `what` names the operation for the error message.
 */
static void checkCudaOrExit(cudaError_t status, const char *what) {
  if (status != cudaSuccess) {
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    exit(EXIT_FAILURE);
  }
}

/*
 * Run the n-body simulation for a fixed number of time steps and report the
 * average interaction throughput.
 *
 * Command line:
 *   argv[1] (optional)  exponent e; the body count becomes 2<<e (default 2<<11)
 *   argv[2] (optional)  path of the file holding the initial body values
 *   argv[3] (optional)  path of the file the final body values are written to
 *
 * Returns 0 on success; exits with EXIT_FAILURE on a bad exponent or on any
 * CUDA runtime error.
 */
int main(const int argc, const char** argv) {
  int nBodies = 2<<11;
  if (argc > 1) {
    const int shift = atoi(argv[1]);
    // A negative shift is undefined behaviour, and with six floats (24 bytes)
    // per body any shift above 25 makes the byte count overflow the `int`
    // used for the buffer size below.
    if (shift < 0 || shift > 25) {
      fprintf(stderr, "Invalid body-count exponent '%s': expected an integer in [0, 25]\n", argv[1]);
      return EXIT_FAILURE;
    }
    nBodies = 2<<shift;
  }

  // The assessment will pass hidden initialized values to check for correctness.
  // You should not make changes to these files, or else the assessment will not work.
  const char * initialized_values;
  const char * solution_values;
  if (nBodies == 2<<11) {
    initialized_values = "09-nbody/files/initialized_4096";
    solution_values = "09-nbody/files/solution_4096";
  } else { // nBodies == 2<<15
    initialized_values = "09-nbody/files/initialized_65536";
    solution_values = "09-nbody/files/solution_65536";
  }
  if (argc > 2) initialized_values = argv[2];
  if (argc > 3) solution_values = argv[3];

  const float dt = 0.01f; // Time step
  const int nIters = 10;  // Simulation iterations

  int bytes = nBodies * sizeof(Body);
  float *buf;
  // Managed memory: the same pointer is used by the host-side file helpers
  // and by the kernels.
  checkCudaOrExit(cudaMallocManaged(&buf, bytes), "cudaMallocManaged");
  Body *p = (Body*)buf;

  read_values_from_file(initialized_values, buf, bytes);

  // The launch configuration does not change between iterations.
  const int threadsPerBlock = 128;
  const int numberOfBlocks = (nBodies + threadsPerBlock - 1) / threadsPerBlock;

  double totalTime = 0.0;
  for (int iter = 0; iter < nIters; iter++) {
    StartTimer();

    // Both kernels go to the default stream, so integrate_positions starts
    // only after bodyForce has finished updating the velocities.
    bodyForce<<<numberOfBlocks, threadsPerBlock>>>(p, dt, nBodies);
    checkCudaOrExit(cudaGetLastError(), "bodyForce launch");
    integrate_positions<<<numberOfBlocks, threadsPerBlock>>>(p, dt, nBodies);
    checkCudaOrExit(cudaGetLastError(), "integrate_positions launch");

    // Kernel launches are asynchronous: wait for completion before reading
    // the timer, and surface any error raised while the kernels ran.
    checkCudaOrExit(cudaDeviceSynchronize(), "kernel execution");

    // NOTE(review): GetTimer() comes from timer.h and presumably returns
    // milliseconds, making tElapsed seconds -- confirm against timer.h.
    const double tElapsed = GetTimer() / 1000.0;
    totalTime += tElapsed;
  }

  double avgTime = totalTime / (double)(nIters);
  // The leading double literal promotes the product to double before the
  // multiplication by nBodies, so nBodies * nBodies cannot overflow int.
  float billionsOfOpsPerSecond = 1e-9 * nBodies * nBodies / avgTime;

  write_values_to_file(solution_values, buf, bytes);

  printf("%0.3f Billion Interactions / second\n", billionsOfOpsPerSecond);

  checkCudaOrExit(cudaFree(buf), "cudaFree");
  return 0;
}