SpatialOps
ParticleOperatorsCuda.h
1 //
2 // ParticleOperatorsCuda.h
3 //
4 //
5 // Created by Babak Goshayeshi on 12/28/15.
6 //
7 //
8 #ifdef ENABLE_CUDA
9 #ifndef ParticleOperatorsCuda_h
10 #define ParticleOperatorsCuda_h
11 
12 #include <spatialops/SpatialOpsConfigure.h>
13 
14 namespace SpatialOps{
15  namespace Particle{
16 
17 #define GPU_ERROR_CHECK(err) { gpu_error( err, __FILE__, __LINE__); }
18 
19  inline void gpu_error(cudaError err, const char *file, int line)
20  {
21  if( cudaSuccess != err ){
22  std::ostringstream msg;
23  msg << "GPU failed!\n"
24  << "Cuda message : " << cudaGetErrorString(err) << "\n"
25  << "\t - " << file << " : " << line << std::endl;
26  throw(std::runtime_error(msg.str()));
27  }
28  }
29 
30  // Maximum number of block in one dimension is 65535.
31  #define INNER_BLOCK_ONE_DIM 1024
32 
33  //------------------------------------------------------------------
34 
35  __global__ void _ppc_apply_to_field_in( const double* px,
36  const double* py,
37  const double* pz,
38  const double* psize,
39  double* dest,
40  const double dx,
41  const double dy,
42  const double dz,
43  const double xloface,
44  const double yloface,
45  const double zloface,
46  const size_t nparticle,
47  const int nmax1,
48  const int nmax2,
49  const int nmax3 );
50 
51  //------------------------------------------------------------------
52 
53  __global__ void _ppc_apply_to_field_ni( const double* px,
54  const double* py,
55  const double* pz,
56  double* dest,
57  const double dx,
58  const double dy,
59  const double dz,
60  const double xloface,
61  const double yloface,
62  const double zloface,
63  const size_t nparticle,
64  const int nmax1,
65  const int nmax2,
66  const int nmax3 );
67 
68  //------------------------------------------------------------------
69 
70  __global__ void _p2c_apply_to_field_ni( const double* px,
71  const double* py,
72  const double* pz,
73  const double* src,
74  double* dest,
75  const double dx,
76  const double dy,
77  const double dz,
78  const double xloface,
79  const double yloface,
80  const double zloface,
81  const size_t nparticle,
82  const int nmax1,
83  const int nmax2,
84  const int nmax3 );
85 
86  __global__ void _p2c_apply_to_field_in( const double* px,
87  const double* py,
88  const double* pz,
89  const double* psize,
90  const double* src,
91  double* dest,
92  const double dx,
93  const double dy,
94  const double dz,
95  const double xloface,
96  const double yloface,
97  const double zloface,
98  const size_t nparticle,
99  const int nmax1,
100  const int nmax2,
101  const int nmax3 );
102 
103  __global__ void _c2p_apply_to_field_ni( const double* px,
104  const double* py,
105  const double* pz,
106  const double* src,
107  double* dest,
108  const double dx,
109  const double dy,
110  const double dz,
111  const double xlo,
112  const double ylo,
113  const double zlo,
114  const size_t nparticle,
115  const int nmax1,
116  const int nmax2,
117  const int nmax3 );
118 
119  __global__ void _c2p_apply_to_field_in( const double* px,
120  const double* py,
121  const double* pz,
122  const double* psize,
123  const double* src,
124  double* dest,
125  const double dx,
126  const double dy,
127  const double dz,
128  const double xloface,
129  const double yloface,
130  const double zloface,
131  const size_t nparticle,
132  const int nmax1,
133  const int nmax2,
134  const int nmax3 );
135 
136  //------------------------------------------------------------------
137 
138  template<class CellField, class ParticleField>
139  __host__ inline void _ppc_apply_to_field( const ParticleField *px,
140  const ParticleField *py,
141  const ParticleField *pz,
142  const ParticleField *psize,
143  CellField& dest,
144  const double& dx,
145  const double& dy,
146  const double& dz,
147  const double& xlo,
148  const double& ylo,
149  const double& zlo,
150  const InterpOptions& method )
151  {
152  const IntVec& ngm = dest.get_ghost_data().get_minus();
153  const IntVec& nmax = dest.window_with_ghost().extent();
154 
155  const double xloface = xlo - dx*ngm[0] - dx/2;
156  const double yloface = ylo - dy*ngm[1] - dy/2;
157  const double zloface = zlo - dz*ngm[2] - dz/2;
158 
159  cudaError err;
160  err = cudaSetDevice( dest.active_device_index() );
161 
162  if( cudaSuccess != err ){
163  throw( std::runtime_error( cudaGetErrorString(err) ) );
164  }
165 
166  // Obtaing the number of particles
167  const size_t nparticle = psize->window_with_ghost().local_npts();
168 
169  // Calcuate the number of Blocks and Threads
170  const size_t nblock = nparticle / INNER_BLOCK_ONE_DIM + 1 ;
171  const size_t nthread = (nparticle > INNER_BLOCK_ONE_DIM) ? INNER_BLOCK_ONE_DIM : nparticle;
172 
173  dest <<= 0.0;
174  switch (method) {
175  case NO_INTERPOLATE:
176  _ppc_apply_to_field_ni<<<nblock,nthread>>>(px ? px->field_values(GPU_INDEX) : NULL,
177  py ? py->field_values(GPU_INDEX) : NULL,
178  pz ? pz->field_values(GPU_INDEX) : NULL,
179  dest.field_values(GPU_INDEX),
180  dx, dy, dz,
181  xloface, yloface, zloface,
182  nparticle,
183  nmax[0], nmax[1], nmax[2]);
184  break;
185  case INTERPOLATE:
186  _ppc_apply_to_field_in<<<nblock,nthread>>>(px ? px->field_values(GPU_INDEX) : NULL,
187  py ? py->field_values(GPU_INDEX) : NULL,
188  pz ? pz->field_values(GPU_INDEX) : NULL,
189  psize->field_values(GPU_INDEX),
190  dest.field_values(GPU_INDEX),
191  dx, dy, dz,
192  xloface, yloface, zloface,
193  nparticle,
194  nmax[0], nmax[1], nmax[2]);
195 
196 
197  break;
198  }
199  cudaDeviceSynchronize();
200  GPU_ERROR_CHECK(cudaGetLastError());
201  }
202 
203  //------------------------------------------------------------------
204 
205  template<class CellField, class ParticleField>
206  __host__ inline void _p2c_apply_to_field( const ParticleField *px,
207  const ParticleField *py,
208  const ParticleField *pz,
209  const ParticleField *psize,
210  const ParticleField& src,
211  CellField& dest,
212  const double& dx,
213  const double& dy,
214  const double& dz,
215  const double& xlo,
216  const double& ylo,
217  const double& zlo,
218  const InterpOptions& method )
219  {
220  const IntVec& ngm = dest.get_ghost_data().get_minus();
221  const IntVec& nmax = dest.window_with_ghost().extent();
222 
223  const double xloface = xlo - dx*ngm[0] - dx/2;
224  const double yloface = ylo - dy*ngm[1] - dy/2;
225  const double zloface = zlo - dz*ngm[2] - dz/2;
226 
227  cudaError err;
228  err = cudaSetDevice( dest.active_device_index() );
229 
230  if( cudaSuccess != err ){
231  throw( std::runtime_error( cudaGetErrorString(err) ) );
232  }
233 
234  // Obtaing the number of particles
235  const size_t nparticle = src.window_with_ghost().local_npts();
236 
237  // Calcuate the number of Blocks and Threads
238  const size_t nblock = nparticle / INNER_BLOCK_ONE_DIM + 1 ;
239  const size_t nthread = (nparticle > INNER_BLOCK_ONE_DIM) ? INNER_BLOCK_ONE_DIM : nparticle;
240 
241  dest <<= 0.0;
242  switch (method) {
243  case NO_INTERPOLATE:
244  _p2c_apply_to_field_ni<<<nblock,nthread>>>(px ? px->field_values(GPU_INDEX) : NULL,
245  py ? py->field_values(GPU_INDEX) : NULL,
246  pz ? pz->field_values(GPU_INDEX) : NULL,
247  src.field_values(GPU_INDEX),
248  dest.field_values(GPU_INDEX),
249  dx, dy, dz,
250  xloface, yloface, zloface,
251  nparticle,
252  nmax[0], nmax[1], nmax[2]);
253  break;
254  case INTERPOLATE:
255  _p2c_apply_to_field_in<<<nblock,nthread>>>(px ? px->field_values(GPU_INDEX) : NULL,
256  py ? py->field_values(GPU_INDEX) : NULL,
257  pz ? pz->field_values(GPU_INDEX) : NULL,
258  psize->field_values(GPU_INDEX),
259  src.field_values(GPU_INDEX),
260  dest.field_values(GPU_INDEX),
261  dx, dy, dz,
262  xloface, yloface, zloface,
263  nparticle,
264  nmax[0], nmax[1], nmax[2]);
265 
266 
267  break;
268  }
269  cudaDeviceSynchronize();
270  GPU_ERROR_CHECK(cudaGetLastError());
271  }
272 
273  //------------------------------------------------------------------
274 
275  template<class CellField, class ParticleField>
276  __host__ inline void _c2p_apply_to_field( const ParticleField *px,
277  const ParticleField *py,
278  const ParticleField *pz,
279  const ParticleField *psize,
280  const CellField& src,
281  ParticleField& dest,
282  const double& dx,
283  const double& dy,
284  const double& dz,
285  const double& xlo,
286  const double& ylo,
287  const double& zlo,
288  const InterpOptions& method )
289  {
290  const IntVec& ngm = src.get_ghost_data().get_minus();
291  const IntVec& nmax = src.window_with_ghost().extent();
292 
293  const double xloface = xlo - dx*ngm[0] - dx/2;
294  const double yloface = ylo - dy*ngm[1] - dy/2;
295  const double zloface = zlo - dz*ngm[2] - dz/2;
296 
297  cudaError err;
298  err = cudaSetDevice( dest.active_device_index() );
299 
300  if( cudaSuccess != err ){
301  throw( std::runtime_error( cudaGetErrorString(err) ) );
302  }
303 
304  // Obtaing the number of particles
305  const size_t nparticle = dest.window_with_ghost().local_npts();
306 
307  // Calcuate the number of Blocks and Threads
308  const size_t nblock = nparticle / INNER_BLOCK_ONE_DIM + 1;
309  const size_t nthread = (nparticle > INNER_BLOCK_ONE_DIM) ? INNER_BLOCK_ONE_DIM : nparticle;
310 
311  dest <<= 0.0;
312  switch (method) {
313  case NO_INTERPOLATE:
314  _c2p_apply_to_field_ni<<<nblock,nthread>>>(px ? px->field_values(GPU_INDEX) : NULL,
315  py ? py->field_values(GPU_INDEX) : NULL,
316  pz ? pz->field_values(GPU_INDEX) : NULL,
317  src.field_values(GPU_INDEX),
318  dest.field_values(GPU_INDEX),
319  dx, dy, dz,
320  xloface, yloface, zloface,
321  nparticle,
322  nmax[0], nmax[1], nmax[2]);
323 
324  break;
325  case INTERPOLATE:
326 
327  _c2p_apply_to_field_in<<<nblock,nthread>>>(px ? px->field_values(GPU_INDEX) : NULL,
328  py ? py->field_values(GPU_INDEX) : NULL,
329  pz ? pz->field_values(GPU_INDEX) : NULL,
330  psize->field_values(GPU_INDEX),
331  src.field_values(GPU_INDEX),
332  dest.field_values(GPU_INDEX),
333  dx, dy, dz,
334  xloface, yloface, zloface,
335  nparticle,
336  nmax[0], nmax[1], nmax[2]);
337 
338  break;
339  }
340  cudaDeviceSynchronize();
341  GPU_ERROR_CHECK(cudaGetLastError());
342  }
343 
344  //-------------------------------------------------------------------------
345 
346  }
347 }
348 
349 #endif /* ParticleOperatorsCuda_h */
350 #endif /* ENABLE_CUDA */
SpatialField< ParticleFieldTraits > ParticleField
defines a ParticleFieldNote that ParticleField objects should not have any structure associated with ...