SpatialOps
ParticleOperatorsImplementation.h
1 /*
2  * Copyright (c) 2014-2017 The University of Utah
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to
6  * deal in the Software without restriction, including without limitation the
7  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8  * sell copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20  * IN THE SOFTWARE.
21  */
22 
23 #ifndef ParticleOperatorsImplementation_h
24 #define ParticleOperatorsImplementation_h
25 
26 #include <spatialops/particles/ParticleOperators.h>
27 #include <spatialops/particles/ParticleGrid.h>
28 #ifdef ENABLE_CUDA
29 #include <spatialops/particles/ParticleOperatorsCuda.h>
30 #endif // ENABLE_CUDA
31 
32 #include <spatialops/util/TimeLogger.h>
33 
34 namespace SpatialOps{
35  namespace Particle{
36 
37 
38  // =================================================================
39  //
40  // Implementation
41  //
42  // =================================================================
43 
44 
45  template< typename CellField >
47  ParticlesPerCell( const double dx, const double xlo,
48  const double dy, const double ylo,
49  const double dz, const double zlo,
50  const InterpOptions method)
51  : dx_ ( dx<=0 ? 1.0 : dx ),
52  dy_ ( dy<=0 ? 1.0 : dy ),
53  dz_ ( dz<=0 ? 1.0 : dz ),
54  xlo_( dx<=0 ? 0.0 : xlo ),
55  ylo_( dy<=0 ? 0.0 : ylo ),
56  zlo_( dz<=0 ? 0.0 : zlo ),
57  method_( method )
58  {
59  px_ = NULL;
60  py_ = NULL;
61  pz_ = NULL;
62  psize_ = NULL;
63  }
64 
65  //------------------------------------------------------------------
66 
67  template< typename CellField >
68  void
71  const ParticleField *pycoord,
72  const ParticleField *pzcoord,
73  const ParticleField *psize )
74  {
75  px_ = pxcoord;
76  py_ = pycoord;
77  pz_ = pzcoord;
78  psize_ = psize ;
79  }
80 
81  //------------------------------------------------------------------
82 
83  template< typename CellField >
84  void
86  apply_to_field( DestFieldType& dest ) const
87  {
88  if (IS_GPU_INDEX(dest.active_device_index())) {
89 # ifdef __CUDACC__
90  _ppc_apply_to_field<CellField, ParticleField>(px_, py_, pz_, psize_,
91  dest,
92  dx_, dy_, dz_,
93  xlo_, ylo_, zlo_,
94  method_);
95  return;
96 # endif
97  } // IS_GPU_INDEX if
98  else {
99 
100  switch( method_ ){
101 
102  case NO_INTERPOLATE:{
103 
104  const IntVec& ngm = dest.get_ghost_data().get_minus();
105 
106  const double xloface = xlo_ - dx_*ngm[0] - dx_/2;
107  const double yloface = ylo_ - dy_*ngm[1] - dy_/2;
108  const double zloface = zlo_ - dz_*ngm[2] - dz_/2;
109 
110  dest <<= 0.0; // set to zero and accumulate contributions from each particle below.
111 
112  ParticleField::const_iterator ipx = px_ ? px_->begin() : psize_->begin();
113  ParticleField::const_iterator ipy = py_ ? py_->begin() : psize_->begin();
114  ParticleField::const_iterator ipz = pz_ ? pz_->begin() : psize_->begin();
115  ParticleField::const_iterator ipsize = psize_->begin();
116  const ParticleField::const_iterator ipsizeend = psize_->end();
117  for( ; ipsize != ipsizeend; ++ipx, ++ipy, ++ipz, ++ipsize ){
118  const int i = ( *ipx - xloface ) / dx_;
119  const int j = ( *ipy - yloface ) / dy_;
120  const int k = ( *ipz - zloface ) / dz_;
121  dest(i,j,k) += 1;
122  } // particle loop
123 
124  break;
125  }
126 
127  case INTERPOLATE:{
128  assert( psize_ != NULL );
129 
130  const IntVec& ngm = dest.get_ghost_data().get_minus();
131  const IntVec& nmax = dest.window_with_ghost().extent();
132 
133  const double xloface = xlo_ - dx_*ngm[0] - dx_/2;
134  const double yloface = ylo_ - dy_*ngm[1] - dy_/2;
135  const double zloface = zlo_ - dz_*ngm[2] - dz_/2;
136 
137  dest <<= 0.0; // set to zero and accumulate contributions from each particle below.
138 
139  // Note: here the outer loop is over particles and the inner loop is over
140  // cells to sum in the particle contributions. This should be optimal for
141  // situations where there are a relatively small number of particles. In
142  // cases where many particles per cell present, it may be more effective
143  // to invert the loop structure.
144  ParticleField::const_iterator ipx = px_ ? px_->begin() : psize_->begin();
145  ParticleField::const_iterator ipy = py_ ? py_->begin() : psize_->begin();
146  ParticleField::const_iterator ipz = pz_ ? pz_->begin() : psize_->begin();
147  ParticleField::const_iterator ipsize = psize_->begin();
148  const ParticleField::const_iterator ipsizeend = psize_->end();
149  for( ; ipsize != ipsizeend; ++ipx, ++ipy, ++ipz, ++ipsize ){
150 
151  const double rp = *ipsize * 0.5;
152 
153  // Identify the location of the particle boundary (assuming that it is a cube)
154  const double pxlo = px_ ? *ipx - rp : 0;
155  const double pylo = py_ ? *ipy - rp : 0;
156  const double pzlo = pz_ ? *ipz - rp : 0;
157 
158  const double pxhi = px_ ? pxlo + *ipsize : 0;
159  const double pyhi = py_ ? pylo + *ipsize : 0;
160  const double pzhi = pz_ ? pzlo + *ipsize : 0;
161 
162  const int ixlo = ( pxlo - xloface ) / dx_;
163  const int iylo = ( pylo - yloface ) / dy_;
164  const int izlo = ( pzlo - zloface ) / dz_;
165 
166  // hi indices are 1 past the end
167  const int ixhi = px_ ? std::min( nmax[0], int(( pxhi - xloface ) / dx_) + 1 ) : ixlo+1;
168  const int iyhi = py_ ? std::min( nmax[1], int(( pyhi - yloface ) / dy_) + 1 ) : iylo+1;
169  const int izhi = pz_ ? std::min( nmax[2], int(( pzhi - zloface ) / dz_) + 1 ) : izlo+1;
170 
171  // Distribute particle through the volume(s) it touches. Here we are
172  // doing a highly approximate job at approximating how much fractional
173  // particle volume is in each cell.
174  const double pvol = (px_ ? 2*rp : 1)
175  * (py_ ? 2*rp : 1)
176  * (pz_ ? 2*rp : 1);
177  for( int k=izlo; k<izhi; ++k ){
178  const double zcm = zloface + k*dz_;
179  const double zcp = zcm + dz_;
180  // determine the z bounding box for the particle in this cell
181  const double zcont = pz_ ? std::min(zcp,pzhi) - std::max(zcm,pzlo) : 1;
182  for( int j=iylo; j<iyhi; ++j ){
183  const double ycm = yloface + j*dy_;
184  const double ycp = ycm + dy_;
185  // determine the y bounding box for the particle in this cell
186  const double ycont = py_ ? std::min(ycp,pyhi) - std::max(ycm,pylo) : 1;
187  for( int i=ixlo; i<ixhi; ++i ){
188  const double xcm = xloface + i*dx_;
189  const double xcp = xcm + dx_;
190  const double xcont = px_ ? std::min(xcp,pxhi) - std::max(xcm,pxlo) : 1;
191  // contribution is the fraction of the particle volume in this cell.
192  const double contribution = xcont*ycont*zcont / pvol;
193  dest(i,j,k) += contribution;
194  }
195  }
196  }
197  } // particle loop
198 
199  break;
200  } // case INTERPOLATE
201 
202  } // switch
203  } // else IS_GPU
204 
205  }
206 
207  //==================================================================
208 
209  template< typename CellField >
211  ParticleToCell( const double dx, const double xlo,
212  const double dy, const double ylo,
213  const double dz, const double zlo,
214  const InterpOptions method )
215  : dx_ ( dx<=0 ? 1.0 : dx ),
216  dy_ ( dy<=0 ? 1.0 : dy ),
217  dz_ ( dz<=0 ? 1.0 : dz ),
218  xlo_( dx<=0 ? 0.0 : xlo ),
219  ylo_( dy<=0 ? 0.0 : ylo ),
220  zlo_( dz<=0 ? 0.0 : zlo ),
221  method_( method )
222  {
223  px_ = NULL;
224  py_ = NULL;
225  pz_ = NULL;
226  psize_ = NULL;
227  }
228 
229  //------------------------------------------------------------------
230 
231  template< typename CellField >
232  void
235  const ParticleField *pycoord,
236  const ParticleField *pzcoord,
237  const ParticleField *psize )
238  {
239  px_ = pxcoord;
240  py_ = pycoord;
241  pz_ = pzcoord;
242  psize_ = psize ;
243  }
244 
245  //------------------------------------------------------------------
246 
247  template< typename CellField >
248  void
251  DestFieldType& dest ) const
252  {
253  if (IS_GPU_INDEX(dest.active_device_index())) {
254 # ifdef __CUDACC__
255  _p2c_apply_to_field<CellField, ParticleField>(px_, py_, pz_, psize_,
256  src, dest,
257  dx_, dy_, dz_,
258  xlo_, ylo_, zlo_,
259  method_);
260  return;
261 # endif
262  } // IS_GPU_INDEX if
263  else {
264 
265  switch( method_ ){
266 
267  case NO_INTERPOLATE:{
268 
269  const IntVec& ngm = dest.get_ghost_data().get_minus();
270 
271  const double xloface = xlo_ - dx_*ngm[0] - dx_/2;
272  const double yloface = ylo_ - dy_*ngm[1] - dy_/2;
273  const double zloface = zlo_ - dz_*ngm[2] - dz_/2;
274 
275  dest <<= 0.0; // set to zero and accumulate contributions from each particle below.
276 
277  ParticleField::const_iterator ipx = px_ ? px_->begin() : src.begin();
278  ParticleField::const_iterator ipy = py_ ? py_->begin() : src.begin();
279  ParticleField::const_iterator ipz = pz_ ? pz_->begin() : src.begin();
281  const ParticleField::const_iterator ise = src.end();
282  for( ; isrc != ise; ++ipx, ++ipy, ++ipz, ++isrc ){
283  const int i = ( *ipx - xloface ) / dx_;
284  const int j = ( *ipy - yloface ) / dy_;
285  const int k = ( *ipz - zloface ) / dz_;
286  dest(i,j,k) += *isrc;
287  } // particle loop
288 
289  break;
290  }
291 
292  case INTERPOLATE:{
293  assert( psize_ != NULL );
294 
295  const IntVec& ngm = dest.get_ghost_data().get_minus();
296  const IntVec& nmax = dest.window_with_ghost().extent();
297 
298  const double xloface = xlo_ - dx_*ngm[0] - dx_/2;
299  const double yloface = ylo_ - dy_*ngm[1] - dy_/2;
300  const double zloface = zlo_ - dz_*ngm[2] - dz_/2;
301 
302  dest <<= 0.0; // set to zero and accumulate contributions from each particle below.
303 
304  // Note: here the outer loop is over particles and the inner loop is over
305  // cells to sum in the particle contributions. This should be optimal for
306  // situations where there are a relatively small number of particles. In
307  // cases where many particles per cell present, it may be more effective
308  // to invert the loop structure.
309  ParticleField::const_iterator ipx = px_ ? px_->begin() : src.begin();
310  ParticleField::const_iterator ipy = py_ ? py_->begin() : src.begin();
311  ParticleField::const_iterator ipz = pz_ ? pz_->begin() : src.begin();
312  ParticleField::const_iterator ipsize = psize_->begin();
314  const ParticleField::const_iterator ise = src.end();
315  for( ; isrc != ise; ++ipx, ++ipy, ++ipz, ++ipsize, ++isrc ){
316 
317  const double rp = *ipsize * 0.5;
318 
319  // Identify the location of the particle boundary (assuming that it is a cube)
320  const double pxlo = px_ ? *ipx - rp : 0;
321  const double pylo = py_ ? *ipy - rp : 0;
322  const double pzlo = pz_ ? *ipz - rp : 0;
323 
324  const double pxhi = px_ ? pxlo + *ipsize : 0;
325  const double pyhi = py_ ? pylo + *ipsize : 0;
326  const double pzhi = pz_ ? pzlo + *ipsize : 0;
327 
328  const int ixlo = ( pxlo - xloface ) / dx_;
329  const int iylo = ( pylo - yloface ) / dy_;
330  const int izlo = ( pzlo - zloface ) / dz_;
331 
332  // hi indices are 1 past the end
333  const int ixhi = px_ ? std::min( nmax[0], int(( pxhi - xloface ) / dx_) + 1 ) : ixlo+1;
334  const int iyhi = py_ ? std::min( nmax[1], int(( pyhi - yloface ) / dy_) + 1 ) : iylo+1;
335  const int izhi = pz_ ? std::min( nmax[2], int(( pzhi - zloface ) / dz_) + 1 ) : izlo+1;
336 
337  // Distribute particle through the volume(s) it touches. Here we are
338  // doing a highly approximate job at approximating how much fractional
339  // particle volume is in each cell.
340  const double pvol = (px_ ? 2*rp : 1)
341  * (py_ ? 2*rp : 1)
342  * (pz_ ? 2*rp : 1);
343 # ifndef NDEBUG
344  double sumterm=0.0;
345 # endif
346  for( int k=izlo; k<izhi; ++k ){
347  const double zcm = zloface + k*dz_;
348  const double zcp = zcm + dz_;
349  // determine the z bounding box for the particle in this cell
350  const double zcont = pz_ ? std::min(zcp,pzhi) - std::max(zcm,pzlo) : 1;
351  for( int j=iylo; j<iyhi; ++j ){
352  const double ycm = yloface + j*dy_;
353  const double ycp = ycm + dy_;
354  // determine the y bounding box for the particle in this cell
355  const double ycont = py_ ? std::min(ycp,pyhi) - std::max(ycm,pylo) : 1;
356  for( int i=ixlo; i<ixhi; ++i ){
357  const double xcm = xloface + i*dx_;
358  const double xcp = xcm + dx_;
359  const double xcont = px_ ? std::min(xcp,pxhi) - std::max(xcm,pxlo) : 1;
360  // contribution is the fraction of the particle volume in this cell.
361  const double contribution = xcont*ycont*zcont / pvol;
362  dest(i,j,k) += *isrc * contribution;
363 # ifndef NDEBUG
364  sumterm += contribution;
365 # endif
366  }
367  }
368  }
369 # ifndef NDEBUG
370  if( std::abs(1.0-sumterm) >= 1e-10 ) std::cout << "sum: " << sumterm << std::endl;
371  assert( std::abs(1.0-sumterm) < 1e-10 );
372 # endif
373  } // particle loop
374 
375  break;
376  } // case INTERPOLATE
377 
378  } // switch( method_ )
379  } // else IS_GPU
380  }
381 
382  //==================================================================
383 
384  template< typename CellField >
386  CellToParticle( const double dx, const double xlo,
387  const double dy, const double ylo,
388  const double dz, const double zlo,
389  const InterpOptions method )
390  : dx_ ( dx<=0 ? 1.0 : dx ),
391  dy_ ( dy<=0 ? 1.0 : dy ),
392  dz_ ( dz<=0 ? 1.0 : dz ),
393  xlo_( dx<=0 ? 0.0 : xlo ),
394  ylo_( dy<=0 ? 0.0 : ylo ),
395  zlo_( dz<=0 ? 0.0 : zlo ),
396  method_( method )
397  {
398  px_ = NULL;
399  py_ = NULL;
400  pz_ = NULL;
401  psize_ = NULL;
402  }
403 
404  //------------------------------------------------------------------
405 
406  template<typename CellField>
407  void
410  const ParticleField *pycoord,
411  const ParticleField *pzcoord,
412  const ParticleField *psize )
413  {
414  px_ = pxcoord;
415  py_ = pycoord;
416  pz_ = pzcoord;
417  psize_ = psize ;
418  }
419 
420  //------------------------------------------------------------------
421 
422  template<typename CellField>
423  void
425  apply_to_field( const SrcFieldType& src,
426  DestFieldType& dest ) const
427  {
428  if (IS_GPU_INDEX(dest.active_device_index())) {
429 # ifdef __CUDACC__
430  _c2p_apply_to_field<CellField, ParticleField>(px_, py_, pz_, psize_,
431  src, dest,
432  dx_, dy_, dz_,
433  xlo_, ylo_, zlo_,
434  method_);
435  return;
436 # endif
437  } // IS_GPU_INDEX if
438  else {
439 
440  switch( method_ ){
441  case NO_INTERPOLATE: {
442  const IntVec& ngm = src.get_ghost_data().get_minus();
443 
444  const double xloface = xlo_ - dx_*ngm[0] - dx_/2;
445  const double yloface = ylo_ - dy_*ngm[1] - dy_/2;
446  const double zloface = zlo_ - dz_*ngm[2] - dz_/2;
447 
448  ParticleField::const_iterator ipx = px_ ? px_->begin() : dest.begin();
449  ParticleField::const_iterator ipy = py_ ? py_->begin() : dest.begin();
450  ParticleField::const_iterator ipz = pz_ ? pz_->begin() : dest.begin();
451  ParticleField::iterator idst = dest.begin();
452  const ParticleField::iterator idste = dest.end();
453  for( ; idst != idste; ++ipx, ++ipy, ++ipz, ++idst ){
454 
455  const int i = ( *ipx - xloface ) / dx_;
456  const int j = ( *ipy - yloface ) / dy_;
457  const int k = ( *ipz - zloface ) / dz_;
458 
459  *idst = src(i,j,k);
460 
461  } // particle loop
462 
463  break;
464  } // case NO_INTERPOLATE
465 
466  case INTERPOLATE:{
467  const IntVec& ngm = src.get_ghost_data().get_minus();
468  const IntVec& nmax = src.window_with_ghost().extent();
469 
470  // Get the location of the (-) face associated with this cell field. Here we
471  // shift the xlo_ value (the value associated with this field type's origin)
472  // by its ghost offset and by the distance from the cell center to the face.
473  const double xloface = xlo_ - dx_*ngm[0] - dx_/2;
474  const double yloface = ylo_ - dy_*ngm[1] - dy_/2;
475  const double zloface = zlo_ - dz_*ngm[2] - dz_/2;
476 
477  ParticleField::const_iterator ipx = px_ ? px_->begin() : dest.begin();
478  ParticleField::const_iterator ipy = py_ ? py_->begin() : dest.begin();
479  ParticleField::const_iterator ipz = pz_ ? pz_->begin() : dest.begin();
480  ParticleField::const_iterator ipsize = psize_->begin();
481  ParticleField::iterator idst = dest.begin();
482  const ParticleField::iterator idste = dest.end();
483  for( ; idst != idste; ++ipx, ++ipy, ++ipz, ++ipsize, ++idst ){
484 
485  *idst = 0.0;
486 
487  const double rp = *ipsize * 0.5;
488 
489  // Identify the location of the particle boundary
490  const double pxlo = px_ ? *ipx - rp : 0;
491  const double pylo = py_ ? *ipy - rp : 0;
492  const double pzlo = pz_ ? *ipz - rp : 0;
493 
494  const double pxhi = px_ ? pxlo + *ipsize : 0;
495  const double pyhi = py_ ? pylo + *ipsize : 0;
496  const double pzhi = pz_ ? pzlo + *ipsize : 0;
497 
498  const int ixlo = ( pxlo - xloface ) / dx_;
499  const int iylo = ( pylo - yloface ) / dy_;
500  const int izlo = ( pzlo - zloface ) / dz_;
501 
502  // hi indices are 1 past the end
503  const int ixhi = px_ ? std::min( nmax[0], int(( pxhi - xloface ) / dx_) + 1 ) : ixlo+1;
504  const int iyhi = py_ ? std::min( nmax[1], int(( pyhi - yloface ) / dy_) + 1 ) : iylo+1;
505  const int izhi = pz_ ? std::min( nmax[2], int(( pzhi - zloface ) / dz_) + 1 ) : izlo+1;
506 
507  // Distribute particle through the volume(s) it touches. Here we are
508  // doing a highly approximate job at approximating how much fractional
509  // particle volume is in each cell.
510  const double pvol = (px_ ? 2*rp : 1)
511  * (py_ ? 2*rp : 1)
512  * (pz_ ? 2*rp : 1);
513 
514 # ifndef NDEBUG
515  double sumterm=0.0;
516 # endif
517  for( int k=izlo; k<izhi; ++k ){
518  const double zcm = zloface + k*dz_; // (-) face of the cell
519  const double zcp = zcm + dz_; // (+) face of the cell
520  // determine the z bounding box for the particle in this cell
521  const double zcont = pz_ ? std::min(zcp,pzhi) - std::max(zcm,pzlo) : 1;
522  for( int j=iylo; j<iyhi; ++j ){
523  const double ycm = yloface + j*dy_;
524  const double ycp = ycm + dy_;
525  // determine the y bounding box for the particle in this cell
526  const double ycont = py_ ? std::min(ycp,pyhi) - std::max(ycm,pylo) : 1;
527  for( int i=ixlo; i<ixhi; ++i ){
528  const double xcm = xloface + i*dx_;
529  const double xcp = xcm + dx_;
530  const double xcont = px_ ? std::min(xcp,pxhi) - std::max(xcm,pxlo) : 1;
531  // contribution is the fraction of the particle volume in this cell.
532  const double contribution = xcont*ycont*zcont / pvol;
533  *idst += src(i,j,k) * contribution;
534 # ifndef NDEBUG
535  assert( contribution > 0 );
536  sumterm += contribution;
537 # endif
538  }
539  }
540  }
541 # ifndef NDEBUG
542  if( std::abs(1.0-sumterm) >= 1e-10 ) std::cout << "C2P sum: " << sumterm << std::endl;
543  assert( std::abs(1.0-sumterm) < 1e-10 );
544 # endif
545  } // particle loop
546  } // case INTERPOLATE
547  break;
548 
549  } // switch( method_ )
550  } // else - GPU vs CPU
551  }
552 
553  //------------------------------------------------------------------
554 
555  RangeSearch::RangeSearch( const MemoryWindow& pParticleMemoryWindow,
556  const GhostData& pParticleGhostData,
557  const BoundaryCellInfo& pParticleBoundaryInfo,
558  const IntVec pParticleMinExtent,
559  const IntVec pParticleMaxExtent,
560  const size_t pNumParticles,
561  const double pSmoothingRadius,
562  const ParticleField& pxcoord,
563  const ParticleField& pycoord,
564  const ParticleField& pzcoord,
565  const bool pUseGrid,
566  const bool pSortData,
567  const short int location )
568  : px_(&pxcoord),
569  py_(&pycoord),
570  pz_(&pzcoord),
571  sortData_ (pSortData),
572  useGrid_(pUseGrid),
573  grid_( pNumParticles,
574  &pxcoord,
575  &pycoord,
576  &pzcoord,
577  pParticleMaxExtent,
578  pParticleMinExtent,
579  pSmoothingRadius,
580  pParticleMemoryWindow,
581  pParticleGhostData,
582  pParticleBoundaryInfo,
583  location)
584 
585  {
586 
587  assert (pxcoord.window_with_ghost().glob_npts() == pNumParticles);
588  assert (pycoord.window_with_ghost().glob_npts() == pNumParticles);
589  assert (pzcoord.window_with_ghost().glob_npts() == pNumParticles);
590 
591  assert (pParticleMinExtent < pParticleMaxExtent);
592 
593  if (pUseGrid)
594  setup_grid(pSortData, location);
595  }
596 
597  //------------------------------------------------------------------
598 
599  RangeSearch::RangeSearch( const MemoryWindow& pParticleMemoryWindow,
600  const GhostData& pParticleGhostData,
601  const BoundaryCellInfo& pParticleBoundaryInfo,
602  const IntVec pParticleMinExtent,
603  const IntVec pParticleMaxExtent,
604  const size_t pNumParticles,
605  const double pSmoothingRadius,
606  const ParticleField& pxcoord,
607  const ParticleField& pycoord,
608  const bool pUseGrid,
609  const bool pSortData,
610  const short int location )
611  : px_(&pxcoord),
612  py_(&pycoord),
613  pz_(nullptr),
614  sortData_ (pSortData),
615  useGrid_(pUseGrid),
616  grid_( pNumParticles,
617  &pxcoord,
618  &pycoord,
619  nullptr,
620  pParticleMaxExtent,
621  pParticleMinExtent,
622  pSmoothingRadius,
623  pParticleMemoryWindow,
624  pParticleGhostData,
625  pParticleBoundaryInfo,
626  location)
627 
628  {
629 
630  assert (pxcoord.window_with_ghost().glob_npts() == pNumParticles);
631  assert (pycoord.window_with_ghost().glob_npts() == pNumParticles);
632 
633  assert (pParticleMinExtent < pParticleMaxExtent);
634 
635  if (pUseGrid)
636  setup_grid(pSortData, location);
637  }
638 
639  //------------------------------------------------------------------
640 
641  void RangeSearch
642  ::setup_grid(bool pSortData, short int location)
643  {
644  Timer timer;
645 
646  timer.reset();
647  grid_.Setup();
648  const double setupTime = timer.stop();
649 
650  if ( IS_GPU_INDEX(location) &&
651  IS_GPU_INDEX(px_->active_device_index()) &&
652  IS_GPU_INDEX(py_->active_device_index()) &&
653  (!pz_ || IS_GPU_INDEX(pz_->active_device_index())) ) {
654  #ifdef __CUDACC__
655 
656  // Insert particles into the grid
657  timer.reset();
658  grid_._insert_particles_cuda();
659  const double particleInsertionTime = timer.stop();
660 
661  // Perform prefix sum -initial step prior to performing counting sorting
662  timer.reset();
663  grid_._prefix_sum_cuda();
664  const double prefixSumTime = timer.stop();
665 
666  double dataSortTime;
667 
668  if (pSortData)
669  {
670  timer.reset();
671  grid_._data_sort_cuda();
672  dataSortTime = timer.stop();
673  } else {
674 
675  timer.reset();
676  grid_._counting_sort_index_cuda();
677  dataSortTime = timer.stop();
678  }
679 
680  #ifndef NDEBUG
681  std::cout << "Setup Time: " << setupTime << std::endl;
682  std::cout << "Particle Insertion Time: " << particleInsertionTime << std::endl;
683  std::cout << "Prefix Sum Time: " << prefixSumTime << std::endl;
684  std::cout << "Setup Sort Time: " << dataSortTime << std::endl;
685  std::cout << "Total Amortized Time: " << (setupTime + particleInsertionTime + prefixSumTime + dataSortTime) << std::endl;
686  #endif
687 
688  #endif
689  } else { // IS_GPU_INDEX if
690 
691  // Insert particles into the grid
692  timer.reset();
693  grid_.InsertParticles();
694  const double particleInsertionTime = timer.stop();
695 
696  #ifndef NDEBUG
697  std::cout << "Setup Time: " << setupTime << std::endl;
698  std::cout << "Particle Insertion Time: " << particleInsertionTime << std::endl;
699  std::cout << "Total Amortized Time: " << (setupTime + particleInsertionTime) << std::endl;
700  #endif
701  }
702 
703  }
704 
705 
706  } // namespace Particle
707 } // namespace SpatialOps
708 
709 #endif // ParticleOperatorsImplementation_h
710 
711 
void apply_to_field(const SrcFieldType &src, DestFieldType &dest) const
Provides tools to index into a sub-block of memory.
Definition: MemoryWindow.h:63
void InsertParticles()
Insert particles into the particle grid, updating the internal memory, which represents its state...
Definition: ParticleGrid.h:865
CellToParticle(const double dx, const double xlo, const double dy=-1, const double ylo=0, const double dz=-1, const double zlo=0, const InterpOptions method=INTERPOLATE)
Construct a CellToParticle operator.
void apply_to_field(const SrcFieldType &src, DestFieldType &dest) const
void set_coordinate_information(const ParticleField *const pxcoord, const ParticleField *const pycoord, const ParticleField *const pzcoord, const ParticleField *const psize)
size_t glob_npts() const
obtain the global number of points in the field. Note that this is not necessarily contiguous memory ...
Definition: MemoryWindow.h:155
short int active_device_index() const
return the index of the current active device
Definition: SpatialField.h:331
RangeSearch(const MemoryWindow &pParticleMemoryWindow, const GhostData &pParticleGhostData, const BoundaryCellInfo &pParticleBoundaryInfo, const IntVec pParticleMinExtent, const IntVec pParticleMaxExtent, const size_t pNumParticles, const double pSmoothingRadius, const ParticleField &pXcoord, const ParticleField &pYcoord, const ParticleField &pZcoord, const bool pUseGrid=false, const bool pSortData=false, const short int pLocation=CPU_INDEX)
Construct a 3D RangeSearch operator.
const_iterator end() const
return a constant iterator to end for CPU with valid ghost cells
Definition: SpatialField.h:492
Holds information about the number of ghost cells on each side of the domain.
Definition: GhostData.h:54
const_iterator begin() const
return a constant iterator for CPU with valid ghost cells
Definition: SpatialField.h:478
void set_coordinate_information(const ParticleField *pxcoord, const ParticleField *pycoord, const ParticleField *pzcoord, const ParticleField *psize)
Provides basic timing functionality.
Definition: TimeLogger.h:49
void reset()
reset the timer
Definition: TimeLogger.h:75
provides iterator support for SpatialField. Only works for CPU.
Definition: MemoryWindow.h:334
provides iterator support for SpatialField. Only works for CPU.
Definition: MemoryWindow.h:326
void Setup()
Sets up the particle grid, the internal memory needed to maintain the state information used while se...
Definition: ParticleGrid.h:771
void set_coordinate_information(const ParticleField *const pxcoord, const ParticleField *const pycoord, const ParticleField *const pzcoord, const ParticleField *const psize)
ParticleToCell(const double dx, const double xlo, const double dy=-1, const double ylo=0, const double dz=-1, const double zlo=0, const InterpOptions method=INTERPOLATE)
construct a ParticleToCell operator
Abstracts a field.
Definition: SpatialField.h:132
ParticlesPerCell(const double dx, const double xlo, const double dy=-1, const double ylo=0, const double dz=-1, const double zlo=0, const InterpOptions method=INTERPOLATE)
construct a ParticlesPerCell operator
Provides information about boundary cells for various fields.
double stop()
Stop the timer and return the time since the last call to start(). This is different from elapsed_tim...
Definition: TimeLogger.h:69