SpatialOps
NeboStencils.h
1 /* This file was generated by fulmar version 0.9.2. */
2 
3 /*
4  * Copyright (c) 2014 The University of Utah
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #ifndef NEBO_STENCILS_H
26  #define NEBO_STENCILS_H
27 
28  namespace SpatialOps {
29  template<int Length>
31  public:
33  & o,
34  double const c)
35  : others_(o), coef_(c)
36  {}
37 
38  inline NeboStencilCoefCollection<Length + 1> const operator ()(double
39  const c) const {
41  }
42 
43  #ifdef __CUDACC__
44  __host__ __device__
45  #endif
46  /* __CUDACC__ */ inline double coef(void) const { return coef_; }
47 
48  inline double get_coef(int const index) const {
49  if(index < 0) {
50  std::ostringstream msg;
51  msg << "Nebo error in " << "Nebo Stencil Coefficient Collection"
52  << ":\n";
53  msg << "given negative value for coefficient index";
54  msg << "\n";
55  msg << "\t - " << __FILE__ << " : " << __LINE__;
56  throw(std::runtime_error(msg.str()));
57  };
58 
59  if(index >= Length) {
60  std::ostringstream msg;
61  msg << "Nebo error in " << "Nebo Stencil Coefficient Collection"
62  << ":\n";
63  msg << "trying to access a coefficient that does not exist";
64  msg << "\n";
65  msg << "\t - " << __FILE__ << " : " << __LINE__;
66  throw(std::runtime_error(msg.str()));
67  };
68 
69  return (index == Length - 1 ? coef() : others().get_coef(index));
70  }
71 
72  #ifdef __CUDACC__
73  __host__ __device__
74  #endif
75  /* __CUDACC__ */ inline NeboStencilCoefCollection<Length - 1> const
76  others(void) const { return others_; }
77 
78  inline double last(void) const { return others().last(); }
79 
80  inline NeboStencilCoefCollection<Length - 1> all_but_last(void) const {
81  return NeboStencilCoefCollection<Length - 1>(others().all_but_last(),
82  coef_);
83  }
84 
85  private:
86  NeboStencilCoefCollection<Length - 1> const others_;
87 
88  double const coef_;
89  };
90 
91  template<>
93  public:
94  NeboStencilCoefCollection(double const c)
95  : coef_(c)
96  {}
97 
98  NeboStencilCoefCollection(NeboNil nil, double const c)
99  : coef_(c)
100  {}
101 
102  inline NeboStencilCoefCollection<2> const operator ()(double const c) const {
103  return NeboStencilCoefCollection<2>(*this, c);
104  }
105 
106  #ifdef __CUDACC__
107  __host__ __device__
108  #endif
109  /* __CUDACC__ */ inline double coef(void) const { return coef_; }
110 
111  inline double get_coef(int const index) const {
112  if(index < 0) {
113  std::ostringstream msg;
114  msg << "Nebo error in " << "Nebo Stencil Coefficient Collection"
115  << ":\n";
116  msg << "given negative value for coefficient index";
117  msg << "\n";
118  msg << "\t - " << __FILE__ << " : " << __LINE__;
119  throw(std::runtime_error(msg.str()));
120  };
121 
122  if(index > 1) {
123  std::ostringstream msg;
124  msg << "Nebo error in " << "Nebo Stencil Coefficient Collection"
125  << ":\n";
126  msg << "trying to access a coefficient that does not exist";
127  msg << "\n";
128  msg << "\t - " << __FILE__ << " : " << __LINE__;
129  throw(std::runtime_error(msg.str()));
130  };
131 
132  return coef();
133  }
134 
135  inline double last(void) const { return coef_; }
136 
137  inline NeboNil all_but_last(void) const { return NeboNil(); }
138 
139  private:
140  double const coef_;
141  };
142 
143  inline NeboStencilCoefCollection<1> const build_coef_collection(double
144  const c) {
146  };
147 
148  inline NeboStencilCoefCollection<2> const build_two_point_coef_collection(double
149  const
150  c1,
151  double
152  const
153  c2) {
154  return NeboStencilCoefCollection<1>(c1)(c2);
155  };
156 
157  inline NeboStencilCoefCollection<3> const
158  build_three_point_coef_collection(double const c1,
159  double const c2,
160  double const c3) {
161  return NeboStencilCoefCollection<1>(c1)(c2)(c3);
162  };
163 
164  inline NeboStencilCoefCollection<4> const build_four_point_coef_collection(double
165  const
166  c1,
167  double
168  const
169  c2,
170  double
171  const
172  c3,
173  double
174  const
175  c4) {
176  return NeboStencilCoefCollection<1>(c1)(c2)(c3)(c4);
177  };
178 
179  inline NeboStencilCoefCollection<5> const build_five_point_coef_collection(double
180  const
181  c1,
182  double
183  const
184  c2,
185  double
186  const
187  c3,
188  double
189  const
190  c4,
191  double
192  const
193  c5) {
194  return NeboStencilCoefCollection<1>(c1)(c2)(c3)(c4)(c5);
195  };
196 
197  inline NeboStencilCoefCollection<7> const
198  build_seven_point_coef_collection(double const c1,
199  double const c2,
200  double const c3,
201  double const c4,
202  double const c5,
203  double const c6,
204  double const c7) {
205  return NeboStencilCoefCollection<1>(c1)(c2)(c3)(c4)(c5)(c6)(c7);
206  };
207 
208  template<typename PointType, typename CollectionType>
210  public:
211  PointType typedef Point;
212 
213  CollectionType typedef Collection;
214 
216 
217  Point typedef First;
218 
219  Collection typedef AllButFirst;
220 
221  typename Collection::Last typedef Last;
222 
224  typedef AllButLast;
225 
226  enum {length = 1 + Collection::length};
227 
228  template<typename NewPoint>
229  struct AddPoint {
231  };
232 
233  static inline GhostData possible_ghosts(void) {
234  return min(additive_reductive_point_to_ghost(Point::int_vec()),
235  Collection::possible_ghosts());
236  }
237 
238  static inline GhostData possible_ghosts(GhostData const & ghosts) {
239  return ghosts + possible_ghosts();
240  }
241 
242  static inline GhostData possible_additive_ghosts(void) {
243  return min(addative_point_to_ghost(Point::int_vec()),
244  Collection::possible_ghosts());
245  }
246 
247  static inline GhostData possible_additive_ghosts(GhostData const &
248  ghosts) {
249  return ghosts + possible_additive_ghosts();
250  }
251  };
252 
253  template<typename PointType>
254  struct NeboStencilPointCollection<PointType, NeboNil> {
255  public:
256  PointType typedef Point;
257 
258  NeboNil typedef Collection;
259 
261 
262  Point typedef Last;
263 
264  NeboNil typedef AllButLast;
265 
266  enum {length = 1};
267 
268  template<typename NewPoint>
269  struct AddPoint {
271  };
272 
273  static inline GhostData possible_ghosts(void) {
274  return additive_reductive_point_to_ghost(Point::int_vec());
275  }
276 
277  static inline GhostData possible_ghosts(GhostData const & ghosts) {
278  return ghosts + possible_ghosts();
279  }
280 
281  static inline GhostData possible_additive_ghosts(void) {
282  return additive_point_to_ghost(Point::int_vec());
283  }
284 
285  static inline GhostData possible_additive_ghosts(GhostData const &
286  ghosts) {
287  return ghosts + possible_additive_ghosts();
288  }
289  };
290 
291  template<typename CurrentMode,
292  typename Pts,
293  typename Arg,
294  typename FieldType>
295  struct NeboStencil;
296  template<typename Pts, typename Arg, typename FieldType>
297  struct NeboStencil<Initial, Pts, Arg, FieldType> {
298  public:
299  FieldType typedef field_type;
300 
302 
304  typedef SeqWalkType;
305 
306  #ifdef ENABLE_THREADS
308  typedef ResizeType;
309  #endif
310  /* ENABLE_THREADS */
311 
312  #ifdef __CUDACC__
314  typedef GPUWalkType;
315  #endif
316  /* __CUDACC__ */
317 
318  NeboStencil(Arg const & a, Coefs const & coefs)
319  : arg_(a), coefs_(coefs)
320  {}
321 
322  inline GhostData ghosts_with_bc(void) const {
323  return Pts::possible_ghosts(arg_.ghosts_with_bc());
324  }
325 
326  inline GhostData ghosts_without_bc(void) const {
327  return Pts::possible_ghosts(arg_.ghosts_without_bc());
328  }
329 
330  inline bool has_extents(void) const { return arg_.has_extents(); }
331 
332  inline IntVec extents(void) const { return arg_.extents(); }
333 
334  inline IntVec has_bc(void) const { return arg_.has_bc(); }
335 
336  inline SeqWalkType init(IntVec const & extents,
337  GhostData const & ghosts,
338  IntVec const & hasBC) const {
339  return SeqWalkType(arg_.init(extents, ghosts, hasBC), coefs_);
340  }
341 
342  #ifdef ENABLE_THREADS
343  inline ResizeType resize(void) const {
344  return ResizeType(arg_.resize(), coefs_);
345  }
346  #endif
347  /* ENABLE_THREADS */
348 
349  #ifdef __CUDACC__
350  inline bool cpu_ready(void) const { return arg_.cpu_ready(); }
351 
352  inline bool gpu_ready(int const deviceIndex) const {
353  return arg_.gpu_ready(deviceIndex);
354  }
355 
356  inline GPUWalkType gpu_init(IntVec const & extents,
357  GhostData const & ghosts,
358  IntVec const & hasBC,
359  int const deviceIndex,
360  cudaStream_t const & lhsStream) const {
361  return GPUWalkType(arg_.gpu_init(extents,
362  ghosts,
363  hasBC,
364  deviceIndex,
365  lhsStream),
366  coefs_);
367  }
368 
369  inline void stream_wait_event(cudaEvent_t const & event) const {
370  arg_.stream_wait_event(event);
371  }
372 
373  #ifdef NEBO_GPU_TEST
374  inline void gpu_prep(int const deviceIndex) const {
375  arg_.gpu_prep(deviceIndex);
376  }
377  #endif
378  /* NEBO_GPU_TEST */
379  #endif
380  /* __CUDACC__ */
381 
382  private:
383  Arg const arg_;
384 
385  Coefs const coefs_;
386  };
387  #ifdef ENABLE_THREADS
388  template<typename Pts, typename Arg, typename FieldType>
389  struct NeboStencil<Resize, Pts, Arg, FieldType> {
390  public:
391  FieldType typedef field_type;
392 
394 
396  typedef SeqWalkType;
397 
398  NeboStencil(Arg const & arg, Coefs const & coefs)
399  : arg_(arg), coefs_(coefs)
400  {}
401 
402  inline SeqWalkType init(IntVec const & extents,
403  GhostData const & ghosts,
404  IntVec const & hasBC) const {
405  return SeqWalkType(arg_.init(extents, ghosts, hasBC), coefs_);
406  }
407 
408  private:
409  Arg const arg_;
410 
411  Coefs const coefs_;
412  }
413  #endif
414  /* ENABLE_THREADS */;
415  template<typename Pts, typename Arg, typename FieldType>
416  struct NeboStencil<SeqWalk, Pts, Arg, FieldType> {
417  public:
418  FieldType typedef field_type;
419 
420  typename field_type::value_type typedef value_type;
421 
423 
424  template<typename PointCollection>
425  struct EvalExpr {
427 
428  typename PointCollection::Point typedef Point;
429 
430  typename PointCollection::Collection typedef Collection;
431 
432  static inline value_type eval(Arg const & arg,
433  Coefs const & coefs,
434  int const x,
435  int const y,
436  int const z) {
437  return EvalExpr<Collection>::eval(arg, coefs.others(), x, y, z)
438  + arg.eval(x + Point::value(0),
439  y + Point::value(1),
440  z + Point::value(2)) * coefs.coef();
441  }
442  };
443 
444  template<typename Point>
445  struct EvalExpr<NeboStencilPointCollection<Point, NeboNil> > {
446  NeboStencilCoefCollection<1> typedef Coefs;
447 
448  static inline value_type eval(Arg const & arg,
449  Coefs const & coefs,
450  int const x,
451  int const y,
452  int const z) {
453  return arg.eval(x + Point::value(0),
454  y + Point::value(1),
455  z + Point::value(2)) * coefs.coef();
456  }
457  };
458 
459  NeboStencil(Arg const & arg, Coefs const & coefs)
460  : arg_(arg), coefs_(coefs)
461  {}
462 
463  inline value_type eval(int const x, int const y, int const z) const {
464  return EvalExpr<Pts>::eval(arg_, coefs_, x, y, z);
465  }
466 
467  private:
468  Arg arg_;
469 
470  Coefs const coefs_;
471  };
472  #ifdef __CUDACC__
473  template<typename Pts, typename Arg, typename FieldType>
474  struct NeboStencil<GPUWalk, Pts, Arg, FieldType> {
475  public:
476  FieldType typedef field_type;
477 
478  typename field_type::value_type typedef value_type;
479 
481 
482  template<typename PointCollection>
483  struct EvalExpr {
485  ;
486 
487  typename PointCollection::Point typedef Point;
488 
489  typename PointCollection::Collection typedef Collection;
490 
491  __device__ static inline value_type eval(Arg const & arg,
492  Coefs const & coefs,
493  int const x,
494  int const y,
495  int const z) {
496  return EvalExpr<Collection>::eval(arg,
497  coefs.others(),
498  x,
499  y,
500  z) + arg.eval(x + Point::
501  value_gpu(0),
502  y + Point::
503  value_gpu(1),
504  z + Point::
505  value_gpu(2))
506  * coefs.coef();
507  }
508  };
509 
510  template<typename Point>
511  struct EvalExpr<NeboStencilPointCollection<Point, NeboNil> > {
512  NeboStencilCoefCollection<1> typedef Coefs;
513 
514  __device__ static inline value_type eval(Arg const & arg,
515  Coefs const & coefs,
516  int const x,
517  int const y,
518  int const z) {
519  return arg.eval(x + Point::value_gpu(0),
520  y + Point::value_gpu(1),
521  z + Point::value_gpu(2)) * coefs.coef();
522  }
523  };
524 
525  NeboStencil(Arg const & a, Coefs const & coefs)
526  : arg_(a), coefs_(coefs)
527  {}
528 
529  __device__ inline value_type eval(int const x,
530  int const y,
531  int const z) const {
532  return EvalExpr<Pts>::eval(arg_, coefs_, x, y, z);
533  }
534 
535  private:
536  Arg arg_;
537 
538  Coefs const coefs_;
539  }
540  #endif
541  /* __CUDACC__ */;
542 
543  template<typename CurrentMode,
544  typename Pts,
545  typename Arg,
546  typename FieldType>
548  template<typename Pts, typename Arg, typename FieldType>
549  struct NeboEdgelessStencil<Initial, Pts, Arg, FieldType> {
550  public:
551  FieldType typedef field_type;
552 
554 
556  typedef SeqWalkType;
557 
558  #ifdef ENABLE_THREADS
559  NeboEdgelessStencil<Resize,
560  Pts,
561  typename Arg::ResizeType,
562  FieldType> typedef ResizeType;
563  #endif
564  /* ENABLE_THREADS */
565 
566  #ifdef __CUDACC__
567  NeboEdgelessStencil<GPUWalk,
568  Pts,
569  typename Arg::GPUWalkType,
570  FieldType> typedef GPUWalkType;
571  #endif
572  /* __CUDACC__ */
573 
574  NeboEdgelessStencil(Arg const & a, Coefs const & coefs)
575  : arg_(a), coefs_(coefs)
576  {}
577 
578  inline GhostData ghosts_with_bc(void) const {
579  return Pts::possible_additive_ghosts(arg_.ghosts_with_bc());
580  }
581 
582  inline GhostData ghosts_without_bc(void) const {
583  return Pts::possible_additive_ghosts(arg_.ghosts_without_bc());
584  }
585 
586  inline bool has_extents(void) const { return arg_.has_extent(); }
587 
588  inline IntVec extents(void) const { return arg_.extents(); }
589 
590  inline IntVec has_bc(void) const { return arg_.has_bc(); }
591 
592  inline SeqWalkType init(IntVec const & extents,
593  GhostData const & ghosts,
594  IntVec const & hasBC) const {
595  return SeqWalkType(arg_.init(extents, ghosts, hasBC),
596  coefs_,
597  lowest_indicies(),
598  highest_indicies());
599  }
600 
601  #ifdef ENABLE_THREADS
602  inline ResizeType resize(void) const {
603  return ResizeType(arg_.resize(),
604  coefs_,
605  lowest_indicies(),
606  highest_indicies());
607  }
608  #endif
609  /* ENABLE_THREADS */
610 
611  #ifdef __CUDACC__
612  inline bool cpu_ready(void) const { return arg_.cpu_ready(); }
613 
614  inline bool gpu_ready(int const deviceIndex) const {
615  return arg_.gpu_ready(deviceIndex);
616  }
617 
618  inline GPUWalkType gpu_init(IntVec const & extents,
619  GhostData const & ghosts,
620  IntVec const & hasBC,
621  int const deviceIndex,
622  cudaStream_t const & lhsStream) const {
623  return GPUWalkType(arg_.gpu_init(extents,
624  ghosts,
625  hasBC,
626  deviceIndex,
627  lhsStream),
628  coefs_);
629  }
630 
631  inline void stream_wait_event(cudaEvent_t const & event) const {
632  arg_.stream_wait_event(event);
633  }
634 
635  #ifdef NEBO_GPU_TEST
636  inline void gpu_prep(int const deviceIndex) const {
637  arg_.gpu_prep(deviceIndex);
638  }
639  #endif
640  /* NEBO_GPU_TEST */
641  #endif
642  /* __CUDACC__ */
643 
644  inline GhostData actual_ghosts(void) const {
645  return Pts::possible_ghosts(arg_.ghosts_with_bc());
646  }
647 
648  inline IntVec lowest_indicies(void) const {
649  return -(actual_ghosts().get_minus());
650  }
651 
652  inline IntVec highest_indicies(void) const {
653  return actual_ghosts().get_plus() + extents();
654  }
655 
656  private:
657  Arg const arg_;
658 
659  Coefs const coefs_;
660  };
661  #ifdef ENABLE_THREADS
662  template<typename Pts, typename Arg, typename FieldType>
663  struct NeboEdgelessStencil<Resize, Pts, Arg, FieldType> {
664  public:
665  FieldType typedef field_type;
666 
668 
669  NeboEdgelessStencil<SeqWalk,
670  Pts,
671  typename Arg::SeqWalkType,
672  FieldType> typedef SeqWalkType;
673 
674  NeboEdgelessStencil(Arg const & arg,
675  Coefs const & coefs,
676  IntVec const & low,
677  IntVec const & high)
678  : arg_(arg), coefs_(coefs), low_(low), high_(high)
679  {}
680 
681  inline SeqWalkType init(IntVec const & extents,
682  GhostData const & ghosts,
683  IntVec const & hasBC) const {
684  return SeqWalkType(arg_.init(extents, ghosts, hasBC),
685  coefs_,
686  low_,
687  high_);
688  }
689 
690  private:
691  Arg const arg_;
692 
693  Coefs const coefs_;
694 
695  IntVec const low_;
696 
697  IntVec const high_;
698  }
699  #endif
700  /* ENABLE_THREADS */;
701  template<typename Pts, typename Arg, typename FieldType>
702  struct NeboEdgelessStencil<SeqWalk, Pts, Arg, FieldType> {
703  public:
704  FieldType typedef field_type;
705 
706  typename field_type::value_type typedef value_type;
707 
709 
710  template<typename PointCollection>
711  struct EvalExpr {
713 
714  typename PointCollection::Point typedef Point;
715 
716  typename PointCollection::Collection typedef Collection;
717 
718  static inline value_type eval(Arg const & arg,
719  Coefs const & coefs,
720  int const x,
721  int const y,
722  int const z) {
723  return EvalExpr<Collection>::eval(arg, coefs.others(), x, y, z)
724  + arg.eval(x + Point::value(0),
725  y + Point::value(1),
726  z + Point::value(2)) * coefs.coef();
727  }
728  };
729 
730  template<typename Point>
731  struct EvalExpr<NeboStencilPointCollection<Point, NeboNil> > {
732  NeboStencilCoefCollection<1> typedef Coefs;
733 
734  static inline value_type eval(Arg const & arg,
735  Coefs const & coefs,
736  int const x,
737  int const y,
738  int const z) {
739  return arg.eval(x + Point::value(0),
740  y + Point::value(1),
741  z + Point::value(2)) * coefs.coef();
742  }
743  };
744 
745  NeboEdgelessStencil(Arg const & arg,
746  Coefs const & coefs,
747  IntVec const & low,
748  IntVec const & high)
749  : arg_(arg), coefs_(coefs), low_(low), high_(high)
750  {}
751 
752  inline value_type eval(int const x, int const y, int const z) const {
753  #ifndef NDEBUG
754  IntVec index = IntVec(x, y, z);
755  if(index < low_ || index >= high_) {
756  std::ostringstream msg;
757  msg << "Nebo error in " << "Nebo Edgeless Stencil" << ":\n";
758  msg << " - " << low_ << " < " << index << " <= " << high_;
759  msg << "\n";
760  msg << "\t - " << __FILE__ << " : " << __LINE__;
761  throw(std::runtime_error(msg.str()));;
762  }
763  #endif
764  /* NDEBUG */;
765 
766  return EvalExpr<Pts>::eval(arg_, coefs_, x, y, z);
767  }
768 
769  private:
770  Arg arg_;
771 
772  Coefs const coefs_;
773 
774  IntVec const low_;
775 
776  IntVec const high_;
777  };
778  #ifdef __CUDACC__
779  template<typename Pts, typename Arg, typename FieldType>
780  struct NeboEdgelessStencil<GPUWalk, Pts, Arg, FieldType> {
781  public:
782  FieldType typedef field_type;
783 
784  typename field_type::value_type typedef value_type;
785 
787 
788  template<typename PointCollection>
789  struct EvalExpr {
791  ;
792 
793  typename PointCollection::Point typedef Point;
794 
795  typename PointCollection::Collection typedef Collection;
796 
797  __device__ static inline value_type eval(Arg const & arg,
798  Coefs const & coefs,
799  int const x,
800  int const y,
801  int const z) {
802  return EvalExpr<Collection>::eval(arg,
803  coefs.others(),
804  x,
805  y,
806  z) + arg.eval(x + Point::
807  value_gpu(0),
808  y + Point::
809  value_gpu(1),
810  z + Point::
811  value_gpu(2))
812  * coefs.coef();
813  }
814  };
815 
816  template<typename Point>
817  struct EvalExpr<NeboStencilPointCollection<Point, NeboNil> > {
818  NeboStencilCoefCollection<1> typedef Coefs;
819 
820  __device__ static inline value_type eval(Arg const & arg,
821  Coefs const & coefs,
822  int const x,
823  int const y,
824  int const z) {
825  return arg.eval(x + Point::value_gpu(0),
826  y + Point::value_gpu(1),
827  z + Point::value_gpu(2)) * coefs.coef();
828  }
829  };
830 
831  NeboEdgelessStencil(Arg const & a, Coefs const & coefs)
832  : arg_(a), coefs_(coefs)
833  {}
834 
835  __device__ inline value_type eval(int const x,
836  int const y,
837  int const z) const {
838  return EvalExpr<Pts>::eval(arg_, coefs_, x, y, z);
839  }
840 
841  private:
842  Arg arg_;
843 
844  Coefs const coefs_;
845  }
846  #endif
847  /* __CUDACC__ */;
848 
849  template<typename CurrentMode,
850  typename Pts,
851  typename Arg,
852  typename FieldType>
854  template<typename Pts, typename Arg, typename FieldType>
855  struct NeboSumStencil<Initial, Pts, Arg, FieldType> {
856  public:
857  FieldType typedef field_type;
858 
860  typedef SeqWalkType;
861 
862  #ifdef ENABLE_THREADS
864  typedef ResizeType;
865  #endif
866  /* ENABLE_THREADS */
867 
868  #ifdef __CUDACC__
870  typedef GPUWalkType;
871  #endif
872  /* __CUDACC__ */
873 
874  NeboSumStencil(Arg const & a)
875  : arg_(a)
876  {}
877 
878  inline GhostData ghosts_with_bc(void) const {
879  return Pts::possible_ghosts(arg_.ghosts_with_bc());
880  }
881 
882  inline GhostData ghosts_without_bc(void) const {
883  return Pts::possible_ghosts(arg_.ghosts_without_bc());
884  }
885 
886  inline bool has_extents(void) const { return arg_.has_extents(); }
887 
888  inline IntVec extents(void) const { return arg_.extents(); }
889 
890  inline IntVec has_bc(void) const { return arg_.has_bc(); }
891 
892  inline SeqWalkType init(IntVec const & extents,
893  GhostData const & ghosts,
894  IntVec const & hasBC) const {
895  return SeqWalkType(arg_.init(extents, ghosts, hasBC));
896  }
897 
898  #ifdef ENABLE_THREADS
899  inline ResizeType resize(void) const {
900  return ResizeType(arg_.resize());
901  }
902  #endif
903  /* ENABLE_THREADS */
904 
905  #ifdef __CUDACC__
906  inline bool cpu_ready(void) const { return arg_.cpu_ready(); }
907 
908  inline bool gpu_ready(int const deviceIndex) const {
909  return arg_.gpu_ready(deviceIndex);
910  }
911 
912  inline GPUWalkType gpu_init(IntVec const & extents,
913  GhostData const & ghosts,
914  IntVec const & hasBC,
915  int const deviceIndex,
916  cudaStream_t const & lhsStream) const {
917  return GPUWalkType(arg_.gpu_init(extents,
918  ghosts,
919  hasBC,
920  deviceIndex,
921  lhsStream));
922  }
923 
924  inline void stream_wait_event(cudaEvent_t const & event) const {
925  arg_.stream_wait_event(event);
926  }
927 
928  #ifdef NEBO_GPU_TEST
929  inline void gpu_prep(int const deviceIndex) const {
930  arg_.gpu_prep(deviceIndex);
931  }
932  #endif
933  /* NEBO_GPU_TEST */
934  #endif
935  /* __CUDACC__ */
936 
937  private:
938  Arg const arg_;
939  };
940  #ifdef ENABLE_THREADS
941  template<typename Pts, typename Arg, typename FieldType>
942  struct NeboSumStencil<Resize, Pts, Arg, FieldType> {
943  public:
944  FieldType typedef field_type;
945 
947  typedef SeqWalkType;
948 
949  NeboSumStencil(Arg const & arg)
950  : arg_(arg)
951  {}
952 
953  inline SeqWalkType init(IntVec const & extents,
954  GhostData const & ghosts,
955  IntVec const & hasBC) const {
956  return SeqWalkType(arg_.init(extents, ghosts, hasBC));
957  }
958 
959  private:
960  Arg const arg_;
961  }
962  #endif
963  /* ENABLE_THREADS */;
964  template<typename Pts, typename Arg, typename FieldType>
965  struct NeboSumStencil<SeqWalk, Pts, Arg, FieldType> {
966  public:
967  FieldType typedef field_type;
968 
969  typename field_type::value_type typedef value_type;
970 
971  template<typename PointCollection>
972  struct EvalExpr {
973  typename PointCollection::Point typedef Point;
974 
975  typename PointCollection::Collection typedef Collection;
976 
977  static inline value_type eval(Arg const & arg,
978  int const x,
979  int const y,
980  int const z) {
981  return EvalExpr<Collection>::eval(arg, x, y, z) + arg.eval(x +
982  Point::
983  value(0),
984  y +
985  Point::
986  value(1),
987  z +
988  Point::
989  value(2));
990  }
991  };
992 
993  template<typename Point>
994  struct EvalExpr<NeboStencilPointCollection<Point, NeboNil> > {
995  static inline value_type eval(Arg const & arg,
996  int const x,
997  int const y,
998  int const z) {
999  return arg.eval(x + Point::value(0),
1000  y + Point::value(1),
1001  z + Point::value(2));
1002  }
1003  };
1004 
1005  NeboSumStencil(Arg const & arg)
1006  : arg_(arg)
1007  {}
1008 
1009  inline value_type eval(int const x, int const y, int const z) const {
1010  return EvalExpr<Pts>::eval(arg_, x, y, z);
1011  }
1012 
1013  private:
1014  Arg arg_;
1015  };
1016  #ifdef __CUDACC__
1017  template<typename Pts, typename Arg, typename FieldType>
1018  struct NeboSumStencil<GPUWalk, Pts, Arg, FieldType> {
1019  public:
1020  FieldType typedef field_type;
1021 
1022  typename field_type::value_type typedef value_type;
1023 
1024  template<typename PointCollection>
1025  struct EvalExpr {
1026  typename PointCollection::Point typedef Point;
1027 
1028  typename PointCollection::Collection typedef Collection;
1029 
1030  __device__ static inline value_type eval(Arg const & arg,
1031  int const x,
1032  int const y,
1033  int const z) {
1034  return EvalExpr<Collection>::eval(arg, x, y, z) + arg.eval(x
1035  +
1036  Point::
1037  value_gpu(0),
1038  y
1039  +
1040  Point::
1041  value_gpu(1),
1042  z
1043  +
1044  Point::
1045  value_gpu(2));
1046  }
1047  };
1048 
1049  template<typename Point>
1050  struct EvalExpr<NeboStencilPointCollection<Point, NeboNil> > {
1051  __device__ static inline value_type eval(Arg const & arg,
1052  int const x,
1053  int const y,
1054  int const z) {
1055  return arg.eval(x + Point::value_gpu(0),
1056  y + Point::value_gpu(1),
1057  z + Point::value_gpu(2));
1058  }
1059  };
1060 
1061  NeboSumStencil(Arg const & a)
1062  : arg_(a)
1063  {}
1064 
1065  __device__ inline value_type eval(int const x,
1066  int const y,
1067  int const z) const {
1068  return EvalExpr<Pts>::eval(arg_, x, y, z);
1069  }
1070 
1071  private:
1072  Arg arg_;
1073  }
1074  #endif
1075  /* __CUDACC__ */;
1076 
1077  template<typename Point>
1078  static inline GhostData point_possible_ghosts(GhostData const & ghosts) {
1079  return ghosts + additive_reductive_point_to_ghost(Point::int_vec());
1080  };
1081 
1082  template<typename CurrentMode,
1083  typename Point,
1084  typename Arg,
1085  typename FieldType>
1087  template<typename Point, typename Arg, typename FieldType>
1088  struct NeboMaskShift<Initial, Point, Arg, FieldType> {
1089  public:
1090  FieldType typedef field_type;
1091 
1092  typename Arg::SeqWalkType typedef ArgSeqWalkType;
1093 
1094  #ifdef __CUDACC__
1095  typename Arg::GPUWalkType typedef ArgGPUWalkType;
1096  #endif
1097  /* __CUDACC__ */
1098 
1100  SeqWalkType;
1101 
1102  #ifdef ENABLE_THREADS
1104  typedef ResizeType;
1105  #endif
1106  /* ENABLE_THREADS */
1107 
1108  #ifdef __CUDACC__
1110  GPUWalkType;
1111  #endif
1112  /* __CUDACC__ */
1113 
1114  NeboMaskShift(Arg const & a)
1115  : arg_(a)
1116  {}
1117 
1118  inline GhostData ghosts_with_bc(void) const {
1119  return point_possible_ghosts<Point>(arg_.ghosts_with_bc());
1120  }
1121 
1122  inline GhostData ghosts_without_bc(void) const {
1123  return point_possible_ghosts<Point>(arg_.ghosts_without_bc());
1124  }
1125 
1126  inline bool has_extents(void) const { return arg_.has_extents(); }
1127 
1128  inline IntVec extents(void) const { return arg_.extents(); }
1129 
1130  inline IntVec has_bc(void) const { return arg_.has_bc(); }
1131 
1132  inline SeqWalkType init(IntVec const & extents,
1133  GhostData const & ghosts,
1134  IntVec const & hasBC) const {
1135  return SeqWalkType(arg_.init(extents, ghosts, hasBC));
1136  }
1137 
1138  #ifdef ENABLE_THREADS
1139  inline ResizeType resize(void) const {
1140  return ResizeType(arg_.resize());
1141  }
1142  #endif
1143  /* ENABLE_THREADS */
1144 
1145  #ifdef __CUDACC__
1146  inline bool cpu_ready(void) const { return arg_.cpu_ready(); }
1147 
1148  inline bool gpu_ready(int const deviceIndex) const {
1149  return arg_.gpu_ready(deviceIndex);
1150  }
1151 
1152  inline GPUWalkType gpu_init(IntVec const & extents,
1153  GhostData const & ghosts,
1154  IntVec const & hasBC,
1155  int const deviceIndex,
1156  cudaStream_t const & lhsStream) const {
1157  return GPUWalkType(arg_.gpu_init(extents,
1158  ghosts,
1159  hasBC,
1160  deviceIndex,
1161  lhsStream));
1162  }
1163 
1164  inline void stream_wait_event(cudaEvent_t const & event) const {
1165  arg_.stream_wait_event(event);
1166  }
1167 
1168  #ifdef NEBO_GPU_TEST
1169  inline void gpu_prep(int const deviceIndex) const {
1170  arg_.gpu_prep(deviceIndex);
1171  }
1172  #endif
1173  /* NEBO_GPU_TEST */
1174  #endif
1175  /* __CUDACC__ */
1176 
1177  private:
1178  Arg const arg_;
1179  };
1180  #ifdef ENABLE_THREADS
1181  template<typename Point, typename Arg, typename FieldType>
1182  struct NeboMaskShift<Resize, Point, Arg, FieldType> {
1183  public:
1184  FieldType typedef field_type;
1185 
1186  typename Arg::SeqWalkType typedef ArgSeqWalkType;
1187 
1189  SeqWalkType;
1190 
1191  NeboMaskShift(Arg const & arg)
1192  : arg_(arg)
1193  {}
1194 
1195  inline SeqWalkType init(IntVec const & extents,
1196  GhostData const & ghosts,
1197  IntVec const & hasBC) const {
1198  return SeqWalkType(arg_.init(extents, ghosts, hasBC));
1199  }
1200 
1201  private:
1202  Arg const arg_;
1203  }
1204  #endif
1205  /* ENABLE_THREADS */;
1206  template<typename Point, typename Arg, typename FieldType>
1207  struct NeboMaskShift<SeqWalk, Point, Arg, FieldType> {
1208  public:
1209  FieldType typedef field_type;
1210 
1211  typename field_type::value_type typedef value_type;
1212 
1213  NeboMaskShift(Arg const & arg)
1214  : arg_(arg)
1215  {}
1216 
1217  inline bool eval(int const x, int const y, int const z) const {
1218  return arg_.eval(x + Point::value(0),
1219  y + Point::value(1),
1220  z + Point::value(2));
1221  }
1222 
1223  private:
1224  Arg arg_;
1225  };
1226  #ifdef __CUDACC__
1227  template<typename Point, typename Arg, typename FieldType>
1228  struct NeboMaskShift<GPUWalk, Point, Arg, FieldType> {
1229  public:
1230  FieldType typedef field_type;
1231 
1232  typename field_type::value_type typedef value_type;
1233 
1234  NeboMaskShift(Arg const & a)
1235  : arg_(a)
1236  {}
1237 
1238  __device__ inline bool eval(int const x, int const y, int const z) const {
1239  return arg_.eval(x + Point::value_gpu(0),
1240  y + Point::value_gpu(1),
1241  z + Point::value_gpu(2));
1242  }
1243 
1244  private:
1245  Arg arg_;
1246  }
1247  #endif
1248  /* __CUDACC__ */;
1249  } /* SpatialOps */
1250 
1251 #endif
1252 /* NEBO_STENCILS_H */
Holds information about the number of ghost cells on each side of the domain.
Definition: GhostData.h:54