SpatialOps
NeboLhs.h
1 /* This file was generated by fulmar version 0.9.2. */
2 
3 /*
4  * Copyright (c) 2014-2017 The University of Utah
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #ifndef NEBO_LHS_H
26  #define NEBO_LHS_H
27 
28  namespace SpatialOps {
29  #ifdef __CUDACC__
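/* CUDA kernel used by the Initial-mode assignment below: each device thread
 * simply forwards to NeboField<GPUWalk, FieldType>::gpuwalk_assign() with the
 * x/y/z iteration bounds computed on the host. */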
30  template<typename LhsType, typename RhsType, typename OptionalArgT>
31  __global__ void gpu_assign_kernel(LhsType lhs,
32  RhsType rhs,
33  int const xLow,
34  int const xHigh,
35  int const yLow,
36  int const yHigh,
37  int const zLow,
38  int const zHigh) {
39  lhs.template gpuwalk_assign<RhsType, OptionalArgT>(rhs,
40  xLow,
41  xHigh,
42  yLow,
43  yHigh,
44  zLow,
45  zHigh);
46  }
47  #endif
48  /* __CUDACC__ */;
49 
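/* NeboField is the left-hand side of a Nebo assignment.  It is specialized on
 * an evaluation mode: Initial (entry point that picks a backend at run time),
 * Resize (one partition of a thread-parallel assignment), SeqWalk (sequential
 * CPU loop nest), and GPUWalk (per-thread device access under CUDA). */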
50  template<typename CurrentMode, typename FieldType>
51  struct NeboField;
52  template<typename FieldType>
53  struct NeboField<Initial, FieldType> {
54  public:
55  FieldType typedef field_type;
56 
57  NeboField<Initial, FieldType> typedef MyType;
58 
59  NeboField<SeqWalk, FieldType> typedef SeqWalkType;
60 
61  #ifdef ENABLE_THREADS
62  NeboField<Resize, FieldType> typedef ResizeType;
63  #endif
64  /* ENABLE_THREADS */
65 
66  #ifdef __CUDACC__
67  NeboField<GPUWalk, FieldType> typedef GPUWalkType;
68  #endif
69  /* __CUDACC__ */
70 
71  NeboField(FieldType f)
72  : field_(f)
73  {}
74 
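/* assign(): computes the ghost cells that can actually be updated, the
 * interior extents, the (+)-side boundary flags, and the resulting iteration
 * limits, then dispatches through EvalTimeFlagsAndAssign so that the run-time
 * boundary-condition flags become compile-time template arguments. */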
75  template<typename RhsType>
76  inline void assign(bool const useGhost, RhsType rhs) {
77  GhostData const ghosts = calculate_actual_ghost(useGhost,
78  field_.get_ghost_data(),
79  field_.boundary_info(),
80  rhs.ghosts_with_bc());
81 
82  IntVec const extents = field_.window_with_ghost().extent() -
83  field_.get_valid_ghost_data().get_minus() - field_.get_valid_ghost_data().get_plus();
84 
85  IntVec const hasBC = field_.boundary_info().has_bc(PLUS_SIDE);
86 
87  const GhostData limits = GhostData(- ghosts.get_minus(0),
88  extents[0] + ghosts.get_plus(0),
89  - ghosts.get_minus(1),
90  extents[1] + ghosts.get_plus(1),
91  - ghosts.get_minus(2),
92  extents[2] + ghosts.get_plus(2));
93 
94  EvalTimeFlagsAndAssign(field_.boundary_info().has_bc(MINUS_SIDE),
95  hasBC)(*this,
96  rhs,
97  extents,
98  ghosts,
99  hasBC,
100  limits);
101  }
102 
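/* assign_after_eval_flags(): runs only if the iteration volume is non-empty.
 * Under __CUDACC__ it executes on the GPU when both sides are resident on the
 * same device and throws otherwise (with NEBO_GPU_TEST defined it always
 * routes through gpu_test_assign); without CUDA, or when the field lives in
 * host memory, it falls back to cpu_assign(). */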
103  template<typename RhsType, typename OptionalArgT>
104  inline void assign_after_eval_flags(RhsType rhs,
105  IntVec const & extents,
106  GhostData const & ghosts,
107  IntVec const & hasBC,
108  GhostData const limits) {
109  if(limits.get_plus(0) - limits.get_minus(0) > 0 &&
110  limits.get_plus(1) - limits.get_minus(1) > 0 &&
111  limits.get_plus(2) - limits.get_minus(2) > 0) {
112  /* field_.reset_valid_ghosts(ghosts) */;
113 
114  #ifdef __CUDACC__
115  #ifdef NEBO_GPU_TEST
116  gpu_test_assign<RhsType, OptionalArgT>(rhs,
117  extents,
118  ghosts,
119  hasBC,
120  limits)
121  #else
122  if(gpu_ready()) {
123  if(rhs.gpu_ready(gpu_device_index())) {
124  gpu_assign<RhsType, OptionalArgT>(rhs,
125  extents,
126  ghosts,
127  hasBC,
128  limits);
129  }
130  else {
131  std::ostringstream msg;
132  msg << "Nebo error in " << "Nebo Assignment" <<
133  ":\n";
134  msg << "Left-hand side of assignment allocated ";
135  msg << "on ";
136  msg << "GPU but right-hand side is not ";
137  msg << "(completely) accessible on the same GPU";
138  msg << "\n";
139  msg << "\t - " << __FILE__ << " : " << __LINE__;
140  throw(std::runtime_error(msg.str()));
141  };
142  }
143  else {
144  if(cpu_ready()) {
145  if(rhs.cpu_ready()) {
146  cpu_assign<RhsType, OptionalArgT>(rhs,
147  extents,
148  ghosts,
149  hasBC,
150  limits);
151  }
152  else {
153  std::ostringstream msg;
154  msg << "Nebo error in " << "Nebo Assignment" <<
155  ":\n";
156  msg << "Left-hand side of assignment allocated ";
157  msg << "on ";
158  msg << "CPU but right-hand side is not ";
159  msg << "(completely) accessible on the same CPU"
160  ;
161  msg << "\n";
162  msg << "\t - " << __FILE__ << " : " << __LINE__;
163  throw(std::runtime_error(msg.str()));
164  };
165  }
166  else {
167  std::ostringstream msg;
168  msg << "Nebo error in " << "Nebo Assignment" <<
169  ":\n";
170  msg << "Left-hand side of assignment allocated ";
171  msg << "on ";
172  msg << "unknown device - not on CPU or GPU";
173  msg << "\n";
174  msg << "\t - " << __FILE__ << " : " << __LINE__;
175  throw(std::runtime_error(msg.str()));
176  };
177  }
178  #endif
179  /* NEBO_GPU_TEST */
180  #else
181  cpu_assign<RhsType, OptionalArgT>(rhs,
182  extents,
183  ghosts,
184  hasBC,
185  limits)
186  #endif
187  /* __CUDACC__ */;
188  };
189  }
190 
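/* masked_assign(): assigns rhs only at the points listed in the mask, routing
 * the run-time boundary-condition flags through EvalTimeFlagsAndMaskedAssign
 * in the same way that assign() uses EvalTimeFlagsAndAssign. */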
191  template<typename RhsType>
192  inline void masked_assign(SpatialMask<FieldType> const & mask,
193  RhsType rhs) {
194  EvalTimeFlagsAndMaskedAssign(field_.boundary_info().has_bc(MINUS_SIDE),
195  field_.boundary_info().has_bc(PLUS_SIDE))(mask,
196  *
197  this,
198  rhs,
199  IntVec(0,
200  0,
201  0),
202  GhostData(0),
203  IntVec(0,
204  0,
205  0),
206  GhostData(0));
207  }
208 
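/* masked_assign_after(): CPU-only evaluation of a masked assignment.  It
 * throws if the left-hand side is GPU-resident; otherwise it walks the mask's
 * point list and evaluates the right-hand side at each (x, y, z). */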
209  template<typename RhsType, typename OptionalArgT>
210  inline void masked_assign_after(SpatialMask<FieldType> const & mask,
211  RhsType rhs,
212  IntVec const & extents,
213  GhostData const & ghosts,
214  IntVec const & hasBC,
215  GhostData const limits) {
216  #ifdef NEBO_REPORT_BACKEND
217  std::cout << "Starting Nebo masked assignment" << std::endl
218  #endif
219  /* NEBO_REPORT_BACKEND */;
220 
221  #ifdef __CUDACC__
222  if(gpu_ready()) {
223  std::ostringstream msg;
224  msg << "Nebo error in " << "Nebo Masked Assignment" << ":\n"
225  ;
226  msg << "Left-hand side of masked assignment allocated on ";
227  msg << "GPU and this backend does not support GPU execution"
228  ;
229  msg << "\n";
230  msg << "\t - " << __FILE__ << " : " << __LINE__;
231  throw(std::runtime_error(msg.str()));
232  }
233  else {
234  if(cpu_ready()) {
235  if(rhs.cpu_ready()) {
236  SeqWalkType lhs = init();
237 
238  typename RhsType::SeqWalkType expr = rhs.init(IntVec(0,
239  0,
240  0),
241  GhostData(0),
242  IntVec(0,
243  0,
244  0));
245 
246  std::vector<IntVec>::const_iterator ip = mask.points().begin();
247 
248  std::vector<IntVec>::const_iterator const ep = mask.points().end();
249 
250  for(; ip != ep; ip++) {
251  int const x = (*ip)[0];
252 
253  int const y = (*ip)[1];
254 
255  int const z = (*ip)[2];
256 
257  lhs.ref(x, y, z) = expr.template eval<OptionalArgT>(x,
258  y,
259  z);
260  };
261  }
262  else {
263  std::ostringstream msg;
264  msg << "Nebo error in " << "Nebo Assignment" << ":\n";
265  msg << "Left-hand side of assignment allocated ";
266  msg << "on ";
267  msg << "CPU but right-hand side is not ";
268  msg << "(completely) accessible on the same CPU";
269  msg << "\n";
270  msg << "\t - " << __FILE__ << " : " << __LINE__;
271  throw(std::runtime_error(msg.str()));
272  };
273  }
274  else {
275  std::ostringstream msg;
276  msg << "Nebo error in " << "Nebo Assignment" << ":\n";
277  msg << "Left-hand side of assignment allocated ";
278  msg << "on ";
279  msg << "unknown device - not on CPU or GPU";
280  msg << "\n";
281  msg << "\t - " << __FILE__ << " : " << __LINE__;
282  throw(std::runtime_error(msg.str()));
283  };
284  }
285  #else
286  {
287  SeqWalkType lhs = init();
288 
289  typename RhsType::SeqWalkType expr = rhs.init(IntVec(0, 0, 0),
290  GhostData(0),
291  IntVec(0, 0, 0));
292 
293  std::vector<IntVec>::const_iterator ip = mask.points().begin();
294 
295  std::vector<IntVec>::const_iterator const ep = mask.points().end();
296 
297  for(; ip != ep; ip++) {
298  int const x = (*ip)[0];
299 
300  int const y = (*ip)[1];
301 
302  int const z = (*ip)[2];
303 
304  lhs.ref(x, y, z) = expr.template eval<OptionalArgT>(x,
305  y,
306  z);
307  };
308  }
309  #endif
310  /* __CUDACC__ */;
311 
312  #ifdef NEBO_REPORT_BACKEND
313  std::cout << "Finished Nebo masked assignment" << std::endl
314  #endif
315  /* NEBO_REPORT_BACKEND */;
316  }
317 
318  inline SeqWalkType init(void) { return SeqWalkType(field_); }
319 
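/* EvalTimeFlagsAndAssign: converts the run-time has_bc flags for x, y and z
 * into the compile-time tags HasBCOnX/HasBCOnY/HasBCOnZ (one nested call per
 * direction) and then invokes assign_after_eval_flags() with the accumulated
 * CompileTimeOptionalArgs pack. */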
320  struct EvalTimeFlagsAndAssign {
321  EvalTimeFlagsAndAssign(IntVec const negBC, IntVec const plusBC)
322  : negBC_(negBC), plusBC_(plusBC)
323  {}
324 
325  template<typename RhsType, typename ... OptionalArgT>
326  inline void after_flags_determined(MyType & lhs,
327  RhsType rhs,
328  IntVec const & extents,
329  GhostData const & ghosts,
330  IntVec const & hasBC,
331  GhostData const limits) {
332  lhs.template assign_after_eval_flags<RhsType,
333  CompileTimeOptionalArgs<OptionalArgT
334  ...,
335  void>
336  >(rhs, extents, ghosts, hasBC, limits);
337  }
338 
339  template<typename RhsType>
340  inline void operator()(MyType & lhs,
341  RhsType rhs,
342  IntVec const & extents,
343  GhostData const & ghosts,
344  IntVec const & hasBC,
345  GhostData const limits) {
346  determine_bc_flag_x<RhsType>(lhs,
347  rhs,
348  extents,
349  ghosts,
350  hasBC,
351  limits);
352  }
353 
354  private:
355  template<typename RhsType, typename ... OptionalArgT>
356  inline void determine_bc_flag_z(MyType & lhs,
357  RhsType rhs,
358  IntVec const & extents,
359  GhostData const & ghosts,
360  IntVec const & hasBC,
361  GhostData const limits) {
362  using namespace CompileTimeOptionalArgsNamespace;
363 
364  if(negBC_[2] || plusBC_[2]) {
365  after_flags_determined<RhsType, HasBCOnZ, OptionalArgT ...>(lhs,
366  rhs,
367  extents,
368  ghosts,
369  hasBC,
370  limits);
371  }
372  else {
373  after_flags_determined<RhsType, OptionalArgT ...>(lhs,
374  rhs,
375  extents,
376  ghosts,
377  hasBC,
378  limits);
379  };
380  }
381 
382  template<typename RhsType, typename ... OptionalArgT>
383  inline void determine_bc_flag_y(MyType & lhs,
384  RhsType rhs,
385  IntVec const & extents,
386  GhostData const & ghosts,
387  IntVec const & hasBC,
388  GhostData const limits) {
389  using namespace CompileTimeOptionalArgsNamespace;
390 
391  if(negBC_[1] || plusBC_[1]) {
392  determine_bc_flag_z<RhsType, HasBCOnY, OptionalArgT ...>(lhs,
393  rhs,
394  extents,
395  ghosts,
396  hasBC,
397  limits);
398  }
399  else {
400  determine_bc_flag_z<RhsType, OptionalArgT ...>(lhs,
401  rhs,
402  extents,
403  ghosts,
404  hasBC,
405  limits);
406  };
407  }
408 
409  template<typename RhsType, typename ... OptionalArgT>
410  inline void determine_bc_flag_x(MyType & lhs,
411  RhsType rhs,
412  IntVec const & extents,
413  GhostData const & ghosts,
414  IntVec const & hasBC,
415  GhostData const limits) {
416  using namespace CompileTimeOptionalArgsNamespace;
417 
418  if(negBC_[0] || plusBC_[0]) {
419  determine_bc_flag_y<RhsType, HasBCOnX, OptionalArgT ...>(lhs,
420  rhs,
421  extents,
422  ghosts,
423  hasBC,
424  limits);
425  }
426  else {
427  determine_bc_flag_y<RhsType, OptionalArgT ...>(lhs,
428  rhs,
429  extents,
430  ghosts,
431  hasBC,
432  limits);
433  };
434  }
435 
436  private:
437  IntVec const negBC_;
438 
439  IntVec const plusBC_;
440  };
441 
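/* EvalTimeFlagsAndMaskedAssign: the same flag-to-template-argument conversion
 * as EvalTimeFlagsAndAssign, but forwarding to masked_assign_after(). */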
442  struct EvalTimeFlagsAndMaskedAssign {
443  EvalTimeFlagsAndMaskedAssign(IntVec const negBC,
444  IntVec const plusBC)
445  : negBC_(negBC), plusBC_(plusBC)
446  {}
447 
448  template<typename RhsType, typename ... OptionalArgT>
449  inline void after_flags_determined(SpatialMask<FieldType> const &
450  mask,
451  MyType & lhs,
452  RhsType rhs,
453  IntVec const & extents,
454  GhostData const & ghosts,
455  IntVec const & hasBC,
456  GhostData const limits) {
457  lhs.template masked_assign_after<RhsType,
458  CompileTimeOptionalArgs<OptionalArgT
459  ...,
460  void> >(mask,
461  rhs,
462  extents,
463  ghosts,
464  hasBC,
465  limits);
466  }
467 
468  template<typename RhsType>
469  inline void operator()(SpatialMask<FieldType> const & mask,
470  MyType & lhs,
471  RhsType rhs,
472  IntVec const & extents,
473  GhostData const & ghosts,
474  IntVec const & hasBC,
475  GhostData const limits) {
476  determine_bc_flag_x<RhsType>(mask,
477  lhs,
478  rhs,
479  extents,
480  ghosts,
481  hasBC,
482  limits);
483  }
484 
485  private:
486  template<typename RhsType, typename ... OptionalArgT>
487  inline void determine_bc_flag_z(SpatialMask<FieldType> const &
488  mask,
489  MyType & lhs,
490  RhsType rhs,
491  IntVec const & extents,
492  GhostData const & ghosts,
493  IntVec const & hasBC,
494  GhostData const limits) {
495  using namespace CompileTimeOptionalArgsNamespace;
496 
497  if(negBC_[2] || plusBC_[2]) {
498  after_flags_determined<RhsType, HasBCOnZ, OptionalArgT ...>(mask,
499  lhs,
500  rhs,
501  extents,
502  ghosts,
503  hasBC,
504  limits);
505  }
506  else {
507  after_flags_determined<RhsType, OptionalArgT ...>(mask,
508  lhs,
509  rhs,
510  extents,
511  ghosts,
512  hasBC,
513  limits);
514  };
515  }
516 
517  template<typename RhsType, typename ... OptionalArgT>
518  inline void determine_bc_flag_y(SpatialMask<FieldType> const &
519  mask,
520  MyType & lhs,
521  RhsType rhs,
522  IntVec const & extents,
523  GhostData const & ghosts,
524  IntVec const & hasBC,
525  GhostData const limits) {
526  using namespace CompileTimeOptionalArgsNamespace;
527 
528  if(negBC_[1] || plusBC_[1]) {
529  determine_bc_flag_z<RhsType, HasBCOnY, OptionalArgT ...>(mask,
530  lhs,
531  rhs,
532  extents,
533  ghosts,
534  hasBC,
535  limits);
536  }
537  else {
538  determine_bc_flag_z<RhsType, OptionalArgT ...>(mask,
539  lhs,
540  rhs,
541  extents,
542  ghosts,
543  hasBC,
544  limits);
545  };
546  }
547 
548  template<typename RhsType, typename ... OptionalArgT>
549  inline void determine_bc_flag_x(SpatialMask<FieldType> const &
550  mask,
551  MyType & lhs,
552  RhsType rhs,
553  IntVec const & extents,
554  GhostData const & ghosts,
555  IntVec const & hasBC,
556  GhostData const limits) {
557  using namespace CompileTimeOptionalArgsNamespace;
558 
559  if(negBC_[0] || plusBC_[0]) {
560  determine_bc_flag_y<RhsType, HasBCOnX, OptionalArgT ...>(mask,
561  lhs,
562  rhs,
563  extents,
564  ghosts,
565  hasBC,
566  limits);
567  }
568  else {
569  determine_bc_flag_y<RhsType, OptionalArgT ...>(mask,
570  lhs,
571  rhs,
572  extents,
573  ghosts,
574  hasBC,
575  limits);
576  };
577  }
578 
579  private:
580  IntVec const negBC_;
581 
582  IntVec const plusBC_;
583  };
584 
585  private:
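/* cpu_assign(): with ENABLE_THREADS it checks is_thread_parallel() at run
 * time and chooses between the thread-parallel and sequential backends;
 * otherwise it always runs sequentially. */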
586  template<typename RhsType, typename OptionalArgT>
587  inline void cpu_assign(RhsType rhs,
588  IntVec const & extents,
589  GhostData const & ghosts,
590  IntVec const & hasBC,
591  GhostData const limits) {
592  #ifdef ENABLE_THREADS
593  if(is_thread_parallel()) {
594  thread_parallel_assign<RhsType, OptionalArgT>(rhs,
595  extents,
596  ghosts,
597  hasBC,
598  limits);
599  }
600  else {
601  sequential_assign<RhsType, OptionalArgT>(rhs,
602  extents,
603  ghosts,
604  hasBC,
605  limits);
606  }
607  #else
608  sequential_assign<RhsType, OptionalArgT>(rhs,
609  extents,
610  ghosts,
611  hasBC,
612  limits)
613  #endif
614  /* ENABLE_THREADS */;
615  }
616 
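/* sequential_assign(): single-threaded backend; builds SeqWalk versions of
 * both sides and walks the iteration limits in one loop nest. */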
617  template<typename RhsType, typename OptionalArgT>
618  inline void sequential_assign(RhsType rhs,
619  IntVec const & extents,
620  GhostData const & ghosts,
621  IntVec const & hasBC,
622  GhostData const limits) {
623  #ifdef NEBO_REPORT_BACKEND
624  std::cout << "Starting Nebo sequential" << std::endl
625  #endif
626  /* NEBO_REPORT_BACKEND */;
627 
628  init().template seqwalk_assign<typename RhsType::SeqWalkType,
629  OptionalArgT>(rhs.init(extents,
630  ghosts,
631  hasBC),
632  limits);
633 
634  #ifdef NEBO_REPORT_BACKEND
635  std::cout << "Finished Nebo sequential" << std::endl
636  #endif
637  /* NEBO_REPORT_BACKEND */;
638  }
639 
640  #ifdef ENABLE_THREADS
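/* thread_parallel_assign(): partitions the iteration limits across the
 * field's partition count, schedules one resize_assign() task per partition
 * on ThreadPoolFIFO, and waits on a Semaphore until every task has posted. */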
641  template<typename RhsType, typename OptionalArgT>
642  inline void thread_parallel_assign(RhsType rhs,
643  IntVec const & extents,
644  GhostData const & ghosts,
645  IntVec const & hasBC,
646  GhostData const limits) {
647  #ifdef NEBO_REPORT_BACKEND
648  std::cout << "Starting Nebo thread parallel" << std::endl
649  #endif
650  /* NEBO_REPORT_BACKEND */;
651 
652  Semaphore semaphore(0);
653 
654  const int thread_count = field_.get_partition_count();
655 
656  typename RhsType::ResizeType typedef RhsResizeType;
657 
658  ResizeType new_lhs = resize();
659 
660  RhsResizeType new_rhs = rhs.resize();
661 
662  GhostData localLimits;
663 
664  const IntVec split = nebo_find_partition(IntVec(limits.get_plus(0)
665  - limits.get_minus(0),
666  limits.get_plus(1)
667  - limits.get_minus(1),
668  limits.get_plus(2)
669  - limits.get_minus(2)),
670  thread_count);
671 
672  const int max = nebo_partition_count(split);
673 
674  IntVec location = IntVec(0, 0, 0);
675 
676  for(int count = 0; count < max; count++) {
677  nebo_set_up_extents(location, split, localLimits, limits);
678 
679  ThreadPoolFIFO::self().schedule(boost::bind(&ResizeType::
680  template
681  resize_assign<RhsResizeType,
682  OptionalArgT>,
683  new_lhs,
684  new_rhs,
685  extents,
686  ghosts,
687  hasBC,
688  localLimits,
689  &semaphore));
690 
691  location = nebo_next_partition(location, split);
692  };
693 
694  for(int ii = 0; ii < max; ii++) { semaphore.wait(); };
695 
696  #ifdef NEBO_REPORT_BACKEND
697  std::cout << "Finished Nebo thread parallel" << std::endl
698  #endif
699  /* NEBO_REPORT_BACKEND */;
700  }
701 
702  inline ResizeType resize(void) { return ResizeType(field_); }
703  #endif
704  /* ENABLE_THREADS */
705 
706  #ifdef __CUDACC__
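/* gpu_assign(): CUDA backend.  Launches gpu_assign_kernel with 16x16 thread
 * blocks covering the x/y extent of the iteration limits on the field's CUDA
 * stream, records an event for the right-hand side's streams to wait on, and
 * (in debug builds) synchronizes the stream before and after the launch to
 * surface errors. */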
707  template<typename RhsType, typename OptionalArgT>
708  inline void gpu_assign(RhsType rhs,
709  IntVec const & extents,
710  GhostData const & ghosts,
711  IntVec const & hasBC,
712  GhostData const limits) {
713  #ifdef NEBO_REPORT_BACKEND
714  std::cout << "Starting Nebo CUDA" << std::endl
715  #endif
716  /* NEBO_REPORT_BACKEND */;
717 
718  typename RhsType::GPUWalkType typedef RhsGPUWalkType;
719 
720  int xExtent = limits.get_plus(0) - limits.get_minus(0);
721 
722  int yExtent = limits.get_plus(1) - limits.get_minus(1);
723 
724  int blockDim = 16;
725 
726  int xGDim = xExtent / blockDim + ((xExtent % blockDim) > 0 ? 1
727  : 0);
728 
729  int yGDim = yExtent / blockDim + ((yExtent % blockDim) > 0 ? 1
730  : 0);
731 
732  dim3 dimBlock(blockDim, blockDim);
733 
734  dim3 dimGrid(xGDim, yGDim);
735 
736  #ifndef NDEBUG
737  cudaError err;
738 
739  if(cudaSuccess != (err = cudaStreamSynchronize(field_.get_stream())))
740  {
741  std::ostringstream msg;
742  msg << "Nebo error in " << "CUDA Kernel - before call" <<
743  ":\n";
744  msg << " - " << cudaGetErrorString(err);
745  msg << "\n";
746  msg << "\t - " << __FILE__ << " : " << __LINE__;
747  throw(std::runtime_error(msg.str()));
748  }
749  #endif
750  /* NDEBUG */;
751 
752  gpu_assign_kernel<GPUWalkType, RhsGPUWalkType, OptionalArgT><<<dimGrid,
753  dimBlock,
754  0,
755  field_.get_stream()>>>(gpu_init(),
756  rhs.gpu_init(extents,
757  ghosts,
758  hasBC,
759  gpu_device_index(),
760  field_.get_stream()),
761  limits.get_minus(0),
762  limits.get_plus(0),
763  limits.get_minus(1),
764  limits.get_plus(1),
765  limits.get_minus(2),
766  limits.get_plus(2));
767 
768  cudaEventRecord(field_.get_last_event(), field_.get_stream());
769 
770  rhs.stream_wait_event(field_.get_last_event());
771 
772  #ifndef NDEBUG
773  if(cudaSuccess != (err = cudaStreamSynchronize(field_.get_stream())))
774  {
775  std::ostringstream msg;
776  msg << "Nebo error in " << "CUDA Kernel - after call" <<
777  ":\n";
778  msg << " - " << cudaGetErrorString(err);
779  msg << "\n";
780  msg << "\t - " << __FILE__ << " : " << __LINE__;
781  throw(std::runtime_error(msg.str()));
782  }
783  #endif
784  /* NDEBUG */;
785 
786  #ifdef NEBO_REPORT_BACKEND
787  std::cout << "Finished Nebo CUDA" << std::endl
788  #endif
789  /* NEBO_REPORT_BACKEND */;
790  }
791 
792  inline bool cpu_ready(void) const {
793  return IS_CPU_INDEX(field_.active_device_index());
794  }
795 
796  inline bool gpu_ready(void) const {
797  return IS_GPU_INDEX(field_.active_device_index());
798  }
799 
800  inline int gpu_device_index(void) const {
801  return field_.active_device_index();
802  }
803 
804  inline GPUWalkType gpu_init(void) { return GPUWalkType(field_); }
805 
806  #ifdef NEBO_GPU_TEST
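/* gpu_test_assign() (NEBO_GPU_TEST only): if the field currently lives in CPU
 * memory, allocates a temporary GPU copy, copies the data over, runs the
 * assignment on the GPU, and copies the result back; otherwise it calls
 * gpu_assign() directly. */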
807  template<typename RhsType, typename OptionalArgT>
808  inline void gpu_test_assign(RhsType rhs,
809  IntVec const & extents,
810  GhostData const & ghosts,
811  IntVec const & hasBC,
812  GhostData const limits) {
813  #ifdef NEBO_REPORT_BACKEND
814  std::cout << "Starting Nebo CUDA with Nebo copying" <<
815  std::endl
816  #endif
817  /* NEBO_REPORT_BACKEND */;
818 
819  rhs.gpu_prep(0);
820 
821  if(CPU_INDEX == field_.active_device_index()) {
822  FieldType gpu_field(field_.window_with_ghost(),
823  field_.boundary_info(),
824  field_.get_valid_ghost_data(),
825  NULL,
826  InternalStorage,
827  GPU_INDEX);
828 
829  NeboField<Initial, FieldType> gpu_lhs(gpu_field);
830 
831  ema::cuda::CUDADeviceInterface & CDI = ema::cuda::
832  CUDADeviceInterface::self();
833 
834  FieldType const & ftmp_ = field_;
835 
836  CDI.memcpy_to(gpu_field.field_values(GPU_INDEX),
837  ftmp_.field_values(),
838  ftmp_.allocated_bytes(),
839  0,
840  ftmp_.get_stream());
841 
842  gpu_lhs.template gpu_assign<RhsType, OptionalArgT>(rhs,
843  extents,
844  ghosts,
845  hasBC,
846  limits);
847 
848  CDI.memcpy_from(field_.field_values(),
849  gpu_field.field_values(GPU_INDEX),
850  field_.allocated_bytes(),
851  0,
852  field_.get_stream());
853  }
854  else {
855  gpu_assign<RhsType, OptionalArgT>(rhs,
856  extents,
857  ghosts,
858  hasBC,
859  limits);
860  };
861 
862  #ifdef NEBO_REPORT_BACKEND
863  std::cout << "Finished Nebo CUDA with Nebo copying" <<
864  std::endl
865  #endif
866  /* NEBO_REPORT_BACKEND */;
867  }
868  #endif
869  /* NEBO_GPU_TEST */
870  #endif
871  /* __CUDACC__ */
872 
873  FieldType field_;
874  };
875  #ifdef ENABLE_THREADS
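/* Resize mode: used by the thread-parallel backend.  resize_assign() runs the
 * sequential walk over one partition's limits and posts to the semaphore when
 * done. */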
876  template<typename FieldType>
877  struct NeboField<Resize, FieldType> {
878  public:
879  FieldType typedef field_type;
880 
881  NeboField<SeqWalk, FieldType> typedef SeqWalkType;
882 
883  NeboField(FieldType f)
884  : field_(f)
885  {}
886 
887  #ifdef ENABLE_THREADS
888  template<typename RhsType, typename OptionalArgT>
889  inline void resize_assign(RhsType const & rhs,
890  IntVec const & extents,
891  GhostData const & ghosts,
892  IntVec const & hasBC,
893  GhostData const limits,
894  Semaphore * semaphore) {
895  init().template seqwalk_assign<typename RhsType::SeqWalkType,
896  OptionalArgT>(rhs.init(extents,
897  ghosts,
898  hasBC),
899  limits);
900 
901  semaphore->post();
902  }
903  #endif
904  /* ENABLE_THREADS */
905 
906  private:
907  inline SeqWalkType init(void) { return SeqWalkType(field_); }
908 
909  FieldType field_;
910  }
911  #endif
912  /* ENABLE_THREADS */;
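/* SeqWalk mode: caches the global x/y dimensions and a pointer offset to the
 * field's interior origin; seqwalk_assign() is a z/y/x loop nest that
 * evaluates the right-hand side at each point, and ref() performs the
 * flat-index arithmetic x + xGlob*(y + yGlob*z). */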
913  template<typename FieldType>
914  struct NeboField<SeqWalk, FieldType> {
915  public:
916  FieldType typedef field_type;
917 
918  typename field_type::value_type typedef value_type;
919 
920  NeboField(FieldType f)
921  : xGlob_(f.window_with_ghost().glob_dim(0)),
922  yGlob_(f.window_with_ghost().glob_dim(1)),
923  base_(f.field_values(CPU_INDEX) + (f.window_with_ghost().offset(0) +
924  f.get_valid_ghost_data().get_minus(0))
925  + (f.window_with_ghost().glob_dim(0) * ((f.window_with_ghost().offset(1)
926  + f.get_valid_ghost_data().get_minus(1))
927  + (f.window_with_ghost().glob_dim(1)
928  * (f.window_with_ghost().offset(2)
929  + f.get_valid_ghost_data().get_minus(2))))))
930  {}
931 
932  template<typename RhsType, typename OptionalArgT>
933  inline void seqwalk_assign(RhsType rhs, GhostData const limits) {
934  for(int z = limits.get_minus(2); z < limits.get_plus(2); z++) {
935  for(int y = limits.get_minus(1); y < limits.get_plus(1); y++) {
936  for(int x = limits.get_minus(0); x < limits.get_plus(0); x++)
937  { ref(x, y, z) = rhs.template eval<OptionalArgT>(x, y, z); };
938  };
939  };
940  }
941 
942  inline value_type & ref(int const x, int const y, int const z) {
943  return base_[x + xGlob_ * (y + (yGlob_ * z))];
944  }
945 
946  private:
947  int const xGlob_;
948 
949  int const yGlob_;
950 
951  value_type * base_;
952  };
953  #ifdef __CUDACC__
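/* GPUWalk mode: device-side counterpart of SeqWalk.  Each CUDA thread owns
 * one (x, y) column and loops over z; start()/valid() mask off threads whose
 * (x, y) indices fall outside the iteration limits. */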
954  template<typename FieldType>
955  struct NeboField<GPUWalk, FieldType> {
956  public:
957  FieldType typedef field_type;
958 
959  typename field_type::value_type typedef value_type;
960 
961  NeboField(FieldType f)
962  : base_(f.field_values(f.active_device_index()) + (f.window_with_ghost().offset(0)
963  + f.get_valid_ghost_data().get_minus(0))
964  + (f.window_with_ghost().glob_dim(0) * ((f.window_with_ghost().offset(1)
965  + f.get_valid_ghost_data().get_minus(1))
966  + (f.window_with_ghost().glob_dim(1)
967  * (f.window_with_ghost().offset(2)
968  + f.get_valid_ghost_data().get_minus(2)))))),
969  valid_(false),
970  xGlob_(f.window_with_ghost().glob_dim(0)),
971  yGlob_(f.window_with_ghost().glob_dim(1))
972  {}
973 
974  template<typename RhsType, typename OptionalArgT>
975  __device__ inline void gpuwalk_assign(RhsType rhs,
976  int const xLow,
977  int const xHigh,
978  int const yLow,
979  int const yHigh,
980  int const zLow,
981  int const zHigh) {
982  const int ii = blockIdx.x * blockDim.x + threadIdx.x;
983 
984  const int jj = blockIdx.y * blockDim.y + threadIdx.y;
985 
986  const int x = ii + xLow;
987 
988  const int y = jj + yLow;
989 
990  start(x, y, xHigh, yHigh);
991 
992  for(int z = zLow; z < zHigh; z++) {
993  if(valid()) {
994  ref(x, y, z) = rhs.template eval<OptionalArgT>(x, y, z);
995  };
996  };
997  }
998 
999  private:
1000  __device__ inline bool valid(void) { return valid_; }
1001 
1002  __device__ inline void start(int x,
1003  int y,
1004  int const xHigh,
1005  int const yHigh) {
1006  valid_ = (x < xHigh && y < yHigh);
1007  }
1008 
1009  __device__ inline value_type & ref(int const x,
1010  int const y,
1011  int const z) {
1012  return base_[x + xGlob_ * (y + (yGlob_ * z))];
1013  }
1014 
1015  value_type * base_;
1016 
1017  int valid_;
1018 
1019  int const xGlob_;
1020 
1021  int const yGlob_;
1022  }
1023  #endif
1024  /* __CUDACC__ */;
1025  } /* SpatialOps */
1026 
1027 #endif
1028 /* NEBO_LHS_H */
Referenced symbols:
  SpatialMask (SpatialMask.h:70): abstracts a mask.
    points() (SpatialMask.h:187): returns a reference to the list of points in the given list; not supported for external field types.
  GhostData (GhostData.h:54): holds information about the number of ghost cells on each side of the domain.
    get_minus() (GhostData.h:135): the IntVec containing the number of ghost cells on the (-) faces.
    get_plus() (GhostData.h:145): the IntVec containing the number of ghost cells on the (+) faces.
  Semaphore (Semaphore.h:20): provides resource management for multithreaded situations.
    post() (Semaphore.h:29): releases a resource.
    wait() (Semaphore.h:38): waits until a resource is available (a call to post() is made).
  ThreadPoolFIFO::self() (ThreadPool.cpp:194): obtains the singleton instance of ThreadPoolFIFO.
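For orientation, NeboField<Initial, FieldType>::assign() is normally reached through Nebo's assignment operator rather than called by hand. The sketch below is a minimal, hypothetical usage example; the header paths, the SVolField type, and the <<= front end are assumptions based on the usual SpatialOps/Nebo interface and are not defined in this file.

#include <spatialops/Nebo.h>                    // assumed Nebo front-end header
#include <spatialops/structured/FVStaggered.h>  // assumed structured field definitions

using namespace SpatialOps;

// result, a, and b are assumed to be fields with compatible windows and ghosts.
void axpy_example( SVolField & result, const SVolField & a, const SVolField & b )
{
  // The <<= operator builds the right-hand-side expression tree and hands it
  // to the Initial-mode assign() in this header, which then selects the
  // sequential, thread-parallel, or CUDA backend at run time.
  result <<= a + 2.0 * b;
}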