SpatialOps
NeboLhs.h
/* This file was generated by fulmar version 0.9.2. */

/*
 * Copyright (c) 2014 The University of Utah
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef NEBO_LHS_H
#define NEBO_LHS_H

namespace SpatialOps {
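  /* CUDA entry point for Nebo assignments: a __global__ kernel that simply
   * forwards the iteration limits to the GPUWalk-mode left-hand side, which
   * evaluates the right-hand side expression point by point on the device.
   */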
#ifdef __CUDACC__
  template<typename LhsType, typename RhsType>
  __global__ void gpu_assign_kernel(LhsType lhs,
                                    RhsType rhs,
                                    int const xLow,
                                    int const xHigh,
                                    int const yLow,
                                    int const yHigh,
                                    int const zLow,
                                    int const zHigh) {
    lhs.gpuwalk_assign(rhs, xLow, xHigh, yLow, yHigh, zLow, zHigh);
  }
#endif
  /* __CUDACC__ */;

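  /* NeboField is specialized on a mode tag describing the stage of an
   * assignment:
   *   - Initial: user-facing wrapper; decides where the assignment runs
   *     (CPU/GPU, serial/threaded) and computes the iteration limits,
   *   - Resize (ENABLE_THREADS only): per-partition handle used by the thread pool,
   *   - SeqWalk: sequential triple loop over one partition on the CPU,
   *   - GPUWalk (__CUDACC__ only): per-thread evaluation inside the CUDA kernel.
   */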
  template<typename CurrentMode, typename FieldType>
  struct NeboField;

  template<typename FieldType>
  struct NeboField<Initial, FieldType> {
  public:
    FieldType typedef field_type;

    NeboField<SeqWalk, FieldType> typedef SeqWalkType;

#ifdef ENABLE_THREADS
    NeboField<Resize, FieldType> typedef ResizeType;
#endif
    /* ENABLE_THREADS */

#ifdef __CUDACC__
    NeboField<GPUWalk, FieldType> typedef GPUWalkType;
#endif
    /* __CUDACC__ */

    NeboField(FieldType f)
      : field_(f)
    {}

    template<typename RhsType>
    inline void assign(bool const useGhost, RhsType rhs) {
      GhostData const ghosts = calculate_actual_ghost(useGhost,
                                                      field_.get_ghost_data(),
                                                      field_.boundary_info(),
                                                      rhs.ghosts_with_bc());

      IntVec const extents = field_.window_with_ghost().extent() -
                             field_.get_valid_ghost_data().get_minus() -
                             field_.get_valid_ghost_data().get_plus();

      IntVec const hasBC = field_.boundary_info().has_bc();

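      /* Iteration limits per direction run from -ghosts on the (-) face to
       * extent + ghosts on the (+) face, so the requested ghost cells are
       * included in the walk. */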
      const GhostData limits = GhostData(- ghosts.get_minus(0),
                                         extents[0] + ghosts.get_plus(0),
                                         - ghosts.get_minus(1),
                                         extents[1] + ghosts.get_plus(1),
                                         - ghosts.get_minus(2),
                                         extents[2] + ghosts.get_plus(2));

      if(limits.get_plus(0) - limits.get_minus(0) > 0 &&
         limits.get_plus(1) - limits.get_minus(1) > 0 &&
         limits.get_plus(2) - limits.get_minus(2) > 0) {
        /* field_.reset_valid_ghosts(ghosts) */;

#ifdef __CUDACC__
#ifdef NEBO_GPU_TEST
        gpu_test_assign<RhsType>(rhs, extents, ghosts, hasBC, limits)
#else
        if(gpu_ready()) {
          if(rhs.gpu_ready(gpu_device_index())) {
            gpu_assign<RhsType>(rhs, extents, ghosts, hasBC, limits);
          }
          else {
            std::ostringstream msg;
            msg << "Nebo error in " << "Nebo Assignment" << ":\n";
            msg << "Left-hand side of assignment allocated ";
            msg << "on ";
            msg << "GPU but right-hand side is not ";
            msg << "(completely) accessible on the same GPU";
            msg << "\n";
            msg << "\t - " << __FILE__ << " : " << __LINE__;
            throw(std::runtime_error(msg.str()));
          };
        }
        else {
          if(cpu_ready()) {
            if(rhs.cpu_ready()) {
              cpu_assign<RhsType>(rhs, extents, ghosts, hasBC, limits);
            }
            else {
              std::ostringstream msg;
              msg << "Nebo error in " << "Nebo Assignment" << ":\n";
              msg << "Left-hand side of assignment allocated ";
              msg << "on ";
              msg << "CPU but right-hand side is not ";
              msg << "(completely) accessible on the same CPU";
              msg << "\n";
              msg << "\t - " << __FILE__ << " : " << __LINE__;
              throw(std::runtime_error(msg.str()));
            };
          }
          else {
            std::ostringstream msg;
            msg << "Nebo error in " << "Nebo Assignment" << ":\n";
            msg << "Left-hand side of assignment allocated ";
            msg << "on ";
            msg << "unknown device - not on CPU or GPU";
            msg << "\n";
            msg << "\t - " << __FILE__ << " : " << __LINE__;
            throw(std::runtime_error(msg.str()));
          };
        }
#endif
        /* NEBO_GPU_TEST */
#else
        cpu_assign<RhsType>(rhs, extents, ghosts, hasBC, limits)
#endif
        /* __CUDACC__ */;
      };
    }

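    /* Masked assignment: evaluates rhs only at the points listed in the mask.
     * This path runs on the CPU only; if the field currently lives on a GPU,
     * an error is thrown. */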
    template<typename RhsType>
    inline void masked_assign(SpatialMask<FieldType> const & mask, RhsType rhs) {
#ifdef NEBO_REPORT_BACKEND
      std::cout << "Starting Nebo masked assignment" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;

#ifdef __CUDACC__
      if(gpu_ready()) {
        std::ostringstream msg;
        msg << "Nebo error in " << "Nebo Masked Assignment" << ":\n";
        msg << "Left-hand side of masked assignment allocated on ";
        msg << "GPU and this backend does not support GPU execution";
        msg << "\n";
        msg << "\t - " << __FILE__ << " : " << __LINE__;
        throw(std::runtime_error(msg.str()));
      }
      else {
        if(cpu_ready()) {
          if(rhs.cpu_ready()) {
            SeqWalkType lhs = init();

            typename RhsType::SeqWalkType expr = rhs.init(IntVec(0, 0, 0),
                                                          GhostData(0),
                                                          IntVec(0, 0, 0));

            std::vector<IntVec>::const_iterator ip = mask.points().begin();

            std::vector<IntVec>::const_iterator const ep = mask.points().end();

            for(; ip != ep; ip++) {
              int const x = (*ip)[0];
              int const y = (*ip)[1];
              int const z = (*ip)[2];

              lhs.ref(x, y, z) = expr.eval(x, y, z);
            };
          }
          else {
            std::ostringstream msg;
            msg << "Nebo error in " << "Nebo Assignment" << ":\n";
            msg << "Left-hand side of assignment allocated ";
            msg << "on ";
            msg << "CPU but right-hand side is not ";
            msg << "(completely) accessible on the same CPU";
            msg << "\n";
            msg << "\t - " << __FILE__ << " : " << __LINE__;
            throw(std::runtime_error(msg.str()));
          };
        }
        else {
          std::ostringstream msg;
          msg << "Nebo error in " << "Nebo Assignment" << ":\n";
          msg << "Left-hand side of assignment allocated ";
          msg << "on ";
          msg << "unknown device - not on CPU or GPU";
          msg << "\n";
          msg << "\t - " << __FILE__ << " : " << __LINE__;
          throw(std::runtime_error(msg.str()));
        };
      }
#else
      {
        SeqWalkType lhs = init();

        typename RhsType::SeqWalkType expr = rhs.init(IntVec(0, 0, 0),
                                                      GhostData(0),
                                                      IntVec(0, 0, 0));

        std::vector<IntVec>::const_iterator ip = mask.points().begin();

        std::vector<IntVec>::const_iterator const ep = mask.points().end();

        for(; ip != ep; ip++) {
          int const x = (*ip)[0];
          int const y = (*ip)[1];
          int const z = (*ip)[2];

          lhs.ref(x, y, z) = expr.eval(x, y, z);
        };
      }
#endif
      /* __CUDACC__ */;

#ifdef NEBO_REPORT_BACKEND
      std::cout << "Finished Nebo masked assignment" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;
    }

    inline SeqWalkType init(void) { return SeqWalkType(field_); }

  private:
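    /* CPU dispatch: run thread-parallel when ENABLE_THREADS is defined and
     * thread parallelism is active, otherwise fall back to a sequential walk. */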
    template<typename RhsType>
    inline void cpu_assign(RhsType rhs,
                           IntVec const & extents,
                           GhostData const & ghosts,
                           IntVec const & hasBC,
                           GhostData const limits) {
#ifdef ENABLE_THREADS
      if(is_thread_parallel()) {
        thread_parallel_assign<RhsType>(rhs, extents, ghosts, hasBC, limits);
      }
      else {
        sequential_assign<RhsType>(rhs, extents, ghosts, hasBC, limits);
      }
#else
      sequential_assign<RhsType>(rhs, extents, ghosts, hasBC, limits)
#endif
      /* ENABLE_THREADS */;
    }

    template<typename RhsType>
    inline void sequential_assign(RhsType rhs,
                                  IntVec const & extents,
                                  GhostData const & ghosts,
                                  IntVec const & hasBC,
                                  GhostData const limits) {
#ifdef NEBO_REPORT_BACKEND
      std::cout << "Starting Nebo sequential" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;

      init().seqwalk_assign(rhs.init(extents, ghosts, hasBC), limits);

#ifdef NEBO_REPORT_BACKEND
      std::cout << "Finished Nebo sequential" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;
    }

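    /* Threaded backend: partition the iteration limits across the thread pool,
     * schedule one resize_assign task per partition, and block on a semaphore
     * until every task has posted completion. */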
#ifdef ENABLE_THREADS
    template<typename RhsType>
    inline void thread_parallel_assign(RhsType rhs,
                                       IntVec const & extents,
                                       GhostData const & ghosts,
                                       IntVec const & hasBC,
                                       GhostData const limits) {
#ifdef NEBO_REPORT_BACKEND
      std::cout << "Starting Nebo thread parallel" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;

      Semaphore semaphore(0);

      const int thread_count = field_.get_partition_count();

      typename RhsType::ResizeType typedef RhsResizeType;

      ResizeType new_lhs = resize();

      RhsResizeType new_rhs = rhs.resize();

      GhostData localLimits;

      const IntVec split = nebo_find_partition(IntVec(limits.get_plus(0) - limits.get_minus(0),
                                                      limits.get_plus(1) - limits.get_minus(1),
                                                      limits.get_plus(2) - limits.get_minus(2)),
                                               thread_count);

      const int max = nebo_partition_count(split);

      IntVec location = IntVec(0, 0, 0);

      for(int count = 0; count < max; count++) {
        nebo_set_up_extents(location, split, localLimits, limits);

        ThreadPoolFIFO::self().schedule(boost::bind(&ResizeType::template resize_assign<RhsResizeType>,
                                                    new_lhs,
                                                    new_rhs,
                                                    extents,
                                                    ghosts,
                                                    hasBC,
                                                    localLimits,
                                                    &semaphore));

        location = nebo_next_partition(location, split);
      };

      for(int ii = 0; ii < max; ii++) { semaphore.wait(); };

#ifdef NEBO_REPORT_BACKEND
      std::cout << "Finished Nebo thread parallel" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;
    }

    inline ResizeType resize(void) { return ResizeType(field_); }
#endif
    /* ENABLE_THREADS */

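    /* CUDA backend: launch gpu_assign_kernel with 16x16 thread blocks tiling
     * the x-y extent of the iteration limits; each device thread then loops
     * over z. In debug builds (NDEBUG undefined) the stream is synchronized
     * and checked before and after the launch. */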
#ifdef __CUDACC__
    template<typename RhsType>
    inline void gpu_assign(RhsType rhs,
                           IntVec const & extents,
                           GhostData const & ghosts,
                           IntVec const & hasBC,
                           GhostData const limits) {
#ifdef NEBO_REPORT_BACKEND
      std::cout << "Starting Nebo CUDA" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;

      typename RhsType::GPUWalkType typedef RhsGPUWalkType;

      int xExtent = limits.get_plus(0) - limits.get_minus(0);

      int yExtent = limits.get_plus(1) - limits.get_minus(1);

      int blockDim = 16;

      int xGDim = xExtent / blockDim + ((xExtent % blockDim) > 0 ? 1 : 0);

      int yGDim = yExtent / blockDim + ((yExtent % blockDim) > 0 ? 1 : 0);

      dim3 dimBlock(blockDim, blockDim);

      dim3 dimGrid(xGDim, yGDim);

#ifndef NDEBUG
      cudaError err;

      if(cudaSuccess != (err = cudaStreamSynchronize(field_.get_stream()))) {
        std::ostringstream msg;
        msg << "Nebo error in " << "CUDA Kernel - before call" << ":\n";
        msg << " - " << cudaGetErrorString(err);
        msg << "\n";
        msg << "\t - " << __FILE__ << " : " << __LINE__;
        throw(std::runtime_error(msg.str()));
      }
#endif
      /* NDEBUG */;

      gpu_assign_kernel<GPUWalkType, RhsGPUWalkType>
        <<<dimGrid, dimBlock, 0, field_.get_stream()>>>(
          gpu_init(),
          rhs.gpu_init(extents, ghosts, hasBC, gpu_device_index(), field_.get_stream()),
          limits.get_minus(0),
          limits.get_plus(0),
          limits.get_minus(1),
          limits.get_plus(1),
          limits.get_minus(2),
          limits.get_plus(2));

      cudaEventRecord(field_.get_last_event(), field_.get_stream());

      rhs.stream_wait_event(field_.get_last_event());

#ifndef NDEBUG
      if(cudaSuccess != (err = cudaStreamSynchronize(field_.get_stream()))) {
        std::ostringstream msg;
        msg << "Nebo error in " << "CUDA Kernel - after call" << ":\n";
        msg << " - " << cudaGetErrorString(err);
        msg << "\n";
        msg << "\t - " << __FILE__ << " : " << __LINE__;
        throw(std::runtime_error(msg.str()));
      }
#endif
      /* NDEBUG */;

#ifdef NEBO_REPORT_BACKEND
      std::cout << "Finished Nebo CUDA" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;
    }

    inline bool cpu_ready(void) const {
      return IS_CPU_INDEX(field_.active_device_index());
    }

    inline bool gpu_ready(void) const {
      return IS_GPU_INDEX(field_.active_device_index());
    }

    inline int gpu_device_index(void) const {
      return field_.active_device_index();
    }

    inline GPUWalkType gpu_init(void) { return GPUWalkType(field_); }

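    /* Test-only path (NEBO_GPU_TEST): when the field lives on the CPU, copy it
     * to a temporary GPU field, run the CUDA assignment there, and copy the
     * result back; otherwise run the normal GPU assignment. */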
#ifdef NEBO_GPU_TEST
    template<typename RhsType>
    inline void gpu_test_assign(RhsType rhs,
                                IntVec const & extents,
                                GhostData const & ghosts,
                                IntVec const & hasBC,
                                GhostData const limits) {
#ifdef NEBO_REPORT_BACKEND
      std::cout << "Starting Nebo CUDA with Nebo copying" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;

      rhs.gpu_prep(0);

      if(CPU_INDEX == field_.active_device_index()) {
        FieldType gpu_field(field_.window_with_ghost(),
                            field_.boundary_info(),
                            field_.get_valid_ghost_data(),
                            NULL,
                            InternalStorage,
                            GPU_INDEX);

        NeboField<Initial, FieldType> gpu_lhs(gpu_field);

        ema::cuda::CUDADeviceInterface & CDI = ema::cuda::CUDADeviceInterface::self();

        FieldType const & ftmp_ = field_;

        CDI.memcpy_to(gpu_field.field_values(GPU_INDEX),
                      ftmp_.field_values(),
                      ftmp_.allocated_bytes(),
                      0,
                      ftmp_.get_stream());

        gpu_lhs.template gpu_assign<RhsType>(rhs, extents, ghosts, hasBC, limits);

        CDI.memcpy_from(field_.field_values(),
                        gpu_field.field_values(GPU_INDEX),
                        field_.allocated_bytes(),
                        0,
                        field_.get_stream());
      }
      else {
        gpu_assign<RhsType>(rhs, extents, ghosts, hasBC, limits);
      };

#ifdef NEBO_REPORT_BACKEND
      std::cout << "Finished Nebo CUDA with Nebo copying" << std::endl
#endif
      /* NEBO_REPORT_BACKEND */;
    }
#endif
    /* NEBO_GPU_TEST */
#endif
    /* __CUDACC__ */

    FieldType field_;
  };
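  /* Resize mode: thread-pool handle for one partition of a threaded
   * assignment; resize_assign() performs the sequential walk over its local
   * limits and posts the shared semaphore when done. */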
#ifdef ENABLE_THREADS
  template<typename FieldType>
  struct NeboField<Resize, FieldType> {
  public:
    FieldType typedef field_type;

    NeboField<SeqWalk, FieldType> typedef SeqWalkType;

    NeboField(FieldType f)
      : field_(f)
    {}

#ifdef ENABLE_THREADS
    template<typename RhsType>
    inline void resize_assign(RhsType const & rhs,
                              IntVec const & extents,
                              GhostData const & ghosts,
                              IntVec const & hasBC,
                              GhostData const limits,
                              Semaphore * semaphore) {
      init().seqwalk_assign(rhs.init(extents, ghosts, hasBC), limits);

      semaphore->post();
    }
#endif
    /* ENABLE_THREADS */

  private:
    inline SeqWalkType init(void) { return SeqWalkType(field_); }

    FieldType field_;
  }
#endif
  /* ENABLE_THREADS */;
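  /* SeqWalk mode: holds a raw pointer positioned so that index (0,0,0) is the
   * first point past the valid (-) ghosts, plus the global x/y dimensions, and
   * performs the z/y/x triple loop assigning rhs.eval(x, y, z) via ref(). */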
  template<typename FieldType>
  struct NeboField<SeqWalk, FieldType> {
  public:
    FieldType typedef field_type;

    typename field_type::value_type typedef value_type;

    NeboField(FieldType f)
      : xGlob_(f.window_with_ghost().glob_dim(0)),
        yGlob_(f.window_with_ghost().glob_dim(1)),
        base_(f.field_values(CPU_INDEX) +
              (f.window_with_ghost().offset(0) + f.get_valid_ghost_data().get_minus(0)) +
              (f.window_with_ghost().glob_dim(0) *
               ((f.window_with_ghost().offset(1) + f.get_valid_ghost_data().get_minus(1)) +
                (f.window_with_ghost().glob_dim(1) *
                 (f.window_with_ghost().offset(2) + f.get_valid_ghost_data().get_minus(2))))))
    {}

    template<typename RhsType>
    inline void seqwalk_assign(RhsType rhs, GhostData const limits) {
      for(int z = limits.get_minus(2); z < limits.get_plus(2); z++) {
        for(int y = limits.get_minus(1); y < limits.get_plus(1); y++) {
          for(int x = limits.get_minus(0); x < limits.get_plus(0); x++) {
            ref(x, y, z) = rhs.eval(x, y, z);
          };
        };
      };
    }

    inline value_type & ref(int const x, int const y, int const z) {
      return base_[x + xGlob_ * (y + (yGlob_ * z))];
    }

  private:
    int const xGlob_;

    int const yGlob_;

    value_type * base_;
  };
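  /* GPUWalk mode: device-side counterpart of SeqWalk. Each CUDA thread maps
   * its (blockIdx, threadIdx) coordinates to one (x, y) column, checks that it
   * falls inside the limits via start()/valid(), and loops over z. */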
#ifdef __CUDACC__
  template<typename FieldType>
  struct NeboField<GPUWalk, FieldType> {
  public:
    FieldType typedef field_type;

    typename field_type::value_type typedef value_type;

    NeboField(FieldType f)
      : base_(f.field_values(f.active_device_index()) +
              (f.window_with_ghost().offset(0) + f.get_valid_ghost_data().get_minus(0)) +
              (f.window_with_ghost().glob_dim(0) *
               ((f.window_with_ghost().offset(1) + f.get_valid_ghost_data().get_minus(1)) +
                (f.window_with_ghost().glob_dim(1) *
                 (f.window_with_ghost().offset(2) + f.get_valid_ghost_data().get_minus(2)))))),
        valid_(false),
        xGlob_(f.window_with_ghost().glob_dim(0)),
        yGlob_(f.window_with_ghost().glob_dim(1))
    {}

    template<typename RhsType>
    __device__ inline void gpuwalk_assign(RhsType rhs,
                                          int const xLow,
                                          int const xHigh,
                                          int const yLow,
                                          int const yHigh,
                                          int const zLow,
                                          int const zHigh) {
      const int ii = blockIdx.x * blockDim.x + threadIdx.x;

      const int jj = blockIdx.y * blockDim.y + threadIdx.y;

      const int x = ii + xLow;

      const int y = jj + yLow;

      start(x, y, xHigh, yHigh);

      for(int z = zLow; z < zHigh; z++) {
        if(valid()) { ref(x, y, z) = rhs.eval(x, y, z); };
      };
    }

  private:
    __device__ inline bool valid(void) { return valid_; }

    __device__ inline void start(int x, int y, int const xHigh, int const yHigh) {
      valid_ = (x < xHigh && y < yHigh);
    }

    __device__ inline value_type & ref(int const x, int const y, int const z) {
      return base_[x + xGlob_ * (y + (yGlob_ * z))];
    }

    value_type * base_;

    int valid_;

    int const xGlob_;

    int const yGlob_;
  }
#endif
  /* __CUDACC__ */;
} /* SpatialOps */

#endif
/* NEBO_LHS_H */