SpatialOps
CudaMemoryAllocator.h
1 /* Copyright (c) 2014-2017 The University of Utah
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to
5  * deal in the Software without restriction, including without limitation the
6  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7  * sell copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19  * IN THE SOFTWARE.
20  */
// Verbose CUDA debugging switch: when DEBUG_CUDA_VERBOSE is defined, every
// DEBUG_EXT_* macro listed below is defined as well.
#if defined(DEBUG_CUDA_VERBOSE)
#  define DEBUG_EXT_ALLOC_CUDA_DEVICE_MNGR
#  define DEBUG_EXT_ALLOC_MEM
#  define DEBUG_EXT_CUDA_SHARED_PTR
#  define DEBUG_EXT_SET_MEM
#endif
41 
42 //Standard includes
43 #include <iostream>
44 #include <string>
45 #include <sstream>
46 #include <stdexcept>
47 #include <vector>
48 
49 //Spatial Operator includes
50 #include <spatialops/structured/ExternalAllocators.h>
51 
52 //Boost includes
53 #ifdef ENABLE_THREADS
54 #include <boost/thread/mutex.hpp>
55 #include <boost/interprocess/sync/scoped_lock.hpp>
56 #endif
57 
58 #ifndef CUDAMEMORYALLOCATOR_H_
59 #define CUDAMEMORYALLOCATOR_H_
60 
// Shorthand for a one-byte storage unit.
// The replacement text must not end in a semicolon: with `char;` any use
// inside a declarator or expression (e.g. `byte* p`, `sizeof(byte)`) expands
// to ill-formed code.
#define byte char
62 
63 namespace ema {
64 namespace cuda {
65 
66 /*---- CUDA wrappers with error checking/processing */
67 void CudaSetDevice(const int device);
68 void CudaMalloc(void** src, const size_t sz, const unsigned int device);
69 void CudaHostAlloc(void**, const size_t sz, const unsigned int device);
70 void CudaFree(void* src, const unsigned int device);
71 void CudaFreeHost(void* src, const unsigned int device);
72 void CudaMemcpy(void* src, const void* dest, const unsigned int device, const size_t sz,
73  cudaMemcpyKind cmkk);
74 void CudaStreamSync(cudaStream_t stream );
75 
76 /* \brief Device management structure for all GPU devices */
78  friend class CUDADeviceInterface;
80 
81  public:
82 
84 
/// NOTE(review): returns a reference to a CUDADeviceManager; presumably the
/// single shared instance -- confirm against the definition.
86  static CUDADeviceManager& self();
87 
/// Returns the number of available CUDA capable compute devices.
89  int get_device_count() const;
90 
/// Performs synchronization on a stream.
92  void sync_stream( cudaStream_t stream );
93 
/// Returns the best possible device from multiple GPUs.
95  int get_best_device() const;
96 
/// Returns the memory structure associated with device K (K defaults to 0).
/// NOTE(review): `cms` is a non-const reference, presumably the output -- confirm.
98  void get_memory_statistics( CUDAMemStats& cms, const int K = 0 ) const;
99 
105 
/// Outputs a list of all available CUDA hardware and compute capabilities.
107  void print_device_info() const;
108 
109  private:
110  int device_count;
111  std::vector<cudaDeviceProp*> device_props;
112  std::vector<CUDAMemStats*> device_stats;
113 
114 
115  //TODO-> not sure if this needs a mutex... maybe later (NEED SOME MORE INFO TO IT)
/**
 * \brief Scoped execution lock.
 *
 * With ENABLE_THREADS defined, constructing an ExecMutex locks one
 * function-local static boost::mutex and keeps it locked until the object
 * is destroyed. Without ENABLE_THREADS the class is an empty placeholder.
 */
class ExecMutex {
# ifdef ENABLE_THREADS
  // Static: it is called from the constructor's initializer list, before
  // `lock` exists, and it never touches instance state.
  static boost::mutex& get_mutex() { static boost::mutex m; return m; }

  // Held for the lifetime of this object; the lock's destructor releases it.
  const boost::mutex::scoped_lock lock;

 public:
  ExecMutex() : lock( get_mutex() ) {}
  ~ExecMutex() {}
# else
 public:
  ExecMutex(){}
  ~ExecMutex(){}
# endif
};
135 };
136 
137 } // End Namespace ema_CUDA
138 } // End Namespace ema
139 
140 #endif /* CUDAMEMORYALLOCATOR_H_ */
void update_memory_statistics()
Updates the 'device_stats' structures with the most current memory usage statistics. Please note that ...
int get_best_device() const
Returns the best possible device from multiple GPUs.
void sync_stream(cudaStream_t stream)
perform synchronization on a stream
void get_memory_statistics(CUDAMemStats &cms, const int K=0) const
Returns the memory structure associated with device K.
int get_device_count() const
Returns the number of available CUDA capable compute devices.
void print_device_info() const
output a list of all available CUDA hardware and compute capabilities