MatVecOps.h
/*
 * Copyright (c) 2014-2017 The University of Utah
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 * ----------------------------------------------------------------------------
 */

#ifndef SpatialOps_MatVecOps_h
#define SpatialOps_MatVecOps_h
#define USE_EIGEN
#include <cassert>   // assert
#include <cmath>     // std::isnan, std::isinf
#include <stdexcept>
#include <sstream>
#include <spatialops/SpatialOpsConfigure.h>
#ifdef USE_EIGEN
#include <Eigen/Dense> // Eigen library
#else // !USE_EIGEN
// Boost::ublas includes //
#include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/vector_proxy.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/triangular.hpp>
#include <boost/numeric/ublas/lu.hpp>
#include <boost/numeric/ublas/io.hpp>
#endif // USE_EIGEN
#ifdef ENABLE_THREADS
#include <spatialops/Semaphore.h>
#include <spatialops/ThreadPool.h>
#endif // ENABLE_THREADS
#ifdef __CUDACC__
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include "spatialops/structured/CudaMatVecUtility.h"
#endif

namespace SpatialOps
{
template< typename FieldT >
struct DotProduct;
template< typename FieldT >
struct MatVecMult;

template< typename OpT >
class BinaryMatOp
{
public:
  typedef typename OpT::LeftT LeftT;
  typedef typename OpT::RightT RightT;
  typedef typename OpT::ResultT ResultT;

  BinaryMatOp ( const LeftT& left, const RightT& right )
    : op1_ ( &left ), op2_ ( &right )
  {}

  void eval ( ResultT* const result ) const;
private:
  const LeftT* const op1_;
  const RightT* const op2_;
};

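/*
 * Usage sketch (illustrative; the names A, x, b are hypothetical and
 * construction details are omitted): these wrapper classes defer
 * evaluation.  An operator application builds a lightweight object holding
 * pointers to its operands, and calling eval() with a pointer to the
 * destination performs the actual work:
 *
 * \code
 *   FieldMatrix<FieldT> A( ... );
 *   FieldVector<FieldT> x( ... ), b( ... );
 *   MatVecOp< MatVecMult<FieldT> > expr = A * x;  // no work done yet
 *   expr.eval( &b );                              // b = A * x computed here
 * \endcode
 */
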
template< typename OpT >
class BinaryMatScalarOp
{
public:
  typedef typename OpT::LeftT LeftT;
  typedef typename OpT::RightT RightT;
  typedef typename OpT::ResultT ResultT;

  BinaryMatScalarOp ( const LeftT& left, const RightT& right )
    : op1_ ( left ), op2_ ( &right )
  {}

  void eval ( ResultT* const result ) const;
private:
  const LeftT op1_;
  const RightT* const op2_;
};

template< typename OpT >
class VecScalarOp : public BinaryMatScalarOp<OpT>
{
public:
  VecScalarOp ( const typename BinaryMatScalarOp<OpT>::LeftT& left, const typename BinaryMatScalarOp<OpT>::RightT& right )
    : BinaryMatScalarOp<OpT>( left, right )
  {}
};

template< typename OpT >
class MatScalarOp : public BinaryMatScalarOp<OpT>
{
public:
  MatScalarOp ( const typename BinaryMatScalarOp<OpT>::LeftT& left, const typename BinaryMatScalarOp<OpT>::RightT& right )
    : BinaryMatScalarOp<OpT>( left, right )
  {}
};

template< typename OpT >
class VecVecOp : public BinaryMatOp<OpT>
{
public:
  VecVecOp ( const typename BinaryMatOp<OpT>::LeftT& left, const typename BinaryMatOp<OpT>::RightT& right )
    : BinaryMatOp<OpT>( left, right )
  {}
};

template< typename OpT >
class MatVecOp : public BinaryMatOp<OpT>
{
public:

  MatVecOp ( const typename BinaryMatOp<OpT>::LeftT& left, const typename BinaryMatOp<OpT>::RightT& right )
    : BinaryMatOp<OpT>( left, right )
  {}
};

template< typename OpT >
class MatMatOp : public BinaryMatOp<OpT>
{
public:
  MatMatOp ( const typename BinaryMatOp<OpT>::LeftT& left, const typename BinaryMatOp<OpT>::RightT& right )
    : BinaryMatOp<OpT>( left, right )
  {}
};

template< typename OpT >
class MatUnaryOp
{
public:
  typedef typename OpT::RhsT RhsT;
  typedef typename OpT::ResultT ResultT;

  MatUnaryOp ( const RhsT& mat )
    : mat_ ( &mat )
  {}

  void eval ( ResultT* const result ) const;
private:
  const RhsT* const mat_;
};

template< typename OpT >
class BinaryRetMatUnaryOp
{
public:
  typedef typename OpT::RhsT RhsT;
  typedef typename OpT::ResultT1 ResultT1;
  typedef typename OpT::ResultT2 ResultT2;

  BinaryRetMatUnaryOp ( const RhsT& mat )
    : mat_ ( &mat )
  {}

  void eval ( ResultT1* const result1, ResultT2* const result2 ) const;
private:
  const RhsT* const mat_;
};

template< typename FieldT >
class MatVecOp< DotProduct< FieldT > >
{
public:
  typedef FieldVector< FieldT > RightT;
  typedef FieldVector< FieldT > LeftT;
  typedef FieldT ResultT;
  MatVecOp ( const LeftT& left, const RightT& right )
    : op1_ ( &left ), op2_ ( &right )
  {}
  void eval ( FieldT* const result ) const;
private:
  const LeftT* const op1_;
  const RightT* const op2_;
};

template< typename FieldT >
struct DotProduct
{
  typedef FieldVector<FieldT> RightT;
  typedef FieldVector<FieldT> LeftT;
  typedef FieldT ResultT;

  static void launch( FieldT* const result, const LeftT& vec1, const RightT& vec2 ){
    DotProduct<FieldT>::operate ( result, vec1, vec2 );
  }

  static void operate( FieldT* const result, const LeftT& vec1, const RightT& vec2 );
};

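/*
 * Note: DotProduct contracts two FieldVectors into a single FieldT, so it
 * reuses the MatVecOp wrapper through the specialization above rather than
 * a dedicated wrapper class.  The user-facing entry point is
 * dot_product_proto(), defined near the end of this file.
 */
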
template< typename OpT, typename RightT, typename LeftT, typename ResultT >
struct MatOperation
{
  static void cpu_launch( ResultT* const result, const LeftT& left, const RightT& right ){
    OpT::operate ( result, left, right );
  }
#ifdef __CUDACC__

  static void gpu_launch( ResultT* const result, const LeftT& left, const RightT& right ){
    OpT::operate ( result, left, right );
  }
#endif /* __CUDACC__ */
};

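/*
 * MatOperation is a CRTP-style base: both launch methods simply forward to
 * the derived type's static operate().  A new elementwise operation only
 * needs to derive from MatOperation and supply the operand typedefs plus an
 * operate() implementation.  Minimal sketch (hypothetical; not part of the
 * library):
 *
 * \code
 *   template< typename FieldT >
 *   struct VecVecMult : public MatOperation< VecVecMult<FieldT>,
 *                                            FieldVector<FieldT>,
 *                                            FieldVector<FieldT>,
 *                                            FieldVector<FieldT> >
 *   {
 *     typedef FieldVector<FieldT> RightT;
 *     typedef FieldVector<FieldT> LeftT;
 *     typedef FieldVector<FieldT> ResultT;
 *     static void operate( ResultT* const result, const LeftT& v1, const RightT& v2 );
 *   };
 * \endcode
 */
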
template< typename FieldT >
struct VecScalarMult : public MatOperation<VecScalarMult<FieldT>, FieldVector<FieldT>, typename FieldT::value_type, FieldVector<FieldT> >
{
  typedef FieldVector<FieldT> RightT;
  typedef typename FieldT::value_type LeftT;
  typedef FieldVector<FieldT> ResultT;

  static void operate( ResultT* const result, const LeftT scal, const RightT& vec );
};

template< typename FieldT >
struct MatScalarMult : public MatOperation<MatScalarMult<FieldT>, FieldMatrix<FieldT>, typename FieldT::value_type, FieldMatrix<FieldT> >
{
  typedef FieldMatrix<FieldT> RightT;
  typedef typename FieldT::value_type LeftT;
  typedef FieldMatrix<FieldT> ResultT;

  static void operate( ResultT* const result, const LeftT scal, const RightT& mat );
};

template< typename FieldT >
struct MatVecMult : public MatOperation<MatVecMult<FieldT>, FieldVector<FieldT>, FieldMatrix<FieldT>, FieldVector<FieldT> >
{
  typedef FieldVector<FieldT> RightT;
  typedef FieldMatrix<FieldT> LeftT;
  typedef FieldVector<FieldT> ResultT;

  static void operate( ResultT* const result, const LeftT& mat, const RightT& vec );
};

template< typename FieldT >
struct MatMatMult : public MatOperation<MatMatMult<FieldT>, FieldMatrix<FieldT>, FieldMatrix<FieldT>, FieldMatrix<FieldT> >
{
  typedef FieldMatrix<FieldT> RightT;
  typedef FieldMatrix<FieldT> LeftT;
  typedef FieldMatrix<FieldT> ResultT;

  static void operate( ResultT* const result, const LeftT& mat1, const RightT& mat2 );
};

template< typename FieldT >
struct VecVecAdd : public MatOperation<VecVecAdd<FieldT>, FieldVector<FieldT>, FieldVector<FieldT>, FieldVector<FieldT> >
{
  typedef FieldVector<FieldT> RightT;
  typedef FieldVector<FieldT> LeftT;
  typedef FieldVector<FieldT> ResultT;

  static void operate( ResultT* const result, const LeftT& vec1, const RightT& vec2 );
};

template< typename FieldT >
struct MatMatAdd : public MatOperation<MatMatAdd<FieldT>, FieldMatrix<FieldT>, FieldMatrix<FieldT>, FieldMatrix<FieldT> >
{
  typedef FieldMatrix<FieldT> RightT;
  typedef FieldMatrix<FieldT> LeftT;
  typedef FieldMatrix<FieldT> ResultT;

  static void operate( ResultT* const result, const LeftT& mat1, const RightT& mat2 );
};

template< typename FieldT >
struct VecVecSub : public MatOperation<VecVecSub<FieldT>, FieldVector<FieldT>, FieldVector<FieldT>, FieldVector<FieldT> >
{
  typedef FieldVector<FieldT> RightT;
  typedef FieldVector<FieldT> LeftT;
  typedef FieldVector<FieldT> ResultT;

  static void operate( ResultT* const result, const LeftT& vec1, const RightT& vec2 );
};

template< typename FieldT >
struct MatMatSub : public MatOperation<MatMatSub<FieldT>, FieldMatrix<FieldT>, FieldMatrix<FieldT>, FieldMatrix<FieldT> >
{
  typedef FieldMatrix<FieldT> RightT;
  typedef FieldMatrix<FieldT> LeftT;
  typedef FieldMatrix<FieldT> ResultT;

  static void operate( ResultT* const result, const LeftT& mat1, const RightT& mat2 );
};

template< typename FieldT >
struct MatScalDiagOffset : public MatOperation<MatScalDiagOffset<FieldT>, FieldMatrix<FieldT>, typename FieldT::value_type, FieldMatrix<FieldT> >
{
  typedef FieldMatrix<FieldT> RightT;
  typedef typename FieldT::value_type LeftT;
  typedef FieldMatrix<FieldT> ResultT;

  static void operate( ResultT* const result, const LeftT scal, const RightT& mat );
};

template< typename FieldT >
class LinearSolve
{
public:
  typedef FieldVector<FieldT> RightT;
  typedef FieldMatrix<FieldT> LeftT;
  typedef FieldVector<FieldT> ResultT;

  static void cpu_launch( ResultT* const result, const LeftT& mat, const RightT& vec );
#ifdef __CUDACC__

  static void gpu_launch( ResultT* const result, const LeftT& mat, const RightT& vec );
#endif /* __CUDACC__ */
  typedef typename FieldT::value_type ValT;
private:

  static void operate( ResultT* const result,
                       const RightT& vec,
                       const LeftT& mat,
                       const int thread
# ifdef ENABLE_THREADS
                       , Semaphore & sem );
# else
                       );
# endif // ENABLE_THREADS

#ifdef __CUDACC__

  static void gpu_operate( cublasHandle_t & handle,
                           ResultT* const result,
                           const RightT& vec,
                           const LeftT& mat );
#endif /* __CUDACC__ */


};

template< typename FieldT >
class EigenDecomposition
{
public:
  typedef FieldMatrix<FieldT> RhsT;
  typedef FieldVector<FieldT> ResultT;
  typedef ResultT ResultT1;
  typedef FieldVector<FieldT> ResultT2;

  static void cpu_launch( ResultT* const result, const RhsT& mat );
  static void cpu_launch( ResultT1* const result1, ResultT2* const result2, const RhsT& mat );
#ifdef __CUDACC__

  static void gpu_launch( ResultT* const result, const RhsT& mat );
  static void gpu_launch( ResultT1* const result1, ResultT2* const result2, const RhsT& mat );
#endif /* __CUDACC__ */
private:

  static void operate( ResultT1* const result1,
                       ResultT2* const result2,
                       const RhsT& mat,
                       const int thread
# ifdef ENABLE_THREADS
                       , Semaphore & sem
# endif // ENABLE_THREADS
                       );
#ifdef __CUDACC__

  static void gpu_operate( ResultT1* const result1,
                           ResultT2* const result2,
                           const RhsT& mat );
#endif /* __CUDACC__ */

};

template< typename FieldT >
void
FieldMatrix<FieldT>::eigenvalues(FieldVector<FieldT>& realVector, FieldVector<FieldT>& complexVector)
{ return BinaryRetMatUnaryOp< EigenDecomposition<FieldT> > ( *this ).eval(&realVector, &complexVector); }

template< typename FieldT >
MatUnaryOp< EigenDecomposition<FieldT> >
FieldMatrix<FieldT>::real_eigenvalues()
{ return MatUnaryOp< EigenDecomposition<FieldT> > ( *this ); }

template< typename FieldT >
MatVecOp< LinearSolve< FieldT > >
FieldMatrix<FieldT>::solve ( const FieldVector< FieldT >& vec )
{ return MatVecOp< LinearSolve< FieldT > > ( *this, vec ); }

template< typename FieldT >
void
FieldMatrix<FieldT>::add_to_diagonal ( const typename FieldT::value_type c )
{ (MatScalarOp< MatScalDiagOffset< FieldT > > ( c, *this )).eval(this); }

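/*
 * Usage sketch for the FieldMatrix members defined above (the names A, b,
 * x, re, im are hypothetical; construction details omitted):
 *
 * \code
 *   A.eigenvalues( re, im );          // evaluates immediately into re/im
 *   A.real_eigenvalues().eval( &re ); // deferred; eval() does the work
 *   A.solve( b ).eval( &x );          // x satisfies A*x = b, via LU
 *   A.add_to_diagonal( 2.0 );         // A(i,i) += 2.0, applied in place
 * \endcode
 */
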
template< typename FieldT >
VecScalarOp< VecScalarMult< FieldT > > operator * ( const typename FieldT::value_type& left, const FieldVector< FieldT >& right )
{ return VecScalarOp< VecScalarMult<FieldT> > ( left, right ); }
template< typename FieldT >
VecScalarOp< VecScalarMult< FieldT > > operator * ( const FieldVector< FieldT >& right, const typename FieldT::value_type& left )
{ return VecScalarOp< VecScalarMult<FieldT> > ( left, right ); }

template< typename FieldT >
MatScalarOp< MatScalarMult< FieldT > > operator * ( const typename FieldT::value_type& left, const FieldMatrix< FieldT >& right )
{ return MatScalarOp< MatScalarMult<FieldT> > ( left, right ); }
template< typename FieldT >
MatScalarOp< MatScalarMult< FieldT > > operator * ( const FieldMatrix< FieldT >& right, const typename FieldT::value_type& left )
{ return MatScalarOp< MatScalarMult<FieldT> > ( left, right ); }

template< typename FieldT >
MatVecOp< MatVecMult< FieldT > > operator * ( const FieldMatrix< FieldT >& left, const FieldVector< FieldT >& right )
{ return MatVecOp< MatVecMult<FieldT> > ( left, right ); }

template< typename FieldT >
MatMatOp< MatMatMult< FieldT > > operator * ( const FieldMatrix< FieldT >& left, const FieldMatrix< FieldT >& right )
{ return MatMatOp< MatMatMult<FieldT> > ( left, right ); }

template< typename FieldT >
VecVecOp< VecVecAdd< FieldT > > operator + ( const FieldVector< FieldT >& left, const FieldVector< FieldT >& right )
{ return VecVecOp< VecVecAdd<FieldT> > ( left, right ); }

template< typename FieldT >
MatMatOp< MatMatAdd< FieldT > > operator + ( const FieldMatrix< FieldT >& left, const FieldMatrix< FieldT >& right )
{ return MatMatOp< MatMatAdd<FieldT> > ( left, right ); }

template< typename FieldT >
VecVecOp< VecVecSub< FieldT > > operator - ( const FieldVector< FieldT >& left, const FieldVector< FieldT >& right )
{ return VecVecOp< VecVecSub<FieldT> > ( left, right ); }

template< typename FieldT >
MatMatOp< MatMatSub< FieldT > > operator - ( const FieldMatrix< FieldT >& left, const FieldMatrix< FieldT >& right )
{ return MatMatOp< MatMatSub<FieldT> > ( left, right ); }

template< typename FieldT >
static MatVecOp< DotProduct< FieldT > > dot_product_proto ( const FieldVector< FieldT >& vec1, const FieldVector< FieldT >& vec2 )
{ return MatVecOp< DotProduct< FieldT > > ( vec1, vec2 ); }

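/*
 * Note: the overloads above accept only concrete FieldMatrix / FieldVector
 * operands, so expressions do not compose; each operation must be evaluated
 * into a concrete result before feeding the next one.  Sketch with
 * hypothetical names:
 *
 * \code
 *   FieldVector<FieldT> tmp( ... ), y( ... );
 *   FieldT f( ... );
 *   ( x + b ).eval( &tmp );                // tmp = x + b
 *   ( A * tmp ).eval( &y );                // y = A * (x + b), in two steps
 *   dot_product_proto( x, y ).eval( &f );  // f = dot(x, y), a single field
 * \endcode
 */
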
template< typename OpT >
void BinaryMatOp< OpT >::eval ( ResultT* const result ) const
{
  assert ( result->elements() == op1_->elements() && op1_->elements() == op2_->elements() ); // ensure dimensions are consistent
#ifdef __CUDACC__
  enum INCORRECT_MEMORY {NONE, GPU, CPU, NEITHER};
  INCORRECT_MEMORY incorrectMemory = NONE;
  const int deviceIndex = result->active_device_index();
  if( IS_GPU_INDEX(deviceIndex) ){
    if( op1_->is_valid(deviceIndex) && op2_->is_valid(deviceIndex) ){
      OpT::gpu_launch ( result, *op1_, *op2_ );
    }
    else{
      incorrectMemory = GPU;
    }
  }
  else if( result->is_valid(CPU_INDEX) ){
    if( op1_->is_valid(CPU_INDEX) && op2_->is_valid(CPU_INDEX) ){
      OpT::cpu_launch ( result, *op1_, *op2_ );
    }
    else{
      incorrectMemory = CPU;
    }
  }
  else{
    incorrectMemory = NEITHER;
  }

  if( incorrectMemory != NONE ){
    std::ostringstream msg;
    msg << "Nebo error in " << "Nebo Mat Binary Op Launch" << ":\n";
    msg << "Left-hand side of assignment allocated ";
    msg << "on ";
    switch( incorrectMemory ){
      case GPU:
        msg << "GPU but right-hand side is not ";
        msg << "(completely) accessible on the same GPU";
        break;
      case CPU:
        msg << "CPU but right-hand side is not ";
        msg << "(completely) accessible on the same CPU";
        break;
      case NEITHER:
        msg << "unknown device - not on CPU or GPU";
        break;
    }
    msg << "\n";
    msg << "\t - " << __FILE__ << " : " << __LINE__;
    throw(std::runtime_error(msg.str()));
  }
#else
  OpT::cpu_launch ( result, *op1_, *op2_ );
#endif /* __CUDACC__ */
}

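/*
 * The eval() above and the eval() implementations that follow share one
 * dispatch pattern: the device on which the left-hand side is active
 * selects the launch path (GPU when IS_GPU_INDEX, otherwise CPU), every
 * operand must hold valid values on that same device, and any mismatch
 * raises a std::runtime_error naming the offending side.  In non-CUDA
 * builds the CPU path is taken unconditionally.
 */
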
template< typename OpT >
void BinaryMatScalarOp< OpT >::eval ( ResultT* const result ) const
{
  assert ( result->elements() == op2_->elements() ); // ensure dimensions are consistent
#ifdef __CUDACC__
  enum INCORRECT_MEMORY {NONE, GPU, CPU, NEITHER};
  INCORRECT_MEMORY incorrectMemory = NONE;
  const int deviceIndex = result->active_device_index();
  if( IS_GPU_INDEX(deviceIndex) ){
    if( op2_->is_valid(deviceIndex) ){
      OpT::gpu_launch ( result, op1_, *op2_ );
    }
    else{
      incorrectMemory = GPU;
    }
  }
  else if( result->is_valid(CPU_INDEX) ){
    if( op2_->is_valid(CPU_INDEX) ){
      OpT::cpu_launch ( result, op1_, *op2_ );
    }
    else{
      incorrectMemory = CPU;
    }
  }
  else{
    incorrectMemory = NEITHER;
  }

  if( incorrectMemory != NONE ){
    std::ostringstream msg;
    msg << "Nebo error in " << "Nebo Mat Binary Op Launch" << ":\n";
    msg << "Left-hand side of assignment allocated ";
    msg << "on ";
    switch( incorrectMemory ){
      case GPU:
        msg << "GPU but right-hand side is not ";
        msg << "(completely) accessible on the same GPU";
        break;
      case CPU:
        msg << "CPU but right-hand side is not ";
        msg << "(completely) accessible on the same CPU";
        break;
      case NEITHER:
        msg << "unknown device - not on CPU or GPU";
        break;
    }
    msg << "\n";
    msg << "\t - " << __FILE__ << " : " << __LINE__;
    throw(std::runtime_error(msg.str()));
  }
#else
  OpT::cpu_launch ( result, op1_, *op2_ );
#endif /* __CUDACC__ */
}

template< typename OpT >
void MatUnaryOp< OpT >::eval ( ResultT* const result ) const
{
  assert ( result->elements() == mat_->elements() ); // ensure dimensions are consistent
#ifdef __CUDACC__
  enum INCORRECT_MEMORY {NONE, GPU, CPU, NEITHER};
  INCORRECT_MEMORY incorrectMemory = NONE;
  const int deviceIndex = result->active_device_index();
  if( IS_GPU_INDEX(deviceIndex) ){
    if( mat_->is_valid(deviceIndex) ){
      OpT::gpu_launch ( result, *mat_ );
    }
    else{
      incorrectMemory = GPU;
    }
  }
  else if( result->is_valid(CPU_INDEX) ){
    if( mat_->is_valid(CPU_INDEX) ){
      OpT::cpu_launch ( result, *mat_ );
    }
    else{
      incorrectMemory = CPU;
    }
  }
  else{
    incorrectMemory = NEITHER;
  }

  if( incorrectMemory != NONE ){
    std::ostringstream msg;
    msg << "Nebo error in MatVec Launch:\n"
        << "Left-hand side of assignment allocated on ";
    switch( incorrectMemory ){
      case GPU:
        msg << "GPU but right-hand side is not (completely) accessible on the same GPU";
        break;
      case CPU:
        msg << "CPU but right-hand side is not (completely) accessible on the same CPU";
        break;
      case NEITHER:
        msg << "unknown device - not on CPU or GPU";
        break;
    }
    msg << "\n";
    msg << "\t - " << __FILE__ << " : " << __LINE__;
    throw(std::runtime_error(msg.str()));
  }
#else
  OpT::cpu_launch ( result, *mat_ );
#endif /* __CUDACC__ */
}

template< typename OpT >
void BinaryRetMatUnaryOp< OpT >::eval ( ResultT1* const result1, ResultT2* const result2 ) const
{
  assert ( result1->elements() == mat_->elements() && result2->elements() == mat_->elements() ); // ensure dimensions are consistent
#ifdef __CUDACC__
  enum INCORRECT_MEMORY {NONE, GPU, CPU, NEITHER};
  INCORRECT_MEMORY incorrectMemoryResults = NONE;
  INCORRECT_MEMORY incorrectMemory = NONE;
  const int deviceIndex = result1->active_device_index();
  if( IS_GPU_INDEX(deviceIndex) ){
    if( result2->is_valid(deviceIndex) ){
      if( mat_->is_valid(deviceIndex) ){
        OpT::gpu_launch ( result1, result2, *mat_ );
      }
      else{
        incorrectMemory = GPU;
      }
    }
    else{
      incorrectMemoryResults = GPU;
    }
  }
  else if( result1->is_valid(CPU_INDEX) ){
    if( result2->is_valid(CPU_INDEX) ){
      if( mat_->is_valid(CPU_INDEX) ){
        OpT::cpu_launch ( result1, result2, *mat_ );
      }
      else{
        incorrectMemory = CPU;
      }
    }
    else{
      incorrectMemoryResults = CPU;
    }
  }
  else{
    incorrectMemory = NEITHER;
  }

  if( incorrectMemoryResults != NONE ){
    std::ostringstream msg;
    msg << "Nebo error in MatVec Launch:\n"
        << "First left-hand side of assignment allocated on ";
    switch( incorrectMemoryResults ){
      case GPU:
        msg << "GPU but other left-hand side is not (completely) accessible on the same GPU";
        break;
      case CPU:
        msg << "CPU but other left-hand side is not (completely) accessible on the same CPU";
        break;
      case NEITHER:
        msg << "unknown device - not on CPU or GPU";
        break;
    }
    msg << "\n";
    msg << "\t - " << __FILE__ << " : " << __LINE__;
    throw(std::runtime_error(msg.str()));
  }
  if( incorrectMemory != NONE ){
    std::ostringstream msg;
    msg << "Nebo error in MatVec Launch:\n"
        << "Left-hand side of assignment allocated on ";
    switch( incorrectMemory ){
      case GPU:
        msg << "GPU but right-hand side is not (completely) accessible on the same GPU";
        break;
      case CPU:
        msg << "CPU but right-hand side is not (completely) accessible on the same CPU";
        break;
      case NEITHER:
        msg << "unknown device - not on CPU or GPU";
        break;
    }
    msg << "\n";
    msg << "\t - " << __FILE__ << " : " << __LINE__;
    throw(std::runtime_error(msg.str()));
  }
#else
  OpT::cpu_launch ( result1, result2, *mat_ );
#endif /* __CUDACC__ */
}

template< typename FieldT >
void MatVecOp< DotProduct< FieldT > >::eval ( FieldT* const result ) const
{
  assert ( op1_->elements() == op2_->elements() ); // ensure dimensions are consistent
  assert ( result->window_with_ghost().glob_npts() == op1_->at(0).window_with_ghost().glob_npts() ); // ensure lengths are consistent
  DotProduct<FieldT>::launch ( result, *op1_, *op2_ );
}

#ifdef __CUDACC__
template< typename FieldT >
void EigenDecomposition< FieldT >::gpu_launch ( ResultT* const result, const RhsT& mat )
{
  assert ( result->elements() == mat.elements() ); // ensure dimensions are consistent

  gpu_launch(result, NULL, mat);
}
template< typename FieldT >
void EigenDecomposition< FieldT >::gpu_launch ( ResultT1* const result1, ResultT2* const result2, const RhsT& mat )
{
  assert ( result1->elements() == mat.elements() ); // ensure dimensions are consistent
  assert ( result2 == NULL || result2->elements() == mat.elements() ); // ensure dimensions are consistent

  gpu_operate(result1, result2, mat);
}
#endif /* __CUDACC__ */
template< typename FieldT >
void EigenDecomposition< FieldT >::cpu_launch ( ResultT* const result, const RhsT& mat )
{
  assert ( result->elements() == mat.elements() ); // ensure dimensions are consistent

  cpu_launch(result, NULL, mat);
}
template< typename FieldT >
void EigenDecomposition< FieldT >::cpu_launch ( ResultT1* const result1, ResultT2* const result2, const RhsT& mat )
{
  assert ( result1->elements() == mat.elements() ); // ensure dimensions are consistent
  assert ( result2 == NULL || result2->elements() == mat.elements() ); // ensure dimensions are consistent

  int t = NTHREADS; // we use boost::bind to call ::operate on all NTHREADS
# ifdef ENABLE_THREADS
  Semaphore sem;
  while( t-- ){
    ThreadPoolFIFO::self().schedule ( boost::bind ( operate,
                                                    result1,
                                                    result2,
                                                    boost::cref(mat),
                                                    t,
                                                    boost::ref(sem)
                                                    ) );
  }
  t = NTHREADS; // wait on all threads to finish
  while( t-- ){ sem.wait(); }
# else // !ENABLE_THREADS
  operate ( result1,
            result2,
            mat,
            --t
            );
# endif // ENABLE_THREADS
}

#ifdef __CUDACC__
template< typename FieldT >
void LinearSolve<FieldT>::gpu_launch ( ResultT* const result, const LeftT& mat, const RightT& vec )
{
  assert ( result->elements() == vec.elements() && mat.elements() == vec.elements() ); // matrix is square

  cublasHandle_t handle;
  CudaMatVecUtility::cublas_err_chk(cublasCreate(&handle));

  gpu_operate(handle, result, vec, mat);

  CudaMatVecUtility::cublas_err_chk(cublasDestroy(handle));
}
#endif /* __CUDACC__ */
template< typename FieldT >
void LinearSolve<FieldT>::cpu_launch ( ResultT* const result, const LeftT& mat, const RightT& vec )
{
  int t = NTHREADS; // we use boost::bind to call ::operate on all NTHREADS
# ifdef ENABLE_THREADS
  Semaphore sem;
  while( t-- ){
    ThreadPoolFIFO::self().schedule ( boost::bind ( LinearSolve<FieldT>::operate, result, boost::cref(vec), boost::cref(mat), t, boost::ref(sem) ) );
  }
  t = NTHREADS; // wait on all threads to finish
  while( t-- ){ sem.wait(); }
# else // !ENABLE_THREADS
  LinearSolve<FieldT>::operate ( result, vec, mat, --t );
# endif // ENABLE_THREADS
# ifndef NDEBUG
  size_t i = result->elements();
  while( i-- ){
    FieldT& r = result->at(i);
    size_t xyz = r.window_with_ghost().glob_npts();
    while( xyz-- ){
      const typename FieldT::value_type val = r[xyz];
      if( std::isnan(val) || std::isinf(val) ){
        std::ostringstream msg;
        msg << "Nebo error in " << "LinAlgCPU Kernel" << ":\n";
        msg << " - " << "Matrix " << i << " may be singular.";
        msg << "\n";
        msg << "\t - " << __FILE__ << " : " << __LINE__;
        throw(std::runtime_error(msg.str()));
      }
    }
  }
# endif /* NDEBUG */
}

template< typename FieldT >
void DotProduct<FieldT>::operate ( FieldT* const result, const LeftT& vec1, const RightT& vec2 )
{
  const int stride = 5;
  FieldT& res = *result; // grab the reference for readability
  int j = vec1.elements();
  if( j-- < stride ){ // note: "j--" decrements j even when this branch is taken
    res <<= vec1 ( j ) * vec2 ( j ); // assignment before accumulation loop
  }
  else{
    res <<= vec1 ( j ) * vec2 ( j ) + vec1 ( j-1 ) * vec2 ( j-1 ) + vec1 ( j-2 ) * vec2 ( j-2 )
          + vec1 ( j-3 ) * vec2 ( j-3 ) + vec1 ( j-4 ) * vec2 ( j-4 ); // assignment before accumulation loop
    for( j -= ( stride - 1 ); j >= stride; j -= stride ){ // accumulate "stride" elements at a time
      res <<= res + vec1 ( j-1 ) * vec2 ( j-1 ) + vec1 ( j-2 ) * vec2 ( j-2 ) + vec1 ( j-3 ) * vec2 ( j-3 )
                  + vec1 ( j-4 ) * vec2 ( j-4 ) + vec1 ( j-5 ) * vec2 ( j-5 );
    }
  }
  while( j-- ){ // remaining elements, fewer than "stride"
    res <<= res + vec1 ( j ) * vec2 ( j );
  }
}

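/*
 * Note on the stride-5 pattern used above and in the operations that
 * follow: each "res <<= ..." statement is a Nebo assignment that sweeps the
 * entire field, so folding five terms into a single statement reduces the
 * number of full-field traversals roughly five-fold versus accumulating one
 * term per statement (this is the apparent intent; the file does not state
 * it).  Worked example for vec1.elements() == 7: after "j-- < stride"
 * fails, j == 6 and the else-branch assigns terms 6..2; the for-loop leaves
 * j at 2 and exits immediately (2 < stride); the final while-loop
 * accumulates terms 1 and 0.
 */
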
template< typename FieldT >
void VecScalarMult< FieldT >::operate ( ResultT* const result, const LeftT scal, const RightT& vec )
{
  assert( result->elements() == vec.elements() );
  const int stride = 5;
  const int elements = result->elements();
  int i = elements;
  while( i - stride >= 0 ){
    result->at(i-1) <<= scal * vec(i-1);
    result->at(i-2) <<= scal * vec(i-2);
    result->at(i-3) <<= scal * vec(i-3);
    result->at(i-4) <<= scal * vec(i-4);
    result->at(i-5) <<= scal * vec(i-5);
    i -= stride;
  }
  while( i-- ){
    result->at(i) <<= scal * vec(i);
  }
}

template< typename FieldT >
void MatScalarMult< FieldT >::operate ( ResultT* const result, const LeftT scal, const RightT& mat )
{
  assert( result->elements() == mat.elements() );
  const int stride = 5;
  const int elements = result->elements();
  int ij = elements*elements;
  while( ij - stride >= 0 ){
    result->at(ij-1) <<= scal * mat(ij-1);
    result->at(ij-2) <<= scal * mat(ij-2);
    result->at(ij-3) <<= scal * mat(ij-3);
    result->at(ij-4) <<= scal * mat(ij-4);
    result->at(ij-5) <<= scal * mat(ij-5);
    ij -= stride;
  }
  while( ij-- ){
    result->at(ij) <<= scal * mat(ij);
  }
}

template< typename FieldT >
void VecVecAdd< FieldT >::operate ( ResultT* const result, const LeftT& vec1, const RightT& vec2 )
{
  assert( result->elements() == vec1.elements() && vec1.elements() == vec2.elements() );
  const int stride = 5;
  const int elements = result->elements();
  int i = elements;
  while( i - stride >= 0 ){
    result->at(i-1) <<= vec1(i-1) + vec2(i-1);
    result->at(i-2) <<= vec1(i-2) + vec2(i-2);
    result->at(i-3) <<= vec1(i-3) + vec2(i-3);
    result->at(i-4) <<= vec1(i-4) + vec2(i-4);
    result->at(i-5) <<= vec1(i-5) + vec2(i-5);
    i -= stride;
  }
  while( i-- ){
    result->at(i) <<= vec1(i) + vec2(i);
  }
}

template< typename FieldT >
void MatMatAdd< FieldT >::operate ( ResultT* const result, const LeftT& mat1, const RightT& mat2 )
{
  assert( result->elements() == mat1.elements() && mat1.elements() == mat2.elements() );
  const int stride = 5;
  const int elements = result->elements();
  int ij = elements * elements;
  while( ij - stride >= 0 ){
    result->at(ij-1) <<= mat1(ij-1) + mat2(ij-1);
    result->at(ij-2) <<= mat1(ij-2) + mat2(ij-2);
    result->at(ij-3) <<= mat1(ij-3) + mat2(ij-3);
    result->at(ij-4) <<= mat1(ij-4) + mat2(ij-4);
    result->at(ij-5) <<= mat1(ij-5) + mat2(ij-5);
    ij -= stride;
  }
  while( ij-- ){
    result->at(ij) <<= mat1(ij) + mat2(ij);
  }
}

template< typename FieldT >
void VecVecSub< FieldT >::operate ( ResultT* const result, const LeftT& vec1, const RightT& vec2 )
{
  assert( result->elements() == vec1.elements() && vec1.elements() == vec2.elements() );
  const int stride = 5;
  const int elements = result->elements();
  int i = elements;
  while( i - stride >= 0 ){
    result->at(i-1) <<= vec1(i-1) - vec2(i-1);
    result->at(i-2) <<= vec1(i-2) - vec2(i-2);
    result->at(i-3) <<= vec1(i-3) - vec2(i-3);
    result->at(i-4) <<= vec1(i-4) - vec2(i-4);
    result->at(i-5) <<= vec1(i-5) - vec2(i-5);
    i -= stride;
  }
  while( i-- ){
    result->at(i) <<= vec1(i) - vec2(i);
  }
}

template< typename FieldT >
void MatMatSub< FieldT >::operate ( ResultT* const result, const LeftT& mat1, const RightT& mat2 )
{
  assert( result->elements() == mat1.elements() && mat1.elements() == mat2.elements() );
  const int stride = 5;
  const int elements = result->elements();
  int ij = elements * elements;
  while( ij - stride >= 0 ){
    result->at(ij-1) <<= mat1(ij-1) - mat2(ij-1);
    result->at(ij-2) <<= mat1(ij-2) - mat2(ij-2);
    result->at(ij-3) <<= mat1(ij-3) - mat2(ij-3);
    result->at(ij-4) <<= mat1(ij-4) - mat2(ij-4);
    result->at(ij-5) <<= mat1(ij-5) - mat2(ij-5);
    ij -= stride;
  }
  while( ij-- ){
    result->at(ij) <<= mat1(ij) - mat2(ij);
  }
}

template< typename FieldT >
void MatVecMult< FieldT >::operate ( ResultT* const result, const LeftT& mat, const RightT& vec )
{
  const int stride = 5;
  const int elements = result->elements();
  int i, j, ij;
  i = elements;
  ij = elements * elements;
  while( i-- ){
    j = elements;
    FieldT& res = result->at(i); // grab a reference for readability
    if( j < stride ){
      res <<= mat ( --ij ) * vec ( --j );
    }
    else{
      res <<= mat ( ij-1 ) * vec ( j-1 ) + mat ( ij-2 ) * vec ( j-2 ) + mat ( ij-3 ) * vec ( j-3 )
            + mat ( ij-4 ) * vec ( j-4 ) + mat ( ij-5 ) * vec ( j-5 );
      ij -= stride;
      for( j -= stride; j >= stride; j -= stride, ij -= stride ){
        res <<= res + mat ( ij-1 ) * vec ( j-1 ) + mat ( ij-2 ) * vec ( j-2 ) + mat ( ij-3 ) * vec ( j-3 )
                    + mat ( ij-4 ) * vec ( j-4 ) + mat ( ij-5 ) * vec ( j-5 );
      }
    }
    while( j-- ){
      res <<= res + mat ( --ij ) * vec ( j );
    }
  }
}

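/*
 * Indexing note: FieldMatrix entries are addressed through a single flat,
 * row-major index, so entry (i,j) of an elements x elements matrix lives at
 * flat index i*elements + j.  In the loop above, ij counts down through row
 * i while j counts down through the vector; for elements == 3, row i == 2
 * pairs flat indices 8, 7, 6 with vector indices 2, 1, 0.
 */
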
template< typename FieldT >
void MatMatMult< FieldT >::operate ( ResultT* const result, const LeftT& mat1, const RightT& mat2 )
{
  assert( result->elements() == mat1.elements() && mat1.elements() == mat2.elements() );
  const int stride = 5;
  const int elements = result->elements();
  int i, j, ij, columns;
  columns = elements;
  while( columns-- ){
    i = elements;
    ij = elements * elements;
    while( i-- ){
      j = elements;
      FieldT& res = result->at(columns + i*elements); // grab a reference for readability
      if( j < stride ){
        res <<= mat1 ( --ij ) * mat2 ( columns + (--j)*elements );
      }
      else{
        res <<= mat1 ( ij-1 ) * mat2 ( columns+(j-1)*elements ) + mat1 ( ij-2 ) * mat2 ( columns+(j-2)*elements ) + mat1 ( ij-3 ) * mat2 ( columns+(j-3)*elements )
              + mat1 ( ij-4 ) * mat2 ( columns+(j-4)*elements ) + mat1 ( ij-5 ) * mat2 ( columns+(j-5)*elements );
        ij -= stride;
        for( j -= stride; j >= stride; j -= stride, ij -= stride ){
          res <<= res + mat1 ( ij-1 ) * mat2 ( columns+(j-1)*elements ) + mat1 ( ij-2 ) * mat2 ( columns+(j-2)*elements ) + mat1 ( ij-3 ) * mat2 ( columns+(j-3)*elements )
                      + mat1 ( ij-4 ) * mat2 ( columns+(j-4)*elements ) + mat1 ( ij-5 ) * mat2 ( columns+(j-5)*elements );
        }
      }
      while( j-- ){
        res <<= res + mat1 ( --ij ) * mat2 ( columns+j*elements );
      }
    }
  }
}

template< typename FieldT >
void MatScalDiagOffset< FieldT >::operate ( ResultT* const result, const LeftT scal, const RightT& mat )
{
  assert( result == &mat );
  const int elements = result->elements();
  int diag = elements*elements;
  while( --diag >= 0 ){
    result->at(diag) <<= scal + mat(diag);
    diag -= elements;
  }
}

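/*
 * Diagonal walk above: starting from diag = elements*elements, each pass
 * applies --diag and then subtracts elements, stepping the flat row-major
 * index down the diagonal in strides of elements+1.  For elements == 3 the
 * visited indices are 8, 4, 0, i.e. exactly the diagonal of a 3x3 matrix.
 * The assert(result == &mat) reflects that this operation is only applied
 * in place (see FieldMatrix::add_to_diagonal above).
 */
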

template< typename FieldT >
void EigenDecomposition< FieldT >::operate ( ResultT1* const result1,
                                             ResultT2* const result2,
                                             const RhsT& mat,
                                             const int thread
# ifdef ENABLE_THREADS
                                             , Semaphore & sem
# endif // ENABLE_THREADS
                                             )
{
  typedef typename FieldT::value_type ValT;
  const int dim = mat.elements();
  const ValT* mVals[dim][dim];
  int i, j;
  i = dim;
  while( i-- ){
    j = dim;
    while( j-- ){
      mVals[i][j] = mat ( i, j ).field_values();
    }
  }
# ifdef USE_EIGEN
  Eigen::MatrixXd mPt = Eigen::MatrixXd ( dim, dim );
# endif // USE_EIGEN
  int xyz = (mat ( 0 ).window_with_ghost().glob_npts()-1) - thread;
  for( ; xyz >= 0; xyz -= NTHREADS ){
    i = dim;
    while( i-- ){
      j = dim;
      while( j-- ){
        mPt ( i, j ) = mVals[i][j][xyz];
      }
    }
# ifdef USE_EIGEN
    Eigen::EigenSolver<Eigen::MatrixXd> solver ( mPt );
    const Eigen::EigenSolver<Eigen::MatrixXd>::EigenvalueType eigs = solver.eigenvalues();
    if( result2 == NULL ){
      i = dim;
      while( i-- ){
        result1->at ( i ) [xyz] = eigs.coeffRef ( i ).real();
      }
    }
    else{
      i = dim;
      while( i-- ){
        result1->at ( i ) [xyz] = eigs.coeffRef ( i ).real();
        result2->at ( i ) [xyz] = eigs.coeffRef ( i ).imag();
      }
    }
# endif // USE_EIGEN
  }
# ifdef ENABLE_THREADS
  sem.post();
# endif // ENABLE_THREADS
}
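/*
 * Threading note for operate() above: with ENABLE_THREADS, thread k of
 * NTHREADS handles every NTHREADS-th grid point and posts the semaphore
 * when its stride is exhausted; cpu_launch() waits for NTHREADS posts.
 * Also note that mPt is only declared under USE_EIGEN (defined
 * unconditionally at the top of this file); unlike LinearSolve::operate,
 * this routine has no boost::ublas fallback.
 */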
#ifdef __CUDACC__
template< typename FieldT >
void EigenDecomposition< FieldT >::gpu_operate ( ResultT1* const result1,
                                                 ResultT2* const result2,
                                                 const RhsT& mat )
{
  using namespace CudaMatVecUtility;
  assert( result1->active_device_index() == mat.active_device_index() );
  assert( result2 == NULL || result2->active_device_index() == mat.active_device_index() );

  typedef typename FieldT::value_type ValT;

  const int dim = result1->elements();
  const int activeIndex = mat.active_device_index();

  const bool returnComplex = result2 != NULL;

  ema::cuda::CUDADeviceInterface& CDI = ema::cuda::CUDADeviceInterface::self();

  const int matDim = dim*dim;

  ValT* h_lAddrs1[dim];
  ValT** h_lAddrs2;
  const ValT* h_mAddrs[matDim];
  int i, j;
  i = dim;
  if( returnComplex ){
    h_lAddrs2 = new ValT*[dim];
    while( i-- ){
      h_lAddrs1[i] = result1->at ( i ).field_values(activeIndex);
      h_lAddrs2[i] = result2->at ( i ).field_values(activeIndex);
      j = dim;
      while( j-- ){
        h_mAddrs[i*dim + j] = mat ( i, j ).field_values(activeIndex);
      }
    }
  }
  else{
    while( i-- ){
      h_lAddrs1[i] = result1->at ( i ).field_values(activeIndex);
      j = dim;
      while( j-- ){
        h_mAddrs[i*dim + j] = mat ( i, j ).field_values(activeIndex);
      }
    }
  }

  const int xyz = result1->at(0).window_with_ghost().glob_npts();

  // Copy addresses to GPU in native interleaved order
  PoolAutoPtr<ValT*> d_mAddrs( Pool<ValT*>::get( activeIndex, matDim ), activeIndex);
  PoolAutoPtr<ValT*> d_lAddrs1(Pool<ValT*>::get( activeIndex, dim ), activeIndex);
  PoolAutoPtr<ValT*> d_lAddrs2(returnComplex ? Pool<ValT*>::get( activeIndex, dim ) : NULL,
                               activeIndex);
  {
    CDI.memcpy_to(d_mAddrs, h_mAddrs, matDim*sizeof(ValT*), activeIndex, result1->at(0).get_stream());
    CDI.memcpy_to(d_lAddrs1, h_lAddrs1, dim*sizeof(ValT*), activeIndex, result1->at(0).get_stream());

    if( returnComplex ){
      // No race condition here as far as deleting memory since this behaves
      // synchronously when using pageable memory.
      CDI.memcpy_to(d_lAddrs2, h_lAddrs2, dim*sizeof(ValT*), activeIndex, result1->at(0).get_stream());
      delete[] h_lAddrs2;
    }

#ifndef NDEBUG
    cudaError err;

    if( cudaSuccess != (err = cudaDeviceSynchronize()) ){
      std::ostringstream msg;
      msg << "Nebo error in LinAlgCUDA Kernel - after copy:\n\t"
          << cudaGetErrorString(err)
          << "\n\t" << __FILE__ << " : " << __LINE__;
      throw(std::runtime_error(msg.str()));
    }
#endif /* NDEBUG */
  }


  // Make call to eigen decomp in cuda mat vec utility
  {
    PoolAutoPtr<int> d_status(Pool<int>::get( activeIndex, xyz ), activeIndex);
    MatVecOptions_e option = returnComplex ? MatVecOptions::NONE : MatVecOptions::DISREGARD_IMAGINARY;

    int computeBlockDim = 128;
    int computeGridDim = xyz / computeBlockDim + ((xyz % computeBlockDim) > 0 ? 1 : 0);

    sync_field_streams(mat, matDim, result1->at(0), false);
    sync_field_streams(*result1, dim, result1->at(0), true);
    if( returnComplex )
      sync_field_streams(*result2, dim, result1->at(0), true);

    CudaMatVecUtility::balanc<<<computeGridDim, computeBlockDim, activeIndex, result1->at(0).get_stream()>>>(d_mAddrs.get(), dim, xyz);
    CudaMatVecUtility::elmhes<<<computeGridDim, computeBlockDim, activeIndex, result1->at(0).get_stream()>>>(d_mAddrs.get(), dim, xyz);
    CudaMatVecUtility::hqr <<<computeGridDim, computeBlockDim, activeIndex, result1->at(0).get_stream()>>>(d_mAddrs.get(), dim, xyz, d_lAddrs1.get(), d_lAddrs2.get(), d_status, option);

#ifndef NDEBUG
    int* h_status = new int[xyz];
    CDI.memcpy_from(h_status, d_status, xyz*sizeof(int), activeIndex, result1->at(0).get_stream());
    for( int i = 0; i < xyz; i++ ){
      if( h_status[i] != MatVecStatus::SUCCESS ){
        std::ostringstream msg;
        if( h_status[i] == MatVecStatus::IMAGINARY_NUMBER_FOUND ){
          msg << "Nebo error in " << "LinAlgCPU Kernel" << ":\n";
          msg << " - " << "Matrix " << i << " has a complex eigen value.";
          msg << "\n";
          msg << "\t - " << __FILE__ << " : " << __LINE__;
        }
        else{
          msg << "Nebo error in LinAlgCUDA Kernel - after eigen decomposition:\n\t"
              << "Matrix " << i << " did not succeed.\n\t"
              << "MatVec Status Code: " << h_status[i] << ".\n\t"
              << __FILE__ << " : " << __LINE__;
        }

        delete[] h_status;
        throw(std::runtime_error(msg.str()));
      }
    }
    delete[] h_status;
#endif /* NDEBUG */
  }

  sync_field_streams(result1->at(0), mat, matDim, false);
  sync_field_streams(result1->at(0), *result1, dim, true);
  if( returnComplex )
    sync_field_streams(result1->at(0), *result2, dim, true);

}
#endif /* __CUDACC__ */

template< typename FieldT >
void LinearSolve<FieldT>::operate ( ResultT* const result,
                                    const RightT& vec,
                                    const LeftT& mat,
                                    const int thread
# ifdef ENABLE_THREADS
                                    , Semaphore & sem )
# else
                                    )
# endif // ENABLE_THREADS
{
  const int dim = result->elements();
  ValT* lVals[dim];
  const ValT* vVals[dim];
  const ValT* mVals[dim][dim];
  int i, j;
  i = dim;
  while( i-- ){
    lVals[i] = result->at ( i ).field_values();
    vVals[i] = vec ( i ).field_values();
    j = dim;
    while( j-- ){
      mVals[i][j] = mat ( i, j ).field_values();
    }
  }
# ifdef USE_EIGEN
  Eigen::MatrixXd mPt = Eigen::MatrixXd ( dim, dim );
  Eigen::VectorXd vPt = Eigen::MatrixXd ( dim, 1 );
  Eigen::VectorXd lPt = Eigen::MatrixXd ( dim, 1 );
# else // !USE_EIGEN
  namespace ublas = boost::numeric::ublas;
  ublas::matrix<ValT> mPt ( dim, dim );
  ublas::vector<ValT> vPt ( dim );
  ublas::permutation_matrix<size_t> pm ( mPt.size1() );
# endif // USE_EIGEN
  int xyz = (result->at ( 0 ).window_with_ghost().glob_npts()-1) - thread;
  for( ; xyz >= 0; xyz -= NTHREADS ){
    i = dim;
    while( i-- ){
      vPt ( i ) = vVals[i][xyz];
      j = dim;
      while( j-- ){
        mPt ( i, j ) = mVals[i][j][xyz];
      }
    }
# ifdef USE_EIGEN
    lPt = mPt.lu().solve ( vPt );
    i = dim;
    while( i-- ){
      lVals[i][xyz] = lPt ( i );
    }
# else // !USE_EIGEN
    ublas::lu_factorize ( mPt, pm );
    ublas::lu_substitute ( mPt, pm, vPt );
    i = dim;
    while( i-- ){
      lVals[i][xyz] = vPt ( i );
    }
# endif // USE_EIGEN
  }
# ifdef ENABLE_THREADS
  sem.post();
# endif // ENABLE_THREADS
}
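/*
 * Structure of operate() above: raw per-field data pointers are gathered
 * once, then for each grid point xyz the dim x dim system is marshalled
 * into a dense matrix and solved point by point, using Eigen's
 * partially-pivoted LU (mPt.lu().solve(vPt)) or, when USE_EIGEN is not
 * defined, boost::ublas lu_factorize/lu_substitute.  The thread-striding
 * scheme matches EigenDecomposition::operate: thread k handles every
 * NTHREADS-th grid point.
 */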
#ifdef __CUDACC__
template<typename FieldT>
void LinearSolve<FieldT>::gpu_operate ( cublasHandle_t & handle,
                                        ResultT* const result,
                                        const RightT& vec,
                                        const LeftT& mat )
{
  using namespace CudaMatVecUtility;
  assert( result->active_device_index() == vec.active_device_index() && vec.active_device_index() == mat.active_device_index() );

  const int dim = result->elements();
  const int activeIndex = vec.active_device_index();

  cublasSetStream(handle, result->at(0).get_stream());

  ema::cuda::CUDADeviceInterface& CDI = ema::cuda::CUDADeviceInterface::self();

  const int matDim = dim*dim;

  ValT* h_lAddrs[dim];
  const ValT* h_vAddrs[dim];
  const ValT* h_mAddrs[matDim];
  int i, j;
  i = dim;
  while( i-- ){
    h_lAddrs[i] = result->at ( i ).field_values(activeIndex);
    h_vAddrs[i] = vec ( i ).field_values(activeIndex);
    j = dim;
    while( j-- ){
      h_mAddrs[i*dim + j] = mat ( i, j ).field_values(activeIndex);
    }
  }

  const int xyz = result->at(0).window_with_ghost().glob_npts();
  PoolAutoPtr<ValT> d_vNormal(Pool<ValT>::get( activeIndex, xyz*dim ), activeIndex);
  PoolAutoPtr<ValT> d_mNormal(Pool<ValT>::get( activeIndex, xyz*matDim ), activeIndex);

  int blockDim = 16;
  int xGDim = xyz / blockDim + ((xyz % blockDim) > 0 ? 1 : 0);
  int yGDim = dim / blockDim + ((dim % blockDim) > 0 ? 1 : 0);
  int yGDim2 = matDim / blockDim + ((matDim % blockDim) > 0 ? 1 : 0);
  dim3 dimBlock (blockDim, blockDim);
  dim3 dimGrid (xGDim, yGDim);
  dim3 dimGridReverse (yGDim, xGDim);
  dim3 dimGrid2 (xGDim, yGDim2);

  // Copy matrix values to GPU in Column Major order
  PoolAutoPtr<ValT*> d_vNaddrs(Pool<ValT*>::get( activeIndex, xyz ), activeIndex);
  PoolAutoPtr<ValT*> d_mNaddrs(Pool<ValT*>::get( activeIndex, xyz ), activeIndex);
  PoolAutoPtr<ValT*> d_lAddrs (Pool<ValT*>::get( activeIndex, dim ), activeIndex);
  {
    ValT* h_Naddrs[xyz];
    // No race condition here as far as deleting memory since this behaves
    // synchronously when using pageable memory.
    PoolAutoPtr<ValT*> d_Addrs(Pool<ValT*>::get( activeIndex, matDim ), activeIndex);
    for( int i = 0; i < xyz; i++ ){
      h_Naddrs[i] = d_mNormal + i*matDim;
    }
    sync_field_streams(mat, matDim, result->at(0), false);
    CDI.memcpy_to(d_mNaddrs, h_Naddrs, xyz*sizeof(ValT*), activeIndex, result->at(0).get_stream());
    CDI.memcpy_to(d_Addrs, h_mAddrs, matDim*sizeof(ValT*), activeIndex, result->at(0).get_stream());
    mat_vec_copy_column_major<<<dimGrid2,dimBlock, activeIndex, result->at(0).get_stream()>>>(d_mNaddrs.get(), d_Addrs.get(), dim, matDim, xyz);
#ifndef NDEBUG
    cudaError err;

    if( cudaSuccess != (err = cudaDeviceSynchronize()) ){
      std::ostringstream msg;
      msg << "Nebo error in LinAlgCUDA Kernel - after matrix copy:\n\t"
          << cudaGetErrorString(err)
          << "\n\t" << __FILE__ << " : " << __LINE__;
      cublasSetStream(handle, NULL);
      throw(std::runtime_error(msg.str()));
    }
#endif /* NDEBUG */
  }


  {
    PoolAutoPtr<int> d_infoArray(Pool<int>::get( activeIndex, xyz ), activeIndex);
    int* h_infoArray = new int[xyz];
    int h_info = 0;

    // LU factorize
    int* d_pivotArray = NULL;
    cublas_err_chk(blasDispatch::cublasgetrfBatched(handle,
                                                    dim,
                                                    d_mNaddrs,
                                                    dim,
                                                    d_pivotArray,
                                                    d_infoArray,
                                                    xyz));

#ifndef NDEBUG
    CDI.memcpy_from(h_infoArray, d_infoArray, xyz*sizeof(int), activeIndex, result->at(0).get_stream());
    for( int i = 0; i < xyz; i++ ){
      if( h_infoArray[i] != 0 ){
        std::ostringstream msg;
        msg << "Nebo error in LinAlgCUDA Kernel - after LU decomposition:\n\t"
            << "Matrix " << i << " may be singular.\n\t"
            << __FILE__ << " : " << __LINE__;
        delete[] h_infoArray;
        cublasSetStream(handle, NULL);
        throw(std::runtime_error(msg.str()));
      }
    }
#endif

    // Copy vector values to GPU in Column Major order
    {
      ValT* h_Naddrs[xyz];
      PoolAutoPtr<ValT*> d_Addrs(Pool<ValT*>::get( activeIndex, matDim ), activeIndex);
      for( int i = 0; i < xyz; i++ ){
        h_Naddrs[i] = d_vNormal + i*dim;
      }
      sync_field_streams(vec, dim, result->at(0), false);
      CDI.memcpy_to(d_vNaddrs, h_Naddrs, xyz*sizeof(ValT*), activeIndex, result->at(0).get_stream());
      CDI.memcpy_to(d_Addrs, h_vAddrs, dim*sizeof(ValT*), activeIndex, result->at(0).get_stream());
      mat_vec_copy_column_major<<<dimGrid,dimBlock, activeIndex, result->at(0).get_stream()>>>(d_vNaddrs.get(), d_Addrs.get(), 1, dim, xyz);
      // Copy plain l addresses
      CDI.memcpy_to(d_lAddrs, h_lAddrs, dim*sizeof(ValT*), activeIndex, result->at(0).get_stream());
#ifndef NDEBUG
      cudaError err;

      if( cudaSuccess != (err = cudaDeviceSynchronize()) ){
        std::ostringstream msg;
        msg << "Nebo error in LinAlgCUDA Kernel - after vector copy:\n\t"
            << cudaGetErrorString(err)
            << "\n\t" << __FILE__ << " : " << __LINE__;
        delete[] h_infoArray;
        cublasSetStream(handle, NULL);
        throw(std::runtime_error(msg.str()));
      }
#endif /* NDEBUG */
    }

    cublas_err_chk(blasDispatch::cublasgetrsBatched(handle,
                                                    CUBLAS_OP_N,
                                                    dim,
                                                    1,
                                                    const_cast<const ValT**>(d_mNaddrs.get()),
                                                    dim,
                                                    d_pivotArray,
                                                    d_vNaddrs,
                                                    dim,
                                                    &h_info,
                                                    xyz));

#ifndef NDEBUG
    if( h_info != 0 ){
      std::ostringstream msg;
      msg << "Nebo error in LinAlgCUDA Kernel - after matrix solve:\n\t"
          << "Matrix " << -h_info << " may be singular.\n\t"
          << __FILE__ << " : " << __LINE__;
      delete[] h_infoArray;
      cublasSetStream(handle, NULL);
      throw(std::runtime_error(msg.str()));
    }
#endif
    delete[] h_infoArray;
  }

  mat_vec_copy_column_major<<<dimGridReverse,dimBlock, activeIndex, result->at(0).get_stream()>>>(d_lAddrs.get(), d_vNaddrs.get(), 1, xyz, dim);
  sync_field_streams(result->at(0), vec, dim, false);
  sync_field_streams(result->at(0), mat, matDim, false);
  sync_field_streams(result->at(0), *result, dim, true);

  cublasSetStream(handle, NULL);

}
#endif /* __CUDACC__ */

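/*
 * Pipeline of gpu_operate() above, for orientation: (1) gather per-field
 * device pointers; (2) repack each grid point's dim x dim system into a
 * contiguous column-major batch (mat_vec_copy_column_major); (3) batched LU
 * factorization with cublas<t>getrfBatched, without pivoting since
 * d_pivotArray == NULL; (4) batched solve with cublas<t>getrsBatched;
 * (5) scatter the solutions back into the result fields and restore the
 * cuBLAS handle's stream.
 */
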
} // namespace SpatialOps

#endif // SpatialOps_MatVecOps_h