NRVec: constructor with GPU location option, otimes2vec

2024-05-17 16:17:43 +02:00
parent 0b91e88dca
commit ea2b494abb
2 changed files with 88 additions and 2 deletions
--- a/vec.h
+++ b/vec.h
@@ -130,7 +130,8 @@ public:
 	};
 	
 	//! inlined constructor creating vector of given size filled with prescribed value 
-	inline NRVec(const T &a, const int n);
+	//inline NRVec(const T &a, const int n);
+	inline NRVec(const T &a, const int n, const GPUID loc = undefined); // loc == undefined resolves to DEFAULT_LOC when CUDALA is defined; loc is not used otherwise

 	//! inlined constructor creating vector froman array
 	template<int SIZE> inline NRVec(const T (&a)[SIZE]);
@@ -356,9 +357,13 @@ public:
 	
 	//! compute the outer product of two vectors 
 	const NRMat<T> otimes(const NRVec<T> &rhs, const bool conjugate = false, const T &scale = 1) const;
+
 	//! opeartor for outer product computation
 	inline const NRMat<T> operator|(const NRVec<T> &rhs) const { return otimes(rhs,true); };

+        //! compute the outer product of this vector with rhs (same parameters as otimes), returned as an NRVec rather than an NRMat
+        const NRVec otimes2vec(const NRVec<T> &rhs, const bool conjugate = false, const T &scale = 1) const;
+
 	//! compute the sum of the vector elements 
 	inline const T sum() const {
 		T sum(v[0]);
@@ -678,6 +683,8 @@ std::istream & operator>>(std::istream &s, NRVec<T> &x) {
 * @param[in] a value to be assigned to all vector elements
 * @param[in] n required vector size
 ******************************************************************************/
+
+/* replaced by the one with optional GPUID
 template <typename T>
 inline NRVec<T>::NRVec(const T& a, const int n): nn(n), count(new int) {
 	*count = 1;
@@ -699,6 +706,30 @@ inline NRVec<T>::NRVec(const T& a, const int n): nn(n), count(new int) {
 	}
 #endif
 }
+*/
+
+// Fill constructor: allocates n elements and sets each of them to a. With CUDALA defined, loc picks
+// cpu or gpu storage (undefined means DEFAULT_LOC); without CUDALA, loc is not used at all.
+template <typename T>
+inline NRVec<T>::NRVec(const T& a, const int n,  const GPUID loc): nn(n), count(new int) {
+	*count = 1;
+#ifdef CUDALA
+	location = (loc==undefined?DEFAULT_LOC:loc);
+	if(location == cpu){
+#endif
+		v = new T[n];
+		if(!LA_traits<T>::is_plaindata() || a != (T)0){ for(int i=0; i<n; i++) v[i] = a; }
+		else{ memset(v, 0, nn*sizeof(T)); } // plain data with a == 0: clear the whole buffer at once
+#ifdef CUDALA
+	}else{
+		// both checks run before gpualloc so that nothing has been allocated if laerror does not return
+		if(sizeof(T)%sizeof(float) != 0)laerror("memory alignment error");
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
+		v = (T*) gpualloc(n*sizeof(T));
+		smart_gpu_set(n, a, v);
+	}
+#endif
+}

 /***************************************************************************//**
 * inline constructor creating vector from an array