diff --git a/mat.cc b/mat.cc
index a5f315c..976e7e7 100644
--- a/mat.cc
+++ b/mat.cc
@@ -550,6 +550,7 @@ template <typename T>
 const NRMat<T> NRMat<T>::operator&(const NRMat<T> &b) const {
 	SAME_LOC(*this, b);
 	NRMat<T> result((T)0, nn + b.nn, mm + b.mm, getlocation());
+	if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 #ifdef CUDALA
 	if(location == cpu){
 #endif
@@ -738,6 +739,7 @@ const NRMat<T> NRMat<T>::submatrix(const int fromrow, const int torow, const int
 	const int n = torow - fromrow + 1;
 	const int m = tocol - fromcol + 1;
 	NRMat<T> r(n, m, getlocation());
+	if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 
 #ifdef CUDALA
 	if(location == cpu){
@@ -775,6 +777,7 @@ void NRMat<T>::storesubmatrix(const int fromrow, const int fromcol, const NRMat
 	if(fromrow<0 || fromrow>=nn || torow>=nn || fromcol<0 || fromcol>=mm || tocol>=mm) laerror("bad indices in storesubmatrix");
 #endif
 	SAME_LOC(*this, rhs);
+	if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 	const int m = tocol - fromcol + 1;
 
 	for(register int i = fromrow; i <= torow; ++i){
diff --git a/mat.h b/mat.h
index e7d6017..4391179 100644
--- a/mat.h
+++ b/mat.h
@@ -478,7 +478,7 @@ NRMat<T>::NRMat(const T &a, const int n, const int m, const GPUID loc) : nn(n),
 #else
 	p = v = new T[nm];
 #endif
-	if (a != (T)0){
+	if (!LA_traits<T>::is_plaindata() || a != (T)0){
 		for (register int i=0; i<nm; i++) v[i] = a;
 	}
 #ifdef CUDALA
@@ -490,6 +490,7 @@ NRMat<T>::NRMat(const T &a, const int n, const int m, const GPUID loc) : nn(n),
 
 		if(sizeof(T)%sizeof(float) != 0)laerror("memory alignment error");
 		v = (T*)gpualloc(nm*sizeof(T));
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		smart_gpu_set(nm, a, v);
 	}
 #endif
@@ -522,7 +523,7 @@ inline NRMat<T>::NRMat(const T (&a)[R][C]) : count(new int) {
 #endif
 #ifdef CUDALA
 	}else{
-		if!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		v = (T*) gpualloc(nn*mm*sizeof(T));
 		cublasSetVector(nm, sizeof(T), a, 1, v, 1);
 	}
@@ -554,13 +555,14 @@ NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new
 #else
 	p = v = new T[nm];
 #endif
-	if (a != (T)0)
+	if (!LA_traits<T>::is_plaindata() || a != (T)0)
 		for (register int i=0; i<nm; i++) v[i] = a;
 
 #ifdef CUDALA
 	}else{
 		if(sizeof(T)%sizeof(float) != 0)laerror("memory alignment error");
 		v = (T*)gpualloc(nm*sizeof(T));
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		smart_gpu_set(nm, a, v);
 	}
 #endif
@@ -588,14 +590,17 @@ NRMat<T>::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new
 	v = new T*[n];
 	v[0] = new T[nm];
 	for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
-	memcpy(v[0], a, nm*sizeof(T));
+	if(LA_traits<T>::is_plaindata()) memcpy(v[0], a, nm*sizeof(T));
+	else for(int i=0; i<nm; ++i) v[0][i] = a[i];
 #else
 	p = v = new T[nm];
-	memcpy(v, a, nm*sizeof(T));
+	if(LA_traits<T>::is_plaindata()) memcpy(v, a, nm*sizeof(T));
+	else for(int i=0; i<nm; ++i) v[i] = a[i];
 #endif
 #ifdef CUDALA
 	}else{
 		v = (T*) gpualloc(nm*sizeof(T));
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		cublasSetVector(nm, sizeof(T), a, 1, v, 1);
 	}
 #endif
diff --git a/smat.h b/smat.h
index 052c7a1..c2c74e2 100644
--- a/smat.h
+++ b/smat.h
@@ -230,12 +230,13 @@ inline NRSMat<T>::NRSMat(const T& a, const int n) : nn(n), count(new int(1)) {
 	if(location == cpu){
 #endif
 		v = new T[NN2];
-		if(a != (T)0) for(register size_t i = 0; i<NN2; i++) v[i] = a;
+		if(!LA_traits<T>::is_plaindata() || a != (T)0) for(register size_t i = 0; i<NN2; i++) v[i] = a;
 
 
 #ifdef CUDALA
 	}else{
 		v = (T*) gpualloc(NN2*sizeof(T));
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		cublasSetVector(NN2, sizeof(T), &a, 0, v, 1);
 	}
 #endif
@@ -253,10 +254,12 @@ inline NRSMat<T>::NRSMat(const T *a, const int n) : nn(n), count(new int(1)) {
 	location = DEFAULT_LOC;
 	if(location == cpu){
 #endif
-	memcpy(v, a, NN2*sizeof(T));
+	if(LA_traits<T>::is_plaindata()) memcpy(v, a, NN2*sizeof(T));
+	else for( int i=0; i<NN2; ++i) v[i] = a[i];
 #ifdef CUDALA
 	}else{
 		v = (T*) gpualloc(NN2*sizeof(T));
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		cublasSetVector(NN2, sizeof(T), a, 1, v, 1);
 	}
 #endif
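Note on the mat.cc/mat.h/smat.h changes above: they all apply one idiom. Raw-byte operations (memcpy from a source array, skipping the fill loop when a == (T)0, cublasSetVector onto the GPU) are only safe when the element type T is plain data; for class types the constructors must fall back to element-wise assignment so that T::operator= actually runs, and the CUDA branches reject non-plain types with laerror(). A minimal self-contained sketch of the dispatch follows; traits and copydata are hypothetical stand-ins for LA_traits<T>::is_plaindata() and the constructor bodies, assuming is_plaindata() means "trivially copyable":

#include <cstring>
#include <iostream>
#include <string>
#include <type_traits>

// Illustrative stand-in for LA_traits<T>::is_plaindata(); taken here to mean
// "trivially copyable", i.e. safe to initialize and copy as raw bytes.
template <typename T>
struct traits {
	static bool is_plaindata() { return std::is_trivially_copyable<T>::value; }
};

// The copy idiom the patch installs in the constructors: memcpy for plain
// data, element-wise assignment (invoking T::operator=) for class types.
template <typename T>
void copydata(T *dst, const T *src, int n)
{
	if(traits<T>::is_plaindata()) std::memcpy(dst, src, n*sizeof(T));
	else for(int i=0; i<n; ++i) dst[i] = src[i];
}

int main()
{
	double a[3] = {1.0, 2.0, 3.0}, b[3];
	copydata(b, a, 3);              // plain data: memcpy path

	std::string s[2] = {"ab", "cd"}, t[2];
	copydata(t, s, 2);              // class type: element-wise path (memcpy here would be undefined behavior)

	std::cout << b[2] << ' ' << t[1] << '\n'; // prints: 3 cd
	return 0;
}

Since is_plaindata() is a constant for each instantiation, the untaken branch is dead code the optimizer removes, so plain types keep the memcpy fast path at no extra cost.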
diff --git a/vec.cc b/vec.cc
index 7fe3348..fc121ce 100644
--- a/vec.cc
+++ b/vec.cc
@@ -443,7 +443,7 @@ NRVec<T>& NRVec<T>::operator=(const T &a){
 	NOT_GPU(*this);
 	copyonwrite();
 
-	if(a != (T)0){
+	if(!LA_traits<T>::is_plaindata() || a != (T)0){
 		for(register int i=0; i<nn; i++) v[i] = a;
 	}
 	return *this;
@@ -450,6 +450,19 @@
 }
 
 
+template <typename T>
+const int NRVec<T>::find(const T &val) const
+{
+for(int i=0; i<nn; ++i) if(v[i] == val) return i;
+return -1;
+}
+
+template <typename T>
+const int NRVec<T>::findthr(const T &val, const typename LA_traits<T>::normtype &thr) const
+{
+for(int i=0; i<nn; ++i) if(std::abs(v[i] - val) <= thr) return i;
+return -1;
+}
 
 
 
diff --git a/vec.h b/vec.h
--- a/vec.h
+++ b/vec.h
@@ -284,3 +284,7 @@
+	//! find an element by value, return its index, or -1 if not present
+	const int find(const T &val) const;
+	const int findthr(const T &val, const typename LA_traits<T>::normtype &thr=0) const;
+
 	//! determine the maximal element (in the absolute value) of this vector
 	inline const T amax() const;
 	//! determine the minimal element (in the absolute value) of this vector
@@ -570,14 +574,15 @@ inline NRVec<T>::NRVec(const T& a, const int n): nn(n), count(new int) {
 	if(location == cpu){
 #endif
 		v = new T[n];
-		if(a != (T)0){
-			for(register int i=0; i<n; i++) v[i] = a;
+		if(!LA_traits<T>::is_plaindata() || a != (T)0){
+			for(register int i=0; i<n; i++) v[i] = a;
 		}
 		*count = 1;
 
 #ifdef CUDALA
 	}else{
 		v = (T*)gpualloc(n*sizeof(T));
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		smart_gpu_set(n, a, v);
 	}
 #endif
@@ -602,7 +607,7 @@ inline NRVec<T>::NRVec(const T (&a)[SIZE]) : count(new int) {
 #ifdef CUDALA
 	}else{
 		v = (T*) gpualloc(nn*sizeof(T));
-		if!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		cublasSetVector(nn, sizeof(T), a, 1, v, 1);
 		TEST_CUBLAS("cublasSetVector");
 	}
@@ -626,10 +631,12 @@ inline NRVec<T>::NRVec(const T *a, const int n): nn(n), count(new int) {
 #endif
 		v = new T[n];
 		*count = 1;
-		memcpy(v, a, n*sizeof(T));
+		if(LA_traits<T>::is_plaindata()) memcpy(v, a, n*sizeof(T));
+		else for( int i=0; i<n; ++i) v[i] = a[i];
 #ifdef CUDALA
 	}else{
 		v = (T*) gpualloc(n*sizeof(T));
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		cublasSetVector(n, sizeof(T), a, 1, v, 1);
 		TEST_CUBLAS("cublasSetVector");
 	}
@@ -653,10 +660,12 @@ inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int)
 #endif
 		v = new T[n];
 		*count = 1;
-		memcpy(v, a, n*sizeof(T));
+		if(LA_traits<T>::is_plaindata()) memcpy(v, a, n*sizeof(T));
+		else for( int i=0; i<n; ++i) v[i] = a[i];
 #ifdef CUDALA
 	}else{
 		v = (T*) gpualloc(n*sizeof(T));
+		if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 		cublasSetVector(n, sizeof(T), a, 1, v, 1);
 		TEST_CUBLAS("cublasSetVector");
 	}
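Note on the vec.cc/vec.h changes above: besides extending the same plain-data guards to NRVec, the patch adds two linear-search members, find() (exact match) and findthr() (match within a threshold of type LA_traits<T>::normtype, defaulting to 0). A hypothetical re-implementation over a bare array, to show the intended semantics only; these are free functions using plain int/double in place of the member signatures and normtype above:

#include <cmath>
#include <iostream>

// Analog of NRVec<T>::find(): index of the first element equal to val, or -1.
template <typename T>
int find(const T *v, int n, const T &val)
{
	for(int i=0; i<n; ++i) if(v[i] == val) return i;
	return -1;
}

// Analog of NRVec<T>::findthr(): index of the first element within
// threshold thr of val, or -1 when there is none.
template <typename T>
int findthr(const T *v, int n, const T &val, double thr)
{
	for(int i=0; i<n; ++i) if(std::abs(v[i] - val) <= thr) return i;
	return -1;
}

int main()
{
	const double v[4] = {0.5, 1.0, 2.0, 4.0};
	std::cout << find(v, 4, 2.0)                          // 2: exact hit
	          << ' ' << find(v, 4, 2.1)                   // -1: no exact hit
	          << ' ' << findthr(v, 4, 2.1, 0.2) << '\n';  // 2: hit within threshold
	return 0;
}

Both routines return the index of the first match, or -1 when none exists; the scan is O(n).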