Implemented deep copyonwrite() for nested types

2021-05-23 10:28:10 +02:00
parent 88628fb306
commit 80eb98411f
5 changed files with 55 additions and 5 deletions
--- a/la_traits.h
+++ b/la_traits.h
@@ -73,6 +73,7 @@ extern bool _LA_count_check;

 //forward declarations
 template<typename C> class NRVec;
+template<typename C> class NRVec_from1; //declared here because generate_traits(NRVec_from1) is instantiated later in this header
 template<typename C> class NRMat;
 template<typename C> class NRMat_from1;
 template<typename C> class NRSMat;
@@ -270,6 +271,7 @@ static void multiput(size_t n, int fd, const std::complex<C> *x, bool dimensions
 static void copy(std::complex<C> *dest, std::complex<C> *src, size_t n) {memcpy(dest,src,n*sizeof(std::complex<C>));}
 static void clear(std::complex<C> *dest, size_t n) {memset(dest,0,n*sizeof(std::complex<C>));}
 static void copyonwrite(std::complex<C> &x) {};
+static bool is_plaindata() {return true;} //true: the containers' copyonwrite() may duplicate arrays of this type with memcpy
 static void clearme(std::complex<C> &x) {x=0;};
 static void deallocate(std::complex<C> &x) {};
 static inline std::complex<C> conjugate(const std::complex<C> &x) {return std::complex<C>(x.real(),-x.imag());};
@@ -325,6 +327,7 @@ static void multiput(size_t n, int fd, const C *x, bool dimensions=0)
 static void copy(C *dest, C *src, size_t n) {memcpy(dest,src,n*sizeof(C));}
 static void clear(C *dest, size_t n) {memset(dest,0,n*sizeof(C));}
 static void copyonwrite(C &x) {};
+static bool is_plaindata() {return true;} //true: the containers' copyonwrite() may duplicate arrays of this type with memcpy
 static void clearme(C &x) {x=0;};
 static void deallocate(C &x) {};
 static inline C conjugate(const C &x) {return x;};
@@ -358,6 +361,7 @@ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=
 static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
 static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();}\
 static void copyonwrite(X<C> &x) {x.copyonwrite();}\
+static bool is_plaindata() {return false;} /*false: enclosing containers copy such elements one by one and call copyonwrite() on each*/ \
 static void clearme(X<C> &x) {x.clear();}\
 static void deallocate(X<C> &x) {x.dealloc();}\
 };
@@ -367,6 +371,7 @@ static void deallocate(X<C> &x) {x.dealloc();}\
 generate_traits(NRMat)
 generate_traits(NRMat_from1)
 generate_traits(NRVec)
+generate_traits(NRVec_from1) //traits produced by the same macro as for NRVec
 generate_traits(SparseMat)
 generate_traits(SparseSMat) //product leading to non-symmetric result not implemented
 generate_traits(CSRMat) 
@@ -394,6 +399,7 @@ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=
 static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];}  \
 static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();} \
 static void copyonwrite(X<C> &x) {x.copyonwrite();} \
+static bool is_plaindata() {return false;} /*false: enclosing containers copy such elements one by one and call copyonwrite() on each*/ \
 static void clearme(X<C> &x) {x.clear();} \
 static void deallocate(X<C> &x) {x.dealloc();} \
 };
--- a/mat.h
+++ b/mat.h
@@ -1036,16 +1036,31 @@ void NRMat<T>::copyonwrite(bool detachonly) {
 		#ifdef MATPTR
 			T **newv = new T*[nn];
 			newv[0] = new T[(size_t)mm*nn];
-			if(!detachonly) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
+			if(!detachonly) //keep the old contents unless the caller only wants to detach
+				{
+				if(LA_traits<T>::is_plaindata()) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
+                                else //nested type: assign each element, then let it run its own copyonwrite()
+                                        {
+                                        for(size_t i=0; i<(size_t)mm*nn; ++i) {newv[0][i]=v[0][i]; LA_traits<T>::copyonwrite(newv[0][i]);} //newv/v are row-pointer tables here; the mm*nn elements live in newv[0]/v[0]
+                                        }
+				}
 			v = newv;
 			for(register int i=1; i<nn; i++) v[i] = v[i-1] + mm;
 		#else
 			T *newv = new T[(size_t)mm*nn];
-			if(!detachonly) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
+			if(!detachonly) //keep the old contents unless the caller only wants to detach
+				{
+				if(LA_traits<T>::is_plaindata()) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
+				else //nested type: assign each element, then let it run its own copyonwrite()
+                                        {
+                                        for(size_t i=0; i<(size_t)mm*nn; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);} //size_t index: mm*nn may exceed the range of int
+                                        }
+				}
 			v = newv;
 		#endif
 #ifdef CUDALA
 		}else{ //matrix is in GPU memory
+			if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory"); //the raw device copy below cannot deep-copy elements
 			T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T));
 			if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
 			if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
--- a/smat.h
+++ b/smat.h
@@ -937,9 +937,17 @@ void NRSMat<T>::copyonwrite(bool detachonly) {
 		if(location == cpu) {
 #endif
 			newv = new T[NN2];
-			if(!detachonly) memcpy(newv, v, NN2*sizeof(T));
+			if(!detachonly) //keep the old contents unless the caller only wants to detach
+				{
+				if(LA_traits<T>::is_plaindata()) memcpy(newv, v, NN2*sizeof(T));
+				else //nested type: assign each element, then let it run its own copyonwrite()
+                                        {
+                                        for(size_t i=0; i<NN2; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);} //size_t index: NN2 may exceed the range of int
+                                        }
+				}
 #ifdef CUDALA
 		}else{
+			if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory"); //the raw device copy below cannot deep-copy elements
 			newv = (T *) gpualloc(NN2*sizeof(T));
 			if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat<T>::copyonwrite()");
 			if(!detachonly) cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
--- a/sparsemat.cc
+++ b/sparsemat.cc
@@ -152,7 +152,20 @@ void SparseMat<T>::copyonwrite(bool detachonly)
                count = new int; *count=1;
 		if(!list) laerror("empty list with count>1");
 		unsort();
-		if(!detachonly) copylist(list);
+		if(!detachonly) //keep the old contents unless the caller only wants to detach
+			{
+			copylist(list); //NOTE(review): presumably replaces list with a private copy of the chain - confirm in copylist()
+			if(!LA_traits<T>::is_plaindata()) //nested copyonwrite
+				{
+				matel<T> *l =list;
+				while(l) //walk the whole list and call copyonwrite() on every stored element
+        				{
+				        LA_traits<T>::copyonwrite(l->elem);
+				        l=l->next;
+       					}
+
+				}
+			}
                }
 }

--- a/vec.h
+++ b/vec.h
@@ -899,9 +899,17 @@ void NRVec<T>::copyonwrite(bool detachonly) {
 		if(location == cpu){
 #endif
 			newv = new T[nn];
-			if(!detachonly) memcpy(newv, v, nn*sizeof(T));
+			if(!detachonly) //keep the old contents unless the caller only wants to detach
+				{
+				if(LA_traits<T>::is_plaindata()) memcpy(newv, v, nn*sizeof(T));
+				else //nested type: assign each element, then let it run its own copyonwrite()
+					{
+					for(int i=0; i<nn; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
+					}
+				}
 #ifdef CUDALA
 		}else{
+			if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory"); //the raw device copy below cannot deep-copy elements
 			newv = (T *) gpualloc(nn*sizeof(T));
 			if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRVec<T>::copyonwrite()");
 			if(!detachonly) cublasScopy(nn*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);