From 80eb98411ff63a1e3be85d5ab99dbfdd511cf02f Mon Sep 17 00:00:00 2001 From: Jiri Pittner Date: Sun, 23 May 2021 10:28:10 +0200 Subject: [PATCH] Implemented deep copyonwrite() for nested types --- la_traits.h | 6 ++++++ mat.h | 19 +++++++++++++++++-- smat.h | 10 +++++++++- sparsemat.cc | 15 ++++++++++++++- vec.h | 10 +++++++++- 5 files changed, 55 insertions(+), 5 deletions(-) diff --git a/la_traits.h b/la_traits.h index f525acc..b51c4a1 100644 --- a/la_traits.h +++ b/la_traits.h @@ -73,6 +73,7 @@ extern bool _LA_count_check; //forward declarations template class NRVec; +template class NRVec_from1; template class NRMat; template class NRMat_from1; template class NRSMat; @@ -270,6 +271,7 @@ static void multiput(size_t n, int fd, const std::complex *x, bool dimensions static void copy(std::complex *dest, std::complex *src, size_t n) {memcpy(dest,src,n*sizeof(std::complex));} static void clear(std::complex *dest, size_t n) {memset(dest,0,n*sizeof(std::complex));} static void copyonwrite(std::complex &x) {}; +static bool is_plaindata() {return true;} static void clearme(std::complex &x) {x=0;}; static void deallocate(std::complex &x) {}; static inline std::complex conjugate(const std::complex &x) {return std::complex(x.real(),-x.imag());}; @@ -325,6 +327,7 @@ static void multiput(size_t n, int fd, const C *x, bool dimensions=0) static void copy(C *dest, C *src, size_t n) {memcpy(dest,src,n*sizeof(C));} static void clear(C *dest, size_t n) {memset(dest,0,n*sizeof(C));} static void copyonwrite(C &x) {}; +static bool is_plaindata() {return true;} static void clearme(C &x) {x=0;}; static void deallocate(C &x) {}; static inline C conjugate(const C &x) {return x;}; @@ -358,6 +361,7 @@ static void multiget(size_t n,int fd, X *x, bool dimensions=1) {for(size_t i= static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i &x) {x.copyonwrite();}\ +static bool is_plaindata() {return false;}\ static void clearme(X &x) {x.clear();}\ static void deallocate(X &x) {x.dealloc();}\ 
}; @@ -367,6 +371,7 @@ static void deallocate(X &x) {x.dealloc();}\ generate_traits(NRMat) generate_traits(NRMat_from1) generate_traits(NRVec) +generate_traits(NRVec_from1) generate_traits(SparseMat) generate_traits(SparseSMat) //product leading to non-symmetric result not implemented generate_traits(CSRMat) @@ -394,6 +399,7 @@ static void multiget(size_t n,int fd, X *x, bool dimensions=1) {for(size_t i= static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i &x) {x.copyonwrite();} \ +static bool is_plaindata() {return false;}\ static void clearme(X &x) {x.clear();} \ static void deallocate(X &x) {x.dealloc();} \ }; diff --git a/mat.h b/mat.h index 089b030..9964b53 100644 --- a/mat.h +++ b/mat.h @@ -1036,16 +1036,31 @@ void NRMat::copyonwrite(bool detachonly) { #ifdef MATPTR T **newv = new T*[nn]; newv[0] = new T[(size_t)mm*nn]; - if(!detachonly) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T)); + if(!detachonly) + { + if(LA_traits::is_plaindata()) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T)); + else + { + for(int i=0; i::copyonwrite(newv[i]);} + } + } v = newv; for(register int i=1; i::is_plaindata()) memcpy(newv, v, (size_t)mm*nn*sizeof(T)); + else + { + for(int i=0; i::copyonwrite(newv[i]);} + } + } v = newv; #endif #ifdef CUDALA }else{ //matrix is in GPU memory + if(!LA_traits::is_plaindata()) laerror("nested types not supported on gpu memory"); T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T)); if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem"); if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1); diff --git a/smat.h b/smat.h index 3dd2c10..cc3c87e 100644 --- a/smat.h +++ b/smat.h @@ -937,9 +937,17 @@ void NRSMat::copyonwrite(bool detachonly) { if(location == cpu) { #endif newv = new T[NN2]; - if(!detachonly) memcpy(newv, v, NN2*sizeof(T)); + if(!detachonly) + { + if(LA_traits::is_plaindata()) memcpy(newv, v, NN2*sizeof(T)); + else + { + for(int i=0; i::copyonwrite(newv[i]);} + } + } 
#ifdef CUDALA }else{ + if(!LA_traits::is_plaindata()) laerror("nested types not supported on gpu memory"); newv = (T *) gpualloc(NN2*sizeof(T)); if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat::copyonwrite()"); if(!detachonly) cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1); diff --git a/sparsemat.cc b/sparsemat.cc index d5c4227..4f08786 100644 --- a/sparsemat.cc +++ b/sparsemat.cc @@ -152,7 +152,20 @@ void SparseMat::copyonwrite(bool detachonly) count = new int; *count=1; if(!list) laerror("empty list with count>1"); unsort(); - if(!detachonly) copylist(list); + if(!detachonly) + { + copylist(list); + if(!LA_traits::is_plaindata()) //nested copyonwrite + { + matel *l =list; + while(l) + { + LA_traits::copyonwrite(l->elem); + l=l->next; + } + + } + } } } diff --git a/vec.h b/vec.h index 39d9473..4ba8ac2 100644 --- a/vec.h +++ b/vec.h @@ -899,9 +899,17 @@ void NRVec::copyonwrite(bool detachonly) { if(location == cpu){ #endif newv = new T[nn]; - if(!detachonly) memcpy(newv, v, nn*sizeof(T)); + if(!detachonly) + { + if(LA_traits::is_plaindata()) memcpy(newv, v, nn*sizeof(T)); + else + { + for(int i=0; i::copyonwrite(newv[i]);} + } + } #ifdef CUDALA }else{ + if(!LA_traits::is_plaindata()) laerror("nested types not supported on gpu memory"); newv = (T *) gpualloc(nn*sizeof(T)); if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRVec::copyonwrite()"); if(!detachonly) cublasScopy(nn*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);