Implemented deep copyonwrite() for nested types

This commit is contained in:
Jiri Pittner 2021-05-23 10:28:10 +02:00
parent 88628fb306
commit 80eb98411f
5 changed files with 55 additions and 5 deletions

View File

@ -73,6 +73,7 @@ extern bool _LA_count_check;
//forward declarations
template<typename C> class NRVec;
template<typename C> class NRVec_from1;
template<typename C> class NRMat;
template<typename C> class NRMat_from1;
template<typename C> class NRSMat;
@ -270,6 +271,7 @@ static void multiput(size_t n, int fd, const std::complex<C> *x, bool dimensions
//NOTE(review): these appear to be members of the element-traits class for std::complex<C>; the class header is outside this hunk - confirm
//copy n elements from src to dest with a single memcpy
static void copy(std::complex<C> *dest, std::complex<C> *src, size_t n) {memcpy(dest,src,n*sizeof(std::complex<C>));}
//zero-fill n elements starting at dest with memset
static void clear(std::complex<C> *dest, size_t n) {memset(dest,0,n*sizeof(std::complex<C>));}
//no-op for this element type (empty body)
static void copyonwrite(std::complex<C> &x) {};
//always true here; the container copyonwrite() routines test this flag to choose memcpy instead of element-by-element copying
static bool is_plaindata() {return true;}
//assign zero to a single element
static void clearme(std::complex<C> &x) {x=0;};
//no-op for this element type (empty body)
static void deallocate(std::complex<C> &x) {};
//return a value with the same real part and the negated imaginary part
static inline std::complex<C> conjugate(const std::complex<C> &x) {return std::complex<C>(x.real(),-x.imag());};
@ -325,6 +327,7 @@ static void multiput(size_t n, int fd, const C *x, bool dimensions=0)
//NOTE(review): these appear to be members of the element-traits class for a plain scalar type C; the class header is outside this hunk - confirm
//copy n elements from src to dest with a single memcpy
static void copy(C *dest, C *src, size_t n) {memcpy(dest,src,n*sizeof(C));}
//zero-fill n elements starting at dest with memset
static void clear(C *dest, size_t n) {memset(dest,0,n*sizeof(C));}
//no-op for this element type (empty body)
static void copyonwrite(C &x) {};
//always true here; the container copyonwrite() routines test this flag to choose memcpy instead of element-by-element copying
static bool is_plaindata() {return true;}
//assign zero to a single element
static void clearme(C &x) {x=0;};
//no-op for this element type (empty body)
static void deallocate(C &x) {};
//returns its argument unchanged
static inline C conjugate(const C &x) {return x;};
@ -358,6 +361,7 @@ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=
static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();}\
static void copyonwrite(X<C> &x) {x.copyonwrite();}\
static bool is_plaindata() {return false;}\
static void clearme(X<C> &x) {x.clear();}\
static void deallocate(X<C> &x) {x.dealloc();}\
};
@ -367,6 +371,7 @@ static void deallocate(X<C> &x) {x.dealloc();}\
generate_traits(NRMat)
generate_traits(NRMat_from1)
generate_traits(NRVec)
generate_traits(NRVec_from1)
generate_traits(SparseMat)
generate_traits(SparseSMat) //product leading to non-symmetric result not implemented
generate_traits(CSRMat)
@ -394,6 +399,7 @@ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=
static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();} \
static void copyonwrite(X<C> &x) {x.copyonwrite();} \
static bool is_plaindata() {return false;}\
static void clearme(X<C> &x) {x.clear();} \
static void deallocate(X<C> &x) {x.dealloc();} \
};

19
mat.h
View File

@ -1036,16 +1036,31 @@ void NRMat<T>::copyonwrite(bool detachonly) {
#ifdef MATPTR
T **newv = new T*[nn];
newv[0] = new T[(size_t)mm*nn];
if(!detachonly) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
//copy the old contents into the freshly allocated block unless the caller only wants to detach
if(!detachonly)
{
//plain data: one memcpy over the whole contiguous block addressed by row pointer 0
if(LA_traits<T>::is_plaindata()) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
else
{
//nested element type: assign element by element and then call copyonwrite on each new element (deep copy-on-write)
//in this MATPTR branch newv and v are arrays of row pointers, so the elements must be addressed through
//row pointer 0 (as the memcpy above does); indexing newv[i] directly would touch the nn-entry pointer array
const size_t total = (size_t)mm*nn; //computed in size_t, like the allocation size above
for(size_t i=0; i<total; ++i) {newv[0][i]=v[0][i]; LA_traits<T>::copyonwrite(newv[0][i]);}
}
}
v = newv;
for(register int i=1; i<nn; i++) v[i] = v[i-1] + mm;
#else
T *newv = new T[(size_t)mm*nn];
if(!detachonly) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
//copy the old contents into the freshly allocated block unless the caller only wants to detach
if(!detachonly)
{
//plain data: one memcpy over the whole block
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
else
{
//nested element type: assign element by element and then call copyonwrite on each new element (deep copy-on-write)
const size_t total = (size_t)mm*nn; //computed in size_t like the allocation, so the product cannot overflow int
for(size_t i=0; i<total; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
}
}
v = newv;
#endif
#ifdef CUDALA
}else{ //matrix is in GPU memory
if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T));
if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);

10
smat.h
View File

@ -937,9 +937,17 @@ void NRSMat<T>::copyonwrite(bool detachonly) {
if(location == cpu) {
#endif
newv = new T[NN2];
if(!detachonly) memcpy(newv, v, NN2*sizeof(T));
if(!detachonly)
{
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, NN2*sizeof(T));
else
{
for(int i=0; i<NN2; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
}
}
#ifdef CUDALA
}else{
f(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
newv = (T *) gpualloc(NN2*sizeof(T));
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat<T>::copyonwrite()");
if(!detachonly) cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);

View File

@ -152,7 +152,20 @@ void SparseMat<T>::copyonwrite(bool detachonly)
count = new int; *count=1;
if(!list) laerror("empty list with count>1");
unsort();
if(!detachonly) copylist(list);
if(!detachonly)
{
copylist(list);
if(!LA_traits<T>::is_plaindata()) //nested copyonwrite
{
matel<T> *l =list;
while(l)
{
LA_traits<T>::copyonwrite(l->elem);
l=l->next;
}
}
}
}
}

10
vec.h
View File

@ -899,9 +899,17 @@ void NRVec<T>::copyonwrite(bool detachonly) {
if(location == cpu){
#endif
newv = new T[nn];
if(!detachonly) memcpy(newv, v, nn*sizeof(T));
if(!detachonly)
{
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, nn*sizeof(T));
else
{
for(int i=0; i<nn; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
}
}
#ifdef CUDALA
}else{
if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
newv = (T *) gpualloc(nn*sizeof(T));
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRVec<T>::copyonwrite()");
if(!detachonly) cublasScopy(nn*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);