Implemented deep copyonwrite() for nested types
This commit is contained in:
parent
88628fb306
commit
80eb98411f
@ -73,6 +73,7 @@ extern bool _LA_count_check;
|
||||
|
||||
//forward declarations
|
||||
template<typename C> class NRVec;
|
||||
template<typename C> class NRVec_from1;
|
||||
template<typename C> class NRMat;
|
||||
template<typename C> class NRMat_from1;
|
||||
template<typename C> class NRSMat;
|
||||
@ -270,6 +271,7 @@ static void multiput(size_t n, int fd, const std::complex<C> *x, bool dimensions
|
||||
static void copy(std::complex<C> *dest, std::complex<C> *src, size_t n) {memcpy(dest,src,n*sizeof(std::complex<C>));}
|
||||
static void clear(std::complex<C> *dest, size_t n) {memset(dest,0,n*sizeof(std::complex<C>));}
|
||||
static void copyonwrite(std::complex<C> &x) {};
|
||||
static bool is_plaindata() {return true;}
|
||||
static void clearme(std::complex<C> &x) {x=0;};
|
||||
static void deallocate(std::complex<C> &x) {};
|
||||
static inline std::complex<C> conjugate(const std::complex<C> &x) {return std::complex<C>(x.real(),-x.imag());};
|
||||
@ -325,6 +327,7 @@ static void multiput(size_t n, int fd, const C *x, bool dimensions=0)
|
||||
static void copy(C *dest, C *src, size_t n) {memcpy(dest,src,n*sizeof(C));}
|
||||
static void clear(C *dest, size_t n) {memset(dest,0,n*sizeof(C));}
|
||||
static void copyonwrite(C &x) {};
|
||||
static bool is_plaindata() {return true;}
|
||||
static void clearme(C &x) {x=0;};
|
||||
static void deallocate(C &x) {};
|
||||
static inline C conjugate(const C &x) {return x;};
|
||||
@ -358,6 +361,7 @@ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=
|
||||
static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
|
||||
static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();}\
|
||||
static void copyonwrite(X<C> &x) {x.copyonwrite();}\
|
||||
static bool is_plaindata() {return false;}\
|
||||
static void clearme(X<C> &x) {x.clear();}\
|
||||
static void deallocate(X<C> &x) {x.dealloc();}\
|
||||
};
|
||||
@ -367,6 +371,7 @@ static void deallocate(X<C> &x) {x.dealloc();}\
|
||||
generate_traits(NRMat)
|
||||
generate_traits(NRMat_from1)
|
||||
generate_traits(NRVec)
|
||||
generate_traits(NRVec_from1)
|
||||
generate_traits(SparseMat)
|
||||
generate_traits(SparseSMat) //product leading to non-symmetric result not implemented
|
||||
generate_traits(CSRMat)
|
||||
@ -394,6 +399,7 @@ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=
|
||||
static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
|
||||
static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();} \
|
||||
static void copyonwrite(X<C> &x) {x.copyonwrite();} \
|
||||
static bool is_plaindata() {return false;}\
|
||||
static void clearme(X<C> &x) {x.clear();} \
|
||||
static void deallocate(X<C> &x) {x.dealloc();} \
|
||||
};
|
||||
|
19
mat.h
19
mat.h
@ -1036,16 +1036,31 @@ void NRMat<T>::copyonwrite(bool detachonly) {
|
||||
#ifdef MATPTR
|
||||
T **newv = new T*[nn];
|
||||
newv[0] = new T[(size_t)mm*nn];
|
||||
if(!detachonly) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
|
||||
if(!detachonly)
|
||||
{
|
||||
if(LA_traits<T>::is_plaindata()) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
|
||||
else
|
||||
{
|
||||
//newv holds only nn row pointers; the mm*nn elements live in the contiguous block newv[0] (and v[0]), so the copy must index through row 0
for(size_t i=0; i<(size_t)mm*nn; ++i) {newv[0][i]=v[0][i]; LA_traits<T>::copyonwrite(newv[0][i]);} //assign the element, then invoke copyonwrite on the new copy
|
||||
}
|
||||
}
|
||||
v = newv;
|
||||
for(register int i=1; i<nn; i++) v[i] = v[i-1] + mm;
|
||||
#else
|
||||
T *newv = new T[(size_t)mm*nn];
|
||||
if(!detachonly) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
|
||||
if(!detachonly)
|
||||
{
|
||||
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
|
||||
else
|
||||
{
|
||||
//element count computed in size_t, matching the (size_t)mm*nn used for the allocation, so that nn*mm cannot overflow int
for(size_t i=0; i<(size_t)mm*nn; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);} //assign the element, then invoke copyonwrite on the new copy
|
||||
}
|
||||
}
|
||||
v = newv;
|
||||
#endif
|
||||
#ifdef CUDALA
|
||||
}else{ //matrix is in GPU memory
|
||||
if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
|
||||
T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T));
|
||||
if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
|
||||
if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
||||
|
10
smat.h
10
smat.h
@ -937,9 +937,17 @@ void NRSMat<T>::copyonwrite(bool detachonly) {
|
||||
if(location == cpu) {
|
||||
#endif
|
||||
newv = new T[NN2];
|
||||
if(!detachonly) memcpy(newv, v, NN2*sizeof(T));
|
||||
if(!detachonly)
|
||||
{
|
||||
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, NN2*sizeof(T));
|
||||
else
|
||||
{
|
||||
//size_t index avoids a signed/unsigned comparison and int overflow for large NN2 (NN2 is defined outside this hunk -- presumably an unsigned element count, confirm)
for(size_t i=0; i<NN2; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);} //assign the element, then invoke copyonwrite on the new copy
|
||||
}
|
||||
}
|
||||
#ifdef CUDALA
|
||||
}else{
|
||||
f(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
|
||||
newv = (T *) gpualloc(NN2*sizeof(T));
|
||||
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat<T>::copyonwrite()");
|
||||
if(!detachonly) cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
||||
|
15
sparsemat.cc
15
sparsemat.cc
@ -152,7 +152,20 @@ void SparseMat<T>::copyonwrite(bool detachonly)
|
||||
count = new int; *count=1;
|
||||
if(!list) laerror("empty list with count>1");
|
||||
unsort();
|
||||
if(!detachonly) copylist(list);
|
||||
if(!detachonly)
|
||||
{
|
||||
copylist(list);
|
||||
if(!LA_traits<T>::is_plaindata()) //nested copyonwrite
|
||||
{
|
||||
matel<T> *l =list;
|
||||
while(l)
|
||||
{
|
||||
LA_traits<T>::copyonwrite(l->elem);
|
||||
l=l->next;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
10
vec.h
10
vec.h
@ -899,9 +899,17 @@ void NRVec<T>::copyonwrite(bool detachonly) {
|
||||
if(location == cpu){
|
||||
#endif
|
||||
newv = new T[nn];
|
||||
if(!detachonly) memcpy(newv, v, nn*sizeof(T));
|
||||
if(!detachonly)
|
||||
{
|
||||
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, nn*sizeof(T));
|
||||
else
|
||||
{
|
||||
for(int i=0; i<nn; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
|
||||
}
|
||||
}
|
||||
#ifdef CUDALA
|
||||
}else{
|
||||
if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
|
||||
newv = (T *) gpualloc(nn*sizeof(T));
|
||||
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRVec<T>::copyonwrite()");
|
||||
if(!detachonly) cublasScopy(nn*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
||||
|
Loading…
Reference in New Issue
Block a user