Implemented deep copyonwrite() for nested types
This commit is contained in:
parent
88628fb306
commit
80eb98411f
@ -73,6 +73,7 @@ extern bool _LA_count_check;
|
|||||||
|
|
||||||
//forward declarations
|
//forward declarations
|
||||||
template<typename C> class NRVec;
|
template<typename C> class NRVec;
|
||||||
|
template<typename C> class NRVec_from1;
|
||||||
template<typename C> class NRMat;
|
template<typename C> class NRMat;
|
||||||
template<typename C> class NRMat_from1;
|
template<typename C> class NRMat_from1;
|
||||||
template<typename C> class NRSMat;
|
template<typename C> class NRSMat;
|
||||||
@ -270,6 +271,7 @@ static void multiput(size_t n, int fd, const std::complex<C> *x, bool dimensions
|
|||||||
static void copy(std::complex<C> *dest, std::complex<C> *src, size_t n) {memcpy(dest,src,n*sizeof(std::complex<C>));}
|
static void copy(std::complex<C> *dest, std::complex<C> *src, size_t n) {memcpy(dest,src,n*sizeof(std::complex<C>));}
|
||||||
static void clear(std::complex<C> *dest, size_t n) {memset(dest,0,n*sizeof(std::complex<C>));}
|
static void clear(std::complex<C> *dest, size_t n) {memset(dest,0,n*sizeof(std::complex<C>));}
|
||||||
static void copyonwrite(std::complex<C> &x) {};
|
static void copyonwrite(std::complex<C> &x) {};
|
||||||
|
//reports whether elements of this type are plain data; the copyonwrite() routines test this flag and use memcpy when it is true
static bool is_plaindata()
{
return true;
}
|
||||||
static void clearme(std::complex<C> &x) {x=0;};
|
static void clearme(std::complex<C> &x) {x=0;};
|
||||||
static void deallocate(std::complex<C> &x) {};
|
static void deallocate(std::complex<C> &x) {};
|
||||||
static inline std::complex<C> conjugate(const std::complex<C> &x) {return std::complex<C>(x.real(),-x.imag());};
|
static inline std::complex<C> conjugate(const std::complex<C> &x) {return std::complex<C>(x.real(),-x.imag());};
|
||||||
@ -325,6 +327,7 @@ static void multiput(size_t n, int fd, const C *x, bool dimensions=0)
|
|||||||
static void copy(C *dest, C *src, size_t n) {memcpy(dest,src,n*sizeof(C));}
|
static void copy(C *dest, C *src, size_t n) {memcpy(dest,src,n*sizeof(C));}
|
||||||
static void clear(C *dest, size_t n) {memset(dest,0,n*sizeof(C));}
|
static void clear(C *dest, size_t n) {memset(dest,0,n*sizeof(C));}
|
||||||
static void copyonwrite(C &x) {};
|
static void copyonwrite(C &x) {};
|
||||||
|
//reports whether elements of this type are plain data; the copyonwrite() routines test this flag and use memcpy when it is true
static bool is_plaindata()
{
return true;
}
|
||||||
static void clearme(C &x) {x=0;};
|
static void clearme(C &x) {x=0;};
|
||||||
static void deallocate(C &x) {};
|
static void deallocate(C &x) {};
|
||||||
static inline C conjugate(const C &x) {return x;};
|
static inline C conjugate(const C &x) {return x;};
|
||||||
@ -358,6 +361,7 @@ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=
|
|||||||
static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
|
static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
|
||||||
static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();}\
|
static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();}\
|
||||||
static void copyonwrite(X<C> &x) {x.copyonwrite();}\
|
static void copyonwrite(X<C> &x) {x.copyonwrite();}\
|
||||||
|
static bool is_plaindata() {return false;} /*not plain data: the copyonwrite() routines copy such elements one by one and call LA_traits<T>::copyonwrite() on each*/ \
|
||||||
static void clearme(X<C> &x) {x.clear();}\
|
static void clearme(X<C> &x) {x.clear();}\
|
||||||
static void deallocate(X<C> &x) {x.dealloc();}\
|
static void deallocate(X<C> &x) {x.dealloc();}\
|
||||||
};
|
};
|
||||||
@ -367,6 +371,7 @@ static void deallocate(X<C> &x) {x.dealloc();}\
|
|||||||
generate_traits(NRMat)
|
generate_traits(NRMat)
|
||||||
generate_traits(NRMat_from1)
|
generate_traits(NRMat_from1)
|
||||||
generate_traits(NRVec)
|
generate_traits(NRVec)
|
||||||
|
generate_traits(NRVec_from1)
|
||||||
generate_traits(SparseMat)
|
generate_traits(SparseMat)
|
||||||
generate_traits(SparseSMat) //product leading to non-symmetric result not implemented
|
generate_traits(SparseSMat) //product leading to non-symmetric result not implemented
|
||||||
generate_traits(CSRMat)
|
generate_traits(CSRMat)
|
||||||
@ -394,6 +399,7 @@ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=
|
|||||||
static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
|
static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
|
||||||
static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();} \
|
static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();} \
|
||||||
static void copyonwrite(X<C> &x) {x.copyonwrite();} \
|
static void copyonwrite(X<C> &x) {x.copyonwrite();} \
|
||||||
|
static bool is_plaindata() {return false;} /*not plain data: the copyonwrite() routines copy such elements one by one and call LA_traits<T>::copyonwrite() on each*/ \
|
||||||
static void clearme(X<C> &x) {x.clear();} \
|
static void clearme(X<C> &x) {x.clear();} \
|
||||||
static void deallocate(X<C> &x) {x.dealloc();} \
|
static void deallocate(X<C> &x) {x.dealloc();} \
|
||||||
};
|
};
|
||||||
|
19
mat.h
19
mat.h
@ -1036,16 +1036,31 @@ void NRMat<T>::copyonwrite(bool detachonly) {
|
|||||||
#ifdef MATPTR
|
#ifdef MATPTR
|
||||||
T **newv = new T*[nn];
|
T **newv = new T*[nn];
|
||||||
newv[0] = new T[(size_t)mm*nn];
|
newv[0] = new T[(size_t)mm*nn];
|
||||||
if(!detachonly) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
|
if(!detachonly)
|
||||||
|
{
|
||||||
|
if(LA_traits<T>::is_plaindata()) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(int i=0; i<nn*mm; ++i) {newv[0][i]=v[0][i]; LA_traits<T>::copyonwrite(newv[0][i]);}
|
||||||
|
}
|
||||||
|
}
|
||||||
v = newv;
|
v = newv;
|
||||||
for(register int i=1; i<nn; i++) v[i] = v[i-1] + mm;
|
for(register int i=1; i<nn; i++) v[i] = v[i-1] + mm;
|
||||||
#else
|
#else
|
||||||
T *newv = new T[(size_t)mm*nn];
|
T *newv = new T[(size_t)mm*nn];
|
||||||
if(!detachonly) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
|
if(!detachonly)
|
||||||
|
{
|
||||||
|
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(int i=0; i<nn*mm; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
|
||||||
|
}
|
||||||
|
}
|
||||||
v = newv;
|
v = newv;
|
||||||
#endif
|
#endif
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{ //matrix is in GPU memory
|
}else{ //matrix is in GPU memory
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
|
||||||
T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T));
|
T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T));
|
||||||
if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
|
if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
|
||||||
if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
||||||
|
10
smat.h
10
smat.h
@ -937,9 +937,17 @@ void NRSMat<T>::copyonwrite(bool detachonly) {
|
|||||||
if(location == cpu) {
|
if(location == cpu) {
|
||||||
#endif
|
#endif
|
||||||
newv = new T[NN2];
|
newv = new T[NN2];
|
||||||
if(!detachonly) memcpy(newv, v, NN2*sizeof(T));
|
if(!detachonly)
|
||||||
|
{
|
||||||
|
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, NN2*sizeof(T));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(int i=0; i<NN2; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
|
||||||
|
}
|
||||||
|
}
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
|
||||||
newv = (T *) gpualloc(NN2*sizeof(T));
|
newv = (T *) gpualloc(NN2*sizeof(T));
|
||||||
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat<T>::copyonwrite()");
|
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat<T>::copyonwrite()");
|
||||||
if(!detachonly) cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
if(!detachonly) cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
||||||
|
15
sparsemat.cc
15
sparsemat.cc
@ -152,7 +152,20 @@ void SparseMat<T>::copyonwrite(bool detachonly)
|
|||||||
count = new int; *count=1;
|
count = new int; *count=1;
|
||||||
if(!list) laerror("empty list with count>1");
|
if(!list) laerror("empty list with count>1");
|
||||||
unsort();
|
unsort();
|
||||||
if(!detachonly) copylist(list);
|
if(!detachonly)
|
||||||
|
{
|
||||||
|
copylist(list);
|
||||||
|
if(!LA_traits<T>::is_plaindata()) //nested copyonwrite
|
||||||
|
{
|
||||||
|
matel<T> *l =list;
|
||||||
|
while(l)
|
||||||
|
{
|
||||||
|
LA_traits<T>::copyonwrite(l->elem);
|
||||||
|
l=l->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
10
vec.h
10
vec.h
@ -899,9 +899,17 @@ void NRVec<T>::copyonwrite(bool detachonly) {
|
|||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
newv = new T[nn];
|
newv = new T[nn];
|
||||||
if(!detachonly) memcpy(newv, v, nn*sizeof(T));
|
if(!detachonly)
|
||||||
|
{
|
||||||
|
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, nn*sizeof(T));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(int i=0; i<nn; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
|
||||||
|
}
|
||||||
|
}
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
|
||||||
newv = (T *) gpualloc(nn*sizeof(T));
|
newv = (T *) gpualloc(nn*sizeof(T));
|
||||||
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRVec<T>::copyonwrite()");
|
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRVec<T>::copyonwrite()");
|
||||||
if(!detachonly) cublasScopy(nn*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
if(!detachonly) cublasScopy(nn*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
||||||
|
Loading…
Reference in New Issue
Block a user