Added tests for plain data in the constructors of vec, mat, and smat
This commit is contained in:
parent
b50f9b36b1
commit
d96531f340
3
mat.cc
3
mat.cc
@ -550,6 +550,7 @@ template <typename T>
|
|||||||
const NRMat<T> NRMat<T>::operator&(const NRMat<T> &b) const {
|
const NRMat<T> NRMat<T>::operator&(const NRMat<T> &b) const {
|
||||||
SAME_LOC(*this, b);
|
SAME_LOC(*this, b);
|
||||||
NRMat<T> result((T)0, nn + b.nn, mm + b.mm, getlocation());
|
NRMat<T> result((T)0, nn + b.nn, mm + b.mm, getlocation());
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
@ -738,6 +739,7 @@ const NRMat<T> NRMat<T>::submatrix(const int fromrow, const int torow, const int
|
|||||||
const int n = torow - fromrow + 1;
|
const int n = torow - fromrow + 1;
|
||||||
const int m = tocol - fromcol + 1;
|
const int m = tocol - fromcol + 1;
|
||||||
NRMat<T> r(n, m, getlocation());
|
NRMat<T> r(n, m, getlocation());
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
|
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
@ -775,6 +777,7 @@ void NRMat<T>::storesubmatrix(const int fromrow, const int fromcol, const NRMat
|
|||||||
if(fromrow<0 || fromrow>=nn || torow>=nn || fromcol<0 || fromcol>=mm || tocol>=mm) laerror("bad indices in storesubmatrix");
|
if(fromrow<0 || fromrow>=nn || torow>=nn || fromcol<0 || fromcol>=mm || tocol>=mm) laerror("bad indices in storesubmatrix");
|
||||||
#endif
|
#endif
|
||||||
SAME_LOC(*this, rhs);
|
SAME_LOC(*this, rhs);
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
|
|
||||||
const int m = tocol - fromcol + 1;
|
const int m = tocol - fromcol + 1;
|
||||||
for(register int i = fromrow; i <= torow; ++i){
|
for(register int i = fromrow; i <= torow; ++i){
|
||||||
|
15
mat.h
15
mat.h
@ -478,7 +478,7 @@ NRMat<T>::NRMat(const T &a, const int n, const int m, const GPUID loc) : nn(n),
|
|||||||
#else
|
#else
|
||||||
p = v = new T[nm];
|
p = v = new T[nm];
|
||||||
#endif
|
#endif
|
||||||
if (a != (T)0){
|
if (!LA_traits<T>::is_plaindata() || a != (T)0){
|
||||||
for (register int i=0; i<nm; i++) *p++ = a;
|
for (register int i=0; i<nm; i++) *p++ = a;
|
||||||
}else{
|
}else{
|
||||||
memset(p, 0, nm*sizeof(T));
|
memset(p, 0, nm*sizeof(T));
|
||||||
@ -488,6 +488,7 @@ NRMat<T>::NRMat(const T &a, const int n, const int m, const GPUID loc) : nn(n),
|
|||||||
if(sizeof(T)%sizeof(float) != 0)laerror("memory alignment error");
|
if(sizeof(T)%sizeof(float) != 0)laerror("memory alignment error");
|
||||||
|
|
||||||
v = (T*)gpualloc(nm*sizeof(T));
|
v = (T*)gpualloc(nm*sizeof(T));
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
smart_gpu_set(nm, a, v);
|
smart_gpu_set(nm, a, v);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -522,7 +523,7 @@ inline NRMat<T>::NRMat(const T (&a)[R][C]) : count(new int) {
|
|||||||
#endif
|
#endif
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
if!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
v = (T*) gpualloc(nn*mm*sizeof(T));
|
v = (T*) gpualloc(nn*mm*sizeof(T));
|
||||||
cublasSetVector(nm, sizeof(T), a, 1, v, 1);
|
cublasSetVector(nm, sizeof(T), a, 1, v, 1);
|
||||||
}
|
}
|
||||||
@ -554,13 +555,14 @@ NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new
|
|||||||
#else
|
#else
|
||||||
p = v = new T[nm];
|
p = v = new T[nm];
|
||||||
#endif
|
#endif
|
||||||
if (a != (T)0)
|
if (!LA_traits<T>::is_plaindata() || a != (T)0)
|
||||||
for (register int i=0; i<nm; i++) *p++ = a;
|
for (register int i=0; i<nm; i++) *p++ = a;
|
||||||
else
|
else
|
||||||
memset(p, 0, nm*sizeof(T));
|
memset(p, 0, nm*sizeof(T));
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
v = (T*)gpualloc(nm*sizeof(T));
|
v = (T*)gpualloc(nm*sizeof(T));
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
smart_gpu_set(nm, a, v);
|
smart_gpu_set(nm, a, v);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -588,14 +590,17 @@ NRMat<T>::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new
|
|||||||
v = new T*[n];
|
v = new T*[n];
|
||||||
v[0] = new T[nm];
|
v[0] = new T[nm];
|
||||||
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
|
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
|
||||||
memcpy(v[0], a, nm*sizeof(T));
|
if(LA_traits<T>::is_plaindata()) memcpy(v[0], a, nm*sizeof(T));
|
||||||
|
else for(int i=0; i<nm; ++i) v[0][i] = a[i];
|
||||||
#else
|
#else
|
||||||
v = new T[nm];
|
v = new T[nm];
|
||||||
memcpy(v, a, nm*sizeof(T));
|
if(LA_traits<T>::is_plaindata()) memcpy(v, a, nm*sizeof(T));
|
||||||
|
else for(int i=0; i<nm; ++i) v[i] = a[i];
|
||||||
#endif
|
#endif
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
v = (T*) gpualloc(nm*sizeof(T));
|
v = (T*) gpualloc(nm*sizeof(T));
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
cublasSetVector(nm, sizeof(T), a, 1, v, 1);
|
cublasSetVector(nm, sizeof(T), a, 1, v, 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
7
smat.h
7
smat.h
@ -230,12 +230,13 @@ inline NRSMat<T>::NRSMat(const T& a, const int n) : nn(n), count(new int(1)) {
|
|||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
v = new T[NN2];
|
v = new T[NN2];
|
||||||
if(a != (T)0) for(register size_t i = 0; i<NN2; i++) v[i] = a;
|
if(!LA_traits<T>::is_plaindata() || a != (T)0) for(register size_t i = 0; i<NN2; i++) v[i] = a;
|
||||||
else memset(v, 0, NN2*sizeof(T));
|
else memset(v, 0, NN2*sizeof(T));
|
||||||
|
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
v = (T*) gpualloc(NN2*sizeof(T));
|
v = (T*) gpualloc(NN2*sizeof(T));
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
cublasSetVector(NN2, sizeof(T), &a, 0, v, 1);
|
cublasSetVector(NN2, sizeof(T), &a, 0, v, 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -253,10 +254,12 @@ inline NRSMat<T>::NRSMat(const T *a, const int n) : nn(n), count(new int(1)) {
|
|||||||
location = DEFAULT_LOC;
|
location = DEFAULT_LOC;
|
||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
memcpy(v, a, NN2*sizeof(T));
|
if(LA_traits<T>::is_plaindata()) memcpy(v, a, NN2*sizeof(T));
|
||||||
|
else for( int i=0; i<NN2; i++) v[i] = a[i];
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
v = (T*) gpualloc(NN2*sizeof(T));
|
v = (T*) gpualloc(NN2*sizeof(T));
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
cublasSetVector(NN2, sizeof(T), a, 1, v, 1);
|
cublasSetVector(NN2, sizeof(T), a, 1, v, 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
18
vec.cc
18
vec.cc
@ -443,7 +443,7 @@ NRVec<T>& NRVec<T>::operator=(const T &a){
|
|||||||
NOT_GPU(*this);
|
NOT_GPU(*this);
|
||||||
copyonwrite();
|
copyonwrite();
|
||||||
|
|
||||||
if(a != (T)0){
|
if(!LA_traits<T>::is_plaindata() || a != (T)0){
|
||||||
for(register int i=0; i<nn; i++) v[i] = a;
|
for(register int i=0; i<nn; i++) v[i] = a;
|
||||||
}else{
|
}else{
|
||||||
memset(v, 0, nn*sizeof(T));
|
memset(v, 0, nn*sizeof(T));
|
||||||
@ -883,6 +883,22 @@ for(int cycle=1; cycle<=p.size(); ++cycle)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
const int NRVec<T>::find(const T &val) const
|
||||||
|
{
|
||||||
|
for(int i=0; i<nn; ++i) if(val==v[i]) return i;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
const int NRVec<T>::findthr(const T &val, const typename LA_traits<T>::normtype &thr) const
|
||||||
|
{
|
||||||
|
for(int i=0; i<nn; ++i) if(MYABS(val-v[i])<thr) return i;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/***************************************************************************//**
|
/***************************************************************************//**
|
||||||
* forced instantiation in the corresponding object file
|
* forced instantiation in the corresponding object file
|
||||||
|
25
vec.h
25
vec.h
@ -352,6 +352,10 @@ public:
|
|||||||
//! get normalized copy of this vector
|
//! get normalized copy of this vector
|
||||||
inline const NRVec unitvector() const;
|
inline const NRVec unitvector() const;
|
||||||
|
|
||||||
|
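//! find the first element from the left that equals val (find) or differs from val by strictly less than thr in absolute value (findthr); both return its index, or -1 if no element matches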
|
||||||
|
const int find(const T &val) const;
|
||||||
|
const int findthr(const T &val, const typename LA_traits<T>::normtype &thr=0) const;
|
||||||
|
|
||||||
//! determine the maximal element (in the absolute value) of this vector
|
//! determine the maximal element (in the absolute value) of this vector
|
||||||
inline const T amax() const;
|
inline const T amax() const;
|
||||||
//! determine the minimal element (in the absolute value) of this vector
|
//! determine the minimal element (in the absolute value) of this vector
|
||||||
@ -570,14 +574,15 @@ inline NRVec<T>::NRVec(const T& a, const int n): nn(n), count(new int) {
|
|||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
v = new T[n];
|
v = new T[n];
|
||||||
if(a != (T)0){
|
if(!LA_traits<T>::is_plaindata() || a != (T)0){
|
||||||
for(register int i=0; i<n; i++) v[i] = a;
|
for(register int i=0; i<n; i++) v[i] = a;
|
||||||
}else{
|
}else{
|
||||||
memset(v, 0, nn*sizeof(T));
|
memset(v, 0, nn*sizeof(T));
|
||||||
}
|
}
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
v = (T*) gpualloc(n*sizeof(T));
|
v = (T*) gpualloc(n*sizeof(T));
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
smart_gpu_set(n, a, v);
|
smart_gpu_set(n, a, v);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -602,7 +607,7 @@ inline NRVec<T>::NRVec(const T (&a)[SIZE]) : count(new int) {
|
|||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
v = (T*) gpualloc(nn*sizeof(T));
|
v = (T*) gpualloc(nn*sizeof(T));
|
||||||
if!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
cublasSetVector(nn, sizeof(T), a, 1, v, 1);
|
cublasSetVector(nn, sizeof(T), a, 1, v, 1);
|
||||||
TEST_CUBLAS("cublasSetVector");
|
TEST_CUBLAS("cublasSetVector");
|
||||||
}
|
}
|
||||||
@ -626,10 +631,12 @@ inline NRVec<T>::NRVec(const T *a, const int n): nn(n), count(new int) {
|
|||||||
#endif
|
#endif
|
||||||
v = new T[n];
|
v = new T[n];
|
||||||
*count = 1;
|
*count = 1;
|
||||||
memcpy(v, a, n*sizeof(T));
|
if(LA_traits<T>::is_plaindata()) memcpy(v, a, n*sizeof(T));
|
||||||
|
else for( int i=0; i<n; i++) v[i] = a[i];
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
v = (T*) gpualloc(n*sizeof(T));
|
v = (T*) gpualloc(n*sizeof(T));
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
cublasSetVector(n, sizeof(T), a, 1, v, 1);
|
cublasSetVector(n, sizeof(T), a, 1, v, 1);
|
||||||
TEST_CUBLAS("cublasSetVector");
|
TEST_CUBLAS("cublasSetVector");
|
||||||
}
|
}
|
||||||
@ -653,10 +660,12 @@ inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int)
|
|||||||
#endif
|
#endif
|
||||||
v = new T[n];
|
v = new T[n];
|
||||||
*count = 1;
|
*count = 1;
|
||||||
memcpy(v, a, n*sizeof(T));
|
if(LA_traits<T>::is_plaindata()) memcpy(v, a, n*sizeof(T));
|
||||||
|
else for( int i=0; i<n; i++) v[i] = a[i];
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
v= (T*) gpualloc(n*sizeof(T));
|
v= (T*) gpualloc(n*sizeof(T));
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
cublasSetVector(n, sizeof(T), a, 1, v, 1);
|
cublasSetVector(n, sizeof(T), a, 1, v, 1);
|
||||||
TEST_CUBLAS("cublasSetVector");
|
TEST_CUBLAS("cublasSetVector");
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user