bugfix in NRMat:: rsum() and csum()

This commit is contained in:
Jiri Pittner 2024-02-08 15:01:06 +01:00
parent 684c1cde5d
commit 7172e26a50
3 changed files with 31 additions and 27 deletions

46
mat.cc
View File

@ -593,12 +593,12 @@ const NRMat<T> NRMat<T>::operator|(const NRMat<T> &b) const {
template <typename T>
const NRVec<T> NRMat<T>::csum() const {
NOT_GPU(*this);
NRVec<T> result(nn, getlocation());
NRVec<T> result(mm, getlocation());
T sum;
for(register int i=0; i<nn; i++) {
for(register int i=0; i<mm; i++) {
sum = (T)0;
for(int j=0; j<mm; j++) sum += (*this)[i][j];
for(int j=0; j<nn; j++) sum += (*this)[j][i];
result[i] = sum;
}
return result;
@ -610,18 +610,18 @@ const NRVec<T> NRMat<T>::csum() const {
******************************************************************************/
template <>
const NRVec<double> NRMat<double>::csum() const {
NRVec<double> result(nn, getlocation());
result = 0.0;
NRVec<double> result(mm, getlocation());
result.clear();
#ifdef CUDALA
if(location == cpu){
#endif
for(register int i=0; i<mm; i++){
cblas_daxpy(nn, 1.0, &((*this)(0, i)), nn, result.v, 1);
cblas_daxpy(nn, 1.0, &((*this)(0, i)), mm, result.v+i, 0);
}
#ifdef CUDALA
}else{
for(register int i=0; i<mm; i++){
cublasDaxpy(nn, 1.0, v + i, nn, result.v, 1);
cublasDaxpy(nn, 1.0, v + i, mm, result.v+i, 0);
TEST_CUBLAS("cublasDaxpy");
}
}
@ -635,18 +635,18 @@ const NRVec<double> NRMat<double>::csum() const {
******************************************************************************/
template <>
const NRVec<std::complex<double> > NRMat<std::complex<double> >::csum() const {
NRVec<std::complex<double> > result(nn, getlocation());
result = 0.0;
NRVec<std::complex<double> > result(mm, getlocation());
result.clear();
#ifdef CUDALA
if(location == cpu){
#endif
for(register int i=0;i<mm;i++){
cblas_zaxpy(nn, &CONE, &((*this)(0, i)), nn, result.v, 1);
cblas_zaxpy(nn, &CONE, &((*this)(0, i)), mm, result.v+i, 0);
}
#ifdef CUDALA
}else{
for(register int i=0;i<nn;i++){
cublasZaxpy(nn, CUONE, (cuDoubleComplex*)(v + i), nn, (cuDoubleComplex*)(result.v), 1);
for(register int i=0;i<mm;i++){
cublasZaxpy(nn, CUONE, (cuDoubleComplex*)(v + i), mm, (cuDoubleComplex*)(result.v+i), 0);
TEST_CUBLAS("cublasZaxpy");
}
}
@ -661,12 +661,12 @@ const NRVec<std::complex<double> > NRMat<std::complex<double> >::csum() const {
template <typename T>
const NRVec<T> NRMat<T>::rsum() const {
NOT_GPU(*this);
NRVec<T> result(mm, getlocation());
NRVec<T> result(nn, getlocation());
T sum;
for(register int i=0; i<mm; i++) {
for(register int i=0; i<nn; i++) {
sum = (T)0;
for(int j=0; j<nn; j++) sum += (*this)[j][i];
for(int j=0; j<mm; j++) sum += (*this)[i][j];
result[i] = sum;
}
return result;
@ -678,18 +678,18 @@ const NRVec<T> NRMat<T>::rsum() const {
******************************************************************************/
template <>
const NRVec<double> NRMat<double>::rsum() const {
NRVec<double> result(mm, getlocation());
result = 0.0;
NRVec<double> result(nn, getlocation());
result.clear();
#ifdef CUDALA
if(location == cpu){
#endif
for(register int i=0;i<nn;i++){
cblas_daxpy(mm, 1.0, (*this)[i], 1, result.v, 1);
cblas_daxpy(mm, 1.0, (*this)[i], 1, result.v+i, 0);
}
#ifdef CUDALA
}else{
for(register int i=0;i<nn;i++){
cublasDaxpy(mm, 1.0, v + i*(size_t)mm, 1, result.v, 1);
cublasDaxpy(mm, 1.0, v + i*(size_t)mm, 1, result.v+i, 0);
TEST_CUBLAS("cublasDaxpy");
}
}
@ -703,18 +703,18 @@ const NRVec<double> NRMat<double>::rsum() const {
******************************************************************************/
template <>
const NRVec<std::complex<double> > NRMat<std::complex<double> >::rsum() const {
NRVec<std::complex<double> > result(mm, getlocation());
result = 0.0;
NRVec<std::complex<double> > result(nn, getlocation());
result.clear();
#ifdef CUDALA
if(location == cpu){
#endif
for(register int i=0;i<nn;i++){
cblas_zaxpy(mm, &CONE, (*this)[i], 1, result.v, 1);
cblas_zaxpy(mm, &CONE, (*this)[i], 1, result.v+i, 0);
}
#ifdef CUDALA
}else{
for(register int i=0;i<nn;i++){
cublasZaxpy(mm, CUONE, (cuDoubleComplex*)(v + i*(size_t)mm), 1, (cuDoubleComplex*)(result.v), 1);
cublasZaxpy(mm, CUONE, (cuDoubleComplex*)(v + i*(size_t)mm), 1, (cuDoubleComplex*)(result.v+i), 0);
TEST_CUBLAS("cublasZaxpy");
}
}

4
mat.h
View File

@ -253,9 +253,9 @@ public:
//! for this matrix \f$A\f$ compute \f$A\cdot{}A^\mathrm{T}\f$
const NRSMat<T> timestransposed() const;
//! sum the rows
//! sum numbers in the rows (sum over column index)
const NRVec<T> rsum() const;
//! sum the columns
//! sum numbers in the columns (sum over row index)
const NRVec<T> csum() const;
//! orthonormalize this matrix

8
t.cc
View File

@ -2461,7 +2461,7 @@ Polynomial<double> pp({1,2,3,4,5});
cout<<pp;
}
if(1)
if(0)
{
//prepare random symmetric mat3
int seed;
@ -3153,8 +3153,12 @@ for(int i=0; i<a.size(); ++i)
}
}
if(0)
if(1)
{
NRMat<double> a;
cin>>a;
cout <<a.rsum()<<endl;
cout <<a.csum()<<endl;
}