bugfix in NRMat:: rsum() and csum()

This commit is contained in:
Jiri Pittner 2024-02-08 15:01:06 +01:00
parent 684c1cde5d
commit 7172e26a50
3 changed files with 31 additions and 27 deletions

46
mat.cc
View File

@ -593,12 +593,12 @@ const NRMat<T> NRMat<T>::operator|(const NRMat<T> &b) const {
template <typename T> template <typename T>
const NRVec<T> NRMat<T>::csum() const { const NRVec<T> NRMat<T>::csum() const {
NOT_GPU(*this); NOT_GPU(*this);
NRVec<T> result(nn, getlocation()); NRVec<T> result(mm, getlocation());
T sum; T sum;
for(register int i=0; i<nn; i++) { for(register int i=0; i<mm; i++) {
sum = (T)0; sum = (T)0;
for(int j=0; j<mm; j++) sum += (*this)[i][j]; for(int j=0; j<nn; j++) sum += (*this)[j][i];
result[i] = sum; result[i] = sum;
} }
return result; return result;
@ -610,18 +610,18 @@ const NRVec<T> NRMat<T>::csum() const {
******************************************************************************/ ******************************************************************************/
template <> template <>
const NRVec<double> NRMat<double>::csum() const { const NRVec<double> NRMat<double>::csum() const {
NRVec<double> result(nn, getlocation()); NRVec<double> result(mm, getlocation());
result = 0.0; result.clear();
#ifdef CUDALA #ifdef CUDALA
if(location == cpu){ if(location == cpu){
#endif #endif
for(register int i=0; i<mm; i++){ for(register int i=0; i<mm; i++){
cblas_daxpy(nn, 1.0, &((*this)(0, i)), nn, result.v, 1); cblas_daxpy(nn, 1.0, &((*this)(0, i)), mm, result.v+i, 0);
} }
#ifdef CUDALA #ifdef CUDALA
}else{ }else{
for(register int i=0; i<mm; i++){ for(register int i=0; i<mm; i++){
cublasDaxpy(nn, 1.0, v + i, nn, result.v, 1); cublasDaxpy(nn, 1.0, v + i, mm, result.v+i, 0);
TEST_CUBLAS("cublasDaxpy"); TEST_CUBLAS("cublasDaxpy");
} }
} }
@ -635,18 +635,18 @@ const NRVec<double> NRMat<double>::csum() const {
******************************************************************************/ ******************************************************************************/
template <> template <>
const NRVec<std::complex<double> > NRMat<std::complex<double> >::csum() const { const NRVec<std::complex<double> > NRMat<std::complex<double> >::csum() const {
NRVec<std::complex<double> > result(nn, getlocation()); NRVec<std::complex<double> > result(mm, getlocation());
result = 0.0; result.clear();
#ifdef CUDALA #ifdef CUDALA
if(location == cpu){ if(location == cpu){
#endif #endif
for(register int i=0;i<mm;i++){ for(register int i=0;i<mm;i++){
cblas_zaxpy(nn, &CONE, &((*this)(0, i)), nn, result.v, 1); cblas_zaxpy(nn, &CONE, &((*this)(0, i)), mm, result.v+i, 0);
} }
#ifdef CUDALA #ifdef CUDALA
}else{ }else{
for(register int i=0;i<nn;i++){ for(register int i=0;i<mm;i++){
cublasZaxpy(nn, CUONE, (cuDoubleComplex*)(v + i), nn, (cuDoubleComplex*)(result.v), 1); cublasZaxpy(nn, CUONE, (cuDoubleComplex*)(v + i), mm, (cuDoubleComplex*)(result.v+i), 0);
TEST_CUBLAS("cublasZaxpy"); TEST_CUBLAS("cublasZaxpy");
} }
} }
@ -661,12 +661,12 @@ const NRVec<std::complex<double> > NRMat<std::complex<double> >::csum() const {
template <typename T> template <typename T>
const NRVec<T> NRMat<T>::rsum() const { const NRVec<T> NRMat<T>::rsum() const {
NOT_GPU(*this); NOT_GPU(*this);
NRVec<T> result(mm, getlocation()); NRVec<T> result(nn, getlocation());
T sum; T sum;
for(register int i=0; i<mm; i++) { for(register int i=0; i<nn; i++) {
sum = (T)0; sum = (T)0;
for(int j=0; j<nn; j++) sum += (*this)[j][i]; for(int j=0; j<mm; j++) sum += (*this)[i][j];
result[i] = sum; result[i] = sum;
} }
return result; return result;
@ -678,18 +678,18 @@ const NRVec<T> NRMat<T>::rsum() const {
******************************************************************************/ ******************************************************************************/
template <> template <>
const NRVec<double> NRMat<double>::rsum() const { const NRVec<double> NRMat<double>::rsum() const {
NRVec<double> result(mm, getlocation()); NRVec<double> result(nn, getlocation());
result = 0.0; result.clear();
#ifdef CUDALA #ifdef CUDALA
if(location == cpu){ if(location == cpu){
#endif #endif
for(register int i=0;i<nn;i++){ for(register int i=0;i<nn;i++){
cblas_daxpy(mm, 1.0, (*this)[i], 1, result.v, 1); cblas_daxpy(mm, 1.0, (*this)[i], 1, result.v+i, 0);
} }
#ifdef CUDALA #ifdef CUDALA
}else{ }else{
for(register int i=0;i<nn;i++){ for(register int i=0;i<nn;i++){
cublasDaxpy(mm, 1.0, v + i*(size_t)mm, 1, result.v, 1); cublasDaxpy(mm, 1.0, v + i*(size_t)mm, 1, result.v+i, 0);
TEST_CUBLAS("cublasDaxpy"); TEST_CUBLAS("cublasDaxpy");
} }
} }
@ -703,18 +703,18 @@ const NRVec<double> NRMat<double>::rsum() const {
******************************************************************************/ ******************************************************************************/
template <> template <>
const NRVec<std::complex<double> > NRMat<std::complex<double> >::rsum() const { const NRVec<std::complex<double> > NRMat<std::complex<double> >::rsum() const {
NRVec<std::complex<double> > result(mm, getlocation()); NRVec<std::complex<double> > result(nn, getlocation());
result = 0.0; result.clear();
#ifdef CUDALA #ifdef CUDALA
if(location == cpu){ if(location == cpu){
#endif #endif
for(register int i=0;i<nn;i++){ for(register int i=0;i<nn;i++){
cblas_zaxpy(mm, &CONE, (*this)[i], 1, result.v, 1); cblas_zaxpy(mm, &CONE, (*this)[i], 1, result.v+i, 0);
} }
#ifdef CUDALA #ifdef CUDALA
}else{ }else{
for(register int i=0;i<nn;i++){ for(register int i=0;i<nn;i++){
cublasZaxpy(mm, CUONE, (cuDoubleComplex*)(v + i*(size_t)mm), 1, (cuDoubleComplex*)(result.v), 1); cublasZaxpy(mm, CUONE, (cuDoubleComplex*)(v + i*(size_t)mm), 1, (cuDoubleComplex*)(result.v+i), 0);
TEST_CUBLAS("cublasZaxpy"); TEST_CUBLAS("cublasZaxpy");
} }
} }

4
mat.h
View File

@ -253,9 +253,9 @@ public:
//! for this matrix \f$A\f$ compute \f$A\cdot{}A^\mathrm{T}\f$ //! for this matrix \f$A\f$ compute \f$A\cdot{}A^\mathrm{T}\f$
const NRSMat<T> timestransposed() const; const NRSMat<T> timestransposed() const;
//! sum the rows //! sum numbers in the rows (sum over column index)
const NRVec<T> rsum() const; const NRVec<T> rsum() const;
//! sum the columns //! sum numbers in the columns (sum over row index)
const NRVec<T> csum() const; const NRVec<T> csum() const;
//! orthonormalize this matrix //! orthonormalize this matrix

8
t.cc
View File

@ -2461,7 +2461,7 @@ Polynomial<double> pp({1,2,3,4,5});
cout<<pp; cout<<pp;
} }
if(1) if(0)
{ {
//prepare random symmetric mat3 //prepare random symmetric mat3
int seed; int seed;
@ -3153,8 +3153,12 @@ for(int i=0; i<a.size(); ++i)
} }
} }
if(0) if(1)
{ {
NRMat<double> a;
cin>>a;
cout <<a.rsum()<<endl;
cout <<a.csum()<<endl;
} }