bugfix in NRMat:: rsum() and csum()
This commit is contained in:
parent
684c1cde5d
commit
7172e26a50
46
mat.cc
46
mat.cc
@ -593,12 +593,12 @@ const NRMat<T> NRMat<T>::operator|(const NRMat<T> &b) const {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
const NRVec<T> NRMat<T>::csum() const {
|
const NRVec<T> NRMat<T>::csum() const {
|
||||||
NOT_GPU(*this);
|
NOT_GPU(*this);
|
||||||
NRVec<T> result(nn, getlocation());
|
NRVec<T> result(mm, getlocation());
|
||||||
T sum;
|
T sum;
|
||||||
|
|
||||||
for(register int i=0; i<nn; i++) {
|
for(register int i=0; i<mm; i++) {
|
||||||
sum = (T)0;
|
sum = (T)0;
|
||||||
for(int j=0; j<mm; j++) sum += (*this)[i][j];
|
for(int j=0; j<nn; j++) sum += (*this)[j][i];
|
||||||
result[i] = sum;
|
result[i] = sum;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
@ -610,18 +610,18 @@ const NRVec<T> NRMat<T>::csum() const {
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
template <>
|
template <>
|
||||||
const NRVec<double> NRMat<double>::csum() const {
|
const NRVec<double> NRMat<double>::csum() const {
|
||||||
NRVec<double> result(nn, getlocation());
|
NRVec<double> result(mm, getlocation());
|
||||||
result = 0.0;
|
result.clear();
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
for(register int i=0; i<mm; i++){
|
for(register int i=0; i<mm; i++){
|
||||||
cblas_daxpy(nn, 1.0, &((*this)(0, i)), nn, result.v, 1);
|
cblas_daxpy(nn, 1.0, &((*this)(0, i)), mm, result.v+i, 0);
|
||||||
}
|
}
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
for(register int i=0; i<mm; i++){
|
for(register int i=0; i<mm; i++){
|
||||||
cublasDaxpy(nn, 1.0, v + i, nn, result.v, 1);
|
cublasDaxpy(nn, 1.0, v + i, mm, result.v+i, 0);
|
||||||
TEST_CUBLAS("cublasDaxpy");
|
TEST_CUBLAS("cublasDaxpy");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -635,18 +635,18 @@ const NRVec<double> NRMat<double>::csum() const {
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
template <>
|
template <>
|
||||||
const NRVec<std::complex<double> > NRMat<std::complex<double> >::csum() const {
|
const NRVec<std::complex<double> > NRMat<std::complex<double> >::csum() const {
|
||||||
NRVec<std::complex<double> > result(nn, getlocation());
|
NRVec<std::complex<double> > result(mm, getlocation());
|
||||||
result = 0.0;
|
result.clear();
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
for(register int i=0;i<mm;i++){
|
for(register int i=0;i<mm;i++){
|
||||||
cblas_zaxpy(nn, &CONE, &((*this)(0, i)), nn, result.v, 1);
|
cblas_zaxpy(nn, &CONE, &((*this)(0, i)), mm, result.v+i, 0);
|
||||||
}
|
}
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
for(register int i=0;i<nn;i++){
|
for(register int i=0;i<mm;i++){
|
||||||
cublasZaxpy(nn, CUONE, (cuDoubleComplex*)(v + i), nn, (cuDoubleComplex*)(result.v), 1);
|
cublasZaxpy(nn, CUONE, (cuDoubleComplex*)(v + i), mm, (cuDoubleComplex*)(result.v+i), 0);
|
||||||
TEST_CUBLAS("cublasZaxpy");
|
TEST_CUBLAS("cublasZaxpy");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -661,12 +661,12 @@ const NRVec<std::complex<double> > NRMat<std::complex<double> >::csum() const {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
const NRVec<T> NRMat<T>::rsum() const {
|
const NRVec<T> NRMat<T>::rsum() const {
|
||||||
NOT_GPU(*this);
|
NOT_GPU(*this);
|
||||||
NRVec<T> result(mm, getlocation());
|
NRVec<T> result(nn, getlocation());
|
||||||
T sum;
|
T sum;
|
||||||
|
|
||||||
for(register int i=0; i<mm; i++) {
|
for(register int i=0; i<nn; i++) {
|
||||||
sum = (T)0;
|
sum = (T)0;
|
||||||
for(int j=0; j<nn; j++) sum += (*this)[j][i];
|
for(int j=0; j<mm; j++) sum += (*this)[i][j];
|
||||||
result[i] = sum;
|
result[i] = sum;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
@ -678,18 +678,18 @@ const NRVec<T> NRMat<T>::rsum() const {
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
template <>
|
template <>
|
||||||
const NRVec<double> NRMat<double>::rsum() const {
|
const NRVec<double> NRMat<double>::rsum() const {
|
||||||
NRVec<double> result(mm, getlocation());
|
NRVec<double> result(nn, getlocation());
|
||||||
result = 0.0;
|
result.clear();
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
for(register int i=0;i<nn;i++){
|
for(register int i=0;i<nn;i++){
|
||||||
cblas_daxpy(mm, 1.0, (*this)[i], 1, result.v, 1);
|
cblas_daxpy(mm, 1.0, (*this)[i], 1, result.v+i, 0);
|
||||||
}
|
}
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
for(register int i=0;i<nn;i++){
|
for(register int i=0;i<nn;i++){
|
||||||
cublasDaxpy(mm, 1.0, v + i*(size_t)mm, 1, result.v, 1);
|
cublasDaxpy(mm, 1.0, v + i*(size_t)mm, 1, result.v+i, 0);
|
||||||
TEST_CUBLAS("cublasDaxpy");
|
TEST_CUBLAS("cublasDaxpy");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -703,18 +703,18 @@ const NRVec<double> NRMat<double>::rsum() const {
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
template <>
|
template <>
|
||||||
const NRVec<std::complex<double> > NRMat<std::complex<double> >::rsum() const {
|
const NRVec<std::complex<double> > NRMat<std::complex<double> >::rsum() const {
|
||||||
NRVec<std::complex<double> > result(mm, getlocation());
|
NRVec<std::complex<double> > result(nn, getlocation());
|
||||||
result = 0.0;
|
result.clear();
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
if(location == cpu){
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
for(register int i=0;i<nn;i++){
|
for(register int i=0;i<nn;i++){
|
||||||
cblas_zaxpy(mm, &CONE, (*this)[i], 1, result.v, 1);
|
cblas_zaxpy(mm, &CONE, (*this)[i], 1, result.v+i, 0);
|
||||||
}
|
}
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}else{
|
}else{
|
||||||
for(register int i=0;i<nn;i++){
|
for(register int i=0;i<nn;i++){
|
||||||
cublasZaxpy(mm, CUONE, (cuDoubleComplex*)(v + i*(size_t)mm), 1, (cuDoubleComplex*)(result.v), 1);
|
cublasZaxpy(mm, CUONE, (cuDoubleComplex*)(v + i*(size_t)mm), 1, (cuDoubleComplex*)(result.v+i), 0);
|
||||||
TEST_CUBLAS("cublasZaxpy");
|
TEST_CUBLAS("cublasZaxpy");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
4
mat.h
4
mat.h
@ -253,9 +253,9 @@ public:
|
|||||||
//! for this matrix \f$A\f$ compute \f$A\cdot{}A^\mathrm{T}\f$
|
//! for this matrix \f$A\f$ compute \f$A\cdot{}A^\mathrm{T}\f$
|
||||||
const NRSMat<T> timestransposed() const;
|
const NRSMat<T> timestransposed() const;
|
||||||
|
|
||||||
//! sum the rows
|
//! sum numbers in the rows (sum over column index)
|
||||||
const NRVec<T> rsum() const;
|
const NRVec<T> rsum() const;
|
||||||
//! sum the columns
|
//! sum numbers in the columns (sum over row index)
|
||||||
const NRVec<T> csum() const;
|
const NRVec<T> csum() const;
|
||||||
|
|
||||||
//! orthonormalize this matrix
|
//! orthonormalize this matrix
|
||||||
|
8
t.cc
8
t.cc
@ -2461,7 +2461,7 @@ Polynomial<double> pp({1,2,3,4,5});
|
|||||||
cout<<pp;
|
cout<<pp;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(1)
|
if(0)
|
||||||
{
|
{
|
||||||
//prepare random symmetric mat3
|
//prepare random symmetric mat3
|
||||||
int seed;
|
int seed;
|
||||||
@ -3153,8 +3153,12 @@ for(int i=0; i<a.size(); ++i)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(0)
|
if(1)
|
||||||
{
|
{
|
||||||
|
NRMat<double> a;
|
||||||
|
cin>>a;
|
||||||
|
cout <<a.rsum()<<endl;
|
||||||
|
cout <<a.csum()<<endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user