*** empty log message ***

This commit is contained in:
jiri 2018-11-07 18:21:59 +00:00
parent de49120e1b
commit 9dd4b18976
3 changed files with 251 additions and 13 deletions

View File

@ -1,3 +1,12 @@
07.11.2018 NRMat nonsymmetry and nonhermiticity
03.11.2018 bugfix in bitvector class
25.07.2018 minor bugfix and improvement in davidson.h
12.07.2018 bugfix in fourindex_dense<antisymtwoelectronrealdirac,T,DUMMY>::operator()
11.07.2018 added new symmetry type to fourindex.h
06.11.2017 smat.cc fixed complex lvalue problem for g++ 6.4.0
25.10.2017 bitvector improved input from stream
27.09.2017 DIIS extended to optionally compute extrapolated error vector
08.11.2016 implemented swap_rows(i,j) and swap_cols(i,j) for NRMat
28.06.2016 minor changes in Makefile.am to work smoothly with recent autotools 28.06.2016 minor changes in Makefile.am to work smoothly with recent autotools
28.06.2016 fixed determinant sign issue due to dgesv ipiv counting from 1 in nonclass.cc 28.06.2016 fixed determinant sign issue due to dgesv ipiv counting from 1 in nonclass.cc
11.11.2015 autotool files changed to support separate only-static ATLAS libraries 11.11.2015 autotool files changed to support separate only-static ATLAS libraries

245
mat.cc
View File

@ -828,7 +828,7 @@ NRMat<T>& NRMat<T>::transposeme(const int _n) {
} }
#ifdef CUDALA #ifdef CUDALA
}else{ }else{
laerror("transposeme not implemented on GPU yet");
} }
#endif #endif
@ -836,6 +836,108 @@ NRMat<T>& NRMat<T>::transposeme(const int _n) {
} }
/***************************************************************************//**
* compute matrix non-symmetry
******************************************************************************/
template <typename T>
const typename LA_traits<T>::normtype NRMat<T>::nonsymmetry() const {
#ifdef DEBUG
if (nn != mm) laerror("NRMat<T>:nonsymmetry() invalid for non-square matrix");
#endif
typename LA_traits<T>::normtype sum = 0;
#ifdef CUDALA
if(location == cpu){
#endif
for(register int i=1; i<nn; i++){
for(register int j=0; j<i; j++){
#ifdef MATPTR
sum += (v[i][j]-v[j][i])*(v[i][j]-v[j][i]);
#else
register int a, b;
a = i*(size_t)mm + j;
b = j*(size_t)mm + i;
sum += (v[a] - v[b])*(v[a] - v[b]);
#endif
}
}
#ifdef CUDALA
}else{
laerror("nonsymmetry not implemented on GPU yet");
}
#endif
return sum;
}
/***************************************************************************//**
* compute matrix non-hermiticity
******************************************************************************/
template <>
const double NRMat<complex<double> >::nonhermiticity() const {
#ifdef DEBUG
if (nn != mm) laerror("NRMat<T>:nonsymmetry() invalid for non-square matrix");
#endif
double sum = 0;
complex<double> tmp;
#ifdef CUDALA
if(location == cpu){
#endif
for(register int i=1; i<nn; i++){
for(register int j=0; j<=i; j++){
#ifdef MATPTR
tmp = complex<double> (v[i][j].real()-v[j][i].real(),v[i][j].imag()+v[j][i].imag());
#else
register int a, b;
a = i*(size_t)mm + j;
b = j*(size_t)mm + i;
tmp = complex<double> (v[a].real() - v[b].real(), v[a].imag()+v[b].imag());
#endif
sum += tmp.real()*tmp.real()+tmp.imag()*tmp.imag();
}
}
#ifdef CUDALA
}else{
laerror("nonsymmetry not implemented on GPU yet");
}
#endif
return sum;
}
template <>
const double NRMat<complex<double> >::nonsymmetry() const {
#ifdef DEBUG
if (nn != mm) laerror("NRMat<T>:nonsymmetry() invalid for non-square matrix");
#endif
double sum = 0;
complex<double> tmp;
#ifdef CUDALA
if(location == cpu){
#endif
for(register int i=1; i<nn; i++){
for(register int j=0; j<i; j++){
#ifdef MATPTR
tmp = v[i][j]-v[j][i];
#else
register int a, b;
a = i*(size_t)mm + j;
b = j*(size_t)mm + i;
tmp = v[a] - v[b];
#endif
sum += tmp.real()*tmp.real()+tmp.imag()*tmp.imag();
}
}
#ifdef CUDALA
}else{
laerror("nonsymmetry not implemented on GPU yet");
}
#endif
return sum;
}
/***************************************************************************//** /***************************************************************************//**
* create complex double-precision matrix from real double-precision matrix \f$A\f$ * create complex double-precision matrix from real double-precision matrix \f$A\f$
* @param[in] rhs real double-precision matrix \f$A\f$ * @param[in] rhs real double-precision matrix \f$A\f$
@ -1962,7 +2064,7 @@ NRMat<double>& NRMat<double>::conjugateme() {
} }
/***************************************************************************//** /***************************************************************************//**
* conjugate this complex matrix \f$A\f$, i.e. do nothing :-) * conjugate this complex matrix \f$A\f$, or leading minor of size n
* @return reference to the modified matrix * @return reference to the modified matrix
******************************************************************************/ ******************************************************************************/
template<> template<>
@ -2337,9 +2439,9 @@ const double* NRMat<double>::diagonalof(NRVec<double> &r, const bool divide, boo
******************************************************************************/ ******************************************************************************/
template<> template<>
void NRMat<double>::diagonalset(const NRVec<double> &r) { void NRMat<double>::diagonalset(const NRVec<double> &r) {
int nnmin= nn<=mm?nn:mm;
#ifdef DEBUG #ifdef DEBUG
if(r.size() != nn) laerror("incompatible vectors int NRMat<double>::diagonalset(...)"); if(r.size() != nnmin) laerror("incompatible vectors int NRMat<double>::diagonalset(...)");
if(nn != mm) laerror("NRMat<double>::diagonalset(...) can be used only for square matrices");
#endif #endif
SAME_LOC(*this, r); SAME_LOC(*this, r);
@ -2350,14 +2452,14 @@ void NRMat<double>::diagonalset(const NRVec<double> &r) {
#endif #endif
#ifdef MATPTR #ifdef MATPTR
for (int i=0; i<nn; i++) v[i][i] = r[i]; for (int i=0; i<nnmin; i++) v[i][i] = r[i];
#else #else
cblas_dcopy(nn, r.v, 1, v, nn+1); //{int i,j; for (i=j=0; j< nn; ++j, i+=nn+1) v[i] = r[j];} cblas_dcopy(nnmin, r.v, 1, v, mm+1); //{int i,j; for (i=j=0; j< nnmin; ++j, i+=mm+1) v[i] = r[j];}
#endif #endif
#ifdef CUDALA #ifdef CUDALA
}else{ }else{
cublasDcopy(nn, r.v, 1, v, nn+1); cublasDcopy(nnmin, r.v, 1, v, mm+1);
} }
#endif #endif
} }
@ -2369,9 +2471,9 @@ void NRMat<double>::diagonalset(const NRVec<double> &r) {
******************************************************************************/ ******************************************************************************/
template<> template<>
void NRMat<complex<double> >::diagonalset(const NRVec<complex<double> > &r) { void NRMat<complex<double> >::diagonalset(const NRVec<complex<double> > &r) {
int nnmin= nn<=mm?nn:mm;
#ifdef DEBUG #ifdef DEBUG
if(r.size() != nn) laerror("incompatible vectors int NRMat<complex<double> >::diagonalset(...)"); if(r.size() != nnmin) laerror("incompatible vectors int NRMat<complex<double> >::diagonalset(...)");
if(nn != mm) laerror("NRMat<complex<double> >::diagonalset(...) can be used only for square matrices");
#endif #endif
SAME_LOC(*this, r); SAME_LOC(*this, r);
copyonwrite(); copyonwrite();
@ -2380,13 +2482,13 @@ void NRMat<complex<double> >::diagonalset(const NRVec<complex<double> > &r) {
if(location == cpu){ if(location == cpu){
#endif #endif
#ifdef MATPTR #ifdef MATPTR
for (int i=0; i<nn; i++) v[i][i] = r[i]; for (int i=0; i<nnmin; i++) v[i][i] = r[i];
#else #else
cblas_zcopy(nn, r.v, 1, v, nn+1);//{int i,j; for (i=j=0; j<nn; ++j, i+=nn+1) v[i] = r[j];} cblas_zcopy(nnmin, r.v, 1, v, mm+1);//{int i,j; for (i=j=0; j<nnmin; ++j, i+=mm+1) v[i] = r[j];}
#endif #endif
#ifdef CUDALA #ifdef CUDALA
}else{ }else{
cublasZcopy(nn, (cuDoubleComplex*)(r.v), 1, (cuDoubleComplex*)(this->v), 1); cublasZcopy(nnmin, (cuDoubleComplex*)(r.v), 1, (cuDoubleComplex*)(this->v), mm+1);
} }
#endif #endif
} }
@ -2559,6 +2661,23 @@ NRMat<double>& NRMat<double>::swap_rows(){
return *this; return *this;
} }
template<>
NRMat<double>& NRMat<double>::swap_rows(const int i, const int j){
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_dswap(mm, (*this)[i], 1, (*this)[j], 1);
#ifdef CUDALA
}else{
cublasDswap(mm, v + i*(size_t)mm, 1, v + j*mm, 1);
TEST_CUBLAS("cublasDswap");
}
#endif
return *this;
}
/***************************************************************************//** /***************************************************************************//**
* interchange the order of the rows of the current (complex) matrix * interchange the order of the rows of the current (complex) matrix
* @return reference to the modified matrix * @return reference to the modified matrix
@ -2585,6 +2704,23 @@ NRMat<complex<double> >& NRMat<complex<double> >::swap_rows(){
return *this; return *this;
} }
template<>
NRMat<complex<double> >& NRMat<complex<double> >::swap_rows(const int i, const int j){
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zswap(mm, (*this)[i], 1, (*this)[j], 1);
#ifdef CUDALA
}else{
cublasZswap(mm, (cuDoubleComplex*)(v + i*(size_t)mm), 1, (cuDoubleComplex*)(v + j*(size_t)mm), 1);
TEST_CUBLAS("cublasZswap");
}
#endif
return *this;
}
/***************************************************************************//** /***************************************************************************//**
* interchange the order of the rows of the current general matrix of type T * interchange the order of the rows of the current general matrix of type T
* for GPU computations, the condition sizeof(T)%sizeof(float) is required * for GPU computations, the condition sizeof(T)%sizeof(float) is required
@ -2643,6 +2779,24 @@ NRMat<double>& NRMat<double>::swap_cols(){
#endif #endif
return *this; return *this;
} }
template<>
NRMat<double>& NRMat<double>::swap_cols(const int i, const int j){
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_dswap(nn, &((*this)(0, i)), mm, &((*this)(0, j)), mm);
#ifdef CUDALA
}else{
cublasDswap(nn, v + i, mm, v + j, mm);
TEST_CUBLAS("cublasDswap");
}
#endif
return *this;
}
/***************************************************************************//** /***************************************************************************//**
* interchange the order of the columns of the current (complex) matrix * interchange the order of the columns of the current (complex) matrix
* @return reference to the modified matrix * @return reference to the modified matrix
@ -2669,6 +2823,23 @@ NRMat<complex<double> >& NRMat<complex<double> >::swap_cols(){
return *this; return *this;
} }
template<>
NRMat<complex<double> >& NRMat<complex<double> >::swap_cols(const int i, const int j){
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zswap(nn, &((*this)(0, i)), mm, &((*this)(0, j)), mm);
#ifdef CUDALA
}else{
cublasZswap(nn, (cuDoubleComplex*)(v + i), mm, (cuDoubleComplex*)(v + j), mm);
TEST_CUBLAS("cublasZswap");
}
#endif
return *this;
}
/***************************************************************************//** /***************************************************************************//**
* interchange the order of the columns of the current general matrix of type T * interchange the order of the columns of the current general matrix of type T
* because of the cuBlas implementation, the GPU version requires that * because of the cuBlas implementation, the GPU version requires that
@ -2704,6 +2875,56 @@ NRMat<T>& NRMat<T>::swap_cols(){
return *this; return *this;
} }
/*interchange two columns*/
template<typename T>
NRMat<T>& NRMat<T>::swap_cols(const int a, const int b){
T tmp;
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
for(register int j=0;j<nn;j++){
tmp = (*this)(j, a);
(*this)(j, a) = (*this)(j,b);
(*this)(j,b) = tmp;
}
#ifdef CUDALA
}else{
if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem in NRMat<T>::swap_cols");
cublasSswap(nn*sizeof(T)/sizeof(float),
(float *)(v + a), mm*sizeof(T)/sizeof(float),
(float *)(v + b), mm*sizeof(T)/sizeof(float) );
TEST_CUBLAS("cublasSswap");
}
#endif
return *this;
}
/*interchange two rows*/
template<typename T>
NRMat<T>& NRMat<T>::swap_rows(const int a, const int b){
T tmp;
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
for(register int j=0;j<mm;j++){
tmp = (*this)(a,j);
(*this)(a,j) = (*this)(b,j);
(*this)(b,j) = tmp;
}
#ifdef CUDALA
}else{
if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem in NRMat<T>::swap_rows");
cublasSswap(nn*sizeof(T)/sizeof(float),
(float *)(v + a*mm), sizeof(T)/sizeof(float),
(float *)(v + b*mm), sizeof(T)/sizeof(float) );
TEST_CUBLAS("cublasSswap");
}
#endif
return *this;
}
/***************************************************************************//** /***************************************************************************//**
* interchange the order of the rows and columns of the current * interchange the order of the rows and columns of the current
* real matrix \f$A\f$ of type T, i.e. perform the operation * real matrix \f$A\f$ of type T, i.e. perform the operation

10
mat.h
View File

@ -330,6 +330,11 @@ public:
//! swap the order of the rows and columns of the current matrix //! swap the order of the rows and columns of the current matrix
NRMat & swap_rows_cols(); NRMat & swap_rows_cols();
// LV - swapping of columns i and j
NRMat & swap_cols(const int i, const int j);
// LV - swapping of rows i and j
NRMat & swap_rows(const int i, const int j);
//! multiply by sparse matrix //! multiply by sparse matrix
SparseSMat<T> operator*(const SparseSMat<T> &rhs) const; SparseSMat<T> operator*(const SparseSMat<T> &rhs) const;
@ -351,6 +356,9 @@ public:
inline void simplify() {}; inline void simplify() {};
bool issymmetric() const { return 0; }; bool issymmetric() const { return 0; };
const typename LA_traits<T>::normtype nonsymmetry() const;
const typename LA_traits<T>::normtype nonhermiticity() const;
#ifndef NO_STRASSEN #ifndef NO_STRASSEN
//! Strassen's multiplication (better than \f$\mathacal{O}(n^3)\f$, analogous syntax to \see NRMat<T>::gemm() ) //! Strassen's multiplication (better than \f$\mathacal{O}(n^3)\f$, analogous syntax to \see NRMat<T>::gemm() )
void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha); void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha);
@ -579,7 +587,7 @@ template <typename T>
NRMat<T>::NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset) NRMat<T>::NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset)
{ {
if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length"); if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
if(offset!=0) std::cout << "dangerous: if the underlying vector is deallocated before the matrix, wrong delete[] will result for nonzero offset!!!\n";
#ifdef CUDALA #ifdef CUDALA
location=rhs.location; location=rhs.location;
#endif #endif