From 9dd4b1897601bfccb71744960f464131dc2e7520 Mon Sep 17 00:00:00 2001
From: jiri <jiri>
Date: Wed, 7 Nov 2018 18:21:59 +0000
Subject: [PATCH] *** empty log message ***

---
 ChangeLog |   9 ++
 mat.cc    | 245 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 mat.h     |  10 ++-
 3 files changed, 251 insertions(+), 13 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index a559cf5..3355ecc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+07.11.2018 NRMat nonsymmetry and nonhermiticity
+03.11.2018 bugfix in bitvector class
+25.07.2018 minor bugfix and improvement in davidson.h
+12.07.2018 bugfix in fourindex_dense<antisymtwoelectronrealdirac,T,DUMMY>::operator()
+11.07.2018 added new symmetry type to fourindex.h
+06.11.2017 smat.cc fixed complex lvalue problem for g++ 6.4.0
+25.10.2017 bitvector improved input from stream
+27.09.2017 DIIS extended to optionally compute extrapolated error vector
+08.11.2016 implemented swap_rows(i,j) and swap_cols(i,j) for NRMat
 28.06.2016 minor changes in Makefile.am to work smoothly with recent autotools
 28.06.2016 fixed determinant sign issue due to dgesv ipiv counting from 1 in nonclass.cc
 11.11.2015 autotool files changed to support separate only-static ATLAS libraries
diff --git a/mat.cc b/mat.cc
index 62a9f5e..b7fdfa9 100644
--- a/mat.cc
+++ b/mat.cc
@@ -828,7 +828,7 @@ NRMat<T>& NRMat<T>::transposeme(const int _n) {
 		}
 #ifdef CUDALA
 	}else{
-
+laerror("transposeme not implemented on GPU yet");
 
 	}
 #endif
@@ -836,6 +836,108 @@ NRMat<T>& NRMat<T>::transposeme(const int _n) {
 }
 
 
+/***************************************************************************//**
+ * compute matrix non-symmetry
+ ******************************************************************************/
+template <typename T>
+const typename  LA_traits<T>::normtype  NRMat<T>::nonsymmetry() const {
+#ifdef DEBUG
+	if (nn != mm) laerror("NRMat<T>:nonsymmetry() invalid for non-square matrix");
+#endif
+typename  LA_traits<T>::normtype sum = 0;
+#ifdef CUDALA
+	if(location == cpu){
+#endif
+		for(register int i=1; i<nn; i++){
+			for(register int j=0; j<i; j++){
+			#ifdef MATPTR
+				sum += (v[i][j]-v[j][i])*(v[i][j]-v[j][i]); 
+			#else
+				register int a, b; 
+				a = i*(size_t)mm + j;
+				b = j*(size_t)mm + i;
+				sum += (v[a] - v[b])*(v[a] - v[b]);
+			#endif
+			}
+		}
+#ifdef CUDALA
+	}else{
+laerror("nonsymmetry not implemented on GPU yet");
+
+	}
+#endif
+	return sum;
+}
+
+/***************************************************************************//**
+ * compute matrix non-hermiticity
+ ******************************************************************************/
+template <>
+const double NRMat<complex<double> >::nonhermiticity() const {
+#ifdef DEBUG
+	if (nn != mm) laerror("NRMat<T>:nonsymmetry() invalid for non-square matrix");
+#endif
+double sum = 0;
+complex<double> tmp;
+#ifdef CUDALA
+	if(location == cpu){
+#endif
+		for(register int i=1; i<nn; i++){
+			for(register int j=0; j<=i; j++){
+			#ifdef MATPTR
+				tmp = complex<double> (v[i][j].real()-v[j][i].real(),v[i][j].imag()+v[j][i].imag()); 
+			#else
+				register int a, b; 
+				a = i*(size_t)mm + j;
+				b = j*(size_t)mm + i;
+				tmp = complex<double> (v[a].real() - v[b].real(), v[a].imag()+v[b].imag());
+			#endif
+			sum += tmp.real()*tmp.real()+tmp.imag()*tmp.imag();
+			}
+		}
+#ifdef CUDALA
+	}else{
+laerror("nonsymmetry not implemented on GPU yet");
+
+	}
+#endif
+	return sum;
+}
+
+template <>
+const double NRMat<complex<double> >::nonsymmetry() const {
+#ifdef DEBUG
+        if (nn != mm) laerror("NRMat<T>:nonsymmetry() invalid for non-square matrix");
+#endif
+double sum = 0;
+complex<double> tmp;
+#ifdef CUDALA
+        if(location == cpu){
+#endif
+                for(register int i=1; i<nn; i++){
+                        for(register int j=0; j<i; j++){
+                        #ifdef MATPTR
+                                tmp = v[i][j]-v[j][i];
+                        #else
+                                register int a, b;
+                                a = i*(size_t)mm + j;
+                                b = j*(size_t)mm + i;
+                                tmp = v[a] - v[b];
+                        #endif
+                        sum += tmp.real()*tmp.real()+tmp.imag()*tmp.imag();
+                        }
+                }
+#ifdef CUDALA
+        }else{
+laerror("nonsymmetry not implemented on GPU yet");
+
+        }
+#endif
+        return sum;
+}
+
+
+
 /***************************************************************************//**
  * create complex double-precision matrix from real double-precision matrix \f$A\f$
  * @param[in] rhs real double-precision matrix \f$A\f$
@@ -1962,7 +2064,7 @@ NRMat<double>& NRMat<double>::conjugateme() {
 }
 
 /***************************************************************************//**
- * conjugate this complex matrix \f$A\f$, i.e. do nothing :-)
+ * conjugate this complex matrix \f$A\f$, or leading minor of size n
  * @return reference to the modified matrix
  ******************************************************************************/
 template<>
@@ -2337,9 +2439,9 @@ const double* NRMat<double>::diagonalof(NRVec<double> &r, const bool divide, boo
  ******************************************************************************/
 template<>
 void NRMat<double>::diagonalset(const NRVec<double> &r) {
+int nnmin= nn<=mm?nn:mm;
 #ifdef DEBUG
-	if(r.size() != nn) laerror("incompatible vectors int NRMat<double>::diagonalset(...)");
-	if(nn != mm) laerror("NRMat<double>::diagonalset(...) can be used only for square matrices");
+	if(r.size() != nnmin) laerror("incompatible vectors int NRMat<double>::diagonalset(...)");
 #endif
 
 	SAME_LOC(*this, r);
@@ -2350,14 +2452,14 @@ void NRMat<double>::diagonalset(const NRVec<double> &r) {
 #endif
 
 #ifdef MATPTR
-	for (int i=0; i<nn; i++) v[i][i] = r[i];
+	for (int i=0; i<nnmin; i++) v[i][i] = r[i];
 #else
-	cblas_dcopy(nn, r.v, 1, v, nn+1); //{int i,j; for (i=j=0; j< nn; ++j, i+=nn+1) v[i] = r[j];}
+	cblas_dcopy(nnmin, r.v, 1, v, mm+1); //{int i,j; for (i=j=0; j< nnmin; ++j, i+=mm+1) v[i] = r[j];}
 #endif
 
 #ifdef CUDALA
 	}else{
-		cublasDcopy(nn, r.v, 1, v, nn+1);
+		cublasDcopy(nnmin, r.v, 1, v, mm+1);
 	}
 #endif
 }
@@ -2369,9 +2471,9 @@ void NRMat<double>::diagonalset(const NRVec<double> &r) {
  ******************************************************************************/
 template<>
 void NRMat<complex<double> >::diagonalset(const NRVec<complex<double> > &r) {
+int nnmin= nn<=mm?nn:mm;
 #ifdef DEBUG
-	if(r.size() != nn) laerror("incompatible vectors int NRMat<complex<double> >::diagonalset(...)");
-	if(nn != mm) laerror("NRMat<complex<double> >::diagonalset(...) can be used only for square matrices");
+	if(r.size() != nnmin) laerror("incompatible vectors int NRMat<complex<double> >::diagonalset(...)");
 #endif
 	SAME_LOC(*this, r);
 	copyonwrite();
@@ -2380,13 +2482,13 @@ void NRMat<complex<double> >::diagonalset(const NRVec<complex<double> > &r) {
 	if(location == cpu){
 #endif
 	#ifdef MATPTR
-		for (int i=0; i<nn; i++) v[i][i] = r[i];
+		for (int i=0; i<nnmin; i++) v[i][i] = r[i];
 	#else
-		cblas_zcopy(nn, r.v, 1, v, nn+1);//{int i,j; for (i=j=0; j<nn; ++j, i+=nn+1) v[i] = r[j];}
+		cblas_zcopy(nnmin, r.v, 1, v, mm+1);//{int i,j; for (i=j=0; j<nnmin; ++j, i+=mm+1) v[i] = r[j];}
 	#endif
 #ifdef CUDALA
 	}else{
-		cublasZcopy(nn, (cuDoubleComplex*)(r.v), 1, (cuDoubleComplex*)(this->v), 1);
+		cublasZcopy(nnmin, (cuDoubleComplex*)(r.v), 1, (cuDoubleComplex*)(this->v), mm+1);
 	}
 #endif
 }
@@ -2559,6 +2661,23 @@ NRMat<double>& NRMat<double>::swap_rows(){
         return *this;
 }
 
+template<>
+NRMat<double>& NRMat<double>::swap_rows(const int i, const int j){
+        copyonwrite();
+
+#ifdef CUDALA
+	if(location == cpu){
+#endif		
+		        cblas_dswap(mm, (*this)[i], 1, (*this)[j], 1);
+#ifdef CUDALA
+	}else{
+		        cublasDswap(mm, v + i*(size_t)mm, 1, v + j*mm, 1);
+			TEST_CUBLAS("cublasDswap");
+	}
+#endif	
+        return *this;
+}
+
 /***************************************************************************//**
  * interchange the order of the rows of the current (complex) matrix
  * @return reference to the modified matrix
@@ -2585,6 +2704,23 @@ NRMat<complex<double> >& NRMat<complex<double> >::swap_rows(){
         return *this;
 }
 
+template<>
+NRMat<complex<double> >& NRMat<complex<double> >::swap_rows(const int i, const int j){
+        copyonwrite();
+
+#ifdef CUDALA
+	if(location == cpu){
+#endif
+		        cblas_zswap(mm, (*this)[i], 1, (*this)[j], 1);
+#ifdef CUDALA
+	}else{
+		        cublasZswap(mm, (cuDoubleComplex*)(v + i*(size_t)mm), 1, (cuDoubleComplex*)(v + j*(size_t)mm), 1);
+			TEST_CUBLAS("cublasZswap");
+	}
+#endif
+        return *this;
+}
+
 /***************************************************************************//**
  * interchange the order of the rows of the current general matrix of type T
  * for GPU computations, the condition sizeof(T)%sizeof(float) is required
@@ -2643,6 +2779,24 @@ NRMat<double>& NRMat<double>::swap_cols(){
 #endif	
 	return *this;
 }
+
+template<>
+NRMat<double>& NRMat<double>::swap_cols(const int i, const int j){
+        copyonwrite();
+
+#ifdef CUDALA
+	if(location == cpu){
+#endif
+			cblas_dswap(nn, &((*this)(0, i)), mm, &((*this)(0, j)), mm);
+#ifdef CUDALA
+	}else{
+			cublasDswap(nn, v + i, mm, v + j, mm);
+			TEST_CUBLAS("cublasDswap");
+	}
+#endif	
+	return *this;
+}
+
 /***************************************************************************//**
  * interchange the order of the columns of the current (complex) matrix
  * @return reference to the modified matrix
@@ -2669,6 +2823,23 @@ NRMat<complex<double> >& NRMat<complex<double> >::swap_cols(){
 	return *this;
 }
 
+template<>
+NRMat<complex<double> >& NRMat<complex<double> >::swap_cols(const int i, const int j){
+        copyonwrite();
+
+#ifdef CUDALA
+	if(location == cpu){
+#endif
+			cblas_zswap(nn, &((*this)(0, i)), mm, &((*this)(0, j)), mm);
+#ifdef CUDALA
+	}else{
+			cublasZswap(nn, (cuDoubleComplex*)(v + i), mm, (cuDoubleComplex*)(v + j), mm);
+			TEST_CUBLAS("cublasZswap");
+	}
+#endif	
+	return *this;
+}
+
 /***************************************************************************//**
  * interchange the order of the columns of the current general matrix of type T
  * because of the cuBlas implementation, the GPU version requires that
@@ -2704,6 +2875,56 @@ NRMat<T>& NRMat<T>::swap_cols(){
         return *this;
 }
 
+/*interchange two columns*/
+template<typename T>
+NRMat<T>& NRMat<T>::swap_cols(const int a, const int b){
+	T tmp;
+        copyonwrite();
+#ifdef CUDALA
+	if(location == cpu){
+#endif
+			for(register int j=0;j<nn;j++){
+				tmp = (*this)(j, a);
+				(*this)(j, a) = (*this)(j,b);
+				(*this)(j,b) = tmp;
+			}
+#ifdef CUDALA
+	}else{
+		if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem in NRMat<T>::swap_cols");
+		        cublasSswap(nn*sizeof(T)/sizeof(float), 
+					(float *)(v + a), mm*sizeof(T)/sizeof(float), 
+					(float *)(v + b), mm*sizeof(T)/sizeof(float) );
+			TEST_CUBLAS("cublasSswap");
+	}
+#endif	
+        return *this;
+}
+
+/*interchange two rows*/
+template<typename T>
+NRMat<T>& NRMat<T>::swap_rows(const int a, const int b){
+	T tmp;
+        copyonwrite();
+#ifdef CUDALA
+	if(location == cpu){
+#endif
+			for(register int j=0;j<mm;j++){
+				tmp = (*this)(a,j);
+				(*this)(a,j) = (*this)(b,j);
+				(*this)(b,j) = tmp;
+			}
+#ifdef CUDALA
+	}else{
+		if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem in NRMat<T>::swap_rows");
+		        cublasSswap(nn*sizeof(T)/sizeof(float), 
+					(float *)(v + a*mm), sizeof(T)/sizeof(float), 
+					(float *)(v + b*mm), sizeof(T)/sizeof(float) );
+			TEST_CUBLAS("cublasSswap");
+	}
+#endif	
+        return *this;
+}
+
 /***************************************************************************//**
  * interchange the order of the rows and columns of the current
  * real matrix \f$A\f$ of type T, i.e. perform the operation
diff --git a/mat.h b/mat.h
index f950ea6..8a997a4 100644
--- a/mat.h
+++ b/mat.h
@@ -330,6 +330,11 @@ public:
 	//! swap the order of the rows and columns of the current matrix
 	NRMat & swap_rows_cols();
 
+	// LV - swapping of columns i and j
+	NRMat & swap_cols(const int i, const int j);
+	// LV - swapping of rows i and j
+	NRMat & swap_rows(const int i, const int j);
+
 	//! multiply by sparse matrix
 	SparseSMat<T> operator*(const SparseSMat<T> &rhs) const;
 
@@ -351,6 +356,9 @@ public:
         inline void simplify() {};
 	bool issymmetric() const { return 0; };
 
+	const typename  LA_traits<T>::normtype  nonsymmetry() const;
+	const typename  LA_traits<T>::normtype  nonhermiticity() const;
+
 #ifndef NO_STRASSEN
 	//! Strassen's multiplication (better than \f$\mathacal{O}(n^3)\f$, analogous syntax to \see NRMat<T>::gemm() )
 	void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha);
@@ -579,7 +587,7 @@ template <typename T>
 NRMat<T>::NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset)
 {
 	if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
-
+	if(offset!=0) std::cout << "dangerous: if the underlying vector is deallocated before the matrix, wrong delete[] will result for nonzero offset!!!\n";
 #ifdef CUDALA
 	location=rhs.location;
 #endif