implemented complex conjugation method where it was not available yet

2024-05-03 16:56:21 +02:00 · 2024-05-03 16:56:21 +02:00 · 3ba6d03eee
commit 3ba6d03eee
parent 518c75fb20
8 changed files with 129 additions and 11 deletions
--- a/mat.cc
+++ b/mat.cc
@ -2078,11 +2078,15 @@ NRMat< std::complex<double> >::operator*(const NRSMat< std::complex<double> > &r
 /***************************************************************************//**
- * conjugate this non-complex matrix \f$A\f$, i.e. do nothing :-)
+ * conjugate this matrix
 * @return reference to the (unmodified) matrix
 ******************************************************************************/
 template<typename T>
 NRMat<T>& NRMat<T>::conjugateme() {
 #ifdef CUDALA
        if(location != cpu) laerror("general conjugation only on CPU");
 #endif          
 	for(int i=0; i<nn; ++i) for(int j=0; j<mm; ++j) (*this)(i,j) = LA_traits<T>::conjugate((*this)(i,j));
 	return *this;
 }
--- a/mat.h
+++ b/mat.h
@ -333,7 +333,7 @@ public:
 	//! in case of square matrix, transpose the leading minor of order <code>n</code>
 	NRMat& transposeme(const int n = 0);
-	//! conjugate a square matrix
+	//! conjugate a  matrix
 	NRMat& conjugateme();
 	//! transpose this matrix and return the result by value
--- a/smat.cc
+++ b/smat.cc
@ -866,6 +866,58 @@ NRSMat<std::complex<double> >  NRSMat<std::complex<double> >::inverse() {return
 /***************************************************************************//**
 * conjugate this general matrix
 * @return reference to the (unmodified) matrix
 ******************************************************************************/
 template<typename T>
 NRSMat<T>& NRSMat<T>::conjugateme() {
 #ifdef CUDALA
        if(location != cpu) laerror("general conjugation only on CPU");
 #endif
 	for(int i=0; i<NN2; ++i) v[i] =  LA_traits<T>::conjugate(v[i]);
        return *this;
 }
 /***************************************************************************//**
 * conjugate this complex matrix
 * @return reference to the modified matrix
 ******************************************************************************/
 template<>
 NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::conjugateme() {
        copyonwrite();
 #ifdef CUDALA
        if(location == cpu){
 #endif
                cblas_dscal((size_t)NN2, -1.0, ((double *)v) + 1, 2);
 #ifdef CUDALA
        }else{
                cublasDscal((size_t)NN2, -1.0, ((double *)v) + 1, 2);
        }
 #endif
        return *this;
 }
 template<>
 NRSMat<std::complex<float> >& NRSMat<std::complex<float> >::conjugateme() {
        copyonwrite();
 #ifdef CUDALA
        if(location == cpu){
 #endif
                cblas_sscal((size_t)NN2, -1.0, ((float *)v) + 1, 2);
 #ifdef CUDALA
        }else{
                cublasSscal((size_t)NN2, -1.0, ((float *)v) + 1, 2);
        }
 #endif
        return *this;
 }
 /***************************************************************************//**
 * forced instantization in the corresponding object file
--- a/smat.h
+++ b/smat.h
@ -99,6 +99,10 @@ public:
 	//! inverse matrix
        NRSMat inverse();
 	//! conjugate a  matrix
        NRSMat& conjugateme();
        const NRSMat conjugate() const {NRSMat r(*this); r.conjugateme(); return r;};
 	//! permute matrix elements
        const NRSMat permuted(const NRPerm<int> &p, const bool inverse=false) const;
--- a/tensor.cc
+++ b/tensor.cc
@ -647,26 +647,26 @@ return r;
 template<typename T>
-static void auxmatmult(int nn, int mm, int kk, T *r, T *a, T *b,  T alpha=1, T beta=0) //R(nn,mm) = A * B^T
+static void auxmatmult(int nn, int mm, int kk, T *r, T *a, T *b,  T alpha=1, T beta=0, bool conjugate=false) //R(nn,mm) = A * B^T
 {
 for(int i=0; i<nn; ++i) for(int j=0; j<mm; ++j)
 	{
 	if(beta==0)  r[i*mm+j]=0; else r[i*mm+j] *= beta;
-	for(int k=0; k<kk; ++k) r[i*mm+j] += alpha * a[i*kk+k] * b[j*kk+k];
+	for(int k=0; k<kk; ++k) r[i*mm+j] += alpha * a[i*kk+k] * conjugate? LA_traits<T>::conjugate(b[j*kk+k]) : b[j*kk+k];
 	}
 }
 template<>
-void auxmatmult<double>(int nn, int mm, int kk, double *r, double *a, double *b, double alpha, double beta)
+void auxmatmult<double>(int nn, int mm, int kk, double *r, double *a, double *b, double alpha, double beta, bool conjugate)
 {
 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, nn, mm, kk, alpha, a, kk, b, kk, beta, r, mm);
 }
 template<>
-void auxmatmult<std::complex<double> >(int nn, int mm, int kk, std::complex<double> *r, std::complex<double> *a, std::complex<double> *b,  std::complex<double> alpha,  std::complex<double> beta)
+void auxmatmult<std::complex<double> >(int nn, int mm, int kk, std::complex<double> *r, std::complex<double> *a, std::complex<double> *b,  std::complex<double> alpha,  std::complex<double> beta, bool conjugate)
 {
-cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasTrans, nn, mm, kk, &alpha, a, kk, b, kk, &beta, r, mm);
+cblas_zgemm(CblasRowMajor, CblasNoTrans, (conjugate?CblasConjTrans:CblasTrans), nn, mm, kk, &alpha, a, kk, b, kk, &beta, r, mm);
 }
@ -679,7 +679,7 @@ cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasTrans, nn, mm, kk, &alpha, a, kk,
 //The index unwinding is unfortunately a big burden, and in principle could be eliminated in case of non-symmetric indices
 //
 template<typename T>
-void Tensor<T>::addcontraction(const Tensor &rhs1, int group, int index, const Tensor &rhs, int rhsgroup, int rhsindex, T alpha, T beta, bool doresize)
+void Tensor<T>::addcontraction(const Tensor &rhs1, int group, int index, const Tensor &rhs, int rhsgroup, int rhsindex, T alpha, T beta, bool doresize, bool conjugate)
 {
 if(group<0||group>=rhs1.shape.size()) laerror("wrong group number in contraction");
 if(rhsgroup<0||rhsgroup>=rhs.shape.size()) laerror("wrong rhsgroup number in contraction");
@ -711,7 +711,7 @@ kk=u.groupsizes[0];
 if(kk!=rhsu.groupsizes[0]) laerror("internal error in contraction");
 nn=1; for(int i=1; i<u.shape.size(); ++i) nn*= u.groupsizes[i];
 mm=1; for(int i=1; i<rhsu.shape.size(); ++i) mm*= rhsu.groupsizes[i];
-auxmatmult<T>(nn,mm,kk,&data[0],&u.data[0], &rhsu.data[0],alpha,beta);
+auxmatmult<T>(nn,mm,kk,&data[0],&u.data[0], &rhsu.data[0],alpha,beta,conjugate);
 }
--- a/tensor.h
+++ b/tensor.h
@ -147,6 +147,10 @@ public:
 	inline Tensor& operator/=(const T &a) {data/=a; return *this;};
 	inline Tensor operator/(const T &a) const {Tensor r(*this); r /=a; return r;};
 	Tensor& conjugateme() {data.conjugateme(); return *this;};
        inline Tensor conjugate() const {Tensor r(*this); r.conjugateme(); return r;};
 	inline Tensor& operator+=(const Tensor &rhs) 
 		{
@ -180,8 +184,8 @@ public:
 	Tensor permute_index_groups(const NRPerm<int> &p) const; //rearrange the tensor storage permuting index groups as a whole
 	Tensor unwind_index(int group, int index) const; //separate an index from a group and expand it to full range as the least significant one
-	void addcontraction(const Tensor &rhs1, int group, int index, const Tensor &rhs, int rhsgroup, int rhsindex, T alpha=1, T beta=1, bool doresize=false);
+	void addcontraction(const Tensor &rhs1, int group, int index, const Tensor &rhs, int rhsgroup, int rhsindex, T alpha=1, T beta=1, bool doresize=false, bool conjugate=false);
-	inline Tensor contraction(int group, int index, const Tensor &rhs, int rhsgroup, int rhsindex, T alpha=1) const {Tensor<T> r; r.addcontraction(*this,group,index,rhs,rhsgroup,rhsindex,alpha,0,true); return r; }
+	inline Tensor contraction(int group, int index, const Tensor &rhs, int rhsgroup, int rhsindex, T alpha=1, bool conjugate=false) const {Tensor<T> r; r.addcontraction(*this,group,index,rhs,rhsgroup,rhsindex,alpha,0,true, conjugate); return r; }
 	void apply_permutation_algebra(const  Tensor &rhs, const PermutationAlgebra<int,T> &pa, bool inverse=false, T alpha=1, T beta=0); //general (not optimally efficient) symmetrizers, antisymmetrizers etc. acting on the flattened index list:
 //								 this *=beta; for I over this: this(I) += alpha * sum_P c_P rhs(P(I))
--- a/vec.cc
+++ b/vec.cc
@ -903,6 +903,57 @@ void NRVec<T>::storesubvector(const NRVec<int> &selection, const NRVec &rhs)
                }
 }
 /***************************************************************************//**
 * conjugate this general vector
 * @return reference to the (unmodified) matrix
 ******************************************************************************/
 template<typename T>
 NRVec<T>& NRVec<T>::conjugateme() {
 #ifdef CUDALA
        if(location != cpu) laerror("general conjugation only on CPU");
 #endif
 	for(int i=0; i<nn; ++i) v[i] =  LA_traits<T>::conjugate(v[i]);
        return *this;
 }
 /***************************************************************************//**
 * conjugate this complex vector 
 * @return reference to the modified matrix
 ******************************************************************************/
 template<>
 NRVec<std::complex<double> >& NRVec<std::complex<double> >::conjugateme() {
        copyonwrite();
 #ifdef CUDALA
        if(location == cpu){
 #endif
                cblas_dscal((size_t)nn, -1.0, ((double *)v) + 1, 2);
 #ifdef CUDALA
        }else{
                cublasDscal((size_t)nn, -1.0, ((double *)v) + 1, 2);
        }
 #endif
        return *this;
 }
 template<>
 NRVec<std::complex<float> >& NRVec<std::complex<float> >::conjugateme() {
        copyonwrite();
 #ifdef CUDALA
        if(location == cpu){
 #endif
                cblas_sscal((size_t)nn, -1.0, ((float *)v) + 1, 2);
 #ifdef CUDALA
        }else{
                cublasSscal((size_t)nn, -1.0, ((float *)v) + 1, 2);
        }
 #endif
        return *this;
 }
 /***************************************************************************//**
--- a/vec.h
+++ b/vec.h
@ -298,6 +298,9 @@ public:
                v[0] = a;
                }
 	//! complex conjugate 
        NRVec& conjugateme();
        inline NRVec conjugate() const {NRVec r(*this); r.conjugateme(); return r;};
 	//! determine the actual value of the reference counter 
 	inline int getcount() const {return count?*count:0;}