diff --git a/davidson.h b/davidson.h index 0103ad8..223d044 100644 --- a/davidson.h +++ b/davidson.h @@ -13,3 +13,7 @@ export template extern void davidson(const Matrix &bigmat, NRVec &eivals, NRVec *eivecs, const char *eivecsfile, int nroots=1, const bool verbose=0, const double eps=1e-6, const bool incore=1, int maxit=100, const int maxkrylov = 500); + +//@@@options: left eigenvectors by matrix transpose, overridesymmetric (for nrmat) +//@@@small matrix gdiagonalize - shift complex roots up (option to gdiagonalize?) +//@@@test gdiagonalize whether it sorts the roots and what for complex ones diff --git a/la_traits.h b/la_traits.h index 7290515..a14f487 100644 --- a/la_traits.h +++ b/la_traits.h @@ -13,9 +13,13 @@ using namespace std; #include #include "laerror.h" +#ifdef NONCBLAS +#include "noncblas.h" +#else extern "C" { #include "cblas.h" } +#endif #ifdef _GLIBCPP_NO_TEMPLATE_EXPORT # define export @@ -64,16 +68,25 @@ SCALAR(void *) //now declare the traits for scalars and for composed classes +//NOTE! methods in traits classes have to be declared static, +//since the class itself is never instantiated. +//for performance, it can be also inlined at the same time template struct LA_traits_aux {}; + +//TRAITS SPECIALIZATIONS + //complex scalars template struct LA_traits_aux, scalar_true> { typedef complex elementtype; typedef complex producttype; typedef C normtype; -static normtype norm (const complex &x) {return abs(x);} -static void axpy (complex &s, const complex &x, const complex &c) {s+=x*c;} +static inline bool gencmp(const complex *x, const complex *y, int n) {return memcmp(x,y,n*sizeof(complex));} +static bool bigger(const complex &x, const complex &y) {laerror("complex comparison undefined"); return false;} +static bool smaller(const complex &x, const complex &y) {laerror("complex comparison undefined"); return false;} +static inline normtype norm (const complex &x) {return abs(x);} +static inline void axpy (complex &s, const complex &x, const complex &c) {s+=x*c;} static void get(int fd, complex &x, bool dimensions=0) {if(sizeof(complex)!=read(fd,&x,sizeof(complex))) laerror("read error");} static void put(int fd, const complex &x, bool dimensions=0) {if(sizeof(complex)!=write(fd,&x,sizeof(complex))) laerror("write error");} static void multiget(unsigned int n,int fd, complex *x, bool dimensions=0){if((ssize_t)(n*sizeof(complex))!=read(fd,x,n*sizeof(complex))) laerror("read error");} @@ -87,8 +100,11 @@ struct LA_traits_aux { typedef C elementtype; typedef C producttype; typedef C normtype; -static normtype norm (const C &x) {return abs(x);} -static void axpy (C &s, const C &x, const C &c) {s+=x*c;} +static inline bool gencmp(const C *x, const C *y, int n) {return memcmp(x,y,n*sizeof(C));} +static inline bool bigger(const C &x, const C &y) {return x>y;} +static inline bool smaller(const C &x, const C &y) {return x struct LA_traits; //forward declaration needed for template recursion @@ -107,8 +123,11 @@ struct LA_traits_aux, scalar_false> { \ typedef C elementtype; \ typedef X producttype; \ typedef typename LA_traits::normtype normtype; \ -static normtype norm (const X &x) {return x.norm();} \ -static void axpy (X&s, const X &x, const C c) {s.axpy(c,x);} \ +static bool gencmp(const C *x, const C *y, int n) {for(int i=0; iy;} \ +static inline bool smaller(const C &x, const C &y) {return x &x) {return x.norm();} \ +static inline void axpy (X&s, const X &x, const C c) {s.axpy(c,x);} \ static void put(int fd, const C &x, bool dimensions=1) {x.put(fd,dimensions);} \ static void get(int fd, C &x, bool dimensions=1) {x.get(fd,dimensions);} \ static void multiput(unsigned int n,int fd, const C *x, bool dimensions=1) {for(unsigned int i=0; i struct LA_traits_aux, scalar_false> { typedef C elementtype; typedef NRMat producttype; typedef typename LA_traits::normtype normtype; -static normtype norm (const NRSMat &x) {return x.norm();} -static void axpy (NRSMat&s, const NRSMat &x, const C c) {s.axpy(c,x);} +static bool gencmp(const C *x, const C *y, int n) {for(int i=0; iy;} +static inline bool smaller(const C &x, const C &y) {return x &x) {return x.norm();} +static inline void axpy (NRSMat&s, const NRSMat &x, const C c) {s.axpy(c,x);} static void put(int fd, const C &x, bool dimensions=1) {x.put(fd,dimensions);} static void get(int fd, C &x, bool dimensions=1) {x.get(fd,dimensions);} static void multiput(unsigned int n,int fd, const C *x, bool dimensions=1) {for(unsigned int i=0; i; template NRMat< complex >; template NRMat; +template NRMat; template NRMat; @@ -798,6 +799,7 @@ template istream & operator>>(istream &s, NRMat< T > &x); \ INSTANTIZE(double) INSTANTIZE(complex) INSTANTIZE(int) +INSTANTIZE(short) INSTANTIZE(char) diff --git a/mat.h b/mat.h index 9e587ea..f0f6e23 100644 --- a/mat.h +++ b/mat.h @@ -29,9 +29,9 @@ public: #endif ~NRMat(); #ifdef MATPTR - const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return(memcmp(v[0],rhs.v[0],nn*mm*sizeof(T)));} + const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits::gencmp(v[0],rhs.v[0],nn*mm);} //memcmp for scalars else elementwise #else - const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return(memcmp(v,rhs.v,nn*mm*sizeof(T)));} + const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits::gencmp(v,rhs.v,nn*mm);} //memcmp for scalars else elementwise #endif const bool operator==(const NRMat &rhs) const {return !(*this != rhs);}; inline int getcount() const {return count?*count:0;} @@ -101,6 +101,7 @@ public: explicit NRMat(const SparseMat &rhs); // dense from sparse NRMat & operator+=(const SparseMat &rhs); NRMat & operator-=(const SparseMat &rhs); + void gemm(const T &beta, const SparseMat &a, const char transa, const NRMat &b, const char transb, const T &alpha);//this = alpha*op( A )*op( B ) + beta*this inline void simplify() {}; //just for compatibility with sparse ones bool issymmetric() const {return 0;}; diff --git a/noncblas.h b/noncblas.h index 5e970df..3a6c749 100644 --- a/noncblas.h +++ b/noncblas.h @@ -595,4 +595,64 @@ int cblas_errprn(int ierr, int info, char *form, ...); #endif /* end #ifdef CBLAS_ENUM_ONLY */ #endif + + +/* + * Automatically Tuned Linear Algebra Software v3.4.1 + * (C) Copyright 1997 R. Clint Whaley + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the ATLAS group or the names of its contributers may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +#ifndef ATLAS_ENUM_H + #define ATLAS_ENUM_H + + + #define ATLAS_ORDER CBLAS_ORDER + #define AtlasRowMajor CblasRowMajor + #define AtlasColMajor CblasColMajor + #define ATLAS_TRANS CBLAS_TRANSPOSE + #define AtlasNoTrans CblasNoTrans + #define AtlasTrans CblasTrans + #define AtlasConjTrans CblasConjTrans + #define ATLAS_UPLO CBLAS_UPLO + #define AtlasUpper CblasUpper + #define AtlasLower CblasLower + #define ATLAS_DIAG CBLAS_DIAG + #define AtlasNonUnit CblasNonUnit + #define AtlasUnit CblasUnit + #define ATLAS_SIDE CBLAS_SIDE + #define AtlasLeft CblasLeft + #define AtlasRight CblasRight + +#endif + +/*from clapack.h*/ +int clapack_dgesv(const enum CBLAS_ORDER Order, const int N, const int NRHS, + double *A, const int lda, int *ipiv, + double *B, const int ldb); + + #endif diff --git a/nonclass.cc b/nonclass.cc index 7b36641..f51d973 100644 --- a/nonclass.cc +++ b/nonclass.cc @@ -26,6 +26,7 @@ template void lawritemat(FILE *file,const T *a,int r,int c,const char *form0, \ INSTANTIZE(double) INSTANTIZE(complex) INSTANTIZE(int) +INSTANTIZE(short) INSTANTIZE(char) template diff --git a/nonclass.h b/nonclass.h index 765ceb2..5235fb8 100644 --- a/nonclass.h +++ b/nonclass.h @@ -122,12 +122,30 @@ return det; //general submatrix, INDEX will typically be NRVec or even int* //NOTE: in order to check consistency between nrows and rows in rows is a NRVec //some advanced metaprogramming would be necessary +//application: e.g. ignoresign=true, equalsigns=true, indexshift= -1 ... elements of Slater overlaps for RHF template -const NRMat::elementtype> submatrix(const MAT a, const int nrows, const INDEX rows, const int ncols, const INDEX cols, int indexshift=0, bool ignoresign=false) +const NRMat::elementtype> submatrix(const MAT a, const int nrows, const INDEX rows, const int ncols, const INDEX cols, int indexshift=0, bool ignoresign=false, bool equalsigns=false) { NRMat::elementtype> r(nrows,ncols); +if(equalsigns) //make the element zero if signs of both indices are opposite +{ +if(ignoresign) +{ +for(int i=0; i +int genqsort(INDEX l, INDEX r,COMPAR (*cmp)(const INDEX, const INDEX), void (*swap)(const INDEX,const INDEX)) +{ +INDEX i,j,piv; +int parity=0; + +if(r<=l) return parity; //1 element +if(cmp(r,l)<0) {parity^=1; swap(l,r);} +if(r-l==1) return parity; //2 elements and preparation for median +piv= (l+r)/2; //pivoting by median of 3 - safer +if(cmp(piv,l)<0) {parity^=1; swap(l,piv);} //and change the pivot element implicitly +if(cmp(r,piv)<0) {parity^=1; swap(r,piv);} //and change the pivot element implicitly +if(r-l==2) return parity; //in the case of 3 elements we are finished too + +//general case , l-th r-th already processed +i=l+1; j=r-1; +do{ + //important sharp inequality - stops at sentinel element for efficiency + // this is inefficient if all keys are equal - unnecessary n log n swaps are done, but we assume that it is atypical input + while(cmp(i++,piv)<0); + i--; + while(cmp(j--,piv)>0); + j++; + if(i; template NRSMat< complex >; template NRSMat; +template NRSMat; template NRSMat; @@ -338,5 +339,6 @@ template istream & operator>>(istream &s, NRSMat< T > &x); \ INSTANTIZE(double) INSTANTIZE(complex) INSTANTIZE(int) +INSTANTIZE(short) INSTANTIZE(char) diff --git a/smat.h b/smat.h index 913959d..2f09ab5 100644 --- a/smat.h +++ b/smat.h @@ -24,7 +24,7 @@ public: NRSMat & operator|=(const NRSMat &rhs); //assignment to a new copy NRSMat & operator=(const NRSMat &rhs); //assignment NRSMat & operator=(const T &a); //assign a to diagonal - const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return(memcmp(v,rhs.v,NN2*sizeof(T)));} + const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits::gencmp(v,rhs.v,NN2);} //memcmp for scalars else elementwise const bool operator==(const NRSMat &rhs) const {return !(*this != rhs);}; inline NRSMat & operator*=(const T &a); inline NRSMat & operator+=(const T &a); diff --git a/sparsemat.h b/sparsemat.h index 7e033d8..9f5b259 100644 --- a/sparsemat.h +++ b/sparsemat.h @@ -85,7 +85,8 @@ public: inline const SparseMat operator*(const T &rhs) const {return SparseMat(*this) *= rhs;} inline const SparseMat operator+(const SparseMat &rhs) const {return SparseMat(*this) += rhs;} //must not be symmetric+general inline const SparseMat operator-(const SparseMat &rhs) const {return SparseMat(*this) -= rhs;} //must not be symmetric+general - inline const NRVec operator*(const NRVec &rhs) const; // Mat * Vec + inline const NRVec operator*(const NRVec &rhs) const; // SparseMat * Vec + inline const NRMat operator*(const NRMat &rhs) const; // SparseMat * Mat void diagonalof(NRVec &, const bool divide=0) const; //get diagonal const SparseMat operator*(const SparseMat &rhs) const; SparseMat & oplusequal(const SparseMat &rhs); //direct sum @@ -135,6 +136,10 @@ template inline const NRVec SparseMat::operator*(const NRVec &rhs) const {NRVec result(nn); result.gemv((T)0,*this,'n',(T)1,rhs); return result;}; +template +inline const NRMat SparseMat::operator*(const NRMat &rhs) const +{NRMat result(nn,rhs.ncols()); result.gemm((T)0,*this,'n',rhs,'n',(T)1); return result;}; + template extern istream& operator>>(istream &s, SparseMat &x); diff --git a/t.cc b/t.cc index 718272a..9b9cca0 100644 --- a/t.cc +++ b/t.cc @@ -194,7 +194,8 @@ cout <<" big1*big2 "< atest, btest,ctest; { int cc,c1,c2,c3; @@ -211,7 +212,7 @@ cout << dtest; NRMat etest(atest.nrows(),btest.ncols()); etest.strassen(0., atest, 't', btest, 'n', 1.); cout << etest; -*/ +} if(0) { @@ -511,6 +512,53 @@ cin>>n; cout <<"difference = "<<(res1-res2).norm()<>n>>k>>m; +{ +NRMat a(n,k),b(k,m),c(n,m),d(n,m); +for(int i=0;i aa(a); +d.gemm(0., aa, 'n', b, 'n', .6); +cout<>n>>m; diff --git a/vec.cc b/vec.cc index 0463cb2..5e79a1f 100644 --- a/vec.cc +++ b/vec.cc @@ -141,6 +141,35 @@ const NRVec NRVec::operator-() const return result; } + +//comparison operators (for lexical order) + +template +const bool NRVec::operator>(const NRVec &rhs) const +{ +int n=nn; if(rhs.nn::bigger(v[i],rhs.v[i])) return true; + if(LA_traits::smaller(v[i],rhs.v[i])) return false; + } +return nn>rhs.nn; +} + +template +const bool NRVec::operator<(const NRVec &rhs) const +{ +int n=nn; if(rhs.nn::smaller(v[i],rhs.v[i])) return true; + if(LA_traits::bigger(v[i],rhs.v[i])) return false; + } +return nn::axpy(const double alpha, const NRVec &x) { diff --git a/vec.h b/vec.h index 9d980bc..b48f613 100644 --- a/vec.h +++ b/vec.h @@ -51,8 +51,12 @@ public: NRVec & operator=(const NRVec &rhs); NRVec & operator=(const T &a); //assign a to every element NRVec & operator|=(const NRVec &rhs); - const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return(memcmp(v,rhs.v,nn*sizeof(T)));} + const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits::gencmp(v,rhs.v,nn);} //memcmp for scalars else elementwise const bool operator==(const NRVec &rhs) const {return !(*this != rhs);}; + const bool operator>(const NRVec &rhs) const; + const bool operator<(const NRVec &rhs) const; + const bool operator>=(const NRVec &rhs) const {return !(*this < rhs);}; + const bool operator<=(const NRVec &rhs) const {return !(*this > rhs);}; const NRVec operator-() const; inline NRVec & operator+=(const NRVec &rhs); inline NRVec & operator-=(const NRVec &rhs);