diff --git a/csrmat.cc b/csrmat.cc index 7f09814..4682ebd 100644 --- a/csrmat.cc +++ b/csrmat.cc @@ -30,6 +30,9 @@ namespace LA { +/* + Commented out by Roman for ICC + #define INSTANTIZE(T) \ template void CSRMat::gemm(const T beta, const CSRMat &a, const char transa, const CSRMat &b, const char transb, const T alpha); \ template CSRMat & CSRMat::operator*=(const T &a); \ @@ -45,8 +48,8 @@ template void CSRMat::put(int fd, bool dimen, bool transp) const; \ INSTANTIZE(double) - INSTANTIZE(complex) +*/ //// forced instantization of functions in the header in the corresponding object file template class CSRMat; diff --git a/csrmat.h b/csrmat.h index cbd81a0..d715018 100644 --- a/csrmat.h +++ b/csrmat.h @@ -71,6 +71,7 @@ public: CSRMat & operator=(const CSRMat &rhs); void copyonwrite(); void resize(const SPMatindex nn, const SPMatindex mm); + void dealloc(void) {resize(0,0);} void moveto(GPUID destination); void clear(); ~CSRMat(); @@ -128,5 +129,12 @@ public: */ }; +template +std::ostream & operator<<(std::ostream &s, const CSRMat &x); + +template +std::istream& operator>>(std::istream &s, CSRMat &x); + + }//namespace #endif //_CSRMAT_H_ diff --git a/davidson.h b/davidson.h index a07e613..4cf7572 100644 --- a/davidson.h +++ b/davidson.h @@ -33,26 +33,21 @@ namespace LA { //therefore the whole implementation must be a template in a header //Note that for efficiency in a direct CI case the diagonalof() should cache its result -template -extern void davidson(const Matrix &bigmat, NRVec &eivals, NRVec *eivecs, const char *eivecsfile, - int nroots=1, const bool verbose=0, const double eps=1e-6, - const bool incore=1, int maxit=100, const int maxkrylov = 500, - void (*initguess)(NRVec &)=NULL); //@@@options: left eigenvectors by matrix transpose, overridesymmetric (for nrmat) //@@@small matrix gdiagonalize - shift complex roots up (option to gdiagonalize?) //@@@test gdiagonalize whether it sorts the roots and what for complex ones +//@@@implement left eigenvectors for nonsymmetric case //Davidson algorithm: J. Comp. Phys. 17:817 (1975) -//@@@implement left eigenvectors for nonsymmetric case template -void davidson(const Matrix &bigmat, NRVec &eivals, NRVec *eivecs, const char *eivecsfile, - int nroots, const bool verbose, const double eps, - const bool incore, int maxit, const int maxkrylov, - void (*initguess)(NRVec &)) +extern void davidson(const Matrix &bigmat, NRVec &eivals, NRVec *eivecs, const char *eivecsfile, + int nroots=1, const bool verbose=0, const double eps=1e-6, + const bool incore=1, int maxit=100, const int maxkrylov = 500, + void (*initguess)(NRVec &)=NULL) { bool flag=0; int n=bigmat.nrows(); @@ -87,7 +82,7 @@ smallH=0; //default guess based on lowest diagonal element of the matrix -if(initguess) (*initguess)(vec1); +if(initguess) initguess(vec1); else { const T *diagonal = bigmat.diagonalof(vec2,false,true); diff --git a/fourindex.h b/fourindex.h index 6da9bde..3b490e4 100644 --- a/fourindex.h +++ b/fourindex.h @@ -226,6 +226,7 @@ public: inline matel4 *getlist() const {return list;} inline I size() const {return nn;} void resize(const I n); + void dealloc(void) {resize(0);} void copyonwrite(); unsigned long length() const; inline void add(const I i, const I j, const I k, const I l, const T elem) diff --git a/la.h b/la.h index 2fa5700..62d179f 100644 --- a/la.h +++ b/la.h @@ -42,6 +42,7 @@ #include "smat.h" #include "sparsemat.h" #include "sparsesmat.h" +#include "csrmat.h" #include "vec.h" using namespace LA; diff --git a/la_traits.h b/la_traits.h index 90beb60..7478fff 100644 --- a/la_traits.h +++ b/la_traits.h @@ -220,8 +220,30 @@ static inline normtype norm (const complex &x) {return std::abs(x);} static inline void axpy (complex &s, const complex &x, const complex &c) {s+=x*c;} static inline void get(int fd, complex &x, bool dimensions=0, bool transp=0) {if(sizeof(complex)!=read(fd,&x,sizeof(complex))) laerror("read error");} static inline void put(int fd, const complex &x, bool dimensions=0, bool transp=0) {if(sizeof(complex)!=write(fd,&x,sizeof(complex))) laerror("write error");} -static void multiget(size_t n,int fd, complex *x, bool dimensions=0){ssize_t r=read(fd,x,n*sizeof(complex)); if((ssize_t)(n*sizeof(complex))!=r) {std::cout<<"read returned "< *x, bool dimensions=0) {ssize_t r=write(fd,x,n*sizeof(complex)); if((ssize_t)(n*sizeof(complex))!=r) {std::cout<<"write returned "< *x, bool dimensions=0) + { + size_t total=0; + ssize_t r; + do{ + r=read(fd,x+total,(n-total)*sizeof(complex)); + if(r<0 || r==0 && n!=0 ) {std::cout<<"read returned "<); + if(r%sizeof(complex)) laerror("read error 2"); + } + while(total < n); + } +static void multiput(size_t n, int fd, const complex *x, bool dimensions=0) + { + size_t total=0; + ssize_t r; + do{ + r=write(fd,x+total,(n-total)*sizeof(complex)); + if(r<0 || r==0 && n!=0 ) {std::cout<<"write returned "<); + if(r%sizeof(complex)) laerror("write error 2"); + } + while(total < n); + } static void copy(complex *dest, complex *src, unsigned int n) {memcpy(dest,src,n*sizeof(complex));} static void clear(complex *dest, unsigned int n) {memset(dest,0,n*sizeof(complex));} static void copyonwrite(complex &x) {}; @@ -232,6 +254,7 @@ static inline C realpart(const complex &x) {return x.real();} static inline C imagpart(const complex &x) {return x.imag();} }; + //non-complex scalars template struct LA_traits_aux { @@ -248,8 +271,30 @@ static inline normtype norm (const C &x) {return std::abs(x);} static inline void axpy (C &s, const C &x, const C &c) {s+=x*c;} static inline void put(int fd, const C &x, bool dimensions=0, bool transp=0) {if(sizeof(C)!=write(fd,&x,sizeof(C))) laerror("write error");} static inline void get(int fd, C &x, bool dimensions=0, bool transp=0) {if(sizeof(C)!=read(fd,&x,sizeof(C))) laerror("read error");} -static void multiget(size_t n,int fd, C *x, bool dimensions=0){ssize_t r=read(fd,x,n*sizeof(C)); if((ssize_t)(n*sizeof(C))!=r) {std::cout<<"read returned "<::put(int fd, bool dim, bool transp) const { } } }else{ - LA_traits::multiput(nn*mm,fd, + LA_traits::multiput((size_t)nn*(size_t)mm,fd, #ifdef MATPTR v[0] #else @@ -202,7 +202,7 @@ void NRMat::get(int fd, bool dim, bool transp){ } } }else{ - LA_traits::multiget(nn*mm,fd, + LA_traits::multiget((size_t)nn*(size_t)mm,fd, #ifdef MATPTR v[0] #else @@ -838,8 +838,9 @@ NRMat& NRMat::transposeme(const int _n) { return *this; } + /***************************************************************************//** - * icreate complex double-precision matrix from real double-precision matrix \f$A\f$ + * create complex double-precision matrix from real double-precision matrix \f$A\f$ * @param[in] rhs real double-precision matrix \f$A\f$ * @param[in] imagpart flag indicating whether the matrix \f$A\f$ should be considered as a real * or imaginary part of the complex matrix being created @@ -877,6 +878,43 @@ NRMat >::NRMat(const NRMat &rhs, bool imagpart): nn(rhs. #endif } + + +/***************************************************************************//** + * create double-precision matrix from complex double-precision matrix \f$A\f$ + * @param[in] rhs complex double-precision matrix \f$A\f$ + * @param[in] imagpart flag indicating whether the matrix \f$A\f$ should be taken as the real + * or imaginary part of the input complex matrix + ******************************************************************************/ +template<> +NRMat::NRMat(const NRMat > &rhs, bool imagpart): nn(rhs.nrows()), mm(rhs.ncols()), count(new int(1)) { + const int nn_mm = nn*mm; +#ifdef CUDALA + if(location == cpu){ +#endif + #ifdef MATPTR + v = new double*[n]; + v[0] = new double[nn_mm]; + for(register int i=1; i >::randomize(const double &x) { #endif for(register int i=0; i(re, im); } } #ifdef CUDALA diff --git a/mat.h b/mat.h index d28a03d..e5d9425 100644 --- a/mat.h +++ b/mat.h @@ -80,6 +80,8 @@ public: //! complexifying constructor NRMat(const typename LA_traits_complex::NRMat_Noncomplex_type &rhs, bool imagpart = false); + //! explicit decomplexifying constructor + explicit NRMat(const NRMat > &rhs, bool imagpart = false); //! explicit constructor converting symmetric matrix stored in packed form into a NRMat object explicit NRMat(const NRSMat &rhs); @@ -280,6 +282,9 @@ public: //! resize the matrix void resize(int n, int m); + //! deallocate the matrix + void dealloc(void) {resize(0,0);} + //! get the pointer to the data inline operator T*(); //! get the const pointer to the data @@ -332,6 +337,8 @@ public: explicit NRMat(const SparseMat &rhs); // dense from sparse //! explicit constructor converting sparse symmetric matrix into \c NRMat object explicit NRMat(const SparseSMat &rhs); + //! explicit constructor converting sparse CSR matrix into \c NRMat object + explicit NRMat(const CSRMat &rhs); //! add up given sparse matrix NRMat & operator+=(const SparseMat &rhs); @@ -618,7 +625,6 @@ inline T* NRMat::operator[](const int i) { if (i < 0 || i >= nn) laerror("Mat [] out of range"); if (!v) laerror("unallocated matrix"); #endif - NOT_GPU(*this); #ifdef MATPTR return v[i]; #else diff --git a/matexp.h b/matexp.h index 9910cb5..9cdb4cf 100644 --- a/matexp.h +++ b/matexp.h @@ -42,8 +42,8 @@ else for(i=order-1; i>=0; i--) { //std::cerr<<"TEST polynom0 "<::deallocate(z); z=y*x;} //for large matrices avoid storing 4 ones simultaneously + LA_traits::deallocate(y); y=z+c[i]; } } @@ -346,9 +346,11 @@ int power; NRVec::normtype> taylor2=exp_aux::normtype>(mat,power,maxpower,maxtaylor,scale); V tmp; +bool washere=0; for(int i=1; i<=(1<1) rhs=result; //apply again to the result of previous application else result=rhs; tmp=rhs; //now rhs can be used as scratch @@ -361,6 +363,8 @@ for(int i=1; i<=(1< > &A, NRMat< complex > *B, complex *det, int n) +{ + int r, *ipiv; + + if (A.nrows() != A.ncols()) laerror("linear_solve() call for non-square matrix"); + if (B && A.nrows() != B->ncols()) laerror("incompatible matrices in linear_solve()"); + A.copyonwrite(); + if (B) B->copyonwrite(); + ipiv = new int[A.nrows()]; + n = A.nrows(); + int nrhs = B ? B->nrows() : 0; + int lda = A.ncols(); + int ldb = B ? B->ncols() : A.nrows(); + FORNAME(zgesv)(&n, &nrhs, (double *)A[0], &lda, ipiv, + B ? (double *)(*B)[0] : (double *)0, &ldb, &r); + if (r < 0) { + delete[] ipiv; + laerror("illegal argument in lapack_gesv"); + } + if (det && r>=0) { + *det = A[0][0]; + for (int i=1; i0 && B) laerror("singular matrix in zgesv"); +} + + + //other version of linear solver based on gesvx //------------------------------------------------------------------------------ @@ -793,6 +828,18 @@ extern "C" void FORNAME(dggev)(const char *JOBVL, const char *JOBVR, const FINT double *VL, const FINT *LDVL, double *VR, const FINT *LDVR, double *WORK, const FINT *LWORK, FINT *INFO ); +extern "C" void FORNAME(zgeev)(const char *JOBVL, const char *JOBVR, const FINT *N, + complex *A, const FINT *LDA, complex *W, complex *VL, const FINT *LDVL, + complex *VR, const FINT *LDVR, complex *WORK, const FINT *LWORK, + double *RWORK, FINT *INFO ); + +extern "C" void FORNAME(zggev)(const char *JOBVL, const char *JOBVR, const FINT *N, + complex *A, const FINT *LDA, complex *B, const FINT *LDB, complex *W, complex *WBETA, + complex *VL, const FINT *LDVL, complex *VR, const FINT *LDVR, + complex *WORK, const FINT *LWORK, double *RWORK, FINT *INFO ); + + + //statics for sorting static int *gdperm; @@ -904,11 +951,12 @@ void gdiagonalize(NRMat &a, NRVec &wr, NRVec &wi, #endif delete[] work; -//std::cout <<"TEST dgeev\n"< 0) laerror("convergence problem in ggev/geev in gdiagonalize()"); +//std::cout <<"TEST dgeev\n"< &a, NRVec &wr, NRVec &wi, } } + if(sorttype>0) { NRVec perm(n); @@ -997,12 +1046,119 @@ void gdiagonalize(NRMat &a, NRVec &wr, NRVec &wi, } + +//most general complex routine +template<> +void gdiagonalize(NRMat > &a, NRVec< complex > &w, + NRMat< complex >*vl, NRMat< complex > *vr, + const bool corder, int n, const int sorttype, const int biorthonormalize, + NRMat > *b, NRVec > *beta) +{ + + if(n<=0) n = a.nrows(); + if (n > a.ncols() || n>a.nrows() ) laerror("gdiagonalize() call for a non-square matrix"); + if (n > w.size()) + laerror("inconsistent dimension of eigen vector in gdiagonalize()"); + if (vl) if (n > vl->nrows() || n > vl->ncols()) + laerror("inconsistent dimension of vl in gdiagonalize()"); + if (vr) if (n > vr->nrows() || n > vr->ncols()) + laerror("inconsistent dimension of vr in gdiagonalize()"); + if (beta) if(n > beta ->size()) laerror("inconsistent dimension of beta in gdiagonalize()"); + if(b) if(n > b->nrows() || n > b->ncols()) + laerror("inconsistent dimension of b in gdiagonalize()"); + if(b && !beta || beta && !b) laerror("missing array for generalized diagonalization"); + + a.copyonwrite(); + w.copyonwrite(); + if (vl) vl->copyonwrite(); + if (vr) vr->copyonwrite(); + if (beta) beta->copyonwrite(); + if (b) b->copyonwrite(); + + char jobvl = vl ? 'V' : 'N'; + char jobvr = vr ? 'V' : 'N'; + complex work0; + FINT lwork = -1; + FINT r; + FINT lda=a.ncols(); + FINT ldb=0; + if(b) ldb=b->ncols(); + FINT ldvl= vl?vl->ncols():lda; + FINT ldvr= vr?vr->ncols():lda; + + double *rwork = new double[n*(b?8:2)]; + +#ifdef FORINT + FINT ntmp = n; + if(b) FORNAME(zggev)(&jobvr, &jobvl, &ntmp, a, &lda, *b, &ldb, w, *beta, vr?vr[0]:(complex *)0, + &ldvr, vl?vl[0]:(complex *)0, &ldvl, &work0, &lwork, rwork, &r); + else FORNAME(zgeev)(&jobvr, &jobvl, &ntmp, a, &lda, w, vr?vr[0]:(complex *)0, + &ldvr, vl?vl[0]:(complex *)0, &ldvl, &work0, &lwork, rwork, &r); +#else + if(b) FORNAME(zggev)(&jobvr, &jobvl, &n, a, &lda, *b, &ldb, w, *beta, vr?vr[0]:(complex *)0, + &ldvr, vl?vl[0]:(complex *)0, &ldvl, &work0, &lwork, rwork, &r); + else FORNAME(zgeev)(&jobvr, &jobvl, &n, a, &lda, w, vr?vr[0]:(complex *)0, + &ldvr, vl?vl[0]:(complex *)0, &ldvl, &work0, &lwork, rwork, &r); +#endif + + lwork = (FINT) work0.real(); + complex *work = new complex[lwork]; + +#ifdef FORINT + if(b) FORNAME(zggev)(&jobvr, &jobvl, &ntmp, a, &lda, *b, &ldb, w, *beta, vr?vr[0]:(complex *)0, + &ldvr, vl?vl[0]:(complex *)0, &ldvl, work, &lwork, rwork, &r); + else FORNAME(zgeev)(&jobvr, &jobvl, &ntmp, a, &lda, w, vr?vr[0]:(complex *)0, + &ldvr, vl?vl[0]:(complex *)0, &ldvl, work, &lwork, rwork, &r); +#else + if(b) FORNAME(zggev)(&jobvr, &jobvl, &n, a, &lda, *b, &ldb, w, *beta, vr?vr[0]:(complex *)0, + &ldvr, vl?vl[0]:(complex *)0, &ldvl, work, &lwork, rwork, &r); + else FORNAME(zgeev)(&jobvr, &jobvl, &n, a, &lda, w, vr?vr[0]:(complex *)0, + &ldvr, vl?vl[0]:(complex *)0, &ldvl, work, &lwork, rwork, &r); +#endif + + delete[] work; + delete[] rwork; + +//std::cout <<"TEST zg(g|e)ev\n"< 0) laerror("convergence problem in ggev/geev in gdiagonalize()"); + + if(biorthonormalize && vl && vr) + { + if(b || beta) laerror("@@@ biorthonormalize not implemented yet for generalized non-hermitian eigenproblem");//metric b would be needed + for(int i=0; i tmp; + cblas_zdotc_sub(n,(*vr)[i],1,(*vl)[i], 1, &tmp); + tmp = 1./tmp; + std::cout <<"scaling by "<0) + { + laerror("sorting not implemented in complex gdiagonalize"); + } + + + if (corder) { + if (vl) vl->transposeme(n); + if (vr) vr->transposeme(n); + } + +} + + +template<> void gdiagonalize(NRMat &a, NRVec< complex > &w, NRMat< complex >*vl, NRMat< complex > *vr, const bool corder, int n, const int sorttype, const int biorthonormalize, NRMat *b, NRVec *beta) { - if(!corder) laerror("gdiagonalize() corder 0 not implemented"); if(n<=0) n = a.nrows(); if(n> a.nrows() || n == a.nrows() && n != a.ncols()) laerror("gdiagonalize() call for a non-square matrix"); @@ -1020,19 +1176,43 @@ void gdiagonalize(NRMat &a, NRVec< complex > &w, i = 0; while (i < n) { if (wi[i] == 0) { + if(corder) + { + if (vl) for (int j=0; j((*rvl)[i][j], (*rvl)[i+1][j]); + (*vl)[j][i+1] = complex((*rvl)[i][j], -(*rvl)[i+1][j]); + } + else + { (*vl)[i][j] = complex((*rvl)[i][j], (*rvl)[i+1][j]); (*vl)[i+1][j] = complex((*rvl)[i][j], -(*rvl)[i+1][j]); + } } if (vr) for (int j=0; j((*rvr)[i][j], (*rvr)[i+1][j]); + (*vr)[j][i+1] = complex((*rvr)[i][j], -(*rvr)[i+1][j]); + } + else + { (*vr)[i][j] = complex((*rvr)[i][j], (*rvr)[i+1][j]); (*vr)[i+1][j] = complex((*rvr)[i][j], -(*rvr)[i+1][j]); + } } i += 2; } @@ -1043,35 +1223,78 @@ void gdiagonalize(NRMat &a, NRVec< complex > &w, } -const NRMat realpart(const NRMat< complex > &a) +template<> +const NRMat realpart > >(const NRMat< complex > &a) { +#ifdef CUDALA + if(location == cpu){ +#endif NRMat result(a.nrows(), a.ncols()); cblas_dcopy(a.nrows()*a.ncols(), (const double *)a[0], 2, result, 1); +#ifdef CUDALA + }else{ + laerror("not implemented for cuda yet"); + } +#endif return result; } -const NRMat imagpart(const NRMat< complex > &a) +template<> +const NRMat imagpart > >(const NRMat< complex > &a) { +#ifdef CUDALA + if(location == cpu){ +#endif + NRMat result(a.nrows(), a.ncols()); cblas_dcopy(a.nrows()*a.ncols(), (const double *)a[0]+1, 2, result, 1); +#ifdef CUDALA + }else{ + laerror("not implemented for cuda yet"); + } +#endif return result; } -const NRMat< complex > realmatrix (const NRMat &a) +template<> +const NRMat< complex > realmatrix > (const NRMat &a) { +#ifdef CUDALA + if(location == cpu){ +#endif + + NRMat > result(a.nrows(), a.ncols()); cblas_dcopy(a.nrows()*a.ncols(), a, 1, (double *)result[0], 2); +#ifdef CUDALA + }else{ + laerror("not implemented for cuda yet"); + } +#endif + return result; } -const NRMat< complex > imagmatrix (const NRMat &a) +template<> +const NRMat< complex > imagmatrix > (const NRMat &a) { +#ifdef CUDALA + if(location == cpu){ +#endif + NRMat< complex > result(a.nrows(), a.ncols()); cblas_dcopy(a.nrows()*a.ncols(), a, 1, (double *)result[0]+1, 2); +#ifdef CUDALA + }else{ + laerror("not implemented for cuda yet"); + } +#endif + return result; } -const NRMat< complex > complexmatrix (const NRMat &re, const NRMat &im) +template<> +const NRMat< complex > complexmatrix > (const NRMat &re, const NRMat &im) { if(re.nrows()!=im.nrows() || re.ncols() != im.ncols()) laerror("incompatible sizes of real and imaginary parts"); NRMat< complex > result(re.nrows(), re.ncols()); @@ -1080,57 +1303,60 @@ const NRMat< complex > complexmatrix (const NRMat &re, const NRM return result; } +template<> +const SparseSMat< complex > complexmatrix >(const SparseSMat &re, const SparseSMat &im) { + if(re.nrows()!=im.nrows() || re.ncols() != im.ncols()) laerror("incompatible sizes of real and imaginary parts"); + SparseSMat< complex > result(re.nrows(),re.ncols()); + complex tmp; + SparseSMat::iterator pre(re); + for(; pre.notend(); ++pre) { + tmp = pre->elem; + result.add(pre->row,pre->col,tmp,false); + } -NRMat matrixfunction(NRMat a, complex - (*f)(const complex &), const bool adjust) -{ - int n = a.nrows(); - NRMat< complex > u(n, n), v(n, n); - NRVec< complex > w(n); -/* -NRMat > a0=complexify(a); -*/ - gdiagonalize(a, w, &u, &v);//a gets destroyed, eigenvectors are rows - NRVec< complex > z = diagofproduct(u, v, 1, 1); -/* -std::cout <<"TEST matrixfunction\n"< > wz(n); -for (int i=0; i::iterator pim(im); + for(; pim.notend(); ++pim) { + tmp = complex(0,1)*(pim->elem); + result.add(pim->row,pim->col,tmp,false); + } - for (int i=0; i > r(n, n); - r.gemm(0.0, v, 'c', u, 'n', 1.0); - double inorm = cblas_dnrm2(n*n, (double *)r[0]+1, 2); - if (inorm > 1e-10) { - std::cout << "norm = " << inorm << std::endl; - laerror("nonzero norm of imaginary part of real matrixfunction"); - } - return realpart(r); + return result; } -NRMat matrixfunction(NRSMat a, double (*f) (double)) -{ - int n = a.nrows(); - NRVec w(n); - NRMat v(n, n); - diagonalize(a, w, &v, 0); +template<> +const SparseSMat< complex > realmatrix >(const SparseSMat &re) { + SparseSMat< complex > result(re.nrows(),re.ncols()); + complex tmp; - for (int i=0; i u = v; - v.diagmultl(w); - NRMat r(n, n); - r.gemm(0.0, u, 't', v, 'n', 1.0); - return r; + SparseSMat::iterator pre(re); + for(; pre.notend(); ++pre) { + tmp = pre->elem; + result.add(pre->row,pre->col,tmp,false); + } + + return result; } +template<> +const SparseSMat< complex > imagmatrix >(const SparseSMat &im) { + SparseSMat< complex > result(im.nrows(),im.ncols()); + complex tmp; + + + SparseSMat::iterator pim(im); + for(; pim.notend(); ++pim) { + tmp = complex(0,1)*(pim->elem); + result.add(pim->row,pim->col,tmp,false); + } + + return result; +} + + + + + NRMat realmatrixfunction(NRMat a, double (*f) (const double)) { int n = a.nrows(); @@ -1145,6 +1371,7 @@ NRMat realmatrixfunction(NRMat a, double (*f) (const double)) return r; } + NRMat > complexmatrixfunction(NRMat a, double (*fre) (const double), double (*fim) (const double)) { int n = a.nrows(); @@ -1169,6 +1396,16 @@ NRMat > complexmatrixfunction(NRMat a, double (*fre) (co // instantize template to an addresable function +complex myccopy (const complex &x) +{ + return x; +} + +double mycopy (const double x) +{ + return x; +} + complex myclog (const complex &x) { return log(x); @@ -1193,14 +1430,37 @@ double sqrtinv (const double x) NRMat log(const NRMat &a) { - return matrixfunction(a, &myclog, 1); + return matrixfunction(a, &myclog); } +NRMat > log(const NRMat > &a) +{ + return matrixfunction(a, &myclog); +} + + NRMat exp0(const NRMat &a) { - return matrixfunction(a, &mycexp, 1); + return matrixfunction(a, &mycexp); } +NRMat > exp0(const NRMat > &a) +{ + return matrixfunction(a, &mycexp); +} + +NRMat > copytest(const NRMat > &a) +{ + return matrixfunction(a, &myccopy); +} + +NRMat copytest(const NRMat &a) +{ + return matrixfunction(a, &myccopy); +} + + + const NRVec diagofproduct(const NRMat &a, const NRMat &b, diff --git a/nonclass.h b/nonclass.h index e0cdb37..f9fc244 100644 --- a/nonclass.h +++ b/nonclass.h @@ -88,8 +88,8 @@ extern const NRVec diagofproduct(const NRMat &a, const NRMat &b,\ extern T trace2(const NRMat &a, const NRMat &b, bool trb=0); \ extern T trace2(const NRSMat &a, const NRSMat &b, const bool diagscaled=0);\ extern T trace2(const NRSMat &a, const NRMat &b, const bool diagscaled=0);\ -extern void linear_solve(NRMat &a, NRMat *b, double *det=0,int n=0); /*solve Ax^T=b^T (b is nrhs x n) */ \ -extern void linear_solve(NRSMat &a, NRMat *b, double *det=0, int n=0); /*solve Ax^T=b^T (b is nrhs x n) */\ +extern void linear_solve(NRMat &a, NRMat *b, T *det=0,int n=0); /*solve Ax^T=b^T (b is nrhs x n) */ \ +extern void linear_solve(NRSMat &a, NRMat *b, T *det=0, int n=0); /*solve Ax^T=b^T (b is nrhs x n) */\ extern void linear_solve(NRMat &a, NRVec &b, double *det=0, int n=0); \ extern void linear_solve(NRSMat &a, NRVec &b, double *det=0, int n=0); \ extern void diagonalize(NRMat &a, NRVec::normtype> &w, const bool eivec=1, const bool corder=1, int n=0, NRMat *b=NULL, const int itype=1); \ @@ -104,36 +104,28 @@ declare_la(complex) // Separate declarations //general nonsymmetric matrix and generalized diagonalization +//corder =0 ... C rows are eigenvectors, =1 ... C columns are eigenvectors extern void gdiagonalize(NRMat &a, NRVec &wr, NRVec &wi, NRMat *vl, NRMat *vr, const bool corder=1, int n=0, const int sorttype=0, const int biorthonormalize=0, - NRMat *b=NULL, NRVec *beta=NULL); -extern void gdiagonalize(NRMat &a, NRVec< complex > &w, + NRMat *b=NULL, NRVec *beta=NULL); //this used real storage of eigenvectors like dgeev + +template +extern void gdiagonalize(NRMat &a, NRVec< complex > &w, NRMat< complex >*vl, NRMat< complex > *vr, const bool corder=1, int n=0, const int sorttype=0, const int biorthonormalize=0, - NRMat *b=NULL, NRVec *beta=NULL); -extern NRMat matrixfunction(NRSMat a, double (*f) (double)); -extern NRMat realmatrixfunction(NRMat a, double (*f) (double)); //a has to by in fact symmetric -extern NRMat > complexmatrixfunction(NRMat a, double (*fre) (double), double (*fim) (double)); //a has to by in fact symmetric -extern NRMat matrixfunction(NRMat a, complex (*f)(const complex &),const bool adjust=0); + NRMat *b=NULL, NRVec *beta=NULL); //eigenvectors are stored in complex matrices for T both double and complex -extern complex sqrtinv(const complex &); -extern double sqrtinv(const double); - -//functions on matrices -inline NRMat sqrt(const NRSMat &a) { return matrixfunction(a,&std::sqrt); } -inline NRMat sqrtinv(const NRSMat &a) { return matrixfunction(a,&sqrtinv); } -inline NRMat realsqrt(const NRMat &a) { return realmatrixfunction(a,&std::sqrt); } -inline NRMat realsqrtinv(const NRMat &a) { return realmatrixfunction(a,&sqrtinv); } -inline NRMat log(const NRSMat &a) { return matrixfunction(a,&std::log); } -extern NRMat log(const NRMat &a); -extern NRMat exp0(const NRMat &a); - - -extern const NRMat realpart(const NRMat< complex >&); -extern const NRMat imagpart(const NRMat< complex >&); -extern const NRMat< complex > realmatrix (const NRMat&); -extern const NRMat< complex > imagmatrix (const NRMat&); -extern const NRMat< complex > complexmatrix (const NRMat&, const NRMat&); +//complex,real,imaginary parts of various entities +template +extern const typename LA_traits::realtype realpart(const T&); +template +extern const typename LA_traits::realtype imagpart(const T&); +template +extern const typename LA_traits::complextype realmatrix (const T&); +template +extern const typename LA_traits::complextype imagmatrix (const T&); +template +extern const typename LA_traits::complextype complexmatrix (const T&, const T&); //Cholesky decomposition extern void cholesky(NRMat &a, bool upper=1); @@ -315,5 +307,84 @@ return r; } +//matrix functions via diagonalization + +extern NRMat realmatrixfunction(NRMat a, double (*f) (double)); //a has to by in fact symmetric +extern NRMat > complexmatrixfunction(NRMat a, double (*fre) (double), double (*fim) (double)); //a has to by in fact symmetric + +template +NRMat matrixfunction(NRSMat a, double (*f) (double)) //of symmetric/hermitian matrix +{ + int n = a.nrows(); + NRVec w(n); + NRMat v(n, n); + diagonalize(a, w, &v, 0); + + for (int i=0; i u = v; + NRVec ww=w; //diagmultl needs same type + v.diagmultl(ww); + NRMat r(n, n); + r.gemm(0.0, u, 't', v, 'n', 1.0); //gemm will use 'c' for complex ones + return r; +} + + +template +extern NRMat matrixfunction(NRMat a, complex (*f)(const complex &)) //of a general real/complex matrix +{ + int n = a.nrows(); + NRVec > w(n); + NRMat > u(n,n),v(n,n); + +#ifdef debugmf +NRMat > a0=a; +#endif + + gdiagonalize(a, w, &u, &v, false,n,0,false,NULL,NULL);//a gets destroyed, eigenvectors are rows + NRVec< complex > z = diagofproduct(u, v, 1, 1); + +#ifdef debugmf +std::cout <<"TEST matrixfunction\n"< > wz(n); +for (int i=0; i > r(n, n); + r.gemm(0.0, v, 'c', u, 'n', 1.0); + return (NRMat) r; //convert back to real if applicable by the explicit decomplexifying constructor; it is NOT checked to which accuracy the imaginary part is actually zero +} + + + + +extern complex sqrtinv(const complex &); +extern double sqrtinv(const double); + +//functions on matrices +inline NRMat sqrt(const NRSMat &a) { return matrixfunction(a,&std::sqrt); } +inline NRMat sqrtinv(const NRSMat &a) { return matrixfunction(a,&sqrtinv); } +inline NRMat realsqrt(const NRMat &a) { return realmatrixfunction(a,&std::sqrt); } +inline NRMat realsqrtinv(const NRMat &a) { return realmatrixfunction(a,&sqrtinv); } +inline NRMat log(const NRSMat &a) { return matrixfunction(a,&std::log); } +extern NRMat log(const NRMat &a); +extern NRMat > log(const NRMat > &a); +extern NRMat > exp0(const NRMat > &a); +extern NRMat > copytest(const NRMat > &a); +extern NRMat copytest(const NRMat &a); +extern NRMat exp0(const NRMat &a); + + }//namespace #endif diff --git a/smat.cc b/smat.cc index dac6aba..e8b62f4 100644 --- a/smat.cc +++ b/smat.cc @@ -58,7 +58,7 @@ void NRSMat::put(int fd, bool dim, bool transp) const { if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); } - LA_traits::multiput(NN2,fd,v,dim); + LA_traits::multiput((size_t)nn*(nn+1)/2,fd,v,dim); } /***************************************************************************//** @@ -89,7 +89,7 @@ void NRSMat::get(int fd, bool dim, bool transp) { }else{ copyonwrite(); } - LA_traits::multiget(NN2,fd,v,dim); + LA_traits::multiget((size_t)nn*(nn+1)/2,fd,v,dim); } diff --git a/smat.h b/smat.h index 5f94360..1427e18 100644 --- a/smat.h +++ b/smat.h @@ -159,6 +159,7 @@ public: void clear() {copyonwrite(); LA_traits::clear(v,NN2);}; //zero out void resize(const int n); + void dealloc(void) {resize(0);} inline operator T*(); inline operator const T*() const; diff --git a/sparsemat.cc b/sparsemat.cc index 33ef645..cf5651f 100644 --- a/sparsemat.cc +++ b/sparsemat.cc @@ -1245,6 +1245,9 @@ return *this; +/* + Commented out by Roman for ICC + #define INSTANTIZE(T) \ template SparseMat & SparseMat::oplusequal(const SparseMat &rhs);\ template SparseMat & SparseMat::oplusequal(const NRMat &rhs);\ @@ -1291,9 +1294,8 @@ template void SparseMat::permuteindices(const NRVec &p);\ INSTANTIZE(double) - INSTANTIZE(complex) //some functions are not OK for hermitean matrices, needs a revision!!! - +*/ ////////////////////////////////////////////////////////////////////////////// //// forced instantization in the corresponding object file diff --git a/sparsemat.h b/sparsemat.h index ece4a04..b0b7a84 100644 --- a/sparsemat.h +++ b/sparsemat.h @@ -120,6 +120,7 @@ public: void get(int fd, bool dimensions=1, bool transposed=false); void put(int fd, bool dimensions=1, bool transposed=false) const; void resize(const SPMatindex n, const SPMatindex m); //destructive + void dealloc(void) {resize(0,0);} void incsize(const SPMatindex n, const SPMatindex m); //increase size without destroying the data void transposeme(); const SparseMat transpose() const; diff --git a/sparsesmat.cc b/sparsesmat.cc index bff719a..586e3ed 100644 --- a/sparsesmat.cc +++ b/sparsesmat.cc @@ -257,6 +257,36 @@ if(divide) return divide?NULL:&r[0]; } +template +SparseSMat SparseSMat::submatrix(const int fromrow, const int torow, const int fromcol, const int tocol) const +{ +#ifdef DEBUG + if(fromrow<0 || fromrow>=nn|| torow<0 || torow>=nn || fromcol<0 || fromcol>=mm || tocol<0 || tocol>=mm || fromrow>torow || fromcol>tocol){ + laerror("invalid submatrix specification"); + } +#endif + const int m = tocol - fromcol + 1; + const int n = torow - fromrow + 1; + SparseSMat result(n, m); + typename SparseSMat::iterator p(*this); + for(; p.notend(); ++p) + if(p->row>=fromrow && p->row<= torow && p->col >= fromcol && p->col <= tocol) + result.add(p->row-fromrow, p->col-fromcol, p->elem, false); + +return result; +} + +template +void SparseSMat::storesubmatrix(const int fromrow, const int fromcol, const SparseSMat &rhs) +{ + const int tocol = fromcol + rhs.ncols() - 1; + const int torow = fromrow + rhs.nrows() - 1; +#ifdef DEBUG + if(fromrow<0 || fromrow>=nn || torow>=nn || fromcol<0 || fromcol>=mm || tocol>=mm) laerror("bad indices in storesubmatrix"); +#endif + typename SparseSMat::iterator p(rhs); + for(; p.notend(); ++p) add(p->row+fromrow, p->col+fromcol, p->elem, false); +} template @@ -305,6 +335,7 @@ void SparseSMat::put(int fd, bool dimen, bool transp) const { +/* Commented out by Roman for ICC #define INSTANTIZE(T) \ template void SparseSMat::gemm(const T beta, const SparseSMat &a, const char transa, const SparseSMat &b, const char transb, const T alpha); \ @@ -321,8 +352,8 @@ template void SparseSMat::put(int fd, bool dimen, bool transp) const; \ INSTANTIZE(double) - INSTANTIZE(complex) +*/ //// forced instantization of functions in the header in the corresponding object file template class SparseSMat; diff --git a/sparsesmat.h b/sparsesmat.h index 6a98609..7ac6bf5 100644 --- a/sparsesmat.h +++ b/sparsesmat.h @@ -61,9 +61,11 @@ public: explicit SparseSMat(const SparseMat &rhs); explicit SparseSMat(const NRSMat &rhs); explicit SparseSMat(const NRMat &rhs); + explicit SparseSMat(const CSRMat &rhs); SparseSMat & operator=(const SparseSMat &rhs); void copyonwrite(); void resize(const SPMatindex nn, const SPMatindex mm); + void dealloc(void) {resize(0,0);} inline void setcoldim(int i) {mm=(SPMatindex)i;}; //std::map *line(SPMatindex n) const {return v[n];}; typedef std::map *ROWTYPE; @@ -100,6 +102,8 @@ public: int nrows() const {return nn;} int ncols() const {return mm;} SparseSMat cholesky(void) const; + SparseSMat submatrix(const int fromrow, const int torow, const int fromcol, const int tocol) const; + void storesubmatrix(const int fromrow, const int fromcol, const SparseSMat &rhs); class iterator {//not efficient, just for output to ostreams private: diff --git a/t.cc b/t.cc index c808fb5..915a989 100644 --- a/t.cc +++ b/t.cc @@ -62,9 +62,10 @@ NRVec x(1.,10); NRVec y(2.,10); NRVec z(-2.,10); -cout.setf(ios::scientific); -//cc:cout.setf(ios::fixed); -cout.precision(12); +//cout.setf(ios::scientific); +cc:cout.setf(ios::fixed); +cout.precision(10); +cin.exceptions ( ifstream::eofbit | ifstream::failbit | ifstream::badbit ); if(0) test(x); @@ -415,6 +416,50 @@ cout < a; +cin >>a; +int n=a.nrows(); +NRMat > u(n,n),v(n,n); +NRVec >w(n); +gdiagonalize(a,w,&u,&v,0,n,0,1); +cout < >z=diagofproduct(u,v,1,1); +cout < > a; +cin >>a; +int n=a.nrows(); +NRMat > u(n,n),v(n,n); +NRVec >w(n); +gdiagonalize(a,w,&u,&v,0,n,0,1); +cout < >z=diagofproduct(u,v,1,1); +cout < a; +NRMat aa,bb,cc; +cin >>aa; +cc=copytest(aa); +cout < > a,b,c; +a=complexify(aa); +c=copytest(a); +cout < > a,b,c; +cin>>a; +c=copytest(a); +cout < a,b,c; cin >>a; +c=copytest(a); +cout < a; NRMat b=exp(a); NRMat c=log(b); cout < Y = tmp; NRMat S = vr.transpose() * vr; +cout <<"test S\n"< tmp2 = S * tmp; +cout <<"test tmp2\n"< numX = inverse(vr) * vrd; +NRMat vri = inverse(vr); + +NRMat numX = vri * vrd; cout <<" numerical X matrix \n"<< numX; cout <<" numerical X matrix test = "<< (vr * numX - vrd).norm()< hd(100); +int n; +cin >>n; +NRSMat hd(n); hd.randomize(1); SparseSMat h(hd); NRMat rd = hd*hd; @@ -1425,14 +1522,22 @@ NRMat r2(rx); cout <<"Error = "<<(r2-rd).norm()< > h; -cin>>h; -h *= complex(0,1); +SparseSMat h0; +cin>>h0; +cout <<"matrix read\n"; cout.flush(); +SparseSMat h1 = h0; //.submatrix(0,2047,0,2047); +SparseSMat > h = imagmatrix(h1); double t=clock()/((double) (CLOCKS_PER_SEC)); -SparseSMat > r = exp(h); -cout <<"SparseSMat time "< > r = h*h; +cout <<"SparseSMat mult time "< > h3(h); @@ -1443,6 +1548,7 @@ cout <<"errorx = "<<(r2-NRSMat >(r)).norm()< h1 = h0; +CSRMat > h = imagmatrix(h1); +double t=clock()/((double) (CLOCKS_PER_SEC)); +CSRMat > r = h*h; +cout <<"CSRMat mult time "< > h2(h); +NRMat >r2 = exp(h2); +cout <<"error = "<<(r2-NRMat >(r)).norm()< >::otimes(const NRVec > &b, const bool con if(conj){ const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), -scale.imag()); - cublasZgerc(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), 1); + cublasZgerc(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), b.nn); TEST_CUBLAS("cublasZgerc"); result.conjugateme(); }else{ const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), +scale.imag()); - cublasZgeru(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), 1); + cublasZgeru(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), b.nn); TEST_CUBLAS("cublasZgeru"); } } @@ -839,6 +839,9 @@ NRVec > complexify(const NRVec &rhs) { /***************************************************************************//** * forced instantization in the corespoding object file ******************************************************************************/ +/* + Commented out by Roman for ICC + #define INSTANTIZE(T) \ template void NRVec::put(int fd, bool dim, bool transp) const; \ template void NRVec::get(int fd, bool dim, bool transp); \ @@ -855,19 +858,7 @@ INSTANTIZE(unsigned short) INSTANTIZE(unsigned int) INSTANTIZE(unsigned long) INSTANTIZE(unsigned long long) - -template class NRVec; -template class NRVec >; -template class NRVec; -template class NRVec; -template class NRVec; -template class NRVec; -template class NRVec; -template class NRVec; -template class NRVec; -template class NRVec; -template class NRVec; -template class NRVec; +*/ #define INSTANTIZE_DUMMY(T) \ template<> void NRVec::gemv(const T beta, const NRMat &a, const char trans, const T alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ @@ -878,6 +869,11 @@ template<> void NRVec::gemv(const LA_traits_complex::Component_type beta, template<> NRVec & NRVec::normalize(LA_traits::normtype *) {laerror("normalize() impossible for integer types"); return *this;} \ template<> const NRMat NRVec::otimes(const NRVec &b,const bool conj, const T &scale) const {laerror("otimes presently implemented only for double and complex double"); return NRMat ();} +// Roman +// following gemv are not implemented +template<> void NRVec::gemv(const double beta, const SparseMat &a, const char trans, const double alpha, const NRVec &x, bool s) { laerror("gemv on unsupported types"); } +template<> void NRVec< complex >::gemv(const complex beta, const SparseMat< complex > &a, const char trans, const complex alpha, const NRVec< complex > &x, bool s) { laerror("gemv on unsupported types"); } + INSTANTIZE_DUMMY(char) INSTANTIZE_DUMMY(short) @@ -902,4 +898,17 @@ INSTANTIZE_DUMMY(complex) INSTANTIZE_DUMMY(complex >) INSTANTIZE_DUMMY(complex >) +template class NRVec; +template class NRVec >; +template class NRVec; +template class NRVec; +template class NRVec; +template class NRVec; +template class NRVec; +template class NRVec; +template class NRVec; +template class NRVec; +template class NRVec; +template class NRVec; + }//namespace diff --git a/vec.h b/vec.h index e488f64..b40ae79 100644 --- a/vec.h +++ b/vec.h @@ -287,6 +287,9 @@ public: //! resize the current vector void resize(const int n); + //!deallocate the current vector + void dealloc(void) {resize(0);} + //! determine the norm of this vector inline const typename LA_traits::normtype norm() const;