From 07c12d689636fe2908751baa2fe12437e794ec3e Mon Sep 17 00:00:00 2001 From: jiri Date: Thu, 8 Oct 2009 14:01:15 +0000 Subject: [PATCH] *** empty log message *** --- Makefile.am | 5 +- configure.ac | 1 + davidson.h | 2 +- la.h | 1 + la_traits.h | 78 ++++++++++++++++-- laerror.cc | 3 + mat.cc | 30 +++++++ matexp.h | 18 ++--- nonclass.cc | 9 +-- nonclass.h | 4 +- smat.cc | 18 +++++ sparsemat.cc | 62 +++++++-------- strassen.cc | 3 + t.cc | 38 ++++++++- vec.cc | 218 ++++++++++++++++----------------------------------- 15 files changed, 272 insertions(+), 218 deletions(-) diff --git a/Makefile.am b/Makefile.am index f410a78..dde7dc9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -12,11 +12,12 @@ EXTRA_DIST = LICENSE CXXFLAGS += -fno-omit-frame-pointer -O3 -g -fPIC -finline-limit=1000 CXXFLAGS += -DNO_STRASSEN -DFORTRAN_ +#remove this for production code efficiency CXXFLAGS += -DDEBUG -LDFLAGS += $(CBLASOPT) $(CLAPACKOPT) +CXXFLAGS += $(CBLASOPT) $(CLAPACKOPT) LDFLAGS += $(CBLASLIB) -LDFLAGS += $(TRACEBACKOPT) +CXXFLAGS += $(TRACEBACKOPT) LDFLAGS += $(TRACEBACKLIB) diff --git a/configure.ac b/configure.ac index cec29cd..1bc45b2 100644 --- a/configure.ac +++ b/configure.ac @@ -10,6 +10,7 @@ AM_INIT_AUTOMAKE([-Wall -Werror foreign]) AC_PROG_CXX AC_PROG_CC AC_PROG_RANLIB +AC_LANG(C++) # Checks for libraries. AC_CHECK_LIB([blas], [dgemm_]) diff --git a/davidson.h b/davidson.h index 0e4e9d4..20c0a36 100644 --- a/davidson.h +++ b/davidson.h @@ -31,7 +31,7 @@ //therefore the whole implementation must be a template in a header //Note that for efficiency in a direct CI case the diagonalof() should cache its result -export template +template extern void davidson(const Matrix &bigmat, NRVec &eivals, NRVec *eivecs, const char *eivecsfile, int nroots=1, const bool verbose=0, const double eps=1e-6, const bool incore=1, int maxit=100, const int maxkrylov = 500); diff --git a/la.h b/la.h index 49a7119..afa431c 100644 --- a/la.h +++ b/la.h @@ -19,6 +19,7 @@ #define _LA_H_ //this should be the single include file for the end user +// #include "vec.h" #include "smat.h" diff --git a/la_traits.h b/la_traits.h index 229907a..2330ecb 100644 --- a/la_traits.h +++ b/la_traits.h @@ -15,6 +15,11 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ +// +//for autotools +// +#include "config.h" + //////////////////////////////////////////////////////////////////////////// //LA traits classes and generally needed includes @@ -22,6 +27,8 @@ #define _LA_TRAITS_INCL +extern bool _LA_count_check; + using namespace std; #include @@ -38,6 +45,14 @@ extern "C" { } #endif +#ifdef NONCLAPACK +#include "noncblas.h" +#else +extern "C" { +#include "clapack.h" +} +#endif + //forward declarations template class NRVec; @@ -47,6 +62,44 @@ template class NRSMat; template class NRSMat_from1; template class SparseMat; + +typedef class {} Dummy_type; +typedef class {} Dummy_type2; + +//for components of complex numbers +// +template +struct LA_traits_complex + { + typedef Dummy_type Component_type; + typedef Dummy_type NRVec_Noncomplex_type; + typedef Dummy_type NRMat_Noncomplex_type; + typedef Dummy_type2 NRSMat_Noncomplex_type; + }; + +#define SPECIALIZE_COMPLEX(T) \ +template<> \ +struct LA_traits_complex > \ + { \ + typedef T Component_type; \ + typedef NRVec NRVec_Noncomplex_type; \ + typedef NRMat NRMat_Noncomplex_type; \ + typedef NRSMat NRSMat_Noncomplex_type; \ + }; + + +SPECIALIZE_COMPLEX(double) +SPECIALIZE_COMPLEX(complex) +SPECIALIZE_COMPLEX(float) +SPECIALIZE_COMPLEX(complex) +SPECIALIZE_COMPLEX(char) +SPECIALIZE_COMPLEX(unsigned char) +SPECIALIZE_COMPLEX(short) +SPECIALIZE_COMPLEX(int) +SPECIALIZE_COMPLEX(unsigned int) +SPECIALIZE_COMPLEX(unsigned long) + + //for general sortable classes template struct LA_sort_traits; @@ -96,7 +149,12 @@ class isscalar { public: typedef scalar_false scalar_type;}; //specializations #define SCALAR(X) \ template<>\ -class isscalar {public: typedef scalar_true scalar_type;}; +class isscalar {public: typedef scalar_true scalar_type;};\ +template<>\ +class isscalar > {public: typedef scalar_true scalar_type;};\ +template<>\ +class isscalar > > {public: typedef scalar_true scalar_type;};\ + //declare what is scalar SCALAR(char) @@ -111,21 +169,24 @@ SCALAR(unsigned long) SCALAR(unsigned long long) SCALAR(float) SCALAR(double) -SCALAR(complex) -SCALAR(complex) SCALAR(void *) #undef SCALAR -//now declare the traits for scalars and for composed classes -//NOTE! methods in traits classes have to be declared static, -//since the class itself is never instantiated. -//for performance, it can be also inlined at the same time -template struct LA_traits_aux {}; +//declare this generically as traits for any unknown class +template struct LA_traits_aux + { + typedef Dummy_type normtype; + }; //TRAITS SPECIALIZATIONS +////now declare the traits for scalars and for composed classes +////NOTE! methods in traits classes have to be declared static, +////since the class itself is never instantiated. +////for performance, it can be also inlined at the same time +// //complex scalars template @@ -226,6 +287,7 @@ static void copyonwrite(X &x) {x.copyonwrite();} \ generate_traits_smat(NRSMat) generate_traits_smat(NRSMat_from1) + //the final traits class template struct LA_traits : LA_traits_aux::scalar_type> {}; diff --git a/laerror.cc b/laerror.cc index 5d3e0b7..ba130c1 100644 --- a/laerror.cc +++ b/laerror.cc @@ -30,6 +30,9 @@ #include "traceback.h" #endif +bool _LA_count_check=true; + +extern "C" void _findme(void) {}; //for autoconf test we need a function with C linkage void laerror(const char *s1) { diff --git a/mat.cc b/mat.cc index f393596..98e6757 100644 --- a/mat.cc +++ b/mat.cc @@ -369,6 +369,25 @@ if(n==0) n=nn; return *this; } + +//complex from real +template<> +NRMat >::NRMat(const NRMat &rhs, bool imagpart) +: nn(rhs.nrows()), mm(rhs.ncols()), count(new int(1)) +{ +#ifdef MATPTR + v = new complex*[n]; + v[0] = new complex[mm*nn]; + for (int i=1; i)); + cblas_dcopy(nn*mm,&rhs[0][0],1,((double *)v[0]) + (imagpart?1:0),2); +#else + v = new complex[mm*nn]; + memset(v, 0, nn*mm*sizeof(complex)); + cblas_dcopy(nn*mm,&rhs[0][0],1,((double *)v) + (imagpart?1:0),2); +#endif +} + // Output of Mat template void NRMat::fprintf(FILE *file, const char *format, const int modulo) const @@ -501,6 +520,17 @@ for(int i=0; i +void NRMat >::randomize(const double &x) +{ +for(int i=0; i @@ -184,7 +176,7 @@ return int(ceil(log(n)/log2-log(.75))); template -NRVec exp_aux(const T &x, int &power,int maxpower= -1, int maxtaylor= -1) +NRVec exp_aux(const T &x, int &power,int maxpower= -1, int maxtaylor= -1, C prescale=1.) { //should better be computed by mathematica to have accurate last digits, chebyshev instead, see exp in glibc static double exptaylor[]={ @@ -210,7 +202,7 @@ static double exptaylor[]={ 8.2206352466243294955e-18, 4.1103176233121648441e-19, 0.}; -double mnorm= x.norm(); +double mnorm= x.norm() * abs(prescale); power=nextpow2(mnorm); if(maxpower>=0 && power>maxpower) power=maxpower; double scale=exp(-log(2.)*power); @@ -268,14 +260,14 @@ return r; //and probably not efficient either template -void exptimesdestructive(const M &mat, V &result, V &rhs, bool transpose=false, const double scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false) //uses just matrix vector multiplication +void exptimesdestructive(const M &mat, V &result, V &rhs, bool transpose=false, const typename LA_traits::elementtype scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false) //uses just matrix vector multiplication { if(mat_is_0) {result = rhs; LA_traits::copyonwrite(result); return;} //prevent returning a shallow copy of rhs if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)rhs.size()) laerror("inappropriate sizes in exptimes"); int power; //prepare the polynom of and effectively scale the matrix -NRVec::elementtype> taylor2=exp_aux::elementtype>(mat,power,maxpower,maxtaylor); +NRVec::elementtype> taylor2=exp_aux::elementtype>(mat,power,maxpower,maxtaylor,scale); V tmp; @@ -298,7 +290,7 @@ return; template -const V exptimes(const M &mat, V rhs, bool transpose=false, const double scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false ) +const V exptimes(const M &mat, V rhs, bool transpose=false, const typename LA_traits::elementtype scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false ) { V result; exptimesdestructive(mat,result,rhs,transpose,scale,maxpower,maxtaylor,mat_is_0); diff --git a/nonclass.cc b/nonclass.cc index 340db6a..3a5aee3 100644 --- a/nonclass.cc +++ b/nonclass.cc @@ -16,15 +16,8 @@ along with this program. If not, see . */ -#ifndef NONCBLAS -extern "C" { -#include "cblas.h" -#include "clapack.h" -} -#else +//this can be safely included since it contains ifdefs NONCBLAS and NONCLAPACK anyway #include "noncblas.h" -#endif - #include "vec.h" #include "smat.h" #include "mat.h" diff --git a/nonclass.h b/nonclass.h index aa76a9e..f7caa32 100644 --- a/nonclass.h +++ b/nonclass.h @@ -25,7 +25,7 @@ //MISC -export template +template const NRSMat twoside_transform(const NRSMat &S, const NRMat &C, bool transp=0) //calculate C^dagger S C { if(transp) @@ -44,7 +44,7 @@ return NRSMat(result); -export template +template const NRMat diagonalmatrix(const NRVec &x) { int n=x.size(); diff --git a/smat.cc b/smat.cc index 1640144..bdc6788 100644 --- a/smat.cc +++ b/smat.cc @@ -138,6 +138,14 @@ void NRSMat::randomize(const double &x) for(int i=0; i +void NRSMat >::randomize(const double &x) +{ +for(int i=0; i >::axpy(const complex alpha, cblas_zaxpy(nn, (void *)(&alpha), (void *)x.v, 1, (void *)v, 1); } +//complex from real +template<> +NRSMat >::NRSMat(const NRSMat &rhs, bool imagpart) +: nn(rhs.nrows()), v(new complex[rhs.nrows()*(rhs.nrows()+1)/2]), count(new int(1)) +{ +memset(v,0,nn*(nn+1)/2*sizeof(complex)); +cblas_dcopy(nn*(nn+1)/2,&rhs(0,0),1,((double *)v) + (imagpart?1:0),2); +} + + ////////////////////////////////////////////////////////////////////////////// diff --git a/sparsemat.cc b/sparsemat.cc index 79a4d07..57c1ae2 100644 --- a/sparsemat.cc +++ b/sparsemat.cc @@ -43,7 +43,7 @@ extern ssize_t write(int, const void *, size_t); } -export template +template void SparseMat::get(int fd, bool dimen, bool transp) { errno=0; @@ -75,7 +75,7 @@ list=l; } -export template +template void SparseMat::put(int fd,bool dimen, bool transp) const { errno=0; @@ -103,7 +103,7 @@ if(2*sizeof(SPMatindex) != write(fd,&sentinel,2*sizeof(SPMatindex))) laerror("ca //helpers to be used from different functions -export template +template void SparseMat::unsort() { if(symmetric) colsorted=NULL; @@ -113,7 +113,7 @@ colsorted=rowsorted=NULL; nonzero=0; } -export template +template void SparseMat::deletelist() { if(colsorted||rowsorted) unsort();//prevent obsolete pointers @@ -131,7 +131,7 @@ count=NULL; } //no checks, not to be public -export template +template void SparseMat::copylist(const matel *l) { list=NULL; @@ -142,7 +142,7 @@ while(l) } } -export template +template void SparseMat::copyonwrite() { if(!count) laerror("probably an assignment to undefined sparse matrix"); @@ -241,7 +241,7 @@ else } -export template +template unsigned int SparseMat::length() const { if(nonzero) return nonzero; @@ -258,7 +258,7 @@ return n; } -export template +template unsigned int SparseMat::sort(int type) const //must be const since used from operator* which must be const to be compatible with other stuff, dirty casts here { if(type==0&&rowsorted || type==1&&colsorted) return nonzero; @@ -295,7 +295,7 @@ return nonzero; //number of (in principle) nonzero elements } -export template +template void SparseMat::simplify() { unsigned int n; @@ -359,7 +359,7 @@ unsort(); //since there were NULLs introduced, rowsorted is not dense } -export template +template void SparseMat::resize(const SPMatindex n, const SPMatindex m) { unsort(); @@ -378,7 +378,7 @@ void SparseMat::resize(const SPMatindex n, const SPMatindex m) colsorted=rowsorted=NULL; } -export template +template void SparseMat::incsize(const SPMatindex n, const SPMatindex m) { if(symmetric && n!=m) laerror("unsymmetric size increment of a symmetric sparsemat"); @@ -391,7 +391,7 @@ void SparseMat::incsize(const SPMatindex n, const SPMatindex m) -export template +template void SparseMat::addsafe(const SPMatindex n, const SPMatindex m, const T elem) { #ifdef debug @@ -409,7 +409,7 @@ add(n,m,elem); //assignment operator -export template +template SparseMat & SparseMat::operator=(const SparseMat &rhs) { if (this != &rhs) @@ -427,7 +427,7 @@ SparseMat & SparseMat::operator=(const SparseMat &rhs) return *this; } -export template +template SparseMat & SparseMat::join(SparseMat &rhs) { if(symmetric!=rhs.symmetric||nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible matrices in join()"); @@ -442,7 +442,7 @@ return *this; } -export template +template SparseMat & SparseMat::addtriangle(const SparseMat &rhs, const bool lower, const char sign) { if(nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible dimensions for +="); @@ -461,7 +461,7 @@ while(l) return *this; } -export template +template SparseMat & SparseMat::operator+=(const SparseMat &rhs) { if(symmetric&&!rhs.symmetric) laerror("cannot add general to symmetric sparse"); @@ -491,7 +491,7 @@ while(l) return *this; } -export template +template SparseMat & SparseMat::operator-=(const SparseMat &rhs) { if(symmetric&&!rhs.symmetric) laerror("cannot add general to symmetric sparse"); @@ -523,7 +523,7 @@ return *this; //constructor from a dense matrix -export template +template SparseMat::SparseMat(const NRMat &rhs) { nn=rhs.nrows(); @@ -583,7 +583,7 @@ return divide?NULL:&r[0]; //constructor dense matrix from sparse -export template +template NRMat::NRMat(const SparseMat &rhs) { nn=rhs.nrows(); @@ -618,7 +618,7 @@ while(l) //constructor dense symmetric packed matrix from sparse #define nn2 (nn*(nn+1)/2) -export template +template NRSMat::NRSMat(const SparseMat &rhs) { if(!rhs.issymmetric()||rhs.nrows()!=rhs.ncols()) laerror("sparse matrix is not symmetric"); @@ -636,7 +636,7 @@ while(l) #undef nn2 //constructor dense vector from sparse -export template +template NRVec::NRVec(const SparseMat &rhs) { if(rhs.nrows()>1 && rhs.ncols()>1) laerror("cannot construct a vector from a sparse matrix with more than one row/column"); @@ -659,7 +659,7 @@ else while(l) } //assignment of a scalar matrix -export template +template SparseMat & SparseMat::operator=(const T a) { if(!count ||nn<=0||mm<=0) laerror("assignment of scalar to undefined sparse matrix"); @@ -675,7 +675,7 @@ for(i=0;i +template SparseMat & SparseMat::operator+=(const T a) { if(!count ||nn<=0||mm<=0) laerror("assignment of scalar to undefined sparse matrix"); @@ -686,7 +686,7 @@ for(i=0;i +template SparseMat & SparseMat::operator-=(const T a) { if(!count ||nn<=0||mm<=0) laerror("assignment of scalar to undefined sparse matrix"); @@ -700,7 +700,7 @@ return *this; //constructor from a dense symmetric matrix -export template +template SparseMat::SparseMat(const NRSMat &rhs) { nn=rhs.nrows(); @@ -724,7 +724,7 @@ for(i=0;i +template void SparseMat::transposeme() { if(!count) laerror("transposeme on undefined lhs"); @@ -739,7 +739,7 @@ while(l) SWAP(nn,mm); } -export template +template void SparseMat::setunsymmetric() { if(!symmetric) return; @@ -760,7 +760,7 @@ while(l) //include the mirror picture of elements into the list } -export template +template SparseMat & SparseMat::operator*=(const T a) { if(!count) laerror("operator*= on undefined lhs"); @@ -1217,7 +1217,7 @@ simplify(); //direct sum and product -- only partly implemented at the moment -export template +template SparseMat & SparseMat::oplusequal(const NRMat &rhs) { if(issymmetric()) laerror("oplusequal symmetric-unsymmetric"); @@ -1237,7 +1237,7 @@ return *this; -export template +template SparseMat & SparseMat::oplusequal(const NRSMat &rhs) { if(!issymmetric()) laerror("oplusequal symmetric-unsymmetric"); @@ -1257,7 +1257,7 @@ return *this; -export template +template SparseMat & SparseMat::oplusequal(const SparseMat &rhs) { if(symmetric != rhs.symmetric) laerror("incompatible symmetry of sparsemats in oplusequal"); diff --git a/strassen.cc b/strassen.cc index 766e466..fa55468 100644 --- a/strassen.cc +++ b/strassen.cc @@ -27,8 +27,11 @@ extern "C" void fmm(const char c_transa,const char c_transb,const int m,const in double *d_aux,int i_naux); extern "C" void strassen_cutoff(int c, int c1, int c2, int c3); +template<> void NRMat::s_cutoff(const int c, const int c1, const int c2, const int c3) const { strassen_cutoff(c,c1,c2,c3);} + +template<> void NRMat::strassen(const double beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const double alpha) { int l(transa=='n'?a.nn:a.mm); diff --git a/t.cc b/t.cc index 9804623..749c72f 100644 --- a/t.cc +++ b/t.cc @@ -1172,7 +1172,7 @@ cout < v(3); +v[0]=1; v[1]=2; v[2]=3; +NRVec > vv = v; +NRVecu(v); +vv += u; +cout < scale; cin >> scale; + +NRMat > h; cin >>h; +NRVec > x(h.nrows()); +NRVec > y,z; + +x.randomize(1.); + +y=exptimes(h*scale,x,false,1.); +z=exptimes(h,x,false,scale); + +cout <::randomize(const double &x) for(int i=0; i +void NRVec >::randomize(const double &x) +{ +for(int i=0; i (x*(2.*random()/(1.+RAND_MAX) -1.),x*(2.*random()/(1.+RAND_MAX) -1.)); +} + + + +//complex from real constructor +template<> +NRVec >::NRVec(const NRVec &rhs, bool imagpart) +: nn(rhs.size()), v(new complex[rhs.size()]), count(new int(1)) +{ +memset(v,0,nn*sizeof(complex)); +cblas_dcopy(nn,&rhs[0],1,((double *)v) + (imagpart?1:0),2); +} + // axpy call for T = double (not strided) template<> @@ -287,158 +304,12 @@ template<> NRVec & NRVec::normalize() {laerror("normalize() impossible for integer types"); return *this;} -template<> -void NRVec::gemv(const int beta, - const NRSMat &A, const char trans, - const int alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const short beta, - const NRSMat &A, const char trans, - const short alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const unsigned long beta, - const NRSMat &A, const char trans, - const unsigned long alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const unsigned int beta, - const NRSMat &A, const char trans, - const unsigned int alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - - -template<> -void NRVec::gemv(const unsigned char beta, - const NRSMat &A, const char trans, - const unsigned char alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - - -template<> -void NRVec::gemv(const char beta, - const NRSMat &A, const char trans, - const char alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const int beta, - const NRMat &A, const char trans, - const int alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const short beta, - const NRMat &A, const char trans, - const short alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const unsigned long beta, - const NRMat &A, const char trans, - const unsigned long alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const unsigned int beta, - const NRMat &A, const char trans, - const unsigned int alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - - -template<> -void NRVec::gemv(const unsigned char beta, - const NRMat &A, const char trans, - const unsigned char alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - - - - -template<> -void NRVec::gemv(const char beta, - const NRMat &A, const char trans, - const char alpha, const NRVec &x) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const int beta, - const SparseMat &A, const char trans, - const int alpha, const NRVec &x, bool s) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const short beta, - const SparseMat &A, const char trans, - const short alpha, const NRVec &x, bool s) -{ -laerror("not yet implemented"); -} - - -template<> -void NRVec::gemv(const char beta, - const SparseMat &A, const char trans, - const char alpha, const NRVec &x, bool s) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const unsigned long beta, - const SparseMat &A, const char trans, - const unsigned long alpha, const NRVec &x, bool s) -{ -laerror("not yet implemented"); -} - -template<> -void NRVec::gemv(const unsigned int beta, - const SparseMat &A, const char trans, - const unsigned int alpha, const NRVec &x, bool s) -{ -laerror("not yet implemented"); -} - - - -template<> -void NRVec::gemv(const unsigned char beta, - const SparseMat &A, const char trans, - const unsigned char alpha, const NRVec &x, bool s) -{ -laerror("not yet implemented"); -} +#define INSTANTIZE_DUMMY(T) \ +template<> void NRVec::gemv(const T beta, const NRMat &a, const char trans, const T alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ +template<> void NRVec::gemv(const T beta, const NRSMat &a, const char trans, const T alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ +template<> void NRVec::gemv(const T beta, const SparseMat &a, const char trans, const T alpha, const NRVec &x, bool s) { laerror("gemv on unsupported types"); } \ +template<> void NRVec::gemv(const LA_traits_complex::Component_type beta, const LA_traits_complex::NRMat_Noncomplex_type &a, const char trans, const LA_traits_complex::Component_type alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ +template<> void NRVec::gemv(const LA_traits_complex::Component_type beta, const LA_traits_complex::NRSMat_Noncomplex_type &a, const char trans, const LA_traits_complex::Component_type alpha, const NRVec &x) { laerror("gemv on unsupported types"); } @@ -457,6 +328,21 @@ void NRVec::gemv(const double beta, const NRMat &A, A.nrows(), A.ncols(), alpha, A, A.ncols(), x.v, 1, beta, v, 1); } + +template<> +void NRVec >::gemv(const double beta, const NRMat &A, + const char trans, const double alpha, const NRVec > &x) +{ +#ifdef DEBUG + if ((trans == 'n'?A.ncols():A.nrows()) != x.size()) + laerror("incompatible sizes in gemv A*x"); +#endif + copyonwrite(); + cblas_dgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), alpha, A, A.ncols(), (double *)x.v, 2, beta, (double *)v, 2); + cblas_dgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), alpha, A, A.ncols(), ((double *)x.v) + 1, 2, beta, ((double *)v)+1, 2); +} + + template<> void NRVec< complex >::gemv(const complex beta, const NRMat< complex > &A, const char trans, @@ -484,6 +370,19 @@ void NRVec::gemv(const double beta, const NRSMat &A, cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, x.v, 1, beta, v, 1); } +template<> +void NRVec >::gemv(const double beta, const NRSMat &A, + const char trans, const double alpha, const NRVec > &x) +{ +#ifdef DEBUG + if (A.ncols()!=x.size()) laerror("incompatible dimension in gemv A*x"); +#endif + copyonwrite(); + cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, (double *)x.v, 2, beta, (double *)v, 2); + cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, ((double *)x.v)+1, 2, beta, ((double *)v)+1, 2); +} + + template<> void NRVec< complex >::gemv(const complex beta, @@ -543,3 +442,18 @@ template class NRVec; template class NRVec; template class NRVec; + +INSTANTIZE_DUMMY(char) +INSTANTIZE_DUMMY(unsigned char) +INSTANTIZE_DUMMY(short) +INSTANTIZE_DUMMY(int) +INSTANTIZE_DUMMY(unsigned int) +INSTANTIZE_DUMMY(unsigned long) +INSTANTIZE_DUMMY(complex) +INSTANTIZE_DUMMY(complex) +INSTANTIZE_DUMMY(complex) +INSTANTIZE_DUMMY(complex) +INSTANTIZE_DUMMY(complex) +INSTANTIZE_DUMMY(complex >) +INSTANTIZE_DUMMY(complex >) +INSTANTIZE_DUMMY(complex)