commit d7b55e98464705be89312a2d0bf39ccab92d5fa3 Author: jiri Date: Wed Mar 17 03:07:21 2004 +0000 *** empty log message *** diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..da8168b --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +# CVS default ignores begin +tags +TAGS +.make.state +.nse_depinfo +*~ +\#* +.#* +,* +_$* +*$ +*.old +*.bak +*.BAK +*.orig +*.rej +.del-* +*.a +*.olb +*.o +*.obj +*.so +*.exe +*.Z +*.elc +*.ln +core +# CVS default ignores end diff --git a/fourindex.h b/fourindex.h new file mode 100644 index 0000000..823dbbc --- /dev/null +++ b/fourindex.h @@ -0,0 +1,261 @@ +#ifndef _fourindex_included +#define _fourindex_included + +//element of a linked list, indices in a portable way, no bit shifts and endianity problems any more! + + +template +struct matel4 + { + T elem; + matel4 *next; + typedef union { + I packed[4]; + struct { + I i; + I j; + I k; + I l; + } indiv; + } packedindex; + packedindex index; + }; + +typedef enum {nosymmetry=0, twoelectronreal=1, twoelectroncomplex=2, twobodyantisym=3} fourindexsymtype; //if twoelectron, only permutation-nonequivalent elements are stored + +template +class fourindex { +protected: + I nn; + fourindexsymtype symmetry; + int *count; + matel4 *list; +private: + void deletelist(); + void copylist(const matel4 *l); +public: + //iterator + typedef class iterator { + private: + matel4 *p; + public: + iterator() {}; + ~iterator() {}; + iterator(matel4 *list): p(list) {}; + bool operator==(const iterator rhs) const {return p==rhs.p;} + bool operator!=(const iterator rhs) const {return p!=rhs.p;} + iterator operator++() {return p=p->next;} + iterator operator++(int) {matel4 *q=p; p=p->next; return q;} + matel4 & operator*() const {return *p;} + matel4 * operator->() const {return p;} + }; + iterator begin() const {return list;} + iterator end() const {return NULL;} + + //constructors etc. + inline fourindex() :nn(0),count(NULL),list(NULL) {}; + inline fourindex(const I n) :nn(n),count(new int(1)),list(NULL) {}; + fourindex(const fourindex &rhs); //copy constructor + inline int getcount() const {return count?*count:0;} + fourindex & operator=(const fourindex &rhs); + fourindex & operator+=(const fourindex &rhs); + inline void setsymmetry(fourindexsymtype s) {symmetry=s;} + fourindex & join(fourindex &rhs); //more efficient +=, rhs will be emptied + inline ~fourindex(); + inline matel4 *getlist() const {return list;} + inline I size() const {return nn;} + void resize(const I n); + void copyonwrite(); + int length() const; + inline void add(const I i, const I j, const I k, const I l, const T elem) + {matel4 *ltmp= new matel4; ltmp->next=list; list=ltmp; list->index.indiv.i=i;list->index.indiv.j=j;list->index.indiv.k=k;list->index.indiv.l=l; list->elem=elem;} + + inline void add(const typename matel4::packedindex &index , const T elem) + {matel4 *ltmp= new matel4; ltmp->next=list; list=ltmp; list->index=index; list->elem=elem;} + + inline void add(const I (&index)[4], const T elem) + {matel4 *ltmp= new matel4; ltmp->next=list; list=ltmp; memcpy(&list->index.packed, &index, sizeof(typename matel4::packedindex)); list->elem=elem;} + + + +}; + + +//destructor +template +fourindex::~fourindex() +{ + if(!count) return; + if(--(*count)<=0) + { + deletelist(); + delete count; + } +} + +//copy constructor (sort arrays are not going to be copied) +template +fourindex::fourindex(const fourindex &rhs) +{ +#ifdef debug +if(! &rhs) laerror("fourindex copy constructor with NULL argument"); +#endif + nn=rhs.nn; + if(rhs.list&&!rhs.count) laerror("some inconsistency in fourindex contructors or assignments"); + list=rhs.list; + if(list) {count=rhs.count; (*count)++;} else count=new int(1); //make the matrix defined, but empty and not shared +} + + + +//assignment operator +template +fourindex & fourindex::operator=(const fourindex &rhs) +{ + if (this != &rhs) + { + if(count) + if(--(*count) ==0) {deletelist(); delete count;} // old stuff obsolete + list=rhs.list; + nn=rhs.nn; + if(list) count=rhs.count; else count= new int(0); //make the matrix defined, but empty and not shared, count will be incremented below + if(count) (*count)++; + } + return *this; +} + + +template +fourindex & fourindex::operator+=(const fourindex &rhs) +{ +if(nn!=rhs.nn) laerror("incompatible dimensions for +="); +if(!count) {count=new int; *count=1; list=NULL;} +else copyonwrite(); +register matel4 *l=rhs.list; +while(l) + { + add( l->index,l->elem); + l=l->next; + } +return *this; +} + +template +fourindex & fourindex::join(fourindex &rhs) +{ +if(nn!=rhs.nn) laerror("incompatible dimensions for join"); +if(*rhs.count!=1) laerror("shared rhs in join()"); +if(!count) {count=new int; *count=1; list=NULL;} +else copyonwrite(); +matel4 **last=&list; +while(*last) last= &((*last)->next); +*last=rhs.list; +rhs.list=NULL; +return *this; +} + +template +void fourindex::resize(const I n) +{ + if(n<=0 ) laerror("illegal fourindex dimension"); + if(count) + { + if(*count > 1) {(*count)--; count=NULL; list=NULL;} //detach from previous + else if(*count==1) deletelist(); + } + nn=n; + count=new int(1); //empty but defined matrix + list=NULL; +} + + +template +void fourindex::deletelist() +{ +if(*count >1) laerror("trying to delete shared list"); +matel4 *l=list; +while(l) + { + matel4 *ltmp=l; + l=l->next; + delete ltmp; + } +list=NULL; +delete count; +count=NULL; +} + +template +void fourindex::copylist(const matel4 *l) +{ +list=NULL; +while(l) + { + add(l->index,l->elem); + l=l->next; + } +} + +template +void fourindex::copyonwrite() +{ + if(!count) laerror("probably an assignment to undefined fourindex"); + if(*count > 1) + { + (*count)--; + count = new int; *count=1; + if(!list) laerror("empty list with count>1"); + copylist(list); + } +} + +template +int fourindex::length() const +{ +int n=0; +matel4 *l=list; +while(l) + { + ++n; + l=l->next; + } +return n; +} + + +template +ostream& operator<<(ostream &s, const fourindex &x) + { + int n; + n=x.size(); + s << n << '\n'; + typename fourindex::iterator it=x.begin(); + while(it!=x.end()) + { + s << (int)it->index.indiv.i << ' ' << (int)it->index.indiv.j<< ' ' <<(int)it->index.indiv.k << ' ' << (int)it->index.indiv.l << ' ' << it->elem << '\n'; + ++it; + } + s << "-1 -1 -1 -1\n"; + return s; + } + +template +istream& operator>>(istream &s, fourindex &x) + { + int i,j,k,l; + T elem; + int n; + s >> n ; + x.resize(n); + s >> i >> j >>k >>l; + while(i>=0 && j>=0 &&k>=0 &&l>=0) + { + s>>elem; + x.add(i,j,k,l,elem); + s >> i >> j >>k >>ll; + } + return s; + } + + +#endif /*_fourindex_included*/ diff --git a/la.h b/la.h new file mode 100644 index 0000000..a408c64 --- /dev/null +++ b/la.h @@ -0,0 +1,9 @@ +#ifndef _LA_H_ +#define _LA_H_ + +#include "vec.h" +#include "smat.h" +#include "mat.h" +#include "nonclass.h" + +#endif /* _LA_H_ */ diff --git a/la_traits.h b/la_traits.h new file mode 100644 index 0000000..6f35b50 --- /dev/null +++ b/la_traits.h @@ -0,0 +1,40 @@ +//////////////////////////////////////////////////////////////////////////// +//traits classes + +#ifndef _LA_TRAITS_INCL +#define _LA_TRAITS_INCL + +//default one, good for numbers +template struct NRMat_traits { +typedef C elementtype; +typedef C producttype; +static C norm (const C &x) {return abs(x);} +static void axpy (C &s, const C &x, const C &c) {s+=x*c;} +}; + +//specializations +template<> struct NRMat_traits > { +typedef double elementtype; +typedef NRMat producttype; +static double norm (const NRMat &x) {return x.norm();} +static void axpy (NRMat&s, const NRMat &x, const double c) {s.axpy(c,x);} +}; + +template<> struct NRMat_traits > { +typedef double elementtype; +typedef NRMat producttype; +static const double norm (const NRSMat &x) {return x.norm(0.);} +static void axpy (NRSMat&s, const NRSMat &x, const double c) {s.axpy(c,x);} +}; + + +template<> struct NRMat_traits > > { +typedef complex elementtype; +typedef NRMat > producttype; +static double norm (const NRMat > &x) {return x.norm();} +static void axpy (NRMat >&s, const NRMat > &x, const complex c) {s.axpy(c,x);} +}; + + + +#endif diff --git a/mat.cc b/mat.cc new file mode 100644 index 0000000..10ec8c6 --- /dev/null +++ b/mat.cc @@ -0,0 +1,844 @@ +#include "mat.h" +// TODO : +// + +////////////////////////////////////////////////////////////////////////////// +//// forced instantization in the corresponding object file +template NRMat; +template NRMat< complex >; + + +/* + * Templates first, specializations for BLAS next + */ + +// dtor +template +NRMat::~NRMat() +{ + if (!count) return; + if (--(*count) <= 0) { + if (v) { +#ifdef MATPTR + delete[] (v[0]); +#endif + delete[] v; + } + delete count; + } +} + +// assign NRMat = NRMat +template +NRMat & NRMat::operator=(const NRMat &rhs) +{ + if (this == &rhs) return *this; + if (count) { + if (--(*count) ==0 ) { +#ifdef MATPTR + delete[] (v[0]); +#endif + delete[] v; + delete count; + } + v = rhs.v; + nn = rhs.nn; + mm = rhs.mm; + count = rhs.count; + if (count) (*count)--; + } + return *this; +} + +// Assign diagonal +template +NRMat & NRMat::operator=(const T &a) +{ + copyonwrite(); +#ifdef DEBUG + if (nn != mm) laerror("RMat.operator=scalar on non-square matrix"); +#endif +#ifdef MATPTR + for (int i=0; i< nn; i++) v[i][i] = a; +#else + for (int i=0; i< nn*nn; i+=nn+1) v[i] = a; +#endif + return *this; +} + +// Explicit deep copy of NRmat +template +NRMat & NRMat::operator|=(const NRMat &rhs) +{ + if (this == &rhs) return *this; +#ifdef DEBUG + if (!rhs.v) laerror("unallocated rhs in Mat operator |="); +#endif + if (count) + if (*count > 1) { + --(*count); + nn = 0; + mm = 0; + count = 0; + v = 0; + } + if (nn != rhs.nn || mm != rhs.mm) { + if (v) { +#ifdef MATPTR + delete[] (v[0]); +#endif + delete[] (v); + v = 0; + } + nn = rhs.nn; + mm = rhs.mm; + } + if (!v) { +#ifdef MATPTR + v = new T*[nn]; + v[0] = new T[mm*nn]; +#else + v = new T[mm*nn]; +#endif + } + +#ifdef MATPTR + for (int i=1; i< nn; i++) v[i] = v[i-1] + mm; + memcpy(v[0], rhs.v[0], nn*mm*sizeof(T)); +#else + memcpy(v, rhs.v, nn*mm*sizeof(T)); +#endif + + if (!count) count = new int; + *count = 1; + + return *this; +} + +// M += a +template +NRMat & NRMat::operator+=(const T &a) +{ + copyonwrite(); +#ifdef DEBUG + if (nn != mm) laerror("Mat.operator+=scalar on non-square matrix"); +#endif +#ifdef MATPTR + for (int i=0; i< nn; i++) v[i][i] += a; +#else + for (int i=0; i< nn*nn; i+=nn+1) v[i] += a; +#endif + return *this; +} + +// M -= a +template +NRMat & NRMat::operator-=(const T &a) +{ + copyonwrite(); +#ifdef DEBUG + if (nn != mm) laerror("Mat.operator-=scalar on non-square matrix"); +#endif +#ifdef MATPTR + for (int i=0; i< nn; i++) v[i][i] -= a; +#else + for (int i=0; i< nn*nn; i+=nn+1) v[i] -= a; +#endif + return *this; +} + +// unary minus +template +const NRMat NRMat::operator-() const +{ + NRMat result(nn, mm); +#ifdef MATPTR + for (int i=0; i +const NRMat NRMat::operator&(const NRMat & b) const +{ + NRMat result((T)0, nn+b.nn, mm+b.mm); + for (int i=0; i +const NRMat NRMat::operator|(const NRMat &b) const +{ + NRMat result(nn*b.nn, mm*b.mm); + for (int i=0; i +const NRVec NRMat::csum() const +{ + NRVec result(nn); + T sum; + + for (int i=0; i +const NRVec NRMat::rsum() const +{ + NRVec result(nn); + T sum; + + for (int i=0; i +void NRMat::copyonwrite() +{ +#ifdef DEBUG + if (!count) laerror("Mat::copyonwrite of undefined matrix"); +#endif + if (*count > 1) { + (*count)--; + count = new int; + *count = 1; +#ifdef MATPTR + T **newv = new T*[nn]; + newv[0] = new T[mm*nn]; + memcpy(newv[0], v[0], mm*nn*sizeof(T)); + v = newv; + for (int i=1; i< nn; i++) v[i] = v[i-1] + mm; +#else + T *newv = new T[mm*nn]; + memcpy(newv, v, mm*nn*sizeof(T)); + v = newv; +#endif + } +} + +template +void NRMat::resize(const int n, const int m) +{ +#ifdef DEBUG + if (n<=0 || m<=0) laerror("illegal dimensions in Mat::resize()"); +#endif + if (count) + if (*count > 1) { + (*count)--; + count = 0; + v = 0; + nn = 0; + mm = 0; + } + if (!count) { + count = new int; + *count = 1; + nn = n; + mm = m; +#ifdef MATPTR + v = new T*[nn]; + v[0] = new T[m*n]; + for (int i=1; i< n; i++) v[i] = v[i-1] + m; +#else + v = new T[m*n]; +#endif + return; + } + // At this point *count = 1, check if resize is necessary + if (n!=nn || m!=mm) { + nn = n; + mm = m; +#ifdef MATPTR + delete[] (v[0]); +#endif + delete[] v; +#ifdef MATPTR + v = new T*[nn]; + v[0] = new T[m*n]; + for (int i=1; i< n; i++) v[i] = v[i-1] + m; +#else + v = new T[m*n]; +#endif + } +} + +// transpose Mat +template +NRMat & NRMat::transposeme() +{ +#ifdef DEBUG + if (nn != mm) laerror("transpose of non-square Mat"); +#endif + copyonwrite(); + for(int i=1; i +void NRMat::fprintf(FILE *file, const char *format, const int modulo) const +{ + lawritemat(file, (const T*)(*this), nn, mm, format, 2, modulo, 0); +} + +// Input of Mat +template +void NRMat::fscanf(FILE *f, const char *format) +{ + int n, m; + if (std::fscanf(f, "%d %d", &n, &m) != 2) + laerror("cannot read matrix dimensions in Mat::fscanf()"); + resize(n,m); + T *p = *this; + for(int i=0; i + */ + +// Mat *= a +NRMat & NRMat::operator*=(const double &a) +{ + copyonwrite(); + cblas_dscal(nn*mm, a, *this, 1); + return *this; +} +NRMat< complex > & +NRMat< complex >::operator*=(const complex &a) +{ + copyonwrite(); + cblas_zscal(nn*mm, &a, (void *)(*this)[0], 1); + return *this; +} + +// Mat += Mat +NRMat & NRMat::operator+=(const NRMat &rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn || mm!= rhs.mm) + laerror("Mat += Mat of incompatible matrices"); +#endif + copyonwrite(); + cblas_daxpy(nn*mm, 1.0, rhs, 1, *this, 1); + return *this; +} +NRMat< complex > & +NRMat< complex >::operator+=(const NRMat< complex > &rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn || mm!= rhs.mm) + laerror("Mat += Mat of incompatible matrices"); +#endif + copyonwrite(); + cblas_zaxpy(nn*mm, &CONE, (void *)rhs[0], 1, (void *)(*this)[0], 1); + return *this; +} + +// Mat -= Mat +NRMat & NRMat::operator-=(const NRMat &rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn || mm!= rhs.mm) + laerror("Mat -= Mat of incompatible matrices"); +#endif + copyonwrite(); + cblas_daxpy(nn*mm, -1.0, rhs, 1, *this, 1); + return *this; +} +NRMat< complex > & +NRMat< complex >::operator-=(const NRMat< complex > &rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn || mm!= rhs.mm) + laerror("Mat -= Mat of incompatible matrices"); +#endif + copyonwrite(); + cblas_zaxpy(nn*mm, &CMONE, (void *)rhs[0], 1, (void *)(*this)[0], 1); + return *this; +} + +// Mat += SMat +NRMat & NRMat::operator+=(const NRSMat &rhs) +{ +#ifdef DEBUG + if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat+=SMat"); +#endif + const double *p = rhs; + copyonwrite(); + for (int i=0; i > & +NRMat< complex >::operator+=(const NRSMat< complex > &rhs) +{ +#ifdef DEBUG + if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat+=SMat"); +#endif + const complex *p = rhs; + copyonwrite(); + for (int i=0; i & NRMat::operator-=(const NRSMat &rhs) +{ +#ifdef DEBUG + if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat-=SMat"); +#endif + const double *p = rhs; + copyonwrite(); + for (int i=0; i > & +NRMat< complex >::operator-=(const NRSMat< complex > &rhs) +{ +#ifdef DEBUG + if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat-=SMat"); +#endif + const complex *p = rhs; + copyonwrite(); + for (int i=0; i::dot(const NRMat &rhs) const +{ +#ifdef DEBUG + if(nn!=rhs.nn || mm!= rhs.mm) laerror("Mat.Mat incompatible matrices"); +#endif + return cblas_ddot(nn*mm, (*this)[0], 1, rhs[0], 1); +} +const complex +NRMat< complex >::dot(const NRMat< complex > &rhs) const +{ +#ifdef DEBUG + if(nn!=rhs.nn || mm!= rhs.mm) laerror("Mat.Mat incompatible matrices"); +#endif + complex dot; + cblas_zdotc_sub(nn*mm, (void *)(*this)[0], 1, (void *)rhs[0], 1, + (void *)(&dot)); + return dot; +} + +// Mat * Mat +const NRMat NRMat::operator*(const NRMat &rhs) const +{ +#ifdef DEBUG + if (mm != rhs.nn) laerror("product of incompatible matrices"); +#endif + NRMat result(nn, rhs.mm); + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nn, rhs.mm, mm, 1.0, + *this, mm, rhs, rhs.mm, 0.0, result, rhs.mm); + return result; +} +const NRMat< complex > +NRMat< complex >::operator*(const NRMat< complex > &rhs) const +{ +#ifdef DEBUG + if (mm != rhs.nn) laerror("product of incompatible matrices"); +#endif + NRMat< complex > result(nn, rhs.mm); + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nn, rhs.mm, mm, + (const void *)(&CONE),(const void *)(*this)[0], mm, (const void *)rhs[0], + rhs.mm, (const void *)(&CZERO), (void *)result[0], rhs.mm); + return result; +} + +// Multiply by diagonal from L +void NRMat::diagmultl(const NRVec &rhs) +{ +#ifdef DEBUG + if (nn != rhs.size()) laerror("incompatible matrix dimension in diagmultl"); +#endif + copyonwrite(); + for(int i=0; i >::diagmultl(const NRVec< complex > &rhs) +{ +#ifdef DEBUG + if (nn != rhs.size()) laerror("incompatible matrix dimension in diagmultl"); +#endif + copyonwrite(); + for (int i=0; i::diagmultr(const NRVec &rhs) +{ +#ifdef DEBUG + if (mm != rhs.size()) laerror("incompatible matrix dimension in diagmultr"); +#endif + copyonwrite(); + for (int i=0; i >::diagmultr(const NRVec< complex > &rhs) +{ +#ifdef DEBUG + if (mm != rhs.size()) laerror("incompatible matrix dimension in diagmultl"); +#endif + copyonwrite(); + for (int i=0; i +NRMat::operator*(const NRSMat &rhs) const +{ +#ifdef DEBUG + if (mm != rhs.nrows()) laerror("incompatible dimension in Mat*SMat"); +#endif + NRMat result(nn, rhs.ncols()); + for (int i=0; i > +NRMat< complex >::operator*(const NRSMat< complex > &rhs) const +{ +#ifdef DEBUG + if (mm != rhs.nrows()) laerror("incompatible dimension in Mat*SMat"); +#endif + NRMat< complex > result(nn, rhs.ncols()); + for (int i=0; i +NRMat::operator*(const NRVec &vec) const +{ +#ifdef DEBUG + if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec"); +#endif + NRVec result(nn); + cblas_dgemv(CblasRowMajor, CblasNoTrans, nn, mm, 1.0, (*this)[0], + mm, &vec[0], 1, 0.0, &result[0], 1); + return result; +} +const NRVec< complex > +NRMat< complex >::operator*(const NRVec< complex > &vec) const +{ +#ifdef DEBUG + if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec"); +#endif + NRVec< complex > result(nn); + cblas_zgemv(CblasRowMajor, CblasNoTrans, nn, mm, (void *)&CONE, (void *)(*this)[0], + mm, (void *)&vec[0], 1, (void *)&CZERO, (void *)&result[0], 1); + return result; +} + +// sum of rows +const NRVec NRMat::rsum() const +{ + NRVec result(mm); + for (int i=0; i NRMat::csum() const +{ + NRVec result(nn); + for (int i=0; i &NRMat::conjugateme() {return *this;} + +NRMat< complex > & NRMat< complex >::conjugateme() +{ + copyonwrite(); + cblas_dscal(mm*nn, -1.0, (double *)((*this)[0])+1, 2); + return *this; +} + +// transpose and optionally conjugate +const NRMat NRMat::transpose(bool conj) const +{ + NRMat result(mm,nn); + for(int i=0; i > +NRMat< complex >::transpose(bool conj) const +{ + NRMat< complex > result(mm,nn); + for (int i=0; i::gemm(const double &beta, const NRMat &a, + const char transa, const NRMat &b, const char transb, + const double &alpha) +{ + int l(transa=='n'?a.nn:a.mm); + int k(transa=='n'?a.mm:a.nn); + int kk(transb=='n'?b.nn:b.mm); + int ll(transb=='n'?b.mm:b.nn); + +#ifdef DEBUG + if (l!=nn || ll!=mm || k!=kk) laerror("incompatible matrices in Mat:gemm()"); +#endif + if (alpha==0.0 && beta==1.0) return; + + copyonwrite(); + cblas_dgemm(CblasRowMajor, (transa=='n' ? CblasNoTrans : CblasTrans), + (transb=='n' ? CblasNoTrans : CblasTrans), nn, mm, k, alpha, a, + a.mm, b , b.mm, beta, *this , mm); +} +void NRMat< complex >::gemm(const complex & beta, + const NRMat< complex > & a, const char transa, + const NRMat< complex > & b, const char transb, + const complex & alpha) +{ + int l(transa=='n'?a.nn:a.mm); + int k(transa=='n'?a.mm:a.nn); + int kk(transb=='n'?b.nn:b.mm); + int ll(transb=='n'?b.mm:b.nn); + +#ifdef DEBUG + if (l!=nn || ll!=mm || k!=kk) laerror("incompatible matrices in Mat:gemm()"); +#endif + if (alpha==CZERO && beta==CONE) return; + + copyonwrite(); + cblas_zgemm(CblasRowMajor, + (transa=='n' ? CblasNoTrans : (transa=='c'?CblasConjTrans:CblasTrans)), + (transb=='n' ? CblasNoTrans : (transa=='c'?CblasConjTrans:CblasTrans)), + nn, mm, k, &alpha, a , a.mm, b , b.mm, &beta, *this , mm); +} + +// norm of Mat +const double NRMat::norm(const double scalar) const +{ + if (!scalar) return cblas_dnrm2(nn*mm, (*this)[0], 1); + double sum = 0; + for (int i=0; i >::norm(const complex scalar) const +{ + if (scalar == CZERO) return cblas_dznrm2(nn*mm, (*this)[0], 1); + double sum = 0; + for (int i=0; i tmp; +#ifdef MATPTR + tmp = v[i][j]; +#else + tmp = v[i*mm+j]; +#endif + if (i==j) tmp -= scalar; + sum += tmp.real()*tmp.real()+tmp.imag()*tmp.imag(); + } + return sqrt(sum); +} + +// axpy: this = a * Mat +void NRMat::axpy(const double alpha, const NRMat &mat) +{ +#ifdef DEBUG + if (nn!=mat.nn || mm!=mat.mm) laerror("daxpy of incompatible matrices"); +#endif + copyonwrite(); + cblas_daxpy(nn*mm, alpha, mat, 1, *this, 1); +} +void NRMat< complex >::axpy(const complex alpha, + const NRMat< complex > & mat) +{ +#ifdef DEBUG + if (nn!=mat.nn || mm!=mat.mm) laerror("zaxpy of incompatible matrices"); +#endif + copyonwrite(); + cblas_zaxpy(nn*mm, (void *)&alpha, mat, 1, (void *)(*this)[0], 1); +} + +// trace of Mat +const double NRMat::trace() const +{ +#ifdef DEBUG + if (nn != mm) laerror("no-square matrix in Mat::trace()"); +#endif + return cblas_dasum(nn, (*this)[0], nn+1); +} +const complex NRMat< complex >::trace() const +{ +#ifdef DEBUG + if (nn != mm) laerror("no-square matrix in Mat::trace()"); +#endif + register complex sum = CZERO; + for (int i=0; i &x); \ +template istream & operator>>(istream &s, NRMat< T > &x); \ + +INSTANTIZE(double) +INSTANTIZE(complex) + + +export template +ostream& operator<<(ostream &s, const NRMat &x) + { + int i,j,n,m; + n=x.nrows(); + m=x.ncols(); + s << n << ' ' << m << '\n'; + for(i=0;i +istream& operator>>(istream &s, NRMat &x) + { + int i,j,n,m; + s >> n >> m; + x.resize(n,m); + for(i=0;i>x[i][j] ; + return s; + } + + + + + + + + + + + + + + + + diff --git a/mat.h b/mat.h new file mode 100644 index 0000000..2944f00 --- /dev/null +++ b/mat.h @@ -0,0 +1,346 @@ +#ifndef _LA_MAT_H_ +#define _LA_MAT_H_ + +#include "vec.h" +#include "smat.h" + +template +class NRMat { +protected: + int nn; + int mm; +#ifdef MATPTR + T **v; +#else + T *v; +#endif + int *count; +public: + friend class NRVec; + friend class NRSMat; + + inline NRMat() : nn(0), mm(0), v(0), count(0) {}; + inline NRMat(const int n, const int m); + inline NRMat(const T &a, const int n, const int m); + NRMat(const T *a, const int n, const int m); + inline NRMat(const NRMat &rhs); + explicit NRMat(const NRSMat &rhs); +#ifndef MATPTR + NRMat(const NRVec &rhs, const int n, const int m); +#endif + ~NRMat(); + inline int getcount() const {return count?*count:0;} + NRMat & operator=(const NRMat &rhs); //assignment + NRMat & operator=(const T &a); //assign a to diagonal + NRMat & operator|=(const NRMat &rhs); //assignment to a new copy + NRMat & operator+=(const T &a); //add diagonal + NRMat & operator-=(const T &a); //substract diagonal + NRMat & operator*=(const T &a); //multiply by a scalar + NRMat & operator+=(const NRMat &rhs); + NRMat & operator-=(const NRMat &rhs); + NRMat & operator+=(const NRSMat &rhs); + NRMat & operator-=(const NRSMat &rhs); + const NRMat operator-() const; //unary minus + inline const NRMat operator+(const T &a) const; + inline const NRMat operator-(const T &a) const; + inline const NRMat operator*(const T &a) const; + inline const NRMat operator+(const NRMat &rhs) const; + inline const NRMat operator-(const NRMat &rhs) const; + inline const NRMat operator+(const NRSMat &rhs) const; + inline const NRMat operator-(const NRSMat &rhs) const; + const T dot(const NRMat &rhs) const; // scalar product of Mat.Mat + const NRMat operator*(const NRMat &rhs) const; // Mat * Mat + void diagmultl(const NRVec &rhs); //multiply by a diagonal matrix from L + void diagmultr(const NRVec &rhs); //multiply by a diagonal matrix from R + const NRMat operator*(const NRSMat &rhs) const; // Mat * Smat + const NRMat operator&(const NRMat &rhs) const; // direct sum + const NRMat operator|(const NRMat &rhs) const; // direct product + const NRVec operator*(const NRVec &rhs) const; // Mat * Vec + const NRVec rsum() const; //sum of rows + const NRVec csum() const; //sum of columns + inline T* operator[](const int i); //subscripting: pointer to row i + inline const T* operator[](const int i) const; + inline T& operator()(const int i, const int j); // (i,j) subscripts + inline const T& operator()(const int i, const int j) const; + inline int nrows() const; + inline int ncols() const; + void copyonwrite(); + void resize(const int n, const int m); + inline operator T*(); //get a pointer to the data + inline operator const T*() const; + NRMat & transposeme(); // square matrices only + NRMat & conjugateme(); // square matrices only + const NRMat transpose(bool conj=false) const; + const NRMat conjugate() const; + void gemm(const T &beta, const NRMat &a, const char transa, const NRMat &b, + const char transb, const T &alpha);//this = alpha*op( A )*op( B ) + beta*this +/* + void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, + const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this + void s_cutoff(const int,const int,const int,const int) const; +*/ + void fprintf(FILE *f, const char *format, const int modulo) const; + void fscanf(FILE *f, const char *format); + const double norm(const T scalar=(T)0) const; + void axpy(const T alpha, const NRMat &x); // this += a*x + inline const T amax() const; + const T trace() const; + +//members concerning sparse matrix + explicit NRMat(const SparseMat &rhs); // dense from sparse + NRMat & operator+=(const SparseMat &rhs); + NRMat & operator-=(const SparseMat &rhs); + inline void simplify() {}; //just for compatibility with sparse ones + +//Strassen's multiplication (better than n^3, analogous syntax to gemm) + void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this + void s_cutoff(const int,const int,const int,const int) const; + +}; + +// ctors +template +NRMat::NRMat(const int n, const int m) : nn(n), mm(m), count(new int) +{ + *count = 1; +#ifdef MATPTR + v = new T*[n]; + v[0] = new T[m*n]; + for (int i=1; i +NRMat::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new int) +{ + int i; + T *p; + *count = 1; +#ifdef MATPTR + v = new T*[n]; + p = v[0] = new T[m*n]; + for (int i=1; i +NRMat::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new int) +{ + *count = 1; +#ifdef MATPTR + v = new T*[n]; + v[0] = new T[m*n]; + for (int i=1; i +NRMat::NRMat(const NRMat &rhs) +{ + nn = rhs.nn; + mm = rhs.mm; + count = rhs.count; + v = rhs.v; + if (count) ++(*count); +} + +template +NRMat::NRMat(const NRSMat &rhs) +{ + int i; + nn = mm = rhs.nrows(); + count = new int; + *count = 1; +#ifdef MATPTR + v = new T*[nn]; + v[0] = new T[mm*nn]; + for (int i=1; i +NRMat::NRMat(const NRVec &rhs, const int n, const int m) +{ +#ifdef DEBUG + if (n*m != rhs.nn) laerror("matrix dimensions incompatible with vector length"); +#endif + nn = n; + mm = m; + count = rhs.count; + v = rhs.v; + (*count)++; +} +#endif + +// Mat + Smat +template +inline const NRMat NRMat::operator+(const NRSMat &rhs) const +{ + return NRMat(*this) += rhs; +} + +// Mat - Smat +template +inline const NRMat NRMat::operator-(const NRSMat &rhs) const +{ + return NRMat(*this) -= rhs; +} + +// Mat[i] : pointer to the first element of i-th row +template +inline T* NRMat::operator[](const int i) +{ +#ifdef DEBUG + if (*count != 1) laerror("Mat lval use of [] with count > 1"); + if (i<0 || i>=nn) laerror("Mat [] out of range"); + if (!v) laerror("[] for unallocated Mat"); +#endif +#ifdef MATPTR + return v[i]; +#else + return v+i*mm; +#endif +} +template +inline const T* NRMat::operator[](const int i) const +{ +#ifdef DEBUG + if (i<0 || i>=nn) laerror("Mat [] out of range"); + if (!v) laerror("[] for unallocated Mat"); +#endif +#ifdef MATPTR + return v[i]; +#else + return v+i*mm; +#endif +} + +// Mat(i,j) reference to the matrix element M_{ij} +template +inline T & NRMat::operator()(const int i, const int j) +{ +#ifdef DEBUG + if (*count != 1) laerror("Mat lval use of (,) with count > 1"); + if (i<0 || i>=nn || j<0 || j>mm) laerror("Mat (,) out of range"); + if (!v) laerror("(,) for unallocated Mat"); +#endif +#ifdef MATPTR + return v[i][j]; +#else + return v[i*mm+j]; +#endif +} +template +inline const T & NRMat::operator()(const int i, const int j) const +{ +#ifdef DEBUG + if (i<0 || i>=nn || j<0 || j>mm) laerror("Mat (,) out of range"); + if (!v) laerror("(,) for unallocated Mat"); +#endif +#ifdef MATPTR + return v[i][j]; +#else + return v[i*mm+j]; +#endif +} + +// number of rows +template +inline int NRMat::nrows() const +{ + return nn; +} + +// number of columns +template +inline int NRMat::ncols() const +{ + return mm; +} + +// reference pointer to Mat +template +inline NRMat::operator T* () +{ +#ifdef DEBUG + if (!v) laerror("unallocated Mat in operator T*"); +#endif +#ifdef MATPTR + return v[0]; +#else + return v; +#endif +} +template +inline NRMat::operator const T* () const +{ +#ifdef DEBUG + if (!v) laerror("unallocated Mat in operator T*"); +#endif +#ifdef MATPTR + return v[0]; +#else + return v; +#endif +} + +// max element of Mat +inline const double NRMat::amax() const +{ +#ifdef MATPTR + return v[0][cblas_idamax(nn*mm, v[0], 1)]; +#else + return v[cblas_idamax(nn*mm, v, 1)]; +#endif +} +inline const complex NRMat< complex >::amax() const +{ +#ifdef MATPTR + return v[0][cblas_izamax(nn*mm, (void *)v[0], 1)]; +#else + return v[cblas_izamax(nn*mm, (void *)v, 1)]; +#endif +} + + +// I/O +template extern ostream& operator<<(ostream &s, const NRMat &x); +template extern istream& operator>>(istream &s, NRMat &x); + + + + + +// generate operators: Mat + a, a + Mat, Mat * a +NRVECMAT_OPER(Mat,+) +NRVECMAT_OPER(Mat,-) +NRVECMAT_OPER(Mat,*) +// generate Mat + Mat, Mat - Mat +NRVECMAT_OPER2(Mat,+) +NRVECMAT_OPER2(Mat,-) + +#endif /* _LA_MAT_H_ */ diff --git a/matexp.h b/matexp.h new file mode 100644 index 0000000..ffb9bfc --- /dev/null +++ b/matexp.h @@ -0,0 +1,259 @@ +//general routine for polynomial of a matrix, tuned to minimize the number +//of matrix-matrix multiplications on cost of additions and memory +// the polynom and exp routines will work on any type, for which traits class +// is defined containing definition of an element type, norm and axpy operation + +#include "la_traits.h" +#include "sparsemat_traits.h" + +template +const T polynom2(const T &x, const NRVec &c) +{ +int order=c.size()-1; +T z,y; + +//trivial reference implementation by horner scheme +if(order==0) {y=x; y=c[0];} //to avoid the problem: we do not know the size of the matrix to contruct a scalar one +else + { + int i; + z=x*c[order]; + for(i=order-1; i>=0; i--) + { + if(i +const T polynom(const T &x, const NRVec &c) +{ +int n=c.size()-1; +int i,j,k,m=0,t; + +if(n<=4) return polynom2(x,c); //here the horner scheme is optimal + +//first find m which minimizes the number of multiplications +j=10*n; +for(i=2;i<=n+1;i++) + { + t=i-2+2*(n/i)-(n%i)?0:1; + if(tn) break; + if(j==0) {if(i==0) s=x; /*just to get the dimensions of the matrix*/ s=c[k]; /*create diagonal matrix*/} + else + NRMat_traits::axpy(s,xpows[j-1],c[k]); //general s+=xpows[j-1]*c[k]; but more efficient for matrices + } + + if(i==0) {r=s; f=xpows[m-1];} + else + { + r+= s*f; + f=f*xpows[m-1]; + } + } + +delete[] xpows; +return r; +} + + +//for general objects +template +const T ncommutator ( const T &x, const T &y, int nest=1, const bool right=1) +{ +T z; +if(right) {z=x; while(--nest>=0) z=z*y-y*z;} +else {z=y; while(--nest>=0) z=x*z-z*x;} +return z; +} + +template +const T nanticommutator ( const T &x, const T &y, int nest=1, const bool right=1) +{ +T z; +if(right) {z=x; while(--nest>=0) z=z*y+y*z;} +else {z=y; while(--nest>=0) z=x*z+z*x;} +return z; +} + +//general BCH expansion (can be written more efficiently in a specialization for matrices) +template +const T BCHexpansion (const T &h, const T &t, const int n, const bool verbose=1)\ +{ +T result=h; +double factor=1.; +T z=h; +for(int i=1; i<=n; ++i) + { + factor/=i; + z= z*t-t*z; + if(verbose) cerr << "BCH contribution at order "< +const T ipow( const T &x, int i) +{ +if(i<0) laerror("negative exponent in ipow"); +if(i==0) {T r=x; r=1.; return r;}//trick for matrix dimension +if(i==1) return x; +T y,z; +z=x; +while(!(i&1)) + { + z = z*z; + i >>= 1; + } +y=z; +while((i >>= 1)/*!=0*/) + { + z = z*z; + if(i&1) y = y*z; + } +return y; +} + +inline int nextpow2(const double n) +{ +const double log2=log(2.); +if(n<=.75) return 0; //try to keep the taylor expansion short +if(n<=1.) return 1; +return int(ceil(log(n)/log2-log(.75))); +} + + +template +NRVec::elementtype> exp_aux(const T &x, int &power) +{ +//should better be computed by mathematica to have accurate last digits, chebyshev instead, see exp in glibc +static double exptaylor[]={ +1., +1., +0.5, +0.1666666666666666666666, +0.0416666666666666666666, +0.0083333333333333333333, +0.0013888888888888888888, +0.00019841269841269841253, +2.4801587301587301566e-05, +2.7557319223985892511e-06, +2.7557319223985888276e-07, +2.5052108385441720224e-08, +2.0876756987868100187e-09, +1.6059043836821613341e-10, +1.1470745597729724507e-11, +7.6471637318198164055e-13, +4.7794773323873852534e-14, +2.8114572543455205981e-15, +1.5619206968586225271e-16, +8.2206352466243294955e-18, +4.1103176233121648441e-19, +0.}; +double mnorm= NRMat_traits::norm(x); +power=nextpow2(mnorm); +double scale=exp(-log(2.)*power); + + +//find how long taylor expansion will be necessary +const double precision=1e-16; +double s,t; +s=mnorm*scale; +int n=0; +t=1.; +do { + n++; + t*=s; + } +while(t*exptaylor[n]>precision);//taylor 0 will terminate in any case + + +int i; //adjust the coefficients in order to avoid scaling the argument +NRVec::elementtype> taylor2(n+1); +for(i=0,t=1.;i<=n;i++) + { + taylor2[i]=exptaylor[i]*t; + t*=scale; + } +return taylor2; +} + + + +template +const T exp(const T &x) +{ +int power; + +//prepare the polynom of and effectively scale T +NRVec::elementtype> taylor2=exp_aux(x,power); + +T r=polynom(x,taylor2); //for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices + +//power the result back +for(int i=0; i +const typename NRMat_traits::elementtype determinant(MAT a)//again passed by value +{ +typename NRMat_traits::elementtype det; +if(a.nrows()!=a.ncols()) laerror("determinant of non-square matrix"); +linear_solve(a,NULL,&det); +return det; +} + + +template +const V exptimes(const M &mat, V vec) //uses just matrix vector multiplication +{ +if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)vec.size()) laerror("inappropriate sizes in exptimes"); +int power; +//prepare the polynom of and effectively scale the matrix +NRVec::elementtype> taylor2=exp_aux(mat,power); + +V result(mat.nrows()); +for(int i=1; i<=(1<1) vec=result; //apply again to the result of previous application + //apply polynom of the matrix to the vector iteratively + V y=vec; + result=y*taylor2[0]; + for(int j=1; j) + +template +void lawritemat(FILE *file,const T *a,int r,int c,const char *form0, + int nodim,int modulo, int issym) +{ + int i,j; + const char *f; + + /*print out title before %*/ + f=form0; + skiptext: + while (*f && *f !='%' ) {fputc(*f++,file);} + if (*f=='%' && f[1]=='%') { + fputc(*f,file); f+=2; + goto skiptext; + } + /* this has to be avoided when const arguments should be allowed *f=0; */ + /*use the rest as a format for numbers*/ + + if (modulo) nodim=0; + if (nodim==2) fprintf(file,"%d %d\n",r,c); + if (nodim==1) fprintf(file,"%d\n",c); + if (modulo) { + int n1, n2, l, m; + char ff[32]; + /* prepare integer format for column numbering */ + if (sscanf(f+1,"%d",&l) != 1) l=128/modulo; + l -= 2; + m = l/2; + l = l-m; + sprintf(ff,"%%%ds%%3d%%%ds", l, m); + n1 = 1; + while(n1 <= c) { + n2=n1+modulo-1; + if (n2 > c) n2 = c; + + /*write block between columns n1 and n2 */ + fprintf(file,"\n "); + for (i=n1; i<=n2; i++) fprintf(file,ff," ",i," "); + fprintf(file,"\n\n"); + + for (i=1; i<=r; i++) { + fprintf(file, "%3d ", i); + for (j=n1; j<=n2; j++) { + if(issym) { + int ii,jj; + if (i >= j) { + ii=i; + jj=j; + } else { + ii=j; + jj=i; + } + fprintf(file, f, ((complex)a[ii*(ii+1)/2+jj]).real(), ((complex)a[ii*(ii+1)/2+jj]).imag()); + } else fprintf(file, f, ((complex)a[(i-1)*c+j-1]).real(), ((complex)a[(i-1)*c+j-1]).imag()); + if (j < n2) fputc(' ',file); + } + fprintf(file, "\n"); + } + n1 = n2+1; + } + } else { + for (i=1; i<=r; i++) { + for (j=1; j<=c; j++) { + if (issym) { + int ii,jj; + if (i >= j) { + ii=i; + jj=j; + } else { + ii=j; + jj=i; + } + fprintf(file, f, ((complex)a[ii*(ii+1)/2+jj]).real(), ((complex)a[ii*(ii+1)/2+jj]).imag()); + } else fprintf(file,f,((complex)a[(i-1)*c+j-1]).real(), ((complex)a[(i-1)*c+j-1]).imag()); + putc(j &A, NRMat *B, double *det) +{ + int r, *ipiv; + + if (A.nrows() != A.ncols()) laerror("linear_solve() call for non-square matrix"); + if (B && A.nrows() != B->ncols()) laerror("incompatible matrices in linear_solve()"); + A.copyonwrite(); + if (B) B->copyonwrite(); + ipiv = new int[A.nrows()]; + r = clapack_dgesv(CblasRowMajor, A.nrows(), B ? B->nrows() : 0, A[0], A.ncols(), + ipiv, B ? B[0] : (double *)0, B ? B->ncols() : A.nrows()); + if (r < 0) { + delete[] ipiv; + laerror("illegal argument in lapack_gesv"); + } + if (det && r>=0) { + *det = A[0][0]; + for (int i=1; i0 && B) laerror("singular matrix in lapack_gesv"); +} + + +// Next routines are not available in clapack, fotran ones will b used with an +// additional swap/transpose of outputs when needed + +extern "C" void FORNAME(dspsv)(const char *UPLO, const int *N, const int *NRHS, + double *AP, int *IPIV, double *B, const int *LDB, int *INFO); + +void linear_solve(NRSMat &a, NRMat *b, double *det) +{ + int r, *ipiv; + if (det) cerr << "@@@ sign of the determinant not implemented correctly yet\n"; + if (b && a.nrows() != b->ncols()) + laerror("incompatible matrices in symmetric linear_solve()"); + a.copyonwrite(); + if (b) b->copyonwrite(); + ipiv = new int[a.nrows()]; + char U = 'U'; + int n = a.nrows(); + int nrhs = 0; + if (b) nrhs = b->nrows(); + int ldb = b ? b->ncols() : a.nrows(); + FORNAME(dspsv)(&U, &n, &nrhs, a, ipiv, b?(*b)[0]:0, &ldb,&r); + if (r < 0) { + delete[] ipiv; + laerror("illegal argument in spsv() call of linear_solve()"); + } + if (det && r >= 0) { + *det = a(0,0); + for (int i=1; i 0 && b) laerror("singular matrix in linear_solve(SMat&, Mat*, double*"); +} + + +extern "C" void FORNAME(dsyev)(const char *JOBZ, const char *UPLO, const int *N, + double *A, const int *LDA, double *W, double *WORK, const int *LWORK, int *INFO); + +// a will contain eigenvectors, w eigenvalues +void diagonalize(NRMat &a, NRVec &w, const bool eivec, + const bool corder) +{ + int n = a.nrows(); + if (n != a.ncols()) laerror("diagonalize() call with non-square matrix"); + if (a.nrows() != w.size()) + laerror("inconsistent dimension of eigenvalue vector in diagonalize()"); + + a.copyonwrite(); + w.copyonwrite(); + + int r = 0; + char U ='U'; + char vectors = 'V'; + if (!eivec) vectors = 'N'; + int LWORK = -1; + double WORKX; + + // First call is to determine size of workspace + FORNAME(dsyev)(&vectors, &U, &n, a, &n, w, (double *)&WORKX, &LWORK, &r ); + LWORK = (int)WORKX; + double *WORK = new double[LWORK]; + FORNAME(dsyev)(&vectors, &U, &n, a, &n, w, WORK, &LWORK, &r ); + delete[] WORK; + if (vectors == 'V' && corder) a.transposeme(); + + if (r < 0) laerror("illegal argument in syev() of diagonalize()"); + if (r > 0) laerror("convergence problem in syev() of diagonalize()"); +} + + +extern "C" void FORNAME(dspev)(const char *JOBZ, const char *UPLO, const int *N, + double *AP, double *W, double *Z, const int *LDZ, double *WORK, int *INFO); + +// v will contain eigenvectors, w eigenvalues +void diagonalize(NRSMat &a, NRVec &w, NRMat *v, + const bool corder) +{ + int n = a.nrows(); + if (v) if (v->nrows() != v ->ncols() || n != v->nrows()) + laerror("diagonalize() call with inconsistent dimensions"); + if (n != w.size()) laerror("inconsistent dimension of eigenvalue vector"); + + a.copyonwrite(); + w.copyonwrite(); + + int r = 0; + char U = 'U'; + char job = v ? 'v' : 'n'; + + double *WORK = new double[3*n]; + FORNAME(dspev)(&job, &U, &n, a, w, v?(*v)[0]:(double *)0, &n, WORK, &r ); + delete[] WORK; + if (v && corder) v->transposeme(); + + if (r < 0) laerror("illegal argument in spev() of diagonalize()"); + if (r > 0) laerror("convergence problem in spev() of diagonalize()"); +} + + +extern "C" void FORNAME(dgesvd)(const char *JOBU, const char *JOBVT, const int *M, + const int *N, double *A, const int *LDA, double *S, double *U, const int *LDU, + double *VT, const int *LDVT, double *WORK, const int *LWORK, int *INFO ); + +void singular_decomposition(NRMat &a, NRMat *u, NRVec &s, + NRMat *v, const bool corder) +{ + int m = a.nrows(); + int n = a.ncols(); + if (u) if (m != u->nrows() || m!= u->ncols()) + laerror("inconsistent dimension of U Mat in singular_decomposition()"); + if (s.size() < m && s.size() < n) + laerror("inconsistent dimension of S Vec in singular_decomposition()"); + if (v) if (n != v->nrows() || n != v->ncols()) + laerror("inconsistent dimension of V Mat in singular_decomposition()"); + + a.copyonwrite(); + s.copyonwrite(); + if (u) u->copyonwrite(); + if (v) v->copyonwrite(); + + // C-order (transposed) input and swap u,v matrices, + // v should be transposed at the end + char jobu = u ? 'A' : 'N'; + char jobv = v ? 'A' : 'N'; + double work0; + int lwork = -1; + int r; + FORNAME(dgesvd)(&jobv, &jobu, &n, &m, a, &n, s, v?(*v)[0]:0, &n, + u?(*u)[0]:0, &m, &work0, &lwork, &r); + lwork = (int) work0; + double *work = new double[lwork]; + FORNAME(dgesvd)(&jobv, &jobu, &n, &m, a, &n, s, v?(*v)[0]:0, &n, + u?(*u)[0]:0, &m, &work0, &lwork, &r); + delete[] work; + if (v && corder) v->transposeme(); + + if (r < 0) laerror("illegal argument in gesvd() of singular_decomposition()"); + if (r > 0) laerror("convergence problem in gesvd() of ingular_decomposition()"); +} + + +extern "C" void FORNAME(dgeev)(const char *JOBVL, const char *JOBVR, const int *N, + double *A, const int *LDA, double *WR, double *WI, double *VL, const int *LDVL, + double *VR, const int *LDVR, double *WORK, const int *LWORK, int *INFO ); + +void gdiagonalize(NRMat &a, NRVec &wr, NRVec &wi, + NRMat *vl, NRMat *vr, const bool corder) +{ + int n = a.nrows(); + if (n != a.ncols()) laerror("gdiagonalize() call for a non-square matrix"); + if (n != wr.size()) + laerror("inconsistent dimension of eigen vector in gdiagonalize()"); + if (vl) if (n != vl->nrows() || n != vl->ncols()) + laerror("inconsistent dimension of vl in gdiagonalize()"); + if (vr) if (n != vr->nrows() || n != vr->ncols()) + laerror("inconsistent dimension of vr in gdiagonalize()"); + + a.copyonwrite(); + wr.copyonwrite(); + wi.copyonwrite(); + if (vl) vl->copyonwrite(); + if (vr) vr->copyonwrite(); + + char jobvl = vl ? 'V' : 'N'; + char jobvr = vr ? 'V' : 'N'; + double work0; + int lwork = -1; + int r; + FORNAME(dgeev)(&jobvr, &jobvl, &n, a, &n, wr, wi, vr?vr[0]:(double *)0, + &n, vl?vl[0]:(double *)0, &n, &work0, &lwork, &r); + lwork = (int) work0; + double *work = new double[lwork]; + FORNAME(dgeev)(&jobvr, &jobvl, &n, a, &n, wr, wi, vr?vr[0]:(double *)0, + &n, vl?vl[0]:(double *)0, &n, &work0, &lwork, &r); + delete[] work; + + if (corder) { + if (vl) vl->transposeme(); + if (vr) vr->transposeme(); + } + + if (r < 0) laerror("illegal argument in geev() of gdiagonalize()"); + if (r > 0) laerror("convergence problem in geev() of gdiagonalize()"); +} + +void gdiagonalize(NRMat &a, NRVec< complex > &w, + NRMat< complex >*vl, NRMat< complex > *vr) +{ + int n = a.nrows(); + if(n != a.ncols()) laerror("gdiagonalize() call for a non-square matrix"); + + NRVec wr(n), wi(n); + NRMat *rvl = 0; + NRMat *rvr = 0; + if (vl) rvl = new NRMat(n, n); + if (vr) rvr = new NRMat(n, n); + gdiagonalize(a, wr, wi, rvl, rvr, 0); + + //process the results into complex matrices + int i; + for (i=0; i(wr[i], wi[i]); + if (rvl || rvr) { + i = 0; + while (i < n) { + if (wi[i] == 0) { + if (vl) for (int j=0; j((*rvl)[i][j], (*rvl)[i+1][j]); + (*vl)[i+1][j] = complex((*rvl)[i][j], -(*rvl)[i+1][j]); + } + if (vr) + for (int j=0; j((*rvr)[i][j], (*rvr)[i+1][j]); + (*vr)[i+1][j] = complex((*rvr)[i][j], -(*rvr)[i+1][j]); + } + i += 2; + } + } + } + if (rvl) delete rvl; + if (rvr) delete rvr; +} + + +const NRMat realpart(const NRMat< complex > &a) +{ + NRMat result(a.nrows(), a.ncols()); + cblas_dcopy(a.nrows()*a.ncols(), (const double *)a[0], 2, result, 1); + return result; +} + +const NRMat imagpart(const NRMat< complex > &a) +{ + NRMat result(a.nrows(), a.ncols()); + cblas_dcopy(a.nrows()*a.ncols(), (const double *)a[0]+1, 2, result, 1); + return result; +} + +const NRMat< complex > realmatrix (const NRMat &a) +{ + NRMat > result(a.nrows(), a.ncols()); + cblas_dcopy(a.nrows()*a.ncols(), a, 1, (double *)result[0], 2); + return result; +} + +const NRMat< complex > imagmatrix (const NRMat &a) +{ + NRMat< complex > result(a.nrows(), a.ncols()); + cblas_dcopy(a.nrows()*a.ncols(), a, 1, (double *)result[0]+1, 2); + return result; +} + + +NRMat matrixfunction(NRMat a, complex + (*f)(const complex &), const bool adjust) +{ + int n = a.nrows(); + NRMat< complex > u(n, n), v(n, n); + NRVec< complex > w(n); + gdiagonalize(a, w, &u, &v); + NRVec< complex > z = diagofproduct(u, v, 1, 1); + + for (int i=0; i > r(n, n); + r.gemm(0.0, v, 'c', u, 'n', 1.0); + double inorm = cblas_dnrm2(n*n, (double *)r[0]+1, 2); + if (inorm > 1e-10) { + cout << "norm = " << inorm << endl; + laerror("nonzero norm of imaginary part of real matrixfunction"); + } + return realpart(r); +} + +NRMat matrixfunction(NRSMat a, double (*f) (double)) +{ + int n = a.nrows(); + NRVec w(n); + NRMat v(n, n); + diagonalize(a, w, &v, 0); + + for (int i=0; i u = v; + v.diagmultl(w); + NRMat r(n, n); + r.gemm(0.0, u, 't', v, 'n', 1.0); + return r; +} + +// instantize template to an addresable function +complex myclog (const complex &x) +{ + return log(x); +} + +NRMat log(const NRMat &a) +{ + return matrixfunction(a, &myclog, 1); +} + + +const NRVec diagofproduct(const NRMat &a, const NRMat &b, + bool trb, bool conjb) +{ + if (trb && (a.nrows() != b.nrows() || a.ncols() != b.ncols()) || + !trb && (a.nrows() != b.ncols() || a.ncols() != b.nrows())) + laerror("incompatible Mats in diagofproduct()"); + NRVec result(a.nrows()); + if (trb) + for(int i=0; i > diagofproduct(const NRMat< complex > &a, + const NRMat< complex > &b, bool trb, bool conjb) +{ + if (trb && (a.nrows() != b.nrows() || a.ncols() != b.ncols()) || + !trb && (a.nrows() != b.ncols() || a.ncols() != b.nrows())) + laerror("incompatible Mats in diagofproduct()"); + NRVec< complex > result(a.nrows()); + if (trb) { + if (conjb) { + for(int i=0; i &a, const NRMat &b, bool trb) +{ + if (trb && (a.nrows() != b.nrows() || a.ncols() != b.ncols()) || + !trb && (a.nrows() != b.ncols() || a.ncols() != b.nrows())) + laerror("incompatible Mats in diagofproduct()"); + if (trb) return cblas_ddot(a.nrows()*a.ncols(), a, 1, b, 1); + + double sum = 0.0; + for (int i=0; i &a, const NRSMat &b, + const bool diagscaled) +{ + if (a.nrows() != b.nrows()) laerror("incompatible SMats in trace2()"); + + double r = 2.0*cblas_ddot(a.nrows()*(a.nrows()+1)/2, a, 1, b, 1); + if (diagscaled) return r; + for (int i=0; i extern const NRMat diagonalmatrix(const NRVec &x); +template extern const NRVec lineof(const NRMat &x, const int i); +template extern const NRVec columnof(const NRMat &x, const int i); +template extern const NRVec diagonalof(const NRMat &x); + +//more efficient commutator for a special case of full matrices +template +inline const NRMat commutator ( const NRMat &x, const NRMat &y, const bool trx=0, const bool tryy=0) +{ +NRMat r(trx?x.ncols():x.nrows(), tryy?y.nrows():y.ncols()); +r.gemm((T)0,x,trx?'t':'n',y,tryy?'t':'n',(T)1); +r.gemm((T)1,y,tryy?'t':'n',x,trx?'t':'n',(T)-1); +return r; +} + +//more efficient commutator for a special case of full matrices +template +inline const NRMat anticommutator ( const NRMat &x, const NRMat &y, const bool trx=0, const bool tryy=0) +{ +NRMat r(trx?x.ncols():x.nrows(), tryy?y.nrows():y.ncols()); +r.gemm((T)0,x,trx?'t':'n',y,tryy?'t':'n',(T)1); +r.gemm((T)1,y,tryy?'t':'n',x,trx?'t':'n',(T)1); +return r; +} + + + + +////////////////////// +// LAPACK interface // +////////////////////// + +#define declare_la(T) \ +extern const NRVec diagofproduct(const NRMat &a, const NRMat &b,\ + bool trb=0, bool conjb=0); \ +extern T trace2(const NRMat &a, const NRMat &b, bool trb=0); \ +extern T trace2(const NRSMat &a, const NRSMat &b, const bool diagscaled=0);\ +extern void linear_solve(NRMat &a, NRMat *b, double *det=0); \ +extern void linear_solve(NRSMat &a, NRMat *b, double *det=0); \ +extern void diagonalize(NRMat &a, NRVec &w, const bool eivec=1,\ + const bool corder=1); \ +extern void diagonalize(NRSMat &a, NRVec &w, NRMat *v, const bool corder=1);\ +extern void singular_decomposition(NRMat &a, NRMat *u, NRVec &s,\ + NRMat *v, const bool corder=1); + +declare_la(double) +declare_la(complex) + +// Separate declarations +extern void gdiagonalize(NRMat &a, NRVec &wr, NRVec &wi, + NRMat *vl, NRMat *vr, const bool corder=1); +extern void gdiagonalize(NRMat &a, NRVec< complex > &w, + NRMat< complex >*vl, NRMat< complex > *vr); +extern NRMat matrixfunction(NRSMat a, double (*f) (double)); +extern NRMat matrixfunction(NRMat a, complex (*f)(const complex &),const bool adjust=0); + +//functions on matrices +inline NRMat sqrt(const NRSMat &a) { return matrixfunction(a,&sqrt); } +inline NRMat log(const NRSMat &a) { return matrixfunction(a,&log); } +extern NRMat log(const NRMat &a); + + +extern const NRMat realpart(const NRMat< complex >&); +extern const NRMat imagpart(const NRMat< complex >&); +extern const NRMat< complex > realmatrix (const NRMat&); +extern const NRMat< complex > imagmatrix (const NRMat&); + +//inverse by means of linear solve, preserving rhs intact +template +const NRMat inverse(NRMat a, T *det=0) +{ +#ifdef DEBUG + if(a.nrows()!=a.ncols()) laerror("inverse() for non-square matrix"); +#endif + NRMat result(a.nrows(),a.nrows()); + result = (T)1.; + linear_solve(a, &result, det); + return result; +} + diff --git a/smat.cc b/smat.cc new file mode 100644 index 0000000..9880023 --- /dev/null +++ b/smat.cc @@ -0,0 +1,399 @@ +#include "smat.h" +// TODO +// specialize unary minus + + +////////////////////////////////////////////////////////////////////////////// +////// forced instantization in the corresponding object file +template NRSMat; +template NRSMat< complex >; + + + +/* + * * Templates first, specializations for BLAS next + * + */ + +// conversion ctor, symmetrize general Mat into SMat +template +NRSMat::NRSMat(const NRMat &rhs) +{ +#ifdef DEBUG + if (nn != rhs.ncols()) laerror("attempt to convert non-square Mat to SMat"); +#endif + count = new int; + *count = 1; + v = new T[NN2]; + int i, j, k=0; + for (i=0; i +NRSMat::~NRSMat() +{ + if (!count) return; + if (--(*count) <= 0) { + if (v) delete[] (v); + delete count; + } +} + + +// assignment with a physical copy +template +NRSMat & NRSMat::operator|=(const NRSMat &rhs) +{ + if (this != &rhs) { + if(!rhs.v) laerror("unallocated rhs in NRSMat operator |="); + if(count) + if(*count > 1) { // detach from the other + --(*count); + nn = 0; + count = 0; + v = 0; + } + if (nn != rhs.nn) { + if(v) delete [] (v); + nn = rhs.nn; + } + if (!v) v = new T[NN2]; + if (!count) count = new int; + *count = 1; + memcpy(v, rhs.v, NN2*sizeof(T)); + } + return *this; +} + +// assignment +template +NRSMat & NRSMat::operator=(const NRSMat & rhs) +{ + if (this == & rhs) return *this; + if (count) + if(--(*count) == 0) { + delete [] v; + delete count; + } + v = rhs.v; + nn = rhs.nn; + count = rhs.count; + if (count) (*count)++; + return *this; +} + +// assing to diagonal +template +NRSMat & NRSMat::operator=(const T &a) +{ + copyonwrite(); + for (int i=0; i +const NRSMat NRSMat::operator-() const +{ + NRSMat result(nn); + for(int i=0; i +const T NRSMat::trace() const +{ + T tmp = 0; + for (int i=0; i +void NRSMat::copyonwrite() +{ +#ifdef DEBUG + if (!count) laerror("probably an assignment to undefined Smat"); +#endif + if (*count > 1) { + (*count)--; + count = new int; + *count = 1; + T *newv = new T[NN2]; + memcpy(newv, v, NN2*sizeof(T)); + v = newv; + } +} + +// resize Smat +template +void NRSMat::resize(const int n) +{ +#ifdef DEBUG + if (n <= 0) laerror("illegal matrix dimension in resize of Smat"); +#endif + if (count) + if(*count > 1) { //detach from previous + (*count)--; + count = 0; + v = 0; + nn = 0; + } + if (!count) { //new uninitialized vector or just detached + count = new int; + *count = 1; + nn = n; + v = new T[NN2]; + return; + } + if (n != nn) { + nn = n; + delete[] v; + v = new T[NN2]; + } +} + +// write matrix to the file with specific format +template +void NRSMat::fprintf(FILE *file, const char *format, const int modulo) const +{ + lawritemat(file, (const T *)(*this) ,nn, nn, format, 2, modulo, 1); +} + +// read matrix from the file with specific format +template +void NRSMat::fscanf(FILE *f, const char *format) +{ + int n, m; + if (std::fscanf(f,"%d %d",&n,&m) != 2) + laerror("cannot read matrix dimensions in SMat::fscanf"); + if (n != m) laerror("different dimensions of SMat"); + resize(n); + for (int i=0; i + */ + +// SMat * Mat +const NRMat NRSMat::operator*(const NRMat &rhs) const +{ +#ifdef DEBUG + if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat"); +#endif + NRMat result(nn, rhs.ncols()); + for (int k=0; k > +NRSMat< complex >::operator*(const NRMat< complex > &rhs) const +{ +#ifdef DEBUG + if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat"); +#endif + NRMat< complex > result(nn, rhs.ncols()); + for (int k=0; k NRSMat::operator*(const NRSMat &rhs) const +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat"); +#endif + NRMat result(0.0, nn, nn); + double *p, *q; + + p = v; + for (int i=0; i > +NRSMat< complex >::operator*(const NRSMat< complex > &rhs) const +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat"); +#endif + NRMat< complex > result(0.0, nn, nn); + NRMat< complex > rhsmat(rhs); + result = *this * rhsmat; + return result; +// laerror("complex SMat*Smat not implemented"); +} +// S dot S +const double NRSMat::dot(const NRSMat &rhs) const +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("dot of incompatible SMat's"); +#endif + return cblas_ddot(NN2, v, 1, rhs.v, 1); +} +const complex +NRSMat< complex >::dot(const NRSMat< complex > &rhs) const +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("dot of incompatible SMat's"); +#endif + complex dot; + cblas_zdotc_sub(nn, (void *)v, 1, (void *)rhs.v, 1, (void *)(&dot)); + return dot; +} + +// x = S * x +const NRVec NRSMat::operator*(const NRVec &rhs) const +{ +#ifdef DEBUG + if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec"); +#endif + NRVec result(nn); + cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs, 1, 0.0, result, 1); + return result; +} +const NRVec< complex > +NRSMat< complex >::operator*(const NRVec< complex > &rhs) const +{ +#ifdef DEBUG + if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec"); +#endif + NRVec< complex > result(nn); + cblas_zhpmv(CblasRowMajor, CblasLower, nn, (void *)(&CONE), (void *)v, + (const void *)rhs, 1, (void *)(&CZERO), (void *)result, 1); + return result; +} + +// norm of the matrix +const double NRSMat::norm(const double scalar) const +{ + if (!scalar) return cblas_dnrm2(NN2, v, 1); + double sum = 0; + int k = 0; + for (int i=0; i >::norm(const complex scalar) const +{ + if (!(scalar.real()) && !(scalar.imag())) + return cblas_dznrm2(NN2, (void *)v, 1); + double sum = 0; + complex tmp; + int k = 0; + for (int i=0; i::axpy(const double alpha, const NRSMat & x) +{ +#ifdef DEBUG + if (nn != x.nn) laerror("axpy of incompatible SMats"); +#endif + copyonwrite(); + cblas_daxpy(NN2, alpha, x.v, 1, v, 1); +} +void NRSMat< complex >::axpy(const complex alpha, + const NRSMat< complex > & x) +{ +#ifdef DEBUG + if (nn != x.nn) laerror("axpy of incompatible SMats"); +#endif + copyonwrite(); + cblas_zaxpy(nn, (void *)(&alpha), (void *)x.v, 1, (void *)v, 1); +} + + +export template +ostream& operator<<(ostream &s, const NRSMat &x) + { + int i,j,n; + n=x.nrows(); + s << n << ' ' << n << '\n'; + for(i=0;i +istream& operator>>(istream &s, NRSMat &x) + { + int i,j,n,m; + s >> n >> m; + if(n!=m) laerror("input symmetric matrix not square"); + x.resize(n); + for(i=0;i>x(i,j); + return s; + } + + +////////////////////////////////////////////////////////////////////////////// +//// forced instantization in the corespoding object file +#define INSTANTIZE(T) \ +template ostream & operator<<(ostream &s, const NRSMat< T > &x); \ +template istream & operator>>(istream &s, NRSMat< T > &x); \ + +INSTANTIZE(double) +INSTANTIZE(complex) + diff --git a/smat.h b/smat.h new file mode 100644 index 0000000..7a29511 --- /dev/null +++ b/smat.h @@ -0,0 +1,303 @@ +#ifndef _LA_SMAT_H_ +#define _LA_SMAT_H_ + +#include "vec.h" +#include "mat.h" + +#define NN2 (nn*(nn+1)/2) +template +class NRSMat { // symmetric or complex hermitean matrix in packed form +protected: + int nn; + T *v; + int *count; +public: + friend class NRVec; + friend class NRMat; + + inline NRSMat::NRSMat() : nn(0),v(0),count(0) {}; + inline explicit NRSMat(const int n); // Zero-based array + inline NRSMat(const T &a, const int n); //Initialize to constant + inline NRSMat(const T *a, const int n); // Initialize to array + inline NRSMat(const NRSMat &rhs); // Copy constructor + explicit NRSMat(const NRMat &rhs); // symmetric part of general matrix + explicit NRSMat(const NRVec &rhs, const int n); //construct matrix from vector + NRSMat & operator|=(const NRSMat &rhs); //assignment to a new copy + NRSMat & operator=(const NRSMat &rhs); //assignment + NRSMat & operator=(const T &a); //assign a to diagonal + inline NRSMat & operator*=(const T &a); + inline NRSMat & operator+=(const T &a); + inline NRSMat & operator-=(const T &a); + inline NRSMat & operator+=(const NRSMat &rhs); + inline NRSMat & operator-=(const NRSMat &rhs); + const NRSMat operator-() const; //unary minus + inline int getcount() const {return count?*count:0;} + inline const NRSMat operator*(const T &a) const; + inline const NRSMat operator+(const T &a) const; + inline const NRSMat operator-(const T &a) const; + inline const NRSMat operator+(const NRSMat &rhs) const; + inline const NRSMat operator-(const NRSMat &rhs) const; + inline const NRMat operator+(const NRMat &rhs) const; + inline const NRMat operator-(const NRMat &rhs) const; + const NRMat operator*(const NRSMat &rhs) const; // SMat*SMat + const NRMat operator*(const NRMat &rhs) const; // SMat*Mat + const T dot(const NRSMat &rhs) const; // Smat.Smat + const NRVec operator*(const NRVec &rhs) const; + inline const T& operator[](const int ij) const; + inline T& operator[](const int ij); + inline const T& operator()(const int i, const int j) const; + inline T& operator()(const int i, const int j); + inline int nrows() const; + inline int ncols() const; + const double norm(const T scalar=(T)0) const; + void axpy(const T alpha, const NRSMat &x); // this+= a*x + inline const T amax() const; + const T trace() const; + void copyonwrite(); + void resize(const int n); + inline operator T*(); //get a pointer to the data + inline operator const T*() const; //get a pointer to the data + ~NRSMat(); + void fprintf(FILE *f, const char *format, const int modulo) const; + void fscanf(FILE *f, const char *format); +//members concerning sparse matrix + explicit NRSMat(const SparseMat &rhs); // dense from sparse + inline void simplify() {}; //just for compatibility with sparse ones +}; + +// INLINES +// ctors +template +inline NRSMat::NRSMat(const int n) : nn(n), v(new T[NN2]), + count(new int) {*count = 1;} + +template +inline NRSMat::NRSMat(const T& a, const int n) : nn(n), + v(new T[NN2]), count(new int) +{ + *count =1; + if(a != (T)0) for(int i=0; i +inline NRSMat::NRSMat(const T *a, const int n) : nn(n), + v(new T[NN2]), count(new int) +{ + *count = 1; + memcpy(v, a, NN2*sizeof(T)); +} + +template +inline NRSMat::NRSMat(const NRSMat &rhs) //copy constructor +{ + v = rhs.v; + nn = rhs.nn; + count = rhs.count; + if (count) (*count)++; +} + +template +NRSMat::NRSMat(const NRVec &rhs, const int n) // type conversion +{ + nn = n; +#ifdef DEBUG + if (NN2 != rhs.size()) + laerror("matrix dimensions incompatible with vector length"); +#endif + count = rhs.count; + v = rhs.v; + (*count)++; +} + +// S *= a +inline NRSMat & NRSMat::operator*=(const double & a) +{ + copyonwrite(); + cblas_dscal(NN2, a, v, 1); + return *this; +} +inline NRSMat< complex > & +NRSMat< complex >::operator*=(const complex & a) +{ + copyonwrite(); + cblas_zscal(nn, (void *)(&a), (void *)v, 1); + return *this; +} + + +// S += D +template +inline NRSMat & NRSMat::operator+=(const T &a) +{ + copyonwrite(); + for (int i=0; i +inline NRSMat & NRSMat::operator-=(const T &a) +{ + copyonwrite(); + for (int i=0; i & +NRSMat::operator+=(const NRSMat & rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator+="); +#endif + copyonwrite(); + cblas_daxpy(NN2, 1.0, rhs.v, 1, v, 1); + return *this; +} +NRSMat< complex > & +NRSMat< complex >::operator+=(const NRSMat< complex > & rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator+="); +#endif + copyonwrite(); + cblas_zaxpy(NN2, (void *)(&CONE), (void *)(&rhs.v), 1, (void *)(&v), 1); + return *this; +} + +// S -= S +inline NRSMat & +NRSMat::operator-=(const NRSMat & rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator-="); +#endif + copyonwrite(); + cblas_daxpy(NN2, -1.0, rhs.v, 1, v, 1); + return *this; +} +inline NRSMat< complex > & +NRSMat< complex >::operator-=(const NRSMat< complex > & rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator-="); +#endif + copyonwrite(); + cblas_zaxpy(NN2, (void *)(&CMONE), (void *)(&rhs.v), 1, (void *)(&v), 1); + return *this; +} + +// SMat + Mat +template +inline const NRMat NRSMat::operator+(const NRMat &rhs) const +{ + return NRMat(rhs) += *this; +} + +// SMat - Mat +template +inline const NRMat NRSMat::operator-(const NRMat &rhs) const +{ + return NRMat(-rhs) += *this; +} + +// access the element, linear array case +template +inline T & NRSMat::operator[](const int ij) +{ +#ifdef DEBUG + if (*count != 1) laerror("lval [] with count > 1 in Smat"); + if (ij<0 || ij>=NN2) laerror("SMat [] out of range"); + if (!v) laerror("[] for unallocated Smat"); +#endif + return v[ij]; +} +template +inline const T & NRSMat::operator[](const int ij) const +{ +#ifdef DEBUG + if (ij<0 || ij>=NN2) laerror("SMat [] out of range"); + if (!v) laerror("[] for unallocated Smat"); +#endif + return v[ij]; +} + +// access the element, 2-dim array case +template +inline T & NRSMat::operator()(const int i, const int j) +{ +#ifdef DEBUG + if (*count != 1) laerror("lval (i,j) with count > 1 in Smat"); + if (i<0 || i>=nn || j<0 || j>=nn) laerror("SMat (i,j) out of range"); + if (!v) laerror("(i,j) for unallocated Smat"); +#endif + return i>=j ? v[i*(i+1)/2+j] : v[j*(j+1)/2+i]; +} +template +inline const T & NRSMat::operator()(const int i, const int j) const +{ +#ifdef DEBUG + if (i<0 || i>=nn || j<0 || j>=nn) laerror("SMat (i,j) out of range"); + if (!v) laerror("(i,j) for unallocated Smat"); +#endif + return i>=j ? v[i*(i+1)/2+j] : v[j*(j+1)/2+i]; +} + +// return the number of rows and columns +template +inline int NRSMat::nrows() const +{ + return nn; +} +template +inline int NRSMat::ncols() const +{ + return nn; +} + +// max value +inline const double NRSMat::amax() const +{ + return v[cblas_idamax(NN2, v, 1)]; +} +inline const complex NRSMat< complex >::amax() const +{ + return v[cblas_izamax(NN2, (void *)v, 1)]; +} + +// reference pointer to Smat +template +inline NRSMat:: operator T*() +{ +#ifdef DEBUG + if (!v) laerror("unallocated SMat in operator T*"); +#endif + return v; +} +template +inline NRSMat:: operator const T*() const +{ +#ifdef DEBUG + if (!v) laerror("unallocated SMat in operator T*"); +#endif + return v; +} + + + +// I/O +template extern ostream& operator<<(ostream &s, const NRSMat &x); +template extern istream& operator>>(istream &s, NRSMat &x); + + + + +// generate operators: SMat + a, a + SMat, SMat * a +NRVECMAT_OPER(SMat,+) +NRVECMAT_OPER(SMat,-) +NRVECMAT_OPER(SMat,*) +// generate SMat + SMat, SMat - SMat +NRVECMAT_OPER2(SMat,+) +NRVECMAT_OPER2(SMat,-) + +#endif /* _LA_SMAT_H_ */ diff --git a/sparsemat.cc b/sparsemat.cc new file mode 100644 index 0000000..d29c601 --- /dev/null +++ b/sparsemat.cc @@ -0,0 +1,1088 @@ +#include +#include +#include +#include + +#include "sparsemat.h" + +////////////////////////////////////////////////////////////////////////////// +//// forced instantization in the corresponding object file +template SparseMat; +template SparseMat< complex >; + + +#ifdef _GLIBCPP_NO_TEMPLATE_EXPORT +# define export +#endif + + +export template +ostream& operator<<(ostream &s, const SparseMat &x) + { + SPMatindex n,m; + n=x.nrows(); + m=x.ncols(); + s << (int)n << ' ' << (int)m << '\n'; + matel *list=x.getlist(); + while(list) + { + s << (int)list->row << ' ' << (int)list->col << ' ' << list->elem << '\n'; + list=list->next; + } + s << "-1 -1\n"; + return s; + } + +export template +istream& operator>>(istream &s, SparseMat &x) + { + int i,j; + int n,m; + matel *l=NULL; + s >> n >> m; + x.resize(n,m); + s >> i >> j; + while(i>=0 && j>=0) + { + matel *ll = l; + l= new matel; + l->next= ll; + l->row=i; + l->col=j; + s >> l->elem; + s >> i >> j; + } + x.setlist(l); + return s; + } + +//helpers to be used from different functions +export template +void SparseMat::unsort() +{ +if(symmetric) colsorted=NULL; +if(colsorted) delete[] colsorted; +if(rowsorted) delete[] rowsorted; +colsorted=rowsorted=NULL; +nonzero=0; +} + +export template +void SparseMat::deletelist() +{ +if(colsorted||rowsorted) unsort();//prevent obsolete pointers +if(*count >1) laerror("trying to delete shared list"); +matel *l=list; +while(l) + { + matel *ltmp=l; + l=l->next; + delete ltmp; + } +list=NULL; +delete count; +count=NULL; +} + +//no checks, not to be public +export template +void SparseMat::copylist(const matel *l) +{ +list=NULL; +while(l) + { + add(l->row,l->col,l->elem); + l=l->next; + } +} + +export template +void SparseMat::copyonwrite() +{ + if(!count) laerror("probably an assignment to undefined sparse matrix"); + if(*count > 1) + { + (*count)--; + count = new int; *count=1; + if(!list) laerror("empty list with count>1"); + unsort(); + copylist(list); + } +} + + +//global for sort !!! is not thread-safe +static void *globsorted; + +//global functions cannot be partially specialized in templates, we have to make it a member function + +//!!! gencmp's and genswap are critical for performance, make sure that compiler really inlines them +template +struct gencmp { +inline static SPMatindexdiff EXEC(register const SPMatindex i, register const SPMatindex j) + { + register SPMatindexdiff k; + register matel *ii,*jj; + ii=((matel **)globsorted)[i]; + jj=((matel **)globsorted)[j]; + if (k=ii->col-jj->col) return k; else return ii->row-jj->row;} +}; + + +template +struct gencmp { +inline static SPMatindexdiff EXEC(register const SPMatindex i, register const SPMatindex j) + { + register SPMatindexdiff k; + register matel *ii,*jj; + ii=((matel **)globsorted)[i]; + jj=((matel **)globsorted)[j]; + if (k=ii->row-jj->row) return k; else return ii->col-jj->col;} +}; + + + + +template +inline void genswap(const SPMatindex i,const SPMatindex j) +{ +SWAP(((matel **)globsorted)[i],((matel **)globsorted)[j]); +} + + + +template +void genqsort(SPMatindex l,SPMatindex r) /*safer version for worst case*/ +{ +register SPMatindex i,j,piv; + +/* other method for small arrays recommended in NUMREC is not used here +does not give so large gain for moderate arrays and complicates the +things, but would be worth trying (cf. profile) */ + +if(r<=l) return; /*1 element*/ +if(gencmp::EXEC(r,l)<0) genswap(l,r); +if(r-l==1) return; /*2 elements and preparation for median*/ +piv= (l+r)/2; /*pivoting by median of 3 - safer */ +if(gencmp::EXEC(piv,l)<0) genswap(l,piv); /*and change the pivot element implicitly*/ +if(gencmp::EXEC(r,piv)<0) genswap(r,piv); /*and change the pivot element implicitly*/ +if(r-l==2) return; /*in the case of 3 elements we are finished too */ + +/*general case , l-th r-th already processed*/ +i=l+1; j=r-1; +do{ + /*important sharp inequality - stops at sentinel element for efficiency*/ + /* this is inefficient if all keys are equal - unnecessary n log n swaps are done, but we assume that it is atypical input*/ + while(gencmp::EXEC(i++,piv)<0); + i--; + while(gencmp::EXEC(j--,piv)>0); + j++; + if(i(i,j); + if(i==piv) piv=j; else if(j==piv) piv=i; + } + if(i<=j) {i++; j--;} + }while(i<=j); + +if(j-l < r-i) /*because of the stack in bad case process first the shorter subarray*/ + {if(l(l,j); if(i(i,r);} +else + {if(i(i,r); if(l(l,j);} +} + + +export template +unsigned int SparseMat::length() const +{ +if(nonzero) return nonzero; +unsigned int n=0; +matel *l=list; +while(l) + { + ++n; + l=l->next; + } + +const_cast *>(this)->nonzero=n; +return n; +} + + +export template +unsigned int SparseMat::sort(int type) const //must be const since used from operator* which must be const to be compatible with other stuff, dirty casts here +{ +if(type==0&&rowsorted || type==1&&colsorted) return nonzero; +if(!list) return ((SparseMat *)this)->nonzero=0; + +if(type!=2) const_cast *>(this) ->setunsymmetric(); else type=0;//symmetric and sorted not supported simultaneously, type 2 is special just for simplify + +//create array from list, reallocate as necessary +unsigned int size=3*MAX(nn,mm); //initial guess for a number of nonzero elements +matel **sorted= new matel* [size]; +((SparseMat *)this)->nonzero=0; +matel *l = list; +while(l) + { + sorted[(((SparseMat *)this)->nonzero)++]=l; + if(nonzero >= size ) //reallocate + { + size*=2; + matel **newsorted= new matel* [size]; + memcpy(newsorted,sorted,size/2*sizeof(matel*)); + delete[] sorted; + sorted=newsorted; + } + l= l->next; + } + +//now sort the array of pointers according to type +globsorted =sorted; +if(type==0) {genqsort(0,nonzero-1); ((SparseMat *)this)->rowsorted=sorted;} //type handled at compile time for more efficiency +else {genqsort(0,nonzero-1); ((SparseMat *)this)->colsorted=sorted;} //should better be const cast + +//cout <<"sort: nonzero ="< +void SparseMat::simplify() +{ +unsigned int n; +if(!list) return; +copyonwrite(); +if(symmetric) + { + unsort(); + matel *p; + p=list; + while(p) + { + if(p->row>p->col) SWAP(p->row,p->col); //get into one triangle, not OK for complex hermitean + p=p->next; + } + n=sort(2); //sort and further handle like a triangle matrix + } +else n=sort(0); //sorts according to row,column + +unsigned int i,j; +SPMatindex r,c; +j=0; +r=rowsorted[j]->row; +c=rowsorted[j]->col; +for(i=1; irow && c==rowsorted[i]->col) {rowsorted[j]->elem +=rowsorted[i]->elem; delete rowsorted[i]; rowsorted[i]=NULL;} + else + { + j=i; + r=rowsorted[j]->row; + c=rowsorted[j]->col; + } + } + +//check if summed to zero +for(i=0; ielem)elem +#endif + ) {delete rowsorted[i]; rowsorted[i]=NULL;} + +//restore connectivity +int nonz=0; +matel *p,*first,*prev; +first=NULL; +prev=NULL; +for(i=0; inext=p; + p->next=NULL; + prev=p; + } +list=first; +nonzero=nonz; +unsort(); //since there were NULLs introduced, rowsorted is not dense +} + + +export template +void SparseMat::resize(const SPMatindex n, const SPMatindex m) +{ + if(n<=0 || m<=0) laerror("illegal matrix dimension"); + unsort(); + if(count) + { + if(*count > 1) {(*count)--; count=NULL; list=NULL;} //detach from previous + else if(*count==1) deletelist(); + } + nn=n; + mm=m; + count=new int(1); //empty but defined matrix + list=NULL; + symmetric=0; + colsorted=rowsorted=NULL; +} + +export template +void SparseMat::addsafe(const SPMatindex n, const SPMatindex m, const T elem) +{ +#ifdef debug +if(n<0||n>=nn||m<0||m>=mm) laerror("SparseMat out of range"); +#endif +#ifdef SPARSEEPSILON +if(abs(elem) +SparseMat & SparseMat::operator=(const SparseMat &rhs) +{ + if (this != &rhs) + { + unsort(); + if(count) + if(--(*count) ==0) {deletelist(); delete count;} // old stuff obsolete + list=rhs.list; + nn=rhs.nn; + mm=rhs.mm; + if(list) count=rhs.count; else count= new int(0); //make the matrix defined, but empty and not shared, count will be incremented below + symmetric=rhs.symmetric; + if(count) (*count)++; + } + return *this; +} + +export template +SparseMat & SparseMat::join(SparseMat &rhs) +{ +if(symmetric!=rhs.symmetric||nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible matrices in join()"); +if(*rhs.count!=1) laerror("shared rhs in join()"); +if(!count) {count=new int; *count=1; list=NULL;} +else copyonwrite(); +matel **last=&list; +while(*last) last= &((*last)->next); +*last=rhs.list; +rhs.list=NULL; +return *this; +} + + +export template +SparseMat & SparseMat::addtriangle(const SparseMat &rhs, const bool lower, const char sign) +{ +if(nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible dimensions for +="); +if(!count) {count=new int; *count=1; list=NULL;} +else copyonwrite(); +register matel *l=rhs.list; +while(l) + { + if(rhs.symmetric || lower && l->row <=l->col || !lower && l->row >=l->col) +#ifdef SPARSEEPSILON + if(abs(l->elem)>SPARSEEPSILON) +#endif + add( l->row,l->col,sign=='+'?l->elem:- l->elem); + l=l->next; + } +return *this; +} + +export template +SparseMat & SparseMat::operator+=(const SparseMat &rhs) +{ +if(symmetric&&!rhs.symmetric) laerror("cannot add general to symmetric sparse"); +if(nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible dimensions for +="); +if(!count) {count=new int; *count=1; list=NULL;} +else copyonwrite(); +bool symmetrize= !symmetric && rhs.symmetric; +register matel *l=rhs.list; +if(symmetrize) +while(l) + { +#ifdef SPARSEEPSILON + if(abs(l->elem)>SPARSEEPSILON) +#endif + {add( l->row,l->col,l->elem); if( l->row!=l->col) add( l->col,l->row,l->elem);} + l=l->next; + } +else +while(l) + { +#ifdef SPARSEEPSILON + if(abs(l->elem)>SPARSEEPSILON) +#endif + add( l->row,l->col,l->elem); + l=l->next; + } +return *this; +} + +export template +SparseMat & SparseMat::operator-=(const SparseMat &rhs) +{ +if(symmetric&&!rhs.symmetric) laerror("cannot add general to symmetric sparse"); +if(nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible dimensions for -="); +if(!count) {count=new int; *count=1; list=NULL;} +else copyonwrite(); +bool symmetrize= !symmetric && rhs.symmetric; +register matel *l=rhs.list; +if(symmetrize) +while(l) + { +#ifdef SPARSEEPSILON + if(abs(l->elem)>SPARSEEPSILON) +#endif + {add( l->row,l->col,- l->elem); if( l->row!=l->col) add( l->col,l->row,- l->elem);} + l=l->next; + } +else +while(l) + { +#ifdef SPARSEEPSILON + if(abs(l->elem)>SPARSEEPSILON) +#endif + add( l->row,l->col,- l->elem); + l=l->next; + } +return *this; +} + + +//constructor from a dense matrix +export template +SparseMat::SparseMat(const NRMat &rhs) +{ +nn=rhs.nrows(); +mm=rhs.ncols(); +count=new int; +*count=1; +list=NULL; +symmetric=0; +colsorted=rowsorted=NULL; +SPMatindex i,j; +for(i=0;iSPARSEEPSILON) +#else + if(t) +#endif + add(i,j,t); + } +} + +//constructor dense matrix from sparse +export template +NRMat::NRMat(const SparseMat &rhs) +{ +nn=rhs.nrows(); +mm=rhs.ncols(); +count=new int(1); +T *p; +#ifdef MATPTR + v= new T*[nn]; + p=v[0] = new T[mm*nn]; + for (int i=1; i< nn; i++) v[i] = v[i-1] + mm; +#else + p= v = new T[mm*nn]; +#endif +memset(p,0,nn*mm*sizeof(T)); +matel *l=rhs.getlist(); +bool sym=rhs.issymmetric(); +while(l) + { +#ifdef MATPTR + v[l->row][l->col] +=l->elem; + if(sym && l->row!=l->col) v[l->col][l->row] +=l->elem; +#else + v[l->row*mm+l->col] +=l->elem; + if(sym && l->row!=l->col) v[l->col*mm+l->row] +=l->elem; +#endif + l=l->next; + } +} + + + + +//constructor dense symmetric packed matrix from sparse +#define nn2 (nn*(nn+1)/2) +export template +NRSMat::NRSMat(const SparseMat &rhs) +{ +if(!rhs.issymmetric()||rhs.nrows()!=rhs.ncols()) laerror("sparse matrix is not symmetric"); +nn=rhs.nrows(); +count=new int(1); +v=new T[nn2]; +memset(v,0,nn2*sizeof(T)); +matel *l=rhs.getlist(); +while(l) + { + (*this)(l->row,l->col)=l->elem; + l=l->next; + } +} +#undef nn2 + +//constructor dense vector from sparse +export template +NRVec::NRVec(const SparseMat &rhs) +{ +if(rhs.nrows()>1 && rhs.ncols()>1) laerror("cannot construct a vector from a sparse matrix with more than one row/column"); +nn=rhs.nrows()>1?rhs.nrows():rhs.ncols(); +v=new T[nn]; +memset(v,0,nn*sizeof(T)); +count=new int(1); +matel *l=rhs.getlist(); + +if(rhs.nrows()>1) while(l) + { + v[l->row]+=l->elem; + l=l->next; + } +else while(l) + { + v[l->col]+=l->elem; + l=l->next; + } +} + +//assignment of a scalar matrix +export template +SparseMat & SparseMat::operator=(const T a) +{ +if(!count ||nn<=0||mm<=0) laerror("assignment of scalar to undefined sparse matrix"); +if(nn!=mm) laerror("assignment of scalar to non-square sparse matrix"); +resize(nn,mm);//clear +#ifdef SPARSEEPSILON +if(abs(a) +SparseMat & SparseMat::operator+=(const T a) +{ +if(!count ||nn<=0||mm<=0) laerror("assignment of scalar to undefined sparse matrix"); +if(nn!=mm) laerror("assignment of scalar to non-square sparse matrix"); +if(a==(T)0) return *this; +SPMatindex i; +for(i=0;i +SparseMat & SparseMat::operator-=(const T a) +{ +if(!count ||nn<=0||mm<=0) laerror("assignment of scalar to undefined sparse matrix"); +if(nn!=mm) laerror("assignment of scalar to non-square sparse matrix"); +if(a==(T)0) return *this; +SPMatindex i; +for(i=0;i +SparseMat::SparseMat(const NRSMat &rhs) +{ +nn=rhs.nrows(); +mm=rhs.ncols(); +count=new int; +*count=1; +list=NULL; +symmetric=1; +colsorted=rowsorted=NULL; +SPMatindex i,j; +for(i=0;iSPARSEEPSILON +#else + t=rhs(i,j) +#endif + ) add(i,j,t); + } +} + +export template +void SparseMat::transposeme() +{ +if(!count) laerror("transposeme on undefined lhs"); +if(symmetric||!list) return; +copyonwrite();//also unsort +register matel *l=list; +while(l) + { + SWAP(l->row,l->col); + l=l->next; + } +SWAP(nn,mm); +} + +export template +void SparseMat::setunsymmetric() +{ +if(!symmetric) return; +unsort(); +symmetric=0; +if(!count) return; +copyonwrite(); +matel *l=list; +while(l) //include the mirror picture of elements into the list + { + if( +#ifdef SPARSEEPSILON + abs(l->elem)>SPARSEEPSILON && +#endif + l->row!=l->col) add(l->col,l->row,l->elem); //not OK for complex-hermitean + l=l->next; + } +} + + +export template +SparseMat & SparseMat::operator*=(const T a) +{ +if(!count) laerror("operator*= on undefined lhs"); +if(!list||a==(T)1) return *this; +if(a==(T)0) resize(nn,mm); +else copyonwrite(); + +register matel *l=list; +while(l) + { + l->elem*=a; + l=l->next; + } +return *this; +} + +const double SparseMat::dot(const NRMat &rhs) const +{ +double r=0; +matel *l=list; +while(l) + { + r+= l->elem*rhs[l->row][l->col]; + if(symmetric&&l->row!=l->col) r+=l->elem*rhs[l->col][l->row]; + l=l->next; + } +return r; +} + + +template +void NRVec::gemv(const T beta, const SparseMat &a, const char trans, const T alpha, const NRVec &x) +{ +if((trans=='n'?a.ncols():a.nrows())!= (SPMatindex)x.size()) laerror("incompatible sizes in gemv"); +copyonwrite(); +if(beta!=(T)0) (*this) *= beta; +else memset(v,0,nn*sizeof(T)); + +bool transp = tolower(trans)!='n'; //not OK for complex + +matel *l=a.getlist(); + +if(alpha==(T)0 || !l) return; +T *vec=x.v; + +if(alpha==(T)1) +{ + if(a.issymmetric()) + { + while(l) + { + v[l->row]+= l->elem*vec[l->col]; + if(l->row!=l->col) v[l->col]+= l->elem*vec[l->row]; + l=l->next; + } + } + else + { + if(transp) + while(l) + { + v[l->col]+= l->elem*vec[l->row]; + l=l->next; + } + else + while(l) + { + v[l->row]+= l->elem*vec[l->col]; + l=l->next; + } + } +} +else +{ + if(a.issymmetric()) + { + while(l) + { + v[l->row]+= alpha*l->elem*vec[l->col]; + if(l->row!=l->col) v[l->col]+= alpha*l->elem*vec[l->row]; + l=l->next; + } + } + else + { + if(transp) + while(l) + { + v[l->col]+= alpha*l->elem*vec[l->row]; + l=l->next; + } + else + while(l) + { + v[l->row]+= alpha*l->elem*vec[l->col]; + l=l->next; + } + } +} +} + + +//multiplication with dense vector from both sides +template +const NRVec SparseMat::multiplyvector(const NRVec &vec, const bool transp) const +{ +if(transp && nn!=(SPMatindex)vec.size() || !transp && mm!=(SPMatindex)vec.size()) laerror("incompatible sizes in sparsemat*vector"); +NRVec result(transp?mm:nn); +result.gemv((T)0, *this, transp?'t':'n', (T)1., vec); +return result; +} + + +template +const NRVec NRVec::operator*(const SparseMat &mat) const +{ +if(mat.nrows()!= (SPMatindex)size()) laerror("incompatible sizes in vector*sparsemat"); +NRVec result((T)0,mat.ncols()); +matel *l=mat.getlist(); +bool symmetric=mat.issymmetric(); +while(l) + { + result.v[l->col]+= l->elem*v[l->row]; + if(symmetric&&l->row!=l->col) result.v[l->row]+= l->elem*v[l->col]; + l=l->next; + } +return result; + +} + +template +const T SparseMat::trace() const +{ +matel *l=list; +T sum(0); +while(l) + { + if(l->row==l->col) sum+= l->elem; + l=l->next; + } +return sum; +} + + +//not OK for complex hermitean +template +const T SparseMat::norm(const T scalar) const +{ +if(!list) return T(0); +const_cast *>(this)->simplify(); + +matel *l=list; +T sum(0); +if(scalar!=(T)0) + { + if(symmetric) + while(l) + { + T hlp=l->elem; + bool b=l->row==l->col; + if(b) hlp-=scalar; + T tmp=hlp*hlp; + sum+= tmp; + if(!b) sum+=tmp; + l=l->next; + } + else + while(l) + { + T hlp=l->elem; + if(l->row==l->col) hlp-=scalar; + sum+= hlp*hlp; + l=l->next; + } + } +else + { + if(symmetric) + while(l) + { + T tmp=l->elem*l->elem; + sum+= tmp; + if(l->row!=l->col) sum+=tmp; + l=l->next; + } + else + while(l) + { + sum+= l->elem*l->elem; + l=l->next; + } + } +return sqrt(sum); //not OK for int, would need traits technique +} + + +template +void SparseMat::axpy(const T alpha, const SparseMat &x, const bool transp) +{ +if(!transp && (nn!=x.nn||mm!=x.mm) || transp && (mm!=x.nn||nn!=x.mm) ) laerror("incompatible dimensions for axpy"); +if(!count) {count=new int; *count=1; list=NULL;} +else copyonwrite(); + +if(alpha==(T)0||x.list==NULL) return; +if(!transp||x.symmetric) + { + if(alpha==(T)1) {*this +=x; return;} + if(alpha==(T)-1) {*this -=x; return;} + } +if(symmetric!=x.symmetric) laerror("general axpy not supported for different symmetry types"); +//now does not matter if both are general or both symmetric (transposition will not matter) + +register matel *l=x.list; +if(transp) +while(l) + { + register T t=alpha*l->elem; +#ifdef SPARSEEPSILON + if(abs(t)>SPARSEEPSILON) +#endif + add( l->col,l->row,t); + l=l->next; + } +else +while(l) + { + register T t=alpha*l->elem; +#ifdef SPARSEEPSILON + if(abs(t)>SPARSEEPSILON) +#endif + add( l->row,l->col,t); + l=l->next; + } +} + + +template +const T SparseMat::dot(const SparseMat &rhs) const //complex conj. not implemented yet +{ +if(nn!=rhs.nn || mm!=rhs.mm) laerror("dot of incompatible sparse matrices"); +if(symmetric||rhs.symmetric) laerror("dot of symmetric sparse matrices not implemented"); + +T result=0; +if(list && rhs.list) //both nonzero + { + unsigned int na=sort(0); + unsigned int nb=rhs.sort(0); + + //now merge the sorted lists + register unsigned int i,j; + register SPMatindex ra,ca; + j=0; + for(i=0; irow; + ca=rowsorted[i]->col; + while(jrow) col) elem; + register unsigned int k; + /*j remembers the position, k forwards in the rhs.rowsorted to find all combinations*/ + k=j; + do { + result += tmp*rhs.rowsorted[k]->elem; + k++; + } while(krow == ra) && (rhs.rowsorted[k]->col == ca)); + } + /*else skip in left operand*/ + } + } +return result; +} + + + +template +const SparseMat SparseMat::operator*(const SparseMat &rhs) const +{ +if(mm!=rhs.nn) laerror("product of incompatible sparse matrices"); +if(symmetric||rhs.symmetric) laerror("product of symmetric sparse matrices not implemented"); + +SparseMat result(nn,rhs.mm); +if(list && rhs.list) //both nonzero + { + unsigned int na=sort(1); + unsigned int nb=rhs.sort(0); + + //now merge the sorted lists + register unsigned int i,j; + register SPMatindex rb=0,ca; + j=0; + for(i=0; icol; + while(jrow) elem; + register unsigned int k; + /*j remembers the position, k forwards in the rhs.rowsorted to find all combinations*/ + k=j; + do { + result.add(colsorted[i]->row,rhs.rowsorted[k]->col,tmp*rhs.rowsorted[k]->elem); + k++; + } while(krow) == ca)); + } + /*else skip in left operand*/ + } + result.simplify();//otherwise number of terms tends to grow exponentially + } +return result; +} + + + +template +void SparseMat::gemm(const T beta, const SparseMat &a, const char transa, const SparseMat &b, const char transb, const T alpha) +{ +SPMatindex l(transa=='n'?a.nn:a.mm); +SPMatindex k(transa=='n'?a.mm:a.nn); +SPMatindex kk(transb=='n'?b.nn:b.mm); +SPMatindex ll(transb=='n'?b.mm:b.nn); +if(a.symmetric||b.symmetric) laerror("symmetric sparse matrices not supported in gemm"); + +if(beta==(T)0) resize(l,ll); //empty matrix +else *this *= beta; //takes care about beta=1 +if(l!=nn|| ll!=mm||k!=kk) laerror("incompatible sparse matrices in gemm"); + +if(alpha==(T)0 || !a.list ||!b.list) return; +copyonwrite(); + +//regular case, specialize for transpositions +matel **ma,**mb; +unsigned int na,nb; +bool tra= transa!='n'; +bool trb= transb!='n'; +if(!tra) {na=a.sort(1); ma=a.colsorted;} else {na=a.sort(0); ma=a.rowsorted;} +if(!trb) {nb=b.sort(0); mb=b.rowsorted;} else {nb=b.sort(1); mb=b.colsorted;} + +//now merge the sorted lists +register unsigned int i,j; +register SPMatindex rb=0,ca,row; +j=0; +for(i=0; irow:ma[i]->col; + row=tra?ma[i]->col:ma[i]->row; + while(jcol:mb[j]->row) elem; + register unsigned int k; + /*j remembers the position, k forwards in the mb to find all combinations*/ + k=j; + do { + register SPMatindex col; + col=trb?mb[k]->row:mb[k]->col; + if(!symmetric||row<=col) add(row,col,tmp*mb[k]->elem); + k++; + } while(kcol:mb[k]->row) == ca)); + } + /*else skip in ma*/ + } + +simplify(); +} + + + + +#ifdef _GLIBCPP_NO_TEMPLATE_EXPORT + +#define INSTANTIZE(T) \ +template ostream& operator<<(ostream &s, const SparseMat &x); \ +template istream& operator>>(istream &s, SparseMat &x); \ +template void SparseMat::copyonwrite(); \ +template void SparseMat::resize(const SPMatindex n, const SPMatindex m); \ +template void SparseMat::unsort(); \ +template unsigned int SparseMat::sort(int type) const; \ +template unsigned int SparseMat::length() const; \ +template void SparseMat::deletelist(); \ +template void SparseMat::simplify(); \ +template void SparseMat::copylist(const matel *l); \ +template void SparseMat::add(const SPMatindex n, const SPMatindex m, const T elem); \ +template SparseMat & SparseMat::operator=(const SparseMat &rhs); \ +template SparseMat & SparseMat::operator+=(const SparseMat &rhs); \ +template SparseMat & SparseMat::operator-=(const SparseMat &rhs); \ +template SparseMat::SparseMat(const NRMat &rhs); \ +template SparseMat::SparseMat(const NRSMat &rhs); \ +template void SparseMat::transposeme(); \ +template SparseMat & SparseMat::operator*=(const T a); \ +template void SparseMat::setunsymmetric(); \ +template SparseMat & SparseMat::operator=(const T a); \ +template SparseMat & SparseMat::operator+=(const T a); \ +template SparseMat & SparseMat::operator-=(const T a); \ +template NRMat::NRMat(const SparseMat &rhs); \ +template NRSMat::NRSMat(const SparseMat &rhs); \ +template NRVec::NRVec(const SparseMat &rhs); \ +template const NRVec SparseMat::operator*(const NRVec &vec) const; \ +template const NRVec NRVec::operator*(const SparseMat &mat) const; \ +template SparseMat & SparseMat::join(SparseMat &rhs); \ +template const T SparseMat::trace() const; \ +template const T SparseMat::norm(const T scalar) const; \ +template void SparseMat::axpy(const T alpha, const SparseMat &x, const bool transp); \ +template const SparseMat SparseMat::operator*(const SparseMat &rhs) const; \ +template const T SparseMat::dot(const SparseMat &rhs) const; \ +template void SparseMat::gemm(const T beta, const SparseMat &a, const char transa, const SparseMat &b, const char transb, const T alpha); \ +template void NRVec::gemv(const T beta, const SparseMat &a, const char trans, const T alpha, const NRVec &x);\ + + +INSTANTIZE(double) + + +// some functions are not OK for hermitean! INSTANTIZE(complex) + +#endif diff --git a/sparsemat.h b/sparsemat.h new file mode 100644 index 0000000..96468ee --- /dev/null +++ b/sparsemat.h @@ -0,0 +1,220 @@ +//for vectors and dense matrices we shall need +#include "la.h" + +template +inline const T MAX(const T &a, const T &b) + {return b > a ? (b) : (a);} + +template +inline void SWAP(T &a, T &b) + {T dum=a; a=b; b=dum;} + + +//threshold for neglecting elements, if not defined, no tests are done except exact zero test in simplify - might be even faster +//seems to perform better with a threshold, in spite of abs() tests +#define SPARSEEPSILON 1e-13 + +typedef unsigned int SPMatindex; +typedef int SPMatindexdiff; //more clear would be to use traits + +//element of a linked list +template +struct matel + { + T elem; + SPMatindex row; + SPMatindex col; + matel *next; + }; + + +template +class SparseMat { +protected: + SPMatindex nn; + SPMatindex mm; + bool symmetric; + unsigned int nonzero; + int *count; + matel *list; +private: + matel **rowsorted; //NULL terminated + matel **colsorted; //NULL terminated + void unsort(); + void deletelist(); + void copylist(const matel *l); +public: + //iterator + typedef class iterator { + private: + matel *p; + public: + iterator() {}; + ~iterator() {}; + iterator(matel *list): p(list) {}; + bool operator==(const iterator rhs) const {return p==rhs.p;} + bool operator!=(const iterator rhs) const {return p!=rhs.p;} + iterator operator++() {return p=p->next;} + iterator operator++(int) {matel *q=p; p=p->next; return q;} + matel & operator*() const {return *p;} + matel * operator->() const {return p;} + }; + iterator begin() const {return list;} + iterator end() const {return NULL;} + + //constructors etc. + inline SparseMat() :nn(0),mm(0),symmetric(0),nonzero(0),count(NULL),list(NULL),rowsorted(NULL),colsorted(NULL) {}; + inline SparseMat(const SPMatindex n, const SPMatindex m) :nn(n),mm(m),symmetric(0),nonzero(0),count(new int(1)),list(NULL),rowsorted(NULL),colsorted(NULL) {}; + SparseMat(const SparseMat &rhs); //copy constructor + inline int getcount() const {return count?*count:0;} + explicit SparseMat(const NRMat &rhs); //construct from a dense one + explicit SparseMat(const NRSMat &rhs); //construct from a dense symmetric one + SparseMat & operator=(const SparseMat &rhs); + SparseMat & operator=(const T a); //assign a to diagonal + SparseMat & operator+=(const T a); //assign a to diagonal + SparseMat & operator-=(const T a); //assign a to diagonal + SparseMat & operator*=(const T a); //multiply by a scalar + SparseMat & operator+=(const SparseMat &rhs); + SparseMat & addtriangle(const SparseMat &rhs, const bool lower, const char sign); + SparseMat & join(SparseMat &rhs); //more efficient +=, rhs will be emptied + SparseMat & operator-=(const SparseMat &rhs); + inline const SparseMat operator+(const T &rhs) const {return SparseMat(*this) += rhs;} + inline const SparseMat operator-(const T &rhs) const {return SparseMat(*this) -= rhs;} + inline const SparseMat operator*(const T &rhs) const {return SparseMat(*this) *= rhs;} + inline const SparseMat operator+(const SparseMat &rhs) const {return SparseMat(*this) += rhs;} //must not be symmetric+general + inline const SparseMat operator-(const SparseMat &rhs) const {return SparseMat(*this) -= rhs;} //must not be symmetric+general + const NRVec multiplyvector(const NRVec &rhs, const bool transp=0) const; //sparse matrix * dense vector optionally transposed + inline const NRVec operator*(const NRVec &rhs) const {return multiplyvector(rhs);} //sparse matrix * dense vector + const SparseMat operator*(const SparseMat &rhs) const; + void gemm(const T beta, const SparseMat &a, const char transa, const SparseMat &b, const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this, if this is symemtric, only half will be added onto it + const T dot(const SparseMat &rhs) const; //supervector dot product + const T dot(const NRMat &rhs) const; //supervector dot product + inline ~SparseMat(); + void axpy(const T alpha, const SparseMat &x, const bool transp=0); // this+= a*x(transposed) + inline matel *getlist() const {return list;} + void setlist(matel *l) {list=l;} + inline SPMatindex nrows() const {return nn;} + inline SPMatindex ncols() const {return mm;} + void resize(const SPMatindex n, const SPMatindex m); + void transposeme(); + const SparseMat transpose() const; + inline void setsymmetric() {if(nn!=mm) laerror("non-square cannot be symmetric"); symmetric=1;} + inline void defineunsymmetric() {symmetric=0;} //just define and do nothing with it + void setunsymmetric();//unwind the matrix assuming it was indeed symmetric + inline bool issymmetric() const {return symmetric;} + unsigned int length() const; + void copyonwrite(); + void simplify(); + const T trace() const; + const T norm(const T scalar=(T)0) const; //is const only mathematically, not in internal implementation - we have to simplify first + unsigned int sort(int type) const; + inline void add(const SPMatindex n, const SPMatindex m, const T elem) {matel *ltmp= new matel; ltmp->next=list; list=ltmp; list->row=n; list->col=m; list->elem=elem;} + void addsafe(const SPMatindex n, const SPMatindex m, const T elem); +}; + +template + extern istream& operator>>(istream &s, SparseMat &x); + +template + extern ostream& operator<<(ostream &s, const SparseMat &x); + +//destructor +template +SparseMat::~SparseMat() +{ + unsort(); + if(!count) return; + if(--(*count)<=0) + { + deletelist(); + delete count; + } +} + +//copy constructor (sort arrays are not going to be copied) +template +SparseMat::SparseMat(const SparseMat &rhs) +{ +#ifdef debug +if(! &rhs) laerror("SparseMat copy constructor with NULL argument"); +#endif + nn=rhs.nn; + mm=rhs.mm; + symmetric=rhs.symmetric; + if(rhs.list&&!rhs.count) laerror("some inconsistency in SparseMat contructors or assignments"); + list=rhs.list; + if(list) {count=rhs.count; (*count)++;} else count=new int(1); //make the matrix defined, but empty and not shared + colsorted=rowsorted=NULL; + nonzero=0; +} + +template +const SparseMat SparseMat::transpose() const +{ +if(list&&!count) laerror("some inconsistency in SparseMat transpose"); +SparseMat result; +result.nn=mm; +result.mm=nn; +result.symmetric=symmetric; +if(result.symmetric) + { + result.list=list; + if(list) {result.count=count; (*result.count)++;} else result.count=new int(1); //make the matrix defined, but empty and not shared + } +else //really transpose it + { + result.count=new int(1); + result.list=NULL; + matel *l =list; + while(l) + { + result.add(l->col,l->row,l->elem); + l=l->next; + } + } +result.colsorted=result.rowsorted=NULL; +result.nonzero=0; +return result; +} + + + +template +inline const SparseMat commutator ( const SparseMat &x, const SparseMat &y, const bool trx=0, const bool tryy=0) +{ +SparseMat r; +r.gemm((T)0,x,trx?'t':'n',y,tryy?'t':'n',(T)1); +r.gemm((T)1,y,tryy?'t':'n',x,trx?'t':'n',(T)-1); //saves a temporary and simplifies the whole sum +return r; +} + +template +inline const SparseMat anticommutator ( const SparseMat &x, const SparseMat &y, const bool trx=0, const bool tryy=0) +{ +SparseMat r; +r.gemm((T)0,x,trx?'t':'n',y,tryy?'t':'n',(T)1); +r.gemm((T)1,y,tryy?'t':'n',x,trx?'t':'n',(T)1); //saves a temporary and simplifies the whole sum +return r; +} + +//add sparse to dense +template +NRMat & NRMat::operator+=(const SparseMat &rhs) +{ +if(nn!=rhs.nrows()||mm!=rhs.ncols()) laerror("incompatible matrices in +="); +matel *l=rhs.getlist(); +bool sym=rhs.issymmetric(); +while(l) + { +#ifdef MATPTR + v[l->row][l->col] +=l->elem; + if(sym && l->row!=l->col) v[l->col][l->row] +=l->elem; +#else + v[l->row*mm+l->col] +=l->elem; + if(sym && l->row!=l->col) v[l->col*mm+l->row] +=l->elem; +#endif + l=l->next; + } +} + + diff --git a/sparsemat_traits.h b/sparsemat_traits.h new file mode 100644 index 0000000..c2bcf35 --- /dev/null +++ b/sparsemat_traits.h @@ -0,0 +1,15 @@ +//////////////////////////////////////////////////////////////////////////// +//traits classes + +#ifndef _SPARSEMAT_TRAITS_INCL +#define _SPARSEMAT_TRAITS_INCL + + +template<> struct NRMat_traits > { +typedef double elementtype; +typedef SparseMat producttype; +static double norm (const SparseMat &x) {return x.norm();} +static void axpy (SparseMat&s, const SparseMat &x, const double c) {s.axpy(c,x);} +}; + +#endif diff --git a/strassen.cc b/strassen.cc new file mode 100644 index 0000000..209248b --- /dev/null +++ b/strassen.cc @@ -0,0 +1,31 @@ +#include "la.h" +/*Strassen algorithm*/ +// called routine is fortran-compatible +extern "C" void fmm(const char c_transa,const char c_transb,const int m,const int n,const int k,const double alpha, + const double *a,const int lda,const double *b,const int ldb,const double beta,double *c,const int ldc, + double *d_aux,int i_naux); +extern "C" void strassen_cutoff(int c, int c1, int c2, int c3); + +void NRMat::s_cutoff(const int c, const int c1, const int c2, const int c3) const +{ strassen_cutoff(c,c1,c2,c3);} +void NRMat::strassen(const double beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const double alpha) +{ +int l(transa=='n'?a.nn:a.mm); +int k(transa=='n'?a.mm:a.nn); +int kk(transb=='n'?b.nn:b.mm); +int ll(transb=='n'?b.mm:b.nn); + +if(l!=nn|| ll!=mm||k!=kk) laerror("incompatible (or undefined size) matrices in strassen"); + +copyonwrite(); +//swap transpositions and order of matrices +fmm(transb,transa,mm,nn,k,alpha,b,b.mm, a, a.mm, beta,*this, mm,NULL,0); +} + +//stub for f77 blas called from strassen routine +extern "C" void xerbla_(const char *msg) +{ +laerror(msg); +} + + diff --git a/t.cc b/t.cc new file mode 100644 index 0000000..aca14ee --- /dev/null +++ b/t.cc @@ -0,0 +1,775 @@ +// g++ -D _GLIBCPP_NO_TEMPLATE_EXPORT -g testblas.cc testblas2.cc nrutil_modif.cc -L/usr/local/lib/atlas -lstrassen -lf77blas -lcblas -latlas -ltraceback -lbfd -liberty + +#include +#include "la.h" +#include "traceback.h" +#include "sparsemat.h" +#include "matexp.h" +#include "fourindex.h" + + +extern void test(const NRVec &); + +double ad; +void f1(const double *c) +{ +ad=*c; +} + +void f2(double *c) +{ +*c=ad; +} + + +inline int randind(const int n) +{ +return int(random()/(1.+RAND_MAX)*n); +} + +complex mycident (const complex&x) {return x;} + + +int main() +{ +sigtraceback(SIGSEGV,1); +sigtraceback(SIGABRT,1); +sigtraceback(SIGBUS,1); +sigtraceback(SIGFPE,1); +NRVec x(1.,10); +NRVec y(2.,10); +NRVec z(-2.,10); + +y.axpy(3,x); + +y+=z; +/* +cout < a(x); + +NRVec b; +b|=x; + +NRVec c; +c=a; + +y =10. *y ; + +int i; +for(i=0;i u; +u=x+y; +cout <<"u= "< aa(0.,3,3); +aa[0][0]=aa[1][1]=aa(2,2)=2.; + +NRMat bb(aa); + +double *p; +aa.copyonwrite(); p= &aa[2][2]; +*p=3.; +bb.copyonwrite(); bb(0,2)=1.; + +cout << "aa= " < cc=aa & bb; +cout << "aa o+ bb= " << cc <<"\n"; +cout << cc.rsum() <<"\n"; +cout << cc.csum() <<"\n"; + +NRVecw(3); +w[0]=1; w[1]=2;w[2]=3; +NRVec v(0.,3); +v.gemv(0.,bb,'n',1.,w); +cout << " v= " < bb(1.,n,n); +for(int i=0;i amat,bmat,cmat; +cin >>amat; +cin >>bmat; +cmat=amat*bmat; +cout< amat(1.,2,2); +NRMat bmat(amat); +NRMat dmat(amat); +//NRMat cmat; cmat=bmat*2.; +NRMat cmat(bmat*2); //more efficient +dmat.copyonwrite(); dmat[0][0]=0; + +cout< amat; +NRVec avec; + +cin >>amat; +cin >>avec; + +cout << amat*avec; +cout << avec*amat; + +NRVec avec(0.,10); + +f1(avec); +f2(avec); + +NRVec uu(3); +uu[0]=1; uu[1]=2; uu[2]=3; +cout << uu << (uu|uu) <<"\n"; + +NRSMat sa(0.,3); +sa(0,0)=1; sa(0,2)=5; sa(2,2)=10;sa(1,0)=2;sa(1,1)=3; sa(2,1)=-1; + +NRSMat sb(0.,3); +sb(0,0)=-2; sb(0,2)=1; sb(2,2)=2;sb(1,0)=-1;sb(1,1)=7; sb(2,1)=3; + +cout << "symetr\n" < m10(10.,3,3); + cout << "10 + sa" << m10 + sa <<"\n"; +*/ + +/* + +const int dim=256; +NRMat big1(dim,dim),big2(dim,dim),big3; +for(int i=0;i atest, btest,ctest; +{ +int cc,c1,c2,c3; +cin >>cc>>c1>>c2>>c3; +atest.s_cutoff(cc,c1,c2,c3); +} +cin>>atest; +cin>>btest; + +NRMat dtest(atest.nrows(),btest.ncols()); +dtest.gemm(0., atest, 't', btest, 'n', 1.); +cout << dtest; + +NRMat etest(atest.nrows(),btest.ncols()); +etest.strassen(0., atest, 't', btest, 'n', 1.); +cout << etest; +*/ + +if(0) +{ +int dim; +cin >>dim; +NRMat big1(dim,dim),big2(dim,dim),big3,big4(dim,dim); +for(int i=0;i a(3,3),b; +NRVec v(3); +for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= i*i+j; v[i]=10-i;} +b=a; +b*= sin(1.)+1; +cout << a < a(3,3),b; +NRVec v(10); +v[0]=2;v[1]=3;v[2]=1;v[3]=-3;v[4]=2;v[5]=-1;v[6]=3;v[7]=-2;v[8]=1;v[9]=1; +for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; } +cout < a(3,3); +for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; } +NRSMat b(a); +NRMat c(b); +cout < a(3,3); +a[0][0]=1; a[0][1]=2;a[0][2]=3; +a[1][0]=4; a[1][1]=-5;a[1][2]=7; +a[2][0]=-3;a[2][1]=10;a[2][2]=2; +NRMat b(2,3); +b[0][0]=1;b[0][1]=2;b[0][2]=3; +b[1][0]=2;b[1][1]=4;b[1][2]=6; +cout < a(3,3); +for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; } +NRVec b(3); +cout < a(3); +NRMatv(3,3); +for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a(i,j)= (i+j)/10.; } +NRVec b(3); +cout <c=(NRMat)a; //nebo NRMatc(a); +NRMatd=exp(c); +diagonalize(a,b,&v); +cout < a; +cin >>a ; +NRMat b=a.transpose(); +NRMat u(a.nrows(),a.nrows()),v(a.ncols(),a.ncols()); +NRVecs(a.ncols()); +singular_decomposition(a,&u,s,&v); +//singular_decomposition(a,NULL,s,NULL); //this does not work when linked with static version of lapack, works with .so.3 version (from suse distrib) +cout < a(aa,3,3); +NRMat a; +cin >>a; +cout < u(n,n),v(n,n); +NRVecwr(n),wi(n); +gdiagonalize(a,wr,wi,&u,&v,0); +cout <z=diagofproduct(u,v,1); +for(int i=0;i a; +cin >>a; +cout < > u(n,n),v(n,n); +NRVec >w(n); +gdiagonalize(a,w,&u,&v); +cout < >z=diagofproduct(u,v,1,1); +//NRMat > zz=u*v.transpose(1); +cout < a(4,4); +NRVec v(4); +v[0]=1;v[1]=2;v[2]=3;v[3]=4; +a=1.; +a.copyonwrite(); +a.add(3,0,.5); +a.add(0,2,.2); +a.add(2,1,.1); +a.add(3,3,1.); +a.add(1,1,-1.); +SparseMat c(a); +c*=10.; +cout <b(c); +cout < a(4,4),b(4,4); +a=1.; +a.copyonwrite(); +a.add(3,0,.5); +b.add(0,2,.2); +b.add(2,1,.1); +b.add(3,3,1.); +b.add(1,1,-1.); +SparseMatc=a+b; +cout < a(4,4),b(4,4); +a=0.; b=2; +a.add(3,0,.5); +a.add(0,2,.2); +a.add(1,1,1); +a.add(1,0,.2); +b.add(2,1,.1); +b.add(3,3,1.); +b.add(1,1,-1.); +NRMat aa(a),bb(b); +SparseMatc; +NRMatcc; +//cout << NRMat(c); +//cout <(c); +cout <<"norms2 "< aa(n,n); + cout << "\n\n\ntiming for size "< a(0.,n,n); + for(int i=0; i b(exp(a)); + //cout <(a); + } + else + { + for(int i=0; i bb(exp(aa)); + //cout <>n; + SparseMat aa(n,n); + for(int i=0; i bb=exp(aa); + NRVec v(n); + for(int i=0; i res1=bb*v; + NRVec res2=exptimes(aa,v); + cout <<"difference = "<<(res1-res2).norm()< a(4,4),b(4,4),d; +a=0.; b=2; +a.add(3,0,.5); +a.add(0,2,.2); +a.add(1,1,1); +a.add(1,0,.2); +b.add(2,1,.1); +b.add(3,3,1.); +b.add(1,1,-1.); +NRMat aa(a),bb(b),dd; +SparseMatc; +NRMatcc; + +c=commutator(a,b); +cc=commutator(aa,bb); + +cout <(c); +cout <<"norms2 "< v(10.,10); +v+= 5.; +cout < a(n,n); +for(int i=0;i b; b|=a; +NRVec er(n),ei(n); +NRMat vr(n,n),vl(n,n); +gdiagonalize(b,er,ei,&vl,&vr); +cout < u=exp(a*.125); +cout <<"norms "<>n; +NRMat a(n,n); +for(int i=0;i b=exp(a); +cout < a,b; +cin >>b; +int n=b.nrows(); +cout <<"difference from identity = "< x(0.,n,n),x0; + double r; +int i=0; +do + { + x0=x; + NRMat y=exp(x*-.5); + x+= y*b*y; + x-= 1.; + x=(x-x.transpose())*.5; + cout <<"matrix x\n"<1e-10); +cout <<"result\n"< c=log(b); //matrixfunction(a,&mycident,1); +cout < d=exp(c); +cout <<"exp(log(x))\n"<>n; +NRMat a(n,n); +for(int i=0;i b=exp(a); +NRMat s=exp(a*.5); +NRMat y(0.,n,n); +NRMat z(0.,n,n); + double r; +int i=0; +y=b;z=1.; +cout << "norm = "< tmp=z*y*-1.+3.; + NRMat ynew=y*tmp*.5; + z=tmp*z*.5; + y=ynew; + cout <<"iter "<1e-10); +} + + +if(0) +{ +int n=3; +NRMat a(n,n); + a(0,0)=1.; + a(0,1)=2.; + a(1,0)=2.; + a(1,1)=6.; +a(2,2)=-4; +a(0,2)=1; +cout < c=inverse(a,&d); +cout < a(3,3); +NRMat b=a; +for(int i=1; i<4;i++) b=b*b; +} + +if(0) +{ +NRMat a; +cin >>a; +NRMat b=exp(a); +NRMat c=log(b); +cout < a; +cin >>a; +NRMat c=log(a); //matrixfunction(a,&mycident,1); +cout < b=exp(c); +cout <<"exp(log(x))\n"< a; +cin >>a; +NRMat aa(a); +NRMat b=exp(aa); +NRMat c=matrixfunction(a,&exp); +cout < h; +NRMat t; +cin >>h; +cin >>t; +NRMat r1= exp(-t) * h * exp(t); +NRMat r2=BCHexpansion(h,t,30); +cout < +#include "vec.h" + +////////////////////////////////////////////////////////////////////////////// +//// forced instantization in the corespoding object file +#define INSTANTIZE(T) \ +template ostream & operator<<(ostream &s, const NRVec< T > &x); \ +template istream & operator>>(istream &s, NRVec< T > &x); \ + +INSTANTIZE(double) +INSTANTIZE(complex) +template NRVec; +template NRVec< complex >; + + +/* + * Templates first, specializations for BLAS next + */ + +// conversion ctor +#ifndef MATPTR +template +NRVec::NRVec(const NRMat &rhs) +{ + nn = rhs.nn*rhs.mm; + v = rhs.v; + count = rhs.count; + (*count)++; +} +#endif + +// dtor +template +NRVec::~NRVec() +{ + if(!count) return; + if(--(*count) <= 0) { + if(v) delete[] (v); + delete count; + } +} + +// detach from a physical vector and make own copy +template +void NRVec::copyonwrite() +{ +#ifdef DEBUG + if(!count) laerror("probably an assignment to undefined vector"); +#endif + if(*count > 1) + { + (*count)--; + count = new int; + *count = 1; + T *newv = new T[nn]; + memcpy(newv, v, nn*sizeof(T)); + v = newv; + } +} + +// Asignment +template +NRVec & NRVec::operator=(const NRVec &rhs) +{ + if (this != &rhs) + { + if(count) + if(--(*count) == 0) + { + delete[] v; + delete count; + } + v = rhs.v; + nn = rhs.nn; + count = rhs.count; + if(count) (*count)++; + } + return *this; +} + +// Resize +template +void NRVec::resize(const int n) +{ +#ifdef DEBUG + if(n<=0) laerror("illegal vector dimension"); +#endif + if(count) + if(*count > 1) { + (*count)--; + count = 0; + v = 0; + nn = 0; + } + if(!count) { + count = new int; + *count = 1; + nn = n; + v = new T[nn]; + return; + } + // *count = 1 in this branch + if (n != nn) { + nn = n; + delete[] v; + v = new T[nn]; + } +} + +// ostream << NRVec +template +ostream & operator<<(ostream &s, const NRVec &x) +{ + int i, n; + + n = x.size(); + s << n << endl; + for(i=0; i> NRVec +template +istream & operator>>(istream &s, NRVec &x) +{ + int i,n; + + s >> n; + x.resize(n); + for(i=0; i> x[i]; + return s; +} + +// formatted print for NRVec +template +void NRVec::fprintf(FILE *file, const char *format, const int modulo) const +{ + lawritemat(file, v, 1, nn, format, 1, modulo, 0); +} + +// formatted scan for NRVec +template +void NRVec::fscanf(FILE *f, const char *format) +{ + int n; + + if(std::fscanf(f, "%d", &n) != 1) laerror("cannot read vector dimension"); + resize(n); + for (int i=0; i +NRVec & NRVec::operator|=(const NRVec &rhs) +{ + if (this != &rhs) { +#ifdef DEBUG + if (!rhs.v) laerror("unallocated rhs in NRVec operator |="); +#endif + if (count) + if (*count > 1) { + --(*count); + nn = 0; + count = 0; + v = 0; + } + if (nn != rhs.nn) { + if (v) delete[] (v); + nn = rhs.nn; + } + if(!v) v = new T[nn]; + if(!count) count = new int; + *count = 1; + memcpy(v, rhs.v, nn*sizeof(T)); + } + return *this; +} + +// unary minus +template +const NRVec NRVec::operator-() const +{ + NRVec result(nn); + for (int i=0; i::axpy(const double alpha, const NRVec &x) +{ +#ifdef DEBUG + if (nn != x.nn) laerror("axpy of incompatible vectors"); +#endif + copyonwrite(); + cblas_daxpy(nn, alpha, x.v, 1, v, 1); +} + +// axpy call for T = complex (not strided) +void NRVec< complex >::axpy(const complex alpha, + const NRVec< complex > &x) +{ +#ifdef DEBUG + if (nn != x.nn) laerror("axpy of incompatible vectors"); +#endif + copyonwrite(); + cblas_zaxpy(nn, (void *)(&alpha), (void *)(x.v), 1, (void *)v, 1); +} + +// axpy call for T = double (strided) +void NRVec::axpy(const double alpha, const double *x, const int stride) +{ + copyonwrite(); + cblas_daxpy(nn, alpha, x, stride, v, 1); +} + +// axpy call for T = complex (strided) +void NRVec< complex >::axpy(const complex alpha, + const complex *x, const int stride) +{ + copyonwrite(); + cblas_zaxpy(nn, (void *)(&alpha), (void *)x, stride, v, 1); +} + +// unary minus +const NRVec NRVec::operator-() const +{ + NRVec result(*this); + result.copyonwrite(); + cblas_dscal(nn, -1.0, result.v, 1); + return result; +} +const NRVec< complex > +NRVec< complex >::operator-() const +{ + NRVec< complex > result(*this); + result.copyonwrite(); + cblas_zdscal(nn, -1.0, (void *)(result.v), 1); + return result; +} + +// assignment of scalar to every element +template +NRVec & NRVec::operator=(const T &a) +{ + copyonwrite(); + if(a != (T)0) + for (int i=0; i +NRVec & NRVec::normalize() +{ + double tmp; + + tmp = cblas_dnrm2(nn, v, 1); +#ifdef DEBUG + if(!tmp) laerror("normalization of zero vector"); +#endif + copyonwrite(); + tmp = 1.0/tmp; + cblas_dscal(nn, tmp, v, 1); + return *this; +} + +// Normalization of NRVec< complex > +NRVec< complex > & NRVec< complex >::normalize() +{ + complex tmp; + tmp = cblas_dznrm2(nn, (void *)v, 1); +#ifdef DEBUG + if(!(tmp.real()) && !(tmp.imag())) laerror("normalization of zero vector"); +#endif + copyonwrite(); + tmp = 1.0/tmp; + cblas_zscal(nn, (void *)(&tmp), (void *)v, 1); + return *this; +} + +// gemv call +void NRVec::gemv(const double beta, const NRMat &A, + const char trans, const double alpha, const NRVec &x) +{ +#ifdef DEBUG + if ((trans == 'n'?A.ncols():A.nrows()) != x.size()) + laerror("incompatible sizes in gemv A*x"); +#endif + cblas_dgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans), + A.nrows(), A.ncols(), alpha, A[0], A.ncols(), x.v, 1, beta, v, 1); +} +void NRVec< complex >::gemv(const complex beta, + const NRMat< complex > &A, const char trans, + const complex alpha, const NRVec &x) +{ +#ifdef DEBUG + if ((trans == 'n'?A.ncols():A.nrows()) != x.size()) + laerror("incompatible sizes in gemv A*x"); +#endif + cblas_zgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans), + A.nrows(), A.ncols(), (void *)(&alpha), (void *)A[0], A.ncols(), + (void *)x.v, 1, (void *)(&beta), (void *)v, 1); +} + +// Vec * Mat +const NRVec NRVec::operator*(const NRMat &mat) const +{ +#ifdef DEBUG + if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat"); +#endif + int n = mat.ncols(); + NRVec result(n); + cblas_dgemv(CblasRowMajor, CblasTrans, nn, n, 1.0, mat[0], n, v, 1, + 0.0, result.v, 1); + return result; +} +const NRVec< complex > +NRVec< complex >::operator*(const NRMat< complex > &mat) const +{ +#ifdef DEBUG + if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat"); +#endif + int n = mat.ncols(); + NRVec< complex > result(n); + cblas_zgemv(CblasRowMajor, CblasTrans, nn, n, &CONE, mat[0], n, v, 1, + &CZERO, result.v, 1); + return result; +} + +// Direc product Mat = Vec | Vec +const NRMat NRVec::operator|(const NRVec &b) const +{ + NRMat result(0.,nn,b.nn); + cblas_dger(CblasRowMajor, nn, b.nn, 1., v, 1, b.v, 1, result, b.nn); + return result; +} +const NRMat< complex > +NRVec< complex >::operator|(const NRVec< complex > &b) const +{ + NRMat< complex > result(0.,nn,b.nn); + cblas_zgerc(CblasRowMajor, nn, b.nn, &CONE, v, 1, b.v, 1, result, b.nn); + return result; +} + + diff --git a/vec.h b/vec.h new file mode 100644 index 0000000..330ad4c --- /dev/null +++ b/vec.h @@ -0,0 +1,380 @@ +#ifndef _LA_VEC_H_ +#define _LA_VEC_H_ + +extern "C" { +#include "cblas.h" +} +#include +#include +#include +#include + +using namespace std; + +template class NRVec; +template class NRSMat; +template class NRMat; +template class SparseMat; + +////////////////////////////////////////////////////////////////////////////// +// Forward declarations +void laerror(const char *s1=0, const char *s2=0, const char *s3=0, const char *s4=0); +template void lawritemat(FILE *file,const T *a,int r,int c, + const char *form0,int nodim,int modulo, int issym); + +// Memory allocated constants for cblas routines +const static complex CONE = 1.0, CMONE = -1.0, CZERO = 0.0; + +// Macros to construct binary operators +,-,*, from +=, -=, *= +// for 3 cases: X + a, a + X, X + Y +#define NRVECMAT_OPER(E,X) \ +template \ + inline const NR##E NR##E::operator X(const T &a) const \ +{ return NR##E(*this) X##= a; } \ + \ + template \ + inline const NR##E operator X(const T &a, const NR##E &rhs) \ +{ return NR##E(rhs) X##= a; } + +#define NRVECMAT_OPER2(E,X) \ +template \ + inline const NR##E NR##E::operator X(const NR##E &a) const \ +{ return NR##E(*this) X##= a; } + +#include "smat.h" +#include "mat.h" + +// NRVec class +template +class NRVec { +protected: + int nn; + T *v; + int *count; +public: + friend class NRSMat; + friend class NRMat; + + inline NRVec(): nn(0),v(0),count(0){}; + inline explicit NRVec(const int n) : nn(n), v(new T[n]), count(new int(1)) {}; + inline NRVec(const T &a, const int n); + inline NRVec(const T *a, const int n); + inline NRVec(const NRVec &rhs); + inline explicit NRVec(const NRSMat & S); +#ifndef MATPTR + explicit NRVec(const NRMat &rhs); +#endif + NRVec & operator=(const NRVec &rhs); + NRVec & operator=(const T &a); //assign a to every element + NRVec & operator|=(const NRVec &rhs); + const NRVec operator-() const; + inline NRVec & operator+=(const NRVec &rhs); + inline NRVec & operator-=(const NRVec &rhs); + inline NRVec & operator+=(const T &a); + inline NRVec & operator-=(const T &a); + inline NRVec & operator*=(const T &a); + inline int getcount() const {return count?*count:0;} + inline const NRVec operator+(const NRVec &rhs) const; + inline const NRVec operator-(const NRVec &rhs) const; + inline const NRVec operator+(const T &a) const; + inline const NRVec operator-(const T &a) const; + inline const NRVec operator*(const T &a) const; + inline const T operator*(const NRVec &rhs) const; //scalar product -> ddot + inline const NRVec operator*(const NRSMat & S) const; + const NRVec operator*(const NRMat &mat) const; + const NRMat operator|(const NRVec &rhs) const; + inline const T sum() const; //sum of its elements + inline const T dot(const T *a, const int stride=1) const; // ddot with a stride-vector + inline T & operator[](const int i); + inline const T & operator[](const int i) const; + inline int size() const; + inline operator T*(); //get a pointer to the data + inline operator const T*() const; //get a pointer to the data + ~NRVec(); + void axpy(const T alpha, const NRVec &x); // this+= a*x + void axpy(const T alpha, const T *x, const int stride=1); // this+= a*x + void gemv(const T beta, const NRMat &a, const char trans, + const T alpha, const NRVec &x); + void copyonwrite(); + void resize(const int n); + NRVec & normalize(); + inline const double norm() const; + inline const T amax() const; + inline const NRVec unitvector() const; + void fprintf(FILE *f, const char *format, const int modulo) const; + void fscanf(FILE *f, const char *format); +//sparse matrix concerning members + explicit NRVec(const SparseMat &rhs); // dense from sparse matrix with one of dimensions =1 + const NRVec operator*(const SparseMat &mat) const; //vector*matrix + inline void simplify() {}; //just for compatibility with sparse ones + void gemv(const T beta, const SparseMat &a, const char trans, const T alpha, const NRVec &x); +}; + +template ostream & operator<<(ostream &s, const NRVec &x); +template istream & operator>>(istream &s, NRVec &x); + +// INLINES + +// ctors +template +inline NRVec::NRVec(const T& a, const int n) : nn(n), v(new T[n]), count(new int) +{ + *count = 1; + if(a != (T)0) + for(int i=0; i +inline NRVec::NRVec(const T *a, const int n) : nn(n), v(new T[n]), count(new int) +{ + *count = 1; + memcpy(v, a, n*sizeof(T)); +} + +template +inline NRVec::NRVec(const NRVec &rhs) +{ + v = rhs.v; + nn = rhs.nn; + count = rhs.count; + if(count) (*count)++; +} + +template +inline NRVec::NRVec(const NRSMat &rhs) +{ + nn = rhs.nn; + nn = NN2; + v = rhs.v; + count = rhs.count; + (*count)++; +} + +// x += a +inline NRVec & NRVec::operator+=(const double &a) +{ + copyonwrite(); + cblas_daxpy(nn, 1.0, &a, 0, v, 1); + return *this; +} +inline NRVec< complex > & +NRVec< complex >::operator+=(const complex &a) +{ + copyonwrite(); + cblas_zaxpy(nn, (void *)(&CONE), (void *)(&a), 0, (void *)v, 1); + return *this; +} + +// x -= a +inline NRVec & NRVec::operator-=(const double &a) +{ + copyonwrite(); + cblas_daxpy(nn, 1.0, &a, 0, v, 1); + return *this; +} +inline NRVec< complex > & +NRVec< complex >::operator-=(const complex &a) +{ + copyonwrite(); + cblas_zaxpy(nn, (void *)(&CMONE), (void *)(&a), 0, (void *)v, 1); + return *this; +} + +// x += x +inline NRVec & NRVec::operator+=(const NRVec &rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("daxpy of incompatible vectors"); +#endif + copyonwrite(); + cblas_daxpy(nn, 1.0, rhs.v, 1, v, 1); + return *this; +} +inline NRVec< complex > & +NRVec< complex >::operator+=(const NRVec< complex > &rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("daxpy of incompatible vectors"); +#endif + copyonwrite(); + cblas_zaxpy(nn, (void *)(&CONE), rhs.v, 1, v, 1); + return *this; +} + +// x -= x +inline NRVec & NRVec::operator-=(const NRVec &rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("daxpy of incompatible vectors"); +#endif + copyonwrite(); + cblas_daxpy(nn, -1.0, rhs.v, 1, v, 1); + return *this; +} +inline NRVec< complex > & +NRVec< complex >::operator-=(const NRVec< complex > &rhs) +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("daxpy of incompatible vectors"); +#endif + copyonwrite(); + cblas_zaxpy(nn, (void *)(&CMONE), (void *)rhs.v, 1, (void *)v, 1); + return *this; +} + +// x *= a +inline NRVec & NRVec::operator*=(const double &a) +{ + copyonwrite(); + cblas_dscal(nn, a, v, 1); + return *this; +} +inline NRVec< complex > & +NRVec< complex >::operator*=(const complex &a) +{ + copyonwrite(); + cblas_zscal(nn, (void *)(&a), (void *)v, 1); + return *this; +} + +// scalar product x.y +inline const double NRVec::operator*(const NRVec &rhs) const +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("ddot of incompatible vectors"); +#endif + return cblas_ddot(nn, v, 1, rhs.v, 1); +} +inline const complex +NRVec< complex >::operator*(const NRVec< complex > &rhs) const +{ +#ifdef DEBUG + if (nn != rhs.nn) laerror("ddot of incompatible vectors"); +#endif + complex dot; + cblas_zdotc_sub(nn, (void *)v, 1, (void *)rhs.v, 1, (void *)(&dot)); + return dot; +} + +// Vec * SMat = SMat * Vec +template +inline const NRVec NRVec::operator*(const NRSMat & S) const +{ + return S * (*this); +} + +// Sum of elements +inline const double NRVec::sum() const +{ + return cblas_dasum(nn, v, 1); +} +inline const complex +NRVec< complex >::sum() const +{ + complex sum = CZERO; + for (int i=0; i::dot(const double *y, const int stride) const +{ + return cblas_ddot(nn, y, stride, v, 1); +} +inline const complex +NRVec< complex >::dot(const complex *y, const int stride) const +{ + complex dot; + cblas_zdotc_sub(nn, y, stride, v, 1, (void *)(&dot)); + return dot; +} + +// x[i] returns i-th element +template +inline T & NRVec::operator[](const int i) +{ +#ifdef DEBUG + if(*count != 1) laerror("possible lval [] with count > 1"); + if(i < 0 || i >= nn) laerror("NRVec out of range"); + if(!v) laerror("[] on unallocated NRVec"); +#endif + return v[i]; +} +template +inline const T & NRVec::operator[](const int i) const +{ +#ifdef DEBUG + if(i < 0 || i >= nn) laerror("NRVec out of range"); + if(!v) laerror("[] on unallocated NRVec"); +#endif + return v[i]; +} + +// length of the vector +template +inline int NRVec::size() const +{ + return nn; +} + +// reference Vec to the first element +template +inline NRVec::operator T*() +{ +#ifdef DEBUG + if(!v) laerror("unallocated NRVec in operator T*"); +#endif + return v; +} +template +inline NRVec::operator const T*() const +{ +#ifdef DEBUG + if(!v) laerror("unallocated NRVec in operator T*"); +#endif + return v; +} + +// return norm of the Vec +inline const double NRVec::norm() const +{ + return cblas_dnrm2(nn, v, 1); +} +inline const double NRVec< complex >::norm() const +{ + return cblas_dznrm2(nn, (void *)v, 1); +} + +// Max element of the array +inline const double NRVec::amax() const +{ + return v[cblas_idamax(nn, v, 1)]; +} +inline const complex NRVec< complex >::amax() const +{ + return v[cblas_izamax(nn, (void *)v, 1)]; +} + + +// Make Vec unitvector +template +inline const NRVec NRVec::unitvector() const +{ + return NRVec(*this).normalize(); +} + +// generate operators: Vec + a, a + Vec, Vec * a +NRVECMAT_OPER(Vec,+) +NRVECMAT_OPER(Vec,-) +NRVECMAT_OPER(Vec,*) +// generate operators: Vec + Vec, Vec - Vec +NRVECMAT_OPER2(Vec,+) +NRVECMAT_OPER2(Vec,-) + +// Few forward declarations + + +#endif /* _LA_VEC_H_ */