diff --git a/mat.cc b/mat.cc index 0d972f6..d6bb304 100644 --- a/mat.cc +++ b/mat.cc @@ -27,6 +27,7 @@ #include #include #include +#include namespace LA { @@ -2754,6 +2755,7 @@ NRMat& NRMat::swap_rows(){ return *this; } + /***************************************************************************//** * interchange the order of the columns of the current (real) matrix * @return reference to the modified matrix @@ -2925,6 +2927,61 @@ NRMat& NRMat::swap_rows(const int a, const int b){ return *this; } + +/*rotate rows or columns of a matrix - general implementation, more efficient version could be done with BLAS scal and axpy operations + * but it would require allocation of temporary storage + */ + +template +NRMat& NRMat::rotate_rows(const int a, const int b, const T phi){ + T tmp1,tmp2; + copyonwrite(); + T c=cos(phi); + T s=sin(phi); +#ifdef CUDALA + if(location == cpu){ +#endif + for(register int j=0;j +NRMat& NRMat::rotate_cols(const int a, const int b, const T phi){ + T tmp1,tmp2; + copyonwrite(); + T c=cos(phi); + T s=sin(phi); +#ifdef CUDALA + if(location == cpu){ +#endif + for(register int j=0;j& NRMat::swap_rows_cols(){ return *this; } +//permutation matrix +template +NRMat::NRMat(const NRPerm &p, const bool direction) +{ +int n=p.size(); +resize(n,n); +clear(); +for(int i=0; i -const NRMat NRMat::permute_rows(const NRPerm &p) const +const NRMat NRMat::permuted_rows(const NRPerm &p, const bool inverse) const { #ifdef DEBUG if(!p.is_valid()) laerror("invalid permutation of matrix"); @@ -3088,12 +3159,13 @@ if(n!=nn) laerror("incompatible permutation and matrix"); if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); #endif NRMat r(nn,mm); -for(int i=1; i<=n; ++i) {int pi=p[i]-1; for(int j=0; j -const NRMat NRMat::permute_cols(const NRPerm &p) const +const NRMat NRMat::permuted_cols(const NRPerm &p, const bool inverse) const { #ifdef DEBUG if(!p.is_valid()) laerror("invalid permutation of matrix"); @@ -3104,12 +3176,13 @@ if(n!=mm) laerror("incompatible permutation and matrix"); if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); #endif NRMat r(nn,mm); -for(int i=1; i<=n; ++i) {int pi=p[i]-1; for(int j=0; j -const NRMat NRMat::permute_both(const NRPerm &p, const NRPerm &q) const +const NRMat NRMat::permuted_both(const NRPerm &p, const NRPerm &q, const bool inverse) const { #ifdef DEBUG if(!p.is_valid() || !q.is_valid() ) laerror("invalid permutation of matrix"); @@ -3121,11 +3194,171 @@ if(n!=nn ||m!=mm) laerror("incompatible permutation and matrix"); if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); #endif NRMat r(nn,mm); -for(int i=1; i<=n; ++i) {int pi=p[i]-1; for(int j=1; j<=m; ++j) r(i-1,j-1) = (*this)(pi,q[j]-1);} +if(inverse) for(int i=1; i<=n; ++i) {int pi=p[i]-1; for(int j=1; j<=m; ++j) r(i-1,j-1) = (*this)(pi,q[j]-1);} +else for(int i=1; i<=n; ++i) {int pi=p[i]-1; for(int j=1; j<=m; ++j) r(pi,q[j]-1) = (*this)(i-1,j-1);} return r; } +template +void NRMat::permuteme_rows(const CyclePerm &p) +{ +#ifdef DEBUG +if(!p.is_valid()) laerror("invalid permutation of matrix"); +#endif +if(p.max()>nn) laerror("incompatible permutation and matrix"); +#ifdef CUDALA + if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); +#endif +copyonwrite(); +T *tmp = new T[mm]; +for(int cycle=1; cycle<=p.size(); ++cycle) + { + int length= p[cycle].size(); + if(length<=1) continue; //trivial cycle + for(int j=0; j1; --i) + for(int j=0; j +void NRMat::permuteme_cols(const CyclePerm &p) +{ +#ifdef DEBUG +if(!p.is_valid()) laerror("invalid permutation of matrix"); +#endif +if(p.max()>mm) laerror("incompatible permutation and matrix"); +#ifdef CUDALA + if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); +#endif +copyonwrite(); +T *tmp = new T[nn]; +for(int cycle=1; cycle<=p.size(); ++cycle) + { + int length= p[cycle].size(); + if(length<=1) continue; //trivial cycle + for(int j=0; j1; --i) + for(int j=0; j +void NRMat::scale_row(const int i, const double f) +{ +#ifdef DEBUG +if(i<0||i>=nn) laerror("index out of range in scale_row"); +#endif +copyonwrite(); +#ifdef CUDALA + if(location == cpu) { +#endif + cblas_dscal(mm, f, &(*this)(i,0), 1); +#ifdef CUDALA + }else{ + cublasDscal(mm, f, v+i*mm, 1); + TEST_CUBLAS("cublasDscal"); + } +#endif +} + +template<> +void NRMat::scale_col(const int i, const double f) +{ +#ifdef DEBUG +if(i<0||i>=mm) laerror("index out of range in scale_col"); +#endif +copyonwrite(); +#ifdef CUDALA + if(location == cpu) { +#endif + cblas_dscal(nn, f, &(*this)(0,i), mm); +#ifdef CUDALA + }else{ + cublasDscal(nn, f, v+i, mm); + TEST_CUBLAS("cublasDscal"); + } +#endif +} + + +template<> +void NRMat >::scale_row(const int i, const std::complex f) +{ +#ifdef DEBUG +if(i<0||i>=nn) laerror("index out of range in scale_row"); +#endif +copyonwrite(); +#ifdef CUDALA + if(location == cpu) { +#endif + cblas_zscal(mm, &f, &(*this)(i,0), 1); +#ifdef CUDALA + }else{ + const cuDoubleComplex fac = *(reinterpret_cast (&f)); + cublasZscal(mm, &fac, v+i*mm, 1); + TEST_CUBLAS("cublasDscal"); + } +#endif +} + +template<> +void NRMat >::scale_col(const int i, const std::complex f) +{ +#ifdef DEBUG +if(i<0||i>=mm) laerror("index out of range in scale_col"); +#endif +copyonwrite(); +#ifdef CUDALA + if(location == cpu) { +#endif + cblas_zscal(nn, &f, &(*this)(0,i), mm); +#ifdef CUDALA + }else{ + const cuDoubleComplex fac = *(reinterpret_cast (&f)); + cublasZscal(nn, &fac, v+i, mm); + TEST_CUBLAS("cublasDscal"); + } +#endif +} + + + + + + +//general version +template +void NRMat::scale_row(const int i, const T f) +{ +#ifdef DEBUG +if(i<0||i>=nn) laerror("index out of range in scale_row"); +#endif +copyonwrite(); +for(int j=0; j +void NRMat::scale_col(const int i, const T f) +{ +#ifdef DEBUG +if(i<0||i>=mm) laerror("index out of range in scale_col"); +#endif +copyonwrite(); +for(int j=0; j &p) const; - const NRMat permute_cols(const NRPerm &p) const; - const NRMat permute_both(const NRPerm &p, const NRPerm &q) const; + const NRMat permuted_rows(const NRPerm &p, const bool inverse=false) const; + const NRMat permuted_cols(const NRPerm &p, const bool inverse=false) const; + const NRMat permuted_both(const NRPerm &p, const NRPerm &q, const bool inverse=false) const; + void permuteme_rows(const CyclePerm &p); //in place + void permuteme_cols(const CyclePerm &p); //in place + void scale_row(const int i, const T f); //in place + void scale_col(const int i, const T f); //in place + explicit NRMat(const NRPerm &p, const bool direction); //permutation matrix /***************************************************************************//** @@ -349,6 +354,11 @@ public: // LV - swapping of rows i and j NRMat & swap_rows(const int i, const int j); + //rotate rows or columns through an angle + NRMat & rotate_cols(const int i, const int j, const T phi); + NRMat & rotate_rows(const int i, const int j, const T phi); + + //! multiply by sparse matrix SparseSMat operator*(const SparseSMat &rhs) const; diff --git a/permutation.cc b/permutation.cc index 642673b..ea69e25 100644 --- a/permutation.cc +++ b/permutation.cc @@ -17,6 +17,10 @@ */ #include "permutation.h" +#include +#include + + namespace LA { template @@ -27,6 +31,7 @@ T n=this->size(); if(n<0) laerror("invalid permutation size"); #endif if(n==0) return; +this->copyonwrite(); for(T i=1; i<=n; ++i) (*this)[i]=i; } @@ -118,12 +123,14 @@ return (count&1)? -1:1; } template -NRPerm::NRPerm(const CyclePerm &rhs, int n) -: NRVec_from1(n) +NRPerm::NRPerm(const CyclePerm &rhs, const int n) { #ifdef DEBUG if(!rhs.is_valid()) laerror("invalid cycle permutation"); #endif +int m; +if(n) m=n; else m=rhs.max(); +this->resize(m); identity(); T ncycles=rhs.size(); @@ -137,6 +144,22 @@ if(!is_valid()) laerror("internal error in NRPerm constructor from CyclePerm"); #endif } +template +void NRPerm::randomize(void) +{ +int n=this->size(); +if(n<=0) laerror("cannot randomize empty permutation"); +this->copyonwrite(); +this->identity(); +for(int i=n-1; i>=1; --i) + { + int j= random()%(i+1); + T tmp = (*this)[i+1]; + (*this)[i+1]=(*this)[j+1]; + (*this)[j+1]=tmp; + } +} + //////////////////////////////////////////////////////// @@ -150,7 +173,7 @@ T n=p.size(); NRVec_from1 used(0,n),tmp(n); T firstunused=1; T currentcycle=0; -std::list > cyclelist={}; +std::list > cyclelist={}; do { //find a cycle starting with first unused element @@ -196,7 +219,7 @@ for(T i=1; i<=this->size(); ++i) for(T ii=i; ii<=this->size(); ++ii) { T nn=(*this)[ii].size(); - for(T jj=1; jj<=nn; ++jj) + for(T jj=(ii==i?j+1:1); jj<=nn; ++jj) { T xx=(*this)[ii][jj]; if(x==xx) return false; @@ -233,11 +256,24 @@ for(T i=1; i<=ncycles; ++i) { T length=(*this)[i].size(); r[i].resize(length); + //reverse order in cycles (does not matter in cycle lengths 1 and 2 anyway) for(T j=1; j<=length; ++j) r[i][j] = (*this)[i][length-j+1]; } return r; } +//multiplication via NRPerm - could there be a more efficient direct algorithm? +template +CyclePerm CyclePerm::operator*(const CyclePerm q) const +{ +int m=this->max(); +int mm=q.max(); +if(mm>m) mm=m; +NRPerm qq(q,m); +NRPerm pp(*this,m); +NRPerm rr=pp*qq; +return CyclePerm(rr); +} template int CyclePerm::parity() const @@ -271,12 +307,105 @@ for(T i=1; i<=ncycles; ++i) r[length]++; } //fill in trivial cycles of length one -r[1] = n - r.sum(); +r[1] += n - r.sum(); if(r[1]<0) laerror("inconsistent cycle lengths in CyclePerm::cycles"); return r; } +//auxiliary function for input of a permutation in cycle format +//returns pointer after closing bracket or NULL if no cycle found +//or input error +template +const char *read1cycle(NRVec_from1 &c, const char *p) +{ +if(*p==0) return NULL; +const char *openbracket = strchr(p,'('); +if(!openbracket) return NULL; +const char *closebracket = strchr(openbracket+1,')'); +if(!closebracket) return NULL; +const char *s = openbracket+1; +int r; +int length=0; +std::list cycle; +do { + long int tmp; + int nchar; + if(*s==',') ++s; + r = sscanf(s,"%ld%n",&tmp,&nchar); + if(r==1) + { + ++length; + s += nchar; + cycle.push_back((T)tmp); + } + } + while(r==1 && s +void CyclePerm::readfrom(const std::string &line) +{ +const char *p=line.c_str(); +std::list > cyclelist={}; +int ncycles=0; +int count=0; +NRVec_from1 c; +while(p=read1cycle(c,p)) + { + //printf("cycle %d of length %d read\n",count,c.size()); + if(c.size()!=0) //store a nonempty cycle + { + ++count; + cyclelist.push_back(c); + } + } + + +//convert list to vector +this->resize(count); +T i=0; +for(auto l=cyclelist.begin(); l!=cyclelist.end(); ++l) (*this)[++i] = *l; +#ifdef DEBUG +if(!this->is_valid()) laerror("readfrom received input of invalid CyclePerm"); +#endif +} + + +template +std::istream & operator>>(std::istream &s, CyclePerm &x) +{ +std::string l; +getline(s,l); +x.readfrom(l); +return s; +} + +template +std::ostream & operator<<(std::ostream &s, const CyclePerm &x) +{ +for(int i=1; i<=x.size(); ++i) + { + s<<"("; + for(int j=1; j<=x[i].size(); ++j) + { + s<; template class CyclePerm; template class Partition; -} + +#define INSTANTIZE(T) \ +template std::istream & operator>>(std::istream &s, CyclePerm &x); \ +template std::ostream & operator<<(std::ostream &s, const CyclePerm &x); \ + + + +INSTANTIZE(int) + + + + +}//namespace diff --git a/permutation.h b/permutation.h index 8c651f4..db3258e 100644 --- a/permutation.h +++ b/permutation.h @@ -41,7 +41,7 @@ public: NRPerm(const NRVec_from1 &rhs): NRVec_from1(rhs) {}; NRPerm(const T &a, const int n): NRVec_from1(a, n) {}; NRPerm(const T *a, const int n): NRVec_from1(a, n) {}; - NRPerm(const CyclePerm &rhs, int n); + explicit NRPerm(const CyclePerm &rhs, const int n=0); //specific operations void identity(); @@ -51,10 +51,10 @@ public: NRPerm operator*(const NRPerm q) const; //q is rhs and applied first, this applied second NRPerm conjugate_by(const NRPerm q) const; //q^-1 p q int parity() const; + void randomize(void); //uniformly random by Fisher-Yates shuffle //TODO: - //@@@permutation matrix //@@@permgener //@@@next permutation //@@@lex rank @@ -67,19 +67,27 @@ template class CyclePerm : public NRVec_from1 > { public: CyclePerm() : NRVec_from1 >() {}; - CyclePerm(const NRPerm &rhs); + explicit CyclePerm(const NRPerm &rhs); bool is_valid() const; //is it really a permutation bool is_identity() const; //no cycles of length > 1 CyclePerm inverse() const; //reverse all cycles int parity() const; //negative if having odd number of even-length cycles + T max() const {T m=0; for(int i=1; i<=this->size(); ++i) {T mm= (*this)[i].max(); if(mm>m) m=mm;} return m;} Partition cycles(const T n) const; - //@@@efficient algorithm for multiplication? - //@@@operator >> and << - //@@@operation in place on matrix and vector + void readfrom(const std::string &line); + CyclePerm operator*(const CyclePerm q) const; //q is rhs and applied first, this applied second }; +template +std::istream & operator>>(std::istream &s, CyclePerm &x); + +template +std::ostream & operator<<(std::ostream &s, const CyclePerm &x); + + + //partitions stored as #of 1s, #of 2s, etc. template class Partition : public NRVec_from1 { @@ -94,8 +102,8 @@ public: //@@@generate all partitions, //@@@enumerator of partitions of n to r parts and total //@@@adjoint partition, -//@@@ output as in the group character table -//@@@Sn character table +//@@@output formatted as in the group character table +//@@@Sn character table computation }; diff --git a/smat.cc b/smat.cc index 75101bb..8d7c3b1 100644 --- a/smat.cc +++ b/smat.cc @@ -305,7 +305,7 @@ void NRSMat::fscanf(FILE *f, const char *format) { //apply permutation template -const NRSMat NRSMat::permute(const NRPerm &p) const +const NRSMat NRSMat::permuted(const NRPerm &p, const bool inverse) const { #ifdef DEBUG if(!p.is_valid()) laerror("invalid permutation of smatrix"); @@ -316,7 +316,8 @@ if(n!=(*this).size()) laerror("incompatible permutation and smatrix"); if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); #endif NRSMat r(n); -for(int i=1; i<=n; ++i) {int pi = p[i]-1; r(i-1,i-1) = (*this)(pi,pi);} +if(inverse) for(int i=1; i<=n; ++i) {int pi = p[i]-1; r(i-1,i-1) = (*this)(pi,pi);} +else for(int i=1; i<=n; ++i) {int pi = p[i]-1; r(pi,pi) = (*this)(i-1,i-1);} return r; } diff --git a/smat.h b/smat.h index d6b4a09..3dd2c10 100644 --- a/smat.h +++ b/smat.h @@ -93,7 +93,7 @@ public: NRSMat & operator=(const T &a); //! permute matrix elements - const NRSMat permute(const NRPerm &p) const; + const NRSMat permuted(const NRPerm &p, const bool inverse=false) const; inline int getcount() const {return count?*count:0;} diff --git a/t.cc b/t.cc index 25d5d59..4ba4c42 100644 --- a/t.cc +++ b/t.cc @@ -1989,7 +1989,7 @@ c=a+b; cout< p; cin >>p; @@ -1997,8 +1997,67 @@ int n=p.size(); NRVec_from1 v(n); int i; for(i=1; i<=n; ++i) v[i]=10.*i; -cout < c; +cin>>c; +cout< p(c); +cout < cc(p); +cout <>n; +NRPerm p(n); +p.randomize(); +cout < cc(p); +cout < pp(cc,n); +cout < v(n); +for(int i=0; i vv(v); +v.permuteme(cc); +cout < vvv= vv.permuted(pp); +cout<>n; +NRVec v(n); +v.randomize(1.); +NRVec vv(v); +NRPerm p(n); +vv.sort(0,p); +NRVec vvv=v.permuted(p,true); +NRVec v4=vv.permuted(p,false); +cout<::sort(int direction, int from, int to, int *perm) { else return memqsort<0, NRVec, int, int>(*this, perm, from, to); } +template +int NRVec::sort(int direction, NRPerm &perm) +{ +if(nn!=perm.size()) laerror("incompatible vector and permutation"); +perm.identity(); +int r=sort(direction,0,nn-1,&perm[1]); +return r; +} + template<> NRVec > complexify(const NRVec &rhs) { NRVec > r(rhs.size(), rhs.getlocation()); @@ -834,7 +843,7 @@ NRVec > complexify(const NRVec &rhs) { } template -const NRVec NRVec::permute(const NRPerm &p) const +const NRVec NRVec::permuted(const NRPerm &p, const bool inverse) const { #ifdef DEBUG if(!p.is_valid()) laerror("invalid permutation of vector"); @@ -845,10 +854,34 @@ if(n!=(*this).size()) laerror("incompatible permutation and vector"); if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); #endif NRVec r(n); -for(int i=1; i<=n; ++i) r[i-1] = v[p[i]-1]; +if(inverse) for(int i=1; i<=n; ++i) r[i-1] = v[p[i]-1]; +else for(int i=1; i<=n; ++i) r[p[i]-1] = v[i-1]; return r; } + +template +void NRVec::permuteme(const CyclePerm &p) +{ +#ifdef DEBUG +if(!p.is_valid()) laerror("invalid permutation of vector"); +#endif +if(p.max()>nn) laerror("incompatible permutation and vector"); +#ifdef CUDALA + if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); +#endif +copyonwrite(); +for(int cycle=1; cycle<=p.size(); ++cycle) + { + int length= p[cycle].size(); + if(length<=1) continue; //trivial cycle + T tmp = v[p[cycle][length]-1]; + for(int i=length; i>1; --i) v[p[cycle][i]-1] = v[p[cycle][i-1]-1]; + v[p[cycle][1]-1] = tmp; + } +} + + /***************************************************************************//** * forced instantization in the corespoding object file ******************************************************************************/ @@ -911,6 +944,38 @@ INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex >) INSTANTIZE_DUMMY(std::complex >) + + +//also not supported on gpu +#define INSTANTIZE_NONCOMPLEX(T) \ +template<>\ +const T NRVec::max() const\ +{\ +if(nn==0) return 0;\ +T m=v[0];\ +for(int i=1; im) m=v[i];\ +return m;\ +}\ +\ +template<>\ +const T NRVec::min() const\ +{\ +if(nn==0) return 0;\ +T m=v[0];\ +for(int i=1; i; template class NRVec >; template class NRVec; diff --git a/vec.h b/vec.h index 5c40609..39d9473 100644 --- a/vec.h +++ b/vec.h @@ -271,7 +271,8 @@ public: }; //! permute vector elements - const NRVec permute(const NRPerm &p) const; + const NRVec permuted(const NRPerm &p, const bool inverse=false) const; + void permuteme(const CyclePerm &p); //in place //! compute the sum of the absolute values of the elements of this vector inline const typename LA_traits::normtype asum() const; @@ -318,6 +319,12 @@ public: //! determine the minimal element (in the absolute value) of this vector inline const T amin() const; + //! determine the maximal element of this vector + const T max() const; + //! determine the minimal element of this vector + const T min() const; + + //! routine for formatted output void fprintf(FILE *f, const char *format, const int modulo) const; //! routine for unformatted output @@ -355,6 +362,7 @@ public: //! sort by default in ascending order and return the parity of corresponding permutation resulting to this order int sort(int direction = 0, int from = 0, int to = -1, int *perm = NULL); + int sort(int direction, NRPerm &perm); //! apply given function to each element NRVec& call_on_me(T (*_F)(const T &) ){ @@ -1082,6 +1090,8 @@ void NRVec::moveto(const GPUID dest) { } #endif + + /***************************************************************************//** * adds a real scalar value \f$\alpha\f$ to all elements of this real vector \f$\vec{x}\f$ * \f[\vec{x}_i\leftarrow\vec{x}_i+\alpha\f]