*** empty log message ***

2004-03-17 03:07:21 +00:00 · 2004-03-17 03:07:21 +00:00 · d7b55e9846
commit d7b55e9846
18 changed files with 5955 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,28 @@
 # CVS default ignores begin
 tags
 TAGS
 .make.state
 .nse_depinfo
 *~
 \#*
 .#*
 ,*
 _$*
 *$
 *.old
 *.bak
 *.BAK
 *.orig
 *.rej
 .del-*
 *.a
 *.olb
 *.o
 *.obj
 *.so
 *.exe
 *.Z
 *.elc
 *.ln
 core
 # CVS default ignores end
--- a/fourindex.h
+++ b/fourindex.h
@ -0,0 +1,261 @@
 #ifndef _fourindex_included
 #define _fourindex_included
 // Element of a singly linked list: one value plus its four indices.
 // The indices are stored portably (no bit shifts, no endianness issues) and
 // can be addressed either as the array `packed` or by name through `indiv`.
 template<class I, class T>
 struct matel4
 {
     union packedindex {
         I packed[4];        // the four indices as an array
         struct {
             I i;
             I j;
             I k;
             I l;
         } indiv;            // the same four indices by name
     };
     T elem;                 // the stored value
     matel4 *next;           // following list element
     packedindex index;      // indices belonging to elem
 };
 // Symmetry tag of a fourindex; for the twoelectron variants only
 // permutation-nonequivalent elements are stored.
 typedef enum {
     nosymmetry         = 0,
     twoelectronreal    = 1,
     twoelectroncomplex = 2,
     twobodyantisym     = 3
 } fourindexsymtype;
 template <class I, class T>
 class fourindex {
 protected:
 	I nn;
 	fourindexsymtype symmetry;
        int *count;
 	matel4<I,T> *list;
 private:
 	void deletelist();
 	void copylist(const matel4<I,T> *l);
 public:
 	//iterator
 	typedef class iterator {
 	private:
 		matel4<I,T> *p;
 	public:
 		iterator() {};
 		~iterator() {};
 		iterator(matel4<I,T> *list): p(list) {};
 		bool operator==(const iterator rhs) const {return p==rhs.p;}
 		bool operator!=(const iterator rhs) const {return p!=rhs.p;}
 		iterator operator++() {return p=p->next;}
 		iterator operator++(int) {matel4<I,T> *q=p; p=p->next; return q;}
 		matel4<I,T> & operator*() const {return *p;}
 		matel4<I,T> * operator->() const {return p;}
 	};
 	iterator begin() const {return list;}
 	iterator end() const {return NULL;}
 	//constructors etc.
 	inline fourindex() :nn(0),count(NULL),list(NULL) {};
 	inline fourindex(const I n) :nn(n),count(new int(1)),list(NULL) {};
 	fourindex(const fourindex &rhs); //copy constructor
 	inline int getcount() const {return count?*count:0;}
 	fourindex & operator=(const fourindex &rhs);
        fourindex & operator+=(const fourindex &rhs);
 	inline void setsymmetry(fourindexsymtype s) {symmetry=s;}
        fourindex & join(fourindex &rhs); //more efficient +=, rhs will be emptied
 	inline ~fourindex();
 	inline matel4<I,T> *getlist() const {return list;}
 	inline I size() const {return nn;}
 	void resize(const I n);
 	void copyonwrite();
 	int length() const;
 	inline void add(const I i, const I j, const I k, const I l, const T elem) 
 		{matel4<I,T> *ltmp= new matel4<I,T>; ltmp->next=list; list=ltmp; list->index.indiv.i=i;list->index.indiv.j=j;list->index.indiv.k=k;list->index.indiv.l=l; list->elem=elem;}
 	inline void add(const typename matel4<I,T>::packedindex &index , const T elem) 
                {matel4<I,T> *ltmp= new matel4<I,T>; ltmp->next=list; list=ltmp; list->index=index; list->elem=elem;}
 	inline void add(const I (&index)[4], const T elem)
                {matel4<I,T> *ltmp= new matel4<I,T>; ltmp->next=list; list=ltmp; memcpy(&list->index.packed, &index, sizeof(typename matel4<I,T>::packedindex)); list->elem=elem;}
 };
 //destructor: give up one reference, free the list when it was the last one
 template <class I,class T>
 fourindex<I,T>::~fourindex()
 {
     if(count==NULL) return;      // undefined object, nothing to release
     --(*count);
     if(*count>0) return;         // list is still referenced by another object
     deletelist();
     delete count;
 }
 //copy constructor (sort arrays are not going to be copied)
 // Shares rhs's list and reference counter when rhs is non-empty; otherwise
 // the new object gets its own counter and an empty list. The symmetry tag is
 // taken over from rhs as well.
 template <class I, class T>
 fourindex<I,T>::fourindex(const fourindex<I,T> &rhs)
 {
 #ifdef debug
 if(! &rhs) laerror("fourindex copy constructor with NULL argument");
 #endif
     nn=rhs.nn;
     symmetry=rhs.symmetry;
     if(rhs.list&&!rhs.count) laerror("some inconsistency in fourindex contructors or assignments");
     list=rhs.list;
     if(list) {count=rhs.count; (*count)++;}
     else count=new int(1); //make the matrix defined, but empty and not shared
 }
 //assignment operator
 // Releases the reference to the old list (freeing it when this was the last
 // owner), then shares rhs's list, dimension and symmetry tag. An empty rhs
 // yields a defined, empty, unshared object.
 template <class I, class T>
 fourindex<I,T> & fourindex<I,T>::operator=(const fourindex<I,T> &rhs)
 {
     if (this != &rhs)
         {
         if(count)
             if(--(*count) ==0) {deletelist(); delete count;} // old stuff obsolete
         list=rhs.list;
         nn=rhs.nn;
         symmetry=rhs.symmetry;
         if(list) count=rhs.count; else count= new int(0); //make the matrix defined, but empty and not shared, count will be incremented below
         if(count) (*count)++;
         }
     return *this;
 }
 // Appends copies of all elements of rhs to this object; rhs is left untouched.
 template <class I, class T>
 fourindex<I,T> & fourindex<I,T>::operator+=(const fourindex<I,T> &rhs)
 {
 if(nn!=rhs.nn) laerror("incompatible dimensions for +=");
 if(count) copyonwrite();
 else {count=new int(1); list=NULL;}
 for(matel4<I,T> *node=rhs.list; node; node=node->next)
     add(node->index,node->elem);
 return *this;
 }
 // Moves all elements of rhs to the end of this list without copying them;
 // rhs ends up empty. rhs must be defined, unshared and a different object.
 template <class I, class T>
 fourindex<I,T> & fourindex<I,T>::join(fourindex<I,T> &rhs)
 {
 // joining an object with itself would link the list to its own tail and lose it
 if(this==&rhs) {laerror("join of a fourindex with itself"); return *this;}
 if(nn!=rhs.nn) laerror("incompatible dimensions for join");
 // an undefined rhs has no counter to inspect
 if(!rhs.count) {laerror("undefined rhs in join()"); return *this;}
 if(*rhs.count!=1) laerror("shared rhs in join()");
 if(!count) {count=new int;  *count=1; list=NULL;}
 else copyonwrite();
 // walk to the terminating link and hook rhs's list onto it
 matel4<I,T> **last=&list;
 while(*last) last= &((*last)->next);
 *last=rhs.list;
 rhs.list=NULL;
 return *this;
 }
 // Makes this object a defined, empty, unshared fourindex of dimension n.
 // A shared list is only detached; a solely owned list is freed.
 template <class I, class T>
 void fourindex<I,T>::resize(const I n)
 {
     if(n<=0 ) laerror("illegal fourindex dimension");
     if(count && *count > 1)
         {
         //detach from previous
         (*count)--;
         count=NULL;
         list=NULL;
         }
     else if(count && *count==1) deletelist();
     nn=n;
     count=new int(1); //empty but defined matrix
     list=NULL;
 }
 // Frees every list element and the reference counter, leaving list and
 // count NULL. Reports an error when the counter says the list is shared.
 template <class I, class T>
 void fourindex<I,T>::deletelist()
 {
 if(*count >1) laerror("trying to delete shared list");
 for(matel4<I,T> *node=list; node; )
     {
     matel4<I,T> *dead=node;
     node=node->next;   // step on before the element is destroyed
     delete dead;
     }
 list=NULL;
 delete count;
 count=NULL;
 }
 // Starts a fresh list and adds a copy of every element reachable from l.
 // Because add() prepends, the new list holds the elements in reverse order.
 template <class I, class T>
 void fourindex<I,T>::copylist(const matel4<I,T> *l)
 {
 list=NULL;
 for(const matel4<I,T> *src=l; src; src=src->next)
     add(src->index,src->elem);
 }
 // When the list is shared (counter above one), drops this object's reference
 // and replaces the list by a private copy with its own counter.
 template <class I, class T>
 void fourindex<I,T>::copyonwrite()
 {
     if(!count) laerror("probably an assignment to undefined fourindex");
     if(*count <= 1) return;   // already the only owner
     (*count)--;
     count = new int(1);
     if(!list) laerror("empty list with count>1");
     copylist(list);
 }
 // Returns the number of elements in the list (walks the whole list).
 template <class I, class T>
 int fourindex<I,T>::length() const
 {
 int total=0;
 for(matel4<I,T> *node=list; node; node=node->next) ++total;
 return total;
 }
 // Text output: the dimension on the first line, then one line
 // "i j k l elem" per list element, closed by the line "-1 -1 -1 -1".
 template <class I, class T>
 ostream& operator<<(ostream &s, const fourindex<I,T> &x)
     {
     const int n=x.size();
     s << n << '\n';
     for(typename fourindex<I,T>::iterator it=x.begin(); it!=x.end(); ++it)
         {
         s << (int)it->index.indiv.i << ' ';
         s << (int)it->index.indiv.j << ' ';
         s << (int)it->index.indiv.k << ' ';
         s << (int)it->index.indiv.l << ' ';
         s << it->elem << '\n';
         }
     s << "-1 -1 -1 -1\n";
     return s;
     }
 // Text input matching operator<<: reads the dimension, resizes x, then reads
 // "i j k l elem" records until a record with a negative index is met.
 // Reading also stops as soon as the stream fails, so truncated or malformed
 // input cannot loop forever.
 template <class I, class T>
 istream& operator>>(istream  &s, fourindex<I,T> &x)
     {
     int i,j,k,l;
     T elem;
     int n;
     s >> n ;
     x.resize(n);
     s >> i >> j >>k >>l;
     while(s && i>=0 && j>=0 &&k>=0 &&l>=0)
         {
         if(!(s>>elem)) break;   // do not store an element that could not be read
         x.add(i,j,k,l,elem);
         s >> i >> j >>k >>l;
         }
     return s;
     }
 #endif /*_fourindex_included*/
--- a/la.h
+++ b/la.h
@ -0,0 +1,9 @@
 #ifndef _LA_H_
 #define _LA_H_
 #include "vec.h"
 #include "smat.h"
 #include "mat.h"
 #include "nonclass.h"
 #endif /* _LA_H_ */
--- a/la_traits.h
+++ b/la_traits.h
@ -0,0 +1,40 @@
 ////////////////////////////////////////////////////////////////////////////
 //traits classes
 #ifndef _LA_TRAITS_INCL
 #define _LA_TRAITS_INCL
 // Default traits, suitable for plain numbers: element and product type are
 // the type itself, norm is abs(), axpy performs s += x*c.
 template<class C>
 struct NRMat_traits
 {
     typedef C elementtype;
     typedef C producttype;
     static C norm (const C &x)
         {
         return abs(x);
         }
     static void axpy (C &s, const C &x, const C &c)
         {
         s+=x*c;
         }
 };
 //specializations
 // Traits for real general matrices: forwards to NRMat<double>::norm()/axpy().
 template<>
 struct NRMat_traits<NRMat<double> >
 {
     typedef double elementtype;
     typedef NRMat<double> producttype;
     static double norm (const NRMat<double> &x)
         {
         return x.norm();
         }
     static void axpy (NRMat<double>&s, const NRMat<double> &x, const double c)
         {
         s.axpy(c,x);
         }
 };
 // Traits for real NRSMat matrices: norm is x.norm(0.), axpy forwards to
 // NRSMat<double>::axpy(); the product type is a general NRMat<double>.
 template<>
 struct NRMat_traits<NRSMat<double> >
 {
     typedef double elementtype;
     typedef NRMat<double> producttype;
     static const double norm (const NRSMat<double> &x)
         {
         return x.norm(0.);
         }
     static void axpy (NRSMat<double>&s, const NRSMat<double> &x, const double c)
         {
         s.axpy(c,x);
         }
 };
 // Traits for complex general matrices: forwards to the NRMat norm()/axpy().
 template<>
 struct NRMat_traits<NRMat<complex<double> > >
 {
     typedef complex<double> elementtype;
     typedef NRMat<complex<double> > producttype;
     static double norm (const NRMat<complex<double> >  &x)
         {
         return x.norm();
         }
     static void axpy (NRMat<complex<double> >&s, const NRMat<complex<double> > &x, const complex<double> c)
         {
         s.axpy(c,x);
         }
 };
 #endif
--- a/mat.cc
+++ b/mat.cc
@ -0,0 +1,844 @@
 #include "mat.h"
 // TODO :
 //
 //////////////////////////////////////////////////////////////////////////////
 //// forced instantization in the corresponding object file
 template NRMat<double>;
 template NRMat< complex<double> >;
 /*
 *  Templates first, specializations for BLAS next
 */
 // dtor: drop one reference; the last owner frees the storage and the counter
 template <typename T>
 NRMat<T>::~NRMat()
 {
     if (!count) return;            // undefined matrix owns nothing
     if (--(*count) > 0) return;    // storage still used by another matrix
     if (v) {
 #ifdef MATPTR
         delete[] (v[0]);
 #endif
         delete[] v;
     }
     delete count;
 }
 // assign NRMat = NRMat
 // Shallow assignment: releases the reference to the old storage (freeing it
 // when this was the last owner) and then shares rhs's storage, registering
 // the new owner in the shared counter. Also works when *this was undefined.
 template <typename T>
 NRMat<T> & NRMat<T>::operator=(const NRMat<T> &rhs)
 {
     if (this == &rhs) return *this;
     if (count && --(*count) == 0) {
         // same guard as in the destructor: v may be unallocated
         if (v) {
 #ifdef MATPTR
             delete[] (v[0]);
 #endif
             delete[] v;
         }
         delete count;
     }
     v = rhs.v;
     nn = rhs.nn;
     mm = rhs.mm;
     count = rhs.count;
     if (count) (*count)++;   // one more matrix now shares rhs's storage
     return *this;
 }
 // Assign diagonal: sets every diagonal element to a, other elements are kept
 template <typename T>
 NRMat<T> & NRMat<T>::operator=(const T &a)
 {
     copyonwrite();
 #ifdef DEBUG
     if (nn != mm) laerror("RMat.operator=scalar on non-square matrix");
 #endif
 #ifdef MATPTR
     for (int d=0; d<nn; d++) v[d][d] = a;
 #else
     // in the flat layout consecutive diagonal elements are nn+1 apart
     const int stride = nn+1;
     for (int pos=0; pos<nn*nn; pos+=stride) v[pos] = a;
 #endif
     return *this;
 }
 // Explicit deep copy of NRMat: afterwards *this owns private storage holding
 // a copy of rhs's elements, never sharing with rhs.
 template <typename T>
 NRMat<T> & NRMat<T>::operator|=(const NRMat<T> &rhs)
 {
     if (this == &rhs) return *this;
 #ifdef DEBUG
     if (!rhs.v) laerror("unallocated rhs in Mat operator |=");
 #endif
     // storage shared with another matrix: let go of it and start from scratch
     if (count && *count > 1) {
         --(*count);
         nn = 0;
         mm = 0;
         count = 0;
         v = 0;
     }
     // private storage of the wrong shape cannot be reused
     const bool sameshape = (nn == rhs.nn && mm == rhs.mm);
     if (!sameshape) {
         if (v) {
 #ifdef MATPTR
             delete[] (v[0]);
 #endif
             delete[] (v);
             v = 0;
         }
         nn = rhs.nn;
         mm = rhs.mm;
     }
     if (!v) {
 #ifdef MATPTR
         v = new T*[nn];
         v[0] = new T[mm*nn];
 #else
         v = new T[mm*nn];
 #endif
     }
 #ifdef MATPTR
     for (int row=1; row<nn; row++) v[row] = v[row-1] + mm;
     memcpy(v[0], rhs.v[0], nn*mm*sizeof(T));
 #else
     memcpy(v, rhs.v, nn*mm*sizeof(T));
 #endif
     if (!count) count = new int;
     *count = 1;
     return *this;
 }
 // M += a: adds a to every diagonal element
 template <typename T>
 NRMat<T> & NRMat<T>::operator+=(const T &a)
 {
     copyonwrite();
 #ifdef DEBUG
     if (nn != mm) laerror("Mat.operator+=scalar on non-square matrix");
 #endif
 #ifdef MATPTR
     for (int d=0; d<nn; d++) v[d][d] += a;
 #else
     // in the flat layout consecutive diagonal elements are nn+1 apart
     const int stride = nn+1;
     for (int pos=0; pos<nn*nn; pos+=stride) v[pos] += a;
 #endif
     return *this;
 }
 // M -= a: subtracts a from every diagonal element
 template <typename T>
 NRMat<T> & NRMat<T>::operator-=(const T &a)
 {
     copyonwrite();
 #ifdef DEBUG
     if (nn != mm) laerror("Mat.operator-=scalar on non-square matrix");
 #endif
 #ifdef MATPTR
     for (int d=0; d<nn; d++) v[d][d] -= a;
 #else
     // in the flat layout consecutive diagonal elements are nn+1 apart
     const int stride = nn+1;
     for (int pos=0; pos<nn*nn; pos+=stride) v[pos] -= a;
 #endif
     return *this;
 }
 // unary minus: returns a new matrix with every element negated
 template <typename T>
 const NRMat<T> NRMat<T>::operator-() const
 {
     NRMat<T> result(nn, mm);
     const int total = nn*mm;
     for (int k=0; k<total; k++) {
 #ifdef MATPTR
         result.v[0][k] = -v[0][k];
 #else
         result.v[k] = -v[k];
 #endif
     }
     return result;
 }
 // direct sum: returns the (nn+b.nn) x (mm+b.mm) matrix with *this in the
 // upper left block, b in the lower right block and zeros elsewhere
 template <typename T>
 const NRMat<T> NRMat<T>::operator&(const NRMat<T> & b) const
 {
     NRMat<T> result((T)0, nn+b.nn, mm+b.mm);
     for (int i=0; i<nn; i++) memcpy(result[i], (*this)[i], sizeof(T)*mm);
     // b starts after the mm columns taken by *this (a column offset, not nn)
     for (int i=0; i<b.nn; i++) memcpy(result[nn+i]+mm, b[i], sizeof(T)*b.mm);
     return result;
 }
 // direct product: result[i*b.nn+k][j*b.mm+l] = (*this)[i][j]*b[k][l]
 template <typename T>
 const NRMat<T> NRMat<T>::operator|(const NRMat<T> &b) const
 {
     NRMat<T> result(nn*b.nn, mm*b.mm);
     for (int i=0; i<nn; i++) {
         for (int j=0; j<mm; j++) {
             // each element of *this scales one complete copy of b
             for (int k=0; k<b.nn; k++) {
                 const int row = i*b.nn+k;
                 for (int l=0; l<b.mm; l++) {
                     const int col = j*b.mm+l;
                     result[row][col] = (*this)[i][j]*b[k][l];
                 }
             }
         }
     }
     return result;
 }
 // sum of columns: result[i] is the sum of all elements of row i
 template <typename T>
 const NRVec<T> NRMat<T>::csum() const
 {
     NRVec<T> result(nn);
     for (int row=0; row<nn; row++) {
         T acc = (T)0;
         for (int col=0; col<mm; col++) acc += (*this)[row][col];
         result[row] = acc;
     }
     return result;
 }
 // sum of rows: result[i] is the sum of all elements of column i, so the
 // result has one entry per column (mm entries)
 template <typename T>
 const NRVec<T> NRMat<T>::rsum() const
 {
     NRVec<T> result(mm);
     T sum;
     for (int i=0; i<mm; i++) {
         sum = (T)0;
         for(int j=0; j<nn; j++) sum += (*this)[j][i];
         result[i] = sum;
     }
     return result;
 }
 // Detach the matrix: when the storage is shared, give up the reference and
 // continue with a private deep copy of the elements
 template <typename T>
 void NRMat<T>::copyonwrite()
 {
 #ifdef DEBUG
     if (!count) laerror("Mat::copyonwrite of undefined matrix");
 #endif
     if (*count <= 1) return;   // already the only owner
     (*count)--;
     count = new int;
     *count = 1;
 #ifdef MATPTR
     T **fresh = new T*[nn];
     fresh[0] = new T[mm*nn];
     memcpy(fresh[0], v[0], mm*nn*sizeof(T));
     v = fresh;
     // rebuild the row pointers into the new contiguous block
     for (int row=1; row<nn; row++) v[row] = v[row-1] + mm;
 #else
     T *fresh = new T[mm*nn];
     memcpy(fresh, v, mm*nn*sizeof(T));
     v = fresh;
 #endif
 }
 // Gives the matrix private storage for n x m elements. Shared storage is
 // detached, private storage of the same shape is kept as it is; element
 // values are not set by this function.
 template <typename T>
 void NRMat<T>::resize(const int n, const int m)
 {
 #ifdef DEBUG
     if (n<=0 || m<=0) laerror("illegal dimensions in Mat::resize()");
 #endif
     if (count && *count > 1) {
         (*count)--;
         count = 0;
         v  = 0;
         nn = 0;
         mm = 0;
     }
     const bool undefined = !count;
     // At this point *count = 1 unless undefined, check if resize is necessary
     if (!undefined && n==nn && m==mm) return;
     if (undefined) {
         count = new int;
         *count = 1;
     }
     nn = n;
     mm = m;
     if (!undefined) {
 #ifdef MATPTR
         delete[] (v[0]);
 #endif
         delete[] v;
     }
 #ifdef MATPTR
     v = new T*[nn];
     v[0] = new T[m*n];
     for (int row=1; row<n; row++) v[row] = v[row-1] + m;
 #else
     v = new T[m*n];
 #endif
 }
 // transpose Mat in place by swapping each element below the diagonal with
 // its mirror element above the diagonal
 template <typename T>
 NRMat<T> & NRMat<T>::transposeme()
 {
 #ifdef DEBUG
     if (nn != mm) laerror("transpose of non-square Mat");
 #endif
     copyonwrite();
     for (int i=1; i<nn; i++) {
         for (int j=0; j<i; j++) {
 #ifdef MATPTR
             T saved = v[i][j];
             v[i][j] = v[j][i];
             v[j][i] = saved;
 #else
             const int below = i*mm+j;
             const int above = j*mm+i;
             T saved = v[below];
             v[below] = v[above];
             v[above] = saved;
 #endif
         }
     }
     return *this;
 }
 // Output of Mat: hands the element data and dimensions to lawritemat()
 template <typename T>
 void NRMat<T>::fprintf(FILE *file, const char *format, const int modulo) const
 {
     const T *data = (const T*)(*this);
     lawritemat(file, data, nn, mm, format, 2, modulo, 0);
 }
 // Input of Mat: reads "n m" followed by n*m elements in row order, each one
 // parsed with the caller supplied format; the matrix is resized to n x m.
 // Calls laerror() when the dimensions or an element cannot be read.
 template <typename T>
 void NRMat<T>::fscanf(FILE *f, const char *format)
 {
     int n, m;
     if (std::fscanf(f, "%d %d", &n, &m) != 2)
         laerror("cannot read matrix dimensions in Mat::fscanf()");
     resize(n,m);
     T *p = *this;
     for(int i=0; i<n; i++)
         for(int j=0; j<m; j++)   // a row holds m elements, not n
             if(std::fscanf(f,format,p++) != 1)
                 laerror("cannot read matrix element in Mat::fscanf()");
 }
 /*
 * BLAS specializations for double and complex<double>
 */
 // Mat *= a: scales all nn*mm elements by a
 NRMat<double> & NRMat<double>::operator*=(const double &a)
 {
     copyonwrite();
     const int total = nn*mm;
     cblas_dscal(total, a, *this, 1);
     return *this;
 }
 // Mat *= a for complex matrices: scales all nn*mm elements by a
 NRMat< complex<double> > &
 NRMat< complex<double> >::operator*=(const complex<double> &a)
 {
     copyonwrite();
     const int total = nn*mm;
     cblas_zscal(total, &a, (void *)(*this)[0], 1);
     return *this;
 }
 // Mat += Mat: element-wise addition of a matrix of the same shape
 NRMat<double> & NRMat<double>::operator+=(const NRMat<double>  &rhs)
 {
 #ifdef DEBUG
     if (nn != rhs.nn || mm!= rhs.mm)
         laerror("Mat += Mat of incompatible matrices");
 #endif
     copyonwrite();
     const int total = nn*mm;
     cblas_daxpy(total, 1.0, rhs, 1, *this, 1);
     return *this;
 }
 // Mat += Mat for complex matrices: element-wise addition, same shape required
 NRMat< complex<double> > &
 NRMat< complex<double> >::operator+=(const NRMat< complex<double> >  &rhs)
 {
 #ifdef DEBUG
     if (nn != rhs.nn || mm!= rhs.mm)
         laerror("Mat += Mat of incompatible matrices");
 #endif
     copyonwrite();
     const int total = nn*mm;
     cblas_zaxpy(total, &CONE, (void *)rhs[0], 1, (void *)(*this)[0], 1);
     return *this;
 }
 // Mat -= Mat: element-wise subtraction of a matrix of the same shape
 NRMat<double> & NRMat<double>::operator-=(const NRMat<double>  &rhs)
 {
 #ifdef DEBUG
     if (nn != rhs.nn || mm!= rhs.mm)
         laerror("Mat -= Mat of incompatible matrices");
 #endif
     copyonwrite();
     const int total = nn*mm;
     cblas_daxpy(total, -1.0, rhs, 1, *this, 1);
     return *this;
 }
 // Mat -= Mat for complex matrices: element-wise subtraction, same shape required
 NRMat< complex<double> > &
 NRMat< complex<double> >::operator-=(const NRMat< complex<double> >  &rhs)
 {
 #ifdef DEBUG
     if (nn != rhs.nn || mm!= rhs.mm)
         laerror("Mat -= Mat of incompatible matrices");
 #endif
     copyonwrite();
     const int total = nn*mm;
     cblas_zaxpy(total, &CMONE, (void *)rhs[0], 1, (void *)(*this)[0], 1);
     return *this;
 }
 // Mat += SMat: rhs is read as consecutive runs of 1, 2, 3, ... elements;
 // run i is added to the first i+1 elements of row i and, without its last
 // element, to the first i elements of column i
 NRMat<double> & NRMat<double>::operator+=(const NRSMat<double> &rhs)
 {
 #ifdef DEBUG
     if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat+=SMat");
 #endif
     const double *packed = rhs;
     copyonwrite();
     // rows up to and including the diagonal
     for (int row=0; row<nn; row++) {
         cblas_daxpy(row+1, 1.0, packed, 1, (*this)[row], 1);
         packed += row+1;
     }
     // mirror part above the diagonal, column by column (stride nn)
     packed = rhs;
     packed++;
     for (int col=1; col<nn; col++) {
         cblas_daxpy(col, 1.0, packed, 1, (*this)[0]+col, nn);
         packed += col+1;
     }
     return *this;
 }
 // Mat += SMat for complex matrices: rhs is read as consecutive runs of
 // 1, 2, 3, ... elements; run i is added to the first i+1 elements of row i
 // and, without its last element, to the first i elements of column i
 NRMat< complex<double> > &
 NRMat< complex<double> >::operator+=(const NRSMat< complex<double> > &rhs)
 {
 #ifdef DEBUG
     if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat+=SMat");
 #endif
     const complex<double> *p = rhs;
     copyonwrite();
     // rows up to and including the diagonal
     for (int i=0; i<nn; i++) {
         cblas_zaxpy(i+1, (void *)&CONE, (void *)p, 1, (void *)(*this)[i], 1);
         p += i+1;
     }
     p = rhs; p++;
     // column i starts at element [0][i] (as in the double version); starting
     // at [i][i] would write below the diagonal and past the end of the matrix
     for (int i=1; i<nn; i++) {
         cblas_zaxpy(i, (void *)&CONE, (void *)p, 1, (void *)((*this)[0]+i), nn);
         p += i+1;
     }
     return *this;
 }
 // Mat -= SMat: rhs is read as consecutive runs of 1, 2, 3, ... elements;
 // run i is subtracted from the first i+1 elements of row i and, without its
 // last element, from the first i elements of column i
 NRMat<double> & NRMat<double>::operator-=(const NRSMat<double> &rhs)
 {
 #ifdef DEBUG
     if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat-=SMat");
 #endif
     const double *packed = rhs;
     copyonwrite();
     // rows up to and including the diagonal
     for (int row=0; row<nn; row++) {
         cblas_daxpy(row+1, -1.0, packed, 1, (*this)[row], 1);
         packed += row+1;
     }
     // mirror part above the diagonal, column by column (stride nn)
     packed = rhs;
     packed++;
     for (int col=1; col<nn; col++) {
         cblas_daxpy(col, -1.0, packed, 1, (*this)[0]+col, nn);
         packed += col+1;
     }
     return *this;
 }
 // Mat -= SMat for complex matrices: rhs is read as consecutive runs of
 // 1, 2, 3, ... elements; run i is subtracted from the first i+1 elements of
 // row i and, without its last element, from the first i elements of column i
 NRMat< complex<double> > &
 NRMat< complex<double> >::operator-=(const NRSMat< complex<double> > &rhs)
 {
 #ifdef DEBUG
     if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat-=SMat");
 #endif
     const complex<double> *p = rhs;
     copyonwrite();
     // rows up to and including the diagonal
     for (int i=0; i<nn; i++) {
         cblas_zaxpy(i+1, (void *)&CMONE, (void *)p, 1, (void *)(*this)[i], 1);
         p += i+1;
     }
     p = rhs; p++;
     // column i starts at element [0][i] (as in the double version); starting
     // at [i][i] would write below the diagonal and past the end of the matrix
     for (int i=1; i<nn; i++) {
         cblas_zaxpy(i, (void *)&CMONE, (void *)p, 1, (void *)((*this)[0]+i), nn);
         p += i+1;
     }
     return *this;
 }
 // Mat.Mat - scalar product: sum over all products of corresponding elements
 const double NRMat<double>::dot(const NRMat<double> &rhs) const
 {
 #ifdef DEBUG
     if(nn!=rhs.nn || mm!= rhs.mm) laerror("Mat.Mat incompatible matrices");
 #endif
     const int total = nn*mm;
     return cblas_ddot(total, (*this)[0], 1, rhs[0], 1);
 }
 // Mat.Mat scalar product for complex matrices, computed by cblas_zdotc_sub
 // over all nn*mm elements
 const complex<double>
 NRMat< complex<double> >::dot(const NRMat< complex<double> > &rhs) const
 {
 #ifdef DEBUG
     if(nn!=rhs.nn || mm!= rhs.mm) laerror("Mat.Mat incompatible matrices");
 #endif
     complex<double> product;
     const int total = nn*mm;
     cblas_zdotc_sub(total, (void *)(*this)[0], 1, (void *)rhs[0], 1,
             (void *)(&product));
     return product;
 }
 // Mat * Mat: returns the nn x rhs.mm matrix product computed by cblas_dgemm
 const NRMat<double> NRMat<double>::operator*(const NRMat<double> &rhs) const
 {
 #ifdef DEBUG
     if (mm != rhs.nn) laerror("product of incompatible matrices");
 #endif
     const int cols = rhs.mm;
     NRMat<double> result(nn, cols);
     cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nn, cols, mm, 1.0,
             *this, mm, rhs, cols, 0.0, result, cols);
     return result;
 }
 // Mat * Mat for complex matrices: returns the nn x rhs.mm matrix product
 // computed by cblas_zgemm
 const NRMat< complex<double> >
 NRMat< complex<double> >::operator*(const NRMat< complex<double> > &rhs) const
 {
 #ifdef DEBUG
     if (mm != rhs.nn) laerror("product of incompatible matrices");
 #endif
     const int cols = rhs.mm;
     NRMat< complex<double> > result(nn, cols);
     cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nn, cols, mm,
             (const void *)(&CONE), (const void *)(*this)[0], mm,
             (const void *)rhs[0], cols,
             (const void *)(&CZERO), (void *)result[0], cols);
     return result;
 }
 // Multiply by diagonal from L: row i is scaled by rhs[i]
 void NRMat<double>::diagmultl(const NRVec<double> &rhs)
 {
 #ifdef DEBUG
     if (nn != rhs.size()) laerror("incompatible matrix dimension in diagmultl");
 #endif
     copyonwrite();
     for (int row=0; row<nn; row++) {
         cblas_dscal(mm, rhs[row], (*this)[row], 1);
     }
 }
 // Multiply by diagonal from L for complex matrices: row i is scaled by rhs[i]
 void NRMat< complex<double> >::diagmultl(const NRVec< complex<double> > &rhs)
 {
 #ifdef DEBUG
     if (nn != rhs.size()) laerror("incompatible matrix dimension in diagmultl");
 #endif
     copyonwrite();
     for (int row=0; row<nn; row++) {
         cblas_zscal(mm, &rhs[row], (*this)[row], 1);
     }
 }
 // Multiply by diagonal from R: column i is scaled by rhs[i]
 void NRMat<double>::diagmultr(const NRVec<double> &rhs)
 {
 #ifdef DEBUG
     if (mm != rhs.size()) laerror("incompatible matrix dimension in diagmultr");
 #endif
     copyonwrite();
     // column i starts at element [0][i] and its elements are mm apart;
     // (*this)[i] would be the start of ROW i and may not even exist (i<mm)
     for (int i=0; i<mm; i++) cblas_dscal(nn, rhs[i], (*this)[0]+i, mm);
 }
 // Multiply by diagonal from R for complex matrices: column i is scaled by rhs[i]
 void NRMat< complex<double> >::diagmultr(const NRVec< complex<double> > &rhs)
 {
 #ifdef DEBUG
     if (mm != rhs.size()) laerror("incompatible matrix dimension in diagmultr");
 #endif
     copyonwrite();
     // column i starts at element [0][i] and its elements are mm apart;
     // (*this)[i] would be the start of ROW i and may not even exist (i<mm)
     for (int i=0; i<mm; i++) cblas_zscal(nn, &rhs[i], (*this)[0]+i, mm);
 }
 // Mat * Smat, decomposed to nn x Vec * Smat: every row of *this is
 // multiplied with rhs by one cblas_dspmv call
 const NRMat<double>
 NRMat<double>::operator*(const NRSMat<double> &rhs) const
 {
 #ifdef DEBUG
     if (mm != rhs.nrows()) laerror("incompatible dimension in Mat*SMat");
 #endif
     NRMat<double> result(nn, rhs.ncols());
     for (int row=0; row<nn; row++) {
         cblas_dspmv(CblasRowMajor, CblasLower, mm, 1.0, &rhs[0],
                 (*this)[row], 1, 0.0, result[row], 1);
     }
     return result;
 }
 // Mat * Smat for complex matrices: every row of *this is multiplied with
 // rhs by one cblas_zhpmv call
 const NRMat< complex<double> >
 NRMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const
 {
 #ifdef DEBUG
     if (mm != rhs.nrows()) laerror("incompatible dimension in Mat*SMat");
 #endif
     NRMat< complex<double> > result(nn, rhs.ncols());
     for (int row=0; row<nn; row++) {
         cblas_zhpmv(CblasRowMajor, CblasLower, mm, (void *)&CONE, (void *)&rhs[0],
                 (void *)(*this)[row], 1, (void *)&CZERO, (void *)result[row], 1);
     }
     return result;
 }
 // Mat * Vec: returns the vector of length nn computed by cblas_dgemv
 const NRVec<double>
 NRMat<double>::operator*(const NRVec<double> &vec) const
 {
 #ifdef DEBUG
     if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec");
 #endif
     NRVec<double> product(nn);
     cblas_dgemv(CblasRowMajor, CblasNoTrans, nn, mm, 1.0, (*this)[0],
             mm, &vec[0], 1, 0.0, &product[0], 1);
     return product;
 }
 // Mat * Vec for complex matrices: returns the vector of length nn computed
 // by cblas_zgemv
 const NRVec< complex<double> >
 NRMat< complex<double> >::operator*(const NRVec< complex<double> > &vec) const
 {
 #ifdef DEBUG
     if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec");
 #endif
     NRVec< complex<double> > product(nn);
     cblas_zgemv(CblasRowMajor, CblasNoTrans, nn, mm, (void *)&CONE, (void *)(*this)[0],
             mm, (void *)&vec[0], 1, (void *)&CZERO, (void *)&product[0], 1);
     return product;
 }
 // sum of rows: result[i] is the signed sum of all elements of column i,
 // matching the generic NRMat<T>::rsum(). cblas_dasum is not usable here
 // because it adds absolute values.
 const NRVec<double> NRMat<double>::rsum() const
 {
     NRVec<double> result(mm);
     for (int i=0; i<mm; i++) {
         const double *column = (*this)[0]+i;   // elements of a column are mm apart
         double sum = 0.0;
         for (int j=0; j<nn; j++) sum += column[j*mm];
         result[i] = sum;
     }
     return result;
 }
 // sum of columns: result[i] is the signed sum of all elements of row i,
 // matching the generic NRMat<T>::csum(). cblas_dasum is not usable here
 // because it adds absolute values.
 const NRVec<double> NRMat<double>::csum() const
 {
     NRVec<double> result(nn);
     for (int i=0; i<nn; i++) {
         const double *row = (*this)[i];
         double sum = 0.0;
         for (int j=0; j<mm; j++) sum += row[j];
         result[i] = sum;
     }
     return result;
 }
 // complex conjugate of Mat: nothing to do for a real matrix
 NRMat<double> &NRMat<double>::conjugateme()
 {
     return *this;
 }
 // complex conjugate of Mat in place: the element data is addressed as an
 // array of doubles and every second double, starting with the second one,
 // is multiplied by -1
 NRMat< complex<double> > & NRMat< complex<double> >::conjugateme()
 {
     copyonwrite();
     double *secondhalf = (double *)((*this)[0])+1;
     cblas_dscal(mm*nn, -1.0, secondhalf, 2);
     return *this;
 }
 // transpose and optionally conjugate: returns the mm x nn transpose as a new
 // matrix; conj is not used for a real matrix
 const NRMat<double> NRMat<double>::transpose(bool conj) const
 {
     NRMat<double> result(mm,nn);
     // row i of *this becomes column i of the result (stride nn)
     for (int row=0; row<nn; row++) {
         cblas_dcopy(mm, (*this)[row], 1, result[0]+row, nn);
     }
     return result;
 }
 // transpose and optionally conjugate for complex matrices: returns the
 // mm x nn transpose as a new matrix; with conj every second double of the
 // result data, starting with the second one, is multiplied by -1
 const NRMat< complex<double> >
 NRMat< complex<double> >::transpose(bool conj) const
 {
     NRMat< complex<double> > result(mm,nn);
     // row i of *this becomes column i of the result (stride nn)
     for (int row=0; row<nn; row++) {
         cblas_zcopy(mm, (void *)(*this)[row], 1, (void *)(result[0]+row), nn);
     }
     if (conj) {
         cblas_dscal(mm*nn, -1.0, (double *)(result[0])+1, 2);
     }
     return result;
 }
 // gemm : this = alpha*op( A )*op( B ) + beta*this
 // transa / transb equal to 'n' use the matrix as it is, any other value
 // uses its transpose
 void NRMat<double>::gemm(const double &beta, const NRMat<double> &a,
         const char transa, const NRMat<double> &b, const char transb,
         const double &alpha)
 {
     const bool plaina = (transa=='n');
     const bool plainb = (transb=='n');
     // shapes of op(A) (l x k) and op(B) (kk x ll)
     int l(plaina ? a.nn : a.mm);
     int k(plaina ? a.mm : a.nn);
     int kk(plainb ? b.nn : b.mm);
     int ll(plainb ? b.mm : b.nn);
 #ifdef DEBUG
     if (l!=nn || ll!=mm || k!=kk) laerror("incompatible matrices in Mat:gemm()");
 #endif
     if (alpha==0.0 && beta==1.0) return;   // the update would change nothing
     copyonwrite();
     cblas_dgemm(CblasRowMajor, (plaina ? CblasNoTrans : CblasTrans),
             (plainb ? CblasNoTrans : CblasTrans), nn, mm, k, alpha, a,
             a.mm, b , b.mm, beta, *this , mm);
 }
 // gemm for complex matrices: this = alpha*op( A )*op( B ) + beta*this
 // transa / transb: 'n' uses the matrix as it is, 'c' its conjugate
 // transpose, any other value its plain transpose. Each operand is
 // controlled by its own flag.
 void NRMat< complex<double> >::gemm(const complex<double> & beta,
         const NRMat< complex<double> > & a, const char transa,
         const NRMat< complex<double> > & b, const char transb,
         const complex<double> & alpha)
 {
     // shapes of op(A) (l x k) and op(B) (kk x ll)
     int l(transa=='n'?a.nn:a.mm);
     int k(transa=='n'?a.mm:a.nn);
     int kk(transb=='n'?b.nn:b.mm);
     int ll(transb=='n'?b.mm:b.nn);
 #ifdef DEBUG
     if (l!=nn || ll!=mm || k!=kk) laerror("incompatible matrices in Mat:gemm()");
 #endif
     if (alpha==CZERO && beta==CONE) return;   // the update would change nothing
     copyonwrite();
     cblas_zgemm(CblasRowMajor,
             (transa=='n' ? CblasNoTrans : (transa=='c'?CblasConjTrans:CblasTrans)),
             // the conjugation of B depends on transb, not on transa
             (transb=='n' ? CblasNoTrans : (transb=='c'?CblasConjTrans:CblasTrans)),
             nn, mm, k, &alpha, a , a.mm, b , b.mm, &beta, *this , mm);
 }
 // norm of Mat: square root of the sum of squared elements, where scalar is
 // first subtracted from every element with equal row and column index
 const double  NRMat<double>::norm(const double scalar) const
 {
     // without a shift BLAS does the whole job
     if (!scalar) return cblas_dnrm2(nn*mm, (*this)[0], 1);
     double sum = 0;
     for (int row=0; row<nn; row++) {
         for (int col=0; col<mm; col++) {
 #ifdef MATPTR
             double element = v[row][col];
 #else
             double element = v[row*mm+col];
 #endif
             if (row==col) element -= scalar;
             sum += element*element;
         }
     }
     return sqrt(sum);
 }
 // norm of a complex Mat: square root of the sum of squared moduli, where
 // scalar is first subtracted from every element with equal row and column
 // index
 const double  NRMat< complex<double> >::norm(const complex<double> scalar) const
 {
     // without a shift BLAS does the whole job
     if (scalar == CZERO) return cblas_dznrm2(nn*mm, (*this)[0], 1);
     double sum = 0;
     for (int row=0; row<nn; row++) {
         for (int col=0; col<mm; col++) {
             complex<double> element;
 #ifdef MATPTR
             element = v[row][col];
 #else
             element = v[row*mm+col];
 #endif
             if (row==col) element -= scalar;
             sum += element.real()*element.real()+element.imag()*element.imag();
         }
     }
     return sqrt(sum);
 }
 // axpy: this += alpha * Mat (cblas_daxpy over all nn*mm elements)
 void NRMat<double>::axpy(const double alpha, const NRMat<double> &mat)
 {
 #ifdef DEBUG
     if (nn!=mat.nn || mm!=mat.mm) laerror("daxpy of incompatible matrices");
 #endif
     copyonwrite();
     const int total = nn*mm;
     cblas_daxpy(total, alpha, mat, 1, *this, 1);
 }
 // axpy for complex matrices: this += alpha * Mat (cblas_zaxpy over all
 // nn*mm elements)
 void NRMat< complex<double> >::axpy(const complex<double> alpha,
         const NRMat< complex<double> > & mat)
 {
 #ifdef DEBUG
     if (nn!=mat.nn || mm!=mat.mm) laerror("zaxpy of incompatible matrices");
 #endif
     copyonwrite();
     const int total = nn*mm;
     cblas_zaxpy(total, (void *)&alpha, mat, 1, (void *)(*this)[0], 1);
 }
 // trace of Mat: signed sum of the diagonal elements. cblas_dasum must not
 // be used here, it adds absolute values and is wrong for negative entries.
 const double NRMat<double>::trace() const
 {
 #ifdef DEBUG
     if (nn != mm) laerror("no-square matrix in Mat::trace()");
 #endif
     const double *data = (*this)[0];
     double sum = 0.0;
     // consecutive diagonal elements are nn+1 apart in the element data
     for (int i=0; i<nn; i++) sum += data[i*(nn+1)];
     return sum;
 }
 // trace of a complex Mat: sum of the diagonal elements
 const complex<double> NRMat< complex<double> >::trace() const
 {
 #ifdef DEBUG
     if (nn != mm) laerror("no-square matrix in Mat::trace()");
 #endif
     complex<double> total = CZERO;
     // consecutive diagonal elements are nn+1 apart in the element data
     const int stride = nn+1;
     for (int pos=0; pos<nn*nn; pos+=stride) {
 #ifdef MATPTR
         total += v[0][pos];
 #else
         total += v[pos];
 #endif
     }
     return total;
 }
 //////////////////////////////////////////////////////////////////////////////
 //// forced instantiation in the corresponding object file
 //// (the macro body must end on its last template line: a trailing
 //// backslash there would swallow the first use of the macro below)
 #define INSTANTIZE(T) \
 template ostream & operator<<(ostream &s, const NRMat< T > &x); \
 template istream & operator>>(istream  &s, NRMat< T > &x);
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
 // Text output of a matrix: a first line "rows cols", then one line per row
 // with the elements separated by single blanks
 export template <class T>
 ostream& operator<<(ostream &s, const NRMat<T> &x)
     {
     const int n=x.nrows();
     const int m=x.ncols();
     s << n << ' ' << m << '\n';
     for(int row=0; row<n; row++)
         {
         for(int col=0; col<m; col++)
             {
             // endl cannot be used in the conditional expression, since it is an overloaded function
             const char separator = (col==m-1 ? '\n' : ' ');
             s << x[row][col] << separator;
             }
         }
     return s;
     }
 // Text input of a matrix: reads "rows cols", resizes x accordingly and then
 // reads the elements row by row
 export template <class T>
 istream& operator>>(istream  &s, NRMat<T> &x)
     {
     int n,m;
     s >> n >> m;
     x.resize(n,m);
     for(int row=0; row<n; row++)
         {
         for(int col=0; col<m; col++) s >> x[row][col];
         }
     return s;
     }
--- a/mat.h
+++ b/mat.h
@ -0,0 +1,346 @@
 #ifndef _LA_MAT_H_
 #define _LA_MAT_H_
 #include "vec.h"
 #include "smat.h"
 // Dense nn x mm matrix of elements T.
 // The element storage and the counter pointed to by `count` are shared between
 // copies (the copy constructor copies the pointers and increments *count);
 // copyonwrite() is called by the modifying routines before writing.
 template <typename T>
 class NRMat {
 protected:
 	int nn; // number of rows (returned by nrows())
 	int mm; // number of columns (returned by ncols())
 #ifdef MATPTR
 	T **v; // array of row pointers; v[0] points to one contiguous block of nn*mm elements
 #else
 	T *v; // contiguous block of nn*mm elements, element (i,j) at v[i*mm+j]
 #endif
 	int *count; // shared reference counter; 0 for a default-constructed (unallocated) matrix
 public:
 	friend class NRVec<T>;
 	friend class NRSMat<T>;
 	inline NRMat() : nn(0), mm(0), v(0), count(0) {}; // empty, unallocated matrix
 	inline NRMat(const int n, const int m); // n x m, elements not set
 	inline NRMat(const T &a, const int n, const int m); // n x m, all elements set to a
 	NRMat(const T *a, const int n, const int m); // n x m, elements copied from the array a
 	inline NRMat(const NRMat &rhs); // shares the storage of rhs
 	explicit NRMat(const NRSMat<T> &rhs); // full square matrix from a symmetric one
 #ifndef MATPTR
 	NRMat(const NRVec<T> &rhs, const int n, const int m); // n x m view sharing the storage of the vector
 #endif
 	~NRMat();
 	inline int getcount() const {return count?*count:0;} // current value of the reference counter (0 if unallocated)
 	NRMat & operator=(const NRMat &rhs);  //assignment
 	NRMat & operator=(const T &a);    //assign a to diagonal
 	NRMat & operator|=(const NRMat &rhs); //assignment to a new copy
 	NRMat & operator+=(const T &a);   //add diagonal
 	NRMat & operator-=(const T &a);   //subtract diagonal
 	NRMat & operator*=(const T &a);   //multiply by a scalar
 	NRMat & operator+=(const NRMat &rhs);
 	NRMat & operator-=(const NRMat &rhs);
 	NRMat & operator+=(const NRSMat<T> &rhs);
 	NRMat & operator-=(const NRSMat<T> &rhs);
 	const NRMat operator-() const; //unary minus
 	inline const NRMat operator+(const T &a) const;
 	inline const NRMat operator-(const T &a) const;
 	inline const NRMat operator*(const T &a) const;
 	inline const NRMat operator+(const NRMat &rhs) const;
 	inline const NRMat operator-(const NRMat &rhs) const;
 	inline const NRMat operator+(const NRSMat<T> &rhs) const;
 	inline const NRMat operator-(const NRSMat<T> &rhs) const;
 	const T dot(const NRMat &rhs) const; // scalar product of Mat.Mat
 	const NRMat operator*(const NRMat &rhs) const; // Mat * Mat
 	void diagmultl(const NRVec<T> &rhs); //multiply by a diagonal matrix from L
 	void diagmultr(const NRVec<T> &rhs); //multiply by a diagonal matrix from R
 	const NRMat operator*(const NRSMat<T> &rhs) const; // Mat * Smat
 	const NRMat operator&(const NRMat &rhs) const; // direct sum
 	const NRMat operator|(const NRMat<T> &rhs) const; // direct product
 	const NRVec<T> operator*(const NRVec<T> &rhs) const; // Mat * Vec
 	const NRVec<T> rsum() const; //sum of rows
 	const NRVec<T> csum() const; //sum of columns
 	inline T* operator[](const int i);  //subscripting: pointer to row i
 	inline const T* operator[](const int i) const;
 	inline T& operator()(const int i, const int j); // (i,j) subscripts
 	inline const T& operator()(const int i, const int j) const;
 	inline int nrows() const;
 	inline int ncols() const;
 	void copyonwrite();
 	void resize(const int n, const int m);
 	inline operator T*(); //get a pointer to the data
 	inline operator const T*() const;
 	NRMat & transposeme(); // square matrices only 
 	NRMat & conjugateme(); // square matrices only
 	const NRMat transpose(bool conj=false) const;
 	const NRMat conjugate() const;
 	void gemm(const T &beta, const NRMat &a, const char transa, const NRMat &b,
 			const char transb, const T &alpha);//this = alpha*op( A )*op( B ) + beta*this
 /*
 	void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b,
 			const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this
 	void s_cutoff(const int,const int,const int,const int) const;
 */
 	void fprintf(FILE *f, const char *format, const int modulo) const;
 	void fscanf(FILE *f, const char *format);
 	const double norm(const T scalar=(T)0) const;
 	void axpy(const T alpha, const NRMat &x); // this += a*x
 	inline const T amax() const; // element selected by the BLAS i?amax routine
 	const T trace() const; // sum of the diagonal, square matrices only
 //members concerning sparse matrix
 	explicit NRMat(const SparseMat<T> &rhs);                // dense from sparse
 	NRMat & operator+=(const SparseMat<T> &rhs);
        NRMat & operator-=(const SparseMat<T> &rhs);
        inline void simplify() {}; //just for compatibility with sparse ones
 //Strassen's multiplication (better than n^3, analogous syntax to gemm)
 	void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this
 	void s_cutoff(const int,const int,const int,const int) const;
 };
 // ctors
 // n x m matrix with a fresh reference counter (value 1); the elements are
 // only default-initialized by new[].
 template <typename T>
 NRMat<T>::NRMat(const int n, const int m) : nn(n), mm(m), count(new int(1))
 {
 #ifdef MATPTR
 	v = new T*[n];
 	v[0] = new T[m*n];
 	// the remaining row pointers address the same contiguous block
 	for (int row=1; row<n; ++row) v[row] = v[row-1] + m;
 #else
 	v = new T[m*n];
 #endif
 }
 // n x m matrix with every element set to a; a zero value is written with a
 // single memset instead of an element loop.
 template <typename T>
 NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new int(1))
 {
 #ifdef MATPTR
 	v = new T*[n];
 	v[0] = new T[m*n];
 	for (int row=1; row<n; ++row) v[row] = v[row-1] + m;
 	T *data = v[0];
 #else
 	v = new T[m*n];
 	T *data = v;
 #endif
 	if (a != (T)0)
 		{
 		for (int k=0; k<n*m; ++k) data[k] = a;
 		}
 	else
 		{
 		memset(data, 0, n*m*sizeof(T));
 		}
 }
 // n x m matrix whose n*m elements are copied (memcpy) from the array a.
 template <typename T>
 NRMat<T>::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new int(1))
 {
 #ifdef MATPTR
 	v = new T*[n];
 	T *data = v[0] = new T[m*n];
 	for (int row=1; row<n; ++row) v[row] = v[row-1] + m;
 #else
 	T *data = v = new T[m*n];
 #endif
 	memcpy(data, a, n*m*sizeof(T));
 }
 // Copy constructor: no element is copied, the new object points to the
 // storage of rhs and the shared counter (if any) is incremented.
 template <typename T>
 NRMat<T>::NRMat(const NRMat &rhs)
 	: nn(rhs.nn), mm(rhs.mm), v(rhs.v), count(rhs.count)
 {
 	// an unallocated rhs has no counter
 	if (count != 0) ++(*count);
 }
 // Full square matrix built from a symmetric one: the elements of rhs are read
 // sequentially (row by row over the lower triangle) and each is stored at
 // both (row,col) and (col,row).
 template <typename T>
 NRMat<T>::NRMat(const NRSMat<T> &rhs)
 {
 	nn = mm = rhs.nrows();
 	count = new int(1);
 #ifdef MATPTR
 	v = new T*[nn];
 	v[0] = new T[mm*nn];
 	for (int row=1; row<nn; ++row) v[row] = v[row-1] + mm;
 #else
 	v = new T[mm*nn];
 #endif
 	int packed = 0; // running index into rhs
 	for (int row=0; row<nn; ++row)
 		{
 		for (int col=0; col<=row; ++col)
 			{
 #ifdef MATPTR
 			v[row][col] = v[col][row] = rhs[packed++];
 #else
 			v[row*nn+col] = v[col*nn+row] = rhs[packed++];
 #endif
 			}
 		}
 }
 #ifndef MATPTR
 // n x m matrix sharing the storage (data pointer and reference counter) of the
 // vector rhs; no element is copied.
 template <typename T>
 NRMat<T>::NRMat(const NRVec<T> &rhs, const int n, const int m)
 {
 #ifdef DEBUG
 	if (n*m != rhs.nn) laerror("matrix dimensions incompatible with vector length");
 #endif
 	nn = n;
 	mm = m;
 	count = rhs.count;
 	v = rhs.v;
 	// guard against a missing counter, as the copy constructor does
 	if (count) ++(*count);
 }
 #endif
 // Mat + Smat: implemented through operator+= on a copy of *this
 template <typename T>
 inline const NRMat<T> NRMat<T>::operator+(const NRSMat<T> &rhs) const
 {
 	NRMat<T> sum(*this);
 	sum += rhs;
 	return sum;
 }
 // Mat - Smat: implemented through operator-= on a copy of *this
 template <typename T>
 inline const NRMat<T> NRMat<T>::operator-(const NRSMat<T> &rhs) const
 {
 	NRMat<T> diff(*this);
 	diff -= rhs;
 	return diff;
 }
 // Mat[i] : pointer to the first element of i-th row (writable access)
 template <typename T>
 inline T* NRMat<T>::operator[](const int i)
 {
 #ifdef DEBUG
 	// The allocation test comes first: a default-constructed matrix has
 	// count==0, so dereferencing count before this test would crash
 	// instead of reporting the error.
 	if (!v) laerror("[] for unallocated Mat");
 	if (*count != 1) laerror("Mat lval use of [] with count > 1");
 	if (i<0 || i>=nn) laerror("Mat [] out of range");
 #endif
 #ifdef MATPTR
 	return v[i];
 #else
 	return v+i*mm;
 #endif
 }
 // read-only variant: pointer to the first element of row i
 template <typename T>
 inline const T* NRMat<T>::operator[](const int i) const
 {
 #ifdef DEBUG
 	const bool inrange = (i>=0 && i<nn);
 	if (!inrange) laerror("Mat [] out of range");
 	if (v == 0) laerror("[] for unallocated Mat");
 #endif
 #ifdef MATPTR
 	return v[i];
 #else
 	return &v[i*mm];
 #endif
 }
 // Mat(i,j) reference to the matrix element M_{ij} (writable access)
 template <typename T>
 inline T & NRMat<T>::operator()(const int i, const int j)
 {
 #ifdef DEBUG
 	// allocation test first: count is a null pointer for an unallocated matrix
 	if (!v) laerror("(,) for unallocated Mat");
 	if (*count != 1) laerror("Mat lval use of (,) with count > 1");
 	// valid columns are 0..mm-1, so j==mm must be rejected as well
 	if (i<0 || i>=nn || j<0 || j>=mm) laerror("Mat (,) out of range");
 #endif
 #ifdef MATPTR
 	return v[i][j];
 #else
 	return v[i*mm+j];
 #endif
 }
 // read-only variant: reference to the matrix element M_{ij}
 template <typename T>
 inline const T & NRMat<T>::operator()(const int i, const int j) const
 {
 #ifdef DEBUG
 	// valid columns are 0..mm-1, so j==mm must be rejected as well
 	if (i<0 || i>=nn || j<0 || j>=mm) laerror("Mat (,) out of range");
 	if (!v) laerror("(,) for unallocated Mat");
 #endif
 #ifdef MATPTR
 	return v[i][j];
 #else
 	return v[i*mm+j];
 #endif
 }
 // row count of the matrix (member nn)
 template <typename T>
 inline int NRMat<T>::nrows() const
 {
 	return this->nn;
 }
 // column count of the matrix (member mm)
 template <typename T>
 inline int NRMat<T>::ncols() const
 {
 	return this->mm;
 }
 // conversion to a pointer to the first element of the contiguous data
 template <typename T>
 inline NRMat<T>::operator T* ()
 {
 #ifdef DEBUG
 	if (v == 0) laerror("unallocated Mat in operator T*");
 #endif
 #ifdef MATPTR
 	return *v;
 #else
 	return v;
 #endif
 }
 // read-only conversion to a pointer to the first element of the data
 template <typename T>
 inline NRMat<T>::operator const T* () const
 {
 #ifdef DEBUG
 	if (v == 0) laerror("unallocated Mat in operator T*");
 #endif
 #ifdef MATPTR
 	return *v;
 #else
 	return v;
 #endif
 }
 // element of Mat at the position returned by BLAS idamax over all nn*mm
 // elements; the element itself (with its sign) is returned
 inline const double  NRMat<double>::amax() const
 {
 #ifdef MATPTR
 	const double *data = v[0];
 #else
 	const double *data = v;
 #endif
 	return data[cblas_idamax(nn*mm, data, 1)];
 }
 // complex variant: element at the position returned by BLAS izamax
 inline const complex<double>  NRMat< complex<double> >::amax() const
 {
 #ifdef MATPTR
 	complex<double> *data = v[0];
 #else
 	complex<double> *data = v;
 #endif
 	return data[cblas_izamax(nn*mm, (void *)data, 1)];
 }
 // I/O
 // Stream operators for NRMat; only declared here, the definitions are
 // templates instantiated in a separate object file.
 template <typename T> extern ostream& operator<<(ostream &s, const NRMat<T> &x);
 template <typename T> extern istream& operator>>(istream  &s, NRMat<T> &x);
 // generate operators: Mat + a, a + Mat, Mat * a
 // (NRVECMAT_OPER is not defined in this header; presumably it comes from vec.h)
 NRVECMAT_OPER(Mat,+)
 NRVECMAT_OPER(Mat,-)
 NRVECMAT_OPER(Mat,*)
 // generate Mat + Mat, Mat - Mat
 NRVECMAT_OPER2(Mat,+)
 NRVECMAT_OPER2(Mat,-)
 #endif /* _LA_MAT_H_ */
--- a/matexp.h
+++ b/matexp.h
@ -0,0 +1,259 @@
 //general routine for a polynomial of a matrix, tuned to minimize the number
 //of matrix-matrix multiplications at the cost of additions and memory
 // the polynom and exp routines will work on any type for which a traits class
 // is defined, containing the definition of an element type, norm and axpy operation
 #include "la_traits.h"
 #include "sparsemat_traits.h"
 //polynomial sum_k c[k]*x^k evaluated by the plain Horner scheme
 //(reference implementation, one multiplication by x per coefficient)
 template<class T,class R>
 const T polynom2(const T &x, const NRVec<R> &c)
 {
 const int order=c.size()-1;
 T z,y;
 if(order==0)
 	{
 	//the size of the result is not known here, so x is copied first and
 	//the scalar c[0] is then assigned to the copy
 	y=x;
 	y=c[0];
 	return y;
 	}
 z=x*c[order];
 int i=order-1;
 if(i>=0)
 	{
 	//highest step: the product with x is already contained in z
 	y=z+c[i];
 	while(--i>=0)
 		{
 		z=y*x;
 		y=z+c[i];
 		}
 	}
 return y;
 }
 //polynomial sum_k c[k]*x^k of degree n=c.size()-1.
 //For n<=4 the Horner scheme (polynom2) is used. Otherwise the coefficients are
 //processed in blocks of m terms: the powers x^1..x^m are precomputed, every
 //block is accumulated by axpy operations and the blocks are combined with
 //increasing powers of x^m.
 template<class T,class R>
 const T polynom(const T &x, const NRVec<R> &c)
 {
 int n=c.size()-1;
 int i,j,k,m=0,t;
 if(n<=4) return polynom2(x,c); //here the horner scheme is optimal
 //first find m which minimizes the number of multiplications
 j=10*n;
 for(i=2;i<=n+1;i++)
    {	
    //the conditional has to be parenthesized: ?: binds weaker than + and -,
    //without the parentheses t would only ever be 0 or 1
    t=i-2+2*(n/i)-((n%i)?0:1);
    if(t<j)
 	{
 	j=t;
 	m=i;
 	}
    }
 //allocate array for powers up to m
 T *xpows = new T[m];
 xpows[0]=x;
 for(i=1;i<m;i++) xpows[i]=xpows[i-1]*x;
 //run the summation loop
 T r,s,f;
 k= -1;
 for(i=0; i<=n/m;i++)
 	{
 	for(j=0;j<m;j++)
 		{
 		k++;
 		if(k>n) break;
 		if(j==0) {if(i==0) s=x; /*just to get the dimensions of the matrix*/ s=c[k]; /*create diagonal matrix*/}
 		else  
 			NRMat_traits<T>::axpy(s,xpows[j-1],c[k]); //general  s+=xpows[j-1]*c[k]; but more efficient for matrices
 		}
 	//f holds (x^m)^i, the factor belonging to the i-th block
 	if(i==0) {r=s; f=xpows[m-1];}
 	else
 		{
 		r+= s*f;
 		f=f*xpows[m-1];
 		}
 	}
 delete[] xpows;
 return r;
 }
 //nested commutator for general objects:
 //right: [...[[x,y],y]...,y], otherwise [x,[x,...[x,y]...]], nest levels deep
 template<class T>
 const T ncommutator ( const T &x, const T &y, int nest=1, const bool right=1)
 {
 T z;
 if(right)
 	{
 	z=x;
 	for(int level=0; level<nest; ++level) z=z*y-y*z;
 	}
 else
 	{
 	z=y;
 	for(int level=0; level<nest; ++level) z=x*z-z*x;
 	}
 return z;
 }
 //nested anticommutator, same nesting conventions as ncommutator
 template<class T>
 const T nanticommutator ( const T &x, const T &y, int nest=1, const bool right=1)
 {
 T z;
 if(right)
 	{
 	z=x;
 	for(int level=0; level<nest; ++level) z=z*y+y*z;
 	}
 else
 	{
 	z=y;
 	for(int level=0; level<nest; ++level) z=x*z+z*x;
 	}
 return z;
 }
 //general BCH expansion (can be written more efficiently in a specialization for matrices)
 //returns h + sum_{i=1..n} z_i/i!  where z_0=h and z_i=[z_{i-1},t];
 //with verbose set, the norm of every z_i is reported on cerr
 template<class T>
 const T BCHexpansion (const T &h, const T &t, const int n, const bool verbose=1)
 {
 T result=h;
 T z=h;
 double factor=1.;
 int order=1;
 while(order<=n)
 	{
 	factor/=order;
 	z= z*t-t*z;
 	if(verbose) cerr << "BCH contribution at order "<<order<<" : "<<z.norm()<<endl;
 	result+= z*factor; 
 	++order;
 	}
 return result;
 }
 //integer power x^i (i>=0) by repeated squaring
 template<class T>
 const T ipow( const T &x, int i)
 {
 if(i<0) laerror("negative exponent in ipow");
 if(i==0) {T r=x; r=1.; return r;}//x is copied first only to get the matrix dimension
 if(i==1) return x;
 T y,z;
 z=x;
 //square away the trailing zero bits of the exponent
 for(; (i&1)==0; i>>=1) z = z*z;
 y=z; 
 //remaining bits: z is squared at every step and multiplied in where the bit is set
 for(i>>=1; i!=0; i>>=1)
 	{
 	z = z*z;
 	if(i&1) y = y*z;
 	}
 return y;
 }
 //number of halvings applied to an argument of norm n before the Taylor
 //expansion of exp is used
 //NOTE(review): the offset log(.75) is a natural logarithm while the first term
 //is a base-2 logarithm - confirm that this is intended
 inline int nextpow2(const double n)
 {
 if(n<=.75) return 0; //try to keep the taylor expansion short
 if(n<=1.) return 1;
 const double ln2=log(2.);
 return int(ceil(log(n)/ln2-log(.75)));
 }
 //Prepares the evaluation of exp(x): sets power to the number of halvings chosen
 //by nextpow2 for the norm of x and returns Taylor coefficients of exp in which
 //the scaling by 2^-power is already absorbed, so x itself need not be scaled.
 template<class T>
 NRVec<typename NRMat_traits<T>::elementtype> exp_aux(const T &x, int &power)
 {
 //coefficients 1/k!, closed by a zero entry which terminates the length search
 //(should better be computed by mathematica to have accurate last digits, chebyshev instead, see exp in glibc)
 static double exptaylor[]={
 1.,
 1.,
 0.5,
 0.1666666666666666666666,
 0.0416666666666666666666,
 0.0083333333333333333333,
 0.0013888888888888888888,
 0.00019841269841269841253,
 2.4801587301587301566e-05,
 2.7557319223985892511e-06,
 2.7557319223985888276e-07,
 2.5052108385441720224e-08,
 2.0876756987868100187e-09,
 1.6059043836821613341e-10,
 1.1470745597729724507e-11,
 7.6471637318198164055e-13,
 4.7794773323873852534e-14,
 2.8114572543455205981e-15,
 1.5619206968586225271e-16,
 8.2206352466243294955e-18,
 4.1103176233121648441e-19,
 0.};
 typedef typename NRMat_traits<T>::elementtype elem_t;
 const double precision=1e-16;
 const double mnorm= NRMat_traits<T>::norm(x);
 power=nextpow2(mnorm);
 const double scale=exp(-log(2.)*power);
 //find how long taylor expansion will be necessary
 const double s=mnorm*scale;
 int n=0;
 double t=1.;
 for(;;)
 	{
 	++n;
 	t*=s;
 	if(!(t*exptaylor[n]>precision)) break; //taylor 0 will terminate in any case
 	}
 //adjust the coefficients in order to avoid scaling the argument
 NRVec<elem_t> taylor2(n+1);
 t=1.;
 for(int i=0;i<=n;i++)
 	{
 	taylor2[i]=exptaylor[i]*t;
 	t*=scale;
 	}
 return taylor2;
 }
 //exponential of x: polynomial with the coefficients from exp_aux, followed by
 //`power` squarings of the result
 template<class T>
 const T exp(const T &x)
 {
 int power;
 //prepare the polynom of and effectively scale T
 NRVec<typename NRMat_traits<T>::elementtype> taylor2=exp_aux(x,power);
 //for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices
 T r=polynom(x,taylor2);
 //power the result back
 int remaining=power;
 while(remaining-- > 0) r=r*r;
 return r;
 }
 //determinant of a square matrix obtained from linear_solve without right-hand
 //sides; a is taken by value because linear_solve overwrites its argument
 template<class MAT>
 const typename NRMat_traits<MAT>::elementtype determinant(MAT a)
 {
 typedef typename NRMat_traits<MAT>::elementtype elem_t;
 elem_t det;
 const bool square = (a.nrows()==a.ncols());
 if(!square) laerror("determinant of non-square matrix");
 linear_solve(a,NULL,&det);
 return det;
 }
 //exp(mat)*vec using only matrix-vector products: the scaled Taylor polynomial
 //from exp_aux is applied to the vector 2^power times in a row
 template<class M, class V>
 const V exptimes(const M &mat, V vec)
 {
 if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)vec.size()) laerror("inappropriate sizes in exptimes");
 int power;
 //prepare the polynom of and effectively scale the matrix
 NRVec<typename NRMat_traits<M>::elementtype> taylor2=exp_aux(mat,power);
 V result(mat.nrows());
 //unfortunately the application has to be repeated many times, unlike if the matrix is stored explicitly
 const int repeats=(1<<power);
 for(int pass=1; pass<=repeats; ++pass)
 	{
 	if(pass>1) vec=result; //apply again to the result of previous application
 	//apply polynom of the matrix to the vector iteratively
 	V y=vec;
 	result=y*taylor2[0];
 	int j=1;
 	while(j<taylor2.size())
 		{
 		y=mat*y;
 		result.axpy(taylor2[j],y);
 		++j;
 		}
 	}
 return result;
 }
--- a/nonclass.cc
+++ b/nonclass.cc
@ -0,0 +1,524 @@
 extern "C" {
 #include "atlas_enum.h"
 #include "clapack.h"
 }
 #include "la.h"
 #ifdef FORTRAN_
 #define FORNAME(x) x##_
 #else
 #define FORNAME(x) x
 #endif
 // Explicit instantiation of the lawritemat template (defined below) for the
 // element types listed after the macro.
 #define INSTANTIZE(T) \
 template void lawritemat(FILE *file,const T *a,int r,int c,const char *form0, \
 		int nodim,int modulo, int issym);
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
 // Offset in the data array of the element with 1-based indices (i,j):
 // row-major full storage with c columns, or - for issym - the lower triangle
 // packed row by row, where the 0-based element (I,J), I>=J, is at I*(I+1)/2+J.
 static inline int lawritemat_offset(int i, int j, int c, int issym)
 {
 	if (!issym) return (i-1)*c+j-1;
 	const int hi = (i >= j ? i : j) - 1;
 	const int lo = (i >= j ? j : i) - 1;
 	return hi*(hi+1)/2+lo;
 }
 // Formatted output of an r x c matrix stored in a.
 //   form0  - text printed verbatim up to the first single '%' ("%%" prints
 //            one '%'); the rest is the fprintf format applied to every
 //            element, which is passed as two doubles (real and imaginary part)
 //   nodim  - 2: print "r c" first, 1: print "c" first (ignored with modulo)
 //   modulo - if nonzero, the matrix is printed in blocks of `modulo` columns
 //            with row and column numbers
 //   issym  - a holds the packed lower triangle of a symmetric matrix
 template <typename T>
 void lawritemat(FILE *file,const T *a,int r,int c,const char *form0,
 		int nodim,int modulo, int issym)
 {
 	int i,j;
 	const char *f = form0;
 	/*print out title before %*/
 	for (;;) {
 		while (*f && *f !='%' ) {fputc(*f++,file);}
 		if (*f=='%' && f[1]=='%') {
 			fputc(*f,file); f+=2; 
 			continue;
 		}
 		break;
 	}
 	/* this has to be avoided when const arguments should be allowed *f=0; */
 	/*use the rest as a format for numbers*/
 	if (modulo) nodim=0;
 	if (nodim==2) fprintf(file,"%d %d\n",r,c);
 	if (nodim==1) fprintf(file,"%d\n",c);
 	if (modulo) {
 		int n1, n2, l, m;
 		char ff[32];
 		/* prepare integer format for column numbering */
 		/* f+1 may only be inspected when a '%' was really found */
 		if (*f == '\0' || sscanf(f+1,"%d",&l) != 1) l=128/modulo;
 		l -= 2;
 		m = l/2;
 		l = l-m;
 		sprintf(ff,"%%%ds%%3d%%%ds", l, m);
 		n1 = 1;
 		while(n1 <= c) {
 			n2=n1+modulo-1;
 			if (n2 > c) n2 = c;
 			/*write block between columns n1 and n2 */
 			fprintf(file,"\n    ");
 			for (i=n1; i<=n2; i++) fprintf(file,ff," ",i," ");
 			fprintf(file,"\n\n");
 			for (i=1; i<=r; i++) {
 				fprintf(file, "%3d ", i);
 				for (j=n1; j<=n2; j++) {
 					const complex<double> z = (complex<double>)a[lawritemat_offset(i,j,c,issym)];
 					fprintf(file, f, z.real(), z.imag());
 					if (j < n2) fputc(' ',file);
 				}
 				fprintf(file, "\n");
 			}
 			n1 = n2+1;
 		}
 	} else {
 		for (i=1; i<=r; i++) {
 			for (j=1; j<=c; j++) {
 				const complex<double> z = (complex<double>)a[lawritemat_offset(i,j,c,issym)];
 				fprintf(file, f, z.real(), z.imag());
 				putc(j<c?' ':'\n',file);
 			}
 		}
 	}
 }
 // LA error handler: prints "LA:ERROR - " followed by the non-null message
 // parts to cerr and terminates the program with exit status 1
 void laerror(const char *s1, const char *s2, const char *s3, const char *s4)
 {
  std::cerr << "LA:ERROR - ";
  if(s1)
    {
    // s1 opens the message, the optional parts follow in order
    std::cerr << s1;
    const char *rest[3] = {s2, s3, s4};
    for(int k=0; k<3; ++k)
      if(rest[k]) std::cerr << rest[k];
    }
  else
    std::cerr << "udefined.";
  std::cerr << endl;
  exit(1);
 }
 //////////////////////
 // LAPACK interface //
 //////////////////////
 // Solves the linear system(s) with the square matrix A by clapack_dgesv.
 // A will be overwritten, B will contain the solutions, A is nxn, B is rhs x n
 // (B may be NULL when only the determinant is wanted).
 // If det is not NULL it receives the determinant of A: the product of the
 // diagonal of the factorized A with the sign given by the pivoting.
 void linear_solve(NRMat<double> &A, NRMat<double> *B, double *det)
 {
 	int r, *ipiv;
 	if (A.nrows() != A.ncols()) laerror("linear_solve() call for non-square matrix");
 	if (B && A.nrows() != B->ncols()) laerror("incompatible matrices in linear_solve()");
 	A.copyonwrite();
 	if (B) B->copyonwrite();
 	ipiv = new int[A.nrows()];
 	r = clapack_dgesv(CblasRowMajor, A.nrows(), B ? B->nrows() : 0, A[0], A.ncols(),
 			ipiv, B ? (double *)(*B) : (double *)0, B ? B->ncols() : A.nrows());
 	if (r < 0) {
 		delete[] ipiv;
 		laerror("illegal argument in lapack_gesv");
 	}
 	if (det && r>=0) {
 		*det = A[0][0];
 		for (int i=1; i<A.nrows(); ++i) *det *= A[i][i];
 		//change sign of det by parity of ipiv permutation: every real
 		//interchange (ipiv[i] != i) flips the sign once
 		//NOTE(review): assumes the 0-based pivot indices of the ATLAS C interface
 		for (int i=0; i<A.nrows(); ++i)
 			if (ipiv[i] != i) *det = -(*det);
 	}
 	delete [] ipiv;
 	if (r>0 && B) laerror("singular matrix in lapack_gesv");
 }
 // The next routines are not available in clapack; the fortran ones will be used
 // with an additional swap/transpose of outputs when needed
 extern "C" void FORNAME(dspsv)(const char *UPLO, const int *N, const int *NRHS,
 		double *AP, int *IPIV, double *B, const int *LDB, int *INFO);
 // Linear solve for a packed symmetric matrix by the fortran routine dspsv.
 // a is overwritten, b (may be NULL) holds the right-hand sides on input and
 // the solutions on output. The determinant returned through det is known to
 // have an unreliable sign, which is reported on cerr.
 void linear_solve(NRSMat<double> &a, NRMat<double> *b, double *det)
 {
 	if (det) cerr << "@@@ sign of the determinant not implemented correctly yet\n";
 	if (b && a.nrows() != b->ncols())
 		laerror("incompatible matrices in symmetric linear_solve()");
 	a.copyonwrite();
 	if (b) b->copyonwrite();
 	int *pivots = new int[a.nrows()];
 	char uplo = 'U';
 	int n = a.nrows();
 	int nrhs = b ? b->nrows() : 0;
 	int ldb = b ? b->ncols() : a.nrows();
 	int info;
 	FORNAME(dspsv)(&uplo, &n, &nrhs, a, pivots, b?(*b)[0]:0, &ldb, &info);
 	if (info < 0) {
 		delete[] pivots;
 		laerror("illegal argument in spsv() call of linear_solve()");
 	}
 	if (det && info >= 0) {
 		double prod = a(0,0);
 		for (int i=1; i<a.nrows(); i++) prod *= a(i,i);
 		for (int i=0; i<a.nrows(); i++)
 			if (pivots[i] != i) prod = -prod;
 		*det = prod;
 	}
 	delete[] pivots;
 	if (info > 0 && b) laerror("singular matrix in linear_solve(SMat&, Mat*, double*");
 }
 extern "C" void FORNAME(dsyev)(const char *JOBZ, const char *UPLO, const int *N,
 		double *A, const int *LDA, double *W, double *WORK, const int *LWORK, int *INFO);
 // Diagonalization of a full matrix by the fortran routine dsyev:
 // a will contain eigenvectors (if eivec), w eigenvalues;
 // with corder the eigenvector matrix is transposed after the call
 void diagonalize(NRMat<double> &a, NRVec<double> &w, const bool eivec, 
 		const bool corder)
 {
 	int n = a.nrows();
 	if (n != a.ncols()) laerror("diagonalize() call with non-square matrix");
 	if (a.nrows() != w.size()) 
 		laerror("inconsistent dimension of eigenvalue vector in diagonalize()");
 	a.copyonwrite();
 	w.copyonwrite();
 	int info = 0;
 	char uplo ='U';
 	char jobz = eivec ? 'V' : 'N';
 	// workspace query: with lwork==-1 the required size is returned in wsize
 	int lwork = -1;
 	double wsize;
 	FORNAME(dsyev)(&jobz, &uplo, &n, a, &n, w, &wsize, &lwork, &info );
 	lwork = (int)wsize;
 	double *work = new double[lwork];
 	FORNAME(dsyev)(&jobz, &uplo, &n, a, &n, w, work, &lwork, &info );
 	delete[] work;
 	if (jobz == 'V' && corder) a.transposeme();
 	if (info < 0) laerror("illegal argument in syev() of diagonalize()");
 	if (info > 0) laerror("convergence problem in syev() of diagonalize()");
 }
 extern "C" void FORNAME(dspev)(const char *JOBZ, const char *UPLO, const int *N,
 		double *AP, double *W, double *Z, const int *LDZ, double *WORK, int *INFO);
 // Diagonalization of a packed symmetric matrix by the fortran routine dspev:
 // v (optional) will contain eigenvectors, w eigenvalues;
 // with corder the eigenvector matrix is transposed after the call
 void diagonalize(NRSMat<double> &a, NRVec<double> &w, NRMat<double> *v,
 		const bool corder)
 {
 	int n = a.nrows();
 	if (v) {
 		if (v->nrows() != v ->ncols() || n != v->nrows())
 			laerror("diagonalize() call with inconsistent dimensions");
 	}
 	if (n != w.size()) laerror("inconsistent dimension of eigenvalue vector");
 	a.copyonwrite();
 	w.copyonwrite();
 	int info = 0;
 	char uplo = 'U';
 	char job = 'n';
 	if (v) job = 'v';
 	double *work = new double[3*n];
 	FORNAME(dspev)(&job, &uplo, &n, a, w, v?(*v)[0]:(double *)0, &n, work,  &info );
 	delete[] work;
 	if (v && corder) v->transposeme();
 	if (info < 0) laerror("illegal argument in spev() of diagonalize()");
 	if (info > 0) laerror("convergence problem in spev() of diagonalize()");
 }
 extern "C" void FORNAME(dgesvd)(const char *JOBU,  const char *JOBVT,  const int *M,
 		const int *N,  double *A, const int *LDA, double *S, double *U, const int *LDU,
 		double *VT, const int *LDVT, double *WORK, const int *LWORK, int *INFO );
 // Singular value decomposition of the m x n matrix a by the fortran routine
 // dgesvd. a is overwritten, s receives the singular values, u (m x m) and
 // v (n x n) are optional and may be NULL; with corder v is transposed at the end.
 void singular_decomposition(NRMat<double> &a, NRMat<double> *u, NRVec<double> &s,
 		NRMat<double> *v, const bool corder)
 {
 	int m = a.nrows();
 	int n = a.ncols();
 	if (u) if (m != u->nrows() || m!= u->ncols())
 		laerror("inconsistent dimension of U Mat in singular_decomposition()");
 	if (s.size() < m && s.size() < n) 
 		laerror("inconsistent dimension of S Vec in singular_decomposition()");
 	if (v) if (n != v->nrows() || n != v->ncols())
 		laerror("inconsistent dimension of V Mat in singular_decomposition()");
 	a.copyonwrite();
 	s.copyonwrite();
 	if (u) u->copyonwrite();
 	if (v) v->copyonwrite();
 	// C-order (transposed) input and swap u,v matrices,
 	// v should be transposed at the end
 	char jobu = u ? 'A' : 'N';
 	char jobv = v ? 'A' : 'N';
 	double work0;
 	int lwork = -1;
 	int r;
 	// first call: workspace query, the required size is returned in work0
 	FORNAME(dgesvd)(&jobv, &jobu, &n, &m, a, &n, s, v?(*v)[0]:0, &n,
 			u?(*u)[0]:0, &m, &work0, &lwork, &r);
 	lwork = (int) work0;
 	double *work = new double[lwork];
 	// second call: the allocated workspace has to be passed here; passing
 	// &work0 again would let LAPACK write lwork doubles into a single one
 	FORNAME(dgesvd)(&jobv, &jobu, &n, &m, a, &n, s, v?(*v)[0]:0, &n,
 			u?(*u)[0]:0, &m, work, &lwork, &r);
 	delete[] work;
 	if (v && corder) v->transposeme();
 	if (r < 0) laerror("illegal argument in gesvd() of singular_decomposition()");
 	if (r > 0) laerror("convergence problem in gesvd() of ingular_decomposition()");
 }
 extern "C" void FORNAME(dgeev)(const char *JOBVL, const char *JOBVR, const int *N,
 		double *A, const int *LDA, double *WR, double *WI, double *VL, const int *LDVL,
 		double *VR, const int *LDVR, double *WORK, const int *LWORK, int *INFO );
 // Eigenproblem of a general real square matrix by the fortran routine dgeev.
 // a is overwritten, wr/wi receive the real and imaginary parts of the
 // eigenvalues, vl and vr (optional, n x n) the left and right eigenvectors in
 // the real storage format of dgeev; with corder they are transposed at the end.
 void gdiagonalize(NRMat<double> &a, NRVec<double> &wr, NRVec<double> &wi,
 		NRMat<double> *vl, NRMat<double> *vr, const bool corder)
 {
 	int n = a.nrows();
 	if (n != a.ncols()) laerror("gdiagonalize() call for a non-square matrix");
 	if (n != wr.size()) 
 		laerror("inconsistent dimension of eigen vector in gdiagonalize()");
 	if (vl) if (n != vl->nrows() || n != vl->ncols())
 		laerror("inconsistent dimension of vl in gdiagonalize()");
 	if (vr) if (n != vr->nrows() || n != vr->ncols())
 		laerror("inconsistent dimension of vr in gdiagonalize()");
 	a.copyonwrite();
 	wr.copyonwrite();
 	wi.copyonwrite();
 	if (vl) vl->copyonwrite();
 	if (vr) vr->copyonwrite();
 	char jobvl = vl ? 'V' : 'N';
 	char jobvr = vr ? 'V' : 'N';
 	double work0;
 	int lwork = -1;
 	int r;
 	// first call: workspace query, the required size is returned in work0
 	// (left and right are swapped because of the transposed C storage order)
 	FORNAME(dgeev)(&jobvr, &jobvl, &n, a, &n, wr, wi, vr?vr[0]:(double *)0,
 			&n, vl?vl[0]:(double *)0, &n, &work0, &lwork, &r);
 	lwork = (int) work0;
 	double *work = new double[lwork];
 	// second call: the allocated workspace has to be passed here; passing
 	// &work0 again would let LAPACK write lwork doubles into a single one
 	FORNAME(dgeev)(&jobvr, &jobvl, &n, a, &n, wr, wi, vr?vr[0]:(double *)0,
 			&n, vl?vl[0]:(double *)0, &n, work, &lwork, &r);
 	delete[] work;
 	if (corder) {
 		if (vl) vl->transposeme();
 		if (vr) vr->transposeme();
 	}
 	if (r < 0) laerror("illegal argument in geev() of gdiagonalize()");
 	if (r > 0) laerror("convergence problem in geev() of gdiagonalize()");
 }
 // Complex interface to the real gdiagonalize(): the eigenvalues are returned
 // as complex numbers in w and the (optional) eigenvectors are assembled into
 // the complex matrices vl, vr. An eigenvalue with nonzero imaginary part takes
 // two consecutive rows i, i+1 of the real result, holding the real and the
 // imaginary part of the vector; the second vector of the pair is the
 // complex conjugate.
 void gdiagonalize(NRMat<double> &a, NRVec< complex<double> > &w,
 		NRMat< complex<double> >*vl, NRMat< complex<double> > *vr)
 {
 	int n = a.nrows();
 	if(n != a.ncols()) laerror("gdiagonalize() call for a non-square matrix");
 	NRVec<double> wr(n), wi(n);
 	NRMat<double> *rvl = vl ? new NRMat<double>(n, n) : 0;
 	NRMat<double> *rvr = vr ? new NRMat<double>(n, n) : 0;
 	gdiagonalize(a, wr, wi, rvl, rvr, 0);
 	//process the results into complex matrices
 	for (int k=0; k<n; k++) w[k] = complex<double>(wr[k], wi[k]);
 	if (rvl || rvr) {
 		for (int i=0; i<n; ) {
 			if (wi[i] == 0) {
 				// real eigenvalue: one real vector
 				if (vl) for (int j=0; j<n; j++) (*vl)[i][j] = (*rvl)[i][j];
 				if (vr) for (int j=0; j<n; j++) (*vr)[i][j] = (*rvr)[i][j];
 				i++;
 				continue;
 			}
 			// complex conjugate pair of eigenvalues
 			if (vl)
 				for (int j=0; j<n; j++) {
 					(*vl)[i][j] = complex<double>((*rvl)[i][j], (*rvl)[i+1][j]);
 					(*vl)[i+1][j] = complex<double>((*rvl)[i][j], -(*rvl)[i+1][j]);
 				} 
 			if (vr)
 				for (int j=0; j<n; j++) {
 					(*vr)[i][j] = complex<double>((*rvr)[i][j], (*rvr)[i+1][j]);
 					(*vr)[i+1][j] = complex<double>((*rvr)[i][j], -(*rvr)[i+1][j]);
 				}
 			i += 2;
 		}
 	}
 	// deleting a null pointer is a no-op
 	delete rvl;
 	delete rvr;
 }
 // real matrix made of the real parts of a complex matrix
 const NRMat<double> realpart(const NRMat< complex<double> > &a)
 {
 	const int rows = a.nrows();
 	const int cols = a.ncols();
 	NRMat<double> result(rows, cols);
 	// the complex data is read as doubles with stride 2, starting at the first real part
 	cblas_dcopy(rows*cols, (const double *)a[0], 2, result, 1);
 	return result;
 }
 // real matrix made of the imaginary parts of a complex matrix
 const NRMat<double> imagpart(const NRMat< complex<double> > &a)
 {
 	const int rows = a.nrows();
 	const int cols = a.ncols();
 	NRMat<double> result(rows, cols);
 	// the complex data is read as doubles with stride 2, starting at the first imaginary part
 	cblas_dcopy(rows*cols, (const double *)a[0]+1, 2, result, 1);
 	return result;
 }
 // complex matrix whose real parts are copied from the real matrix a
 const NRMat< complex<double> > realmatrix (const NRMat<double> &a)
 {
 	const int rows = a.nrows();
 	const int cols = a.ncols();
 	NRMat <complex<double> > result(rows, cols);
 	// the complex data is written as doubles with stride 2, starting at the first real part
 	cblas_dcopy(rows*cols, a, 1, (double *)result[0], 2);
 	return result;
 }
 // complex matrix whose imaginary parts are copied from the real matrix a
 const NRMat< complex<double> > imagmatrix (const NRMat<double> &a)
 {
 	const int rows = a.nrows();
 	const int cols = a.ncols();
 	NRMat< complex<double> > result(rows, cols);
 	// the complex data is written as doubles with stride 2, starting at the first imaginary part
 	cblas_dcopy(rows*cols, a, 1, (double *)result[0]+1, 2);
 	return result;
 }
 // Function f of a general real matrix a, evaluated through its complex
 // eigenvalues and eigenvectors (gdiagonalize). The result has to be real:
 // if the norm of its imaginary part exceeds 1e-10 the routine aborts via laerror.
 // a is taken by value, so the caller's object is not the one passed to gdiagonalize.
 // NOTE(review): the parameter adjust is not used in the body.
 NRMat<double> matrixfunction(NRMat<double> a, complex<double>
 		(*f)(const complex<double> &), const bool adjust)
 {
 	int n = a.nrows();
 	NRMat< complex<double> > u(n, n), v(n, n);
 	NRVec< complex<double> > w(n);
 	gdiagonalize(a, w, &u, &v);
 	// z[i]: product of the i-th rows of u and v (v conjugated)
 	NRVec< complex<double> > z = diagofproduct(u, v, 1, 1);
 	// NOTE(review): f is applied to w[i]/z[i]; if z is meant as a normalization
 	// of the eigenvectors, f(w[i])/z[i] would be expected - confirm
 	for (int i=0; i<a.nrows(); i++) w[i] = (*f)(w[i]/z[i]);
 	u.diagmultl(w);
 	NRMat< complex<double> > r(n, n);
 	// r = op(v)*u with op selected by 'c'
 	r.gemm(0.0, v, 'c', u, 'n', 1.0);
 	// norm of the imaginary parts (every second double, starting at index 1)
 	double inorm = cblas_dnrm2(n*n, (double *)r[0]+1, 2);
 	if (inorm > 1e-10) {
 		cout << "norm = " << inorm << endl;
 		laerror("nonzero norm of imaginary part of real matrixfunction");
 	}
 	return realpart(r);
 }
 // Function f of a real symmetric matrix: a is diagonalized, f is applied to
 // the eigenvalues and the result is assembled by one gemm from the
 // eigenvector matrix and its copy scaled by diagmultl.
 NRMat<double> matrixfunction(NRSMat<double> a, double (*f) (double))
 {
 	const int n = a.nrows();
 	NRVec<double> eival(n);
 	NRMat<double> scaled(n, n);
 	diagonalize(a, eival, &scaled, 0);
 	for (int i=0; i<a.nrows(); i++) {
 		const double fx = f(eival[i]);
 		eival[i] = fx;
 	}
 	// keep the unscaled eigenvectors before scaling the other copy
 	NRMat<double> eivec = scaled;
 	scaled.diagmultl(eival);
 	NRMat<double> r(n, n);
 	r.gemm(0.0, eivec, 't', scaled, 'n', 1.0);
 	return r;
 }
 // non-template wrapper around the complex log, so that its address can be
 // passed as a plain function pointer
 complex<double> myclog (const complex<double> &x) 
 {
 	const complex<double> value = log(x);
 	return value;
 }
 // logarithm of a general real matrix via matrixfunction() with the complex log
 NRMat<double>  log(const NRMat<double> &a)
 {
 	NRMat<double> result = matrixfunction(a, &myclog, 1);
 	return result;
 }
 // Diagonal of the product a*b (or a*b^T if trb) without forming the product:
 // element i is the dot product of row i of a with column i of b (row i of b
 // if trb). conjb has no effect for real matrices.
 const NRVec<double> diagofproduct(const NRMat<double> &a, const NRMat<double> &b,
 		bool trb, bool conjb)
 {
 	const bool compatible = trb
 		? (a.nrows() == b.nrows() && a.ncols() == b.ncols())
 		: (a.nrows() == b.ncols() && a.ncols() == b.nrows());
 	if (!compatible)
 			laerror("incompatible Mats in diagofproduct<double>()");
 	NRVec<double> result(a.nrows());
 	for(int i=0; i<a.nrows(); i++) {
 		if (trb)
 			result[i] = cblas_ddot(a.ncols(), a[i], 1, b[i], 1);
 		else
 			// column i of b: start at element i, step of one row length
 			result[i] = cblas_ddot(a.ncols(), a[i], 1, b[0]+i, b.ncols());
 	}
 	return result;
 }
 // Complex variant: element i is the dot product of row i of a with column i
 // of b (row i of b if trb); with conjb the BLAS routine zdotc is used instead
 // of zdotu, with the vector taken from b as its first argument.
 const NRVec< complex<double> > diagofproduct(const NRMat< complex<double> > &a,
 		const NRMat< complex<double> > &b, bool trb, bool conjb)
 {
 	const bool compatible = trb
 		? (a.nrows() == b.nrows() && a.ncols() == b.ncols())
 		: (a.nrows() == b.ncols() && a.ncols() == b.nrows());
 	if (!compatible)
 			laerror("incompatible Mats in diagofproduct<complex>()");
 	NRVec< complex<double> > result(a.nrows());
 	for(int i=0; i<a.nrows(); i++) {
 		if (trb) {
 			if (conjb) cblas_zdotc_sub(a.ncols(), b[i], 1, a[i], 1, &result[i]);
 			else cblas_zdotu_sub(a.ncols(), b[i], 1, a[i], 1, &result[i]);
 		} else {
 			// column i of b: start at element i, step of one row length
 			if (conjb) cblas_zdotc_sub(a.ncols(), b[0]+i, b.ncols(), a[i], 1, &result[i]);
 			else cblas_zdotu_sub(a.ncols(), b[0]+i, b.ncols(), a[i], 1, &result[i]);
 		}
 	}
 	return result;
 }
 // Trace of the product a*b (or a*b^T if trb) without forming the product.
 double trace2(const NRMat<double> &a, const NRMat<double> &b, bool trb)
 {
 	// the message names this routine (it used to name diagofproduct<complex>)
 	if (trb && (a.nrows() != b.nrows() || a.ncols() != b.ncols()) ||
 				!trb && (a.nrows() != b.ncols() || a.ncols() != b.nrows()))
 			laerror("incompatible Mats in trace2()");
 	// tr(a*b^T) is the dot product of all elements taken in storage order
 	if (trb) return cblas_ddot(a.nrows()*a.ncols(), a, 1, b, 1);
 	double sum = 0.0;
 	// otherwise add the dot products of row i of a with column i of b
 	for (int i=0; i<a.nrows(); i++)
 		sum += cblas_ddot(a.ncols(), a[i], 1, b[0]+i, b.ncols());
 	return sum;
 }
 // Trace of the product of two packed symmetric matrices: twice the dot
 // product of the packed data; unless diagscaled is set, the products of the
 // diagonal elements are then subtracted once.
 double trace2(const NRSMat<double> &a, const NRSMat<double> &b,
 		const bool diagscaled)
 {
 	if (a.nrows() != b.nrows()) laerror("incompatible SMats in trace2()");
 	const int npacked = a.nrows()*(a.nrows()+1)/2;
 	double r = 2.0*cblas_ddot(npacked, a, 1, b, 1);
 	if (!diagscaled) {
 		for (int i=0; i<a.nrows(); i++) r -= a(i,i)*b(i,i);
 	}
 	return r;
 }
--- a/nonclass.h
+++ b/nonclass.h
@ -0,0 +1,85 @@
 #include "vec.h"
 #include "smat.h"
 #include "mat.h"
 //MISC
 // Helper templates declared here and defined elsewhere.
 // NOTE(review): judging by the names these build a diagonal matrix from a vector and
 // extract a row (lineof), a column or the diagonal of a matrix - confirm against the definitions.
 template <class T> extern const NRMat<T> diagonalmatrix(const NRVec<T> &x);
 template <class T> extern const NRVec<T> lineof(const NRMat<T> &x, const int i); 
 template <class T> extern const NRVec<T> columnof(const NRMat<T> &x, const int i);
 template <class T> extern const NRVec<T> diagonalof(const NRMat<T> &x); 
 //commutator of two full matrices accumulated by two gemm calls on one result matrix
 //trx/tryy request the transposed form of x/y
 //NOTE(review): assumes NRMat::gemm(beta,a,transa,b,transb,alpha) computes this := alpha*op(a)*op(b) + beta*this
 template<class T>
 inline const NRMat<T> commutator ( const NRMat<T> &x, const NRMat<T> &y, const bool trx=0, const bool tryy=0)
 {
 const char opx = trx?'t':'n';
 const char opy = tryy?'t':'n';
 const int rows = trx?x.ncols():x.nrows();
 const int cols = tryy?y.nrows():y.ncols();
 NRMat<T> result(rows, cols);
 result.gemm((T)0,x,opx,y,opy,(T)1); //first product, previous content scaled by zero
 result.gemm((T)1,y,opy,x,opx,(T)-1); //second product with the factors swapped, weight -1
 return result;
 }
 //anticommutator of two full matrices accumulated by two gemm calls on one result matrix
 //trx/tryy request the transposed form of x/y
 //NOTE(review): assumes NRMat::gemm(beta,a,transa,b,transb,alpha) computes this := alpha*op(a)*op(b) + beta*this
 template<class T>
 inline const NRMat<T> anticommutator ( const NRMat<T> &x, const NRMat<T> &y, const bool trx=0, const bool tryy=0)
 {
 const char opx = trx?'t':'n';
 const char opy = tryy?'t':'n';
 const int rows = trx?x.ncols():x.nrows();
 const int cols = tryy?y.nrows():y.ncols();
 NRMat<T> result(rows, cols);
 result.gemm((T)0,x,opx,y,opy,(T)1); //first product, previous content scaled by zero
 result.gemm((T)1,y,opy,x,opx,(T)1); //second product with the factors swapped, weight +1
 return result;
 }
 //////////////////////
 // LAPACK interface //
 //////////////////////
 // declare_la(T) emits the extern declarations of the dense linear algebra helpers
 // (diagofproduct, trace2, linear_solve, diagonalize, singular_decomposition) for one
 // element type T, including their default arguments. It is expanded below for
 // double and complex<double>; the definitions live in other translation units.
 #define declare_la(T) \
 extern const  NRVec<T> diagofproduct(const NRMat<T> &a, const NRMat<T> &b,\
 		bool trb=0, bool conjb=0); \
 extern T trace2(const NRMat<T> &a, const NRMat<T> &b, bool trb=0); \
 extern T trace2(const NRSMat<T> &a, const NRSMat<T> &b, const bool diagscaled=0);\
 extern void linear_solve(NRMat<T> &a, NRMat<T> *b, double *det=0); \
 extern void linear_solve(NRSMat<T> &a, NRMat<T> *b, double *det=0); \
 extern void diagonalize(NRMat<T> &a, NRVec<T> &w, const bool eivec=1,\
 		const bool corder=1); \
 extern void diagonalize(NRSMat<T> &a, NRVec<T> &w, NRMat<T> *v, const bool corder=1);\
 extern void singular_decomposition(NRMat<T> &a, NRMat<T> *u, NRVec<T> &s,\
 		NRMat<T> *v, const bool corder=1);
 declare_la(double)
 declare_la(complex<double>)
 // Separate declarations
 // These are not generated by declare_la because their argument lists differ
 // between the real form (two real vectors wr, wi) and the complex form (one complex vector w).
 // NOTE(review): presumably wr/wi (or w) receive the eigenvalues and vl/vr the left/right
 // eigenvectors of a general matrix - confirm against the definitions.
 extern void gdiagonalize(NRMat<double> &a, NRVec<double> &wr, NRVec<double> &wi,
 		NRMat<double> *vl, NRMat<double> *vr, const bool corder=1);
 extern void gdiagonalize(NRMat<double> &a, NRVec< complex<double> > &w,
 		 NRMat< complex<double> >*vl, NRMat< complex<double> > *vr);
 // matrixfunction takes its matrix by value together with a pointer to a scalar function:
 // a real scalar function for the symmetric case, a complex one for the general case.
 extern NRMat<double> matrixfunction(NRSMat<double> a, double (*f) (double));
 extern NRMat<double> matrixfunction(NRMat<double> a, complex<double> (*f)(const complex<double> &),const bool adjust=0);
 //functions on matrices
 //sqrt of a symmetric matrix: forwards to matrixfunction() with the scalar sqrt(double)
 inline NRMat<double>  sqrt(const NRSMat<double> &a)
 {
 	double (*scalarfn)(double) = &sqrt;
 	return matrixfunction(a, scalarfn);
 }
 //log of a symmetric matrix: forwards to matrixfunction() with the scalar log(double)
 inline NRMat<double>  log(const NRSMat<double> &a)
 {
 	double (*scalarfn)(double) = &log;
 	return matrixfunction(a, scalarfn);
 }
 // log overload for a general (non-symmetric) real matrix, defined elsewhere
 extern NRMat<double> log(const NRMat<double> &a);
 // conversions between complex and real dense matrices, defined elsewhere
 // NOTE(review): presumably realpart/imagpart extract the respective parts and
 // realmatrix/imagmatrix build a complex matrix from a real one - confirm against the definitions
 extern const NRMat<double> realpart(const NRMat< complex<double> >&);
 extern const NRMat<double> imagpart(const NRMat< complex<double> >&);
 extern const NRMat< complex<double> > realmatrix (const NRMat<double>&);
 extern const NRMat< complex<double> > imagmatrix (const NRMat<double>&);
 //inverse by means of linear solve
 //a is received by value, so the solver is not handed the caller's matrix object;
 //det is forwarded unchanged to linear_solve()
 template<typename T>
 const NRMat<T> inverse(NRMat<T> a, T *det=0)
 {
 #ifdef DEBUG
 	if(a.nrows()!=a.ncols()) laerror("inverse() for non-square matrix");
 #endif
 	const int dim = a.nrows();
 	NRMat<T> solution(dim,dim);
 	solution = (T)1.;	//right-hand side built by assigning the scalar one to the matrix
 	linear_solve(a, &solution, det);
 	return solution;
 }
--- a/smat.cc
+++ b/smat.cc
@ -0,0 +1,399 @@
 #include "smat.h"
 // TODO
 // specialize unary minus
 //////////////////////////////////////////////////////////////////////////////
 ////// forced instantization in the corresponding object file
 template NRSMat<double>;
 template NRSMat< complex<double> >;
 /*
 * Templates first, specializations for BLAS next
 *
 */
 // conversion ctor, symmetrize general Mat into SMat
 // Each stored element (i,j), j<=i, becomes 0.5*(rhs[i][j]+rhs[j][i]).
 // The new object owns freshly allocated storage with reference count 1.
 template <typename T>
 NRSMat<T>::NRSMat(const NRMat<T> &rhs)
 {
 	// the dimension has to come from rhs before it is tested or used through NN2
 	nn = rhs.nrows();
 #ifdef DEBUG
 	if (nn != rhs.ncols()) laerror("attempt to convert non-square Mat to SMat");
 #endif
 	count = new int;
 	*count = 1;
 	v = new T[NN2];
 	int i, j, k=0;
 	for (i=0; i<nn; i++)
 		for (j=0; j<=i;j++) v[k++] = 0.5 * (rhs[i][j] + rhs[j][i]);
 }
 // dtor: gives up one reference; whoever drops the last one frees data and counter
 template <typename T>
 NRSMat<T>::~NRSMat()
 {
 	if (count) {
 		*count -= 1;
 		if (*count <= 0) {
 			if (v) delete[] v;
 			delete count;
 		}
 	}
 }
 // assignment with a physical copy
 // *this ends up with its own storage (reference count 1) holding a copy of the
 // elements of rhs. Calls laerror() when rhs has no data. Self-assignment is a no-op.
 template <typename T>
 NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &rhs)
 {
 	if (this != &rhs) {
 		if(!rhs.v) laerror("unallocated rhs in NRSMat operator |=");
 		if(count)
 			if(*count > 1) {	// detach from the other
 				--(*count);
 				nn = 0;
 				count = 0;
 				v = 0;
 			}
 		if (nn != rhs.nn) {
 			if(v) delete [] (v);
 			v = 0;	// otherwise the freed block would be reused below instead of reallocated
 			nn = rhs.nn;
 		}
 		if (!v) v = new T[NN2];
 		if (!count) count = new int;
 		*count = 1;
 		memcpy(v, rhs.v, NN2*sizeof(T));
 	}
 	return *this;
 }
 // assignment: *this lets go of its data and then shares the storage of rhs
 template <typename T>
 NRSMat<T> & NRSMat<T>::operator=(const NRSMat<T> & rhs)
 {
 	if (this != &rhs) {
 		// drop our reference, freeing the storage if we were the last owner
 		if (count && --(*count) == 0) {
 			delete [] v;
 			delete count;
 		}
 		nn = rhs.nn;
 		v = rhs.v;
 		count = rhs.count;
 		if (count) ++(*count);
 	}
 	return *this;
 }
 // assign to diagonal: every diagonal element is set to a, other elements are not touched
 template <typename T>
 NRSMat<T> & NRSMat<T>::operator=(const T &a)
 {
 	copyonwrite();
 	for (int i=0; i<nn; i++) {
 		const int diag = i*(i+1)/2+i;	// packed position of element (i,i)
 		v[diag] = a;
 	}
 	return *this;
 }
 // unary minus: new matrix of the same dimension holding the negated elements
 template <typename T>
 const NRSMat<T> NRSMat<T>::operator-() const
 {
 	NRSMat<T> negated(nn);
 	for(int idx=0; idx<NN2; idx++) {
 		negated.v[idx] = -v[idx];
 	}
 	return negated;
 }
 // trace of Smat: sum of the diagonal elements
 template <typename T>
 const T NRSMat<T>::trace() const
 {
 	T sum = 0;
 	// the packed position of (i,i) grows by i+2 from one row to the next
 	for (int i=0, diag=0; i<nn; diag += i+2, i++) sum += v[diag];
 	return sum;
 }
 // make a private instance of the Smat: when the storage is shared, leave it to
 // the other owners and continue with a freshly allocated copy
 template <typename T>
 void NRSMat<T>::copyonwrite()
 {
 #ifdef DEBUG
 	if (!count) laerror("probably an assignment to undefined Smat");
 #endif
 	if (*count <= 1) return;	// already the only owner
 	--(*count);
 	count = new int;
 	*count = 1;
 	T *fresh = new T[NN2];
 	memcpy(fresh, v, NN2*sizeof(T));
 	v = fresh;
 }
 // resize Smat to dimension n; the element values are not preserved when new
 // storage has to be allocated
 template <typename T>
 void NRSMat<T>::resize(const int n)
 {
 #ifdef DEBUG
 	if (n <= 0) laerror("illegal matrix dimension in resize of Smat");
 #endif
 	if (count && *count > 1) {	//shared storage: leave it to the other owners
 		--(*count);
 		count = 0;
 		v = 0;
 		nn = 0;
 	}
 	if (count) {
 		//sole owner: reallocate only when the dimension really changes
 		if (n != nn) {
 			nn = n;
 			delete[] v;
 			v = new T[NN2];
 		}
 		return;
 	}
 	//undefined so far or just detached: start from scratch
 	count = new int;
 	*count = 1;
 	nn = n;
 	v = new T[NN2];
 }
 // write matrix to the file with specific format; the work is done by lawritemat()
 template <typename T>
 void NRSMat<T>::fprintf(FILE *file, const char *format, const int modulo) const
 {
 	const T *data = (const T *)(*this);
 	lawritemat(file, data, nn, nn, format, 2, modulo, 1);
 }
 // read matrix from the file with specific format
 // Expects two equal dimensions followed by all n*n elements; each element is
 // stored through (i,j) access. Calls laerror() on any read failure.
 template <class T>
 void NRSMat<T>::fscanf(FILE *f, const char *format)
 {
 	int rows, cols;
 	const int got = std::fscanf(f,"%d %d",&rows,&cols);
 	if (got != 2)
 		laerror("cannot read matrix dimensions in SMat::fscanf");
 	if (rows != cols) laerror("different dimensions of SMat");
 	resize(rows);
 	for (int i=0; i<rows; i++) {
 		for (int j=0; j<rows; j++) {
 			T *slot = &((*this)(i,j));
 			if (std::fscanf(f,format,slot) != 1)
 				laerror("Smat - cannot read matrix element");
 		}
 	}
 }
 /*
 * BLAS specializations for double and complex<double>
 */
 // SMat * Mat
 // One packed symmetric matrix-vector product (dspmv) per column of rhs; the
 // columns of rhs and of the result are addressed with stride rhs.ncols().
 const NRMat<double> NRSMat<double>::operator*(const NRMat<double> &rhs) const
 {
 #ifdef DEBUG
 	if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat");
 #endif
 	const int ncol = rhs.ncols();
 	NRMat<double> product(nn, ncol);
 	for (int col=0; col<ncol; col++) {
 		cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs[0]+col, ncol,
 				0.0, product[0]+col, ncol);
 	}
 	return product;
 }
 // complex SMat * Mat: one packed hermitean matrix-vector product (zhpmv) per
 // column of rhs; columns are addressed with stride rhs.ncols()
 const NRMat< complex<double> >
 NRSMat< complex<double> >::operator*(const NRMat< complex<double> > &rhs) const
 {
 #ifdef DEBUG
 	if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat");
 #endif
 	const int ncol = rhs.ncols();
 	NRMat< complex<double> > product(nn, ncol);
 	for (int col=0; col<ncol; col++) {
 		cblas_zhpmv(CblasRowMajor, CblasLower, nn, &CONE, v, rhs[0]+col, ncol,
 				&CZERO, product[0]+col, ncol);
 	}
 	return product;
 }
 // SMat * SMat
 // Product of two packed symmetric matrices, returned as a full matrix.
 // Both operands are read directly from their packed storage, where row i occupies
 // i+1 consecutive elements. The product is accumulated in four passes, each of
 // which walks the packed rows with the pointers p (this) and q (rhs) and adds one
 // family of terms to the result.
 const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat");
 #endif
 	// accumulator; NOTE(review): presumably the (0.0, nn, nn) constructor fills it with zeros - confirm
 	NRMat<double> result(0.0, nn, nn);
 	double *p, *q;
 	// pass 1: p steps through the elements of packed row i of this one by one;
 	// for the k-th of them, packed row k of rhs (k+1 elements) is scaled by it
 	// and added to the beginning of result row i
 	p = v;
 	for (int i=0; i<nn;i++) {
 		q = rhs.v;
 		for (int k=0; k<=i; k++) {
 			cblas_daxpy(k+1, *p++, q, 1, result[i], 1);
 			q += k+1;
 		}
 	}
 	// pass 2: for every column j>=1, result[i][j] receives the dot product of the
 	// first min(i+1,j) elements of packed row i of this and packed row j of rhs
 	p = v;
 	for (int i=0; i<nn;i++) {
 		q = rhs.v+1;	// start of packed row 1 of rhs
 		for (int j=1; j<nn; j++) {
 			result[i][j] += cblas_ddot(i+1<j ? i+1 : j, p, 1, q, 1);
 			q += j+1;
 		}
 		p += i+1;
 	}
 	// pass 3: rank-one update (dger) of the leading i x (i+1) corner of result
 	// with the first i elements of packed row i of this and packed row i of rhs
 	p = v; 
 	q = rhs.v;
 	for (int i=0; i<nn; i++) {
 		cblas_dger(CblasRowMajor, i, i+1, 1., p, 1, q, 1, result, nn);
 		p += i+1;
 		q += i+1;
 	}
 	// pass 4: column j of result (addressed with stride nn) receives the first i
 	// elements of packed row i of this, scaled by one element of packed row j of rhs;
 	// q starts at packed row 2 of rhs and the pre-increment skips the first element of each row
 	q = rhs.v+3;
 	for (int j=2; j<nn; j++) {
 		p = v+1;	// start of packed row 1 of this
 		for (int i=1; i<j; i++) {
 			cblas_daxpy(i, *++q, p, 1, result[0]+j, nn);
 			p += i+1;
 		}
 		q += 2;	// step over the rest of the row to the start of the next packed row
 	}
 	return result;
 }
 // complex SMat * SMat: rhs is first converted to a full matrix, then the
 // SMat * Mat product is reused
 const NRMat< complex<double> > 
 NRSMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat");
 #endif
 	const NRMat< complex<double> > fullrhs(rhs);
 	return *this * fullrhs;
 }
 // S dot S: dot product of the packed data of both matrices taken as plain vectors
 const double NRSMat<double>::dot(const NRSMat<double> &rhs) const
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("dot of incompatible SMat's");
 #endif
 	const int packedlen = NN2;
 	return cblas_ddot(packedlen, v, 1, rhs.v, 1);
 }
 // complex S dot S: conjugating dot product (zdotc) of the packed data of both
 // matrices taken as plain vectors of NN2 elements, like the real version
 const complex<double> 
 NRSMat< complex<double> >::dot(const NRSMat< complex<double> > &rhs) const
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("dot of incompatible SMat's");
 #endif
 	complex<double> dot;
 	// the packed storage holds NN2 elements; using nn covered only its beginning
 	cblas_zdotc_sub(NN2, (void *)v, 1, (void *)rhs.v, 1, (void *)(&dot));
 	return dot;
 }
 // S * x: packed symmetric matrix times vector (dspmv), returned as a new vector
 const NRVec<double> NRSMat<double>::operator*(const NRVec<double> &rhs) const
 {
 #ifdef DEBUG
 	if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec");
 #endif
 	NRVec<double> product(nn);
 	cblas_dspmv(CblasRowMajor, CblasLower, nn,
 			1.0, v, rhs, 1,
 			0.0, product, 1);
 	return product;
 }
 // complex S * x: packed hermitean matrix times vector (zhpmv), returned as a new vector
 const NRVec< complex<double> >
 NRSMat< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
 {
 #ifdef DEBUG
 	if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec");
 #endif
 	NRVec< complex<double> > product(nn);
 	cblas_zhpmv(CblasRowMajor, CblasLower, nn,
 			(void *)(&CONE), (void *)v, (const void *)rhs, 1,
 			(void *)(&CZERO), (void *)product, 1);
 	return product;
 }
 // norm of the matrix
 // Square root of the sum of squares of the stored (packed) elements; a nonzero
 // scalar is subtracted from the diagonal elements first.
 const double  NRSMat<double>::norm(const double scalar) const
 {
 	if (scalar == 0) return cblas_dnrm2(NN2, v, 1);
 	double sumsq = 0;
 	int pos = 0;
 	for (int i=0; i<nn; ++i) {
 		for (int j=0; j<=i; ++j) {
 			double elem = v[pos++];
 			if (i == j) elem -= scalar;
 			sumsq += elem*elem;
 		}
 	}
 	return sqrt(sumsq);
 }
 // complex version of the norm: square root of the sum of squared moduli of the
 // stored (packed) elements; a nonzero scalar is subtracted from the diagonal first
 const double
 NRSMat< complex<double> >::norm(const complex<double> scalar) const
 {
 	const bool noshift = (scalar.real() == 0 && scalar.imag() == 0);
 	if (noshift)
 		return cblas_dznrm2(NN2, (void *)v, 1);
 	double sumsq = 0;
 	int pos = 0;
 	for (int i=0; i<nn; ++i) {
 		for (int j=0; j<=i; ++j) {
 			complex<double> elem = v[pos++];
 			if (i == j) elem -= scalar;
 			sumsq += elem.real()*elem.real() + elem.imag()*elem.imag();
 		}
 	}
 	return sqrt(sumsq);
 }
 // axpy: this += alpha * x, performed by daxpy on the packed data
 void NRSMat<double>::axpy(const double alpha, const NRSMat<double> & x)
 {
 #ifdef DEBUG
 	if (nn != x.nn) laerror("axpy of incompatible SMats");
 #endif
 	copyonwrite();	// do not modify storage shared with other objects
 	const int packedlen = NN2;
 	cblas_daxpy(packedlen, alpha, x.v, 1, v, 1);
 }
 // complex axpy: this += alpha * x, performed by zaxpy on all NN2 packed elements,
 // like the real version
 void NRSMat< complex<double> >::axpy(const complex<double> alpha,
 			const NRSMat< complex<double> > & x)
 {
 #ifdef DEBUG
 	if (nn != x.nn) laerror("axpy of incompatible SMats");
 #endif
 	copyonwrite();
 	// the packed storage holds NN2 elements; using nn updated only its beginning
 	cblas_zaxpy(NN2, (void *)(&alpha), (void *)x.v, 1, (void *)v, 1);
 }
 // stream output: the dimension written twice on the first line, then the full
 // square matrix row by row, elements separated by spaces
 export template <class T>
 ostream& operator<<(ostream &s, const NRSMat<T> &x)
 {
 	const int n = x.nrows();
 	s << n << ' ' << n << '\n';
 	for (int row=0; row<n; row++) {
 		for (int col=0; col<n; col++) {
 			s << x(row,col);
 			s << (col==n-1 ? '\n' : ' ');
 		}
 	}
 	return s;
 }
 // stream input: reads two dimensions, which must agree, resizes x and then reads
 // all elements of the square matrix row by row through (i,j) access
 export template <class T>
 istream& operator>>(istream  &s, NRSMat<T> &x)
 {
 	int rows, cols;
 	s >> rows >> cols;
 	if(rows!=cols) laerror("input symmetric matrix not square");
 	x.resize(rows);
 	for (int i=0; i<rows; i++) {
 		for (int j=0; j<cols; j++) {
 			s >> x(i,j);
 		}
 	}
 	return s;
 }
 //////////////////////////////////////////////////////////////////////////////
 //// forced instantiation in the corresponding object file
 // INSTANTIZE(T) explicitly instantiates both stream operators for NRSMat<T>.
 // The last line of the macro body must not end with a backslash, otherwise the
 // first use below is glued into the macro definition itself.
 #define INSTANTIZE(T) \
 template ostream & operator<<(ostream &s, const NRSMat< T > &x); \
 template istream & operator>>(istream  &s, NRSMat< T > &x);
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
--- a/smat.h
+++ b/smat.h
@ -0,0 +1,303 @@
 #ifndef _LA_SMAT_H_
 #define _LA_SMAT_H_
 #include "vec.h"
 #include "mat.h"
 // Number of elements of a packed triangle of dimension nn. The macro expands to an
 // expression in the name nn, so it only works where a variable or member nn is in scope.
 #define NN2 (nn*(nn+1)/2)
 // Symmetric (or complex hermitean) matrix kept in packed form.
 // The data array v is shared between objects through the counter *count:
 // the copy constructor and operator= share storage, operator|= and copyonwrite()
 // produce private copies.
 template <class T>
 class NRSMat { // symmetric or complex hermitean matrix in packed form
 protected:
 	int nn;		// dimension of the matrix
 	T *v;		// packed data, NN2 elements
 	int *count;	// number of objects sharing v, or 0 for an undefined matrix
 public:
 	friend class NRVec<T>;
 	friend class NRMat<T>;
 	// no class qualification on a constructor declared inside the class body
 	inline NRSMat() : nn(0),v(0),count(0) {};
 	inline explicit NRSMat(const int n);			// Zero-based array
 	inline NRSMat(const T &a, const int n);	//Initialize to constant
 	inline NRSMat(const T *a, const int n);	// Initialize to array
 	inline NRSMat(const NRSMat &rhs);		// Copy constructor
 	explicit NRSMat(const NRMat<T> &rhs);		// symmetric part of general matrix
 	explicit NRSMat(const NRVec<T> &rhs, const int n); //construct matrix from vector
 	NRSMat & operator|=(const NRSMat &rhs);	//assignment to a new copy
 	NRSMat & operator=(const NRSMat &rhs);	//assignment
 	NRSMat & operator=(const T &a);		//assign a to diagonal
 	inline NRSMat & operator*=(const T &a);
 	inline NRSMat & operator+=(const T &a); 
 	inline NRSMat & operator-=(const T &a); 
 	inline NRSMat & operator+=(const NRSMat &rhs); 
 	inline NRSMat & operator-=(const NRSMat &rhs); 
 	const NRSMat operator-() const; //unary minus
 	inline int getcount() const {return count?*count:0;}
 	inline const NRSMat operator*(const T &a) const;
 	inline const NRSMat operator+(const T &a) const;
 	inline const NRSMat operator-(const T &a) const;
 	inline const NRSMat operator+(const NRSMat &rhs) const; 
 	inline const NRSMat operator-(const NRSMat &rhs) const;
 	inline const NRMat<T> operator+(const NRMat<T> &rhs) const; 
 	inline const NRMat<T> operator-(const NRMat<T> &rhs) const; 
 	const NRMat<T> operator*(const NRSMat &rhs) const; // SMat*SMat
 	const NRMat<T> operator*(const NRMat<T> &rhs) const; // SMat*Mat 
 	const T dot(const NRSMat &rhs) const; // Smat.Smat
 	const NRVec<T> operator*(const NRVec<T> &rhs) const; 
 	inline const T& operator[](const int ij) const;	// access by packed index
 	inline T& operator[](const int ij);
 	inline const T& operator()(const int i, const int j) const;	// access by row and column
 	inline T& operator()(const int i, const int j);
 	inline int nrows() const;
 	inline int ncols() const;
 	const double norm(const T scalar=(T)0) const;
 	void axpy(const T alpha, const NRSMat &x); // this+= a*x
 	inline const T amax() const;
 	const T trace() const;
 	void copyonwrite();
 	void resize(const int n);
 	inline operator T*(); //get a pointer to the data
 	inline operator const T*() const; //get a pointer to the data
 	~NRSMat();
 	void fprintf(FILE *f, const char *format, const int modulo) const; 
 	void fscanf(FILE *f, const char *format); 
 //members concerning sparse matrix
 	explicit NRSMat(const SparseMat<T> &rhs);               // dense from sparse
 	inline void simplify() {}; //just for compatibility with sparse ones
 };
 // INLINES
 // ctors
 // dimension-only ctor: allocates the packed storage without filling it and
 // starts the reference count at one
 template <typename T>
 inline NRSMat<T>::NRSMat(const int n)
 	: nn(n), v(new T[NN2]), count(new int(1))
 {
 }
 // ctor filling every stored element with the constant a
 template <typename T>
 inline NRSMat<T>::NRSMat(const T& a, const int n) : nn(n),
 	        v(new T[NN2]), count(new int)
 {
 	*count =1;
 	// new T[] does not initialize built-in element types, so the fill must also
 	// happen for a == 0; skipping it left the matrix with indeterminate values
 	for(int i=0; i<NN2; i++) v[i] = a;
 }
 // ctor copying NN2 packed elements from the plain array a into new storage
 template <typename T>
 inline NRSMat<T>::NRSMat(const T *a, const int n)
 	: nn(n), v(new T[NN2]), count(new int(1))
 {
 	const int nbytes = NN2*sizeof(T);
 	memcpy(v, a, nbytes);
 }
 // copy constructor: shares the storage of rhs and registers one more owner
 template <typename T>
 inline NRSMat<T>::NRSMat(const NRSMat<T> &rhs)
 	: nn(rhs.nn), v(rhs.v), count(rhs.count)
 {
 	if (count) ++(*count);
 }
 // type conversion from a vector: the matrix of dimension n shares the data and
 // the reference counter of rhs, no elements are copied
 template <typename T>
 NRSMat<T>::NRSMat(const NRVec<T> &rhs, const int n)
 	: nn(n), v(rhs.v), count(rhs.count)
 {
 #ifdef DEBUG
 	if (NN2 != rhs.size())
 		laerror("matrix dimensions incompatible with vector length");
 #endif
 	++(*count);
 }
 // S *= a: scales all packed elements by a (dscal) after detaching shared storage
 inline NRSMat<double> & NRSMat<double>::operator*=(const double & a)
 {
 	copyonwrite();
 	const int packedlen = NN2;
 	cblas_dscal(packedlen, a, v, 1);
 	return *this;
 }
 // complex S *= a: scales all NN2 packed elements by a (zscal) after detaching
 // shared storage, like the real version
 inline NRSMat< complex<double> > &
 NRSMat< complex<double> >::operator*=(const complex<double> & a)
 {
 	copyonwrite();
 	// the packed storage holds NN2 elements; using nn scaled only its beginning
 	cblas_zscal(NN2, (void *)(&a), (void *)v, 1);
 	return *this;
 }
 // S += D: adds the scalar a to every diagonal element
 template <typename T>
 inline NRSMat<T> & NRSMat<T>::operator+=(const T &a)
 {
 	copyonwrite();
 	// the packed position of (i,i) grows by i+2 from one row to the next
 	for (int i=0, diag=0; i<nn; diag += i+2, i++) v[diag] += a;
 	return *this;
 }
 // S -= D: subtracts the scalar a from every diagonal element
 template <typename T>
 inline NRSMat<T> & NRSMat<T>::operator-=(const T &a)
 {
 	copyonwrite();
 	// the packed position of (i,i) grows by i+2 from one row to the next
 	for (int i=0, diag=0; i<nn; diag += i+2, i++) v[diag] -= a;
 	return *this;
 }
 // S += S: elementwise sum of the packed data by daxpy with weight +1
 inline NRSMat<double> &
 NRSMat<double>::operator+=(const NRSMat<double> & rhs)
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator+=");
 #endif
 	copyonwrite();
 	const int packedlen = NN2;
 	cblas_daxpy(packedlen, 1.0, rhs.v, 1, v, 1);
 	return *this;
 }
 // complex S += S: elementwise sum of the packed data by zaxpy with weight CONE.
 // Declared inline like its siblings because the definition lives in a header.
 inline NRSMat< complex<double> > &
 NRSMat< complex<double> >::operator+=(const NRSMat< complex<double> > & rhs)
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator+=");
 #endif
 	copyonwrite();
 	// BLAS needs the data pointers themselves, not the addresses of the
 	// pointer members (&rhs.v and &v were passed before)
 	cblas_zaxpy(NN2, (void *)(&CONE), (void *)rhs.v, 1, (void *)v, 1);
 	return *this;
 }
 // S -= S: elementwise difference of the packed data by daxpy with weight -1
 inline NRSMat<double> &
 NRSMat<double>::operator-=(const NRSMat<double> & rhs)
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator-=");
 #endif
 	copyonwrite();
 	const int packedlen = NN2;
 	cblas_daxpy(packedlen, -1.0, rhs.v, 1, v, 1);
 	return *this;
 }
 // complex S -= S: elementwise difference of the packed data by zaxpy with weight CMONE
 inline NRSMat< complex<double> > &
 NRSMat< complex<double> >::operator-=(const NRSMat< complex<double> > & rhs)
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator-=");
 #endif
 	copyonwrite();
 	// BLAS needs the data pointers themselves, not the addresses of the
 	// pointer members (&rhs.v and &v were passed before)
 	cblas_zaxpy(NN2, (void *)(&CMONE), (void *)rhs.v, 1, (void *)v, 1);
 	return *this;
 }
 // SMat + Mat: a copy of the full matrix with this symmetric matrix added to it
 template <typename T>
 inline const NRMat<T> NRSMat<T>::operator+(const NRMat<T> &rhs) const
 {
 	NRMat<T> sum(rhs);
 	sum += *this;
 	return sum;
 }
 // SMat - Mat: the negated full matrix with this symmetric matrix added to it
 template <typename T>
 inline const NRMat<T> NRSMat<T>::operator-(const NRMat<T> &rhs) const
 {
 	NRMat<T> difference(-rhs);
 	difference += *this;
 	return difference;
 }
 // access the element, linear array case
 // writable access by packed index; DEBUG builds refuse shared storage, an index
 // outside the packed range and unallocated data
 template <typename T>
 inline T & NRSMat<T>::operator[](const int ij)
 {
 #ifdef DEBUG
 	if (*count != 1) laerror("lval [] with count > 1 in Smat");
 	if (!(ij>=0 && ij<NN2)) laerror("SMat [] out of range");
 	if (!v) laerror("[] for unallocated Smat");
 #endif
 	return *(v+ij);
 }
 // read-only access by packed index; DEBUG builds refuse an index outside the
 // packed range and unallocated data
 template <typename T>
 inline const T & NRSMat<T>::operator[](const int ij) const
 {
 #ifdef DEBUG
 	if (!(ij>=0 && ij<NN2)) laerror("SMat [] out of range");
 	if (!v) laerror("[] for unallocated Smat");
 #endif
 	return *(v+ij);
 }
 // access the element, 2-dim array case
 // writable access by row and column; the larger of the two indices selects the
 // packed row, the smaller one the position within it
 template <typename T>
 inline T & NRSMat<T>::operator()(const int i, const int j)
 {
 #ifdef DEBUG
 	if (*count != 1) laerror("lval (i,j) with count > 1 in Smat");
 	if (i<0 || i>=nn || j<0 || j>=nn) laerror("SMat (i,j) out of range");
 	if (!v) laerror("(i,j) for unallocated Smat");
 #endif
 	const int hi = i>=j ? i : j;
 	const int lo = i>=j ? j : i;
 	return v[hi*(hi+1)/2+lo];
 }
 // read-only access by row and column; the larger of the two indices selects the
 // packed row, the smaller one the position within it
 template <typename T>
 inline const T & NRSMat<T>::operator()(const int i, const int j) const
 {
 #ifdef DEBUG
 	if (i<0 || i>=nn || j<0 || j>=nn) laerror("SMat (i,j) out of range");
 	if (!v) laerror("(i,j) for unallocated Smat");
 #endif
 	const int hi = i>=j ? i : j;
 	const int lo = i>=j ? j : i;
 	return v[hi*(hi+1)/2+lo];
 }
 // return the number of rows and columns
 // the matrix is square, so the row count is the stored dimension
 template <typename T>
 inline int NRSMat<T>::nrows() const
 {
 	return this->nn;
 }
 // the matrix is square, so the column count is the stored dimension
 template <typename T>
 inline int NRSMat<T>::ncols() const
 {
 	return this->nn;
 }
 // max value: the stored element at the position reported by idamax
 inline const double NRSMat<double>::amax() const
 {
 	const int pos = cblas_idamax(NN2, v, 1);
 	return v[pos];
 }
 // complex max value: the stored element at the position reported by izamax
 inline const complex<double> NRSMat< complex<double> >::amax() const
 {
 	const int pos = cblas_izamax(NN2, (void *)v, 1);
 	return v[pos];
 }
 // reference pointer to Smat
 // conversion to a writable pointer to the packed data; DEBUG builds refuse
 // unallocated storage
 template <typename T>
 inline NRSMat<T>:: operator T*()
 {
 #ifdef DEBUG
 	if (v == 0) laerror("unallocated SMat in operator T*");
 #endif
 	T *data = v;
 	return data;
 }
 // conversion to a read-only pointer to the packed data; DEBUG builds refuse
 // unallocated storage
 template <typename T>
 inline NRSMat<T>:: operator const T*() const
 {
 #ifdef DEBUG
 	if (v == 0) laerror("unallocated SMat in operator T*");
 #endif
 	const T *data = v;
 	return data;
 }
 // I/O
 // stream operators for NRSMat, declared here and defined out of line
 template <typename T> extern ostream& operator<<(ostream &s, const NRSMat<T> &x);
 template <typename T> extern istream& operator>>(istream  &s, NRSMat<T> &x);
 // generate operators: SMat + a, a + SMat, SMat * a
 // (NRVECMAT_OPER and NRVECMAT_OPER2 are macros defined outside this header)
 NRVECMAT_OPER(SMat,+)
 NRVECMAT_OPER(SMat,-)
 NRVECMAT_OPER(SMat,*)
 // generate SMat + SMat, SMat - SMat
 NRVECMAT_OPER2(SMat,+)
 NRVECMAT_OPER2(SMat,-)
 #endif /* _LA_SMAT_H_ */
--- a/sparsemat.cc
+++ b/sparsemat.cc
--- a/sparsemat.h
+++ b/sparsemat.h
@ -0,0 +1,220 @@
 //for vectors and dense matrices we shall need
 #include "la.h"
 //larger of two values; a is returned when neither compares greater
 template<class T>
 inline const T MAX(const T &a, const T &b)
 {
 	if (b > a) return b;
 	return a;
 }
 //exchange the values of a and b through one temporary copy
 template<class T>
 inline void SWAP(T &a, T &b)
 {
 	T saved = a;
 	a = b;
 	b = saved;
 }
 //threshold for neglecting elements, if not defined, no tests are done except exact zero test in simplify - might be even faster
 //seems to perform better with a threshold, in spite of abs() tests
 #define  SPARSEEPSILON 1e-13 
 //unsigned type used for the row and column indices of sparse matrix elements
 typedef unsigned int SPMatindex;
 //signed counterpart; NOTE(review): presumably meant for differences of indices - confirm at the points of use
 typedef int SPMatindexdiff; //more clear would be to use traits
 //element of a linked list
 //one stored matrix element: its value, its row and column index and the link
 //to the following element of the list
 template<class T>
 struct matel
        {
        T elem; //value of the element
        SPMatindex row; //row index
        SPMatindex col; //column index
        matel *next; //following list element; the iterators of SparseMat stop at NULL
        };
 template <class T>
 class SparseMat {
 protected:
 	SPMatindex nn;
        SPMatindex mm;
 	bool symmetric;
 	unsigned int nonzero;
        int *count;
 	matel<T> *list;
 private:
 	matel<T> **rowsorted; //NULL terminated
 	matel<T> **colsorted; //NULL terminated
 	void unsort();
 	void deletelist();
 	void copylist(const matel<T> *l);
 public:
 	//iterator
        typedef class iterator {
        private:
                matel<T> *p;
        public:
                iterator() {};
                ~iterator() {};
                iterator(matel<T> *list): p(list) {};
                bool operator==(const iterator rhs) const {return p==rhs.p;}
                bool operator!=(const iterator rhs) const {return p!=rhs.p;}
                iterator operator++() {return p=p->next;}
                iterator operator++(int) {matel<T> *q=p; p=p->next; return q;}
                matel<T> & operator*() const {return *p;}
                matel<T> * operator->() const {return p;}
        };
        iterator begin() const {return list;}
        iterator end() const {return NULL;}
 	//constructors etc.
 	inline SparseMat() :nn(0),mm(0),symmetric(0),nonzero(0),count(NULL),list(NULL),rowsorted(NULL),colsorted(NULL) {};
 	inline SparseMat(const SPMatindex n, const SPMatindex m) :nn(n),mm(m),symmetric(0),nonzero(0),count(new int(1)),list(NULL),rowsorted(NULL),colsorted(NULL) {};
 	SparseMat(const SparseMat &rhs); //copy constructor
 	inline int getcount() const {return count?*count:0;}
 	explicit SparseMat(const NRMat<T> &rhs); //construct from a dense one
 	explicit SparseMat(const NRSMat<T> &rhs); //construct from a dense symmetric one
 	SparseMat & operator=(const SparseMat &rhs);
 	SparseMat & operator=(const T a);          //assign a to diagonal
    	SparseMat & operator+=(const T a);         //assign a to diagonal
 	SparseMat & operator-=(const T a);         //assign a to diagonal
        SparseMat & operator*=(const T a);         //multiply by a scalar
        SparseMat & operator+=(const SparseMat &rhs);
 	SparseMat & addtriangle(const SparseMat &rhs, const bool lower, const char sign);
        SparseMat & join(SparseMat &rhs); //more efficient +=, rhs will be emptied
        SparseMat & operator-=(const SparseMat &rhs);
 	inline const SparseMat operator+(const T &rhs) const {return SparseMat(*this) += rhs;}
        inline const SparseMat operator-(const T &rhs) const {return SparseMat(*this) -= rhs;}
        inline const SparseMat operator*(const T &rhs) const {return SparseMat(*this) *= rhs;}
        inline const SparseMat operator+(const SparseMat &rhs) const {return SparseMat(*this) += rhs;} //must not be symmetric+general
        inline const SparseMat operator-(const SparseMat &rhs) const {return SparseMat(*this) -= rhs;} //must not be symmetric+general
 	const NRVec<T> multiplyvector(const NRVec<T> &rhs, const bool transp=0) const; //sparse matrix * dense vector optionally transposed
 	inline const NRVec<T> operator*(const NRVec<T> &rhs) const {return multiplyvector(rhs);} //sparse matrix * dense vector
 	const SparseMat operator*(const SparseMat &rhs) const; 
        void gemm(const T beta, const SparseMat &a, const char transa, const SparseMat &b, const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this, if this is symemtric, only half will be added onto it
 	const T dot(const SparseMat &rhs) const; //supervector dot product
 	const T dot(const NRMat<T> &rhs) const; //supervector dot product
 	inline ~SparseMat();
 	void axpy(const T alpha, const SparseMat &x, const bool transp=0); // this+= a*x(transposed)
 	inline matel<T> *getlist() const {return list;}
 	void setlist(matel<T> *l) {list=l;}
 	inline SPMatindex nrows() const {return nn;}
        inline SPMatindex ncols() const {return mm;}
 	void resize(const SPMatindex n, const SPMatindex m);
 	void transposeme();
 	const SparseMat transpose() const;
 	inline void setsymmetric() {if(nn!=mm) laerror("non-square cannot be symmetric"); symmetric=1;}
 	inline void defineunsymmetric() {symmetric=0;} //just define and do nothing with it
 	void setunsymmetric();//unwind the matrix assuming it was indeed symmetric
 	inline bool issymmetric() const {return symmetric;}
 	unsigned int length() const;
 	void copyonwrite();
 	void simplify();
 	const T trace() const;
 	const T norm(const T scalar=(T)0) const; //is const only mathematically, not in internal implementation - we have to simplify first
 	unsigned int sort(int type) const;
 	inline void add(const SPMatindex n, const SPMatindex m, const T elem) {matel<T> *ltmp= new matel<T>; ltmp->next=list; list=ltmp; list->row=n; list->col=m; list->elem=elem;}
 	void addsafe(const SPMatindex n, const SPMatindex m, const T elem);
 };
 //stream input and output of a sparse matrix, declared here and defined out of line
 template <class T>
 	extern istream& operator>>(istream  &s, SparseMat<T> &x);
 template <class T>
 	extern ostream& operator<<(ostream &s, const SparseMat<T> &x);
 //destructor: calls unsort() first, then gives up one reference to the list;
 //the last owner deletes the list and the counter
 template <class T>
 SparseMat<T>::~SparseMat()
 {
 	unsort();
 	if(count)
 		{
 		*count -= 1;
 		if(*count<=0)
 			{
 			deletelist();
 			delete count;
 			}
 		}
 }
 //copy constructor (sort arrays are not going to be copied)
 //A non-empty list is shared with rhs and its owner count is incremented; an empty
 //rhs yields a defined, empty matrix with a counter of its own.
 //The former null test on the address of rhs has been dropped: a reference always
 //refers to an object, and the test was guarded by the macro 'debug', which differs
 //from the DEBUG macro used by the rest of the library.
 template <class T>
 SparseMat<T>::SparseMat(const SparseMat<T> &rhs)
 {
        nn=rhs.nn;
        mm=rhs.mm;
 	symmetric=rhs.symmetric;
 	if(rhs.list&&!rhs.count) laerror("some inconsistency in SparseMat contructors or assignments");
        list=rhs.list;
        if(list) {count=rhs.count; (*count)++;} else count=new int(1); //make the matrix defined, but empty and not shared
 	colsorted=rowsorted=NULL;
 	nonzero=0;
 }
 //transposed copy of the matrix; a matrix flagged symmetric just shares its list,
 //otherwise every element is re-added with row and column exchanged
 template <class T>
 const SparseMat<T> SparseMat<T>::transpose() const
 {
 if(list&&!count) laerror("some inconsistency in SparseMat transpose");
 SparseMat<T> result;
 result.nn=mm;
 result.mm=nn;
 result.symmetric=symmetric;
 result.colsorted=result.rowsorted=NULL;
 result.nonzero=0;
 if(!symmetric) //really transpose it
 	{
 	result.count=new int(1);
 	result.list=NULL;
 	for(matel<T> *el=list; el; el=el->next) result.add(el->col,el->row,el->elem);
 	return result;
 	}
 result.list=list;
 if(list)
 	{
 	result.count=count;
 	++(*result.count);
 	}
 else result.count=new int(1); //make the matrix defined, but empty and not shared
 return result;
 }
 //commutator of two sparse matrices, accumulated by two gemm calls on one result
 //(gemm: this := alpha*op(A)*op(B) + beta*this); trx/tryy request transposition
 template<class T>
 inline const SparseMat<T> commutator ( const SparseMat<T> &x, const SparseMat<T> &y, const bool trx=0, const bool tryy=0)
 {
 const char opx = trx?'t':'n';
 const char opy = tryy?'t':'n';
 SparseMat<T> result;
 result.gemm((T)0,x,opx,y,opy,(T)1);
 result.gemm((T)1,y,opy,x,opx,(T)-1); //saves a temporary and simplifies the whole sum
 return result;
 }
 //anticommutator of two sparse matrices, accumulated by two gemm calls on one result
 //(gemm: this := alpha*op(A)*op(B) + beta*this); trx/tryy request transposition
 template<class T>
 inline const SparseMat<T> anticommutator ( const SparseMat<T> &x, const SparseMat<T> &y, const bool trx=0, const bool tryy=0)
 {
 const char opx = trx?'t':'n';
 const char opy = tryy?'t':'n';
 SparseMat<T> result;
 result.gemm((T)0,x,opx,y,opy,(T)1);
 result.gemm((T)1,y,opy,x,opx,(T)1); //saves a temporary and simplifies the whole sum
 return result;
 }
 //add sparse to dense
 //Every list element of rhs is added at its (row,col) position; when rhs is
 //flagged symmetric, off-diagonal elements are added at the mirrored position too.
 //Calls laerror() when the dimensions differ. Returns *this.
 template<class T>
 NRMat<T> & NRMat<T>::operator+=(const SparseMat<T> &rhs)
 {
 if(nn!=rhs.nrows()||mm!=rhs.ncols()) laerror("incompatible matrices in +=");
 matel<T> *l=rhs.getlist();
 bool sym=rhs.issymmetric();
 while(l)
        {
 #ifdef MATPTR
        v[l->row][l->col] +=l->elem;
        if(sym && l->row!=l->col) v[l->col][l->row] +=l->elem;
 #else
        v[l->row*mm+l->col] +=l->elem;
         if(sym && l->row!=l->col) v[l->col*mm+l->row] +=l->elem;
 #endif
        l=l->next;
        }
 return *this; //was missing: flowing off the end of a function returning a reference is undefined
 }
--- a/sparsemat_traits.h
+++ b/sparsemat_traits.h
@ -0,0 +1,15 @@
 ////////////////////////////////////////////////////////////////////////////
 //traits classes
 #ifndef _SPARSEMAT_TRAITS_INCL
 #define _SPARSEMAT_TRAITS_INCL
 //traits of SparseMat<double>: element and product types plus forwarding
 //wrappers for norm and axpy
 template<> struct NRMat_traits<SparseMat<double> > {
 	typedef double elementtype;
 	typedef SparseMat<double> producttype;
 	//norm of x with the default argument of SparseMat::norm
 	static double norm (const SparseMat<double> &x)
 		{
 		return x.norm();
 		}
 	//s += c*x; note the different argument order of SparseMat::axpy
 	static void axpy (SparseMat<double>&s, const SparseMat<double> &x, const double c)
 		{
 		s.axpy(c,x);
 		}
 };
 #endif
--- a/strassen.cc
+++ b/strassen.cc
@ -0,0 +1,31 @@
 #include "la.h"
 /*Strassen algorithm*/
 // called routine is fortran-compatible
 // fmm takes transposition flags, dimensions, alpha/beta scalars and leading
 // dimensions for a, b and c, plus an auxiliary array d_aux with its size i_naux.
 // NOTE(review): presumably an external Strassen matrix multiply with gemm-like semantics - confirm with its library
 extern "C" void fmm(const char c_transa,const char c_transb,const int m,const int n,const int k,const double alpha,
                const double *a,const int lda,const double *b,const int ldb,const double beta,double *c,const int ldc,
                double *d_aux,int i_naux);
 // external routine taking four integer cutoff parameters; wrapped by NRMat<double>::s_cutoff
 extern "C" void strassen_cutoff(int c, int c1, int c2, int c3);
 //forwards the four cutoff parameters unchanged to the external strassen_cutoff()
 void NRMat<double>::s_cutoff(const int c, const int c1, const int c2, const int c3) const
 {
 	strassen_cutoff(c, c1, c2, c3);
 }
 //matrix multiply through the external fmm() routine, with a gemm-like argument list
 //the shapes of op(a) and op(b) are checked against this matrix before the call
 void NRMat<double>::strassen(const double beta, const NRMat<double> &a, const char transa, const NRMat<double> &b, const char transb, const double alpha)
 {
 const bool aplain = (transa=='n');
 const bool bplain = (transb=='n');
 const int arows(aplain ? a.nn : a.mm);	//rows of op(a)
 const int ainner(aplain ? a.mm : a.nn);	//columns of op(a)
 const int binner(bplain ? b.nn : b.mm);	//rows of op(b)
 const int bcols(bplain ? b.mm : b.nn);	//columns of op(b)
 if(arows!=nn|| bcols!=mm||ainner!=binner) laerror("incompatible (or undefined size) matrices in strassen");
 copyonwrite();
 //swap transpositions and order of matrices
 fmm(transb,transa,mm,nn,ainner,alpha,b,b.mm, a, a.mm, beta,*this, mm,NULL,0);
 }
 //stub for f77 blas called from strassen routine
 //the message is handed over to the library error handler
 extern "C" void xerbla_(const char *msg)
 {
 	laerror(msg);
 }
--- a/t.cc
+++ b/t.cc
@ -0,0 +1,775 @@
 // g++ -D _GLIBCPP_NO_TEMPLATE_EXPORT -g testblas.cc testblas2.cc nrutil_modif.cc -L/usr/local/lib/atlas -lstrassen -lf77blas -lcblas -latlas -ltraceback -lbfd -liberty
 #include <time.h>
 #include "la.h"
 #include "traceback.h"
 #include "sparsemat.h"
 #include "matexp.h"
 #include "fourindex.h"
 extern void test(const NRVec<double> &);
 double ad; 
 // Store the double pointed to by c in the global ad.
 // Used (in disabled test code) to exercise NRVec's conversion to const double*.
 void f1(const double *c)
 {
 ad=*c;
 }
 // Write the global ad into the double pointed to by c.
 // Used (in disabled test code) to exercise NRVec's conversion to double*.
 void f2(double *c)
 {
 *c=ad;
 }
 // Pseudo-random index scaled to the range 0..n-1: random() is mapped to a
 // fraction (below 1 as long as random() <= RAND_MAX) and scaled by n.
 inline int randind(const int n)
 {
 const double fraction = random()/(1.+RAND_MAX);
 return int(fraction*n);
 }
 // Identity function on complex numbers; only referenced from commented-out matrixfunction() calls below.
 complex<double> mycident (const complex<double>&x) {return x;}
 // Test driver for the linear algebra classes. It is a sequence of independent
 // experiments; each one is switched on or off by editing its if(0)/if(1) guard
 // or by commenting it out. Only the exp()/exptimes() comparison is enabled.
 int main()
 {
 // traceback handlers for fatal signals (sigtraceback comes from traceback.h)
 sigtraceback(SIGSEGV,1);
 sigtraceback(SIGABRT,1);
 sigtraceback(SIGBUS,1);
 sigtraceback(SIGFPE,1);
 // always executed: y += 3*x, then y += z, on constant vectors of length 10
 NRVec<double> x(1.,10);
 NRVec<double> y(2.,10);
 NRVec<double> z(-2.,10);
 y.axpy(3,x);
 y+=z;
 // disabled: assorted NRVec/NRMat smoke tests (copies, products, trace, transpose, gemv)
 /*
 cout <<y;
 NRVec<double> a(x);
 NRVec<double> b;
 b|=x;
 NRVec<double> c;
 c=a;
 y =10. *y  ;
 int i;
 for(i=0;i<y.size();i++) cout <<y[i] <<" ";
 cout <<"\n";
 cout << y*z <<"\n";
 z|=x;
 z[1]=5;
 cout <<"zunit= "<<z.unitvector()<<"\n";
 cout <<"z= "<<z<<"\n";
 test(x);
 x = x*5;
 cout <<"x= "<<x<<"\n";
 cout <<"y= "<<y<<"\n";
 NRVec<double> u;
 u=x+y;
 cout <<"u= "<<u<<"\n";
 NRMat<double> aa(0.,3,3);
 aa[0][0]=aa[1][1]=aa(2,2)=2.;
 NRMat<double> bb(aa);
 double *p;
 aa.copyonwrite(); p= &aa[2][2];
 *p=3.;
 bb.copyonwrite(); bb(0,2)=1.;
 cout << "aa= " <<aa <<"\n";
 cout << "bb= " <<bb <<"\n";
 cout <<"aa trace "<<aa.trace() <<"\n";
 cout << "bbt= " <<bb.transpose() <<"\n";
 NRMat<double> cc=aa & bb;
 cout << "aa o+ bb= " << cc <<"\n";
 cout << cc.rsum() <<"\n";
 cout << cc.csum() <<"\n";
 NRVec<double>w(3);
 w[0]=1; w[1]=2;w[2]=3;
 NRVec<double> v(0.,3);
 v.gemv(0.,bb,'n',1.,w);
 cout << " v= " <<v <<"\n";
 v.gemv(0.,bb,'t',1.,w);
 cout << " v= " <<v <<"\n";
 */
 // disabled: fill a 6000x6000 matrix element by element
 /*
 const int n=6000;
 NRMat<double> bb(1.,n,n);
 for(int i=0;i<n;i++) for(int j=0;j<n;j++) bb[i][j]=2.;
 for(int i=0;i<n;i++) for(int j=0;j<i;j++) {double t; t=bb[i][j] +bb[j][j]; bb[i][j]=t;bb[j][i]=t;}
 */
 // disabled: matrix I/O, products, copy-on-write, pointer conversion and NRSMat tests
 /*
 NRMat<double> amat,bmat,cmat;
 cin >>amat;
 cin >>bmat;
 cmat=amat*bmat;
 cout<<cmat;
 cmat.copyonwrite(); cmat[0][0]=0;
 NRMat<double> amat(1.,2,2);
 NRMat<double> bmat(amat);
 NRMat<double> dmat(amat);
 //NRMat<double>  cmat; cmat=bmat*2.;
 NRMat<double>  cmat(bmat*2); //more efficient
 dmat.copyonwrite(); dmat[0][0]=0;
 cout<<amat;
 cout<<bmat;
 cout<<cmat;
 cout<<dmat;
 NRMat<double> amat;
 NRVec<double>  avec;
 cin >>amat;
 cin >>avec;
 cout << amat*avec;
 cout << avec*amat;
 NRVec<double> avec(0.,10);
 f1(avec);
 f2(avec);
 NRVec<double> uu(3);
 uu[0]=1; uu[1]=2; uu[2]=3;
 cout << uu << (uu|uu) <<"\n";
 NRSMat<double> sa(0.,3);
 sa(0,0)=1; sa(0,2)=5; sa(2,2)=10;sa(1,0)=2;sa(1,1)=3; sa(2,1)=-1;
 NRSMat<double> sb(0.,3);
 sb(0,0)=-2; sb(0,2)=1; sb(2,2)=2;sb(1,0)=-1;sb(1,1)=7; sb(2,1)=3;
 cout << "symetr\n" <<sa << -sa <<"\n";
 cout << "symetr\n" <<sb <<"\n";
 cout << "sa*sb\n" << sa*sb <<"\n";
 cout << "sb*sa\n" << sb*sa <<"\n";
 NRMat<double> m10(10.,3,3);
 cout << "10 + sa" << m10 + sa <<"\n";
 */
 // disabled: timing of a 256x256 dense product
 /*
 const int dim=256;
 NRMat<double> big1(dim,dim),big2(dim,dim),big3;
 for(int i=0;i<dim;i++)
 	for(int j=0;j<dim;j++)
 		{
 		big1[i][j]=i*i+j*j*j-3*j;
 		big2[i][j]=i*i/(j+1)+j*j-3*j;
 		}
 double t=clock()/((double) (CLOCKS_PER_SEC));
 big3= big1*big2;
 cout <<" big1*big2 "<<big3[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-t <<"\n";
 */
 // disabled: gemm versus strassen on matrices and cutoffs read from stdin
 /*
 NRMat<double> atest, btest,ctest;
 {
 int cc,c1,c2,c3;
 cin >>cc>>c1>>c2>>c3;
 atest.s_cutoff(cc,c1,c2,c3);
 }
 cin>>atest;
 cin>>btest;
 NRMat<double> dtest(atest.nrows(),btest.ncols());
 dtest.gemm(0., atest, 't', btest, 'n', 1.);
 cout << dtest;
 NRMat<double> etest(atest.nrows(),btest.ncols());
 etest.strassen(0., atest, 't', btest, 'n', 1.);
 cout << etest;
 */
 // disabled: time the classical product against strassen() for cutoffs 64..512
 if(0)
 {
 int dim;
 cin >>dim;
 NRMat<double> big1(dim,dim),big2(dim,dim),big3,big4(dim,dim);
 for(int i=0;i<dim;i++)
        for(int j=0;j<dim;j++)
                {
                big1[i][j]=i*i+j*j*j-3*j;
                big2[i][j]=i*i/(j+1)+j*j-3*j;
                }
 double t=clock()/((double) (CLOCKS_PER_SEC));
 big3= big1*big2;
 cout <<" classical big1*big2 "<<big3[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-t <<"\n";
 for (int c=64; c<=512;c+=64)
 	{
 	big4.s_cutoff(c,c,c,c);
 	t=clock()/((double) (CLOCKS_PER_SEC));
 	big4.strassen(0., big1, 'n', big2, 'n', 1.);
 	cout <<"cutoff "<<c<<" big1*big2 "<<big4[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-t <<"\n";
 	}
 }
 // disabled: scalar scaling and diagmultl/diagmultr with a vector
 if(0)
 {
 NRMat<double> a(3,3),b;
 NRVec<double> v(3);
 for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= i*i+j; v[i]=10-i;}
 b=a;
 b*= sin(1.)+1;
 cout << a <<v;
 a.diagmultl(v);
 cout << a;
 b.diagmultr(v);
 cout << b;
 }
 // disabled: norm, exp, ipow and commutator of a small matrix
 if(0)
 {
 NRMat<double> a(3,3),b;
 NRVec<double> v(10);
 v[0]=2;v[1]=3;v[2]=1;v[3]=-3;v[4]=2;v[5]=-1;v[6]=3;v[7]=-2;v[8]=1;v[9]=1;
 for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; }
 cout <<a;
 cout << a.norm() <<"\n";
 b=a*a;
 cout << b.norm() <<"\n";
 cout << exp(a);
 cout << exp(a.norm()) <<"\n";
 cout << ipow(a,3);
 cout<<ipow(a,11);
 cout <<commutator(a,b);
 }
 // disabled: NRMat -> NRSMat -> NRMat conversions
 if(0)
 {
 NRMat<double> a(3,3);
 for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; }
 NRSMat<double> b(a);
 NRMat<double> c(b);
 cout <<a;
 cout <<b;
 cout <<c;
 }
 // disabled: linear_solve with a 3x3 matrix and a 2x3 matrix b
 if(0)
 {
 NRMat<double> a(3,3);
 a[0][0]=1; a[0][1]=2;a[0][2]=3;
 a[1][0]=4; a[1][1]=-5;a[1][2]=7;
 a[2][0]=-3;a[2][1]=10;a[2][2]=2;
 NRMat<double> b(2,3);
 b[0][0]=1;b[0][1]=2;b[0][2]=3;
 b[1][0]=2;b[1][1]=4;b[1][2]=6;
 cout <<a;
 cout <<b;
 linear_solve(a,&b);
 cout <<a;
 cout <<b;
 }
 // disabled: diagonalize a symmetric matrix stored as NRMat
 if(0)
 {
 NRMat<double> a(3,3);
 for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; }
 NRVec<double> b(3);
 cout <<a;
 diagonalize(a,b);
 cout <<a;
 cout <<b;
 }
 // disabled: diagonalize an NRSMat and the exponential of its dense copy
 if(0)
 {
 NRSMat<double> a(3);
 NRMat<double>v(3,3);
 for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a(i,j)= (i+j)/10.; }
 NRVec<double> b(3);
 cout <<a;
 NRMat<double>c=(NRMat<double>)a; //or NRMat<double>c(a);
 NRMat<double>d=exp(c);
 diagonalize(a,b,&v);
 cout <<b;
 cout <<v;
 cout <<d;
 diagonalize(d,b);
 cout <<b;
 cout <<d;
 }
 // disabled: singular value decomposition of a matrix read from stdin
 if(0)
 {
 NRMat<double> a;
 cin >>a ;
 NRMat<double> b=a.transpose();
 NRMat<double> u(a.nrows(),a.nrows()),v(a.ncols(),a.ncols());
 NRVec<double>s(a.ncols());
 singular_decomposition(a,&u,s,&v);
 //singular_decomposition(a,NULL,s,NULL); //this does not work when linked with static version of lapack, works with .so.3 version (from suse distrib)
 cout <<u;
 cout <<s;
 cout <<v;
 //singular_decomposition(b,&v,s,&u);
 //cout <<v;
 //cout <<s;
 //cout <<u;
 }
 if(0)
 {
 //diagonalize a general matrix and reconstruct it back; assume real eigenvalues
 //double aa[]={1,2,3,4,-5,7,-3,10,2};
 //NRMat<double> a(aa,3,3);
 NRMat<double> a;
 cin >>a;
 cout <<a ;
 int n=a.nrows();
 NRMat<double> u(n,n),v(n,n);
 NRVec<double>wr(n),wi(n);
 gdiagonalize(a,wr,wi,&u,&v,0);
 cout <<u;
 cout <<wr;
 cout <<wi;
 cout <<v;
 NRVec<double>z=diagofproduct(u,v,1);
 for(int i=0;i<a.nrows();++i) wr[i]/=z[i];//account for normalization of eigenvectors
 u.diagmultl(wr);
 v.transposeme();
 cout <<v*u;
 }
 if(0)
 {
 //diagonalize a general matrix and reconstruct it back; allow complex eigenvalues
 NRMat<double> a;
 cin >>a;
 cout <<a ; 
 int n=a.nrows();
 NRMat<complex<double> > u(n,n),v(n,n);
 NRVec<complex<double> >w(n);
 gdiagonalize(a,w,&u,&v);
 cout <<u;
 cout <<w;
 cout <<v;
 NRVec<complex<double> >z=diagofproduct(u,v,1,1);
 //NRMat<complex<double> > zz=u*v.transpose(1);
 cout <<z;
 //cout <<zz;
 for(int i=0;i<a.nrows();++i) w[i]/=z[i];//account for normalization of eigenvectors
 u.diagmultl(w);
 cout <<v.transpose(1)*u;
 }
 // disabled: SparseMat basics - add, scale, simplify, dense conversion, products with a vector
 if(0)
 {
 SparseMat<double> a(4,4);
 NRVec<double> v(4);
 v[0]=1;v[1]=2;v[2]=3;v[3]=4;
 a=1.;
 a.copyonwrite();
 a.add(3,0,.5);
 a.add(0,2,.2);
 a.add(2,1,.1);
 a.add(3,3,1.);
 a.add(1,1,-1.);
 SparseMat<double> c(a);
 c*=10.;
 cout <<a;
 a.simplify();
 cout <<a;
 cout <<c;
 NRMat<double>b(c);
 cout <<b;
 cout << b*v;
 cout <<c*v;
 cout <<v*b;
 cout <<v*c;
 }
 // disabled: sparse matrix addition and join()
 if(0)
 {
 SparseMat<double> a(4,4),b(4,4);
 a=1.;
 a.copyonwrite();
 a.add(3,0,.5);
 b.add(0,2,.2);
 b.add(2,1,.1);
 b.add(3,3,1.);
 b.add(1,1,-1.);
 SparseMat<double>c=a+b;
 cout <<c;
 a.join(b);
 cout<<a;
 cout<<b;
 }
 // disabled: exp() of a sparse matrix compared with exp() of its dense copy
 if(0)
 {
 SparseMat<double> a(4,4),b(4,4);
 a=0.; b=2;
 a.add(3,0,.5);
 a.add(0,2,.2);
 a.add(1,1,1);
 a.add(1,0,.2);
 b.add(2,1,.1);
 b.add(3,3,1.);
 b.add(1,1,-1.);
 NRMat<double> aa(a),bb(b);
 SparseMat<double>c;
 NRMat<double>cc;
 //cout << NRMat<double>(c);
 //cout <<cc;
 //cout <<"norms "<<c.norm()<<" " <<cc.norm()<<endl;
 cout <<"original matrix \n"<<aa;
 cout <<(cc=exp(aa));
 c=exp(a);
 cout <<NRMat<double>(c);
 cout <<"norms2 "<<c.norm()<<" " <<cc.norm()<<endl;
 }
 // number of random elements inserted by the tests below; expands to an
 // expression in n, so a variable n must be in scope wherever it is used
 #define sparsity (n/4)
 // disabled: timing of dense (n<=512 only) and sparse exp() for n = 8, 16, ..., 1024*1024
 if(0)
 {
 for(int n=8; n<=1024*1024;n+=n)
 	{
 	SparseMat<double> aa(n,n);
 	cout << "\n\n\ntiming for size "<<n<<endl;
 	if(n<=512) {
 	NRMat<double> a(0.,n,n);
 	for(int i=0; i<sparsity;i++) a(randind(n),randind(n))=random()/(1.+RAND_MAX);
 	double t0=clock()/((double) (CLOCKS_PER_SEC));	
 	//cout <<a;
 	NRMat<double> b(exp(a));
 	//cout <<b;
 	cout <<"dense norm "<<b.norm() <<"\n";
 	cout << "test commutator " <<commutator(a,b).norm() <<endl;
 	double t1=clock()/((double) (CLOCKS_PER_SEC));    
 	cout << "dense time " <<n<<' '<< t1-t0 <<endl;
 	aa=SparseMat<double>(a);
 	}
 	else
 	{
 	for(int i=0; i<sparsity;i++) aa.add(randind(n),randind(n),random()/(1.+RAND_MAX));
 	}
 	//cout <<aa;
 	double t2=clock()/((double) (CLOCKS_PER_SEC));        
 	SparseMat<double> bb(exp(aa));
 	//cout <<bb;
 	cout <<"sparse norm "<<bb.norm() <<"\n";
 	cout << "test commutator " <<commutator(aa,bb).norm() <<endl;
        double t3=clock()/((double) (CLOCKS_PER_SEC));
 	 cout <<"sparse length "<<bb.length()<<"\n";
        cout << "sparse time "<<n<<' ' << t3-t2 <<endl;
 	}
 }
 // ENABLED: for a random sparse matrix of size n (read from stdin) compare
 // exp(aa)*v with exptimes(aa,v) and print the norm of the difference
 if(1)
 {
 int n;
 cin>>n;
 	SparseMat<double> aa(n,n);
 	for(int i=0; i<sparsity;i++) aa.add(randind(n),randind(n),random()/(1.+RAND_MAX));
 	SparseMat<double> bb=exp(aa);
 	NRVec<double> v(n);
 	 for(int i=0; i<n;++i) v[i]=random()/(1.+RAND_MAX);
 	NRVec<double> res1=bb*v;
 	NRVec<double> res2=exptimes(aa,v);
 	cout <<"difference = "<<(res1-res2).norm()<<endl;
 }
 // disabled: commutator of sparse matrices compared with the dense one
 if(0)
 {
 SparseMat<double> a(4,4),b(4,4),d;
 a=0.; b=2;
 a.add(3,0,.5);
 a.add(0,2,.2);
 a.add(1,1,1);
 a.add(1,0,.2);
 b.add(2,1,.1);
 b.add(3,3,1.);
 b.add(1,1,-1.);
 NRMat<double> aa(a),bb(b),dd;
 SparseMat<double>c;
 NRMat<double>cc;
 c=commutator(a,b);
 cc=commutator(aa,bb);
 cout <<cc;
 cout <<NRMat<double>(c);
 cout <<"norms2 "<<c.norm()<<" " <<cc.norm()<<endl;
 }
 // disabled: vector += scalar
 /*
 NRVec<double> v(10.,10);
 v+= 5.;
 cout <<v;
 */
 // disabled: eigenproblem of a random antisymmetric matrix and of exp(a*.125)
 if(0)
 {
 const int n=3;
 NRMat<double> a(n,n);
 for(int i=0;i<n;++i) for(int j=0;j<i;++j)
 	{
 	a(i,j)= random()/(1.+RAND_MAX);
 	a(j,i)= -a(i,j);
 	}
 NRMat<double> b; b|=a;
 NRVec<double> er(n),ei(n);
 NRMat<double> vr(n,n),vl(n,n);
 gdiagonalize(b,er,ei,&vl,&vr);
 cout <<er<<ei;
 cout <<"left eivec\n"<<vl <<"right eivec\n"<<vr;
 NRMat<double> u=exp(a*.125);
 cout <<"norms "<<u.norm() << ' '<<(u-1.).norm()<<endl;
 gdiagonalize(u,er,ei,&vl,&vr);
 cout <<er<<ei;
 cout <<"left eivec\n"<<vl <<"right eivec\n"<<vr;
 }
 // disabled: iterate for an antisymmetric x with exp(x) close to the matrix b
 // read from stdin, then compare with log(b)
 if(0)
 {
 /*
 int n;
 cin>>n;
 NRMat<double> a(n,n);
 for(int i=0;i<n;++i) for(int j=0;j<i;++j)
        {
        a(i,j)= random()/(1.+RAND_MAX);
        a(j,i)= -a(i,j);
        }
 NRMat<double> b=exp(a);
 cout <<a;
 */
 NRMat<double> a,b;
 cin >>b;
 int n=b.nrows();
 cout <<"difference from identity = "<<b.norm(1.)<<endl;
 NRMat<double> x(0.,n,n),x0;
 	double r;
 int i=0;
 do
 	{
 	x0=x;
 	NRMat<double> y=exp(x*-.5);
 	x+= y*b*y; 
 	x-= 1.;
 	x=(x-x.transpose())*.5;
 	cout <<"matrix x\n"<<x;
 	cout <<"iter "<<i <<" residue "<< (r=(exp(x)-b).norm())<<endl;
 	cout <<"iter "<<i <<" conv "<<(r=(x-x0).norm())<<endl;
 	++i;
 	} while(abs(r)>1e-10);
 cout <<"result\n"<<x<<endl;
 cout <<"exp(result)"<<exp(x)<<endl;
 NRMat<double> c=log(b); //matrixfunction(a,&mycident,1);
 cout <<c;
 NRMat<double> d=exp(c);
 cout <<"exp(log(x))\n"<<d;
 cout<<(d-b).norm()<<endl;
 }
 // disabled: coupled y/z iteration started from b=exp(a); y is compared with
 // s=exp(a*.5) in every step
 if(0)
 {
 int n;
 cin>>n;
 NRMat<double> a(n,n);
 for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
        {
        a(i,j)= .1*random()/(1.+RAND_MAX);
        a(j,i)= a(i,j);
        }
 NRMat<double> b=exp(a);
 NRMat<double> s=exp(a*.5);
 NRMat<double> y(0.,n,n);
 NRMat<double> z(0.,n,n);
        double r;
 int i=0;
 y=b;z=1.;
 cout << "norm = "<<b.norm(1.)<<endl;
 do
        {
 	NRMat<double> tmp=z*y*-1.+3.;
 	NRMat<double> ynew=y*tmp*.5;
 	z=tmp*z*.5;
 	y=ynew;
        cout <<"iter "<<i <<" residue "<< (r=(y-s).norm())<<endl;
        ++i;
        } while(abs(r)>1e-10);
 }
 // disabled: inverse() of a 3x3 matrix
 // NOTE(review): a(n,n) is constructed without a fill value and only six elements are assigned
 if(0)
 {
 int n=3;
 NRMat<double> a(n,n);
 a(0,0)=1.;
        a(0,1)=2.;
        a(1,0)=2.;
        a(1,1)=6.;
 a(2,2)=-4;
 a(0,2)=1;
 cout <<a;
 double d;
 NRMat<double> c=inverse(a,&d);
 cout <<a<<c;
 }
 // disabled: repeated b=b*b on a matrix that was never filled
 if(0)
 {
 NRMat<double> a(3,3);
 NRMat<double> b=a;
 for(int i=1; i<4;i++) b=b*b;
 }
 // disabled: exp followed by log, compared via exp(log(exp(a)))
 if(0)
 {
 NRMat<double> a;
 cin >>a;
 NRMat<double> b=exp(a);
 NRMat<double> c=log(b);
 cout <<a;
 cout <<b;
 cout <<c;
 cout << (b-exp(c)).norm() <<endl;
 }
 // disabled (00 is just zero): log followed by exp on a matrix read from stdin
 if(00)
 {
 NRMat<double> a;
 cin >>a;
 NRMat<double> c=log(a); //matrixfunction(a,&mycident,1);
 cout <<c;
 NRMat<double> b=exp(c);
 cout <<"exp(log(x))\n"<<b;
 cout<<(b-a).norm()<<endl;
 }
 if(0)
 {
 //check my exponential with respect to spectral decomposition one
 NRSMat<double> a;
 cin >>a;
 NRMat<double> aa(a);
 NRMat<double> b=exp(aa);
 NRMat<double> c=matrixfunction(a,&exp);
 cout <<a;
 cout <<b;
 cout <<c;
 cout << (b-c).norm()/b.norm() <<endl;
 }
 if(0)
 {
 //verify BCH expansion
 NRMat<double> h;
 NRMat<double> t;
 cin >>h;
 cin >>t;
 NRMat<double> r1= exp(-t) * h * exp(t);
 NRMat<double> r2=BCHexpansion(h,t,30);
 cout <<r1;
 cout <<r2;
 cout <<"error = "<<(r1-r2).norm()<<endl;
 }
 // disabled: diagonalize a symmetric matrix built from a sparse one and print
 // the norm of v*aaa*v^T - diag(w)
 if(0)
 {
 int n;
 cin >>n;
 SparseMat<double> a(n,n);
 for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
        {
        a.add(i,j,random()/(1.+RAND_MAX));
        }
 a.setsymmetric();
 NRSMat<double> aa(a); 
 NRMat<double> aaa(a); 
 NRVec<double> w(n);
 NRMat<double> v(n,n);
 //cout <<aa;
 diagonalize(aa, w, &v,0);
 //cout <<w;
 //cout <<v;
 //cout << v*aaa*v.transpose(); 
 cout <<  (v*aaa*v.transpose() - diagonalmatrix(w)).norm()<<endl;
 }
 // disabled: exp() of a complex matrix read from stdin
 if(0)
 {
 NRMat<complex<double> > a;
 cin >>a;
 NRMat<complex<double> > b=exp(a);
 cout <<b;
 }
 // disabled: linear_solve with a random symmetric (NRSMat) matrix
 if(0)
 {
 int n;
 cin >>n;
 //NRMat<double> a(n,n);
 NRSMat<double> a(n);
 for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
        {
        a(j,i)=a(i,j)=random()/(1.+RAND_MAX);
        }
 cout <<a;
 NRMat<double> y(1,n);
 for(int i=0;i<n;++i) y(0,i)=random()/(1.+RAND_MAX);
 cout <<y;
 linear_solve(a,&y);
 cout << y;
 }
 // disabled: dot products between dense and sparse forms of the same matrix
 if(0)
 {
 int n;
 cin >>n;
 SparseMat<double> a(n,n);
 int spars=n*n/3;
        for(int i=0; i<spars;i++) a.add(randind(n),randind(n),random()/(1.+RAND_MAX));
 NRMat<double> aa(a);
 NRVec<double> v(aa[0],n*n);
 cout <<a;
 cout <<aa;
 cout <<v;
 cout <<"test "<<aa.dot(aa)<<endl;
 cout <<"test "<<v*v<<endl;
 cout <<"test "<<a.dot(aa)<<endl;
 cout <<"test "<<a.dot(a)<<endl;
 }
 }
--- a/vec.cc
+++ b/vec.cc
@ -0,0 +1,348 @@
 #include <iostream>
 #include "vec.h"
 //////////////////////////////////////////////////////////////////////////////
 //// forced instantization in the corespoding object file
 #define INSTANTIZE(T) \
 template ostream & operator<<(ostream &s, const NRVec< T > &x); \
 template istream & operator>>(istream  &s, NRVec< T > &x); \
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
 template NRVec<double>;
 template NRVec< complex<double> >;
 /*
 * Templates first, specializations for BLAS next
 */
 // Conversion ctor: view the storage of a matrix as one vector of
 // rhs.nn*rhs.mm elements. Nothing is copied - the vector takes over rhs.v and
 // rhs.count and increments the shared reference counter.
 // Only compiled when MATPTR is not defined.
 #ifndef MATPTR
 template <typename T>
 NRVec<T>::NRVec(const NRMat<T> &rhs)
 {
 	nn = rhs.nn*rhs.mm;
 	v = rhs.v;
 	count = rhs.count;
 	// same guard as the NRVec copy ctor: do not dereference a null counter
 	// (assumes an unallocated NRMat has count==0, as NRVec does)
 	if(count) (*count)++;
 }
 #endif
 // dtor: drop one reference; the last owner frees both the data and the counter.
 // A vector without a counter owns nothing.
 template <typename T>
 NRVec<T>::~NRVec()
 {
 	if(count && --(*count) <= 0) {
 		delete[] v;	// delete[] on a null pointer is a no-op
 		delete count;
 	}
 }
 // Make sure this object is the only owner of its data: when the reference
 // counter is above one, leave the shared buffer to the other owners and
 // continue with a private copy of the nn elements.
 template <typename T>
 void NRVec<T>::copyonwrite()
 {
 #ifdef DEBUG
  if(!count) laerror("probably an assignment to undefined vector");
 #endif
  if(*count <= 1) return;	// already the sole owner
  --(*count);
  count = new int(1);
  T *own = new T[nn];
  memcpy(own, v, nn*sizeof(T));
  v = own;
 }
 // Assignment: release the current data (freeing it when this was the last
 // reference) and share the data of rhs by incrementing its counter.
 // No elements are copied.
 template <typename T>
 NRVec<T> & NRVec<T>::operator=(const NRVec<T> &rhs)
 {
  if (this == &rhs) return *this;
  if(count && --(*count) == 0)
  {
    delete[] v;
    delete count;
  }
  nn = rhs.nn;
  v = rhs.v;
  count = rhs.count;
  if(count) ++(*count);
  return *this;
 }
 // Resize to n elements. The previous contents are not preserved.
 // Shared data is left to its other owners; exclusively owned data is
 // reallocated only when the size really changes.
 template <typename T>
 void NRVec<T>::resize(const int n)
 {
 #ifdef DEBUG
  if(n<=0) laerror("illegal vector dimension");
 #endif
  // shared buffer: detach and continue as an unallocated vector
  if(count && *count > 1) {
    --(*count);
    count = 0;
    v = 0;
    nn = 0;
  }
  if(count) {
    // sole owner (*count == 1)
    if(n == nn) return;
    nn = n;
    delete[] v;
    v = new T[nn];
    return;
  }
  // nothing allocated yet
  count = new int(1);
  nn = n;
  v = new T[nn];
 }
 // ostream << NRVec
 // Format: the element count followed by endl, then the elements separated
 // by single blanks and terminated by a newline.
 template <typename T>
 ostream & operator<<(ostream &s, const NRVec<T> &x)
 {
  const int len = x.size();
  s << len << endl;
  for(int k=0; k<len; ++k) {
    s << x[k];
    if(k+1 == len) s << '\n';
    else s << ' ';
  }
  return s;
 }
 // istream >> NRVec
 // Reads the element count, resizes x accordingly and then reads that many
 // elements (the counterpart of operator<<). The stream state is not checked.
 template <typename T>
 istream & operator>>(istream &s, NRVec<T> &x)
 {
  int len;
  s >> len;
  x.resize(len);
  int k = 0;
  while(k < len) {
    s >> x[k];
    ++k;
  }
  return s;
 }
 // formatted print for NRVec
 // Hands the data to lawritemat() as a matrix with 1 row and nn columns,
 // with nodim=1 and issym=0; format and modulo are passed through unchanged.
 template<typename T>
 void NRVec<T>::fprintf(FILE *file, const char *format, const int modulo) const
 {
 	lawritemat(file, v, 1, nn, format, 1, modulo, 0);
 }
 // formatted scan for NRVec
 // Reads the dimension with "%d", resizes the vector and then reads each
 // element with the caller-supplied format (which must match the type T).
 // Any failed conversion is reported through laerror().
 template <class T>
 void NRVec<T>::fscanf(FILE *f, const char *format)
 {
 	int len;
 	const int got = std::fscanf(f, "%d", &len);
 	if(got != 1) laerror("cannot read vector dimension");
 	resize(len);
 	// v is valid only after resize()
 	T *slot = v;
 	for (int left=len; left>0; --left, ++slot)
 		if (std::fscanf(f, format, slot) != 1)
 			laerror("cannot read the vector eleemnt");
 }
 // Assignment with a physical copy: afterwards this vector exclusively owns
 // a buffer holding a copy of the elements of rhs (no data is shared).
 // Self-assignment is a no-op.
 template <typename T>
 NRVec<T> & NRVec<T>::operator|=(const NRVec<T> &rhs)
 {
 	if (this == &rhs) return *this;
 #ifdef DEBUG
 	if (!rhs.v) laerror("unallocated rhs in NRVec operator |=");
 #endif
 	// shared buffer: leave it to the other owners and start from scratch
 	if (count && *count > 1) {
 		--(*count);
 		nn = 0;
 		count = 0;
 		v = 0;
 	}
 	if (nn != rhs.nn) {
 		delete[] v;
 		// reset the pointer so that a buffer of the new size is allocated
 		// below; leaving it dangling made memcpy write into freed memory
 		v = 0;
 		nn = rhs.nn;
 	}
 	if(!v) v = new T[nn];
 	if(!count) count = new int;
 	*count = 1;
 	memcpy(v, rhs.v, nn*sizeof(T));
 	return *this;
 }
 // unary minus (generic version): returns a new vector holding the negated elements
 template <typename T>
 const NRVec<T> NRVec<T>::operator-() const
 {
 	NRVec<T> negated(nn);
 	const T *src = v;
 	T *dst = negated.v;
 	for (int left=nn; left>0; --left) *dst++ = -(*src++);
 	return negated;
 }
 // axpy call for T = double (not strided)
 // this += alpha*x via cblas_daxpy; the data is detached from other owners first.
 // Sizes are only checked when DEBUG is defined.
 void NRVec<double>::axpy(const double alpha, const NRVec<double> &x)
 {
 #ifdef DEBUG
 	if (nn != x.nn) laerror("axpy of incompatible vectors");
 #endif
 	copyonwrite();
 	cblas_daxpy(nn, alpha, x.v, 1, v, 1);
 }
 // axpy call for T = complex<double> (not strided)
 // this += alpha*x via cblas_zaxpy, which takes the scalar by address.
 // Sizes are only checked when DEBUG is defined.
 void NRVec< complex<double> >::axpy(const complex<double> alpha, 
 			const NRVec< complex<double> > &x)
 {
 #ifdef DEBUG
 	if (nn != x.nn) laerror("axpy of incompatible vectors");
 #endif
 	copyonwrite();
 	cblas_zaxpy(nn, (void *)(&alpha), (void *)(x.v), 1, (void *)v, 1);
 }
 // axpy call for T = double (strided)
 // this += alpha*x where consecutive elements of x are 'stride' doubles apart.
 // The caller must provide nn such elements; nothing is checked here.
 void NRVec<double>::axpy(const double alpha, const double *x, const int stride)
 {
 	copyonwrite();
 	cblas_daxpy(nn, alpha, x, stride, v, 1);
 }
 // axpy call for T = complex<double> (strided)
 // this += alpha*x where consecutive elements of x are 'stride' complex numbers apart.
 // The caller must provide nn such elements; nothing is checked here.
 void NRVec< complex<double> >::axpy(const complex<double> alpha, 
 			const complex<double> *x, const int stride)
 {
 	copyonwrite();
 	cblas_zaxpy(nn, (void *)(&alpha), (void *)x, stride, v, 1);
 }
 // unary minus for T = double
 // Copies *this (the copy shares the data at first), detaches the copy with
 // copyonwrite() and scales it by -1 with cblas_dscal.
 const NRVec<double> NRVec<double>::operator-() const
 {
 	NRVec<double> result(*this);
 	result.copyonwrite();
 	cblas_dscal(nn, -1.0, result.v, 1);
 	return result;
 }
 // unary minus for T = complex<double>
 // Same scheme as the double version; cblas_zdscal scales the complex data
 // by the real factor -1.
 const NRVec< complex<double> > 
 NRVec< complex<double> >::operator-() const
 {
 	NRVec< complex<double> > result(*this);
 	result.copyonwrite();
 	cblas_zdscal(nn, -1.0, (void *)(result.v), 1);
 	return result;
 }
 // Assign the scalar a to every element; the vector keeps its size.
 // A zero value is written with a single memset.
 template <typename T>
 NRVec<T> & NRVec<T>::operator=(const T &a)
 {
 	copyonwrite();
 	if(a == (T)0) {
 		memset(v, 0, nn*sizeof(T));
 		return *this;
 	}
 	T *const stop = v + nn;
 	for (T *p = v; p < stop; ++p) *p = a;
 	return *this;
 }
 // Normalization of NRVec<double>: divide all elements by the Euclidean
 // norm (cblas_dnrm2). A zero norm is reported only when DEBUG is defined.
 NRVec<double> & NRVec<double>::normalize()
 {
 	const double length = cblas_dnrm2(nn, v, 1);
 #ifdef DEBUG
 	if(!length) laerror("normalization of zero vector");
 #endif
 	copyonwrite();
 	const double factor = 1.0/length;
 	cblas_dscal(nn, factor, v, 1);
 	return *this;
 }
 // Normalization of NRVec< complex<double> >: divide all elements by the
 // Euclidean norm (cblas_dznrm2). The reciprocal is kept in a complex
 // variable because cblas_zscal takes its factor by address.
 NRVec< complex<double> > & NRVec< complex<double> >::normalize()
 {
 	complex<double> factor;
 	factor = cblas_dznrm2(nn, (void *)v, 1);
 #ifdef DEBUG
 	if(!(factor.real()) && !(factor.imag())) laerror("normalization of zero vector");
 #endif
 	copyonwrite();
 	factor = 1.0/factor;
 	cblas_zscal(nn, (void *)(&factor), (void *)v, 1);
 	return *this;
 }
 // gemv call: this = beta*this + alpha*op(A)*x
 // where op(A) is A for trans=='n' and the transpose of A otherwise.
 // A is passed to BLAS as a row-major array with leading dimension A.ncols().
 // Like every other modifying member, the data is detached from other
 // owners first so that vectors sharing the buffer are not changed.
 void NRVec<double>::gemv(const double beta, const NRMat<double> &A, 
 		const char trans, const double alpha, const NRVec &x)
 {
 #ifdef DEBUG
 	if ((trans == 'n'?A.ncols():A.nrows()) != x.size())
 		laerror("incompatible sizes in gemv A*x");
 	// the result has as many elements as op(A) has rows
 	if ((trans == 'n'?A.nrows():A.ncols()) != nn)
 		laerror("incompatible sizes in gemv A*x");
 #endif
 	copyonwrite();
 	cblas_dgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans),
 			A.nrows(), A.ncols(), alpha, A[0], A.ncols(), x.v, 1, beta, v, 1);
 }
 // complex gemv: this = beta*this + alpha*op(A)*x
 // where op(A) is A for trans=='n' and the (non-conjugated) transpose of A
 // otherwise. The data is detached from other owners before it is written,
 // consistently with the other modifying members.
 void NRVec< complex<double> >::gemv(const complex<double> beta,
 		const NRMat< complex<double> > &A, const char trans, 
 		const complex<double> alpha, const NRVec &x)
 {
 #ifdef DEBUG
 	if ((trans == 'n'?A.ncols():A.nrows()) != x.size()) 
 		laerror("incompatible sizes in gemv A*x");
 	// the result has as many elements as op(A) has rows
 	if ((trans == 'n'?A.nrows():A.ncols()) != nn)
 		laerror("incompatible sizes in gemv A*x");
 #endif
 	copyonwrite();
 	cblas_zgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans), 
 			A.nrows(), A.ncols(), (void *)(&alpha), (void *)A[0], A.ncols(), 
 			(void *)x.v, 1, (void *)(&beta), (void *)v, 1);
 }
 // Vec * Mat
 // Row vector times matrix, computed as transpose(mat)*this with cblas_dgemv.
 // The result has mat.ncols() elements; sizes are checked only under DEBUG.
 const NRVec<double> NRVec<double>::operator*(const NRMat<double> &mat) const
 {
 #ifdef DEBUG
 	if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat");
 #endif
 	int n = mat.ncols();
 	NRVec<double> result(n);
 	// beta = 0, so the uninitialized contents of result are not used
 	cblas_dgemv(CblasRowMajor, CblasTrans, nn, n, 1.0, mat[0], n, v, 1,
 			0.0, result.v, 1);
 	return result;
 }
 // Vec * Mat for complex<double>
 // Row vector times matrix, computed as transpose(mat)*this with cblas_zgemv
 // (CblasTrans, i.e. no conjugation). CONE and CZERO supply the scalars by address.
 const NRVec< complex<double> > 
 NRVec< complex<double> >::operator*(const NRMat< complex<double> > &mat) const
 {
 #ifdef DEBUG
 	if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat");
 #endif
 	int n = mat.ncols();
 	NRVec< complex<double> > result(n);
 	cblas_zgemv(CblasRowMajor, CblasTrans, nn, n, &CONE, mat[0], n, v, 1,
 			&CZERO, result.v, 1);
 	return result;
 }
 // Direct product Mat = Vec | Vec
 // Returns the nn x b.nn matrix with elements this[i]*b[j]: a zero-filled
 // matrix is updated by the rank-one routine cblas_dger.
 const NRMat<double> NRVec<double>::operator|(const NRVec<double> &b) const
 {
 	NRMat<double> result(0.,nn,b.nn);
 	cblas_dger(CblasRowMajor, nn, b.nn, 1., v, 1, b.v, 1, result, b.nn);
 	return result;
 }
 // Direct product for complex<double>
 // Uses cblas_zgerc, so the elements of b enter complex-conjugated:
 // result(i,j) = this[i]*conj(b[j]).
 const NRMat< complex<double> > 
 NRVec< complex<double> >::operator|(const NRVec< complex<double> > &b) const
 {
 	NRMat< complex<double> > result(0.,nn,b.nn);
 	cblas_zgerc(CblasRowMajor, nn, b.nn, &CONE, v, 1, b.v, 1, result, b.nn);
 	return result;
 }
--- a/vec.h
+++ b/vec.h
@ -0,0 +1,380 @@
 #ifndef _LA_VEC_H_
 #define _LA_VEC_H_
 extern "C" {
 #include "cblas.h"
 }
 #include <stdio.h>
 #include <complex>
 #include <string.h>
 #include <iostream>
 using namespace std;
 template <typename T> class NRVec;
 template <typename T> class NRSMat;
 template <typename T> class NRMat;
 template <typename T> class SparseMat;
 //////////////////////////////////////////////////////////////////////////////
 // Forward declarations
 void laerror(const char *s1=0, const char *s2=0, const char *s3=0, const char *s4=0);
 template <typename T> void lawritemat(FILE *file,const T *a,int r,int c,
 		const char *form0,int nodim,int modulo, int issym);
 // Memory allocated constants for cblas routines
 const static complex<double> CONE = 1.0, CMONE = -1.0, CZERO = 0.0;
 // Macros to construct binary operators +,-,*, from +=, -=, *=
 // for 3 cases: X + a, a + X, X + Y
 // E is the class suffix (Vec, Mat, SMat -> NRVec, NRMat, NRSMat), X the operator.
 // NRVECMAT_OPER(E,X) defines the member  NRE<T>::operator X(const T &a)
 // and the free function                  operator X(const T &a, const NRE<T> &rhs).
 // Both copy the object and apply X= to the copy.
 // NOTE(review): the free function evaluates rhs X= a, i.e. "rhs X a". This equals
 // "a X rhs" for + and for * with commuting scalars, but for X = '-' the
 // expression a - rhs yields rhs - a. Confirm and fix before relying on a - X.
 #define NRVECMAT_OPER(E,X) \
 template<class T> \
 	inline const NR##E<T> NR##E<T>::operator X(const T &a) const \
 { return NR##E(*this) X##= a; } \
 	\
 	template<class T> \
 	inline const NR##E<T> operator X(const T &a, const NR##E<T> &rhs) \
 { return NR##E<T>(rhs) X##= a; }
 // NRVECMAT_OPER2(E,X) defines the member NRE<T>::operator X(const NRE<T> &a)
 // for two objects of the same class, again via a copy and X=.
 #define NRVECMAT_OPER2(E,X) \
 template<class T> \
 	inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
 { return NR##E(*this) X##= a; }
 #include "smat.h"
 #include "mat.h"
 // NRVec class
 // Vector of nn elements of type T with reference-counted storage: copy
 // construction and operator= share the data and increment *count, while
 // members that modify the data call copyonwrite() first to obtain a private
 // buffer. A default-constructed vector has no data and no counter.
 template <typename T>
 class NRVec {
 protected:
 	int nn;		// number of elements
 	T *v;		// element storage, possibly shared with other objects
 	int *count;	// reference counter of v; 0 for an unallocated vector
 public:
 	friend class NRSMat<T>;
 	friend class NRMat<T>;
 	// unallocated vector
 	inline NRVec(): nn(0),v(0),count(0){};
 	// n elements, left uninitialized
 	inline explicit NRVec(const int n) : nn(n), v(new T[n]), count(new int(1)) {};
 	// n elements, all set to a
 	inline NRVec(const T &a, const int n);
 	// n elements copied from the array a
 	inline NRVec(const T *a, const int n);
 	// shares the data of rhs
 	inline NRVec(const NRVec &rhs);
 	// shares the storage of a symmetric matrix
 	inline explicit NRVec(const NRSMat<T> & S);
 #ifndef MATPTR
 	// shares the storage of a matrix (nn*mm elements)
 	explicit NRVec(const NRMat<T> &rhs);
 #endif
 	NRVec & operator=(const NRVec &rhs);	// shares the data of rhs
 	NRVec & operator=(const T &a);  //assign a to every element
 	NRVec & operator|=(const NRVec &rhs);	// assignment with a physical copy
 	const NRVec operator-() const;
 	inline NRVec & operator+=(const NRVec &rhs);
 	inline NRVec & operator-=(const NRVec &rhs);
 	inline NRVec & operator+=(const T &a);
 	inline NRVec & operator-=(const T &a);
 	inline NRVec & operator*=(const T &a);
 	// current value of the reference counter, 0 when unallocated
 	inline int getcount() const {return count?*count:0;}
 	inline const NRVec operator+(const NRVec &rhs) const;
 	inline const NRVec operator-(const NRVec &rhs) const;
 	inline const NRVec operator+(const T &a) const;
 	inline const NRVec operator-(const T &a) const;
 	inline const NRVec operator*(const T &a) const;
 	inline const T operator*(const NRVec &rhs) const; //scalar product -> ddot
 	inline const NRVec operator*(const NRSMat<T> & S) const;
 	const NRVec operator*(const NRMat<T> &mat) const;
 	const NRMat<T> operator|(const NRVec<T> &rhs) const;	// direct (outer) product
 	inline const T sum() const; //sum of its elements
 	inline const T dot(const T *a, const int stride=1) const; // ddot with a stride-vector
 	inline T & operator[](const int i);
 	inline const T & operator[](const int i) const;
 	inline int size() const;
 	inline operator T*(); //get a pointer to the data
 	inline operator const T*() const; //get a pointer to the data
 	~NRVec();
 	void axpy(const T alpha, const NRVec &x); // this+= a*x
 	void axpy(const T alpha, const T *x, const int stride=1); // this+= a*x
 	// this = beta*this + alpha*op(a)*x, trans=='n' selects the untransposed matrix
 	void gemv(const T beta, const NRMat<T> &a, const char trans, 
 			const T alpha, const NRVec &x);
 	void copyonwrite();	// obtain a private copy of shared data
 	void resize(const int n);	// contents are not preserved
 	NRVec & normalize();
 	inline const double norm() const;
 	inline const T amax() const;
 	inline const NRVec unitvector() const;	// normalized copy
 	void fprintf(FILE *f, const char *format, const int modulo) const;
 	void fscanf(FILE *f, const char *format);
 //sparse matrix concerning members
 	explicit NRVec(const SparseMat<T> &rhs);                // dense from sparse matrix with one of dimensions =1
 	const NRVec operator*(const SparseMat<T> &mat) const; //vector*matrix
 	inline void simplify() {}; //just for compatibility with sparse ones
 	void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x);
 };
 template <typename T> ostream & operator<<(ostream &s, const NRVec<T> &x);
 template <typename T> istream & operator>>(istream  &s, NRVec<T> &x);
 // INLINES
 // ctors
 // n elements, each initialized to a; a zero value is written with memset
 template <typename T>
 inline NRVec<T>::NRVec(const T& a, const int n) : nn(n), v(new T[n]), count(new int(1))
 {
 	if(a == (T)0) {
 		memset(v, 0, nn*sizeof(T));
 		return;
 	}
 	for(int k=0; k<n; ++k) v[k] = a;
 }
 // n elements copied bytewise from the array a, which must hold at least n elements
 template <typename T>
 inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), v(new T[n]), count(new int(1))
 {
 	const size_t bytes = n*sizeof(T);
 	memcpy(v, a, bytes);
 }
 // copy ctor: shares the data of rhs and registers one more owner;
 // copying an unallocated vector yields another unallocated vector
 template <typename T>
 inline NRVec<T>::NRVec(const NRVec<T> &rhs) : nn(rhs.nn), v(rhs.v), count(rhs.count)
 {
 	if(count) ++(*count);
 }
 // Conversion from a symmetric matrix: shares rhs.v and rhs.count (no copy)
 // and increments the counter.
 // NOTE(review): NN2 is a macro defined outside this file; nn is first set to
 // rhs.nn presumably because NN2 expands to an expression in nn giving the
 // number of stored elements - confirm in smat.h.
 // NOTE(review): count is dereferenced without a null check, unlike the copy ctor.
 template <typename T>
 inline NRVec<T>::NRVec(const NRSMat<T> &rhs)
 {
 	nn = rhs.nn;
 	nn = NN2;
 	v = rhs.v;
 	count = rhs.count;
 	(*count)++;
 }
 // x += a
 // Adds the scalar a to every element: daxpy is given &a with stride 0,
 // so the same value is used as the source for all nn elements.
 inline NRVec<double> & NRVec<double>::operator+=(const double &a)
 {
 	copyonwrite();
 	cblas_daxpy(nn, 1.0, &a, 0, v, 1);
 	return *this;
 }
 // x += a for complex<double>: zaxpy with factor CONE and a stride-0 source
 inline NRVec< complex<double> > &
 NRVec< complex<double> >::operator+=(const complex<double> &a)
 {
 	copyonwrite();
 	cblas_zaxpy(nn, (void *)(&CONE), (void *)(&a), 0, (void *)v, 1);
 	return *this;
 }
 // x -= a
 // Subtracts the scalar a from every element: v[i] += (-1.0)*a, with &a used
 // as a stride-0 source so the same value applies to all nn elements.
 // (The factor has to be -1.0, matching CMONE in the complex version.)
 inline NRVec<double> & NRVec<double>::operator-=(const double &a)
 {
 	copyonwrite();
 	cblas_daxpy(nn, -1.0, &a, 0, v, 1);
 	return *this;
 }
 // x -= a for complex<double>: zaxpy with factor CMONE and a stride-0 source
 inline NRVec< complex<double> > &
 NRVec< complex<double> >::operator-=(const complex<double> &a)
 {
 	copyonwrite();
 	cblas_zaxpy(nn, (void *)(&CMONE), (void *)(&a), 0, (void *)v, 1);
 	return *this;
 }
 // x += y
 // Element-wise addition via cblas_daxpy with factor 1; sizes are checked only under DEBUG.
 inline NRVec<double> & NRVec<double>::operator+=(const NRVec<double> &rhs)
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
 #endif
 	copyonwrite();
 	cblas_daxpy(nn, 1.0, rhs.v, 1, v, 1);
 	return *this;
 }
 // x += y for complex<double>: cblas_zaxpy with factor CONE
 inline NRVec< complex<double> > &
 NRVec< complex<double> >::operator+=(const NRVec< complex<double> > &rhs)
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
 #endif
 	copyonwrite();
 	cblas_zaxpy(nn, (void *)(&CONE), rhs.v, 1, v, 1);
 	return *this;
 }
 // x -= y
 // Element-wise subtraction via cblas_daxpy with factor -1; sizes are checked only under DEBUG.
 inline NRVec<double> & NRVec<double>::operator-=(const NRVec<double> &rhs)
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
 #endif
 	copyonwrite();
 	cblas_daxpy(nn, -1.0, rhs.v, 1, v, 1);
 	return *this;
 }
 // x -= y for complex<double>: cblas_zaxpy with factor CMONE
 inline NRVec< complex<double> > &
 NRVec< complex<double> >::operator-=(const NRVec< complex<double> > &rhs)
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
 #endif
 	copyonwrite();
 	cblas_zaxpy(nn, (void *)(&CMONE), (void *)rhs.v, 1, (void *)v, 1);
 	return *this;
 }
 // x *= a
 // Scales every element by a with cblas_dscal after detaching shared data.
 inline NRVec<double> & NRVec<double>::operator*=(const double &a)
 {
 	copyonwrite();
 	cblas_dscal(nn, a, v, 1);
 	return *this;
 }
 // x *= a for complex<double>: cblas_zscal takes the factor by address
 inline NRVec< complex<double> > &
 NRVec< complex<double> >::operator*=(const complex<double> &a)
 {
 	copyonwrite();
 	cblas_zscal(nn, (void *)(&a), (void *)v, 1);
 	return *this;
 }
 // scalar product x.y
 // Computed with cblas_ddot; sizes are checked only under DEBUG.
 inline const double NRVec<double>::operator*(const NRVec<double> &rhs) const
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("ddot of incompatible vectors");
 #endif
 	return cblas_ddot(nn, v, 1, rhs.v, 1);
 }
 // scalar product for complex<double>
 // Uses cblas_zdotc_sub, which conjugates its first vector argument:
 // the result is sum conj(this[i])*rhs[i].
 inline const complex<double>
 NRVec< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
 {
 #ifdef DEBUG
 	if (nn != rhs.nn) laerror("ddot of incompatible vectors");
 #endif
 	complex<double> dot;
 	cblas_zdotc_sub(nn, (void *)v, 1, (void *)rhs.v, 1, (void *)(&dot));
 	return dot;
 }
 // Vec * SMat = SMat * Vec
 // For a symmetric matrix both products coincide, so the call is
 // forwarded to NRSMat's matrix*vector operator.
 template <typename T>
 inline const NRVec<T> NRVec<T>::operator*(const NRSMat<T> & S) const
 {
 	return S * (*this);
 }
 // Sum of elements
 // Plain signed sum, consistent with the complex<double> specialization and
 // with the declaration ("sum of its elements"). cblas_dasum is not suitable
 // here because it adds up absolute values.
 inline const double NRVec<double>::sum() const
 {
 	double total = 0.;
 	for (int i=0; i<nn; i++) total += v[i];
 	return total;
 }
 // sum of the elements for complex<double>, accumulated in index order starting from CZERO
 inline const complex<double>
 NRVec< complex<double> >::sum() const
 {
 	complex<double> total = CZERO;
 	const complex<double> *p = v;
 	for (int left=nn; left>0; --left, ++p) total += *p;
 	return total;
 }
 // Dot product: x * y
 // y points to nn doubles spaced 'stride' apart (e.g. a matrix column);
 // computed with cblas_ddot, nothing is checked.
 inline const double NRVec<double>::dot(const double *y, const int stride) const
 {
 	return cblas_ddot(nn, y, stride, v, 1);
 }
 // Strided dot product for complex<double>.
 // cblas_zdotc_sub conjugates its first vector argument, which here is y:
 // the result is sum conj(y[i*stride])*this[i].
 // NOTE(review): operator*(const NRVec&) conjugates *this instead - confirm
 // that the different convention is intended.
 inline const complex<double>
 NRVec< complex<double> >::dot(const complex<double> *y, const int stride) const
 {
 	complex<double> dot;
 	cblas_zdotc_sub(nn, y, stride, v, 1, (void *)(&dot));
 	return dot;
 }
 // x[i] returns i-th element
 // Writable access; it does NOT call copyonwrite(). Under DEBUG an error is
 // raised when the data is shared (count != 1), when i is out of range or
 // when the vector is unallocated.
 // NOTE(review): the DEBUG check dereferences count before testing for an
 // unallocated vector.
 template <typename T>
 inline T & NRVec<T>::operator[](const int i)
 {
 #ifdef DEBUG
 	if(*count != 1) laerror("possible lval [] with count > 1");
 	if(i < 0 || i >= nn) laerror("NRVec out of range");
 	if(!v) laerror("[] on unallocated NRVec");
 #endif
 	return v[i];
 }
 // Read-only element access; range and allocation are checked only under DEBUG.
 template <typename T>
 inline const T & NRVec<T>::operator[](const int i) const
 {
 #ifdef DEBUG
 	if(i < 0 || i >= nn) laerror("NRVec out of range");
 	if(!v) laerror("[] on unallocated NRVec");
 #endif
 	return v[i];
 }
 // length of the vector (number of elements nn)
 template <typename T>
 inline int NRVec<T>::size() const
 {
 	return nn;
 }
 // Conversion to a pointer to the first element.
 // Returns the internal buffer without calling copyonwrite(), so writing
 // through the pointer affects every object sharing the data.
 template <typename T>
 inline NRVec<T>::operator T*()
 {
 #ifdef DEBUG
 	if(!v) laerror("unallocated NRVec in operator T*");
 #endif
 	return v;
 }
 // Conversion to a read-only pointer to the first element.
 template <typename T>
 inline NRVec<T>::operator const T*() const
 {
 #ifdef DEBUG
 	if(!v) laerror("unallocated NRVec in operator T*");
 #endif
 	return v;
 }
 // return norm of the Vec (Euclidean norm computed by cblas_dnrm2)
 inline const double  NRVec<double>::norm() const
 {
 	return cblas_dnrm2(nn, v, 1);
 }
 // Euclidean norm of a complex vector (cblas_dznrm2); the result is real
 inline const double NRVec< complex<double> >::norm() const
 {
 	return cblas_dznrm2(nn, (void *)v, 1);
 }
 // Max element of the array
 // cblas_idamax selects the index of the element with the largest absolute
 // value; that element is returned with its sign.
 inline const double  NRVec<double>::amax() const
 {
 	return v[cblas_idamax(nn, v, 1)];
 }
 // Element selected by cblas_izamax, which picks the index maximizing |re|+|im|
 inline const complex<double> NRVec< complex<double> >::amax() const
 {
 	return v[cblas_izamax(nn, (void *)v, 1)];
 }
 // Make Vec unitvector
 // Returns a normalized copy; *this is left unchanged because normalize()
 // detaches the temporary copy before scaling it.
 template <typename T>
 inline const NRVec<T> NRVec<T>::unitvector() const
 {
 	return NRVec<T>(*this).normalize();
 }
 // generate operators: Vec + a, a + Vec, Vec * a
 NRVECMAT_OPER(Vec,+)
 NRVECMAT_OPER(Vec,-)
 NRVECMAT_OPER(Vec,*)
 // generate operators: Vec + Vec, Vec - Vec
 NRVECMAT_OPER2(Vec,+)
 NRVECMAT_OPER2(Vec,-)
 // Few forward declarations
 #endif /* _LA_VEC_H_ */