*** empty log message ***

2004-03-17 03:07:21 +00:00 · 2004-03-17 03:07:21 +00:00 · d7b55e9846
commit d7b55e9846
18 changed files with 5955 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,28 @@
+# CVS default ignores begin
+tags
+TAGS
+.make.state
+.nse_depinfo
+*~
+\#*
+.#*
+,*
+_$*
+*$
+*.old
+*.bak
+*.BAK
+*.orig
+*.rej
+.del-*
+*.a
+*.olb
+*.o
+*.obj
+*.so
+*.exe
+*.Z
+*.elc
+*.ln
+core
+# CVS default ignores end
--- a/fourindex.h
+++ b/fourindex.h
@ -0,0 +1,261 @@
+#ifndef _fourindex_included
+#define _fourindex_included
+
+//element of a linked list; the indices are stored in a portable way, so there are no more bit-shift or endianness problems
+
+
+// Node of a singly linked list holding one four-index element.
+// I is the index type, T the type of the stored value.
+template<class I, class T>
+struct matel4
+        {
+        T elem; // stored value
+        matel4 *next; // following node of the list
+	typedef union {
+		I packed[4]; // the four indices viewed as an array
+		struct {
+			I i;
+			I j;
+			I k;	
+			I l;
+			} indiv; // the same four indices addressed by name
+		} packedindex;
+	packedindex index;
+        };
+
+typedef enum {nosymmetry=0, twoelectronreal=1, twoelectroncomplex=2, twobodyantisym=3} fourindexsymtype; //if twoelectron, only permutation-nonequivalent elements are stored
+
+// Sparse four-index quantity kept as a singly linked list of matel4 nodes.
+// Copies made by the copy constructor or operator= share one list;
+// *count holds the number of objects sharing it.
+template <class I, class T>
+class fourindex {
+protected:
+	I nn; // dimension reported by size()
+	fourindexsymtype symmetry; // symmetry tag, see setsymmetry()
+        int *count; // shared reference counter, NULL while the object is undefined
+	matel4<I,T> *list; // head of the element list, NULL when empty
+private:
+	void deletelist();
+	void copylist(const matel4<I,T> *l);
+public:
+	//iterator over the list nodes; end() is represented by a NULL node pointer
+	class iterator {
+	private:
+		matel4<I,T> *p;
+	public:
+		iterator() {};
+		~iterator() {};
+		iterator(matel4<I,T> *list): p(list) {};
+		bool operator==(const iterator rhs) const {return p==rhs.p;}
+		bool operator!=(const iterator rhs) const {return p!=rhs.p;}
+		iterator operator++() {return p=p->next;}
+		iterator operator++(int) {matel4<I,T> *q=p; p=p->next; return q;}
+		matel4<I,T> & operator*() const {return *p;}
+		matel4<I,T> * operator->() const {return p;}
+	};
+	iterator begin() const {return list;}
+	iterator end() const {return NULL;}
+
+	//constructors etc.
+	//symmetry is initialized explicitly so that it never holds an indeterminate value
+	inline fourindex() :nn(0),symmetry(nosymmetry),count(NULL),list(NULL) {};
+	inline fourindex(const I n) :nn(n),symmetry(nosymmetry),count(new int(1)),list(NULL) {};
+	fourindex(const fourindex &rhs); //copy constructor
+	inline int getcount() const {return count?*count:0;}
+	fourindex & operator=(const fourindex &rhs);
+        fourindex & operator+=(const fourindex &rhs);
+	inline void setsymmetry(fourindexsymtype s) {symmetry=s;}
+        fourindex & join(fourindex &rhs); //more efficient +=, rhs will be emptied
+	inline ~fourindex();
+	inline matel4<I,T> *getlist() const {return list;}
+	inline I size() const {return nn;}
+	void resize(const I n);
+	void copyonwrite();
+	int length() const;
+	//the add() overloads prepend a new node to the list; they do not call copyonwrite()
+	inline void add(const I i, const I j, const I k, const I l, const T elem) 
+		{matel4<I,T> *ltmp= new matel4<I,T>; ltmp->next=list; list=ltmp; list->index.indiv.i=i;list->index.indiv.j=j;list->index.indiv.k=k;list->index.indiv.l=l; list->elem=elem;}
+
+	inline void add(const typename matel4<I,T>::packedindex &index , const T elem) 
+                {matel4<I,T> *ltmp= new matel4<I,T>; ltmp->next=list; list=ltmp; list->index=index; list->elem=elem;}
+	
+	inline void add(const I (&index)[4], const T elem)
+                {matel4<I,T> *ltmp= new matel4<I,T>; ltmp->next=list; list=ltmp; memcpy(&list->index.packed, &index, sizeof(typename matel4<I,T>::packedindex)); list->elem=elem;}
+
+		
+
+};
+
+
+//destructor: drops one reference and frees the list when it was the last one
+template <class I,class T>
+fourindex<I,T>::~fourindex()
+{
+        if(!count) return;
+        if(--(*count)<=0)
+                {
+		deletelist(); //also deletes the counter and resets count to NULL
+                delete count;
+                }
+}
+
+//copy constructor (sort arrays are not going to be copied)
+//the new object shares the list of rhs; an empty rhs yields an empty, unshared object
+template <class I, class T>
+fourindex<I,T>::fourindex(const fourindex<I,T> &rhs)
+{
+#ifdef debug
+if(! &rhs) laerror("fourindex copy constructor with NULL argument");
+#endif
+        nn=rhs.nn;
+        symmetry=rhs.symmetry; //the symmetry tag describes the data and has to travel with it
+	if(rhs.list&&!rhs.count) laerror("some inconsistency in fourindex contructors or assignments");
+        list=rhs.list;
+        if(list) {count=rhs.count; (*count)++;} else count=new int(1); //make the matrix defined, but empty and not shared
+}
+
+
+
+//assignment operator: releases the old contents and shares the list of rhs
+template <class I, class T>
+fourindex<I,T> & fourindex<I,T>::operator=(const fourindex<I,T> &rhs)
+{
+        if (this != &rhs)
+                {
+                if(count)
+                    if(--(*count) ==0) {deletelist(); delete count;} // old stuff obsolete
+                list=rhs.list;
+                nn=rhs.nn; 
+                symmetry=rhs.symmetry; //keep the symmetry tag consistent with the shared data
+                if(list) count=rhs.count; else count= new int(0); //make the matrix defined, but empty and not shared, count will be incremented below
+                if(count) (*count)++;
+                }
+        return *this;
+}
+
+
+//appends a copy of every element of rhs to this list; equal indices are not merged
+template <class I, class T>
+fourindex<I,T> & fourindex<I,T>::operator+=(const fourindex<I,T> &rhs)
+{
+if(nn!=rhs.nn) laerror("incompatible dimensions for +=");
+if(count) copyonwrite();
+else {count=new int(1); list=NULL;}
+for(matel4<I,T> *node=rhs.list; node; node=node->next)
+        add(node->index,node->elem);
+return *this;
+}
+
+//moves all nodes of rhs to the end of this list and leaves rhs empty
+//rhs must not be shared and must not be the same object as *this
+template <class I, class T>
+fourindex<I,T> & fourindex<I,T>::join(fourindex<I,T> &rhs)
+{
+if(nn!=rhs.nn) laerror("incompatible dimensions for join");
+//joining an object with itself would link the tail to the head and then drop the list
+if(this==&rhs) laerror("join() of a fourindex with itself");
+//an undefined rhs has no counter; do not dereference a NULL pointer
+if(rhs.count && *rhs.count!=1) laerror("shared rhs in join()");
+if(!count) {count=new int;  *count=1; list=NULL;}
+else copyonwrite();
+//walk to the terminating NULL link and hook the rhs list there
+matel4<I,T> **last=&list;
+while(*last) last= &((*last)->next);
+*last=rhs.list;
+rhs.list=NULL;
+return *this;
+}
+
+//discards the current contents and makes this an empty, unshared fourindex of dimension n
+template <class I, class T>
+void fourindex<I,T>::resize(const I n)
+{
+        if(n<=0 ) laerror("illegal fourindex dimension");
+	if(count)
+                {
+                if(*count==1) deletelist(); //sole owner: free the nodes and the counter
+                else if(*count > 1) --(*count); //shared: only drop our reference
+                }
+        list=NULL;
+        nn=n;
+        count=new int(1); //empty but defined matrix
+}
+
+
+//frees every node of the list together with the reference counter
+template <class I, class T>
+void fourindex<I,T>::deletelist()
+{
+if(*count >1) laerror("trying to delete shared list");
+for(matel4<I,T> *node=list; node; )
+        {
+        matel4<I,T> *following=node->next; //remember the successor before the node is freed
+        delete node;
+        node=following;
+        }
+list=NULL;
+delete count;
+count=NULL;
+}
+
+//replaces list by a freshly allocated copy of the nodes reachable from l
+//add() prepends, so the copy holds the elements in reverse order
+template <class I, class T>
+void fourindex<I,T>::copylist(const matel4<I,T> *l)
+{
+for(list=NULL; l; l=l->next)
+        add(l->index,l->elem);
+}
+
+//gives this object a private copy of the list when the list is shared with other objects
+template <class I, class T>
+void fourindex<I,T>::copyonwrite()
+{
+        if(!count) laerror("probably an assignment to undefined fourindex");
+        if(*count <= 1) return; //not shared, nothing to do
+        --(*count); //leave the shared list to the other owners
+        count = new int(1);
+        if(!list) laerror("empty list with count>1");
+        copylist(list);
+}
+
+//number of nodes in the list, obtained by walking it
+template <class I, class T>
+int fourindex<I,T>::length() const
+{
+int total=0;
+for(const matel4<I,T> *node=list; node; node=node->next) ++total;
+return total;
+}
+
+
+//text output: the dimension, then one "i j k l value" line per element,
+//closed by a line of four -1 indices
+template <class I, class T>
+ostream& operator<<(ostream &s, const fourindex<I,T> &x)
+                {
+                const int n=x.size();
+                s << n << '\n';
+                for(typename fourindex<I,T>::iterator it=x.begin(); it!=x.end(); ++it)
+                        {
+                        s << (int)it->index.indiv.i << ' ' << (int)it->index.indiv.j<<  ' ' <<(int)it->index.indiv.k << ' ' << (int)it->index.indiv.l  << ' ' << it->elem << '\n';
+                        }
+                s << "-1 -1 -1 -1\n";
+                return s;
+                }
+
+//text input: reads the dimension, then "i j k l value" records until
+//a record with a negative index is met; x is resized (and thereby emptied) first
+template <class I, class T>
+istream& operator>>(istream  &s, fourindex<I,T> &x)
+                {
+                int i,j,k,l;
+		T elem;
+		int n;
+                s >> n ;
+                x.resize(n);
+                s >> i >> j >>k >>l;
+                //stop on a failed read as well, otherwise a truncated input would loop forever
+                while(s && i>=0 && j>=0 &&k>=0 &&l>=0)
+                        {
+			if(!(s>>elem)) break; //do not store a value that was never read
+			x.add(i,j,k,l,elem);
+			s >> i >> j >>k >>l;
+                        }
+                return s;
+                }
+
+
+#endif /*_fourindex_included*/
--- a/la.h
+++ b/la.h
@ -0,0 +1,9 @@
+#ifndef _LA_H_
+#define _LA_H_
+
+#include "vec.h"
+#include "smat.h"
+#include "mat.h"
+#include "nonclass.h"
+
+#endif /* _LA_H_ */
--- a/la_traits.h
+++ b/la_traits.h
@ -0,0 +1,40 @@
+////////////////////////////////////////////////////////////////////////////
+//traits classes
+
+#ifndef _LA_TRAITS_INCL
+#define _LA_TRAITS_INCL
+
+//default one, good for numbers
+//elementtype and producttype are both the type C itself
+template<class C> struct NRMat_traits {
+typedef C elementtype;
+typedef C producttype;
+//norm of a number is its abs()
+static C norm (const  C &x) {return abs(x);}
+//s += x*c
+static void axpy (C &s, const C &x, const C &c) {s+=x*c;}
+};
+
+//specializations
+//general real matrix: norm and axpy are forwarded to the NRMat member functions
+template<> struct NRMat_traits<NRMat<double> > {
+typedef double elementtype;
+typedef NRMat<double> producttype;
+static double norm (const NRMat<double> &x) {return x.norm();}
+static void axpy (NRMat<double>&s, const NRMat<double> &x, const double c) {s.axpy(c,x);}
+};
+
+//NRSMat<double>: the product type is declared as a general NRMat<double>;
+//norm is called with an explicit 0. argument
+template<> struct NRMat_traits<NRSMat<double> > {
+typedef double elementtype;
+typedef NRMat<double> producttype;
+static const double norm (const NRSMat<double> &x) {return x.norm(0.);}
+static void axpy (NRSMat<double>&s, const NRSMat<double> &x, const double c) {s.axpy(c,x);}
+};
+
+
+//general complex matrix: norm is real valued, the axpy coefficient is complex
+template<> struct NRMat_traits<NRMat<complex<double> > > {
+typedef complex<double> elementtype;
+typedef NRMat<complex<double> > producttype;
+static double norm (const NRMat<complex<double> >  &x) {return x.norm();}
+static void axpy (NRMat<complex<double> >&s, const NRMat<complex<double> > &x, const complex<double> c) {s.axpy(c,x);}
+};
+
+
+
+#endif
--- a/mat.cc
+++ b/mat.cc
@ -0,0 +1,844 @@
+#include "mat.h"
+// TODO :
+//
+
+//////////////////////////////////////////////////////////////////////////////
+//// forced instantiation in the corresponding object file
+//// (an explicit instantiation of a class template requires the class key)
+template class NRMat<double>;
+template class NRMat< complex<double> >;
+
+
+/*
+ *  Templates first, specializations for BLAS next
+ */
+
+// dtor: drop one reference; the last owner frees the storage and the counter
+template <typename T>
+NRMat<T>::~NRMat()
+{
+	if (!count || --(*count) > 0) return;
+	if (v) {
+#ifdef MATPTR
+		delete[] (v[0]);
+#endif
+		delete[] v;
+	}
+	delete count;
+}
+
+// assign NRMat = NRMat
+// Shallow assignment: releases the current storage (freeing it when this was
+// the last owner) and then shares the storage and reference counter of rhs.
+template <typename T>
+NRMat<T> & NRMat<T>::operator=(const NRMat<T> &rhs)
+{
+	if (this == &rhs) return *this;
+	if (count) {
+		if (--(*count) ==0 ) {
+#ifdef MATPTR
+			delete[] (v[0]);
+#endif
+			delete[] v;
+			delete count;
+		}
+	}
+	// the copy has to happen also when *this was undefined (count == 0)
+	v = rhs.v;
+	nn = rhs.nn;
+	mm = rhs.mm;
+	count = rhs.count;
+	// one more owner of the shared storage, hence increment
+	if (count) (*count)++;
+	return *this;
+}
+
+// Assign diagonal: every element (i,i) is set to a, other elements are untouched
+template <typename T>
+NRMat<T> & NRMat<T>::operator=(const T &a)
+{
+	copyonwrite();
+#ifdef DEBUG
+	if (nn != mm) laerror("RMat.operator=scalar on non-square matrix");
+#endif
+#ifdef MATPTR
+	 for (int d=0; d<nn; d++) v[d][d] = a;
+#else
+	 // consecutive diagonal elements are nn+1 positions apart in the flat storage
+	 for (int pos=0; pos<nn*nn; pos+=nn+1) v[pos] = a;
+#endif
+	 return *this;
+}
+
+// Explicit deep copy of NRmat
+// After the call *this owns private storage holding a copy of the elements of rhs.
+template <typename T>
+NRMat<T> & NRMat<T>::operator|=(const NRMat<T> &rhs)
+{
+	if (this == &rhs) return *this;
+#ifdef DEBUG
+	if (!rhs.v) laerror("unallocated rhs in Mat operator |=");
+#endif
+	// storage shared with other objects: drop our reference and start as undefined
+	if (count)
+		if (*count > 1) {
+			--(*count);
+			nn = 0;
+			mm = 0;
+			count = 0;
+			v = 0;
+		}
+	// private storage of a different shape cannot be reused
+	if (nn != rhs.nn || mm != rhs.mm) {
+		if (v) {
+#ifdef MATPTR
+			delete[] (v[0]);
+#endif
+			delete[] (v);
+			v = 0;
+		}
+		nn = rhs.nn;
+		mm = rhs.mm;
+	}
+	if (!v) {
+#ifdef MATPTR
+		v = new T*[nn];
+		v[0] = new T[mm*nn];
+#else
+		v = new T[mm*nn];
+#endif
+	}
+
+#ifdef MATPTR
+	// rebuild the row pointers into the contiguous block, then copy the data
+	for (int i=1; i< nn; i++) v[i] = v[i-1] + mm;
+	memcpy(v[0], rhs.v[0], nn*mm*sizeof(T));
+#else
+	memcpy(v, rhs.v, nn*mm*sizeof(T));
+#endif
+
+	if (!count) count = new int;
+	*count = 1;
+
+	return *this;
+}
+
+// M += a : adds a to every diagonal element
+template <typename T>
+NRMat<T> & NRMat<T>::operator+=(const T &a)
+{
+	copyonwrite();
+#ifdef DEBUG
+	if (nn != mm) laerror("Mat.operator+=scalar on non-square matrix");
+#endif
+#ifdef MATPTR
+	for (int d=0; d<nn; d++) v[d][d] += a;
+#else
+	// diagonal elements are nn+1 positions apart in the flat storage
+	for (int pos=0; pos<nn*nn; pos+=nn+1) v[pos] += a;
+#endif
+	return *this;
+}
+
+// M -= a : subtracts a from every diagonal element
+template <typename T>
+NRMat<T> & NRMat<T>::operator-=(const T &a)
+{
+	copyonwrite();
+#ifdef DEBUG
+	if (nn != mm) laerror("Mat.operator-=scalar on non-square matrix");
+#endif
+#ifdef MATPTR
+	for (int d=0; d<nn; d++) v[d][d] -= a;
+#else
+	// diagonal elements are nn+1 positions apart in the flat storage
+	for (int pos=0; pos<nn*nn; pos+=nn+1) v[pos] -= a;
+#endif
+	return *this;
+}
+
+// unary minus: returns a new matrix holding the negated elements
+template <typename T>
+const NRMat<T> NRMat<T>::operator-() const
+{
+	NRMat<T> result(nn, mm);
+	const int total = nn*mm;
+#ifdef MATPTR
+	T *dst = result.v[0];
+	const T *src = v[0];
+#else
+	T *dst = result.v;
+	const T *src = v;
+#endif
+	for (int pos=0; pos<total; pos++) dst[pos] = -src[pos];
+	return result;
+}
+
+// direct sum
+// The result has nn+b.nn rows and mm+b.mm columns: *this fills the upper left
+// block, b the lower right block, everything else is zero.
+template <typename T>
+const NRMat<T> NRMat<T>::operator&(const NRMat<T> & b) const
+{
+	NRMat<T> result((T)0, nn+b.nn, mm+b.mm);
+	for (int i=0; i<nn; i++) memcpy(result[i], (*this)[i], sizeof(T)*mm);
+	// the block of b starts after the mm columns of *this (a column offset, hence mm and not nn)
+	for (int i=0; i<b.nn; i++) memcpy(result[nn+i]+mm, b[i], sizeof(T)*b.mm);
+	return result;
+}
+
+// direct product: element (i,j) of *this times b fills the block
+// starting at row i*b.nn and column j*b.mm of the result
+template <typename T>
+const NRMat<T> NRMat<T>::operator|(const NRMat<T> &b) const
+{
+	NRMat<T> result(nn*b.nn, mm*b.mm);
+	for (int row=0; row<nn; row++)
+		for (int col=0; col<mm; col++) {
+			const int rowbase = row*b.nn;
+			const int colbase = col*b.mm;
+			for (int brow=0; brow<b.nn; brow++)
+				for (int bcol=0; bcol<b.mm; bcol++)
+					result[rowbase+brow][colbase+bcol] = (*this)[row][col]*b[brow][bcol];
+		}
+	return result;
+}
+
+// sum of columns: entry i of the result is the sum over j of element (i,j)
+template <typename T>
+const NRVec<T> NRMat<T>::csum() const
+{
+	NRVec<T> result(nn);
+	for (int row=0; row<nn; row++) {
+		T acc;
+		acc = (T)0;
+		for (int col=0; col<mm; col++) acc += (*this)[row][col];
+		result[row] = acc;
+	}
+	return result;
+}
+
+// sum of rows: entry i of the result is the sum over j of element (j,i)
+template <typename T>
+const NRVec<T> NRMat<T>::rsum() const
+{
+	// one entry per column, so the result needs mm (not nn) elements
+	NRVec<T> result(mm);
+	T sum;
+	
+	for (int i=0; i<mm; i++) { 
+		sum = (T)0;
+		for(int j=0; j<nn; j++) sum += (*this)[j][i];
+		result[i] = sum;
+	}
+	return result;
+}
+
+// detach Mat and make its own deep copy
+// Does nothing when the storage is not shared (*count <= 1).
+template <typename T>
+void NRMat<T>::copyonwrite()
+{
+#ifdef DEBUG
+	if (!count) laerror("Mat::copyonwrite of undefined matrix");
+#endif
+	if (*count > 1) {
+		// leave the shared storage to the other owners and start a new counter
+		(*count)--;
+		count = new int;
+		*count = 1;
+#ifdef MATPTR
+		T **newv = new T*[nn];
+		newv[0] = new T[mm*nn];
+		memcpy(newv[0], v[0], mm*nn*sizeof(T));
+		v = newv;
+		// row pointers into the new contiguous block
+		for (int i=1; i< nn; i++) v[i] = v[i-1] + mm;
+#else
+		T *newv = new T[mm*nn];
+		memcpy(newv, v, mm*nn*sizeof(T));
+		v = newv;
+#endif
+	}
+}
+
+// Gives the matrix private storage for n rows and m columns.
+// Element values are not copied; newly allocated storage is not initialized here.
+template <typename T>
+void NRMat<T>::resize(const int n, const int m)
+{
+#ifdef DEBUG
+	if (n<=0 || m<=0) laerror("illegal dimensions in Mat::resize()");
+#endif
+	// shared storage: drop our reference and continue as an undefined matrix
+	if (count)
+		if (*count > 1) {
+			(*count)--;
+			count = 0;
+			v  = 0;
+			nn = 0;
+			mm = 0;
+		}
+	// undefined matrix: allocate counter and storage from scratch
+	if (!count) {
+		count = new int;
+		*count = 1;
+		nn = n;
+		mm = m;
+#ifdef MATPTR
+		v = new T*[nn];
+		v[0] = new T[m*n];
+		for (int i=1; i< n; i++) v[i] = v[i-1] + m;
+#else
+		v = new T[m*n];
+#endif
+		return;
+	}
+	// At this point *count = 1, check if resize is necessary
+	if (n!=nn || m!=mm) {
+		nn = n;
+		mm = m;
+#ifdef MATPTR
+		delete[] (v[0]);
+#endif
+		delete[] v;
+#ifdef MATPTR
+		v = new T*[nn];
+		v[0] = new T[m*n];
+		for (int i=1; i< n; i++) v[i] = v[i-1] + m;
+#else
+		v = new T[m*n];
+#endif
+	}
+}
+
+// transpose Mat in place by swapping every element below the diagonal with its mirror image
+template <typename T>
+NRMat<T> & NRMat<T>::transposeme()
+{
+#ifdef DEBUG
+	if (nn != mm) laerror("transpose of non-square Mat");
+#endif
+	copyonwrite();
+	for(int row=1; row<nn; row++)
+		for(int col=0; col<row; col++) {
+#ifdef MATPTR
+			T saved = v[row][col]; 
+			v[row][col] = v[col][row]; 
+			v[col][row] = saved;
+#else
+			const int lower = row*mm+col;
+			const int upper = col*mm+row;
+			T saved = v[lower];
+			v[lower] = v[upper];
+			v[upper] = saved;
+#endif
+		}
+	return *this;
+}
+
+// Output of Mat
+// Forwards the element data, both dimensions, format and modulo to lawritemat().
+template <typename T>
+void NRMat<T>::fprintf(FILE *file, const char *format, const int modulo) const
+{
+	lawritemat(file, (const T*)(*this), nn, mm, format, 2, modulo, 0);
+}
+
+// Input of Mat
+// Reads the two dimensions, resizes the matrix and then reads n*m elements
+// row by row, each with the caller supplied scanf format.
+template <typename T>
+void NRMat<T>::fscanf(FILE *f, const char *format)
+{
+	int n, m;
+	if (std::fscanf(f, "%d %d", &n, &m) != 2)
+		laerror("cannot read matrix dimensions in Mat::fscanf()");
+	resize(n,m);
+	T *p = *this;
+	for(int i=0; i<n; i++)
+		for(int j=0; j<m; j++) // a row holds m elements (the column count), not n
+			if(std::fscanf(f,format,p++) != 1)
+				laerror("cannot read matrix element in Mat::fscanf()");
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/*
+ * BLAS specializations for double and complex<double>
+ */
+
+// Mat *= a
+// Scales all nn*mm elements by a with one BLAS call after detaching shared storage.
+NRMat<double> & NRMat<double>::operator*=(const double &a)
+{
+	copyonwrite();
+	cblas_dscal(nn*mm, a, *this, 1);
+	return *this;
+}
+// complex variant; the scalar is passed to cblas_zscal by address
+NRMat< complex<double> > &
+NRMat< complex<double> >::operator*=(const complex<double> &a)
+{
+	copyonwrite();
+	cblas_zscal(nn*mm, &a, (void *)(*this)[0], 1);
+	return *this;
+}
+
+// Mat += Mat
+// Elementwise addition over all nn*mm elements via axpy with coefficient 1;
+// the dimensions are compared only when DEBUG is defined.
+NRMat<double> & NRMat<double>::operator+=(const NRMat<double>  &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn || mm!= rhs.mm) 
+		laerror("Mat += Mat of incompatible matrices");
+#endif
+	copyonwrite();
+	cblas_daxpy(nn*mm, 1.0, rhs, 1, *this, 1);
+	return *this;
+}
+// complex variant, the coefficient is the constant CONE
+NRMat< complex<double> > &
+NRMat< complex<double> >::operator+=(const NRMat< complex<double> >  &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn || mm!= rhs.mm) 
+		laerror("Mat += Mat of incompatible matrices");
+#endif
+	 copyonwrite();
+	 cblas_zaxpy(nn*mm, &CONE, (void *)rhs[0], 1, (void *)(*this)[0], 1);
+	 return *this;
+}
+
+// Mat -= Mat
+// Elementwise subtraction over all nn*mm elements via axpy with coefficient -1;
+// the dimensions are compared only when DEBUG is defined.
+NRMat<double> & NRMat<double>::operator-=(const NRMat<double>  &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn || mm!= rhs.mm) 
+		laerror("Mat -= Mat of incompatible matrices");
+#endif
+	copyonwrite();
+	cblas_daxpy(nn*mm, -1.0, rhs, 1, *this, 1);
+	return *this;
+}
+// complex variant, the coefficient is the constant CMONE
+NRMat< complex<double> > &
+NRMat< complex<double> >::operator-=(const NRMat< complex<double> >  &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn || mm!= rhs.mm) 
+		laerror("Mat -= Mat of incompatible matrices");
+#endif
+	 copyonwrite();
+	 cblas_zaxpy(nn*mm, &CMONE, (void *)rhs[0], 1, (void *)(*this)[0], 1);
+	 return *this;
+}
+
+// Mat += SMat
+// rhs is read as packed rows of growing length (row i contributes i+1 values).
+NRMat<double> & NRMat<double>::operator+=(const NRSMat<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat+=SMat");
+#endif
+	const double *p = rhs;
+	copyonwrite();
+	// first pass: packed row i (diagonal included) is added to the start of row i
+	for (int i=0; i<nn; i++) {
+		cblas_daxpy(i+1, 1.0, p, 1, (*this)[i], 1); 
+		p += i+1;
+	}
+	// second pass: the first i values of packed row i are added down column i,
+	// starting at element (0,i) with stride nn
+	p = rhs; p++;
+	for (int i=1; i<nn; i++) {
+		cblas_daxpy(i, 1.0, p, 1, (*this)[0]+i, nn);
+		p += i+1;
+	}
+	return *this;
+}
+// complex Mat += SMat
+// rhs is read as packed rows of growing length (row i contributes i+1 values).
+NRMat< complex<double> > & 
+NRMat< complex<double> >::operator+=(const NRSMat< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat+=SMat");
+#endif
+	const complex<double> *p = rhs;
+	copyonwrite();
+	// first pass: packed row i (diagonal included) is added to the start of row i
+	for (int i=0; i<nn; i++) {
+		cblas_zaxpy(i+1, (void *)&CONE, (void *)p, 1, (void *)(*this)[i], 1); 
+		p += i+1;
+	}
+	// second pass: the first i values of packed row i are added down column i;
+	// the column starts at element (0,i), exactly as in the double version
+	p = rhs; p++;
+	for (int i=1; i<nn; i++) {
+		cblas_zaxpy(i, (void *)&CONE, (void *)p, 1, (void *)((*this)[0]+i), nn);
+		p += i+1;
+	}
+	return *this;
+}
+
+// Mat -= SMat
+// rhs is read as packed rows of growing length (row i contributes i+1 values).
+NRMat<double> & NRMat<double>::operator-=(const NRSMat<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat-=SMat");
+#endif
+	const double *p = rhs;
+	copyonwrite();
+	// first pass: packed row i (diagonal included) is subtracted from the start of row i
+	for (int i=0; i<nn; i++) {
+		cblas_daxpy(i+1, -1.0, p, 1, (*this)[i], 1); 
+		p += i+1;
+	}
+	// second pass: the first i values of packed row i are subtracted down column i,
+	// starting at element (0,i) with stride nn
+	p = rhs; p++;
+	for (int i=1; i<nn; i++) {
+		cblas_daxpy(i, -1.0, p, 1, (*this)[0]+i, nn);
+		p += i+1;
+	}
+	return *this;
+}
+// complex Mat -= SMat
+// rhs is read as packed rows of growing length (row i contributes i+1 values).
+NRMat< complex<double> > & 
+NRMat< complex<double> >::operator-=(const NRSMat< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat-=SMat");
+#endif
+	const complex<double> *p = rhs;
+	copyonwrite();
+	// first pass: packed row i (diagonal included) is subtracted from the start of row i
+	for (int i=0; i<nn; i++) {
+		cblas_zaxpy(i+1, (void *)&CMONE, (void *)p, 1, (void *)(*this)[i], 1); 
+		p += i+1;
+	}
+	// second pass: the first i values of packed row i are subtracted down column i;
+	// the column starts at element (0,i), exactly as in the double version
+	p = rhs; p++;
+	for (int i=1; i<nn; i++) {
+		cblas_zaxpy(i, (void *)&CMONE, (void *)p, 1, (void *)((*this)[0]+i), nn);
+		p += i+1;
+	}
+	return *this;
+}
+
+// Mat.Mat - scalar product
+// Both matrices are treated as flat vectors of nn*mm elements.
+const double NRMat<double>::dot(const NRMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if(nn!=rhs.nn || mm!= rhs.mm) laerror("Mat.Mat incompatible matrices");
+#endif
+	return cblas_ddot(nn*mm, (*this)[0], 1, rhs[0], 1);
+}
+// complex variant, computed by cblas_zdotc_sub with *this as the first operand
+const complex<double>
+NRMat< complex<double> >::dot(const NRMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if(nn!=rhs.nn || mm!= rhs.mm) laerror("Mat.Mat incompatible matrices");
+#endif
+	complex<double> dot;
+	cblas_zdotc_sub(nn*mm, (void *)(*this)[0], 1, (void *)rhs[0], 1, 
+			(void *)(&dot));
+	return dot;
+}
+
+// Mat * Mat
+// Row-major gemm without transposition; the result has nn rows and rhs.mm columns.
+const NRMat<double> NRMat<double>::operator*(const NRMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (mm != rhs.nn) laerror("product of incompatible matrices");
+#endif
+	NRMat<double> result(nn, rhs.mm);
+	cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nn, rhs.mm, mm, 1.0,
+			*this, mm, rhs, rhs.mm, 0.0, result, rhs.mm);
+	return result;
+}
+// complex variant, using the constants CONE and CZERO as alpha and beta
+const NRMat< complex<double> > 
+NRMat< complex<double> >::operator*(const NRMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (mm != rhs.nn) laerror("product of incompatible matrices");
+#endif
+	NRMat< complex<double> > result(nn, rhs.mm);
+	cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nn, rhs.mm, mm,
+			(const void *)(&CONE),(const void *)(*this)[0], mm, (const void *)rhs[0],
+			rhs.mm, (const void *)(&CZERO), (void *)result[0], rhs.mm);
+	return result;
+}
+
+// Multiply by diagonal from L
+// Row i (mm contiguous elements) is scaled in place by rhs[i].
+void NRMat<double>::diagmultl(const NRVec<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.size()) laerror("incompatible matrix dimension in diagmultl");
+#endif
+	copyonwrite();
+	for(int i=0; i<nn; i++) cblas_dscal(mm, rhs[i], (*this)[i], 1);
+}
+// complex variant; the factor is passed to cblas_zscal by address
+void NRMat< complex<double> >::diagmultl(const NRVec< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.size()) laerror("incompatible matrix dimension in diagmultl");
+#endif
+	copyonwrite();
+	for (int i=0; i<nn; i++) cblas_zscal(mm, &rhs[i], (*this)[i], 1);
+}
+
+// Multiply by diagonal from R
+// Column i (nn elements, stride mm) is scaled in place by rhs[i].
+void NRMat<double>::diagmultr(const NRVec<double> &rhs)
+{
+#ifdef DEBUG
+	if (mm != rhs.size()) laerror("incompatible matrix dimension in diagmultr");
+#endif
+	copyonwrite();
+	// column i starts at element (0,i), i.e. (*this)[0]+i; (*this)[i] would be the start of row i
+	for (int i=0; i<mm; i++) cblas_dscal(nn, rhs[i], (*this)[0]+i, mm);
+}
+// complex variant; the factor is passed to cblas_zscal by address
+void NRMat< complex<double> >::diagmultr(const NRVec< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (mm != rhs.size()) laerror("incompatible matrix dimension in diagmultr");
+#endif
+	copyonwrite();
+	// column i starts at element (0,i), i.e. (*this)[0]+i; (*this)[i] would be the start of row i
+	for (int i=0; i<mm; i++) cblas_zscal(nn, &rhs[i], (*this)[0]+i, mm);
+}
+
+// Mat * Smat, decomposed to nn x Vec * Smat
+// Each row i of *this is passed as the vector of a packed matrix-vector product
+// with rhs, and the outcome is stored as row i of the result.
+const NRMat<double> 
+NRMat<double>::operator*(const NRSMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (mm != rhs.nrows()) laerror("incompatible dimension in Mat*SMat");
+#endif
+	NRMat<double> result(nn, rhs.ncols());
+	for (int i=0; i<nn; i++)
+		cblas_dspmv(CblasRowMajor, CblasLower, mm, 1.0, &rhs[0], 
+				(*this)[i], 1, 0.0, result[i], 1);
+	return result;
+}
+// complex variant, using cblas_zhpmv with the constants CONE and CZERO
+const NRMat< complex<double> >
+NRMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (mm != rhs.nrows()) laerror("incompatible dimension in Mat*SMat");
+#endif
+	NRMat< complex<double> > result(nn, rhs.ncols());
+	for (int i=0; i<nn; i++)
+		cblas_zhpmv(CblasRowMajor, CblasLower, mm, (void *)&CONE, (void *)&rhs[0],
+				(void *)(*this)[i], 1, (void *)&CZERO, (void *)result[i], 1);
+	return result;
+}
+
+// Mat * Vec
+// Row-major gemv without transposition; the result vector has nn elements.
+const NRVec<double> 
+NRMat<double>::operator*(const NRVec<double> &vec) const
+{
+#ifdef DEBUG
+	if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec");
+#endif
+	NRVec<double> result(nn);
+	cblas_dgemv(CblasRowMajor, CblasNoTrans, nn, mm, 1.0, (*this)[0],
+			mm, &vec[0], 1, 0.0, &result[0], 1);
+	return result;
+}
+// complex variant, using the constants CONE and CZERO as alpha and beta
+const NRVec< complex<double> >
+NRMat< complex<double> >::operator*(const NRVec< complex<double> > &vec) const
+{
+#ifdef DEBUG
+	if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec");
+#endif
+	NRVec< complex<double> > result(nn);
+	cblas_zgemv(CblasRowMajor, CblasNoTrans, nn, mm, (void *)&CONE, (void *)(*this)[0],
+			mm, (void *)&vec[0], 1, (void *)&CZERO, (void *)&result[0], 1);
+	return result;
+}
+
+// sum of rows: entry i of the result is the sum over j of element (j,i)
+// A plain signed sum is used; cblas_dasum would add absolute values and
+// disagree with the generic template for matrices with negative elements.
+const NRVec<double> NRMat<double>::rsum() const
+{
+	NRVec<double> result(mm);
+	for (int i=0; i<mm; i++) {
+		double sum = 0.0;
+		for (int j=0; j<nn; j++) sum += (*this)[j][i];
+		result[i] = sum;
+	}
+	return result;
+}
+
+// sum of columns: entry i of the result is the sum over j of element (i,j)
+// Signed sum, for the same reason as in rsum().
+const NRVec<double> NRMat<double>::csum() const
+{
+	NRVec<double> result(nn);
+	for (int i=0; i<nn; i++) {
+		double sum = 0.0;
+		for (int j=0; j<mm; j++) sum += (*this)[i][j];
+		result[i] = sum;
+	}
+	return result;
+}
+
+// complex conjugate of Mat
+// no-op for a real matrix
+NRMat<double> &NRMat<double>::conjugateme() {return *this;}
+
+// The complex storage is addressed as doubles: starting at offset 1 with
+// stride 2, every second double is multiplied by -1.
+// NOTE(review): assumes complex<double> is laid out as (real, imag) pairs of doubles.
+NRMat< complex<double> > & NRMat< complex<double> >::conjugateme()
+{
+	copyonwrite();
+	cblas_dscal(mm*nn, -1.0, (double *)((*this)[0])+1, 2);
+	return *this;
+}
+
+// transpose and optionally conjugate
+// Real case: row i of *this is copied into column i of the mm x nn result;
+// the conj argument is not used here.
+const NRMat<double> NRMat<double>::transpose(bool conj) const
+{
+	NRMat<double> result(mm,nn);
+	for(int i=0; i<nn; i++) cblas_dcopy(mm, (*this)[i], 1, result[0]+i, nn);
+	return result;
+}
+// Complex case: same copy; when conj is set, every second double of the result
+// storage (offset 1, stride 2) is multiplied by -1.
+const NRMat< complex<double> >
+NRMat< complex<double> >::transpose(bool conj) const
+{
+	NRMat< complex<double> > result(mm,nn);
+	for (int i=0; i<nn; i++) 
+		cblas_zcopy(mm, (void *)(*this)[i], 1, (void *)(result[0]+i), nn);
+	if (conj) cblas_dscal(mm*nn, -1.0, (double *)(result[0])+1, 2);
+	return result;
+}
+
+// gemm : this = alpha*op( A )*op( B ) + beta*this
+// transa / transb equal to 'n' select the untransposed operand, any other
+// character selects the transpose.
+void NRMat<double>::gemm(const double &beta, const NRMat<double> &a,
+		const char transa, const NRMat<double> &b, const char transb, 
+		const double &alpha)
+{
+	// dimensions of op(A) (l x k) and op(B) (kk x ll)
+	int l(transa=='n'?a.nn:a.mm);
+	int k(transa=='n'?a.mm:a.nn);
+	int kk(transb=='n'?b.nn:b.mm);
+	int ll(transb=='n'?b.mm:b.nn);
+
+#ifdef DEBUG
+	if (l!=nn || ll!=mm || k!=kk) laerror("incompatible matrices in Mat:gemm()");
+#endif
+	// nothing would change, so skip the detach and the BLAS call
+	if (alpha==0.0 && beta==1.0) return;
+
+	copyonwrite();
+	cblas_dgemm(CblasRowMajor, (transa=='n' ? CblasNoTrans : CblasTrans),
+			(transb=='n' ? CblasNoTrans : CblasTrans), nn, mm, k, alpha, a,
+			a.mm, b , b.mm, beta, *this , mm);
+}
+// complex gemm : this = alpha*op( A )*op( B ) + beta*this
+// For each operand 'n' selects no transposition, 'c' the conjugate transpose
+// and any other character the plain transpose.
+void NRMat< complex<double> >::gemm(const complex<double> & beta,
+		const NRMat< complex<double> > & a, const char transa, 
+		const NRMat< complex<double> > & b, const char transb, 
+		const complex<double> & alpha)
+{
+	// dimensions of op(A) (l x k) and op(B) (kk x ll)
+	int l(transa=='n'?a.nn:a.mm);
+	int k(transa=='n'?a.mm:a.nn);
+	int kk(transb=='n'?b.nn:b.mm);
+	int ll(transb=='n'?b.mm:b.nn);
+
+#ifdef DEBUG
+	if (l!=nn || ll!=mm || k!=kk) laerror("incompatible matrices in Mat:gemm()");
+#endif
+	// nothing would change, so skip the detach and the BLAS call
+	if (alpha==CZERO && beta==CONE) return;
+
+	copyonwrite();
+	// the operation applied to B has to be chosen from transb, not from transa
+	cblas_zgemm(CblasRowMajor,
+			(transa=='n' ? CblasNoTrans : (transa=='c'?CblasConjTrans:CblasTrans)), 
+			(transb=='n' ? CblasNoTrans : (transb=='c'?CblasConjTrans:CblasTrans)),
+			nn, mm, k, &alpha, a , a.mm, b , b.mm, &beta, *this , mm);
+}
+
+// norm of Mat
+// Square root of the sum of squared elements, with scalar subtracted from the
+// diagonal elements first; a zero scalar is handled by one BLAS call.
+const double  NRMat<double>::norm(const double scalar) const
+{
+	if (scalar == 0.0) return cblas_dnrm2(nn*mm, (*this)[0], 1);
+	double sumsq = 0;
+	for (int row=0; row<nn; row++)
+		for (int col=0; col<mm; col++) {
+#ifdef MATPTR
+			double entry = v[row][col];
+#else
+			double entry = v[row*mm+col];
+#endif
+			if (row==col) entry -= scalar;
+			sumsq += entry*entry;
+		}
+	return sqrt(sumsq);
+}
+// complex norm of Mat
+// Square root of the sum of squared moduli, with scalar subtracted from the
+// diagonal elements first; a zero scalar is handled by one BLAS call.
+const double  NRMat< complex<double> >::norm(const complex<double> scalar) const
+{
+	if (scalar == CZERO) return cblas_dznrm2(nn*mm, (*this)[0], 1);
+	double sumsq = 0;
+	for (int row=0; row<nn; row++)
+		for (int col=0; col<mm; col++) {
+#ifdef MATPTR
+			complex<double> entry = v[row][col];
+#else
+			complex<double> entry = v[row*mm+col];
+#endif
+			if (row==col) entry -= scalar;
+			sumsq += entry.real()*entry.real()+entry.imag()*entry.imag();
+		}
+	return sqrt(sumsq);
+}
+
+// axpy: this += alpha * Mat
+// Accumulates over all nn*mm elements with one BLAS call; the dimensions are
+// compared only when DEBUG is defined.
+void NRMat<double>::axpy(const double alpha, const NRMat<double> &mat)
+{
+#ifdef DEBUG
+	if (nn!=mat.nn || mm!=mat.mm) laerror("daxpy of incompatible matrices");
+#endif
+	copyonwrite();
+	cblas_daxpy(nn*mm, alpha, mat, 1, *this, 1);
+}
+// complex variant; alpha is passed to cblas_zaxpy by address
+void NRMat< complex<double> >::axpy(const complex<double> alpha, 
+		const NRMat< complex<double> > & mat)
+{
+#ifdef DEBUG
+	if (nn!=mat.nn || mm!=mat.mm) laerror("zaxpy of incompatible matrices");
+#endif
+	copyonwrite();
+	cblas_zaxpy(nn*mm, (void *)&alpha, mat, 1, (void *)(*this)[0], 1);
+}
+
+// trace of Mat: the signed sum of the diagonal elements
+// cblas_dasum must not be used here, it would add the absolute values and
+// give a wrong trace as soon as a diagonal element is negative.
+const double NRMat<double>::trace() const
+{
+#ifdef DEBUG
+	if (nn != mm) laerror("no-square matrix in Mat::trace()");
+#endif
+	double sum = 0.0;
+	for (int i=0; i<nn; i++) sum += (*this)[i][i];
+	return sum;
+}
+// complex trace: sum of the diagonal elements, which lie nn+1 positions apart
+const complex<double> NRMat< complex<double> >::trace() const
+{
+#ifdef DEBUG
+	if (nn != mm) laerror("no-square matrix in Mat::trace()");
+#endif
+	complex<double> acc = CZERO;
+	const int step = nn+1;
+	for (int pos=0; pos<nn*nn; pos+=step)
+#ifdef MATPTR
+		acc += v[0][pos];
+#else
+		acc += v[pos];
+#endif
+	return acc;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//// forced instantiation in the corresponding object file
+#define INSTANTIZE(T) \
+template ostream & operator<<(ostream &s, const NRMat< T > &x); \
+template istream & operator>>(istream  &s, NRMat< T > &x); \
+
+INSTANTIZE(double)
+INSTANTIZE(complex<double>)
+
+
+// text output: a line with both dimensions, then one line per matrix row
+export template <class T>
+ostream& operator<<(ostream &s, const NRMat<T> &x)
+                {
+                const int n=x.nrows();
+                const int m=x.ncols();
+                s << n << ' ' << m << '\n';
+                for(int row=0;row<n;row++)
+                        for(int col=0; col<m;col++)
+                                {
+                                s << x[row][col];
+                                // the last element of a row is followed by a newline, the others by a blank
+                                if(col==m-1) s << '\n'; else s << ' ';
+                                }
+                return s;
+                }
+
+// text input: reads both dimensions, resizes x and reads the elements row by row
+export template <class T>
+istream& operator>>(istream  &s, NRMat<T> &x)
+                {
+                int n,m;
+                s >> n >> m;
+                x.resize(n,m);
+                for(int row=0;row<n;row++)
+                        for(int col=0; col<m;col++)
+                                s>>x[row][col] ;
+                return s;
+                }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/mat.h
+++ b/mat.h
@ -0,0 +1,346 @@
+#ifndef _LA_MAT_H_
+#define _LA_MAT_H_
+
+#include "vec.h"
+#include "smat.h"
+
+// Dense nn x mm matrix. The copy constructor shares the data and the
+// reference counter `count` with its source; copyonwrite() is called by the
+// library routines before they modify the elements.
+// With MATPTR defined the elements are reached through an array of row
+// pointers, otherwise through a single pointer to row-major data.
+template <typename T>
+class NRMat {
+protected:
+	int nn; // number of rows
+	int mm; // number of columns
+#ifdef MATPTR
+	T **v; // row pointers; v[0] points to the contiguous data
+#else
+	T *v; // contiguous data, row i starts at v+i*mm
+#endif
+	int *count; // shared reference counter, 0 for an unallocated matrix
+public:
+	friend class NRVec<T>;
+	friend class NRSMat<T>;
+	
+	inline NRMat() : nn(0), mm(0), v(0), count(0) {};
+	inline NRMat(const int n, const int m);
+	inline NRMat(const T &a, const int n, const int m);
+	NRMat(const T *a, const int n, const int m);
+	inline NRMat(const NRMat &rhs);
+	explicit NRMat(const NRSMat<T> &rhs);
+#ifndef MATPTR
+	NRMat(const NRVec<T> &rhs, const int n, const int m);
+#endif
+	~NRMat();
+	inline int getcount() const {return count?*count:0;}
+	NRMat & operator=(const NRMat &rhs);  //assignment
+	NRMat & operator=(const T &a);    //assign a to diagonal
+	NRMat & operator|=(const NRMat &rhs); //assignment to a new copy
+	NRMat & operator+=(const T &a);   //add diagonal
+	NRMat & operator-=(const T &a);   //subtract diagonal
+	NRMat & operator*=(const T &a);   //multiply by a scalar
+	NRMat & operator+=(const NRMat &rhs);
+	NRMat & operator-=(const NRMat &rhs);
+	NRMat & operator+=(const NRSMat<T> &rhs);
+	NRMat & operator-=(const NRSMat<T> &rhs);
+	const NRMat operator-() const; //unary minus
+	inline const NRMat operator+(const T &a) const;
+	inline const NRMat operator-(const T &a) const;
+	inline const NRMat operator*(const T &a) const;
+	inline const NRMat operator+(const NRMat &rhs) const;
+	inline const NRMat operator-(const NRMat &rhs) const;
+	inline const NRMat operator+(const NRSMat<T> &rhs) const;
+	inline const NRMat operator-(const NRSMat<T> &rhs) const;
+	const T dot(const NRMat &rhs) const; // scalar product of Mat.Mat
+	const NRMat operator*(const NRMat &rhs) const; // Mat * Mat
+	void diagmultl(const NRVec<T> &rhs); //multiply by a diagonal matrix from L
+	void diagmultr(const NRVec<T> &rhs); //multiply by a diagonal matrix from R
+	const NRMat operator*(const NRSMat<T> &rhs) const; // Mat * Smat
+	const NRMat operator&(const NRMat &rhs) const; // direct sum
+	const NRMat operator|(const NRMat<T> &rhs) const; // direct product
+	const NRVec<T> operator*(const NRVec<T> &rhs) const; // Mat * Vec
+	const NRVec<T> rsum() const; //sum of rows
+	const NRVec<T> csum() const; //sum of columns
+	inline T* operator[](const int i);  //subscripting: pointer to row i
+	inline const T* operator[](const int i) const;
+	inline T& operator()(const int i, const int j); // (i,j) subscripts
+	inline const T& operator()(const int i, const int j) const;
+	inline int nrows() const;
+	inline int ncols() const;
+	void copyonwrite();
+	void resize(const int n, const int m);
+	inline operator T*(); //get a pointer to the data
+	inline operator const T*() const;
+	NRMat & transposeme(); // square matrices only
+	NRMat & conjugateme(); // square matrices only
+	const NRMat transpose(bool conj=false) const;
+	const NRMat conjugate() const;
+	void gemm(const T &beta, const NRMat &a, const char transa, const NRMat &b,
+			const char transb, const T &alpha);//this = alpha*op( A )*op( B ) + beta*this
+/*
+	void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b,
+			const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this
+	void s_cutoff(const int,const int,const int,const int) const;
+*/
+	void fprintf(FILE *f, const char *format, const int modulo) const;
+	void fscanf(FILE *f, const char *format);
+	const double norm(const T scalar=(T)0) const;
+	void axpy(const T alpha, const NRMat &x); // this += a*x
+	inline const T amax() const;
+	const T trace() const;
+
+//members concerning sparse matrix
+	explicit NRMat(const SparseMat<T> &rhs);                // dense from sparse
+	NRMat & operator+=(const SparseMat<T> &rhs);
+        NRMat & operator-=(const SparseMat<T> &rhs);
+        inline void simplify() {}; //just for compatibility with sparse ones
+
+//Strassen's multiplication (better than n^3, analogous syntax to gemm)
+	void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this
+	void s_cutoff(const int,const int,const int,const int) const;
+
+};
+
+// ctors
+// Allocate storage for an n x m matrix owned by a fresh reference counter;
+// no element values are assigned here.
+template <typename T>
+NRMat<T>::NRMat(const int n, const int m) : nn(n), mm(m), count(new int(1))
+{
+	const int total = m*n;
+#ifdef MATPTR
+	v = new T*[n];
+	v[0] = new T[total];
+	for (int row=1; row<n; row++) v[row] = v[0] + row*m;
+#else
+	v = new T[total];
+#endif
+}
+
+// Allocate an n x m matrix and set every element to a
+// (a zero value is written with memset instead of an element loop).
+template <typename T>
+NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new int(1))
+{
+	const int total = n*m;
+#ifdef MATPTR
+	v = new T*[n];
+	v[0] = new T[total];
+	for (int row=1; row<n; row++) v[row] = v[0] + row*m;
+	T *data = v[0];
+#else
+	v = new T[total];
+	T *data = v;
+#endif
+	if (a != (T)0)
+		{
+		for (T *stop = data+total; data != stop; ++data) *data = a;
+		}
+	else
+		memset(data, 0, total*sizeof(T));
+}
+
+// Allocate an n x m matrix and copy n*m elements from the array a
+// (taken as row-major data) with memcpy.
+template <typename T>
+NRMat<T>::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new int(1))
+{
+	const int total = m*n;
+#ifdef MATPTR
+	v = new T*[n];
+	v[0] = new T[total];
+	for (int row=1; row<n; row++) v[row] = v[0] + row*m;
+	T *data = v[0];
+#else
+	v = new T[total];
+	T *data = v;
+#endif
+	memcpy(data, a, total*sizeof(T));
+}
+
+// Copy constructor: shares the data and the reference counter of rhs and
+// bumps the counter when rhs is allocated.
+template <typename T>
+NRMat<T>::NRMat(const NRMat &rhs)
+	: nn(rhs.nn), mm(rhs.mm), v(rhs.v), count(rhs.count)
+{
+	if (count) ++(*count);
+}
+
+// Dense square matrix from a symmetric one: the elements of rhs are read in
+// storage order (lower triangle, row by row) and mirrored across the diagonal.
+template <typename T>
+NRMat<T>::NRMat(const NRSMat<T> &rhs)
+{
+	nn = mm = rhs.nrows();
+	count = new int;
+	*count = 1;
+#ifdef MATPTR
+	v = new T*[nn];
+	v[0] = new T[mm*nn];
+	for (int row=1; row<nn; row++) v[row] = v[0] + row*mm;
+#else
+	v = new T[mm*nn];
+#endif
+
+	int packed = 0;
+	for (int row=0; row<nn; row++)
+		for (int col=0; col<=row; col++, packed++)
+			{
+#ifdef MATPTR
+			v[row][col] = v[col][row] = rhs[packed];
+#else
+			v[row*nn+col] = v[col*nn+row] = rhs[packed];
+#endif
+			}
+}
+ 
+#ifndef MATPTR
+// View the data of the vector rhs as an n x m matrix: the storage and the
+// reference counter are shared with rhs, nothing is copied.
+template <typename T>
+NRMat<T>::NRMat(const NRVec<T> &rhs, const int n, const int m)
+{
+#ifdef DEBUG
+	if (n*m != rhs.nn) laerror("matrix dimensions incompatible with vector length");
+#endif
+	nn = n;
+	mm = m;
+	count = rhs.count;
+	v = rhs.v;
+	// same guard as in the copy constructor: an unallocated source has no counter
+	if (count) (*count)++;
+}
+#endif
+
+// Mat + Smat: add rhs to a copy of this matrix and return the copy
+template <typename T>
+inline const NRMat<T> NRMat<T>::operator+(const NRSMat<T> &rhs) const
+{
+	NRMat<T> sum(*this);
+	sum += rhs;
+	return sum;
+}
+
+// Mat - Smat: subtract rhs from a copy of this matrix and return the copy
+template <typename T>
+inline const NRMat<T> NRMat<T>::operator-(const NRSMat<T> &rhs) const
+{
+	NRMat<T> difference(*this);
+	difference -= rhs;
+	return difference;
+}
+
+// Mat[i] : pointer to the first element of i-th row (writable access)
+template <typename T>
+inline T* NRMat<T>::operator[](const int i)
+{
+#ifdef DEBUG
+	// the allocation test comes first: an unallocated matrix has count==0,
+	// so *count must not be read before it
+	if (!v) laerror("[] for unallocated Mat");
+	if (*count != 1) laerror("Mat lval use of [] with count > 1");
+	if (i<0 || i>=nn) laerror("Mat [] out of range");
+#endif
+#ifdef MATPTR
+	return v[i];
+#else
+	return v+i*mm;
+#endif
+}
+// Read-only Mat[i] : pointer to the first element of i-th row
+template <typename T>
+inline const T* NRMat<T>::operator[](const int i) const
+{
+#ifdef DEBUG
+	if (!(i>=0 && i<nn)) laerror("Mat [] out of range");
+	if (v == 0) laerror("[] for unallocated Mat");
+#endif
+#ifdef MATPTR
+	const T *row = v[i];
+#else
+	const T *row = v+i*mm;
+#endif
+	return row;
+}
+
+// Mat(i,j) reference to the matrix element M_{ij} (writable access)
+template <typename T>
+inline T & NRMat<T>::operator()(const int i, const int j)
+{
+#ifdef DEBUG
+	// the allocation test comes first: an unallocated matrix has count==0,
+	// so *count must not be read before it
+	if (!v) laerror("(,) for unallocated Mat");
+	if (*count != 1) laerror("Mat lval use of (,) with count > 1");
+	// valid columns are 0..mm-1, hence j>=mm (not j>mm) is out of range
+	if (i<0 || i>=nn || j<0 || j>=mm) laerror("Mat (,) out of range");
+#endif
+#ifdef MATPTR
+	return v[i][j];
+#else
+	return v[i*mm+j];
+#endif
+}
+// Read-only Mat(i,j): reference to the matrix element M_{ij}
+template <typename T>
+inline const T & NRMat<T>::operator()(const int i, const int j) const
+{
+#ifdef DEBUG
+	// valid columns are 0..mm-1, hence j>=mm (not j>mm) is out of range
+	if (i<0 || i>=nn || j<0 || j>=mm) laerror("Mat (,) out of range");
+	if (!v) laerror("(,) for unallocated Mat");
+#endif
+#ifdef MATPTR
+	return v[i][j];
+#else
+	return v[i*mm+j];
+#endif
+}
+
+// number of rows (the stored nn)
+template <typename T>
+inline int NRMat<T>::nrows() const
+{
+	return nn;
+}
+
+// number of columns (the stored mm)
+template <typename T>
+inline int NRMat<T>::ncols() const
+{
+	return mm;
+}
+
+// conversion to a pointer to the first element of the contiguous data
+template <typename T>
+inline NRMat<T>::operator T* ()
+{
+#ifdef DEBUG
+	if (v == 0) laerror("unallocated Mat in operator T*");
+#endif
+#ifdef MATPTR
+	T *data = v[0];
+#else
+	T *data = v;
+#endif
+	return data;
+}
+// read-only conversion to a pointer to the first element of the contiguous data
+template <typename T>
+inline NRMat<T>::operator const T* () const
+{
+#ifdef DEBUG
+	if (v == 0) laerror("unallocated Mat in operator T*");
+#endif
+#ifdef MATPTR
+	const T *data = v[0];
+#else
+	const T *data = v;
+#endif
+	return data;
+}
+
+// max element of Mat: the element at the index returned by BLAS idamax
+// taken over all nn*mm entries
+inline const double  NRMat<double>::amax() const
+{
+#ifdef MATPTR
+	const double *data = v[0];
+#else
+	const double *data = v;
+#endif
+	return data[cblas_idamax(nn*mm, data, 1)];
+}
+// complex variant: the element at the index returned by BLAS izamax
+// taken over all nn*mm entries
+inline const complex<double>  NRMat< complex<double> >::amax() const
+{
+#ifdef MATPTR
+	complex<double> *data = v[0];
+#else
+	complex<double> *data = v;
+#endif
+	return data[cblas_izamax(nn*mm, (void *)data, 1)];
+}
+
+
+// I/O
+template <typename T> extern ostream& operator<<(ostream &s, const NRMat<T> &x);
+template <typename T> extern istream& operator>>(istream  &s, NRMat<T> &x);
+
+
+
+
+
+// generate operators: Mat + a, a + Mat, Mat * a
+NRVECMAT_OPER(Mat,+)
+NRVECMAT_OPER(Mat,-)
+NRVECMAT_OPER(Mat,*)
+// generate Mat + Mat, Mat - Mat
+NRVECMAT_OPER2(Mat,+)
+NRVECMAT_OPER2(Mat,-)
+
+#endif /* _LA_MAT_H_ */
--- a/matexp.h
+++ b/matexp.h
@ -0,0 +1,259 @@
+//general routine for polynomial of a matrix, tuned to minimize the number
+//of matrix-matrix multiplications on cost of additions and memory
+// the polynom and exp routines will work on any type, for which traits class
+// is defined containing definition of an element type, norm and axpy operation
+
+#include "la_traits.h"
+#include "sparsemat_traits.h"
+
+//reference evaluation of the polynomial sum_k c[k]*x^k by the Horner scheme
+template<class T,class R>
+const T polynom2(const T &x, const NRVec<R> &c)
+{
+const int order=c.size()-1;
+T y;
+
+if(order==0)
+	{
+	//the size of the matrix is not known here, so x is copied first to
+	//get the dimensions and the scalar is assigned afterwards
+	y=x;
+	y=c[0];
+	return y;
+	}
+
+//y = (...((c[order]*x + c[order-1])*x + c[order-2])*x ...) + c[0]
+T z=x*c[order];
+y=z+c[order-1];
+for(int i=order-2; i>=0; i--)
+	{
+	z=y*x;
+	y=z+c[i];
+	}
+return y;
+}
+
+
+//Polynomial sum_k c[k]*x^k of a matrix-like object, evaluated in blocks of m
+//terms: the powers x^1..x^m are stored, each block is summed with axpy and
+//the blocks are combined with powers of x^m. The block length m is chosen
+//to minimize the estimated number of multiplications.
+//For degree <=4 the Horner scheme of polynom2 is used.
+template<class T,class R>
+const T polynom(const T &x, const NRVec<R> &c)
+{
+int n=c.size()-1;
+int i,j,k,m=0,t;
+
+if(n<=4) return polynom2(x,c); //here the horner scheme is optimal
+
+//first find m which minimizes the number of multiplications
+j=10*n;
+for(i=2;i<=n+1;i++)
+    {
+    //the conditional has to be parenthesized: ?: binds weaker than the
+    //arithmetic, and without the parentheses t was only ever 0 or 1,
+    //which made the search always select m=2
+    t=i-2+2*(n/i)-((n%i)?0:1);
+    if(t<j)
+	{
+	j=t;
+	m=i;
+	}
+    }
+
+//allocate array for powers up to m
+T *xpows = new T[m];
+xpows[0]=x;
+for(i=1;i<m;i++) xpows[i]=xpows[i-1]*x;
+
+
+//run the summation loop
+T r,s,f;
+k= -1;
+for(i=0; i<=n/m;i++)
+	{
+	for(j=0;j<m;j++)
+		{
+		k++;
+		if(k>n) break;
+		if(j==0) {if(i==0) s=x; /*just to get the dimensions of the matrix*/ s=c[k]; /*create diagonal matrix*/}
+		else  
+			NRMat_traits<T>::axpy(s,xpows[j-1],c[k]); //general  s+=xpows[j-1]*c[k]; but more efficient for matrices
+		}
+
+	if(i==0) {r=s; f=xpows[m-1];}
+	else
+		{
+		r+= s*f;
+		if(i<n/m) f=f*xpows[m-1]; //the next power is not needed after the last block
+		}
+	}
+ 
+delete[] xpows;
+return r;
+}
+
+
+//nested commutator for general objects:
+//right: z=x, then z=[z,y] repeated nest times; otherwise z=y, then z=[x,z]
+template<class T>
+const T ncommutator ( const T &x, const T &y, int nest=1, const bool right=1)
+{
+T z;
+if(right)
+	{
+	z=x;
+	for(; nest>0; --nest) z=z*y-y*z;
+	}
+else
+	{
+	z=y;
+	for(; nest>0; --nest) z=x*z-z*x;
+	}
+return z;
+}
+
+//nested anticommutator for general objects:
+//right: z=x, then z=z*y+y*z repeated nest times; otherwise z=y, then z=x*z+z*x
+template<class T>
+const T nanticommutator ( const T &x, const T &y, int nest=1, const bool right=1)
+{
+T z;
+if(right)
+	{
+	z=x;
+	for(; nest>0; --nest) z=z*y+y*z;
+	}
+else
+	{
+	z=y;
+	for(; nest>0; --nest) z=x*z+z*x;
+	}
+return z;
+}
+
+//general BCH expansion (can be written more efficiently in a specialization for matrices)
+//returns h + sum_{i=1..n} z_i/i!  where z_0=h and z_i = z_{i-1}*t - t*z_{i-1};
+//with verbose set, the norm of every z_i is reported on cerr
+template<class T>
+const T BCHexpansion (const T &h, const T &t, const int n, const bool verbose=1)
+{
+T result=h;
+double factor=1.;
+T z=h;
+for(int i=1; i<=n; ++i)
+	{
+	factor/=i; //factor = 1/i!
+	z= z*t-t*z;
+	if(verbose) cerr << "BCH contribution at order "<<i<<" : "<<z.norm()<<endl;
+	result+= z*factor; 
+	}
+return result;
+}
+
+
+//integer power x^i (i>=0) by repeated squaring
+template<class T>
+const T ipow( const T &x, int i)
+{
+if(i<0) laerror("negative exponent in ipow");
+if(i==0)
+	{
+	//copy x first to get the matrix dimension, then assign the unit
+	T r=x;
+	r=1.;
+	return r;
+	}
+if(i==1) return x;
+
+T y,z;
+z=x;
+//square away the trailing zero bits of the exponent
+for(; (i&1)==0; i >>= 1) z = z*z;
+y=z;
+//process the remaining bits, multiplying in the powers whose bit is set
+for(i >>= 1; i!=0; i >>= 1)
+	{
+	z = z*z;
+	if(i&1) y = y*z;
+	}
+return y;
+}
+
+//number of halvings p applied to an argument of norm n before the Taylor
+//expansion: 0 for n<=.75, 1 for n<=1, otherwise the smallest p with n/2^p<=.75
+inline int nextpow2(const double n)
+{
+const double ln2=log(2.);
+if(n<=.75) return 0; //try to keep the taylor expansion short
+if(n<=1.) return 1;
+//both logarithms have to be in base 2; subtracting the natural log(.75) from
+//log2(n) let the scaled norm end up above .75 for some n
+return int(ceil(log(n/.75)/ln2));
+}
+
+
+//Helper of exp() and exptimes(): returns the coefficients of a truncated
+//Taylor polynomial of exp and stores in power the number of squarings that
+//the caller has to apply afterwards. The argument itself is not scaled;
+//the factor 2^-power is folded into the coefficients instead.
+template<class T>
+NRVec<typename NRMat_traits<T>::elementtype> exp_aux(const T &x, int &power)
+{
+//should better be computed by mathematica to have accurate last digits, chebyshev instead, see exp in glibc
+//exptaylor[k] = 1/k! for k=0..20, followed by a terminating zero
+static double exptaylor[]={
+1.,
+1.,
+0.5,
+0.1666666666666666666666,
+0.0416666666666666666666,
+0.0083333333333333333333,
+0.0013888888888888888888,
+0.00019841269841269841253,
+2.4801587301587301566e-05,
+2.7557319223985892511e-06,
+2.7557319223985888276e-07,
+2.5052108385441720224e-08,
+2.0876756987868100187e-09,
+1.6059043836821613341e-10,
+1.1470745597729724507e-11,
+7.6471637318198164055e-13,
+4.7794773323873852534e-14,
+2.8114572543455205981e-15,
+1.5619206968586225271e-16,
+8.2206352466243294955e-18,
+4.1103176233121648441e-19,
+0.};
+double mnorm= NRMat_traits<T>::norm(x);
+power=nextpow2(mnorm);
+double scale=exp(-log(2.)*power); //scale = 2^-power
+
+
+//find how long taylor expansion will be necessary
+const double precision=1e-16;
+double s,t;
+s=mnorm*scale;
+int n=0;
+t=1.;
+//t runs through s^n; stop at the first term not exceeding precision
+do	{
+	n++;
+	t*=s;
+	}
+while(t*exptaylor[n]>precision);//the terminating 0 of the table will stop the loop in any case
+
+
+int i; //adjust the coefficients in order to avoid scaling the argument
+NRVec<typename NRMat_traits<T>::elementtype> taylor2(n+1);
+//taylor2[i] = scale^i/i!
+for(i=0,t=1.;i<=n;i++)
+	{
+	taylor2[i]=exptaylor[i]*t;
+	t*=scale;
+	}
+return taylor2;
+}
+
+
+
+//exponential of a matrix-like object: a polynomial with pre-scaled Taylor
+//coefficients from exp_aux is evaluated and the result squared `power` times
+template<class T>
+const T exp(const T &x)
+{
+int power;
+
+//coefficients which effectively scale the argument
+NRVec<typename NRMat_traits<T>::elementtype> taylor2=exp_aux(x,power);
+
+//for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices
+T r=polynom(x,taylor2);
+
+//undo the scaling by repeated squaring
+for(int remaining=power; remaining>0; --remaining) r=r*r;
+return r;
+}
+
+
+//determinant obtained as a by-product of linear_solve without right hand
+//sides; the matrix is passed by value because linear_solve overwrites it
+template<class MAT>
+const typename NRMat_traits<MAT>::elementtype determinant(MAT a)
+{
+typedef typename NRMat_traits<MAT>::elementtype element;
+element det;
+if(!(a.nrows()==a.ncols())) laerror("determinant of non-square matrix");
+linear_solve(a,NULL,&det);
+return det;
+}
+
+
+//exp(mat) applied to vec using only matrix times vector products: the scaled
+//Taylor polynomial from exp_aux is applied 2^power times in a row
+template<class M, class V>
+const V exptimes(const M &mat, V vec)
+{
+if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)vec.size()) laerror("inappropriate sizes in exptimes");
+int power;
+//coefficients which effectively scale the matrix
+NRVec<typename NRMat_traits<M>::elementtype> taylor2=exp_aux(mat,power);
+
+V result(mat.nrows());
+//unfortunately the polynomial has to be applied many times here, unlike
+//the case when the matrix exponential is stored explicitly
+const int applications=1<<power;
+for(int pass=0; pass<applications; ++pass)
+	{
+	if(pass>0) vec=result; //continue from the result of the previous application
+	//apply the polynomial of the matrix to the vector term by term
+	V y=vec;
+	result=y*taylor2[0];
+	for(int j=1; j<taylor2.size(); ++j)
+		{
+		y=mat*y;
+		result.axpy(taylor2[j],y);
+		}
+	}
+
+return result;
+}
--- a/nonclass.cc
+++ b/nonclass.cc
@ -0,0 +1,524 @@
+extern "C" {
+#include "atlas_enum.h"
+#include "clapack.h"
+}
+#include "la.h"
+
+// FORNAME(x) spells the name of an external routine: x with a trailing
+// underscore appended when FORTRAN_ is defined, x unchanged otherwise
+#ifdef FORTRAN_
+#define FORNAME(x) x##_
+#else
+#define FORNAME(x) x
+#endif
+
+// explicit instantiation of lawritemat for the element types used by the library
+#define INSTANTIZE(T) \
+template void lawritemat(FILE *file,const T *a,int r,int c,const char *form0, \
+		int nodim,int modulo, int issym);
+INSTANTIZE(double)
+INSTANTIZE(complex<double>)
+
+// Offset of the element (i,j), both counted from 1, of a symmetric matrix
+// inside its lower triangle packed row by row (offsets count from 0).
+static inline int lawritemat_packedpos(int i, int j)
+{
+	if (i < j) { const int t = i; i = j; j = t; }
+	return (i-1)*i/2 + (j-1);
+}
+
+// Write the r x c matrix a to file.
+//   form0  - leading text (printed verbatim, "%%" gives '%') followed by the
+//            printf format used for every element; real and imaginary part
+//            of the element are passed to fprintf in this order
+//   nodim  - 2: print "r c" first, 1: print "c" first; ignored with modulo
+//   modulo - if nonzero, print in blocks of this many numbered columns
+//   issym  - if nonzero, a is a symmetric matrix stored as a packed lower
+//            triangle, otherwise a dense row-major array
+template <typename T>
+void lawritemat(FILE *file,const T *a,int r,int c,const char *form0,
+		int nodim,int modulo, int issym)
+{
+	int i,j;
+	const char *f;
+
+	/*print out title before %*/
+	f=form0;
+	for (;;) {
+		while (*f && *f !='%' ) {fputc(*f++,file);}
+		if (*f=='%' && f[1]=='%') {
+			fputc(*f,file); f+=2;
+		} else break;
+	}
+	/* this has to be avoided when const arguments should be allowed *f=0; */
+	/*use the rest as a format for numbers*/
+
+	if (modulo) nodim=0;
+	if (nodim==2) fprintf(file,"%d %d\n",r,c);
+	if (nodim==1) fprintf(file,"%d\n",c);
+	if (modulo) {
+		int n1, n2, l, m;
+		char ff[32];
+		/* prepare integer format for column numbering; the field width is
+		   taken from the number format when it has one (f+1 is looked at
+		   only if a '%' was found, otherwise it lies past the terminator) */
+		if (*f == '\0' || sscanf(f+1,"%d",&l) != 1) l=128/modulo;
+		l -= 2;
+		m = l/2;
+		l = l-m;
+		sprintf(ff,"%%%ds%%3d%%%ds", l, m);
+		n1 = 1;
+		while(n1 <= c) {
+			n2=n1+modulo-1;
+			if (n2 > c) n2 = c;
+
+			/*write block between columns n1 and n2 */
+			fprintf(file,"\n    ");
+			for (i=n1; i<=n2; i++) fprintf(file,ff," ",i," ");
+			fprintf(file,"\n\n");
+
+			for (i=1; i<=r; i++) {
+				fprintf(file, "%3d ", i);
+				for (j=n1; j<=n2; j++) {
+					/* i and j count from 1 here */
+					const int pos = issym ? lawritemat_packedpos(i,j) : (i-1)*c+j-1;
+					const complex<double> x = (complex<double>)a[pos];
+					fprintf(file, f, x.real(), x.imag());
+					if (j < n2) fputc(' ',file);
+				}
+				fprintf(file, "\n");
+			}
+			n1 = n2+1;
+		}
+	} else {
+		for (i=1; i<=r; i++) {
+			for (j=1; j<=c; j++) {
+				/* i and j count from 1 here */
+				const int pos = issym ? lawritemat_packedpos(i,j) : (i-1)*c+j-1;
+				const complex<double> x = (complex<double>)a[pos];
+				fprintf(file, f, x.real(), x.imag());
+				putc(j<c?' ':'\n',file);
+			}
+		}
+	}
+}
+
+// LA errorr handler
+void laerror(const char *s1, const char *s2, const char *s3, const char *s4)
+{
+  std::cerr << "LA:ERROR - ";
+  if(!s1)
+    std::cerr << "udefined.";
+  else {
+    if(s1) std::cerr << s1;
+    if(s2) std::cerr << s2;
+    if(s3) std::cerr << s3;
+    if(s4) std::cerr << s4;
+  }
+  std::cerr << endl;
+  exit(1);
+}
+
+//////////////////////
+// LAPACK interface //
+//////////////////////
+
+// A will be overwritten, B will contain the solutions, A is nxn, B is rhs x n
+// B may be null (no right hand sides). If det is non-null it receives the
+// product of the diagonal of the factorized A with the sign adjusted for
+// the row interchanges recorded in ipiv.
+void linear_solve(NRMat<double> &A, NRMat<double> *B, double *det)
+{
+	int r, *ipiv;
+	
+	if (A.nrows() != A.ncols()) laerror("linear_solve() call for non-square matrix");
+	if (B && A.nrows() != B->ncols()) laerror("incompatible matrices in linear_solve()");
+	A.copyonwrite();
+	if (B) B->copyonwrite();
+	ipiv = new int[A.nrows()];
+	r = clapack_dgesv(CblasRowMajor, A.nrows(), B ? B->nrows() : 0, A[0], A.ncols(),
+			ipiv, B ? (double *)(*B) : (double *)0, B ? B->ncols() : A.nrows());
+	if (r < 0) {
+		delete[] ipiv;
+		laerror("illegal argument in lapack_gesv");
+	}
+	if (det && r>=0) {
+		*det = A[0][0];
+		for (int i=1; i<A.nrows(); ++i) *det *= A[i][i];
+		//change sign of det by parity of ipiv permutation: only a real
+		//interchange (ipiv[i] != i) flips the sign; flipping it for every
+		//row gave a sign that depended on the dimension alone
+		//NOTE(review): assumes the C interface returns pivot indices counted from 0 - confirm
+		for (int i=0; i<A.nrows(); ++i)
+			if (ipiv[i] != i) *det = -(*det);
+	}
+	delete [] ipiv;
+	if (r>0 && B) laerror("singular matrix in lapack_gesv");
+}
+
+
+// The next routines are not available in clapack; the Fortran ones will be used
+// with an additional swap/transpose of outputs when needed
+
+extern "C" void FORNAME(dspsv)(const char *UPLO, const int *N, const int *NRHS,
+		double *AP, int *IPIV, double *B, const int *LDB, int *INFO);
+
+// Solve with a symmetric packed matrix a through the Fortran routine dspsv.
+// a is overwritten; b (may be null) holds the right hand sides, one per row.
+// The determinant returned through det is not reliable yet, as the warning
+// printed below says.
+void linear_solve(NRSMat<double> &a, NRMat<double> *b, double *det)
+{
+	int r, *ipiv;
+	if (det) cerr << "@@@ sign of the determinant not implemented correctly yet\n";
+	if (b && a.nrows() != b->ncols())
+		laerror("incompatible matrices in symmetric linear_solve()");
+	a.copyonwrite();
+	if (b) b->copyonwrite();
+	ipiv = new int[a.nrows()];
+	char U = 'U';
+	int n = a.nrows();
+	int nrhs = 0;
+	if (b) nrhs = b->nrows();
+	int ldb = b ? b->ncols() : a.nrows();
+	FORNAME(dspsv)(&U, &n, &nrhs, a, ipiv, b?(*b)[0]:0, &ldb,&r);
+	// r<0: illegal argument; r>0 is reported as a singular matrix further below
+	if (r < 0) {
+		delete[] ipiv;
+		laerror("illegal argument in spsv() call of linear_solve()");
+	}
+	if (det && r >= 0) {
+		// product of the diagonal of the overwritten a, one sign change per
+		// pivot entry that differs from its position
+		// NOTE(review): ipiv comes from a Fortran routine - check its index base and meaning
+		*det = a(0,0);
+		for (int i=1; i<a.nrows(); i++) *det *= a(i,i);
+		for (int i=0; i<a.nrows(); i++)
+			if (ipiv[i] != i) *det = -(*det);
+	}
+	delete[] ipiv;
+	if (r > 0 && b) laerror("singular matrix in linear_solve(SMat&, Mat*, double*");
+}
+
+
+extern "C" void FORNAME(dsyev)(const char *JOBZ, const char *UPLO, const int *N,
+		double *A, const int *LDA, double *W, double *WORK, const int *LWORK, int *INFO);
+
+// Symmetric eigenproblem through the Fortran routine dsyev.
+// a will contain eigenvectors (if eivec is set), w eigenvalues;
+// with corder set, a is transposed after the call when eigenvectors were computed
+void diagonalize(NRMat<double> &a, NRVec<double> &w, const bool eivec, 
+		const bool corder)
+{
+	int n = a.nrows();
+	if (n != a.ncols()) laerror("diagonalize() call with non-square matrix");
+	if (a.nrows() != w.size()) 
+		laerror("inconsistent dimension of eigenvalue vector in diagonalize()");
+
+	a.copyonwrite();
+	w.copyonwrite();
+
+	int r = 0;
+	char U ='U';
+	char vectors = 'V';
+	if (!eivec) vectors = 'N';
+	int LWORK = -1;
+	double WORKX;
+
+	// First call is to determine size of workspace (LWORK=-1 is a query,
+	// the size is returned in WORKX)
+	FORNAME(dsyev)(&vectors, &U, &n, a, &n, w, (double *)&WORKX, &LWORK, &r );
+	LWORK = (int)WORKX;
+	double *WORK = new double[LWORK];
+	// Second call does the actual work with the allocated workspace
+	FORNAME(dsyev)(&vectors, &U, &n, a, &n, w, WORK, &LWORK, &r );
+	delete[] WORK;
+	if (vectors == 'V' && corder) a.transposeme();
+
+	if (r < 0) laerror("illegal argument in syev() of diagonalize()");
+	if (r > 0) laerror("convergence problem in syev() of diagonalize()");
+}
+
+
+extern "C" void FORNAME(dspev)(const char *JOBZ, const char *UPLO, const int *N,
+		double *AP, double *W, double *Z, const int *LDZ, double *WORK, int *INFO);
+
+// Symmetric packed eigenproblem through the Fortran routine dspev.
+// v will contain eigenvectors, w eigenvalues; v may be null when only the
+// eigenvalues are wanted. a is overwritten. With corder set, v is
+// transposed after the call.
+void diagonalize(NRSMat<double> &a, NRVec<double> &w, NRMat<double> *v,
+		const bool corder)
+{
+	int n = a.nrows();
+	if (v) if (v->nrows() != v ->ncols() || n != v->nrows())
+		laerror("diagonalize() call with inconsistent dimensions");
+	if (n != w.size()) laerror("inconsistent dimension of eigenvalue vector");
+
+	a.copyonwrite();
+	w.copyonwrite();
+
+	int r = 0;
+	char U = 'U';
+	char job = v ? 'v' : 'n';
+
+	// workspace of 3*n doubles for dspev
+	double *WORK = new double[3*n];
+	FORNAME(dspev)(&job, &U, &n, a, w, v?(*v)[0]:(double *)0, &n, WORK,  &r );
+	delete[] WORK;
+	if (v && corder) v->transposeme();
+
+	if (r < 0) laerror("illegal argument in spev() of diagonalize()");
+	if (r > 0) laerror("convergence problem in spev() of diagonalize()");
+}
+
+
+extern "C" void FORNAME(dgesvd)(const char *JOBU,  const char *JOBVT,  const int *M,
+		const int *N,  double *A, const int *LDA, double *S, double *U, const int *LDU,
+		double *VT, const int *LDVT, double *WORK, const int *LWORK, int *INFO );
+
+// Singular value decomposition of the m x n matrix a through the Fortran
+// routine dgesvd. a is overwritten, s receives the singular values, u (m x m)
+// and v (n x n) are optional and may be null. With corder set, v is
+// transposed after the call.
+void singular_decomposition(NRMat<double> &a, NRMat<double> *u, NRVec<double> &s,
+		NRMat<double> *v, const bool corder)
+{
+	int m = a.nrows();
+	int n = a.ncols();
+	if (u) if (m != u->nrows() || m!= u->ncols())
+		laerror("inconsistent dimension of U Mat in singular_decomposition()");
+	if (s.size() < m && s.size() < n) 
+		laerror("inconsistent dimension of S Vec in singular_decomposition()");
+	if (v) if (n != v->nrows() || n != v->ncols())
+		laerror("inconsistent dimension of V Mat in singular_decomposition()");
+
+	a.copyonwrite();
+	s.copyonwrite();
+	if (u) u->copyonwrite();
+	if (v) v->copyonwrite();
+	
+	// C-order (transposed) input and swap u,v matrices,
+	// v should be transposed at the end
+	char jobu = u ? 'A' : 'N';
+	char jobv = v ? 'A' : 'N';
+	double work0;
+	int lwork = -1;
+	int r;
+	// workspace query: with lwork=-1 the required size is returned in work0
+	FORNAME(dgesvd)(&jobv, &jobu, &n, &m, a, &n, s, v?(*v)[0]:0, &n,
+			u?(*u)[0]:0, &m, &work0, &lwork, &r);
+	lwork = (int) work0;
+	double *work = new double[lwork];
+	// the real call has to get the allocated workspace; passing &work0 here
+	// let the routine use lwork doubles where a single one was available
+	FORNAME(dgesvd)(&jobv, &jobu, &n, &m, a, &n, s, v?(*v)[0]:0, &n,
+			u?(*u)[0]:0, &m, work, &lwork, &r);
+	delete[] work;
+	if (v && corder) v->transposeme();
+
+	if (r < 0) laerror("illegal argument in gesvd() of singular_decomposition()");
+	if (r > 0) laerror("convergence problem in gesvd() of ingular_decomposition()");
+}
+
+
+extern "C" void FORNAME(dgeev)(const char *JOBVL, const char *JOBVR, const int *N,
+		double *A, const int *LDA, double *WR, double *WI, double *VL, const int *LDVL,
+		double *VR, const int *LDVR, double *WORK, const int *LWORK, int *INFO );
+
+// General (nonsymmetric) real eigenproblem through the Fortran routine dgeev.
+// a is overwritten; wr and wi receive the real and imaginary parts of the
+// eigenvalues (only the size of wr is checked, wi is taken to have n
+// elements too); vl and vr are optional n x n matrices for the eigenvectors
+// and may be null. With corder set, vl and vr are transposed after the call.
+void gdiagonalize(NRMat<double> &a, NRVec<double> &wr, NRVec<double> &wi,
+		NRMat<double> *vl, NRMat<double> *vr, const bool corder)
+{
+	int n = a.nrows();
+	if (n != a.ncols()) laerror("gdiagonalize() call for a non-square matrix");
+	if (n != wr.size()) 
+		laerror("inconsistent dimension of eigen vector in gdiagonalize()");
+	if (vl) if (n != vl->nrows() || n != vl->ncols())
+		laerror("inconsistent dimension of vl in gdiagonalize()");
+	if (vr) if (n != vr->nrows() || n != vr->ncols())
+		laerror("inconsistent dimension of vr in gdiagonalize()");
+
+	a.copyonwrite();
+	wr.copyonwrite();
+	wi.copyonwrite();
+	if (vl) vl->copyonwrite();
+	if (vr) vr->copyonwrite();
+	
+	char jobvl = vl ? 'V' : 'N';
+	char jobvr = vr ? 'V' : 'N';
+	double work0;
+	int lwork = -1;
+	int r;
+	// data pointers of the optional eigenvector matrices
+	double *vldata = vl ? (double *)(*vl) : (double *)0;
+	double *vrdata = vr ? (double *)(*vr) : (double *)0;
+	// workspace query: with lwork=-1 the required size is returned in work0
+	FORNAME(dgeev)(&jobvr, &jobvl, &n, a, &n, wr, wi, vrdata,
+			&n, vldata, &n, &work0, &lwork, &r);
+	lwork = (int) work0;
+	double *work = new double[lwork];
+	// the real call has to get the allocated workspace; passing &work0 here
+	// let the routine use lwork doubles where a single one was available
+	FORNAME(dgeev)(&jobvr, &jobvl, &n, a, &n, wr, wi, vrdata,
+			&n, vldata, &n, work, &lwork, &r);
+	delete[] work;
+
+	if (corder) {
+		if (vl) vl->transposeme();
+		if (vr) vr->transposeme();
+	}
+
+	if (r < 0) laerror("illegal argument in geev() of gdiagonalize()");
+	if (r > 0) laerror("convergence problem in geev() of gdiagonalize()");
+}
+
+// General real eigenproblem with complex output: calls the real gdiagonalize
+// (without the final transposition) and combines its results into the
+// complex eigenvalues w and the optional complex eigenvector matrices vl, vr.
+// a is overwritten.
+void gdiagonalize(NRMat<double> &a, NRVec< complex<double> > &w,
+		NRMat< complex<double> >*vl, NRMat< complex<double> > *vr)
+{
+	int n = a.nrows();
+	if(n != a.ncols()) laerror("gdiagonalize() call for a non-square matrix");
+
+	// the outputs are written element by element below, so they must not
+	// share their data with other objects (as in the other routines here)
+	w.copyonwrite();
+	if (vl) vl->copyonwrite();
+	if (vr) vr->copyonwrite();
+
+	NRVec<double> wr(n), wi(n);
+	NRMat<double> *rvl = 0;
+	NRMat<double> *rvr = 0;
+	if (vl) rvl = new NRMat<double>(n, n);
+	if (vr) rvr = new NRMat<double>(n, n);
+	gdiagonalize(a, wr, wi, rvl, rvr, 0);
+	
+	//process the results into complex matrices
+	int i;
+	for (i=0; i<n; i++) w[i] = complex<double>(wr[i], wi[i]);
+	if (rvl || rvr) {
+		i = 0;
+		while (i < n) {
+			if (wi[i] == 0) {
+				// real eigenvalue: row i is copied as it is
+				if (vl) for (int j=0; j<n; j++) (*vl)[i][j] = (*rvl)[i][j];
+				if (vr) for (int j=0; j<n; j++) (*vr)[i][j] = (*rvr)[i][j];
+				i++;
+			} else {
+				// complex pair: rows i and i+1 hold the real and the imaginary
+				// part, the second vector of the pair is the conjugate
+				if (vl)
+					for (int j=0; j<n; j++) {
+						(*vl)[i][j] = complex<double>((*rvl)[i][j], (*rvl)[i+1][j]);
+						(*vl)[i+1][j] = complex<double>((*rvl)[i][j], -(*rvl)[i+1][j]);
+					} 
+				if (vr)
+					for (int j=0; j<n; j++) {
+						(*vr)[i][j] = complex<double>((*rvr)[i][j], (*rvr)[i+1][j]);
+						(*vr)[i+1][j] = complex<double>((*rvr)[i][j], -(*rvr)[i+1][j]);
+					}
+				i += 2;
+			}
+		}
+	}
+	if (rvl) delete rvl;
+	if (rvr) delete rvr;
+}
+
+
+// real matrix made of the real parts of a: every second double of the
+// complex data, starting with the first one, is copied
+const NRMat<double> realpart(const NRMat< complex<double> > &a)
+{
+	const int rows = a.nrows();
+	const int cols = a.ncols();
+	NRMat<double> result(rows, cols);
+	const double *source = (const double *)a[0];
+	cblas_dcopy(rows*cols, source, 2, result, 1);
+	return result;
+}
+
+// real matrix made of the imaginary parts of a: every second double of the
+// complex data, starting with the second one, is copied
+const NRMat<double> imagpart(const NRMat< complex<double> > &a)
+{
+	const int rows = a.nrows();
+	const int cols = a.ncols();
+	NRMat<double> result(rows, cols);
+	const double *source = (const double *)a[0]+1;
+	cblas_dcopy(rows*cols, source, 2, result, 1);
+	return result;
+}
+
+// complex matrix whose real parts are copied from a
+const NRMat< complex<double> > realmatrix (const NRMat<double> &a)
+{
+	const int rows = a.nrows();
+	const int cols = a.ncols();
+	NRMat <complex<double> > result(rows, cols);
+	double *target = (double *)result[0];
+	cblas_dcopy(rows*cols, a, 1, target, 2);
+	return result;
+}
+
+// complex matrix whose imaginary parts are copied from a
+const NRMat< complex<double> > imagmatrix (const NRMat<double> &a)
+{
+	const int rows = a.nrows();
+	const int cols = a.ncols();
+	NRMat< complex<double> > result(rows, cols);
+	double *target = (double *)result[0]+1;
+	cblas_dcopy(rows*cols, a, 1, target, 2);
+	return result;
+}
+
+
+// Function f of a general real matrix through its eigendecomposition:
+// f is applied to every eigenvalue and the result is divided by z, the
+// diagonal of the product of the two eigenvector matrices, before the
+// matrix is put together again. It is an error if the result has a
+// noticeable imaginary part. a is passed by value because gdiagonalize
+// overwrites it; adjust is not used.
+NRMat<double> matrixfunction(NRMat<double> a, complex<double>
+		(*f)(const complex<double> &), const bool adjust)
+{
+	int n = a.nrows();
+	NRMat< complex<double> > u(n, n), v(n, n);
+	NRVec< complex<double> > w(n);
+	gdiagonalize(a, w, &u, &v);
+	NRVec< complex<double> > z = diagofproduct(u, v, 1, 1);
+
+	// the normalization z belongs outside of f: f(w/z) would make the
+	// result depend on the arbitrary scaling of the eigenvectors
+	for (int i=0; i<a.nrows(); i++) w[i] = (*f)(w[i])/z[i];
+	u.diagmultl(w);
+
+	NRMat< complex<double> > r(n, n);
+	r.gemm(0.0, v, 'c', u, 'n', 1.0);
+	// norm of the imaginary parts of r
+	double inorm = cblas_dnrm2(n*n, (double *)r[0]+1, 2);
+	if (inorm > 1e-10) {
+		cout << "norm = " << inorm << endl;
+		laerror("nonzero norm of imaginary part of real matrixfunction");
+	}
+	return realpart(r);
+}
+
+// Function f of a symmetric matrix: diagonalize, apply f to the eigenvalues
+// and multiply the pieces back together. a is passed by value because
+// diagonalize overwrites it.
+NRMat<double> matrixfunction(NRSMat<double> a, double (*f) (double))
+{
+	const int n = a.nrows();
+	NRVec<double> eigenvalues(n);
+	NRMat<double> scaled(n, n);
+	diagonalize(a, eigenvalues, &scaled, 0);
+
+	for (int k=0; k<a.nrows(); k++) eigenvalues[k] = (*f)(eigenvalues[k]);
+	// keep the plain eigenvectors before one factor is scaled by f(w)
+	NRMat<double> vectors = scaled;
+	scaled.diagmultl(eigenvalues);
+	NRMat<double> result(n, n);
+	result.gemm(0.0, vectors, 't', scaled, 'n', 1.0);
+	return result;
+}
+
+// wraps the complex log in an ordinary function, so that its address can be
+// passed to matrixfunction
+complex<double> myclog (const complex<double> &x) 
+{
+	return log(x);
+}
+
+// logarithm of a general real matrix: matrixfunction with the complex log
+NRMat<double>  log(const NRMat<double> &a)
+{
+	return matrixfunction(a, &myclog, 1);
+}
+
+
+// diagonal of the product a*b, or of a*b^T when trb is set, computed as one
+// dot product per row of a without forming the product; conjb is not used
+// in the real case
+const NRVec<double> diagofproduct(const NRMat<double> &a, const NRMat<double> &b,
+		bool trb, bool conjb)
+{
+	const bool compatible = trb
+		? (a.nrows() == b.nrows() && a.ncols() == b.ncols())
+		: (a.nrows() == b.ncols() && a.ncols() == b.nrows());
+	if (!compatible)
+			laerror("incompatible Mats in diagofproduct<double>()");
+
+	const int rows = a.nrows();
+	NRVec<double> result(rows);
+	for(int i=0; i<rows; i++) {
+		if (trb)
+			// row i of a with row i of b
+			result[i] = cblas_ddot(a.ncols(), a[i], 1, b[i], 1);
+		else
+			// row i of a with column i of b
+			result[i] = cblas_ddot(a.ncols(), a[i], 1, b[0]+i, b.ncols());
+	}
+
+	return result;
+}
+
+
+// complex variant: element i is the dot product of row i of a with row i of
+// b (trb set) or column i of b (trb not set); with conjb the vector taken
+// from b is the first argument of the conjugating BLAS dot product zdotc
+const NRVec< complex<double> > diagofproduct(const NRMat< complex<double> > &a,
+		const NRMat< complex<double> > &b, bool trb, bool conjb)
+{
+	const bool compatible = trb
+		? (a.nrows() == b.nrows() && a.ncols() == b.ncols())
+		: (a.nrows() == b.ncols() && a.ncols() == b.nrows());
+	if (!compatible)
+			laerror("incompatible Mats in diagofproduct<complex>()");
+
+	const int rows = a.nrows();
+	NRVec< complex<double> > result(rows);
+	for(int i=0; i<rows; i++) {
+		// the vector taken from b and the distance between its elements
+		const complex<double> *bvec = trb ? b[i] : b[0]+i;
+		const int bstep = trb ? 1 : b.ncols();
+		if (conjb)
+			cblas_zdotc_sub(a.ncols(), bvec, bstep, a[i], 1, &result[i]);
+		else
+			cblas_zdotu_sub(a.ncols(), bvec, bstep, a[i], 1, &result[i]);
+	}
+	return result;
+}
+
+
+// trace of the product a*b, or of a*b^T when trb is set, computed from dot
+// products without forming the product
+double trace2(const NRMat<double> &a, const NRMat<double> &b, bool trb)
+{
+	// the message used to name diagofproduct<complex>(), a leftover of copying
+	if (trb && (a.nrows() != b.nrows() || a.ncols() != b.ncols()) ||
+				!trb && (a.nrows() != b.ncols() || a.ncols() != b.nrows()))
+			laerror("incompatible Mats in trace2()");
+	// with b transposed the trace is the dot product of all the elements
+	if (trb) return cblas_ddot(a.nrows()*a.ncols(), a, 1, b, 1);
+
+	// otherwise add up row i of a times column i of b
+	double sum = 0.0;
+	for (int i=0; i<a.nrows(); i++)
+		sum += cblas_ddot(a.ncols(), a[i], 1, b[0]+i, b.ncols());
+
+	return sum;
+}
+
+
+// trace of the product of two symmetric matrices from their packed data:
+// twice the dot product of the packed arrays, from which the products of
+// the diagonal elements are subtracted unless diagscaled is set
+double trace2(const NRSMat<double> &a, const NRSMat<double> &b,
+		const bool diagscaled)
+{
+	if (a.nrows() != b.nrows()) laerror("incompatible SMats in trace2()");
+
+	const int n = a.nrows();
+	double total = 2.0*cblas_ddot(n*(n+1)/2, a, 1, b, 1);
+	if (!diagscaled)
+		for (int i=0; i<n; i++) total -= a(i,i)*b(i,i);
+	return total;
+}
+
--- a/nonclass.h
+++ b/nonclass.h
@ -0,0 +1,85 @@
+#include "vec.h"
+#include "smat.h"
+#include "mat.h"
+
+//MISC
+template <class T> extern const NRMat<T> diagonalmatrix(const NRVec<T> &x);
+template <class T> extern const NRVec<T> lineof(const NRMat<T> &x, const int i); 
+template <class T> extern const NRVec<T> columnof(const NRMat<T> &x, const int i);
+template <class T> extern const NRVec<T> diagonalof(const NRMat<T> &x); 
+
+//commutator op(x)*op(y) - op(y)*op(x) for full matrices, accumulated in place
+//by two gemm calls so that no temporary product matrix is needed;
+//trx / tryy select transposition of x / y
+template<class T>
+inline const NRMat<T> commutator ( const NRMat<T> &x, const NRMat<T> &y, const bool trx=0, const bool tryy=0)
+{
+const char opx = trx ? 't' : 'n';
+const char opy = tryy ? 't' : 'n';
+const int rows = trx ? x.ncols() : x.nrows();
+const int cols = tryy ? y.nrows() : y.ncols();
+NRMat<T> r(rows, cols);
+r.gemm((T)0,x,opx,y,opy,(T)1);	//first product, previous content of r scaled by 0
+r.gemm((T)1,y,opy,x,opx,(T)-1);	//second product accumulated with factor -1
+return r;
+}
+
+//anticommutator op(x)*op(y) + op(y)*op(x) for full matrices, accumulated in
+//place by two gemm calls so that no temporary product matrix is needed;
+//trx / tryy select transposition of x / y
+template<class T>
+inline const NRMat<T> anticommutator ( const NRMat<T> &x, const NRMat<T> &y, const bool trx=0, const bool tryy=0)
+{
+const char opx = trx ? 't' : 'n';
+const char opy = tryy ? 't' : 'n';
+const int rows = trx ? x.ncols() : x.nrows();
+const int cols = tryy ? y.nrows() : y.ncols();
+NRMat<T> r(rows, cols);
+r.gemm((T)0,x,opx,y,opy,(T)1);	//first product, previous content of r scaled by 0
+r.gemm((T)1,y,opy,x,opx,(T)1);	//second product accumulated with factor +1
+return r;
+}
+
+
+
+
+//////////////////////
+// LAPACK interface //
+//////////////////////
+
+// declare_la(T) emits the extern declarations of the non-template free
+// functions for element type T: diagofproduct, trace2 (dense and packed
+// symmetric), linear_solve, diagonalize and singular_decomposition.
+// The default arguments are fixed here, so the definitions must not repeat them.
+// NOTE(review): linear_solve takes `double *det` for every T, including
+// complex<double> - confirm this is intended.
+#define declare_la(T) \
+extern const  NRVec<T> diagofproduct(const NRMat<T> &a, const NRMat<T> &b,\
+		bool trb=0, bool conjb=0); \
+extern T trace2(const NRMat<T> &a, const NRMat<T> &b, bool trb=0); \
+extern T trace2(const NRSMat<T> &a, const NRSMat<T> &b, const bool diagscaled=0);\
+extern void linear_solve(NRMat<T> &a, NRMat<T> *b, double *det=0); \
+extern void linear_solve(NRSMat<T> &a, NRMat<T> *b, double *det=0); \
+extern void diagonalize(NRMat<T> &a, NRVec<T> &w, const bool eivec=1,\
+		const bool corder=1); \
+extern void diagonalize(NRSMat<T> &a, NRVec<T> &w, NRMat<T> *v, const bool corder=1);\
+extern void singular_decomposition(NRMat<T> &a, NRMat<T> *u, NRVec<T> &s,\
+		NRMat<T> *v, const bool corder=1);
+
+declare_la(double)
+declare_la(complex<double>)
+
+// Separate declarations
+extern void gdiagonalize(NRMat<double> &a, NRVec<double> &wr, NRVec<double> &wi,
+		NRMat<double> *vl, NRMat<double> *vr, const bool corder=1);
+extern void gdiagonalize(NRMat<double> &a, NRVec< complex<double> > &w,
+		 NRMat< complex<double> >*vl, NRMat< complex<double> > *vr);
+extern NRMat<double> matrixfunction(NRSMat<double> a, double (*f) (double));
+extern NRMat<double> matrixfunction(NRMat<double> a, complex<double> (*f)(const complex<double> &),const bool adjust=0);
+
+//functions on matrices
+//sqrt and log of a packed symmetric matrix: both forward to matrixfunction()
+//with the scalar function of the same name; the overload of sqrt/log is
+//selected by matrixfunction's double (*)(double) parameter type
+inline NRMat<double>  sqrt(const NRSMat<double> &a) { return matrixfunction(a,&sqrt); }
+inline NRMat<double>  log(const NRSMat<double> &a) { return matrixfunction(a,&log); }
+extern NRMat<double> log(const NRMat<double> &a);
+
+
+extern const NRMat<double> realpart(const NRMat< complex<double> >&);
+extern const NRMat<double> imagpart(const NRMat< complex<double> >&);
+extern const NRMat< complex<double> > realmatrix (const NRMat<double>&);
+extern const NRMat< complex<double> > imagmatrix (const NRMat<double>&);
+
+//Matrix inverse obtained by solving a*X = 1 with linear_solve().
+//a is taken by value so that the caller's matrix object is not the one handed
+//to linear_solve; the optional det pointer is passed through unchanged.
+//The squareness check is compiled in only with DEBUG.
+//NOTE(review): linear_solve is declared with `double *det`, so passing T *det
+//looks valid only for T=double - confirm for complex use.
+template<typename T>
+const NRMat<T> inverse(NRMat<T> a, T *det=0)
+{
+	const int n = a.nrows();
+#ifdef DEBUG
+	if(n!=a.ncols()) laerror("inverse() for non-square matrix");
+#endif
+	NRMat<T> result(n,n);
+	result = (T)1.;
+	linear_solve(a, &result, det);
+	return result;
+}
+
--- a/smat.cc
+++ b/smat.cc
@ -0,0 +1,399 @@
+#include "smat.h"
+// TODO
+// specialize unary minus
+
+
+//////////////////////////////////////////////////////////////////////////////
+////// forced instantiation in the corresponding object file
+// An explicit instantiation of a class template needs the class-key;
+// the bare `template NRSMat<double>;` form is not valid standard C++.
+template class NRSMat<double>;
+template class NRSMat< complex<double> >;
+
+
+
+/*
+ *  * Templates first, specializations for BLAS next
+ *
+ */
+
+// conversion ctor, symmetrize general Mat into SMat
+// Stores 0.5*(rhs[i][j]+rhs[j][i]) for the lower triangle, row by row.
+// The dimension is taken from rhs; it used to be left uninitialized, so both
+// the DEBUG check and the allocation size read an indeterminate nn.
+// With DEBUG defined a non-square rhs is reported through laerror().
+template <typename T>
+NRSMat<T>::NRSMat(const NRMat<T> &rhs)
+	: nn(rhs.nrows()), v(0), count(0)
+{
+#ifdef DEBUG
+	if (nn != rhs.ncols()) laerror("attempt to convert non-square Mat to SMat");
+#endif
+	count = new int;
+	*count = 1;
+	v = new T[NN2];
+	int i, j, k=0;
+	for (i=0; i<nn; i++)
+		for (j=0; j<=i;j++) v[k++] = 0.5 * (rhs[i][j] + rhs[j][i]);
+}
+
+
+// dtor: drop one reference; the data and the counter are released only by the
+// last owner. An object without a counter owns nothing.
+template <typename T>
+NRSMat<T>::~NRSMat()
+{
+	if (count == 0) return;
+	*count -= 1;
+	if (*count > 0) return;
+	if (v) delete[] (v);
+	delete count;
+}
+
+
+// assignment with a physical copy
+// Unlike operator=, the data of rhs are copied into storage owned exclusively
+// by *this. If *this currently shares its data it is detached first; if the
+// dimension differs the old buffer is released and a new one allocated.
+// Calls laerror() when rhs is unallocated.
+template <typename T>
+NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &rhs)
+{
+	if (this != &rhs) {
+		if(!rhs.v) laerror("unallocated rhs in NRSMat operator |=");
+		if(count)
+			if(*count > 1) {	// detach from the other
+				--(*count);
+				nn = 0;
+				count = 0;
+				v = 0;
+			}
+		if (nn != rhs.nn) {
+			if(v) delete [] (v);
+			v = 0;	// force reallocation below; the stale pointer used to be kept and written through
+			nn = rhs.nn;
+		}
+		if (!v) v = new T[NN2];
+		if (!count) count = new int;
+		*count = 1;
+		memcpy(v, rhs.v, NN2*sizeof(T));
+	}
+	return *this;
+}
+
+// assignment: shallow, reference-counted copy.
+// The reference to the current data is dropped (freeing it if this was the
+// last owner) and *this then shares the storage and counter of rhs.
+template <typename T>
+NRSMat<T> & NRSMat<T>::operator=(const NRSMat<T> & rhs)
+{
+	if (this == &rhs) return *this;
+	if (count) {
+		--(*count);
+		if (*count == 0) {
+			delete [] v;
+			delete count;
+		}
+	}
+	nn = rhs.nn;
+	v = rhs.v;
+	count = rhs.count;
+	if (count) ++(*count);
+	return *this;
+}
+
+// assign the scalar a to every diagonal element; off-diagonal elements are
+// left untouched. The storage is made private first.
+template <typename T>
+NRSMat<T> & NRSMat<T>::operator=(const T &a)
+{
+	copyonwrite();
+	for (int i=0; i<nn; i++) {
+		const int diag = i*(i+3)/2;	// packed index of (i,i), equals i*(i+1)/2+i
+		v[diag] = a;
+	}
+	return *this;
+}
+
+// unary minus: returns a new matrix holding the negated packed elements
+template <typename T>
+const NRSMat<T> NRSMat<T>::operator-() const
+{
+	NRSMat<T> result(nn);
+	const int len = NN2;
+	T *dst = result.v;
+	const T *src = v;
+	for(int i=0; i<len; i++) dst[i] = -src[i];
+	return result;
+}
+
+// trace of Smat: sum of the diagonal elements of the packed storage
+template <typename T>
+const T NRSMat<T>::trace() const
+{
+	T sum = 0;
+	for (int i=0; i<nn; i++) {
+		const int diag = i*(i+3)/2;	// packed index of (i,i), equals i*(i+1)/2+i
+		sum += v[diag];
+	}
+	return sum;
+}
+
+// make a private instance of the Smat data (deep copy) if it is shared.
+// When the reference count is above one, this object leaves the shared
+// storage and gets its own counter and a copy of the elements; otherwise
+// nothing happens. The check for a missing counter exists only with DEBUG.
+template <typename T>
+void NRSMat<T>::copyonwrite()
+{
+#ifdef DEBUG
+	if (!count) laerror("probably an assignment to undefined Smat");
+#endif
+	if (*count <= 1) return;
+	(*count)--;
+	count = new int;
+	*count = 1;
+	T *fresh = new T[NN2];
+	memcpy(fresh, v, NN2*sizeof(T));
+	v = fresh;
+}
+
+// resize Smat to dimension n; the element values are not preserved.
+// Shared storage is detached first, an unallocated or just detached object
+// gets fresh storage, and an exclusively owned buffer is reallocated only when
+// the dimension really changes. The n <= 0 check exists only with DEBUG.
+template <typename T>
+void NRSMat<T>::resize(const int n)
+{
+#ifdef DEBUG
+	if (n <= 0) laerror("illegal matrix dimension in resize of Smat");
+#endif
+	if (count && *count > 1) {	//detach from previous
+		(*count)--;
+		count = 0;
+		v = 0;
+		nn = 0;
+	}
+	if (!count) {	//new uninitialized matrix or just detached
+		count = new int;
+		*count = 1;
+		nn = n;
+		v = new T[NN2];
+		return;
+	}
+	if (n == nn) return;
+	nn = n;
+	delete[] v;
+	v = new T[NN2];
+}
+
+// write the matrix to a file using the given element format;
+// the actual output is delegated to lawritemat()
+template <typename T>
+void NRSMat<T>::fprintf(FILE *file, const char *format, const int modulo) const
+{
+	const T *data = *this;	// conversion operator yields the packed array
+	lawritemat(file, data ,nn, nn, format, 2, modulo, 1);
+}
+
+// read the matrix from a file: first two integer dimensions (which must be
+// equal), then n*n elements in the given format. Every element of the full
+// square is read and stored through (i,j), so both triangles are consumed.
+template <class T>
+void NRSMat<T>::fscanf(FILE *f, const char *format)
+{
+	int n, m;
+	const int got = std::fscanf(f,"%d %d",&n,&m);
+	if (got != 2)
+		laerror("cannot read matrix dimensions in SMat::fscanf");
+	if (n != m) laerror("different dimensions of SMat");
+	resize(n);
+	for (int i=0; i<n; i++) {
+		for (int j=0; j<n; j++) {
+			T *slot = &((*this)(i,j));
+			if (std::fscanf(f,format,slot) != 1)
+				laerror("Smat - cannot read matrix element");
+		}
+	}
+}
+
+
+/*
+ * BLAS specializations for double and complex<double>
+ */
+
+// SMat * Mat
+// The product is built one column at a time: each column of rhs (stride equal
+// to the row length) is multiplied by the packed symmetric matrix with dspmv
+// and written to the same column of the result.
+const NRMat<double> NRSMat<double>::operator*(const NRMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat");
+#endif
+	const int ncol = rhs.ncols();
+	NRMat<double> result(nn, ncol);
+	for (int k=0; k<ncol; k++) {
+		const double *xcol = rhs[0]+k;
+		double *ycol = result[0]+k;
+		cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, xcol, ncol,
+				0.0, ycol, ncol);
+	}
+	return result;
+}
+// complex SMat * Mat: same column-by-column scheme as the real case, using
+// the Hermitian packed matrix-vector product zhpmv
+const NRMat< complex<double> >
+NRSMat< complex<double> >::operator*(const NRMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat");
+#endif
+	const int ncol = rhs.ncols();
+	NRMat< complex<double> > result(nn, ncol);
+	for (int k=0; k<ncol; k++) {
+		const complex<double> *xcol = rhs[0]+k;
+		complex<double> *ycol = result[0]+k;
+		cblas_zhpmv(CblasRowMajor, CblasLower, nn, &CONE, v, xcol, ncol,
+				&CZERO, ycol, ncol);
+	}
+	return result;
+}
+
+// SMat * SMat
+// Product of two packed symmetric matrices returned as a full matrix.
+// The result starts at zero and is accumulated in four passes that work
+// directly on the packed arrays (p walks this->v, q walks rhs.v) using
+// daxpy, ddot, dger and daxpy respectively.
+// NOTE(review): the four passes appear to split the sum over the inner index
+// according to its position relative to the row and column index; this has
+// not been verified element by element here.
+const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat");
+#endif
+	NRMat<double> result(0.0, nn, nn);
+	double *p, *q;
+
+	// pass 1: element k of packed row i of *this scales the first k+1
+	// elements of packed row k of rhs into row i of the result
+	p = v;
+	for (int i=0; i<nn;i++) {
+		q = rhs.v;
+		for (int k=0; k<=i; k++) {
+			cblas_daxpy(k+1, *p++, q, 1, result[i], 1);
+			q += k+1;
+		}
+	}
+
+	// pass 2: dot products of packed row i of *this with packed row j of rhs,
+	// over min(i+1,j) elements, added to result[i][j] for j>=1
+	p = v;
+	for (int i=0; i<nn;i++) {
+		q = rhs.v+1;
+		for (int j=1; j<nn; j++) {
+			result[i][j] += cblas_ddot(i+1<j ? i+1 : j, p, 1, q, 1);
+			q += j+1;
+		}
+		p += i+1;
+	}
+
+	// pass 3: rank-one update of the leading i x (i+1) corner of the result
+	// with packed row i of *this and packed row i of rhs
+	p = v; 
+	q = rhs.v;
+	for (int i=0; i<nn; i++) {
+		cblas_dger(CblasRowMajor, i, i+1, 1., p, 1, q, 1, result, nn);
+		p += i+1;
+		q += i+1;
+	}
+	
+	// pass 4: updates of result column j (stride nn) for j>=2, scaled by
+	// elements of rhs fetched through the pre-incremented q
+	q = rhs.v+3;
+	for (int j=2; j<nn; j++) {
+		p = v+1;
+		for (int i=1; i<j; i++) {
+			cblas_daxpy(i, *++q, p, 1, result[0]+j, nn);
+			p += i+1;
+		}
+		q += 2;
+	}
+
+	return result;
+}
+// complex SMat * SMat: no packed algorithm is provided; rhs is expanded to a
+// full matrix and the SMat*Mat product is used instead
+const NRMat< complex<double> > 
+NRSMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat");
+#endif
+	NRMat< complex<double> > result(0.0, nn, nn);
+	NRMat< complex<double> > dense(rhs);
+	result = (*this) * dense;
+	return result;
+}
+// S dot S: dot product of the two packed arrays (each stored element counted once)
+const double NRSMat<double>::dot(const NRSMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("dot of incompatible SMat's");
+#endif
+	const int len = NN2;
+	return cblas_ddot(len, v, 1, rhs.v, 1);
+}
+// complex S dot S: conjugating dot product (zdotc, *this is the conjugated
+// argument) over the whole packed array. The length must be NN2, as in the
+// real version; it used to be nn, which covered only the first nn elements.
+const complex<double> 
+NRSMat< complex<double> >::dot(const NRSMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("dot of incompatible SMat's");
+#endif
+	complex<double> dot;
+	cblas_zdotc_sub(NN2, (void *)v, 1, (void *)rhs.v, 1, (void *)(&dot));
+	return dot;
+}
+
+// y = S * x: packed symmetric matrix times vector through dspmv
+const NRVec<double> NRSMat<double>::operator*(const NRVec<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec");
+#endif
+	NRVec<double> result(nn);
+	const double *x = rhs;
+	double *y = result;
+	cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, x, 1, 0.0, y, 1);
+	return result;
+}
+// complex y = S * x: packed Hermitian matrix times vector through zhpmv
+const NRVec< complex<double> >
+NRSMat< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec");
+#endif
+	NRVec< complex<double> > result(nn);
+	const complex<double> *x = rhs;
+	complex<double> *y = result;
+	cblas_zhpmv(CblasRowMajor, CblasLower, nn, (void *)(&CONE), (void *)v, 
+			(const void *)x, 1, (void *)(&CZERO), (void *)y, 1);
+	return result;
+}
+
+// norm of the matrix: Euclidean norm of the packed array, optionally with
+// `scalar` subtracted from every diagonal element first. For scalar == 0 the
+// BLAS routine is used directly.
+const double  NRSMat<double>::norm(const double scalar) const
+{
+	if (!scalar) return cblas_dnrm2(NN2, v, 1);
+	double sum = 0;
+	const double *p = v;
+	for (int i=0; i<nn; ++i)
+		for (int j=0; j<=i; ++j, ++p) {
+			const double d = (i == j) ? *p - scalar : *p;
+			sum += d*d;
+		}
+	return sqrt(sum);
+}
+// complex variant: Euclidean norm of the packed array, optionally with
+// `scalar` subtracted from every diagonal element first
+const double
+NRSMat< complex<double> >::norm(const complex<double> scalar) const
+{
+	if (!(scalar.real()) && !(scalar.imag()))
+		return cblas_dznrm2(NN2, (void *)v, 1);
+	double sum = 0;
+	const complex<double> *p = v;
+	for (int i=0; i<nn; ++i)
+		for (int j=0; j<=i; ++j, ++p) {
+			complex<double> d = *p;
+			if (i == j) d -= scalar;
+			sum += d.real()*d.real() + d.imag()*d.imag();
+		}
+	return sqrt(sum);
+}
+
+// axpy: S += alpha * x, applied to the packed arrays after making the
+// storage of *this private
+void NRSMat<double>::axpy(const double alpha, const NRSMat<double> & x)
+{
+#ifdef DEBUG
+	if (nn != x.nn) laerror("axpy of incompatible SMats");
+#endif
+	copyonwrite();
+	const int len = NN2;
+	cblas_daxpy(len, alpha, x.v, 1, v, 1);
+}
+// complex axpy: S += alpha * x over the whole packed array.
+// The length must be NN2, as in the real version; it used to be nn, which
+// updated only the first nn packed elements.
+void NRSMat< complex<double> >::axpy(const complex<double> alpha,
+			const NRSMat< complex<double> > & x)
+{
+#ifdef DEBUG
+	if (nn != x.nn) laerror("axpy of incompatible SMats");
+#endif
+	copyonwrite();
+	cblas_zaxpy(NN2, (void *)(&alpha), (void *)x.v, 1, (void *)v, 1);
+}
+
+
+// Stream output: the dimensions "n n" on the first line, then the full square
+// matrix row by row (both triangles are written, read through x(i,j)).
+// The `export` keyword was dropped: it is not valid in current C++, and the
+// required instances are produced by the explicit instantiations in this file.
+template <class T>
+ostream& operator<<(ostream &s, const NRSMat<T> &x)
+                {
+                int i,j,n;
+                n=x.nrows();
+                s << n << ' ' << n << '\n';
+                for(i=0;i<n;i++)
+                        {
+                        for(j=0; j<n;j++) s << x(i,j) << (j==n-1 ? '\n' : ' ');
+                        }
+                return s;
+                }
+
+
+// Stream input: reads two dimensions (which must be equal), resizes x and
+// then reads the full square matrix, storing every element through x(i,j).
+// The `export` keyword was dropped: it is not valid in current C++, and the
+// required instances are produced by the explicit instantiations in this file.
+template <class T>
+istream& operator>>(istream  &s, NRSMat<T> &x)
+                {
+                int i,j,n,m;
+                s >> n >> m;
+                if(n!=m) laerror("input symmetric matrix not square");
+                x.resize(n);
+                for(i=0;i<n;i++) for(j=0; j<m;j++) s>>x(i,j);
+                return s;
+                }
+
+
+//////////////////////////////////////////////////////////////////////////////
+//// forced instantiation in the corresponding object file
+// INSTANTIZE(T) explicitly instantiates the stream operators for NRSMat<T>.
+// The last macro line no longer ends in a backslash, so the macro does not
+// silently absorb whatever line follows it.
+#define INSTANTIZE(T) \
+template ostream & operator<<(ostream &s, const NRSMat< T > &x); \
+template istream & operator>>(istream  &s, NRSMat< T > &x);
+
+INSTANTIZE(double)
+INSTANTIZE(complex<double>)
+
--- a/smat.h
+++ b/smat.h
@ -0,0 +1,303 @@
+#ifndef _LA_SMAT_H_
+#define _LA_SMAT_H_
+
+#include "vec.h"
+#include "mat.h"
+
+// number of stored elements of a packed nn x nn triangle; expands in terms of
+// whatever variable named `nn` is in scope at the point of use
+#define NN2 (nn*(nn+1)/2)
+// Symmetric (or, for complex T, Hermitian) matrix kept in packed form: only
+// NN2 = nn*(nn+1)/2 elements are stored, element (i,j) with i>=j at index
+// i*(i+1)/2+j. Copies made by the copy constructor and operator= share the
+// data and the reference counter `count`; operator|= and copyonwrite() give
+// an object its own private storage.
+template <class T>
+class NRSMat { // symmetric or complex hermitean matrix in packed form
+protected:
+	int nn;		// matrix dimension
+	T *v;		// packed elements, NN2 of them
+	int *count;	// shared reference counter, 0 for an unallocated object
+public:
+	friend class NRVec<T>;
+	friend class NRMat<T>;
+	
+	// default ctor: unallocated matrix (the extra NRSMat<T>:: qualification
+	// that used to be here is not allowed inside the class body)
+	inline NRSMat() : nn(0),v(0),count(0) {};
+	inline explicit NRSMat(const int n);			// Zero-based array
+	inline NRSMat(const T &a, const int n);	//Initialize to constant
+	inline NRSMat(const T *a, const int n);	// Initialize to array
+	inline NRSMat(const NRSMat &rhs);		// Copy constructor
+	explicit NRSMat(const NRMat<T> &rhs);		// symmetric part of general matrix
+	explicit NRSMat(const NRVec<T> &rhs, const int n); //construct matrix from vector
+	NRSMat & operator|=(const NRSMat &rhs);	//assignment to a new copy
+	NRSMat & operator=(const NRSMat &rhs);	//assignment
+	NRSMat & operator=(const T &a);		//assign a to diagonal
+	inline NRSMat & operator*=(const T &a);	//scale all elements by a
+	inline NRSMat & operator+=(const T &a);	//add a to diagonal
+	inline NRSMat & operator-=(const T &a);	//subtract a from diagonal
+	inline NRSMat & operator+=(const NRSMat &rhs);
+	inline NRSMat & operator-=(const NRSMat &rhs);
+	const NRSMat operator-() const; //unary minus
+	inline int getcount() const {return count?*count:0;}
+	inline const NRSMat operator*(const T &a) const;
+	inline const NRSMat operator+(const T &a) const;
+	inline const NRSMat operator-(const T &a) const;
+	inline const NRSMat operator+(const NRSMat &rhs) const;
+	inline const NRSMat operator-(const NRSMat &rhs) const;
+	inline const NRMat<T> operator+(const NRMat<T> &rhs) const;
+	inline const NRMat<T> operator-(const NRMat<T> &rhs) const;
+	const NRMat<T> operator*(const NRSMat &rhs) const; // SMat*SMat
+	const NRMat<T> operator*(const NRMat<T> &rhs) const; // SMat*Mat 
+	const T dot(const NRSMat &rhs) const; // Smat.Smat
+	const NRVec<T> operator*(const NRVec<T> &rhs) const;	// SMat*Vec
+	inline const T& operator[](const int ij) const;	// access by packed index
+	inline T& operator[](const int ij);
+	inline const T& operator()(const int i, const int j) const;	// access by row and column
+	inline T& operator()(const int i, const int j);
+	inline int nrows() const;
+	inline int ncols() const;
+	const double norm(const T scalar=(T)0) const;
+	void axpy(const T alpha, const NRSMat &x); // this+= a*x
+	inline const T amax() const;
+	const T trace() const;
+	void copyonwrite();
+	void resize(const int n);
+	inline operator T*(); //get a pointer to the data
+	inline operator const T*() const; //get a pointer to the data
+	~NRSMat();
+	void fprintf(FILE *f, const char *format, const int modulo) const; 
+	void fscanf(FILE *f, const char *format); 
+//members concerning sparse matrix
+	explicit NRSMat(const SparseMat<T> &rhs);               // dense from sparse
+	inline void simplify() {}; //just for compatibility with sparse ones
+};
+
+// INLINES
+// ctors
+// allocate an n x n packed matrix with its own reference counter set to one;
+// the elements are not given any value here
+template <typename T>
+inline NRSMat<T>::NRSMat(const int n)
+	: nn(n), v(new T[NN2]), count(new int)
+{
+	*count = 1;
+}
+
+// allocate an n x n packed matrix with every stored element set to a.
+// The fill is unconditional: it used to be skipped for a == 0, which left the
+// freshly allocated array of a built-in type such as double uninitialized
+// instead of zeroed.
+template <typename T>
+inline NRSMat<T>::NRSMat(const T& a, const int n) : nn(n),
+	        v(new T[NN2]), count(new int)
+{
+	*count =1;
+	for(int i=0; i<NN2; i++) v[i] = a;
+}
+
+// allocate an n x n packed matrix and copy NN2 elements from the array a,
+// which must already be in packed order
+template <typename T>
+inline NRSMat<T>::NRSMat(const T *a, const int n)
+	: nn(n), v(new T[NN2]), count(new int)
+{
+	*count = 1;
+	const int len = NN2;
+	memcpy(v, a, len*sizeof(T));
+}
+
+// copy constructor: shares the data and counter of rhs and bumps the
+// reference count (when rhs has a counter at all)
+template <typename T>
+inline NRSMat<T>::NRSMat(const NRSMat<T> &rhs)
+	: nn(rhs.nn), v(rhs.v), count(rhs.count)
+{
+	if (count) ++(*count);
+}
+
+// type conversion: view the data of a vector as a packed n x n matrix.
+// Nothing is copied; the matrix shares the storage and reference counter of
+// rhs. The length check exists only with DEBUG.
+template <typename T>
+NRSMat<T>::NRSMat(const NRVec<T> &rhs, const int n)
+	: nn(n), v(rhs.v), count(rhs.count)
+{
+#ifdef DEBUG
+	if (NN2 != rhs.size())
+		laerror("matrix dimensions incompatible with vector length");
+#endif
+	++(*count);
+}
+
+// S *= a: scale every stored element after making the storage private
+inline NRSMat<double> & NRSMat<double>::operator*=(const double & a)
+{
+	copyonwrite();
+	const int len = NN2;
+	cblas_dscal(len, a, v, 1);
+	return *this;
+}
+// complex S *= a: scale every stored element.
+// The length must be NN2, as in the real version; it used to be nn, which
+// scaled only the first nn packed elements.
+inline NRSMat< complex<double> > &
+NRSMat< complex<double> >::operator*=(const complex<double> & a)
+{
+	copyonwrite();
+	cblas_zscal(NN2, (void *)(&a), (void *)v, 1);
+	return *this;
+}
+
+
+// S += a on the diagonal: a is added to every diagonal element only
+template <typename T>
+inline NRSMat<T> & NRSMat<T>::operator+=(const T &a)
+{
+	copyonwrite();
+	for (int i=0; i<nn; i++) {
+		const int diag = i*(i+3)/2;	// packed index of (i,i), equals i*(i+1)/2+i
+		v[diag] += a;
+	}
+	return *this;
+}
+
+// S -= a on the diagonal: a is subtracted from every diagonal element only
+template <typename T>
+inline NRSMat<T> & NRSMat<T>::operator-=(const T &a)
+{
+	copyonwrite();
+	for (int i=0; i<nn; i++) {
+		const int diag = i*(i+3)/2;	// packed index of (i,i), equals i*(i+1)/2+i
+		v[diag] -= a;
+	}
+	return *this;
+}
+
+// S += S: element-wise addition of the packed arrays via daxpy
+inline NRSMat<double> &
+NRSMat<double>::operator+=(const NRSMat<double> & rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator+=");
+#endif
+	copyonwrite();
+	const int len = NN2;
+	cblas_daxpy(len, 1.0, rhs.v, 1, v, 1);
+	return *this;
+}
+// complex S += S: element-wise addition of the packed arrays via zaxpy.
+// The data pointers themselves are passed to BLAS; the code used to pass
+// &rhs.v and &v, i.e. the addresses of the pointer members, so BLAS read and
+// wrote the object itself rather than the matrix data.
+// Marked inline like its siblings because it is defined in a header.
+inline NRSMat< complex<double> > &
+NRSMat< complex<double> >::operator+=(const NRSMat< complex<double> > & rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator+=");
+#endif
+	copyonwrite();
+	cblas_zaxpy(NN2, (void *)(&CONE), (void *)rhs.v, 1, (void *)v, 1);
+	return *this;
+}
+
+// S -= S: element-wise subtraction of the packed arrays via daxpy
+inline NRSMat<double> &
+NRSMat<double>::operator-=(const NRSMat<double> & rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator-=");
+#endif
+	copyonwrite();
+	const int len = NN2;
+	cblas_daxpy(len, -1.0, rhs.v, 1, v, 1);
+	return *this;
+}
+// complex S -= S: element-wise subtraction of the packed arrays via zaxpy.
+// The data pointers themselves are passed to BLAS; the code used to pass
+// &rhs.v and &v, i.e. the addresses of the pointer members, so BLAS read and
+// wrote the object itself rather than the matrix data.
+inline NRSMat< complex<double> > &
+NRSMat< complex<double> >::operator-=(const NRSMat< complex<double> > & rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator-=");
+#endif
+	copyonwrite();
+	cblas_zaxpy(NN2, (void *)(&CMONE), (void *)rhs.v, 1, (void *)v, 1);
+	return *this;
+}
+
+// SMat + Mat: start from a copy of the full matrix and add *this to it
+template <typename T>
+inline const NRMat<T> NRSMat<T>::operator+(const NRMat<T> &rhs) const
+{
+	NRMat<T> sum(rhs);
+	sum += *this;
+	return sum;
+}
+
+// SMat - Mat: start from the negated full matrix and add *this to it
+template <typename T>
+inline const NRMat<T> NRSMat<T>::operator-(const NRMat<T> &rhs) const
+{
+	NRMat<T> diff(-rhs);
+	diff += *this;
+	return diff;
+}
+
+// access the element, linear array case (writable)
+// With DEBUG defined the object must be allocated, must not share its data
+// and the packed index must be in range. The allocation test comes first:
+// previously *count was dereferenced before it, so an unallocated matrix
+// could crash before its diagnostic was reached.
+template <typename T>
+inline T & NRSMat<T>::operator[](const int ij)
+{
+#ifdef DEBUG
+	if (!v || !count) laerror("[] for unallocated Smat");
+	else if (*count != 1) laerror("lval [] with count > 1 in Smat");
+	if (ij<0 || ij>=NN2) laerror("SMat [] out of range");
+#endif
+	return v[ij];
+}
+// read-only access by packed index; range and allocation are checked only
+// with DEBUG
+template <typename T>
+inline const T & NRSMat<T>::operator[](const int ij) const
+{
+#ifdef DEBUG
+	const bool inrange = (ij>=0 && ij<NN2);
+	if (!inrange) laerror("SMat [] out of range");
+	if (v == 0) laerror("[] for unallocated Smat");
+#endif
+	return *(v + ij);
+}
+
+// access the element, 2-dim array case (writable)
+// (i,j) and (j,i) refer to the same stored element of the packed triangle.
+// With DEBUG defined the object must be allocated, must not share its data
+// and both indices must be in range. The allocation test comes first:
+// previously *count was dereferenced before it, so an unallocated matrix
+// could crash before its diagnostic was reached.
+template <typename T>
+inline T & NRSMat<T>::operator()(const int i, const int j)
+{
+#ifdef DEBUG
+	if (!v || !count) laerror("(i,j) for unallocated Smat");
+	else if (*count != 1) laerror("lval (i,j) with count > 1 in Smat");
+	if (i<0 || i>=nn || j<0 || j>=nn) laerror("SMat (i,j) out of range");
+#endif
+	return i>=j ? v[i*(i+1)/2+j] : v[j*(j+1)/2+i];
+}
+// read-only access by row and column; (i,j) and (j,i) map to the same stored
+// element. Range and allocation are checked only with DEBUG.
+template <typename T>
+inline const T & NRSMat<T>::operator()(const int i, const int j) const
+{
+#ifdef DEBUG
+	if (i<0 || i>=nn || j<0 || j>=nn) laerror("SMat (i,j) out of range");
+	if (!v) laerror("(i,j) for unallocated Smat");
+#endif
+	if (i>=j) return v[i*(i+1)/2+j];
+	return v[j*(j+1)/2+i];
+}
+
+// return the number of rows and columns
+// (both equal the single stored dimension nn)
+template <typename T>
+inline int NRSMat<T>::nrows() const
+{
+	return nn;
+}
+// number of columns, identical to nrows() for this square matrix type
+template <typename T>
+inline int NRSMat<T>::ncols() const
+{
+	return nn;
+}
+
+// stored element picked by the BLAS idamax index search over the packed array
+// (i.e. the element of largest magnitude; its signed value is returned)
+inline const double NRSMat<double>::amax() const
+{
+	const CBLAS_INDEX imax = cblas_idamax(NN2, v, 1);
+	return v[imax];
+}
+// complex variant: stored element picked by the BLAS izamax index search
+inline const complex<double> NRSMat< complex<double> >::amax() const
+{
+	const CBLAS_INDEX imax = cblas_izamax(NN2, (void *)v, 1);
+	return v[imax];
+}
+
+// conversion to a writable pointer to the packed data; no copy-on-write is
+// triggered here. The allocation check exists only with DEBUG.
+template <typename T>
+inline NRSMat<T>:: operator T*()
+{
+#ifdef DEBUG
+	if (v == 0) laerror("unallocated SMat in operator T*");
+#endif
+	T *data = v;
+	return data;
+}
+// conversion to a read-only pointer to the packed data.
+// The allocation check exists only with DEBUG.
+template <typename T>
+inline NRSMat<T>:: operator const T*() const
+{
+#ifdef DEBUG
+	if (v == 0) laerror("unallocated SMat in operator T*");
+#endif
+	const T *data = v;
+	return data;
+}
+
+
+
+// I/O
+template <typename T> extern ostream& operator<<(ostream &s, const NRSMat<T> &x);
+template <typename T> extern istream& operator>>(istream  &s, NRSMat<T> &x);
+
+
+
+
+// generate operators: SMat + a, a + SMat, SMat * a
+NRVECMAT_OPER(SMat,+)
+NRVECMAT_OPER(SMat,-)
+NRVECMAT_OPER(SMat,*)
+// generate SMat + SMat, SMat - SMat
+NRVECMAT_OPER2(SMat,+)
+NRVECMAT_OPER2(SMat,-)
+
+#endif /* _LA_SMAT_H_ */
--- a/sparsemat.cc
+++ b/sparsemat.cc
--- a/sparsemat.h
+++ b/sparsemat.h
@ -0,0 +1,220 @@
+//for vectors and dense matrices we shall need
+#include "la.h"
+
+//larger of two values, returned by value; a is returned when they compare equal
+template<class T>
+inline const T MAX(const T &a, const T &b)
+        {
+        if(b > a) return b;
+        return a;
+        }
+
+//exchange the values of a and b through one temporary copy
+template<class T>
+inline void SWAP(T &a, T &b)
+        {
+        T saved=a;
+        a=b;
+        b=saved;
+        }
+
+
+//threshold for neglecting elements, if not defined, no tests are done except exact zero test in simplify - might be even faster
+//seems to perform better with a threshold, in spite of abs() tests
+#define  SPARSEEPSILON 1e-13 
+
+typedef unsigned int SPMatindex;
+typedef int SPMatindexdiff; //more clear would be to use traits
+
+//element of a linked list
+//one stored matrix element: its value, its row and column index, and the
+//link to the next element of the singly linked list
+template<class T>
+struct matel
+        {
+        T elem;
+        SPMatindex row;
+        SPMatindex col;
+        matel *next;
+        };
+
+
+//Sparse matrix stored as a singly linked list of matel<T> elements.
+//Copies made by the copy constructor share the list and the reference
+//counter `count`; the row/column sort arrays are per-object and not copied.
+//When `symmetric` is set, NRMat::operator+= mirrors each off-diagonal
+//element, i.e. only one of (i,j)/(j,i) is expected in the list.
+template <class T>
+class SparseMat {
+protected:
+	SPMatindex nn; //number of rows
+        SPMatindex mm; //number of columns
+	bool symmetric;
+	unsigned int nonzero;
+        int *count; //shared reference counter of the list
+	matel<T> *list; //head of the element list, NULL when empty
+private:
+	matel<T> **rowsorted; //NULL terminated
+	matel<T> **colsorted; //NULL terminated
+	void unsort();
+	void deletelist();
+	void copylist(const matel<T> *l);
+public:
+	//iterator
+	//forward iterator over the element list; note that the default
+	//constructor leaves the internal pointer uninitialized
+        typedef class iterator {
+        private:
+                matel<T> *p;
+        public:
+                iterator() {};
+                ~iterator() {};
+                iterator(matel<T> *list): p(list) {};
+                bool operator==(const iterator rhs) const {return p==rhs.p;}
+                bool operator!=(const iterator rhs) const {return p!=rhs.p;}
+                iterator operator++() {return p=p->next;}
+                iterator operator++(int) {matel<T> *q=p; p=p->next; return q;}
+                matel<T> & operator*() const {return *p;}
+                matel<T> * operator->() const {return p;}
+        };
+        iterator begin() const {return list;}
+        iterator end() const {return NULL;}
+
+	//constructors etc.
+	inline SparseMat() :nn(0),mm(0),symmetric(0),nonzero(0),count(NULL),list(NULL),rowsorted(NULL),colsorted(NULL) {};
+	inline SparseMat(const SPMatindex n, const SPMatindex m) :nn(n),mm(m),symmetric(0),nonzero(0),count(new int(1)),list(NULL),rowsorted(NULL),colsorted(NULL) {};
+	SparseMat(const SparseMat &rhs); //copy constructor
+	inline int getcount() const {return count?*count:0;}
+	explicit SparseMat(const NRMat<T> &rhs); //construct from a dense one
+	explicit SparseMat(const NRSMat<T> &rhs); //construct from a dense symmetric one
+	SparseMat & operator=(const SparseMat &rhs);
+	SparseMat & operator=(const T a);          //assign a to diagonal
+    	SparseMat & operator+=(const T a);         //add a to diagonal
+	SparseMat & operator-=(const T a);         //subtract a from diagonal
+        SparseMat & operator*=(const T a);         //multiply by a scalar
+        SparseMat & operator+=(const SparseMat &rhs);
+	SparseMat & addtriangle(const SparseMat &rhs, const bool lower, const char sign);
+        SparseMat & join(SparseMat &rhs); //more efficient +=, rhs will be emptied
+        SparseMat & operator-=(const SparseMat &rhs);
+	inline const SparseMat operator+(const T &rhs) const {return SparseMat(*this) += rhs;}
+        inline const SparseMat operator-(const T &rhs) const {return SparseMat(*this) -= rhs;}
+        inline const SparseMat operator*(const T &rhs) const {return SparseMat(*this) *= rhs;}
+        inline const SparseMat operator+(const SparseMat &rhs) const {return SparseMat(*this) += rhs;} //must not be symmetric+general
+        inline const SparseMat operator-(const SparseMat &rhs) const {return SparseMat(*this) -= rhs;} //must not be symmetric+general
+	const NRVec<T> multiplyvector(const NRVec<T> &rhs, const bool transp=0) const; //sparse matrix * dense vector optionally transposed
+	inline const NRVec<T> operator*(const NRVec<T> &rhs) const {return multiplyvector(rhs);} //sparse matrix * dense vector
+	const SparseMat operator*(const SparseMat &rhs) const; 
+        void gemm(const T beta, const SparseMat &a, const char transa, const SparseMat &b, const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this, if this is symmetric, only half will be added onto it
+	const T dot(const SparseMat &rhs) const; //supervector dot product
+	const T dot(const NRMat<T> &rhs) const; //supervector dot product
+	inline ~SparseMat();
+	void axpy(const T alpha, const SparseMat &x, const bool transp=0); // this+= a*x(transposed)
+	inline matel<T> *getlist() const {return list;}
+	void setlist(matel<T> *l) {list=l;}
+	inline SPMatindex nrows() const {return nn;}
+        inline SPMatindex ncols() const {return mm;}
+	void resize(const SPMatindex n, const SPMatindex m);
+	void transposeme();
+	const SparseMat transpose() const;
+	inline void setsymmetric() {if(nn!=mm) laerror("non-square cannot be symmetric"); symmetric=1;}
+	inline void defineunsymmetric() {symmetric=0;} //just define and do nothing with it
+	void setunsymmetric();//unwind the matrix assuming it was indeed symmetric
+	inline bool issymmetric() const {return symmetric;}
+	unsigned int length() const;
+	void copyonwrite();
+	void simplify();
+	const T trace() const;
+	const T norm(const T scalar=(T)0) const; //is const only mathematically, not in internal implementation - we have to simplify first
+	unsigned int sort(int type) const;
+	//add() prepends a new element to the list without looking for an existing (n,m) entry;
+	//it does not call copyonwrite() and does not touch nonzero or the sort arrays
+	inline void add(const SPMatindex n, const SPMatindex m, const T elem) {matel<T> *ltmp= new matel<T>; ltmp->next=list; list=ltmp; list->row=n; list->col=m; list->elem=elem;}
+	void addsafe(const SPMatindex n, const SPMatindex m, const T elem);
+};
+
+template <class T>
+	extern istream& operator>>(istream  &s, SparseMat<T> &x);
+
+template <class T>
+	extern ostream& operator<<(ostream &s, const SparseMat<T> &x);
+
+//destructor: the per-object sort arrays are always released through
+//unsort(); the shared list and counter are freed only by the last owner
+template <class T>
+SparseMat<T>::~SparseMat()
+{
+	unsort();
+	if(count==NULL) return;
+	--(*count);
+	if(*count>0) return;
+	deletelist();
+	delete count;
+}
+
+//copy constructor (sort arrays are not going to be copied)
+//A non-empty list is shared with rhs and its reference count incremented;
+//an empty rhs yields a defined, empty, unshared matrix with its own counter.
+//NOTE(review): the guard below tests the lowercase macro `debug`, whereas the
+//other checks in this code base use DEBUG - confirm which one is intended.
+template <class T>
+SparseMat<T>::SparseMat(const SparseMat<T> &rhs)
+{
+#ifdef debug
+if(! &rhs) laerror("SparseMat copy constructor with NULL argument");
+#endif
+	nn=rhs.nn;
+	mm=rhs.mm;
+	symmetric=rhs.symmetric;
+	if(rhs.list&&!rhs.count) laerror("some inconsistency in SparseMat contructors or assignments");
+	list=rhs.list;
+	if(list)
+		{
+		count=rhs.count;
+		++(*count);
+		}
+	else count=new int(1);
+	rowsorted=NULL;
+	colsorted=NULL;
+	nonzero=0;
+}
+
+//Transposed copy of the matrix.
+//A matrix flagged symmetric is its own transpose, so the element list is
+//simply shared (reference counted); otherwise a new list is built with the
+//row and column of every element exchanged.
+template <class T>
+const SparseMat<T> SparseMat<T>::transpose() const
+{
+if(list&&!count) laerror("some inconsistency in SparseMat transpose");
+SparseMat<T> result;
+result.nn=mm;
+result.mm=nn;
+result.symmetric=symmetric;
+result.colsorted=result.rowsorted=NULL;
+result.nonzero=0;
+if(symmetric)
+	{
+	result.list=list;
+	if(list) {result.count=count; (*result.count)++;}
+	else result.count=new int(1); //make the matrix defined, but empty and not shared
+	return result;
+	}
+//really transpose it
+result.count=new int(1);
+result.list=NULL;
+for(matel<T> *l=list; l; l=l->next) result.add(l->col,l->row,l->elem);
+return result;
+}
+
+
+
+//commutator op(x)*op(y) - op(y)*op(x) for sparse matrices, accumulated by
+//two gemm calls into one result; trx / tryy select transposition of x / y
+template<class T>
+inline const SparseMat<T> commutator ( const SparseMat<T> &x, const SparseMat<T> &y, const bool trx=0, const bool tryy=0)
+{
+const char opx = trx ? 't' : 'n';
+const char opy = tryy ? 't' : 'n';
+SparseMat<T> r;
+r.gemm((T)0,x,opx,y,opy,(T)1);
+r.gemm((T)1,y,opy,x,opx,(T)-1); //saves a temporary and simplifies the whole sum
+return r;
+}
+
+//anticommutator op(x)*op(y) + op(y)*op(x) for sparse matrices, accumulated by
+//two gemm calls into one result; trx / tryy select transposition of x / y
+template<class T>
+inline const SparseMat<T> anticommutator ( const SparseMat<T> &x, const SparseMat<T> &y, const bool trx=0, const bool tryy=0)
+{
+const char opx = trx ? 't' : 'n';
+const char opy = tryy ? 't' : 'n';
+SparseMat<T> r;
+r.gemm((T)0,x,opx,y,opy,(T)1);
+r.gemm((T)1,y,opy,x,opx,(T)1); //saves a temporary and simplifies the whole sum
+return r;
+}
+
+//add sparse to dense
+//Every element of the sparse list is added to the matching dense element;
+//for a symmetric rhs each off-diagonal element is also added at the mirrored
+//position. Calls laerror() when the dimensions differ.
+//Fixes: the function used to fall off its end without returning *this, and
+//it wrote into the dense storage without detaching it first, so other
+//objects sharing that storage were modified as well.
+template<class T>
+NRMat<T> & NRMat<T>::operator+=(const SparseMat<T> &rhs)
+{
+if(nn!=rhs.nrows()||mm!=rhs.ncols()) laerror("incompatible matrices in +=");
+matel<T> *l=rhs.getlist();
+bool sym=rhs.issymmetric();
+if(l) copyonwrite(); //only needed when something is actually going to be written
+while(l)
+        {
+#ifdef MATPTR
+        v[l->row][l->col] +=l->elem;
+        if(sym && l->row!=l->col) v[l->col][l->row] +=l->elem;
+#else
+        v[l->row*mm+l->col] +=l->elem;
+         if(sym && l->row!=l->col) v[l->col*mm+l->row] +=l->elem;
+#endif
+        l=l->next;
+        }
+return *this;
+}
+
+
--- a/sparsemat_traits.h
+++ b/sparsemat_traits.h
@ -0,0 +1,15 @@
+////////////////////////////////////////////////////////////////////////////
+//traits classes
+
+#ifndef _SPARSEMAT_TRAITS_INCL
+#define _SPARSEMAT_TRAITS_INCL
+
+
+//traits specialization for SparseMat<double>: exposes the element type, the
+//type of a product, and static wrappers that forward to the member functions
+//norm() and axpy() (note the argument order: axpy(s,x,c) calls s.axpy(c,x))
+template<> struct NRMat_traits<SparseMat<double> > {
+typedef double elementtype;
+typedef SparseMat<double> producttype;
+static double norm (const SparseMat<double> &x) {return x.norm();}
+static void axpy (SparseMat<double>&s, const SparseMat<double> &x, const double c) {s.axpy(c,x);}
+};
+
+#endif
--- a/strassen.cc
+++ b/strassen.cc
@ -0,0 +1,31 @@
+#include "la.h"
+/*Strassen algorithm*/
+// called routine is fortran-compatible
+extern "C" void fmm(const char c_transa,const char c_transb,const int m,const int n,const int k,const double alpha,
+                const double *a,const int lda,const double *b,const int ldb,const double beta,double *c,const int ldc,
+                double *d_aux,int i_naux);
+extern "C" void strassen_cutoff(int c, int c1, int c2, int c3);
+
+//forwards the four cutoff parameters unchanged to the external C routine
+//strassen_cutoff(); no member of the matrix is read or modified
+void NRMat<double>::s_cutoff(const int c, const int c1, const int c2, const int c3) const
+{ strassen_cutoff(c,c1,c2,c3);}
+//gemm-like update of *this through the external fmm() routine.
+//The shapes of op(a) and op(b) are checked against *this first; the storage
+//is then made private and fmm() is called with the two operands and their
+//transposition flags exchanged.
+void NRMat<double>::strassen(const double beta, const NRMat<double> &a, const char transa, const NRMat<double> &b, const char transb, const double alpha)
+{
+const bool anotr = (transa=='n');
+const bool bnotr = (transb=='n');
+const int l  = anotr ? a.nn : a.mm;	//rows of op(a)
+const int k  = anotr ? a.mm : a.nn;	//columns of op(a)
+const int kk = bnotr ? b.nn : b.mm;	//rows of op(b)
+const int ll = bnotr ? b.mm : b.nn;	//columns of op(b)
+
+if(!(l==nn && ll==mm && k==kk)) laerror("incompatible (or undefined size) matrices in strassen");
+
+copyonwrite();
+//swap transpositions and order of matrices
+fmm(transb,transa,mm,nn,k,alpha,b,b.mm, a, a.mm, beta,*this, mm,NULL,0);
+}
+
+//stub for f77 blas called from strassen routine
+//the message is handed to laerror(), so BLAS errors are reported through the
+//library's own error handler
+extern "C" void xerbla_(const char *msg)
+{
+laerror(msg);
+}
+
+
--- a/t.cc
+++ b/t.cc
@ -0,0 +1,775 @@
+// g++ -D _GLIBCPP_NO_TEMPLATE_EXPORT -g testblas.cc testblas2.cc nrutil_modif.cc -L/usr/local/lib/atlas -lstrassen -lf77blas -lcblas -latlas -ltraceback -lbfd -liberty
+
+#include <time.h>
+#include "la.h"
+#include "traceback.h"
+#include "sparsemat.h"
+#include "matexp.h"
+#include "fourindex.h"
+
+
+extern void test(const NRVec<double> &);
+
+double ad; 
+// stores the value c points to in the global ad
+void f1(const double *c)
+{
+	ad = c[0];
+}
+
+// writes the global ad to the location c points to
+void f2(double *c)
+{
+	c[0] = ad;
+}
+
+
+// scales random() by n/(1.+RAND_MAX) and truncates the result to int
+inline int randind(const int n)
+{
+	const double scaled = random()/(1.+RAND_MAX)*n;
+	return int(scaled);
+}
+
+// identity function on complex numbers
+complex<double> mycident (const complex<double>&x)
+{
+	return x;
+}
+
+
+// Scratch test driver for the library. It calls sigtraceback() for SIGSEGV,
+// SIGABRT, SIGBUS and SIGFPE, performs a few vector operations, and then walks
+// through a series of independent test blocks, each guarded by a literal
+// if(0)/if(1) (older experiments are kept inside /* */ comments). As written,
+// only the block that compares exp(aa)*v with exptimes(aa,v) is enabled.
+int main()
+{
+sigtraceback(SIGSEGV,1);
+sigtraceback(SIGABRT,1);
+sigtraceback(SIGBUS,1);
+sigtraceback(SIGFPE,1);
+NRVec<double> x(1.,10);
+NRVec<double> y(2.,10);
+NRVec<double> z(-2.,10);
+
+y.axpy(3,x);
+
+y+=z;
+/*
+cout <<y;
+NRVec<double> a(x);
+
+NRVec<double> b;
+b|=x;
+
+NRVec<double> c;
+c=a;
+
+y =10. *y  ;
+
+int i;
+for(i=0;i<y.size();i++) cout <<y[i] <<" ";
+cout <<"\n";
+
+cout << y*z <<"\n";
+
+z|=x;
+z[1]=5;
+
+cout <<"zunit= "<<z.unitvector()<<"\n";
+cout <<"z= "<<z<<"\n";
+test(x);
+
+x = x*5;
+
+
+cout <<"x= "<<x<<"\n";
+cout <<"y= "<<y<<"\n";
+
+NRVec<double> u;
+u=x+y;
+cout <<"u= "<<u<<"\n";
+
+NRMat<double> aa(0.,3,3);
+aa[0][0]=aa[1][1]=aa(2,2)=2.;
+
+NRMat<double> bb(aa);
+
+double *p;
+aa.copyonwrite(); p= &aa[2][2];
+*p=3.;
+bb.copyonwrite(); bb(0,2)=1.;
+
+cout << "aa= " <<aa <<"\n";
+cout << "bb= " <<bb <<"\n";
+cout <<"aa trace "<<aa.trace() <<"\n";
+cout << "bbt= " <<bb.transpose() <<"\n";
+NRMat<double> cc=aa & bb;
+cout << "aa o+ bb= " << cc <<"\n";
+cout << cc.rsum() <<"\n";
+cout << cc.csum() <<"\n";
+
+NRVec<double>w(3);
+w[0]=1; w[1]=2;w[2]=3;
+NRVec<double> v(0.,3);
+v.gemv(0.,bb,'n',1.,w);
+cout << " v= " <<v <<"\n";
+v.gemv(0.,bb,'t',1.,w);
+cout << " v= " <<v <<"\n";
+
+*/
+/*
+const int n=6000;
+NRMat<double> bb(1.,n,n);
+for(int i=0;i<n;i++) for(int j=0;j<n;j++) bb[i][j]=2.;
+for(int i=0;i<n;i++) for(int j=0;j<i;j++) {double t; t=bb[i][j] +bb[j][j]; bb[i][j]=t;bb[j][i]=t;}
+*/
+
+/*
+NRMat<double> amat,bmat,cmat;
+cin >>amat;
+cin >>bmat;
+cmat=amat*bmat;
+cout<<cmat;
+cmat.copyonwrite(); cmat[0][0]=0;
+NRMat<double> amat(1.,2,2);
+NRMat<double> bmat(amat);
+NRMat<double> dmat(amat);
+//NRMat<double>  cmat; cmat=bmat*2.;
+NRMat<double>  cmat(bmat*2); //more efficient
+dmat.copyonwrite(); dmat[0][0]=0;
+
+cout<<amat;
+cout<<bmat;
+cout<<cmat;
+cout<<dmat;
+
+
+NRMat<double> amat;
+NRVec<double>  avec;
+
+cin >>amat;
+cin >>avec;
+
+cout << amat*avec;
+cout << avec*amat;
+
+NRVec<double> avec(0.,10);
+
+f1(avec);
+f2(avec);
+
+NRVec<double> uu(3);
+uu[0]=1; uu[1]=2; uu[2]=3;
+cout << uu << (uu|uu) <<"\n";
+
+NRSMat<double> sa(0.,3);
+sa(0,0)=1; sa(0,2)=5; sa(2,2)=10;sa(1,0)=2;sa(1,1)=3; sa(2,1)=-1;
+
+NRSMat<double> sb(0.,3);
+sb(0,0)=-2; sb(0,2)=1; sb(2,2)=2;sb(1,0)=-1;sb(1,1)=7; sb(2,1)=3;
+
+cout << "symetr\n" <<sa << -sa <<"\n";
+cout << "symetr\n" <<sb <<"\n";
+
+cout << "sa*sb\n" << sa*sb <<"\n";
+cout << "sb*sa\n" << sb*sa <<"\n";
+
+NRMat<double> m10(10.,3,3);
+ cout << "10 + sa" << m10 + sa <<"\n";
+*/
+
+/*
+
+const int dim=256;
+NRMat<double> big1(dim,dim),big2(dim,dim),big3;
+for(int i=0;i<dim;i++)
+	for(int j=0;j<dim;j++)
+		{
+		big1[i][j]=i*i+j*j*j-3*j;
+		big2[i][j]=i*i/(j+1)+j*j-3*j;
+		}
+double t=clock()/((double) (CLOCKS_PER_SEC));
+big3= big1*big2;
+cout <<" big1*big2 "<<big3[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-t <<"\n";
+
+*/
+
+/*
+NRMat<double> atest, btest,ctest;
+{
+int cc,c1,c2,c3;
+cin >>cc>>c1>>c2>>c3;
+atest.s_cutoff(cc,c1,c2,c3);
+}
+cin>>atest;
+cin>>btest;
+
+NRMat<double> dtest(atest.nrows(),btest.ncols());
+dtest.gemm(0., atest, 't', btest, 'n', 1.);
+cout << dtest;
+
+NRMat<double> etest(atest.nrows(),btest.ncols());
+etest.strassen(0., atest, 't', btest, 'n', 1.);
+cout << etest;
+*/
+
+if(0)
+{
+int dim;
+cin >>dim;
+NRMat<double> big1(dim,dim),big2(dim,dim),big3,big4(dim,dim);
+for(int i=0;i<dim;i++)
+        for(int j=0;j<dim;j++)
+                {
+                big1[i][j]=i*i+j*j*j-3*j;
+                big2[i][j]=i*i/(j+1)+j*j-3*j;
+                }
+double t=clock()/((double) (CLOCKS_PER_SEC));
+big3= big1*big2;
+cout <<" classical big1*big2 "<<big3[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-t <<"\n";
+
+for (int c=64; c<=512;c+=64)
+	{
+	big4.s_cutoff(c,c,c,c);
+	t=clock()/((double) (CLOCKS_PER_SEC));
+	big4.strassen(0., big1, 'n', big2, 'n', 1.);
+	cout <<"cutoff "<<c<<" big1*big2 "<<big4[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-t <<"\n";
+	}
+}
+
+if(0)
+{
+NRMat<double> a(3,3),b;
+NRVec<double> v(3);
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= i*i+j; v[i]=10-i;}
+b=a;
+b*= sin(1.)+1;
+cout << a <<v;
+a.diagmultl(v);
+cout << a;
+b.diagmultr(v);
+cout << b;
+}
+
+if(0)
+{
+NRMat<double> a(3,3),b;
+NRVec<double> v(10);
+v[0]=2;v[1]=3;v[2]=1;v[3]=-3;v[4]=2;v[5]=-1;v[6]=3;v[7]=-2;v[8]=1;v[9]=1;
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; }
+cout <<a;
+cout << a.norm() <<"\n";
+b=a*a;
+cout << b.norm() <<"\n";
+cout << exp(a);
+cout << exp(a.norm()) <<"\n";
+cout << ipow(a,3);
+cout<<ipow(a,11);
+cout <<commutator(a,b);
+
+}
+
+if(0)
+{
+NRMat<double> a(3,3);
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; }
+NRSMat<double> b(a);
+NRMat<double> c(b);
+cout <<a;
+cout <<b;
+cout <<c;
+}
+
+if(0)
+{
+NRMat<double> a(3,3);
+a[0][0]=1; a[0][1]=2;a[0][2]=3;
+a[1][0]=4; a[1][1]=-5;a[1][2]=7;
+a[2][0]=-3;a[2][1]=10;a[2][2]=2;
+NRMat<double> b(2,3);
+b[0][0]=1;b[0][1]=2;b[0][2]=3;
+b[1][0]=2;b[1][1]=4;b[1][2]=6;
+cout <<a;
+cout <<b;
+linear_solve(a,&b);
+cout <<a;
+cout <<b;
+}
+
+if(0)
+{
+NRMat<double> a(3,3);
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; }
+NRVec<double> b(3);
+cout <<a;
+diagonalize(a,b);
+cout <<a;
+cout <<b;
+}
+
+if(0)
+{
+NRSMat<double> a(3);
+NRMat<double>v(3,3);
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a(i,j)= (i+j)/10.; }
+NRVec<double> b(3);
+cout <<a;
+NRMat<double>c=(NRMat<double>)a; //or NRMat<double>c(a);
+NRMat<double>d=exp(c);
+diagonalize(a,b,&v);
+cout <<b;
+cout <<v;
+cout <<d;
+diagonalize(d,b);
+cout <<b;
+cout <<d;
+}
+
+if(0)
+{
+NRMat<double> a;
+cin >>a ;
+NRMat<double> b=a.transpose();
+NRMat<double> u(a.nrows(),a.nrows()),v(a.ncols(),a.ncols());
+NRVec<double>s(a.ncols());
+singular_decomposition(a,&u,s,&v);
+//singular_decomposition(a,NULL,s,NULL); //this does not work when linked with static version of lapack, works with .so.3 version (from suse distrib)
+cout <<u;
+cout <<s;
+cout <<v;
+//singular_decomposition(b,&v,s,&u);
+//cout <<v;
+//cout <<s;
+//cout <<u;
+}
+
+if(0)
+{
+//diagonalize a general matrix and reconstruct it back; assume real eigenvalues
+//double aa[]={1,2,3,4,-5,7,-3,10,2};
+//NRMat<double> a(aa,3,3);
+NRMat<double> a;
+cin >>a;
+cout <<a ;
+int n=a.nrows();
+NRMat<double> u(n,n),v(n,n);
+NRVec<double>wr(n),wi(n);
+gdiagonalize(a,wr,wi,&u,&v,0);
+cout <<u;
+cout <<wr;
+cout <<wi;
+cout <<v;
+
+NRVec<double>z=diagofproduct(u,v,1);
+for(int i=0;i<a.nrows();++i) wr[i]/=z[i];//account for normalization of eigenvectors
+u.diagmultl(wr);
+v.transposeme();
+cout <<v*u;
+
+}
+
+if(0)
+{
+//diagonalize a general matrix and reconstruct it back; allow complex eigenvalues
+NRMat<double> a;
+cin >>a;
+cout <<a ; 
+int n=a.nrows();
+NRMat<complex<double> > u(n,n),v(n,n);
+NRVec<complex<double> >w(n);
+gdiagonalize(a,w,&u,&v);
+cout <<u;
+cout <<w;
+cout <<v;
+
+NRVec<complex<double> >z=diagofproduct(u,v,1,1);
+//NRMat<complex<double> > zz=u*v.transpose(1);
+cout <<z;
+//cout <<zz;
+for(int i=0;i<a.nrows();++i) w[i]/=z[i];//account for normalization of eigenvectors
+u.diagmultl(w);
+cout <<v.transpose(1)*u;
+
+}
+
+
+
+if(0)
+{
+SparseMat<double> a(4,4);
+NRVec<double> v(4);
+v[0]=1;v[1]=2;v[2]=3;v[3]=4;
+a=1.;
+a.copyonwrite();
+a.add(3,0,.5);
+a.add(0,2,.2);
+a.add(2,1,.1);
+a.add(3,3,1.);
+a.add(1,1,-1.);
+SparseMat<double> c(a);
+c*=10.;
+cout <<a;
+a.simplify();
+cout <<a;
+cout <<c;
+NRMat<double>b(c);
+cout <<b;
+cout << b*v;
+cout <<c*v;
+cout <<v*b;
+cout <<v*c;
+}
+
+if(0)
+{
+SparseMat<double> a(4,4),b(4,4);
+a=1.;
+a.copyonwrite();
+a.add(3,0,.5);
+b.add(0,2,.2);
+b.add(2,1,.1);
+b.add(3,3,1.);
+b.add(1,1,-1.);
+SparseMat<double>c=a+b;
+cout <<c;
+a.join(b);
+cout<<a;
+cout<<b;
+}
+
+if(0)
+{
+SparseMat<double> a(4,4),b(4,4);
+a=0.; b=2;
+a.add(3,0,.5);
+a.add(0,2,.2);
+a.add(1,1,1);
+a.add(1,0,.2);
+b.add(2,1,.1);
+b.add(3,3,1.);
+b.add(1,1,-1.);
+NRMat<double> aa(a),bb(b);
+SparseMat<double>c;
+NRMat<double>cc;
+//cout << NRMat<double>(c);
+//cout <<cc;
+//cout <<"norms "<<c.norm()<<" " <<cc.norm()<<endl;
+cout <<"original matrix \n"<<aa;
+cout <<(cc=exp(aa));
+c=exp(a);
+cout <<NRMat<double>(c);
+cout <<"norms2 "<<c.norm()<<" " <<cc.norm()<<endl;
+}
+
+// number of random entries inserted by the tests below; expands using the local variable n
+#define sparsity (n/4)
+if(0)
+{
+for(int n=8; n<=1024*1024;n+=n)
+	{
+	SparseMat<double> aa(n,n);
+	cout << "\n\n\ntiming for size "<<n<<endl;
+	if(n<=512) {
+	NRMat<double> a(0.,n,n);
+	for(int i=0; i<sparsity;i++) a(randind(n),randind(n))=random()/(1.+RAND_MAX);
+	double t0=clock()/((double) (CLOCKS_PER_SEC));	
+	//cout <<a;
+	NRMat<double> b(exp(a));
+	//cout <<b;
+	cout <<"dense norm "<<b.norm() <<"\n";
+	cout << "test commutator " <<commutator(a,b).norm() <<endl;
+	double t1=clock()/((double) (CLOCKS_PER_SEC));    
+	cout << "dense time " <<n<<' '<< t1-t0 <<endl;
+	aa=SparseMat<double>(a);
+	}
+	else
+	{
+	for(int i=0; i<sparsity;i++) aa.add(randind(n),randind(n),random()/(1.+RAND_MAX));
+	}
+	//cout <<aa;
+	double t2=clock()/((double) (CLOCKS_PER_SEC));        
+	SparseMat<double> bb(exp(aa));
+	//cout <<bb;
+	cout <<"sparse norm "<<bb.norm() <<"\n";
+	cout << "test commutator " <<commutator(aa,bb).norm() <<endl;
+        double t3=clock()/((double) (CLOCKS_PER_SEC));
+	 cout <<"sparse length "<<bb.length()<<"\n";
+        cout << "sparse time "<<n<<' ' << t3-t2 <<endl;
+	}
+}
+
+// the only enabled test: exp(aa)*v is compared with exptimes(aa,v) for a random sparse aa
+if(1)
+{
+int n;
+cin>>n;
+	SparseMat<double> aa(n,n);
+	for(int i=0; i<sparsity;i++) aa.add(randind(n),randind(n),random()/(1.+RAND_MAX));
+	SparseMat<double> bb=exp(aa);
+	NRVec<double> v(n);
+	 for(int i=0; i<n;++i) v[i]=random()/(1.+RAND_MAX);
+	NRVec<double> res1=bb*v;
+	NRVec<double> res2=exptimes(aa,v);
+	cout <<"difference = "<<(res1-res2).norm()<<endl;
+}
+
+
+if(0)
+{
+SparseMat<double> a(4,4),b(4,4),d;
+a=0.; b=2;
+a.add(3,0,.5);
+a.add(0,2,.2);
+a.add(1,1,1);
+a.add(1,0,.2);
+b.add(2,1,.1);
+b.add(3,3,1.);
+b.add(1,1,-1.);
+NRMat<double> aa(a),bb(b),dd;
+SparseMat<double>c;
+NRMat<double>cc;
+
+c=commutator(a,b);
+cc=commutator(aa,bb);
+
+cout <<cc;
+cout <<NRMat<double>(c);
+cout <<"norms2 "<<c.norm()<<" " <<cc.norm()<<endl;
+}
+
+/*
+NRVec<double> v(10.,10);
+v+= 5.;
+cout <<v;
+*/
+if(0)
+{
+const int n=3;
+NRMat<double> a(n,n);
+for(int i=0;i<n;++i) for(int j=0;j<i;++j)
+	{
+	a(i,j)= random()/(1.+RAND_MAX);
+	a(j,i)= -a(i,j);
+	}
+NRMat<double> b; b|=a;
+NRVec<double> er(n),ei(n);
+NRMat<double> vr(n,n),vl(n,n);
+gdiagonalize(b,er,ei,&vl,&vr);
+cout <<er<<ei;
+cout <<"left eivec\n"<<vl <<"right eivec\n"<<vr;
+NRMat<double> u=exp(a*.125);
+cout <<"norms "<<u.norm() << ' '<<(u-1.).norm()<<endl;
+gdiagonalize(u,er,ei,&vl,&vr);
+cout <<er<<ei;
+cout <<"left eivec\n"<<vl <<"right eivec\n"<<vr;
+}
+
+if(0)
+{
+/*
+int n;
+cin>>n;
+NRMat<double> a(n,n);
+for(int i=0;i<n;++i) for(int j=0;j<i;++j)
+        {
+        a(i,j)= random()/(1.+RAND_MAX);
+        a(j,i)= -a(i,j);
+        }
+NRMat<double> b=exp(a);
+cout <<a;
+*/
+NRMat<double> a,b;
+cin >>b;
+int n=b.nrows();
+cout <<"difference from identity = "<<b.norm(1.)<<endl;
+
+NRMat<double> x(0.,n,n),x0;
+	double r;
+int i=0;
+do
+	{
+	x0=x;
+	NRMat<double> y=exp(x*-.5);
+	x+= y*b*y; 
+	x-= 1.;
+	x=(x-x.transpose())*.5;
+	cout <<"matrix x\n"<<x;
+	cout <<"iter "<<i <<" residue "<< (r=(exp(x)-b).norm())<<endl;
+	cout <<"iter "<<i <<" conv "<<(r=(x-x0).norm())<<endl;
+	++i;
+	} while(abs(r)>1e-10);
+cout <<"result\n"<<x<<endl;
+cout <<"exp(result)"<<exp(x)<<endl;
+NRMat<double> c=log(b); //matrixfunction(a,&mycident,1);
+cout <<c;
+NRMat<double> d=exp(c);
+cout <<"exp(log(x))\n"<<d;
+cout<<(d-b).norm()<<endl;
+
+}
+
+if(0)
+{
+int n;
+cin>>n;
+NRMat<double> a(n,n);
+for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
+        {
+        a(i,j)= .1*random()/(1.+RAND_MAX);
+        a(j,i)= a(i,j);
+        }
+NRMat<double> b=exp(a);
+NRMat<double> s=exp(a*.5);
+NRMat<double> y(0.,n,n);
+NRMat<double> z(0.,n,n);
+        double r;
+int i=0;
+y=b;z=1.;
+cout << "norm = "<<b.norm(1.)<<endl;
+do
+        {
+	NRMat<double> tmp=z*y*-1.+3.;
+	NRMat<double> ynew=y*tmp*.5;
+	z=tmp*z*.5;
+	y=ynew;
+        cout <<"iter "<<i <<" residue "<< (r=(y-s).norm())<<endl;
+        ++i;
+        } while(abs(r)>1e-10);
+}
+
+
+if(0)
+{
+int n=3;
+NRMat<double> a(n,n);
+ a(0,0)=1.;
+        a(0,1)=2.;
+        a(1,0)=2.;
+        a(1,1)=6.;
+a(2,2)=-4;
+a(0,2)=1;
+cout <<a;
+double d;
+NRMat<double> c=inverse(a,&d);
+cout <<a<<c;
+}
+
+if(0)
+{
+NRMat<double> a(3,3);
+NRMat<double> b=a;
+for(int i=1; i<4;i++) b=b*b;
+}
+
+if(0)
+{
+NRMat<double> a;
+cin >>a;
+NRMat<double> b=exp(a);
+NRMat<double> c=log(b);
+cout <<a;
+cout <<b;
+cout <<c;
+cout << (b-exp(c)).norm() <<endl;
+}
+
+if(00)
+{
+NRMat<double> a;
+cin >>a;
+NRMat<double> c=log(a); //matrixfunction(a,&mycident,1);
+cout <<c;
+NRMat<double> b=exp(c);
+cout <<"exp(log(x))\n"<<b;
+cout<<(b-a).norm()<<endl;
+}
+
+if(0)
+{
+//check my exponential with respect to spectral decomposition one
+NRSMat<double> a;
+cin >>a;
+NRMat<double> aa(a);
+NRMat<double> b=exp(aa);
+NRMat<double> c=matrixfunction(a,&exp);
+cout <<a;
+cout <<b;
+cout <<c;
+cout << (b-c).norm()/b.norm() <<endl;
+}
+
+if(0)
+{
+//verify BCH expansion
+NRMat<double> h;
+NRMat<double> t;
+cin >>h;
+cin >>t;
+NRMat<double> r1= exp(-t) * h * exp(t);
+NRMat<double> r2=BCHexpansion(h,t,30);
+cout <<r1;
+cout <<r2;
+cout <<"error = "<<(r1-r2).norm()<<endl;
+}
+
+if(0)
+{
+int n;
+cin >>n;
+SparseMat<double> a(n,n);
+for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
+        {
+        a.add(i,j,random()/(1.+RAND_MAX));
+        }
+a.setsymmetric();
+NRSMat<double> aa(a); 
+NRMat<double> aaa(a); 
+NRVec<double> w(n);
+NRMat<double> v(n,n);
+//cout <<aa;
+diagonalize(aa, w, &v,0);
+//cout <<w;
+//cout <<v;
+//cout << v*aaa*v.transpose(); 
+cout <<  (v*aaa*v.transpose() - diagonalmatrix(w)).norm()<<endl;
+}
+
+if(0)
+{
+NRMat<complex<double> > a;
+cin >>a;
+NRMat<complex<double> > b=exp(a);
+cout <<b;
+}
+
+if(0)
+{
+int n;
+cin >>n;
+//NRMat<double> a(n,n);
+NRSMat<double> a(n);
+for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
+        {
+        a(j,i)=a(i,j)=random()/(1.+RAND_MAX);
+        }
+cout <<a;
+NRMat<double> y(1,n);
+for(int i=0;i<n;++i) y(0,i)=random()/(1.+RAND_MAX);
+cout <<y;
+linear_solve(a,&y);
+cout << y;
+}
+
+if(0)
+{
+int n;
+cin >>n;
+SparseMat<double> a(n,n);
+int spars=n*n/3;
+        for(int i=0; i<spars;i++) a.add(randind(n),randind(n),random()/(1.+RAND_MAX));
+
+NRMat<double> aa(a);
+NRVec<double> v(aa[0],n*n);
+
+cout <<a;
+cout <<aa;
+cout <<v;
+
+cout <<"test "<<aa.dot(aa)<<endl;
+cout <<"test "<<v*v<<endl;
+cout <<"test "<<a.dot(aa)<<endl;
+cout <<"test "<<a.dot(a)<<endl;
+
+}
+
+}
+
+
+
--- a/vec.cc
+++ b/vec.cc
@ -0,0 +1,348 @@
+#include <iostream>
+#include "vec.h"
+
+//////////////////////////////////////////////////////////////////////////////
+//// forced instantization in the corespoding object file
+// Explicitly instantiates the stream operators of NRVec<T> in this object file.
+// (The last macro line carries no trailing backslash, so the definition ends
+// here instead of relying on the following blank line.)
+#define INSTANTIZE(T) \
+template ostream & operator<<(ostream &s, const NRVec< T > &x); \
+template istream & operator>>(istream  &s, NRVec< T > &x);
+
+INSTANTIZE(double)
+INSTANTIZE(complex<double>)
+// an explicit instantiation of a class template needs the class-key
+template class NRVec<double>;
+template class NRVec< complex<double> >;
+
+
+/*
+ * Templates first, specializations for BLAS next
+ */
+
+// conversion ctor
+#ifndef MATPTR
+// Builds a vector that refers to the storage of the matrix rhs: no data are
+// copied, the length becomes rhs.nn*rhs.mm and the reference counter is shared.
+template <typename T>
+NRVec<T>::NRVec(const NRMat<T> &rhs)
+{
+	nn = rhs.nn*rhs.mm;
+	v = rhs.v;
+	count = rhs.count;
+	// guard against a null counter, exactly as the NRVec copy constructor does
+	if(count) (*count)++;
+}
+#endif
+
+// dtor
+template <typename T>
+NRVec<T>::~NRVec()
+{
+	// a vector without a counter owns nothing
+	if(count == 0) return;
+
+	// give up our reference; other owners keep the storage alive
+	--(*count);
+	if(*count > 0) return;
+
+	if(v != 0) delete[] v;
+	delete count;
+}
+
+// detach from a physical vector and make own copy
+template <typename T>
+void NRVec<T>::copyonwrite()
+{
+#ifdef DEBUG
+	if(!count) laerror("probably an assignment to undefined vector");
+#endif
+	// a sole owner may write in place
+	if(*count <= 1) return;
+
+	// leave the shared block and continue with a private copy of its data
+	--(*count);
+	count = new int(1);
+	T *own = new T[nn];
+	memcpy(own, v, nn*sizeof(T));
+	v = own;
+}
+
+// Asignment
+// Shallow assignment: *this ends up sharing the storage and counter of rhs.
+template <typename T>
+NRVec<T> & NRVec<T>::operator=(const NRVec<T> &rhs)
+{
+	if (this == &rhs) return *this;
+
+	// release what we held so far; free it if we were the last owner
+	if (count != 0 && --(*count) == 0)
+	{
+		delete[] v;
+		delete count;
+	}
+
+	// attach to the data of rhs
+	nn = rhs.nn;
+	v = rhs.v;
+	count = rhs.count;
+	if (count != 0) ++(*count);
+	return *this;
+}
+
+// Resize
+// Gives the vector room for n elements. Storage shared with other objects is
+// abandoned; existing contents are not copied into newly allocated storage.
+template <typename T>
+void NRVec<T>::resize(const int n)
+{
+#ifdef DEBUG
+  if(n<=0) laerror("illegal vector dimension");
+#endif
+  // shared storage: detach and continue as an unallocated vector
+  if(count != 0 && *count > 1)
+  {
+    --(*count);
+    count = 0;
+    v = 0;
+    nn = 0;
+  }
+
+  // unallocated: create counter and storage from scratch
+  if(count == 0)
+  {
+    count = new int(1);
+    nn = n;
+    v = new T[nn];
+    return;
+  }
+
+  // sole owner: reallocate only when the length really changes
+  if(n == nn) return;
+  nn = n;
+  delete[] v;
+  v = new T[nn];
+}
+
+// ostream << NRVec
+// Writes the length on a line of its own, then the elements separated by
+// blanks and terminated by a newline.
+template <typename T>
+ostream & operator<<(ostream &s, const NRVec<T> &x)
+{
+  const int len = x.size();
+
+  s << len << endl;
+  for(int k=0; k<len; ++k)
+  {
+    s << x[k];
+    if(k == len-1) s << '\n';
+    else s << ' ';
+  }
+  return s;
+}
+
+// istream >> NRVec
+// Reads the length first, resizes x accordingly and then reads that many elements.
+template <typename T>
+istream & operator>>(istream &s, NRVec<T> &x)
+{
+  int len;
+
+  s >> len;
+  x.resize(len);
+  for(int k=0; k<len; ++k)
+  {
+    s >> x[k];
+  }
+  return s;
+}
+
+// formatted print for NRVec
+template<typename T>
+void NRVec<T>::fprintf(FILE *file, const char *format, const int modulo) const
+{
+	// the vector is passed to lawritemat as a matrix with one row and nn columns
+	const int rows = 1;
+	const int cols = nn;
+	lawritemat(file, v, rows, cols, format, 1, modulo, 0);
+}
+
+// formatted scan for NRVec
+// Reads the dimension with "%d", resizes the vector and then reads each
+// element with the caller-supplied format.
+template <class T>
+void NRVec<T>::fscanf(FILE *f, const char *format)
+{
+	int len;
+	const int got = std::fscanf(f, "%d", &len);
+	if(got != 1) laerror("cannot read vector dimension");
+
+	resize(len);
+	for (int k=0; k<len; ++k)
+	{
+		if (std::fscanf(f, format, &v[k]) != 1)
+			laerror("cannot read the vector eleemnt");
+	}
+}
+
+// assignmet with a physical copy
+// Assignment with a physical copy: afterwards *this owns private storage
+// (counter equal to 1) holding a copy of the elements of rhs.
+template <typename T>
+NRVec<T> & NRVec<T>::operator|=(const NRVec<T> &rhs)
+{
+	if (this != &rhs) {
+#ifdef DEBUG
+		if (!rhs.v) laerror("unallocated rhs in NRVec operator |=");
+#endif
+		// storage shared with others: detach and continue as unallocated
+		if (count)
+			if (*count > 1) {
+				--(*count);
+				nn = 0;
+				count = 0;
+				v = 0;
+			}
+		if (nn != rhs.nn) {
+			if (v) delete[] (v);
+			// clear the pointer so that the allocation below really happens;
+			// otherwise memcpy would write into the block that was just freed
+			v = 0;
+			nn = rhs.nn;
+		}
+		if(!v) v = new T[nn];
+		if(!count) count = new int;
+		*count = 1;
+		memcpy(v, rhs.v, nn*sizeof(T));
+	}
+	return *this;
+}
+
+// unary minus
+// generic version: returns a new vector holding the negated elements
+template <typename T>
+const NRVec<T> NRVec<T>::operator-() const
+{
+	NRVec<T> negated(nn);
+	int k = 0;
+	while (k < nn) {
+		negated.v[k] = -v[k];
+		++k;
+	}
+	return negated;
+}
+
+// axpy call for T = double (not strided)
+void NRVec<double>::axpy(const double alpha, const NRVec<double> &x)
+{
+#ifdef DEBUG
+	if (nn != x.nn) laerror("axpy of incompatible vectors");
+#endif
+	copyonwrite();
+	const double *src = x.v;
+	cblas_daxpy(nn, alpha, src, 1, v, 1);
+}
+
+// axpy call for T = complex<double> (not strided)
+void NRVec< complex<double> >::axpy(const complex<double> alpha, 
+			const NRVec< complex<double> > &x)
+{
+#ifdef DEBUG
+	if (nn != x.nn) laerror("axpy of incompatible vectors");
+#endif
+	copyonwrite();
+	// the complex CBLAS interface takes scalars and arrays through void pointers
+	void *factor = (void *)(&alpha);
+	void *src = (void *)(x.v);
+	cblas_zaxpy(nn, factor, src, 1, (void *)v, 1);
+}
+
+// axpy call for T = double (strided)
+// x is read with the given stride; no size check is possible on a bare pointer
+void NRVec<double>::axpy(const double alpha, const double *x, const int stride)
+{
+	copyonwrite();
+	const int own_stride = 1;
+	cblas_daxpy(nn, alpha, x, stride, v, own_stride);
+}
+
+// axpy call for T = complex<double> (strided)
+void NRVec< complex<double> >::axpy(const complex<double> alpha, 
+			const complex<double> *x, const int stride)
+{
+	copyonwrite();
+	void *factor = (void *)(&alpha);
+	void *src = (void *)x;
+	cblas_zaxpy(nn, factor, src, stride, v, 1);
+}
+
+// unary minus
+const NRVec<double> NRVec<double>::operator-() const
+{
+	// start from a shallow copy, detach it, then scale the private data by -1
+	NRVec<double> negated(*this);
+	negated.copyonwrite();
+	cblas_dscal(nn, -1.0, negated.v, 1);
+	return negated;
+}
+
+const NRVec< complex<double> > 
+NRVec< complex<double> >::operator-() const
+{
+	NRVec< complex<double> > negated(*this);
+	negated.copyonwrite();
+	void *data = (void *)(negated.v);
+	cblas_zdscal(nn, -1.0, data, 1);
+	return negated;
+}
+
+// assignment of scalar to every element
+// sets every element to a; a zero value is written with memset
+template <typename T>
+NRVec<T> & NRVec<T>::operator=(const T &a)
+{
+	copyonwrite();
+	if(!(a != (T)0)) {
+		memset(v, 0, nn*sizeof(T));
+		return *this;
+	}
+	for (int k=0; k<nn; ++k) {
+		v[k] = a;
+	}
+	return *this;
+}
+
+// Normalization of NRVec<double>
+NRVec<double> & NRVec<double>::normalize()
+{
+	// norm of the current contents as returned by cblas_dnrm2
+	double scale = cblas_dnrm2(nn, v, 1);
+#ifdef DEBUG
+	if(!scale) laerror("normalization of zero vector");
+#endif
+	copyonwrite();
+	scale = 1.0/scale;
+	cblas_dscal(nn, scale, v, 1);
+	return *this;
+}
+
+// Normalization of NRVec< complex<double> >
+NRVec< complex<double> > & NRVec< complex<double> >::normalize()
+{
+	// the real norm is kept in a complex variable because cblas_zscal expects a complex factor
+	complex<double> scale;
+	scale = cblas_dznrm2(nn, (void *)v, 1);
+#ifdef DEBUG
+	if(!(scale.real()) && !(scale.imag())) laerror("normalization of zero vector");
+#endif
+	copyonwrite();
+	scale = 1.0/scale;
+	void *factor = (void *)(&scale);
+	cblas_zscal(nn, factor, (void *)v, 1);
+	return *this;
+}
+
+// gemv call 
+// this = beta*this + alpha*op(A)*x through cblas_dgemv on row-major storage;
+// trans == 'n' uses A as stored, any other value its transpose.
+void NRVec<double>::gemv(const double beta, const NRMat<double> &A, 
+		const char trans, const double alpha, const NRVec &x)
+{
+#ifdef DEBUG
+	if ((trans == 'n'?A.ncols():A.nrows()) != x.size())
+		laerror("incompatible sizes in gemv A*x");
+	if ((trans == 'n'?A.nrows():A.ncols()) != nn)
+		laerror("incompatible sizes in gemv result");
+#endif
+	// the result is written into v, so detach from other owners first,
+	// as every other modifying member of this class does
+	copyonwrite();
+	cblas_dgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans),
+			A.nrows(), A.ncols(), alpha, A[0], A.ncols(), x.v, 1, beta, v, 1);
+}
+// complex counterpart of the routine above, through cblas_zgemv
+void NRVec< complex<double> >::gemv(const complex<double> beta,
+		const NRMat< complex<double> > &A, const char trans, 
+		const complex<double> alpha, const NRVec &x)
+{
+#ifdef DEBUG
+	if ((trans == 'n'?A.ncols():A.nrows()) != x.size()) 
+		laerror("incompatible sizes in gemv A*x");
+	if ((trans == 'n'?A.nrows():A.ncols()) != nn)
+		laerror("incompatible sizes in gemv result");
+#endif
+	// detach from shared storage before the result overwrites v
+	copyonwrite();
+	cblas_zgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans), 
+			A.nrows(), A.ncols(), (void *)(&alpha), (void *)A[0], A.ncols(), 
+			(void *)x.v, 1, (void *)(&beta), (void *)v, 1);
+}
+
+// Vec * Mat
+const NRVec<double> NRVec<double>::operator*(const NRMat<double> &mat) const
+{
+#ifdef DEBUG
+	if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat");
+#endif
+	const int ncol = mat.ncols();
+	NRVec<double> product(ncol);
+	// the row vector times mat is evaluated as the transposed matrix times v
+	cblas_dgemv(CblasRowMajor, CblasTrans, nn, ncol, 1.0, mat[0], ncol, v, 1,
+			0.0, product.v, 1);
+	return product;
+}
+
+const NRVec< complex<double> > 
+NRVec< complex<double> >::operator*(const NRMat< complex<double> > &mat) const
+{
+#ifdef DEBUG
+	if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat");
+#endif
+	const int ncol = mat.ncols();
+	NRVec< complex<double> > product(ncol);
+	// CONE and CZERO supply addressable complex constants for the scalar arguments
+	cblas_zgemv(CblasRowMajor, CblasTrans, nn, ncol, &CONE, mat[0], ncol, v, 1,
+			&CZERO, product.v, 1);
+	return product;
+}
+
+// Direc product Mat = Vec | Vec
+const NRMat<double> NRVec<double>::operator|(const NRVec<double> &b) const
+{
+	// start from a zero nn x b.nn matrix and let cblas_dger add the rank-one update
+	const int ncol = b.nn;
+	NRMat<double> outer(0.,nn,ncol);
+	cblas_dger(CblasRowMajor, nn, ncol, 1., v, 1, b.v, 1, outer, ncol);
+	return outer;
+}
+
+const NRMat< complex<double> > 
+NRVec< complex<double> >::operator|(const NRVec< complex<double> > &b) const
+{
+	const int ncol = b.nn;
+	NRMat< complex<double> > outer(0.,nn,ncol);
+	cblas_zgerc(CblasRowMajor, nn, ncol, &CONE, v, 1, b.v, 1, outer, ncol);
+	return outer;
+}
+
+
--- a/vec.h
+++ b/vec.h
@ -0,0 +1,380 @@
+#ifndef _LA_VEC_H_
+#define _LA_VEC_H_
+
+extern "C" {
+#include "cblas.h"
+}
+#include <stdio.h>
+#include <complex>
+#include <string.h>
+#include <iostream>
+
+using namespace std;
+
+template <typename T> class NRVec;
+template <typename T> class NRSMat;
+template <typename T> class NRMat;
+template <typename T> class SparseMat;
+
+//////////////////////////////////////////////////////////////////////////////
+// Forward declarations
+void laerror(const char *s1=0, const char *s2=0, const char *s3=0, const char *s4=0);
+template <typename T> void lawritemat(FILE *file,const T *a,int r,int c,
+		const char *form0,int nodim,int modulo, int issym);
+
+// Memory allocated constants for cblas routines
+const static complex<double> CONE = 1.0, CMONE = -1.0, CZERO = 0.0;
+
+// Macros to construct binary operators +,-,*, from +=, -=, *=
+// for 3 cases: X + a, a + X, X + Y
+//
+// NRVECMAT_OPER(E,X) defines the member NR##E<T>::operator X(const T &a) and
+// the free operator X(const T &a, const NR##E<T> &rhs); both copy the NR##E
+// operand and apply X##= a to that copy.
+// NOTE(review): the free form therefore evaluates rhs X a, not a X rhs, which
+// differs for a non-commutative X such as '-' -- confirm whether this is intended.
+//
+// NRVECMAT_OPER2(E,X) defines the member operator X for two NR##E operands in
+// the same way, from X##=.
+#define NRVECMAT_OPER(E,X) \
+template<class T> \
+	inline const NR##E<T> NR##E<T>::operator X(const T &a) const \
+{ return NR##E(*this) X##= a; } \
+	\
+	template<class T> \
+	inline const NR##E<T> operator X(const T &a, const NR##E<T> &rhs) \
+{ return NR##E<T>(rhs) X##= a; }
+
+#define NRVECMAT_OPER2(E,X) \
+template<class T> \
+	inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
+{ return NR##E(*this) X##= a; }
+
+#include "smat.h"
+#include "mat.h"
+
+// NRVec class
+// Vector of T whose storage is reference counted: copy construction and
+// operator= share the data and increment *count, operator|= makes a physical
+// copy, and modifying members call copyonwrite() before they write.
+template <typename T>
+class NRVec {
+protected:
+	int nn;		// number of elements
+	T *v;		// element storage (0 when unallocated)
+	int *count;	// number of objects sharing v (0 when unallocated)
+public:
+	friend class NRSMat<T>;
+	friend class NRMat<T>;
+
+	// constructors
+	inline NRVec(): nn(0),v(0),count(0){};
+	inline explicit NRVec(const int n) : nn(n), v(new T[n]), count(new int(1)) {};
+	inline NRVec(const T &a, const int n);	//n elements, each set to a
+	inline NRVec(const T *a, const int n);	//n elements copied from a
+	inline NRVec(const NRVec &rhs);		//shares the data of rhs
+	inline explicit NRVec(const NRSMat<T> & S);
+#ifndef MATPTR
+	explicit NRVec(const NRMat<T> &rhs);
+#endif
+	// assignment and arithmetic
+	NRVec & operator=(const NRVec &rhs);	//shares the data of rhs
+	NRVec & operator=(const T &a);  //assign a to every element
+	NRVec & operator|=(const NRVec &rhs);	//assignment with a physical copy
+	const NRVec operator-() const;
+	inline NRVec & operator+=(const NRVec &rhs);
+	inline NRVec & operator-=(const NRVec &rhs);
+	inline NRVec & operator+=(const T &a);
+	inline NRVec & operator-=(const T &a);
+	inline NRVec & operator*=(const T &a);
+	inline int getcount() const {return count?*count:0;}
+	inline const NRVec operator+(const NRVec &rhs) const;
+	inline const NRVec operator-(const NRVec &rhs) const;
+	inline const NRVec operator+(const T &a) const;
+	inline const NRVec operator-(const T &a) const;
+	inline const NRVec operator*(const T &a) const;
+	inline const T operator*(const NRVec &rhs) const; //scalar product -> ddot
+	inline const NRVec operator*(const NRSMat<T> & S) const;
+	const NRVec operator*(const NRMat<T> &mat) const;
+	const NRMat<T> operator|(const NRVec<T> &rhs) const;	//direct product
+	inline const T sum() const; //sum of its elements
+	inline const T dot(const T *a, const int stride=1) const; // ddot with a stride-vector
+	// element and raw data access
+	inline T & operator[](const int i);
+	inline const T & operator[](const int i) const;
+	inline int size() const;
+	inline operator T*(); //get a pointer to the data
+	inline operator const T*() const; //get a pointer to the data
+	~NRVec();
+	// BLAS-style updates
+	void axpy(const T alpha, const NRVec &x); // this+= a*x
+	void axpy(const T alpha, const T *x, const int stride=1); // this+= a*x
+	void gemv(const T beta, const NRMat<T> &a, const char trans, 
+			const T alpha, const NRVec &x);
+	// storage management
+	void copyonwrite();	//make the data private before writing
+	void resize(const int n);
+	NRVec & normalize();
+	inline const double norm() const;
+	inline const T amax() const;
+	inline const NRVec unitvector() const;
+	void fprintf(FILE *f, const char *format, const int modulo) const;
+	void fscanf(FILE *f, const char *format);
+//sparse matrix concerning members
+	explicit NRVec(const SparseMat<T> &rhs);                // dense from sparse matrix with one of dimensions =1
+	const NRVec operator*(const SparseMat<T> &mat) const; //vector*matrix
+	inline void simplify() {}; //just for compatibility with sparse ones
+	void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x);
+};
+
+template <typename T> ostream & operator<<(ostream &s, const NRVec<T> &x);
+template <typename T> istream & operator>>(istream  &s, NRVec<T> &x);
+
+// INLINES
+
+// ctors
+// n elements, each initialized to a (zero is written with memset)
+template <typename T>
+inline NRVec<T>::NRVec(const T& a, const int n) : nn(n), v(new T[n]), count(new int(1))
+{
+	if(!(a != (T)0)) {
+		memset(v, 0, nn*sizeof(T));
+		return;
+	}
+	for(int k=0; k<n; ++k) {
+		v[k] = a;
+	}
+}
+
+// n elements copied bytewise from the array a
+template <typename T>
+inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), v(new T[n]), count(new int(1))
+{
+	const size_t bytes = n*sizeof(T);
+	memcpy(v, a, bytes);
+}
+
+// shallow copy: the new object shares data and counter with rhs
+template <typename T>
+inline NRVec<T>::NRVec(const NRVec<T> &rhs)
+{
+	nn = rhs.nn;
+	v = rhs.v;
+	count = rhs.count;
+	// an unallocated rhs has no counter to increment
+	if(count != 0) ++(*count);
+}
+
+// Vector that shares the storage and counter of the symmetric matrix rhs.
+template <typename T>
+inline NRVec<T>::NRVec(const NRSMat<T> &rhs)
+{
+	// NN2 is a macro defined outside this file; it is evaluated after nn has
+	// been set to the matrix dimension (presumably yielding the packed length -- confirm)
+	nn = rhs.nn;
+	nn = NN2;
+	v = rhs.v;
+	count = rhs.count;
+	// guard against a null counter, as the NRVec copy constructor does
+	if(count) (*count)++;
+}
+
+// x += a
+inline NRVec<double> & NRVec<double>::operator+=(const double &a)
+{
+	copyonwrite();
+	// the source increment 0 makes axpy read the same scalar for every element
+	const double *scalar = &a;
+	cblas_daxpy(nn, 1.0, scalar, 0, v, 1);
+	return *this;
+}
+
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator+=(const complex<double> &a)
+{
+	copyonwrite();
+	void *scalar = (void *)(&a);
+	cblas_zaxpy(nn, (void *)(&CONE), scalar, 0, (void *)v, 1);
+	return *this;
+}
+
+// x -= a
+// subtracts the scalar a from every element
+inline NRVec<double> & NRVec<double>::operator-=(const double &a)
+{
+	copyonwrite();
+	// the source increment 0 broadcasts the scalar; the factor must be -1
+	// (it was +1, which made this operator add a instead of subtracting it)
+	cblas_daxpy(nn, -1.0, &a, 0, v, 1);
+	return *this;
+}
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator-=(const complex<double> &a)
+{
+	copyonwrite();
+	// factor CMONE (-1) with source increment 0 subtracts a from every element
+	void *scalar = (void *)(&a);
+	cblas_zaxpy(nn, (void *)(&CMONE), scalar, 0, (void *)v, 1);
+	return *this;
+}
+
+// x += x
+inline NRVec<double> & NRVec<double>::operator+=(const NRVec<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
+#endif
+	copyonwrite();
+	const double *src = rhs.v;
+	cblas_daxpy(nn, 1.0, src, 1, v, 1);
+	return *this;
+}
+
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator+=(const NRVec< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
+#endif
+	copyonwrite();
+	void *one = (void *)(&CONE);
+	cblas_zaxpy(nn, one, rhs.v, 1, v, 1);
+	return *this;
+}
+
+// x -= x
+inline NRVec<double> & NRVec<double>::operator-=(const NRVec<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
+#endif
+	copyonwrite();
+	// axpy with the factor -1 performs the subtraction
+	const double *src = rhs.v;
+	cblas_daxpy(nn, -1.0, src, 1, v, 1);
+	return *this;
+}
+
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator-=(const NRVec< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
+#endif
+	copyonwrite();
+	void *minus_one = (void *)(&CMONE);
+	void *src = (void *)rhs.v;
+	cblas_zaxpy(nn, minus_one, src, 1, (void *)v, 1);
+	return *this;
+}
+
+// x *= a
+inline NRVec<double> & NRVec<double>::operator*=(const double &a)
+{
+	copyonwrite();
+	const double factor = a;
+	cblas_dscal(nn, factor, v, 1);
+	return *this;
+}
+
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator*=(const complex<double> &a)
+{
+	copyonwrite();
+	void *factor = (void *)(&a);
+	cblas_zscal(nn, factor, (void *)v, 1);
+	return *this;
+}
+
+// scalar product x.y
+inline const double NRVec<double>::operator*(const NRVec<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("ddot of incompatible vectors");
+#endif
+	const double product = cblas_ddot(nn, v, 1, rhs.v, 1);
+	return product;
+}
+
+inline const complex<double>
+NRVec< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("ddot of incompatible vectors");
+#endif
+	// zdotc conjugates its first vector argument, i.e. the elements of *this
+	complex<double> product;
+	void *out = (void *)(&product);
+	cblas_zdotc_sub(nn, (void *)v, 1, (void *)rhs.v, 1, out);
+	return product;
+}
+
+// Vec * SMat = SMat * Vec
+template <typename T>
+inline const NRVec<T> NRVec<T>::operator*(const NRSMat<T> & S) const
+{
+	// evaluated as matrix times vector, with the operands swapped
+	const NRVec<T> &self = *this;
+	return S * self;
+}
+
+// Sum of elements
+// Returns the plain sum of the elements.
+// (cblas_dasum, used here before, adds absolute values and therefore gave a
+// wrong result as soon as an element was negative.)
+inline const double NRVec<double>::sum() const
+{
+	double total = 0.;
+	for (int i=0; i<nn; i++) total += v[i];
+	return total;
+}
+inline const complex<double>
+NRVec< complex<double> >::sum() const
+{
+	// accumulate the elements one by one, starting from zero
+	complex<double> total = CZERO;
+	int k = 0;
+	while (k < nn) {
+		total += v[k];
+		++k;
+	}
+	return total;
+}
+
+// Dot product: x * y
+inline const double NRVec<double>::dot(const double *y, const int stride) const
+{
+	// y is read with the given stride, the own data contiguously
+	const double product = cblas_ddot(nn, y, stride, v, 1);
+	return product;
+}
+
+// NOTE(review): y is the first (conjugated) argument of zdotc here, while
+// operator*(const NRVec&) puts the own data first -- confirm this is intended.
+inline const complex<double>
+NRVec< complex<double> >::dot(const complex<double> *y, const int stride) const
+{
+	complex<double> product;
+	void *out = (void *)(&product);
+	cblas_zdotc_sub(nn, y, stride, v, 1, out);
+	return product;
+}
+
+// x[i] returns i-th element
+// Writable access to the i-th element. The DEBUG build refuses access to
+// shared data, because a write would be visible through every owner.
+template <typename T>
+inline T & NRVec<T>::operator[](const int i)
+{
+#ifdef DEBUG
+	// test for an unallocated vector first: its counter is null and must
+	// not be dereferenced by the sharing check
+	if(!v || !count) laerror("[] on unallocated NRVec");
+	else if(*count != 1) laerror("possible lval [] with count > 1");
+	if(i < 0 || i >= nn) laerror("NRVec out of range");
+#endif
+	return v[i];
+}
+// read-only access to the i-th element
+template <typename T>
+inline const T & NRVec<T>::operator[](const int i) const
+{
+#ifdef DEBUG
+	const bool outside = (i < 0) || (i >= nn);
+	if(outside) laerror("NRVec out of range");
+	if(v == 0) laerror("[] on unallocated NRVec");
+#endif
+	return *(v + i);
+}
+
+// length of the vector
+template <typename T>
+inline int NRVec<T>::size() const
+{
+	// number of elements stored in the vector
+	return this->nn;
+}
+
+// reference Vec to the first element
+// conversion to a pointer to the first element (writable data)
+template <typename T>
+inline NRVec<T>::operator T*()
+{
+#ifdef DEBUG
+	if(v == 0) laerror("unallocated NRVec in operator T*");
+#endif
+	T *first = v;
+	return first;
+}
+
+// conversion to a pointer to the first element (read-only data)
+template <typename T>
+inline NRVec<T>::operator const T*() const
+{
+#ifdef DEBUG
+	if(v == 0) laerror("unallocated NRVec in operator T*");
+#endif
+	const T *first = v;
+	return first;
+}
+
+// return norm of the Vec
+inline const double  NRVec<double>::norm() const
+{
+	const double length = cblas_dnrm2(nn, v, 1);
+	return length;
+}
+
+inline const double NRVec< complex<double> >::norm() const
+{
+	void *data = (void *)v;
+	const double length = cblas_dznrm2(nn, data, 1);
+	return length;
+}
+
+// Max element of the array
+inline const double  NRVec<double>::amax() const
+{
+	// cblas_idamax supplies the index; the element stored there is returned
+	const int where = cblas_idamax(nn, v, 1);
+	return v[where];
+}
+
+inline const complex<double> NRVec< complex<double> >::amax() const
+{
+	const int where = cblas_izamax(nn, (void *)v, 1);
+	return v[where];
+}
+
+
+// Make Vec unitvector
+template <typename T>
+inline const NRVec<T> NRVec<T>::unitvector() const
+{
+	// normalize a copy, so that *this stays untouched
+	NRVec<T> unit(*this);
+	return unit.normalize();
+}
+
+// generate operators: Vec + a, a + Vec, Vec * a
+NRVECMAT_OPER(Vec,+)
+NRVECMAT_OPER(Vec,-)
+NRVECMAT_OPER(Vec,*)
+// generate operators: Vec + Vec, Vec - Vec
+NRVECMAT_OPER2(Vec,+)
+NRVECMAT_OPER2(Vec,-)
+
+// Few forward declarations
+
+
+#endif /* _LA_VEC_H_ */