From d7b55e98464705be89312a2d0bf39ccab92d5fa3 Mon Sep 17 00:00:00 2001
From: jiri <jiri>
Date: Wed, 17 Mar 2004 03:07:21 +0000
Subject: [PATCH] *** empty log message ***

---
 .gitignore         |   28 ++
 fourindex.h        |  261 +++++++++++
 la.h               |    9 +
 la_traits.h        |   40 ++
 mat.cc             |  844 ++++++++++++++++++++++++++++++++++
 mat.h              |  346 ++++++++++++++
 matexp.h           |  259 +++++++++++
 nonclass.cc        |  524 +++++++++++++++++++++
 nonclass.h         |   85 ++++
 smat.cc            |  399 ++++++++++++++++
 smat.h             |  303 ++++++++++++
 sparsemat.cc       | 1088 ++++++++++++++++++++++++++++++++++++++++++++
 sparsemat.h        |  220 +++++++++
 sparsemat_traits.h |   15 +
 strassen.cc        |   31 ++
 t.cc               |  775 +++++++++++++++++++++++++++++++
 vec.cc             |  348 ++++++++++++++
 vec.h              |  380 ++++++++++++++++
 18 files changed, 5955 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 fourindex.h
 create mode 100644 la.h
 create mode 100644 la_traits.h
 create mode 100644 mat.cc
 create mode 100644 mat.h
 create mode 100644 matexp.h
 create mode 100644 nonclass.cc
 create mode 100644 nonclass.h
 create mode 100644 smat.cc
 create mode 100644 smat.h
 create mode 100644 sparsemat.cc
 create mode 100644 sparsemat.h
 create mode 100644 sparsemat_traits.h
 create mode 100644 strassen.cc
 create mode 100644 t.cc
 create mode 100644 vec.cc
 create mode 100644 vec.h
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..da8168b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,28 @@
+# CVS default ignores begin
+tags
+TAGS
+.make.state
+.nse_depinfo
+*~
+\#*
+.#*
+,*
+_$*
+*$
+*.old
+*.bak
+*.BAK
+*.orig
+*.rej
+.del-*
+*.a
+*.olb
+*.o
+*.obj
+*.so
+*.exe
+*.Z
+*.elc
+*.ln
+core
+# CVS default ignores end
diff --git a/fourindex.h b/fourindex.h
new file mode 100644
index 0000000..823dbbc
--- /dev/null
+++ b/fourindex.h
@@ -0,0 +1,261 @@
+#ifndef _fourindex_included
+#define _fourindex_included
+
+//element of a singly linked list; the four indices are stored in a portable way (no bit shifts, no endianness problems)
+
+
+template<class I, class T>
+struct matel4 {
+	T elem;			// stored value
+	matel4 *next;		// following node of the list
+	// The four indices, reachable either as an array or by name.
+	union packedindex {
+		I packed[4];
+		struct {
+			I i;
+			I j;
+			I k;
+			I l;
+		} indiv;
+	};
+	packedindex index;
+};
+
+typedef enum {nosymmetry=0, twoelectronreal=1, twoelectroncomplex=2, twobodyantisym=3} fourindexsymtype; //symmetry tag kept by fourindex; if twoelectron, only permutation-nonequivalent elements are stored
+
+template <class I, class T>
+class fourindex { //reference-counted singly linked list of matel4<I,T> elements for an index range of size nn
+protected:
+	I nn; //dimension, reported by size()
+	fourindexsymtype symmetry; //symmetry tag, nosymmetry until setsymmetry() is called
+        int *count; //reference counter shared between copies, NULL while the object is undefined
+	matel4<I,T> *list; //head of the element list, NULL when empty
+private:
+	void deletelist();
+	void copylist(const matel4<I,T> *l);
+public:
+	//iterator over the list nodes; end() is represented by a NULL node pointer
+	class iterator {
+	private:
+		matel4<I,T> *p;
+	public:
+		iterator(): p(NULL) {}; //a default iterator now equals end() instead of holding an indeterminate pointer
+		~iterator() {};
+		iterator(matel4<I,T> *list): p(list) {};
+		bool operator==(const iterator rhs) const {return p==rhs.p;}
+		bool operator!=(const iterator rhs) const {return p!=rhs.p;}
+		iterator operator++() {return p=p->next;}
+		iterator operator++(int) {matel4<I,T> *q=p; p=p->next; return q;}
+		matel4<I,T> & operator*() const {return *p;}
+		matel4<I,T> * operator->() const {return p;}
+	};
+	iterator begin() const {return list;}
+	iterator end() const {return NULL;}
+
+	//constructors etc.; symmetry is initialized explicitly, it used to be left indeterminate
+	inline fourindex() :nn(0),symmetry(nosymmetry),count(NULL),list(NULL) {};
+	inline fourindex(const I n) :nn(n),symmetry(nosymmetry),count(new int(1)),list(NULL) {};
+	fourindex(const fourindex &rhs); //copy constructor
+	inline int getcount() const {return count?*count:0;}
+	fourindex & operator=(const fourindex &rhs);
+        fourindex & operator+=(const fourindex &rhs);
+	inline void setsymmetry(fourindexsymtype s) {symmetry=s;}
+        fourindex & join(fourindex &rhs); //more efficient +=, rhs will be emptied
+	inline ~fourindex();
+	inline matel4<I,T> *getlist() const {return list;}
+	inline I size() const {return nn;}
+	void resize(const I n);
+	void copyonwrite();
+	int length() const;
+	inline void add(const I i, const I j, const I k, const I l, const T elem) //prepend a new element given by its four indices; no check for duplicates or sharing
+		{matel4<I,T> *ltmp= new matel4<I,T>; ltmp->next=list; list=ltmp; list->index.indiv.i=i;list->index.indiv.j=j;list->index.indiv.k=k;list->index.indiv.l=l; list->elem=elem;}
+	//same, with the indices passed as a packedindex union
+	inline void add(const typename matel4<I,T>::packedindex &index , const T elem) 
+                {matel4<I,T> *ltmp= new matel4<I,T>; ltmp->next=list; list=ltmp; list->index=index; list->elem=elem;}
+	//same, with the indices passed as a plain array of four
+	inline void add(const I (&index)[4], const T elem)
+                {matel4<I,T> *ltmp= new matel4<I,T>; ltmp->next=list; list=ltmp; memcpy(&list->index.packed, &index, sizeof(typename matel4<I,T>::packedindex)); list->elem=elem;}
+
+
+
+};
+
+
+//destructor: drops one reference; the last owner frees the nodes and the counter
+template <class I,class T>
+fourindex<I,T>::~fourindex()
+{
+	if(count && --(*count)<=0)
+		{
+		deletelist();	//frees every node, deletes count and resets it to NULL
+		delete count;	//count is NULL by now, so this is a no-op
+		}
+	//otherwise: undefined object, or other owners still reference the list
+}
+
+//copy constructor: shares the list of rhs (shallow, reference counted); an empty rhs yields a fresh, unshared empty object
+template <class I, class T>
+fourindex<I,T>::fourindex(const fourindex<I,T> &rhs)
+{
+#ifdef debug
+if(! &rhs) laerror("fourindex copy constructor with NULL argument");
+#endif
+        nn=rhs.nn; symmetry=rhs.symmetry; //the symmetry tag used to be left uninitialized in the copy
+	if(rhs.list&&!rhs.count) laerror("some inconsistency in fourindex contructors or assignments");
+        list=rhs.list;
+        if(list) {count=rhs.count; (*count)++;} else count=new int(1); //make the matrix defined, but empty and not shared
+}
+
+
+
+//assignment operator: releases the old list, then shares the list of rhs and takes over its dimension and symmetry
+template <class I, class T>
+fourindex<I,T> & fourindex<I,T>::operator=(const fourindex<I,T> &rhs)
+{
+        if (this != &rhs)
+                {
+                if(count)
+                    if(--(*count) ==0) {deletelist(); delete count;} // old stuff obsolete
+                list=rhs.list;
+                nn=rhs.nn; symmetry=rhs.symmetry; //the symmetry tag was not carried over before
+                if(list) count=rhs.count; else count= new int(0); //make the matrix defined, but empty and not shared, count will be incremented below
+                if(count) (*count)++;
+                }
+        return *this;
+}
+
+
+template <class I, class T>
+fourindex<I,T> & fourindex<I,T>::operator+=(const fourindex<I,T> &rhs)
+{
+//appends a copy of every element of rhs; elements with equal indices are not merged
+if(nn!=rhs.nn) laerror("incompatible dimensions for +=");
+if(count) copyonwrite();		//detach from other owners before modifying
+else {count=new int(1); list=NULL;}	//an undefined object becomes a defined, empty one
+//add() prepends, so the nodes being read are never revisited
+for(const matel4<I,T> *src=rhs.list; src; src=src->next)
+	{
+	add(src->index, src->elem);
+	}
+return *this;
+}
+
+template <class I, class T>
+fourindex<I,T> & fourindex<I,T>::join(fourindex<I,T> &rhs)
+{ //moves all nodes of rhs to the tail of this list without copying them; rhs is left with an empty list
+if(this==&rhs) return *this; //joining an object with itself would link the list into a cycle and then lose it
+if(nn!=rhs.nn) laerror("incompatible dimensions for join");
+if(rhs.count && *rhs.count!=1) laerror("shared rhs in join()"); //rhs.count is NULL for a default-constructed rhs
+if(!count) {count=new int;  *count=1; list=NULL;} else copyonwrite();
+matel4<I,T> **last=&list;
+while(*last) last= &((*last)->next); //walk to the terminating NULL link
+*last=rhs.list;
+rhs.list=NULL;
+return *this;
+}
+
+template <class I, class T>
+void fourindex<I,T>::resize(const I n)
+{
+	//Discards the current contents and leaves an empty, unshared list of dimension n.
+	//A non-positive n is reported through laerror().
+	if(n<=0 ) laerror("illegal fourindex dimension");
+	if(count && *count > 1) --(*count);		//shared: detach only, the other owners keep the nodes
+	else if(count && *count==1) deletelist();	//sole owner: free the nodes and the old counter
+	//whatever happened above, start over with a fresh counter and no nodes
+	nn=n;
+	list=NULL;
+	count=new int(1);
+}
+
+
+template <class I, class T>
+void fourindex<I,T>::deletelist()
+{
+//Frees every node and the reference counter; afterwards list and count are both NULL.
+//count is dereferenced unconditionally, so it must be non-NULL on entry.
+if(*count >1) laerror("trying to delete shared list");
+for(matel4<I,T> *node=list, *following; node; node=following)
+	{
+	following=node->next;	//remember the successor before the node goes away
+	delete node;
+	}
+delete count;
+count=NULL;
+list=NULL;
+}
+
+template <class I, class T>
+void fourindex<I,T>::copylist(const matel4<I,T> *l)
+{
+//Rebuilds this->list from fresh copies of the nodes starting at l. The previous
+//head pointer is dropped without freeing anything, and because add() prepends,
+//the copy holds the elements in reverse order.
+list=NULL;
+for(; l; l=l->next)
+	add(l->index,l->elem);
+}
+
+template <class I, class T>
+void fourindex<I,T>::copyonwrite()
+{
+	//Gives this object a private copy of the nodes when the list is shared;
+	//does nothing when this object is the only owner.
+	if(!count) laerror("probably an assignment to undefined fourindex");
+	if(*count <= 1) return;
+	--(*count);			//leave the shared nodes to the remaining owners
+	count = new int(1);
+	if(!list) laerror("empty list with count>1");
+	copylist(list);			//list now heads the private copy
+}
+
+template <class I, class T>
+int fourindex<I,T>::length() const
+{
+//Number of stored elements, obtained by walking the whole list,
+//so the cost grows linearly with the list length.
+int nodes=0;
+for(const matel4<I,T> *cur=list; cur; cur=cur->next)
+	{
+	++nodes;
+	}
+return nodes;
+}
+
+
+template <class I, class T>
+ostream& operator<<(ostream &s, const fourindex<I,T> &x)
+{
+	//Text format: the dimension on the first line, then one "i j k l value" line per
+	//stored element, closed by a line of four -1 sentinels.
+	const int dim=x.size();
+	s << dim << '\n';
+	typedef typename fourindex<I,T>::iterator iter;
+	for(iter el=x.begin(); el!=x.end(); ++el)
+		{
+		s << (int)el->index.indiv.i << ' ' << (int)el->index.indiv.j<<  ' ' <<(int)el->index.indiv.k << ' ' << (int)el->index.indiv.l  << ' ' << el->elem << '\n';
+		}
+	s << "-1 -1 -1 -1\n";
+	return s;
+}
+
+template <class I, class T>
+istream& operator>>(istream  &s, fourindex<I,T> &x)
+                { //reads the format written by operator<<: the dimension, then "i j k l value" records up to a record with a negative index
+                int i=-1,j=-1,k=-1,l=-1; //sentinel defaults in case nothing can be read
+		T elem;
+		int n;
+                if(!(s >> n)) return s; //no dimension available: leave x untouched instead of resizing to garbage
+                x.resize(n);
+                s >> i >> j >>k >>l;
+                while(s && i>=0 && j>=0 &&k>=0 &&l>=0) //a failed or truncated stream ends the loop as well
+                        {
+			if(!(s>>elem)) break; //do not store an element whose value could not be read
+			x.add(i,j,k,l,elem);
+			s >> i >> j >>k >>l; //was ">>ll", an undeclared name
+                        }
+                return s;
+                }
+
+
+#endif /*_fourindex_included*/
diff --git a/la.h b/la.h
new file mode 100644
index 0000000..a408c64
--- /dev/null
+++ b/la.h
@@ -0,0 +1,9 @@
+#ifndef _LA_H_
+#define _LA_H_
+
+#include "vec.h"
+#include "smat.h"
+#include "mat.h"
+#include "nonclass.h"
+
+#endif /* _LA_H_ */
diff --git a/la_traits.h b/la_traits.h
new file mode 100644
index 0000000..6f35b50
--- /dev/null
+++ b/la_traits.h
@@ -0,0 +1,40 @@
+////////////////////////////////////////////////////////////////////////////
+//traits classes
+
+#ifndef _LA_TRAITS_INCL
+#define _LA_TRAITS_INCL
+
+//default one, good for numbers: both the element type and the product type are C itself
+template<class C> struct NRMat_traits {
+typedef C elementtype;
+typedef C producttype;
+static C norm (const  C &x) {return abs(x);} //NOTE(review): unqualified abs(); which overload is picked depends on the headers seen by the includer - confirm it is not the int version for floating-point C
+static void axpy (C &s, const C &x, const C &c) {s+=x*c;} //s += x*c
+};
+
+//specializations for matrix-valued elements; this one covers NRMat<double>
+template<> struct NRMat_traits<NRMat<double> > {
+typedef double elementtype;
+typedef NRMat<double> producttype;
+static double norm (const NRMat<double> &x) {return x.norm();} //delegates to the matrix's own norm()
+static void axpy (NRMat<double>&s, const NRMat<double> &x, const double c) {s.axpy(c,x);} //delegates to s.axpy(c,x)
+};
+
+template<> struct NRMat_traits<NRSMat<double> > { //traits for NRSMat<double> elements; the product type is a general NRMat<double>
+typedef double elementtype;
+typedef NRMat<double> producttype;
+static double norm (const NRSMat<double> &x) {return x.norm(0.);} //returns plain double like the sibling specializations (the by-value return used to be const-qualified)
+static void axpy (NRSMat<double>&s, const NRSMat<double> &x, const double c) {s.axpy(c,x);} //delegates to s.axpy(c,x)
+};
+
+
+template<> struct NRMat_traits<NRMat<complex<double> > > { //traits for elements that are complex matrices
+typedef complex<double> elementtype;
+typedef NRMat<complex<double> > producttype;
+static double norm (const NRMat<complex<double> >  &x) {return x.norm();} //delegates to the matrix's own norm()
+static void axpy (NRMat<complex<double> >&s, const NRMat<complex<double> > &x, const complex<double> c) {s.axpy(c,x);} //delegates to s.axpy(c,x)
+};
+
+
+
+#endif
diff --git a/mat.cc b/mat.cc
new file mode 100644
index 0000000..10ec8c6
--- /dev/null
+++ b/mat.cc
@@ -0,0 +1,844 @@
+#include "mat.h"
+// TODO :
+//
+
+//////////////////////////////////////////////////////////////////////////////
+//// forced instantiation in the corresponding object file (an explicit instantiation needs the class-key)
+template class NRMat<double>;
+template class NRMat< complex<double> >;
+
+
+/*
+ *  Templates first, specializations for BLAS next
+ */
+
+// dtor: releases one reference; the last owner frees the storage and the counter
+template <typename T>
+NRMat<T>::~NRMat()
+{
+	if (count == 0) return;			// never allocated, nothing to release
+	if (--(*count) > 0) return;		// other matrices still share the data
+	// this was the last reference
+	if (v != 0) {
+#ifdef MATPTR
+		delete[] (v[0]);		// element block the row pointers refer to
+#endif
+		delete[] v;
+	}
+	delete count;
+}
+
+// assign NRMat = NRMat: shallow, reference-counted copy (both matrices refer to the storage of rhs afterwards)
+template <typename T>
+NRMat<T> & NRMat<T>::operator=(const NRMat<T> &rhs)
+{
+	if (this == &rhs) return *this;
+	// release our current storage; free it when we were its last owner
+	if (count && --(*count) == 0) {
+		if (v) {
+#ifdef MATPTR
+			delete[] (v[0]);
+#endif
+			delete[] v;
+		}
+		delete count;
+	}
+	// adopt the storage of rhs, also when *this was undefined (count==0); that case used to be skipped
+	v = rhs.v; nn = rhs.nn; mm = rhs.mm;
+	count = rhs.count;
+	if (count) (*count)++;	// one more owner (the counter used to be decremented here)
+	return *this;
+}
+
+// Assign diagonal: every diagonal element becomes a, the other elements are not written
+template <typename T>
+NRMat<T> & NRMat<T>::operator=(const T &a)
+{
+	copyonwrite();	// obtain private storage before writing
+#ifdef DEBUG
+	if (nn != mm) laerror("RMat.operator=scalar on non-square matrix");
+#endif
+#ifdef MATPTR
+	for (int d = 0; d < nn; ++d) v[d][d] = a;
+#else
+	for (int d = 0; d < nn; ++d) v[d*(nn+1)] = a;	// flat position of the d-th diagonal element, row length nn
+#endif
+	return *this;
+}
+
+// Explicit deep copy of NRmat: *this ends up with its own storage (count 1) holding a copy of the elements of rhs
+template <typename T>
+NRMat<T> & NRMat<T>::operator|=(const NRMat<T> &rhs)
+{
+	if (this == &rhs) return *this;
+#ifdef DEBUG
+	if (!rhs.v) laerror("unallocated rhs in Mat operator |=");
+#endif
+	if (count)
+		if (*count > 1) { // storage is shared: detach and continue from an undefined state
+			--(*count);
+			nn = 0;
+			mm = 0;
+			count = 0;
+			v = 0;
+		}
+	if (nn != rhs.nn || mm != rhs.mm) { // dimensions differ: drop any private buffer so that it is reallocated below
+		if (v) {
+#ifdef MATPTR
+			delete[] (v[0]);
+#endif
+			delete[] (v);
+			v = 0;
+		}
+		nn = rhs.nn;
+		mm = rhs.mm;
+	}
+	if (!v) { // allocate storage with the dimensions of rhs
+#ifdef MATPTR
+		v = new T*[nn];
+		v[0] = new T[mm*nn];
+#else
+		v = new T[mm*nn];
+#endif
+	}
+
+#ifdef MATPTR
+	for (int i=1; i< nn; i++) v[i] = v[i-1] + mm; // row pointers into the single element block
+	memcpy(v[0], rhs.v[0], nn*mm*sizeof(T));
+#else
+	memcpy(v, rhs.v, nn*mm*sizeof(T)); // raw byte copy of all nn*mm elements
+#endif
+
+	if (!count) count = new int;
+	*count = 1;
+
+	return *this;
+}
+
+// M += a: adds a to every diagonal element, the other elements are not written
+template <typename T>
+NRMat<T> & NRMat<T>::operator+=(const T &a)
+{
+	copyonwrite();	// obtain private storage before writing
+#ifdef DEBUG
+	if (nn != mm) laerror("Mat.operator+=scalar on non-square matrix");
+#endif
+#ifdef MATPTR
+	for (int d = 0; d < nn; ++d) v[d][d] += a;
+#else
+	for (int d = 0; d < nn; ++d) v[d*(nn+1)] += a;	// flat position of the d-th diagonal element, row length nn
+#endif
+	return *this;
+}
+
+// M -= a: subtracts a from every diagonal element, the other elements are not written
+template <typename T>
+NRMat<T> & NRMat<T>::operator-=(const T &a)
+{
+	copyonwrite();	// obtain private storage before writing
+#ifdef DEBUG
+	if (nn != mm) laerror("Mat.operator-=scalar on non-square matrix");
+#endif
+#ifdef MATPTR
+	for (int d = 0; d < nn; ++d) v[d][d] -= a;
+#else
+	for (int d = 0; d < nn; ++d) v[d*(nn+1)] -= a;	// flat position of the d-th diagonal element, row length nn
+#endif
+	return *this;
+}
+
+// unary minus: returns a new matrix holding the negated elements; *this is not modified
+template <typename T>
+const NRMat<T> NRMat<T>::operator-() const
+{
+	NRMat<T> negated(nn, mm);
+#ifdef MATPTR
+	for (int k = 0, total = nn*mm; k < total; ++k) negated.v[0][k] = -v[0][k];
+#else
+	for (int k = 0, total = nn*mm; k < total; ++k) negated.v[k] = -v[k];
+#endif
+	return negated;
+}
+
+// direct sum: block-diagonal matrix with *this in the upper-left corner, b in the lower-right corner and (T)0 elsewhere
+template <typename T>
+const NRMat<T> NRMat<T>::operator&(const NRMat<T> & b) const
+{
+	NRMat<T> result((T)0, nn+b.nn, mm+b.mm);
+	for (int i=0; i<nn; i++) memcpy(result[i], (*this)[i], sizeof(T)*mm);
+	for (int i=0; i<b.nn; i++) memcpy(result[nn+i]+mm, b[i], sizeof(T)*b.mm); // column offset is mm, the width of *this; nn was only right for a square *this
+	return result;
+}
+
+// direct product: block (row,col) of the result is the element (*this)[row][col] multiplied by b
+template <typename T>
+const NRMat<T> NRMat<T>::operator|(const NRMat<T> &b) const
+{
+	NRMat<T> product(nn*b.nn, mm*b.mm);
+	for (int row=0; row<nn; ++row)
+		for (int col=0; col<mm; ++col)
+			for (int brow=0; brow<b.nn; ++brow)
+				for (int bcol=0; bcol<b.mm; ++bcol)
+					product[row*b.nn+brow][col*b.mm+bcol] = (*this)[row][col]*b[brow][bcol];
+	return product;
+}
+
+// sum of columns: entry i of the result is the sum of all elements of row i (summed across the columns)
+template <typename T>
+const NRVec<T> NRMat<T>::csum() const
+{
+	NRVec<T> totals(nn);
+	for (int row = 0; row < nn; ++row) {
+		// accumulate in a local first, store once per row
+		T acc = (T)0;
+		for (int col = 0; col < mm; ++col)
+			acc += (*this)[row][col];
+		totals[row] = acc;
+	}
+	return totals;
+}
+
+// sum of rows: entry i of the result is the sum of all elements of column i (summed across the rows)
+template <typename T>
+const NRVec<T> NRMat<T>::rsum() const
+{
+	NRVec<T> result(mm); // one entry per column; it was sized nn, which the loop below overran whenever mm > nn
+	T sum;
+	
+	for (int i=0; i<mm; i++) { 
+		sum = (T)0;
+		for(int j=0; j<nn; j++) sum += (*this)[j][i];
+		result[i] = sum;
+	}
+	return result;
+}
+
+// detach Mat from shared storage and give it its own deep copy
+template <typename T>
+void NRMat<T>::copyonwrite()
+{
+#ifdef DEBUG
+	if (!count) laerror("Mat::copyonwrite of undefined matrix");
+#endif
+	if (*count > 1) { // copy only when other matrices reference the same storage; count is dereferenced unchecked outside DEBUG builds
+		(*count)--;
+		count = new int;
+		*count = 1;
+#ifdef MATPTR
+		T **newv = new T*[nn];
+		newv[0] = new T[mm*nn];
+		memcpy(newv[0], v[0], mm*nn*sizeof(T));
+		v = newv;
+		for (int i=1; i< nn; i++) v[i] = v[i-1] + mm; // rebuild the row pointers into the new block
+#else
+		T *newv = new T[mm*nn];
+		memcpy(newv, v, mm*nn*sizeof(T)); // raw byte copy of all elements
+		v = newv;
+#endif
+	}
+}
+
+template <typename T>
+void NRMat<T>::resize(const int n, const int m) // make *this an unshared n x m matrix; nothing is copied when storage is (re)allocated
+{
+#ifdef DEBUG
+	if (n<=0 || m<=0) laerror("illegal dimensions in Mat::resize()");
+#endif
+	if (count)
+		if (*count > 1) { // shared: detach and fall through to a fresh allocation
+			(*count)--;
+			count = 0;
+			v  = 0;
+			nn = 0;
+			mm = 0;
+		}
+	if (!count) { // undefined (or just detached): allocate from scratch
+		count = new int;
+		*count = 1;
+		nn = n;
+		mm = m;
+#ifdef MATPTR
+		v = new T*[nn];
+		v[0] = new T[m*n];
+		for (int i=1; i< n; i++) v[i] = v[i-1] + m; // row pointers into the single element block
+#else
+		v = new T[m*n];
+#endif
+		return;
+	}
+	// At this point *count = 1, check if resize is necessary
+	if (n!=nn || m!=mm) { // sole owner with different dimensions: free the old storage and allocate anew
+		nn = n;
+		mm = m;
+#ifdef MATPTR
+		delete[] (v[0]);
+#endif
+		delete[] v;
+#ifdef MATPTR
+		v = new T*[nn];
+		v[0] = new T[m*n];
+		for (int i=1; i< n; i++) v[i] = v[i-1] + m;
+#else
+		v = new T[m*n];
+#endif
+	}
+}
+
+// transpose Mat in place by swapping each element below the diagonal with its mirror image; returns *this
+template <typename T>
+NRMat<T> & NRMat<T>::transposeme()
+{
+#ifdef DEBUG
+	if (nn != mm) laerror("transpose of non-square Mat");
+#endif
+	copyonwrite(); // obtain private storage before swapping
+	for(int i=1; i<nn; i++)
+		for(int j=0; j<i; j++) { // j<i visits every off-diagonal pair exactly once
+#ifdef MATPTR
+			T tmp = v[i][j]; 
+			v[i][j] = v[j][i]; 
+			v[j][i] = tmp;
+#else
+			register int a; 
+			register int b;
+			a = i*mm+j; // flat position of element (i,j)
+			b = j*mm+i; // flat position of element (j,i)
+			T tmp = v[a];
+			v[a] = v[b];
+			v[b] = tmp;
+#endif
+		}
+	return *this;
+}
+
+// Output of Mat: forwards the element pointer, both dimensions, format and modulo to lawritemat() (defined elsewhere; the literal arguments 2 and 0 are passed through as they are)
+template <typename T>
+void NRMat<T>::fprintf(FILE *file, const char *format, const int modulo) const
+{
+	lawritemat(file, (const T*)(*this), nn, mm, format, 2, modulo, 0);
+}
+
+// Input of Mat: reads "n m", resizes to n x m and then reads n*m elements row by row, each converted with the caller's format
+template <typename T>
+void NRMat<T>::fscanf(FILE *f, const char *format)
+{
+	int n, m;
+	if (std::fscanf(f, "%d %d", &n, &m) != 2)
+		laerror("cannot read matrix dimensions in Mat::fscanf()");
+	resize(n,m);
+	T *p = *this;
+	for(int i=0; i<n; i++)
+		for(int j=0; j<m; j++) // m elements per row; the bound used to be n, which is wrong for non-square matrices
+			if(std::fscanf(f,format,p++) != 1)
+				laerror("cannot read matrix element in Mat::fscanf()");
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/*
+ * BLAS specializations for double and complex<double>
+ */
+
+// Mat *= a: scales every element by a (cblas_dscal over all nn*mm elements)
+NRMat<double> & NRMat<double>::operator*=(const double &a)
+{
+	copyonwrite(); // detach from shared storage before modifying in place
+	cblas_dscal(nn*mm, a, *this, 1);
+	return *this;
+}
+NRMat< complex<double> > & // complex counterpart of Mat *= a, via cblas_zscal over all nn*mm elements
+NRMat< complex<double> >::operator*=(const complex<double> &a)
+{
+	copyonwrite(); // detach from shared storage before modifying in place
+	cblas_zscal(nn*mm, &a, (void *)(*this)[0], 1);
+	return *this;
+}
+
+// Mat += Mat: element-wise addition of a matrix of the same shape (cblas_daxpy with factor 1.0)
+NRMat<double> & NRMat<double>::operator+=(const NRMat<double>  &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn || mm!= rhs.mm) 
+		laerror("Mat += Mat of incompatible matrices");
+#endif
+	copyonwrite(); // detach from shared storage before modifying in place
+	cblas_daxpy(nn*mm, 1.0, rhs, 1, *this, 1);
+	return *this;
+}
+NRMat< complex<double> > & // complex counterpart of Mat += Mat, via cblas_zaxpy
+NRMat< complex<double> >::operator+=(const NRMat< complex<double> >  &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn || mm!= rhs.mm) 
+		laerror("Mat += Mat of incompatible matrices");
+#endif
+	 copyonwrite();
+	 cblas_zaxpy(nn*mm, &CONE, (void *)rhs[0], 1, (void *)(*this)[0], 1); // CONE is defined elsewhere, presumably complex 1 - confirm
+	 return *this;
+}
+
+// Mat -= Mat: element-wise subtraction of a matrix of the same shape (cblas_daxpy with factor -1.0)
+NRMat<double> & NRMat<double>::operator-=(const NRMat<double>  &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn || mm!= rhs.mm) 
+		laerror("Mat -= Mat of incompatible matrices");
+#endif
+	copyonwrite(); // detach from shared storage before modifying in place
+	cblas_daxpy(nn*mm, -1.0, rhs, 1, *this, 1);
+	return *this;
+}
+NRMat< complex<double> > & // complex counterpart of Mat -= Mat, via cblas_zaxpy
+NRMat< complex<double> >::operator-=(const NRMat< complex<double> >  &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn || mm!= rhs.mm) 
+		laerror("Mat -= Mat of incompatible matrices");
+#endif
+	 copyonwrite();
+	 cblas_zaxpy(nn*mm, &CMONE, (void *)rhs[0], 1, (void *)(*this)[0], 1); // CMONE is defined elsewhere, presumably complex -1 - confirm
+	 return *this;
+}
+
+// Mat += SMat: adds rhs to a square Mat; rhs is read as consecutive runs of 1,2,...,nn values (presumably the packed lower triangle - confirm against NRSMat)
+NRMat<double> & NRMat<double>::operator+=(const NRSMat<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat+=SMat");
+#endif
+	const double *p = rhs;
+	copyonwrite();
+	for (int i=0; i<nn; i++) { // pass 1: run i (i+1 values) is added to the first i+1 elements of row i
+		cblas_daxpy(i+1, 1.0, p, 1, (*this)[i], 1); 
+		p += i+1;
+	}
+	p = rhs; p++; // restart at the second stored value, the beginning of run 1
+	for (int i=1; i<nn; i++) { // pass 2: the first i values of run i are added to column i, rows 0..i-1 (stride nn)
+		cblas_daxpy(i, 1.0, p, 1, (*this)[0]+i, nn);
+		p += i+1;
+	}
+	return *this;
+}
+NRMat< complex<double> > & // complex counterpart of Mat += SMat; rhs is read as consecutive runs of 1,2,...,nn values
+NRMat< complex<double> >::operator+=(const NRSMat< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat+=SMat");
+#endif
+	const complex<double> *p = rhs;
+	copyonwrite();
+	for (int i=0; i<nn; i++) { // pass 1: run i (i+1 values) is added to the first i+1 elements of row i
+		cblas_zaxpy(i+1, (void *)&CONE, (void *)p, 1, (void *)(*this)[i], 1); 
+		p += i+1;
+	}
+	p = rhs; p++; // restart at the beginning of run 1
+	for (int i=1; i<nn; i++) { // pass 2: column i, rows 0..i-1; it must start at (*this)[0]+i as in the double version, (*this)[i]+i began on the diagonal and ran out of bounds
+		cblas_zaxpy(i, (void *)&CONE, (void *)p, 1, (void *)((*this)[0]+i), nn);
+		p += i+1;
+	}
+	return *this;
+}
+
+// Mat -= SMat: subtracts rhs from a square Mat; rhs is read as consecutive runs of 1,2,...,nn values (presumably the packed lower triangle - confirm against NRSMat)
+NRMat<double> & NRMat<double>::operator-=(const NRSMat<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat-=SMat");
+#endif
+	const double *p = rhs;
+	copyonwrite();
+	for (int i=0; i<nn; i++) { // pass 1: run i (i+1 values) is subtracted from the first i+1 elements of row i
+		cblas_daxpy(i+1, -1.0, p, 1, (*this)[i], 1); 
+		p += i+1;
+	}
+	p = rhs; p++; // restart at the second stored value, the beginning of run 1
+	for (int i=1; i<nn; i++) { // pass 2: the first i values of run i are subtracted from column i, rows 0..i-1 (stride nn)
+		cblas_daxpy(i, -1.0, p, 1, (*this)[0]+i, nn);
+		p += i+1;
+	}
+	return *this;
+}
+NRMat< complex<double> > & // complex counterpart of Mat -= SMat; rhs is read as consecutive runs of 1,2,...,nn values
+NRMat< complex<double> >::operator-=(const NRSMat< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (nn!=mm || nn!=rhs.nrows()) laerror("incompatible matrix size in Mat-=SMat");
+#endif
+	const complex<double> *p = rhs;
+	copyonwrite();
+	for (int i=0; i<nn; i++) { // pass 1: run i (i+1 values) is subtracted from the first i+1 elements of row i
+		cblas_zaxpy(i+1, (void *)&CMONE, (void *)p, 1, (void *)(*this)[i], 1); 
+		p += i+1;
+	}
+	p = rhs; p++; // restart at the beginning of run 1
+	for (int i=1; i<nn; i++) { // pass 2: column i, rows 0..i-1; it must start at (*this)[0]+i as in the double version, (*this)[i]+i began on the diagonal and ran out of bounds
+		cblas_zaxpy(i, (void *)&CMONE, (void *)p, 1, (void *)((*this)[0]+i), nn);
+		p += i+1;
+	}
+	return *this;
+}
+
+// Mat.Mat - scalar product: sum over all elements of the element-wise products (cblas_ddot over nn*mm values)
+const double NRMat<double>::dot(const NRMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if(nn!=rhs.nn || mm!= rhs.mm) laerror("Mat.Mat incompatible matrices");
+#endif
+	return cblas_ddot(nn*mm, (*this)[0], 1, rhs[0], 1);
+}
+const complex<double> // complex scalar product; cblas_zdotc_sub conjugates its first vector, here the elements of *this
+NRMat< complex<double> >::dot(const NRMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if(nn!=rhs.nn || mm!= rhs.mm) laerror("Mat.Mat incompatible matrices");
+#endif
+	complex<double> dot;
+	cblas_zdotc_sub(nn*mm, (void *)(*this)[0], 1, (void *)rhs[0], 1, 
+			(void *)(&dot)); // the result is written through the last argument
+	return dot;
+}
+
+// Mat * Mat: ordinary matrix product via cblas_dgemm, returned as a new nn x rhs.mm matrix
+const NRMat<double> NRMat<double>::operator*(const NRMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (mm != rhs.nn) laerror("product of incompatible matrices");
+#endif
+	NRMat<double> result(nn, rhs.mm);
+	cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nn, rhs.mm, mm, 1.0,
+			*this, mm, rhs, rhs.mm, 0.0, result, rhs.mm); // beta is 0.0, so the initial content of result is not used
+	return result;
+}
+const NRMat< complex<double> > // complex matrix product via cblas_zgemm, returned as a new nn x rhs.mm matrix
+NRMat< complex<double> >::operator*(const NRMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (mm != rhs.nn) laerror("product of incompatible matrices");
+#endif
+	NRMat< complex<double> > result(nn, rhs.mm);
+	cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nn, rhs.mm, mm,
+			(const void *)(&CONE),(const void *)(*this)[0], mm, (const void *)rhs[0],
+			rhs.mm, (const void *)(&CZERO), (void *)result[0], rhs.mm); // CONE and CZERO are defined elsewhere, presumably complex 1 and 0 - confirm
+	return result;
+}
+
+// Multiply by diagonal from L: row i of *this is scaled by rhs[i], in place
+void NRMat<double>::diagmultl(const NRVec<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.size()) laerror("incompatible matrix dimension in diagmultl");
+#endif
+	copyonwrite(); // detach from shared storage before modifying in place
+	for(int i=0; i<nn; i++) cblas_dscal(mm, rhs[i], (*this)[i], 1);
+}
+void NRMat< complex<double> >::diagmultl(const NRVec< complex<double> > &rhs) // complex counterpart: row i is scaled by rhs[i]
+{
+#ifdef DEBUG
+	if (nn != rhs.size()) laerror("incompatible matrix dimension in diagmultl");
+#endif
+	copyonwrite(); // detach from shared storage before modifying in place
+	for (int i=0; i<nn; i++) cblas_zscal(mm, &rhs[i], (*this)[i], 1);
+}
+
+// Multiply by diagonal from R: column i of *this is scaled by rhs[i], in place
+void NRMat<double>::diagmultr(const NRVec<double> &rhs)
+{
+#ifdef DEBUG
+	if (mm != rhs.size()) laerror("incompatible matrix dimension in diagmultr");
+#endif
+	copyonwrite(); // detach from shared storage before modifying in place
+	for (int i=0; i<mm; i++) cblas_dscal(nn, rhs[i], (*this)[0]+i, mm); // column i starts at element (0,i) and has stride mm; (*this)[i] was the beginning of row i
+}
+void NRMat< complex<double> >::diagmultr(const NRVec< complex<double> > &rhs) // complex counterpart: column i is scaled by rhs[i], in place
+{
+#ifdef DEBUG
+	if (mm != rhs.size()) laerror("incompatible matrix dimension in diagmultr"); // the message used to name diagmultl
+#endif
+	copyonwrite(); // detach from shared storage before modifying in place
+	for (int i=0; i<mm; i++) cblas_zscal(nn, &rhs[i], (*this)[0]+i, mm); // column i starts at element (0,i) and has stride mm; (*this)[i] was the beginning of row i
+}
+
+// Mat * Smat, decomposed to nn x Vec * Smat: each row of the result comes from one packed symmetric matrix-vector product
+const NRMat<double> 
+NRMat<double>::operator*(const NRSMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (mm != rhs.nrows()) laerror("incompatible dimension in Mat*SMat");
+#endif
+	NRMat<double> result(nn, rhs.ncols());
+	for (int i=0; i<nn; i++) // result row i = rhs applied to row i of *this
+		cblas_dspmv(CblasRowMajor, CblasLower, mm, 1.0, &rhs[0], 
+				(*this)[i], 1, 0.0, result[i], 1);
+	return result;
+}
+const NRMat< complex<double> > // complex counterpart; NOTE(review): cblas_zhpmv treats rhs as Hermitian packed - confirm that this is intended for NRSMat<complex>
+NRMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (mm != rhs.nrows()) laerror("incompatible dimension in Mat*SMat");
+#endif
+	NRMat< complex<double> > result(nn, rhs.ncols());
+	for (int i=0; i<nn; i++) // result row i = rhs applied to row i of *this
+		cblas_zhpmv(CblasRowMajor, CblasLower, mm, (void *)&CONE, (void *)&rhs[0],
+				(void *)(*this)[i], 1, (void *)&CZERO, (void *)result[i], 1);
+	return result;
+}
+
+// Mat * Vec: matrix-vector product via cblas_dgemv, returned as a new vector of length nn
+const NRVec<double> 
+NRMat<double>::operator*(const NRVec<double> &vec) const
+{
+#ifdef DEBUG
+	if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec");
+#endif
+	NRVec<double> result(nn);
+	cblas_dgemv(CblasRowMajor, CblasNoTrans, nn, mm, 1.0, (*this)[0],
+			mm, &vec[0], 1, 0.0, &result[0], 1); // beta is 0.0, so the initial content of result is not used
+	return result;
+}
+const NRVec< complex<double> > // complex matrix-vector product via cblas_zgemv, returned as a new vector of length nn
+NRMat< complex<double> >::operator*(const NRVec< complex<double> > &vec) const
+{
+#ifdef DEBUG
+	if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec");
+#endif
+	NRVec< complex<double> > result(nn);
+	cblas_zgemv(CblasRowMajor, CblasNoTrans, nn, mm, (void *)&CONE, (void *)(*this)[0],
+			mm, (void *)&vec[0], 1, (void *)&CZERO, (void *)&result[0], 1);
+	return result;
+}
+
+// sum of rows: entry i is the signed sum of column i over all rows, as in the generic template (cblas_dasum, used before, adds absolute values)
+const NRVec<double> NRMat<double>::rsum() const
+{
+	NRVec<double> result(mm);
+	for (int i=0; i<mm; i++) {double s=0.; for (int j=0; j<nn; j++) s += (*this)[j][i]; result[i] = s;}
+	return result;
+}
+
+// sum of columns: entry i is the signed sum of row i over all columns, as in the generic template (cblas_dasum, used before, adds absolute values)
+const NRVec<double> NRMat<double>::csum() const
+{
+	NRVec<double> result(nn);
+	for (int i=0; i<nn; i++) {double s=0.; for (int j=0; j<mm; j++) s += (*this)[i][j]; result[i] = s;}
+	return result;
+}
+
+// complex conjugate of Mat; for real elements there is nothing to do, so *this is returned unchanged
+NRMat<double> &NRMat<double>::conjugateme() {return *this;}
+
+NRMat< complex<double> > & NRMat< complex<double> >::conjugateme() // conjugates every element in place and returns *this
+{
+	copyonwrite(); // detach from shared storage before modifying in place
+	cblas_dscal(mm*nn, -1.0, (double *)((*this)[0])+1, 2); // the storage is viewed as doubles; every second one starting at offset 1 (the imaginary parts) is negated
+	return *this;
+}
+
+// transpose and optionally conjugate: returns a new mm x nn matrix; conj is not used in the real version
+const NRMat<double> NRMat<double>::transpose(bool conj) const
+{
+	NRMat<double> result(mm,nn);
+	for(int i=0; i<nn; i++) cblas_dcopy(mm, (*this)[i], 1, result[0]+i, nn); // row i of *this becomes column i of result (stride nn)
+	return result;
+}
+const NRMat< complex<double> > // complex counterpart: returns a new mm x nn matrix, conjugated as well when conj is true
+NRMat< complex<double> >::transpose(bool conj) const
+{
+	NRMat< complex<double> > result(mm,nn);
+	for (int i=0; i<nn; i++) // row i of *this becomes column i of result (stride nn)
+		cblas_zcopy(mm, (void *)(*this)[i], 1, (void *)(result[0]+i), nn);
+	if (conj) cblas_dscal(mm*nn, -1.0, (double *)(result[0])+1, 2); // negate every second double starting at offset 1, i.e. the imaginary parts
+	return result;
+}
+
+// gemm : this = alpha*op( A )*op( B ) + beta*this, where op is the identity for 'n' and the transpose for any other flag
+void NRMat<double>::gemm(const double &beta, const NRMat<double> &a,
+		const char transa, const NRMat<double> &b, const char transb, 
+		const double &alpha)
+{
+	int l(transa=='n'?a.nn:a.mm); // rows of op(A)
+	int k(transa=='n'?a.mm:a.nn); // columns of op(A), the contracted dimension
+	int kk(transb=='n'?b.nn:b.mm); // rows of op(B)
+	int ll(transb=='n'?b.mm:b.nn); // columns of op(B)
+
+#ifdef DEBUG
+	if (l!=nn || ll!=mm || k!=kk) laerror("incompatible matrices in Mat:gemm()");
+#endif
+	if (alpha==0.0 && beta==1.0) return; // this combination leaves *this unchanged
+
+	copyonwrite(); // detach from shared storage before modifying in place
+	cblas_dgemm(CblasRowMajor, (transa=='n' ? CblasNoTrans : CblasTrans),
+			(transb=='n' ? CblasNoTrans : CblasTrans), nn, mm, k, alpha, a,
+			a.mm, b , b.mm, beta, *this , mm);
+}
+void NRMat< complex<double> >::gemm(const complex<double> & beta, // this = alpha*op( A )*op( B ) + beta*this; flag 'n' = as is, 'c' = conjugate transpose, anything else = transpose
+		const NRMat< complex<double> > & a, const char transa, 
+		const NRMat< complex<double> > & b, const char transb, 
+		const complex<double> & alpha)
+{
+	int l(transa=='n'?a.nn:a.mm); // rows of op(A)
+	int k(transa=='n'?a.mm:a.nn); // columns of op(A), the contracted dimension
+	int kk(transb=='n'?b.nn:b.mm); // rows of op(B)
+	int ll(transb=='n'?b.mm:b.nn); // columns of op(B)
+
+#ifdef DEBUG
+	if (l!=nn || ll!=mm || k!=kk) laerror("incompatible matrices in Mat:gemm()");
+#endif
+	if (alpha==CZERO && beta==CONE) return; // this combination leaves *this unchanged
+
+	copyonwrite(); // detach from shared storage before modifying in place
+	cblas_zgemm(CblasRowMajor,
+			(transa=='n' ? CblasNoTrans : (transa=='c'?CblasConjTrans:CblasTrans)), 
+			(transb=='n' ? CblasNoTrans : (transb=='c'?CblasConjTrans:CblasTrans)), // the flag for B is derived from transb; the inner test used transa
+			nn, mm, k, &alpha, a , a.mm, b , b.mm, &beta, *this , mm);
+}
+
+// norm of Mat: square root of the sum of squared elements, with scalar subtracted from the diagonal elements first
+const double  NRMat<double>::norm(const double scalar) const
+{
+	if (!scalar) return cblas_dnrm2(nn*mm, (*this)[0], 1); // no shift: BLAS Euclidean norm over all elements
+	double sum = 0;
+	for (int i=0; i<nn; i++)
+		for (int j=0; j<mm; j++) {
+			register double tmp;
+#ifdef MATPTR
+			tmp = v[i][j];
+#else
+			tmp = v[i*mm+j];
+#endif
+			if (i==j) tmp -= scalar; // shift only the diagonal
+			sum += tmp*tmp;
+		}
+	return sqrt(sum);
+}
+const double  NRMat< complex<double> >::norm(const complex<double> scalar) const // complex counterpart: square root of the sum of squared moduli, diagonal shifted by scalar
+{
+	if (scalar == CZERO) return cblas_dznrm2(nn*mm, (*this)[0], 1); // no shift: BLAS Euclidean norm over all elements
+	double sum = 0;
+	for (int i=0; i<nn; i++)
+		for (int j=0; j<mm; j++) {
+			register complex<double> tmp;
+#ifdef MATPTR
+			tmp = v[i][j];
+#else
+			tmp = v[i*mm+j];
+#endif
+			if (i==j) tmp -= scalar; // shift only the diagonal
+			sum += tmp.real()*tmp.real()+tmp.imag()*tmp.imag(); // squared modulus of the element
+		}
+	return sqrt(sum);
+}
+
+// axpy: this += alpha * mat, element-wise over all nn*mm elements (cblas_daxpy)
+void NRMat<double>::axpy(const double alpha, const NRMat<double> &mat)
+{
+#ifdef DEBUG
+	if (nn!=mat.nn || mm!=mat.mm) laerror("daxpy of incompatible matrices");
+#endif
+	copyonwrite(); // detach from shared storage before modifying in place
+	cblas_daxpy(nn*mm, alpha, mat, 1, *this, 1);
+}
+void NRMat< complex<double> >::axpy(const complex<double> alpha, // complex counterpart: this += alpha * mat (cblas_zaxpy)
+		const NRMat< complex<double> > & mat)
+{
+#ifdef DEBUG
+	if (nn!=mat.nn || mm!=mat.mm) laerror("zaxpy of incompatible matrices");
+#endif
+	copyonwrite(); // detach from shared storage before modifying in place
+	cblas_zaxpy(nn*mm, (void *)&alpha, mat, 1, (void *)(*this)[0], 1);
+}
+
+// trace of Mat: signed sum of the diagonal elements (cblas_dasum, used before, returned the sum of their absolute values)
+const double NRMat<double>::trace() const
+{
+#ifdef DEBUG
+	if (nn != mm) laerror("no-square matrix in Mat::trace()");
+#endif
+	double sum = 0.; for (int i=0; i<nn; i++) sum += (*this)[i][i]; return sum;
+}
+const complex<double> NRMat< complex<double> >::trace() const // complex trace: sum of the diagonal elements
+{
+#ifdef DEBUG
+	if (nn != mm) laerror("no-square matrix in Mat::trace()");
+#endif
+	register complex<double> sum = CZERO;
+	for (int i=0; i<nn*nn; i+=(nn+1)) // flat positions 0, nn+1, 2(nn+1), ... are the diagonal of a square matrix
+#ifdef MATPTR
+		sum += v[0][i];
+#else
+		sum += v[i];
+#endif
+	return sum;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//// forced instantiation of the stream operators in the corresponding object file; the macro's last continuation line runs into the empty line after it
+#define INSTANTIZE(T) \
+template ostream & operator<<(ostream &s, const NRMat< T > &x); \
+template istream & operator>>(istream  &s, NRMat< T > &x); \
+
+INSTANTIZE(double)
+INSTANTIZE(complex<double>)
+
+
+export template <class T> // NOTE(review): exported templates were removed from the language in C++11 - confirm the targeted compiler
+ostream& operator<<(ostream &s, const NRMat<T> &x)
+                { // writes "n m" on the first line, then one line per row with the elements separated by single spaces
+                int i,j,n,m;
+                n=x.nrows();
+                m=x.ncols();
+                s << n << ' ' << m << '\n';
+                for(i=0;i<n;i++)
+                        {
+                        for(j=0; j<m;j++) s << x[i][j] << (j==m-1 ? '\n' : ' '); // endl cannot be used in the conditional expression, since it is an overloaded function
+                        }
+                return s;
+                }
+
+export template <class T> // NOTE(review): exported templates were removed from the language in C++11 - confirm the targeted compiler
+istream& operator>>(istream  &s, NRMat<T> &x)
+                { // reads the format written by operator<<: "n m", then n*m elements row by row; the stream state is not checked
+                int i,j,n,m;
+                s >> n >> m;
+                x.resize(n,m);
+                for(i=0;i<n;i++) for(j=0; j<m;j++) s>>x[i][j] ;
+                return s;
+                }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/mat.h b/mat.h
new file mode 100644
index 0000000..2944f00
--- /dev/null
+++ b/mat.h
@@ -0,0 +1,346 @@
+#ifndef _LA_MAT_H_
+#define _LA_MAT_H_
+
+#include "vec.h"
+#include "smat.h"
+
+template <typename T> // dense nn x mm matrix; copies share one data block through a reference counter
+class NRMat {
+protected:
+	int nn; // number of rows
+	int mm; // number of columns
+#ifdef MATPTR
+	T **v; // row pointers; v[0] is the start of one contiguous nn*mm block
+#else
+	T *v; // one contiguous block of nn*mm elements, row i starts at v+i*mm
+#endif
+	int *count; // number of objects sharing v; null for an unallocated matrix
+public:
+	friend class NRVec<T>;
+	friend class NRSMat<T>;
+	
+	inline NRMat() : nn(0), mm(0), v(0), count(0) {}; // unallocated matrix
+	inline NRMat(const int n, const int m); // n x m, elements are not set
+	inline NRMat(const T &a, const int n, const int m); // n x m, every element equal to a
+	NRMat(const T *a, const int n, const int m); // n x m, elements copied from the array a
+	inline NRMat(const NRMat &rhs); // shares the data of rhs
+	explicit NRMat(const NRSMat<T> &rhs); // full square matrix from a packed symmetric one
+#ifndef MATPTR
+	NRMat(const NRVec<T> &rhs, const int n, const int m); // n x m matrix sharing the data of the vector rhs
+#endif
+	~NRMat();
+	inline int getcount() const {return count?*count:0;} // current reference count, 0 if unallocated
+	NRMat & operator=(const NRMat &rhs);  //assignment
+	NRMat & operator=(const T &a);    //assign a to diagonal
+	NRMat & operator|=(const NRMat &rhs); //assignment to a new copy
+	NRMat & operator+=(const T &a);   //add diagonal
+	NRMat & operator-=(const T &a);   //subtract diagonal
+	NRMat & operator*=(const T &a);   //multiply by a scalar
+	NRMat & operator+=(const NRMat &rhs);
+	NRMat & operator-=(const NRMat &rhs);
+	NRMat & operator+=(const NRSMat<T> &rhs);
+	NRMat & operator-=(const NRSMat<T> &rhs);
+	const NRMat operator-() const; //unary minus
+	inline const NRMat operator+(const T &a) const;
+	inline const NRMat operator-(const T &a) const;
+	inline const NRMat operator*(const T &a) const;
+	inline const NRMat operator+(const NRMat &rhs) const;
+	inline const NRMat operator-(const NRMat &rhs) const;
+	inline const NRMat operator+(const NRSMat<T> &rhs) const;
+	inline const NRMat operator-(const NRSMat<T> &rhs) const;
+	const T dot(const NRMat &rhs) const; // scalar product of Mat.Mat
+	const NRMat operator*(const NRMat &rhs) const; // Mat * Mat
+	void diagmultl(const NRVec<T> &rhs); //multiply by a diagonal matrix from L
+	void diagmultr(const NRVec<T> &rhs); //multiply by a diagonal matrix from R
+	const NRMat operator*(const NRSMat<T> &rhs) const; // Mat * Smat
+	const NRMat operator&(const NRMat &rhs) const; // direct sum
+	const NRMat operator|(const NRMat<T> &rhs) const; // direct product
+	const NRVec<T> operator*(const NRVec<T> &rhs) const; // Mat * Vec
+	const NRVec<T> rsum() const; //sum of rows
+	const NRVec<T> csum() const; //sum of columns
+	inline T* operator[](const int i);  //subscripting: pointer to row i
+	inline const T* operator[](const int i) const;
+	inline T& operator()(const int i, const int j); // (i,j) subscripts
+	inline const T& operator()(const int i, const int j) const;
+	inline int nrows() const;
+	inline int ncols() const;
+	void copyonwrite();
+	void resize(const int n, const int m);
+	inline operator T*(); //get a pointer to the data
+	inline operator const T*() const;
+	NRMat & transposeme(); // square matrices only 
+	NRMat & conjugateme(); // square matrices only
+	const NRMat transpose(bool conj=false) const;
+	const NRMat conjugate() const;
+	void gemm(const T &beta, const NRMat &a, const char transa, const NRMat &b,
+			const char transb, const T &alpha);//this = alpha*op( A )*op( B ) + beta*this
+/*
+	void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b,
+			const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this
+	void s_cutoff(const int,const int,const int,const int) const;
+*/
+	void fprintf(FILE *f, const char *format, const int modulo) const;
+	void fscanf(FILE *f, const char *format);
+	const double norm(const T scalar=(T)0) const;
+	void axpy(const T alpha, const NRMat &x); // this += a*x
+	inline const T amax() const; // element picked by the BLAS i?amax index search
+	const T trace() const;
+
+//members concerning sparse matrix
+	explicit NRMat(const SparseMat<T> &rhs);                // dense from sparse
+	NRMat & operator+=(const SparseMat<T> &rhs);
+        NRMat & operator-=(const SparseMat<T> &rhs);
+        inline void simplify() {}; //just for compatibility with sparse ones
+
+//Strassen's multiplication (better than n^3, analogous syntax to gemm)
+	void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this
+	void s_cutoff(const int,const int,const int,const int) const;
+
+};
+
+// ctors
+// n x m matrix owning a fresh block of n*m default-initialized elements (reference count 1)
+template <typename T>
+NRMat<T>::NRMat(const int n, const int m) : nn(n), mm(m), count(new int(1))
+{
+#ifdef MATPTR
+	v = new T*[n];
+	v[0] = new T[m*n];
+	for (int row=1; row<n; ++row) v[row] = v[0] + row*m;
+#else
+	v = new T[m*n];
+#endif
+}
+
+// n x m matrix with every element set to a
+template <typename T>
+NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new int(1))
+{
+	const int total = n*m;
+#ifdef MATPTR
+	v = new T*[n];
+	T *data = v[0] = new T[total];
+	for (int row=1; row<n; ++row) v[row] = v[row-1] + m;
+#else
+	T *data = v = new T[total];
+#endif
+	// a zero fill is done bytewise, any other value element by element
+	if (a == (T)0)
+		memset(data, 0, total*sizeof(T));
+	else
+		for (int k=0; k<total; ++k) data[k] = a;
+}
+
+// n x m matrix filled with a copy of the n*m elements found at a
+// (a is read as one contiguous block, row after row)
+template <typename T>
+NRMat<T>::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new int(1))
+{
+#ifdef MATPTR
+	v = new T*[n];
+	T *data = v[0] = new T[m*n];
+	for (int row=1; row<n; ++row) v[row] = v[row-1] + m;
+#else
+	T *data = v = new T[m*n];
+#endif
+	memcpy(data, a, n*m*sizeof(T));
+}
+
+// shallow copy: takes over rhs's data pointer and reference counter and bumps the counter
+template <typename T>
+NRMat<T>::NRMat(const NRMat &rhs)
+	: nn(rhs.nn), mm(rhs.mm), v(rhs.v), count(rhs.count)
+{
+	// a default-constructed rhs has no counter to increment
+	if (count != 0)
+		*count += 1;
+}
+
+// dense square matrix expanded from a packed symmetric matrix
+template <typename T>
+NRMat<T>::NRMat(const NRSMat<T> &rhs)
+{
+	nn = mm = rhs.nrows();
+	count = new int(1);
+#ifdef MATPTR
+	v = new T*[nn];
+	v[0] = new T[mm*nn];
+	for (int row=1; row<nn; ++row) v[row] = v[row-1] + mm;
+#else
+	v = new T[mm*nn];
+#endif
+	// rhs[k] is consumed in the order (0,0),(1,0),(1,1),(2,0),... and mirrored across the diagonal
+	int k = 0;
+	for (int row=0; row<nn; ++row)
+		for (int col=0; col<=row; ++col, ++k) {
+#ifdef MATPTR
+			v[row][col] = v[col][row] = rhs[k];
+#else
+			v[row*nn+col] = v[col*nn+row] = rhs[k];
+#endif
+		}
+}
+ 
+#ifndef MATPTR
+template <typename T> // n x m matrix that shares the data block and reference counter of the vector rhs
+NRMat<T>::NRMat(const NRVec<T> &rhs, const int n, const int m)
+{
+#ifdef DEBUG
+	if (n*m != rhs.nn) laerror("matrix dimensions incompatible with vector length");
+#endif
+	nn = n;
+	mm = m;
+	count = rhs.count;
+	v = rhs.v;
+	if (count) (*count)++; // same guard as the copy constructor; presumably an unallocated NRVec has a null counter too
+}
+#endif
+
+// Mat + Smat: a temporary copy of *this is updated with += and returned, *this is not modified
+template <typename T>
+inline const NRMat<T> NRMat<T>::operator+(const NRSMat<T> &rhs) const
+{
+	return NRMat<T>(*this) += rhs;
+}
+
+// Mat - Smat: a temporary copy of *this is updated with -= and returned, *this is not modified
+template <typename T>
+inline const NRMat<T> NRMat<T>::operator-(const NRSMat<T> &rhs) const
+{
+	return NRMat<T>(*this) -= rhs;
+}
+
+// Mat[i] : pointer to the first element of i-th row (writable access)
+template <typename T>
+inline T* NRMat<T>::operator[](const int i)
+{
+#ifdef DEBUG
+	if (!v) laerror("[] for unallocated Mat"); // has to come first: count is null for an unallocated Mat
+	if (*count != 1) laerror("Mat lval use of [] with count > 1");
+	if (i<0 || i>=nn) laerror("Mat [] out of range");
+#endif
+#ifdef MATPTR
+	return v[i];
+#else
+	return v+i*mm;
+#endif
+}
+template <typename T> // read-only variant of Mat[i]; shared data is allowed, so the reference count is not checked
+inline const T* NRMat<T>::operator[](const int i) const
+{
+#ifdef DEBUG
+	if (i<0 || i>=nn) laerror("Mat [] out of range");
+	if (!v) laerror("[] for unallocated Mat");
+#endif
+#ifdef MATPTR
+	return v[i];
+#else
+	return v+i*mm; // row i starts i*mm elements into the block
+#endif
+}
+
+// Mat(i,j) reference to the matrix element M_{ij} (writable access)
+template <typename T>
+inline T & NRMat<T>::operator()(const int i, const int j)
+{
+#ifdef DEBUG
+	if (!v) laerror("(,) for unallocated Mat"); // has to come first: count is null for an unallocated Mat
+	if (*count != 1) laerror("Mat lval use of (,) with count > 1");
+	if (i<0 || i>=nn || j<0 || j>=mm) laerror("Mat (,) out of range"); // valid columns are 0..mm-1
+#endif
+#ifdef MATPTR
+	return v[i][j];
+#else
+	return v[i*mm+j];
+#endif
+}
+template <typename T> // read-only variant of Mat(i,j)
+inline const T & NRMat<T>::operator()(const int i, const int j) const
+{
+#ifdef DEBUG
+	if (i<0 || i>=nn || j<0 || j>=mm) laerror("Mat (,) out of range"); // valid columns are 0..mm-1
+	if (!v) laerror("(,) for unallocated Mat");
+#endif
+#ifdef MATPTR
+	return v[i][j];
+#else
+	return v[i*mm+j];
+#endif
+}
+
+// number of rows (0 for a default-constructed matrix)
+template <typename T>
+inline int NRMat<T>::nrows() const
+{
+	return nn;
+}
+
+// number of columns (0 for a default-constructed matrix)
+template <typename T>
+inline int NRMat<T>::ncols() const
+{
+	return mm;
+}
+
+// conversion to a pointer to the first element of the data block (no reference-count check is done here)
+template <typename T>
+inline NRMat<T>::operator T* ()
+{
+#ifdef DEBUG
+	if (!v) laerror("unallocated Mat in operator T*");
+#endif
+#ifdef MATPTR
+	return v[0];
+#else
+	return v;
+#endif
+}
+template <typename T> // read-only conversion to a pointer to the first element of the data block
+inline NRMat<T>::operator const T* () const
+{
+#ifdef DEBUG
+	if (!v) laerror("unallocated Mat in operator T*");
+#endif
+#ifdef MATPTR
+	return v[0];
+#else
+	return v;
+#endif
+}
+
+// element of Mat with the largest absolute value (index found by BLAS idamax), returned with its sign
+inline const double  NRMat<double>::amax() const
+{
+#ifdef MATPTR
+	return v[0][cblas_idamax(nn*mm, v[0], 1)];
+#else
+	return v[cblas_idamax(nn*mm, v, 1)];
+#endif
+}
+inline const complex<double>  NRMat< complex<double> >::amax() const // element selected by BLAS izamax (largest |re|+|im|)
+{
+#ifdef MATPTR
+	return v[0][cblas_izamax(nn*mm, (void *)v[0], 1)];
+#else
+	return v[cblas_izamax(nn*mm, (void *)v, 1)];
+#endif
+}
+
+
+// I/O
+template <typename T> extern ostream& operator<<(ostream &s, const NRMat<T> &x);
+template <typename T> extern istream& operator>>(istream  &s, NRMat<T> &x);
+
+
+
+
+
+// generate operators: Mat + a, a + Mat, Mat * a
+NRVECMAT_OPER(Mat,+)
+NRVECMAT_OPER(Mat,-)
+NRVECMAT_OPER(Mat,*)
+// generate Mat + Mat, Mat - Mat
+NRVECMAT_OPER2(Mat,+)
+NRVECMAT_OPER2(Mat,-)
+
+#endif /* _LA_MAT_H_ */
diff --git a/matexp.h b/matexp.h
new file mode 100644
index 0000000..ffb9bfc
--- /dev/null
+++ b/matexp.h
@@ -0,0 +1,259 @@
+//general routine for a polynomial of a matrix, tuned to minimize the number
+//of matrix-matrix multiplications at the cost of additions and memory
+// the polynom and exp routines will work on any type for which a traits class
+// is defined containing the definition of an element type, norm and axpy operation
+
+#include "la_traits.h"
+#include "sparsemat_traits.h"
+
+template<class T,class R>
+const T polynom2(const T &x, const NRVec<R> &c)
+{
+//plain Horner evaluation, serves as the trivial reference implementation
+const int degree=c.size()-1;
+T acc;
+
+if(degree==0)
+	{
+	acc=x; //borrow the dimensions of x first: a scalar matrix cannot be constructed without knowing its size
+	acc=c[0];
+	return acc;
+	}
+T term=x*c[degree];
+for(int i=degree-1; i>=0; --i)
+	{
+	if(i!=degree-1) term=acc*x;
+	acc=term+c[i];
+	}
+return acc;
+}
+
+//c[0]+c[1]*x+...+c[n]*x^n evaluated in chunks of m terms using the stored powers x..x^m; m is chosen to save multiplications
+template<class T,class R>
+const T polynom(const T &x, const NRVec<R> &c)
+{
+int n=c.size()-1;
+int i,j,k,m=0,t;
+
+if(n<=4) return polynom2(x,c); //here the horner scheme is optimal
+
+//first find m which minimizes the number of multiplications
+j=10*n;
+for(i=2;i<=n+1;i++)
+    {	
+    t=i-2+2*(n/i)-((n%i)?0:1); //the parentheses are essential: ?: binds weaker than + and -
+    if(t<j)
+	{
+	j=t;
+	m=i;
+	}
+    }
+
+//allocate array for powers up to m
+T *xpows = new T[m];
+xpows[0]=x;
+for(i=1;i<m;i++) xpows[i]=xpows[i-1]*x;
+
+
+//run the summation loop
+T r,s,f;
+k= -1;
+for(i=0; i<=n/m;i++)
+	{
+	for(j=0;j<m;j++)
+		{
+		k++;
+		if(k>n) break;
+		if(j==0) {if(i==0) s=x; /*just to get the dimensions of the matrix*/ s=c[k]; /*create diagonal matrix*/}
+		else  
+			NRMat_traits<T>::axpy(s,xpows[j-1],c[k]); //general  s+=xpows[j-1]*c[k]; but more efficient for matrices
+		}
+
+	if(i==0) {r=s; f=xpows[m-1];}
+	else
+		{
+		r+= s*f;
+		if(i<n/m) f=f*xpows[m-1]; //the next power of x^m is not needed after the last chunk
+		}
+	}
+ 
+delete[] xpows;
+return r;
+}
+
+
+//nested commutator for general objects: starts from x and commutes with y from the right, or starts from y and commutes with x from the left
+template<class T>
+const T ncommutator ( const T &x, const T &y, int nest=1, const bool right=1)
+{
+T acc;
+if(right) for(acc=x; nest>0; --nest) acc=acc*y-y*acc;
+else for(acc=y; nest>0; --nest) acc=x*acc-acc*x;
+return acc;
+}
+
+template<class T> //nested anticommutator, same conventions as ncommutator
+const T nanticommutator ( const T &x, const T &y, int nest=1, const bool right=1)
+{
+T acc;
+if(right) for(acc=x; nest>0; --nest) acc=acc*y+y*acc;
+else for(acc=y; nest>0; --nest) acc=x*acc+acc*x;
+return acc;
+}
+
+//general BCH expansion (can be written more efficiently in a specialization for matrices)
+template<class T>
+const T BCHexpansion (const T &h, const T &t, const int n, const bool verbose=1)
+{
+T result=h; //order 0 term
+double factor=1.; //1/i!
+T z=h; //i-fold nested commutator [[h,t],t]...
+for(int i=1; i<=n; ++i)
+	{
+	factor/=i;
+	z= z*t-t*z;
+	if(verbose) cerr << "BCH contribution at order "<<i<<" : "<<z.norm()<<endl;
+	result+= z*factor; 
+	}
+return result;
+}
+
+//integer power x^i by repeated squaring
+template<class T>
+const T ipow( const T &x, int i)
+{
+if(i<0) laerror("negative exponent in ipow");
+if(i==0)
+	{
+	T unit=x; //x is copied only as a trick to get the matrix dimension
+	unit=1.;
+	return unit;
+	}
+if(i==1) return x;
+T square,result;
+for(square=x; (i&1)==0; i>>=1) square=square*square; //strip the trailing zero bits of the exponent
+result=square;
+for(i>>=1; i!=0; i>>=1)
+	{
+	square=square*square;
+	if(i&1) result=result*square;
+	}
+return result;
+}
+
+inline int nextpow2(const double n) //smallest p>=0 such that n/2^p<=.75
+{
+const double log2=log(2.);
+if(n<=.75) return 0; //try to keep the taylor expansion short
+if(n<=1.) return 1;
+return int(ceil(log(n/.75)/log2)); //the .75 offset has to be taken to base 2 as well
+}
+
+
+template<class T> //returns the Taylor coefficients of exp rescaled by powers of 2^-power; power is an output chosen from the norm of x
+NRVec<typename NRMat_traits<T>::elementtype> exp_aux(const T &x, int &power)
+{
+//should better be computed by mathematica to have accurate last digits, chebyshev instead, see exp in glibc
+static double exptaylor[]={ //1/k! for k=0..20, followed by a 0. sentinel
+1.,
+1.,
+0.5,
+0.1666666666666666666666,
+0.0416666666666666666666,
+0.0083333333333333333333,
+0.0013888888888888888888,
+0.00019841269841269841253,
+2.4801587301587301566e-05,
+2.7557319223985892511e-06,
+2.7557319223985888276e-07,
+2.5052108385441720224e-08,
+2.0876756987868100187e-09,
+1.6059043836821613341e-10,
+1.1470745597729724507e-11,
+7.6471637318198164055e-13,
+4.7794773323873852534e-14,
+2.8114572543455205981e-15,
+1.5619206968586225271e-16,
+8.2206352466243294955e-18,
+4.1103176233121648441e-19,
+0.};
+double mnorm= NRMat_traits<T>::norm(x);
+power=nextpow2(mnorm);
+double scale=exp(-log(2.)*power); //2^-power
+
+
+//find how long taylor expansion will be necessary
+const double precision=1e-16;
+double s,t;
+s=mnorm*scale; //norm of the scaled argument
+int n=0;
+t=1.;
+do	{
+	n++;
+	t*=s; //t = s^n
+	}
+while(t*exptaylor[n]>precision);//taylor 0 will terminate in any case
+
+
+int i; //adjust the coefficients in order to avoid scaling the argument
+NRVec<typename NRMat_traits<T>::elementtype> taylor2(n+1);
+for(i=0,t=1.;i<=n;i++)
+	{
+	taylor2[i]=exptaylor[i]*t; //coefficient i carries the factor scale^i
+	t*=scale;
+	}
+return taylor2;
+}
+
+
+
+template<class T>
+const T exp(const T &x)
+{
+//Taylor coefficients with the scaling of the argument already folded in
+int power;
+const NRVec<typename NRMat_traits<T>::elementtype> coeffs=exp_aux(x,power);
+
+//for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices
+T result=polynom(x,coeffs);
+
+//undo the scaling by squaring the result power times
+while(power-- > 0) result=result*result;
+return result;
+}
+
+
+template<class MAT> //determinant obtained as a by-product of linear_solve called without right-hand sides
+const typename NRMat_traits<MAT>::elementtype determinant(MAT a)//again passed by value
+{
+typename NRMat_traits<MAT>::elementtype det;
+if(a.nrows()!=a.ncols()) laerror("determinant of non-square matrix");
+linear_solve(a,NULL,&det); //works on the local copy a
+return det;
+}
+
+
+template<class M, class V>
+const V exptimes(const M &mat, V vec) //uses just matrix vector multiplication
+{
+if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)vec.size()) laerror("inappropriate sizes in exptimes");
+//scaled Taylor coefficients of exp; the scaling is undone by applying the polynomial 2^power times
+int power;
+NRVec<typename NRMat_traits<M>::elementtype> coeffs=exp_aux(mat,power);
+const int repeats= 1<<power;
+
+V result(mat.nrows());
+for(int pass=0; pass<repeats; ++pass) //unfortunately the polynomial has to be re-applied many times, unlike when the matrix is stored explicitly
+	{
+	if(pass>0) vec=result; //feed the output of the previous application back in
+	//accumulate sum_j coeffs[j]*mat^j*vec with one matrix-vector product per term
+	V term=vec;
+	result=term*coeffs[0];
+	for(int j=1; j<coeffs.size(); ++j)
+		{
+		term=mat*term;
+		result.axpy(coeffs[j],term);
+		}
+	}
+return result;
+}
diff --git a/nonclass.cc b/nonclass.cc
new file mode 100644
index 0000000..eae721c
--- /dev/null
+++ b/nonclass.cc
@@ -0,0 +1,524 @@
+extern "C" {
+#include "atlas_enum.h"
+#include "clapack.h"
+}
+#include "la.h"
+
+#ifdef FORTRAN_
+#define FORNAME(x) x##_
+#else
+#define FORNAME(x) x
+#endif
+
+#define INSTANTIZE(T) \
+template void lawritemat(FILE *file,const T *a,int r,int c,const char *form0, \
+		int nodim,int modulo, int issym);
+INSTANTIZE(double)
+INSTANTIZE(complex<double>)
+// print an r x c matrix a (row by row, or packed lower triangle when issym); form0 = title text followed by a printf format for one element
+template <typename T>
+void lawritemat(FILE *file,const T *a,int r,int c,const char *form0,
+		int nodim,int modulo, int issym)
+{
+	int i,j;
+	const char *f;
+
+	/*print out title before %*/
+	f=form0;
+	skiptext:
+	while (*f && *f !='%' ) {fputc(*f++,file);}
+	if (*f=='%' && f[1]=='%') {
+		fputc(*f,file); f+=2; 
+		goto skiptext;
+	}
+	/* this has to be avoided when const arguments should be allowed *f=0; */
+	/*use the rest as a format for numbers*/
+
+	if (modulo) nodim=0;
+	if (nodim==2) fprintf(file,"%d %d\n",r,c);
+	if (nodim==1) fprintf(file,"%d\n",c);
+	if (modulo) {
+		int n1, n2, l, m;
+		char ff[32];
+		/* prepare integer format for column numbering */
+		if (!*f || sscanf(f+1,"%d",&l) != 1) l=128/modulo; /* no format left: do not read past the terminator */
+		l -= 2;
+		m = l/2;
+		l = l-m;
+		sprintf(ff,"%%%ds%%3d%%%ds", l, m);
+		n1 = 1;
+		while(n1 <= c) {
+			n2=n1+modulo-1;
+			if (n2 > c) n2 = c;
+
+			/*write block between columns n1 and n2 */
+			fprintf(file,"\n    ");
+			for (i=n1; i<=n2; i++) fprintf(file,ff," ",i," ");
+			fprintf(file,"\n\n");
+
+			for (i=1; i<=r; i++) {
+				fprintf(file, "%3d ", i);
+				for (j=n1; j<=n2; j++) {
+					if(issym) {
+						int ii,jj; /* 0-based (row,column) inside the lower triangle */
+						if (i >= j) {
+							ii=i-1; 
+							jj=j-1;
+						} else {
+							ii=j-1; 
+							jj=i-1;
+						}
+						fprintf(file, f, ((complex<double>)a[ii*(ii+1)/2+jj]).real(), ((complex<double>)a[ii*(ii+1)/2+jj]).imag());
+					} else fprintf(file, f, ((complex<double>)a[(i-1)*c+j-1]).real(), ((complex<double>)a[(i-1)*c+j-1]).imag());
+					if (j < n2) fputc(' ',file);
+				}
+				fprintf(file, "\n");
+			}
+			n1 = n2+1;
+		}
+	} else {
+		for (i=1; i<=r; i++) {
+			for (j=1; j<=c; j++) {
+				if (issym) {
+					int ii,jj; /* 0-based (row,column) inside the lower triangle */
+					if (i >= j) {
+						ii=i-1; 
+						jj=j-1;
+					} else {
+						ii=j-1; 
+						jj=i-1;
+					}
+					fprintf(file, f, ((complex<double>)a[ii*(ii+1)/2+jj]).real(), ((complex<double>)a[ii*(ii+1)/2+jj]).imag());
+				} else fprintf(file,f,((complex<double>)a[(i-1)*c+j-1]).real(), ((complex<double>)a[(i-1)*c+j-1]).imag());
+				putc(j<c?' ':'\n',file);
+			}
+		}
+	}
+}
+
+// LA errorr handler
+void laerror(const char *s1, const char *s2, const char *s3, const char *s4)
+{
+  std::cerr << "LA:ERROR - ";
+  if(!s1)
+    std::cerr << "udefined.";
+  else {
+    if(s1) std::cerr << s1;
+    if(s2) std::cerr << s2;
+    if(s3) std::cerr << s3;
+    if(s4) std::cerr << s4;
+  }
+  std::cerr << endl;
+  exit(1);
+}
+
+//////////////////////
+// LAPACK interface //
+//////////////////////
+
+// A will be overwritten, B will contain the solutions, A is nxn, B is rhs x n; B may be null, det (optional) receives det(A)
+void linear_solve(NRMat<double> &A, NRMat<double> *B, double *det)
+{
+	int r, *ipiv;
+	
+	if (A.nrows() != A.ncols()) laerror("linear_solve() call for non-square matrix");
+	if (B && A.nrows() != B->ncols()) laerror("incompatible matrices in linear_solve()");
+	A.copyonwrite();
+	if (B) B->copyonwrite();
+	ipiv = new int[A.nrows()];
+	r = clapack_dgesv(CblasRowMajor, A.nrows(), B ? B->nrows() : 0, A[0], A.ncols(),
+			ipiv, B ? B[0] : (double *)0, B ? B->ncols() : A.nrows()); // B[0] is *B, converted by NRMat::operator T*()
+	if (r < 0) {
+		delete[] ipiv;
+		laerror("illegal argument in lapack_gesv");
+	}
+	if (det && r>=0) {
+		*det = A[0][0]; // product of the diagonal of the factorized A
+		for (int i=1; i<A.nrows(); ++i) *det *= A[i][i];
+		//change sign of det by parity of ipiv permutation (assumes the 0-based pivot indices of ATLAS clapack)
+		for (int i=0; i<A.nrows(); ++i) if (ipiv[i] != i) *det = -(*det);
+	}
+	delete [] ipiv;
+	if (r>0 && B) laerror("singular matrix in lapack_gesv");
+}
+
+
+// The next routines are not available in clapack; the Fortran ones will be used with an
+// additional swap/transpose of outputs when needed
+
+extern "C" void FORNAME(dspsv)(const char *UPLO, const int *N, const int *NRHS,
+		double *AP, int *IPIV, double *B, const int *LDB, int *INFO);
+
+void linear_solve(NRSMat<double> &a, NRMat<double> *b, double *det) // packed symmetric variant built on Fortran dspsv; b (rhs x n) may be null
+{
+	int r, *ipiv;
+	if (det) cerr << "@@@ sign of the determinant not implemented correctly yet\n";
+	if (b && a.nrows() != b->ncols())
+		laerror("incompatible matrices in symmetric linear_solve()");
+	a.copyonwrite();
+	if (b) b->copyonwrite();
+	ipiv = new int[a.nrows()];
+	char U = 'U';
+	int n = a.nrows();
+	int nrhs = 0; // no right-hand sides when b is null
+	if (b) nrhs = b->nrows();
+	int ldb = b ? b->ncols() : a.nrows();
+	FORNAME(dspsv)(&U, &n, &nrhs, a, ipiv, b?(*b)[0]:0, &ldb,&r);
+	if (r < 0) {
+		delete[] ipiv;
+		laerror("illegal argument in spsv() call of linear_solve()");
+	}
+	if (det && r >= 0) {
+		*det = a(0,0); // NOTE(review): plain product of the diagonal; dspsv may also use 2x2 pivot blocks - confirm
+		for (int i=1; i<a.nrows(); i++) *det *= a(i,i);
+		for (int i=0; i<a.nrows(); i++)
+			if (ipiv[i] != i) *det = -(*det); // NOTE(review): Fortran pivots are presumably 1-based while i is 0-based - see the warning printed above
+	}
+	delete[] ipiv;
+	if (r > 0 && b) laerror("singular matrix in linear_solve(SMat&, Mat*, double*");
+}
+
+
+extern "C" void FORNAME(dsyev)(const char *JOBZ, const char *UPLO, const int *N,
+		double *A, const int *LDA, double *W, double *WORK, const int *LWORK, int *INFO);
+
+// a will contain eigenvectors, w eigenvalues (symmetric eigenproblem via Fortran dsyev)
+void diagonalize(NRMat<double> &a, NRVec<double> &w, const bool eivec, 
+		const bool corder)
+{
+	int n = a.nrows();
+	if (n != a.ncols()) laerror("diagonalize() call with non-square matrix");
+	if (a.nrows() != w.size()) 
+		laerror("inconsistent dimension of eigenvalue vector in diagonalize()");
+
+	a.copyonwrite();
+	w.copyonwrite();
+
+	int r = 0;
+	char U ='U';
+	char vectors = 'V';
+	if (!eivec) vectors = 'N'; // eigenvalues only
+	int LWORK = -1; // -1 requests a workspace size query
+	double WORKX;
+
+	// First call is to determine size of workspace
+	FORNAME(dsyev)(&vectors, &U, &n, a, &n, w, (double *)&WORKX, &LWORK, &r );
+	LWORK = (int)WORKX;
+	double *WORK = new double[LWORK];
+	FORNAME(dsyev)(&vectors, &U, &n, a, &n, w, WORK, &LWORK, &r );
+	delete[] WORK;
+	if (vectors == 'V' && corder) a.transposeme(); // convert the Fortran (column-wise) result to C order
+
+	if (r < 0) laerror("illegal argument in syev() of diagonalize()");
+	if (r > 0) laerror("convergence problem in syev() of diagonalize()");
+}
+
+
+extern "C" void FORNAME(dspev)(const char *JOBZ, const char *UPLO, const int *N,
+		double *AP, double *W, double *Z, const int *LDZ, double *WORK, int *INFO);
+
+// v will contain eigenvectors, w eigenvalues (packed symmetric eigenproblem via Fortran dspev); v may be null
+void diagonalize(NRSMat<double> &a, NRVec<double> &w, NRMat<double> *v,
+		const bool corder)
+{
+	int n = a.nrows();
+	if (v) if (v->nrows() != v ->ncols() || n != v->nrows())
+		laerror("diagonalize() call with inconsistent dimensions");
+	if (n != w.size()) laerror("inconsistent dimension of eigenvalue vector");
+
+	a.copyonwrite();
+	w.copyonwrite();
+
+	int r = 0;
+	char U = 'U';
+	char job = v ? 'v' : 'n'; // eigenvectors are computed only when v is given
+
+	double *WORK = new double[3*n];
+	FORNAME(dspev)(&job, &U, &n, a, w, v?(*v)[0]:(double *)0, &n, WORK,  &r );
+	delete[] WORK;
+	if (v && corder) v->transposeme(); // convert the Fortran (column-wise) result to C order
+
+	if (r < 0) laerror("illegal argument in spev() of diagonalize()");
+	if (r > 0) laerror("convergence problem in spev() of diagonalize()");
+}
+
+
+extern "C" void FORNAME(dgesvd)(const char *JOBU,  const char *JOBVT,  const int *M,
+		const int *N,  double *A, const int *LDA, double *S, double *U, const int *LDU,
+		double *VT, const int *LDVT, double *WORK, const int *LWORK, int *INFO );
+// singular value decomposition of the m x n matrix a via Fortran dgesvd; a is overwritten, u and v are optional outputs
+void singular_decomposition(NRMat<double> &a, NRMat<double> *u, NRVec<double> &s,
+		NRMat<double> *v, const bool corder)
+{
+	int m = a.nrows();
+	int n = a.ncols();
+	if (u) if (m != u->nrows() || m!= u->ncols())
+		laerror("inconsistent dimension of U Mat in singular_decomposition()");
+	if (s.size() < m && s.size() < n) 
+		laerror("inconsistent dimension of S Vec in singular_decomposition()");
+	if (v) if (n != v->nrows() || n != v->ncols())
+		laerror("inconsistent dimension of V Mat in singular_decomposition()");
+
+	a.copyonwrite();
+	s.copyonwrite();
+	if (u) u->copyonwrite();
+	if (v) v->copyonwrite();
+	
+	// C-order (transposed) input and swap u,v matrices,
+	// v should be transposed at the end
+	char jobu = u ? 'A' : 'N';
+	char jobv = v ? 'A' : 'N';
+	double work0;
+	int lwork = -1; // -1 requests a workspace size query
+	int r;
+	FORNAME(dgesvd)(&jobv, &jobu, &n, &m, a, &n, s, v?(*v)[0]:0, &n,
+			u?(*u)[0]:0, &m, &work0, &lwork, &r);
+	lwork = (int) work0;
+	double *work = new double[lwork];
+	FORNAME(dgesvd)(&jobv, &jobu, &n, &m, a, &n, s, v?(*v)[0]:0, &n,
+			u?(*u)[0]:0, &m, work, &lwork, &r); // must get the allocated workspace, not the single double work0
+	delete[] work;
+	if (v && corder) v->transposeme();
+
+	if (r < 0) laerror("illegal argument in gesvd() of singular_decomposition()");
+	if (r > 0) laerror("convergence problem in gesvd() of singular_decomposition()");
+}
+
+
+extern "C" void FORNAME(dgeev)(const char *JOBVL, const char *JOBVR, const int *N,
+		double *A, const int *LDA, double *WR, double *WI, double *VL, const int *LDVL,
+		double *VR, const int *LDVR, double *WORK, const int *LWORK, int *INFO );
+// general (nonsymmetric) eigenproblem via Fortran dgeev: wr/wi = real/imaginary parts of eigenvalues, vl/vr = optional eigenvector outputs
+void gdiagonalize(NRMat<double> &a, NRVec<double> &wr, NRVec<double> &wi,
+		NRMat<double> *vl, NRMat<double> *vr, const bool corder)
+{
+	int n = a.nrows();
+	if (n != a.ncols()) laerror("gdiagonalize() call for a non-square matrix");
+	if (n != wr.size()) 
+		laerror("inconsistent dimension of eigen vector in gdiagonalize()");
+	if (vl) if (n != vl->nrows() || n != vl->ncols())
+		laerror("inconsistent dimension of vl in gdiagonalize()");
+	if (vr) if (n != vr->nrows() || n != vr->ncols())
+		laerror("inconsistent dimension of vr in gdiagonalize()");
+
+	a.copyonwrite();
+	wr.copyonwrite();
+	wi.copyonwrite();
+	if (vl) vl->copyonwrite();
+	if (vr) vr->copyonwrite();
+	
+	char jobvl = vl ? 'V' : 'N';
+	char jobvr = vr ? 'V' : 'N';
+	double work0;
+	int lwork = -1; // -1 requests a workspace size query
+	int r;
+	FORNAME(dgeev)(&jobvr, &jobvl, &n, a, &n, wr, wi, vr?vr[0]:(double *)0,
+			&n, vl?vl[0]:(double *)0, &n, &work0, &lwork, &r); // left/right are swapped on purpose
+	lwork = (int) work0;
+	double *work = new double[lwork];
+	FORNAME(dgeev)(&jobvr, &jobvl, &n, a, &n, wr, wi, vr?vr[0]:(double *)0,
+			&n, vl?vl[0]:(double *)0, &n, work, &lwork, &r); // must get the allocated workspace, not the single double work0
+	delete[] work;
+
+	if (corder) {
+		if (vl) vl->transposeme();
+		if (vr) vr->transposeme();
+	}
+
+	if (r < 0) laerror("illegal argument in geev() of gdiagonalize()");
+	if (r > 0) laerror("convergence problem in geev() of gdiagonalize()");
+}
+
+void gdiagonalize(NRMat<double> &a, NRVec< complex<double> > &w, // complex-valued wrapper around the real gdiagonalize
+		NRMat< complex<double> >*vl, NRMat< complex<double> > *vr)
+{
+	int n = a.nrows();
+	if(n != a.ncols()) laerror("gdiagonalize() call for a non-square matrix");
+
+	NRVec<double> wr(n), wi(n);
+	NRMat<double> *rvl = 0;
+	NRMat<double> *rvr = 0;
+	if (vl) rvl = new NRMat<double>(n, n);
+	if (vr) rvr = new NRMat<double>(n, n);
+	gdiagonalize(a, wr, wi, rvl, rvr, 0); // corder=0: the real eigenvector matrices are not transposed
+	
+	//process the results into complex matrices
+	int i;
+	for (i=0; i<n; i++) w[i] = complex<double>(wr[i], wi[i]);
+	if (rvl || rvr) {
+		i = 0;
+		while (i < n) {
+			if (wi[i] == 0) { // real eigenvalue: row i is copied as it is
+				if (vl) for (int j=0; j<n; j++) (*vl)[i][j] = (*rvl)[i][j];
+				if (vr) for (int j=0; j<n; j++) (*vr)[i][j] = (*rvr)[i][j];
+				i++;
+			} else { // complex pair: rows i and i+1 hold the real and imaginary parts, the second vector is the conjugate
+				if (vl)
+					for (int j=0; j<n; j++) {
+						(*vl)[i][j] = complex<double>((*rvl)[i][j], (*rvl)[i+1][j]);
+						(*vl)[i+1][j] = complex<double>((*rvl)[i][j], -(*rvl)[i+1][j]);
+					} 
+				if (vr)
+					for (int j=0; j<n; j++) {
+						(*vr)[i][j] = complex<double>((*rvr)[i][j], (*rvr)[i+1][j]);
+						(*vr)[i+1][j] = complex<double>((*rvr)[i][j], -(*rvr)[i+1][j]);
+					}
+				i += 2;
+			}
+		}
+	}
+	if (rvl) delete rvl;
+	if (rvr) delete rvr;
+}
+
+
+const NRMat<double> realpart(const NRMat< complex<double> > &a) // real parts of all elements of a
+{
+	NRMat<double> result(a.nrows(), a.ncols());
+	cblas_dcopy(a.nrows()*a.ncols(), (const double *)a[0], 2, result, 1); // stride 2 picks every other double, starting at the first real part
+	return result;
+}
+
+const NRMat<double> imagpart(const NRMat< complex<double> > &a) // imaginary parts of all elements of a
+{
+	NRMat<double> result(a.nrows(), a.ncols());
+	cblas_dcopy(a.nrows()*a.ncols(), (const double *)a[0]+1, 2, result, 1); // offset 1 with stride 2 picks the imaginary parts
+	return result;
+}
+
+const NRMat< complex<double> > realmatrix (const NRMat<double> &a) // complex matrix whose real parts are copied from a
+{
+	NRMat <complex<double> > result(a.nrows(), a.ncols());
+	cblas_dcopy(a.nrows()*a.ncols(), a, 1, (double *)result[0], 2); // only the real slots are written here
+	return result;
+}
+
+const NRMat< complex<double> > imagmatrix (const NRMat<double> &a) // complex matrix whose imaginary parts are copied from a
+{
+	NRMat< complex<double> > result(a.nrows(), a.ncols());
+	cblas_dcopy(a.nrows()*a.ncols(), a, 1, (double *)result[0]+1, 2); // only the imaginary slots are written here
+	return result;
+}
+
+
+NRMat<double> matrixfunction(NRMat<double> a, complex<double> // f applied to a general real matrix through its eigendecomposition
+		(*f)(const complex<double> &), const bool adjust)
+{
+	int n = a.nrows();
+	NRMat< complex<double> > u(n, n), v(n, n);
+	NRVec< complex<double> > w(n);
+	gdiagonalize(a, w, &u, &v);
+	NRVec< complex<double> > z = diagofproduct(u, v, 1, 1); // overlaps of matching left/right eigenvectors
+
+	for (int i=0; i<a.nrows(); i++) w[i] = (*f)(w[i])/z[i]; // f acts on the eigenvalue; the overlap only normalizes the term
+	u.diagmultl(w);
+
+	NRMat< complex<double> > r(n, n);
+	r.gemm(0.0, v, 'c', u, 'n', 1.0);
+	double inorm = cblas_dnrm2(n*n, (double *)r[0]+1, 2); // norm of the imaginary parts, expected to vanish
+	if (inorm > 1e-10) {
+		cout << "norm = " << inorm << endl;
+		laerror("nonzero norm of imaginary part of real matrixfunction");
+	}
+	return realpart(r);
+}
+
+NRMat<double> matrixfunction(NRSMat<double> a, double (*f) (double)) // f applied to a symmetric matrix through its eigendecomposition
+{
+	int n = a.nrows();
+	NRVec<double> w(n);
+	NRMat<double> v(n, n);
+	diagonalize(a, w, &v, 0); // corder=0: v is not transposed after the Fortran call
+
+	for (int i=0; i<a.nrows(); i++) w[i] = (*f)(w[i]); // apply f to every eigenvalue
+	NRMat<double> u = v;
+	v.diagmultl(w);
+	NRMat<double> r(n, n);
+	r.gemm(0.0, u, 't', v, 'n', 1.0); // r = u^T * v
+	return r;
+}
+
+// wrap the log template in an ordinary function whose address can be passed to matrixfunction()
+complex<double> myclog (const complex<double> &x) 
+{
+	return log(x);
+}
+
+NRMat<double>  log(const NRMat<double> &a) // matrix logarithm: matrixfunction() with the complex log
+{
+	return matrixfunction(a, &myclog, 1);
+}
+
+
+const NRVec<double> diagofproduct(const NRMat<double> &a, const NRMat<double> &b,
+		bool trb, bool conjb)
+{
+	// with trb both matrices need the same shape, otherwise b has to be shaped like the transpose of a
+	const bool shapes_ok = trb ? (a.nrows() == b.nrows() && a.ncols() == b.ncols())
+				: (a.nrows() == b.ncols() && a.ncols() == b.nrows());
+	if (!shapes_ok) laerror("incompatible Mats in diagofproduct<double>()");
+
+	NRVec<double> result(a.nrows());
+	// element i: row i of a dotted with row i of b (trb) or with column i of b; conjb is not used for real data
+	for(int i=0; i<a.nrows(); i++)
+		result[i] = trb ? cblas_ddot(a.ncols(), a[i], 1, b[i], 1)
+				: cblas_ddot(a.ncols(), a[i], 1, b[0]+i, b.ncols());
+
+	return result;
+}
+
+
+const NRVec< complex<double> > diagofproduct(const NRMat< complex<double> > &a, // complex variant: conjb selects zdotc (b conjugated) over zdotu
+		const NRMat< complex<double> > &b, bool trb, bool conjb)
+{
+	if (trb && (a.nrows() != b.nrows() || a.ncols() != b.ncols()) ||
+				!trb && (a.nrows() != b.ncols() || a.ncols() != b.nrows()))
+			laerror("incompatible Mats in diagofproduct<complex>()");
+	NRVec< complex<double> > result(a.nrows());
+	if (trb) { // row i of a with row i of b
+		if (conjb) {
+			for(int i=0; i<a.nrows(); i++)
+				cblas_zdotc_sub(a.ncols(), b[i], 1, a[i], 1, &result[i]);
+		} else {
+			for(int i=0; i<a.nrows(); i++)
+				cblas_zdotu_sub(a.ncols(), b[i], 1, a[i], 1, &result[i]);
+		}
+	} else { // row i of a with column i of b (stride b.ncols())
+		if (conjb) {
+			for(int i=0; i<a.nrows(); i++)
+				cblas_zdotc_sub(a.ncols(), b[0]+i, b.ncols(), a[i], 1, &result[i]);
+		} else {
+			for(int i=0; i<a.nrows(); i++)
+				cblas_zdotu_sub(a.ncols(), b[0]+i, b.ncols(), a[i], 1, &result[i]);
+		}
+	}
+	return result;
+}
+
+
+double trace2(const NRMat<double> &a, const NRMat<double> &b, bool trb) // trace of a*b, or of a*b^T when trb is set
+{
+	if (trb && (a.nrows() != b.nrows() || a.ncols() != b.ncols()) ||
+				!trb && (a.nrows() != b.ncols() || a.ncols() != b.nrows()))
+			laerror("incompatible Mats in trace2()");
+	if (trb) return cblas_ddot(a.nrows()*a.ncols(), a, 1, b, 1); // elementwise product summed over the whole block
+
+	double sum = 0.0;
+	for (int i=0; i<a.nrows(); i++)
+		sum += cblas_ddot(a.ncols(), a[i], 1, b[0]+i, b.ncols()); // row i of a with column i of b
+
+	return sum;
+}
+
+
+double trace2(const NRSMat<double> &a, const NRSMat<double> &b,
+		const bool diagscaled)
+{
+	if (a.nrows() != b.nrows()) laerror("incompatible SMats in trace2()");
+	// twice the packed dot product counts every diagonal term a_ii*b_ii twice
+	double r = 2.0*cblas_ddot(a.nrows()*(a.nrows()+1)/2, a, 1, b, 1);
+	if (!diagscaled) // remove the surplus diagonal terms; skipped when the caller says the diagonals are (presumably) pre-scaled
+		for (int i=0; i<a.nrows(); i++) r -= a(i,i)*b(i,i);
+	return r;
+}
+
diff --git a/nonclass.h b/nonclass.h
new file mode 100644
index 0000000..de4b484
--- /dev/null
+++ b/nonclass.h
@@ -0,0 +1,85 @@
+#include "vec.h"
+#include "smat.h"
+#include "mat.h"
+
+//MISC
+template <class T> extern const NRMat<T> diagonalmatrix(const NRVec<T> &x);
+template <class T> extern const NRVec<T> lineof(const NRMat<T> &x, const int i); 
+template <class T> extern const NRVec<T> columnof(const NRMat<T> &x, const int i);
+template <class T> extern const NRVec<T> diagonalof(const NRMat<T> &x); 
+
+//commutator op(x)*op(y) - op(y)*op(x) of full matrices built by two gemm calls; trx/tryy select the transposed operand
+template<class T> inline const NRMat<T> commutator ( const NRMat<T> &x, const NRMat<T> &y, const bool trx=0, const bool tryy=0)
+{
+const char opx = trx ? 't' : 'n', opy = tryy ? 't' : 'n';
+NRMat<T> r(trx?x.ncols():x.nrows(), tryy?y.nrows():y.ncols());
+r.gemm((T)0, x, opx, y, opy, (T)1);	//presumably r = op(x)*op(y), old contents scaled by 0
+r.gemm((T)1, y, opy, x, opx, (T)-1);	//presumably r -= op(y)*op(x)
+return r;
+}
+
+//anticommutator op(x)*op(y) + op(y)*op(x) of full matrices built by two gemm calls; trx/tryy select the transposed operand
+template<class T> inline const NRMat<T> anticommutator ( const NRMat<T> &x, const NRMat<T> &y, const bool trx=0, const bool tryy=0)
+{
+const char opx = trx ? 't' : 'n', opy = tryy ? 't' : 'n';
+NRMat<T> r(trx?x.ncols():x.nrows(), tryy?y.nrows():y.ncols());
+r.gemm((T)0, x, opx, y, opy, (T)1);	//presumably r = op(x)*op(y), old contents scaled by 0
+r.gemm((T)1, y, opy, x, opx, (T)1);	//presumably r += op(y)*op(x)
+return r;
+}
+
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// LAPACK interface: declare_la(T) declares the routines for element type T //
+//////////////////////////////////////////////////////////////////////////////
+
+#define declare_la(T) \
+extern const  NRVec<T> diagofproduct(const NRMat<T> &a, const NRMat<T> &b,\
+		bool trb=0, bool conjb=0); \
+extern T trace2(const NRMat<T> &a, const NRMat<T> &b, bool trb=0); \
+extern T trace2(const NRSMat<T> &a, const NRSMat<T> &b, const bool diagscaled=0);\
+extern void linear_solve(NRMat<T> &a, NRMat<T> *b, double *det=0); \
+extern void linear_solve(NRSMat<T> &a, NRMat<T> *b, double *det=0); \
+extern void diagonalize(NRMat<T> &a, NRVec<T> &w, const bool eivec=1,\
+		const bool corder=1); \
+extern void diagonalize(NRSMat<T> &a, NRVec<T> &w, NRMat<T> *v, const bool corder=1);\
+extern void singular_decomposition(NRMat<T> &a, NRMat<T> *u, NRVec<T> &s,\
+		NRMat<T> *v, const bool corder=1);
+
+declare_la(double)
+declare_la(complex<double>)
+
+// Separate declarations
+extern void gdiagonalize(NRMat<double> &a, NRVec<double> &wr, NRVec<double> &wi,
+		NRMat<double> *vl, NRMat<double> *vr, const bool corder=1);
+extern void gdiagonalize(NRMat<double> &a, NRVec< complex<double> > &w,
+		 NRMat< complex<double> >*vl, NRMat< complex<double> > *vr);
+extern NRMat<double> matrixfunction(NRSMat<double> a, double (*f) (double));
+extern NRMat<double> matrixfunction(NRMat<double> a, complex<double> (*f)(const complex<double> &),const bool adjust=0);
+
+//functions on matrices: each wrapper hands the scalar function of the same name to matrixfunction()
+inline NRMat<double>  sqrt(const NRSMat<double> &a) { return matrixfunction(a,&sqrt); }	//&sqrt resolves to the double(double) overload
+inline NRMat<double>  log(const NRSMat<double> &a) { return matrixfunction(a,&log); }	//&log resolves to the double(double) overload
+extern NRMat<double> log(const NRMat<double> &a);
+
+
+extern const NRMat<double> realpart(const NRMat< complex<double> >&);
+extern const NRMat<double> imagpart(const NRMat< complex<double> >&);
+extern const NRMat< complex<double> > realmatrix (const NRMat<double>&);
+extern const NRMat< complex<double> > imagmatrix (const NRMat<double>&);
+
+//inverse by means of linear solve, preserving rhs intact; det, if given, is handed on to linear_solve
+template<typename T>
+const NRMat<T> inverse(NRMat<T> a, double *det=0) //linear_solve takes double *det for every T; T* could not compile for complex
+{
+#ifdef DEBUG
+	if(a.nrows()!=a.ncols()) laerror("inverse() for non-square matrix");
+#endif
+	NRMat<T> result(a.nrows(),a.nrows());
+	result = (T)1.;	//right-hand side, presumably the unit matrix
+	linear_solve(a, &result, det);
+	return result;
+}
+
diff --git a/smat.cc b/smat.cc
new file mode 100644
index 0000000..9880023
--- /dev/null
+++ b/smat.cc
@@ -0,0 +1,399 @@
+#include "smat.h"
+// TODO
+// specialize unary minus
+
+
+//////////////////////////////////////////////////////////////////////////////
+////// forced instantiation in the corresponding object file
+template class NRSMat<double>;
+template class NRSMat< complex<double> >;
+
+
+
+/*
+ *  * Templates first, specializations for BLAS next
+ *
+ */
+
+// conversion ctor, symmetrize general Mat into SMat: stores (rhs + rhs^T)/2 in packed lower-triangle order
+template <typename T>
+NRSMat<T>::NRSMat(const NRMat<T> &rhs)
+{
+	nn = rhs.nrows();	// was never set, so the check, NN2 and the loops below used garbage
+#ifdef DEBUG
+	if (nn != rhs.ncols()) laerror("attempt to convert non-square Mat to SMat");
+#endif
+	count = new int(1);
+	v = new T[NN2];
+	int i, j, k=0;
+	for (i=0; i<nn; i++)
+		for (j=0; j<=i;j++) v[k++] = 0.5 * (rhs[i][j] + rhs[j][i]);
+}
+
+
+// dtor: drop one reference and free the storage when the last owner goes away
+template <typename T>
+NRSMat<T>::~NRSMat()
+{
+	if (count && --(*count) <= 0) {
+		// delete[] on a null pointer is a no-op, so v needs no separate test
+		delete[] v;
+		delete count;
+	}
+	// a matrix that was never allocated (count==0) owns nothing
+}
+
+
+// assignment with a physical copy: *this ends up with a private buffer holding a copy of rhs's elements
+template <typename T>
+NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &rhs)
+{
+	if (this != &rhs) {
+		if(!rhs.v) laerror("unallocated rhs in NRSMat operator |=");
+		if(count)
+			if(*count > 1) {	// detach from the other
+				--(*count);
+				nn = 0;
+				count = 0;
+				v = 0;
+			}
+		if (nn != rhs.nn) {
+			delete [] v; v = 0;	// null it, or the stale pointer would be reused (and overrun) below
+			nn = rhs.nn;
+		}
+		if (!v) v = new T[NN2];
+		if (!count) count = new int;
+		*count = 1;
+		memcpy(v, rhs.v, NN2*sizeof(T));
+	}
+	return *this;
+}
+
+// assignment: shallow and reference counted, *this shares rhs's buffer afterwards
+template <typename T>
+NRSMat<T> & NRSMat<T>::operator=(const NRSMat<T> & rhs)
+{
+	if (this != &rhs) {
+		if (count && --(*count) == 0) {	// we were the last user of the old buffer
+			delete [] v;
+			delete count;
+		}
+		nn = rhs.nn;
+		v = rhs.v;
+		count = rhs.count;
+		if (count) ++(*count);	// an unallocated rhs has no counter
+	}
+	return *this;
+}
+
+// assign a to every diagonal element; the off-diagonal elements are left as they are
+template <typename T>
+NRSMat<T> & NRSMat<T>::operator=(const T &a)
+{
+	copyonwrite();
+	for (int i=0; i<nn; i++) { const int rowstart = i*(i+1)/2; v[rowstart+i] = a; }
+	return *this;
+}
+
+// unary minus: returns a newly allocated matrix holding the negated elements
+template <typename T>
+const NRSMat<T> NRSMat<T>::operator-() const
+{
+	NRSMat<T> negated(nn);
+	for(int k=NN2-1; k>=0; --k) negated.v[k] = -v[k];
+	return negated;
+}
+
+// trace of Smat: sum of the diagonal elements; row i of the packed triangle starts at i*(i+1)/2
+template <typename T>
+const T NRSMat<T>::trace() const
+{
+	T sum = 0;
+	for (int i=0, rowstart=0; i<nn; rowstart += ++i) sum += v[rowstart+i];
+	return sum;
+}
+
+// make a private instance of the Smat: a shared buffer is left to the other owners and deep-copied
+template <typename T>
+void NRSMat<T>::copyonwrite()
+{
+#ifdef DEBUG
+	if (!count) laerror("probably an assignment to undefined Smat");
+#endif
+	if (*count <= 1) return;	// already the only owner
+
+	--(*count);
+	count = new int;
+	*count = 1;
+	T *mine = new T[NN2];
+	memcpy(mine, v, NN2*sizeof(T));
+	v = mine;
+}
+
+// resize Smat to n x n; old contents are discarded whenever storage is (re)allocated
+template <typename T>
+void NRSMat<T>::resize(const int n)
+{
+#ifdef DEBUG
+	if (n <= 0) laerror("illegal matrix dimension in resize of Smat");
+#endif
+	// a buffer shared with other matrices is not touched, only our reference is dropped
+	if (count && *count > 1) {
+		--(*count);
+		count = 0;
+		v = 0;
+		nn = 0;
+	}
+	if (count) {
+		if (n == nn) return;	// sole owner and same size: keep the storage
+		nn = n;
+		delete[] v;
+		v = new T[NN2];
+	} else {
+		// never allocated or just detached: start a fresh reference count
+		count = new int;
+		*count = 1;
+		nn = n;
+		v = new T[NN2];
+	}
+}
+
+// write matrix to the file with specific format; the work is done by lawritemat on the packed data
+template <typename T> void NRSMat<T>::fprintf(FILE *file, const char *format, const int modulo) const
+{
+	const T *packed = (const T *)(*this);
+	lawritemat(file, packed, nn, nn, format, 2, modulo, 1);
+}
+
+// read the two dimensions and then all n*n elements, row by row, each with the given format
+template <class T>
+void NRSMat<T>::fscanf(FILE *f, const char *format)
+{
+	int rows, cols;
+	if (std::fscanf(f,"%d %d",&rows,&cols) != 2) laerror("cannot read matrix dimensions in SMat::fscanf");
+	if (rows != cols) laerror("different dimensions of SMat");
+	resize(rows);
+	for (int i=0; i<rows; i++)
+		for (int j=0; j<rows; j++) {
+			T &slot = (*this)(i,j);	// (i,j) and (j,i) share one slot, the later value wins
+			if (std::fscanf(f,format,&slot) != 1) laerror("Smat - cannot read matrix element");
+		}
+}
+
+
+/*
+ * BLAS specializations for double and complex<double>
+ */
+
+// SMat * Mat: one packed symmetric matrix-vector product (dspmv) per column of rhs
+const NRMat<double> NRSMat<double>::operator*(const NRMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat");
+#endif
+	const int cols = rhs.ncols();	// number of columns and stride within one column
+	NRMat<double> product(nn, cols);
+	for (int c=0; c<cols; c++)
+		cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs[0]+c, cols, 0.0, product[0]+c, cols);
+	return product;
+}
+const NRMat< complex<double> >
+NRSMat< complex<double> >::operator*(const NRMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat");
+#endif
+	const int cols = rhs.ncols();	// number of columns and stride within one column
+	NRMat< complex<double> > product(nn, cols);
+	for (int c=0; c<cols; c++)	// one packed hermitean matrix-vector product per column
+		cblas_zhpmv(CblasRowMajor, CblasLower, nn, &CONE, v, rhs[0]+c, cols, &CZERO, product[0]+c, cols);
+	return product;
+}
+
+// SMat * SMat: full result(i,j) = sum_k a(i,k)*b(k,j), accumulated in four passes over the packed triangles
+const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat");
+#endif
+	NRMat<double> result(0.0, nn, nn);
+	double *p, *q;
+	// pass 1: terms with j<=k<=i, a(i,k) times the stored row k of b
+	p = v;
+	for (int i=0; i<nn;i++) {
+		q = rhs.v;
+		for (int k=0; k<=i; k++) {
+			cblas_daxpy(k+1, *p++, q, 1, result[i], 1);
+			q += k+1;
+		}
+	}
+	// pass 2: terms with k<=i and k<j, stored row i of a dotted with stored row j of b
+	p = v;
+	for (int i=0; i<nn;i++) {
+		q = rhs.v+1;
+		for (int j=1; j<nn; j++) {
+			result[i][j] += cblas_ddot(i+1<j ? i+1 : j, p, 1, q, 1);
+			q += j+1;
+		}
+		p += i+1;
+	}
+	// pass 3: terms with k>i and j<=k, rank-1 update from stored row k of a and of b (loop index i plays k)
+	p = v; 
+	q = rhs.v;
+	for (int i=0; i<nn; i++) {
+		cblas_dger(CblasRowMajor, i, i+1, 1., p, 1, q, 1, result, nn);
+		p += i+1;
+		q += i+1;
+	}
+	// pass 4: remaining terms with i<k<j, added down column j of the result (stride nn)
+	q = rhs.v+3;
+	for (int j=2; j<nn; j++) {
+		p = v+1;
+		for (int i=1; i<j; i++) {
+			cblas_daxpy(i, *++q, p, 1, result[0]+j, nn);
+			p += i+1;
+		}
+		q += 2;
+	}
+
+	return result;
+}
+const NRMat< complex<double> > 
+NRSMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat");
+#endif
+	NRMat< complex<double> > product(0.0, nn, nn);
+	NRMat< complex<double> > fullrhs(rhs);	// rhs converted to a full Mat
+	product = *this * fullrhs;		// the SMat*Mat operator does the work
+	return product;
+//	laerror("complex SMat*Smat not implemented");
+}
+// S dot S: dot product of the two packed arrays (NN2 elements, each stored element counted once)
+const double NRSMat<double>::dot(const NRSMat<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("dot of incompatible SMat's");
+#endif
+	return cblas_ddot(NN2, v, 1, rhs.v, 1);
+}
+const complex<double> 
+NRSMat< complex<double> >::dot(const NRSMat< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("dot of incompatible SMat's");
+#endif
+	complex<double> dot;	// zdotc conjugates the first vector, i.e. the elements of *this
+	cblas_zdotc_sub(NN2, (void *)v, 1, (void *)rhs.v, 1, (void *)(&dot));	// all NN2 packed elements, as in the real version
+	return dot;
+}
+
+// y = S * x: packed symmetric matrix times vector via dspmv
+const NRVec<double> NRSMat<double>::operator*(const NRVec<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec");
+#endif
+	NRVec<double> y(nn);	// old contents are irrelevant, beta is 0
+	cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs, 1, 0.0, y, 1);
+	return y;
+}
+const NRVec< complex<double> >
+NRSMat< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec");
+#endif
+	NRVec< complex<double> > y(nn);	// packed hermitean matrix times vector, beta is 0
+	cblas_zhpmv(CblasRowMajor, CblasLower, nn, (void *)(&CONE), (void *)v,
+			(const void *)rhs, 1, (void *)(&CZERO), (void *)y, 1);
+	return y;
+}
+
+// norm over the packed elements of (S - scalar*I): root of the sum of squares, each stored element once
+const double  NRSMat<double>::norm(const double scalar) const
+{
+	if (scalar == 0.0) return cblas_dnrm2(NN2, v, 1);	// nothing to subtract, let BLAS do it
+	double sum = 0;
+	const double *p = v;
+	for (int i=0; i<nn; ++i) {
+		// row i of the stored triangle: i off-diagonal elements ...
+		for (int j=0; j<i; ++j, ++p) sum += (*p)*(*p);
+		// ... followed by the diagonal one, which gets shifted by scalar
+		const double shifted = *p++ - scalar;
+		sum += shifted*shifted;
+	}
+	return sqrt(sum);
+}
+const double
+NRSMat< complex<double> >::norm(const complex<double> scalar) const
+{
+	// same as the real version: norm over the stored elements of (S - scalar*I)
+	if (scalar.real() == 0.0 && scalar.imag() == 0.0)
+		return cblas_dznrm2(NN2, (void *)v, 1);
+	double sum = 0;
+	const complex<double> *p = v;
+	for (int i=0; i<nn; ++i)
+		for (int j=0; j<=i; ++j) {
+			complex<double> elem = *p++;
+			if (j == i) elem -= scalar;	// diagonal element
+			sum += elem.real()*elem.real() + elem.imag()*elem.imag();
+		}
+	return sqrt(sum);
+}
+
+// axpy: S += alpha * x, applied to all NN2 packed elements
+void NRSMat<double>::axpy(const double alpha, const NRSMat<double> & x)
+{
+#ifdef DEBUG
+	if (nn != x.nn) laerror("axpy of incompatible SMats");
+#endif
+	copyonwrite();	// do not modify a buffer shared with other matrices
+	cblas_daxpy(NN2, alpha, x.v, 1, v, 1);
+}
+void NRSMat< complex<double> >::axpy(const complex<double> alpha,
+			const NRSMat< complex<double> > & x)
+{
+#ifdef DEBUG
+	if (nn != x.nn) laerror("axpy of incompatible SMats");
+#endif
+	copyonwrite();	// do not modify a buffer shared with other matrices
+	cblas_zaxpy(NN2, (void *)(&alpha), (void *)x.v, 1, (void *)v, 1);	// S += alpha*x over all NN2 packed elements, not just nn
+}
+
+
+export template <class T>
+ostream& operator<<(ostream &s, const NRSMat<T> &x)
+{
+	const int n = x.nrows();
+	s << n << ' ' << n << '\n';	// header, then the full square matrix, one row per line
+	for (int row=0; row<n; ++row)
+		for (int col=0; col<n; ++col) {
+			const char sep = (col==n-1) ? '\n' : ' ';
+			s << x(row,col) << sep;
+		}
+	return s;
+}
+
+
+export template <class T>
+istream& operator>>(istream  &s, NRSMat<T> &x)
+{
+	int n, m;
+	s >> n >> m;	// the two dimensions come first and must agree
+	if (n != m) laerror("input symmetric matrix not square");
+	x.resize(n);
+	for (int row=0; row<n; ++row) for (int col=0; col<m; ++col) s >> x(row,col);
+	return s;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//// forced instantiation in the corresponding object file
+#define INSTANTIZE(T) \
+template ostream & operator<<(ostream &s, const NRSMat< T > &x); \
+template istream & operator>>(istream  &s, NRSMat< T > &x); \
+
+INSTANTIZE(double)
+INSTANTIZE(complex<double>)
+
diff --git a/smat.h b/smat.h
new file mode 100644
index 0000000..7a29511
--- /dev/null
+++ b/smat.h
@@ -0,0 +1,303 @@
+#ifndef _LA_SMAT_H_
+#define _LA_SMAT_H_
+
+#include "vec.h"
+#include "mat.h"
+
+#define NN2 (nn*(nn+1)/2)
+template <class T>
+class NRSMat { // symmetric or complex hermitean matrix in packed form
+protected:
+	int nn;		// dimension: the matrix is nn x nn
+	T *v;		// NN2 packed elements, possibly shared between copies
+	int *count;	// reference count for v; null while unallocated
+public:
+	friend class NRVec<T>;
+	friend class NRMat<T>;
+	
+	inline NRSMat() : nn(0),v(0),count(0) {};	// unallocated matrix (no class qualification inside the class body)
+	inline explicit NRSMat(const int n);			// Zero-based array
+	inline NRSMat(const T &a, const int n);	//Initialize to constant
+	inline NRSMat(const T *a, const int n);	// Initialize to array
+	inline NRSMat(const NRSMat &rhs);		// Copy constructor
+	explicit NRSMat(const NRMat<T> &rhs);		// symmetric part of general matrix
+	explicit NRSMat(const NRVec<T> &rhs, const int n); //construct matrix from vector
+	NRSMat & operator|=(const NRSMat &rhs);	//assignment to a new copy
+	NRSMat & operator=(const NRSMat &rhs);	//assignment
+	NRSMat & operator=(const T &a);		//assign a to diagonal
+	inline NRSMat & operator*=(const T &a);
+	inline NRSMat & operator+=(const T &a); 
+	inline NRSMat & operator-=(const T &a); 
+	inline NRSMat & operator+=(const NRSMat &rhs); 
+	inline NRSMat & operator-=(const NRSMat &rhs); 
+	const NRSMat operator-() const; //unary minus
+	inline int getcount() const {return count?*count:0;}
+	inline const NRSMat operator*(const T &a) const;
+	inline const NRSMat operator+(const T &a) const;
+	inline const NRSMat operator-(const T &a) const;
+	inline const NRSMat operator+(const NRSMat &rhs) const; 
+	inline const NRSMat operator-(const NRSMat &rhs) const;
+	inline const NRMat<T> operator+(const NRMat<T> &rhs) const; 
+	inline const NRMat<T> operator-(const NRMat<T> &rhs) const; 
+	const NRMat<T> operator*(const NRSMat &rhs) const; // SMat*SMat
+	const NRMat<T> operator*(const NRMat<T> &rhs) const; // SMat*Mat 
+	const T dot(const NRSMat &rhs) const; // Smat.Smat
+	const NRVec<T> operator*(const NRVec<T> &rhs) const; 
+	inline const T& operator[](const int ij) const;
+	inline T& operator[](const int ij);
+	inline const T& operator()(const int i, const int j) const;
+	inline T& operator()(const int i, const int j);
+	inline int nrows() const;
+	inline int ncols() const;
+	const double norm(const T scalar=(T)0) const;
+	void axpy(const T alpha, const NRSMat &x); // this+= a*x
+	inline const T amax() const;
+	const T trace() const;
+	void copyonwrite();
+	void resize(const int n);
+	inline operator T*(); //get a pointer to the data
+	inline operator const T*() const; //get a pointer to the data
+	~NRSMat();
+	void fprintf(FILE *f, const char *format, const int modulo) const; 
+	void fscanf(FILE *f, const char *format); 
+//members concerning sparse matrix
+	explicit NRSMat(const SparseMat<T> &rhs);               // dense from sparse
+	inline void simplify() {}; //just for compatibility with sparse ones
+};
+
+// INLINES
+// ctors
+template <typename T>	// n x n matrix; the elements are not set here
+inline NRSMat<T>::NRSMat(const int n)
+	: nn(n), v(new T[NN2]), count(new int(1)) {}
+
+// n x n matrix with every stored element, not only the diagonal, set to a
+template <typename T>
+inline NRSMat<T>::NRSMat(const T& a, const int n) : nn(n),
+	        v(new T[NN2]), count(new int(1))
+{
+	for(int i=0; i<NN2; i++) v[i] = a;	// also for a==0: new T[] leaves built-in types uninitialized
+}
+
+// copies the NN2 packed elements starting at a into newly allocated storage
+template <typename T>
+inline NRSMat<T>::NRSMat(const T *a, const int n)
+	: nn(n), v(new T[NN2]), count(new int(1))
+{
+	memcpy(v, a, NN2*sizeof(T));
+}
+
+template <typename T>
+inline NRSMat<T>::NRSMat(const NRSMat<T> &rhs)
+	: nn(rhs.nn), v(rhs.v), count(rhs.count)
+{
+	// copy constructor: shallow copy sharing rhs's buffer; an unallocated
+	// rhs (count==0) has no reference counter to increment
+	if (count) ++(*count);
+}
+
+template <typename T>
+NRSMat<T>::NRSMat(const NRVec<T> &rhs, const int n) // type conversion: the vector's buffer is shared, not copied
+{
+	nn = n;
+#ifdef DEBUG
+	if (NN2 != rhs.size())
+		laerror("matrix dimensions incompatible with vector length");
+#endif
+	count = rhs.count;
+	v = rhs.v;
+	if (count) (*count)++;	// an unallocated vector has no counter, as in the copy constructor
+}
+
+// S *= a: scale all NN2 packed elements in place
+inline NRSMat<double> & NRSMat<double>::operator*=(const double & a)
+{
+	copyonwrite();	// do not modify a buffer shared with other matrices
+	cblas_dscal(NN2, a, v, 1);
+	return *this;
+}
+inline NRSMat< complex<double> > &
+NRSMat< complex<double> >::operator*=(const complex<double> & a)
+{
+	copyonwrite();	// do not modify a buffer shared with other matrices
+	cblas_zscal(NN2, (void *)(&a), (void *)v, 1);	// all NN2 packed elements, not just the first nn
+	return *this;
+}
+
+
+// S += a*I: add a to every diagonal element
+template <typename T>
+inline NRSMat<T> & NRSMat<T>::operator+=(const T &a)
+{
+	copyonwrite();
+	for (int i=0, rowstart=0; i<nn; rowstart += ++i) v[rowstart+i] += a;
+	return *this;
+}
+
+// S -= a*I: subtract a from every diagonal element
+template <typename T>
+inline NRSMat<T> & NRSMat<T>::operator-=(const T &a)
+{
+	copyonwrite();
+	for (int i=0, rowstart=0; i<nn; rowstart += ++i) v[rowstart+i] -= a;
+	return *this;
+}
+
+// S += S: elementwise sum of the packed arrays (daxpy with factor 1)
+inline NRSMat<double> &
+NRSMat<double>::operator+=(const NRSMat<double> & rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator+=");
+#endif
+	copyonwrite();	// do not modify a buffer shared with other matrices
+	cblas_daxpy(NN2, 1.0, rhs.v, 1, v, 1);
+	return *this;
+}
+inline NRSMat< complex<double> > &
+NRSMat< complex<double> >::operator+=(const NRSMat< complex<double> > & rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator+=");
+#endif
+	copyonwrite();	// do not modify a buffer shared with other matrices
+	cblas_zaxpy(NN2, (void *)(&CONE), (void *)rhs.v, 1, (void *)v, 1);	// the data pointers themselves, not their addresses
+	return *this;
+}
+
+// S -= S: elementwise difference of the packed arrays (daxpy with factor -1)
+inline NRSMat<double> &
+NRSMat<double>::operator-=(const NRSMat<double> & rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator-=");
+#endif
+	copyonwrite();	// do not modify a buffer shared with other matrices
+	cblas_daxpy(NN2, -1.0, rhs.v, 1, v, 1);
+	return *this;
+}
+inline NRSMat< complex<double> > &
+NRSMat< complex<double> >::operator-=(const NRSMat< complex<double> > & rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("incompatible SMats in SMat::operator-=");
+#endif
+	copyonwrite();	// do not modify a buffer shared with other matrices
+	cblas_zaxpy(NN2, (void *)(&CMONE), (void *)rhs.v, 1, (void *)v, 1);	// the data pointers themselves, not their addresses
+	return *this;
+}
+
+// SMat + Mat: start from a copy of the full matrix and add this one to it
+template <typename T> inline const NRMat<T> NRSMat<T>::operator+(const NRMat<T> &rhs) const
+{
+	NRMat<T> sum(rhs);
+	return sum += *this;
+}
+
+// SMat - Mat: start from the negated full matrix and add this one to it
+template <typename T> inline const NRMat<T> NRSMat<T>::operator-(const NRMat<T> &rhs) const
+{
+	NRMat<T> diff(-rhs);
+	return diff += *this;
+}
+
+// access the element, linear array case: ij indexes the packed storage directly (0 <= ij < NN2)
+template <typename T>
+inline T & NRSMat<T>::operator[](const int ij)
+{
+#ifdef DEBUG
+	if (*count != 1) laerror("lval [] with count > 1 in Smat");	// writing through a shared buffer would change the other owners too
+	if (ij<0 || ij>=NN2) laerror("SMat [] out of range");
+	if (!v) laerror("[] for unallocated Smat");
+#endif
+	return v[ij];
+}
+template <typename T>
+inline const T & NRSMat<T>::operator[](const int ij) const
+{
+#ifdef DEBUG
+	if (ij<0 || ij>=NN2) laerror("SMat [] out of range");	// read access: sharing is harmless, only the range is checked
+	if (!v) laerror("[] for unallocated Smat");
+#endif
+	return v[ij];	// ij indexes the packed storage directly
+}
+
+// access the element, 2-dim array case; (i,j) and (j,i) refer to the same stored element
+template <typename T> inline T & NRSMat<T>::operator()(const int i, const int j)
+{
+#ifdef DEBUG
+	if (*count != 1) laerror("lval (i,j) with count > 1 in Smat");
+	if (i<0 || i>=nn || j<0 || j>=nn) laerror("SMat (i,j) out of range");
+	if (!v) laerror("(i,j) for unallocated Smat");
+#endif
+	if (i < j) return v[j*(j+1)/2+i];	// above the diagonal: use the mirrored element
+	return v[i*(i+1)/2+j];
+}
+template <typename T> inline const T & NRSMat<T>::operator()(const int i, const int j) const
+{
+#ifdef DEBUG
+	if (i<0 || i>=nn || j<0 || j>=nn) laerror("SMat (i,j) out of range");
+	if (!v) laerror("(i,j) for unallocated Smat");
+#endif
+	if (i < j) return v[j*(j+1)/2+i];	// above the diagonal: use the mirrored element
+	return v[i*(i+1)/2+j];
+}
+
+// return the number of rows and columns; both equal nn since the matrix is square
+template <typename T>
+inline int NRSMat<T>::nrows() const
+{
+	return nn;
+}
+template <typename T>
+inline int NRSMat<T>::ncols() const
+{
+	return nn;	// same as nrows(), the matrix is square
+}
+
+// max value: the stored element picked by BLAS idamax (largest absolute value), returned with its sign
+inline const double NRSMat<double>::amax() const
+{
+	return v[cblas_idamax(NN2, v, 1)];
+}
+inline const complex<double> NRSMat< complex<double> >::amax() const
+{
+	return v[cblas_izamax(NN2, (void *)v, 1)];	// the stored element selected by BLAS izamax
+}
+
+// pointer to the packed data of the Smat; no copy-on-write is triggered here
+template <typename T>
+inline NRSMat<T>:: operator T*()
+{
+#ifdef DEBUG
+	if (!v) laerror("unallocated SMat in operator T*");
+#endif
+	return v;
+}
+template <typename T>
+inline NRSMat<T>:: operator const T*() const
+{
+#ifdef DEBUG
+	if (!v) laerror("unallocated SMat in operator T*");
+#endif
+	return v;	// read-only pointer to the packed data
+}
+
+
+
+// I/O
+template <typename T> extern ostream& operator<<(ostream &s, const NRSMat<T> &x);
+template <typename T> extern istream& operator>>(istream  &s, NRSMat<T> &x);
+
+
+
+
+// generate operators: SMat + a, a + SMat, SMat * a
+NRVECMAT_OPER(SMat,+)
+NRVECMAT_OPER(SMat,-)
+NRVECMAT_OPER(SMat,*)
+// generate SMat + SMat, SMat - SMat
+NRVECMAT_OPER2(SMat,+)
+NRVECMAT_OPER2(SMat,-)
+
+#endif /* _LA_SMAT_H_ */
diff --git a/sparsemat.cc b/sparsemat.cc
new file mode 100644
index 0000000..d29c601
--- /dev/null
+++ b/sparsemat.cc
@@ -0,0 +1,1088 @@
+#include <string>
+#include <cmath>
+#include <complex>
+#include <iostream>
+
+#include "sparsemat.h"
+
+//////////////////////////////////////////////////////////////////////////////
+//// forced instantiation in the corresponding object file
+template class SparseMat<double>;
+template class SparseMat< complex<double> >;
+
+
+#ifdef _GLIBCPP_NO_TEMPLATE_EXPORT
+# define export
+#endif
+
+
+export template <class T>
+ostream& operator<<(ostream &s, const SparseMat<T> &x)
+{
+	// text format: "rows cols", then one "row col value" line per stored
+	// element in list order, closed by the sentinel line "-1 -1"
+	const SPMatindex rows = x.nrows();
+	const SPMatindex cols = x.ncols();
+	s << (int)rows << ' ' << (int)cols << '\n';
+
+	for (matel<T> *e = x.getlist(); e; e = e->next) {
+		s << (int)e->row << ' ' << (int)e->col;
+		s << ' ' << e->elem << '\n';
+	}
+	s << "-1 -1\n";
+	return s;
+}
+
+export template <class T>
+istream& operator>>(istream  &s, SparseMat<T> &x)
+                {
+                //reads "rows cols", then "row col value" triples until a negative index (the "-1 -1" line)
+                int i=-1,j=-1; //defined even if the extraction below fails
+		int n,m;
+		matel<T> *l=NULL;
+                s >> n >> m;
+                x.resize(n,m);
+		s >> i >> j;
+		//a failed stream ends the loop too; it used to spin forever, allocating one element per turn
+		while(!s.fail() && i>=0 && j>=0)
+			{
+			matel<T> *ll = l;
+			l= new matel<T>; //new elements are put in front of the list
+			l->next= ll;
+			l->row=i; l->col=j;
+			s >> l->elem; s >> i >> j;
+			}
+		x.setlist(l);
+                return s;
+                }
+
+//helper used from several members: throw away the sorted index arrays and the cached element count
+export template <class T>
+void SparseMat<T>::unsort()
+{
+//for a symmetric matrix colsorted is only reset, never freed here
+if(!symmetric) delete[] colsorted;
+delete[] rowsorted;
+rowsorted=colsorted=NULL;
+nonzero=0;
+}
+
+export template <class T>
+void SparseMat<T>::deletelist()
+{
+//the sorted index arrays point into the list, so they have to go first
+if(rowsorted||colsorted) unsort();
+if(*count >1) laerror("trying to delete shared list");
+//free the elements one by one, remembering the successor before each delete
+for(matel<T> *node=list, *after; node; node=after)
+	{
+	after=node->next;
+	delete node;
+	}
+list=NULL;
+delete count;
+count=NULL;
+}
+
+//no checks, not to be public: forget the current list (without freeing it) and rebuild it from l via add()
+export template <class T>
+void SparseMat<T>::copylist(const matel<T> *l)
+{
+list=NULL;
+for(const matel<T> *src=l; src; src=src->next)
+	{
+	//add() presumably creates a new element in our own list
+	add(src->row,src->col,src->elem);
+	}
+}
+
+export template <class T>
+void SparseMat<T>::copyonwrite()
+{
+	if(!count) laerror("probably an assignment to undefined sparse matrix");
+	if(*count <= 1) return; //not shared, nothing to detach from
+
+	--(*count); //the other owners keep the old list
+	count = new int;
+	*count=1;
+	if(!list) laerror("empty list with count>1");
+	unsort();
+	copylist(list); //rebuilds list from the elements of the shared one
+}
+
+
+//global for sort !!! is not thread-safe
+static void *globsorted;
+
+//global functions cannot be partially specialized in templates, we have to make it a member function
+
+//!!! gencmp's and genswap are critical for performance, make sure that compiler really inlines them
+template<class T, int type>
+struct gencmp { //general case (used with type 1): order by column first, then by row
+inline static SPMatindexdiff EXEC(register const SPMatindex i, register const SPMatindex j) 
+	{
+	register SPMatindexdiff k;
+	register matel<T> *ii,*jj;
+	ii=((matel<T> **)globsorted)[i]; //i and j are positions in the global pointer array
+	jj=((matel<T> **)globsorted)[j];
+	if (k=ii->col-jj->col) return k; else return ii->row-jj->row;} //assignment intended: the row decides only for equal columns
+};
+
+
+template<class T>
+struct gencmp<T,0> { //type 0: order by row first, then by column
+inline static SPMatindexdiff EXEC(register const SPMatindex i, register const SPMatindex j)
+	{
+        register SPMatindexdiff k;
+        register matel<T> *ii,*jj;
+        ii=((matel<T> **)globsorted)[i]; //i and j are positions in the global pointer array
+        jj=((matel<T> **)globsorted)[j];
+	if (k=ii->row-jj->row) return k; else return ii->col-jj->col;} //assignment intended: the column decides only for equal rows
+};
+
+
+
+
+template<class T>
+inline void genswap(const SPMatindex i,const SPMatindex j) //exchanges two entries of the global pointer array, the elements themselves stay put
+{
+SWAP(((matel<T> **)globsorted)[i],((matel<T> **)globsorted)[j]);
+}
+
+
+
+template<class T, int type>
+void genqsort(SPMatindex l,SPMatindex r) /*safer version for worst case*/
+{
+register SPMatindex i,j,piv;
+//sorts positions l..r (both inclusive) of the global pointer array in place, ordered by gencmp<T,type>
+/* other method for small arrays recommended in NUMREC is not used here
+does not give so large gain for moderate arrays and complicates the
+things, but would be worth trying (cf. profile) */
+
+if(r<=l) return; /*1 element*/
+if(gencmp<T,type>::EXEC(r,l)<0) genswap<T>(l,r);
+if(r-l==1) return; /*2 elements and preparation for median*/
+piv= (l+r)/2; /*pivoting by median of 3 - safer */
+if(gencmp<T,type>::EXEC(piv,l)<0) genswap<T>(l,piv); /*and change the pivot element implicitly*/
+if(gencmp<T,type>::EXEC(r,piv)<0) genswap<T>(r,piv); /*and change the pivot element implicitly*/
+if(r-l==2) return; /*in the case of 3 elements we are finished too */
+
+/*general case , l-th r-th already processed*/
+i=l+1; j=r-1;
+do{
+  /*important sharp inequality - stops at sentinel element for efficiency*/
+  /* this is inefficient if all keys are equal - unnecessary n log n swaps are done, but we assume that it is atypical input*/
+  while(gencmp<T,type>::EXEC(i++,piv)<0);
+  i--; /*undo the extra increment of the scan above*/
+  while(gencmp<T,type>::EXEC(j--,piv)>0);
+  j++; /*undo the extra decrement of the scan above*/
+  if(i<j)
+        {
+        /* swap and keep track of position of pivoting element */
+        genswap<T>(i,j);
+        if(i==piv) piv=j; else if(j==piv) piv=i;
+        }
+  if(i<=j) {i++; j--;}
+  }while(i<=j);
+
+if(j-l < r-i)   /*because of the stack in bad case process first the shorter subarray*/
+        {if(l<j) genqsort<T,type>(l,j); if(i<r) genqsort<T,type>(i,r);}
+else
+        {if(i<r) genqsort<T,type>(i,r); if(l<j) genqsort<T,type>(l,j);}
+}
+
+
+export template <class T>
+unsigned int SparseMat<T>::length() const
+{
+//a nonzero cached count is trusted as is
+if(nonzero) return nonzero;
+
+//otherwise walk the whole list once ...
+unsigned int counted=0;
+for(const matel<T> *e=list; e; e=e->next) ++counted;
+
+//... and remember the result; the method is const, hence the cast
+const_cast<SparseMat<T> *>(this)->nonzero=counted;
+
+return counted;
+}
+
+ 
+export template <class T>
+unsigned int SparseMat<T>::sort(int type) const //must be const since used from operator* which must be const to be compatible with other stuff, dirty casts here
+{
+if(type==0&&rowsorted || type==1&&colsorted) return nonzero; //the requested ordering is already cached
+if(!list) return ((SparseMat<T> *)this)->nonzero=0;
+//type 0 sorts by (row,col) into rowsorted, type 1 by (col,row) into colsorted, type 2 sorts like 0 but keeps the symmetric flag
+if(type!=2) const_cast<SparseMat<T> *>(this) ->setunsymmetric(); else type=0;//symmetric and sorted not supported simultaneously, type 2 is special just for simplify
+
+//create array from list, reallocate as necessary
+unsigned int size=3*MAX(nn,mm); //initial guess for a number of nonzero elements
+matel<T> **sorted= new matel<T>* [size];
+((SparseMat<T> *)this)->nonzero=0;
+matel<T> *l = list;
+while(l)
+        {
+        sorted[(((SparseMat<T> *)this)->nonzero)++]=l;
+        if(nonzero >= size ) //reallocate
+		{
+		size*=2;
+		matel<T> **newsorted= new matel<T>* [size];
+		memcpy(newsorted,sorted,size/2*sizeof(matel<T>*)); //size/2 is the old capacity, completely filled at this point
+		delete[] sorted;
+		sorted=newsorted;
+		}
+        l= l->next;
+        }
+
+//now sort the array of pointers according to type
+globsorted =sorted; 
+if(type==0) {genqsort<T,0>(0,nonzero-1); ((SparseMat<T> *)this)->rowsorted=sorted;} //type handled at compile time for more efficiency
+else {genqsort<T,1>(0,nonzero-1); ((SparseMat<T> *)this)->colsorted=sorted;} //should better be const cast
+
+//cout <<"sort: nonzero ="<<nonzero<<"\n";
+return nonzero; //number of (in principle) nonzero elements
+}
+
+
+export template <class T>
+void SparseMat<T>::simplify() //merges elements with equal (row,col), drops vanishing ones and relinks the list in sorted order
+{
+unsigned int n;
+if(!list) return;
+copyonwrite();
+if(symmetric)
+	{
+	unsort(); 
+	matel<T> *p;
+	p=list;
+	while(p)
+		{
+		if(p->row>p->col) SWAP(p->row,p->col); //get into one triangle, not OK for complex hermitean 
+		p=p->next;
+		}
+	n=sort(2); //sort and further handle like a triangle matrix
+	}
+else n=sort(0); //sorts according to row,column
+//after sorting, elements with equal (row,col) are adjacent: sum each run into its first element
+unsigned int i,j;
+SPMatindex r,c;
+j=0;
+r=rowsorted[j]->row;
+c=rowsorted[j]->col;
+for(i=1; i<n;i++)
+        {
+        if(r==rowsorted[i]->row && c==rowsorted[i]->col) {rowsorted[j]->elem +=rowsorted[i]->elem; delete rowsorted[i]; rowsorted[i]=NULL;}
+                else
+                        {
+                        j=i;
+                        r=rowsorted[j]->row;
+                        c=rowsorted[j]->col;
+                        }
+        }
+
+//check if  summed to zero
+for(i=0; i<n;i++) if(rowsorted[i] &&
+#ifdef SPARSEEPSILON
+	abs(rowsorted[i]->elem)<SPARSEEPSILON
+#else
+	! rowsorted[i]->elem 
+#endif
+	)	 {delete rowsorted[i]; rowsorted[i]=NULL;}
+
+//restore connectivity
+int nonz=0;
+matel<T> *p,*first,*prev;
+first=NULL;
+prev=NULL;
+for(i=0; i<n;i++) if(p=rowsorted[i]) //assignment intended: skip the slots emptied above
+        {
+	++nonz;
+        if(!first) first=p;
+        if(prev) prev->next=p;
+        p->next=NULL;
+        prev=p;
+        }
+list=first;
+nonzero=nonz; //NOTE(review): unsort() on the next line sets nonzero back to 0, so this value does not survive
+unsort(); //since there were NULLs introduced, rowsorted is not dense
+}
+
+
+export template <class T>
+void SparseMat<T>::resize(const SPMatindex n, const SPMatindex m)
+{
+	if(n<=0 || m<=0) laerror("illegal matrix dimension");
+	unsort();
+	//get rid of the old contents: detach from a shared list, free a private one
+	if(count && *count > 1) {--(*count); count=NULL; list=NULL;}
+	else if(count && *count==1) deletelist();
+	//the result is an empty but defined n x m matrix,
+	//unsymmetric and without sorted index arrays
+	nn=n;
+	mm=m;
+	count=new int(1);
+	list=NULL;
+	symmetric=0;
+	rowsorted=colsorted=NULL;
+}
+
+export template <class T>
+void SparseMat<T>::addsafe(const SPMatindex n, const SPMatindex m, const T elem) //add elem at (n,m) after making the matrix defined and private
+{
+#ifdef DEBUG
+if(n<0||n>=nn||m<0||m>=mm) laerror("SparseMat out of range"); //was guarded by lower-case "debug", which a DEBUG build does not define
+#endif
+#ifdef SPARSEEPSILON
+if(abs(elem)<SPARSEEPSILON) return;
+#else
+if(!elem) return;
+#endif
+if(!count) {count=new int;  *count=1; list=NULL;} //blank new matrix
+else copyonwrite(); //detaches (and unsorts) only when the list is shared
+add(n,m,elem);
+}
+
+
+//assignment operator: shallow, the element list is shared with rhs and reference counted
+export template <class T>
+SparseMat<T> & SparseMat<T>::operator=(const SparseMat<T> &rhs)
+{
+	if (this == &rhs) return *this;
+
+	unsort();
+	//give up the old list, destroying it when this was its last user
+	if(count && --(*count) ==0) {deletelist(); delete count;}
+	nn=rhs.nn;
+	mm=rhs.mm;
+	symmetric=rhs.symmetric;
+	list=rhs.list;
+	//an empty rhs is not shared: the matrix gets its own counter, raised to 1 below
+	if(list) count=rhs.count; else count= new int(0);
+	if(count) (*count)++;
+	return *this;
+}
+
+export template <class T>
+SparseMat<T> & SparseMat<T>::join(SparseMat<T> &rhs) //moves all elements of rhs to the end of this list, rhs is left empty
+{
+if(symmetric!=rhs.symmetric||nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible matrices in join()");
+if(*rhs.count!=1) laerror("shared rhs in join()");
+if(!count) {count=new int;  *count=1; list=NULL;}
+else copyonwrite();
+unsort(); rhs.unsort(); //both lists change: sorted arrays and cached counts would go stale
+matel<T> **last=&list;
+while(*last) last= &((*last)->next); //find the terminating next pointer
+*last=rhs.list; rhs.list=NULL;
+return *this;
+}
+
+
+//add (sign=='+') or subtract (any other sign) selected elements of rhs:
+//all stored elements of a symmetric rhs, otherwise those with row<=col when lower is set and row>=col when it is not
+export template <class T>
+SparseMat<T> & SparseMat<T>::addtriangle(const SparseMat &rhs, const bool lower, const char sign)
+{
+if(nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible dimensions for +=");
+if(count) copyonwrite();
+else {count=new int(1); list=NULL;}
+const bool negate = sign!='+';
+for(matel<T> *p=rhs.list; p; p=p->next)
+	{
+	bool selected = rhs.symmetric;
+	if(!selected) selected = lower ? p->row<=p->col : p->row>=p->col;
+	if(!selected) continue;
+#ifdef SPARSEEPSILON
+	if(!(abs(p->elem)>SPARSEEPSILON)) continue; //negligible
+#endif
+	add(p->row,p->col, negate ? - p->elem : p->elem);
+	}
+return *this;
+}
+
+//elementwise addition of rhs; a symmetric rhs added to a general matrix is unfolded into both triangles
+export template <class T>
+SparseMat<T> & SparseMat<T>::operator+=(const SparseMat<T> &rhs)
+{
+if(symmetric&&!rhs.symmetric) laerror("cannot add general to symmetric sparse");
+if(nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible dimensions for +=");
+if(count) copyonwrite();
+else {count=new int(1); list=NULL;}
+const bool mirror = rhs.symmetric && !symmetric;
+for(matel<T> *p=rhs.list; p; p=p->next)
+	{
+#ifdef SPARSEEPSILON
+	if(!(abs(p->elem)>SPARSEEPSILON)) continue; //negligible
+#endif
+	add(p->row,p->col,p->elem);
+	if(mirror && p->row!=p->col) add(p->col,p->row,p->elem);
+	}
+return *this;
+}
+
+//elementwise subtraction of rhs; a symmetric rhs subtracted from a general matrix is unfolded into both triangles
+export template <class T>
+SparseMat<T> & SparseMat<T>::operator-=(const SparseMat<T> &rhs)
+{
+if(symmetric&&!rhs.symmetric) laerror("cannot add general to symmetric sparse");
+if(nn!=rhs.nn||mm!=rhs.mm) laerror("incompatible dimensions for -=");
+if(count) copyonwrite();
+else {count=new int(1); list=NULL;}
+const bool mirror = rhs.symmetric && !symmetric;
+for(matel<T> *p=rhs.list; p; p=p->next)
+	{
+#ifdef SPARSEEPSILON
+	if(!(abs(p->elem)>SPARSEEPSILON)) continue; //negligible
+#endif
+	add(p->row,p->col,- p->elem);
+	if(mirror && p->row!=p->col) add(p->col,p->row,- p->elem);
+	}
+return *this;
+}
+
+
+//constructor from a dense matrix
+//copies the non-negligible elements of rhs into a new, unshared, unsymmetric sparse matrix
+export template <class T>
+SparseMat<T>::SparseMat(const NRMat<T> &rhs)
+{
+nn=rhs.nrows();
+mm=rhs.ncols();
+count=new int;
+*count=1; 
+list=NULL;
+symmetric=0;
+colsorted=rowsorted=NULL;
+nonzero=0; //no sorted index yet; every member gets a defined value
+SPMatindex i,j;
+for(i=0;i<nn;++i)
+	for(j=0; j<mm;++j)
+		{
+		const T t(rhs(i,j));
+#ifdef SPARSEEPSILON
+		if( abs(t)>SPARSEEPSILON)
+#else
+		if(t)
+#endif
+		 add(i,j,t);
+		}
+}
+
+//constructor dense matrix from sparse
+//list entries are accumulated, so repeated (row,col) entries add up
+export template <class T>
+NRMat<T>::NRMat(const SparseMat<T> &rhs)
+{
+nn=rhs.nrows();
+mm=rhs.ncols();
+count=new int(1);
+//one contiguous block of nn*mm elements, zeroed below
+#ifdef MATPTR
+        v= new T*[nn];
+        v[0] = new T[mm*nn];
+        for (int r=1; r< nn; r++) v[r] = v[r-1] + mm; //row pointers into the block
+        T *data=v[0];
+#else
+        v = new T[mm*nn];
+        T *data=v;
+#endif
+memset(data,0,nn*mm*sizeof(T));
+const bool mirror=rhs.issymmetric();
+for(matel<T> *e=rhs.getlist(); e; e=e->next)
+	{
+	//a symmetric matrix stores an off-diagonal element once; fill its mirror image as well
+	const bool offdiag = mirror && e->row!=e->col;
+#ifdef MATPTR
+	v[e->row][e->col] +=e->elem;
+	if(offdiag) v[e->col][e->row] +=e->elem;
+#else
+	v[e->row*mm+e->col] +=e->elem;
+	if(offdiag) v[e->col*mm+e->row] +=e->elem;
+#endif
+	}
+}
+
+
+
+
+//constructor dense symmetric packed matrix from sparse
+//rhs must be flagged symmetric and square; the packed storage holds nn*(nn+1)/2 elements
+#define nn2 (nn*(nn+1)/2)
+export template <class T>
+NRSMat<T>::NRSMat(const SparseMat<T> &rhs)
+{
+if(!rhs.issymmetric()||rhs.nrows()!=rhs.ncols()) laerror("sparse matrix is not symmetric");
+nn=rhs.nrows();
+count=new int(1);
+v=new T[nn2];
+memset(v,0,nn2*sizeof(T));
+matel<T> *l=rhs.getlist();
+while(l)
+	{
+	//accumulate: an unsimplified list may hold several entries for one position,
+	//which the dense NRMat and NRVec constructors sum up as well
+	(*this)(l->row,l->col)+=l->elem;
+	l=l->next;
+	}
+}
+#undef nn2
+
+//constructor dense vector from sparse
+//a matrix with more than one row is indexed by row, anything else by column
+export template <class T>
+NRVec<T>::NRVec(const SparseMat<T> &rhs)
+{
+const bool byrow = rhs.nrows()>1;
+if(byrow && rhs.ncols()>1) laerror("cannot construct a vector from a sparse matrix with more than one row/column");
+nn= byrow ? rhs.nrows() : rhs.ncols();
+v=new T[nn]; 
+memset(v,0,nn*sizeof(T));
+count=new int(1);
+
+//entries are accumulated, so repeated positions add up
+for(matel<T> *e=rhs.getlist(); e; e=e->next)
+        {
+        if(byrow) v[e->row]+=e->elem;
+        else v[e->col]+=e->elem;
+        }
+}
+
+//assignment of a scalar matrix
+//the (defined, square) matrix is cleared and a is put on its diagonal unless a is negligible
+export template <class T>
+SparseMat<T> & SparseMat<T>::operator=(const T a)
+{
+if(!count ||nn<=0||mm<=0) laerror("assignment of scalar to undefined sparse matrix");
+if(nn!=mm) laerror("assignment of scalar to non-square sparse matrix");
+resize(nn,mm);//clear 
+#ifdef SPARSEEPSILON
+const bool vanishes = abs(a)<SPARSEEPSILON;
+#else
+const bool vanishes = a==(T)0;
+#endif
+if(!vanishes)
+	for(SPMatindex d=0; d<nn; ++d) add(d,d,a); //fill the diagonal
+return *this;
+}
+
+//add the scalar a to the diagonal of a defined square matrix (one new list entry per diagonal position)
+export template <class T>
+SparseMat<T> & SparseMat<T>::operator+=(const T a)
+{
+if(!count ||nn<=0||mm<=0) laerror("assignment of scalar to undefined sparse matrix");
+if(nn!=mm) laerror("assignment of scalar to non-square sparse matrix");
+if(a==(T)0) return *this;
+//the list is about to change: take a private copy if it is shared (makes also unsort),
+//as the other modifying members do
+copyonwrite();
+SPMatindex i;
+for(i=0;i<nn;++i) add(i,i,a);
+return *this;
+}
+
+//subtract the scalar a from the diagonal of a defined square matrix (one new list entry per diagonal position)
+export template <class T>
+SparseMat<T> & SparseMat<T>::operator-=(const T a)
+{
+if(!count ||nn<=0||mm<=0) laerror("assignment of scalar to undefined sparse matrix");
+if(nn!=mm) laerror("assignment of scalar to non-square sparse matrix");
+if(a==(T)0) return *this;
+//the list is about to change: take a private copy if it is shared (makes also unsort),
+//as the other modifying members do
+copyonwrite();
+SPMatindex i;
+for(i=0;i<nn;++i) add(i,i,-a);
+return *this;
+}
+
+
+
+//constructor from a dense symmetric matrix
+//stores the non-negligible elements with column<=row and flags the result as symmetric
+export template <class T>
+SparseMat<T>::SparseMat(const NRSMat<T> &rhs)
+{
+nn=rhs.nrows();
+mm=rhs.ncols();
+count=new int;
+*count=1;
+list=NULL;
+symmetric=1;
+colsorted=rowsorted=NULL;
+nonzero=0; //no sorted index yet; every member gets a defined value
+SPMatindex i,j;
+for(i=0;i<nn;++i)
+        for(j=0; j<=i;++j)
+		{
+		const T t(rhs(i,j));
+#ifdef SPARSEEPSILON
+		if(abs(t)>SPARSEEPSILON)
+#else
+		if(t)
+#endif
+			add(i,j,t);
+		}
+}
+
+//in-place transposition: exchange the indices of every element and the two dimensions
+export template <class T>
+void SparseMat<T>::transposeme()
+{
+if(!count) laerror("transposeme on undefined lhs");
+if(!list || symmetric) return; //nothing to do
+copyonwrite();//also unsort
+for(matel<T> *e=list; e; e=e->next) SWAP(e->row,e->col);
+SWAP(nn,mm);
+}
+
+//turn a symmetric matrix into a general one holding both triangles explicitly
+export template <class T>
+void SparseMat<T>::setunsymmetric()
+{
+if(symmetric)
+	{
+	unsort();
+	symmetric=0;
+	if(count)
+		{
+		copyonwrite();
+		//include the mirror picture of elements into the list
+		//(add() puts new elements in front of the head, so they are not visited again)
+		for(matel<T> *e=list; e; e=e->next)
+			{
+#ifdef SPARSEEPSILON
+			if(!(abs(e->elem)>SPARSEEPSILON)) continue;
+#endif
+			if(e->row!=e->col) add(e->col,e->row,e->elem); //not OK for complex-hermitean
+			}
+		}
+	}
+}
+
+
+//multiply every stored element by the scalar a
+export template <class T>
+SparseMat<T> & SparseMat<T>::operator*=(const T a)
+{
+if(!count) laerror("operator*= on undefined lhs");
+const bool nothingtodo = !list || a==(T)1;
+if(nothingtodo) return *this;
+if(a==(T)0)
+	resize(nn,mm); //the product vanishes: clear the matrix, the loop below then finds no elements
+else
+	copyonwrite();
+
+for(matel<T> *e=list; e; e=e->next) e->elem*=a;
+return *this;
+}
+
+//sum over the stored elements of elem*rhs[row][col];
+//for a symmetric matrix an off-diagonal element also contributes with its mirror position
+const double SparseMat<double>::dot(const NRMat<double> &rhs) const
+{
+double acc=0;
+for(matel<double> *e=list; e; e=e->next)
+	{
+	acc+= e->elem*rhs[e->row][e->col];
+	if(symmetric&&e->row!=e->col) acc+=e->elem*rhs[e->col][e->row];
+	}
+return acc;
+}
+
+
+//this := beta*this + alpha*op(a)*x for a sparse matrix a
+//op(a) is a itself when trans is 'n' or 'N' and its transpose otherwise; for a symmetric a
+//every stored element also acts as its mirror image, so the flag does not select a loop
+template<class T>
+void NRVec<T>::gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec<T> &x)
+{
+const bool transp = tolower(trans)!='n'; //not OK for complex
+//the size checks use the same case-insensitive flag as the loops below,
+//so that 'N' is not checked as a transposition and then applied as a plain product
+if((transp?a.nrows():a.ncols())!= (SPMatindex)x.size()) laerror("incompatible sizes in gemv");
+//the loops write up to index (rows of op(a))-1 of this vector
+if((SPMatindex)size() < (transp?a.ncols():a.nrows())) laerror("incompatible sizes in gemv");
+copyonwrite();
+if(beta!=(T)0) (*this) *= beta;
+else memset(v,0,nn*sizeof(T));
+
+matel<T> *l=a.getlist();
+
+if(alpha==(T)0 || !l) return;
+const T *vec=x.v;
+const bool sym=a.issymmetric();
+
+if(alpha==(T)1) //spare the multiplications by alpha
+	{
+	if(sym)
+		for(; l; l=l->next)
+			{
+			v[l->row]+= l->elem*vec[l->col];
+			if(l->row!=l->col) v[l->col]+= l->elem*vec[l->row];
+			}
+	else if(transp)
+		for(; l; l=l->next) v[l->col]+= l->elem*vec[l->row];
+	else
+		for(; l; l=l->next) v[l->row]+= l->elem*vec[l->col];
+	}
+else
+	{
+	if(sym)
+		for(; l; l=l->next)
+			{
+			v[l->row]+= alpha*l->elem*vec[l->col];
+			if(l->row!=l->col) v[l->col]+= alpha*l->elem*vec[l->row];
+			}
+	else if(transp)
+		for(; l; l=l->next) v[l->col]+= alpha*l->elem*vec[l->row];
+	else
+		for(; l; l=l->next) v[l->row]+= alpha*l->elem*vec[l->col];
+	}
+}
+
+
+//multiplication with dense vector from both sides
+//returns this*vec, or transpose(this)*vec when transp is set
+template <class T>
+const NRVec<T> SparseMat<T>::multiplyvector(const NRVec<T> &vec, const bool transp) const
+{
+//the vector has to match the rows for the transposed product and the columns otherwise
+const SPMatindex needed = transp ? nn : mm;
+if(needed!=(SPMatindex)vec.size()) laerror("incompatible sizes in sparsemat*vector");
+NRVec<T> result(transp?mm:nn);
+result.gemv((T)0, *this, transp?'t':'n', (T)1., vec);
+return result;
+}
+
+
+//row vector times sparse matrix; the result has mat.ncols() elements
+template <class T>
+const NRVec<T> NRVec<T>::operator*(const SparseMat<T> &mat) const
+{
+if(mat.nrows()!= (SPMatindex)size()) laerror("incompatible sizes in vector*sparsemat");
+NRVec<T> result((T)0,mat.ncols());
+const bool mirror=mat.issymmetric();
+for(matel<T> *e=mat.getlist(); e; e=e->next)
+        {
+        result.v[e->col]+= e->elem*v[e->row];
+        //a symmetric matrix stores an off-diagonal element once, use it for the mirror position too
+        if(mirror&&e->row!=e->col) result.v[e->row]+= e->elem*v[e->col];
+        }
+return result;
+}
+
+//sum of all stored elements lying on the diagonal
+template<class T>
+const T SparseMat<T>::trace() const
+{
+T sum(0);
+for(matel<T> *e=list; e; e=e->next)
+	{
+	if(e->row!=e->col) continue;
+	sum+= e->elem;
+	}
+return sum;
+}
+
+
+//not OK for complex hermitean
+//square root of the sum of squares of the stored elements; a nonzero scalar is subtracted
+//from every stored diagonal element first, and off-diagonal elements of a symmetric matrix count twice
+template<class T>
+const T SparseMat<T>::norm(const T scalar) const
+{
+if(!list) return T(0);
+const_cast<SparseMat<T> *>(this)->simplify(); 
+
+const bool shifted = scalar!=(T)0;
+T sum(0);
+for(matel<T> *e=list; e; e=e->next)
+	{
+	const bool diagonal = e->row==e->col;
+	T val=e->elem;
+	if(shifted && diagonal) val-=scalar;
+	const T sq=val*val;
+	sum+= sq;
+	if(symmetric && !diagonal) sum+=sq; //the mirrored element
+	}
+return sqrt(sum); //not OK for int, would need traits technique
+}
+
+
+//this += alpha*x, or alpha*transpose(x) when transp is set
+template<class T>
+void SparseMat<T>::axpy(const T alpha, const SparseMat<T> &x, const bool transp)
+{
+const bool mismatch = transp ? (mm!=x.nn||nn!=x.mm) : (nn!=x.nn||mm!=x.mm);
+if(mismatch) laerror("incompatible dimensions for axpy");
+if(count) copyonwrite();
+else {count=new int(1); list=NULL;}
+
+if(alpha==(T)0||x.list==NULL) return;
+if(!transp||x.symmetric)
+	{
+	//a plain sum or difference is left to the operators
+	if(alpha==(T)1) {*this +=x; return;}
+	if(alpha==(T)-1) {*this -=x; return;}
+	}
+if(symmetric!=x.symmetric) laerror("general axpy not supported for different symmetry types");
+//now does not matter if both are general or both symmetric (transposition will not matter)
+
+for(matel<T> *e=x.list; e; e=e->next)
+	{
+	const T scaled=alpha*e->elem;
+#ifdef SPARSEEPSILON
+	if(!(abs(scaled)>SPARSEEPSILON)) continue; //negligible
+#endif
+	if(transp) add(e->col,e->row,scaled);
+	else add(e->row,e->col,scaled);
+	}
+}
+
+
+//supervector dot product: sum of products of the elements found at identical (row,col) positions
+//of both matrices; repeated entries of one position contribute in all combinations
+template<class T>
+const T SparseMat<T>::dot(const SparseMat<T> &rhs) const //complex conj. not implemented yet
+{
+if(nn!=rhs.nn || mm!=rhs.mm) laerror("dot of incompatible sparse matrices");
+if(symmetric||rhs.symmetric) laerror("dot of symmetric sparse matrices not implemented");
+
+T result=0;
+if(list && rhs.list) //both nonzero
+    {
+    unsigned int na=sort(0);
+    unsigned int nb=rhs.sort(0);
+
+    //now merge the sorted lists
+    //NOTE(review): relies on sort(0) ordering rowsorted by row and, within a row, by column - confirm in sort()
+    unsigned int i,j;
+    j=0;
+    for(i=0; i<na;i++)
+        {
+        const SPMatindex ra=rowsorted[i]->row;
+        const SPMatindex ca=rowsorted[i]->col;
+        //skip the rhs entries lying strictly before (ra,ca); columns are compared only inside row ra,
+        //so entries of later rows are neither skipped nor matched through their column alone
+        while(j<nb && (rhs.rowsorted[j]->row<ra || (rhs.rowsorted[j]->row==ra && rhs.rowsorted[j]->col<ca))) j++;
+
+        if(j==nb) break; //we can exit the i-loop, no suitable element in b any more
+        if(rhs.rowsorted[j]->row==ra && rhs.rowsorted[j]->col==ca)
+                {
+                const T tmp=rowsorted[i]->elem;
+                /*j remembers the position, k forwards in the rhs.rowsorted to find all combinations*/
+                unsigned int k=j;
+                do {
+                        result += tmp*rhs.rowsorted[k]->elem;
+                        k++;
+                        } while(k<nb && (rhs.rowsorted[k]->row == ra) && (rhs.rowsorted[k]->col == ca));
+                }
+        /*else skip in left operand*/
+        }
+    }
+return result;
+}
+
+
+
+//product of two general (not symmetric-flagged) sparse matrices, returned as a new nn x rhs.mm matrix
+//every pair (left element, right element) with left column == right row adds one term to the result
+template<class T>
+const SparseMat<T> SparseMat<T>::operator*(const SparseMat<T> &rhs) const
+{
+if(mm!=rhs.nn) laerror("product of incompatible sparse matrices");
+if(symmetric||rhs.symmetric) laerror("product of symmetric sparse matrices not implemented");
+
+SparseMat<T> result(nn,rhs.mm);
+if(list && rhs.list) //both nonzero
+    {
+    //NOTE(review): sort(1) is assumed to fill colsorted ordered by column, sort(0) to fill rowsorted
+    //ordered by row, both returning the number of elements - sort() is not visible here, confirm
+    unsigned int na=sort(1);
+    unsigned int nb=rhs.sort(0);
+
+    //now merge the sorted lists
+    register unsigned int i,j;
+    register SPMatindex rb=0,ca;
+    j=0;
+    for(i=0; i<na;i++)
+        {
+        ca=colsorted[i]->col;
+        //j only moves forward; it stays at the first rhs element whose row is not below ca
+        while(j<nb && (rb=rhs.rowsorted[j]->row) <ca) j++; /*skip in rhs.rowsorted*/
+	if(j==nb) break; //we can exit the i-loop, no suitable element in mb any more
+        if(rb==ca)
+                {
+        	T tmp=colsorted[i]->elem;
+                register unsigned int k;
+                /*j remembers the position, k forwards in the rhs.rowsorted to find all combinations*/
+                k=j;
+                do {
+                        result.add(colsorted[i]->row,rhs.rowsorted[k]->col,tmp*rhs.rowsorted[k]->elem);
+                        k++;
+                        } while(k<nb && ((rhs.rowsorted[k]->row) == ca));
+                }
+        /*else skip in left operand*/
+        }
+    result.simplify();//otherwise number of terms tends to grow exponentially
+    }
+return result;
+}
+
+
+
+//this := alpha*op(a)*op(b) + beta*this for general (not symmetric-flagged) a and b
+//op() is the identity for the flag 'n' (lower case only) and the transposition for any other character
+//with beta==0 this matrix is resized (cleared) to the dimensions of the product first
+template <class T>
+void SparseMat<T>::gemm(const T beta, const SparseMat<T> &a, const char transa, const SparseMat<T> &b, const char transb, const T alpha)
+{
+//l x k are the dimensions of op(a), kk x ll those of op(b)
+SPMatindex l(transa=='n'?a.nn:a.mm);
+SPMatindex k(transa=='n'?a.mm:a.nn);
+SPMatindex kk(transb=='n'?b.nn:b.mm);
+SPMatindex ll(transb=='n'?b.mm:b.nn);
+if(a.symmetric||b.symmetric) laerror("symmetric sparse matrices not supported in gemm");
+
+if(beta==(T)0) resize(l,ll); //empty matrix
+else	*this *= beta; //takes care about beta=1
+if(l!=nn|| ll!=mm||k!=kk) laerror("incompatible sparse matrices in gemm");
+
+if(alpha==(T)0 || !a.list ||!b.list) return;
+copyonwrite();
+
+//regular case, specialize for transpositions
+//ma is ordered by the summation index of op(a), mb by the summation index of op(b)
+//NOTE(review): assumes sort(1)/sort(0) order colsorted/rowsorted by column/row and return the element count - confirm in sort()
+matel<T> **ma,**mb;
+unsigned int na,nb;
+bool tra= transa!='n';
+bool trb= transb!='n';
+if(!tra) {na=a.sort(1); ma=a.colsorted;} else {na=a.sort(0); ma=a.rowsorted;}
+if(!trb) {nb=b.sort(0); mb=b.rowsorted;} else {nb=b.sort(1); mb=b.colsorted;}
+
+//now merge the sorted lists
+register unsigned int i,j;
+register SPMatindex rb=0,ca,row;
+j=0;
+for(i=0; i<na;i++)
+        {
+        //ca is the summation index and row the result row of the current element of op(a)
+        ca=tra?ma[i]->row:ma[i]->col;
+	row=tra?ma[i]->col:ma[i]->row;
+        while(j<nb && (rb=trb?mb[j]->col:mb[j]->row) <ca) j++; /*skip in mb*/ 
+	if(j==nb) break; //we can exit the i-loop, no suitable element in mb any more
+        if(rb==ca)
+                {
+		T tmp=alpha * ma[i]->elem;
+                register unsigned int k;
+                /*j remembers the position, k forwards in the mb to find all combinations*/
+                k=j;
+                do {
+			register SPMatindex col; 
+		     col=trb?mb[k]->row:mb[k]->col;
+		     //when this matrix is flagged symmetric only the terms with row<=col are added
+		     if(!symmetric||row<=col) add(row,col,tmp*mb[k]->elem);
+                        k++;
+                        } while(k<nb && ((trb?mb[k]->col:mb[k]->row) == ca));
+                }
+        /*else skip in ma*/
+        }
+
+simplify();
+}
+
+
+
+
+#ifdef _GLIBCPP_NO_TEMPLATE_EXPORT
+
+//Explicit instantiation of everything defined as a template in this file, for compilers without
+//template export. Each out-of-line member template defined here needs an entry,
+//otherwise its users in other translation units do not link.
+//(no // comments inside the macro: the line continuation would swallow the following lines)
+#define INSTANTIZE(T) \
+template ostream& operator<<(ostream &s, const SparseMat<T> &x); \
+template istream& operator>>(istream &s, SparseMat<T> &x); \
+template void SparseMat<T>::copyonwrite(); \
+template void SparseMat<T>::resize(const SPMatindex n, const SPMatindex m); \
+template void SparseMat<T>::unsort(); \
+template unsigned int SparseMat<T>::sort(int type) const; \
+template unsigned int SparseMat<T>::length() const; \
+template void SparseMat<T>::deletelist(); \
+template void SparseMat<T>::simplify(); \
+template void SparseMat<T>::copylist(const matel<T> *l); \
+template void SparseMat<T>::add(const SPMatindex n, const SPMatindex m, const T elem); \
+template SparseMat<T> & SparseMat<T>::operator=(const SparseMat<T> &rhs); \
+template SparseMat<T> & SparseMat<T>::operator+=(const SparseMat<T> &rhs); \
+template SparseMat<T> & SparseMat<T>::operator-=(const SparseMat<T> &rhs); \
+template SparseMat<T>::SparseMat(const NRMat<T> &rhs); \
+template SparseMat<T>::SparseMat(const NRSMat<T> &rhs); \
+template void SparseMat<T>::transposeme(); \
+template SparseMat<T> & SparseMat<T>::operator*=(const T a); \
+template void SparseMat<T>::setunsymmetric(); \
+template SparseMat<T> & SparseMat<T>::operator=(const T a); \
+template SparseMat<T> & SparseMat<T>::operator+=(const T a); \
+template SparseMat<T> & SparseMat<T>::operator-=(const T a); \
+template NRMat<T>::NRMat(const SparseMat<T> &rhs); \
+template NRSMat<T>::NRSMat(const SparseMat<T> &rhs); \
+template NRVec<T>::NRVec(const SparseMat<T> &rhs); \
+template const NRVec<T> SparseMat<T>::operator*(const NRVec<T> &vec) const; \
+template const NRVec<T> NRVec<T>::operator*(const SparseMat<T> &mat) const; \
+template SparseMat<T> & SparseMat<T>::join(SparseMat<T> &rhs); \
+template const T SparseMat<T>::trace() const; \
+template const T SparseMat<T>::norm(const T scalar) const; \
+template void SparseMat<T>::axpy(const T alpha, const SparseMat<T> &x, const bool transp); \
+template const SparseMat<T> SparseMat<T>::operator*(const SparseMat<T> &rhs) const; \
+template const T SparseMat<T>::dot(const SparseMat<T> &rhs) const; \
+template void SparseMat<T>::gemm(const T beta, const SparseMat<T> &a, const char transa, const SparseMat<T> &b, const char transb, const T alpha); \
+template void NRVec<T>::gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec<T> &x); \
+template void SparseMat<T>::addsafe(const SPMatindex n, const SPMatindex m, const T elem); \
+template SparseMat<T> & SparseMat<T>::addtriangle(const SparseMat<T> &rhs, const bool lower, const char sign); \
+template const NRVec<T> SparseMat<T>::multiplyvector(const NRVec<T> &vec, const bool transp) const;
+
+
+INSTANTIZE(double)
+
+
+// some functions are not OK for hermitean! INSTANTIZE(complex<double>)
+
+#endif
diff --git a/sparsemat.h b/sparsemat.h
new file mode 100644
index 0000000..96468ee
--- /dev/null
+++ b/sparsemat.h
@@ -0,0 +1,220 @@
+//for vectors and dense matrices we shall need
+#include "la.h"
+
+//the larger of two values; a is returned unless b is strictly greater
+template<class T>
+inline const T MAX(const T &a, const T &b)
+        {
+        if(b > a) return b;
+        return a;
+        }
+
+//exchange the values of a and b through a temporary copy
+template<class T>
+inline void SWAP(T &a, T &b)
+        {
+        T saved(a);
+        a=b;
+        b=saved;
+        }
+
+
+//Threshold below which elements are neglected. When it is not defined, only tests for an exact
+//zero remain, which might be even faster; in practice the code
+//seems to perform better with a threshold, in spite of abs() tests
+#define  SPARSEEPSILON 1e-13 
+
+//type of the row and column indices of a sparse matrix
+typedef unsigned int SPMatindex;
+//signed counterpart for index differences
+typedef int SPMatindexdiff; //more clear would be to use traits
+
+//element of a linked list
+//one stored matrix element: its value, its position and the link to the following element
+template<class T>
+struct matel
+        {
+        T elem; //value
+        SPMatindex row; //row index
+        SPMatindex col; //column index
+        matel *next; //following element, NULL at the end of the list
+        };
+
+
+//Sparse matrix stored as a singly linked list of elements (matel).
+//The list can be shared between several objects through the counter *count; members that modify
+//it call copyonwrite() first. A list may hold several entries for one position until simplify() is called.
+//With the symmetric flag set, an off-diagonal element stands for its mirror image as well.
+template <class T>
+class SparseMat {
+protected:
+	SPMatindex nn; //number of rows
+        SPMatindex mm; //number of columns
+	bool symmetric; //see the class comment
+	unsigned int nonzero; //presumably the number of entries of the sorted arrays - TODO confirm in sort()
+        int *count; //number of objects sharing list; NULL for an undefined matrix
+	matel<T> *list; //head of the element list, NULL for an empty matrix
+private:
+	matel<T> **rowsorted; //NULL terminated
+	matel<T> **colsorted; //NULL terminated
+	void unsort();
+	void deletelist();
+	void copylist(const matel<T> *l);
+public:
+	//iterator
+	//forward iterator over the list elements; end() is the NULL pointer
+        typedef class iterator {
+        private:
+                matel<T> *p;
+        public:
+                //note: the default constructor leaves p unset
+                iterator() {};
+                ~iterator() {};
+                iterator(matel<T> *list): p(list) {};
+                bool operator==(const iterator rhs) const {return p==rhs.p;}
+                bool operator!=(const iterator rhs) const {return p!=rhs.p;}
+                iterator operator++() {return p=p->next;}
+                iterator operator++(int) {matel<T> *q=p; p=p->next; return q;}
+                matel<T> & operator*() const {return *p;}
+                matel<T> * operator->() const {return p;}
+        };
+        iterator begin() const {return list;}
+        iterator end() const {return NULL;}
+
+	//constructors etc.
+	//the default constructor yields an undefined matrix (no counter), the (n,m) one an empty defined matrix
+	inline SparseMat() :nn(0),mm(0),symmetric(0),nonzero(0),count(NULL),list(NULL),rowsorted(NULL),colsorted(NULL) {};
+	inline SparseMat(const SPMatindex n, const SPMatindex m) :nn(n),mm(m),symmetric(0),nonzero(0),count(new int(1)),list(NULL),rowsorted(NULL),colsorted(NULL) {};
+	SparseMat(const SparseMat &rhs); //copy constructor
+	inline int getcount() const {return count?*count:0;}
+	explicit SparseMat(const NRMat<T> &rhs); //construct from a dense one
+	explicit SparseMat(const NRSMat<T> &rhs); //construct from a dense symmetric one
+	SparseMat & operator=(const SparseMat &rhs);
+	SparseMat & operator=(const T a);          //assign a to diagonal
+    	SparseMat & operator+=(const T a);         //add a to diagonal
+	SparseMat & operator-=(const T a);         //subtract a from diagonal
+        SparseMat & operator*=(const T a);         //multiply by a scalar
+        SparseMat & operator+=(const SparseMat &rhs);
+	SparseMat & addtriangle(const SparseMat &rhs, const bool lower, const char sign);
+        SparseMat & join(SparseMat &rhs); //more efficient +=, rhs will be emptied
+        SparseMat & operator-=(const SparseMat &rhs);
+	inline const SparseMat operator+(const T &rhs) const {return SparseMat(*this) += rhs;}
+        inline const SparseMat operator-(const T &rhs) const {return SparseMat(*this) -= rhs;}
+        inline const SparseMat operator*(const T &rhs) const {return SparseMat(*this) *= rhs;}
+        inline const SparseMat operator+(const SparseMat &rhs) const {return SparseMat(*this) += rhs;} //must not be symmetric+general
+        inline const SparseMat operator-(const SparseMat &rhs) const {return SparseMat(*this) -= rhs;} //must not be symmetric+general
+	const NRVec<T> multiplyvector(const NRVec<T> &rhs, const bool transp=0) const; //sparse matrix * dense vector optionally transposed
+	inline const NRVec<T> operator*(const NRVec<T> &rhs) const {return multiplyvector(rhs);} //sparse matrix * dense vector
+	const SparseMat operator*(const SparseMat &rhs) const; 
+        void gemm(const T beta, const SparseMat &a, const char transa, const SparseMat &b, const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this, if this is symmetric, only half will be added onto it
+	const T dot(const SparseMat &rhs) const; //supervector dot product
+	const T dot(const NRMat<T> &rhs) const; //supervector dot product
+	inline ~SparseMat();
+	void axpy(const T alpha, const SparseMat &x, const bool transp=0); // this+= a*x(transposed)
+	inline matel<T> *getlist() const {return list;}
+	//replaces the list pointer only; counter and sorted arrays are left as they are
+	void setlist(matel<T> *l) {list=l;}
+	inline SPMatindex nrows() const {return nn;}
+        inline SPMatindex ncols() const {return mm;}
+	void resize(const SPMatindex n, const SPMatindex m);
+	void transposeme();
+	const SparseMat transpose() const;
+	inline void setsymmetric() {if(nn!=mm) laerror("non-square cannot be symmetric"); symmetric=1;}
+	inline void defineunsymmetric() {symmetric=0;} //just define and do nothing with it
+	void setunsymmetric();//unwind the matrix assuming it was indeed symmetric
+	inline bool issymmetric() const {return symmetric;}
+	unsigned int length() const;
+	void copyonwrite();
+	void simplify();
+	const T trace() const;
+	const T norm(const T scalar=(T)0) const; //is const only mathematically, not in internal implementation - we have to simplify first
+	unsigned int sort(int type) const;
+	//unchecked insertion: a new element is put in front of the list, without copy-on-write and without looking for an existing entry
+	inline void add(const SPMatindex n, const SPMatindex m, const T elem) {matel<T> *ltmp= new matel<T>; ltmp->next=list; list=ltmp; list->row=n; list->col=m; list->elem=elem;}
+	void addsafe(const SPMatindex n, const SPMatindex m, const T elem);
+};
+
+//stream input of a sparse matrix (defined elsewhere)
+template <class T>
+	extern istream& operator>>(istream  &s, SparseMat<T> &x);
+
+//stream output of a sparse matrix (defined elsewhere)
+template <class T>
+	extern ostream& operator<<(ostream &s, const SparseMat<T> &x);
+
+//destructor
+//drops the sorted arrays and one reference to the list; the last owner frees list and counter
+template <class T>
+SparseMat<T>::~SparseMat()
+{
+	unsort();
+        if(count)
+                {
+                (*count)--;
+                if(*count<=0)
+                        {
+                        deletelist();
+                        delete count;
+                        }
+                }
+}
+
+//copy constructor (sort arrays are not going to be copied)
+//a non-empty list is shared with rhs, an empty rhs yields an independent empty matrix
+template <class T>
+SparseMat<T>::SparseMat(const SparseMat<T> &rhs)
+{
+#ifdef debug
+if(! &rhs) laerror("SparseMat copy constructor with NULL argument");
+#endif
+        nn=rhs.nn;
+        mm=rhs.mm;
+        symmetric=rhs.symmetric;
+        if(rhs.list&&!rhs.count) laerror("some inconsistency in SparseMat contructors or assignments");
+        nonzero=0;
+        colsorted=rowsorted=NULL;
+        list=rhs.list;
+        if(list)
+                {
+                //share the list and register as one more owner
+                count=rhs.count;
+                (*count)++;
+                }
+        else
+                count=new int(1); //make the matrix defined, but empty and not shared
+}
+
+//transposed copy; a symmetric matrix just shares its list with the result
+template <class T>
+const SparseMat<T> SparseMat<T>::transpose() const
+{
+if(list&&!count) laerror("some inconsistency in SparseMat transpose");
+SparseMat<T> result;
+result.nn=mm;
+result.mm=nn;
+result.symmetric=symmetric;
+result.colsorted=result.rowsorted=NULL;
+result.nonzero=0;
+if(!symmetric) //really transpose it
+	{
+	result.count=new int(1);
+	result.list=NULL;
+	for(matel<T> *e=list; e; e=e->next) result.add(e->col,e->row,e->elem);
+	}
+else
+	{
+	result.list=list;
+	if(list)
+		{
+		result.count=count;
+		(*result.count)++;
+		}
+	else
+		result.count=new int(1); //make the matrix defined, but empty and not shared
+	}
+return result;
+}
+
+
+
+//op(x)*op(y) - op(y)*op(x), where op transposes x when trx is set and y when tryy is set
+template<class T>
+inline const SparseMat<T> commutator ( const SparseMat<T> &x, const SparseMat<T> &y, const bool trx=0, const bool tryy=0)
+{
+const char opx = trx ? 't' : 'n';
+const char opy = tryy ? 't' : 'n';
+SparseMat<T> r;
+r.gemm((T)0,x,opx,y,opy,(T)1);
+r.gemm((T)1,y,opy,x,opx,(T)-1); //saves a temporary and simplifies the whole sum
+return r;
+}
+
+//op(x)*op(y) + op(y)*op(x), where op transposes x when trx is set and y when tryy is set
+template<class T>
+inline const SparseMat<T> anticommutator ( const SparseMat<T> &x, const SparseMat<T> &y, const bool trx=0, const bool tryy=0)
+{
+const char opx = trx ? 't' : 'n';
+const char opy = tryy ? 't' : 'n';
+SparseMat<T> r;
+r.gemm((T)0,x,opx,y,opy,(T)1);
+r.gemm((T)1,y,opy,x,opx,(T)1); //saves a temporary and simplifies the whole sum
+return r;
+}
+
+//add sparse to dense
+//every list entry of rhs is added to its position; for a symmetric rhs also to the mirror position
+template<class T>
+NRMat<T> & NRMat<T>::operator+=(const SparseMat<T> &rhs)
+{
+if(nn!=rhs.nrows()||mm!=rhs.ncols()) laerror("incompatible matrices in +=");
+copyonwrite(); //the data are changed in place
+matel<T> *l=rhs.getlist();
+bool sym=rhs.issymmetric();
+while(l)
+        {
+#ifdef MATPTR
+        v[l->row][l->col] +=l->elem;
+        if(sym && l->row!=l->col) v[l->col][l->row] +=l->elem;
+#else
+        v[l->row*mm+l->col] +=l->elem;
+         if(sym && l->row!=l->col) v[l->col*mm+l->row] +=l->elem;
+#endif
+        l=l->next;
+        }
+return *this; //flowing off the end of a function returning a reference is undefined behaviour
+}
+
+
diff --git a/sparsemat_traits.h b/sparsemat_traits.h
new file mode 100644
index 0000000..c2bcf35
--- /dev/null
+++ b/sparsemat_traits.h
@@ -0,0 +1,15 @@
+////////////////////////////////////////////////////////////////////////////
+//traits classes
+
+#ifndef _SPARSEMAT_TRAITS_INCL
+#define _SPARSEMAT_TRAITS_INCL
+
+
+//traits specialization describing SparseMat<double>
+template<> struct NRMat_traits<SparseMat<double> > {
+typedef double elementtype;
+typedef SparseMat<double> producttype;
+//norm of x, forwarded to the member function
+static double norm (const SparseMat<double> &x)
+	{
+	return x.norm();
+	}
+//s += c*x, forwarded to the member function
+static void axpy (SparseMat<double>&s, const SparseMat<double> &x, const double c)
+	{
+	s.axpy(c,x);
+	}
+};
+
+#endif
diff --git a/strassen.cc b/strassen.cc
new file mode 100644
index 0000000..209248b
--- /dev/null
+++ b/strassen.cc
@@ -0,0 +1,31 @@
+#include "la.h"
+/*Strassen algorithm*/
+// called routine is fortran-compatible
+// the implementations are external; the first 13 arguments of fmm follow the pattern of a BLAS-like gemm call,
+// presumably followed by an auxiliary work array and its size - TODO confirm against the strassen library
+extern "C" void fmm(const char c_transa,const char c_transb,const int m,const int n,const int k,const double alpha,
+                const double *a,const int lda,const double *b,const int ldb,const double beta,double *c,const int ldc,
+                double *d_aux,int i_naux);
+extern "C" void strassen_cutoff(int c, int c1, int c2, int c3);
+
+//forwards the four cutoff parameters unchanged to the external strassen_cutoff routine
+void NRMat<double>::s_cutoff(const int c, const int c1, const int c2, const int c3) const
+{ strassen_cutoff(c,c1,c2,c3);}
+//matrix product through the external fmm routine; this must already have the dimensions of op(a)*op(b)
+//op() is the identity for the flag 'n' (lower case only) and the transposition otherwise
+void NRMat<double>::strassen(const double beta, const NRMat<double> &a, const char transa, const NRMat<double> &b, const char transb, const double alpha)
+{
+const bool plaina = transa=='n';
+const bool plainb = transb=='n';
+//l x k are the dimensions of op(a), kk x ll those of op(b)
+const int l = plaina ? a.nn : a.mm;
+const int k = plaina ? a.mm : a.nn;
+const int kk = plainb ? b.nn : b.mm;
+const int ll = plainb ? b.mm : b.nn;
+
+if(l!=nn|| ll!=mm||k!=kk) laerror("incompatible (or undefined size) matrices in strassen");
+
+copyonwrite();
+//swap transpositions and order of matrices
+fmm(transb,transa,mm,nn,k,alpha,b,b.mm, a, a.mm, beta,*this, mm,NULL,0);
+}
+
+//stub for f77 blas called from strassen routine
+//hands the message over to laerror()
+extern "C" void xerbla_(const char *msg)
+{
+laerror(msg);
+}
+
+
diff --git a/t.cc b/t.cc
new file mode 100644
index 0000000..aca14ee
--- /dev/null
+++ b/t.cc
@@ -0,0 +1,775 @@
+// g++ -D _GLIBCPP_NO_TEMPLATE_EXPORT -g testblas.cc testblas2.cc nrutil_modif.cc -L/usr/local/lib/atlas -lstrassen -lf77blas -lcblas -latlas -ltraceback -lbfd -liberty
+
+#include <time.h>
+#include "la.h"
+#include "traceback.h"
+#include "sparsemat.h"
+#include "matexp.h"
+#include "fourindex.h"
+
+
+extern void test(const NRVec<double> &);
+
+double ad; // global scratch value written by f1 and read by f2
+void f1(const double *c)
+{
+ad=*c; // remember the value c points to
+}
+
+void f2(double *c)
+{
+*c=ad; // write the remembered value through c
+}
+
+
+inline int randind(const int n)
+{
+return int(random()/(1.+RAND_MAX)*n); // random() scaled by n/(RAND_MAX+1) and truncated to int
+}
+
+complex<double> mycident (const complex<double>&x) {return x;} // identity function on complex numbers
+
+
+int main()
+{
+sigtraceback(SIGSEGV,1);
+sigtraceback(SIGABRT,1);
+sigtraceback(SIGBUS,1);
+sigtraceback(SIGFPE,1);
+NRVec<double> x(1.,10);
+NRVec<double> y(2.,10);
+NRVec<double> z(-2.,10);
+
+y.axpy(3,x);
+
+y+=z;
+/*
+cout <<y;
+NRVec<double> a(x);
+
+NRVec<double> b;
+b|=x;
+
+NRVec<double> c;
+c=a;
+
+y =10. *y  ;
+
+int i;
+for(i=0;i<y.size();i++) cout <<y[i] <<" ";
+cout <<"\n";
+
+cout << y*z <<"\n";
+
+z|=x;
+z[1]=5;
+
+cout <<"zunit= "<<z.unitvector()<<"\n";
+cout <<"z= "<<z<<"\n";
+test(x);
+
+x = x*5;
+
+
+cout <<"x= "<<x<<"\n";
+cout <<"y= "<<y<<"\n";
+
+NRVec<double> u;
+u=x+y;
+cout <<"u= "<<u<<"\n";
+
+NRMat<double> aa(0.,3,3);
+aa[0][0]=aa[1][1]=aa(2,2)=2.;
+
+NRMat<double> bb(aa);
+
+double *p;
+aa.copyonwrite(); p= &aa[2][2];
+*p=3.;
+bb.copyonwrite(); bb(0,2)=1.;
+
+cout << "aa= " <<aa <<"\n";
+cout << "bb= " <<bb <<"\n";
+cout <<"aa trace "<<aa.trace() <<"\n";
+cout << "bbt= " <<bb.transpose() <<"\n";
+NRMat<double> cc=aa & bb;
+cout << "aa o+ bb= " << cc <<"\n";
+cout << cc.rsum() <<"\n";
+cout << cc.csum() <<"\n";
+
+NRVec<double>w(3);
+w[0]=1; w[1]=2;w[2]=3;
+NRVec<double> v(0.,3);
+v.gemv(0.,bb,'n',1.,w);
+cout << " v= " <<v <<"\n";
+v.gemv(0.,bb,'t',1.,w);
+cout << " v= " <<v <<"\n";
+
+*/
+/*
+const int n=6000;
+NRMat<double> bb(1.,n,n);
+for(int i=0;i<n;i++) for(int j=0;j<n;j++) bb[i][j]=2.;
+for(int i=0;i<n;i++) for(int j=0;j<i;j++) {double t; t=bb[i][j] +bb[j][j]; bb[i][j]=t;bb[j][i]=t;}
+*/
+
+/*
+NRMat<double> amat,bmat,cmat;
+cin >>amat;
+cin >>bmat;
+cmat=amat*bmat;
+cout<<cmat;
+cmat.copyonwrite(); cmat[0][0]=0;
+NRMat<double> amat(1.,2,2);
+NRMat<double> bmat(amat);
+NRMat<double> dmat(amat);
+//NRMat<double>  cmat; cmat=bmat*2.;
+NRMat<double>  cmat(bmat*2); //more efficient
+dmat.copyonwrite(); dmat[0][0]=0;
+
+cout<<amat;
+cout<<bmat;
+cout<<cmat;
+cout<<dmat;
+
+
+NRMat<double> amat;
+NRVec<double>  avec;
+
+cin >>amat;
+cin >>avec;
+
+cout << amat*avec;
+cout << avec*amat;
+
+NRVec<double> avec(0.,10);
+
+f1(avec);
+f2(avec);
+
+NRVec<double> uu(3);
+uu[0]=1; uu[1]=2; uu[2]=3;
+cout << uu << (uu|uu) <<"\n";
+
+NRSMat<double> sa(0.,3);
+sa(0,0)=1; sa(0,2)=5; sa(2,2)=10;sa(1,0)=2;sa(1,1)=3; sa(2,1)=-1;
+
+NRSMat<double> sb(0.,3);
+sb(0,0)=-2; sb(0,2)=1; sb(2,2)=2;sb(1,0)=-1;sb(1,1)=7; sb(2,1)=3;
+
+cout << "symetr\n" <<sa << -sa <<"\n";
+cout << "symetr\n" <<sb <<"\n";
+
+cout << "sa*sb\n" << sa*sb <<"\n";
+cout << "sb*sa\n" << sb*sa <<"\n";
+
+NRMat<double> m10(10.,3,3);
+ cout << "10 + sa" << m10 + sa <<"\n";
+*/
+
+/*
+
+const int dim=256;
+NRMat<double> big1(dim,dim),big2(dim,dim),big3;
+for(int i=0;i<dim;i++)
+	for(int j=0;j<dim;j++)
+		{
+		big1[i][j]=i*i+j*j*j-3*j;
+		big2[i][j]=i*i/(j+1)+j*j-3*j;
+		}
+double t=clock()/((double) (CLOCKS_PER_SEC));
+big3= big1*big2;
+cout <<" big1*big2 "<<big3[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-t <<"\n";
+
+*/
+
+/*
+NRMat<double> atest, btest,ctest;
+{
+int cc,c1,c2,c3;
+cin >>cc>>c1>>c2>>c3;
+atest.s_cutoff(cc,c1,c2,c3);
+}
+cin>>atest;
+cin>>btest;
+
+NRMat<double> dtest(atest.nrows(),btest.ncols());
+dtest.gemm(0., atest, 't', btest, 'n', 1.);
+cout << dtest;
+
+NRMat<double> etest(atest.nrows(),btest.ncols());
+etest.strassen(0., atest, 't', btest, 'n', 1.);
+cout << etest;
+*/
+
+if(0)
+{
+int dim;
+cin >>dim;
+NRMat<double> big1(dim,dim),big2(dim,dim),big3,big4(dim,dim);
+for(int i=0;i<dim;i++)
+        for(int j=0;j<dim;j++)
+                {
+                big1[i][j]=i*i+j*j*j-3*j;
+                big2[i][j]=i*i/(j+1)+j*j-3*j;
+                }
+double t=clock()/((double) (CLOCKS_PER_SEC));
+big3= big1*big2;
+cout <<" classical big1*big2 "<<big3[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-t <<"\n";
+
+for (int c=64; c<=512;c+=64)
+	{
+	big4.s_cutoff(c,c,c,c);
+	t=clock()/((double) (CLOCKS_PER_SEC));
+	big4.strassen(0., big1, 'n', big2, 'n', 1.);
+	cout <<"cutoff "<<c<<" big1*big2 "<<big4[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-t <<"\n";
+	}
+}
+
+if(0)
+{
+NRMat<double> a(3,3),b;
+NRVec<double> v(3);
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= i*i+j; v[i]=10-i;}
+b=a;
+b*= sin(1.)+1;
+cout << a <<v;
+a.diagmultl(v);
+cout << a;
+b.diagmultr(v);
+cout << b;
+}
+
+if(0)
+{
+NRMat<double> a(3,3),b;
+NRVec<double> v(10);
+v[0]=2;v[1]=3;v[2]=1;v[3]=-3;v[4]=2;v[5]=-1;v[6]=3;v[7]=-2;v[8]=1;v[9]=1;
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; }
+cout <<a;
+cout << a.norm() <<"\n";
+b=a*a;
+cout << b.norm() <<"\n";
+cout << exp(a);
+cout << exp(a.norm()) <<"\n";
+cout << ipow(a,3);
+cout<<ipow(a,11);
+cout <<commutator(a,b);
+
+}
+
+if(0)
+{
+NRMat<double> a(3,3);
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; }
+NRSMat<double> b(a);
+NRMat<double> c(b);
+cout <<a;
+cout <<b;
+cout <<c;
+}
+
+if(0)
+{
+NRMat<double> a(3,3);
+a[0][0]=1; a[0][1]=2;a[0][2]=3;
+a[1][0]=4; a[1][1]=-5;a[1][2]=7;
+a[2][0]=-3;a[2][1]=10;a[2][2]=2;
+NRMat<double> b(2,3);
+b[0][0]=1;b[0][1]=2;b[0][2]=3;
+b[1][0]=2;b[1][1]=4;b[1][2]=6;
+cout <<a;
+cout <<b;
+linear_solve(a,&b);
+cout <<a;
+cout <<b;
+}
+
+if(0)
+{
+NRMat<double> a(3,3);
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a[i][j]= (i+j)/10.; }
+NRVec<double> b(3);
+cout <<a;
+diagonalize(a,b);
+cout <<a;
+cout <<b;
+}
+
+if(0)
+{
+NRSMat<double> a(3);
+NRMat<double>v(3,3);
+for(int i=0;i<3;i++) for(int j=0;j<3;j++) { a(i,j)= (i+j)/10.; }
+NRVec<double> b(3);
+cout <<a;
+NRMat<double>c=(NRMat<double>)a; //nebo NRMat<double>c(a);
+NRMat<double>d=exp(c);
+diagonalize(a,b,&v);
+cout <<b;
+cout <<v;
+cout <<d;
+diagonalize(d,b);
+cout <<b;
+cout <<d;
+}
+
+if(0)
+{
+NRMat<double> a;
+cin >>a ;
+NRMat<double> b=a.transpose();
+NRMat<double> u(a.nrows(),a.nrows()),v(a.ncols(),a.ncols());
+NRVec<double>s(a.ncols());
+singular_decomposition(a,&u,s,&v);
+//singular_decomposition(a,NULL,s,NULL); //this does not work when linked with static version of lapack, works with .so.3 version (from suse distrib)
+cout <<u;
+cout <<s;
+cout <<v;
+//singular_decomposition(b,&v,s,&u);
+//cout <<v;
+//cout <<s;
+//cout <<u;
+}
+
+if(0)
+{
+//diagonalize a general matrix and reconstruct it back; assume real eigenvalues
+//double aa[]={1,2,3,4,-5,7,-3,10,2};
+//NRMat<double> a(aa,3,3);
+NRMat<double> a;
+cin >>a;
+cout <<a ;
+int n=a.nrows();
+NRMat<double> u(n,n),v(n,n);
+NRVec<double>wr(n),wi(n);
+gdiagonalize(a,wr,wi,&u,&v,0);
+cout <<u;
+cout <<wr;
+cout <<wi;
+cout <<v;
+
+NRVec<double>z=diagofproduct(u,v,1);
+for(int i=0;i<a.nrows();++i) wr[i]/=z[i];//account for normalization of eigenvectors
+u.diagmultl(wr);
+v.transposeme();
+cout <<v*u;
+
+}
+
+if(0)
+{
+//diagonalize a general matrix and reconstruct it back; allow complex eigenvalues
+NRMat<double> a;
+cin >>a;
+cout <<a ; 
+int n=a.nrows();
+NRMat<complex<double> > u(n,n),v(n,n);
+NRVec<complex<double> >w(n);
+gdiagonalize(a,w,&u,&v);
+cout <<u;
+cout <<w;
+cout <<v;
+
+NRVec<complex<double> >z=diagofproduct(u,v,1,1);
+//NRMat<complex<double> > zz=u*v.transpose(1);
+cout <<z;
+//cout <<zz;
+for(int i=0;i<a.nrows();++i) w[i]/=z[i];//account for normalization of eigenvectors
+u.diagmultl(w);
+cout <<v.transpose(1)*u;
+
+}
+
+
+
+if(0)
+{
+SparseMat<double> a(4,4);
+NRVec<double> v(4);
+v[0]=1;v[1]=2;v[2]=3;v[3]=4;
+a=1.;
+a.copyonwrite();
+a.add(3,0,.5);
+a.add(0,2,.2);
+a.add(2,1,.1);
+a.add(3,3,1.);
+a.add(1,1,-1.);
+SparseMat<double> c(a);
+c*=10.;
+cout <<a;
+a.simplify();
+cout <<a;
+cout <<c;
+NRMat<double>b(c);
+cout <<b;
+cout << b*v;
+cout <<c*v;
+cout <<v*b;
+cout <<v*c;
+}
+
+if(0)
+{
+SparseMat<double> a(4,4),b(4,4);
+a=1.;
+a.copyonwrite();
+a.add(3,0,.5);
+b.add(0,2,.2);
+b.add(2,1,.1);
+b.add(3,3,1.);
+b.add(1,1,-1.);
+SparseMat<double>c=a+b;
+cout <<c;
+a.join(b);
+cout<<a;
+cout<<b;
+}
+
+if(0)
+{
+SparseMat<double> a(4,4),b(4,4);
+a=0.; b=2;
+a.add(3,0,.5);
+a.add(0,2,.2);
+a.add(1,1,1);
+a.add(1,0,.2);
+b.add(2,1,.1);
+b.add(3,3,1.);
+b.add(1,1,-1.);
+NRMat<double> aa(a),bb(b);
+SparseMat<double>c;
+NRMat<double>cc;
+//cout << NRMat<double>(c);
+//cout <<cc;
+//cout <<"norms "<<c.norm()<<" " <<cc.norm()<<endl;
+cout <<"original matrix \n"<<aa;
+cout <<(cc=exp(aa));
+c=exp(a);
+cout <<NRMat<double>(c);
+cout <<"norms2 "<<c.norm()<<" " <<cc.norm()<<endl;
+}
+
+#define sparsity (n/4)
+if(0)
+{
+for(int n=8; n<=1024*1024;n+=n)
+	{
+	SparseMat<double> aa(n,n);
+	cout << "\n\n\ntiming for size "<<n<<endl;
+	if(n<=512) {
+	NRMat<double> a(0.,n,n);
+	for(int i=0; i<sparsity;i++) a(randind(n),randind(n))=random()/(1.+RAND_MAX);
+	double t0=clock()/((double) (CLOCKS_PER_SEC));	
+	//cout <<a;
+	NRMat<double> b(exp(a));
+	//cout <<b;
+	cout <<"dense norm "<<b.norm() <<"\n";
+	cout << "test commutator " <<commutator(a,b).norm() <<endl;
+	double t1=clock()/((double) (CLOCKS_PER_SEC));    
+	cout << "dense time " <<n<<' '<< t1-t0 <<endl;
+	aa=SparseMat<double>(a);
+	}
+	else
+	{
+	for(int i=0; i<sparsity;i++) aa.add(randind(n),randind(n),random()/(1.+RAND_MAX));
+	}
+	//cout <<aa;
+	double t2=clock()/((double) (CLOCKS_PER_SEC));        
+	SparseMat<double> bb(exp(aa));
+	//cout <<bb;
+	cout <<"sparse norm "<<bb.norm() <<"\n";
+	cout << "test commutator " <<commutator(aa,bb).norm() <<endl;
+        double t3=clock()/((double) (CLOCKS_PER_SEC));
+	 cout <<"sparse length "<<bb.length()<<"\n";
+        cout << "sparse time "<<n<<' ' << t3-t2 <<endl;
+	}
+}
+
+if(1)
+{
+int n;
+cin>>n;
+	SparseMat<double> aa(n,n);
+	for(int i=0; i<sparsity;i++) aa.add(randind(n),randind(n),random()/(1.+RAND_MAX));
+	SparseMat<double> bb=exp(aa);
+	NRVec<double> v(n);
+	 for(int i=0; i<n;++i) v[i]=random()/(1.+RAND_MAX);
+	NRVec<double> res1=bb*v;
+	NRVec<double> res2=exptimes(aa,v);
+	cout <<"difference = "<<(res1-res2).norm()<<endl;
+}
+
+
+if(0)
+{
+SparseMat<double> a(4,4),b(4,4),d;
+a=0.; b=2;
+a.add(3,0,.5);
+a.add(0,2,.2);
+a.add(1,1,1);
+a.add(1,0,.2);
+b.add(2,1,.1);
+b.add(3,3,1.);
+b.add(1,1,-1.);
+NRMat<double> aa(a),bb(b),dd;
+SparseMat<double>c;
+NRMat<double>cc;
+
+c=commutator(a,b);
+cc=commutator(aa,bb);
+
+cout <<cc;
+cout <<NRMat<double>(c);
+cout <<"norms2 "<<c.norm()<<" " <<cc.norm()<<endl;
+}
+
+/*
+NRVec<double> v(10.,10);
+v+= 5.;
+cout <<v;
+*/
+if(0)
+{
+const int n=3;
+NRMat<double> a(n,n);
+for(int i=0;i<n;++i) for(int j=0;j<i;++j)
+	{
+	a(i,j)= random()/(1.+RAND_MAX);
+	a(j,i)= -a(i,j);
+	}
+NRMat<double> b; b|=a;
+NRVec<double> er(n),ei(n);
+NRMat<double> vr(n,n),vl(n,n);
+gdiagonalize(b,er,ei,&vl,&vr);
+cout <<er<<ei;
+cout <<"left eivec\n"<<vl <<"right eivec\n"<<vr;
+NRMat<double> u=exp(a*.125);
+cout <<"norms "<<u.norm() << ' '<<(u-1.).norm()<<endl;
+gdiagonalize(u,er,ei,&vl,&vr);
+cout <<er<<ei;
+cout <<"left eivec\n"<<vl <<"right eivec\n"<<vr;
+}
+
+if(0)
+{
+/*
+int n;
+cin>>n;
+NRMat<double> a(n,n);
+for(int i=0;i<n;++i) for(int j=0;j<i;++j)
+        {
+        a(i,j)= random()/(1.+RAND_MAX);
+        a(j,i)= -a(i,j);
+        }
+NRMat<double> b=exp(a);
+cout <<a;
+*/
+NRMat<double> a,b;
+cin >>b;
+int n=b.nrows();
+cout <<"difference from identity = "<<b.norm(1.)<<endl;
+
+NRMat<double> x(0.,n,n),x0;
+	double r;
+int i=0;
+do
+	{
+	x0=x;
+	NRMat<double> y=exp(x*-.5);
+	x+= y*b*y; 
+	x-= 1.;
+	x=(x-x.transpose())*.5;
+	cout <<"matrix x\n"<<x;
+	cout <<"iter "<<i <<" residue "<< (r=(exp(x)-b).norm())<<endl;
+	cout <<"iter "<<i <<" conv "<<(r=(x-x0).norm())<<endl;
+	++i;
+	} while(abs(r)>1e-10);
+cout <<"result\n"<<x<<endl;
+cout <<"exp(result)"<<exp(x)<<endl;
+NRMat<double> c=log(b); //matrixfunction(a,&mycident,1);
+cout <<c;
+NRMat<double> d=exp(c);
+cout <<"exp(log(x))\n"<<d;
+cout<<(d-b).norm()<<endl;
+
+}
+
+if(0)
+{
+int n;
+cin>>n;
+NRMat<double> a(n,n);
+for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
+        {
+        a(i,j)= .1*random()/(1.+RAND_MAX);
+        a(j,i)= a(i,j);
+        }
+NRMat<double> b=exp(a);
+NRMat<double> s=exp(a*.5);
+NRMat<double> y(0.,n,n);
+NRMat<double> z(0.,n,n);
+        double r;
+int i=0;
+y=b;z=1.;
+cout << "norm = "<<b.norm(1.)<<endl;
+do
+        {
+	NRMat<double> tmp=z*y*-1.+3.;
+	NRMat<double> ynew=y*tmp*.5;
+	z=tmp*z*.5;
+	y=ynew;
+        cout <<"iter "<<i <<" residue "<< (r=(y-s).norm())<<endl;
+        ++i;
+        } while(abs(r)>1e-10);
+}
+
+
+if(0)
+{
+int n=3;
+NRMat<double> a(n,n);
+ a(0,0)=1.;
+        a(0,1)=2.;
+        a(1,0)=2.;
+        a(1,1)=6.;
+a(2,2)=-4;
+a(0,2)=1;
+cout <<a;
+double d;
+NRMat<double> c=inverse(a,&d);
+cout <<a<<c;
+}
+
+if(0)
+{
+NRMat<double> a(3,3);
+NRMat<double> b=a;
+for(int i=1; i<4;i++) b=b*b;
+}
+
+if(0)
+{
+NRMat<double> a;
+cin >>a;
+NRMat<double> b=exp(a);
+NRMat<double> c=log(b);
+cout <<a;
+cout <<b;
+cout <<c;
+cout << (b-exp(c)).norm() <<endl;
+}
+
+if(00)
+{
+NRMat<double> a;
+cin >>a;
+NRMat<double> c=log(a); //matrixfunction(a,&mycident,1);
+cout <<c;
+NRMat<double> b=exp(c);
+cout <<"exp(log(x))\n"<<b;
+cout<<(b-a).norm()<<endl;
+}
+
+if(0)
+{
+//check my exponential with respect to spectral decomposition one
+NRSMat<double> a;
+cin >>a;
+NRMat<double> aa(a);
+NRMat<double> b=exp(aa);
+NRMat<double> c=matrixfunction(a,&exp);
+cout <<a;
+cout <<b;
+cout <<c;
+cout << (b-c).norm()/b.norm() <<endl;
+}
+
+if(0)
+{
+//verify BCH expansion
+NRMat<double> h;
+NRMat<double> t;
+cin >>h;
+cin >>t;
+NRMat<double> r1= exp(-t) * h * exp(t);
+NRMat<double> r2=BCHexpansion(h,t,30);
+cout <<r1;
+cout <<r2;
+cout <<"error = "<<(r1-r2).norm()<<endl;
+}
+
+if(0)
+{
+int n;
+cin >>n;
+SparseMat<double> a(n,n);
+for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
+        {
+        a.add(i,j,random()/(1.+RAND_MAX));
+        }
+a.setsymmetric();
+NRSMat<double> aa(a); 
+NRMat<double> aaa(a); 
+NRVec<double> w(n);
+NRMat<double> v(n,n);
+//cout <<aa;
+diagonalize(aa, w, &v,0);
+//cout <<w;
+//cout <<v;
+//cout << v*aaa*v.transpose(); 
+cout <<  (v*aaa*v.transpose() - diagonalmatrix(w)).norm()<<endl;
+}
+
+if(0)
+{
+NRMat<complex<double> > a;
+cin >>a;
+NRMat<complex<double> > b=exp(a);
+cout <<b;
+}
+
+if(0)
+{
+int n;
+cin >>n;
+//NRMat<double> a(n,n);
+NRSMat<double> a(n);
+for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
+        {
+        a(j,i)=a(i,j)=random()/(1.+RAND_MAX);
+        }
+cout <<a;
+NRMat<double> y(1,n);
+for(int i=0;i<n;++i) y(0,i)=random()/(1.+RAND_MAX);
+cout <<y;
+linear_solve(a,&y);
+cout << y;
+}
+
+if(0)
+{
+int n;
+cin >>n;
+SparseMat<double> a(n,n);
+int spars=n*n/3;
+        for(int i=0; i<spars;i++) a.add(randind(n),randind(n),random()/(1.+RAND_MAX));
+
+NRMat<double> aa(a);
+NRVec<double> v(aa[0],n*n);
+
+cout <<a;
+cout <<aa;
+cout <<v;
+
+cout <<"test "<<aa.dot(aa)<<endl;
+cout <<"test "<<v*v<<endl;
+cout <<"test "<<a.dot(aa)<<endl;
+cout <<"test "<<a.dot(a)<<endl;
+
+}
+
+}
+
+
+
diff --git a/vec.cc b/vec.cc
new file mode 100644
index 0000000..3187d25
--- /dev/null
+++ b/vec.cc
@@ -0,0 +1,348 @@
+#include <iostream>
+#include "vec.h"
+
+//////////////////////////////////////////////////////////////////////////////
+//// forced instantiation in the corresponding object file
+#define INSTANTIZE(T) \
+template ostream & operator<<(ostream &s, const NRVec< T > &x); \
+template istream & operator>>(istream  &s, NRVec< T > &x); \
+
+INSTANTIZE(double)
+INSTANTIZE(complex<double>)
+template class NRVec<double>; // explicit instantiation of a class template requires the class-key
+template class NRVec< complex<double> >;
+
+
+/*
+ * Templates first, specializations for BLAS next
+ */
+
+// conversion ctor: views the nn*mm elements of a matrix as one vector, sharing (not copying) its storage
+#ifndef MATPTR
+template <typename T>
+NRVec<T>::NRVec(const NRMat<T> &rhs)
+{
+	nn = rhs.nn*rhs.mm;
+	v = rhs.v;
+	count = rhs.count;
+	if(count) (*count)++; // guard against a null counter, as the NRVec copy ctor does
+}
+#endif
+
+// dtor: gives up one reference; the last owner frees the data and the counter
+template <typename T>
+NRVec<T>::~NRVec()
+{
+	if(!count) return; // never allocated, nothing to release
+	const int remaining = --(*count);
+	if(remaining > 0) return; // other vectors still use the data
+	if(v) delete[] (v);
+	delete count;
+}
+
+// detach from shared data: if other vectors reference the same buffer, continue on a private copy
+template <typename T>
+void NRVec<T>::copyonwrite()
+{
+#ifdef DEBUG
+  if(!count) laerror("probably an assignment to undefined vector");
+#endif
+  if(*count <= 1) return; // sole owner already, nothing to detach
+
+  // the remaining owners keep the old counter and the old buffer
+  --(*count);
+  count = new int;
+  *count = 1;
+  T *fresh = new T[nn];
+  memcpy(fresh, v, nn*sizeof(T));
+  v = fresh;
+}
+
+// Assignment: shallow copy, afterwards the vector shares the data of rhs
+template <typename T>
+NRVec<T> & NRVec<T>::operator=(const NRVec<T> &rhs)
+{
+  if (this == &rhs) return *this; // self-assignment changes nothing
+  // give up our reference; the last owner frees buffer and counter
+  if(count && --(*count) == 0)
+  {
+    delete[] v;
+    delete count;
+  }
+
+  // adopt the storage of rhs and register as one more owner
+  nn = rhs.nn;
+  v = rhs.v;
+  count = rhs.count;
+  if(count) (*count)++;
+  return *this;
+}
+
+// Resize: afterwards the vector privately owns n elements; data are not copied into a newly allocated buffer
+template <typename T>
+void NRVec<T>::resize(const int n)
+{
+#ifdef DEBUG
+  if(n<=0) laerror("illegal vector dimension");
+#endif
+  // shared data stay with the other owners, we only give up our reference
+  if(count && *count > 1) {
+    --(*count);
+    count = 0;
+    v = 0;
+    nn = 0;
+  }
+  if(count) {
+    // sole owner: the buffer is kept unless the length changes
+    if (n == nn) return;
+    delete[] v;
+    nn = n;
+    v = new T[nn];
+    return;
+  }
+  // nothing allocated (any more): start from scratch
+  count = new int;
+  *count = 1;
+  nn = n;
+  v = new T[nn];
+}
+
+// ostream << NRVec
+template <typename T>
+ostream & operator<<(ostream &s, const NRVec<T> &x)
+{
+  // format: the length on a line of its own, then all elements
+  // separated by blanks and terminated by a newline
+  const int len = x.size();
+  s << len << endl;
+  for(int idx=0; idx<len; ++idx) s << x[idx] << (idx+1 < len ? ' ' : '\n');
+  return s;
+}
+
+// istream >> NRVec
+template <typename T>
+istream & operator>>(istream &s, NRVec<T> &x)
+{
+  // reads the length first, resizes x to it, then reads that many elements
+  int len;
+  s >> len;
+  x.resize(len);
+  for(int idx=0; idx<len; ++idx) s >> x[idx];
+  return s;
+}
+
+// formatted print for NRVec: written by lawritemat as a 1 x nn matrix (nodim=1, issym=0)
+template<typename T>
+void NRVec<T>::fprintf(FILE *file, const char *format, const int modulo) const
+{
+	lawritemat(file, v, 1, nn, format, 1, modulo, 0);
+}
+
+// formatted scan for NRVec: reads the dimension with "%d", resizes, then reads each element with format
+template <class T>
+void NRVec<T>::fscanf(FILE *f, const char *format)
+{
+	int len;
+	const int got = std::fscanf(f, "%d", &len);
+	if(got != 1) laerror("cannot read vector dimension");
+	resize(len);
+	for (int idx=0; idx<len; ++idx) {
+		if (std::fscanf(f, format, &v[idx]) != 1) laerror("cannot read the vector eleemnt");
+	}
+}
+
+// assignment with a physical copy: afterwards *this privately owns a duplicate of the data of rhs
+template <typename T>
+NRVec<T> & NRVec<T>::operator|=(const NRVec<T> &rhs)
+{
+	if (this != &rhs) {
+#ifdef DEBUG
+		if (!rhs.v) laerror("unallocated rhs in NRVec operator |=");
+#endif
+		if (count)
+			if (*count > 1) { // shared: leave the data to the other owners
+				--(*count);
+				nn = 0;
+				count = 0;
+				v = 0;
+			}
+		if (nn != rhs.nn) {
+			delete[] v; v = 0; // clear the pointer, otherwise the freed buffer would be reused below
+			nn = rhs.nn;
+		}
+		if(!v) v = new T[nn];
+		if(!count) count = new int;
+		*count = 1;
+		memcpy(v, rhs.v, nn*sizeof(T));
+	}
+	return *this;
+}
+
+// unary minus (generic version): a new vector holding the negated elements
+template <typename T>
+const NRVec<T> NRVec<T>::operator-() const
+{
+	NRVec<T> negated(nn);
+	for (int k=nn-1; k>=0; --k) negated.v[k] = -v[k];
+	return negated;
+}
+
+// axpy call for T = double (not strided): this += alpha*x via cblas_daxpy; sizes are compared only with DEBUG
+void NRVec<double>::axpy(const double alpha, const NRVec<double> &x)
+{
+#ifdef DEBUG
+	if (nn != x.nn) laerror("axpy of incompatible vectors");
+#endif
+	copyonwrite();
+	cblas_daxpy(nn, alpha, x.v, 1, v, 1);
+}
+
+// axpy call for T = complex<double> (not strided): this += alpha*x via cblas_zaxpy; sizes are compared only with DEBUG
+void NRVec< complex<double> >::axpy(const complex<double> alpha, 
+			const NRVec< complex<double> > &x)
+{
+#ifdef DEBUG
+	if (nn != x.nn) laerror("axpy of incompatible vectors");
+#endif
+	copyonwrite();
+	cblas_zaxpy(nn, (void *)(&alpha), (void *)(x.v), 1, (void *)v, 1);
+}
+
+// axpy call for T = double (strided): this += alpha*x, reading nn values from x at the given stride; no size check is made
+void NRVec<double>::axpy(const double alpha, const double *x, const int stride)
+{
+	copyonwrite();
+	cblas_daxpy(nn, alpha, x, stride, v, 1);
+}
+
+// axpy call for T = complex<double> (strided): this += alpha*x, reading nn values from x at the given stride; no size check is made
+void NRVec< complex<double> >::axpy(const complex<double> alpha, 
+			const complex<double> *x, const int stride)
+{
+	copyonwrite();
+	cblas_zaxpy(nn, (void *)(&alpha), (void *)x, stride, v, 1);
+}
+
+// unary minus for T = double: copy of *this scaled by -1 with cblas_dscal
+const NRVec<double> NRVec<double>::operator-() const
+{
+	NRVec<double> result(*this);
+	result.copyonwrite();
+	cblas_dscal(nn, -1.0, result.v, 1);
+	return result;
+}
+const NRVec< complex<double> > 
+NRVec< complex<double> >::operator-() const
+{
+	NRVec< complex<double> > result(*this); // shallow copy first ...
+	result.copyonwrite(); // ... then detached, so *this is not modified
+	cblas_zdscal(nn, -1.0, (void *)(result.v), 1); // scale the complex data by the real factor -1
+	return result;
+}
+
+// assignment of scalar to every element
+template <typename T>
+NRVec<T> & NRVec<T>::operator=(const T &a)
+{
+	copyonwrite();
+	// a zero value is written with one memset, anything else element by element
+	const bool zero = (a == (T)0);
+	if(zero) memset(v, 0, nn*sizeof(T));
+	for (int idx=0; !zero && idx<nn; ++idx) v[idx] = a;
+	return *this;
+}
+
+// Normalization of NRVec<double>: scales the vector in place by 1/cblas_dnrm2; a zero norm is caught only with DEBUG
+NRVec<double> & NRVec<double>::normalize()
+{
+	double tmp;
+
+	tmp = cblas_dnrm2(nn, v, 1);
+#ifdef DEBUG
+	if(!tmp) laerror("normalization of zero vector");
+#endif
+	copyonwrite();
+	tmp = 1.0/tmp;
+	cblas_dscal(nn, tmp, v, 1);
+	return *this;
+}
+
+// Normalization of NRVec< complex<double> >: scales in place by 1/cblas_dznrm2; the real norm is held in a complex temporary for zscal
+NRVec< complex<double> > & NRVec< complex<double> >::normalize()
+{
+	complex<double> tmp;
+	tmp = cblas_dznrm2(nn, (void *)v, 1);
+#ifdef DEBUG
+	if(!(tmp.real()) && !(tmp.imag())) laerror("normalization of zero vector");
+#endif
+	copyonwrite();
+	tmp = 1.0/tmp;
+	cblas_zscal(nn, (void *)(&tmp), (void *)v, 1);
+	return *this;
+}
+
+// gemv call: this = beta*this + alpha*op(A)*x, where op is the identity for trans=='n' and the transpose otherwise
+void NRVec<double>::gemv(const double beta, const NRMat<double> &A, const char trans, const double alpha, const NRVec &x)
+{
+#ifdef DEBUG
+	if ((trans == 'n'?A.ncols():A.nrows()) != x.size())
+		laerror("incompatible sizes in gemv A*x");
+#endif
+	copyonwrite(); // dgemv overwrites v, so the data must not be shared with other vectors (axpy detaches likewise)
+	cblas_dgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans),
+			A.nrows(), A.ncols(), alpha, A[0], A.ncols(), x.v, 1, beta, v, 1);
+}
+void NRVec< complex<double> >::gemv(const complex<double> beta,
+		const NRMat< complex<double> > &A, const char trans, const complex<double> alpha, const NRVec &x)
+{
+#ifdef DEBUG
+	if ((trans == 'n'?A.ncols():A.nrows()) != x.size())
+		laerror("incompatible sizes in gemv A*x");
+#endif
+	copyonwrite(); // zgemv overwrites v, so the data must not be shared with other vectors (axpy detaches likewise)
+	cblas_zgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans),
+			A.nrows(), A.ncols(), (void *)(&alpha), (void *)A[0], A.ncols(),
+			(void *)x.v, 1, (void *)(&beta), (void *)v, 1);
+}
+
+// Vec * Mat: row vector times matrix, evaluated by dgemv as transpose(mat) * vec; result has mat.ncols() elements
+const NRVec<double> NRVec<double>::operator*(const NRMat<double> &mat) const
+{
+#ifdef DEBUG
+	if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat");
+#endif
+	int n = mat.ncols();
+	NRVec<double> result(n);
+	cblas_dgemv(CblasRowMajor, CblasTrans, nn, n, 1.0, mat[0], n, v, 1,
+			0.0, result.v, 1);
+	return result;
+}
+const NRVec< complex<double> > 
+NRVec< complex<double> >::operator*(const NRMat< complex<double> > &mat) const
+{
+#ifdef DEBUG
+	if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat");
+#endif
+	int n = mat.ncols(); // length of the result
+	NRVec< complex<double> > result(n);
+	cblas_zgemv(CblasRowMajor, CblasTrans, nn, n, &CONE, mat[0], n, v, 1,
+			&CZERO, result.v, 1); // plain transpose (CblasTrans), no conjugation
+	return result;
+}
+
+// Direct (outer) product Mat = Vec | Vec: nn x b.nn matrix built by a rank-1 update (dger) of a zero matrix
+const NRMat<double> NRVec<double>::operator|(const NRVec<double> &b) const
+{
+	NRMat<double> result(0.,nn,b.nn);
+	cblas_dger(CblasRowMajor, nn, b.nn, 1., v, 1, b.v, 1, result, b.nn);
+	return result;
+}
+const NRMat< complex<double> > 
+NRVec< complex<double> >::operator|(const NRVec< complex<double> > &b) const
+{
+	NRMat< complex<double> > result(0.,nn,b.nn); // starts from a zero matrix
+	cblas_zgerc(CblasRowMajor, nn, b.nn, &CONE, v, 1, b.v, 1, result, b.nn); // zgerc: the conjugating rank-1 update (b enters conjugated)
+	return result;
+}
+
+
diff --git a/vec.h b/vec.h
new file mode 100644
index 0000000..330ad4c
--- /dev/null
+++ b/vec.h
@@ -0,0 +1,380 @@
+#ifndef _LA_VEC_H_
+#define _LA_VEC_H_
+
+extern "C" {
+#include "cblas.h"
+}
+#include <stdio.h>
+#include <complex>
+#include <string.h>
+#include <iostream>
+
+using namespace std;
+
+template <typename T> class NRVec;
+template <typename T> class NRSMat;
+template <typename T> class NRMat;
+template <typename T> class SparseMat;
+
+//////////////////////////////////////////////////////////////////////////////
+// Forward declarations
+void laerror(const char *s1=0, const char *s2=0, const char *s3=0, const char *s4=0);
+template <typename T> void lawritemat(FILE *file,const T *a,int r,int c,
+		const char *form0,int nodim,int modulo, int issym);
+
+// Memory allocated constants for cblas routines
+const static complex<double> CONE = 1.0, CMONE = -1.0, CZERO = 0.0;
+
+// Macros to construct binary operators +,-,* from +=, -=, *=: NRVECMAT_OPER gives X + a and a + X, NRVECMAT_OPER2 gives X + Y
+// NOTE(review): the "a X rhs" form is built as (copy of rhs) X= a, so for '-' it evaluates rhs - a rather than a - rhs -- confirm intent
+#define NRVECMAT_OPER(E,X) \
+template<class T> \
+	inline const NR##E<T> NR##E<T>::operator X(const T &a) const \
+{ return NR##E(*this) X##= a; } \
+	\
+	template<class T> \
+	inline const NR##E<T> operator X(const T &a, const NR##E<T> &rhs) \
+{ return NR##E<T>(rhs) X##= a; }
+
+#define NRVECMAT_OPER2(E,X) \
+template<class T> \
+	inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
+{ return NR##E(*this) X##= a; }
+
+#include "smat.h"
+#include "mat.h"
+
+// NRVec class: vector whose data buffer is shared between copies through a reference counter
+template <typename T>
+class NRVec {
+protected:
+	int nn; // number of elements
+	T *v; // data buffer, 0 while unallocated
+	int *count; // number of vectors sharing v, 0 while unallocated
+public:
+	friend class NRSMat<T>;
+	friend class NRMat<T>;
+
+	inline NRVec(): nn(0),v(0),count(0){}; // empty, unallocated vector
+	inline explicit NRVec(const int n) : nn(n), v(new T[n]), count(new int(1)) {}; // n elements, not explicitly initialised
+	inline NRVec(const T &a, const int n); // n elements, all set to a
+	inline NRVec(const T *a, const int n); // copies n elements from the array a
+	inline NRVec(const NRVec &rhs); // shallow copy, shares the data of rhs
+	inline explicit NRVec(const NRSMat<T> & S); // shares the storage of S
+#ifndef MATPTR
+	explicit NRVec(const NRMat<T> &rhs); // shares the storage of rhs
+#endif
+	NRVec & operator=(const NRVec &rhs); // shallow assignment, shares the data of rhs
+	NRVec & operator=(const T &a);  //assign a to every element
+	NRVec & operator|=(const NRVec &rhs); // assignment with a physical copy of the data
+	const NRVec operator-() const;
+	inline NRVec & operator+=(const NRVec &rhs);
+	inline NRVec & operator-=(const NRVec &rhs);
+	inline NRVec & operator+=(const T &a);
+	inline NRVec & operator-=(const T &a);
+	inline NRVec & operator*=(const T &a);
+	inline int getcount() const {return count?*count:0;}
+	inline const NRVec operator+(const NRVec &rhs) const;
+	inline const NRVec operator-(const NRVec &rhs) const;
+	inline const NRVec operator+(const T &a) const;
+	inline const NRVec operator-(const T &a) const;
+	inline const NRVec operator*(const T &a) const;
+	inline const T operator*(const NRVec &rhs) const; //scalar product -> ddot
+	inline const NRVec operator*(const NRSMat<T> & S) const;
+	const NRVec operator*(const NRMat<T> &mat) const;
+	const NRMat<T> operator|(const NRVec<T> &rhs) const;
+	inline const T sum() const; //sum of its elements
+	inline const T dot(const T *a, const int stride=1) const; // ddot with a stride-vector
+	inline T & operator[](const int i);
+	inline const T & operator[](const int i) const;
+	inline int size() const;
+	inline operator T*(); //get a pointer to the data
+	inline operator const T*() const; //get a pointer to the data
+	~NRVec();
+	void axpy(const T alpha, const NRVec &x); // this+= a*x
+	void axpy(const T alpha, const T *x, const int stride=1); // this+= a*x
+	void gemv(const T beta, const NRMat<T> &a, const char trans, 
+			const T alpha, const NRVec &x);
+	void copyonwrite(); // make the data private before they are modified
+	void resize(const int n);
+	NRVec & normalize();
+	inline const double norm() const;
+	inline const T amax() const;
+	inline const NRVec unitvector() const;
+	void fprintf(FILE *f, const char *format, const int modulo) const;
+	void fscanf(FILE *f, const char *format);
+//sparse matrix concerning members
+	explicit NRVec(const SparseMat<T> &rhs);                // dense from sparse matrix with one of dimensions =1
+	const NRVec operator*(const SparseMat<T> &mat) const; //vector*matrix
+	inline void simplify() {}; //just for compatibility with sparse ones
+	void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x);
+};
+
+template <typename T> ostream & operator<<(ostream &s, const NRVec<T> &x);
+template <typename T> istream & operator>>(istream  &s, NRVec<T> &x);
+
+// INLINES
+
+// ctors
+template <typename T>
+inline NRVec<T>::NRVec(const T& a, const int n) : nn(n), v(new T[n]), count(new int)
+{
+	*count = 1; // a freshly built vector is its only owner
+	if(a == (T)0) {
+		memset(v, 0, nn*sizeof(T)); // zero fill in one call
+		return;
+	}
+	for(int idx=0; idx<n; ++idx) v[idx] = a;
+}
+
+template <typename T>
+inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), v(new T[n]), count(new int)
+{
+	*count = 1; // the new vector is the only owner of its buffer
+	memcpy(v, a, n*sizeof(T)); // bytewise copy of n elements from the caller's array
+}
+
+template <typename T>
+inline NRVec<T>::NRVec(const NRVec<T> &rhs) : nn(rhs.nn), v(rhs.v), count(rhs.count)
+{
+	// shallow copy: both vectors now refer to the same data, so the
+	// shared counter gains one owner; an unallocated rhs has no counter
+	// and simply yields another unallocated vector
+	if(count) ++(*count);
+}
+
+template <typename T>
+inline NRVec<T>::NRVec(const NRSMat<T> &rhs)
+{
+	nn = rhs.nn; // NOTE(review): NN2 below is presumably a macro written in terms of nn, hence this first assignment -- confirm in smat.h
+	nn = NN2; // NOTE(review): presumably nn*(nn+1)/2, the length of the packed triangle -- confirm in smat.h
+	v = rhs.v; // share the storage of the matrix, no copy
+	count = rhs.count;
+	if(count) (*count)++; // guard against a null counter, as the copy ctor does
+}
+
+// x += a: adds the scalar a to every element (axpy with factor 1, reading a at stride 0)
+inline NRVec<double> & NRVec<double>::operator+=(const double &a)
+{
+	copyonwrite();
+	cblas_daxpy(nn, 1.0, &a, 0, v, 1);
+	return *this;
+}
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator+=(const complex<double> &a)
+{
+	copyonwrite();
+	cblas_zaxpy(nn, (void *)(&CONE), (void *)(&a), 0, (void *)v, 1);
+	return *this;
+}
+
+// x -= a: subtracts the scalar a from every element (axpy with factor -1, reading a at stride 0)
+inline NRVec<double> & NRVec<double>::operator-=(const double &a)
+{
+	copyonwrite();
+	cblas_daxpy(nn, -1.0, &a, 0, v, 1); // factor must be -1; with +1 this operator added a instead
+	return *this;
+}
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator-=(const complex<double> &a)
+{
+	copyonwrite();
+	cblas_zaxpy(nn, (void *)(&CMONE), (void *)(&a), 0, (void *)v, 1); // factor CMONE = -1, a read at stride 0: subtracts a from every element
+	return *this;
+}
+
+// x += y: element-wise addition of another vector via axpy with factor 1; sizes are compared only with DEBUG
+inline NRVec<double> & NRVec<double>::operator+=(const NRVec<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
+#endif
+	copyonwrite();
+	cblas_daxpy(nn, 1.0, rhs.v, 1, v, 1);
+	return *this;
+}
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator+=(const NRVec< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
+#endif
+	copyonwrite();
+	cblas_zaxpy(nn, (void *)(&CONE), rhs.v, 1, v, 1);
+	return *this;
+}
+
+// x -= y: element-wise subtraction of another vector via axpy with factor -1; sizes are compared only with DEBUG
+inline NRVec<double> & NRVec<double>::operator-=(const NRVec<double> &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
+#endif
+	copyonwrite();
+	cblas_daxpy(nn, -1.0, rhs.v, 1, v, 1);
+	return *this;
+}
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator-=(const NRVec< complex<double> > &rhs)
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
+#endif
+	copyonwrite();
+	cblas_zaxpy(nn, (void *)(&CMONE), (void *)rhs.v, 1, (void *)v, 1);
+	return *this;
+}
+
+// x *= a: scales every element by a (cblas_dscal / cblas_zscal) after detaching shared data
+inline NRVec<double> & NRVec<double>::operator*=(const double &a)
+{
+	copyonwrite();
+	cblas_dscal(nn, a, v, 1);
+	return *this;
+}
+inline NRVec< complex<double> > &
+NRVec< complex<double> >::operator*=(const complex<double> &a)
+{
+	copyonwrite();
+	cblas_zscal(nn, (void *)(&a), (void *)v, 1);
+	return *this;
+}
+
+// scalar product x.y; the complex version uses zdotc, i.e. the elements of *this enter conjugated
+inline const double NRVec<double>::operator*(const NRVec<double> &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("ddot of incompatible vectors");
+#endif
+	return cblas_ddot(nn, v, 1, rhs.v, 1);
+}
+inline const complex<double>
+NRVec< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
+{
+#ifdef DEBUG
+	if (nn != rhs.nn) laerror("ddot of incompatible vectors");
+#endif
+	complex<double> dot;
+	cblas_zdotc_sub(nn, (void *)v, 1, (void *)rhs.v, 1, (void *)(&dot));
+	return dot;
+}
+
+// Vec * SMat = SMat * Vec: delegates to the matrix-times-vector operator of NRSMat with the operands swapped
+template <typename T>
+inline const NRVec<T> NRVec<T>::operator*(const NRSMat<T> & S) const
+{
+	return S * (*this);
+}
+
+// Sum of elements (signed, like the complex version; cblas_dasum would return the sum of absolute values)
+inline const double NRVec<double>::sum() const
+{
+	double total = 0.; for (int i=0; i<nn; i++) total += v[i]; return total;
+}
+inline const complex<double>
+NRVec< complex<double> >::sum() const
+{
+	complex<double> sum = CZERO; // accumulator starts at zero
+	for (int i=0; i<nn; i++) sum += v[i]; // plain signed accumulation, front to back
+	return sum;
+}
+
+// Dot product of *this with nn values read from the array y at the given stride; no size check is possible here
+inline const double NRVec<double>::dot(const double *y, const int stride) const
+{
+	return cblas_ddot(nn, y, stride, v, 1);
+}
+inline const complex<double>
+NRVec< complex<double> >::dot(const complex<double> *y, const int stride) const
+{
+	complex<double> dot;
+	cblas_zdotc_sub(nn, y, stride, v, 1, (void *)(&dot)); // NOTE(review): zdotc conjugates its first vector, here y, whereas operator* conjugates *this -- confirm intent
+	return dot;
+}
+
+// x[i] returns a writable reference to the i-th element; all checks are active only with DEBUG
+template <typename T>
+inline T & NRVec<T>::operator[](const int i)
+{
+#ifdef DEBUG
+	if(!v) laerror("[] on unallocated NRVec"); // tested first: count is null as well for an unallocated vector
+	if(*count != 1) laerror("possible lval [] with count > 1");
+	if(i < 0 || i >= nn) laerror("NRVec out of range");
+#endif
+	return v[i];
+}
+template <typename T>
+inline const T & NRVec<T>::operator[](const int i) const
+{
+#ifdef DEBUG
+	if(i < 0 || i >= nn) laerror("NRVec out of range");
+	if(!v) laerror("[] on unallocated NRVec");
+#endif
+	return v[i]; // read-only access; index and allocation are checked only with DEBUG
+}
+
+// length of the vector (number of elements nn; 0 for a default-constructed vector)
+template <typename T>
+inline int NRVec<T>::size() const
+{
+	return nn;
+}
+
+// conversion to a raw pointer to the first element; no copyonwrite is done here, an unallocated vector is reported only with DEBUG
+template <typename T>
+inline NRVec<T>::operator T*()
+{
+#ifdef DEBUG
+	if(!v) laerror("unallocated NRVec in operator T*");
+#endif
+	return v;
+}
+template <typename T>
+inline NRVec<T>::operator const T*() const
+{
+#ifdef DEBUG
+	if(!v) laerror("unallocated NRVec in operator T*");
+#endif
+	return v;
+}
+
+// return norm of the Vec: Euclidean norm from cblas_dnrm2 / cblas_dznrm2, a real number in both cases
+inline const double  NRVec<double>::norm() const
+{
+	return cblas_dnrm2(nn, v, 1);
+}
+inline const double NRVec< complex<double> >::norm() const
+{
+	return cblas_dznrm2(nn, (void *)v, 1);
+}
+
+// The element (sign/phase kept) at the index found by cblas_idamax / cblas_izamax, i.e. the one of largest magnitude as BLAS defines it
+inline const double  NRVec<double>::amax() const
+{
+	return v[cblas_idamax(nn, v, 1)];
+}
+inline const complex<double> NRVec< complex<double> >::amax() const
+{
+	return v[cblas_izamax(nn, (void *)v, 1)];
+}
+
+
+// Normalized copy of the vector: normalize() runs on a temporary copy, *this is declared const and stays as it is
+template <typename T>
+inline const NRVec<T> NRVec<T>::unitvector() const
+{
+	return NRVec<T>(*this).normalize();
+}
+
+// generate operators: Vec + a, a + Vec, Vec * a
+NRVECMAT_OPER(Vec,+)
+NRVECMAT_OPER(Vec,-)
+NRVECMAT_OPER(Vec,*)
+// generate operators: Vec + Vec, Vec - Vec
+NRVECMAT_OPER2(Vec,+)
+NRVECMAT_OPER2(Vec,-)
+
+// Few forward declarations
+
+
+#endif /* _LA_VEC_H_ */