diff --git a/fourindex.h b/fourindex.h
index 02451d3..d1b5167 100644
--- a/fourindex.h
+++ b/fourindex.h
@@ -1,5 +1,6 @@
 #ifndef _fourindex_included
 #define _fourindex_included
+#include <iostream>
 #include <string.h>
 
 //element of a linked list, indices in a portable way, no bit shifts and endianity problems any more!
@@ -23,7 +24,7 @@ struct matel4
 	packedindex index;
         };
 
-typedef enum {nosymmetry=0, twoelectronrealmullikan=1, twoelectronrealdirac=2, T2ijab_real=3} fourindexsymtype; //if twoelectron, only permutation-nonequivalent elements are stored
+typedef enum {nosymmetry=0, twoelectronrealmullikan=1, twoelectronrealdirac=2, T2ijab_real=3} fourindexsymtype; //only permutation-nonequivalent elements are stored
 // these should actually be static private members of the fourindex class, but leads to an ICE on gcc3.2
 static const int fourindex_n_symmetrytypes=4;
 static const int fourindex_permnumbers[fourindex_n_symmetrytypes]={1,8,8,4};
diff --git a/la_traits.h b/la_traits.h
index 77bb29b..43efda4 100644
--- a/la_traits.h
+++ b/la_traits.h
@@ -31,6 +31,26 @@ template<typename C> class NRMat;
 template<typename C> class NRSMat;
 template<typename C> class SparseMat;
 
+//we will need to treat char and unsigned char as numbers in << and >> I/O operators
+template<typename C>
+struct LA_traits_io
+	{
+	typedef C IOtype;
+	};
+
+template<>
+struct LA_traits_io<char>
+	{
+	typedef int IOtype;
+	};
+
+template<>
+struct LA_traits_io<unsigned char>
+        {
+        typedef unsigned int IOtype;
+        };
+
+
 //let's do some simple template metaprogramming and preprocessing
 //to keep the thing general and compact
 
@@ -90,7 +110,7 @@ static inline void get(int fd, complex<C> &x, bool dimensions=0, bool transp=0)
 static inline void put(int fd, const complex<C> &x, bool dimensions=0, bool transp=0) {if(sizeof(complex<C>)!=write(fd,&x,sizeof(complex<C>))) laerror("write error");}
 static void multiget(unsigned int n,int fd, complex<C> *x, bool dimensions=0){if((ssize_t)(n*sizeof(complex<C>))!=read(fd,x,n*sizeof(complex<C>))) laerror("read error");}
 static void multiput(unsigned int n, int fd, const complex<C> *x, bool dimensions=0) {if((ssize_t)(n*sizeof(complex<C>))!=write(fd,x,n*sizeof(complex<C>))) laerror("write error");}
-
+static void copy(complex<C> *dest, complex<C> *src, unsigned int n) {memcpy(dest,src,n*sizeof(complex<C>));}
 };
 
 //non-complex scalars
@@ -108,6 +128,7 @@ static inline void put(int fd, const C &x, bool dimensions=0, bool transp=0) {if
 static inline void get(int fd, C &x, bool dimensions=0, bool transp=0) {if(sizeof(C)!=read(fd,&x,sizeof(C))) laerror("read error");}
 static void multiput(unsigned int n,int fd, const C *x, bool dimensions=0){if((ssize_t)(n*sizeof(C))!=write(fd,x,n*sizeof(C))) laerror("write error");}
 static void multiget(unsigned int n, int fd, C *x, bool dimensions=0) {if((ssize_t)(n*sizeof(C))!=read(fd,x,n*sizeof(C))) laerror("read error");}
+static void copy(C *dest, C *src, unsigned int n) {memcpy(dest,src,n*sizeof(C));}
 };
 
 
@@ -131,6 +152,7 @@ static void put(int fd, const C &x, bool dimensions=1, bool transp=0) {x.put(fd,
 static void get(int fd, C &x, bool dimensions=1, bool transp=0) {x.get(fd,dimensions,transp);} \
 static void multiput(unsigned int n,int fd, const C *x, bool dimensions=1) {for(unsigned int i=0; i<n; ++i) x[i].put(fd,dimensions);} \
 static void multiget(unsigned int n,int fd, C *x, bool dimensions=1) {for(unsigned int i=0; i<n; ++i) x[i].get(fd,dimensions);} \
+static void copy(C *dest, C *src, unsigned int n) {for(unsigned int i=0; i<n; ++i) dest[i]=src[i];} \
 };
 
 
@@ -156,6 +178,7 @@ static void put(int fd, const C &x, bool dimensions=1, bool transp=0) {x.put(fd,
 static void get(int fd, C &x, bool dimensions=1, bool transp=0) {x.get(fd,dimensions);} 
 static void multiput(unsigned int n,int fd, const C *x, bool dimensions=1) {for(unsigned int i=0; i<n; ++i) x[i].put(fd,dimensions);} \
 static void multiget(unsigned int n,int fd, C *x, bool dimensions=1) {for(unsigned int i=0; i<n; ++i) x[i].get(fd,dimensions);} \
+static void copy(C *dest, C *src, unsigned int n) {for(unsigned int i=0; i<n; ++i) dest[i]=src[i];} \
 };
 
 
diff --git a/mat.cc b/mat.cc
index e6469a4..58f3769 100644
--- a/mat.cc
+++ b/mat.cc
@@ -14,15 +14,36 @@ extern ssize_t write(int, const void *, size_t);
 //////////////////////////////////////////////////////////////////////////////
 //// forced instantization in the corresponding object file
 template class NRMat<double>;
-template class NRMat< complex<double> >;
+template class NRMat<complex<double> >;
 template class NRMat<int>;
 template class NRMat<short>;
 template class NRMat<char>;
+template class NRMat<unsigned char>;
+template class NRMat<unsigned long>;
 
 
 /*
  *  Templates first, specializations for BLAS next
- */
+*/
+
+//row of
+template <typename T>
+const NRVec<T> NRMat<T>::row(const int i) const
+{
+#ifdef DEBUG
+if(i<0||i>=nn) laerror("illegal index in row()");
+#endif
+NRVec<T> r(mm);
+LA_traits<T>::copy(&r[0],
+#ifdef MATPTR
+        v[i]
+#else
+        v+i*mm
+#endif
+,mm);
+return r;
+}
+
 
 //raw I/O
 template <typename T>
@@ -288,6 +309,101 @@ void NRMat<T>::fscanf(FILE *f, const char *format)
  * BLAS specializations for double and complex<double>
  */
 
+template<>
+const NRSMat<double>  NRMat<double>::transposedtimes() const
+{
+NRSMat<double> r(mm,mm);
+int i,j;
+for(i=0; i<mm; ++i) for(j=0; j<=i; ++j)
+#ifdef MATPTR
+	r(i,j) = cblas_ddot(nn,v[0]+i,mm,v[0]+j,mm);
+#else
+	r(i,j) = cblas_ddot(nn,v+i,mm,v+j,mm);
+#endif
+return r;
+}
+
+template<>
+const NRSMat<complex<double> >  NRMat<complex<double> >::transposedtimes() const
+{
+NRSMat<complex<double> > r(mm,mm);
+int i,j;
+for(i=0; i<mm; ++i) for(j=0; j<=i; ++j)
+#ifdef MATPTR
+	cblas_zdotc_sub(nn, v[0]+i , mm,v[0]+j, mm, (void *)(&r(i,j)));
+#else
+	cblas_zdotc_sub(nn, v+i , mm,v+j, mm, (void *)(&r(i,j)));
+#endif
+return r;
+}
+
+
+
+//and for general type
+template <typename T>
+const NRSMat<T>  NRMat<T>::transposedtimes() const
+{
+NRSMat<T> r(mm,mm);
+int i,j;
+for(i=0; i<mm; ++i) for(j=0; j<=i; ++j)
+	{
+	T s =(T)0;
+	for(int k=0; k<nn; ++k) s+= (*this)(k,i) * (*this)(k,j);
+	r(i,j)=s;
+	}
+return r;
+}
+
+
+
+template<>
+const NRSMat<double>  NRMat<double>::timestransposed() const
+{
+NRSMat<double> r(nn,nn);
+int i,j;
+for(i=0; i<nn; ++i) for(j=0; j<=i; ++j)
+#ifdef MATPTR
+	r(i,j) = cblas_ddot(mm,v[i],1,v[j],1);
+#else
+	r(i,j) = cblas_ddot(mm,v+i*mm,1,v+j*mm,1);
+#endif
+return r;
+}
+
+template<>
+const NRSMat<complex<double> >  NRMat<complex<double> >::timestransposed() const
+{
+NRSMat<complex<double> > r(nn,nn);
+int i,j;
+for(i=0; i<nn; ++i) for(j=0; j<=i; ++j)
+#ifdef MATPTR
+	cblas_zdotc_sub(mm, v[i] , 1,v[j], 1, (void *)(&r(i,j)));
+#else
+	cblas_zdotc_sub(mm, v+i*mm , 1,v+j*mm, 1, (void *)(&r(i,j)));
+#endif
+return r;
+}
+
+
+
+//and for general type
+template <typename T>
+const NRSMat<T>  NRMat<T>::timestransposed() const
+{
+NRSMat<T> r(nn,nn);
+int i,j;
+for(i=0; i<nn; ++i) for(j=0; j<=i; ++j)
+	{
+	T s =(T)0;
+	for(int k=0; k<mm; ++k) s+= (*this)(i,k) * (*this)(j,k);
+	r(i,j)=s;
+	}
+return r;
+}
+
+
+
+
 // Mat *= a
 template<>
 NRMat<double> & NRMat<double>::operator*=(const double &a)
@@ -946,6 +1062,8 @@ INSTANTIZE(complex<double>)
 INSTANTIZE(int)
 INSTANTIZE(short)
 INSTANTIZE(char)
+INSTANTIZE(unsigned char)
+INSTANTIZE(unsigned long)
 
 
 export template <typename T>
@@ -957,7 +1075,7 @@ ostream& operator<<(ostream &s, const NRMat<T> &x)
                 s << n << ' ' << m << '\n';
                 for(i=0;i<n;i++)
                         {
-                        for(j=0; j<m;j++) s << x[i][j] << (j==m-1 ? '\n' : ' '); // endl cannot be used in the conditional expression, since it is an overloaded function
+                        for(j=0; j<m;j++) s << (typename LA_traits_io<T>::IOtype) x[i][j] << (j==m-1 ? '\n' : ' '); // endl cannot be used in the conditional expression, since it is an overloaded function
                         }
                 return s;
                 }
@@ -968,7 +1086,8 @@ istream& operator>>(istream  &s, NRMat<T> &x)
                 int i,j,n,m;
                 s >> n >> m;
                 x.resize(n,m);
-                for(i=0;i<n;i++) for(j=0; j<m;j++) s>>x[i][j] ;
+		typename LA_traits_io<T>::IOtype tmp;
+                for(i=0;i<n;i++) for(j=0; j<m;j++) { s>>tmp; x[i][j]=tmp;}
                 return s;
                 }
 
diff --git a/mat.h b/mat.h
index 548d053..b7030e4 100644
--- a/mat.h
+++ b/mat.h
@@ -61,12 +61,16 @@ public:
 	const NRMat otimes(const NRMat &rhs) const; //direct product
 	void diagmultl(const NRVec<T> &rhs); //multiply by a diagonal matrix from L
 	void diagmultr(const NRVec<T> &rhs); //multiply by a diagonal matrix from R
+	const NRSMat<T> transposedtimes() const; //A^T . A
+	const NRSMat<T> timestransposed() const; //A . A^T
 	const NRMat operator*(const NRSMat<T> &rhs) const; // Mat * Smat
 	const NRMat operator&(const NRMat &rhs) const; // direct sum
 	const NRMat operator|(const NRMat<T> &rhs) const; // direct product
 	const NRVec<T> operator*(const NRVec<T> &rhs) const {NRVec<T> result(nn); result.gemv((T)0,*this,'n',(T)1,rhs); return result;}; // Mat * Vec
 	const NRVec<T> rsum() const; //sum of rows
 	const NRVec<T> csum() const; //sum of columns
+	const NRVec<T> row(const int i) const; //row of, efficient
+	const NRVec<T> column(const int j) const {NRVec<T> r(nn); for(int i=0; i<nn; ++i) r[i]= (*this)(i,j); return r;}; //column of, general but not very efficient
 	void diagonalof(NRVec<T> &, const bool divide=0) const; //get diagonal 
 	inline T* operator[](const int i);  //subscripting: pointer to row i
 	inline const T* operator[](const int i) const;
@@ -530,6 +534,14 @@ void NRMat<T>::resize(const int n, const int m)
 
 
 
+template<typename T>
+NRMat<complex<T> > complexify(const NRMat<T> &rhs)
+{
+NRMat<complex<T> > r(rhs.nrows(),rhs.ncols());
+for(int i=0; i<rhs.nrows(); ++i) 
+	for(int j=0; j<rhs.ncols(); ++j) r(i,j)= rhs(i,j);
+return r;
+}
 
 
 
diff --git a/nonclass.cc b/nonclass.cc
index 18cd396..b4242fb 100644
--- a/nonclass.cc
+++ b/nonclass.cc
@@ -28,6 +28,8 @@ INSTANTIZE(complex<double>)
 INSTANTIZE(int)
 INSTANTIZE(short)
 INSTANTIZE(char)
+INSTANTIZE(unsigned char)
+INSTANTIZE(unsigned long)
 
 #define EPSDET 1e-300
 
@@ -643,7 +645,7 @@ NRMat<double> matrixfunction(NRSMat<double> a, double (*f) (double))
 	return r;
 }
 
-NRMat<double> realmatrixfunction(NRMat<double> a, double (*f) (double))
+NRMat<double> realmatrixfunction(NRMat<double> a, double (*f) (const double))
 {
         int n = a.nrows();
         NRVec<double> w(n);
@@ -657,6 +659,28 @@ NRMat<double> realmatrixfunction(NRMat<double> a, double (*f) (double))
         return r;
 }
 
+NRMat<complex<double> > complexmatrixfunction(NRMat<double> a, double (*fre) (const double), double (*fim) (const double))
+{
+        int n = a.nrows();
+        NRVec<double> wre(n),wim(n);
+        diagonalize(a, wre, true, false);
+	for (int i=0; i<a.nrows(); i++) wim[i] = (*fim)(wre[i]);
+        for (int i=0; i<a.nrows(); i++) wre[i] = (*fre)(wre[i]);
+        NRMat<double> u = a;
+	NRMat<double> b = a;
+        a.diagmultl(wre);
+        b.diagmultl(wim);
+	NRMat<double> t(n,n),tt(n,n);
+        t.gemm(0.0, u, 't', a, 'n', 1.0);
+	tt.gemm(0.0, u, 't', b, 'n', 1.0);
+        NRMat<complex<double> > r(n, n);
+	for (int i=0; i<a.nrows(); i++) for(int j=0; j<a.ncols(); ++j) r(i,j)=complex<double>(t(i,j),tt(i,j));
+        return r;
+}
+
+
+
+
 
 // instantize template to an addresable function
 complex<double> myclog (const complex<double> &x) 
@@ -664,6 +688,17 @@ complex<double> myclog (const complex<double> &x)
 	return log(x);
 }
 
+complex<double> sqrtinv (const complex<double> &x)
+{
+        return 1./sqrt(x);
+}
+
+double sqrtinv (const double x)
+{
+        return 1./sqrt(x);
+}
+
+
 NRMat<double>  log(const NRMat<double> &a)
 {
 	return matrixfunction(a, &myclog, 1);
diff --git a/nonclass.h b/nonclass.h
index abb9602..93ddd0a 100644
--- a/nonclass.h
+++ b/nonclass.h
@@ -74,11 +74,17 @@ extern void gdiagonalize(NRMat<double> &a, NRVec< complex<double> > &w,
 		NRMat<double> *b=NULL, NRVec<double> *beta=NULL);
 extern NRMat<double> matrixfunction(NRSMat<double> a, double (*f) (double));
 extern NRMat<double> realmatrixfunction(NRMat<double> a, double (*f) (double)); //a has to by in fact symmetric
+extern NRMat<complex<double> > complexmatrixfunction(NRMat<double> a, double (*fre) (double), double (*fim) (double)); //a has to by in fact symmetric
 extern NRMat<double> matrixfunction(NRMat<double> a, complex<double> (*f)(const complex<double> &),const bool adjust=0);
 
+extern complex<double> sqrtinv(const complex<double> &);
+extern double sqrtinv(const double);
+
 //functions on matrices
 inline NRMat<double>  sqrt(const NRSMat<double> &a) { return matrixfunction(a,&sqrt); }
+inline NRMat<double>  sqrtinv(const NRSMat<double> &a) { return matrixfunction(a,&sqrtinv); }
 inline NRMat<double>  realsqrt(const NRMat<double> &a) { return realmatrixfunction(a,&sqrt); }
+inline NRMat<double>  realsqrtinv(const NRMat<double> &a) { return realmatrixfunction(a,&sqrtinv); }
 inline NRMat<double>  log(const NRSMat<double> &a) { return matrixfunction(a,&log); }
 extern NRMat<double> log(const NRMat<double> &a);
 
@@ -170,4 +176,11 @@ return r;
 //auxiliary routine to adjust eigenvectors to guarantee real logarithm
 extern void adjustphases(NRMat<double> &v);
 
+template<class T>
+T abssqr(const complex<T> &x)
+{
+return x.real()*x.real()+x.imag()*x.imag();
+}
+
+
 #endif
diff --git a/qsort.h b/qsort.h
index 8051b2a..7eed5b1 100644
--- a/qsort.h
+++ b/qsort.h
@@ -42,4 +42,44 @@ else
 return parity;
 }
 
+
+template<typename INDEX>
+int genqsort2(INDEX l, INDEX r, bool (*bigger)(const INDEX, const INDEX), void (*swap)(const INDEX,const INDEX)) 
+{
+INDEX i,j,piv;
+int parity=0;
+
+if(r<=l) return parity; //1 element
+if(bigger(l,r)) {parity^=1; swap(l,r);}
+if(r-l==1) return parity; //2 elements and preparation for median
+piv= (l+r)/2; //pivoting by median of 3 - safer 
+if(bigger(l,piv)) {parity^=1; swap(l,piv);} //and change the pivot element implicitly
+if(bigger(piv,r)) {parity^=1; swap(r,piv);} //and change the pivot element implicitly
+if(r-l==2) return parity; //in the case of 3 elements we are finished too
+
+//general case , l-th r-th already processed
+i=l+1; j=r-1;
+do{
+  //important sharp inequality - stops at sentinel element for efficiency
+  // this is inefficient if all keys are equal - unnecessary n log n swaps are done, but we assume that it is atypical input
+  while(bigger(piv,i++));
+  i--;
+  while(bigger(j--,piv));
+  j++;
+  if(i<j)
+	{
+	// swap and keep track of position of pivoting element 
+	parity^=1; swap(i,j); 
+	if(i==piv) piv=j; else if(j==piv) piv=i;
+	}
+  if(i<=j) {i++; j--;}
+  }while(i<=j);
+
+if(j-l < r-i)   //because of the stack in bad case process first the shorter subarray
+	{if(l<j) parity ^=genqsort2(l,j,bigger,swap); if(i<r) parity ^=genqsort2(i,r,bigger,swap);}
+else
+	{if(i<r) parity ^=genqsort2(i,r,bigger,swap); if(l<j) parity ^=genqsort2(l,j,bigger,swap);}
+return parity;
+}
+
 #endif
diff --git a/smat.cc b/smat.cc
index ecfc7f7..b7624ee 100644
--- a/smat.cc
+++ b/smat.cc
@@ -19,6 +19,7 @@ template class NRSMat< complex<double> >;
 template class NRSMat<int>;
 template class NRSMat<short>;
 template class NRSMat<char>;
+template class NRSMat<unsigned long>;
 
 
 
@@ -343,7 +344,7 @@ ostream& operator<<(ostream &s, const NRSMat<T> &x)
                 s << n << ' ' << n << '\n';
                 for(i=0;i<n;i++)
                         {
-                        for(j=0; j<n;j++) s << x(i,j) << (j==n-1 ? '\n' : ' ');
+                        for(j=0; j<n;j++) s << (typename LA_traits_io<T>::IOtype)x(i,j) << (j==n-1 ? '\n' : ' ');
                         }
                 return s;
                 }
@@ -356,7 +357,8 @@ istream& operator>>(istream  &s, NRSMat<T> &x)
                 s >> n >> m;
                 if(n!=m) laerror("input symmetric matrix not square");
                 x.resize(n);
-                for(i=0;i<n;i++) for(j=0; j<m;j++) s>>x(i,j);
+		typename LA_traits_io<T>::IOtype tmp;
+                for(i=0;i<n;i++) for(j=0; j<m;j++) {s>>tmp; x(i,j)=tmp;}
                 return s;
                 }
 
@@ -374,4 +376,5 @@ INSTANTIZE(complex<double>)
 INSTANTIZE(int)
 INSTANTIZE(short)
 INSTANTIZE(char)
+INSTANTIZE(unsigned long)
 
diff --git a/smat.h b/smat.h
index 5c139cc..fadeb4b 100644
--- a/smat.h
+++ b/smat.h
@@ -462,6 +462,17 @@ void NRSMat<T>::resize(const int n)
 }
 
 
+template<typename T>
+NRSMat<complex<T> > complexify(const NRSMat<T> &rhs)
+{
+NRSMat<complex<T> > r(rhs.nrows());
+for(int i=0; i<rhs.nrows(); ++i) 
+        for(int j=0; j<=i; ++j) r(i,j)=rhs(i,j);
+return r;
+}
+
+
+
 
 
 
diff --git a/sparsemat.cc b/sparsemat.cc
index 8f7c18f..a179211 100644
--- a/sparsemat.cc
+++ b/sparsemat.cc
@@ -7,6 +7,16 @@
 #include <errno.h>
 #include "sparsemat.h"
 
+template<typename T>
+static inline const T MAX(const T &a, const T &b)
+        {return b > a ? (b) : (a);}
+
+template<typename T>
+static inline void SWAP(T &a, T &b)
+        {T dum=a; a=b; b=dum;}
+
+
+
 //////////////////////////////////////////////////////////////////////////////
 //// forced instantization in the corresponding object file
 template class SparseMat<double>;
@@ -24,7 +34,7 @@ ostream& operator<<(ostream &s, const SparseMat<T> &x)
 		matel<T> *list=x.getlist();
 		while(list)
                         {
-                        s << (int)list->row << ' ' << (int)list->col  << ' ' << list->elem << '\n';
+                        s << (int)list->row << ' ' << (int)list->col  << ' ' << (typename LA_traits_io<T>::IOtype)list->elem << '\n';
 			list=list->next;
                         }
 		s << "-1 -1\n";
@@ -47,7 +57,9 @@ istream& operator>>(istream  &s, SparseMat<T> &x)
 			l->next= ll;
 			l->row=i;
 			l->col=j;
-			s >> l->elem;	
+			typename LA_traits_io<T>::IOtype tmp;
+			s >> tmp;
+			l->elem=tmp;	
 			s >> i >> j;
 			}
 		x.setlist(l);
@@ -188,7 +200,8 @@ inline static SPMatindexdiff EXEC(register const SPMatindex i, register const SP
 	register matel<T> *ii,*jj;
 	ii=((matel<T> **)globsorted)[i];
 	jj=((matel<T> **)globsorted)[j];
-	if (k=ii->col-jj->col) return k; else return ii->row-jj->row;}
+	if (k=ii->col-jj->col) return k; else return ii->row-jj->row;
+	}
 };
 
 
@@ -200,7 +213,8 @@ inline static SPMatindexdiff EXEC(register const SPMatindex i, register const SP
         register matel<T> *ii,*jj;
         ii=((matel<T> **)globsorted)[i];
         jj=((matel<T> **)globsorted)[j];
-	if (k=ii->row-jj->row) return k; else return ii->col-jj->col;}
+	if (k=ii->row-jj->row) return k; else return ii->col-jj->col;
+	}
 };
 
 
@@ -215,7 +229,7 @@ SWAP(((matel<T> **)globsorted)[i],((matel<T> **)globsorted)[j]);
 
 
 template<class T, int type>
-void genqsort(SPMatindex l,SPMatindex r) /*safer version for worst case*/
+void myqsort(SPMatindex l,SPMatindex r) /*safer version for worst case*/
 {
 register SPMatindex i,j,piv;
 
@@ -250,9 +264,9 @@ do{
   }while(i<=j);
 
 if(j-l < r-i)   /*because of the stack in bad case process first the shorter subarray*/
-        {if(l<j) genqsort<T,type>(l,j); if(i<r) genqsort<T,type>(i,r);}
+        {if(l<j) myqsort<T,type>(l,j); if(i<r) myqsort<T,type>(i,r);}
 else
-        {if(i<r) genqsort<T,type>(i,r); if(l<j) genqsort<T,type>(l,j);}
+        {if(i<r) myqsort<T,type>(i,r); if(l<j) myqsort<T,type>(l,j);}
 }
 
 
@@ -302,8 +316,8 @@ while(l)
 
 //now sort the array of pointers according to type
 globsorted =sorted; 
-if(type==0) {genqsort<T,0>(0,nonzero-1); ((SparseMat<T> *)this)->rowsorted=sorted;} //type handled at compile time for more efficiency
-else {genqsort<T,1>(0,nonzero-1); ((SparseMat<T> *)this)->colsorted=sorted;} //should better be const cast
+if(type==0) {myqsort<T,0>(0,nonzero-1); ((SparseMat<T> *)this)->rowsorted=sorted;} //type handled at compile time for more efficiency
+else {myqsort<T,1>(0,nonzero-1); ((SparseMat<T> *)this)->colsorted=sorted;} //should better be const cast
 
 //cout <<"sort: nonzero ="<<nonzero<<"\n";
 return nonzero; //number of (in principle) nonzero elements
diff --git a/t.cc b/t.cc
index f1d2217..7ca7df4 100644
--- a/t.cc
+++ b/t.cc
@@ -10,6 +10,7 @@
 #include "gmres.h"
 #include "conjgrad.h"
 #include "diis.h"
+#include "bitvector.h"
 
 
 extern void test(const NRVec<double> &x);
diff --git a/t2.cc b/t2.cc
index 349f971..58f8768 100644
--- a/t2.cc
+++ b/t2.cc
@@ -5,6 +5,7 @@
 #include "sparsemat.h"
 #include "matexp.h"
 #include "fourindex.h"
+#include "bitvector.h"
 
 
 void test(const NRVec<double> &x)
diff --git a/test.cc b/test.cc
index c689cc6..ed9e14d 100644
--- a/test.cc
+++ b/test.cc
@@ -1,8 +1,28 @@
-#include "vec.h"
+#include "bitvector.h"
 
 int main(void)
 {
-NRVec<double> *p = new NRVec<double>[1000];
-NRVec<double> q(10); q=1.;
-p[500]|=q;
+bitvector v(100);
+v.fill();
+bitvector x(50); x=v;
+v.copyonwrite();
+for(unsigned int i=0; i<100; i+=2) v.reset(i);
+x.fill();
+x= ~x;
+for(unsigned int i=0; i<100; ++i) x.assign(i,i&1);
+cout <<v<< endl;
+cout <<x<< endl;
+cout <<"TEST "<<(x==v)<< " "<<x.population()<<endl;
+
+v.clear(); x.clear();
+v.set(31); x.set(32);
+cout <<" TEST < "<< (x<v)<<endl;
+
+NRVec<int> t(10);
+for(int i=0; i<10; ++i) t[i]=i;
+cout <<t;
+t.sort(1);
+cout <<t;
+t.sort(0);
+cout<<t;
 }
diff --git a/vec.cc b/vec.cc
index 80c824c..88e5a40 100644
--- a/vec.cc
+++ b/vec.cc
@@ -5,6 +5,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <errno.h>
+#include "qsort.h"
 extern "C" {
 extern ssize_t read(int, void *, size_t);
 extern ssize_t write(int, const void *, size_t);
@@ -29,11 +30,14 @@ INSTANTIZE(short)
 INSTANTIZE(unsigned short)
 INSTANTIZE(char)
 INSTANTIZE(unsigned char)
+INSTANTIZE(unsigned long)
+
 template class NRVec<double>;
 template class NRVec<complex<double> >;
 template class NRVec<char>;
 template class NRVec<short>;
 template class NRVec<int>;
+template class NRVec<unsigned long>;
 
 
 
@@ -65,7 +69,7 @@ ostream & operator<<(ostream &s, const NRVec<T> &x)
 
   n = x.size();
   s << n << endl;
-  for(i=0; i<n; i++) s << x[i] << (i == n-1 ? '\n' : ' ');
+  for(i=0; i<n; i++) s << (typename LA_traits_io<T>::IOtype)x[i] << (i == n-1 ? '\n' : ' ');
   return s;
 }
 
@@ -76,7 +80,8 @@ istream & operator>>(istream &s, NRVec<T> &x)
 
   s >> n;
   x.resize(n);
-  for(i=0; i<n; i++) s >> x[i];
+  typename LA_traits_io<T>::IOtype tmp;
+  for(i=0; i<n; i++) {s >> tmp; x[i]=tmp;}
   return s;
 }
 
@@ -281,6 +286,10 @@ template<>
 NRVec<short> &  NRVec<short>::normalize() {laerror("normalize() impossible for integer types"); return *this;}
 template<>
 NRVec<char> &  NRVec<char>::normalize() {laerror("normalize() impossible for integer types"); return *this;}
+template<>
+NRVec<unsigned long> &  NRVec<unsigned long>::normalize() {laerror("normalize() impossible for integer types"); return *this;}
+
+
 template<>
 void NRVec<int>::gemv(const int beta,
                 const NRSMat<int> &A, const char trans,
@@ -297,6 +306,22 @@ void NRVec<short>::gemv(const short beta,
 laerror("not yet implemented");
 }
 
+template<>
+void NRVec<unsigned long>::gemv(const unsigned long beta,
+                const NRSMat<unsigned long> &A, const char trans,
+                const unsigned long alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
+template<>
+void NRVec<unsigned char>::gemv(const unsigned char beta,
+                const NRSMat<unsigned char> &A, const char trans,
+                const unsigned char alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
 
 template<>
 void NRVec<char>::gemv(const char beta,
@@ -322,6 +347,24 @@ void NRVec<short>::gemv(const short beta,
 laerror("not yet implemented");
 }
 
+template<>
+void NRVec<unsigned long>::gemv(const unsigned long beta,
+                const NRMat<unsigned long> &A, const char trans,
+                const unsigned long alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
+template<>
+void NRVec<unsigned char>::gemv(const unsigned char beta,
+                const NRMat<unsigned char> &A, const char trans,
+                const unsigned char alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
+
+
 
 template<>
 void NRVec<char>::gemv(const char beta,
@@ -356,6 +399,22 @@ void NRVec<char>::gemv(const char beta,
 laerror("not yet implemented");
 }
 
+template<>
+void NRVec<unsigned long>::gemv(const unsigned long beta,
+                const SparseMat<unsigned long> &A, const char trans,
+                const unsigned long alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
+template<>
+void NRVec<unsigned char>::gemv(const unsigned char beta,
+                const SparseMat<unsigned char> &A, const char trans,
+                const unsigned char alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
 
 
 
@@ -434,3 +493,36 @@ NRVec< complex<double> >::operator|(const NRVec< complex<double> > &b) const
 }
 
 
+//sorting of elements in the vector
+template<typename T>
+static inline void SWAP(T &a, T &b)
+        {T dum=a; a=b; b=dum;}
+
+static void *sortbase; //global for sort !!! is not thread-safe
+
+template<typename T>
+static void myswap(int i, int j)
+{
+SWAP(((T *)sortbase)[i],((T *)sortbase)[j]);
+}
+
+template<typename T>
+static bool mybigger(int i, int j)
+{
+return LA_traits<T>::bigger(((T *)sortbase)[i],((T *)sortbase)[j]);
+}
+
+template<typename T>
+static bool mysmaller(int i, int j)
+{
+return LA_traits<T>::smaller(((T *)sortbase)[i],((T *)sortbase)[j]);
+}
+
+template<typename T>
+int NRVec<T>::sort(int direction)
+{
+copyonwrite();
+sortbase = v;
+if(direction) return genqsort2(0,nn-1,mysmaller<T>,myswap<T>);
+else return genqsort2(0,nn-1,mybigger<T>,myswap<T>);
+}
diff --git a/vec.h b/vec.h
index 8e59b97..85c7b85 100644
--- a/vec.h
+++ b/vec.h
@@ -103,6 +103,7 @@ public:
 //sparse matrix concerning members
 	explicit NRVec(const SparseMat<T> &rhs);                // dense from sparse matrix with one of dimensions =1
 	inline void simplify() {}; //just for compatibility with sparse ones
+	int sort(int direction=0); //sort, ascending by default, returns parity of permutation
 };
 
 //due to mutual includes this has to be after full class declaration
@@ -591,5 +592,14 @@ NRVec<T> & NRVec<T>::operator|=(const NRVec<T> &rhs)
 }
 
 
+template<typename T>
+NRVec<complex<T> > complexify(const NRVec<T> &rhs)
+{
+NRVec<complex<T> > r(rhs.size());
+for(int i=0; i<rhs.size(); ++i)  r[i]=rhs[i];
+return r;
+}
+
+
 
 #endif /* _LA_VEC_H_ */