diff --git a/davidson.h b/davidson.h
index 0103ad8..223d044 100644
--- a/davidson.h
+++ b/davidson.h
@@ -13,3 +13,7 @@ export template <typename T, typename Matrix>
 extern void davidson(const Matrix &bigmat, NRVec<T> &eivals, NRVec<T> *eivecs, const char *eivecsfile, 
 		int nroots=1,  const bool verbose=0, const double eps=1e-6,
 	 	const bool incore=1, int maxit=100, const int maxkrylov = 500);
+
+//@@@options: left eigenvectors by matrix transpose, overridesymmetric (for nrmat)
+//@@@small matrix gdiagonalize - shift complex roots up (option to gdiagonalize?)
+//@@@test gdiagonalize whether it sorts the roots and what for complex ones
diff --git a/la_traits.h b/la_traits.h
index 7290515..a14f487 100644
--- a/la_traits.h
+++ b/la_traits.h
@@ -13,9 +13,13 @@ using namespace std;
 #include <complex>
 #include "laerror.h"
 
+#ifdef NONCBLAS
+#include "noncblas.h"
+#else
 extern "C" {
 #include "cblas.h"
 }
+#endif
 
 #ifdef _GLIBCPP_NO_TEMPLATE_EXPORT
 # define export 
@@ -64,16 +68,25 @@ SCALAR(void *)
 
 
 //now declare the traits for scalars and for composed classes
+//NOTE! methods in traits classes have to be declared static,
+//since the class itself is never instantiated.
+//for performance, it can be also inlined at the same time
 template<typename C, typename Scalar> struct LA_traits_aux {};
 
+
+//TRAITS SPECIALIZATIONS
+
 //complex scalars
 template<typename C>
 struct LA_traits_aux<complex<C>, scalar_true> {
 typedef complex<C> elementtype;
 typedef complex<C> producttype;
 typedef C normtype;
-static normtype norm (const  complex<C> &x) {return abs(x);}
-static void axpy (complex<C> &s, const complex<C> &x, const complex<C> &c) {s+=x*c;}
+static inline bool gencmp(const complex<C> *x, const complex<C> *y, int n) {return memcmp(x,y,n*sizeof(complex<C>));}
+static bool bigger(const  complex<C> &x, const complex<C> &y) {laerror("complex comparison undefined"); return false;}
+static bool smaller(const  complex<C> &x, const complex<C> &y) {laerror("complex comparison undefined"); return false;}
+static inline normtype norm (const  complex<C> &x) {return abs(x);}
+static inline void axpy (complex<C> &s, const complex<C> &x, const complex<C> &c) {s+=x*c;}
 static void get(int fd, complex<C> &x, bool dimensions=0) {if(sizeof(complex<C>)!=read(fd,&x,sizeof(complex<C>))) laerror("read error");}
 static void put(int fd, const complex<C> &x, bool dimensions=0) {if(sizeof(complex<C>)!=write(fd,&x,sizeof(complex<C>))) laerror("write error");}
 static void multiget(unsigned int n,int fd, complex<C> *x, bool dimensions=0){if((ssize_t)(n*sizeof(complex<C>))!=read(fd,x,n*sizeof(complex<C>))) laerror("read error");}
@@ -87,8 +100,11 @@ struct LA_traits_aux<C, scalar_true> {
 typedef C elementtype;
 typedef C producttype;
 typedef C normtype;
-static normtype norm (const  C &x) {return abs(x);}
-static void axpy (C &s, const C &x, const C &c) {s+=x*c;}
+static inline bool gencmp(const C *x, const C *y, int n) {return memcmp(x,y,n*sizeof(C));}
+static inline bool bigger(const  C &x, const C &y) {return x>y;}
+static inline bool smaller(const  C &x, const C &y) {return x<y;}
+static inline normtype norm (const  C &x) {return abs(x);}
+static inline void axpy (C &s, const C &x, const C &c) {s+=x*c;}
 static void put(int fd, const C &x, bool dimensions=0) {if(sizeof(C)!=write(fd,&x,sizeof(C))) laerror("write error");}
 static void get(int fd, C &x, bool dimensions=0) {if(sizeof(C)!=read(fd,&x,sizeof(C))) laerror("read error");}
 static void multiput(unsigned int n,int fd, const C *x, bool dimensions=0){if((ssize_t)(n*sizeof(C))!=write(fd,x,n*sizeof(C))) laerror("write error");}
@@ -96,7 +112,7 @@ static void multiget(unsigned int n, int fd, C *x, bool dimensions=0) {if((ssize
 };
 
 
-//prepare for non-scalar classes
+//non-scalars except smat
 
 template<typename C>
 struct LA_traits; //forward declaration needed for template recursion
@@ -107,8 +123,11 @@ struct LA_traits_aux<X<C>, scalar_false> { \
 typedef C elementtype; \
 typedef X<C> producttype; \
 typedef typename LA_traits<C>::normtype normtype; \
-static normtype norm (const X<C> &x) {return x.norm();} \
-static void axpy (X<C>&s, const X<C> &x, const C c) {s.axpy(c,x);} \
+static bool gencmp(const C *x, const C *y, int n) {for(int i=0; i<n; ++i) if(x[i]!=y[i]) return true; return false;} \
+static inline bool bigger(const  C &x, const C &y) {return x>y;} \
+static inline bool smaller(const  C &x, const C &y) {return x<y;} \
+static inline normtype norm (const X<C> &x) {return x.norm();} \
+static inline void axpy (X<C>&s, const X<C> &x, const C c) {s.axpy(c,x);} \
 static void put(int fd, const C &x, bool dimensions=1) {x.put(fd,dimensions);} \
 static void get(int fd, C &x, bool dimensions=1) {x.get(fd,dimensions);} \
 static void multiput(unsigned int n,int fd, const C *x, bool dimensions=1) {for(unsigned int i=0; i<n; ++i) x[i].put(fd,dimensions);} \
@@ -123,14 +142,17 @@ generate_traits(SparseMat)
 
 #undef generate_traits
 
-//non-scalar exceptions (smat product type)
+//smat
 template<typename C> 
 struct LA_traits_aux<NRSMat<C>, scalar_false> { 
 typedef C elementtype; 
 typedef NRMat<C> producttype; 
 typedef typename LA_traits<C>::normtype normtype; 
-static normtype norm (const NRSMat<C> &x) {return x.norm();} 
-static void axpy (NRSMat<C>&s, const NRSMat<C> &x, const C c) {s.axpy(c,x);} 
+static bool gencmp(const C *x, const C *y, int n) {for(int i=0; i<n; ++i) if(x[i]!=y[i]) return true; return false;}
+static inline bool bigger(const  C &x, const C &y) {return x>y;}
+static inline bool smaller(const  C &x, const C &y) {return x<y;}
+static inline normtype norm (const NRSMat<C> &x) {return x.norm();} 
+static inline void axpy (NRSMat<C>&s, const NRSMat<C> &x, const C c) {s.axpy(c,x);} 
 static void put(int fd, const C &x, bool dimensions=1) {x.put(fd,dimensions);} 
 static void get(int fd, C &x, bool dimensions=1) {x.get(fd,dimensions);} 
 static void multiput(unsigned int n,int fd, const C *x, bool dimensions=1) {for(unsigned int i=0; i<n; ++i) x[i].put(fd,dimensions);} \
diff --git a/mat.cc b/mat.cc
index a18f09c..ade9c70 100644
--- a/mat.cc
+++ b/mat.cc
@@ -15,6 +15,7 @@ extern ssize_t write(int, const void *, size_t);
 template NRMat<double>;
 template NRMat< complex<double> >;
 template NRMat<int>;
+template NRMat<short>;
 template NRMat<char>;
 
 
@@ -798,6 +799,7 @@ template istream & operator>>(istream  &s, NRMat< T > &x); \
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
 INSTANTIZE(int)
+INSTANTIZE(short)
 INSTANTIZE(char)
 
 
diff --git a/mat.h b/mat.h
index 9e587ea..f0f6e23 100644
--- a/mat.h
+++ b/mat.h
@@ -29,9 +29,9 @@ public:
 #endif
 	~NRMat();
 #ifdef MATPTR
-	const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return(memcmp(v[0],rhs.v[0],nn*mm*sizeof(T)));}
+	const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v[0],rhs.v[0],nn*mm);} //memcmp for scalars else elementwise
 #else
-        const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return(memcmp(v,rhs.v,nn*mm*sizeof(T)));}
+        const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v,rhs.v,nn*mm);} //memcmp for scalars else elementwise
 #endif
 	const bool operator==(const NRMat &rhs) const {return !(*this != rhs);};
 	inline int getcount() const {return count?*count:0;}
@@ -101,6 +101,7 @@ public:
 	explicit NRMat(const SparseMat<T> &rhs);                // dense from sparse
 	NRMat & operator+=(const SparseMat<T> &rhs);
         NRMat & operator-=(const SparseMat<T> &rhs);
+	void gemm(const T &beta, const SparseMat<T> &a, const char transa, const NRMat &b, const char transb, const T &alpha);//this = alpha*op( A )*op( B ) + beta*this
         inline void simplify() {}; //just for compatibility with sparse ones
 	bool issymmetric() const {return 0;};
 
diff --git a/noncblas.h b/noncblas.h
index 5e970df..3a6c749 100644
--- a/noncblas.h
+++ b/noncblas.h
@@ -595,4 +595,64 @@ int cblas_errprn(int ierr, int info, char *form, ...);
 
 #endif  /* end #ifdef CBLAS_ENUM_ONLY */
 #endif
+
+
+/*
+ *             Automatically Tuned Linear Algebra Software v3.4.1
+ *                    (C) Copyright 1997 R. Clint Whaley
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions, and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *   3. The name of the ATLAS group or the names of its contributers may
+ *      not be used to endorse or promote products derived from this
+ *      software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef ATLAS_ENUM_H
+   #define ATLAS_ENUM_H
+
+
+   #define ATLAS_ORDER CBLAS_ORDER
+      #define AtlasRowMajor CblasRowMajor
+      #define AtlasColMajor CblasColMajor
+   #define ATLAS_TRANS CBLAS_TRANSPOSE
+      #define AtlasNoTrans CblasNoTrans
+      #define AtlasTrans CblasTrans
+      #define AtlasConjTrans CblasConjTrans
+   #define ATLAS_UPLO CBLAS_UPLO
+      #define AtlasUpper CblasUpper
+      #define AtlasLower CblasLower
+   #define ATLAS_DIAG CBLAS_DIAG
+      #define AtlasNonUnit CblasNonUnit
+      #define AtlasUnit CblasUnit
+   #define ATLAS_SIDE CBLAS_SIDE
+      #define AtlasLeft  CblasLeft
+      #define AtlasRight CblasRight
+
+#endif
+
+/*from clapack.h*/
+int clapack_dgesv(const enum CBLAS_ORDER Order, const int N, const int NRHS,
+                  double *A, const int lda, int *ipiv,
+                  double *B, const int ldb); 
+
+
 #endif
diff --git a/nonclass.cc b/nonclass.cc
index 7b36641..f51d973 100644
--- a/nonclass.cc
+++ b/nonclass.cc
@@ -26,6 +26,7 @@ template void lawritemat(FILE *file,const T *a,int r,int c,const char *form0, \
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
 INSTANTIZE(int)
+INSTANTIZE(short)
 INSTANTIZE(char)
 
 template <typename T>
diff --git a/nonclass.h b/nonclass.h
index 765ceb2..5235fb8 100644
--- a/nonclass.h
+++ b/nonclass.h
@@ -122,12 +122,30 @@ return det;
 //general submatrix, INDEX will typically be NRVec<int> or even int*
 //NOTE: in order to check consistency between nrows and rows in rows is a NRVec
 //some advanced metaprogramming would be necessary
+//application: e.g. ignoresign=true, equalsigns=true, indexshift= -1 ... elements of Slater overlaps for RHF
 
 template<class MAT, class INDEX>
-const NRMat<typename LA_traits<MAT>::elementtype> submatrix(const MAT a, const int nrows, const INDEX rows, const int ncols, const INDEX cols, int indexshift=0, bool ignoresign=false)
+const NRMat<typename LA_traits<MAT>::elementtype> submatrix(const MAT a, const int nrows, const INDEX rows, const int ncols, const INDEX cols, int indexshift=0, bool ignoresign=false, bool equalsigns=false)
 {
 NRMat<typename LA_traits<MAT>::elementtype> r(nrows,ncols);
 
+if(equalsigns) //make the element zero if signs of both indices are opposite
+{
+if(ignoresign)
+{
+for(int i=0; i<nrows; ++i)
+        for(int j=0; j<ncols; ++j)
+                r(i,j) = rows[i]*cols[j]<0?0.:a(abs(rows[i])+indexshift,abs(cols[j])+indexshift);
+}
+else
+{
+for(int i=0; i<nrows; ++i)
+        for(int j=0; j<ncols; ++j)
+                r(i,j) = rows[i]*cols[j]<0?0.:a(rows[i]+indexshift,cols[j]+indexshift);
+}
+}
+else
+{
 if(ignoresign)
 {
 for(int i=0; i<nrows; ++i)
@@ -140,6 +158,7 @@ for(int i=0; i<nrows; ++i)
 	for(int j=0; j<ncols; ++j)
 		r(i,j) = a(rows[i]+indexshift,cols[j]+indexshift);
 }
+}
 
 return r;
 }
diff --git a/qsort.h b/qsort.h
new file mode 100644
index 0000000..15b0d79
--- /dev/null
+++ b/qsort.h
@@ -0,0 +1,41 @@
+//general quicksort suitable if we do not want to create extra class for the member type but prefer to encapsulate it into compare and swap soutines
+//returns parity of the permutation
+
+template<typename INDEX, typename COMPAR>
+int genqsort(INDEX l, INDEX r,COMPAR (*cmp)(const INDEX, const INDEX), void (*swap)(const INDEX,const INDEX)) 
+{
+INDEX i,j,piv;
+int parity=0;
+
+if(r<=l) return parity; //1 element
+if(cmp(r,l)<0) {parity^=1; swap(l,r);}
+if(r-l==1) return parity; //2 elements and preparation for median
+piv= (l+r)/2; //pivoting by median of 3 - safer 
+if(cmp(piv,l)<0) {parity^=1; swap(l,piv);} //and change the pivot element implicitly
+if(cmp(r,piv)<0) {parity^=1; swap(r,piv);} //and change the pivot element implicitly
+if(r-l==2) return parity; //in the case of 3 elements we are finished too
+
+//general case , l-th r-th already processed
+i=l+1; j=r-1;
+do{
+  //important sharp inequality - stops at sentinel element for efficiency
+  // this is inefficient if all keys are equal - unnecessary n log n swaps are done, but we assume that it is atypical input
+  while(cmp(i++,piv)<0);
+  i--;
+  while(cmp(j--,piv)>0);
+  j++;
+  if(i<j)
+	{
+	// swap and keep track of position of pivoting element 
+	parity^=1; swap(i,j); 
+	if(i==piv) piv=j; else if(j==piv) piv=i;
+	}
+  if(i<=j) {i++; j--;}
+  }while(i<=j);
+
+if(j-l < r-i)   //because of the stack in bad case process first the shorter subarray
+	{if(l<j) parity ^=genqsort(l,j,cmp,swap); if(i<r) parity ^=genqsort(i,r,cmp,swap);}
+else
+	{if(i<r) parity ^=genqsort(i,r,cmp,swap); if(l<j) parity ^=genqsort(l,j,cmp,swap);}
+return parity;
+}
diff --git a/smat.cc b/smat.cc
index dc02d40..deccb63 100644
--- a/smat.cc
+++ b/smat.cc
@@ -16,6 +16,7 @@ extern ssize_t write(int, const void *, size_t);
 template NRSMat<double>;
 template NRSMat< complex<double> >;
 template NRSMat<int>;
+template NRSMat<short>;
 template NRSMat<char>;
 
 
@@ -338,5 +339,6 @@ template istream & operator>>(istream  &s, NRSMat< T > &x); \
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
 INSTANTIZE(int)
+INSTANTIZE(short)
 INSTANTIZE(char)
 
diff --git a/smat.h b/smat.h
index 913959d..2f09ab5 100644
--- a/smat.h
+++ b/smat.h
@@ -24,7 +24,7 @@ public:
 	NRSMat & operator|=(const NRSMat &rhs);	//assignment to a new copy
 	NRSMat & operator=(const NRSMat &rhs);	//assignment
 	NRSMat & operator=(const T &a);		//assign a to diagonal
-        const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return(memcmp(v,rhs.v,NN2*sizeof(T)));}
+        const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,NN2);} //memcmp for scalars else elementwise
         const bool operator==(const NRSMat &rhs) const {return !(*this != rhs);};
 	inline NRSMat & operator*=(const T &a);
 	inline NRSMat & operator+=(const T &a); 
diff --git a/sparsemat.h b/sparsemat.h
index 7e033d8..9f5b259 100644
--- a/sparsemat.h
+++ b/sparsemat.h
@@ -85,7 +85,8 @@ public:
         inline const SparseMat operator*(const T &rhs) const {return SparseMat(*this) *= rhs;}
         inline const SparseMat operator+(const SparseMat &rhs) const {return SparseMat(*this) += rhs;} //must not be symmetric+general
         inline const SparseMat operator-(const SparseMat &rhs) const {return SparseMat(*this) -= rhs;} //must not be symmetric+general
-	inline const NRVec<T> operator*(const NRVec<T> &rhs) const; // Mat * Vec
+	inline const NRVec<T> operator*(const NRVec<T> &rhs) const; // SparseMat * Vec
+	inline const NRMat<T> operator*(const NRMat<T> &rhs) const; // SparseMat * Mat
 	void diagonalof(NRVec<T> &, const bool divide=0) const; //get diagonal
 	const SparseMat operator*(const SparseMat &rhs) const; 
 	SparseMat & oplusequal(const SparseMat &rhs); //direct sum
@@ -135,6 +136,10 @@ template <typename T>
 inline const NRVec<T> SparseMat<T>::operator*(const NRVec<T> &rhs) const
 {NRVec<T> result(nn); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
 
+template <typename T>
+inline const NRMat<T> SparseMat<T>::operator*(const NRMat<T> &rhs) const
+{NRMat<T> result(nn,rhs.ncols()); result.gemm((T)0,*this,'n',rhs,'n',(T)1); return result;};
+
 template <typename T>
 	extern istream& operator>>(istream  &s, SparseMat<T> &x);
 
diff --git a/t.cc b/t.cc
index 718272a..9b9cca0 100644
--- a/t.cc
+++ b/t.cc
@@ -194,7 +194,8 @@ cout <<" big1*big2 "<<big3[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-
 
 */
 
-/*
+if(0)
+{
 NRMat<double> atest, btest,ctest;
 {
 int cc,c1,c2,c3;
@@ -211,7 +212,7 @@ cout << dtest;
 NRMat<double> etest(atest.nrows(),btest.ncols());
 etest.strassen(0., atest, 't', btest, 'n', 1.);
 cout << etest;
-*/
+}
 
 if(0)
 {
@@ -511,6 +512,53 @@ cin>>n;
 	cout <<"difference = "<<(res1-res2).norm()<<endl;
 }
 
+if(1)
+{
+int n,k,m;
+cin >>n>>k>>m;
+{
+NRMat<double> a(n,k),b(k,m),c(n,m),d(n,m);
+for(int i=0;i<n;++i) for(int j=0;j<k;++j) a(i,j)= random()/(1.+RAND_MAX);
+for(int i=0;i<k;++i) for(int j=0;j<m;++j) b(i,j)= random()/(1.+RAND_MAX);
+c.gemm(0., a, 'n', b, 'n', .6);
+SparseMat<double> aa(a);
+d.gemm(0., aa, 'n', b, 'n', .6);
+cout<<c<<d;
+cout  <<"test error = "<<(c-d).norm()<<endl;
+}
+{
+NRMat<double> a(k,n),b(k,m),c(n,m),d(n,m);
+for(int i=0;i<k;++i) for(int j=0;j<n;++j) a(i,j)= random()/(1.+RAND_MAX);
+for(int i=0;i<k;++i) for(int j=0;j<m;++j) b(i,j)= random()/(1.+RAND_MAX);
+c.gemm(0., a, 't', b, 'n', .7);
+SparseMat<double> aa(a);
+d.gemm(0., aa, 't', b, 'n', .7);
+cout<<c<<d;
+cout  <<"test error = "<<(c-d).norm()<<endl;
+}
+{
+NRMat<double> a(n,k),b(m,k),c(n,m),d(n,m);
+for(int i=0;i<n;++i) for(int j=0;j<k;++j) a(i,j)= random()/(1.+RAND_MAX);
+for(int i=0;i<m;++i) for(int j=0;j<k;++j) b(i,j)= random()/(1.+RAND_MAX);
+c.gemm(0., a, 'n', b, 't', .8);
+SparseMat<double> aa(a);
+d.gemm(0., aa, 'n', b, 't', .8);
+cout<<c<<d;
+cout  <<"test error = "<<(c-d).norm()<<endl;
+}
+{
+NRMat<double> a(k,n),b(m,k),c(n,m),d(n,m);
+for(int i=0;i<k;++i) for(int j=0;j<n;++j) a(i,j)= random()/(1.+RAND_MAX);
+for(int i=0;i<m;++i) for(int j=0;j<k;++j) b(i,j)= random()/(1.+RAND_MAX);
+c.gemm(0., a, 't', b, 't', .9);
+SparseMat<double> aa(a);
+d.gemm(0., aa, 't', b, 't', .9);
+cout<<c<<d;
+cout  <<"test error = "<<(c-d).norm()<<endl;
+}
+
+}
+
 
 if(0)
 {
@@ -767,19 +815,21 @@ cout <<b;
 if(0)
 {
 int n;
+double d;
 cin >>n;
 //NRMat<double> a(n,n);
 NRSMat<double> a(n);
 for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
         {
-        a(j,i)=a(i,j)=random()/(1.+RAND_MAX);
+        a(j,i)=a(i,j)=random()/(1.+RAND_MAX)*(i==j?10.:1.);
         }
 cout <<a;
 NRMat<double> y(1,n);
 for(int i=0;i<n;++i) y(0,i)=random()/(1.+RAND_MAX);
 cout <<y;
-linear_solve(a,&y);
+linear_solve(a,&y,&d);
 cout << y;
+cout <<"det is "<<d<<endl;
 }
 
 if(0)
@@ -911,7 +961,7 @@ cout <<r;
 #undef sparsity
 #define sparsity (n*2)
 #define sparsity2 (n/5)
-if(1)
+if(0)
 {
 int n,m;
 cin >>n>>m;
diff --git a/vec.cc b/vec.cc
index 0463cb2..5e79a1f 100644
--- a/vec.cc
+++ b/vec.cc
@@ -141,6 +141,35 @@ const NRVec<T> NRVec<T>::operator-() const
 	return result;
 }
 
+
+//comparison operators (for lexical order)
+
+template <typename T>
+const bool NRVec<T>::operator>(const NRVec &rhs) const
+{
+int n=nn; if(rhs.nn<n) n=rhs.nn;
+for(int i=0; i<n;++i)
+	{
+	if(LA_traits<T>::bigger(v[i],rhs.v[i])) return true;
+	if(LA_traits<T>::smaller(v[i],rhs.v[i])) return false;
+	}
+return nn>rhs.nn;
+}
+
+template <typename T>
+const bool NRVec<T>::operator<(const NRVec &rhs) const
+{
+int n=nn; if(rhs.nn<n) n=rhs.nn;
+for(int i=0; i<n;++i)
+        {
+        if(LA_traits<T>::smaller(v[i],rhs.v[i])) return true;
+        if(LA_traits<T>::bigger(v[i],rhs.v[i])) return false;
+        }
+return nn<rhs.nn;
+}
+
+
+
 // axpy call for T = double (not strided)
 void NRVec<double>::axpy(const double alpha, const NRVec<double> &x)
 {
diff --git a/vec.h b/vec.h
index 9d980bc..b48f613 100644
--- a/vec.h
+++ b/vec.h
@@ -51,8 +51,12 @@ public:
 	NRVec & operator=(const NRVec &rhs);
 	NRVec & operator=(const T &a);  //assign a to every element
 	NRVec & operator|=(const NRVec &rhs);
-	const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return(memcmp(v,rhs.v,nn*sizeof(T)));}
+	const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,nn);} //memcmp for scalars else elementwise
 	const bool operator==(const NRVec &rhs) const {return !(*this != rhs);};
+	const bool operator>(const NRVec &rhs) const;
+	const bool operator<(const NRVec &rhs) const;
+	const bool operator>=(const NRVec &rhs) const {return !(*this < rhs);};
+	const bool operator<=(const NRVec &rhs) const {return !(*this > rhs);};
 	const NRVec operator-() const;
 	inline NRVec & operator+=(const NRVec &rhs);
 	inline NRVec & operator-=(const NRVec &rhs);