*** empty log message ***

2005-09-06 15:55:07 +00:00
parent 4e5311fece
commit abe1725466
14 changed files with 261 additions and 21 deletions
--- a/davidson.h
+++ b/davidson.h
@@ -13,3 +13,7 @@ export template <typename T, typename Matrix>
 extern void davidson(const Matrix &bigmat, NRVec<T> &eivals, NRVec<T> *eivecs, const char *eivecsfile, 
 		int nroots=1,  const bool verbose=0, const double eps=1e-6,
 	 	const bool incore=1, int maxit=100, const int maxkrylov = 500);
 //@@@options: left eigenvectors by matrix transpose, overridesymmetric (for nrmat)
 //@@@small matrix gdiagonalize - shift complex roots up (option to gdiagonalize?)
 //@@@test gdiagonalize whether it sorts the roots and what for complex ones
--- a/la_traits.h
+++ b/la_traits.h
@@ -13,9 +13,13 @@ using namespace std;
 #include <complex>
 #include "laerror.h"
 #ifdef NONCBLAS
 #include "noncblas.h"
 #else
 extern "C" {
 #include "cblas.h"
 }
 #endif
 #ifdef _GLIBCPP_NO_TEMPLATE_EXPORT
 # define export 
@@ -64,16 +68,25 @@ SCALAR(void *)
 //now declare the traits for scalars and for composed classes
 //NOTE! methods in traits classes have to be declared static,
 //since the class itself is never instantiated.
 //for performance, it can be also inlined at the same time
 template<typename C, typename Scalar> struct LA_traits_aux {};
 //TRAITS SPECIALIZATIONS
 //complex scalars
 template<typename C>
 struct LA_traits_aux<complex<C>, scalar_true> {
 typedef complex<C> elementtype;
 typedef complex<C> producttype;
 typedef C normtype;
-static normtype norm (const  complex<C> &x) {return abs(x);}
+static inline bool gencmp(const complex<C> *x, const complex<C> *y, int n) {return memcmp(x,y,n*sizeof(complex<C>));}
-static void axpy (complex<C> &s, const complex<C> &x, const complex<C> &c) {s+=x*c;}
+static bool bigger(const  complex<C> &x, const complex<C> &y) {laerror("complex comparison undefined"); return false;}
 static bool smaller(const  complex<C> &x, const complex<C> &y) {laerror("complex comparison undefined"); return false;}
 static inline normtype norm (const  complex<C> &x) {return abs(x);}
 static inline void axpy (complex<C> &s, const complex<C> &x, const complex<C> &c) {s+=x*c;}
 static void get(int fd, complex<C> &x, bool dimensions=0) {if(sizeof(complex<C>)!=read(fd,&x,sizeof(complex<C>))) laerror("read error");}
 static void put(int fd, const complex<C> &x, bool dimensions=0) {if(sizeof(complex<C>)!=write(fd,&x,sizeof(complex<C>))) laerror("write error");}
 static void multiget(unsigned int n,int fd, complex<C> *x, bool dimensions=0){if((ssize_t)(n*sizeof(complex<C>))!=read(fd,x,n*sizeof(complex<C>))) laerror("read error");}
@@ -87,8 +100,11 @@ struct LA_traits_aux<C, scalar_true> {
 typedef C elementtype;
 typedef C producttype;
 typedef C normtype;
-static normtype norm (const  C &x) {return abs(x);}
+static inline bool gencmp(const C *x, const C *y, int n) {return memcmp(x,y,n*sizeof(C));}
-static void axpy (C &s, const C &x, const C &c) {s+=x*c;}
+static inline bool bigger(const  C &x, const C &y) {return x>y;}
 static inline bool smaller(const  C &x, const C &y) {return x<y;}
 static inline normtype norm (const  C &x) {return abs(x);}
 static inline void axpy (C &s, const C &x, const C &c) {s+=x*c;}
 static void put(int fd, const C &x, bool dimensions=0) {if(sizeof(C)!=write(fd,&x,sizeof(C))) laerror("write error");}
 static void get(int fd, C &x, bool dimensions=0) {if(sizeof(C)!=read(fd,&x,sizeof(C))) laerror("read error");}
 static void multiput(unsigned int n,int fd, const C *x, bool dimensions=0){if((ssize_t)(n*sizeof(C))!=write(fd,x,n*sizeof(C))) laerror("write error");}
@@ -96,7 +112,7 @@ static void multiget(unsigned int n, int fd, C *x, bool dimensions=0) {if((ssize
 };
-//prepare for non-scalar classes
+//non-scalars except smat
 template<typename C>
 struct LA_traits; //forward declaration needed for template recursion
@@ -107,8 +123,11 @@ struct LA_traits_aux<X<C>, scalar_false> { \
 typedef C elementtype; \
 typedef X<C> producttype; \
 typedef typename LA_traits<C>::normtype normtype; \
-static normtype norm (const X<C> &x) {return x.norm();} \
+static bool gencmp(const C *x, const C *y, int n) {for(int i=0; i<n; ++i) if(x[i]!=y[i]) return true; return false;} \
-static void axpy (X<C>&s, const X<C> &x, const C c) {s.axpy(c,x);} \
+static inline bool bigger(const  C &x, const C &y) {return x>y;} \
 static inline bool smaller(const  C &x, const C &y) {return x<y;} \
 static inline normtype norm (const X<C> &x) {return x.norm();} \
 static inline void axpy (X<C>&s, const X<C> &x, const C c) {s.axpy(c,x);} \
 static void put(int fd, const C &x, bool dimensions=1) {x.put(fd,dimensions);} \
 static void get(int fd, C &x, bool dimensions=1) {x.get(fd,dimensions);} \
 static void multiput(unsigned int n,int fd, const C *x, bool dimensions=1) {for(unsigned int i=0; i<n; ++i) x[i].put(fd,dimensions);} \
@@ -123,14 +142,17 @@ generate_traits(SparseMat)
 #undef generate_traits
-//non-scalar exceptions (smat product type)
+//smat
 template<typename C> 
 struct LA_traits_aux<NRSMat<C>, scalar_false> { 
 typedef C elementtype; 
 typedef NRMat<C> producttype; 
 typedef typename LA_traits<C>::normtype normtype; 
-static normtype norm (const NRSMat<C> &x) {return x.norm();} 
+static bool gencmp(const C *x, const C *y, int n) {for(int i=0; i<n; ++i) if(x[i]!=y[i]) return true; return false;}
-static void axpy (NRSMat<C>&s, const NRSMat<C> &x, const C c) {s.axpy(c,x);} 
+static inline bool bigger(const  C &x, const C &y) {return x>y;}
 static inline bool smaller(const  C &x, const C &y) {return x<y;}
 static inline normtype norm (const NRSMat<C> &x) {return x.norm();} 
 static inline void axpy (NRSMat<C>&s, const NRSMat<C> &x, const C c) {s.axpy(c,x);} 
 static void put(int fd, const C &x, bool dimensions=1) {x.put(fd,dimensions);} 
 static void get(int fd, C &x, bool dimensions=1) {x.get(fd,dimensions);} 
 static void multiput(unsigned int n,int fd, const C *x, bool dimensions=1) {for(unsigned int i=0; i<n; ++i) x[i].put(fd,dimensions);} \
--- a/mat.cc
+++ b/mat.cc
@@ -15,6 +15,7 @@ extern ssize_t write(int, const void *, size_t);
 template NRMat<double>;
 template NRMat< complex<double> >;
 template NRMat<int>;
 template NRMat<short>;
 template NRMat<char>;
@@ -798,6 +799,7 @@ template istream & operator>>(istream  &s, NRMat< T > &x); \
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
 INSTANTIZE(int)
 INSTANTIZE(short)
 INSTANTIZE(char)
--- a/mat.h
+++ b/mat.h
@@ -29,9 +29,9 @@ public:
 #endif
 	~NRMat();
 #ifdef MATPTR
-	const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return(memcmp(v[0],rhs.v[0],nn*mm*sizeof(T)));}
+	const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v[0],rhs.v[0],nn*mm);} //memcmp for scalars else elementwise
 #else
-        const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return(memcmp(v,rhs.v,nn*mm*sizeof(T)));}
+        const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v,rhs.v,nn*mm);} //memcmp for scalars else elementwise
 #endif
 	const bool operator==(const NRMat &rhs) const {return !(*this != rhs);};
 	inline int getcount() const {return count?*count:0;}
@@ -101,6 +101,7 @@ public:
 	explicit NRMat(const SparseMat<T> &rhs);                // dense from sparse
 	NRMat & operator+=(const SparseMat<T> &rhs);
        NRMat & operator-=(const SparseMat<T> &rhs);
 	void gemm(const T &beta, const SparseMat<T> &a, const char transa, const NRMat &b, const char transb, const T &alpha);//this = alpha*op( A )*op( B ) + beta*this
        inline void simplify() {}; //just for compatibility with sparse ones
 	bool issymmetric() const {return 0;};
--- a/noncblas.h
+++ b/noncblas.h
@@ -595,4 +595,64 @@ int cblas_errprn(int ierr, int info, char *form, ...);
 #endif  /* end #ifdef CBLAS_ENUM_ONLY */
 #endif
 /*
 *             Automatically Tuned Linear Algebra Software v3.4.1
 *                    (C) Copyright 1997 R. Clint Whaley
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions, and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *   3. The name of the ATLAS group or the names of its contributers may
 *      not be used to endorse or promote products derived from this
 *      software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
 #ifndef ATLAS_ENUM_H
   #define ATLAS_ENUM_H
   #define ATLAS_ORDER CBLAS_ORDER
      #define AtlasRowMajor CblasRowMajor
      #define AtlasColMajor CblasColMajor
   #define ATLAS_TRANS CBLAS_TRANSPOSE
      #define AtlasNoTrans CblasNoTrans
      #define AtlasTrans CblasTrans
      #define AtlasConjTrans CblasConjTrans
   #define ATLAS_UPLO CBLAS_UPLO
      #define AtlasUpper CblasUpper
      #define AtlasLower CblasLower
   #define ATLAS_DIAG CBLAS_DIAG
      #define AtlasNonUnit CblasNonUnit
      #define AtlasUnit CblasUnit
   #define ATLAS_SIDE CBLAS_SIDE
      #define AtlasLeft  CblasLeft
      #define AtlasRight CblasRight
 #endif
 /*from clapack.h*/
 int clapack_dgesv(const enum CBLAS_ORDER Order, const int N, const int NRHS,
                  double *A, const int lda, int *ipiv,
                  double *B, const int ldb); 
 #endif
--- a/nonclass.cc
+++ b/nonclass.cc
@@ -26,6 +26,7 @@ template void lawritemat(FILE *file,const T *a,int r,int c,const char *form0, \
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
 INSTANTIZE(int)
 INSTANTIZE(short)
 INSTANTIZE(char)
 template <typename T>
--- a/nonclass.h
+++ b/nonclass.h
@@ -122,12 +122,30 @@ return det;
 //general submatrix, INDEX will typically be NRVec<int> or even int*
 //NOTE: in order to check consistency between nrows and rows in rows is a NRVec
 //some advanced metaprogramming would be necessary
 //application: e.g. ignoresign=true, equalsigns=true, indexshift= -1 ... elements of Slater overlaps for RHF
 template<class MAT, class INDEX>
-const NRMat<typename LA_traits<MAT>::elementtype> submatrix(const MAT a, const int nrows, const INDEX rows, const int ncols, const INDEX cols, int indexshift=0, bool ignoresign=false)
+const NRMat<typename LA_traits<MAT>::elementtype> submatrix(const MAT a, const int nrows, const INDEX rows, const int ncols, const INDEX cols, int indexshift=0, bool ignoresign=false, bool equalsigns=false)
 {
 NRMat<typename LA_traits<MAT>::elementtype> r(nrows,ncols);
 if(equalsigns) //make the element zero if signs of both indices are opposite
 {
 if(ignoresign)
 {
 for(int i=0; i<nrows; ++i)
        for(int j=0; j<ncols; ++j)
                r(i,j) = rows[i]*cols[j]<0?0.:a(abs(rows[i])+indexshift,abs(cols[j])+indexshift);
 }
 else
 {
 for(int i=0; i<nrows; ++i)
        for(int j=0; j<ncols; ++j)
                r(i,j) = rows[i]*cols[j]<0?0.:a(rows[i]+indexshift,cols[j]+indexshift);
 }
 }
 else
 {
 if(ignoresign)
 {
 for(int i=0; i<nrows; ++i)
@@ -140,6 +158,7 @@ for(int i=0; i<nrows; ++i)
 	for(int j=0; j<ncols; ++j)
 		r(i,j) = a(rows[i]+indexshift,cols[j]+indexshift);
 }
 }
 return r;
 }
--- a/qsort.h
+++ b/qsort.h
@@ -0,0 +1,41 @@
 //general quicksort suitable if we do not want to create extra class for the member type but prefer to encapsulate it into compare and swap soutines
 //returns parity of the permutation
 template<typename INDEX, typename COMPAR>
 int genqsort(INDEX l, INDEX r,COMPAR (*cmp)(const INDEX, const INDEX), void (*swap)(const INDEX,const INDEX)) 
 {
 INDEX i,j,piv;
 int parity=0;
 if(r<=l) return parity; //1 element
 if(cmp(r,l)<0) {parity^=1; swap(l,r);}
 if(r-l==1) return parity; //2 elements and preparation for median
 piv= (l+r)/2; //pivoting by median of 3 - safer 
 if(cmp(piv,l)<0) {parity^=1; swap(l,piv);} //and change the pivot element implicitly
 if(cmp(r,piv)<0) {parity^=1; swap(r,piv);} //and change the pivot element implicitly
 if(r-l==2) return parity; //in the case of 3 elements we are finished too
 //general case , l-th r-th already processed
 i=l+1; j=r-1;
 do{
  //important sharp inequality - stops at sentinel element for efficiency
  // this is inefficient if all keys are equal - unnecessary n log n swaps are done, but we assume that it is atypical input
  while(cmp(i++,piv)<0);
  i--;
  while(cmp(j--,piv)>0);
  j++;
  if(i<j)
 	{
 	// swap and keep track of position of pivoting element 
 	parity^=1; swap(i,j); 
 	if(i==piv) piv=j; else if(j==piv) piv=i;
 	}
  if(i<=j) {i++; j--;}
  }while(i<=j);
 if(j-l < r-i)   //because of the stack in bad case process first the shorter subarray
 	{if(l<j) parity ^=genqsort(l,j,cmp,swap); if(i<r) parity ^=genqsort(i,r,cmp,swap);}
 else
 	{if(i<r) parity ^=genqsort(i,r,cmp,swap); if(l<j) parity ^=genqsort(l,j,cmp,swap);}
 return parity;
 }
--- a/smat.cc
+++ b/smat.cc
@@ -16,6 +16,7 @@ extern ssize_t write(int, const void *, size_t);
 template NRSMat<double>;
 template NRSMat< complex<double> >;
 template NRSMat<int>;
 template NRSMat<short>;
 template NRSMat<char>;
@@ -338,5 +339,6 @@ template istream & operator>>(istream  &s, NRSMat< T > &x); \
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
 INSTANTIZE(int)
 INSTANTIZE(short)
 INSTANTIZE(char)
--- a/smat.h
+++ b/smat.h
@@ -24,7 +24,7 @@ public:
 	NRSMat & operator|=(const NRSMat &rhs);	//assignment to a new copy
 	NRSMat & operator=(const NRSMat &rhs);	//assignment
 	NRSMat & operator=(const T &a);		//assign a to diagonal
-        const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return(memcmp(v,rhs.v,NN2*sizeof(T)));}
+        const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,NN2);} //memcmp for scalars else elementwise
        const bool operator==(const NRSMat &rhs) const {return !(*this != rhs);};
 	inline NRSMat & operator*=(const T &a);
 	inline NRSMat & operator+=(const T &a); 
--- a/sparsemat.h
+++ b/sparsemat.h
@@ -85,7 +85,8 @@ public:
        inline const SparseMat operator*(const T &rhs) const {return SparseMat(*this) *= rhs;}
        inline const SparseMat operator+(const SparseMat &rhs) const {return SparseMat(*this) += rhs;} //must not be symmetric+general
        inline const SparseMat operator-(const SparseMat &rhs) const {return SparseMat(*this) -= rhs;} //must not be symmetric+general
-	inline const NRVec<T> operator*(const NRVec<T> &rhs) const; // Mat * Vec
+	inline const NRVec<T> operator*(const NRVec<T> &rhs) const; // SparseMat * Vec
 	inline const NRMat<T> operator*(const NRMat<T> &rhs) const; // SparseMat * Mat
 	void diagonalof(NRVec<T> &, const bool divide=0) const; //get diagonal
 	const SparseMat operator*(const SparseMat &rhs) const; 
 	SparseMat & oplusequal(const SparseMat &rhs); //direct sum
@@ -135,6 +136,10 @@ template <typename T>
 inline const NRVec<T> SparseMat<T>::operator*(const NRVec<T> &rhs) const
 {NRVec<T> result(nn); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
 template <typename T>
 inline const NRMat<T> SparseMat<T>::operator*(const NRMat<T> &rhs) const
 {NRMat<T> result(nn,rhs.ncols()); result.gemm((T)0,*this,'n',rhs,'n',(T)1); return result;};
 template <typename T>
 	extern istream& operator>>(istream  &s, SparseMat<T> &x);
--- a/t.cc
+++ b/t.cc
@@ -194,7 +194,8 @@ cout <<" big1*big2 "<<big3[0][0]<<" time "<<clock()/((double) (CLOCKS_PER_SEC))-
 */
-/*
+if(0)
 {
 NRMat<double> atest, btest,ctest;
 {
 int cc,c1,c2,c3;
@@ -211,7 +212,7 @@ cout << dtest;
 NRMat<double> etest(atest.nrows(),btest.ncols());
 etest.strassen(0., atest, 't', btest, 'n', 1.);
 cout << etest;
-*/
+}
 if(0)
 {
@@ -511,6 +512,53 @@ cin>>n;
 	cout <<"difference = "<<(res1-res2).norm()<<endl;
 }
 if(1)
 {
 int n,k,m;
 cin >>n>>k>>m;
 {
 NRMat<double> a(n,k),b(k,m),c(n,m),d(n,m);
 for(int i=0;i<n;++i) for(int j=0;j<k;++j) a(i,j)= random()/(1.+RAND_MAX);
 for(int i=0;i<k;++i) for(int j=0;j<m;++j) b(i,j)= random()/(1.+RAND_MAX);
 c.gemm(0., a, 'n', b, 'n', .6);
 SparseMat<double> aa(a);
 d.gemm(0., aa, 'n', b, 'n', .6);
 cout<<c<<d;
 cout  <<"test error = "<<(c-d).norm()<<endl;
 }
 {
 NRMat<double> a(k,n),b(k,m),c(n,m),d(n,m);
 for(int i=0;i<k;++i) for(int j=0;j<n;++j) a(i,j)= random()/(1.+RAND_MAX);
 for(int i=0;i<k;++i) for(int j=0;j<m;++j) b(i,j)= random()/(1.+RAND_MAX);
 c.gemm(0., a, 't', b, 'n', .7);
 SparseMat<double> aa(a);
 d.gemm(0., aa, 't', b, 'n', .7);
 cout<<c<<d;
 cout  <<"test error = "<<(c-d).norm()<<endl;
 }
 {
 NRMat<double> a(n,k),b(m,k),c(n,m),d(n,m);
 for(int i=0;i<n;++i) for(int j=0;j<k;++j) a(i,j)= random()/(1.+RAND_MAX);
 for(int i=0;i<m;++i) for(int j=0;j<k;++j) b(i,j)= random()/(1.+RAND_MAX);
 c.gemm(0., a, 'n', b, 't', .8);
 SparseMat<double> aa(a);
 d.gemm(0., aa, 'n', b, 't', .8);
 cout<<c<<d;
 cout  <<"test error = "<<(c-d).norm()<<endl;
 }
 {
 NRMat<double> a(k,n),b(m,k),c(n,m),d(n,m);
 for(int i=0;i<k;++i) for(int j=0;j<n;++j) a(i,j)= random()/(1.+RAND_MAX);
 for(int i=0;i<m;++i) for(int j=0;j<k;++j) b(i,j)= random()/(1.+RAND_MAX);
 c.gemm(0., a, 't', b, 't', .9);
 SparseMat<double> aa(a);
 d.gemm(0., aa, 't', b, 't', .9);
 cout<<c<<d;
 cout  <<"test error = "<<(c-d).norm()<<endl;
 }
 }
 if(0)
 {
@@ -767,19 +815,21 @@ cout <<b;
 if(0)
 {
 int n;
 double d;
 cin >>n;
 //NRMat<double> a(n,n);
 NRSMat<double> a(n);
 for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
        {
-        a(j,i)=a(i,j)=random()/(1.+RAND_MAX);
+        a(j,i)=a(i,j)=random()/(1.+RAND_MAX)*(i==j?10.:1.);
        }
 cout <<a;
 NRMat<double> y(1,n);
 for(int i=0;i<n;++i) y(0,i)=random()/(1.+RAND_MAX);
 cout <<y;
-linear_solve(a,&y);
+linear_solve(a,&y,&d);
 cout << y;
 cout <<"det is "<<d<<endl;
 }
 if(0)
@@ -911,7 +961,7 @@ cout <<r;
 #undef sparsity
 #define sparsity (n*2)
 #define sparsity2 (n/5)
-if(1)
+if(0)
 {
 int n,m;
 cin >>n>>m;
--- a/vec.cc
+++ b/vec.cc
@@ -141,6 +141,35 @@ const NRVec<T> NRVec<T>::operator-() const
 	return result;
 }
 //comparison operators (for lexical order)
 template <typename T>
 const bool NRVec<T>::operator>(const NRVec &rhs) const
 {
 int n=nn; if(rhs.nn<n) n=rhs.nn;
 for(int i=0; i<n;++i)
 	{
 	if(LA_traits<T>::bigger(v[i],rhs.v[i])) return true;
 	if(LA_traits<T>::smaller(v[i],rhs.v[i])) return false;
 	}
 return nn>rhs.nn;
 }
 template <typename T>
 const bool NRVec<T>::operator<(const NRVec &rhs) const
 {
 int n=nn; if(rhs.nn<n) n=rhs.nn;
 for(int i=0; i<n;++i)
        {
        if(LA_traits<T>::smaller(v[i],rhs.v[i])) return true;
        if(LA_traits<T>::bigger(v[i],rhs.v[i])) return false;
        }
 return nn<rhs.nn;
 }
 // axpy call for T = double (not strided)
 void NRVec<double>::axpy(const double alpha, const NRVec<double> &x)
 {
--- a/vec.h
+++ b/vec.h
@@ -51,8 +51,12 @@ public:
 	NRVec & operator=(const NRVec &rhs);
 	NRVec & operator=(const T &a);  //assign a to every element
 	NRVec & operator|=(const NRVec &rhs);
-	const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return(memcmp(v,rhs.v,nn*sizeof(T)));}
+	const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,nn);} //memcmp for scalars else elementwise
 	const bool operator==(const NRVec &rhs) const {return !(*this != rhs);};
 	const bool operator>(const NRVec &rhs) const;
 	const bool operator<(const NRVec &rhs) const;
 	const bool operator>=(const NRVec &rhs) const {return !(*this < rhs);};
 	const bool operator<=(const NRVec &rhs) const {return !(*this > rhs);};
 	const NRVec operator-() const;
 	inline NRVec & operator+=(const NRVec &rhs);
 	inline NRVec & operator-=(const NRVec &rhs);