diff --git a/auxstorage.h b/auxstorage.h
index b12eae6..80a642c 100644
--- a/auxstorage.h
+++ b/auxstorage.h
@@ -1,6 +1,5 @@
 #ifndef _AUXSTORAGE_H_
 #define _AUXSTORAGE_H_
-#include "laerror.h"
 #include "vec.h"
 #include "mat.h"
 #include "smat.h"
@@ -68,6 +67,7 @@ void AuxStorage<T>::get(NRVec<T> &x, const int pos) const
 {
 if(recl==0) laerror("get from an empty file in AuxStorage");
 if((off64_t)-1 == lseek64(fd,pos*((off64_t)recl),SEEK_SET)) {perror(""); laerror("seek failed in AuxStorage");}
+x.copyonwrite();
 if((ssize_t)recl!=read(fd,&x[0],recl)) {perror(""); laerror("read failed in AuxStorage");}
 }
 
@@ -86,6 +86,7 @@ void AuxStorage<T>::get(NRMat<T> &x, const int pos) const
 {
 if(recl==0) laerror("get from an empty file in AuxStorage");
 if((off64_t)-1 == lseek64(fd,pos*((off64_t)recl),SEEK_SET)) {perror(""); laerror("seek failed in AuxStorage");}
+x.copyonwrite();
 if((ssize_t)recl!=read(fd,&x(0,0),recl)) {perror(""); laerror("read failed in AuxStorage");}
 }
 
@@ -104,6 +105,7 @@ void AuxStorage<T>::get(NRSMat<T> &x, const int pos) const
 {
 if(recl==0) laerror("get from an empty file in AuxStorage");
 if((off64_t)-1 == lseek64(fd,pos*((off64_t)recl),SEEK_SET)) {perror(""); laerror("seek failed in AuxStorage");}
+x.copyonwrite();
 if((ssize_t)recl!=read(fd,&x(0,0),recl)) {perror(""); laerror("read failed in AuxStorage");}
 }
 
diff --git a/diis.h b/diis.h
index 6feb1bf..d6f09b8 100644
--- a/diis.h
+++ b/diis.h
@@ -1,4 +1,4 @@
-//DIIS convergence acceleration
+//DIIS convergence acceleration according to Pulay: Chem. Phys. Lett. 73, 393 (1980); J. Comp. Chem. 3,556 (1982)
 #ifndef _DIIS_H_
 #define _DIIS_H_
 #include "vec.h"
diff --git a/la.h b/la.h
index d787b11..e0aab9a 100644
--- a/la.h
+++ b/la.h
@@ -1,9 +1,7 @@
 #ifndef _LA_H_
 #define _LA_H_
 
-#ifdef _GLIBCPP_NO_TEMPLATE_EXPORT
-# define export 
-#endif
+//this should be the single include file for the end user
 
 #include "vec.h"
 #include "smat.h"
diff --git a/la_traits.h b/la_traits.h
index ef5c710..7290515 100644
--- a/la_traits.h
+++ b/la_traits.h
@@ -1,9 +1,26 @@
 ////////////////////////////////////////////////////////////////////////////
-//LA traits classes
+//LA traits classes and generally needed includes
 
 #ifndef _LA_TRAITS_INCL
 #define _LA_TRAITS_INCL
 
+
+
+using namespace std;
+#include <stdio.h>
+#include <string.h>
+#include <iostream>
+#include <complex>
+#include "laerror.h"
+
+extern "C" {
+#include "cblas.h"
+}
+
+#ifdef _GLIBCPP_NO_TEMPLATE_EXPORT
+# define export 
+#endif
+
 //forward declarations
 template<typename C> class NRVec;
 template<typename C> class NRMat;
diff --git a/mat.cc b/mat.cc
index 2269d06..a18f09c 100644
--- a/mat.cc
+++ b/mat.cc
@@ -187,13 +187,14 @@ const NRVec<T> NRMat<T>::rsum() const
 
 // transpose Mat
 template <typename T>
-NRMat<T> & NRMat<T>::transposeme()
+NRMat<T> & NRMat<T>::transposeme(int n)
 {
+if(n==0) n=nn;
 #ifdef DEBUG
-	if (nn != mm) laerror("transpose of non-square Mat");
+	if (n==nn && nn != mm || n>mm || n>nn) laerror("transpose of non-square Mat");
 #endif
 	copyonwrite();
-	for(int i=1; i<nn; i++)
+	for(int i=1; i<n; i++)
 		for(int j=0; j<i; j++) {
 #ifdef MATPTR
 			T tmp = v[i][j]; 
@@ -237,26 +238,6 @@ void NRMat<T>::fscanf(FILE *f, const char *format)
 
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
 /*
  * BLAS specializations for double and complex<double>
@@ -602,29 +583,6 @@ NRMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const
 	return result;
 }
 
-// Mat * Vec
-const NRVec<double> 
-NRMat<double>::operator*(const NRVec<double> &vec) const
-{
-#ifdef DEBUG
-	if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec");
-#endif
-	NRVec<double> result(nn);
-	cblas_dgemv(CblasRowMajor, CblasNoTrans, nn, mm, 1.0, (*this)[0],
-			mm, &vec[0], 1, 0.0, &result[0], 1);
-	return result;
-}
-const NRVec< complex<double> >
-NRMat< complex<double> >::operator*(const NRVec< complex<double> > &vec) const
-{
-#ifdef DEBUG
-	if(mm != vec.size()) laerror("incompatible sizes in Mat*Vec");
-#endif
-	NRVec< complex<double> > result(nn);
-	cblas_zgemv(CblasRowMajor, CblasNoTrans, nn, mm, (void *)&CONE, (void *)(*this)[0],
-			mm, (void *)&vec[0], 1, (void *)&CZERO, (void *)&result[0], 1);
-	return result;
-}
 
 // sum of rows
 const NRVec<double> NRMat<double>::rsum() const
diff --git a/mat.h b/mat.h
index c2e2275..9e587ea 100644
--- a/mat.h
+++ b/mat.h
@@ -1,8 +1,6 @@
 #ifndef _LA_MAT_H_
 #define _LA_MAT_H_
 
-#include "vec.h"
-#include "smat.h"
 #include "la_traits.h"
 
 template <typename T>
@@ -55,7 +53,7 @@ public:
 	inline const NRMat operator-(const NRMat &rhs) const;
 	inline const NRMat operator+(const NRSMat<T> &rhs) const;
 	inline const NRMat operator-(const NRSMat<T> &rhs) const;
-	const T dot(const NRMat &rhs) const; // scalar product of Mat.Mat
+	const T dot(const NRMat &rhs) const; // scalar product of Mat.Mat//@@@for complex do conjugate
 	const NRMat operator*(const NRMat &rhs) const; // Mat * Mat
 	const NRMat oplus(const NRMat &rhs) const; //direct sum
 	const NRMat otimes(const NRMat &rhs) const; //direct product
@@ -64,7 +62,7 @@ public:
 	const NRMat operator*(const NRSMat<T> &rhs) const; // Mat * Smat
 	const NRMat operator&(const NRMat &rhs) const; // direct sum
 	const NRMat operator|(const NRMat<T> &rhs) const; // direct product
-	const NRVec<T> operator*(const NRVec<T> &rhs) const; // Mat * Vec
+	const NRVec<T> operator*(const NRVec<T> &rhs) const {NRVec<T> result(nn); result.gemv((T)0,*this,'n',(T)1,rhs); return result;}; // Mat * Vec
 	const NRVec<T> rsum() const; //sum of rows
 	const NRVec<T> csum() const; //sum of columns
 	void diagonalof(NRVec<T> &, const bool divide=0) const; //get diagonal 
@@ -74,13 +72,14 @@ public:
 	inline const T& operator()(const int i, const int j) const;
 	inline int nrows() const;
 	inline int ncols() const;
+	inline int size() const;
         void get(int fd, bool dimensions=1);
         void put(int fd, bool dimensions=1) const;
 	void copyonwrite();
 	void resize(const int n, const int m);
 	inline operator T*(); //get a pointer to the data
 	inline operator const T*() const;
-	NRMat & transposeme(); // square matrices only 
+	NRMat & transposeme(int n=0); // square matrices only 
 	NRMat & conjugateme(); // square matrices only
 	const NRMat transpose(bool conj=false) const;
 	const NRMat conjugate() const;
@@ -103,6 +102,7 @@ public:
 	NRMat & operator+=(const SparseMat<T> &rhs);
         NRMat & operator-=(const SparseMat<T> &rhs);
         inline void simplify() {}; //just for compatibility with sparse ones
+	bool issymmetric() const {return 0;};
 
 //Strassen's multiplication (better than n^3, analogous syntax to gemm)
 	void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha);//this := alpha*op( A )*op( B ) + beta*this
@@ -110,6 +110,11 @@ public:
 
 };
 
+//due to mutual includes this has to be after full class declaration
+#include "vec.h"
+#include "smat.h"
+#include "sparsemat.h"
+
 // ctors
 template <typename T>
 NRMat<T>::NRMat(const int n, const int m) : nn(n), mm(m), count(new int)
@@ -294,6 +299,12 @@ inline int NRMat<T>::ncols() const
 	return mm;
 }
 
+template <typename T>
+inline int NRMat<T>::size() const
+{
+	return nn*mm;
+}
+
 // reference pointer to Mat
 template <typename T>
 inline NRMat<T>::operator T* ()
diff --git a/nonclass.cc b/nonclass.cc
index 41011d4..7829e14 100644
--- a/nonclass.cc
+++ b/nonclass.cc
@@ -2,7 +2,11 @@ extern "C" {
 #include "atlas_enum.h"
 #include "clapack.h"
 }
-#include "la.h"
+#include "vec.h"
+#include "smat.h"
+#include "mat.h"
+#include "nonclass.h"
+
 
 #ifdef FORTRAN_
 #define FORNAME(x) x##_
diff --git a/nonclass.h b/nonclass.h
index c2b69a0..f1efb40 100644
--- a/nonclass.h
+++ b/nonclass.h
@@ -74,11 +74,6 @@ extern void gdiagonalize(NRMat<double> &a, NRVec< complex<double> > &w,
 extern NRMat<double> matrixfunction(NRSMat<double> a, double (*f) (double));
 extern NRMat<double> matrixfunction(NRMat<double> a, complex<double> (*f)(const complex<double> &),const bool adjust=0);
 
-
-//////////////////////////////
-//other than lapack functions/
-//////////////////////////////
-
 //functions on matrices
 inline NRMat<double>  sqrt(const NRSMat<double> &a) { return matrixfunction(a,&sqrt); }
 inline NRMat<double>  log(const NRSMat<double> &a) { return matrixfunction(a,&log); }
diff --git a/smat.cc b/smat.cc
index f165426..dc02d40 100644
--- a/smat.cc
+++ b/smat.cc
@@ -248,27 +248,6 @@ NRSMat< complex<double> >::dot(const NRSMat< complex<double> > &rhs) const
 	return dot;
 }
 
-// x = S * x
-const NRVec<double> NRSMat<double>::operator*(const NRVec<double> &rhs) const
-{
-#ifdef DEBUG
-	if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec");
-#endif
-	NRVec<double> result(nn);
-	cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs, 1, 0.0, result, 1);
-	return result;
-}
-const NRVec< complex<double> >
-NRSMat< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
-{
-#ifdef DEBUG
-	if (nn!=rhs.size()) laerror("incompatible dimension in Smat*Vec");
-#endif
-	NRVec< complex<double> > result(nn);
-	cblas_zhpmv(CblasRowMajor, CblasLower, nn, (void *)(&CONE), (void *)v, 
-			(const void *)rhs, 1, (void *)(&CZERO), (void *)result, 1);
-	return result;
-}
 
 // norm of the matrix
 const double  NRSMat<double>::norm(const double scalar) const
@@ -347,18 +326,6 @@ istream& operator>>(istream  &s, NRSMat<T> &x)
                 return s;
                 }
 
-//not implemented yet
-const NRVec<int> NRSMat<int>::operator*(NRVec<int> const&rhs) const
-{
-laerror("NRSMat<int>::operator*(NRVec<int> const&) not implemented yet");
-return rhs;
-}
-
-const NRVec<char> NRSMat<char>::operator*(NRVec<char> const&rhs) const
-{
-laerror("NRSMat<char>::operator*(NRVec<char> const&) not implemented yet");
-return rhs;
-}
 
 
 
diff --git a/smat.h b/smat.h
index 1f38d91..913959d 100644
--- a/smat.h
+++ b/smat.h
@@ -1,8 +1,6 @@
 #ifndef _LA_SMAT_H_
 #define _LA_SMAT_H_
 
-#include "vec.h"
-#include "mat.h"
 #include "la_traits.h"
 
 #define NN2 (nn*(nn+1)/2)
@@ -44,8 +42,8 @@ public:
 	inline const NRMat<T> operator-(const NRMat<T> &rhs) const; 
 	const NRMat<T> operator*(const NRSMat &rhs) const; // SMat*SMat
 	const NRMat<T> operator*(const NRMat<T> &rhs) const; // SMat*Mat 
-	const T dot(const NRSMat &rhs) const; // Smat.Smat
-	const NRVec<T> operator*(const NRVec<T> &rhs) const; 
+	const T dot(const NRSMat &rhs) const; // Smat.Smat//@@@for complex do conjugate
+	const NRVec<T> operator*(const NRVec<T> &rhs) const {NRVec<T> result(nn); result.gemv((T)0,*this,'n',(T)1,rhs); return result;}; // Mat * Vec
 	void diagonalof(NRVec<T> &, const bool divide=0) const; //get diagonal
 	inline const T& operator[](const int ij) const;
 	inline T& operator[](const int ij);
@@ -53,6 +51,7 @@ public:
 	inline T& operator()(const int i, const int j);
 	inline int nrows() const;
 	inline int ncols() const;
+	inline int size() const;
 	const double norm(const T scalar=(T)0) const;
 	void axpy(const T alpha, const NRSMat &x); // this+= a*x
 	inline const T amax() const;
@@ -69,9 +68,15 @@ public:
 //members concerning sparse matrix
 	explicit NRSMat(const SparseMat<T> &rhs);               // dense from sparse
 	inline void simplify() {}; //just for compatibility with sparse ones
+	bool issymmetric() const {return 1;}
 };
 
-// INLINES
+//due to mutual includes this has to be after full class declaration
+#include "vec.h"
+#include "mat.h"
+#include "sparsemat.h"
+
+
 // ctors
 template <typename T>
 inline NRSMat<T>::NRSMat(const int n) : nn(n), v(new T[NN2]),
@@ -293,6 +298,13 @@ inline int NRSMat<T>::ncols() const
 	return nn;
 }
 
+template <typename T>
+inline int NRSMat<T>::size() const
+{
+        return NN2;
+}
+
+
 // max value
 inline const double NRSMat<double>::amax() const
 {
diff --git a/sparsemat.cc b/sparsemat.cc
index f4487cf..787e22f 100644
--- a/sparsemat.cc
+++ b/sparsemat.cc
@@ -1,7 +1,5 @@
 #include <string>
 #include <cmath>
-#include <complex>
-#include <iostream>
 #include <stdlib.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -14,10 +12,6 @@ template SparseMat<double>;
 template SparseMat<complex<double> >;
 
 
-#ifdef _GLIBCPP_NO_TEMPLATE_EXPORT
-# define export
-#endif
-
 
 export template <class T>
 ostream& operator<<(ostream &s, const SparseMat<T> &x)
@@ -403,7 +397,7 @@ export template <class T>
 void SparseMat<T>::incsize(const SPMatindex n, const SPMatindex m)
 {
 	if(symmetric && n!=m) laerror("unsymmetric size increment of a symmetric sparsemat");
-	if(!count && nn==0 && mm==0) count=new int(1);
+	if(!count ) count=new int(1);
 	copyonwrite();//this errors if !count 
 	unsort();
         nn+=n;
@@ -883,34 +877,8 @@ else
 }
 
 
-//multiplication with dense vector from both sides
-template <class T>
-const NRVec<T> SparseMat<T>::multiplyvector(const NRVec<T> &vec, const bool transp) const
-{
-if(transp && nn!=(SPMatindex)vec.size() || !transp && mm!=(SPMatindex)vec.size()) laerror("incompatible sizes in sparsemat*vector");
-NRVec<T> result(transp?mm:nn);
-result.gemv((T)0, *this, transp?'t':'n', (T)1., vec);
-return result;
-}
 
 
-template <class T>
-const NRVec<T> NRVec<T>::operator*(const SparseMat<T> &mat) const
-{
-if(mat.nrows()!= (SPMatindex)size()) laerror("incompatible sizes in vector*sparsemat");
-NRVec<T> result((T)0,mat.ncols());
-matel<T> *l=mat.getlist();
-bool symmetric=mat.issymmetric();
-while(l)
-        {
-        result.v[l->col]+= l->elem*v[l->row];
-	if(symmetric&&l->row!=l->col) result.v[l->row]+= l->elem*v[l->col];
-        l=l->next;
-        }
-return result;
-
-}
-
 template<class T>
 const T SparseMat<T>::trace() const
 {
@@ -1249,7 +1217,6 @@ template SparseMat<T> & SparseMat<T>::operator-=(const T a); \
 template NRMat<T>::NRMat(const SparseMat<T> &rhs); \
 template NRSMat<T>::NRSMat(const SparseMat<T> &rhs); \
 template NRVec<T>::NRVec(const SparseMat<T> &rhs); \
-template const NRVec<T> SparseMat<T>::operator*(const NRVec<T> &vec) const; \
 template const NRVec<T> NRVec<T>::operator*(const SparseMat<T> &mat) const; \
 template SparseMat<T> & SparseMat<T>::join(SparseMat<T> &rhs); \
 template const T SparseMat<T>::trace() const; \
@@ -1263,7 +1230,7 @@ template void NRVec<T>::gemv(const T beta, const SparseMat<T> &a, const char tra
 
 INSTANTIZE(double)
 
+INSTANTIZE(complex<double>) //some functions are not OK for hermitean matrices, needs a revision!!!
 
-// some functions are not OK for hermitean! INSTANTIZE(complex<double>)
 
 #endif
diff --git a/sparsemat.h b/sparsemat.h
index 8d3a10f..7e033d8 100644
--- a/sparsemat.h
+++ b/sparsemat.h
@@ -1,14 +1,13 @@
 #ifndef _SPARSEMAT_H_
 #define _SPARSEMAT_H_
 
-//for vectors and dense matrices we shall need
-#include "la.h"
+#include "la_traits.h"
 
-template<class T>
+template<typename T>
 inline const T MAX(const T &a, const T &b)
         {return b > a ? (b) : (a);}
 
-template<class T>
+template<typename T>
 inline void SWAP(T &a, T &b)
         {T dum=a; a=b; b=dum;}
 
@@ -21,7 +20,7 @@ typedef unsigned int SPMatindex;
 typedef int SPMatindexdiff; //more clear would be to use traits
 
 //element of a linked list
-template<class T>
+template<typename T>
 struct matel
         {
         T elem;
@@ -31,7 +30,7 @@ struct matel
         };
 
 
-template <class T>
+template <typename T>
 class SparseMat {
 protected:
 	SPMatindex nn;
@@ -86,8 +85,7 @@ public:
         inline const SparseMat operator*(const T &rhs) const {return SparseMat(*this) *= rhs;}
         inline const SparseMat operator+(const SparseMat &rhs) const {return SparseMat(*this) += rhs;} //must not be symmetric+general
         inline const SparseMat operator-(const SparseMat &rhs) const {return SparseMat(*this) -= rhs;} //must not be symmetric+general
-	const NRVec<T> multiplyvector(const NRVec<T> &rhs, const bool transp=0) const; //sparse matrix * dense vector optionally transposed
-	inline const NRVec<T> operator*(const NRVec<T> &rhs) const {return multiplyvector(rhs);} //sparse matrix * dense vector
+	inline const NRVec<T> operator*(const NRVec<T> &rhs) const; // Mat * Vec
 	void diagonalof(NRVec<T> &, const bool divide=0) const; //get diagonal
 	const SparseMat operator*(const SparseMat &rhs) const; 
 	SparseMat & oplusequal(const SparseMat &rhs); //direct sum
@@ -128,14 +126,23 @@ public:
 	void addsafe(const SPMatindex n, const SPMatindex m, const T elem);
 };
 
-template <class T>
+//due to mutual includes this has to be after full class declaration
+#include "vec.h"
+#include "smat.h"
+#include "mat.h"
+
+template <typename T>
+inline const NRVec<T> SparseMat<T>::operator*(const NRVec<T> &rhs) const
+{NRVec<T> result(nn); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
+
+template <typename T>
 	extern istream& operator>>(istream  &s, SparseMat<T> &x);
 
-template <class T>
+template <typename T>
 	extern ostream& operator<<(ostream &s, const SparseMat<T> &x);
 
 //destructor
-template <class T>
+template <typename T>
 SparseMat<T>::~SparseMat()
 {
 	unsort();
@@ -148,7 +155,7 @@ SparseMat<T>::~SparseMat()
 }
 
 //copy constructor (sort arrays are not going to be copied)
-template <class T>
+template <typename T>
 SparseMat<T>::SparseMat(const SparseMat<T> &rhs)
 {
 #ifdef debug
@@ -164,7 +171,7 @@ if(! &rhs) laerror("SparseMat copy constructor with NULL argument");
 	nonzero=0;
 }
 
-template <class T>
+template <typename T>
 const SparseMat<T> SparseMat<T>::transpose() const
 {
 if(list&&!count) laerror("some inconsistency in SparseMat transpose");
@@ -195,7 +202,7 @@ return result;
 
 
 
-template<class T>
+template<typename T>
 inline const SparseMat<T> commutator ( const SparseMat<T> &x, const SparseMat<T> &y, const bool trx=0, const bool tryy=0)
 {
 SparseMat<T> r;
@@ -204,7 +211,7 @@ r.gemm((T)1,y,tryy?'t':'n',x,trx?'t':'n',(T)-1); //saves a temporary and simplif
 return r;
 }
 
-template<class T>
+template<typename T>
 inline const SparseMat<T> anticommutator ( const SparseMat<T> &x, const SparseMat<T> &y, const bool trx=0, const bool tryy=0)
 {
 SparseMat<T> r;
@@ -215,7 +222,7 @@ return r;
 
 
 //add sparse to dense
-template<class T>
+template<typename T>
 NRMat<T> & NRMat<T>::operator+=(const SparseMat<T> &rhs)
 {
 if((unsigned int)nn!=rhs.nrows()||(unsigned int)mm!=rhs.ncols()) laerror("incompatible matrices in +=");
diff --git a/strassen.cc b/strassen.cc
index 5179206..caa6044 100644
--- a/strassen.cc
+++ b/strassen.cc
@@ -1,4 +1,6 @@
-#include "la.h"
+
+#include "mat.h"
+
 /*Strassen algorithm*/
 // called routine is fortran-compatible
 extern "C" void fmm(const char c_transa,const char c_transb,const int m,const int n,const int k,const double alpha,
diff --git a/vec.cc b/vec.cc
index e5d187b..5fb81b1 100644
--- a/vec.cc
+++ b/vec.cc
@@ -15,6 +15,10 @@ extern ssize_t write(int, const void *, size_t);
 #define INSTANTIZE(T) \
 template ostream & operator<<(ostream &s, const NRVec< T > &x); \
 template istream & operator>>(istream  &s, NRVec< T > &x); \
+template void NRVec<T>::put(int fd, bool dim) const; \
+template void NRVec<T>::get(int fd, bool dim); \
+
+
 
 INSTANTIZE(double)
 INSTANTIZE(complex<double>)
@@ -26,8 +30,10 @@ INSTANTIZE(char)
 INSTANTIZE(unsigned char)
 template NRVec<double>;
 template NRVec<complex<double> >;
-template NRVec<int>;
 template NRVec<char>;
+template NRVec<int>;
+
+
 
 
 /*
@@ -228,13 +234,55 @@ NRVec< complex<double> > & NRVec< complex<double> >::normalize()
 	return *this;
 }
 
-//and for these types it does not make sense to normalize but we have them for linkage
+//stubs for linkage
 NRVec<int> &  NRVec<int>::normalize() {laerror("normalize() impossible for integer types"); return *this;}
 NRVec<char> &  NRVec<char>::normalize() {laerror("normalize() impossible for integer types"); return *this;}
+void NRVec<int>::gemv(const int beta,
+                const NRSMat<int> &A, const char trans,
+                const int alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
+void NRVec<char>::gemv(const char beta,
+                const NRSMat<char> &A, const char trans,
+                const char alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
+void NRVec<int>::gemv(const int beta,
+                const NRMat<int> &A, const char trans,
+                const int alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
+void NRVec<char>::gemv(const char beta,
+                const NRMat<char> &A, const char trans,
+                const char alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
+void NRVec<int>::gemv(const int beta,
+                const SparseMat<int> &A, const char trans,
+                const int alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
+
+void NRVec<char>::gemv(const char beta,
+                const SparseMat<char> &A, const char trans,
+                const char alpha, const NRVec &x)
+{
+laerror("not yet implemented");
+}
 
 
 
-// gemv call 
+
+// gemv calls 
 void NRVec<double>::gemv(const double beta, const NRMat<double> &A, 
 		const char trans, const double alpha, const NRVec &x)
 {
@@ -243,8 +291,9 @@ void NRVec<double>::gemv(const double beta, const NRMat<double> &A,
 		laerror("incompatible sizes in gemv A*x");
 #endif
 	cblas_dgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans),
-			A.nrows(), A.ncols(), alpha, A[0], A.ncols(), x.v, 1, beta, v, 1);
+			A.nrows(), A.ncols(), alpha, A, A.ncols(), x.v, 1, beta, v, 1);
 }
+
 void NRVec< complex<double> >::gemv(const complex<double> beta,
 		const NRMat< complex<double> > &A, const char trans, 
 		const complex<double> alpha, const NRVec &x)
@@ -254,35 +303,38 @@ void NRVec< complex<double> >::gemv(const complex<double> beta,
 		laerror("incompatible sizes in gemv A*x");
 #endif
 	cblas_zgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans), 
-			A.nrows(), A.ncols(), (void *)(&alpha), (void *)A[0], A.ncols(), 
-			(void *)x.v, 1, (void *)(&beta), (void *)v, 1);
+			A.nrows(), A.ncols(), &alpha, A, A.ncols(), 
+			x.v, 1, &beta, v, 1);
 }
 
-// Vec * Mat
-const NRVec<double> NRVec<double>::operator*(const NRMat<double> &mat) const
+
+void NRVec<double>::gemv(const double beta, const NRSMat<double> &A,
+                const char trans, const double alpha, const NRVec &x)
 {
 #ifdef DEBUG
-	if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat");
+	if (A.ncols()!=x.size()) laerror("incompatible dimension in gemv A*x");
 #endif
-	int n = mat.ncols();
-	NRVec<double> result(n);
-	cblas_dgemv(CblasRowMajor, CblasTrans, nn, n, 1.0, mat[0], n, v, 1,
-			0.0, result.v, 1);
-	return result;
+	NRVec<double> result(nn);
+	cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, x.v, 1, beta, v, 1);
 }
-const NRVec< complex<double> > 
-NRVec< complex<double> >::operator*(const NRMat< complex<double> > &mat) const
+
+
+void NRVec< complex<double> >::gemv(const complex<double> beta,
+                const NRSMat< complex<double> > &A, const char trans,
+                const complex<double> alpha, const NRVec &x)
 {
 #ifdef DEBUG
-	if(mat.nrows() != nn) laerror("incompatible sizes in Vec*Mat");
+	if (A.ncols()!=x.size()) laerror("incompatible dimension in gemv");
 #endif
-	int n = mat.ncols();
-	NRVec< complex<double> > result(n);
-	cblas_zgemv(CblasRowMajor, CblasTrans, nn, n, &CONE, mat[0], n, v, 1,
-			&CZERO, result.v, 1);
-	return result;
+	NRVec< complex<double> > result(nn);
+	cblas_zhpmv(CblasRowMajor, CblasLower, A.ncols(), &alpha, A, 
+			x.v, 1, &beta, v, 1);
 }
 
+
+
+
+
 // Direc product Mat = Vec | Vec
 const NRMat<double> NRVec<double>::operator|(const NRVec<double> &b) const
 {
diff --git a/vec.h b/vec.h
index be353c4..9d980bc 100644
--- a/vec.h
+++ b/vec.h
@@ -1,24 +1,8 @@
 #ifndef _LA_VEC_H_
 #define _LA_VEC_H_
 
-#include "laerror.h"
-extern "C" {
-#include "cblas.h"
-}
-#include <stdio.h>
-#include <complex>
-#include <string.h>
-#include <iostream>
-
-using namespace std;
-
 #include "la_traits.h"
 
-template <typename T> class NRVec;
-template <typename T> class NRSMat;
-template <typename T> class NRMat;
-template <typename T> class SparseMat;
-
 //////////////////////////////////////////////////////////////////////////////
 // Forward declarations
 template <typename T> void lawritemat(FILE *file,const T *a,int r,int c,
@@ -43,9 +27,6 @@ template<class T> \
 	inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
 { return NR##E(*this) X##= a; }
 
-#include "smat.h"
-#include "mat.h"
-
 
 // NRVec class
 template <typename T>
@@ -84,9 +65,14 @@ public:
 	inline const NRVec operator+(const T &a) const;
 	inline const NRVec operator-(const T &a) const;
 	inline const NRVec operator*(const T &a) const;
-	inline const T operator*(const NRVec &rhs) const; //scalar product -> ddot
-	inline const NRVec operator*(const NRSMat<T> & S) const;
-	const NRVec operator*(const NRMat<T> &mat) const;
+	inline const T operator*(const NRVec &rhs) const; //scalar product -> dot
+	inline const T dot(const NRVec &rhs) const {return *this * rhs;}; //@@@for complex do conjugate
+	void gemv(const T beta, const NRMat<T> &a, const char trans, const T alpha, const NRVec &x);
+	void gemv(const T beta, const NRSMat<T> &a, const char trans /*just for compatibility*/,  const T alpha, const NRVec &x);
+	void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x);
+	const NRVec operator*(const NRMat<T> &mat) const {NRVec<T> result(mat.ncols()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
+	const NRVec operator*(const NRSMat<T> &mat) const {NRVec<T> result(mat.ncols()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
+	const NRVec operator*(const SparseMat<T> &mat) const {NRVec<T> result(mat.ncols()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
 	const NRMat<T> operator|(const NRVec<T> &rhs) const;
 	inline const T sum() const; //sum of its elements
 	inline const T dot(const T *a, const int stride=1) const; // ddot with a stride-vector
@@ -98,8 +84,6 @@ public:
 	~NRVec();
 	void axpy(const T alpha, const NRVec &x); // this+= a*x
 	void axpy(const T alpha, const T *x, const int stride=1); // this+= a*x
-	void gemv(const T beta, const NRMat<T> &a, const char trans, 
-			const T alpha, const NRVec &x);
 	void copyonwrite();
 	void resize(const int n);
         void get(int fd, bool dimensions=1);
@@ -112,11 +96,14 @@ public:
 	void fscanf(FILE *f, const char *format);
 //sparse matrix concerning members
 	explicit NRVec(const SparseMat<T> &rhs);                // dense from sparse matrix with one of dimensions =1
-	const NRVec operator*(const SparseMat<T> &mat) const; //vector*matrix
 	inline void simplify() {}; //just for compatibility with sparse ones
-	void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x);
 };
 
+//due to mutual includes this has to be after full class declaration
+#include "mat.h"
+#include "smat.h"
+#include "sparsemat.h"
+
 template <typename T> ostream & operator<<(ostream &s, const NRVec<T> &x);
 template <typename T> istream & operator>>(istream  &s, NRVec<T> &x);
 
@@ -313,28 +300,36 @@ inline NRVec<T> & NRVec<T>::operator*=(const T &a)
 inline const double NRVec<double>::operator*(const NRVec<double> &rhs) const
 {
 #ifdef DEBUG
-	if (nn != rhs.nn) laerror("ddot of incompatible vectors");
+        if (nn != rhs.nn) laerror("dot of incompatible vectors");
 #endif
-	return cblas_ddot(nn, v, 1, rhs.v, 1);
+        return cblas_ddot(nn, v, 1, rhs.v, 1);
 }
+
+
 inline const complex<double>
 NRVec< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
 {
 #ifdef DEBUG
-	if (nn != rhs.nn) laerror("ddot of incompatible vectors");
+	if (nn != rhs.nn) laerror("dot of incompatible vectors");
 #endif
 	complex<double> dot;
 	cblas_zdotc_sub(nn, (void *)v, 1, (void *)rhs.v, 1, (void *)(&dot));
 	return dot;
 }
 
-// Vec * SMat = SMat * Vec
-template <typename T>
-inline const NRVec<T> NRVec<T>::operator*(const NRSMat<T> & S) const
+template<typename T>
+inline const T NRVec<T>::operator*(const NRVec<T> &rhs) const
 {
-	return S * (*this);
+#ifdef DEBUG
+        if (nn != rhs.nn) laerror("dot of incompatible vectors");
+#endif
+	T dot = 0;
+	for(int i=0; i<nn; ++i) dot+= v[i]*rhs.v[i];
+	return dot;
 }
 
+
+
 // Sum of elements
 inline const double NRVec<double>::sum() const
 {