diff --git a/t.cc b/t.cc index f01b2e0..0a866c5 100644 --- a/t.cc +++ b/t.cc @@ -4025,7 +4025,7 @@ cout <<"Error = "<<(xx-x).norm()< xx = xf.merge_indices(il,sym); cout <<"xx = "<number; } @@ -52,7 +52,7 @@ cumsizes.resize(shape.size()); LA_largeindex s=1; for(int i=0; i *>(&shape))[i]; + const INDEXGROUP *sh = &(* const_cast *>(&shape))[i]; if(sh->number==0) laerror("empty index group"); if(sh->range==0) return 0; cumsizes[i]=s; @@ -396,7 +396,7 @@ template void loopingroups(Tensor &t, int ngroup, int igroup, T **p, SUPERINDEX &I, void (*callback)(const SUPERINDEX &, T *)) { LA_index istart,iend; -const indexgroup *sh = &(* const_cast *>(&t.shape))[ngroup]; +const INDEXGROUP *sh = &(* const_cast *>(&t.shape))[ngroup]; switch(sh->symmetry) { case 0: @@ -431,7 +431,7 @@ for(LA_index i = istart; i<=iend; ++i) if(newigroup<0) { --newngroup; - const indexgroup *sh2 = &(* const_cast *>(&t.shape))[newngroup]; + const INDEXGROUP *sh2 = &(* const_cast *>(&t.shape))[newngroup]; newigroup=sh2->number-1; } loopingroups(t,newngroup,newigroup,p,I,callback); @@ -446,13 +446,13 @@ void Tensor::loopover(void (*callback)(const SUPERINDEX &, T *)) SUPERINDEX I(shape.size()); for(int i=0; i *>(&shape))[i]; + const INDEXGROUP *sh = &(* const_cast *>(&shape))[i]; I[i].resize(sh->number); I[i] = sh->offset; } T *pp=&data[0]; int ss=shape.size()-1; -const indexgroup *sh = &(* const_cast *>(&shape))[ss]; +const INDEXGROUP *sh = &(* const_cast *>(&shape))[ss]; loopingroups(*this,ss,sh->number-1,&pp,I,callback); } @@ -461,7 +461,7 @@ template void constloopingroups(const Tensor &t, int ngroup, int igroup, const T **p, SUPERINDEX &I, void (*callback)(const SUPERINDEX &, const T *)) { LA_index istart,iend; -const indexgroup *sh = &t.shape[ngroup]; +const INDEXGROUP *sh = &t.shape[ngroup]; switch(sh->symmetry) { case 0: @@ -496,7 +496,7 @@ for(LA_index i = istart; i<=iend; ++i) if(newigroup<0) { --newngroup; - const indexgroup *sh2 = &(* const_cast *>(&t.shape))[newngroup]; + const INDEXGROUP *sh2 = &(* const_cast *>(&t.shape))[newngroup]; newigroup=sh2->number-1; } constloopingroups(t,newngroup,newigroup,p,I,callback); @@ -511,13 +511,13 @@ void Tensor::constloopover(void (*callback)(const SUPERINDEX &, const T *)) c SUPERINDEX I(shape.size()); for(int i=0; inumber); I[i] = sh->offset; } const T *pp=&data[0]; int ss=shape.size()-1; -const indexgroup *sh = &shape[ss]; +const INDEXGROUP *sh = &shape[ss]; constloopingroups(*this,ss,sh->number-1,&pp,I,callback); } @@ -664,7 +664,7 @@ help_t->data[target] = *v; //permutation of individual indices from permutation of index groups -NRPerm group2flat_perm(const NRVec &shape, const NRPerm &p) +NRPerm group2flat_perm(const NRVec &shape, const NRPerm &p) { int rank=0; for(int i=0; i Tensor Tensor::permute_index_groups(const NRPerm &p) const { //std::cout <<"permute_index_groups permutation = "< newshape=shape.permuted(p,true); +NRVec newshape=shape.permuted(p,true); //std::cout <<"permute_index_groups newshape = "< r(newshape); if(is_named()) @@ -718,10 +718,10 @@ for(int i=0; i &shape) +int flatposition(const INDEX &i, const NRVec &shape) {return flatposition(i.group,i.index,shape);} -int flatposition(int group, int index, const NRVec &shape) +int flatposition(int group, int index, const NRVec &shape) { int ii=0; for(int g=0; g &shape) +INDEX indexposition(int flatindex, const NRVec &shape) { INDEX I={0,0}; if(flatindex<0) laerror("illegal index in indexposition"); @@ -792,7 +792,7 @@ if(group==0 && index==0 && shape[0].symmetry==0) //formal split of 1 index witho 
if(shape[group].number==1) return unwind_index_group(group); //single index in the group //general case - recalculate the shape and allocate the new tensor -NRVec newshape(shape.size()+1); +NRVec newshape(shape.size()+1); newshape[0].number=1; newshape[0].symmetry=0; newshape[0].range=shape[group].range; @@ -914,7 +914,7 @@ if(group<0) newsize=rank(); else newsize=shape.size()+shape[group].number-1; //build new shape -NRVec newshape(newsize); +NRVec newshape(newsize); int gg=0; for(int g=0; g oldshape(shape); +NRVec oldshape(shape); oldshape.copyonwrite(); -NRVec newshape(shape.size()+nonsolo); +NRVec newshape(shape.size()+nonsolo); //first the unwound indices as solo groups for(int i=0; i u = conjugate1? (rhs1.unwind_index_group(group)).conjugate() : rhs1.unwind_index_group(group); const Tensor rhsu = rhs.unwind_index_group(rhsgroup); -NRVec newshape(u.shape.size()+rhsu.shape.size()-2); +NRVec newshape(u.shape.size()+rhsu.shape.size()-2); int ii=0; for(int i=1; i u = conjugate1? (rhs1.unwind_index(group,index)).conjugate() : r const Tensor rhsu = rhs.unwind_index(rhsgroup,rhsindex); -NRVec newshape(u.shape.size()+rhsu.shape.size()-2); +NRVec newshape(u.shape.size()+rhsu.shape.size()-2); int ii=0; for(int i=1; i(nn,mm,kk,&data[0],&u.data[0], &rhsu.data[0],alpha,beta,conjugate); template void Tensor::addcontractions(const Tensor &rhs1, const INDEXLIST &il1, const Tensor &rhs2, const INDEXLIST &il2, T alpha, T beta, bool doresize, bool conjugate1, bool conjugate2) { -if(il1.size()==0) laerror("empty contraction - outer product not implemented"); if(il1.size()!=il2.size()) laerror("mismatch in index lists in addcontractions"); +if(il1.size()==0) //add just outer product + { + if(beta!=(T)0) *this *= beta; else this->clear(); + *this += (conjugate1?rhs1.conjugate():rhs1) * (conjugate2?rhs2.conjugate():rhs2) * alpha; + return; + } + for(int i=0; i=rhs1.shape.size()) laerror("wrong group1 number in contractions"); @@ -1250,7 +1256,7 @@ const Tensor u = conjugate1? (rhs1.unwind_indices(il1)).conjugateme() : rhs1. 
const Tensor rhsu = rhs2.unwind_indices(il2); -NRVec newshape(u.shape.size()+rhsu.shape.size()-2*il1.size()); +NRVec newshape(u.shape.size()+rhsu.shape.size()-2*il1.size()); int ii=0; for(int i=il1.size(); i(nn,mm,kk,&data[0],&u.data[0], &rhsu.data[0],alpha,beta,conjugate2) } +int help_tdim; +const SUPERINDEX *help_dummyindex; + +template +static void innercontraction_callback(const SUPERINDEX &I, T *r) +{ +//compute address like in operarator() but we need true address +//this could be done more efficiently if needed, avoiding explicit SUPERINDEX J +SUPERINDEX J = help_dummyindex->concat(I); +int sign; +LA_largeindex ii=help_tt->index(&sign,J); +if(sign<0) laerror("internal error in innercontraction"); +const T *matrix00 = &help_tt->data[ii]; +*r = 0; +for(int i=0; i +Tensor Tensor::innercontraction(const INDEXLIST &il1, const INDEXLIST &il2) const +{ +if(il1.size()!=il2.size()) laerror("mismatch in index lists in innercontraction"); +if(il1.size()==0) return *this; +for(int i=0; i=shape.size()) laerror("wrong group1 number in innercontraction"); + if(il2[i].group<0||il2[i].group>=shape.size()) laerror("wrong group2 number in innercontraction"); + if(il1[i].index<0||il1[i].index>=shape[il1[i].group].number) laerror("wrong index1 number in innerconntraction"); + if(il2[i].index<0||il2[i].index>=shape[il2[i].group].number) laerror("wrong index2 number in innerconntraction"); + if(shape[il1[i].group].offset != shape[il2[i].group].offset) laerror("incompatible index offset in innercontraction"); + if(shape[il1[i].group].range != shape[il2[i].group].range) laerror("incompatible index range in innercontraction"); +#ifdef LA_TENSOR_INDEXPOSITION + if(shape[il1[i].group].upperindex ^ shape[il2[i].group].upperindex == false) laerror("can contact only upper with lower index"); +#endif + for(int j=0; j(s); + } + +//result is tensor +NRVec newshape = (u.shape).subvector(il.size(),(u.shape).size()-1); +//std::cout <<"new shape = "< = &u; +help_tdim = tracedim; +SUPERINDEX dummyindex(il.size()); +for(int i=0; i @@ -1387,12 +1465,12 @@ loopover(permutationalgebra_callback2); template void Tensor::split_index_group(int group) { -const indexgroup *sh = &(* const_cast *>(&shape))[0]; +const INDEXGROUP *sh = &(* const_cast *>(&shape))[0]; if(group<0||group >= shape.size()) laerror("illegal index group number"); if(sh[group].number==1) return; //nothing to split if(sh[group].symmetry!=0) laerror("only non-symmetric index group can be splitted, use flatten instead"); -NRVec newshape(shape.size()+sh[group].number-1); +NRVec newshape(shape.size()+sh[group].number-1); int gg=0; for(int g=0; g= shape.size()) laerror("illegal index group number"); if(shape[group].number==1) return; //nothing to split if(shape[group].symmetry!=0) laerror("only non-symmetric index group can be splitted, use flatten instead"); -NRVec newshape(shape.size()+1); +NRVec newshape(shape.size()+1); int gg=0; for(int g=0; g newshape(shape.size()-(groupto-groupfrom+1)+1); +NRVec newshape(shape.size()-(groupto-groupfrom+1)+1); for(int g=0; g<=groupfrom; ++g) newshape[g]=shape[g]; newshape[groupfrom].number=newnumber; for(int g=groupfrom+1; g um; - NRVec ushape; + NRVec ushape; { Tensor uu=unwind_index(I); ushape=uu.shape; //ushape.copyonwrite(); should not be needed @@ -1682,7 +1760,7 @@ if(samegroup && isordered && il.size()==shape[il[0].group].number) return unwind //calculate new shape and flat index permutation -NRVec workshape(shape); +NRVec workshape(shape); workshape.copyonwrite(); NRPerm basicperm(rank()); @@ -1698,7 +1776,7 @@ for(int 
i=0; i0) ++newshapesize; //this group survived index removal -NRVec newshape(newshapesize); +NRVec newshape(newshapesize); newshape[0].number=il.size(); newshape[0].symmetry=sym; newshape[0].offset=shape[il[0].group].offset; @@ -1753,7 +1831,7 @@ return r; template void Tensor::canonicalize_shape() { -const indexgroup *sh = &(* const_cast *>(&shape))[0]; +const INDEXGROUP *sh = &(* const_cast *>(&shape))[0]; for(int i=0; i { }; -typedef class indexgroup { +typedef class INDEXGROUP { public: int number; //number of indices int symmetry; //-1 0 or 1, later 2 for hermitian and -2 for antihermitian? - would need change in operator() and Signedpointer @@ -120,12 +117,12 @@ LA_index range; //indices span this range bool upperindex; #endif - bool operator==(const indexgroup &rhs) const {return number==rhs.number && symmetry==rhs.symmetry && offset==rhs.offset && range==rhs.range + bool operator==(const INDEXGROUP &rhs) const {return number==rhs.number && symmetry==rhs.symmetry && offset==rhs.offset && range==rhs.range #ifdef LA_TENSOR_INDEXPOSITION && upperindex == rhs.upperindex #endif ;}; - inline bool operator!=(const indexgroup &rhs) const {return !((*this)==rhs);}; + inline bool operator!=(const INDEXGROUP &rhs) const {return !((*this)==rhs);}; } INDEXGROUP; @@ -139,17 +136,17 @@ std::istream & operator>>(std::istream &s, INDEXNAME &x); template<> -class LA_traits { +class LA_traits { public: static bool is_plaindata() {return true;}; - static void copyonwrite(indexgroup& x) {}; + static void copyonwrite(INDEXGROUP& x) {}; typedef INDEXGROUP normtype; - static inline int gencmp(const indexgroup *a, const indexgroup *b, int n) {return memcmp(a,b,n*sizeof(indexgroup));}; - static inline void put(int fd, const indexgroup &x, bool dimensions=1) {if(sizeof(indexgroup)!=write(fd,&x,sizeof(indexgroup))) laerror("write error 1 in indexgroup put"); } - static inline void multiput(int nn, int fd, const indexgroup *x, bool dimensions=1) {if(nn*sizeof(indexgroup)!=write(fd,x,nn*sizeof(indexgroup))) laerror("write error 1 in indexgroup multiiput"); } - static inline void get(int fd, indexgroup &x, bool dimensions=1) {if(sizeof(indexgroup)!=read(fd,&x,sizeof(indexgroup))) laerror("read error 1 in indexgroup get");} - static inline void multiget(int nn, int fd, indexgroup *x, bool dimensions=1) {if(nn*sizeof(indexgroup)!=read(fd,x,nn*sizeof(indexgroup))) laerror("read error 1 in indexgroup get");} - static inline void clear(indexgroup *dest, size_t n) {memset(dest,0,n*sizeof(indexgroup));} + static inline int gencmp(const INDEXGROUP *a, const INDEXGROUP *b, int n) {return memcmp(a,b,n*sizeof(INDEXGROUP));}; + static inline void put(int fd, const INDEXGROUP &x, bool dimensions=1) {if(sizeof(INDEXGROUP)!=write(fd,&x,sizeof(INDEXGROUP))) laerror("write error 1 in INDEXGROUP put"); } + static inline void multiput(int nn, int fd, const INDEXGROUP *x, bool dimensions=1) {if(nn*sizeof(INDEXGROUP)!=write(fd,x,nn*sizeof(INDEXGROUP))) laerror("write error 1 in INDEXGROUP multiiput"); } + static inline void get(int fd, INDEXGROUP &x, bool dimensions=1) {if(sizeof(INDEXGROUP)!=read(fd,&x,sizeof(INDEXGROUP))) laerror("read error 1 in INDEXGROUP get");} + static inline void multiget(int nn, int fd, INDEXGROUP *x, bool dimensions=1) {if(nn*sizeof(INDEXGROUP)!=read(fd,x,nn*sizeof(INDEXGROUP))) laerror("read error 1 in INDEXGROUP get");} + static inline void clear(INDEXGROUP *dest, size_t n) {memset(dest,0,n*sizeof(INDEXGROUP));} }; @@ -168,16 +165,36 @@ std::ostream & operator<<(std::ostream &s, const INDEX &x); 
std::istream & operator>>(std::istream &s, INDEX &x); -int flatposition(int group, int index, const NRVec &shape); -int flatposition(const INDEX &i, const NRVec &shape); //position of that index in FLATINDEX -INDEX indexposition(int flatindex, const NRVec &shape); //inverse to flatposition +template<> +class LA_traits { + public: + static bool is_plaindata() {return true;}; + static void copyonwrite(INDEX& x) {}; + typedef INDEX normtype; + static inline int gencmp(const INDEX *a, const INDEX *b, int n) {return memcmp(a,b,n*sizeof(INDEX));}; + static inline void put(int fd, const INDEX &x, bool dimensions=1) {if(sizeof(INDEX)!=write(fd,&x,sizeof(INDEX))) laerror("write error 1 in INDEX put"); } + static inline void multiput(int nn, int fd, const INDEX *x, bool dimensions=1) {if(nn*sizeof(INDEX)!=write(fd,x,nn*sizeof(INDEX))) laerror("write error 1 in INDEX multiiput"); } + static inline void get(int fd, INDEX &x, bool dimensions=1) {if(sizeof(INDEX)!=read(fd,&x,sizeof(INDEX))) laerror("read error 1 in INDEX get");} + static inline void multiget(int nn, int fd, INDEX *x, bool dimensions=1) {if(nn*sizeof(INDEX)!=read(fd,x,nn*sizeof(INDEX))) laerror("read error 1 in INDEX get");} + static inline void clear(INDEX *dest, size_t n) {memset(dest,0,n*sizeof(INDEX));} +}; + + + + + + + +int flatposition(int group, int index, const NRVec &shape); +int flatposition(const INDEX &i, const NRVec &shape); //position of that index in FLATINDEX +INDEX indexposition(int flatindex, const NRVec &shape); //inverse to flatposition FLATINDEX superindex2flat(const SUPERINDEX &I); template class Tensor { public: - NRVec shape; + NRVec shape; NRVec data; int myrank; NRVec groupsizes; //group sizes of symmetry index groups (a function of shape but precomputed for efficiency) @@ -193,15 +210,15 @@ public: //constructors Tensor() : myrank(-1) {}; explicit Tensor(const T &x) : myrank(0), data(1) {data[0]=x;}; //scalar - Tensor(const NRVec &s) : shape(s) { data.resize(calcsize()); calcrank(); canonicalize_shape();}; //general tensor - Tensor(const NRVec &s, const NRVec &newnames) : shape(s), names(newnames) { data.resize(calcsize()); calcrank(); canonicalize_shape(); if(names.size()!=myrank && names.size()!=0) laerror("bad number of index names");}; //general tensor - Tensor(const NRVec &s, const NRVec &mydata) : shape(s) { LA_largeindex dsize=calcsize(); calcrank(); canonicalize_shape(); if(mydata.size()!=dsize) laerror("inconsistent data size with shape"); data=mydata;} - Tensor(const NRVec &s, const NRVec &mydata, const NRVec &newnames) : shape(s), names(newnames) { LA_largeindex dsize=calcsize(); calcrank(); canonicalize_shape(); if(mydata.size()!=dsize) laerror("inconsistent data size with shape"); data=mydata; if(names.size()!=myrank && names.size()!=0) laerror("bad number of index names");} - Tensor(const indexgroup &g) {shape.resize(1); shape[0]=g; data.resize(calcsize()); calcrank(); canonicalize_shape();}; //tensor with a single index group - Tensor(const indexgroup &g, const NRVec &newnames) : names(newnames) {shape.resize(1); shape[0]=g; data.resize(calcsize()); calcrank(); canonicalize_shape(); if(names.size()!=myrank && names.size()!=0) laerror("bad number of index names");}; //tensor with a single index group + Tensor(const NRVec &s) : shape(s) { data.resize(calcsize()); calcrank(); canonicalize_shape();}; //general tensor + Tensor(const NRVec &s, const NRVec &newnames) : shape(s), names(newnames) { data.resize(calcsize()); calcrank(); canonicalize_shape(); if(names.size()!=myrank && names.size()!=0) 
laerror("bad number of index names");}; //general tensor + Tensor(const NRVec &s, const NRVec &mydata) : shape(s) { LA_largeindex dsize=calcsize(); calcrank(); canonicalize_shape(); if(mydata.size()!=dsize) laerror("inconsistent data size with shape"); data=mydata;} + Tensor(const NRVec &s, const NRVec &mydata, const NRVec &newnames) : shape(s), names(newnames) { LA_largeindex dsize=calcsize(); calcrank(); canonicalize_shape(); if(mydata.size()!=dsize) laerror("inconsistent data size with shape"); data=mydata; if(names.size()!=myrank && names.size()!=0) laerror("bad number of index names");} + Tensor(const INDEXGROUP &g) {shape.resize(1); shape[0]=g; data.resize(calcsize()); calcrank(); canonicalize_shape();}; //tensor with a single index group + Tensor(const INDEXGROUP &g, const NRVec &newnames) : names(newnames) {shape.resize(1); shape[0]=g; data.resize(calcsize()); calcrank(); canonicalize_shape(); if(names.size()!=myrank && names.size()!=0) laerror("bad number of index names");}; //tensor with a single index group Tensor(const Tensor &rhs): myrank(rhs.myrank), shape(rhs.shape), groupsizes(rhs.groupsizes), cumsizes(rhs.cumsizes), data(rhs.data), names(rhs.names) {}; - Tensor(int xrank, const NRVec &xshape, const NRVec &xgroupsizes, const NRVec xcumsizes, const NRVec &xdata) : myrank(xrank), shape(xshape), groupsizes(xgroupsizes), cumsizes(xcumsizes), data(xdata) {}; - Tensor(int xrank, const NRVec &xshape, const NRVec &xgroupsizes, const NRVec xcumsizes, const NRVec &xdata, const NRVec &xnames) : myrank(xrank), shape(xshape), groupsizes(xgroupsizes), cumsizes(xcumsizes), data(xdata), names(xnames) {}; + Tensor(int xrank, const NRVec &xshape, const NRVec &xgroupsizes, const NRVec xcumsizes, const NRVec &xdata) : myrank(xrank), shape(xshape), groupsizes(xgroupsizes), cumsizes(xcumsizes), data(xdata) {}; + Tensor(int xrank, const NRVec &xshape, const NRVec &xgroupsizes, const NRVec xcumsizes, const NRVec &xdata, const NRVec &xnames) : myrank(xrank), shape(xshape), groupsizes(xgroupsizes), cumsizes(xcumsizes), data(xdata), names(xnames) {}; //conversions from/to matrix and vector explicit Tensor(const NRVec &x); @@ -221,7 +238,7 @@ public: LA_largeindex calcsize(); //set redundant data and return total size LA_largeindex size() const {return data.size();}; void copyonwrite() {shape.copyonwrite(); groupsizes.copyonwrite(); cumsizes.copyonwrite(); data.copyonwrite(); names.copyonwrite();}; - void resize(const NRVec &s) {shape=s; data.resize(calcsize()); calcrank(); names.clear();}; + void resize(const NRVec &s) {shape=s; data.resize(calcsize()); calcrank(); names.clear();}; void deallocate() {data.resize(0); shape.resize(0); groupsizes.resize(0); cumsizes.resize(0); names.resize(0);}; inline Signedpointer lhs(const SUPERINDEX &I) {int sign; LA_largeindex i=index(&sign,I); return Signedpointer(&data[i],sign);}; inline T operator()(const SUPERINDEX &I) const {int sign; LA_largeindex i=index(&sign,I); if(sign==0) return 0; return sign>0 ?data[i] : -data[i];}; @@ -295,7 +312,6 @@ public: void addcontraction(const Tensor &rhs1, int group, int index, const Tensor &rhs2, int rhsgroup, int rhsindex, T alpha=1, T beta=1, bool doresize=false, bool conjugate1=false, bool conjugate=false); //rhs1 will have more significant non-contracted indices in the result than rhs2 inline void addcontraction(const Tensor &rhs1, const INDEX &I1, const Tensor &rhs2, const INDEX &I2, T alpha=1, T beta=1, bool doresize=false, bool conjugate1=false, bool conjugate=false) {addcontraction(rhs1, I1.group, I1.index, rhs2, 
I2.group, I2.index, alpha, beta, doresize, conjugate, conjugate);}; inline void addcontraction(const Tensor &rhs1, const Tensor &rhs2, const INDEXNAME &iname, T alpha=1, T beta=1, bool doresize=false, bool conjugate1=false, bool conjugate=false) {addcontraction(rhs1, rhs1.findindex(iname), rhs2, rhs2.findindex(iname), alpha, beta, doresize, conjugate, conjugate);}; - inline Tensor contraction(int group, int index, const Tensor &rhs, int rhsgroup, int rhsindex, T alpha=1, bool conjugate1=false, bool conjugate=false) const {Tensor r; r.addcontraction(*this,group,index,rhs,rhsgroup,rhsindex,alpha,0,true, conjugate1, conjugate); return r; }; inline Tensor contraction(const INDEX &I, const Tensor &rhs, const INDEX &RHSI, T alpha=1, bool conjugate1=false, bool conjugate=false) const {return contraction(I.group,I.index, rhs, RHSI.group, RHSI.index,alpha, conjugate1, conjugate);}; inline Tensor contraction(const Tensor &rhs, const INDEXNAME &iname, T alpha=1, bool conjugate1=false, bool conjugate=false) const {return contraction(findindex(iname),rhs,rhs.findindex(iname),alpha, conjugate1, conjugate);}; @@ -308,6 +324,11 @@ public: void addgroupcontraction(const Tensor &rhs1, int group, const Tensor &rhs2, int rhsgroup, T alpha=1, T beta=1, bool doresize=false, bool conjugate1=false, bool conjugate=false); //over all indices in a group of same symmetry; rhs1 will have more significant non-contracted indices in the result than rhs2 inline Tensor groupcontraction(int group, const Tensor &rhs, int rhsgroup, T alpha=1, bool conjugate1=false, bool conjugate=false) const {Tensor r; r.addgroupcontraction(*this,group,rhs,rhsgroup,alpha,0,true, conjugate1, conjugate); return r; }; + Tensor innercontraction(const INDEXLIST &il1, const INDEXLIST &il2) const; //contraction(s) inside this tensor + Tensor innercontraction(const INDEX &i1, const INDEX &i2) const {INDEXLIST il1(1); il1[0]=i1; INDEXLIST il2(1); il2[0]=i2; return innercontraction(il1,il2);}; + Tensor innercontraction(const NRVec &nl1, const NRVec &nl2) const {return innercontraction(findindexlist(nl1),findindexlist(nl2));}; + Tensor innercontraction(const INDEXNAME &n1, const INDEXNAME &n2) const {return innercontraction(findindex(n1),findindex(n2));}; + void apply_permutation_algebra(const Tensor &rhs, const PermutationAlgebra &pa, bool inverse=false, T alpha=1, T beta=0); //general (not optimally efficient) symmetrizers, antisymmetrizers etc. 
acting on the flattened index list:
 	void apply_permutation_algebra(const NRVec &rhsvec, const PermutationAlgebra &pa, bool inverse=false, T alpha=1, T beta=0); //avoids explicit outer product but not vectorized, rather inefficient
 // this *=beta; for I over this: this(I) += alpha * sum_P c_P rhs(P(I))
@@ -345,7 +366,7 @@ void tensor2fourindex(const Tensor &t, fourindex_dense &f);
 template
 Tensor fourindex2tensor(const fourindex_dense &f)
 {
-NRVec<indexgroup> shape(4);
+NRVec<INDEXGROUP> shape(4);
 int n=f.nbas();
 for(int i=0; i<4; ++i)
 	{
@@ -378,7 +399,7 @@ f=fourindex_dense(range,NRMat(t.data,range*range,range*range)
 template
 Tensor fourindex2tensor(const fourindex_dense &f)
 {
-NRVec<indexgroup> shape(2);
+NRVec<INDEXGROUP> shape(2);
 int n=f.nbas();
 for(int i=0; i<2; ++i)
 	{
@@ -412,7 +433,7 @@ f=fourindex_dense(NRMat(t.data,range*(range+1)
 template
 Tensor fourindex2tensor(const fourindex_dense &f)
 {
-NRVec<indexgroup> shape(2);
+NRVec<INDEXGROUP> shape(2);
 int n=f.nbas();
 for(int i=0; i<2; ++i)
 	{
@@ -448,7 +469,7 @@ f=fourindex_dense(NRSMat(mat)); //symmetrize mat
 template
 Tensor fourindex2tensor(const fourindex_dense &f)
 {
-NRVec<indexgroup> shape(4);
+NRVec<INDEXGROUP> shape(4);
 for(int i=0; i<4; ++i)
 	{
 	shape[i].number=1;
@@ -487,7 +508,7 @@ f=fourindex_dense(noca,nocb,nvra,nvrb,mat);
 template
 Tensor fourindex2tensor(const fourindex_dense &f)
 {
-NRVec<indexgroup> shape(2);
+NRVec<INDEXGROUP> shape(2);
 for(int i=0; i<2; ++i)
 	{
 	shape[i].number=2;
@@ -522,7 +543,7 @@ f=fourindex_dense(nocc,nvrt,mat);
 template
 Tensor fourindex2tensor(const fourindex_dense &f)
 {
-NRVec<indexgroup> shape(4);
+NRVec<INDEXGROUP> shape(4);
 int n=f.nbas();
 for(int i=0; i<4; ++i)
 	{
@@ -557,7 +578,7 @@ f=fourindex_dense(range,NRSMat(mat));
 template
 Tensor fourindex2tensor(const fourindex_dense &f)
 {
-NRVec<indexgroup> shape(2);
+NRVec<INDEXGROUP> shape(2);
 int n=f.nbas();
 for(int i=0; i<2; ++i)
 	{
diff --git a/vec.cc b/vec.cc
index c4434cb..3eeec6c 100644
--- a/vec.cc
+++ b/vec.cc
@@ -863,103 +863,6 @@ return -1;
-/***************************************************************************//**
- * extract block subvector
- * @param[in] from starting position
- * @param[in] to final position
- * @return extracted block subvector
- ******************************************************************************/
-template <typename T>
-const NRVec<T> NRVec<T>::subvector(const int from, const int to) const
-{
-#ifdef DEBUG
-	if(from<0 || from>=nn|| to<0 || to>=nn || from>to){
-		laerror("invalid subvector specification");
-	}
-#endif
-//trivial case of whole vector for efficiency
-	if(from==0 && to == nn-1) return *this;
-
-	const int n = to - from + 1;
-	NRVec<T> r(n, getlocation());
-	if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
-
-#ifdef CUDALA
-	if(location == cpu){
-#endif
-		memcpy(r.v, v+from, n*sizeof(T));
-#ifdef CUDALA
-	}else{
-		if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
-		cublasScopy(n*sizeof(T)/sizeof(float), (const float *)(v+from), 1, (float*)r.v, 1);
-		TEST_CUBLAS("cublasScopy");
-	}
-#endif
-	return r;
-}
-
-
-template <typename T>
-const NRVec<T> NRVec<T>::subvector(const NRVec<int> &selection) const
-{
-	NOT_GPU(*this);
-	const int n = selection.size();
-	NRVec<T> r(n);
-
-	for(int i=0; i<n; ++i)
-		{
-		const int ii = selection[i];
-		if(ii<0 || ii>=nn) laerror("bad row index in subvector");
-		r[i] = (*this)[ii];
-		}
-return r;
-
-}
-
-/***************************************************************************//**
- * places given vector as subvector at given position
- * @param[in] from coordinate
- * @param[in] rhs input vector
- ******************************************************************************/
-template <typename T>
-void NRVec<T>::storesubvector(const int from, const NRVec<T> &rhs)
-{
-	const int to = from + rhs.size() - 1;
-#ifdef DEBUG
-	if(from<0 || from>=nn || to>=nn) laerror("bad indices in storesubvector");
-#endif
-	SAME_LOC(*this, rhs);
-	if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
-
-	#ifdef CUDALA
-	if(location == cpu){
-	#endif
-		memcpy(v+from, rhs.v, rhs.size()*sizeof(T));
-
-	#ifdef CUDALA
-	}else{
-		if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
-		cublasScopy(rhs.size()*sizeof(T)/sizeof(float), (const float *) (rhs.v), 1, (float *)(v + from), 1);
-	}
-	#endif
-}
-
-
-template <typename T>
-void NRVec<T>::storesubvector(const NRVec<int> &selection, const NRVec<T> &rhs)
-{
-	NOT_GPU(*this);
-	const int n = selection.size();
-	if(n!=rhs.size()) laerror("size mismatch in storesubvector");
-
-	for(int i=0; i<n; ++i)
-		{
-		const int ii = selection[i];
-		if(ii<0 || ii>=nn) laerror("bad index in storesubvector");
-		(*this)[ii] = rhs[i];
-		}
-}
-
 /***************************************************************************//**
 * conjugate this general vector
 * @return reference to the (unmodified) matrix
diff --git a/vec.h b/vec.h
index 6b55b6c..8c4a6e3 100644
--- a/vec.h
+++ b/vec.h
@@ -2115,6 +2115,104 @@ for(int i=0; i
+/***************************************************************************//**
+ * extract block subvector
+ * @param[in] from starting position
+ * @param[in] to final position
+ * @return extracted block subvector
+ ******************************************************************************/
+template <typename T>
+const NRVec<T> NRVec<T>::subvector(const int from, const int to) const
+{
+#ifdef DEBUG
+	if(from<0 || from>=nn|| to<0 || to>=nn || from>to){
+		laerror("invalid subvector specification");
+	}
+#endif
+//trivial case of whole vector for efficiency
+	if(from==0 && to == nn-1) return *this;
+
+	const int n = to - from + 1;
+	NRVec<T> r(n, getlocation());
+	if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
+
+#ifdef CUDALA
+	if(location == cpu){
+#endif
+		memcpy(r.v, v+from, n*sizeof(T));
+#ifdef CUDALA
+	}else{
+		if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
+		cublasScopy(n*sizeof(T)/sizeof(float), (const float *)(v+from), 1, (float*)r.v, 1);
+		TEST_CUBLAS("cublasScopy");
+	}
+#endif
+	return r;
+}
+
+
+template <typename T>
+const NRVec<T> NRVec<T>::subvector(const NRVec<int> &selection) const
+{
+	NOT_GPU(*this);
+	const int n = selection.size();
+	NRVec<T> r(n);
+
+	for(int i=0; i<n; ++i)
+		{
+		const int ii = selection[i];
+		if(ii<0 || ii>=nn) laerror("bad row index in subvector");
+		r[i] = (*this)[ii];
+		}
+return r;
+
+}
+
+/***************************************************************************//**
+ * places given vector as subvector at given position
+ * @param[in] from coordinate
+ * @param[in] rhs input vector
+ ******************************************************************************/
+template <typename T>
+void NRVec<T>::storesubvector(const int from, const NRVec<T> &rhs)
+{
+	const int to = from + rhs.size() - 1;
+#ifdef DEBUG
+	if(from<0 || from>=nn || to>=nn) laerror("bad indices in storesubvector");
+#endif
+	SAME_LOC(*this, rhs);
+	if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
+
+	#ifdef CUDALA
+	if(location == cpu){
+	#endif
+		memcpy(v+from, rhs.v, rhs.size()*sizeof(T));
+
+	#ifdef CUDALA
+	}else{
+		if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
+		cublasScopy(rhs.size()*sizeof(T)/sizeof(float), (const float *) (rhs.v), 1, (float *)(v + from), 1);
+	}
+	#endif
+}
+
+
+template <typename T>
+void NRVec<T>::storesubvector(const NRVec<int> &selection, const NRVec<T> &rhs)
+{
+	NOT_GPU(*this);
+	const int n = selection.size();
+	if(n!=rhs.size()) laerror("size mismatch in storesubvector");
+
+	for(int i=0; i<n; ++i)
+		{
+		const int ii = selection[i];
+		if(ii<0 || ii>=nn) laerror("bad index in storesubvector");
+		(*this)[ii] = rhs[i];
+		}
+}
+
 }//namespace
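
Note on the addcontractions change in tensor.cc above: an empty contraction index list no longer raises an error but accumulates a scaled outer product, this = beta*this + alpha*(rhs1 x rhs2), with rhs1's indices remaining the more significant ones. The following sketch shows that accumulation for two flat arrays; add_outer_product is a hypothetical illustration, not the library routine.

#include <vector>
#include <cstddef>

// result(i,j) = beta*result(i,j) + alpha * x[i]*y[j], stored row-major with x as
// the more significant index. resize() value-initializes any newly created
// elements to zero, so a fresh result behaves as if beta were 0 for them.
void add_outer_product(std::vector<double> &result,
                       const std::vector<double> &x, const std::vector<double> &y,
                       double alpha, double beta)
{
	result.resize(x.size()*y.size());
	for(std::size_t i=0; i<x.size(); ++i)
		for(std::size_t j=0; j<y.size(); ++j)
			{
			double &r = result[i*y.size() + j];
			r = beta*r + alpha*x[i]*y[j];
			}
}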
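
Note on the new Tensor::innercontraction in tensor.cc/tensor.h above: it traces pairs of indices within a single tensor by looping over the surviving indices and summing the stored data along the diagonal of each contracted pair. The standalone sketch below (hypothetical names, fixed rank 3, plain row-major storage; none of it the library's API) only illustrates that summation pattern.

#include <vector>
#include <cstddef>

// Trace the first and third index of a row-major array a with shape (n, m, n):
// r[j] = sum_i a(i, j, i).  Tensor::innercontraction performs the analogous sum
// for arbitrary shapes via its loopover callback; this fixed-rank version is
// only meant to show which elements get added up.
std::vector<double> trace13(const std::vector<double> &a, std::size_t n, std::size_t m)
{
	std::vector<double> r(m, 0.0);
	for(std::size_t j=0; j<m; ++j)
		for(std::size_t i=0; i<n; ++i)
			r[j] += a[(i*m + j)*n + i];	// element a(i, j, i)
	return r;
}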
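
Note on the new LA_traits<INDEX> specialization in tensor.h above: it serializes INDEX as raw bytes with write/read, following the same plain-data convention the existing INDEXGROUP traits use. A minimal sketch of that convention for a stand-in POD struct; PodIndex, put_pod and get_pod are illustrative names only.

#include <unistd.h>

struct PodIndex { int group; int index; };	// stand-in for the library's INDEX

// Write/read one POD value as raw bytes, mirroring the plain-data put/get style
// of the LA_traits specializations; returns false on a short or failed transfer.
inline bool put_pod(int fd, const PodIndex &x)
{ return write(fd, &x, sizeof(x)) == (ssize_t)sizeof(x); }

inline bool get_pod(int fd, PodIndex &x)
{ return read(fd, &x, sizeof(x)) == (ssize_t)sizeof(x); }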
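
Note on the vec.cc/vec.h move above: the subvector/storesubvector bodies were relocated verbatim into the header so they instantiate for any element type. A short usage sketch against the signatures shown there, assuming the LA namespace and that the selection overloads take an NRVec<int> as reconstructed here; this is illustrative only and not verified against the full headers.

#include "vec.h"
using namespace LA;

void subvector_demo()
{
	NRVec<double> v(10);
	for(int i=0; i<10; ++i) v[i] = i;

	NRVec<double> block = v.subvector(2, 5);	// contiguous copy of v[2..5]

	NRVec<int> pick(3);
	pick[0]=0; pick[1]=4; pick[2]=9;
	NRVec<double> gathered = v.subvector(pick);	// gather selected elements

	v.storesubvector(6, block);			// write block back at offset 6..9
}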