LA_library/tensor.cc

/*
    LA: linear algebra C++ interface library
    Copyright (C) 2024 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <iostream>
#include "tensor.h"
#include "laerror.h"
#include "qsort.h"
#include "miscfunc.h"
#include <complex>
#include "bitvector.h"


namespace LA {

template<typename T>
int Tensor<T>:: calcrank()
{
int r=0;
for(int i=0; i<shape.size(); ++i)
        {
	const indexgroup *sh = &(* const_cast<const NRVec<indexgroup> *>(&shape))[i];
        if(sh->number==0) laerror("empty index group");
        r+=sh->number;
        }
myrank=r;
return r;
}


template<typename T>
LA_largeindex Tensor<T>::calcsize()
{
groupsizes.resize(shape.size());
cumsizes.resize(shape.size());
LA_largeindex s=1;
for(int i=0; i<shape.size(); ++i)
        {
	const indexgroup *sh = &(* const_cast<const NRVec<indexgroup> *>(&shape))[i];
        if(sh->number==0) laerror("empty index group");
        if(sh->range==0) return 0;
        cumsizes[i]=s;
        switch(sh->symmetry)
                {
                case 0:
                        s *= groupsizes[i] = longpow(sh->range,sh->number);
                        break;
                case 1:
                        s *= groupsizes[i] = simplicial(sh->number,sh->range);
                        break;
                case -1:
                        s *= groupsizes[i] = simplicial(sh->number,sh->range-sh->number+1);
                        break;
                default:
                        laerror("illegal index group symmetry");
                        break;
                }
        }
return s;
}


LA_largeindex subindex(int *sign, const INDEXGROUP &g, const NRVec<LA_index> &I) //index of one subgroup
{
#ifdef DEBUG
if(I.size()<=0) laerror("empty index group in subindex");
if(g.number!=I.size()) laerror("mismatch in the number of indices in a group");
for(int i=0; i<I.size(); ++i) if(I[i]<g.offset || I[i] >= g.offset+g.range) laerror("index out of range in tensor subindex");
#endif

switch(I.size()) //a few special cases for efficiency
	{
	case 0:
		*sign=0;
		return 0;
		break;
	case 1:
		*sign=1;
		return I[0]-g.offset;
		break;
	case 2:
		{
		*sign=1;
		if(g.symmetry==0) return (I[1]-g.offset)*g.range+I[0]-g.offset;
		LA_index i0,i1;
		if(I[0]>I[1]) {i1=I[0]; i0=I[1]; if(g.symmetry<0) *sign = -1;} else {i1=I[1]; i0=I[0];}
		i0 -= g.offset;
		i1 -= g.offset;
		if(g.symmetry<0)
			{
			if(i0==i1) {*sign=0; return -1;}
			return i1*(i1-1)/2+i0;
			}
		else
			{
			return i1*(i1+1)/2+i0;
			}
		}
		break;

	default: //general case
		{
		*sign=1;

                if(g.symmetry==0) //rectangular case
			{
			LA_largeindex r=0;
			for(int i=I.size()-1; i>=0; --i)
				{
				r*= g.range;
				r+= I[i]-g.offset;
				}
			return r;
			}
		}

		//compressed storage case
		NRVec<LA_index> II(I);
		II.copyonwrite();
		if(g.offset!=0) II -= g.offset;
		int parity=netsort(II.size(),&II[0]);
		if(g.symmetry<0 && (parity&1)) *sign= -1;
		if(g.symmetry<0) //antisymmetric
			{
			for(int i=0; i<I.size()-1; ++i)
				if(II[i]==II[i+1])
					{*sign=0; return -1;} //identical indices of antisymmetric tensor

			LA_largeindex r=0;
                        for(int i=0; i<II.size(); ++i) r += simplicial(i+1,II[i]-i);
                        return r;
			}
		else	//symmetric
			{
			LA_largeindex r=0;
			for(int i=0; i<II.size(); ++i) r += simplicial(i+1,II[i]);
			return r;
			}
		break;
	}
laerror("this error should not happen");
return -1;
}


//inverse map of group superindex to canonically ordered index list
NRVec<LA_index>  inverse_subindex(const INDEXGROUP &g, LA_largeindex s)
{
NRVec<LA_index> I(g.number);
if(g.number==1) {I[0]=s+g.offset; return I;}
switch(g.symmetry)
	{
	case 0:
		for(int i=0; i<g.number; ++i)
			{
			I[i] = s%g.range;
			s /= g.range;
			}
		break;
	case 1:
		for(int i=g.number; i>0; --i)
			{
			I[i-1] = inverse_simplicial(i,s);
			s -= simplicial(i,I[i-1]);
			}
		break;
	case -1:
		for(int i=g.number-1; i>=0; --i)
                        {
                        I[i] = i + inverse_simplicial(i+1,s);
                        s -= simplicial(i+1,I[i]-i);
                        }
		break;
	default: laerror("illegal index symmetry");
	}

if(g.offset!=0) I += g.offset;
return I;
}


template<typename T>
SUPERINDEX Tensor<T>::inverse_index(LA_largeindex s) const
{
SUPERINDEX I(shape.size());
for(int g=shape.size()-1; g>=0; --g)
	{
	LA_largeindex groupindex;
	if(g>0)
		{
		groupindex = s/cumsizes[g];
		s %= cumsizes[g];
		}
	else groupindex=s;
	I[g] = inverse_subindex(shape[g],groupindex);
	}
return I;
}


template<typename T>
LA_largeindex Tensor<T>::index(int *sign, const SUPERINDEX &I) const
{
//check index structure and ranges
#ifdef DEBUG
if(I.size()!=shape.size()) laerror("mismatch in the number of tensor index groups");
for(int i=0; i<I.size(); ++i)
	{
	if(shape[i].number!=I[i].size()) {std::cerr<<"error in index group no. "<<i<<std::endl; laerror("mismatch in the size of tensor index group");}
	for(int j=0; j<shape[i].number; ++j)
		{
		if(I[i][j] <shape[i].offset || I[i][j] >= shape[i].offset+shape[i].range)
			{
			std::cerr<<"error in index group no. "<<i<<" index no. "<<j<<std::endl;
			laerror("tensor index out of range");
			}
		}
	}
#endif

LA_largeindex r=0;
*sign=1;
for(int g=0; g<shape.size(); ++g) //loop over index groups
	{
	int gsign;
	LA_largeindex groupindex =  subindex(&gsign,shape[g],I[g]);
	//std::cout <<"INDEX TEST group "<<g<<" cumsizes "<< cumsizes[g]<<" groupindex "<<groupindex<<std::endl;
	*sign *= gsign;
	if(groupindex == -1) return -1;
	r += groupindex * cumsizes[g];
	}
return r;
}


template<typename T>
LA_largeindex Tensor<T>::index(int *sign, const FLATINDEX &I) const
{
#ifdef DEBUG
if(rank()!=I.size()) laerror("tensor rank mismatch in index");
#endif

LA_largeindex r=0;
*sign=1;
int gstart=0;
for(int g=0; g<shape.size(); ++g) //loop over index groups
        {
        int gsign;
	int gend= gstart+shape[g].number-1;
	NRVec<LA_index> subI = I.subvector(gstart,gend);
	gstart=gend+1;
        LA_largeindex groupindex =  subindex(&gsign,shape[g],subI);
        //std::cout <<"FLATINDEX TEST group "<<g<<" cumsizes "<< cumsizes[g]<<" groupindex "<<groupindex<<std::endl;
        *sign *= gsign;
        if(groupindex == -1) return -1;
        r += groupindex * cumsizes[g];
        }
return r;
}


template<typename T>
LA_largeindex Tensor<T>::vindex(int *sign,  LA_index i1, va_list args) const
{
NRVec<LA_index> I(rank());
I[0]=i1;
for(int i=1; i<rank(); ++i)
	{
	I[i] = va_arg(args,LA_index);
	}
va_end(args);

return index(sign,I);
}


//binary I/O

template<typename T>
void Tensor<T>::put(int fd) const
{
shape.put(fd,true);
groupsizes.put(fd,true);
cumsizes.put(fd,true);
data.put(fd,true);
}

template<typename T>
void Tensor<T>::get(int fd)
{
shape.get(fd,true);
myrank=calcrank(); //is not stored but recomputed
groupsizes.put(fd,true);
cumsizes.get(fd,true);
data.get(fd,true);
}


template<typename T>
Tensor<T>::Tensor(const NRVec<T> &x)
: data(x)
{
myrank=1;
shape.resize(1);
shape[0].number=1;
shape[0].symmetry=0;
#ifndef LA_TENSOR_ZERO_OFFSET
shape[0].offset=0;
#endif
shape[0].range=x.size();
calcsize();
}

template<typename T>
Tensor<T>::Tensor(const NRMat<T> &x)
: data(&x(0,0),x.nrows()*x.ncols())
{
myrank=2;
if(x.nrows()==x.ncols())
	{
	shape.resize(1);
	shape[0].number=2;
	shape[0].symmetry=0;
#ifndef LA_TENSOR_ZERO_OFFSET
	shape[0].offset=0;
#endif
	shape[0].range=x.nrows();
	}
else
	{
	shape.resize(2);
	shape[0].number=1; shape[1].number=1;
	shape[0].symmetry=0; shape[1].symmetry=0;
#ifndef LA_TENSOR_ZERO_OFFSET
        shape[0].offset=0; shape[1].offset=0;
#endif
	shape[0].range=x.ncols();
	shape[1].range=x.nrows();
	}
calcsize();
}


template<typename T>
Tensor<T>::Tensor(const NRSMat<T> &x)
: data(NRVec<T>(x))
{
myrank=2;
shape.resize(1);
shape[0].number=2;
shape[0].symmetry=1;
#ifndef LA_TENSOR_ZERO_OFFSET
        shape[0].offset=0;
#endif
shape[0].range=x.nrows();
calcsize();
}


template<typename T>
void loopingroups(Tensor<T> &t, int ngroup, int igroup, T **p, SUPERINDEX &I, void (*callback)(const SUPERINDEX &, T *))
{
LA_index istart,iend;
const indexgroup *sh = &(* const_cast<const NRVec<indexgroup> *>(&t.shape))[ngroup];
switch(sh->symmetry)
	{
	case 0:
		istart= sh->offset;
		iend=   sh->offset+sh->range-1;
		break;
	case 1:
		istart= sh->offset;
		if(igroup==sh->number-1) iend=   sh->offset+sh->range-1;
		else iend = I[ngroup][igroup+1];
		break;
	case -1:
		istart= sh->offset + igroup;
		if(igroup==sh->number-1) iend=   sh->offset+sh->range-1;
                else iend = I[ngroup][igroup+1]-1;
		break;
	}

for(LA_index i = istart; i<=iend; ++i)
	{
	I[ngroup][igroup]=i;
	if(ngroup==0 && igroup==0)
		{
		int sign;
		//std::cout <<"TEST  "<<t.index(&sign,I)<<" ";
		(*callback)(I,(*p)++);
		}
	else
		{
		int newigroup= igroup-1;
		int newngroup=ngroup;
		if(newigroup<0)
			{
			--newngroup;
			const indexgroup *sh2 = &(* const_cast<const NRVec<indexgroup> *>(&t.shape))[newngroup];
			newigroup=sh2->number-1;
			}
		loopingroups(t,newngroup,newigroup,p,I,callback);
		}
	}
}


template<typename T>
void Tensor<T>::loopover(void (*callback)(const SUPERINDEX &, T *))
{
SUPERINDEX I(shape.size());
for(int i=0; i<I.size(); ++i)
	{
	const indexgroup *sh = &(* const_cast<const NRVec<indexgroup> *>(&shape))[i];
	I[i].resize(sh->number);
	I[i] = sh->offset;
	}
T *pp=&data[0];
int ss=shape.size()-1;
const indexgroup *sh = &(* const_cast<const NRVec<indexgroup> *>(&shape))[ss];
loopingroups(*this,ss,sh->number-1,&pp,I,callback);
}


static std::ostream *sout;
template<typename T>
static void outputcallback(const SUPERINDEX &I, T *v)
{
//print indices flat
for(int i=0; i<I.size(); ++i)
	for(int j=0; j<I[i].size(); ++j) *sout << I[i][j]<<" ";
*sout<<" "<< *v<<std::endl;
}


std::ostream & operator<<(std::ostream &s, const INDEXGROUP &x)
{
s<<x.number <<" "<<x.symmetry<<" ";
#ifndef LA_TENSOR_ZERO_OFFSET
s<<x.offset<<" ";
#endif
s<< x.range<<std::endl;
return s;
}

std::istream & operator>>(std::istream &s, INDEXGROUP &x)
{
s>>x.number>>x.symmetry;
#ifndef LA_TENSOR_ZERO_OFFSET
s>>x.offset;
#endif
s>>x.range;
return s;
}


template<typename T>
std::ostream & operator<<(std::ostream &s, const Tensor<T> &x)
{
s<<x.shape;
sout= &s;
const_cast<Tensor<T> *>(&x)->loopover(&outputcallback<T>);
return s;
}

template <typename T>
std::istream & operator>>(std::istream &s, Tensor<T> &x)
{
s>>x.shape;
x.data.resize(x.calcsize()); x.calcrank();
FLATINDEX I(x.rank());
for(LA_largeindex i=0; i<x.data.size(); ++i)
	{
	for(int j=0; j<I.size(); ++j) s>>I[j];
	T val; s>>val;
	x.lhs(I) = val;
	}
return s;
}


template<typename T>
void loopovergroups(Tensor<T> &t, int ngroup, T **p, GROUPINDEX &I, void (*callback)(const GROUPINDEX &, T *))
{
for(LA_largeindex i = 0; i<t.groupsizes[ngroup]; ++i)
	{
	I[ngroup]=i;
	if(ngroup==0) (*callback)(I,(*p)++);
	else loopovergroups(t,ngroup-1,p,I,callback);
	}
}

template<typename T>
void Tensor<T>::grouploopover(void (*callback)(const GROUPINDEX &, T *))
{
GROUPINDEX I(shape.size());
T *pp=&data[0];
loopovergroups(*this,shape.size()-1,&pp,I,callback);
}


const NRPerm<int> *help_p;
template<typename T>
Tensor<T> *help_t;
template<typename T>
const Tensor<T> *help_tt;

template<typename T>
static void permutecallback(const GROUPINDEX &I, T *v)
{
LA_largeindex target=0;
for(int i=0; i< help_t<T>->shape.size(); ++i)
	{
	target += I[(*help_p)[i+1]-1] * help_t<T>->cumsizes[i];
	}
help_t<T>->data[target] = *v;
}


template<typename T>
Tensor<T> Tensor<T>::permute_index_groups(const NRPerm<int> &p) const
{
NRVec<indexgroup> newshape=shape.permuted(p);
Tensor<T> r(newshape);

//prepare statics for the callback
help_p = &p;
help_t<T> = &r;

//now rearrange the data
const_cast<Tensor<T> *>(this)->grouploopover(permutecallback<T>);
return r;
}


FLATINDEX superindex2flat(const SUPERINDEX &I)
{
int rank=0;
for(int i=0; i<I.size(); ++i) rank += I[i].size();
FLATINDEX J(rank);
int ii=0;
for(int i=0; i<I.size(); ++i)
	{
	for(int j=0; j<I[i].size(); ++j) J[ii++] = I[i][j];
	}
return J;
}

int flatposition(const INDEX &i, const NRVec<indexgroup> &shape)
{
int ii=0;
for(int g=0; g<i.group; ++g) ii+= shape[g].number;
ii += i.index;
return ii;
}

int flatposition(int group, int index, const NRVec<indexgroup> &shape)
{
int ii=0;
for(int g=0; g<group; ++g) ii+= shape[g].number;
ii += index;
return ii;
}


template<typename T>
static void unwind_callback(const SUPERINDEX &I, T *v)
{
FLATINDEX J = superindex2flat(I);
FLATINDEX JP = J.permuted(*help_p,true);
//std::cout <<"TEST unwind_callback: from "<<JP<<" TO "<<J<<std::endl;
*v = (*help_tt<T>)(JP); //rhs operator() generates the redundant elements for the unwinded lhs tensor
}


template<typename T>
Tensor<T> Tensor<T>::unwind_index(int group, int index) const
{
if(group<0||group>=shape.size()) laerror("wrong group number in unwind_index");
if(index<0||index>=shape[group].number)  laerror("wrong index number in unwind_index");
if(shape[group].number==1) //single index in the group
	{
	if(group==0) return *this; //is already the least significant group
	NRPerm<int> p(shape.size());
	p[1]= 1+group;
	int ii=1;
	if(ii==1+group) ii++; //skip this
	for(int i=2; i<=shape.size(); ++i)
		{
		p[i]=ii++;
		if(ii==1+group) ii++; //skip this
		}
	if(!p.is_valid()) laerror("internal error in unwind_index");
	return permute_index_groups(p);
	}

//general case - recalculate the shape and allocate the new tensor
NRVec<indexgroup> newshape(shape.size()+1);
newshape[0].number=1;
newshape[0].symmetry=0;
newshape[0].range=shape[group].range;
#ifndef LA_TENSOR_ZERO_OFFSET
newshape[0].offset = shape[group].offset;
#endif
int flatindex=0; //(group,index) in flat form
for(int i=0; i<shape.size(); ++i)
	{
	newshape[i+1] = shape[i];
	if(i==group)
		{
		--newshape[i+1].number;
		flatindex += index;
		}
	else flatindex += shape[i].number;
	}

Tensor<T> r(newshape);
if(r.rank()!=rank()) laerror("internal error 2 in unwind_index");

//compute the corresponding permutation of FLATINDEX for use in the callback
NRPerm<int> indexperm(rank());
indexperm[1]=flatindex+1;
int ii=1;
if(ii==flatindex+1) ii++;
for(int i=2; i<=rank(); ++i)
	{
	indexperm[i] = ii++;
	if(ii==flatindex+1) ii++; //skip this
	}
if(!indexperm.is_valid())
	{
	std::cout << "indexperm = "<<indexperm<<std::endl;
	laerror("internal error 3 in unwind_index");
	}

//loop recursively and do the unwinding
help_tt<T> = this;
help_p = &indexperm;
r.loopover(unwind_callback);
return r;
}


template<typename T>
Tensor<T> Tensor<T>::unwind_indices(const INDEXLIST &il) const
{
if(il.size()==0) return *this;
if(il.size()==1) return unwind_index(il[0].group,il[0].index);

for(int i=0; i<il.size(); ++i)
	{
	if(il[i].group<0||il[i].group>=shape.size()) laerror("wrong group number in unwind_indices");
	if(il[i].index<0||il[i].index>=shape[il[i].group].number)  laerror("wrong index number in unwind_indices");
	}

//all indices are solo in their groups - permute groups
bool sologroups=true;
int nonsolo=0;
for(int i=0; i<il.size(); ++i)
	if(shape[il[i].group].number!=1) {sologroups=false; ++nonsolo;}
if(sologroups)
	{
	NRPerm<int> p(shape.size());
	bitvector waslisted(shape.size());
	waslisted.clear();
	for(int i=0; i<il.size(); ++i)
		{
		p[1+i] = 1+il[i].group;
		waslisted.set(il[i].group);
		}
	int ii=il.size();
	for(int i=0; i<shape.size(); ++i)
		{
		if(!waslisted[i])
			{
			waslisted.set(i);
			p[1+ii] = 1+i;
			ii++;
			}
		}

	if(!p.is_valid()) laerror("internal error in unwind_indices");
	if(p.is_identity()) return *this;
	else return permute_index_groups(p);
	}


//general case - recalculate the shape and allocate the new tensor
NRVec<indexgroup> oldshape(shape);
oldshape.copyonwrite();
NRVec<indexgroup> newshape(shape.size()+nonsolo);

//first the unwound indices as solo groups
for(int i=0; i<il.size(); ++i)
	{
	newshape[i].number=1;
	newshape[i].symmetry=0;
	newshape[i].range=shape[il[i].group].range;
#ifndef LA_TENSOR_ZERO_OFFSET
	newshape[i].offset = shape[il[i].group].offset;
#endif
	oldshape[il[i].group].number --;
	}

//then the remaining groups with one index removed, if nonempty
int ii=il.size();
int emptied_groups=0;
for(int i=0; i<oldshape.size(); ++i)
	if(oldshape[i].number>0)
		{
		newshape[ii++] = oldshape[i];
		}
	else
		++emptied_groups;

if(emptied_groups) newshape.resize(newshape.size()-emptied_groups,true);

Tensor<T> r(newshape);
if(r.rank()!=rank()) laerror("internal error 2 in unwind_indces");

//compute the corresponding permutation of FLATINDEX for use in the callback
NRPerm<int> indexperm(rank());
bitvector waslisted(rank());
waslisted.clear();
//first unwound indices
ii=0;
for(int i=0; i<il.size(); ++i)
	{
	int pos= flatposition(il[i],shape);
	indexperm[1+ii] = 1+pos;
	waslisted.set(pos);
	++ii;
	}

//the remaining index groups
for(int g=0; g<shape.size(); ++g)
	for(int i=0; i<shape[g].number; ++i)
		{
		int pos= flatposition(g,i,shape);
		if(!waslisted[pos])
			{
			waslisted.set(pos);
			indexperm[1+ii] = 1+pos;
			++ii;
			}
		}

if(!indexperm.is_valid())
	{
	std::cout << "indexperm = "<<indexperm<<std::endl;
	laerror("internal error 3 in unwind_indices");
	}

//loop recursively and do the unwinding
help_tt<T> = this;
help_p = &indexperm;
r.loopover(unwind_callback);
return r;
}

template<typename T>
static void auxmatmult(int nn, int mm, int kk, T *r, T *a, T *b,  T alpha=1, T beta=0, bool conjugate=false) //R(nn,mm) = A(nn,kk) * B^T(mm,kk)
{
for(int i=0; i<nn; ++i) for(int j=0; j<mm; ++j)
	{
	if(beta==0)  r[i*mm+j]=0; else r[i*mm+j] *= beta;
	for(int k=0; k<kk; ++k) r[i*mm+j] += alpha * a[i*kk+k] * conjugate? LA_traits<T>::conjugate(b[j*kk+k]) : b[j*kk+k];
	}
}


template<>
void auxmatmult<double>(int nn, int mm, int kk, double *r, double *a, double *b, double alpha, double beta, bool conjugate)
{
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, nn, mm, kk, alpha, a, kk, b, kk, beta, r, mm);
}

template<>
void auxmatmult<std::complex<double> >(int nn, int mm, int kk, std::complex<double> *r, std::complex<double> *a, std::complex<double> *b,  std::complex<double> alpha,  std::complex<double> beta, bool conjugate)
{
cblas_zgemm(CblasRowMajor, CblasNoTrans, (conjugate?CblasConjTrans:CblasTrans), nn, mm, kk, &alpha, a, kk, b, kk, &beta, r, mm);
}


//Conntraction could be implemented without the temporary storage for unwinding, but then we would need
//double recursion over indices of both tensors. Hopefully using the matrix multiplication here
//makes it also more efficient, even for (anti)symmetric indices
//The index unwinding is unfortunately a big burden, and in principle could be eliminated in case of non-symmetric indices
//
template<typename T>
void Tensor<T>::addcontraction(const Tensor &rhs1, int group, int index, const Tensor &rhs, int rhsgroup, int rhsindex, T alpha, T beta, bool doresize, bool conjugate1, bool conjugate)
{
if(group<0||group>=rhs1.shape.size()) laerror("wrong group number in contraction");
if(rhsgroup<0||rhsgroup>=rhs.shape.size()) laerror("wrong rhsgroup number in contraction");
if(index<0||index>=rhs1.shape[group].number)  laerror("wrong index number in conntraction");
if(rhsindex<0||rhsindex>=rhs.shape[rhsgroup].number)  laerror("wrong index number in conntraction");
if(rhs1.shape[group].offset != rhs.shape[rhsgroup].offset) laerror("incompatible index offset in contraction");
if(rhs1.shape[group].range != rhs.shape[rhsgroup].range) laerror("incompatible index range in contraction");

Tensor<T> u = rhs1.unwind_index(group,index);
if(conjugate1) u.conjugateme();
Tensor<T> rhsu = rhs.unwind_index(rhsgroup,rhsindex);


NRVec<indexgroup> newshape(u.shape.size()+rhsu.shape.size()-2);
int ii=0;
for(int i=1; i<rhsu.shape.size(); ++i) newshape[ii++] = rhsu.shape[i];
for(int i=1; i<u.shape.size(); ++i) newshape[ii++] = u.shape[i]; //this tensor will have more significant indices than the rhs one

if(doresize)
	{
	if(beta!= (T)0) laerror("resize in addcontraction requires beta=0");
	resize(newshape);
	}
else
	{
	if(shape!=newshape) laerror("tensor shape mismatch in addcontraction");
	}
int nn,mm,kk;
kk=u.groupsizes[0];
if(kk!=rhsu.groupsizes[0]) laerror("internal error in addcontraction");
nn=1; for(int i=1; i<u.shape.size(); ++i) nn*= u.groupsizes[i];
mm=1; for(int i=1; i<rhsu.shape.size(); ++i) mm*= rhsu.groupsizes[i];
data.copyonwrite();
auxmatmult<T>(nn,mm,kk,&data[0],&u.data[0], &rhsu.data[0],alpha,beta,conjugate);
}


template<typename T>
void Tensor<T>::addcontractions(const Tensor &rhs1, const INDEXLIST &il1, const Tensor &rhs2, const INDEXLIST &il2,  T alpha, T beta, bool doresize, bool conjugate1, bool conjugate2)
{
if(il1.size()==0) laerror("empty contraction - outer product not implemented");
if(il1.size()!=il2.size()) laerror("mismatch in index lists in addcontractions");
for(int i=0; i<il1.size(); ++i)
	{
	if(il1[i].group<0||il1[i].group>=rhs1.shape.size()) laerror("wrong group1 number in contractions");
	if(il2[i].group<0||il2[i].group>=rhs2.shape.size()) laerror("wrong group2 number in contractions");
	if(il1[i].index<0||il1[i].index>=rhs1.shape[il1[i].group].number)  laerror("wrong index1 number in conntractions");
	if(il2[i].index<0||il2[i].index>=rhs2.shape[il2[i].group].number)  laerror("wrong index2 number in conntractions");
	if(rhs1.shape[il1[i].group].offset != rhs2.shape[il2[i].group].offset) laerror("incompatible index offset in contractions");
	if(rhs1.shape[il1[i].group].range  != rhs2.shape[il2[i].group].range) laerror("incompatible index range in contractions");
	}

Tensor<T> u = rhs1.unwind_indices(il1);
if(conjugate1) u.conjugateme();
Tensor<T> rhsu = rhs2.unwind_indices(il2);


NRVec<indexgroup> newshape(u.shape.size()+rhsu.shape.size()-2*il1.size());
int ii=0;
for(int i=il1.size(); i<rhsu.shape.size(); ++i) newshape[ii++] = rhsu.shape[i];
for(int i=il1.size(); i<u.shape.size(); ++i) newshape[ii++] = u.shape[i]; //this tensor will have more significant indices than the rhs one

if(doresize)
	{
	if(beta!= (T)0) laerror("resize in addcontractions requires beta=0");
	resize(newshape);
	}
else
	{
	if(shape!=newshape) laerror("tensor shape mismatch in addcontraction");
	}
int nn,mm,kk;
kk=1;
int kk2=1;
for(int i=0; i<il1.size(); ++i)
	{
	kk *= u.groupsizes[i];
	kk2 *= rhsu.groupsizes[i];
	}
if(kk!=kk2) laerror("internal error in addcontractions");

nn=1; for(int i=il1.size(); i<u.shape.size(); ++i) nn*= u.groupsizes[i];
mm=1; for(int i=il1.size(); i<rhsu.shape.size(); ++i) mm*= rhsu.groupsizes[i];
data.copyonwrite();
auxmatmult<T>(nn,mm,kk,&data[0],&u.data[0], &rhsu.data[0],alpha,beta,conjugate2);
}


template<typename T>
static const PermutationAlgebra<int,T> *help_pa;

static bool help_inverse;

template<typename T>
static T help_alpha;

template<typename T>
static void permutationalgebra_callback(const SUPERINDEX &I, T *v)
{
FLATINDEX J = superindex2flat(I);
for(int p=0; p<help_pa<T>->size(); ++p)
	{
	FLATINDEX Jp = J.permuted((*help_pa<T>)[p].perm,help_inverse);
	*v += help_alpha<T> * (*help_pa<T>)[p].weight * (*help_t<T>)(Jp);
	}
}


template<typename T>
void Tensor<T>::apply_permutation_algebra(const Tensor<T> &rhs, const PermutationAlgebra<int,T> &pa, bool inverse, T alpha, T beta)
{
if(beta!=(T)0) {if(beta!=(T)1) *this *= beta;} else clear();
if(alpha==(T)0) return;
copyonwrite();

help_t<T> = const_cast<Tensor<T> *>(&rhs);
help_pa<T> = &pa;
help_inverse = inverse;
help_alpha<T> = alpha;
loopover(permutationalgebra_callback);
}


template<typename T>
void Tensor<T>::split_index_group(int group)
{
if(group<0||group >= shape.size()) laerror("illegal index group number");
if(shape[group].number==1) return; //nothing to split
if(shape[group].symmetry!=0) laerror("only non-symmetric index group can be splitted");

NRVec<indexgroup> newshape(shape.size()+shape[group].number-1);
int gg=0;
for(int g=0; g<shape.size(); ++g)
	{
	if(g==group)
		{
		for(int i=0; i<shape[group].number; ++i)
			{
			newshape[gg] = shape[group];
			newshape[gg].number = 1;
			gg++;
			}
		}
	else
		newshape[gg++] = shape[g];
	}

shape=newshape;
LA_largeindex newsize = calcsize(); //recalculate auxiliary arrays
if(data.size()!=newsize) laerror("internal error in split_index_group");
}


template<typename T>
void Tensor<T>:: merge_adjacent_index_groups(int groupfrom, int groupto)
{
if(groupfrom<0||groupfrom>= shape.size()) laerror("illegal index group number");
if(groupto<0||groupto>= shape.size()) laerror("illegal index group number");
if(groupfrom==groupto) return;
if(groupfrom>groupto) {int t=groupfrom; groupfrom=groupto; groupto=t;}

int newnumber=0;
for(int g=groupfrom; g<=groupto; ++g)
	{
	if(shape[g].symmetry!=0) laerror("only non-symmetric index groups can be merged");
	if(shape[g].offset!=shape[groupfrom].offset) laerror("incompatible offset in merge_adjacent_index_groups");
	if(shape[g].range!=shape[groupfrom].range) laerror("incompatible range in merge_adjacent_index_groups");
	newnumber += shape[g].number;
	}

NRVec<indexgroup> newshape(shape.size()-(groupto-groupfrom+1)+1);
for(int g=0; g<=groupfrom; ++g) newshape[g]=shape[g];
newshape[groupfrom].number=newnumber;
for(int g=groupfrom+1; g<newshape.size(); ++g) newshape[g]=shape[g+groupto-groupfrom];

shape=newshape;
LA_largeindex newsize = calcsize(); //recalculate auxiliary arrays
if(data.size()!=newsize) laerror("internal error in merge_adjacent_index_groups");
}


template<typename T>
Tensor<T> Tensor<T>::merge_index_groups(const NRVec<int> &groups) const
{
if(groups.size()<=1) return *this;
NRPerm<int> p(shape.size());
int gg=0;
bitvector selected(shape.size());
selected.clear();
for(int g=0; g<groups.size(); ++g)
	{
	if(groups[g]<0||groups[g]>=shape.size()) laerror("illegal group number in merge_index_groups");
	if(selected[g]) laerror("repeated group number in merge_index_groups");
	selected.set(g);
	p[gg++] = 1+groups[g];
	}
for(int g=0; g<shape.size(); ++g)
	if(!selected[g])
		p[gg++] = 1+g;
if(gg!=shape.size() || !p.is_valid()) laerror("internal error in merge_index_groups");

Tensor<T> r =  permute_index_groups(p);
r.merge_adjacent_index_groups(0,groups.size()-1);
return r;
}


template class Tensor<double>;
template class Tensor<std::complex<double> >;
template std::ostream & operator<<(std::ostream &s, const Tensor<double> &x);
template std::ostream & operator<<(std::ostream &s, const Tensor<std::complex<double> > &x);
template std::istream & operator>>(std::istream &s, Tensor<double> &x);
template std::istream & operator>>(std::istream &s, Tensor<std::complex<double> > &x);


}//namespace