2008-02-26 14:55:23 +01:00
/*
LA : linear algebra C + + interface library
Copyright ( C ) 2008 Jiri Pittner < jiri . pittner @ jh - inst . cas . cz > or < jiri @ pittnerovi . com >
This program is free software : you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation , either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program . If not , see < http : //www.gnu.org/licenses/>.
*/
2005-02-06 15:01:27 +01:00
# ifndef _MATEXP_H_
# define _MATEXP_H_
2004-03-17 04:07:21 +01:00
//general routine for polynomial of a matrix, tuned to minimize the number
//of matrix-matrix multiplications on cost of additions and memory
// the polynom and exp routines will work on any type, for which traits class
// is defined containing definition of an element type, norm and axpy operation
# include "la_traits.h"
2006-09-11 23:33:24 +02:00
# include "laerror.h"
2004-03-17 04:07:21 +01:00
template < class T , class R >
2006-09-04 22:12:34 +02:00
const T polynom0 ( const T & x , const NRVec < R > & c )
2004-03-17 04:07:21 +01:00
{
int order = c . size ( ) - 1 ;
T z , y ;
//trivial reference implementation by horner scheme
if ( order = = 0 ) { y = x ; y = c [ 0 ] ; } //to avoid the problem: we do not know the size of the matrix to contruct a scalar one
else
{
int i ;
z = x * c [ order ] ;
for ( i = order - 1 ; i > = 0 ; i - - )
{
if ( i < order - 1 ) z = y * x ;
y = z + c [ i ] ;
}
}
return y ;
}
2009-11-05 11:57:43 +01:00
2006-09-04 22:12:34 +02:00
//algorithm which minimazes number of multiplications, at the cost of storage
2004-03-17 04:07:21 +01:00
template < class T , class R >
const T polynom ( const T & x , const NRVec < R > & c )
{
int n = c . size ( ) - 1 ;
int i , j , k , m = 0 , t ;
2006-09-04 22:12:34 +02:00
if ( n < = 4 ) return polynom0 ( x , c ) ; //here the horner scheme is optimal
2004-03-17 04:07:21 +01:00
//first find m which minimizes the number of multiplications
j = 10 * n ;
for ( i = 2 ; i < = n + 1 ; i + + )
{
t = i - 2 + 2 * ( n / i ) - ( n % i ) ? 0 : 1 ;
if ( t < j )
{
j = t ;
m = i ;
}
}
2006-09-04 22:12:34 +02:00
2004-03-17 04:07:21 +01:00
//allocate array for powers up to m
T * xpows = new T [ m ] ;
xpows [ 0 ] = x ;
for ( i = 1 ; i < m ; i + + ) xpows [ i ] = xpows [ i - 1 ] * x ;
//run the summation loop
T r , s , f ;
k = - 1 ;
for ( i = 0 ; i < = n / m ; i + + )
{
for ( j = 0 ; j < m ; j + + )
{
k + + ;
if ( k > n ) break ;
2006-09-04 22:12:34 +02:00
if ( j = = 0 ) {
if ( i = = 0 ) s = x ; /*just to get the dimensions of the matrix*/
s = c [ k ] ; /*create diagonal matrix*/
}
2004-03-17 04:07:21 +01:00
else
2005-02-14 01:10:07 +01:00
LA_traits < T > : : axpy ( s , xpows [ j - 1 ] , c [ k ] ) ; //general s+=xpows[j-1]*c[k]; but more efficient for matrices
2004-03-17 04:07:21 +01:00
}
if ( i = = 0 ) { r = s ; f = xpows [ m - 1 ] ; }
else
{
r + = s * f ;
f = f * xpows [ m - 1 ] ;
}
}
delete [ ] xpows ;
return r ;
}
//for general objects
template < class T >
const T ncommutator ( const T & x , const T & y , int nest = 1 , const bool right = 1 )
{
T z ;
if ( right ) { z = x ; while ( - - nest > = 0 ) z = z * y - y * z ; }
else { z = y ; while ( - - nest > = 0 ) z = x * z - z * x ; }
return z ;
}
template < class T >
const T nanticommutator ( const T & x , const T & y , int nest = 1 , const bool right = 1 )
{
T z ;
if ( right ) { z = x ; while ( - - nest > = 0 ) z = z * y + y * z ; }
else { z = y ; while ( - - nest > = 0 ) z = x * z + z * x ; }
return z ;
}
//general BCH expansion (can be written more efficiently in a specialization for matrices)
template < class T >
2006-09-10 22:06:44 +02:00
const T BCHexpansion ( const T & h , const T & t , const int n , const bool verbose = 0 ) \
2004-03-17 04:07:21 +01:00
{
T result = h ;
double factor = 1. ;
T z = h ;
for ( int i = 1 ; i < = n ; + + i )
{
factor / = i ;
z = z * t - t * z ;
2006-09-10 22:06:44 +02:00
if ( verbose ) cerr < < " BCH contribution at order " < < i < < " : " < < z . norm ( ) * factor < < endl ;
2004-03-17 04:07:21 +01:00
result + = z * factor ;
}
return result ;
}
template < class T >
const T ipow ( const T & x , int i )
{
if ( i < 0 ) laerror ( " negative exponent in ipow " ) ;
2005-02-14 01:10:07 +01:00
if ( i = = 0 ) { T r = x ; r = ( typename LA_traits < T > : : elementtype ) 1 ; return r ; } //trick for matrix dimension
2004-03-17 04:07:21 +01:00
if ( i = = 1 ) return x ;
T y , z ;
z = x ;
while ( ! ( i & 1 ) )
{
z = z * z ;
i > > = 1 ;
}
y = z ;
while ( ( i > > = 1 ) /*!=0*/ )
{
z = z * z ;
if ( i & 1 ) y = y * z ;
}
return y ;
}
inline int nextpow2 ( const double n )
{
const double log2 = log ( 2. ) ;
if ( n < = .75 ) return 0 ; //try to keep the taylor expansion short
if ( n < = 1. ) return 1 ;
return int ( ceil ( log ( n ) / log2 - log ( .75 ) ) ) ;
}
2009-11-05 11:57:43 +01:00
//should better be computed by mathematica to have accurate last digits, perhaps chebyshev instead, see exp in glibc
//is shared also for sine and cosine now
static const double exptaylor [ ] = {
2004-03-17 04:07:21 +01:00
1. ,
1. ,
0.5 ,
0.1666666666666666666666 ,
0.0416666666666666666666 ,
0.0083333333333333333333 ,
0.0013888888888888888888 ,
0.00019841269841269841253 ,
2.4801587301587301566e-05 ,
2.7557319223985892511e-06 ,
2.7557319223985888276e-07 ,
2.5052108385441720224e-08 ,
2.0876756987868100187e-09 ,
1.6059043836821613341e-10 ,
1.1470745597729724507e-11 ,
7.6471637318198164055e-13 ,
4.7794773323873852534e-14 ,
2.8114572543455205981e-15 ,
1.5619206968586225271e-16 ,
8.2206352466243294955e-18 ,
4.1103176233121648441e-19 ,
2009-11-05 11:57:43 +01:00
1.9572941063391262595e-20 ,
2004-03-17 04:07:21 +01:00
0. } ;
2009-11-05 11:57:43 +01:00
2009-11-05 17:28:00 +01:00
//S is element type of T, but T may be any user-defined
template < class T , class C , class S >
NRVec < C > exp_aux ( const T & x , int & power , int maxpower , int maxtaylor , S prescale )
2009-11-05 11:57:43 +01:00
{
2009-10-08 16:01:15 +02:00
double mnorm = x . norm ( ) * abs ( prescale ) ;
2006-09-10 22:06:44 +02:00
power = nextpow2 ( mnorm ) ;
2006-09-12 01:34:41 +02:00
if ( maxpower > = 0 & & power > maxpower ) power = maxpower ;
2004-03-17 04:07:21 +01:00
double scale = exp ( - log ( 2. ) * power ) ;
//find how long taylor expansion will be necessary
2009-11-05 10:55:44 +01:00
const double precision = 1e-14 ; //further decreasing brings nothing
2004-03-17 04:07:21 +01:00
double s , t ;
s = mnorm * scale ;
int n = 0 ;
t = 1. ;
do {
n + + ;
t * = s ;
}
while ( t * exptaylor [ n ] > precision ) ; //taylor 0 will terminate in any case
2006-09-12 01:34:41 +02:00
if ( maxtaylor > = 0 & & n > maxtaylor ) n = maxtaylor ; //useful e.g. if the matrix is nilpotent in order n+1 as the CC T operator for n electrons
2006-09-04 22:12:34 +02:00
2006-09-10 22:06:44 +02:00
2004-03-17 04:07:21 +01:00
int i ; //adjust the coefficients in order to avoid scaling the argument
2006-09-11 18:21:46 +02:00
NRVec < C > taylor2 ( n + 1 ) ;
2004-03-17 04:07:21 +01:00
for ( i = 0 , t = 1. ; i < = n ; i + + )
{
taylor2 [ i ] = exptaylor [ i ] * t ;
t * = scale ;
}
2009-11-05 10:55:44 +01:00
//cout <<"TEST power, scale "<<power<<" "<<scale<<endl;
//cout <<"TEST taylor2 "<<taylor2<<endl;
2004-03-17 04:07:21 +01:00
return taylor2 ;
}
2009-11-05 17:28:00 +01:00
template < class T , class C , class S >
void sincos_aux ( NRVec < C > & si , NRVec < C > & co , const T & x , int & power , int maxpower , int maxtaylor , const S prescale )
2009-11-05 11:57:43 +01:00
{
double mnorm = x . norm ( ) * abs ( prescale ) ;
power = nextpow2 ( mnorm ) ;
if ( maxpower > = 0 & & power > maxpower ) power = maxpower ;
double scale = exp ( - log ( 2. ) * power ) ;
//find how long taylor expansion will be necessary
const double precision = 1e-14 ; //further decreasing brings nothing
double s , t ;
s = mnorm * scale ;
int n = 0 ;
t = 1. ;
do {
n + + ;
t * = s ;
}
while ( t * exptaylor [ n ] > precision ) ; //taylor 0 will terminate in any case
if ( maxtaylor > = 0 & & n > maxtaylor ) n = maxtaylor ; //useful e.g. if the matrix is nilpotent in order n+1 as the CC T operator for n electrons
if ( ( n & 1 ) = = 0 ) + + n ; //force it to be odd to have same length in sine and cosine
si . resize ( ( n + 1 ) / 2 ) ;
co . resize ( ( n + 1 ) / 2 ) ;
int i ; //adjust the coefficients in order to avoid scaling the argument
for ( i = 0 , t = 1. ; i < = n ; i + + )
{
if ( i & 1 ) si [ i > > 1 ] = exptaylor [ i ] * ( i & 2 ? - t : t ) ;
else co [ i > > 1 ] = exptaylor [ i ] * ( i & 2 ? - t : t ) ;
t * = scale ;
}
2009-11-05 17:34:26 +01:00
//cout <<"TEST sin "<<si<<endl;
//cout <<"TEST cos "<<co<<endl;
2009-11-05 11:57:43 +01:00
}
2006-09-10 22:06:44 +02:00
//it seems that we do not gain anything by polynom vs polynom0, check the m-optimization!
2004-03-17 04:07:21 +01:00
template < class T >
2006-09-12 01:34:41 +02:00
const T exp ( const T & x , bool horner = true , int maxpower = - 1 , int maxtaylor = - 1 )
2004-03-17 04:07:21 +01:00
{
int power ;
//prepare the polynom of and effectively scale T
2009-11-05 17:28:00 +01:00
NRVec < typename LA_traits < T > : : normtype > taylor2 = exp_aux < T , typename LA_traits < T > : : normtype , double > ( x , power , maxpower , maxtaylor , 1. ) ;
2004-03-17 04:07:21 +01:00
2006-09-04 22:12:34 +02:00
2006-09-10 22:06:44 +02:00
T r = horner ? polynom0 ( x , taylor2 ) : polynom ( x , taylor2 ) ;
2006-09-04 22:12:34 +02:00
//for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices
2004-03-17 04:07:21 +01:00
//power the result back
for ( int i = 0 ; i < power ; i + + ) r = r * r ;
return r ;
}
2009-11-05 11:57:43 +01:00
//make exp(iH) with real H in real arithmetics
template < class T >
void sincos ( T & s , T & c , const T & x , bool horner = true , int maxpower = - 1 , int maxtaylor = - 1 )
{
int power ;
NRVec < typename LA_traits < T > : : normtype > taylors , taylorc ;
2009-11-05 17:28:00 +01:00
sincos_aux < T , typename LA_traits < T > : : normtype > ( taylors , taylorc , x , power , maxpower , maxtaylor , 1. ) ;
2009-11-05 11:57:43 +01:00
//could we save something by computing both polynoms simultaneously?
{
T x2 = x * x ;
s = horner ? polynom0 ( x2 , taylors ) : polynom ( x2 , taylors ) ;
c = horner ? polynom0 ( x2 , taylorc ) : polynom ( x2 , taylorc ) ;
}
s = s * x ;
//power the results back
for ( int i = 0 ; i < power ; i + + )
{
T tmp = c * c - s * s ;
s = s * c ; s * = 2. ;
c = tmp ;
}
}
2004-03-17 04:07:21 +01:00
2006-09-11 22:30:33 +02:00
//this simple implementation seems not to be numerically stable enough
//and probably not efficient either
2006-09-12 01:07:22 +02:00
2009-11-05 17:28:00 +01:00
template < class M , class V , class MEL >
void exptimesdestructive ( const M & mat , V & result , V & rhs , bool transpose , const MEL scale , int maxpower = - 1 , int maxtaylor = - 1 , bool mat_is_0 = false ) //uses just matrix vector multiplication
2004-03-17 04:07:21 +01:00
{
2007-11-29 14:52:31 +01:00
if ( mat_is_0 ) { result = rhs ; LA_traits < V > : : copyonwrite ( result ) ; return ; } //prevent returning a shallow copy of rhs
2006-09-12 01:07:22 +02:00
if ( mat . nrows ( ) ! = mat . ncols ( ) | | ( unsigned int ) mat . nrows ( ) ! = ( unsigned int ) rhs . size ( ) ) laerror ( " inappropriate sizes in exptimes " ) ;
2004-03-17 04:07:21 +01:00
int power ;
//prepare the polynom of and effectively scale the matrix
2009-11-05 10:55:44 +01:00
NRVec < typename LA_traits < V > : : normtype > taylor2 = exp_aux < M , typename LA_traits < V > : : normtype > ( mat , power , maxpower , maxtaylor , scale ) ;
2006-09-12 01:07:22 +02:00
V tmp ;
2004-03-17 04:07:21 +01:00
for ( int i = 1 ; i < = ( 1 < < power ) ; + + i ) //unfortunatelly, here we have to repeat it many times, unlike if the matrix is stored explicitly
{
2006-09-12 01:07:22 +02:00
if ( i > 1 ) rhs = result ; //apply again to the result of previous application
else result = rhs ;
tmp = rhs ; //now rhs can be used as scratch
result * = taylor2 [ 0 ] ;
2004-03-17 04:07:21 +01:00
for ( int j = 1 ; j < taylor2 . size ( ) ; + + j )
{
2006-09-12 01:07:22 +02:00
mat . gemv ( 0. , rhs , transpose ? ' t ' : ' n ' , scale , tmp ) ;
tmp = rhs ;
result . axpy ( taylor2 [ j ] , tmp ) ;
2004-03-17 04:07:21 +01:00
}
}
2006-09-12 01:07:22 +02:00
return ;
2004-03-17 04:07:21 +01:00
}
2005-02-06 15:01:27 +01:00
2006-09-11 23:33:24 +02:00
2009-11-05 17:28:00 +01:00
//actually scale should be elementtype of M, but we do not have it since M can be anything user-defined
//and template paramter for it does not work due to optional arguments
2009-11-05 17:58:04 +01:00
//undecent solution: exptimesreal
2009-11-05 17:28:00 +01:00
//
2006-09-12 01:07:22 +02:00
template < class M , class V >
2009-10-08 16:01:15 +02:00
const V exptimes ( const M & mat , V rhs , bool transpose = false , const typename LA_traits < V > : : elementtype scale = 1. , int maxpower = - 1 , int maxtaylor = - 1 , bool mat_is_0 = false )
2006-09-12 01:07:22 +02:00
{
V result ;
2006-09-19 18:09:26 +02:00
exptimesdestructive ( mat , result , rhs , transpose , scale , maxpower , maxtaylor , mat_is_0 ) ;
2006-09-12 01:07:22 +02:00
return result ;
}
2006-09-11 23:33:24 +02:00
2009-11-05 17:58:04 +01:00
template < class M , class V >
const V exptimesreal ( const M & mat , V rhs , bool transpose = false , const typename LA_traits < V > : : normtype scale = 1. , int maxpower = - 1 , int maxtaylor = - 1 , bool mat_is_0 = false )
{
V result ;
exptimesdestructive ( mat , result , rhs , transpose , scale , maxpower , maxtaylor , mat_is_0 ) ;
return result ;
}
2006-09-11 23:33:24 +02:00
2009-11-05 17:28:00 +01:00
template < class M , class V , class S >
void sincostimes_simple ( const M & mat , V & si , V & co , const V & rhs , const NRVec < typename LA_traits < V > : : normtype > & taylors , const NRVec < typename LA_traits < V > : : normtype > & taylorc , bool transpose , const S scale )
2009-11-05 16:58:45 +01:00
{
si = rhs * taylors [ 0 ] ;
co = rhs * taylorc [ 0 ] ;
V tmp = rhs ;
for ( int j = 1 ; j < taylors . size ( ) ; + + j )
{
V tmp2 ( tmp . size ( ) ) ;
//multiply by a square of the matrix
mat . gemv ( 0. , tmp2 , transpose ? ' t ' : ' n ' , scale , tmp ) ;
mat . gemv ( 0. , tmp , transpose ? ' t ' : ' n ' , scale , tmp2 ) ;
si . axpy ( taylors [ j ] , tmp ) ;
co . axpy ( taylorc [ j ] , tmp ) ;
}
mat . gemv ( 0. , tmp , transpose ? ' t ' : ' n ' , scale , si ) ;
si = tmp ;
}
2009-11-05 17:51:16 +01:00
//this recursion is very inefficient, it is better to use complex exptimes!
2009-11-05 17:28:00 +01:00
template < class M , class V , class S >
void sincostimes_aux ( const M & mat , V & si , V & co , const V & rhs , const NRVec < typename LA_traits < V > : : normtype > & taylors , const NRVec < typename LA_traits < V > : : normtype > & taylorc , bool transpose , const S scale , int power )
2009-11-05 16:58:45 +01:00
{
if ( power = = 0 ) sincostimes_simple ( mat , si , co , rhs , taylors , taylorc , transpose , scale ) ;
else
{
V si2 , co2 ; //no large memory allocated yet - size 0
sincostimes_aux ( mat , si2 , co2 , rhs , taylors , taylorc , transpose , scale , power - 1 ) ;
sincostimes_aux ( mat , si , co , co2 , taylors , taylorc , transpose , scale , power - 1 ) ;
V ss , cs ;
sincostimes_aux ( mat , ss , cs , si2 , taylors , taylorc , transpose , scale , power - 1 ) ;
co - = ss ;
si + = cs ;
}
}
2009-11-05 17:51:16 +01:00
//inefficient, it is better to use complex exptimes!
2009-11-05 17:28:00 +01:00
//again scale should actually be elementtype of M which is inaccessible
2009-11-05 16:58:45 +01:00
template < class M , class V >
2009-11-05 17:34:26 +01:00
void sincostimes ( const M & mat , V & si , V & co , const V & rhs , bool transpose = false , const typename LA_traits < V > : : normtype scale = 1. , int maxpower = - 1 , int maxtaylor = - 1 , bool mat_is_0 = false ) //uses just matrix vector multiplication
2009-11-05 16:58:45 +01:00
{
if ( mat_is_0 ) //prevent returning a shallow copy of rhs
{
co = rhs ;
LA_traits < V > : : copyonwrite ( co ) ;
LA_traits < V > : : clearme ( si ) ;
return ;
}
if ( mat . nrows ( ) ! = mat . ncols ( ) | | ( unsigned int ) mat . nrows ( ) ! = ( unsigned int ) rhs . size ( ) ) laerror ( " inappropriate sizes in sincostimes " ) ;
//prepare the polynom of and effectively scale the matrix
int power ;
NRVec < typename LA_traits < V > : : normtype > taylors , taylorc ;
sincos_aux < M , typename LA_traits < V > : : normtype > ( taylors , taylorc , mat , power , maxpower , maxtaylor , scale ) ;
if ( taylors . size ( ) ! = taylorc . size ( ) ) laerror ( " internal error - same size of sin and cos expansions assumed " ) ;
//the actual computation and resursive "squaring"
cout < < " TEST power " < < power < < endl ;
sincostimes_aux ( mat , si , co , rhs , taylors , taylorc , transpose , scale , power ) ;
return ;
}
2006-09-11 23:33:24 +02:00
2006-09-11 17:49:34 +02:00
//@@@ power series matrix logarithm?
2005-02-06 15:01:27 +01:00
# endif