2008-02-26 14:55:23 +01:00
/*
LA : linear algebra C + + interface library
Copyright ( C ) 2008 Jiri Pittner < jiri . pittner @ jh - inst . cas . cz > or < jiri @ pittnerovi . com >
This program is free software : you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation , either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program . If not , see < http : //www.gnu.org/licenses/>.
*/
2005-02-06 15:01:27 +01:00
# ifndef _MATEXP_H_
# define _MATEXP_H_
2004-03-17 04:07:21 +01:00
//general routine for polynomial of a matrix, tuned to minimize the number
//of matrix-matrix multiplications on cost of additions and memory
// the polynom and exp routines will work on any type, for which traits class
// is defined containing definition of an element type, norm and axpy operation
# include "la_traits.h"
2006-09-11 23:33:24 +02:00
# include "laerror.h"
# ifndef NONCBLAS
extern " C " {
# include "cblas.h"
# include "clapack.h"
}
# else
# include "noncblas.h"
# endif
2004-03-17 04:07:21 +01:00
template < class T , class R >
2006-09-04 22:12:34 +02:00
const T polynom0 ( const T & x , const NRVec < R > & c )
2004-03-17 04:07:21 +01:00
{
int order = c . size ( ) - 1 ;
T z , y ;
//trivial reference implementation by horner scheme
if ( order = = 0 ) { y = x ; y = c [ 0 ] ; } //to avoid the problem: we do not know the size of the matrix to contruct a scalar one
else
{
int i ;
z = x * c [ order ] ;
for ( i = order - 1 ; i > = 0 ; i - - )
{
if ( i < order - 1 ) z = y * x ;
y = z + c [ i ] ;
}
}
return y ;
}
2006-09-04 22:12:34 +02:00
//algorithm which minimazes number of multiplications, at the cost of storage
2004-03-17 04:07:21 +01:00
template < class T , class R >
const T polynom ( const T & x , const NRVec < R > & c )
{
int n = c . size ( ) - 1 ;
int i , j , k , m = 0 , t ;
2006-09-04 22:12:34 +02:00
if ( n < = 4 ) return polynom0 ( x , c ) ; //here the horner scheme is optimal
2004-03-17 04:07:21 +01:00
//first find m which minimizes the number of multiplications
j = 10 * n ;
for ( i = 2 ; i < = n + 1 ; i + + )
{
t = i - 2 + 2 * ( n / i ) - ( n % i ) ? 0 : 1 ;
if ( t < j )
{
j = t ;
m = i ;
}
}
2006-09-04 22:12:34 +02:00
2004-03-17 04:07:21 +01:00
//allocate array for powers up to m
T * xpows = new T [ m ] ;
xpows [ 0 ] = x ;
for ( i = 1 ; i < m ; i + + ) xpows [ i ] = xpows [ i - 1 ] * x ;
//run the summation loop
T r , s , f ;
k = - 1 ;
for ( i = 0 ; i < = n / m ; i + + )
{
for ( j = 0 ; j < m ; j + + )
{
k + + ;
if ( k > n ) break ;
2006-09-04 22:12:34 +02:00
if ( j = = 0 ) {
if ( i = = 0 ) s = x ; /*just to get the dimensions of the matrix*/
s = c [ k ] ; /*create diagonal matrix*/
}
2004-03-17 04:07:21 +01:00
else
2005-02-14 01:10:07 +01:00
LA_traits < T > : : axpy ( s , xpows [ j - 1 ] , c [ k ] ) ; //general s+=xpows[j-1]*c[k]; but more efficient for matrices
2004-03-17 04:07:21 +01:00
}
if ( i = = 0 ) { r = s ; f = xpows [ m - 1 ] ; }
else
{
r + = s * f ;
f = f * xpows [ m - 1 ] ;
}
}
delete [ ] xpows ;
return r ;
}
//for general objects
template < class T >
const T ncommutator ( const T & x , const T & y , int nest = 1 , const bool right = 1 )
{
T z ;
if ( right ) { z = x ; while ( - - nest > = 0 ) z = z * y - y * z ; }
else { z = y ; while ( - - nest > = 0 ) z = x * z - z * x ; }
return z ;
}
template < class T >
const T nanticommutator ( const T & x , const T & y , int nest = 1 , const bool right = 1 )
{
T z ;
if ( right ) { z = x ; while ( - - nest > = 0 ) z = z * y + y * z ; }
else { z = y ; while ( - - nest > = 0 ) z = x * z + z * x ; }
return z ;
}
//general BCH expansion (can be written more efficiently in a specialization for matrices)
template < class T >
2006-09-10 22:06:44 +02:00
const T BCHexpansion ( const T & h , const T & t , const int n , const bool verbose = 0 ) \
2004-03-17 04:07:21 +01:00
{
T result = h ;
double factor = 1. ;
T z = h ;
for ( int i = 1 ; i < = n ; + + i )
{
factor / = i ;
z = z * t - t * z ;
2006-09-10 22:06:44 +02:00
if ( verbose ) cerr < < " BCH contribution at order " < < i < < " : " < < z . norm ( ) * factor < < endl ;
2004-03-17 04:07:21 +01:00
result + = z * factor ;
}
return result ;
}
template < class T >
const T ipow ( const T & x , int i )
{
if ( i < 0 ) laerror ( " negative exponent in ipow " ) ;
2005-02-14 01:10:07 +01:00
if ( i = = 0 ) { T r = x ; r = ( typename LA_traits < T > : : elementtype ) 1 ; return r ; } //trick for matrix dimension
2004-03-17 04:07:21 +01:00
if ( i = = 1 ) return x ;
T y , z ;
z = x ;
while ( ! ( i & 1 ) )
{
z = z * z ;
i > > = 1 ;
}
y = z ;
while ( ( i > > = 1 ) /*!=0*/ )
{
z = z * z ;
if ( i & 1 ) y = y * z ;
}
return y ;
}
inline int nextpow2 ( const double n )
{
const double log2 = log ( 2. ) ;
if ( n < = .75 ) return 0 ; //try to keep the taylor expansion short
if ( n < = 1. ) return 1 ;
return int ( ceil ( log ( n ) / log2 - log ( .75 ) ) ) ;
}
2006-09-11 18:21:46 +02:00
template < class T , class C >
2006-09-12 01:34:41 +02:00
NRVec < C > exp_aux ( const T & x , int & power , int maxpower = - 1 , int maxtaylor = - 1 )
2004-03-17 04:07:21 +01:00
{
//should better be computed by mathematica to have accurate last digits, chebyshev instead, see exp in glibc
static double exptaylor [ ] = {
1. ,
1. ,
0.5 ,
0.1666666666666666666666 ,
0.0416666666666666666666 ,
0.0083333333333333333333 ,
0.0013888888888888888888 ,
0.00019841269841269841253 ,
2.4801587301587301566e-05 ,
2.7557319223985892511e-06 ,
2.7557319223985888276e-07 ,
2.5052108385441720224e-08 ,
2.0876756987868100187e-09 ,
1.6059043836821613341e-10 ,
1.1470745597729724507e-11 ,
7.6471637318198164055e-13 ,
4.7794773323873852534e-14 ,
2.8114572543455205981e-15 ,
1.5619206968586225271e-16 ,
8.2206352466243294955e-18 ,
4.1103176233121648441e-19 ,
0. } ;
2006-09-11 18:21:46 +02:00
double mnorm = x . norm ( ) ;
2006-09-10 22:06:44 +02:00
power = nextpow2 ( mnorm ) ;
2006-09-12 01:34:41 +02:00
if ( maxpower > = 0 & & power > maxpower ) power = maxpower ;
2004-03-17 04:07:21 +01:00
double scale = exp ( - log ( 2. ) * power ) ;
//find how long taylor expansion will be necessary
2006-09-10 22:06:44 +02:00
const double precision = 1e-14 ; //decreasing brings nothing
2004-03-17 04:07:21 +01:00
double s , t ;
s = mnorm * scale ;
int n = 0 ;
t = 1. ;
do {
n + + ;
t * = s ;
}
while ( t * exptaylor [ n ] > precision ) ; //taylor 0 will terminate in any case
2006-09-12 01:34:41 +02:00
if ( maxtaylor > = 0 & & n > maxtaylor ) n = maxtaylor ; //useful e.g. if the matrix is nilpotent in order n+1 as the CC T operator for n electrons
2006-09-04 22:12:34 +02:00
2006-09-10 22:06:44 +02:00
2004-03-17 04:07:21 +01:00
int i ; //adjust the coefficients in order to avoid scaling the argument
2006-09-11 18:21:46 +02:00
NRVec < C > taylor2 ( n + 1 ) ;
2004-03-17 04:07:21 +01:00
for ( i = 0 , t = 1. ; i < = n ; i + + )
{
taylor2 [ i ] = exptaylor [ i ] * t ;
t * = scale ;
}
return taylor2 ;
}
2006-09-10 22:06:44 +02:00
//it seems that we do not gain anything by polynom vs polynom0, check the m-optimization!
2004-03-17 04:07:21 +01:00
template < class T >
2006-09-12 01:34:41 +02:00
const T exp ( const T & x , bool horner = true , int maxpower = - 1 , int maxtaylor = - 1 )
2004-03-17 04:07:21 +01:00
{
int power ;
//prepare the polynom of and effectively scale T
2006-09-12 01:34:41 +02:00
NRVec < typename LA_traits < T > : : elementtype > taylor2 = exp_aux < T , typename LA_traits < T > : : elementtype > ( x , power , maxpower , maxtaylor ) ;
2004-03-17 04:07:21 +01:00
2006-09-04 22:12:34 +02:00
2006-09-10 22:06:44 +02:00
T r = horner ? polynom0 ( x , taylor2 ) : polynom ( x , taylor2 ) ;
2006-09-04 22:12:34 +02:00
//for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices
2004-03-17 04:07:21 +01:00
//power the result back
for ( int i = 0 ; i < power ; i + + ) r = r * r ;
return r ;
}
2006-09-11 22:30:33 +02:00
//this simple implementation seems not to be numerically stable enough
//and probably not efficient either
2006-09-12 01:07:22 +02:00
2004-03-17 04:07:21 +01:00
template < class M , class V >
2006-09-19 18:09:26 +02:00
void exptimesdestructive ( const M & mat , V & result , V & rhs , bool transpose = false , const double scale = 1. , int maxpower = - 1 , int maxtaylor = - 1 , bool mat_is_0 = false ) //uses just matrix vector multiplication
2004-03-17 04:07:21 +01:00
{
2007-11-29 14:52:31 +01:00
if ( mat_is_0 ) { result = rhs ; LA_traits < V > : : copyonwrite ( result ) ; return ; } //prevent returning a shallow copy of rhs
2006-09-12 01:07:22 +02:00
if ( mat . nrows ( ) ! = mat . ncols ( ) | | ( unsigned int ) mat . nrows ( ) ! = ( unsigned int ) rhs . size ( ) ) laerror ( " inappropriate sizes in exptimes " ) ;
2004-03-17 04:07:21 +01:00
int power ;
//prepare the polynom of and effectively scale the matrix
2006-09-12 01:34:41 +02:00
NRVec < typename LA_traits < V > : : elementtype > taylor2 = exp_aux < M , typename LA_traits < V > : : elementtype > ( mat , power , maxpower , maxtaylor ) ;
2006-09-12 01:07:22 +02:00
V tmp ;
2004-03-17 04:07:21 +01:00
for ( int i = 1 ; i < = ( 1 < < power ) ; + + i ) //unfortunatelly, here we have to repeat it many times, unlike if the matrix is stored explicitly
{
2006-09-12 01:07:22 +02:00
if ( i > 1 ) rhs = result ; //apply again to the result of previous application
else result = rhs ;
tmp = rhs ; //now rhs can be used as scratch
result * = taylor2 [ 0 ] ;
2004-03-17 04:07:21 +01:00
for ( int j = 1 ; j < taylor2 . size ( ) ; + + j )
{
2006-09-12 01:07:22 +02:00
mat . gemv ( 0. , rhs , transpose ? ' t ' : ' n ' , scale , tmp ) ;
tmp = rhs ;
result . axpy ( taylor2 [ j ] , tmp ) ;
2004-03-17 04:07:21 +01:00
}
}
2006-09-12 01:07:22 +02:00
return ;
2004-03-17 04:07:21 +01:00
}
2005-02-06 15:01:27 +01:00
2006-09-11 23:33:24 +02:00
2006-09-12 01:07:22 +02:00
template < class M , class V >
2006-09-19 18:09:26 +02:00
const V exptimes ( const M & mat , V rhs , bool transpose = false , const double scale = 1. , int maxpower = - 1 , int maxtaylor = - 1 , bool mat_is_0 = false )
2006-09-12 01:07:22 +02:00
{
V result ;
2006-09-19 18:09:26 +02:00
exptimesdestructive ( mat , result , rhs , transpose , scale , maxpower , maxtaylor , mat_is_0 ) ;
2006-09-12 01:07:22 +02:00
return result ;
}
2006-09-11 23:33:24 +02:00
2006-09-11 17:49:34 +02:00
//@@@ power series matrix logarithm?
2005-02-06 15:01:27 +01:00
# endif