2010-09-08 18:27:58 +02:00
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
/*******************************************************************************
2008-02-26 14:55:23 +01:00
LA : linear algebra C + + interface library
Copyright ( C ) 2008 Jiri Pittner < jiri . pittner @ jh - inst . cas . cz > or < jiri @ pittnerovi . com >
complex versions written by Roman Curik < roman . curik @ jh - inst . cas . cz >
This program is free software : you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation , either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program . If not , see < http : //www.gnu.org/licenses/>.
2010-09-08 18:27:58 +02:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2004-03-17 04:07:21 +01:00
# include <iostream>
2005-02-14 01:10:07 +01:00
# include <stdlib.h>
2010-01-17 21:28:38 +01:00
# include <stdio.h>
2005-02-14 01:10:07 +01:00
# include <sys/types.h>
# include <sys/stat.h>
# include <fcntl.h>
2005-11-20 14:46:00 +01:00
# include <errno.h>
2009-11-12 22:01:19 +01:00
# include "vec.h"
2006-04-01 06:48:01 +02:00
# include "qsort.h"
2013-11-04 15:56:39 +01:00
# include <unistd.h>
2010-09-08 18:27:58 +02:00
2005-02-14 01:10:07 +01:00
2009-11-12 22:01:19 +01:00
namespace LA {
2004-03-17 04:07:21 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* conversion constructor interpreting a given matrix with \ f $ N \ f $ rows and
* \ f $ M \ f $ columns of general type < code > T < / code > as a vector of \ f $ N \ times { } M \ f $
* elements
* @ param [ in ] rhs matrix being converted
* @ see NRMat < T > : : NRMat ( )
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2004-03-17 04:07:21 +01:00
# ifndef MATPTR
template < typename T >
2010-09-08 18:27:58 +02:00
NRVec < T > : : NRVec ( const NRMat < T > & rhs ) {
# ifdef CUDALA
location = rhs . location ;
# endif
2004-03-17 04:07:21 +01:00
nn = rhs . nn * rhs . mm ;
v = rhs . v ;
count = rhs . count ;
( * count ) + + ;
}
# endif
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* routine for raw output
* @ param [ in ] fd file descriptor for output
* @ param [ in ] dim number of elements intended for output
* @ param [ in ] transp reserved
* @ see NRMat < T > : : put ( )
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-02-14 01:10:07 +01:00
template < typename T >
2010-09-08 18:27:58 +02:00
void NRVec < T > : : put ( int fd , bool dim , bool transp ) const {
2010-06-25 17:28:19 +02:00
# ifdef CUDALA
2010-09-08 18:27:58 +02:00
if ( location ! = cpu ) {
NRVec < T > tmp = * this ;
tmp . moveto ( cpu ) ;
tmp . put ( fd , dim , transp ) ;
return ;
}
2010-06-25 17:28:19 +02:00
# endif
2010-09-08 18:27:58 +02:00
errno = 0 ;
int pad ( 1 ) ; //align at least 8-byte
if ( dim ) {
if ( sizeof ( int ) ! = write ( fd , & nn , sizeof ( int ) ) ) laerror ( " write failed " ) ;
if ( sizeof ( int ) ! = write ( fd , & pad , sizeof ( int ) ) ) laerror ( " write failed " ) ;
}
LA_traits < T > : : multiput ( nn , fd , v , dim ) ;
2005-02-14 01:10:07 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* routine for raw input
* @ param [ in ] fd file descriptor for input
* @ param [ in ] dim number of elements intended for input , for dim = 0 perform copyonwrite
* @ param [ in ] transp reserved
* @ see NRMat < T > : : get ( ) , copyonwrite ( )
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-02-14 01:10:07 +01:00
template < typename T >
2010-09-08 18:27:58 +02:00
void NRVec < T > : : get ( int fd , bool dim , bool transp ) {
# ifdef CUDALA
if ( location ! = cpu ) {
NRVec < T > tmp ;
tmp . moveto ( cpu ) ;
tmp . get ( fd , dim , transp ) ;
tmp . moveto ( location ) ;
* this = tmp ;
return ;
}
# endif
int nn0 [ 2 ] ; //align at least 8-byte
errno = 0 ;
if ( dim ) {
if ( 2 * sizeof ( int ) ! = read ( fd , & nn0 , 2 * sizeof ( int ) ) ) laerror ( " read failed " ) ;
resize ( nn0 [ 0 ] ) ;
} else {
copyonwrite ( ) ;
}
LA_traits < T > : : multiget ( nn , fd , v , dim ) ;
2005-02-14 01:10:07 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* routine for formatted output via lawritemat
* @ param [ in ] file pointer to < tt > FILE < / tt > structure representing the output file
* @ param [ in ] format format specification in standard printf - like form
* @ param [ in ] modulo
* @ see lawritemat ( )
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2004-03-17 04:07:21 +01:00
template < typename T >
2010-09-08 18:27:58 +02:00
void NRVec < T > : : fprintf ( FILE * file , const char * format , const int modulo ) const {
NOT_GPU ( * this ) ;
2004-03-17 04:07:21 +01:00
lawritemat ( file , v , 1 , nn , format , 1 , modulo , 0 ) ;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* routine for formatted input via fscanf
* @ param [ in ] f pointer to < tt > FILE < / tt > structure representing the input file
* @ param [ in ] format format specification in standard printf - like form
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-02-14 01:10:07 +01:00
template < typename T >
2010-09-08 18:27:58 +02:00
void NRVec < T > : : fscanf ( FILE * f , const char * format ) {
int n ( 0 ) ;
NOT_GPU ( * this ) ;
2004-03-17 04:07:21 +01:00
2010-09-08 18:27:58 +02:00
if ( : : fscanf ( f , " %d " , & n ) ! = 1 ) laerror ( " can not read vector dimension " ) ;
2004-03-17 04:07:21 +01:00
resize ( n ) ;
2010-09-08 18:27:58 +02:00
for ( register int i = 0 ; i < n ; i + + ) {
if ( : : fscanf ( f , format , v + i ) ! = 1 ) {
laerror ( " can not read the vector element " ) ;
}
}
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* unary minus operator in case of real double - precision vector
* @ return the modified vector by value
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
template < >
const NRVec < double > NRVec < double > : : operator - ( ) const {
NRVec < double > result ( * this ) ;
result . copyonwrite ( ) ;
# ifdef CUDALA
if ( location = = cpu ) {
# endif
2021-06-10 17:44:54 +02:00
cblas_dscal ( nn , - 1.0 , result . v , 1 ) ;
2010-09-08 18:27:58 +02:00
# ifdef CUDALA
} else {
2021-06-10 17:44:54 +02:00
cublasDscal ( nn , - 1.0 , result . v , 1 ) ;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS ( " cublasDscal " ) ;
}
# endif
2004-03-17 04:07:21 +01:00
return result ;
}
2021-10-22 13:28:13 +02:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* unary minus operator in case of complex double - precision vector
* @ return the modified vector by value
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
template < >
2021-04-21 15:04:37 +02:00
const NRVec < std : : complex < double > > NRVec < std : : complex < double > > : : operator - ( ) const {
NRVec < std : : complex < double > > result ( * this ) ;
2010-09-08 18:27:58 +02:00
result . copyonwrite ( ) ;
# ifdef CUDALA
if ( location = = cpu ) {
# endif
2021-06-10 17:44:54 +02:00
cblas_zdscal ( nn , - 1.0 , result . v , 1 ) ;
2010-09-08 18:27:58 +02:00
# ifdef CUDALA
} else {
2021-06-10 17:44:54 +02:00
cublasZdscal ( nn , - 1.0 , ( cuDoubleComplex * ) result . v , 1 ) ;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS ( " cublasZdscal " ) ;
}
# endif
return result ;
}
2005-09-06 17:55:07 +02:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* unary minus operator for vector of general type
* @ return the modified vector
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
template < typename T >
const NRVec < T > NRVec < T > : : operator - ( ) const {
NOT_GPU ( * this ) ;
NRVec < T > result ( nn , getlocation ( ) ) ;
for ( register int i = 0 ; i < nn ; i + + ) result . v [ i ] = - v [ i ] ;
return result ;
}
2005-09-06 17:55:07 +02:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* comparison operator ( lexicographical order )
* @ param [ in ] rhs vector intended for comparison
* @ return
* \ li \ c true current vector is bigger than vector \ c rhs
* \ li \ c false current vector is smaller than vector \ c rhs
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-09-06 17:55:07 +02:00
template < typename T >
2010-09-08 18:27:58 +02:00
const bool NRVec < T > : : operator > ( const NRVec & rhs ) const {
int n ( nn ) ;
SAME_LOC ( * this , rhs ) ;
NOT_GPU ( * this ) ;
if ( rhs . nn < n ) n = rhs . nn ;
for ( register int i = 0 ; i < n ; + + i ) {
if ( LA_traits < T > : : bigger ( v [ i ] , rhs . v [ i ] ) ) return true ;
if ( LA_traits < T > : : smaller ( v [ i ] , rhs . v [ i ] ) ) return false ;
2005-09-06 17:55:07 +02:00
}
2010-09-08 18:27:58 +02:00
return nn > rhs . nn ;
2005-09-06 17:55:07 +02:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* comparison operator ( lexicographical order )
* @ param [ in ] rhs vector intended for comparison
* @ return
* \ li \ c false current vector is bigger than vector \ c rhs
* \ li \ c true current vector is smaller than vector \ c rhs
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-09-06 17:55:07 +02:00
template < typename T >
2010-09-08 18:27:58 +02:00
const bool NRVec < T > : : operator < ( const NRVec & rhs ) const {
int n ( nn ) ;
SAME_LOC ( * this , rhs ) ;
NOT_GPU ( * this ) ;
if ( rhs . nn < n ) n = rhs . nn ;
for ( register int i = 0 ; i < n ; + + i ) {
if ( LA_traits < T > : : smaller ( v [ i ] , rhs . v [ i ] ) ) return true ;
if ( LA_traits < T > : : bigger ( v [ i ] , rhs . v [ i ] ) ) return false ;
}
return nn < rhs . nn ;
2005-09-06 17:55:07 +02:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* fill the real vector with pseudorandom numbers generated using uniform distribution
* @ param [ in ] x specification of the interval \ f $ [ 0 , x ] \ f $ for the random number generator
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2008-03-03 16:35:37 +01:00
template < >
2010-09-08 18:27:58 +02:00
void NRVec < double > : : randomize ( const double & x ) {
NOT_GPU ( * this ) ;
for ( register int i = 0 ; i < nn ; + + i ) {
v [ i ] = x * ( 2. * random ( ) / ( 1. + RAND_MAX ) - 1. ) ;
}
2008-03-03 16:35:37 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* fill the complex vector with pseudorandom numbers generated using uniform distribution
* the real and imaginary parts are generated independently
* @ param [ in ] x specification of the interval \ f $ [ 0 , x ] \ f $ for the random number generator
* @ return
* \ li \ c false current vector is bigger than vector \ c rhs
* \ li \ c true current vector is smaller than vector \ c rhs
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2009-10-08 16:01:15 +02:00
template < >
2021-04-21 15:04:37 +02:00
void NRVec < std : : complex < double > > : : randomize ( const double & x ) {
2010-09-08 18:27:58 +02:00
NOT_GPU ( * this ) ;
for ( register int i = 0 ; i < nn ; + + i ) {
2021-04-21 15:04:37 +02:00
v [ i ] = std : : complex < double > ( x * ( 2. * random ( ) / ( 1. + RAND_MAX ) - 1. ) , x * ( 2. * random ( ) / ( 1. + RAND_MAX ) - 1. ) ) ;
2010-09-08 18:27:58 +02:00
}
2009-10-08 16:01:15 +02:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* constructor creating complex vector from a real one
* @ param [ in ] rhs the real vector being converted into the complex one
* @ param [ in ] imagpart
* \ li \ c true vector \ c rhs is interpreted as the imaginary part of the new complex vector
* \ li \ c false vector \ c rhs is interpreted as the real part of the new complex vector
* @ return
* \ li \ c false current vector is bigger than vector \ c rhs
* \ li \ c true current vector is smaller than vector \ c rhs
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
template < >
2021-04-21 15:04:37 +02:00
NRVec < std : : complex < double > > : : NRVec ( const NRVec < double > & rhs , bool imagpart ) : nn ( rhs . size ( ) ) {
2009-10-08 16:01:15 +02:00
2010-09-08 18:27:58 +02:00
count = new int ;
* count = 1 ;
# ifdef CUDALA
location = rhs . getlocation ( ) ;
if ( location = = cpu ) {
# endif
2021-04-21 15:04:37 +02:00
v = ( std : : complex < double > * ) new std : : complex < double > [ nn ] ;
memset ( v , 0 , nn * sizeof ( std : : complex < double > ) ) ;
2010-09-08 18:27:58 +02:00
cblas_dcopy ( nn , & rhs [ 0 ] , 1 , ( ( double * ) v ) + ( imagpart ? 1 : 0 ) , 2 ) ;
# ifdef CUDALA
} else {
2021-04-21 15:04:37 +02:00
v = ( std : : complex < double > * ) gpualloc ( nn * sizeof ( std : : complex < double > ) ) ;
2009-10-08 16:01:15 +02:00
2010-09-08 18:27:58 +02:00
cublasZscal ( nn , CUZERO , ( cuDoubleComplex * ) v , 1 ) ;
TEST_CUBLAS ( " cublasZscal " ) ;
2009-10-08 16:01:15 +02:00
2010-09-08 18:27:58 +02:00
cublasDcopy ( nn , & rhs [ 0 ] , 1 , ( ( double * ) v ) + ( imagpart ? 1 : 0 ) , 2 ) ;
TEST_CUBLAS ( " cublasDcopy " ) ;
}
# endif
}
2005-09-06 17:55:07 +02:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the < b > axpy < / b > operation on the current real vector \ f $ \ vec { v } \ f $ , i . e .
* \ f [ \ vec { v } \ leftarrow \ vec { v } + \ alpha \ vec { x } \ f ]
* @ param [ in ] alpha double - precision real parameter \ f $ \ alpha \ f $
* @ param [ in ] x double - precision real vector \ f $ \ vec { x } \ f $
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2010-09-08 18:27:58 +02:00
void NRVec < double > : : axpy ( const double alpha , const NRVec < double > & x ) {
2004-03-17 04:07:21 +01:00
# ifdef DEBUG
2010-09-08 18:27:58 +02:00
if ( nn ! = x . nn ) laerror ( " incompatible vectors " ) ;
2004-03-17 04:07:21 +01:00
# endif
2010-09-08 18:27:58 +02:00
SAME_LOC ( * this , x ) ;
2004-03-17 04:07:21 +01:00
copyonwrite ( ) ;
2010-09-08 18:27:58 +02:00
# ifdef CUDALA
if ( location = = cpu ) {
# endif
cblas_daxpy ( nn , alpha , x . v , 1 , v , 1 ) ;
# ifdef CUDALA
} else {
cublasDaxpy ( nn , alpha , x . v , 1 , v , 1 ) ;
TEST_CUBLAS ( " cublasDaxpy " ) ;
}
# endif
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the < b > axpy < / b > operation on the current complex vector \ f $ \ vec { v } \ f $ , i . e .
* \ f [ \ vec { v } \ leftarrow \ vec { v } + \ alpha \ vec { x } \ f ]
* @ param [ in ] alpha \ f $ \ alpha \ f $ parameter
* @ param [ in ] x complex vector \ f $ \ vec { x } \ f $
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2021-04-21 15:04:37 +02:00
void NRVec < std : : complex < double > > : : axpy ( const std : : complex < double > alpha , const NRVec < std : : complex < double > > & x ) {
2004-03-17 04:07:21 +01:00
# ifdef DEBUG
2010-09-08 18:27:58 +02:00
if ( nn ! = x . nn ) laerror ( " incompatible vectors " ) ;
2004-03-17 04:07:21 +01:00
# endif
2010-09-08 18:27:58 +02:00
SAME_LOC ( * this , x ) ;
2004-03-17 04:07:21 +01:00
copyonwrite ( ) ;
2010-09-08 18:27:58 +02:00
# ifdef CUDALA
if ( location = = cpu ) {
# endif
cblas_zaxpy ( nn , & alpha , x . v , 1 , v , 1 ) ;
# ifdef CUDALA
} else {
const cuDoubleComplex _alpha = make_cuDoubleComplex ( alpha . real ( ) , alpha . imag ( ) ) ;
cublasZaxpy ( nn , _alpha , ( cuDoubleComplex * ) x . v , 1 , ( cuDoubleComplex * ) v , 1 ) ;
TEST_CUBLAS ( " cublasZaxpy " ) ;
}
# endif
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the < b > axpy < / b > operation on the current real vector \ f $ \ vec { v } \ f $ , i . e .
* \ f [ \ vec { v } \ leftarrow \ vec { v } + \ alpha \ vec { x } \ f ]
* @ param [ in ] alpha \ f $ \ alpha \ f $ parameter
* @ param [ in ] x pointer to double - precision real data
* @ param [ in ] stride sets the stride
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2010-09-08 18:27:58 +02:00
void NRVec < double > : : axpy ( const double alpha , const double * x , const int stride ) {
NOT_GPU ( * this ) ;
2004-03-17 04:07:21 +01:00
copyonwrite ( ) ;
cblas_daxpy ( nn , alpha , x , stride , v , 1 ) ;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the < b > axpy < / b > operation on the current complex vector \ f $ \ vec { v } \ f $ , i . e .
* \ f [ \ vec { v } \ leftarrow \ vec { v } + \ alpha \ vec { x } \ f ]
* @ param [ in ] alpha double - precision complex parameter \ f $ \ alpha \ f $
* @ param [ in ] x pointer to double - precision complex data
* @ param [ in ] stride sets the stride
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2021-04-21 15:04:37 +02:00
void NRVec < std : : complex < double > > : : axpy ( const std : : complex < double > alpha , const std : : complex < double > * x , const int stride ) {
2010-09-08 18:27:58 +02:00
NOT_GPU ( * this ) ;
2004-03-17 04:07:21 +01:00
copyonwrite ( ) ;
2009-11-12 22:01:19 +01:00
cblas_zaxpy ( nn , & alpha , x , stride , v , 1 ) ;
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* assign real scalar value to every element of the current vector
* @ param [ in ] a scalar value to be assigned
* @ return reference to the modified vector
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2010-09-08 18:27:58 +02:00
NRVec < double > & NRVec < double > : : operator = ( const double & a ) {
copyonwrite ( ) ;
# ifdef CUDALA
if ( location = = cpu ) {
# endif
cblas_dcopy ( nn , & a , 0 , v , 1 ) ;
# ifdef CUDALA
} else {
smart_gpu_set ( nn , ( double ) 0 , v ) ;
}
# endif
return * this ;
2004-03-17 04:07:21 +01:00
}
2005-11-20 14:46:00 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* assign complex scalar value to every element of the current vector
* @ param [ in ] a scalar value to be assigned
* @ return reference to the modified vector
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2021-04-21 15:04:37 +02:00
NRVec < std : : complex < double > > & NRVec < std : : complex < double > > : : operator = ( const std : : complex < double > & a ) {
2010-09-08 18:27:58 +02:00
copyonwrite ( ) ;
# ifdef CUDALA
if ( location = = cpu ) {
# endif
cblas_zcopy ( nn , & a , 0 , v , 1 ) ;
# ifdef CUDALA
} else {
2021-04-21 15:04:37 +02:00
smart_gpu_set ( nn , ( std : : complex < double > ) 0 , v ) ;
2010-09-08 18:27:58 +02:00
}
# endif
return * this ;
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* assign scalar value to every element of the current vector of general type < code > T < / code >
* @ param [ in ] a scalar value to be assigned
* @ return reference to the modified vector
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2004-03-17 04:07:21 +01:00
template < typename T >
2010-09-08 18:27:58 +02:00
NRVec < T > & NRVec < T > : : operator = ( const T & a ) {
NOT_GPU ( * this ) ;
2004-03-17 04:07:21 +01:00
copyonwrite ( ) ;
2010-09-08 18:27:58 +02:00
if ( a ! = ( T ) 0 ) {
for ( register int i = 0 ; i < nn ; i + + ) v [ i ] = a ;
} else {
2004-03-17 04:07:21 +01:00
memset ( v , 0 , nn * sizeof ( T ) ) ;
2010-09-08 18:27:58 +02:00
}
2004-03-17 04:07:21 +01:00
return * this ;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* normalize current real vector ( in the Euclidean norm )
* @ param [ in ] norm if not NULL , the norm of this vector is stored into * norm
* @ return reference to the modified vector
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2010-09-08 18:27:58 +02:00
NRVec < double > & NRVec < double > : : normalize ( double * norm ) {
double tmp ( 0.0 ) ;
# ifdef CUDALA
if ( location = = cpu ) {
# endif
tmp = cblas_dnrm2 ( nn , v , 1 ) ;
if ( norm ) * norm = tmp ;
# ifdef DEBUG
if ( ! tmp ) laerror ( " attempt to normalize zero vector " ) ;
# endif
copyonwrite ( ) ;
tmp = 1.0 / tmp ;
cblas_dscal ( nn , tmp , v , 1 ) ;
# ifdef CUDALA
} else {
tmp = cublasDnrm2 ( nn , v , 1 ) ;
TEST_CUBLAS ( " cublasDnrm2 " ) ;
if ( norm ) * norm = tmp ;
# ifdef DEBUG
if ( ! tmp ) laerror ( " attempt to normalize zero vector " ) ;
# endif
copyonwrite ( ) ;
tmp = 1.0 / tmp ;
cublasDscal ( nn , tmp , v , 1 ) ;
TEST_CUBLAS ( " cublasDscal " ) ;
}
2004-03-17 04:07:21 +01:00
# endif
return * this ;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* normalize current complex vector ( in the Euclidean norm )
* @ param [ in ] norm if not NULL , the norm of this vector is stored into * norm
* @ return reference to the modified vector
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2021-04-21 15:04:37 +02:00
NRVec < std : : complex < double > > & NRVec < std : : complex < double > > : : normalize ( double * norm ) {
2010-09-08 18:27:58 +02:00
double tmp ( 0.0 ) ;
# ifdef CUDALA
if ( location = = cpu ) {
2004-03-17 04:07:21 +01:00
# endif
2010-09-08 18:27:58 +02:00
tmp = cblas_dznrm2 ( nn , v , 1 ) ;
if ( norm ) * norm = tmp ;
# ifdef DEBUG
if ( tmp = = 0.0 ) laerror ( " attempt to normalize zero vector " ) ;
# endif
copyonwrite ( ) ;
tmp = 1.0 / tmp ;
cblas_zdscal ( nn , tmp , v , 1 ) ;
# ifdef CUDALA
} else {
tmp = cublasDznrm2 ( nn , ( cuDoubleComplex * ) v , 1 ) ;
TEST_CUBLAS ( " cublasDznrm2 " ) ;
if ( norm ) * norm = tmp ;
# ifdef DEBUG
if ( tmp = = 0.0 ) laerror ( " attempt to normalize zero vector " ) ;
# endif
copyonwrite ( ) ;
tmp = 1.0 / tmp ;
cublasZdscal ( nn , tmp , ( cuDoubleComplex * ) v , 1 ) ;
TEST_CUBLAS ( " cublasZdscal " ) ;
}
# endif
2004-03-17 04:07:21 +01:00
return * this ;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the \ b gemv operation on this real vector \ f $ \ vec { y } \ f $ , i . e .
* \ f [ \ vec { y } \ leftarrow \ alpha \ operatorname { op } ( A ) \ cdot \ vec { x } + \ beta \ vec { y } \ f ]
* @ param [ in ] beta real parameter \ f $ \ beta \ f $
* @ param [ in ] A real matrix \ f $ A \ f $
* @ param [ in ] trans if < code > trans = = ' n ' < / code > use \ f $ A \ f $ directly , otherwise \ f $ \ operatorname { op } ( A ) \ equiv { } A ^ \ mathrm { T } \ f $
* @ param [ in ] alpha real parameter \ f $ \ alpha \ f $
* @ param [ in ] x real vector \ f $ \ vec { x } \ f $
* @ see NRMat < T > : : gemm
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2004-03-17 04:07:21 +01:00
void NRVec < double > : : gemv ( const double beta , const NRMat < double > & A ,
2010-09-08 18:27:58 +02:00
const char trans , const double alpha , const NRVec & x ) {
2004-03-17 04:07:21 +01:00
# ifdef DEBUG
2013-11-04 15:56:39 +01:00
if ( ( tolower ( trans ) = = ' n ' ? A . ncols ( ) : A . nrows ( ) ) ! = x . size ( ) ) { laerror ( " incompatible vectors " ) ; }
2004-03-17 04:07:21 +01:00
# endif
2010-09-08 18:27:58 +02:00
SAME_LOC3 ( * this , x , A ) ;
2006-09-12 01:07:22 +02:00
copyonwrite ( ) ;
2010-09-08 18:27:58 +02:00
2010-06-25 17:28:19 +02:00
# ifdef CUDALA
2010-09-08 18:27:58 +02:00
if ( location = = cpu ) {
2010-06-25 17:28:19 +02:00
# endif
2013-11-04 15:56:39 +01:00
cblas_dgemv ( CblasRowMajor , ( tolower ( trans ) = = ' n ' ? CblasNoTrans : CblasTrans ) , A . nrows ( ) , A . ncols ( ) , alpha , A , A . ncols ( ) , x . v , 1 , beta , v , 1 ) ;
2010-06-25 17:28:19 +02:00
# ifdef CUDALA
2010-09-08 18:27:58 +02:00
} else {
2013-11-04 15:56:39 +01:00
cublasDgemv ( ( tolower ( trans ) = = ' n ' ? ' T ' : ' N ' ) , A . ncols ( ) , A . nrows ( ) , alpha , A , A . ncols ( ) , x . v , 1 , beta , v , 1 ) ;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS ( " cublasDgemv " ) ;
}
2010-06-25 17:28:19 +02:00
# endif
2004-03-17 04:07:21 +01:00
}
2005-02-18 23:08:15 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the \ b gemv operation on this complex vector \ f $ \ vec { y } \ f $ , i . e .
* \ f [ \ vec { y } \ leftarrow \ alpha \ operatorname { op } ( A ) \ cdot \ vec { x } + \ beta \ vec { y } \ f ]
* @ param [ in ] beta real parameter \ f $ \ beta \ f $
* @ param [ in ] A < b > real < / b > matrix \ f $ A \ f $
* @ param [ in ] trans if < tt > trans = = ' n ' < / tt > use \ f $ A \ f $ directly , otherwise \ f $ \ operatorname { op } ( A ) \ equiv { } A ^ \ mathrm { T } \ f $
* @ param [ in ] alpha real parameter \ f $ \ alpha \ f $
* @ param [ in ] x real vector \ f $ \ vec { x } \ f $
* @ see gemm
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2009-10-08 16:01:15 +02:00
template < >
2021-04-21 15:04:37 +02:00
void NRVec < std : : complex < double > > : : gemv ( const double beta , const NRMat < double > & A ,
const char trans , const double alpha , const NRVec < std : : complex < double > > & x ) {
2009-10-08 16:01:15 +02:00
# ifdef DEBUG
2013-11-04 15:56:39 +01:00
if ( ( tolower ( trans ) = = ' n ' ? A . ncols ( ) : A . nrows ( ) ) ! = x . size ( ) ) { laerror ( " incompatible vectors " ) ; }
2010-09-08 18:27:58 +02:00
# endif
SAME_LOC3 ( * this , x , A ) ;
copyonwrite ( ) ;
# ifdef CUDALA
if ( location = = cpu ) {
# endif
2013-11-04 15:56:39 +01:00
cblas_dgemv ( CblasRowMajor , ( tolower ( trans ) = = ' n ' ? CblasNoTrans : CblasTrans ) ,
2010-09-08 18:27:58 +02:00
A . nrows ( ) , A . ncols ( ) , alpha , A , A . ncols ( ) , ( double * ) x . v , 2 , beta , ( double * ) v , 2 ) ;
2013-11-04 15:56:39 +01:00
cblas_dgemv ( CblasRowMajor , ( tolower ( trans ) = = ' n ' ? CblasNoTrans : CblasTrans ) ,
2010-09-08 18:27:58 +02:00
A . nrows ( ) , A . ncols ( ) , alpha , A , A . ncols ( ) , ( ( double * ) x . v ) + 1 , 2 , beta , ( ( double * ) v ) + 1 , 2 ) ;
# ifdef CUDALA
} else {
2013-11-04 15:56:39 +01:00
cublasDgemv ( ( tolower ( trans ) = = ' n ' ? ' T ' : ' N ' ) , A . ncols ( ) , A . nrows ( ) , alpha , A , A . ncols ( ) , ( double * ) ( x . v ) , 2 , beta , ( double * ) v , 2 ) ;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS ( " cublasDgemv " ) ;
2013-11-04 15:56:39 +01:00
cublasDgemv ( ( tolower ( trans ) = = ' n ' ? ' T ' : ' N ' ) , A . ncols ( ) , A . nrows ( ) , alpha , A , A . ncols ( ) , ( ( double * ) x . v ) + 1 , 2 , beta , ( ( double * ) v ) + 1 , 2 ) ;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS ( " cublasDgemv " ) ;
}
2009-10-08 16:01:15 +02:00
# endif
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the \ b gemv operation on this complex vector \ f $ \ vec { y } \ f $ , i . e .
* \ f [ \ vec { y } \ leftarrow \ alpha \ operatorname { op } ( A ) \ cdot \ vec { x } + \ beta \ vec { y } \ f ]
* @ param [ in ] beta complex parameter \ f $ \ beta \ f $
* @ param [ in ] A < b > complex < / b > matrix \ f $ A \ f $
* @ param [ in ] trans if < code > trans = = ' n ' < / code > use \ f $ A \ f $ directly , otherwise \ f $ \ operatorname { op } ( A ) \ equiv { } A ^ \ mathrm { T } \ f $
* @ param [ in ] alpha complex parameter \ f $ \ alpha \ f $
* @ param [ in ] x real vector \ f $ \ vec { x } \ f $
* @ see gemm
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2021-04-21 15:04:37 +02:00
void NRVec < std : : complex < double > > : : gemv ( const std : : complex < double > beta ,
const NRMat < std : : complex < double > > & A , const char trans ,
const std : : complex < double > alpha , const NRVec < std : : complex < double > > & x ) {
2004-03-17 04:07:21 +01:00
# ifdef DEBUG
2013-11-04 15:56:39 +01:00
if ( ( tolower ( trans ) = = ' n ' ? A . ncols ( ) : A . nrows ( ) ) ! = x . size ( ) ) { laerror ( " incompatible vectors " ) ; }
2004-03-17 04:07:21 +01:00
# endif
2010-09-08 18:27:58 +02:00
SAME_LOC3 ( * this , x , A ) ;
2006-09-12 01:07:22 +02:00
copyonwrite ( ) ;
2010-09-08 18:27:58 +02:00
# ifdef CUDALA
if ( location = = cpu ) {
# endif
2013-11-04 15:56:39 +01:00
cblas_zgemv ( CblasRowMajor , ( tolower ( trans ) = = ' n ' ? CblasNoTrans : CblasTrans ) ,
2010-09-08 18:27:58 +02:00
A . nrows ( ) , A . ncols ( ) , & alpha , A , A . ncols ( ) , x . v , 1 , & beta , v , 1 ) ;
# ifdef CUDALA
} else {
const cuDoubleComplex _alpha = make_cuDoubleComplex ( alpha . real ( ) , alpha . imag ( ) ) ;
const cuDoubleComplex _beta = make_cuDoubleComplex ( beta . real ( ) , beta . imag ( ) ) ;
2004-03-17 04:07:21 +01:00
2013-11-04 15:56:39 +01:00
cublasZgemv ( ( tolower ( trans ) = = ' n ' ? ' T ' : ' N ' ) , A . ncols ( ) , A . nrows ( ) ,
2010-09-08 18:27:58 +02:00
_alpha , ( cuDoubleComplex * ) ( A [ 0 ] ) , A . ncols ( ) , ( cuDoubleComplex * ) ( x . v ) , 1 , _beta , ( cuDoubleComplex * ) v , 1 ) ;
TEST_CUBLAS ( " cublasZgemv " ) ;
}
# endif
}
2005-02-18 23:08:15 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the \ b gemv operation on this real vector \ f $ \ vec { y } \ f $ , i . e .
* \ f [ \ vec { y } \ leftarrow \ alpha \ operatorname { op } ( A ) \ cdot \ vec { x } + \ beta \ vec { y } \ f ]
* @ param [ in ] beta real parameter \ f $ \ beta \ f $
* @ param [ in ] A real symmetric matrix \ f $ A \ f $ stored in packed form
* @ param [ in ] trans if < code > trans = = ' n ' < / code > use \ f $ A \ f $ directly , otherwise \ f $ \ operatorname { op } ( A ) \ equiv { } A ^ \ mathrm { T } \ f $
* @ param [ in ] alpha real parameter \ f $ \ alpha \ f $
* @ param [ in ] x real vector \ f $ \ vec { x } \ f $
* @ see gemm , NRSMat < T >
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2005-02-18 23:08:15 +01:00
void NRVec < double > : : gemv ( const double beta , const NRSMat < double > & A ,
2010-09-08 18:27:58 +02:00
const char trans , const double alpha , const NRVec & x ) {
2004-03-17 04:07:21 +01:00
# ifdef DEBUG
2010-09-08 18:27:58 +02:00
if ( A . ncols ( ) ! = x . size ( ) ) { laerror ( " incompatible dimensions " ) ; }
2004-03-17 04:07:21 +01:00
# endif
2010-09-08 18:27:58 +02:00
SAME_LOC3 ( * this , A , x ) ;
2006-09-12 01:07:22 +02:00
copyonwrite ( ) ;
2010-09-08 18:27:58 +02:00
2010-06-25 17:28:19 +02:00
# ifdef CUDALA
2010-09-08 18:27:58 +02:00
if ( location = = cpu ) {
2010-06-25 17:28:19 +02:00
# endif
2010-09-08 18:27:58 +02:00
cblas_dspmv ( CblasRowMajor , CblasLower , A . ncols ( ) , alpha , A , x . v , 1 , beta , v , 1 ) ;
2010-06-25 17:28:19 +02:00
# ifdef CUDALA
2010-09-08 18:27:58 +02:00
} else {
cublasDspmv ( ' U ' , A . ncols ( ) , alpha , A , x . v , 1 , beta , v , 1 ) ;
TEST_CUBLAS ( " cublasDspmv " ) ;
}
2010-06-25 17:28:19 +02:00
# endif
2004-03-17 04:07:21 +01:00
}
2005-02-18 23:08:15 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the \ c gemv operation on this complex vector \ f $ \ vec { y } \ f $ , i . e .
* \ f [ \ vec { y } \ leftarrow \ alpha \ operatorname { op } ( A ) \ cdot \ vec { x } + \ beta \ vec { y } \ f ]
* @ param [ in ] beta real parameter \ f $ \ beta \ f $
* @ param [ in ] A < b > real symmetric < / b > matrix \ f $ A \ f $ stored in packed form
* @ param [ in ] trans if < code > trans = = ' n ' < / code > use \ f $ A \ f $ directly , otherwise \ f $ \ operatorname { op } ( A ) \ equiv { } A ^ \ mathrm { T } \ f $
* @ param [ in ] alpha real parameter \ f $ \ alpha \ f $
* @ param [ in ] x complex vector \ f $ \ vec { x } \ f $
* @ see gemm , NRSMat < T >
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2009-10-08 16:01:15 +02:00
template < >
2021-04-21 15:04:37 +02:00
void NRVec < std : : complex < double > > : : gemv ( const double beta , const NRSMat < double > & A ,
const char trans , const double alpha , const NRVec < std : : complex < double > > & x ) {
2009-10-08 16:01:15 +02:00
# ifdef DEBUG
2010-09-08 18:27:58 +02:00
if ( A . ncols ( ) ! = x . size ( ) ) { laerror ( " incompatible dimensions " ) ; }
2009-10-08 16:01:15 +02:00
# endif
2010-09-08 18:27:58 +02:00
SAME_LOC3 ( * this , A , x ) ;
2009-10-08 16:01:15 +02:00
copyonwrite ( ) ;
2010-09-08 18:27:58 +02:00
# ifdef CUDALA
if ( location = = cpu ) {
# endif
cblas_dspmv ( CblasRowMajor , CblasLower , A . ncols ( ) , alpha , A , ( double * ) x . v , 2 , beta , ( double * ) v , 2 ) ;
cblas_dspmv ( CblasRowMajor , CblasLower , A . ncols ( ) , alpha , A , ( ( double * ) x . v ) + 1 , 2 , beta , ( ( double * ) v ) + 1 , 2 ) ;
# ifdef CUDALA
} else {
cublasDspmv ( ' U ' , A . ncols ( ) , alpha , A , ( double * ) ( x . v ) , 2 , beta , ( double * ) v , 2 ) ;
TEST_CUBLAS ( " cublasDspmv " ) ;
cublasDspmv ( ' U ' , A . ncols ( ) , alpha , A , ( ( double * ) ( x . v ) ) + 1 , 2 , beta , ( ( double * ) v ) + 1 , 2 ) ;
TEST_CUBLAS ( " cublasDspmv " ) ;
}
# endif
}
2005-02-18 23:08:15 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* perform the \ b gemv operation on this complex vector \ f $ \ vec { y } \ f $ , i . e .
* \ f [ \ vec { y } \ leftarrow \ alpha \ operatorname { op } ( A ) \ cdot \ vec { x } + \ beta \ vec { y } \ f ]
* @ param [ in ] beta complex parameter \ f $ \ beta \ f $
* @ param [ in ] A < b > complex Hermitian < / b > matrix \ f $ A \ f $ stored in packed form
* @ param [ in ] trans not used
* @ param [ in ] alpha complex parameter \ f $ \ alpha \ f $
* @ param [ in ] x complex vector \ f $ \ vec { x } \ f $
* @ see gemm , NRSMat < T >
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2021-04-21 15:04:37 +02:00
void NRVec < std : : complex < double > > : : gemv ( const std : : complex < double > beta ,
const NRSMat < std : : complex < double > > & A , const char trans ,
const std : : complex < double > alpha , const NRVec < std : : complex < double > > & x ) {
2004-03-17 04:07:21 +01:00
# ifdef DEBUG
2010-09-08 18:27:58 +02:00
if ( A . ncols ( ) ! = x . size ( ) ) laerror ( " incompatible dimensions " ) ;
2004-03-17 04:07:21 +01:00
# endif
2010-09-08 18:27:58 +02:00
SAME_LOC3 ( * this , A , x ) ;
2006-09-12 01:07:22 +02:00
copyonwrite ( ) ;
2005-02-18 23:08:15 +01:00
2010-09-08 18:27:58 +02:00
# ifdef CUDALA
if ( location = = cpu ) {
# endif
cblas_zhpmv ( CblasRowMajor , CblasLower , A . ncols ( ) , & alpha , A , x . v , 1 , & beta , v , 1 ) ;
# ifdef CUDALA
} else {
const cuDoubleComplex _alpha = make_cuDoubleComplex ( alpha . real ( ) , alpha . imag ( ) ) ;
const cuDoubleComplex _beta = make_cuDoubleComplex ( beta . real ( ) , beta . imag ( ) ) ;
2005-02-18 23:08:15 +01:00
2021-04-21 15:04:37 +02:00
cublasZhpmv ( ' U ' , A . ncols ( ) , _alpha , ( cuDoubleComplex * ) ( ( const std : : complex < double > * ) A ) , ( cuDoubleComplex * ) ( x . v ) , 1 , _beta , ( cuDoubleComplex * ) ( this - > v ) , 1 ) ;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS ( " cublasZhpmv " ) ;
}
# endif
}
2005-02-18 23:08:15 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* computes the outer product of this real vector \ f $ \ vec { a } \ f $ with given
* real vector \ f $ \ vec { b } \ f $ and scales the resulting matrix with factor \ f $ \ alpha \ f $ , i . e .
* the matrix elements of the final matrix \ f $ A \ f $ can be expressed as
* \ f [ A_ { i , j } = \ alpha \ cdot \ vec { a } _i \ vec { b } _j \ f ]
* @ param [ in ] b real vector \ f $ \ vec { b } \ f $
* @ param [ in ] conj not used
* @ param [ in ] scale real factor \ f $ \ alpha \ f $
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2010-09-08 18:27:58 +02:00
const NRMat < double > NRVec < double > : : otimes ( const NRVec < double > & b , const bool conj , const double & scale ) const {
SAME_LOC ( * this , b ) ;
NRMat < double > result ( 0.0 , nn , b . nn , this - > getlocation ( ) ) ;
# ifdef CUDALA
if ( location = = cpu ) {
# endif
cblas_dger ( CblasRowMajor , nn , b . nn , scale , v , 1 , b . v , 1 , result , b . nn ) ;
# ifdef CUDALA
} else {
cublasDger ( b . nn , nn , scale , b . v , 1 , v , 1 , result [ 0 ] , b . nn ) ;
TEST_CUBLAS ( " cublasDger " ) ;
}
# endif
2004-03-17 04:07:21 +01:00
return result ;
}
2009-11-12 22:01:19 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* computes the outer product of this complex vector \ f $ \ vec { a } \ f $ with given
* complex vector \ f $ \ vec { b } \ f $ and scales the resulting matrix with factor \ f $ \ alpha \ f $ , i . e .
* the matrix elements of the final matrix \ f $ A \ f $ can be expressed as
* \ f [ A_ { i , j } = \ alpha \ cdot \ vec { a } _i \ vec { b } _j \ f ]
* in case < code > conj = true < / code > , the result is
* \ f [ A_ { i , j } = \ alpha \ cdot \ vec { a } _i \ vec { b } _j ^ { * } \ f ]
* @ param [ in ] b complex vector \ f $ \ vec { b } \ f $
* @ param [ in ] conj determines whther the vector \ f $ \ vec { b } \ f $ is conjugated
* @ param [ in ] scale complex scaling factor \ f $ \ alpha \ f $
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2005-11-20 14:46:00 +01:00
template < >
2021-04-21 15:04:37 +02:00
const NRMat < std : : complex < double > >
NRVec < std : : complex < double > > : : otimes ( const NRVec < std : : complex < double > > & b , const bool conj , const std : : complex < double > & scale ) const {
2010-09-08 18:27:58 +02:00
SAME_LOC ( * this , b ) ;
2021-04-21 15:04:37 +02:00
NRMat < std : : complex < double > > result ( 0. , nn , b . nn , this - > getlocation ( ) ) ;
2004-03-17 04:07:21 +01:00
2010-09-08 18:27:58 +02:00
# ifdef CUDALA
if ( location = = cpu ) {
# endif
if ( conj ) {
cblas_zgerc ( CblasRowMajor , nn , b . nn , & scale , v , 1 , b . v , 1 , result , b . nn ) ;
} else {
cblas_zgeru ( CblasRowMajor , nn , b . nn , & scale , v , 1 , b . v , 1 , result , b . nn ) ;
}
# ifdef CUDALA
} else {
if ( conj ) {
const cuDoubleComplex alpha = make_cuDoubleComplex ( scale . real ( ) , - scale . imag ( ) ) ;
2004-03-17 04:07:21 +01:00
2011-01-18 15:37:05 +01:00
cublasZgerc ( b . nn , nn , alpha , ( cuDoubleComplex * ) ( b . v ) , 1 , ( cuDoubleComplex * ) ( v ) , 1 , ( cuDoubleComplex * ) ( result [ 0 ] ) , b . nn ) ;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS ( " cublasZgerc " ) ;
2006-09-10 22:06:44 +02:00
2010-09-08 18:27:58 +02:00
result . conjugateme ( ) ;
} else {
const cuDoubleComplex alpha = make_cuDoubleComplex ( scale . real ( ) , + scale . imag ( ) ) ;
2006-09-10 22:06:44 +02:00
2011-01-18 15:37:05 +01:00
cublasZgeru ( b . nn , nn , alpha , ( cuDoubleComplex * ) ( b . v ) , 1 , ( cuDoubleComplex * ) ( v ) , 1 , ( cuDoubleComplex * ) ( result [ 0 ] ) , b . nn ) ;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS ( " cublasZgeru " ) ;
}
}
# endif
return result ;
}
2010-06-25 17:28:19 +02:00
2010-09-08 18:27:58 +02:00
template < typename T >
int NRVec < T > : : sort ( int direction , int from , int to , int * perm ) {
NOT_GPU ( * this ) ;
2010-06-25 17:28:19 +02:00
2010-09-08 18:27:58 +02:00
copyonwrite ( ) ;
if ( to = = - 1 ) to = nn - 1 ;
if ( direction ) return memqsort < 1 , NRVec < T > , int , int > ( * this , perm , from , to ) ;
else return memqsort < 0 , NRVec < T > , int , int > ( * this , perm , from , to ) ;
}
2010-06-25 17:28:19 +02:00
2021-05-19 22:29:47 +02:00
template < typename T >
int NRVec < T > : : sort ( int direction , NRPerm < int > & perm )
{
if ( nn ! = perm . size ( ) ) laerror ( " incompatible vector and permutation " ) ;
perm . identity ( ) ;
int r = sort ( direction , 0 , nn - 1 , & perm [ 1 ] ) ;
return r ;
}
2010-09-08 18:27:58 +02:00
template < >
2021-04-21 15:04:37 +02:00
NRVec < std : : complex < double > > complexify ( const NRVec < double > & rhs ) {
NRVec < std : : complex < double > > r ( rhs . size ( ) , rhs . getlocation ( ) ) ;
2010-06-25 17:28:19 +02:00
2010-09-08 18:27:58 +02:00
# ifdef CUDALA
if ( rhs . getlocation ( ) = = cpu ) {
# endif
cblas_dcopy ( rhs . size ( ) , & rhs [ 0 ] , 1 , ( double * ) ( & r [ 0 ] ) , 2 ) ;
# ifdef CUDALA
} else {
r = 0 ;
cublasDcopy ( rhs . size ( ) , rhs . v , 1 , ( double * ) ( r . v ) , 2 ) ;
TEST_CUBLAS ( " cublasDcopy " ) ;
}
# endif
return r ;
}
2006-09-10 22:06:44 +02:00
2021-05-13 16:45:10 +02:00
template < typename T >
2021-05-19 22:29:47 +02:00
const NRVec < T > NRVec < T > : : permuted ( const NRPerm < int > & p , const bool inverse ) const
2021-05-13 16:45:10 +02:00
{
# ifdef DEBUG
if ( ! p . is_valid ( ) ) laerror ( " invalid permutation of vector " ) ;
# endif
int n = p . size ( ) ;
if ( n ! = ( * this ) . size ( ) ) laerror ( " incompatible permutation and vector " ) ;
# ifdef CUDALA
if ( this - > getlocation ( ) ! = cpu | | p . getlocation ( ) ! = cpu ) laerror ( " permutations can be done only in CPU memory " ) ;
# endif
NRVec < T > r ( n ) ;
2021-05-19 22:29:47 +02:00
if ( inverse ) for ( int i = 1 ; i < = n ; + + i ) r [ i - 1 ] = v [ p [ i ] - 1 ] ;
else for ( int i = 1 ; i < = n ; + + i ) r [ p [ i ] - 1 ] = v [ i - 1 ] ;
2021-05-13 16:45:10 +02:00
return r ;
}
2021-05-19 22:29:47 +02:00
template < typename T >
void NRVec < T > : : permuteme ( const CyclePerm < int > & p )
{
# ifdef DEBUG
if ( ! p . is_valid ( ) ) laerror ( " invalid permutation of vector " ) ;
# endif
if ( p . max ( ) > nn ) laerror ( " incompatible permutation and vector " ) ;
# ifdef CUDALA
if ( this - > getlocation ( ) ! = cpu | | p . getlocation ( ) ! = cpu ) laerror ( " permutations can be done only in CPU memory " ) ;
# endif
copyonwrite ( ) ;
for ( int cycle = 1 ; cycle < = p . size ( ) ; + + cycle )
{
int length = p [ cycle ] . size ( ) ;
if ( length < = 1 ) continue ; //trivial cycle
T tmp = v [ p [ cycle ] [ length ] - 1 ] ;
for ( int i = length ; i > 1 ; - - i ) v [ p [ cycle ] [ i ] - 1 ] = v [ p [ cycle ] [ i - 1 ] - 1 ] ;
v [ p [ cycle ] [ 1 ] - 1 ] = tmp ;
}
}
2010-09-08 18:27:58 +02:00
/***************************************************************************/ /**
* forced instantization in the corespoding object file
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2011-01-18 15:37:05 +01:00
/*
Commented out by Roman for ICC
2010-09-08 18:27:58 +02:00
# define INSTANTIZE(T) \
template void NRVec < T > : : put ( int fd , bool dim , bool transp ) const ; \
template void NRVec < T > : : get ( int fd , bool dim , bool transp ) ; \
INSTANTIZE ( double )
2021-04-21 15:04:37 +02:00
INSTANTIZE ( std : : complex < double > )
2010-09-08 18:27:58 +02:00
INSTANTIZE ( char )
INSTANTIZE ( short )
INSTANTIZE ( int )
INSTANTIZE ( long )
INSTANTIZE ( long long )
INSTANTIZE ( unsigned char )
INSTANTIZE ( unsigned short )
INSTANTIZE ( unsigned int )
INSTANTIZE ( unsigned long )
INSTANTIZE ( unsigned long long )
2011-01-18 15:37:05 +01:00
*/
2006-09-10 22:06:44 +02:00
2010-09-08 18:27:58 +02:00
# define INSTANTIZE_DUMMY(T) \
template < > void NRVec < T > : : gemv ( const T beta , const NRMat < T > & a , const char trans , const T alpha , const NRVec < T > & x ) { laerror ( " gemv on unsupported types " ) ; } \
template < > void NRVec < T > : : gemv ( const T beta , const NRSMat < T > & a , const char trans , const T alpha , const NRVec < T > & x ) { laerror ( " gemv on unsupported types " ) ; } \
template < > void NRVec < T > : : gemv ( const T beta , const SparseMat < T > & a , const char trans , const T alpha , const NRVec < T > & x , bool s ) { laerror ( " gemv on unsupported types " ) ; } \
template < > void NRVec < T > : : gemv ( const LA_traits_complex < T > : : Component_type beta , const LA_traits_complex < T > : : NRMat_Noncomplex_type & a , const char trans , const LA_traits_complex < T > : : Component_type alpha , const NRVec < T > & x ) { laerror ( " gemv on unsupported types " ) ; } \
template < > void NRVec < T > : : gemv ( const LA_traits_complex < T > : : Component_type beta , const LA_traits_complex < T > : : NRSMat_Noncomplex_type & a , const char trans , const LA_traits_complex < T > : : Component_type alpha , const NRVec < T > & x ) { laerror ( " gemv on unsupported types " ) ; } \
template < > NRVec < T > & NRVec < T > : : normalize ( LA_traits < T > : : normtype * ) { laerror ( " normalize() impossible for integer types " ) ; return * this ; } \
template < > const NRMat < T > NRVec < T > : : otimes ( const NRVec < T > & b , const bool conj , const T & scale ) const { laerror ( " otimes presently implemented only for double and complex double " ) ; return NRMat < T > ( ) ; }
2011-01-18 15:37:05 +01:00
// Roman
// following gemv are not implemented
template < > void NRVec < double > : : gemv ( const double beta , const SparseMat < double > & a , const char trans , const double alpha , const NRVec < double > & x , bool s ) { laerror ( " gemv on unsupported types " ) ; }
2021-04-21 15:04:37 +02:00
template < > void NRVec < std : : complex < double > > : : gemv ( const std : : complex < double > beta , const SparseMat < std : : complex < double > > & a , const char trans , const std : : complex < double > alpha , const NRVec < std : : complex < double > > & x , bool s ) { laerror ( " gemv on unsupported types " ) ; }
2011-01-18 15:37:05 +01:00
2009-10-08 16:01:15 +02:00
INSTANTIZE_DUMMY ( char )
INSTANTIZE_DUMMY ( short )
INSTANTIZE_DUMMY ( int )
2009-11-12 22:01:19 +01:00
INSTANTIZE_DUMMY ( long )
INSTANTIZE_DUMMY ( long long )
INSTANTIZE_DUMMY ( unsigned char )
INSTANTIZE_DUMMY ( unsigned short )
2009-10-08 16:01:15 +02:00
INSTANTIZE_DUMMY ( unsigned int )
INSTANTIZE_DUMMY ( unsigned long )
2009-11-12 22:01:19 +01:00
INSTANTIZE_DUMMY ( unsigned long long )
2021-04-21 15:04:37 +02:00
INSTANTIZE_DUMMY ( std : : complex < char > )
INSTANTIZE_DUMMY ( std : : complex < short > )
INSTANTIZE_DUMMY ( std : : complex < int > )
INSTANTIZE_DUMMY ( std : : complex < long > )
INSTANTIZE_DUMMY ( std : : complex < long long > )
INSTANTIZE_DUMMY ( std : : complex < unsigned char > )
INSTANTIZE_DUMMY ( std : : complex < unsigned short > )
INSTANTIZE_DUMMY ( std : : complex < unsigned int > )
INSTANTIZE_DUMMY ( std : : complex < unsigned long > )
INSTANTIZE_DUMMY ( std : : complex < unsigned long long > )
INSTANTIZE_DUMMY ( std : : complex < std : : complex < double > > )
INSTANTIZE_DUMMY ( std : : complex < std : : complex < float > > )
2009-11-12 22:01:19 +01:00
2021-05-19 22:29:47 +02:00
//also not supported on gpu
# define INSTANTIZE_NONCOMPLEX(T) \
template < > \
const T NRVec < T > : : max ( ) const \
{ \
if ( nn = = 0 ) return 0 ; \
T m = v [ 0 ] ; \
for ( int i = 1 ; i < nn ; + + i ) if ( v [ i ] > m ) m = v [ i ] ; \
return m ; \
} \
\
template < > \
const T NRVec < T > : : min ( ) const \
{ \
if ( nn = = 0 ) return 0 ; \
T m = v [ 0 ] ; \
for ( int i = 1 ; i < nn ; + + i ) if ( v [ i ] < m ) m = v [ i ] ; \
return m ; \
} \
INSTANTIZE_NONCOMPLEX ( char )
2021-06-24 17:08:55 +02:00
INSTANTIZE_NONCOMPLEX ( unsigned char )
2021-05-19 22:29:47 +02:00
INSTANTIZE_NONCOMPLEX ( short )
2021-06-24 17:08:55 +02:00
INSTANTIZE_NONCOMPLEX ( unsigned short )
2021-05-19 22:29:47 +02:00
INSTANTIZE_NONCOMPLEX ( int )
2021-06-24 17:08:55 +02:00
INSTANTIZE_NONCOMPLEX ( unsigned int )
2021-05-19 22:29:47 +02:00
INSTANTIZE_NONCOMPLEX ( long )
2021-06-24 17:08:55 +02:00
INSTANTIZE_NONCOMPLEX ( unsigned long )
2021-05-19 22:29:47 +02:00
INSTANTIZE_NONCOMPLEX ( long long )
2021-06-24 17:08:55 +02:00
INSTANTIZE_NONCOMPLEX ( unsigned long long )
2021-05-19 22:29:47 +02:00
INSTANTIZE_NONCOMPLEX ( float )
INSTANTIZE_NONCOMPLEX ( double )
2021-10-22 13:28:13 +02:00
/***************************************************************************
* some efficient specializations of concatenations for plain data types
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# define INSTANTIZE_CONCAT(T) \
template < > \
NRVec < T > NRVec < T > : : concat ( const NRVec < T > & rhs ) const \
{ \
if ( nn = = 0 ) return rhs ; \
if ( rhs . nn = = 0 ) return * this ; \
NOT_GPU ( * this ) ; \
NOT_GPU ( rhs ) ; \
NRVec < T > r ( nn + rhs . nn ) ; \
memcpy ( r . v , v , nn * sizeof ( T ) ) ; \
memcpy ( r . v + nn , rhs . v , rhs . nn * sizeof ( T ) ) ; \
return r ; \
} \
INSTANTIZE_CONCAT ( char )
INSTANTIZE_CONCAT ( unsigned char )
INSTANTIZE_CONCAT ( short )
INSTANTIZE_CONCAT ( unsigned short )
INSTANTIZE_CONCAT ( int )
INSTANTIZE_CONCAT ( unsigned int )
INSTANTIZE_CONCAT ( long )
INSTANTIZE_CONCAT ( unsigned long )
INSTANTIZE_CONCAT ( long long )
INSTANTIZE_CONCAT ( unsigned long long )
INSTANTIZE_CONCAT ( float )
INSTANTIZE_CONCAT ( double )
INSTANTIZE_CONCAT ( std : : complex < float > )
INSTANTIZE_CONCAT ( std : : complex < double > )
2011-01-18 15:37:05 +01:00
template class NRVec < double > ;
2021-04-21 15:04:37 +02:00
template class NRVec < std : : complex < double > > ;
2011-01-18 15:37:05 +01:00
template class NRVec < char > ;
template class NRVec < short > ;
template class NRVec < int > ;
template class NRVec < long > ;
template class NRVec < long long > ;
template class NRVec < unsigned char > ;
template class NRVec < unsigned short > ;
template class NRVec < unsigned int > ;
template class NRVec < unsigned long > ;
template class NRVec < unsigned long long > ;
2009-11-12 22:01:19 +01:00
} //namespace