This commit is contained in:
jiri 2010-09-08 16:27:58 +00:00
parent e8cbf9e5fb
commit e580467e5a
14 changed files with 7106 additions and 3691 deletions

View File

@ -1,3 +1,6 @@
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
/*******************************************************************************
*******************************************************************************/
#include "la_traits.h" #include "la_traits.h"
#include "cuda_la.h" #include "cuda_la.h"
@ -7,63 +10,54 @@ namespace LA {
GPUID DEFAULT_LOC = cpu; GPUID DEFAULT_LOC = cpu;
void set_default_loc(const GPUID loc) void set_default_loc(const GPUID loc){
{ DEFAULT_LOC = loc;
DEFAULT_LOC = loc;
} }
void *gpualloc(size_t size) void *gpualloc(size_t size){
{ void *ptr = NULL;
cublasStatus status; cublasAlloc(size, 1, &ptr);
void *ptr=NULL; TEST_CUBLAS("cublasAlloc");
status = cublasAlloc(size,1,&ptr); return ptr;
if(status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasAlloc");
return ptr;
} }
void gpufree(void *ptr) void gpufree(void *ptr){
{ cublasFree(ptr);
cublasStatus status = cublasFree(ptr); TEST_CUBLAS("cublasFree");
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasFree");
} }
void gpuget(size_t n, size_t elsize, const void *from, void *to) void gpuget(size_t n, size_t elsize, const void *from, void *to){
{ cublasGetVector(n, elsize, from, 1, to, 1);
cublasStatus status; TEST_CUBLAS("cublasGetVector");
status=cublasGetVector(n,elsize,from,1,to,1);
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasGetVector");
} }
void gpuput(size_t n, size_t elsize, const void *from, void *to) void gpuput(size_t n, size_t elsize, const void *from, void *to){
{ cublasSetVector(n, elsize, from, 1, to, 1);
cublasStatus status; TEST_CUBLAS("cublasSetVector");
status=cublasSetVector(n,elsize,from,1,to,1);
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasSetVector");
} }
double *gpuputdouble(const double &x) double *gpuputdouble(const double &x){
{ void *ptr = NULL;
cublasStatus status; cublasAlloc(1, sizeof(double), &ptr);
void *ptr=NULL; TEST_CUBLAS("cublasAlloc");
status = cublasAlloc(1,sizeof(double),&ptr);
if(status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasAlloc"); cublasSetVector(1, sizeof(double), &x, 1, ptr, 1);
status=cublasSetVector(1,sizeof(double),&x,1,ptr,1); TEST_CUBLAS("cublasSetVector");
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasSetVector");
return (double *)ptr; return (double *)ptr;
} }
complex<double> *gpuputcomplex(const complex<double> &x) complex<double> *gpuputcomplex(const complex<double> &x){
{ void *ptr = NULL;
cublasStatus status; cublasAlloc(1, sizeof(complex<double>), &ptr);
void *ptr=NULL; TEST_CUBLAS("cublasAlloc");
status = cublasAlloc(1,sizeof(complex<double>),&ptr);
if(status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasAlloc");
status=cublasSetVector(1,sizeof(complex<double>),&x,1,ptr,1);
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasSetVector");
return (complex<double> *)ptr;
}
cublasSetVector(1, sizeof(complex<double>), &x, 1, ptr, 1);
TEST_CUBLAS("cublasSetVector");
return (complex<double> *)ptr;
}
} }

View File

@ -1,6 +1,10 @@
//------------------------------------------------------------------------------
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
//------------------------------------------------------------------------------
#ifndef _CUDA_LA_H #ifndef _CUDA_LA_H
#define _CUDA_LA_H #define _CUDA_LA_H
#include <errno.h>
#ifdef CUDALA #ifdef CUDALA
#undef MATPTR #undef MATPTR
#include "cublas.h" #include "cublas.h"
@ -13,6 +17,7 @@ namespace LA {
#ifdef CUDALA #ifdef CUDALA
#define CPU_GPU(x,y) {if((x)!=cpu && (y)!=cpu) laerror("one operand must be in CPU memory");} #define CPU_GPU(x,y) {if((x)!=cpu && (y)!=cpu) laerror("one operand must be in CPU memory");}
#define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");} #define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");}
#define NOT_CPU(x) {if((x).getlocation()==cpu) laerror("Operation not implemented on CPU (yet). Use .moveto(>0) first.");}
#define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");} #define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
#define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");} #define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
#else #else
@ -22,6 +27,16 @@ namespace LA {
#define SAME_LOC3(x,y,z) {} #define SAME_LOC3(x,y,z) {}
#endif #endif
#ifdef DEBUG
#ifdef __GNUG__
#define TEST_CUBLAS(X) { if(cublasGetError() != CUBLAS_STATUS_SUCCESS){ laerror2(#X, __PRETTY_FUNCTION__); } }
#else
#define TEST_CUBLAS(X) { if(cublasGetError() != CUBLAS_STATUS_SUCCESS){ laerror2(#X, __func__); } }
#endif
#else
#define TEST_CUBLAS(X) {}
#endif
typedef enum {undefined=-1, cpu=0, gpu1=1, gpu2=2, gpu3=3, gpu4=4} GPUID; typedef enum {undefined=-1, cpu=0, gpu1=1, gpu2=2, gpu3=3, gpu4=4} GPUID;
#ifdef CUDALA #ifdef CUDALA
@ -33,6 +48,7 @@ public:
{ {
cublasStatus status = cublasInit(); cublasStatus status = cublasInit();
if (status != CUBLAS_STATUS_SUCCESS) laerror("Cannot init GPU for CUBLAS"); if (status != CUBLAS_STATUS_SUCCESS) laerror("Cannot init GPU for CUBLAS");
errno = 0;
} }
~GPU_START(void) ~GPU_START(void)
{ {
@ -50,8 +66,41 @@ extern complex<double> *gpuputcomplex(const complex<double> &x);
void set_default_loc(const GPUID loc); void set_default_loc(const GPUID loc);
extern GPUID DEFAULT_LOC; template <typename T>
void smart_gpu_set(size_t n, const T& val, void *gpu_to, size_t _step = 1){
void *ptr(NULL);
if(sizeof(T)%sizeof(float) != 0){ laerror("memory alignment error"); }
cublasAlloc(1, sizeof(T), &ptr);
TEST_CUBLAS("cublasAlloc");
cublasSetVector(1, sizeof(T), &val, 1, ptr, 1);
TEST_CUBLAS("cublasSetVector");
if(sizeof(T) == sizeof(float)){
cublasScopy(n, (float*)ptr, 0, ((float*)gpu_to), _step);
TEST_CUBLAS("cublasScopy");
}else if(sizeof(T) == sizeof(double)){
cublasDcopy(n, (double*)ptr, 0, ((double*)gpu_to), _step);
TEST_CUBLAS("cublasDcopy");
}else if(sizeof(T) == sizeof(complex<double>)){
cublasZcopy(n, (cuDoubleComplex*)ptr, 0, (cuDoubleComplex*)gpu_to, _step);
TEST_CUBLAS("cublasZcopy");
}else{
for(register int i=0; i<sizeof(T)/sizeof(float); i++){
cublasScopy(n, (float*)ptr + i, 0, ((float*)gpu_to) + i, sizeof(T)/sizeof(float)*_step);
TEST_CUBLAS("cublasScopy");
}
}
cublasFree(ptr);
TEST_CUBLAS("cublasFree");
}
extern GPUID DEFAULT_LOC;
#endif #endif
} }

9
la.h
View File

@ -44,5 +44,12 @@
#include "sparsesmat.h" #include "sparsesmat.h"
#include "vec.h" #include "vec.h"
using namespace LA;
typedef NRMat<int> NRIMat;
typedef NRMat<double> NRDMat;
typedef NRMat<complex<double> > NRCMat;
typedef NRVec<int> NRIVec;
typedef NRVec<double> NRDVec;
typedef NRVec<complex<double> > NRCVec;
#endif /* _LA_H_ */ #endif /* _LA_H_ */

View File

@ -32,6 +32,7 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <iostream> #include <iostream>
#include <limits>
#include <complex> #include <complex>

View File

@ -42,20 +42,20 @@ bool _LA_count_check=true;
extern "C" void _findme(void) {}; //for autoconf test we need a function with C linkage extern "C" void _findme(void) {}; //for autoconf test we need a function with C linkage
void laerror(const char *s1) void laerror2(const char *s1, const char *s2)
{ {
std::cerr << "LA:ERROR - "; std::cerr << "LA:ERROR - ";
std::cout << "LA:ERROR - "; std::cout << "LA:ERROR - ";
if(s1) if(s1)
{ {
std::cerr << s1 << "\n"; std::cerr << s2 << ": " << s1 << "\n";
std::cout << s1 << "\n"; std::cout << s2 << ": " << s1 << "\n";
} }
#ifdef CUDALA #ifdef CUDALA
{ {
cublasStatus s=cublasGetError(); cublasStatus s = cublasGetError();
std::cerr << "CUBLAS status = "<<s<<std::endl; std::cerr << "CUBLAS status = " << s << std::endl;
std::cout << "CUBLAS status = "<<s<<std::endl; std::cout << "CUBLAS status = " << s << std::endl;
} }
#endif #endif
if(errno) perror("system error"); if(errno) perror("system error");
@ -63,43 +63,40 @@ std::cout << "CUBLAS status = "<<s<<std::endl;
throw LAerror(s1); throw LAerror(s1);
} }
//stub for f77 blas called from strassen routine //stub for f77 blas called from strassen routine
extern "C" void xerbla_(const char name[6], int *n) extern "C" void xerbla_(const char name[6], int *n){
{ char msg[1024];
char msg[1024]; strcpy(msg,"LAPACK or BLAS error in routine ");
strcpy(msg,"LAPACK or BLAS error in routine "); strncat(msg,name,6);
strncat(msg,name,6); sprintf(msg+strlen(msg),": illegal value of parameter #%d",*n);
sprintf(msg+strlen(msg),": illegal value of parameter #%d",*n); laerror(msg);
laerror(msg);
} }
//with atlas-cblas another error routine is necessary //with atlas-cblas another error routine is necessary
extern "C" void ATL_xerbla(int p, char *rout, char *form, ...){
extern "C" void ATL_xerbla(int p, char *rout, char *form, ...) char msg0[1024], *msg;
{ va_list argptr;
char msg0[1024], *msg; va_start(argptr, form);
va_list argptr; strcpy(msg0,"ATLAS error\n");
va_start(argptr, form); msg=msg0+strlen(msg0);
strcpy(msg0,"ATLAS error\n"); if (p) {sprintf(msg, "Parameter %d to routine %s was incorrect\n", p, rout); msg+=strlen(msg);}
msg=msg0+strlen(msg0); vsprintf(msg, form, argptr);
if (p) {sprintf(msg, "Parameter %d to routine %s was incorrect\n", p, rout); msg+=strlen(msg);} va_end(argptr);
vsprintf(msg, form, argptr); laerror(msg0);
va_end(argptr);
laerror(msg0);
} }
int cblas_errprn(int ierr, int info, char *form, ...) int cblas_errprn(int ierr, int info, char *form, ...) {
{ char msg0[1024], *msg;
char msg0[1024], *msg; va_list argptr;
va_list argptr; va_start(argptr, form);
va_start(argptr, form); sprintf(msg0,"CBLAS error %d %d\n",ierr,info);
sprintf(msg0,"CBLAS error %d %d\n",ierr,info); msg=msg0+strlen(msg0);
msg=msg0+strlen(msg0); vsprintf(msg, form, argptr);
vsprintf(msg, form, argptr); va_end(argptr);
va_end(argptr); laerror(msg0);
laerror(msg0); return 0;
return 0;
} }
}//namespace }//namespace

View File

@ -29,10 +29,15 @@ class LAerror
LAerror(const char *s) {msg=s;}; LAerror(const char *s) {msg=s;};
}; };
extern void laerror(const char *); #ifdef __GNUG__
#define laerror(X) { LA::laerror2(X, __PRETTY_FUNCTION__); }
#else
#define laerror(X) { LA::laerror2(X, __func__); }
#endif
inline std::ostream & operator<<(std::ostream &s, const LAerror &x) extern void laerror2(const char *, const char *);
{
inline std::ostream & operator<<(std::ostream &s, const LAerror &x) {
s << x.msg; s << x.msg;
return s; return s;
} }

3360
mat.cc

File diff suppressed because it is too large Load Diff

1608
mat.h

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,5 @@
/* /* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
/*******************************************************************************
LA: linear algebra C++ interface library LA: linear algebra C++ interface library
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com> Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
@ -14,8 +15,7 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ *******************************************************************************/
#include "noncblas.h" #include "noncblas.h"
#include "laerror.h" #include "laerror.h"
@ -27,178 +27,156 @@
//Level 1 - straightforward wrappers //Level 1 - straightforward wrappers
extern "C" double FORNAME(ddot) (const FINT *n, const double *x, const FINT *incx, const double *y, const FINT *incy); extern "C" double FORNAME(ddot) (const FINT *n, const double *x, const FINT *incx, const double *y, const FINT *incy);
double cblas_ddot(const int N, const double *X, const int incX, double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY){
const double *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
return FORNAME(ddot)(&ntmp,X,&incxtmp,Y,&incytmp); return FORNAME(ddot)(&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
return FORNAME(ddot)(&N,X,&incX,Y,&incY); return FORNAME(ddot)(&N,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(dscal) (const FINT *n, const double *a, double *x, const FINT *incx); extern "C" void FORNAME(dscal) (const FINT *n, const double *a, double *x, const FINT *incx);
void cblas_dscal(const int N, const double alpha, double *X, const int incX) void cblas_dscal(const int N, const double alpha, double *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
FORNAME(dscal) (&ntmp,&alpha,X,&incxtmp); FORNAME(dscal) (&ntmp,&alpha,X,&incxtmp);
#else #else
FORNAME(dscal) (&N,&alpha,X,&incX); FORNAME(dscal) (&N,&alpha,X,&incX);
#endif #endif
} }
extern "C" void FORNAME(dcopy) (const FINT *n, const double *x, const FINT *incx, double *y, const FINT *incy); extern "C" void FORNAME(dcopy) (const FINT *n, const double *x, const FINT *incx, double *y, const FINT *incy);
void cblas_dcopy(const int N, const double *X, const int incX, void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY){
double *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(dcopy) (&ntmp,X,&incxtmp,Y,&incytmp); FORNAME(dcopy) (&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(dcopy) (&N,X,&incX,Y,&incY); FORNAME(dcopy) (&N,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(daxpy) (const FINT *n, const double *a, const double *x, const FINT *incx, double *y, const FINT *incy); extern "C" void FORNAME(daxpy) (const FINT *n, const double *a, const double *x, const FINT *incx, double *y, const FINT *incy);
void cblas_daxpy(const int N, const double alpha, const double *X, void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY){
const int incX, double *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(daxpy) (&ntmp,&alpha,X,&incxtmp,Y,&incytmp); FORNAME(daxpy) (&ntmp,&alpha,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(daxpy) (&N,&alpha,X,&incX,Y,&incY); FORNAME(daxpy) (&N,&alpha,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" double FORNAME(dnrm2) (const FINT *n, const double *x, const FINT *incx); extern "C" double FORNAME(dnrm2) (const FINT *n, const double *x, const FINT *incx);
double cblas_dnrm2(const int N, const double *X, const int incX) double cblas_dnrm2(const int N, const double *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
return FORNAME(dnrm2) (&ntmp,X,&incxtmp); return FORNAME(dnrm2) (&ntmp,X,&incxtmp);
#else #else
return FORNAME(dnrm2) (&N,X,&incX); return FORNAME(dnrm2) (&N,X,&incX);
#endif #endif
} }
extern "C" double FORNAME(dasum) (const FINT *n, const double *x, const FINT *incx); extern "C" double FORNAME(dasum) (const FINT *n, const double *x, const FINT *incx);
double cblas_dasum(const int N, const double *X, const int incX) double cblas_dasum(const int N, const double *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
return FORNAME(dasum) (&ntmp,X,&incxtmp); return FORNAME(dasum) (&ntmp,X,&incxtmp);
#else #else
return FORNAME(dasum) (&N,X,&incX); return FORNAME(dasum) (&N,X,&incX);
#endif #endif
} }
extern "C" void FORNAME(zcopy) (const FINT *n, const void *x, const FINT *incx, void *y, const FINT *incy); extern "C" void FORNAME(zcopy) (const FINT *n, const void *x, const FINT *incx, void *y, const FINT *incy);
void cblas_zcopy(const int N, const void *X, const int incX, void cblas_zcopy(const int N, const void *X, const int incX, void *Y, const int incY){
void *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zcopy) (&ntmp,X,&incxtmp,Y,&incytmp); FORNAME(zcopy) (&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(zcopy) (&N,X,&incX,Y,&incY); FORNAME(zcopy) (&N,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(zaxpy) (const FINT *n, const void *a, const void *x, const FINT *incx, void *y, const FINT *incy); extern "C" void FORNAME(zaxpy) (const FINT *n, const void *a, const void *x, const FINT *incx, void *y, const FINT *incy);
void cblas_zaxpy(const int N, const void *alpha, const void *X, void cblas_zaxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY){
const int incX, void *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zaxpy) (&ntmp,alpha,X,&incxtmp,Y,&incytmp); FORNAME(zaxpy) (&ntmp,alpha,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(zaxpy) (&N,alpha,X,&incX,Y,&incY); FORNAME(zaxpy) (&N,alpha,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(zscal) (const FINT *n, const void *a, void *x, const FINT *incx); extern "C" void FORNAME(zscal) (const FINT *n, const void *a, void *x, const FINT *incx);
void cblas_zscal(const int N, const void *alpha, void *X, const int incX) void cblas_zscal(const int N, const void *alpha, void *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
FORNAME(zscal)(&ntmp,alpha,X,&incxtmp); FORNAME(zscal)(&ntmp,alpha,X,&incxtmp);
#else #else
FORNAME(zscal)(&N,alpha,X,&incX); FORNAME(zscal)(&N,alpha,X,&incX);
#endif #endif
} }
extern "C" void FORNAME(zdscal) (const FINT *n, const double *a, void *x, const FINT *incx); extern "C" void FORNAME(zdscal) (const FINT *n, const double *a, void *x, const FINT *incx);
void cblas_zdscal(const int N, const double alpha, void *X, const int incX) void cblas_zdscal(const int N, const double alpha, void *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
FORNAME(zdscal)(&ntmp,&alpha,X,&incxtmp); FORNAME(zdscal)(&ntmp,&alpha,X,&incxtmp);
#else #else
FORNAME(zdscal)(&N,&alpha,X,&incX); FORNAME(zdscal)(&N,&alpha,X,&incX);
#endif #endif
} }
extern "C" double FORNAME(dznrm2) (const FINT *n, const void *x, const FINT *incx); extern "C" double FORNAME(dznrm2) (const FINT *n, const void *x, const FINT *incx);
double cblas_dznrm2(const int N, const void *X, const int incX) double cblas_dznrm2(const int N, const void *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
return FORNAME(dznrm2) (&ntmp,X,&incxtmp); return FORNAME(dznrm2) (&ntmp,X,&incxtmp);
#else #else
return FORNAME(dznrm2) (&N,X,&incX); return FORNAME(dznrm2) (&N,X,&incX);
#endif #endif
} }
//the following ones are f2c-compatible, but is it truly portable??? //the following ones are f2c-compatible, but is it truly portable???
extern "C" void FORNAME(zdotu) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy); extern "C" void FORNAME(zdotu) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy);
void cblas_zdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu){
void cblas_zdotu_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotu)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zdotu) (dotu,&ntmp,X,&incxtmp,Y,&incytmp); FORNAME(zdotu) (dotu,&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(zdotu) (dotu,&N,X,&incX,Y,&incY); FORNAME(zdotu) (dotu,&N,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(zdotc) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy); extern "C" void FORNAME(zdotc) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy);
void cblas_zdotc_sub(const int N, const void *X, const int incX, void cblas_zdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc){
const void *Y, const int incY, void *dotc)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zdotc) (dotc,&ntmp,X,&incxtmp,Y,&incytmp); FORNAME(zdotc) (dotc,&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(zdotc) (dotc,&N,X,&incX,Y,&incY); FORNAME(zdotc) (dotc,&N,X,&incX,Y,&incY);
#endif #endif
} }
@ -211,38 +189,38 @@ FORNAME(zdotc) (dotc,&N,X,&incX,Y,&incY);
extern "C" void FORNAME(dspmv) (const char *uplo, const FINT *n, const double *alpha, const double *ap, const double *x, const FINT *incx, const double *beta, double *y, const FINT *incy); extern "C" void FORNAME(dspmv) (const char *uplo, const FINT *n, const double *alpha, const double *ap, const double *x, const FINT *incx, const double *beta, double *y, const FINT *incy);
void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *Ap, const int N, const double alpha, const double *Ap,
const double *X, const int incX, const double *X, const int incX,
const double beta, double *Y, const int incY) const double beta, double *Y, const int incY)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
if(Uplo!=CblasLower) LA::laerror("CblasLower uplo asserted"); if(Uplo!=CblasLower) laerror("CblasLower uplo asserted");
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(dspmv) ("U",&ntmp, &alpha, Ap, X, &incxtmp, &beta, Y, &incytmp); FORNAME(dspmv) ("U",&ntmp, &alpha, Ap, X, &incxtmp, &beta, Y, &incytmp);
#else #else
FORNAME(dspmv) ("U",&N, &alpha, Ap, X, &incX, &beta, Y, &incY); FORNAME(dspmv) ("U",&N, &alpha, Ap, X, &incX, &beta, Y, &incY);
#endif #endif
} }
extern "C" void FORNAME(zhpmv) (const char *uplo, const FINT *n, const void *alpha, const void *ap, const void *x, const FINT *incx, const void *beta, void *y, const FINT *incy); extern "C" void FORNAME(zhpmv) (const char *uplo, const FINT *n, const void *alpha, const void *ap, const void *x, const FINT *incx, const void *beta, void *y, const FINT *incy);
void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *Ap, const int N, const void *alpha, const void *Ap,
const void *X, const int incX, const void *X, const int incX,
const void *beta, void *Y, const int incY) const void *beta, void *Y, const int incY)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
if(Uplo!=CblasLower) LA::laerror("CblasLower uplo asserted"); if(Uplo!=CblasLower) laerror("CblasLower uplo asserted");
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zhpmv) ("U",&ntmp, alpha, Ap, X, &incxtmp, beta, Y, &incytmp); FORNAME(zhpmv) ("U",&ntmp, alpha, Ap, X, &incxtmp, beta, Y, &incytmp);
#else #else
FORNAME(zhpmv) ("U",&N, alpha, Ap, X, &incX, beta, Y, &incY); FORNAME(zhpmv) ("U",&N, alpha, Ap, X, &incX, beta, Y, &incY);
#endif #endif
} }
@ -251,185 +229,232 @@ FORNAME(zhpmv) ("U",&N, alpha, Ap, X, &incX, beta, Y, &incY);
extern "C" void FORNAME(dger) (const FINT *m, const FINT *n, const double *alpha, const double *x, const FINT *incx, const double *y, const FINT *incy, double *a, const FINT *lda); extern "C" void FORNAME(dger) (const FINT *m, const FINT *n, const double *alpha, const double *x, const FINT *incx, const double *y, const FINT *incy, double *a, const FINT *lda);
void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N,
const double alpha, const double *X, const int incX, const double alpha, const double *X, const int incX,
const double *Y, const int incY, double *A, const int lda) const double *Y, const int incY, double *A, const int lda)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
//swap m-n, y-x //swap m-n, y-x
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
const FINT ldatmp=lda; const FINT ldatmp=lda;
FORNAME(dger) (&ntmp, &mtmp, &alpha, Y, &incytmp, X, &incxtmp, A, &ldatmp); FORNAME(dger) (&ntmp, &mtmp, &alpha, Y, &incytmp, X, &incxtmp, A, &ldatmp);
#else #else
FORNAME(dger) (&N, &M, &alpha, Y, &incY, X, &incX, A, &lda); FORNAME(dger) (&N, &M, &alpha, Y, &incY, X, &incX, A, &lda);
#endif #endif
} }
void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N, void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
const void *alpha, const void *X, const int incX, const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda) const void *Y, const int incY, void *A, const int lda)
{ {
LA::laerror("cblas_zgerc cannot be simply converted to fortran order"); laerror("cblas_zgerc cannot be simply converted to fortran order");
} }
void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N, void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N,
const void *alpha, const void *X, const int incX, const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda) const void *Y, const int incY, void *A, const int lda)
{ {
LA::laerror("cblas_zgeru cannot be simply converted to fortran order"); laerror("cblas_zgeru cannot be simply converted to fortran order");
} }
extern "C" void FORNAME(dgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const double *alpha, const double *a, const FINT *lda, const double *b, const FINT *ldb, const double *beta, double *c, const FINT *ldc); extern "C" void FORNAME(dgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const double *alpha, const double *a, const FINT *lda, const double *b, const FINT *ldb, const double *beta, double *c, const FINT *ldc);
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const double alpha, const double *A, const int K, const double alpha, const double *A,
const int lda, const double *B, const int ldb, const int lda, const double *B, const int ldb,
const double beta, double *C, const int ldc) const double beta, double *C, const int ldc)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
//swap a-b, m-n //swap a-b, m-n
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT ktmp=K; const FINT ktmp=K;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT ldbtmp=ldb; const FINT ldbtmp=ldb;
const FINT ldctmp=ldc; const FINT ldctmp=ldc;
FORNAME(dgemm) (TransB==CblasNoTrans?"N":"T", TransA==CblasNoTrans?"N":"T", FORNAME(dgemm) (TransB==CblasNoTrans?"N":"T", TransA==CblasNoTrans?"N":"T",
&ntmp, &mtmp, &ktmp, &alpha, B, &ldbtmp, A, &ldatmp, &beta, C, &ldctmp); &ntmp, &mtmp, &ktmp, &alpha, B, &ldbtmp, A, &ldatmp, &beta, C, &ldctmp);
#else #else
FORNAME(dgemm) (TransB==CblasNoTrans?"N":"T", TransA==CblasNoTrans?"N":"T", FORNAME(dgemm) (TransB==CblasNoTrans?"N":"T", TransA==CblasNoTrans?"N":"T",
&N, &M, &K, &alpha, B, &ldb, A, &lda, &beta, C, &ldc); &N, &M, &K, &alpha, B, &ldb, A, &lda, &beta, C, &ldc);
#endif #endif
} }
extern "C" void FORNAME(zgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const void *alpha, const void *a, const FINT *lda, const void *b, const FINT *ldb, const void *beta, void *c, const FINT *ldc); extern "C" void FORNAME(zgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const void *alpha, const void *a, const FINT *lda, const void *b, const FINT *ldb, const void *beta, void *c, const FINT *ldc);
void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const void *alpha, const void *A, const int K, const void *alpha, const void *A,
const int lda, const void *B, const int ldb, const int lda, const void *B, const int ldb,
const void *beta, void *C, const int ldc) const void *beta, void *C, const int ldc)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
//swap a-b, m-n //swap a-b, m-n
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT ktmp=K; const FINT ktmp=K;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT ldbtmp=ldb; const FINT ldbtmp=ldb;
const FINT ldctmp=ldc; const FINT ldctmp=ldc;
FORNAME(zgemm) ( TransB==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"), FORNAME(zgemm) ( TransB==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"),
TransA==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"), TransA==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"),
&ntmp, &mtmp, &ktmp, alpha, B, &ldbtmp, A, &ldatmp, beta, C, &ldctmp); &ntmp, &mtmp, &ktmp, alpha, B, &ldbtmp, A, &ldatmp, beta, C, &ldctmp);
#else #else
FORNAME(zgemm) ( TransB==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"), FORNAME(zgemm) ( TransB==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"),
TransA==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"), TransA==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"),
&N, &M, &K, alpha, B, &ldb, A, &lda, beta, C, &ldc); &N, &M, &K, alpha, B, &ldb, A, &lda, beta, C, &ldc);
#endif #endif
} }
extern "C" void FORNAME(dgemv) (const char *TRANS, const FINT *M, const FINT *N, const double *ALPHA, const double *A, const FINT *LDA, const double *X, const FINT *INCX, const double *BETA, double *Y, const FINT *INCY); extern "C" void FORNAME(dgemv) (const char *TRANS, const FINT *M, const FINT *N, const double *ALPHA, const double *A, const FINT *LDA, const double *X, const FINT *INCX, const double *BETA, double *Y, const FINT *INCY);
void cblas_dgemv(const enum CBLAS_ORDER Order, void cblas_dgemv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const double alpha, const double *A, const int lda, const double alpha, const double *A, const int lda,
const double *X, const int incX, const double beta, const double *X, const int incX, const double beta,
double *Y, const int incY) double *Y, const int incY)
{ {
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &ntmp, &mtmp, &alpha, A, &ldatmp, X, &incxtmp, &beta, Y, &incytmp ); if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &ntmp, &mtmp, &alpha, A, &ldatmp, X, &incxtmp, &beta, Y, &incytmp );
//swap n-m and toggle transposition //swap n-m and toggle transposition
else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &ntmp, &mtmp, &alpha, A, &ldatmp, X, &incxtmp, &beta, Y, &incytmp ); else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &ntmp, &mtmp, &alpha, A, &ldatmp, X, &incxtmp, &beta, Y, &incytmp );
#else #else
if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY ); if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
//swap n-m and toggle transposition //swap n-m and toggle transposition
else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY ); else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
#endif #endif
} }
extern "C" void FORNAME(zgemv) (const char *TRANS, const FINT *M, const FINT *N, const void *ALPHA, const void *A, const FINT *LDA, const void *X, const FINT *INCX, const void *BETA, void *Y, const FINT *INCY); extern "C" void FORNAME(zgemv) (const char *TRANS, const FINT *M, const FINT *N, const void *ALPHA, const void *A, const FINT *LDA, const void *X, const FINT *INCX, const void *BETA, void *Y, const FINT *INCY);
void cblas_zgemv(const enum CBLAS_ORDER Order, void cblas_zgemv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const void *alpha, const void *A, const int lda, const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta, const void *X, const int incX, const void *beta,
void *Y, const int incY) void *Y, const int incY)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
if(TransA == CblasConjTrans) LA::laerror("zgemv with CblasConjTrans not supportted"); if(TransA == CblasConjTrans) laerror("zgemv with CblasConjTrans not supportted");
//swap n-m and toggle transposition //swap n-m and toggle transposition
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zgemv) (TransA==CblasNoTrans?"T":"N", &ntmp, &mtmp, alpha, A, &ldatmp, X, &incxtmp, beta, Y, &incytmp ); FORNAME(zgemv) (TransA==CblasNoTrans?"T":"N", &ntmp, &mtmp, alpha, A, &ldatmp, X, &incxtmp, beta, Y, &incytmp );
#else #else
FORNAME(zgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, alpha, A, &lda, X, &incX, beta, Y, &incY ); FORNAME(zgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, alpha, A, &lda, X, &incX, beta, Y, &incY );
#endif #endif
} }
extern "C" FINT FORNAME(idamax) (const FINT *N, const double *DX, const FINT *INCX); extern "C" FINT FORNAME(idamax) (const FINT *N, const double *DX, const FINT *INCX);
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX) {
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
return (CBLAS_INDEX)FORNAME(idamax)(&ntmp,X,&incxtmp); return (CBLAS_INDEX)FORNAME(idamax)(&ntmp,X,&incxtmp);
#else #else
return (CBLAS_INDEX)FORNAME(idamax)(&N,X,&incX); return (CBLAS_INDEX)FORNAME(idamax)(&N,X,&incX);
#endif #endif
} }
extern "C" FINT FORNAME(izamax) (const FINT *N, const void *DX, const FINT *INCX);
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX) {
#ifdef FORINT
const FINT ntmp=N;
const FINT incxtmp=incX;
return (CBLAS_INDEX)FORNAME(izamax)(&ntmp, X, &incxtmp);
#else
return (CBLAS_INDEX)FORNAME(izamax)(&N, X, &incX);
#endif
}
/*
extern "C" FINT FORNAME(idamin) (const FINT *N, const double *DX, const FINT *INCX);
CBLAS_INDEX cblas_idamin(const int N, const double *X, const int incX) {
#ifdef FORINT
const FINT ntmp=N;
const FINT incxtmp=incX;
return (CBLAS_INDEX)FORNAME(idamin)(&ntmp,X,&incxtmp);
#else
return (CBLAS_INDEX)FORNAME(idamin)(&N,X,&incX);
#endif
}
extern "C" FINT FORNAME(izamin) (const FINT *N, const void *DX, const FINT *INCX);
CBLAS_INDEX cblas_izamin(const int N, const void *X, const int incX) {
#ifdef FORINT
const FINT ntmp=N;
const FINT incxtmp=incX;
return (CBLAS_INDEX)FORNAME(izamin)(&ntmp, X, &incxtmp);
#else
return (CBLAS_INDEX)FORNAME(izamin)(&N, X, &incX);
#endif
}
*/
#endif #endif
#ifdef NONCLAPACK
#ifdef NONCLAPACK
//clapack_dgesv //clapack_dgesv
//allocate auxiliary storage and transpose input and output quantities to fortran/C order //allocate auxiliary storage and transpose input and output quantities to fortran/C order
extern "C" void FORNAME(dgesv) (const FINT *N, const FINT *NRHS, double *A, const FINT *LDA, FINT *IPIV, double *B, const FINT *LDB, FINT *INFO); extern "C" void FORNAME(dgesv) (const FINT *N, const FINT *NRHS, double *A, const FINT *LDA, FINT *IPIV, double *B, const FINT *LDB, FINT *INFO);
int clapack_dgesv(const enum CBLAS_ORDER Order, const int N, const int NRHS, int clapack_dgesv(const enum CBLAS_ORDER Order, const int N, const int NRHS,
double *A, const int lda, int *ipiv, double *A, const int lda, int *ipiv,
double *B, const int ldb) double *B, const int ldb)
{ {
FINT INFO=0; FINT INFO=0;
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
//B should be in the same physical order, just transpose A in place and the LU result on output //B should be in the same physical order, just transpose A in place and the LU result on output
for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;} for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;}
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT nrhstmp=NRHS; const FINT nrhstmp=NRHS;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT ldbtmp=ldb; const FINT ldbtmp=ldb;
FINT ipivtmp=*ipiv; FINT ipivtmp=*ipiv;
FORNAME(dgesv) (&ntmp,&nrhstmp,A,&ldatmp,&ipivtmp,B,&ldbtmp,&INFO); FORNAME(dgesv) (&ntmp,&nrhstmp,A,&ldatmp,&ipivtmp,B,&ldbtmp,&INFO);
#else #else
FORNAME(dgesv) (&N,&NRHS,A,&lda,ipiv,B,&ldb,&INFO); FORNAME(dgesv) (&N,&NRHS,A,&lda,ipiv,B,&ldb,&INFO);
#endif #endif
for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;} for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;}
return (int)INFO; return (int)INFO;
} }
#endif #endif
void cblas_dswap(const int N, double *X, const int incX,
double *Y, const int incY){ extern "C" void FORNAME(dswap)(const FINT *N, double *X, const FINT *incX, double *Y, const FINT *incY);
LA::laerror("cblas_dswap is not available... (-DNONCBLAS)"); void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY){
#ifdef FORINT
const FINT N_tmp = N;
const FINT incX_tmp = incX;
const FINT incY_tmp = incY;
FORNAME(dswap)(&N_tmp, X, &incX_tmp, Y, &incY_tmp);
#else
FORNAME(dswap)(&N, X, &incX, Y, &incY);
#endif
} }
void cblas_zswap(const int N, void *X, const int incX, extern "C" void FORNAME(zswap)(const FINT *N, void *X, const FINT *incX, void *Y, const FINT *incY);
void *Y, const int incY){ void cblas_zswap(const int N, void *X, const int incX, void *Y, const int incY){
LA::laerror("cblas_zswap is not available... (-DNONCBLAS)"); #ifdef FORINT
} const FINT N_tmp = N;
const FINT incX_tmp = incX;
const FINT incY_tmp = incY;
FORNAME(zswap)(&N_tmp, X, &incX_tmp, Y, &incY_tmp);
#else
FORNAME(zswap)(&N, X, &incX, Y, &incY);
#endif
}

View File

@ -85,6 +85,11 @@ CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX);
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
/*
CBLAS_INDEX cblas_idamin(const int N, const double *X, const int incX);
CBLAS_INDEX cblas_izamin(const int N, const void *X, const int incX);
*/
/* /*
* =========================================================================== * ===========================================================================
* Prototypes for level 1 BLAS routines * Prototypes for level 1 BLAS routines

722
smat.cc
View File

@ -1,3 +1,6 @@
//------------------------------------------------------------------------------
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
//------------------------------------------------------------------------------
/* /*
LA: linear algebra C++ interface library LA: linear algebra C++ interface library
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com> Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
@ -25,219 +28,359 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <fcntl.h> #include <fcntl.h>
#include <errno.h> #include <errno.h>
extern "C" { extern "C" {
extern ssize_t read(int, void *, size_t); extern ssize_t read(int, void *, size_t);
extern ssize_t write(int, const void *, size_t); extern ssize_t write(int, const void *, size_t);
} }
// TODO
// specialize unary minus
namespace LA { namespace LA {
/***************************************************************************//**
/* * routine for raw output
* * Templates first, specializations for BLAS next * @param[in] fd file descriptor for output
* * @param[in] dim number of elements intended for output
*/ * @param[in] transp reserved
* @see NRMat<T>::get(), NRSMat<T>::copyonwrite()
//raw I/O ******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::put(int fd, bool dim, bool transp) const void NRSMat<T>::put(int fd, bool dim, bool transp) const {
{
#ifdef CUDALA #ifdef CUDALA
if(location!=cpu) if(location != cpu){
{ NRSMat<T> tmp= *this;
NRSMat<T> tmp= *this; tmp.moveto(cpu);
tmp.moveto(cpu); tmp.put(fd,dim,transp);
tmp.put(fd,dim,transp); return;
return; }
} #endif
errno = 0;
if(dim){
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
}
LA_traits<T>::multiput(NN2,fd,v,dim);
}
/***************************************************************************//**
* routine for raw input
* @param[in] fd file descriptor for input
* @param[in] dim number of elements intended for input
* @param[in] transp reserved
* @see NRSMat<T>::put(), NRSMat<T>::copyonwrite()
******************************************************************************/
template <typename T>
void NRSMat<T>::get(int fd, bool dim, bool transp) {
#ifdef CUDALA
if(location != cpu){
NRSMat<T> tmp;
tmp.moveto(cpu);
tmp.get(fd,dim,transp);
tmp.moveto(location);
*this = tmp;
return;
}
#endif #endif
errno=0; int nn0[2]; //align at least 8-byte
if(dim) errno = 0;
{ if(dim){
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("cannot read");
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); resize(nn0[0]);
} }else{
LA_traits<T>::multiput(NN2,fd,v,dim); copyonwrite();
}
LA_traits<T>::multiget(NN2,fd,v,dim);
} }
/***************************************************************************//**
* constructor symmetrizing given matrix \f$A\f$ of general type <code>T</code> yielding \f$(A+A^\mathrm{T})/2\f$
* @param[in] rhs matrix \f$A\f$
******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::get(int fd, bool dim, bool transp) NRSMat<T>::NRSMat(const NRMat<T> &rhs) {
{ NOT_GPU(rhs);
#ifdef CUDALA
if(location!=cpu)
{
NRSMat<T> tmp;
tmp.moveto(cpu);
tmp.get(fd,dim,transp);
tmp.moveto(location);
*this = tmp;
return;
}
#endif
int nn0[2]; //align at least 8-byte nn = rhs.nrows();
errno=0;
if(dim)
{
if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("cannot read");
resize(nn0[0]);
}
else
copyonwrite();
LA_traits<T>::multiget(NN2,fd,v,dim);
}
// conversion ctor, symmetrize general Mat into SMat
template <typename T>
NRSMat<T>::NRSMat(const NRMat<T> &rhs)
{
nn=rhs.nrows();
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.ncols()) laerror("attempt to convert non-square Mat to SMat"); if(nn != rhs.ncols()) laerror("attempt to convert nonsquare NRMat<T> to NRSMat<T>");
#endif
#ifdef CUDALA
location = rhs.getlocation();
#endif #endif
count = new int; count = new int;
*count = 1; *count = 1;
v = new T[NN2]; v = new T[NN2];
int i, j, k=0; int i, j, k(0);
for (i=0; i<nn; i++) for(i=0; i<nn; i++){
for (j=0; j<=i;j++) v[k++] = (rhs[i][j] + rhs[j][i])/((T)2); for(j=0; j<=i; j++){
v[k++] = (rhs[i][j] + rhs[j][i])/((T)2);
}
}
} }
/***************************************************************************//**
* zero out this symmetric matrix of general type <code>T</code> and then set
// assign to diagonal * the diagonal elements to prescribed value
* @param[in] a scalar value to be assigned to the diagonal
* @return reference to the modified matrix
******************************************************************************/
template <typename T> template <typename T>
NRSMat<T> & NRSMat<T>::operator=(const T &a) NRSMat<T> & NRSMat<T>::operator=(const T &a) {
{ NOT_GPU(*this);
copyonwrite(); copyonwrite();
memset(v,0,NN2*sizeof(T)); memset(v, 0, NN2*sizeof(T));
for (int i=0; i<nn; i++) v[i*(i+1)/2+i] = a; for(register int i=0; i<nn; i++) v[i*(i+1)/2 + i] = a;
return *this; return *this;
} }
//get diagonal /***************************************************************************//**
* get or divide by the diagonal of real symmetric double-precision matrix
* @param[in, out] r vector for storing the diagonal
* @param[in] divide
* \li \c false save the diagonal to vector r
* \li \c true divide the vector r by the diagonal elements element-wise
* @param[in] cache reserved
* @return
* \li <tt>divide == true</tt> NULL
* \li <tt>divide == false</tt> pointer to the first element of r
******************************************************************************/
template <typename T> template <typename T>
const T* NRSMat<T>::diagonalof(NRVec<T> &r, const bool divide, bool cache) const const T* NRSMat<T>::diagonalof(NRVec<T> &r, const bool divide, bool cache) const {
{
#ifdef DEBUG #ifdef DEBUG
if(r.size()!=nn) laerror("incompatible vector in diagonalof()"); if(r.size() != nn) laerror("incompatible vector in const T* NRSMat<T>::diagonalof(NRVec<T> &, const bool, bool)");
#endif #endif
NOT_GPU(*this);
SAME_LOC(*this, r);
r.copyonwrite(); r.copyonwrite();
if (divide) if(divide){
for (int i=0; i<nn; i++) {T a =v[i*(i+1)/2+i]; if(a!=0.) r[i] /= a;} for(register int i=0; i<nn; i++){
else const T a = v[i*(i+1)/2+i];
for (int i=0; i<nn; i++) r[i] = v[i*(i+1)/2+i]; if(a != 0.) r[i] /= a;
return divide?NULL:&r[0]; }
}else{
for(register int i=0; i<nn; i++) r[i] = v[i*(i+1)/2+i];
}
return divide?NULL:&r[0];
} }
// unary minus /***************************************************************************//**
* implements unary minus operator for this symmetric
* matrix of general type <code>T</code>
* @return modified copy of this matrix
******************************************************************************/
template <typename T> template <typename T>
const NRSMat<T> NRSMat<T>::operator-() const const NRSMat<T> NRSMat<T>::operator-() const {
{ NOT_GPU(*this);
NRSMat<T> result(nn);
for(int i=0; i<NN2; i++) result.v[i]= -v[i]; NRSMat<T> result(nn, getlocation());
for(register int i = 0; i<NN2; i++) result.v[i]= -v[i];
return result; return result;
} }
// trace of Smat /***************************************************************************//**
* implements unary minus operator for this real symmetric matrix
* @return modified copy of this matrix
******************************************************************************/
template <>
const NRSMat<double> NRSMat<double>::operator-() const {
NRSMat<double> result(nn, getlocation());
#ifdef CUDALA
if(location == cpu){
#endif
memcpy(result.v, v, NN2*sizeof(double));
cblas_dscal(NN2, -1., result.v, 1);
#ifdef CUDALA
}else{
cublasDcopy(NN2, v, 1, result.v, 1);
TEST_CUBLAS("cublasDcopy");
cublasDscal(NN2, -1., result.v, 1);
TEST_CUBLAS("cublasDscal");
}
#endif
return result;
}
/***************************************************************************//**
* implements unary minus operator for this hermitian matrix
* @return modified copy of this matrix
******************************************************************************/
template <>
const NRSMat<complex<double> > NRSMat<complex<double> >::operator-() const {
NRSMat<complex<double> > result(nn, getlocation());
#ifdef CUDALA
if(location == cpu) {
#endif
memcpy(result.v, v, NN2*sizeof(complex<double>));
cblas_zscal(NN2, &CMONE, result.v, 1);
#ifdef CUDALA
}else{
cublasZcopy(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)result.v, 1);
TEST_CUBLAS("cublasZcopy");
cublasZscal(NN2, CUMONE, (cuDoubleComplex*)result.v, 1);
TEST_CUBLAS("cublasZscal");
}
#endif
return result;
}
/***************************************************************************//**
* @return the sum of the diagonal elements
******************************************************************************/
template <typename T> template <typename T>
const T NRSMat<T>::trace() const const T NRSMat<T>::trace() const {
{ NOT_GPU(*this);
T tmp = 0; T tmp = 0;
for (int i=0; i<nn; i++) tmp += v[i*(i+1)/2+i]; for(register int i=0; i<nn; i++) tmp += v[i*(i+1)/2+i];
return tmp; return tmp;
} }
/***************************************************************************//**
* fill this real symmetric matrix with
* pseudorandom numbers generated from uniform distribution
******************************************************************************/
template<> template<>
void NRSMat<double>::randomize(const double &x) void NRSMat<double>::randomize(const double &x) {
{ NOT_GPU(*this);
for(int i=0; i<NN2; ++i) v[i] = x*(2.*random()/(1.+RAND_MAX) -1.);
for(int i=0; i<NN2; ++i){
v[i] = x*(2.*random()/(1.+RAND_MAX) -1.);
}
} }
/***************************************************************************//**
* Fill this hermitian matrix with pseudorandom numbers generated from uniform
* distribution. The real and imaginary parts are generated independently.
******************************************************************************/
template<> template<>
void NRSMat<complex<double> >::randomize(const double &x) void NRSMat<complex<double> >::randomize(const double &x) {
{ for(register int i=0; i<NN2; ++i) v[i].real() = x*(2.*random()/(1. + RAND_MAX) -1.);
for(int i=0; i<NN2; ++i) v[i].real() = x*(2.*random()/(1.+RAND_MAX) -1.); for(register int i=0; i<NN2; ++i) v[i].imag() = x*(2.*random()/(1. + RAND_MAX) -1.);
for(int i=0; i<NN2; ++i) v[i].imag() = x*(2.*random()/(1.+RAND_MAX) -1.); for(register int i=0; i<nn; ++i){
for(int i=0; i<nn; ++i) for(int j=0; j<=i; ++j) if(i==j) v[i*(i+1)/2+j].imag()=0; //hermitean for(register int j=0; j<=i; ++j){
if(i == j) v[i*(i+1)/2+j].imag() = 0; //hermitean
}
}
} }
/***************************************************************************//**
* routine for formatted output via lawritemat
// write matrix to the file with specific format * @param[in] file pointer to <tt>FILE</tt> structure representing the output file
* @param[in] format format specification in standard printf-like form
* @param[in] modulo
* @see lawritemat()
******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::fprintf(FILE *file, const char *format, const int modulo) const void NRSMat<T>::fprintf(FILE *file, const char *format, const int modulo) const {
{ NOT_GPU(*this);
lawritemat(file, (const T *)(*this) ,nn, nn, format, 2, modulo, 1); lawritemat(file, (const T *)(*this) ,nn, nn, format, 2, modulo, 1);
} }
// read matrix from the file with specific format
/***************************************************************************//**
* routine for formatted input via fscanf
* @param[in] f pointer to <tt>FILE</tt> structure representing the input file
* @param[in] format format specification in standard printf-like form
******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::fscanf(FILE *f, const char *format) void NRSMat<T>::fscanf(FILE *f, const char *format) {
{
int n, m; int n, m;
if (::fscanf(f,"%d %d",&n,&m) != 2) NOT_GPU(*this);
laerror("cannot read matrix dimensions in SMat::fscanf");
if (n != m) laerror("different dimensions of SMat"); if (::fscanf(f,"%d %d", &n, &m) != 2)
laerror("cannot read matrix dimensions in NRSMat<T>::fscanf(FILE *, const char *)");
if (n != m) laerror("different dimensions in NRSMat<T>::fscanf(FILE *, const char *)");
resize(n); resize(n);
for (int i=0; i<n; i++) for (int i=0; i<n; i++)
for (int j=0; j<n; j++) for (int j=0; j<n; j++)
if (::fscanf(f,format,&((*this)(i,j))) != 1) if (::fscanf(f,format,&((*this)(i,j))) != 1)
laerror("Smat - cannot read matrix element"); laerror("NRSMat<T>::fscanf(FILE *, const char *) - unable to read matrix element");
} }
/* /***************************************************************************//**
* BLAS specializations for double and complex<double> * multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form
*/ * with real double-precision dense matrix \f$A\f$
* @param[in] rhs real double-precision matrix \f$A\f$
* @return matrix produt \f$S\times{}A\f$
******************************************************************************/
// SMat * Mat
//NOTE: dsymm is not appropriate as it works on UNPACKED symmetric matrix
template<> template<>
const NRMat<double> NRSMat<double>::operator*(const NRMat<double> &rhs) const const NRMat<double> NRSMat<double>::operator*(const NRMat<double> &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat"); if(nn != rhs.nrows()) laerror("incompatible dimensions in NRMat<double> NRSMat<double>::operator*(const NRMat<double> &)");
#endif
SAME_LOC(*this, rhs);
NRMat<double> result(nn, rhs.ncols(), getlocation());
#ifdef CUDALA
if(location == cpu){
#endif
for(register int k = 0; k<rhs.ncols(); k++){
cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs[0] + k, rhs.ncols(), 0.0, result[0] + k, rhs.ncols());
}
#ifdef CUDALA
}else{
for(register int k = 0; k<rhs.ncols(); k++){
cublasDspmv('U', nn, 1.0, v, rhs[0] + k, rhs.ncols(), 0.0, result[0] + k, rhs.ncols());
TEST_CUBLAS("cublasDspmv");
}
}
#endif #endif
NRMat<double> result(nn, rhs.ncols());
for (int k=0; k<rhs.ncols(); k++)
cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs[0]+k, rhs.ncols(),
0.0, result[0]+k, rhs.ncols());
return result; return result;
} }
/***************************************************************************//**
* multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form
* with real double-precision dense matrix \f$A\f$
* @param[in] rhs real double-precision matrix \f$A\f$
* @return matrix produt \f$S\times{}A\f$
******************************************************************************/
template<> template<>
const NRMat< complex<double> > const NRMat<complex<double> >
NRSMat< complex<double> >::operator*(const NRMat< complex<double> > &rhs) const NRSMat<complex<double> >::operator*(const NRMat<complex<double> > &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat"); if (nn != rhs.nrows()) laerror("incompatible dimensions in NRSMat<complex<double> >::operator*(const NRMat<complex<double> > &)");
#endif
SAME_LOC(*this, rhs);
NRMat<complex<double> > result(nn, rhs.ncols(), getlocation());
#ifdef CUDALA
if(location == cpu){
#endif
for(register int k=0; k<rhs.ncols(); k++){
cblas_zhpmv(CblasRowMajor, CblasLower, nn, &CONE, v, rhs[0]+k, rhs.ncols(), &CZERO, result[0]+k, rhs.ncols());
}
#ifdef CUDALA
}else{
for(register int k = 0; k<rhs.ncols(); k++){
cublasZhpmv('U', nn,
CUONE, (cuDoubleComplex*)v, (cuDoubleComplex*)(rhs[0] + k), rhs.ncols(),
CUZERO, (cuDoubleComplex*)(result[0] + k), rhs.ncols());
TEST_CUBLAS("cublasDspmv");
}
}
#endif #endif
NRMat< complex<double> > result(nn, rhs.ncols());
for (int k=0; k<rhs.ncols(); k++)
cblas_zhpmv(CblasRowMajor, CblasLower, nn, &CONE, v, rhs[0]+k, rhs.ncols(),
&CZERO, result[0]+k, rhs.ncols());
return result; return result;
} }
/***************************************************************************//**
* multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form
// SMat * SMat * with real double-precision symmetric matrix \f$T\f$
* @return matrix produt \f$S\times{}T\f$ (not necessarily symmetric)
******************************************************************************/
template<> template<>
const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat"); if (nn != rhs.nn) laerror("incompatible dimensions in NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &)");
#endif #endif
NRMat<double> result(0.0, nn, nn); NRMat<double> result(0.0, nn, nn);
double *p, *q; double *p, *q;
@ -283,156 +426,295 @@ const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const
} }
/***************************************************************************//**
* multiply this complex double-precision symmetric matrix \f$G\f$ stored in packed form
* with complex double-precision symmetric matrix \f$H\f$
* @return matrix produt \f$G\times{}H\f$ (not necessarily symmetric)
******************************************************************************/
template<> template<>
const NRMat< complex<double> > const NRMat<complex<double> >
NRSMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const NRSMat<complex<double> >::operator*(const NRSMat<complex<double> > &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat"); if (nn != rhs.nn) laerror("incompatible dimensions in NRSMat<complex<double> >::operator*(const NRSMat<complex<double> > &)");
#endif #endif
NRMat< complex<double> > result(0.0, nn, nn); SAME_LOC(*this, rhs);
NRMat< complex<double> > rhsmat(rhs); NRMat<complex<double> > result(nn, nn, getlocation());
NRMat<complex<double> > rhsmat(rhs);
result = *this * rhsmat; result = *this * rhsmat;
return result; return result;
// laerror("complex SMat*Smat not implemented");
} }
/***************************************************************************//**
* compute inner product of this real symmetric matrix \f$A\f$ with given real symmetric matrix \f$B\f$
// S dot S * i.e. determine the value of
* \f[\sum_{i,j}A_{i,j}B_{i,j}\f]
* @param[in] rhs matrix \f$B\f$
* @return computed inner product
******************************************************************************/
template<> template<>
const double NRSMat<double>::dot(const NRSMat<double> &rhs) const const double NRSMat<double>::dot(const NRSMat<double> &rhs) const {
{ double ret(0.);
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nn) laerror("dot of incompatible SMat's"); if (nn != rhs.nn) laerror("incompatible dimensions in double NRSMat<double>::dot(const NRSMat<double> &)");
#endif #endif
return cblas_ddot(NN2, v, 1, rhs.v, 1); SAME_LOC(*this, rhs);
#ifdef CUDALA
if(location == cpu){
#endif
ret = cblas_ddot(NN2, v, 1, rhs.v, 1);
#ifdef CUDALA
}else{
ret = cublasDdot(NN2, v, 1, rhs.v, 1);
}
#endif
return ret;
} }
/***************************************************************************//**
* compute inner product of this complex symmetric matrix \f$A\f$ with given complex symmetric matrix \f$B\f$
* i.e. determine the value of
* \f[\sum_{i,j}\overbar{A_{i,j}}B_{i,j}\f]
* @param[in] rhs matrix \f$B\f$
* @return computed inner product
******************************************************************************/
template<> template<>
const complex<double> const complex<double> NRSMat<complex<double> >::dot(const NRSMat<complex<double> > &rhs) const {
NRSMat< complex<double> >::dot(const NRSMat< complex<double> > &rhs) const
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nn) laerror("dot of incompatible SMat's"); if (nn != rhs.nn) laerror("incompatible dimensions in complex<double> NRSMat<complex<double> >::dot(const NRSMat<complex<double> > &)");
#endif
complex<double> dot(0., 0.);
SAME_LOC(*this, rhs);
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
#ifdef CUDALA
}else{
const cuDoubleComplex _dot = cublasZdotc(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)(rhs.v), 1);
dot = complex<double>(cuCreal(_dot), cuCimag(_dot));
TEST_CUBLAS("cublasZdotc");
}
#endif #endif
complex<double> dot;
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
return dot; return dot;
} }
/***************************************************************************//**
* compute inner product of this real double-precision symmetric matrix \f$S\f$ of order \f$n\f$
* with given real double-precision vector \f$\vec{v}\f$ of length \f$n(n+1)/2\f$
* @param[in] rhs real double-precision vector \f$\vec{v}\f$
* @return computed inner product
******************************************************************************/
template<> template<>
const double NRSMat<double>::dot(const NRVec<double> &rhs) const const double NRSMat<double>::dot(const NRVec<double> &rhs) const {
{ double ret(0.0);
#ifdef DEBUG #ifdef DEBUG
if (NN2 != rhs.nn) laerror("dot of incompatible SMat's"); if(NN2 != rhs.nn) laerror("incompatible dimensions in double NRSMat<double>::dot(const NRVec<double> &)");
#endif
SAME_LOC(*this, rhs);
#ifdef CUDALA
if(location == cpu){
#endif
ret = cblas_ddot(NN2, v, 1, rhs.v, 1);
#ifdef CUDALA
}else{
ret = cublasDdot(NN2, v, 1, rhs.v, 1);
TEST_CUBLAS("cublasDdot");
}
#endif #endif
return cblas_ddot(NN2, v, 1, rhs.v, 1);
} }
/***************************************************************************//**
* compute inner product of this complex double-precision hermitian matrix \f$H\f$ of order \f$n\f$
* with given complex double-precision vector \f$\vec{v}\f$ of length \f$n(n+1)/2\f$
* @param[in] rhs complex double-precision vector \f$\vec{v}\f$
* @return computed inner product
******************************************************************************/
template<> template<>
const complex<double> const complex<double>
NRSMat< complex<double> >::dot(const NRVec< complex<double> > &rhs) const NRSMat<complex<double> >::dot(const NRVec<complex<double> > &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (NN2 != rhs.nn) laerror("dot of incompatible SMat's"); if(NN2 != rhs.nn) laerror("incompatible dimensions in complex<double> NRSMat<complex<double> >::dot(const NRVec<complex<double> > &)");
#endif
complex<double> dot(0., 0.);
SAME_LOC(*this, rhs);
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
#ifdef CUDALA
}else{
const cuDoubleComplex _dot = cublasZdotc(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)rhs.v, 1);
TEST_CUBLAS("cublasZdotc");
dot = complex<double>(cuCreal(_dot), cuCimag(_dot));
}
#endif #endif
complex<double> dot;
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
return dot; return dot;
} }
/***************************************************************************//**
// norm of the matrix * compute the Frobenius norm of this real double-precision symmetric matrix
* @param[in] scalar subtract this scalar value from the diagonal elements before the norm computation
******************************************************************************/
template<> template<>
const double NRSMat<double>::norm(const double scalar) const const double NRSMat<double>::norm(const double scalar) const {
{ if(!scalar){
if (!scalar) return cblas_dnrm2(NN2, v, 1); double ret(0.);
double sum = 0; #ifdef CUDALA
int k = 0; if(location == cpu){
for (int i=0; i<nn; ++i) #endif
for (int j=0; j<=i; ++j) { ret = cblas_dnrm2(NN2, v, 1);
register double tmp; #ifdef CUDALA
tmp = v[k++]; }else{
if (i == j) tmp -= scalar; ret = cublasDnrm2(NN2, v, 1);
TEST_CUBLAS("cublasDnrm2");
}
#endif
return ret;
}
NOT_GPU(*this);
double sum(0.);
int k(0);
for(register int i=0; i<nn; ++i){
for(register int j=0; j<=i; ++j) {
register double tmp = v[k++];
if(i == j) tmp -= scalar;
sum += tmp*tmp; sum += tmp*tmp;
} }
}
return std::sqrt(sum); return std::sqrt(sum);
} }
/***************************************************************************//**
* compute the Frobenius norm of this complex double-precision hermitian matrix
* @param[in] scalar subtract this scalar value from the diagonal elements before the norm computation
******************************************************************************/
template<> template<>
const double NRSMat< complex<double> >::norm(const complex<double> scalar) const const double NRSMat< complex<double> >::norm(const complex<double> scalar) const {
{ if(!(scalar.real()) && !(scalar.imag())){
if (!(scalar.real()) && !(scalar.imag())) double ret(0.);
return cblas_dznrm2(NN2, v, 1); #ifdef CUDALA
double sum = 0; if(location == cpu){
#endif
ret = cblas_dznrm2(NN2, v, 1);
#ifdef CUDALA
}else{
ret = cublasDznrm2(NN2, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasDznrm2");
}
#endif
return ret;
}
int k(0);
double sum(0.);
complex<double> tmp; complex<double> tmp;
int k = 0;
for (int i=0; i<nn; ++i) for(register int i=0; i<nn; ++i){
for (int j=0; j<=i; ++j) { for(register int j=0; j<=i; ++j){
tmp = v[k++]; tmp = v[k++];
if (i == j) tmp -= scalar; if (i == j) tmp -= scalar;
sum += tmp.real()*tmp.real() + tmp.imag()*tmp.imag(); sum += tmp.real()*tmp.real() + tmp.imag()*tmp.imag();
} }
}
return std::sqrt(sum); return std::sqrt(sum);
} }
/***************************************************************************//**
* for this real double-precision symmetric matrix \f$S\f$ stored in packed form,
* real scalar value \f$\alpha\f$ and real double-precision symmetric matrix \f$T\f$, compute
// axpy: S = S * a * \f[S \leftarrow \alpha T + S\f]
******************************************************************************/
template<> template<>
void NRSMat<double>::axpy(const double alpha, const NRSMat<double> & x) void NRSMat<double>::axpy(const double alpha, const NRSMat<double> &x) {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != x.nn) laerror("axpy of incompatible SMats"); if(nn != x.nn) laerror("incompatible dimensions in void NRSMat<double>::axpy(const double, const NRSMat<double>&)");
#endif #endif
SAME_LOC(*this, x);
copyonwrite(); copyonwrite();
cblas_daxpy(NN2, alpha, x.v, 1, v, 1); #ifdef CUDALA
if(location == cpu){
#endif
cblas_daxpy(NN2, alpha, x.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasDaxpy(NN2, alpha, x.v, 1, v, 1);
TEST_CUBLAS("cublasDaxpy");
}
#endif
} }
/***************************************************************************//**
* for this complex double-precision hermitian matrix \f$H\f$ stored in packed form,
* complex scalar value \f$\alpha\f$ and complex double-precision hermitian matrix \f$G\f$, compute
* \f[H \leftarrow \alpha G + H\f]
******************************************************************************/
template<> template<>
void NRSMat< complex<double> >::axpy(const complex<double> alpha, void NRSMat<complex<double> >::axpy(const complex<double> alpha, const NRSMat<complex<double> > & x) {
const NRSMat< complex<double> > & x)
{
#ifdef DEBUG #ifdef DEBUG
if (nn != x.nn) laerror("axpy of incompatible SMats"); if(nn != x.nn) laerror("incompatible dimensions in void NRSMat<complex<double> >::axpy(const complex<double> , const NRSMat<complex<double> >&)");
#endif #endif
SAME_LOC(*this, x);
copyonwrite(); copyonwrite();
cblas_zaxpy(nn, &alpha, x.v, 1, v, 1); #ifdef CUDALA
if(location == cpu){
#endif
cblas_zaxpy(nn, &alpha, x.v, 1, v, 1);
#ifdef CUDALA
}else{
const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
cublasZaxpy(NN2, _alpha, (cuDoubleComplex*)x.v, 1, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZaxpy");
}
#endif
} }
//complex from real /***************************************************************************//**
* create hermitian matrix \f$H\f$ from given real double-precision symmetric
* matrix \f$S\f$
* @param[in] rhs real double-precision symmetric matrix \f$S\f$
* @param[in] imagpart flag determining whether \f$S\f$ should correspond to the real or imaginary part of \f$H\f$
******************************************************************************/
template<> template<>
NRSMat<complex<double> >::NRSMat(const NRSMat<double> &rhs, bool imagpart) NRSMat<complex<double> >::NRSMat(const NRSMat<double> &rhs, bool imagpart): nn(rhs.nrows()), count(new int(1)) {
: nn(rhs.nrows()), v(new complex<double>[rhs.nrows()*(rhs.nrows()+1)/2]), count(new int(1)) //inconsistent in general case?
{ const int nnp1 = nn*(nn + 1)/2;
memset(v,0,nn*(nn+1)/2*sizeof(complex<double>)); #ifdef CUDALA
cblas_dcopy(nn*(nn+1)/2,&rhs(0,0),1,((double *)v) + (imagpart?1:0),2); location = rhs.getlocation();
if(location == cpu){
#endif
v = new complex<double>[nnp1];
memset(v, 0, nnp1*sizeof(complex<double>));
cblas_dcopy(nnp1, &rhs(0, 0), 1, ((double *)v) + (imagpart?1:0), 2);
#ifdef CUDALA
}else{
v = (complex<double>*) gpualloc(nnp1*sizeof(complex<double>));
complex<double> *_val = gpuputcomplex(CZERO);
cublasZcopy(nnp1, (cuDoubleComplex*)_val, 0, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZcopy");
gpufree(_val);
cublasDcopy(nnp1, (double*)(&rhs(0,0)), 1, ((double*)v) + (imagpart?1:0), 2);
TEST_CUBLAS("cublasDcopy");
}
#endif
} }
/***************************************************************************//**
//some template specializations leading to BLAS/CUBLAS calls * forced instantization in the corresponding object file
******************************************************************************/
//////////////////////////////////////////////////////////////////////////////
////// forced instantization in the corresponding object file
template class NRSMat<double>; template class NRSMat<double>;
template class NRSMat< complex<double> >; template class NRSMat<complex<double> >;
template class NRSMat<long long>; template class NRSMat<long long>;
template class NRSMat<long>; template class NRSMat<long>;

1349
smat.h

File diff suppressed because it is too large Load Diff

1245
vec.cc

File diff suppressed because it is too large Load Diff

1808
vec.h

File diff suppressed because it is too large Load Diff