This commit is contained in:
jiri
2010-09-08 16:27:58 +00:00
parent e8cbf9e5fb
commit e580467e5a
14 changed files with 7105 additions and 3690 deletions
+37 -43
View File
@@ -1,3 +1,6 @@
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
/*******************************************************************************
*******************************************************************************/
#include "la_traits.h" #include "la_traits.h"
#include "cuda_la.h" #include "cuda_la.h"
@@ -7,63 +10,54 @@ namespace LA {
GPUID DEFAULT_LOC = cpu; GPUID DEFAULT_LOC = cpu;
void set_default_loc(const GPUID loc) void set_default_loc(const GPUID loc){
{ DEFAULT_LOC = loc;
DEFAULT_LOC = loc;
} }
void *gpualloc(size_t size) void *gpualloc(size_t size){
{ void *ptr = NULL;
cublasStatus status; cublasAlloc(size, 1, &ptr);
void *ptr=NULL; TEST_CUBLAS("cublasAlloc");
status = cublasAlloc(size,1,&ptr); return ptr;
if(status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasAlloc");
return ptr;
} }
void gpufree(void *ptr) void gpufree(void *ptr){
{ cublasFree(ptr);
cublasStatus status = cublasFree(ptr); TEST_CUBLAS("cublasFree");
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasFree");
} }
void gpuget(size_t n, size_t elsize, const void *from, void *to) void gpuget(size_t n, size_t elsize, const void *from, void *to){
{ cublasGetVector(n, elsize, from, 1, to, 1);
cublasStatus status; TEST_CUBLAS("cublasGetVector");
status=cublasGetVector(n,elsize,from,1,to,1);
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasGetVector");
} }
void gpuput(size_t n, size_t elsize, const void *from, void *to) void gpuput(size_t n, size_t elsize, const void *from, void *to){
{ cublasSetVector(n, elsize, from, 1, to, 1);
cublasStatus status; TEST_CUBLAS("cublasSetVector");
status=cublasSetVector(n,elsize,from,1,to,1);
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasSetVector");
} }
double *gpuputdouble(const double &x) double *gpuputdouble(const double &x){
{ void *ptr = NULL;
cublasStatus status; cublasAlloc(1, sizeof(double), &ptr);
void *ptr=NULL; TEST_CUBLAS("cublasAlloc");
status = cublasAlloc(1,sizeof(double),&ptr);
if(status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasAlloc"); cublasSetVector(1, sizeof(double), &x, 1, ptr, 1);
status=cublasSetVector(1,sizeof(double),&x,1,ptr,1); TEST_CUBLAS("cublasSetVector");
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasSetVector");
return (double *)ptr; return (double *)ptr;
} }
complex<double> *gpuputcomplex(const complex<double> &x) complex<double> *gpuputcomplex(const complex<double> &x){
{ void *ptr = NULL;
cublasStatus status; cublasAlloc(1, sizeof(complex<double>), &ptr);
void *ptr=NULL; TEST_CUBLAS("cublasAlloc");
status = cublasAlloc(1,sizeof(complex<double>),&ptr);
if(status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasAlloc");
status=cublasSetVector(1,sizeof(complex<double>),&x,1,ptr,1);
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasSetVector");
return (complex<double> *)ptr;
}
cublasSetVector(1, sizeof(complex<double>), &x, 1, ptr, 1);
TEST_CUBLAS("cublasSetVector");
return (complex<double> *)ptr;
}
} }
+50 -1
View File
@@ -1,6 +1,10 @@
//------------------------------------------------------------------------------
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
//------------------------------------------------------------------------------
#ifndef _CUDA_LA_H #ifndef _CUDA_LA_H
#define _CUDA_LA_H #define _CUDA_LA_H
#include <errno.h>
#ifdef CUDALA #ifdef CUDALA
#undef MATPTR #undef MATPTR
#include "cublas.h" #include "cublas.h"
@@ -13,6 +17,7 @@ namespace LA {
#ifdef CUDALA #ifdef CUDALA
#define CPU_GPU(x,y) {if((x)!=cpu && (y)!=cpu) laerror("one operand must be in CPU memory");} #define CPU_GPU(x,y) {if((x)!=cpu && (y)!=cpu) laerror("one operand must be in CPU memory");}
#define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");} #define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");}
#define NOT_CPU(x) {if((x).getlocation()==cpu) laerror("Operation not implemented on CPU (yet). Use .moveto(>0) first.");}
#define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");} #define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
#define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");} #define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
#else #else
@@ -22,6 +27,16 @@ namespace LA {
#define SAME_LOC3(x,y,z) {} #define SAME_LOC3(x,y,z) {}
#endif #endif
#ifdef DEBUG
#ifdef __GNUG__
#define TEST_CUBLAS(X) { if(cublasGetError() != CUBLAS_STATUS_SUCCESS){ laerror2(#X, __PRETTY_FUNCTION__); } }
#else
#define TEST_CUBLAS(X) { if(cublasGetError() != CUBLAS_STATUS_SUCCESS){ laerror2(#X, __func__); } }
#endif
#else
#define TEST_CUBLAS(X) {}
#endif
typedef enum {undefined=-1, cpu=0, gpu1=1, gpu2=2, gpu3=3, gpu4=4} GPUID; typedef enum {undefined=-1, cpu=0, gpu1=1, gpu2=2, gpu3=3, gpu4=4} GPUID;
#ifdef CUDALA #ifdef CUDALA
@@ -33,6 +48,7 @@ public:
{ {
cublasStatus status = cublasInit(); cublasStatus status = cublasInit();
if (status != CUBLAS_STATUS_SUCCESS) laerror("Cannot init GPU for CUBLAS"); if (status != CUBLAS_STATUS_SUCCESS) laerror("Cannot init GPU for CUBLAS");
errno = 0;
} }
~GPU_START(void) ~GPU_START(void)
{ {
@@ -50,8 +66,41 @@ extern complex<double> *gpuputcomplex(const complex<double> &x);
void set_default_loc(const GPUID loc); void set_default_loc(const GPUID loc);
extern GPUID DEFAULT_LOC; template <typename T>
void smart_gpu_set(size_t n, const T& val, void *gpu_to, size_t _step = 1){
void *ptr(NULL);
if(sizeof(T)%sizeof(float) != 0){ laerror("memory alignment error"); }
cublasAlloc(1, sizeof(T), &ptr);
TEST_CUBLAS("cublasAlloc");
cublasSetVector(1, sizeof(T), &val, 1, ptr, 1);
TEST_CUBLAS("cublasSetVector");
if(sizeof(T) == sizeof(float)){
cublasScopy(n, (float*)ptr, 0, ((float*)gpu_to), _step);
TEST_CUBLAS("cublasScopy");
}else if(sizeof(T) == sizeof(double)){
cublasDcopy(n, (double*)ptr, 0, ((double*)gpu_to), _step);
TEST_CUBLAS("cublasDcopy");
}else if(sizeof(T) == sizeof(complex<double>)){
cublasZcopy(n, (cuDoubleComplex*)ptr, 0, (cuDoubleComplex*)gpu_to, _step);
TEST_CUBLAS("cublasZcopy");
}else{
for(register int i=0; i<sizeof(T)/sizeof(float); i++){
cublasScopy(n, (float*)ptr + i, 0, ((float*)gpu_to) + i, sizeof(T)/sizeof(float)*_step);
TEST_CUBLAS("cublasScopy");
}
}
cublasFree(ptr);
TEST_CUBLAS("cublasFree");
}
extern GPUID DEFAULT_LOC;
#endif #endif
} }
+8 -1
View File
@@ -44,5 +44,12 @@
#include "sparsesmat.h" #include "sparsesmat.h"
#include "vec.h" #include "vec.h"
using namespace LA;
typedef NRMat<int> NRIMat;
typedef NRMat<double> NRDMat;
typedef NRMat<complex<double> > NRCMat;
typedef NRVec<int> NRIVec;
typedef NRVec<double> NRDVec;
typedef NRVec<complex<double> > NRCVec;
#endif /* _LA_H_ */ #endif /* _LA_H_ */
+1
View File
@@ -32,6 +32,7 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <iostream> #include <iostream>
#include <limits>
#include <complex> #include <complex>
+33 -36
View File
@@ -42,20 +42,20 @@ bool _LA_count_check=true;
extern "C" void _findme(void) {}; //for autoconf test we need a function with C linkage extern "C" void _findme(void) {}; //for autoconf test we need a function with C linkage
void laerror(const char *s1) void laerror2(const char *s1, const char *s2)
{ {
std::cerr << "LA:ERROR - "; std::cerr << "LA:ERROR - ";
std::cout << "LA:ERROR - "; std::cout << "LA:ERROR - ";
if(s1) if(s1)
{ {
std::cerr << s1 << "\n"; std::cerr << s2 << ": " << s1 << "\n";
std::cout << s1 << "\n"; std::cout << s2 << ": " << s1 << "\n";
} }
#ifdef CUDALA #ifdef CUDALA
{ {
cublasStatus s=cublasGetError(); cublasStatus s = cublasGetError();
std::cerr << "CUBLAS status = "<<s<<std::endl; std::cerr << "CUBLAS status = " << s << std::endl;
std::cout << "CUBLAS status = "<<s<<std::endl; std::cout << "CUBLAS status = " << s << std::endl;
} }
#endif #endif
if(errno) perror("system error"); if(errno) perror("system error");
@@ -63,43 +63,40 @@ std::cout << "CUBLAS status = "<<s<<std::endl;
throw LAerror(s1); throw LAerror(s1);
} }
//stub for f77 blas called from strassen routine //stub for f77 blas called from strassen routine
extern "C" void xerbla_(const char name[6], int *n) extern "C" void xerbla_(const char name[6], int *n){
{ char msg[1024];
char msg[1024]; strcpy(msg,"LAPACK or BLAS error in routine ");
strcpy(msg,"LAPACK or BLAS error in routine "); strncat(msg,name,6);
strncat(msg,name,6); sprintf(msg+strlen(msg),": illegal value of parameter #%d",*n);
sprintf(msg+strlen(msg),": illegal value of parameter #%d",*n); laerror(msg);
laerror(msg);
} }
//with atlas-cblas another error routine is necessary //with atlas-cblas another error routine is necessary
extern "C" void ATL_xerbla(int p, char *rout, char *form, ...){
extern "C" void ATL_xerbla(int p, char *rout, char *form, ...) char msg0[1024], *msg;
{ va_list argptr;
char msg0[1024], *msg; va_start(argptr, form);
va_list argptr; strcpy(msg0,"ATLAS error\n");
va_start(argptr, form); msg=msg0+strlen(msg0);
strcpy(msg0,"ATLAS error\n"); if (p) {sprintf(msg, "Parameter %d to routine %s was incorrect\n", p, rout); msg+=strlen(msg);}
msg=msg0+strlen(msg0); vsprintf(msg, form, argptr);
if (p) {sprintf(msg, "Parameter %d to routine %s was incorrect\n", p, rout); msg+=strlen(msg);} va_end(argptr);
vsprintf(msg, form, argptr); laerror(msg0);
va_end(argptr);
laerror(msg0);
} }
int cblas_errprn(int ierr, int info, char *form, ...) int cblas_errprn(int ierr, int info, char *form, ...) {
{ char msg0[1024], *msg;
char msg0[1024], *msg; va_list argptr;
va_list argptr; va_start(argptr, form);
va_start(argptr, form); sprintf(msg0,"CBLAS error %d %d\n",ierr,info);
sprintf(msg0,"CBLAS error %d %d\n",ierr,info); msg=msg0+strlen(msg0);
msg=msg0+strlen(msg0); vsprintf(msg, form, argptr);
vsprintf(msg, form, argptr); va_end(argptr);
va_end(argptr); laerror(msg0);
laerror(msg0); return 0;
return 0;
} }
}//namespace }//namespace
+8 -3
View File
@@ -29,10 +29,15 @@ class LAerror
LAerror(const char *s) {msg=s;}; LAerror(const char *s) {msg=s;};
}; };
extern void laerror(const char *); #ifdef __GNUG__
#define laerror(X) { LA::laerror2(X, __PRETTY_FUNCTION__); }
#else
#define laerror(X) { LA::laerror2(X, __func__); }
#endif
inline std::ostream & operator<<(std::ostream &s, const LAerror &x) extern void laerror2(const char *, const char *);
{
inline std::ostream & operator<<(std::ostream &s, const LAerror &x) {
s << x.msg; s << x.msg;
return s; return s;
} }
+2301 -1051
View File
File diff suppressed because it is too large Load Diff
+1048 -560
View File
File diff suppressed because it is too large Load Diff
+251 -226
View File
@@ -1,4 +1,5 @@
/* /* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
/*******************************************************************************
LA: linear algebra C++ interface library LA: linear algebra C++ interface library
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com> Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
@@ -14,8 +15,7 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ *******************************************************************************/
#include "noncblas.h" #include "noncblas.h"
#include "laerror.h" #include "laerror.h"
@@ -27,178 +27,156 @@
//Level 1 - straightforward wrappers //Level 1 - straightforward wrappers
extern "C" double FORNAME(ddot) (const FINT *n, const double *x, const FINT *incx, const double *y, const FINT *incy); extern "C" double FORNAME(ddot) (const FINT *n, const double *x, const FINT *incx, const double *y, const FINT *incy);
double cblas_ddot(const int N, const double *X, const int incX, double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY){
const double *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
return FORNAME(ddot)(&ntmp,X,&incxtmp,Y,&incytmp); return FORNAME(ddot)(&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
return FORNAME(ddot)(&N,X,&incX,Y,&incY); return FORNAME(ddot)(&N,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(dscal) (const FINT *n, const double *a, double *x, const FINT *incx); extern "C" void FORNAME(dscal) (const FINT *n, const double *a, double *x, const FINT *incx);
void cblas_dscal(const int N, const double alpha, double *X, const int incX) void cblas_dscal(const int N, const double alpha, double *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
FORNAME(dscal) (&ntmp,&alpha,X,&incxtmp); FORNAME(dscal) (&ntmp,&alpha,X,&incxtmp);
#else #else
FORNAME(dscal) (&N,&alpha,X,&incX); FORNAME(dscal) (&N,&alpha,X,&incX);
#endif #endif
} }
extern "C" void FORNAME(dcopy) (const FINT *n, const double *x, const FINT *incx, double *y, const FINT *incy); extern "C" void FORNAME(dcopy) (const FINT *n, const double *x, const FINT *incx, double *y, const FINT *incy);
void cblas_dcopy(const int N, const double *X, const int incX, void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY){
double *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(dcopy) (&ntmp,X,&incxtmp,Y,&incytmp); FORNAME(dcopy) (&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(dcopy) (&N,X,&incX,Y,&incY); FORNAME(dcopy) (&N,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(daxpy) (const FINT *n, const double *a, const double *x, const FINT *incx, double *y, const FINT *incy); extern "C" void FORNAME(daxpy) (const FINT *n, const double *a, const double *x, const FINT *incx, double *y, const FINT *incy);
void cblas_daxpy(const int N, const double alpha, const double *X, void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY){
const int incX, double *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(daxpy) (&ntmp,&alpha,X,&incxtmp,Y,&incytmp); FORNAME(daxpy) (&ntmp,&alpha,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(daxpy) (&N,&alpha,X,&incX,Y,&incY); FORNAME(daxpy) (&N,&alpha,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" double FORNAME(dnrm2) (const FINT *n, const double *x, const FINT *incx); extern "C" double FORNAME(dnrm2) (const FINT *n, const double *x, const FINT *incx);
double cblas_dnrm2(const int N, const double *X, const int incX) double cblas_dnrm2(const int N, const double *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
return FORNAME(dnrm2) (&ntmp,X,&incxtmp); return FORNAME(dnrm2) (&ntmp,X,&incxtmp);
#else #else
return FORNAME(dnrm2) (&N,X,&incX); return FORNAME(dnrm2) (&N,X,&incX);
#endif #endif
} }
extern "C" double FORNAME(dasum) (const FINT *n, const double *x, const FINT *incx); extern "C" double FORNAME(dasum) (const FINT *n, const double *x, const FINT *incx);
double cblas_dasum(const int N, const double *X, const int incX) double cblas_dasum(const int N, const double *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
return FORNAME(dasum) (&ntmp,X,&incxtmp); return FORNAME(dasum) (&ntmp,X,&incxtmp);
#else #else
return FORNAME(dasum) (&N,X,&incX); return FORNAME(dasum) (&N,X,&incX);
#endif #endif
} }
extern "C" void FORNAME(zcopy) (const FINT *n, const void *x, const FINT *incx, void *y, const FINT *incy); extern "C" void FORNAME(zcopy) (const FINT *n, const void *x, const FINT *incx, void *y, const FINT *incy);
void cblas_zcopy(const int N, const void *X, const int incX, void cblas_zcopy(const int N, const void *X, const int incX, void *Y, const int incY){
void *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zcopy) (&ntmp,X,&incxtmp,Y,&incytmp); FORNAME(zcopy) (&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(zcopy) (&N,X,&incX,Y,&incY); FORNAME(zcopy) (&N,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(zaxpy) (const FINT *n, const void *a, const void *x, const FINT *incx, void *y, const FINT *incy); extern "C" void FORNAME(zaxpy) (const FINT *n, const void *a, const void *x, const FINT *incx, void *y, const FINT *incy);
void cblas_zaxpy(const int N, const void *alpha, const void *X, void cblas_zaxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY){
const int incX, void *Y, const int incY)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zaxpy) (&ntmp,alpha,X,&incxtmp,Y,&incytmp); FORNAME(zaxpy) (&ntmp,alpha,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(zaxpy) (&N,alpha,X,&incX,Y,&incY); FORNAME(zaxpy) (&N,alpha,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(zscal) (const FINT *n, const void *a, void *x, const FINT *incx); extern "C" void FORNAME(zscal) (const FINT *n, const void *a, void *x, const FINT *incx);
void cblas_zscal(const int N, const void *alpha, void *X, const int incX) void cblas_zscal(const int N, const void *alpha, void *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
FORNAME(zscal)(&ntmp,alpha,X,&incxtmp); FORNAME(zscal)(&ntmp,alpha,X,&incxtmp);
#else #else
FORNAME(zscal)(&N,alpha,X,&incX); FORNAME(zscal)(&N,alpha,X,&incX);
#endif #endif
} }
extern "C" void FORNAME(zdscal) (const FINT *n, const double *a, void *x, const FINT *incx); extern "C" void FORNAME(zdscal) (const FINT *n, const double *a, void *x, const FINT *incx);
void cblas_zdscal(const int N, const double alpha, void *X, const int incX) void cblas_zdscal(const int N, const double alpha, void *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
FORNAME(zdscal)(&ntmp,&alpha,X,&incxtmp); FORNAME(zdscal)(&ntmp,&alpha,X,&incxtmp);
#else #else
FORNAME(zdscal)(&N,&alpha,X,&incX); FORNAME(zdscal)(&N,&alpha,X,&incX);
#endif #endif
} }
extern "C" double FORNAME(dznrm2) (const FINT *n, const void *x, const FINT *incx); extern "C" double FORNAME(dznrm2) (const FINT *n, const void *x, const FINT *incx);
double cblas_dznrm2(const int N, const void *X, const int incX) double cblas_dznrm2(const int N, const void *X, const int incX){
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
return FORNAME(dznrm2) (&ntmp,X,&incxtmp); return FORNAME(dznrm2) (&ntmp,X,&incxtmp);
#else #else
return FORNAME(dznrm2) (&N,X,&incX); return FORNAME(dznrm2) (&N,X,&incX);
#endif #endif
} }
//the following ones are f2c-compatible, but is it truly portable??? //the following ones are f2c-compatible, but is it truly portable???
extern "C" void FORNAME(zdotu) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy); extern "C" void FORNAME(zdotu) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy);
void cblas_zdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu){
void cblas_zdotu_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotu)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zdotu) (dotu,&ntmp,X,&incxtmp,Y,&incytmp); FORNAME(zdotu) (dotu,&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(zdotu) (dotu,&N,X,&incX,Y,&incY); FORNAME(zdotu) (dotu,&N,X,&incX,Y,&incY);
#endif #endif
} }
extern "C" void FORNAME(zdotc) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy); extern "C" void FORNAME(zdotc) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy);
void cblas_zdotc_sub(const int N, const void *X, const int incX, void cblas_zdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc){
const void *Y, const int incY, void *dotc)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zdotc) (dotc,&ntmp,X,&incxtmp,Y,&incytmp); FORNAME(zdotc) (dotc,&ntmp,X,&incxtmp,Y,&incytmp);
#else #else
FORNAME(zdotc) (dotc,&N,X,&incX,Y,&incY); FORNAME(zdotc) (dotc,&N,X,&incX,Y,&incY);
#endif #endif
} }
@@ -211,38 +189,38 @@ FORNAME(zdotc) (dotc,&N,X,&incX,Y,&incY);
extern "C" void FORNAME(dspmv) (const char *uplo, const FINT *n, const double *alpha, const double *ap, const double *x, const FINT *incx, const double *beta, double *y, const FINT *incy); extern "C" void FORNAME(dspmv) (const char *uplo, const FINT *n, const double *alpha, const double *ap, const double *x, const FINT *incx, const double *beta, double *y, const FINT *incy);
void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *Ap, const int N, const double alpha, const double *Ap,
const double *X, const int incX, const double *X, const int incX,
const double beta, double *Y, const int incY) const double beta, double *Y, const int incY)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
if(Uplo!=CblasLower) LA::laerror("CblasLower uplo asserted"); if(Uplo!=CblasLower) laerror("CblasLower uplo asserted");
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(dspmv) ("U",&ntmp, &alpha, Ap, X, &incxtmp, &beta, Y, &incytmp); FORNAME(dspmv) ("U",&ntmp, &alpha, Ap, X, &incxtmp, &beta, Y, &incytmp);
#else #else
FORNAME(dspmv) ("U",&N, &alpha, Ap, X, &incX, &beta, Y, &incY); FORNAME(dspmv) ("U",&N, &alpha, Ap, X, &incX, &beta, Y, &incY);
#endif #endif
} }
extern "C" void FORNAME(zhpmv) (const char *uplo, const FINT *n, const void *alpha, const void *ap, const void *x, const FINT *incx, const void *beta, void *y, const FINT *incy); extern "C" void FORNAME(zhpmv) (const char *uplo, const FINT *n, const void *alpha, const void *ap, const void *x, const FINT *incx, const void *beta, void *y, const FINT *incy);
void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *Ap, const int N, const void *alpha, const void *Ap,
const void *X, const int incX, const void *X, const int incX,
const void *beta, void *Y, const int incY) const void *beta, void *Y, const int incY)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
if(Uplo!=CblasLower) LA::laerror("CblasLower uplo asserted"); if(Uplo!=CblasLower) laerror("CblasLower uplo asserted");
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zhpmv) ("U",&ntmp, alpha, Ap, X, &incxtmp, beta, Y, &incytmp); FORNAME(zhpmv) ("U",&ntmp, alpha, Ap, X, &incxtmp, beta, Y, &incytmp);
#else #else
FORNAME(zhpmv) ("U",&N, alpha, Ap, X, &incX, beta, Y, &incY); FORNAME(zhpmv) ("U",&N, alpha, Ap, X, &incX, beta, Y, &incY);
#endif #endif
} }
@@ -251,185 +229,232 @@ FORNAME(zhpmv) ("U",&N, alpha, Ap, X, &incX, beta, Y, &incY);
extern "C" void FORNAME(dger) (const FINT *m, const FINT *n, const double *alpha, const double *x, const FINT *incx, const double *y, const FINT *incy, double *a, const FINT *lda); extern "C" void FORNAME(dger) (const FINT *m, const FINT *n, const double *alpha, const double *x, const FINT *incx, const double *y, const FINT *incy, double *a, const FINT *lda);
void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N,
const double alpha, const double *X, const int incX, const double alpha, const double *X, const int incX,
const double *Y, const int incY, double *A, const int lda) const double *Y, const int incY, double *A, const int lda)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
//swap m-n, y-x //swap m-n, y-x
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
const FINT ldatmp=lda; const FINT ldatmp=lda;
FORNAME(dger) (&ntmp, &mtmp, &alpha, Y, &incytmp, X, &incxtmp, A, &ldatmp); FORNAME(dger) (&ntmp, &mtmp, &alpha, Y, &incytmp, X, &incxtmp, A, &ldatmp);
#else #else
FORNAME(dger) (&N, &M, &alpha, Y, &incY, X, &incX, A, &lda); FORNAME(dger) (&N, &M, &alpha, Y, &incY, X, &incX, A, &lda);
#endif #endif
} }
void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N, void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
const void *alpha, const void *X, const int incX, const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda) const void *Y, const int incY, void *A, const int lda)
{ {
LA::laerror("cblas_zgerc cannot be simply converted to fortran order"); laerror("cblas_zgerc cannot be simply converted to fortran order");
} }
void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N, void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N,
const void *alpha, const void *X, const int incX, const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda) const void *Y, const int incY, void *A, const int lda)
{ {
LA::laerror("cblas_zgeru cannot be simply converted to fortran order"); laerror("cblas_zgeru cannot be simply converted to fortran order");
} }
extern "C" void FORNAME(dgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const double *alpha, const double *a, const FINT *lda, const double *b, const FINT *ldb, const double *beta, double *c, const FINT *ldc); extern "C" void FORNAME(dgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const double *alpha, const double *a, const FINT *lda, const double *b, const FINT *ldb, const double *beta, double *c, const FINT *ldc);
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const double alpha, const double *A, const int K, const double alpha, const double *A,
const int lda, const double *B, const int ldb, const int lda, const double *B, const int ldb,
const double beta, double *C, const int ldc) const double beta, double *C, const int ldc)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
//swap a-b, m-n //swap a-b, m-n
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT ktmp=K; const FINT ktmp=K;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT ldbtmp=ldb; const FINT ldbtmp=ldb;
const FINT ldctmp=ldc; const FINT ldctmp=ldc;
FORNAME(dgemm) (TransB==CblasNoTrans?"N":"T", TransA==CblasNoTrans?"N":"T", FORNAME(dgemm) (TransB==CblasNoTrans?"N":"T", TransA==CblasNoTrans?"N":"T",
&ntmp, &mtmp, &ktmp, &alpha, B, &ldbtmp, A, &ldatmp, &beta, C, &ldctmp); &ntmp, &mtmp, &ktmp, &alpha, B, &ldbtmp, A, &ldatmp, &beta, C, &ldctmp);
#else #else
FORNAME(dgemm) (TransB==CblasNoTrans?"N":"T", TransA==CblasNoTrans?"N":"T", FORNAME(dgemm) (TransB==CblasNoTrans?"N":"T", TransA==CblasNoTrans?"N":"T",
&N, &M, &K, &alpha, B, &ldb, A, &lda, &beta, C, &ldc); &N, &M, &K, &alpha, B, &ldb, A, &lda, &beta, C, &ldc);
#endif #endif
} }
extern "C" void FORNAME(zgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const void *alpha, const void *a, const FINT *lda, const void *b, const FINT *ldb, const void *beta, void *c, const FINT *ldc); extern "C" void FORNAME(zgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const void *alpha, const void *a, const FINT *lda, const void *b, const FINT *ldb, const void *beta, void *c, const FINT *ldc);
void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const void *alpha, const void *A, const int K, const void *alpha, const void *A,
const int lda, const void *B, const int ldb, const int lda, const void *B, const int ldb,
const void *beta, void *C, const int ldc) const void *beta, void *C, const int ldc)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
//swap a-b, m-n //swap a-b, m-n
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT ktmp=K; const FINT ktmp=K;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT ldbtmp=ldb; const FINT ldbtmp=ldb;
const FINT ldctmp=ldc; const FINT ldctmp=ldc;
FORNAME(zgemm) ( TransB==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"), FORNAME(zgemm) ( TransB==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"),
TransA==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"), TransA==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"),
&ntmp, &mtmp, &ktmp, alpha, B, &ldbtmp, A, &ldatmp, beta, C, &ldctmp); &ntmp, &mtmp, &ktmp, alpha, B, &ldbtmp, A, &ldatmp, beta, C, &ldctmp);
#else #else
FORNAME(zgemm) ( TransB==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"), FORNAME(zgemm) ( TransB==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"),
TransA==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"), TransA==CblasConjTrans?"C":(TransB==CblasNoTrans?"N":"T"),
&N, &M, &K, alpha, B, &ldb, A, &lda, beta, C, &ldc); &N, &M, &K, alpha, B, &ldb, A, &lda, beta, C, &ldc);
#endif #endif
} }
extern "C" void FORNAME(dgemv) (const char *TRANS, const FINT *M, const FINT *N, const double *ALPHA, const double *A, const FINT *LDA, const double *X, const FINT *INCX, const double *BETA, double *Y, const FINT *INCY); extern "C" void FORNAME(dgemv) (const char *TRANS, const FINT *M, const FINT *N, const double *ALPHA, const double *A, const FINT *LDA, const double *X, const FINT *INCX, const double *BETA, double *Y, const FINT *INCY);
void cblas_dgemv(const enum CBLAS_ORDER Order, void cblas_dgemv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const double alpha, const double *A, const int lda, const double alpha, const double *A, const int lda,
const double *X, const int incX, const double beta, const double *X, const int incX, const double beta,
double *Y, const int incY) double *Y, const int incY)
{ {
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &ntmp, &mtmp, &alpha, A, &ldatmp, X, &incxtmp, &beta, Y, &incytmp ); if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &ntmp, &mtmp, &alpha, A, &ldatmp, X, &incxtmp, &beta, Y, &incytmp );
//swap n-m and toggle transposition //swap n-m and toggle transposition
else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &ntmp, &mtmp, &alpha, A, &ldatmp, X, &incxtmp, &beta, Y, &incytmp ); else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &ntmp, &mtmp, &alpha, A, &ldatmp, X, &incxtmp, &beta, Y, &incytmp );
#else #else
if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY ); if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
//swap n-m and toggle transposition //swap n-m and toggle transposition
else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY ); else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
#endif #endif
} }
extern "C" void FORNAME(zgemv) (const char *TRANS, const FINT *M, const FINT *N, const void *ALPHA, const void *A, const FINT *LDA, const void *X, const FINT *INCX, const void *BETA, void *Y, const FINT *INCY); extern "C" void FORNAME(zgemv) (const char *TRANS, const FINT *M, const FINT *N, const void *ALPHA, const void *A, const FINT *LDA, const void *X, const FINT *INCX, const void *BETA, void *Y, const FINT *INCY);
void cblas_zgemv(const enum CBLAS_ORDER Order, void cblas_zgemv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const void *alpha, const void *A, const int lda, const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta, const void *X, const int incX, const void *beta,
void *Y, const int incY) void *Y, const int incY)
{ {
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
if(TransA == CblasConjTrans) LA::laerror("zgemv with CblasConjTrans not supportted"); if(TransA == CblasConjTrans) laerror("zgemv with CblasConjTrans not supportted");
//swap n-m and toggle transposition //swap n-m and toggle transposition
#ifdef FORINT #ifdef FORINT
const FINT mtmp=M; const FINT mtmp=M;
const FINT ntmp=N; const FINT ntmp=N;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT incxtmp=incX; const FINT incxtmp=incX;
const FINT incytmp=incY; const FINT incytmp=incY;
FORNAME(zgemv) (TransA==CblasNoTrans?"T":"N", &ntmp, &mtmp, alpha, A, &ldatmp, X, &incxtmp, beta, Y, &incytmp ); FORNAME(zgemv) (TransA==CblasNoTrans?"T":"N", &ntmp, &mtmp, alpha, A, &ldatmp, X, &incxtmp, beta, Y, &incytmp );
#else #else
FORNAME(zgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, alpha, A, &lda, X, &incX, beta, Y, &incY ); FORNAME(zgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, alpha, A, &lda, X, &incX, beta, Y, &incY );
#endif #endif
} }
extern "C" FINT FORNAME(idamax) (const FINT *N, const double *DX, const FINT *INCX); extern "C" FINT FORNAME(idamax) (const FINT *N, const double *DX, const FINT *INCX);
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX) {
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX)
{
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT incxtmp=incX; const FINT incxtmp=incX;
return (CBLAS_INDEX)FORNAME(idamax)(&ntmp,X,&incxtmp); return (CBLAS_INDEX)FORNAME(idamax)(&ntmp,X,&incxtmp);
#else #else
return (CBLAS_INDEX)FORNAME(idamax)(&N,X,&incX); return (CBLAS_INDEX)FORNAME(idamax)(&N,X,&incX);
#endif #endif
} }
extern "C" FINT FORNAME(izamax) (const FINT *N, const void *DX, const FINT *INCX);
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX) {
#ifdef FORINT
const FINT ntmp=N;
const FINT incxtmp=incX;
return (CBLAS_INDEX)FORNAME(izamax)(&ntmp, X, &incxtmp);
#else
return (CBLAS_INDEX)FORNAME(izamax)(&N, X, &incX);
#endif
}
/*
extern "C" FINT FORNAME(idamin) (const FINT *N, const double *DX, const FINT *INCX);
CBLAS_INDEX cblas_idamin(const int N, const double *X, const int incX) {
#ifdef FORINT
const FINT ntmp=N;
const FINT incxtmp=incX;
return (CBLAS_INDEX)FORNAME(idamin)(&ntmp,X,&incxtmp);
#else
return (CBLAS_INDEX)FORNAME(idamin)(&N,X,&incX);
#endif
}
extern "C" FINT FORNAME(izamin) (const FINT *N, const void *DX, const FINT *INCX);
CBLAS_INDEX cblas_izamin(const int N, const void *X, const int incX) {
#ifdef FORINT
const FINT ntmp=N;
const FINT incxtmp=incX;
return (CBLAS_INDEX)FORNAME(izamin)(&ntmp, X, &incxtmp);
#else
return (CBLAS_INDEX)FORNAME(izamin)(&N, X, &incX);
#endif
}
*/
#endif #endif
#ifdef NONCLAPACK
#ifdef NONCLAPACK
//clapack_dgesv //clapack_dgesv
//allocate auxiliary storage and transpose input and output quantities to fortran/C order //allocate auxiliary storage and transpose input and output quantities to fortran/C order
extern "C" void FORNAME(dgesv) (const FINT *N, const FINT *NRHS, double *A, const FINT *LDA, FINT *IPIV, double *B, const FINT *LDB, FINT *INFO); extern "C" void FORNAME(dgesv) (const FINT *N, const FINT *NRHS, double *A, const FINT *LDA, FINT *IPIV, double *B, const FINT *LDB, FINT *INFO);
int clapack_dgesv(const enum CBLAS_ORDER Order, const int N, const int NRHS, int clapack_dgesv(const enum CBLAS_ORDER Order, const int N, const int NRHS,
double *A, const int lda, int *ipiv, double *A, const int lda, int *ipiv,
double *B, const int ldb) double *B, const int ldb)
{ {
FINT INFO=0; FINT INFO=0;
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted"); if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
//B should be in the same physical order, just transpose A in place and the LU result on output //B should be in the same physical order, just transpose A in place and the LU result on output
for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;} for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;}
#ifdef FORINT #ifdef FORINT
const FINT ntmp=N; const FINT ntmp=N;
const FINT nrhstmp=NRHS; const FINT nrhstmp=NRHS;
const FINT ldatmp=lda; const FINT ldatmp=lda;
const FINT ldbtmp=ldb; const FINT ldbtmp=ldb;
FINT ipivtmp=*ipiv; FINT ipivtmp=*ipiv;
FORNAME(dgesv) (&ntmp,&nrhstmp,A,&ldatmp,&ipivtmp,B,&ldbtmp,&INFO); FORNAME(dgesv) (&ntmp,&nrhstmp,A,&ldatmp,&ipivtmp,B,&ldbtmp,&INFO);
#else #else
FORNAME(dgesv) (&N,&NRHS,A,&lda,ipiv,B,&ldb,&INFO); FORNAME(dgesv) (&N,&NRHS,A,&lda,ipiv,B,&ldb,&INFO);
#endif #endif
for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;} for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;}
return (int)INFO; return (int)INFO;
} }
#endif #endif
void cblas_dswap(const int N, double *X, const int incX,
double *Y, const int incY){ extern "C" void FORNAME(dswap)(const FINT *N, double *X, const FINT *incX, double *Y, const FINT *incY);
LA::laerror("cblas_dswap is not available... (-DNONCBLAS)"); void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY){
#ifdef FORINT
const FINT N_tmp = N;
const FINT incX_tmp = incX;
const FINT incY_tmp = incY;
FORNAME(dswap)(&N_tmp, X, &incX_tmp, Y, &incY_tmp);
#else
FORNAME(dswap)(&N, X, &incX, Y, &incY);
#endif
} }
void cblas_zswap(const int N, void *X, const int incX, extern "C" void FORNAME(zswap)(const FINT *N, void *X, const FINT *incX, void *Y, const FINT *incY);
void *Y, const int incY){ void cblas_zswap(const int N, void *X, const int incX, void *Y, const int incY){
LA::laerror("cblas_zswap is not available... (-DNONCBLAS)"); #ifdef FORINT
const FINT N_tmp = N;
const FINT incX_tmp = incX;
const FINT incY_tmp = incY;
FORNAME(zswap)(&N_tmp, X, &incX_tmp, Y, &incY_tmp);
#else
FORNAME(zswap)(&N, X, &incX, Y, &incY);
#endif
} }
+5
View File
@@ -85,6 +85,11 @@ CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX);
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
/*
CBLAS_INDEX cblas_idamin(const int N, const double *X, const int incX);
CBLAS_INDEX cblas_izamin(const int N, const void *X, const int incX);
*/
/* /*
* =========================================================================== * ===========================================================================
* Prototypes for level 1 BLAS routines * Prototypes for level 1 BLAS routines
+502 -220
View File
@@ -1,3 +1,6 @@
//------------------------------------------------------------------------------
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
//------------------------------------------------------------------------------
/* /*
LA: linear algebra C++ interface library LA: linear algebra C++ interface library
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com> Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
@@ -25,219 +28,359 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <fcntl.h> #include <fcntl.h>
#include <errno.h> #include <errno.h>
extern "C" { extern "C" {
extern ssize_t read(int, void *, size_t); extern ssize_t read(int, void *, size_t);
extern ssize_t write(int, const void *, size_t); extern ssize_t write(int, const void *, size_t);
} }
// TODO
// specialize unary minus
namespace LA { namespace LA {
/***************************************************************************//**
/* * routine for raw output
* * Templates first, specializations for BLAS next * @param[in] fd file descriptor for output
* * @param[in] dim number of elements intended for output
*/ * @param[in] transp reserved
* @see NRMat<T>::get(), NRSMat<T>::copyonwrite()
//raw I/O ******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::put(int fd, bool dim, bool transp) const void NRSMat<T>::put(int fd, bool dim, bool transp) const {
{
#ifdef CUDALA #ifdef CUDALA
if(location!=cpu) if(location != cpu){
{ NRSMat<T> tmp= *this;
NRSMat<T> tmp= *this; tmp.moveto(cpu);
tmp.moveto(cpu); tmp.put(fd,dim,transp);
tmp.put(fd,dim,transp); return;
return; }
} #endif
errno = 0;
if(dim){
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
}
LA_traits<T>::multiput(NN2,fd,v,dim);
}
/***************************************************************************//**
* routine for raw input
* @param[in] fd file descriptor for input
* @param[in] dim number of elements intended for input
* @param[in] transp reserved
* @see NRSMat<T>::put(), NRSMat<T>::copyonwrite()
******************************************************************************/
template <typename T>
void NRSMat<T>::get(int fd, bool dim, bool transp) {
#ifdef CUDALA
if(location != cpu){
NRSMat<T> tmp;
tmp.moveto(cpu);
tmp.get(fd,dim,transp);
tmp.moveto(location);
*this = tmp;
return;
}
#endif #endif
errno=0; int nn0[2]; //align at least 8-byte
if(dim) errno = 0;
{ if(dim){
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("cannot read");
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); resize(nn0[0]);
} }else{
LA_traits<T>::multiput(NN2,fd,v,dim); copyonwrite();
}
LA_traits<T>::multiget(NN2,fd,v,dim);
} }
/***************************************************************************//**
* constructor symmetrizing given matrix \f$A\f$ of general type <code>T</code> yielding \f$(A+A^\mathrm{T})/2\f$
* @param[in] rhs matrix \f$A\f$
******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::get(int fd, bool dim, bool transp) NRSMat<T>::NRSMat(const NRMat<T> &rhs) {
{ NOT_GPU(rhs);
#ifdef CUDALA
if(location!=cpu)
{
NRSMat<T> tmp;
tmp.moveto(cpu);
tmp.get(fd,dim,transp);
tmp.moveto(location);
*this = tmp;
return;
}
#endif
int nn0[2]; //align at least 8-byte nn = rhs.nrows();
errno=0;
if(dim)
{
if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("cannot read");
resize(nn0[0]);
}
else
copyonwrite();
LA_traits<T>::multiget(NN2,fd,v,dim);
}
// conversion ctor, symmetrize general Mat into SMat
template <typename T>
NRSMat<T>::NRSMat(const NRMat<T> &rhs)
{
nn=rhs.nrows();
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.ncols()) laerror("attempt to convert non-square Mat to SMat"); if(nn != rhs.ncols()) laerror("attempt to convert nonsquare NRMat<T> to NRSMat<T>");
#endif
#ifdef CUDALA
location = rhs.getlocation();
#endif #endif
count = new int; count = new int;
*count = 1; *count = 1;
v = new T[NN2]; v = new T[NN2];
int i, j, k=0; int i, j, k(0);
for (i=0; i<nn; i++) for(i=0; i<nn; i++){
for (j=0; j<=i;j++) v[k++] = (rhs[i][j] + rhs[j][i])/((T)2); for(j=0; j<=i; j++){
v[k++] = (rhs[i][j] + rhs[j][i])/((T)2);
}
}
} }
/***************************************************************************//**
* zero out this symmetric matrix of general type <code>T</code> and then set
// assign to diagonal * the diagonal elements to prescribed value
* @param[in] a scalar value to be assigned to the diagonal
* @return reference to the modified matrix
******************************************************************************/
template <typename T> template <typename T>
NRSMat<T> & NRSMat<T>::operator=(const T &a) NRSMat<T> & NRSMat<T>::operator=(const T &a) {
{ NOT_GPU(*this);
copyonwrite(); copyonwrite();
memset(v,0,NN2*sizeof(T)); memset(v, 0, NN2*sizeof(T));
for (int i=0; i<nn; i++) v[i*(i+1)/2+i] = a; for(register int i=0; i<nn; i++) v[i*(i+1)/2 + i] = a;
return *this; return *this;
} }
//get diagonal /***************************************************************************//**
* get or divide by the diagonal of real symmetric double-precision matrix
* @param[in, out] r vector for storing the diagonal
* @param[in] divide
* \li \c false save the diagonal to vector r
* \li \c true divide the vector r by the diagonal elements element-wise
* @param[in] cache reserved
* @return
* \li <tt>divide == true</tt> NULL
* \li <tt>divide == false</tt> pointer to the first element of r
******************************************************************************/
template <typename T> template <typename T>
const T* NRSMat<T>::diagonalof(NRVec<T> &r, const bool divide, bool cache) const const T* NRSMat<T>::diagonalof(NRVec<T> &r, const bool divide, bool cache) const {
{
#ifdef DEBUG #ifdef DEBUG
if(r.size()!=nn) laerror("incompatible vector in diagonalof()"); if(r.size() != nn) laerror("incompatible vector in const T* NRSMat<T>::diagonalof(NRVec<T> &, const bool, bool)");
#endif #endif
NOT_GPU(*this);
SAME_LOC(*this, r);
r.copyonwrite(); r.copyonwrite();
if (divide) if(divide){
for (int i=0; i<nn; i++) {T a =v[i*(i+1)/2+i]; if(a!=0.) r[i] /= a;} for(register int i=0; i<nn; i++){
else const T a = v[i*(i+1)/2+i];
for (int i=0; i<nn; i++) r[i] = v[i*(i+1)/2+i]; if(a != 0.) r[i] /= a;
return divide?NULL:&r[0]; }
}else{
for(register int i=0; i<nn; i++) r[i] = v[i*(i+1)/2+i];
}
return divide?NULL:&r[0];
} }
// unary minus /***************************************************************************//**
* implements unary minus operator for this symmetric
* matrix of general type <code>T</code>
* @return modified copy of this matrix
******************************************************************************/
template <typename T> template <typename T>
const NRSMat<T> NRSMat<T>::operator-() const const NRSMat<T> NRSMat<T>::operator-() const {
{ NOT_GPU(*this);
NRSMat<T> result(nn);
for(int i=0; i<NN2; i++) result.v[i]= -v[i]; NRSMat<T> result(nn, getlocation());
for(register int i = 0; i<NN2; i++) result.v[i]= -v[i];
return result; return result;
} }
// trace of Smat /***************************************************************************//**
* implements unary minus operator for this real symmetric matrix
* @return modified copy of this matrix
******************************************************************************/
template <>
const NRSMat<double> NRSMat<double>::operator-() const {
NRSMat<double> result(nn, getlocation());
#ifdef CUDALA
if(location == cpu){
#endif
memcpy(result.v, v, NN2*sizeof(double));
cblas_dscal(NN2, -1., result.v, 1);
#ifdef CUDALA
}else{
cublasDcopy(NN2, v, 1, result.v, 1);
TEST_CUBLAS("cublasDcopy");
cublasDscal(NN2, -1., result.v, 1);
TEST_CUBLAS("cublasDscal");
}
#endif
return result;
}
/***************************************************************************//**
* implements unary minus operator for this hermitian matrix
* @return modified copy of this matrix
******************************************************************************/
template <>
const NRSMat<complex<double> > NRSMat<complex<double> >::operator-() const {
NRSMat<complex<double> > result(nn, getlocation());
#ifdef CUDALA
if(location == cpu) {
#endif
memcpy(result.v, v, NN2*sizeof(complex<double>));
cblas_zscal(NN2, &CMONE, result.v, 1);
#ifdef CUDALA
}else{
cublasZcopy(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)result.v, 1);
TEST_CUBLAS("cublasZcopy");
cublasZscal(NN2, CUMONE, (cuDoubleComplex*)result.v, 1);
TEST_CUBLAS("cublasZscal");
}
#endif
return result;
}
/***************************************************************************//**
* @return the sum of the diagonal elements
******************************************************************************/
template <typename T> template <typename T>
const T NRSMat<T>::trace() const const T NRSMat<T>::trace() const {
{ NOT_GPU(*this);
T tmp = 0; T tmp = 0;
for (int i=0; i<nn; i++) tmp += v[i*(i+1)/2+i]; for(register int i=0; i<nn; i++) tmp += v[i*(i+1)/2+i];
return tmp; return tmp;
} }
/***************************************************************************//**
* fill this real symmetric matrix with
* pseudorandom numbers generated from uniform distribution
******************************************************************************/
template<> template<>
void NRSMat<double>::randomize(const double &x) void NRSMat<double>::randomize(const double &x) {
{ NOT_GPU(*this);
for(int i=0; i<NN2; ++i) v[i] = x*(2.*random()/(1.+RAND_MAX) -1.);
for(int i=0; i<NN2; ++i){
v[i] = x*(2.*random()/(1.+RAND_MAX) -1.);
}
} }
/***************************************************************************//**
* Fill this hermitian matrix with pseudorandom numbers generated from uniform
* distribution. The real and imaginary parts are generated independently.
******************************************************************************/
template<> template<>
void NRSMat<complex<double> >::randomize(const double &x) void NRSMat<complex<double> >::randomize(const double &x) {
{ for(register int i=0; i<NN2; ++i) v[i].real() = x*(2.*random()/(1. + RAND_MAX) -1.);
for(int i=0; i<NN2; ++i) v[i].real() = x*(2.*random()/(1.+RAND_MAX) -1.); for(register int i=0; i<NN2; ++i) v[i].imag() = x*(2.*random()/(1. + RAND_MAX) -1.);
for(int i=0; i<NN2; ++i) v[i].imag() = x*(2.*random()/(1.+RAND_MAX) -1.); for(register int i=0; i<nn; ++i){
for(int i=0; i<nn; ++i) for(int j=0; j<=i; ++j) if(i==j) v[i*(i+1)/2+j].imag()=0; //hermitean for(register int j=0; j<=i; ++j){
if(i == j) v[i*(i+1)/2+j].imag() = 0; //hermitean
}
}
} }
/***************************************************************************//**
* routine for formatted output via lawritemat
// write matrix to the file with specific format * @param[in] file pointer to <tt>FILE</tt> structure representing the output file
* @param[in] format format specification in standard printf-like form
* @param[in] modulo
* @see lawritemat()
******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::fprintf(FILE *file, const char *format, const int modulo) const void NRSMat<T>::fprintf(FILE *file, const char *format, const int modulo) const {
{ NOT_GPU(*this);
lawritemat(file, (const T *)(*this) ,nn, nn, format, 2, modulo, 1); lawritemat(file, (const T *)(*this) ,nn, nn, format, 2, modulo, 1);
} }
// read matrix from the file with specific format
/***************************************************************************//**
* routine for formatted input via fscanf
* @param[in] f pointer to <tt>FILE</tt> structure representing the input file
* @param[in] format format specification in standard printf-like form
******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::fscanf(FILE *f, const char *format) void NRSMat<T>::fscanf(FILE *f, const char *format) {
{
int n, m; int n, m;
if (::fscanf(f,"%d %d",&n,&m) != 2) NOT_GPU(*this);
laerror("cannot read matrix dimensions in SMat::fscanf");
if (n != m) laerror("different dimensions of SMat"); if (::fscanf(f,"%d %d", &n, &m) != 2)
laerror("cannot read matrix dimensions in NRSMat<T>::fscanf(FILE *, const char *)");
if (n != m) laerror("different dimensions in NRSMat<T>::fscanf(FILE *, const char *)");
resize(n); resize(n);
for (int i=0; i<n; i++) for (int i=0; i<n; i++)
for (int j=0; j<n; j++) for (int j=0; j<n; j++)
if (::fscanf(f,format,&((*this)(i,j))) != 1) if (::fscanf(f,format,&((*this)(i,j))) != 1)
laerror("Smat - cannot read matrix element"); laerror("NRSMat<T>::fscanf(FILE *, const char *) - unable to read matrix element");
} }
/* /***************************************************************************//**
* BLAS specializations for double and complex<double> * multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form
*/ * with real double-precision dense matrix \f$A\f$
* @param[in] rhs real double-precision matrix \f$A\f$
* @return matrix produt \f$S\times{}A\f$
******************************************************************************/
// SMat * Mat
//NOTE: dsymm is not appropriate as it works on UNPACKED symmetric matrix
template<> template<>
const NRMat<double> NRSMat<double>::operator*(const NRMat<double> &rhs) const const NRMat<double> NRSMat<double>::operator*(const NRMat<double> &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat"); if(nn != rhs.nrows()) laerror("incompatible dimensions in NRMat<double> NRSMat<double>::operator*(const NRMat<double> &)");
#endif
SAME_LOC(*this, rhs);
NRMat<double> result(nn, rhs.ncols(), getlocation());
#ifdef CUDALA
if(location == cpu){
#endif
for(register int k = 0; k<rhs.ncols(); k++){
cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs[0] + k, rhs.ncols(), 0.0, result[0] + k, rhs.ncols());
}
#ifdef CUDALA
}else{
for(register int k = 0; k<rhs.ncols(); k++){
cublasDspmv('U', nn, 1.0, v, rhs[0] + k, rhs.ncols(), 0.0, result[0] + k, rhs.ncols());
TEST_CUBLAS("cublasDspmv");
}
}
#endif #endif
NRMat<double> result(nn, rhs.ncols());
for (int k=0; k<rhs.ncols(); k++)
cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs[0]+k, rhs.ncols(),
0.0, result[0]+k, rhs.ncols());
return result; return result;
} }
/***************************************************************************//**
* multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form
* with real double-precision dense matrix \f$A\f$
* @param[in] rhs real double-precision matrix \f$A\f$
* @return matrix produt \f$S\times{}A\f$
******************************************************************************/
template<> template<>
const NRMat< complex<double> > const NRMat<complex<double> >
NRSMat< complex<double> >::operator*(const NRMat< complex<double> > &rhs) const NRSMat<complex<double> >::operator*(const NRMat<complex<double> > &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat"); if (nn != rhs.nrows()) laerror("incompatible dimensions in NRSMat<complex<double> >::operator*(const NRMat<complex<double> > &)");
#endif
SAME_LOC(*this, rhs);
NRMat<complex<double> > result(nn, rhs.ncols(), getlocation());
#ifdef CUDALA
if(location == cpu){
#endif
for(register int k=0; k<rhs.ncols(); k++){
cblas_zhpmv(CblasRowMajor, CblasLower, nn, &CONE, v, rhs[0]+k, rhs.ncols(), &CZERO, result[0]+k, rhs.ncols());
}
#ifdef CUDALA
}else{
for(register int k = 0; k<rhs.ncols(); k++){
cublasZhpmv('U', nn,
CUONE, (cuDoubleComplex*)v, (cuDoubleComplex*)(rhs[0] + k), rhs.ncols(),
CUZERO, (cuDoubleComplex*)(result[0] + k), rhs.ncols());
TEST_CUBLAS("cublasDspmv");
}
}
#endif #endif
NRMat< complex<double> > result(nn, rhs.ncols());
for (int k=0; k<rhs.ncols(); k++)
cblas_zhpmv(CblasRowMajor, CblasLower, nn, &CONE, v, rhs[0]+k, rhs.ncols(),
&CZERO, result[0]+k, rhs.ncols());
return result; return result;
} }
/***************************************************************************//**
* multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form
// SMat * SMat * with real double-precision symmetric matrix \f$T\f$
* @return matrix produt \f$S\times{}T\f$ (not necessarily symmetric)
******************************************************************************/
template<> template<>
const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat"); if (nn != rhs.nn) laerror("incompatible dimensions in NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &)");
#endif #endif
NRMat<double> result(0.0, nn, nn); NRMat<double> result(0.0, nn, nn);
double *p, *q; double *p, *q;
@@ -283,156 +426,295 @@ const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const
} }
/***************************************************************************//**
* multiply this complex double-precision symmetric matrix \f$G\f$ stored in packed form
* with complex double-precision symmetric matrix \f$H\f$
* @return matrix produt \f$G\times{}H\f$ (not necessarily symmetric)
******************************************************************************/
template<> template<>
const NRMat< complex<double> > const NRMat<complex<double> >
NRSMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const NRSMat<complex<double> >::operator*(const NRSMat<complex<double> > &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat"); if (nn != rhs.nn) laerror("incompatible dimensions in NRSMat<complex<double> >::operator*(const NRSMat<complex<double> > &)");
#endif #endif
NRMat< complex<double> > result(0.0, nn, nn); SAME_LOC(*this, rhs);
NRMat< complex<double> > rhsmat(rhs); NRMat<complex<double> > result(nn, nn, getlocation());
NRMat<complex<double> > rhsmat(rhs);
result = *this * rhsmat; result = *this * rhsmat;
return result; return result;
// laerror("complex SMat*Smat not implemented");
} }
/***************************************************************************//**
* compute inner product of this real symmetric matrix \f$A\f$ with given real symmetric matrix \f$B\f$
// S dot S * i.e. determine the value of
* \f[\sum_{i,j}A_{i,j}B_{i,j}\f]
* @param[in] rhs matrix \f$B\f$
* @return computed inner product
******************************************************************************/
template<> template<>
const double NRSMat<double>::dot(const NRSMat<double> &rhs) const const double NRSMat<double>::dot(const NRSMat<double> &rhs) const {
{ double ret(0.);
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nn) laerror("dot of incompatible SMat's"); if (nn != rhs.nn) laerror("incompatible dimensions in double NRSMat<double>::dot(const NRSMat<double> &)");
#endif #endif
return cblas_ddot(NN2, v, 1, rhs.v, 1); SAME_LOC(*this, rhs);
#ifdef CUDALA
if(location == cpu){
#endif
ret = cblas_ddot(NN2, v, 1, rhs.v, 1);
#ifdef CUDALA
}else{
ret = cublasDdot(NN2, v, 1, rhs.v, 1);
}
#endif
return ret;
} }
/***************************************************************************//**
* compute inner product of this complex symmetric matrix \f$A\f$ with given complex symmetric matrix \f$B\f$
* i.e. determine the value of
* \f[\sum_{i,j}\overbar{A_{i,j}}B_{i,j}\f]
* @param[in] rhs matrix \f$B\f$
* @return computed inner product
******************************************************************************/
template<> template<>
const complex<double> const complex<double> NRSMat<complex<double> >::dot(const NRSMat<complex<double> > &rhs) const {
NRSMat< complex<double> >::dot(const NRSMat< complex<double> > &rhs) const
{
#ifdef DEBUG #ifdef DEBUG
if (nn != rhs.nn) laerror("dot of incompatible SMat's"); if (nn != rhs.nn) laerror("incompatible dimensions in complex<double> NRSMat<complex<double> >::dot(const NRSMat<complex<double> > &)");
#endif
complex<double> dot(0., 0.);
SAME_LOC(*this, rhs);
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
#ifdef CUDALA
}else{
const cuDoubleComplex _dot = cublasZdotc(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)(rhs.v), 1);
dot = complex<double>(cuCreal(_dot), cuCimag(_dot));
TEST_CUBLAS("cublasZdotc");
}
#endif #endif
complex<double> dot;
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
return dot; return dot;
} }
/***************************************************************************//**
* compute inner product of this real double-precision symmetric matrix \f$S\f$ of order \f$n\f$
* with given real double-precision vector \f$\vec{v}\f$ of length \f$n(n+1)/2\f$
* @param[in] rhs real double-precision vector \f$\vec{v}\f$
* @return computed inner product
******************************************************************************/
template<> template<>
const double NRSMat<double>::dot(const NRVec<double> &rhs) const const double NRSMat<double>::dot(const NRVec<double> &rhs) const {
{ double ret(0.0);
#ifdef DEBUG #ifdef DEBUG
if (NN2 != rhs.nn) laerror("dot of incompatible SMat's"); if(NN2 != rhs.nn) laerror("incompatible dimensions in double NRSMat<double>::dot(const NRVec<double> &)");
#endif
SAME_LOC(*this, rhs);
#ifdef CUDALA
if(location == cpu){
#endif
ret = cblas_ddot(NN2, v, 1, rhs.v, 1);
#ifdef CUDALA
}else{
ret = cublasDdot(NN2, v, 1, rhs.v, 1);
TEST_CUBLAS("cublasDdot");
}
#endif #endif
return cblas_ddot(NN2, v, 1, rhs.v, 1);
} }
/***************************************************************************//**
* compute inner product of this complex double-precision hermitian matrix \f$H\f$ of order \f$n\f$
* with given complex double-precision vector \f$\vec{v}\f$ of length \f$n(n+1)/2\f$
* @param[in] rhs complex double-precision vector \f$\vec{v}\f$
* @return computed inner product
******************************************************************************/
template<> template<>
const complex<double> const complex<double>
NRSMat< complex<double> >::dot(const NRVec< complex<double> > &rhs) const NRSMat<complex<double> >::dot(const NRVec<complex<double> > &rhs) const {
{
#ifdef DEBUG #ifdef DEBUG
if (NN2 != rhs.nn) laerror("dot of incompatible SMat's"); if(NN2 != rhs.nn) laerror("incompatible dimensions in complex<double> NRSMat<complex<double> >::dot(const NRVec<complex<double> > &)");
#endif
complex<double> dot(0., 0.);
SAME_LOC(*this, rhs);
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
#ifdef CUDALA
}else{
const cuDoubleComplex _dot = cublasZdotc(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)rhs.v, 1);
TEST_CUBLAS("cublasZdotc");
dot = complex<double>(cuCreal(_dot), cuCimag(_dot));
}
#endif #endif
complex<double> dot;
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
return dot; return dot;
} }
/***************************************************************************//**
// norm of the matrix * compute the Frobenius norm of this real double-precision symmetric matrix
* @param[in] scalar subtract this scalar value from the diagonal elements before the norm computation
******************************************************************************/
template<> template<>
const double NRSMat<double>::norm(const double scalar) const const double NRSMat<double>::norm(const double scalar) const {
{ if(!scalar){
if (!scalar) return cblas_dnrm2(NN2, v, 1); double ret(0.);
double sum = 0; #ifdef CUDALA
int k = 0; if(location == cpu){
for (int i=0; i<nn; ++i) #endif
for (int j=0; j<=i; ++j) { ret = cblas_dnrm2(NN2, v, 1);
register double tmp; #ifdef CUDALA
tmp = v[k++]; }else{
if (i == j) tmp -= scalar; ret = cublasDnrm2(NN2, v, 1);
TEST_CUBLAS("cublasDnrm2");
}
#endif
return ret;
}
NOT_GPU(*this);
double sum(0.);
int k(0);
for(register int i=0; i<nn; ++i){
for(register int j=0; j<=i; ++j) {
register double tmp = v[k++];
if(i == j) tmp -= scalar;
sum += tmp*tmp; sum += tmp*tmp;
} }
}
return std::sqrt(sum); return std::sqrt(sum);
} }
/***************************************************************************//**
* compute the Frobenius norm of this complex double-precision hermitian matrix
* @param[in] scalar subtract this scalar value from the diagonal elements before the norm computation
******************************************************************************/
template<> template<>
const double NRSMat< complex<double> >::norm(const complex<double> scalar) const const double NRSMat< complex<double> >::norm(const complex<double> scalar) const {
{ if(!(scalar.real()) && !(scalar.imag())){
if (!(scalar.real()) && !(scalar.imag())) double ret(0.);
return cblas_dznrm2(NN2, v, 1); #ifdef CUDALA
double sum = 0; if(location == cpu){
#endif
ret = cblas_dznrm2(NN2, v, 1);
#ifdef CUDALA
}else{
ret = cublasDznrm2(NN2, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasDznrm2");
}
#endif
return ret;
}
int k(0);
double sum(0.);
complex<double> tmp; complex<double> tmp;
int k = 0;
for (int i=0; i<nn; ++i) for(register int i=0; i<nn; ++i){
for (int j=0; j<=i; ++j) { for(register int j=0; j<=i; ++j){
tmp = v[k++]; tmp = v[k++];
if (i == j) tmp -= scalar; if (i == j) tmp -= scalar;
sum += tmp.real()*tmp.real() + tmp.imag()*tmp.imag(); sum += tmp.real()*tmp.real() + tmp.imag()*tmp.imag();
} }
}
return std::sqrt(sum); return std::sqrt(sum);
} }
/***************************************************************************//**
* for this real double-precision symmetric matrix \f$S\f$ stored in packed form,
* real scalar value \f$\alpha\f$ and real double-precision symmetric matrix \f$T\f$, compute
// axpy: S = S * a * \f[S \leftarrow \alpha T + S\f]
******************************************************************************/
template<> template<>
void NRSMat<double>::axpy(const double alpha, const NRSMat<double> & x) void NRSMat<double>::axpy(const double alpha, const NRSMat<double> &x) {
{
#ifdef DEBUG #ifdef DEBUG
if (nn != x.nn) laerror("axpy of incompatible SMats"); if(nn != x.nn) laerror("incompatible dimensions in void NRSMat<double>::axpy(const double, const NRSMat<double>&)");
#endif #endif
SAME_LOC(*this, x);
copyonwrite(); copyonwrite();
cblas_daxpy(NN2, alpha, x.v, 1, v, 1); #ifdef CUDALA
if(location == cpu){
#endif
cblas_daxpy(NN2, alpha, x.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasDaxpy(NN2, alpha, x.v, 1, v, 1);
TEST_CUBLAS("cublasDaxpy");
}
#endif
} }
/***************************************************************************//**
* for this complex double-precision hermitian matrix \f$H\f$ stored in packed form,
* complex scalar value \f$\alpha\f$ and complex double-precision hermitian matrix \f$G\f$, compute
* \f[H \leftarrow \alpha G + H\f]
******************************************************************************/
template<> template<>
void NRSMat< complex<double> >::axpy(const complex<double> alpha, void NRSMat<complex<double> >::axpy(const complex<double> alpha, const NRSMat<complex<double> > & x) {
const NRSMat< complex<double> > & x)
{
#ifdef DEBUG #ifdef DEBUG
if (nn != x.nn) laerror("axpy of incompatible SMats"); if(nn != x.nn) laerror("incompatible dimensions in void NRSMat<complex<double> >::axpy(const complex<double> , const NRSMat<complex<double> >&)");
#endif #endif
SAME_LOC(*this, x);
copyonwrite(); copyonwrite();
cblas_zaxpy(nn, &alpha, x.v, 1, v, 1); #ifdef CUDALA
if(location == cpu){
#endif
cblas_zaxpy(nn, &alpha, x.v, 1, v, 1);
#ifdef CUDALA
}else{
const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
cublasZaxpy(NN2, _alpha, (cuDoubleComplex*)x.v, 1, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZaxpy");
}
#endif
} }
//complex from real /***************************************************************************//**
* create hermitian matrix \f$H\f$ from given real double-precision symmetric
* matrix \f$S\f$
* @param[in] rhs real double-precision symmetric matrix \f$S\f$
* @param[in] imagpart flag determining whether \f$S\f$ should correspond to the real or imaginary part of \f$H\f$
******************************************************************************/
template<> template<>
NRSMat<complex<double> >::NRSMat(const NRSMat<double> &rhs, bool imagpart) NRSMat<complex<double> >::NRSMat(const NRSMat<double> &rhs, bool imagpart): nn(rhs.nrows()), count(new int(1)) {
: nn(rhs.nrows()), v(new complex<double>[rhs.nrows()*(rhs.nrows()+1)/2]), count(new int(1)) //inconsistent in general case?
{ const int nnp1 = nn*(nn + 1)/2;
memset(v,0,nn*(nn+1)/2*sizeof(complex<double>)); #ifdef CUDALA
cblas_dcopy(nn*(nn+1)/2,&rhs(0,0),1,((double *)v) + (imagpart?1:0),2); location = rhs.getlocation();
if(location == cpu){
#endif
v = new complex<double>[nnp1];
memset(v, 0, nnp1*sizeof(complex<double>));
cblas_dcopy(nnp1, &rhs(0, 0), 1, ((double *)v) + (imagpart?1:0), 2);
#ifdef CUDALA
}else{
v = (complex<double>*) gpualloc(nnp1*sizeof(complex<double>));
complex<double> *_val = gpuputcomplex(CZERO);
cublasZcopy(nnp1, (cuDoubleComplex*)_val, 0, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZcopy");
gpufree(_val);
cublasDcopy(nnp1, (double*)(&rhs(0,0)), 1, ((double*)v) + (imagpart?1:0), 2);
TEST_CUBLAS("cublasDcopy");
}
#endif
} }
/***************************************************************************//**
//some template specializations leading to BLAS/CUBLAS calls * forced instantization in the corresponding object file
******************************************************************************/
//////////////////////////////////////////////////////////////////////////////
////// forced instantization in the corresponding object file
template class NRSMat<double>; template class NRSMat<double>;
template class NRSMat< complex<double> >; template class NRSMat<complex<double> >;
template class NRSMat<long long>; template class NRSMat<long long>;
template class NRSMat<long>; template class NRSMat<long>;
+866 -467
View File
File diff suppressed because it is too large Load Diff
+818 -427
View File
File diff suppressed because it is too large Load Diff
+1165 -643
View File
File diff suppressed because it is too large Load Diff