cuda
This commit is contained in:
parent
e8cbf9e5fb
commit
e580467e5a
66
cuda_la.cc
66
cuda_la.cc
@ -1,3 +1,6 @@
|
|||||||
|
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
|
||||||
|
/*******************************************************************************
|
||||||
|
*******************************************************************************/
|
||||||
#include "la_traits.h"
|
#include "la_traits.h"
|
||||||
#include "cuda_la.h"
|
#include "cuda_la.h"
|
||||||
|
|
||||||
@ -7,64 +10,55 @@ namespace LA {
|
|||||||
|
|
||||||
GPUID DEFAULT_LOC = cpu;
|
GPUID DEFAULT_LOC = cpu;
|
||||||
|
|
||||||
void set_default_loc(const GPUID loc)
|
void set_default_loc(const GPUID loc){
|
||||||
{
|
|
||||||
DEFAULT_LOC = loc;
|
DEFAULT_LOC = loc;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *gpualloc(size_t size)
|
void *gpualloc(size_t size){
|
||||||
{
|
|
||||||
cublasStatus status;
|
|
||||||
void *ptr = NULL;
|
void *ptr = NULL;
|
||||||
status = cublasAlloc(size,1,&ptr);
|
cublasAlloc(size, 1, &ptr);
|
||||||
if(status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasAlloc");
|
TEST_CUBLAS("cublasAlloc");
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void gpufree(void *ptr)
|
void gpufree(void *ptr){
|
||||||
{
|
cublasFree(ptr);
|
||||||
cublasStatus status = cublasFree(ptr);
|
TEST_CUBLAS("cublasFree");
|
||||||
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasFree");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void gpuget(size_t n, size_t elsize, const void *from, void *to)
|
void gpuget(size_t n, size_t elsize, const void *from, void *to){
|
||||||
{
|
cublasGetVector(n, elsize, from, 1, to, 1);
|
||||||
cublasStatus status;
|
TEST_CUBLAS("cublasGetVector");
|
||||||
status=cublasGetVector(n,elsize,from,1,to,1);
|
|
||||||
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasGetVector");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void gpuput(size_t n, size_t elsize, const void *from, void *to)
|
void gpuput(size_t n, size_t elsize, const void *from, void *to){
|
||||||
{
|
cublasSetVector(n, elsize, from, 1, to, 1);
|
||||||
cublasStatus status;
|
TEST_CUBLAS("cublasSetVector");
|
||||||
status=cublasSetVector(n,elsize,from,1,to,1);
|
|
||||||
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasSetVector");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double *gpuputdouble(const double &x)
|
double *gpuputdouble(const double &x){
|
||||||
{
|
|
||||||
cublasStatus status;
|
|
||||||
void *ptr = NULL;
|
void *ptr = NULL;
|
||||||
status = cublasAlloc(1,sizeof(double),&ptr);
|
cublasAlloc(1, sizeof(double), &ptr);
|
||||||
if(status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasAlloc");
|
TEST_CUBLAS("cublasAlloc");
|
||||||
status=cublasSetVector(1,sizeof(double),&x,1,ptr,1);
|
|
||||||
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasSetVector");
|
cublasSetVector(1, sizeof(double), &x, 1, ptr, 1);
|
||||||
|
TEST_CUBLAS("cublasSetVector");
|
||||||
|
|
||||||
return (double *)ptr;
|
return (double *)ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
complex<double> *gpuputcomplex(const complex<double> &x)
|
complex<double> *gpuputcomplex(const complex<double> &x){
|
||||||
{
|
|
||||||
cublasStatus status;
|
|
||||||
void *ptr = NULL;
|
void *ptr = NULL;
|
||||||
status = cublasAlloc(1,sizeof(complex<double>),&ptr);
|
cublasAlloc(1, sizeof(complex<double>), &ptr);
|
||||||
if(status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasAlloc");
|
TEST_CUBLAS("cublasAlloc");
|
||||||
status=cublasSetVector(1,sizeof(complex<double>),&x,1,ptr,1);
|
|
||||||
if (status != CUBLAS_STATUS_SUCCESS) laerror("Error in cublasSetVector");
|
cublasSetVector(1, sizeof(complex<double>), &x, 1, ptr, 1);
|
||||||
|
TEST_CUBLAS("cublasSetVector");
|
||||||
|
|
||||||
return (complex<double> *)ptr;
|
return (complex<double> *)ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
51
cuda_la.h
51
cuda_la.h
@ -1,6 +1,10 @@
|
|||||||
|
//------------------------------------------------------------------------------
|
||||||
|
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
#ifndef _CUDA_LA_H
|
#ifndef _CUDA_LA_H
|
||||||
#define _CUDA_LA_H
|
#define _CUDA_LA_H
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
#undef MATPTR
|
#undef MATPTR
|
||||||
#include "cublas.h"
|
#include "cublas.h"
|
||||||
@ -13,6 +17,7 @@ namespace LA {
|
|||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
#define CPU_GPU(x,y) {if((x)!=cpu && (y)!=cpu) laerror("one operand must be in CPU memory");}
|
#define CPU_GPU(x,y) {if((x)!=cpu && (y)!=cpu) laerror("one operand must be in CPU memory");}
|
||||||
#define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");}
|
#define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");}
|
||||||
|
#define NOT_CPU(x) {if((x).getlocation()==cpu) laerror("Operation not implemented on CPU (yet). Use .moveto(>0) first.");}
|
||||||
#define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
#define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
||||||
#define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
#define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
||||||
#else
|
#else
|
||||||
@ -22,6 +27,16 @@ namespace LA {
|
|||||||
#define SAME_LOC3(x,y,z) {}
|
#define SAME_LOC3(x,y,z) {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
#ifdef __GNUG__
|
||||||
|
#define TEST_CUBLAS(X) { if(cublasGetError() != CUBLAS_STATUS_SUCCESS){ laerror2(#X, __PRETTY_FUNCTION__); } }
|
||||||
|
#else
|
||||||
|
#define TEST_CUBLAS(X) { if(cublasGetError() != CUBLAS_STATUS_SUCCESS){ laerror2(#X, __func__); } }
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#define TEST_CUBLAS(X) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef enum {undefined=-1, cpu=0, gpu1=1, gpu2=2, gpu3=3, gpu4=4} GPUID;
|
typedef enum {undefined=-1, cpu=0, gpu1=1, gpu2=2, gpu3=3, gpu4=4} GPUID;
|
||||||
|
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
@ -33,6 +48,7 @@ public:
|
|||||||
{
|
{
|
||||||
cublasStatus status = cublasInit();
|
cublasStatus status = cublasInit();
|
||||||
if (status != CUBLAS_STATUS_SUCCESS) laerror("Cannot init GPU for CUBLAS");
|
if (status != CUBLAS_STATUS_SUCCESS) laerror("Cannot init GPU for CUBLAS");
|
||||||
|
errno = 0;
|
||||||
}
|
}
|
||||||
~GPU_START(void)
|
~GPU_START(void)
|
||||||
{
|
{
|
||||||
@ -50,8 +66,41 @@ extern complex<double> *gpuputcomplex(const complex<double> &x);
|
|||||||
|
|
||||||
void set_default_loc(const GPUID loc);
|
void set_default_loc(const GPUID loc);
|
||||||
|
|
||||||
extern GPUID DEFAULT_LOC;
|
template <typename T>
|
||||||
|
void smart_gpu_set(size_t n, const T& val, void *gpu_to, size_t _step = 1){
|
||||||
|
void *ptr(NULL);
|
||||||
|
if(sizeof(T)%sizeof(float) != 0){ laerror("memory alignment error"); }
|
||||||
|
|
||||||
|
cublasAlloc(1, sizeof(T), &ptr);
|
||||||
|
TEST_CUBLAS("cublasAlloc");
|
||||||
|
|
||||||
|
cublasSetVector(1, sizeof(T), &val, 1, ptr, 1);
|
||||||
|
TEST_CUBLAS("cublasSetVector");
|
||||||
|
|
||||||
|
if(sizeof(T) == sizeof(float)){
|
||||||
|
cublasScopy(n, (float*)ptr, 0, ((float*)gpu_to), _step);
|
||||||
|
TEST_CUBLAS("cublasScopy");
|
||||||
|
|
||||||
|
}else if(sizeof(T) == sizeof(double)){
|
||||||
|
cublasDcopy(n, (double*)ptr, 0, ((double*)gpu_to), _step);
|
||||||
|
TEST_CUBLAS("cublasDcopy");
|
||||||
|
|
||||||
|
}else if(sizeof(T) == sizeof(complex<double>)){
|
||||||
|
cublasZcopy(n, (cuDoubleComplex*)ptr, 0, (cuDoubleComplex*)gpu_to, _step);
|
||||||
|
TEST_CUBLAS("cublasZcopy");
|
||||||
|
|
||||||
|
}else{
|
||||||
|
for(register int i=0; i<sizeof(T)/sizeof(float); i++){
|
||||||
|
cublasScopy(n, (float*)ptr + i, 0, ((float*)gpu_to) + i, sizeof(T)/sizeof(float)*_step);
|
||||||
|
TEST_CUBLAS("cublasScopy");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cublasFree(ptr);
|
||||||
|
TEST_CUBLAS("cublasFree");
|
||||||
|
}
|
||||||
|
|
||||||
|
extern GPUID DEFAULT_LOC;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
9
la.h
9
la.h
@ -44,5 +44,12 @@
|
|||||||
#include "sparsesmat.h"
|
#include "sparsesmat.h"
|
||||||
#include "vec.h"
|
#include "vec.h"
|
||||||
|
|
||||||
|
using namespace LA;
|
||||||
|
typedef NRMat<int> NRIMat;
|
||||||
|
typedef NRMat<double> NRDMat;
|
||||||
|
typedef NRMat<complex<double> > NRCMat;
|
||||||
|
typedef NRVec<int> NRIVec;
|
||||||
|
typedef NRVec<double> NRDVec;
|
||||||
|
typedef NRVec<complex<double> > NRCVec;
|
||||||
#endif /* _LA_H_ */
|
#endif /* _LA_H_ */
|
||||||
|
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <limits>
|
||||||
#include <complex>
|
#include <complex>
|
||||||
|
|
||||||
|
|
||||||
|
17
laerror.cc
17
laerror.cc
@ -42,14 +42,14 @@ bool _LA_count_check=true;
|
|||||||
|
|
||||||
extern "C" void _findme(void) {}; //for autoconf test we need a function with C linkage
|
extern "C" void _findme(void) {}; //for autoconf test we need a function with C linkage
|
||||||
|
|
||||||
void laerror(const char *s1)
|
void laerror2(const char *s1, const char *s2)
|
||||||
{
|
{
|
||||||
std::cerr << "LA:ERROR - ";
|
std::cerr << "LA:ERROR - ";
|
||||||
std::cout << "LA:ERROR - ";
|
std::cout << "LA:ERROR - ";
|
||||||
if(s1)
|
if(s1)
|
||||||
{
|
{
|
||||||
std::cerr << s1 << "\n";
|
std::cerr << s2 << ": " << s1 << "\n";
|
||||||
std::cout << s1 << "\n";
|
std::cout << s2 << ": " << s1 << "\n";
|
||||||
}
|
}
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
{
|
{
|
||||||
@ -63,9 +63,9 @@ std::cout << "CUBLAS status = "<<s<<std::endl;
|
|||||||
throw LAerror(s1);
|
throw LAerror(s1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//stub for f77 blas called from strassen routine
|
//stub for f77 blas called from strassen routine
|
||||||
extern "C" void xerbla_(const char name[6], int *n)
|
extern "C" void xerbla_(const char name[6], int *n){
|
||||||
{
|
|
||||||
char msg[1024];
|
char msg[1024];
|
||||||
strcpy(msg,"LAPACK or BLAS error in routine ");
|
strcpy(msg,"LAPACK or BLAS error in routine ");
|
||||||
strncat(msg,name,6);
|
strncat(msg,name,6);
|
||||||
@ -75,9 +75,7 @@ laerror(msg);
|
|||||||
|
|
||||||
|
|
||||||
//with atlas-cblas another error routine is necessary
|
//with atlas-cblas another error routine is necessary
|
||||||
|
extern "C" void ATL_xerbla(int p, char *rout, char *form, ...){
|
||||||
extern "C" void ATL_xerbla(int p, char *rout, char *form, ...)
|
|
||||||
{
|
|
||||||
char msg0[1024], *msg;
|
char msg0[1024], *msg;
|
||||||
va_list argptr;
|
va_list argptr;
|
||||||
va_start(argptr, form);
|
va_start(argptr, form);
|
||||||
@ -89,8 +87,7 @@ extern "C" void ATL_xerbla(int p, char *rout, char *form, ...)
|
|||||||
laerror(msg0);
|
laerror(msg0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int cblas_errprn(int ierr, int info, char *form, ...)
|
int cblas_errprn(int ierr, int info, char *form, ...) {
|
||||||
{
|
|
||||||
char msg0[1024], *msg;
|
char msg0[1024], *msg;
|
||||||
va_list argptr;
|
va_list argptr;
|
||||||
va_start(argptr, form);
|
va_start(argptr, form);
|
||||||
|
11
laerror.h
11
laerror.h
@ -29,10 +29,15 @@ class LAerror
|
|||||||
LAerror(const char *s) {msg=s;};
|
LAerror(const char *s) {msg=s;};
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void laerror(const char *);
|
#ifdef __GNUG__
|
||||||
|
#define laerror(X) { LA::laerror2(X, __PRETTY_FUNCTION__); }
|
||||||
|
#else
|
||||||
|
#define laerror(X) { LA::laerror2(X, __func__); }
|
||||||
|
#endif
|
||||||
|
|
||||||
inline std::ostream & operator<<(std::ostream &s, const LAerror &x)
|
extern void laerror2(const char *, const char *);
|
||||||
{
|
|
||||||
|
inline std::ostream & operator<<(std::ostream &s, const LAerror &x) {
|
||||||
s << x.msg;
|
s << x.msg;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
145
noncblas.cc
145
noncblas.cc
@ -1,4 +1,5 @@
|
|||||||
/*
|
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
|
||||||
|
/*******************************************************************************
|
||||||
LA: linear algebra C++ interface library
|
LA: linear algebra C++ interface library
|
||||||
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
||||||
|
|
||||||
@ -14,8 +15,7 @@
|
|||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*******************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
#include "noncblas.h"
|
#include "noncblas.h"
|
||||||
#include "laerror.h"
|
#include "laerror.h"
|
||||||
@ -27,9 +27,7 @@
|
|||||||
//Level 1 - straightforward wrappers
|
//Level 1 - straightforward wrappers
|
||||||
|
|
||||||
extern "C" double FORNAME(ddot) (const FINT *n, const double *x, const FINT *incx, const double *y, const FINT *incy);
|
extern "C" double FORNAME(ddot) (const FINT *n, const double *x, const FINT *incx, const double *y, const FINT *incy);
|
||||||
double cblas_ddot(const int N, const double *X, const int incX,
|
double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY){
|
||||||
const double *Y, const int incY)
|
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -41,8 +39,7 @@ return FORNAME(ddot)(&N,X,&incX,Y,&incY);
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void FORNAME(dscal) (const FINT *n, const double *a, double *x, const FINT *incx);
|
extern "C" void FORNAME(dscal) (const FINT *n, const double *a, double *x, const FINT *incx);
|
||||||
void cblas_dscal(const int N, const double alpha, double *X, const int incX)
|
void cblas_dscal(const int N, const double alpha, double *X, const int incX){
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -53,9 +50,7 @@ FORNAME(dscal) (&N,&alpha,X,&incX);
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void FORNAME(dcopy) (const FINT *n, const double *x, const FINT *incx, double *y, const FINT *incy);
|
extern "C" void FORNAME(dcopy) (const FINT *n, const double *x, const FINT *incx, double *y, const FINT *incy);
|
||||||
void cblas_dcopy(const int N, const double *X, const int incX,
|
void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY){
|
||||||
double *Y, const int incY)
|
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -67,9 +62,7 @@ FORNAME(dcopy) (&N,X,&incX,Y,&incY);
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void FORNAME(daxpy) (const FINT *n, const double *a, const double *x, const FINT *incx, double *y, const FINT *incy);
|
extern "C" void FORNAME(daxpy) (const FINT *n, const double *a, const double *x, const FINT *incx, double *y, const FINT *incy);
|
||||||
void cblas_daxpy(const int N, const double alpha, const double *X,
|
void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY){
|
||||||
const int incX, double *Y, const int incY)
|
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -81,8 +74,7 @@ FORNAME(daxpy) (&N,&alpha,X,&incX,Y,&incY);
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" double FORNAME(dnrm2) (const FINT *n, const double *x, const FINT *incx);
|
extern "C" double FORNAME(dnrm2) (const FINT *n, const double *x, const FINT *incx);
|
||||||
double cblas_dnrm2(const int N, const double *X, const int incX)
|
double cblas_dnrm2(const int N, const double *X, const int incX){
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -93,8 +85,7 @@ return FORNAME(dnrm2) (&N,X,&incX);
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" double FORNAME(dasum) (const FINT *n, const double *x, const FINT *incx);
|
extern "C" double FORNAME(dasum) (const FINT *n, const double *x, const FINT *incx);
|
||||||
double cblas_dasum(const int N, const double *X, const int incX)
|
double cblas_dasum(const int N, const double *X, const int incX){
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -105,9 +96,7 @@ return FORNAME(dasum) (&N,X,&incX);
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void FORNAME(zcopy) (const FINT *n, const void *x, const FINT *incx, void *y, const FINT *incy);
|
extern "C" void FORNAME(zcopy) (const FINT *n, const void *x, const FINT *incx, void *y, const FINT *incy);
|
||||||
void cblas_zcopy(const int N, const void *X, const int incX,
|
void cblas_zcopy(const int N, const void *X, const int incX, void *Y, const int incY){
|
||||||
void *Y, const int incY)
|
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -119,9 +108,7 @@ FORNAME(zcopy) (&N,X,&incX,Y,&incY);
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void FORNAME(zaxpy) (const FINT *n, const void *a, const void *x, const FINT *incx, void *y, const FINT *incy);
|
extern "C" void FORNAME(zaxpy) (const FINT *n, const void *a, const void *x, const FINT *incx, void *y, const FINT *incy);
|
||||||
void cblas_zaxpy(const int N, const void *alpha, const void *X,
|
void cblas_zaxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY){
|
||||||
const int incX, void *Y, const int incY)
|
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -133,8 +120,7 @@ FORNAME(zaxpy) (&N,alpha,X,&incX,Y,&incY);
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void FORNAME(zscal) (const FINT *n, const void *a, void *x, const FINT *incx);
|
extern "C" void FORNAME(zscal) (const FINT *n, const void *a, void *x, const FINT *incx);
|
||||||
void cblas_zscal(const int N, const void *alpha, void *X, const int incX)
|
void cblas_zscal(const int N, const void *alpha, void *X, const int incX){
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -145,8 +131,7 @@ FORNAME(zscal)(&N,alpha,X,&incX);
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void FORNAME(zdscal) (const FINT *n, const double *a, void *x, const FINT *incx);
|
extern "C" void FORNAME(zdscal) (const FINT *n, const double *a, void *x, const FINT *incx);
|
||||||
void cblas_zdscal(const int N, const double alpha, void *X, const int incX)
|
void cblas_zdscal(const int N, const double alpha, void *X, const int incX){
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -158,8 +143,7 @@ FORNAME(zdscal)(&N,&alpha,X,&incX);
|
|||||||
|
|
||||||
|
|
||||||
extern "C" double FORNAME(dznrm2) (const FINT *n, const void *x, const FINT *incx);
|
extern "C" double FORNAME(dznrm2) (const FINT *n, const void *x, const FINT *incx);
|
||||||
double cblas_dznrm2(const int N, const void *X, const int incX)
|
double cblas_dznrm2(const int N, const void *X, const int incX){
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -171,12 +155,8 @@ return FORNAME(dznrm2) (&N,X,&incX);
|
|||||||
|
|
||||||
|
|
||||||
//the following ones are f2c-compatible, but is it truly portable???
|
//the following ones are f2c-compatible, but is it truly portable???
|
||||||
|
|
||||||
extern "C" void FORNAME(zdotu) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy);
|
extern "C" void FORNAME(zdotu) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy);
|
||||||
|
void cblas_zdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu){
|
||||||
void cblas_zdotu_sub(const int N, const void *X, const int incX,
|
|
||||||
const void *Y, const int incY, void *dotu)
|
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -189,9 +169,7 @@ FORNAME(zdotu) (dotu,&N,X,&incX,Y,&incY);
|
|||||||
|
|
||||||
|
|
||||||
extern "C" void FORNAME(zdotc) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy);
|
extern "C" void FORNAME(zdotc) (void *retval, const FINT *n, const void *x, const FINT *incx, const void *y, const FINT *incy);
|
||||||
void cblas_zdotc_sub(const int N, const void *X, const int incX,
|
void cblas_zdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc){
|
||||||
const void *Y, const int incY, void *dotc)
|
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -215,8 +193,8 @@ void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
|
|||||||
const double *X, const int incX,
|
const double *X, const int incX,
|
||||||
const double beta, double *Y, const int incY)
|
const double beta, double *Y, const int incY)
|
||||||
{
|
{
|
||||||
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted");
|
if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
|
||||||
if(Uplo!=CblasLower) LA::laerror("CblasLower uplo asserted");
|
if(Uplo!=CblasLower) laerror("CblasLower uplo asserted");
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -234,8 +212,8 @@ void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
|
|||||||
const void *X, const int incX,
|
const void *X, const int incX,
|
||||||
const void *beta, void *Y, const int incY)
|
const void *beta, void *Y, const int incY)
|
||||||
{
|
{
|
||||||
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted");
|
if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
|
||||||
if(Uplo!=CblasLower) LA::laerror("CblasLower uplo asserted");
|
if(Uplo!=CblasLower) laerror("CblasLower uplo asserted");
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -254,7 +232,7 @@ void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N,
|
|||||||
const double alpha, const double *X, const int incX,
|
const double alpha, const double *X, const int incX,
|
||||||
const double *Y, const int incY, double *A, const int lda)
|
const double *Y, const int incY, double *A, const int lda)
|
||||||
{
|
{
|
||||||
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted");
|
if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
|
||||||
//swap m-n, y-x
|
//swap m-n, y-x
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT mtmp=M;
|
const FINT mtmp=M;
|
||||||
@ -272,14 +250,14 @@ void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
|
|||||||
const void *alpha, const void *X, const int incX,
|
const void *alpha, const void *X, const int incX,
|
||||||
const void *Y, const int incY, void *A, const int lda)
|
const void *Y, const int incY, void *A, const int lda)
|
||||||
{
|
{
|
||||||
LA::laerror("cblas_zgerc cannot be simply converted to fortran order");
|
laerror("cblas_zgerc cannot be simply converted to fortran order");
|
||||||
}
|
}
|
||||||
|
|
||||||
void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N,
|
void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N,
|
||||||
const void *alpha, const void *X, const int incX,
|
const void *alpha, const void *X, const int incX,
|
||||||
const void *Y, const int incY, void *A, const int lda)
|
const void *Y, const int incY, void *A, const int lda)
|
||||||
{
|
{
|
||||||
LA::laerror("cblas_zgeru cannot be simply converted to fortran order");
|
laerror("cblas_zgeru cannot be simply converted to fortran order");
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void FORNAME(dgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const double *alpha, const double *a, const FINT *lda, const double *b, const FINT *ldb, const double *beta, double *c, const FINT *ldc);
|
extern "C" void FORNAME(dgemm) (const char *transa, const char *transb, const FINT *m, const FINT *n, const FINT *k, const double *alpha, const double *a, const FINT *lda, const double *b, const FINT *ldb, const double *beta, double *c, const FINT *ldc);
|
||||||
@ -289,7 +267,7 @@ void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
|
|||||||
const int lda, const double *B, const int ldb,
|
const int lda, const double *B, const int ldb,
|
||||||
const double beta, double *C, const int ldc)
|
const double beta, double *C, const int ldc)
|
||||||
{
|
{
|
||||||
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted");
|
if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
|
||||||
//swap a-b, m-n
|
//swap a-b, m-n
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT mtmp=M;
|
const FINT mtmp=M;
|
||||||
@ -313,7 +291,7 @@ void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
|
|||||||
const int lda, const void *B, const int ldb,
|
const int lda, const void *B, const int ldb,
|
||||||
const void *beta, void *C, const int ldc)
|
const void *beta, void *C, const int ldc)
|
||||||
{
|
{
|
||||||
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted");
|
if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
|
||||||
//swap a-b, m-n
|
//swap a-b, m-n
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT mtmp=M;
|
const FINT mtmp=M;
|
||||||
@ -364,8 +342,8 @@ void cblas_zgemv(const enum CBLAS_ORDER Order,
|
|||||||
const void *X, const int incX, const void *beta,
|
const void *X, const int incX, const void *beta,
|
||||||
void *Y, const int incY)
|
void *Y, const int incY)
|
||||||
{
|
{
|
||||||
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted");
|
if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
|
||||||
if(TransA == CblasConjTrans) LA::laerror("zgemv with CblasConjTrans not supportted");
|
if(TransA == CblasConjTrans) laerror("zgemv with CblasConjTrans not supportted");
|
||||||
//swap n-m and toggle transposition
|
//swap n-m and toggle transposition
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT mtmp=M;
|
const FINT mtmp=M;
|
||||||
@ -380,9 +358,7 @@ FORNAME(zgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, alpha, A, &lda, X, &incX,
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" FINT FORNAME(idamax) (const FINT *N, const double *DX, const FINT *INCX);
|
extern "C" FINT FORNAME(idamax) (const FINT *N, const double *DX, const FINT *INCX);
|
||||||
|
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX) {
|
||||||
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX)
|
|
||||||
{
|
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
const FINT ntmp=N;
|
const FINT ntmp=N;
|
||||||
const FINT incxtmp=incX;
|
const FINT incxtmp=incX;
|
||||||
@ -392,8 +368,42 @@ return (CBLAS_INDEX)FORNAME(idamax)(&N,X,&incX);
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern "C" FINT FORNAME(izamax) (const FINT *N, const void *DX, const FINT *INCX);
|
||||||
|
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX) {
|
||||||
|
#ifdef FORINT
|
||||||
|
const FINT ntmp=N;
|
||||||
|
const FINT incxtmp=incX;
|
||||||
|
return (CBLAS_INDEX)FORNAME(izamax)(&ntmp, X, &incxtmp);
|
||||||
|
#else
|
||||||
|
return (CBLAS_INDEX)FORNAME(izamax)(&N, X, &incX);
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
extern "C" FINT FORNAME(idamin) (const FINT *N, const double *DX, const FINT *INCX);
|
||||||
|
CBLAS_INDEX cblas_idamin(const int N, const double *X, const int incX) {
|
||||||
|
#ifdef FORINT
|
||||||
|
const FINT ntmp=N;
|
||||||
|
const FINT incxtmp=incX;
|
||||||
|
return (CBLAS_INDEX)FORNAME(idamin)(&ntmp,X,&incxtmp);
|
||||||
|
#else
|
||||||
|
return (CBLAS_INDEX)FORNAME(idamin)(&N,X,&incX);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" FINT FORNAME(izamin) (const FINT *N, const void *DX, const FINT *INCX);
|
||||||
|
CBLAS_INDEX cblas_izamin(const int N, const void *X, const int incX) {
|
||||||
|
#ifdef FORINT
|
||||||
|
const FINT ntmp=N;
|
||||||
|
const FINT incxtmp=incX;
|
||||||
|
return (CBLAS_INDEX)FORNAME(izamin)(&ntmp, X, &incxtmp);
|
||||||
|
#else
|
||||||
|
return (CBLAS_INDEX)FORNAME(izamin)(&N, X, &incX);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef NONCLAPACK
|
#ifdef NONCLAPACK
|
||||||
//clapack_dgesv
|
//clapack_dgesv
|
||||||
@ -405,7 +415,7 @@ int clapack_dgesv(const enum CBLAS_ORDER Order, const int N, const int NRHS,
|
|||||||
double *B, const int ldb)
|
double *B, const int ldb)
|
||||||
{
|
{
|
||||||
FINT INFO=0;
|
FINT INFO=0;
|
||||||
if(Order!=CblasRowMajor) LA::laerror("CblasRowMajor order asserted");
|
if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
|
||||||
//B should be in the same physical order, just transpose A in place and the LU result on output
|
//B should be in the same physical order, just transpose A in place and the LU result on output
|
||||||
for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;}
|
for(int i=1; i<N; ++i) for(int j=0; j<i; ++j) {double t=A[j*lda+i]; A[j*lda+i]=A[i*lda+j]; A[i*lda+j]=t;}
|
||||||
#ifdef FORINT
|
#ifdef FORINT
|
||||||
@ -424,12 +434,27 @@ return (int)INFO;
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void cblas_dswap(const int N, double *X, const int incX,
|
|
||||||
double *Y, const int incY){
|
extern "C" void FORNAME(dswap)(const FINT *N, double *X, const FINT *incX, double *Y, const FINT *incY);
|
||||||
LA::laerror("cblas_dswap is not available... (-DNONCBLAS)");
|
void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY){
|
||||||
|
#ifdef FORINT
|
||||||
|
const FINT N_tmp = N;
|
||||||
|
const FINT incX_tmp = incX;
|
||||||
|
const FINT incY_tmp = incY;
|
||||||
|
FORNAME(dswap)(&N_tmp, X, &incX_tmp, Y, &incY_tmp);
|
||||||
|
#else
|
||||||
|
FORNAME(dswap)(&N, X, &incX, Y, &incY);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void cblas_zswap(const int N, void *X, const int incX,
|
extern "C" void FORNAME(zswap)(const FINT *N, void *X, const FINT *incX, void *Y, const FINT *incY);
|
||||||
void *Y, const int incY){
|
void cblas_zswap(const int N, void *X, const int incX, void *Y, const int incY){
|
||||||
LA::laerror("cblas_zswap is not available... (-DNONCBLAS)");
|
#ifdef FORINT
|
||||||
|
const FINT N_tmp = N;
|
||||||
|
const FINT incX_tmp = incX;
|
||||||
|
const FINT incY_tmp = incY;
|
||||||
|
FORNAME(zswap)(&N_tmp, X, &incX_tmp, Y, &incY_tmp);
|
||||||
|
#else
|
||||||
|
FORNAME(zswap)(&N, X, &incX, Y, &incY);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -85,6 +85,11 @@ CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX);
|
|||||||
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
|
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
|
||||||
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
|
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
|
||||||
|
|
||||||
|
/*
|
||||||
|
CBLAS_INDEX cblas_idamin(const int N, const double *X, const int incX);
|
||||||
|
CBLAS_INDEX cblas_izamin(const int N, const void *X, const int incX);
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ===========================================================================
|
* ===========================================================================
|
||||||
* Prototypes for level 1 BLAS routines
|
* Prototypes for level 1 BLAS routines
|
||||||
|
640
smat.cc
640
smat.cc
@ -1,3 +1,6 @@
|
|||||||
|
//------------------------------------------------------------------------------
|
||||||
|
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
/*
|
/*
|
||||||
LA: linear algebra C++ interface library
|
LA: linear algebra C++ interface library
|
||||||
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
||||||
@ -25,50 +28,50 @@
|
|||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
extern ssize_t read(int, void *, size_t);
|
extern ssize_t read(int, void *, size_t);
|
||||||
extern ssize_t write(int, const void *, size_t);
|
extern ssize_t write(int, const void *, size_t);
|
||||||
}
|
}
|
||||||
// TODO
|
|
||||||
// specialize unary minus
|
|
||||||
|
|
||||||
namespace LA {
|
namespace LA {
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
/*
|
* routine for raw output
|
||||||
* * Templates first, specializations for BLAS next
|
* @param[in] fd file descriptor for output
|
||||||
*
|
* @param[in] dim number of elements intended for output
|
||||||
*/
|
* @param[in] transp reserved
|
||||||
|
* @see NRMat<T>::get(), NRSMat<T>::copyonwrite()
|
||||||
//raw I/O
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void NRSMat<T>::put(int fd, bool dim, bool transp) const
|
void NRSMat<T>::put(int fd, bool dim, bool transp) const {
|
||||||
{
|
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
if(location!=cpu)
|
if(location != cpu){
|
||||||
{
|
|
||||||
NRSMat<T> tmp= *this;
|
NRSMat<T> tmp= *this;
|
||||||
tmp.moveto(cpu);
|
tmp.moveto(cpu);
|
||||||
tmp.put(fd,dim,transp);
|
tmp.put(fd,dim,transp);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if(dim)
|
if(dim){
|
||||||
{
|
|
||||||
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
|
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
|
||||||
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
|
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
|
||||||
}
|
}
|
||||||
LA_traits<T>::multiput(NN2,fd,v,dim);
|
LA_traits<T>::multiput(NN2,fd,v,dim);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* routine for raw input
|
||||||
|
* @param[in] fd file descriptor for input
|
||||||
|
* @param[in] dim number of elements intended for input
|
||||||
|
* @param[in] transp reserved
|
||||||
|
* @see NRSMat<T>::put(), NRSMat<T>::copyonwrite()
|
||||||
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void NRSMat<T>::get(int fd, bool dim, bool transp)
|
void NRSMat<T>::get(int fd, bool dim, bool transp) {
|
||||||
{
|
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
if(location!=cpu)
|
if(location != cpu){
|
||||||
{
|
|
||||||
NRSMat<T> tmp;
|
NRSMat<T> tmp;
|
||||||
tmp.moveto(cpu);
|
tmp.moveto(cpu);
|
||||||
tmp.get(fd,dim,transp);
|
tmp.get(fd,dim,transp);
|
||||||
@ -80,164 +83,304 @@ if(location!=cpu)
|
|||||||
|
|
||||||
int nn0[2]; //align at least 8-byte
|
int nn0[2]; //align at least 8-byte
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if(dim)
|
if(dim){
|
||||||
{
|
|
||||||
if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("cannot read");
|
if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("cannot read");
|
||||||
resize(nn0[0]);
|
resize(nn0[0]);
|
||||||
}
|
}else{
|
||||||
else
|
|
||||||
copyonwrite();
|
copyonwrite();
|
||||||
|
}
|
||||||
LA_traits<T>::multiget(NN2,fd,v,dim);
|
LA_traits<T>::multiget(NN2,fd,v,dim);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// conversion ctor, symmetrize general Mat into SMat
|
/***************************************************************************//**
|
||||||
|
* constructor symmetrizing given matrix \f$A\f$ of general type <code>T</code> yielding \f$(A+A^\mathrm{T})/2\f$
|
||||||
|
* @param[in] rhs matrix \f$A\f$
|
||||||
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
NRSMat<T>::NRSMat(const NRMat<T> &rhs)
|
NRSMat<T>::NRSMat(const NRMat<T> &rhs) {
|
||||||
{
|
NOT_GPU(rhs);
|
||||||
|
|
||||||
nn = rhs.nrows();
|
nn = rhs.nrows();
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (nn != rhs.ncols()) laerror("attempt to convert non-square Mat to SMat");
|
if(nn != rhs.ncols()) laerror("attempt to convert nonsquare NRMat<T> to NRSMat<T>");
|
||||||
|
#endif
|
||||||
|
#ifdef CUDALA
|
||||||
|
location = rhs.getlocation();
|
||||||
#endif
|
#endif
|
||||||
count = new int;
|
count = new int;
|
||||||
*count = 1;
|
*count = 1;
|
||||||
v = new T[NN2];
|
v = new T[NN2];
|
||||||
int i, j, k=0;
|
int i, j, k(0);
|
||||||
for (i=0; i<nn; i++)
|
for(i=0; i<nn; i++){
|
||||||
for (j=0; j<=i;j++) v[k++] = (rhs[i][j] + rhs[j][i])/((T)2);
|
for(j=0; j<=i; j++){
|
||||||
|
v[k++] = (rhs[i][j] + rhs[j][i])/((T)2);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* zero out this symmetric matrix of general type <code>T</code> and then set
|
||||||
// assign to diagonal
|
* the diagonal elements to prescribed value
|
||||||
|
* @param[in] a scalar value to be assigned to the diagonal
|
||||||
|
* @return reference to the modified matrix
|
||||||
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
NRSMat<T> & NRSMat<T>::operator=(const T &a)
|
NRSMat<T> & NRSMat<T>::operator=(const T &a) {
|
||||||
{
|
NOT_GPU(*this);
|
||||||
copyonwrite();
|
copyonwrite();
|
||||||
memset(v, 0, NN2*sizeof(T));
|
memset(v, 0, NN2*sizeof(T));
|
||||||
for (int i=0; i<nn; i++) v[i*(i+1)/2+i] = a;
|
for(register int i=0; i<nn; i++) v[i*(i+1)/2 + i] = a;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
//get diagonal
|
/***************************************************************************//**
|
||||||
|
* get or divide by the diagonal of real symmetric double-precision matrix
|
||||||
|
* @param[in, out] r vector for storing the diagonal
|
||||||
|
* @param[in] divide
|
||||||
|
* \li \c false save the diagonal to vector r
|
||||||
|
* \li \c true divide the vector r by the diagonal elements element-wise
|
||||||
|
* @param[in] cache reserved
|
||||||
|
* @return
|
||||||
|
* \li <tt>divide == true</tt> NULL
|
||||||
|
* \li <tt>divide == false</tt> pointer to the first element of r
|
||||||
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
const T* NRSMat<T>::diagonalof(NRVec<T> &r, const bool divide, bool cache) const
|
const T* NRSMat<T>::diagonalof(NRVec<T> &r, const bool divide, bool cache) const {
|
||||||
{
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if(r.size()!=nn) laerror("incompatible vector in diagonalof()");
|
if(r.size() != nn) laerror("incompatible vector in const T* NRSMat<T>::diagonalof(NRVec<T> &, const bool, bool)");
|
||||||
#endif
|
#endif
|
||||||
|
NOT_GPU(*this);
|
||||||
|
SAME_LOC(*this, r);
|
||||||
|
|
||||||
r.copyonwrite();
|
r.copyonwrite();
|
||||||
|
|
||||||
if (divide)
|
if(divide){
|
||||||
for (int i=0; i<nn; i++) {T a =v[i*(i+1)/2+i]; if(a!=0.) r[i] /= a;}
|
for(register int i=0; i<nn; i++){
|
||||||
else
|
const T a = v[i*(i+1)/2+i];
|
||||||
for (int i=0; i<nn; i++) r[i] = v[i*(i+1)/2+i];
|
if(a != 0.) r[i] /= a;
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
for(register int i=0; i<nn; i++) r[i] = v[i*(i+1)/2+i];
|
||||||
|
}
|
||||||
|
|
||||||
return divide?NULL:&r[0];
|
return divide?NULL:&r[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// unary minus
|
/***************************************************************************//**
|
||||||
|
* implements unary minus operator for this symmetric
|
||||||
|
* matrix of general type <code>T</code>
|
||||||
|
* @return modified copy of this matrix
|
||||||
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
const NRSMat<T> NRSMat<T>::operator-() const
|
const NRSMat<T> NRSMat<T>::operator-() const {
|
||||||
{
|
NOT_GPU(*this);
|
||||||
NRSMat<T> result(nn);
|
|
||||||
for(int i=0; i<NN2; i++) result.v[i]= -v[i];
|
NRSMat<T> result(nn, getlocation());
|
||||||
|
for(register int i = 0; i<NN2; i++) result.v[i]= -v[i];
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// trace of Smat
|
/***************************************************************************//**
|
||||||
|
* implements unary minus operator for this real symmetric matrix
|
||||||
|
* @return modified copy of this matrix
|
||||||
|
******************************************************************************/
|
||||||
|
template <>
|
||||||
|
const NRSMat<double> NRSMat<double>::operator-() const {
|
||||||
|
NRSMat<double> result(nn, getlocation());
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
memcpy(result.v, v, NN2*sizeof(double));
|
||||||
|
cblas_dscal(NN2, -1., result.v, 1);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
cublasDcopy(NN2, v, 1, result.v, 1);
|
||||||
|
TEST_CUBLAS("cublasDcopy");
|
||||||
|
|
||||||
|
cublasDscal(NN2, -1., result.v, 1);
|
||||||
|
TEST_CUBLAS("cublasDscal");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* implements unary minus operator for this hermitian matrix
|
||||||
|
* @return modified copy of this matrix
|
||||||
|
******************************************************************************/
|
||||||
|
template <>
|
||||||
|
const NRSMat<complex<double> > NRSMat<complex<double> >::operator-() const {
|
||||||
|
NRSMat<complex<double> > result(nn, getlocation());
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu) {
|
||||||
|
#endif
|
||||||
|
memcpy(result.v, v, NN2*sizeof(complex<double>));
|
||||||
|
cblas_zscal(NN2, &CMONE, result.v, 1);
|
||||||
|
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
cublasZcopy(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)result.v, 1);
|
||||||
|
TEST_CUBLAS("cublasZcopy");
|
||||||
|
|
||||||
|
cublasZscal(NN2, CUMONE, (cuDoubleComplex*)result.v, 1);
|
||||||
|
TEST_CUBLAS("cublasZscal");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* @return the sum of the diagonal elements
|
||||||
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
const T NRSMat<T>::trace() const
|
const T NRSMat<T>::trace() const {
|
||||||
{
|
NOT_GPU(*this);
|
||||||
|
|
||||||
T tmp = 0;
|
T tmp = 0;
|
||||||
for (int i=0; i<nn; i++) tmp += v[i*(i+1)/2+i];
|
for(register int i=0; i<nn; i++) tmp += v[i*(i+1)/2+i];
|
||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* fill this real symmetric matrix with
|
||||||
|
* pseudorandom numbers generated from uniform distribution
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
void NRSMat<double>::randomize(const double &x)
|
void NRSMat<double>::randomize(const double &x) {
|
||||||
{
|
NOT_GPU(*this);
|
||||||
for(int i=0; i<NN2; ++i) v[i] = x*(2.*random()/(1.+RAND_MAX) -1.);
|
|
||||||
|
for(int i=0; i<NN2; ++i){
|
||||||
|
v[i] = x*(2.*random()/(1.+RAND_MAX) -1.);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* Fill this hermitian matrix with pseudorandom numbers generated from uniform
|
||||||
|
* distribution. The real and imaginary parts are generated independently.
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
void NRSMat<complex<double> >::randomize(const double &x)
|
void NRSMat<complex<double> >::randomize(const double &x) {
|
||||||
{
|
for(register int i=0; i<NN2; ++i) v[i].real() = x*(2.*random()/(1. + RAND_MAX) -1.);
|
||||||
for(int i=0; i<NN2; ++i) v[i].real() = x*(2.*random()/(1.+RAND_MAX) -1.);
|
for(register int i=0; i<NN2; ++i) v[i].imag() = x*(2.*random()/(1. + RAND_MAX) -1.);
|
||||||
for(int i=0; i<NN2; ++i) v[i].imag() = x*(2.*random()/(1.+RAND_MAX) -1.);
|
for(register int i=0; i<nn; ++i){
|
||||||
for(int i=0; i<nn; ++i) for(int j=0; j<=i; ++j) if(i==j) v[i*(i+1)/2+j].imag()=0; //hermitean
|
for(register int j=0; j<=i; ++j){
|
||||||
|
if(i == j) v[i*(i+1)/2+j].imag() = 0; //hermitean
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* routine for formatted output via lawritemat
|
||||||
// write matrix to the file with specific format
|
* @param[in] file pointer to <tt>FILE</tt> structure representing the output file
|
||||||
|
* @param[in] format format specification in standard printf-like form
|
||||||
|
* @param[in] modulo
|
||||||
|
* @see lawritemat()
|
||||||
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void NRSMat<T>::fprintf(FILE *file, const char *format, const int modulo) const
|
void NRSMat<T>::fprintf(FILE *file, const char *format, const int modulo) const {
|
||||||
{
|
NOT_GPU(*this);
|
||||||
|
|
||||||
lawritemat(file, (const T *)(*this) ,nn, nn, format, 2, modulo, 1);
|
lawritemat(file, (const T *)(*this) ,nn, nn, format, 2, modulo, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// read matrix from the file with specific format
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* routine for formatted input via fscanf
|
||||||
|
* @param[in] f pointer to <tt>FILE</tt> structure representing the input file
|
||||||
|
* @param[in] format format specification in standard printf-like form
|
||||||
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void NRSMat<T>::fscanf(FILE *f, const char *format)
|
void NRSMat<T>::fscanf(FILE *f, const char *format) {
|
||||||
{
|
|
||||||
int n, m;
|
int n, m;
|
||||||
|
NOT_GPU(*this);
|
||||||
|
|
||||||
if (::fscanf(f,"%d %d", &n, &m) != 2)
|
if (::fscanf(f,"%d %d", &n, &m) != 2)
|
||||||
laerror("cannot read matrix dimensions in SMat::fscanf");
|
laerror("cannot read matrix dimensions in NRSMat<T>::fscanf(FILE *, const char *)");
|
||||||
if (n != m) laerror("different dimensions of SMat");
|
if (n != m) laerror("different dimensions in NRSMat<T>::fscanf(FILE *, const char *)");
|
||||||
resize(n);
|
resize(n);
|
||||||
for (int i=0; i<n; i++)
|
for (int i=0; i<n; i++)
|
||||||
for (int j=0; j<n; j++)
|
for (int j=0; j<n; j++)
|
||||||
if (::fscanf(f,format,&((*this)(i,j))) != 1)
|
if (::fscanf(f,format,&((*this)(i,j))) != 1)
|
||||||
laerror("Smat - cannot read matrix element");
|
laerror("NRSMat<T>::fscanf(FILE *, const char *) - unable to read matrix element");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/***************************************************************************//**
|
||||||
* BLAS specializations for double and complex<double>
|
* multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form
|
||||||
*/
|
* with real double-precision dense matrix \f$A\f$
|
||||||
|
* @param[in] rhs real double-precision matrix \f$A\f$
|
||||||
|
* @return matrix produt \f$S\times{}A\f$
|
||||||
|
******************************************************************************/
|
||||||
// SMat * Mat
|
|
||||||
//NOTE: dsymm is not appropriate as it works on UNPACKED symmetric matrix
|
|
||||||
template<>
|
template<>
|
||||||
const NRMat<double> NRSMat<double>::operator*(const NRMat<double> &rhs) const
|
const NRMat<double> NRSMat<double>::operator*(const NRMat<double> &rhs) const {
|
||||||
{
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat");
|
if(nn != rhs.nrows()) laerror("incompatible dimensions in NRMat<double> NRSMat<double>::operator*(const NRMat<double> &)");
|
||||||
|
#endif
|
||||||
|
SAME_LOC(*this, rhs);
|
||||||
|
NRMat<double> result(nn, rhs.ncols(), getlocation());
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
for(register int k = 0; k<rhs.ncols(); k++){
|
||||||
|
cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs[0] + k, rhs.ncols(), 0.0, result[0] + k, rhs.ncols());
|
||||||
|
}
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
for(register int k = 0; k<rhs.ncols(); k++){
|
||||||
|
cublasDspmv('U', nn, 1.0, v, rhs[0] + k, rhs.ncols(), 0.0, result[0] + k, rhs.ncols());
|
||||||
|
TEST_CUBLAS("cublasDspmv");
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
NRMat<double> result(nn, rhs.ncols());
|
|
||||||
for (int k=0; k<rhs.ncols(); k++)
|
|
||||||
cblas_dspmv(CblasRowMajor, CblasLower, nn, 1.0, v, rhs[0]+k, rhs.ncols(),
|
|
||||||
0.0, result[0]+k, rhs.ncols());
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form
|
||||||
|
* with real double-precision dense matrix \f$A\f$
|
||||||
|
* @param[in] rhs real double-precision matrix \f$A\f$
|
||||||
|
* @return matrix produt \f$S\times{}A\f$
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
const NRMat<complex<double> >
|
const NRMat<complex<double> >
|
||||||
NRSMat< complex<double> >::operator*(const NRMat< complex<double> > &rhs) const
|
NRSMat<complex<double> >::operator*(const NRMat<complex<double> > &rhs) const {
|
||||||
{
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (nn != rhs.nrows()) laerror("incompatible dimensions in SMat*Mat");
|
if (nn != rhs.nrows()) laerror("incompatible dimensions in NRSMat<complex<double> >::operator*(const NRMat<complex<double> > &)");
|
||||||
|
#endif
|
||||||
|
SAME_LOC(*this, rhs);
|
||||||
|
NRMat<complex<double> > result(nn, rhs.ncols(), getlocation());
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
for(register int k=0; k<rhs.ncols(); k++){
|
||||||
|
cblas_zhpmv(CblasRowMajor, CblasLower, nn, &CONE, v, rhs[0]+k, rhs.ncols(), &CZERO, result[0]+k, rhs.ncols());
|
||||||
|
}
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
for(register int k = 0; k<rhs.ncols(); k++){
|
||||||
|
cublasZhpmv('U', nn,
|
||||||
|
CUONE, (cuDoubleComplex*)v, (cuDoubleComplex*)(rhs[0] + k), rhs.ncols(),
|
||||||
|
CUZERO, (cuDoubleComplex*)(result[0] + k), rhs.ncols());
|
||||||
|
|
||||||
|
TEST_CUBLAS("cublasDspmv");
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
NRMat< complex<double> > result(nn, rhs.ncols());
|
|
||||||
for (int k=0; k<rhs.ncols(); k++)
|
|
||||||
cblas_zhpmv(CblasRowMajor, CblasLower, nn, &CONE, v, rhs[0]+k, rhs.ncols(),
|
|
||||||
&CZERO, result[0]+k, rhs.ncols());
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form
|
||||||
// SMat * SMat
|
* with real double-precision symmetric matrix \f$T\f$
|
||||||
|
* @return matrix produt \f$S\times{}T\f$ (not necessarily symmetric)
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const
|
const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const {
|
||||||
{
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat");
|
if (nn != rhs.nn) laerror("incompatible dimensions in NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &)");
|
||||||
#endif
|
#endif
|
||||||
NRMat<double> result(0.0, nn, nn);
|
NRMat<double> result(0.0, nn, nn);
|
||||||
double *p, *q;
|
double *p, *q;
|
||||||
@ -283,154 +426,293 @@ const NRMat<double> NRSMat<double>::operator*(const NRSMat<double> &rhs) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* multiply this complex double-precision symmetric matrix \f$G\f$ stored in packed form
|
||||||
|
* with complex double-precision symmetric matrix \f$H\f$
|
||||||
|
* @return matrix produt \f$G\times{}H\f$ (not necessarily symmetric)
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
const NRMat<complex<double> >
|
const NRMat<complex<double> >
|
||||||
NRSMat< complex<double> >::operator*(const NRSMat< complex<double> > &rhs) const
|
NRSMat<complex<double> >::operator*(const NRSMat<complex<double> > &rhs) const {
|
||||||
{
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (nn != rhs.nn) laerror("incompatible dimensions in SMat*SMat");
|
if (nn != rhs.nn) laerror("incompatible dimensions in NRSMat<complex<double> >::operator*(const NRSMat<complex<double> > &)");
|
||||||
#endif
|
#endif
|
||||||
NRMat< complex<double> > result(0.0, nn, nn);
|
SAME_LOC(*this, rhs);
|
||||||
|
NRMat<complex<double> > result(nn, nn, getlocation());
|
||||||
NRMat<complex<double> > rhsmat(rhs);
|
NRMat<complex<double> > rhsmat(rhs);
|
||||||
result = *this * rhsmat;
|
result = *this * rhsmat;
|
||||||
return result;
|
return result;
|
||||||
// laerror("complex SMat*Smat not implemented");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* compute inner product of this real symmetric matrix \f$A\f$ with given real symmetric matrix \f$B\f$
|
||||||
// S dot S
|
* i.e. determine the value of
|
||||||
|
* \f[\sum_{i,j}A_{i,j}B_{i,j}\f]
|
||||||
|
* @param[in] rhs matrix \f$B\f$
|
||||||
|
* @return computed inner product
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
const double NRSMat<double>::dot(const NRSMat<double> &rhs) const
|
const double NRSMat<double>::dot(const NRSMat<double> &rhs) const {
|
||||||
{
|
double ret(0.);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (nn != rhs.nn) laerror("dot of incompatible SMat's");
|
if (nn != rhs.nn) laerror("incompatible dimensions in double NRSMat<double>::dot(const NRSMat<double> &)");
|
||||||
#endif
|
#endif
|
||||||
return cblas_ddot(NN2, v, 1, rhs.v, 1);
|
SAME_LOC(*this, rhs);
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
ret = cblas_ddot(NN2, v, 1, rhs.v, 1);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
ret = cublasDdot(NN2, v, 1, rhs.v, 1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* compute inner product of this complex symmetric matrix \f$A\f$ with given complex symmetric matrix \f$B\f$
|
||||||
|
* i.e. determine the value of
|
||||||
|
* \f[\sum_{i,j}\overbar{A_{i,j}}B_{i,j}\f]
|
||||||
|
* @param[in] rhs matrix \f$B\f$
|
||||||
|
* @return computed inner product
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
const complex<double>
|
const complex<double> NRSMat<complex<double> >::dot(const NRSMat<complex<double> > &rhs) const {
|
||||||
NRSMat< complex<double> >::dot(const NRSMat< complex<double> > &rhs) const
|
|
||||||
{
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (nn != rhs.nn) laerror("dot of incompatible SMat's");
|
if (nn != rhs.nn) laerror("incompatible dimensions in complex<double> NRSMat<complex<double> >::dot(const NRSMat<complex<double> > &)");
|
||||||
|
#endif
|
||||||
|
complex<double> dot(0., 0.);
|
||||||
|
SAME_LOC(*this, rhs);
|
||||||
|
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
complex<double> dot;
|
|
||||||
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
|
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
const cuDoubleComplex _dot = cublasZdotc(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)(rhs.v), 1);
|
||||||
|
dot = complex<double>(cuCreal(_dot), cuCimag(_dot));
|
||||||
|
TEST_CUBLAS("cublasZdotc");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return dot;
|
return dot;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* compute inner product of this real double-precision symmetric matrix \f$S\f$ of order \f$n\f$
|
||||||
|
* with given real double-precision vector \f$\vec{v}\f$ of length \f$n(n+1)/2\f$
|
||||||
|
* @param[in] rhs real double-precision vector \f$\vec{v}\f$
|
||||||
|
* @return computed inner product
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
const double NRSMat<double>::dot(const NRVec<double> &rhs) const
|
const double NRSMat<double>::dot(const NRVec<double> &rhs) const {
|
||||||
{
|
double ret(0.0);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (NN2 != rhs.nn) laerror("dot of incompatible SMat's");
|
if(NN2 != rhs.nn) laerror("incompatible dimensions in double NRSMat<double>::dot(const NRVec<double> &)");
|
||||||
|
#endif
|
||||||
|
SAME_LOC(*this, rhs);
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
ret = cblas_ddot(NN2, v, 1, rhs.v, 1);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
ret = cublasDdot(NN2, v, 1, rhs.v, 1);
|
||||||
|
TEST_CUBLAS("cublasDdot");
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
return cblas_ddot(NN2, v, 1, rhs.v, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* compute inner product of this complex double-precision hermitian matrix \f$H\f$ of order \f$n\f$
|
||||||
|
* with given complex double-precision vector \f$\vec{v}\f$ of length \f$n(n+1)/2\f$
|
||||||
|
* @param[in] rhs complex double-precision vector \f$\vec{v}\f$
|
||||||
|
* @return computed inner product
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
const complex<double>
|
const complex<double>
|
||||||
NRSMat< complex<double> >::dot(const NRVec< complex<double> > &rhs) const
|
NRSMat<complex<double> >::dot(const NRVec<complex<double> > &rhs) const {
|
||||||
{
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (NN2 != rhs.nn) laerror("dot of incompatible SMat's");
|
if(NN2 != rhs.nn) laerror("incompatible dimensions in complex<double> NRSMat<complex<double> >::dot(const NRVec<complex<double> > &)");
|
||||||
|
#endif
|
||||||
|
complex<double> dot(0., 0.);
|
||||||
|
SAME_LOC(*this, rhs);
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
#endif
|
#endif
|
||||||
complex<double> dot;
|
|
||||||
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
|
cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
const cuDoubleComplex _dot = cublasZdotc(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)rhs.v, 1);
|
||||||
|
TEST_CUBLAS("cublasZdotc");
|
||||||
|
dot = complex<double>(cuCreal(_dot), cuCimag(_dot));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return dot;
|
return dot;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
// norm of the matrix
|
* compute the Frobenius norm of this real double-precision symmetric matrix
|
||||||
|
* @param[in] scalar subtract this scalar value from the diagonal elements before the norm computation
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
const double NRSMat<double>::norm(const double scalar) const
|
const double NRSMat<double>::norm(const double scalar) const {
|
||||||
{
|
if(!scalar){
|
||||||
if (!scalar) return cblas_dnrm2(NN2, v, 1);
|
double ret(0.);
|
||||||
double sum = 0;
|
#ifdef CUDALA
|
||||||
int k = 0;
|
if(location == cpu){
|
||||||
for (int i=0; i<nn; ++i)
|
#endif
|
||||||
for (int j=0; j<=i; ++j) {
|
ret = cblas_dnrm2(NN2, v, 1);
|
||||||
register double tmp;
|
#ifdef CUDALA
|
||||||
tmp = v[k++];
|
}else{
|
||||||
|
ret = cublasDnrm2(NN2, v, 1);
|
||||||
|
TEST_CUBLAS("cublasDnrm2");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
NOT_GPU(*this);
|
||||||
|
|
||||||
|
double sum(0.);
|
||||||
|
int k(0);
|
||||||
|
for(register int i=0; i<nn; ++i){
|
||||||
|
for(register int j=0; j<=i; ++j) {
|
||||||
|
register double tmp = v[k++];
|
||||||
if(i == j) tmp -= scalar;
|
if(i == j) tmp -= scalar;
|
||||||
sum += tmp*tmp;
|
sum += tmp*tmp;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return std::sqrt(sum);
|
return std::sqrt(sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* compute the Frobenius norm of this complex double-precision hermitian matrix
|
||||||
|
* @param[in] scalar subtract this scalar value from the diagonal elements before the norm computation
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
const double NRSMat< complex<double> >::norm(const complex<double> scalar) const
|
const double NRSMat< complex<double> >::norm(const complex<double> scalar) const {
|
||||||
{
|
if(!(scalar.real()) && !(scalar.imag())){
|
||||||
if (!(scalar.real()) && !(scalar.imag()))
|
double ret(0.);
|
||||||
return cblas_dznrm2(NN2, v, 1);
|
#ifdef CUDALA
|
||||||
double sum = 0;
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
ret = cblas_dznrm2(NN2, v, 1);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
ret = cublasDznrm2(NN2, (cuDoubleComplex*)v, 1);
|
||||||
|
TEST_CUBLAS("cublasDznrm2");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int k(0);
|
||||||
|
double sum(0.);
|
||||||
complex<double> tmp;
|
complex<double> tmp;
|
||||||
int k = 0;
|
|
||||||
for (int i=0; i<nn; ++i)
|
for(register int i=0; i<nn; ++i){
|
||||||
for (int j=0; j<=i; ++j) {
|
for(register int j=0; j<=i; ++j){
|
||||||
tmp = v[k++];
|
tmp = v[k++];
|
||||||
if (i == j) tmp -= scalar;
|
if (i == j) tmp -= scalar;
|
||||||
sum += tmp.real()*tmp.real() + tmp.imag()*tmp.imag();
|
sum += tmp.real()*tmp.real() + tmp.imag()*tmp.imag();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return std::sqrt(sum);
|
return std::sqrt(sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* for this real double-precision symmetric matrix \f$S\f$ stored in packed form,
|
||||||
|
* real scalar value \f$\alpha\f$ and real double-precision symmetric matrix \f$T\f$, compute
|
||||||
// axpy: S = S * a
|
* \f[S \leftarrow \alpha T + S\f]
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
void NRSMat<double>::axpy(const double alpha, const NRSMat<double> & x)
|
void NRSMat<double>::axpy(const double alpha, const NRSMat<double> &x) {
|
||||||
{
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (nn != x.nn) laerror("axpy of incompatible SMats");
|
if(nn != x.nn) laerror("incompatible dimensions in void NRSMat<double>::axpy(const double, const NRSMat<double>&)");
|
||||||
#endif
|
#endif
|
||||||
|
SAME_LOC(*this, x);
|
||||||
copyonwrite();
|
copyonwrite();
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
cblas_daxpy(NN2, alpha, x.v, 1, v, 1);
|
cblas_daxpy(NN2, alpha, x.v, 1, v, 1);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
cublasDaxpy(NN2, alpha, x.v, 1, v, 1);
|
||||||
|
TEST_CUBLAS("cublasDaxpy");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<>
|
|
||||||
void NRSMat< complex<double> >::axpy(const complex<double> alpha,
|
|
||||||
const NRSMat< complex<double> > & x)
|
|
||||||
{
|
|
||||||
#ifdef DEBUG
|
|
||||||
if (nn != x.nn) laerror("axpy of incompatible SMats");
|
|
||||||
#endif
|
#endif
|
||||||
copyonwrite();
|
|
||||||
cblas_zaxpy(nn, &alpha, x.v, 1, v, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//complex from real
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* for this complex double-precision hermitian matrix \f$H\f$ stored in packed form,
|
||||||
|
* complex scalar value \f$\alpha\f$ and complex double-precision hermitian matrix \f$G\f$, compute
|
||||||
|
* \f[H \leftarrow \alpha G + H\f]
|
||||||
|
******************************************************************************/
|
||||||
template<>
|
template<>
|
||||||
NRSMat<complex<double> >::NRSMat(const NRSMat<double> &rhs, bool imagpart)
|
void NRSMat<complex<double> >::axpy(const complex<double> alpha, const NRSMat<complex<double> > & x) {
|
||||||
: nn(rhs.nrows()), v(new complex<double>[rhs.nrows()*(rhs.nrows()+1)/2]), count(new int(1))
|
#ifdef DEBUG
|
||||||
{
|
if(nn != x.nn) laerror("incompatible dimensions in void NRSMat<complex<double> >::axpy(const complex<double> , const NRSMat<complex<double> >&)");
|
||||||
memset(v,0,nn*(nn+1)/2*sizeof(complex<double>));
|
#endif
|
||||||
cblas_dcopy(nn*(nn+1)/2,&rhs(0,0),1,((double *)v) + (imagpart?1:0),2);
|
SAME_LOC(*this, x);
|
||||||
|
copyonwrite();
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
cblas_zaxpy(nn, &alpha, x.v, 1, v, 1);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
|
||||||
|
cublasZaxpy(NN2, _alpha, (cuDoubleComplex*)x.v, 1, (cuDoubleComplex*)v, 1);
|
||||||
|
TEST_CUBLAS("cublasZaxpy");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* create hermitian matrix \f$H\f$ from given real double-precision symmetric
|
||||||
|
* matrix \f$S\f$
|
||||||
|
* @param[in] rhs real double-precision symmetric matrix \f$S\f$
|
||||||
|
* @param[in] imagpart flag determining whether \f$S\f$ should correspond to the real or imaginary part of \f$H\f$
|
||||||
|
******************************************************************************/
|
||||||
|
template<>
|
||||||
|
NRSMat<complex<double> >::NRSMat(const NRSMat<double> &rhs, bool imagpart): nn(rhs.nrows()), count(new int(1)) {
|
||||||
|
//inconsistent in general case?
|
||||||
|
const int nnp1 = nn*(nn + 1)/2;
|
||||||
|
#ifdef CUDALA
|
||||||
|
location = rhs.getlocation();
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
v = new complex<double>[nnp1];
|
||||||
|
memset(v, 0, nnp1*sizeof(complex<double>));
|
||||||
|
cblas_dcopy(nnp1, &rhs(0, 0), 1, ((double *)v) + (imagpart?1:0), 2);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
v = (complex<double>*) gpualloc(nnp1*sizeof(complex<double>));
|
||||||
|
|
||||||
//some template specializations leading to BLAS/CUBLAS calls
|
complex<double> *_val = gpuputcomplex(CZERO);
|
||||||
|
cublasZcopy(nnp1, (cuDoubleComplex*)_val, 0, (cuDoubleComplex*)v, 1);
|
||||||
|
TEST_CUBLAS("cublasZcopy");
|
||||||
|
gpufree(_val);
|
||||||
|
|
||||||
|
cublasDcopy(nnp1, (double*)(&rhs(0,0)), 1, ((double*)v) + (imagpart?1:0), 2);
|
||||||
|
TEST_CUBLAS("cublasDcopy");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/***************************************************************************//**
|
||||||
|
* forced instantization in the corresponding object file
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
******************************************************************************/
|
||||||
////// forced instantization in the corresponding object file
|
|
||||||
template class NRSMat<double>;
|
template class NRSMat<double>;
|
||||||
template class NRSMat<complex<double> >;
|
template class NRSMat<complex<double> >;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user