*** empty log message ***

jiri 2010-09-08 13:30:20 +00:00
parent 074c943862
commit 1b85da3291
10 changed files with 49 additions and 19 deletions

@@ -1,3 +1,7 @@
+25.06.2010 Added basic CUBLAS support for NRVec, NRMat, NRSMat
+24.06.2010 Fixed a memory leak existing when MATPTR was defined
+18.06.2010 added autoconf support for BLAS+LAPACK compiled with 64-bit integers and for CUBLAS
+11.06.2010 interface to fortran BLAS+LAPACK compiled with 64-bit integers contributed by L. Veis
 25.02.2010 linear_solve_x and multiply_by_inverse contributed by M. Sulc
 17.01.2010 miscellaneous extensions contributed by M. Sulc
 17.01.2010 bugfix in NRMat::operator *= += -= for non-square matrix

@@ -6,9 +6,11 @@ t_SOURCES = t.cc t2.cc
 test_SOURCES = test.cc
 LDADD = .libs/libla.a
 ACLOCAL_AMFLAGS = -I m4
 EXTRA_DIST = LICENSE
+.cu.o:
+	$(NVCC) -o $@ -c $< $(NVCCFLAGS)
 #todo: achieve portability of the fortran calls via autoconf?
 #use ./configure CXXFLAGS="" LDFLAGS="" to avoid defaults

@@ -33,6 +33,15 @@ AC_CHECK_LIB([lapack], [dgeev_],, [
 ])
+MATPTROPT=""
+AC_ARG_ENABLE([matptr],[ --enable-matptr switch to double** matrix representation (CUDA incompatible) [[default=no]]],
+[case "${enableval}" in
+  yes) MATPTROPT="-DMATPTR" ;;
+  no) ;;
+  *) AC_MSG_ERROR([bad value ${enableval} for --enable-matptr]) ;;
+esac],
+,)
 #check for optional libraries
 #cblas and clapack available?
@@ -46,10 +55,13 @@ AC_CHECK_HEADER([clapack.h],,[CLAPACKOPT=-DNONCLAPACK], AC_INCLUDES_DEFAULT)
 AC_SUBST([CLAPACKOPT])
 #CUDA available? link with cublas and avoid cblas and clapack then...
-AC_CHECK_LIB([cublas], [cublasInit], [CUDALIBS=-lcublas CUDAOPT=-DCUDALA CBLASOPT=-DNONCBLAS CLAPACKOPT=-DNONCLAPACK CBLASLIB=""], [CUDALIB="" CUDAOPT=""])
+AC_CHECK_LIB([cublas], [cublasInit], [MATPTROPT="" NVCC=nvcc NVCCFLAGS="-O -arch sm_20" CUDALIBS=-lcublas CUDAOPT=-DCUDALA CBLASOPT=-DNONCBLAS CLAPACKOPT=-DNONCLAPACK CBLASLIB=""], [CUDALIB="" CUDAOPT=""])
 AC_CHECK_HEADER([cublas.h],,[CUDAOPT="" CUDALIBS=""], AC_INCLUDES_DEFAULT)
 AC_SUBST([CUDALIBS])
 AC_SUBST([CUDAOPT])
+AC_SUBST([NVCC])
+AC_SUBST([NVCCFLAGS])
+AC_SUBST([MATPTROPT])
 #the check for traceback needs bfd to be linked into
@@ -94,16 +106,6 @@ AC_ARG_ENABLE([debug],[ --disable-debug not to perform some range-checking [[
 ,)
 AC_SUBST([DEBUGOPT])
-MATPTROPT=""
-AC_ARG_ENABLE([matptr],[ --enable-matptr switch to double** matrix representation [[default=no]]],
-[case "${enableval}" in
-  yes) MATPTROPT="-DMATPTR" ;;
-  no) ;;
-  *) AC_MSG_ERROR([bad value ${enableval} for --enable-matptr]) ;;
-esac],
-,)
-AC_SUBST([MATPTROPT])
@@ -132,4 +134,5 @@ echo "In addition, similarly you might set include and link paths for Nvidia CUB
 echo "For usage examples see file t.cc. Do not forget using copyonwrite() before "
 echo "changing individual matrix/vector elements via l.h.s. operator[] or operator() "
 echo "**********************************************************************************"
+#echo "Use ./configure --disable-optimize CXXFLAGS="" LDFLAGS="" for a fast compile "

@@ -11,10 +11,12 @@
 namespace LA {
 #ifdef CUDALA
+#define CPU_GPU(x,y) {if((x)!=cpu && (y)!=cpu) laerror("one operand must be in CPU memory");}
 #define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");}
 #define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
 #define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
 #else
+#define CPU_GPU(x,y) {}
 #define NOT_GPU(x) {}
 #define SAME_LOC(x,y) {}
 #define SAME_LOC3(x,y,z) {}
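
These guards are meant to sit at the top of an operation so that mixed-location or GPU-resident operands fail early with laerror() instead of dereferencing device memory on the host; note that CPU_GPU compares two location values directly (it is used in the moveto() hunks below), while NOT_GPU and SAME_LOC query objects via getlocation(). A hypothetical usage sketch, not part of this commit:

// illustrative only: how the location guards are intended to be used
template <typename T>
void axpy_like(NRVec<T> &x, const NRVec<T> &y)
{
	SAME_LOC(x, y);   // both operands must live in the same memory space
	NOT_GPU(x);       // this particular routine has no CUBLAS path yet
	// ... CPU-only implementation ...
}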

@@ -4,4 +4,10 @@
 #define FORNAME(x) x
 #endif
-#define FORTRAN_INT long
+#ifdef LONG_FORTRAN_INT
+#define FORINT
+#define FINT long
+#else
+#undef FORINT
+#define FINT int
+#endif
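
With FINT in place, Fortran BLAS/LAPACK prototypes can be written once and re-typed to 64-bit integers by defining LONG_FORTRAN_INT (which autoconf now detects, per the changelog entry above), FORINT presumably serving as the corresponding feature-test macro. A hedged sketch of the intended use, with dscal as a stand-in routine; the prototype below is illustrative, not copied from the library headers:

// illustrative only: the Fortran integer width is switched in one place via FINT
extern "C" void FORNAME(dscal)(const FINT *n, const double *alpha, double *x, const FINT *incx);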

mat.cc

@@ -355,24 +355,29 @@ NRMat<T> & NRMat<T>::operator-=(const T &a)
 return *this;
 }
 template <>
 const NRMat<double> NRMat<double>::operator-() const
 {
-NRMat<double> result(nn, mm);
 #ifdef CUDALA
+NRMat<double> result(nn, mm, location);
 if(location==cpu)
 {
+#else
+NRMat<double> result(nn, mm);
 #endif
 #ifdef MATPTR
 for (int i=0; i<nn*mm; i++) result.v[0][i]= -v[0][i];
 #else
-cblas_dscal(nn*mm, -1., v, 1);
+memcpy(result.v,v,nn*mm*sizeof(double));
+cblas_dscal(nn*mm, -1., result.v, 1);
 #endif
 #ifdef CUDALA
 }
 else
 {
-cublasDscal(nn*mm, -1., v, 1);
+cublasDcopy(nn*mm, v, 1, result.v, 1);
+cublasDscal(nn*mm, -1., result.v, 1);
 }
 #endif
 return result;
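
The rewritten unary minus no longer scales the operand's own storage v in place; it first copies into the result (memcpy on the CPU, cublasDcopy on the GPU) and negates the copy, and the result is now allocated at the same location as the operand. A small usage sketch, illustrative only:

NRMat<double> a(3, 3);
// ... fill a (after copyonwrite(), as the configure summary reminds) ...
NRMat<double> b = -a;   // b is freshly allocated at a's location; a is left unchanged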

mat.h

@@ -816,6 +816,7 @@ template<typename T>
 void NRMat<T>::moveto(const GPUID dest)
 {
 if(location==dest) return;
+CPU_GPU(location,dest);
 location=dest;
 if(v && !count) laerror("internal inconsistency of reference counting 1");

smat.h

@@ -656,6 +656,7 @@ template<typename T>
 void NRSMat<T>::moveto(const GPUID dest)
 {
 if(location==dest) return;
+CPU_GPU(location,dest);
 location=dest;
 if(v && !count) laerror("internal inconsistency of reference counting 1");

@@ -23,6 +23,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <errno.h>
+#include "bitvector.h"
 #include "sparsemat.h"
 namespace LA {
@@ -918,14 +919,17 @@ const_cast<SparseMat<T> *>(this)->simplify();
 matel<T> *l=list;
 typename LA_traits<T>::normtype sum(0);
 if(scalar!=(T)0)
 {
+if(nn!=mm) laerror("subtraction of scalar from non-square sparse matrix in norm()");
+bitvector has_diagonal_element(nn); has_diagonal_element.clear();
 if(symmetric)
 while(l)
 {
 T hlp=l->elem;
-bool b=l->row==l->col;
-if(b) hlp-=scalar;
+bool b= l->row==l->col;
+if(b) {hlp-=scalar; has_diagonal_element.set(l->row);}
 typename LA_traits<T>::normtype tmp=LA_traits<T>::sqrabs(hlp);
 sum+= tmp;
 if(!b) sum+=tmp;
@@ -935,10 +939,11 @@ if(scalar!=(T)0)
 while(l)
 {
 T hlp=l->elem;
-if(l->row==l->col) hlp-=scalar;
+if(l->row==l->col) {hlp-=scalar; has_diagonal_element.set(l->row);}
 sum+= LA_traits<T>::sqrabs(hlp);
 l=l->next;
 }
+sum += (nn-has_diagonal_element.population()) * LA_traits<T>::sqrabs(scalar); //add contribution of the subtracted scalar from zero non-stored diagonal elements
 }
 else
 {
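
The point of the bitvector bookkeeping: diagonal positions with no stored element are implicitly zero, so when a scalar s is subtracted from the diagonal before accumulating the squared norm, each such position still contributes |s|^2. Writing n for the dimension and D for the set of rows that do have a stored diagonal element, the quantity accumulated in the non-symmetric branch is, in effect,

\sum_{\text{stored }(i,j)} \lvert a_{ij} - s\,\delta_{ij} \rvert^2 \;+\; (n - \lvert D \rvert)\,\lvert s \rvert^2

which is exactly what the added population() term supplies; the new nn!=mm check guards the operation because subtracting a scalar from the diagonal only makes sense for a square matrix.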

vec.h

@@ -711,6 +711,7 @@ template<typename T>
 void NRVec<T>::moveto(const GPUID dest)
 {
 if(location==dest) return;
+CPU_GPU(location,dest);
 location=dest;
 if(v && !count) laerror("internal inconsistency of reference counting 1");
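
In all three moveto() implementations the new CPU_GPU(location,dest) check means a transfer must have the CPU on at least one end, so a direct device-to-device move now stops with laerror(). A hedged sketch of the resulting calling pattern; the GPU ids are illustrative, only moveto(0) as the CPU destination is documented by the ".moveto(0)" hint in the NOT_GPU message above:

// hypothetical ids: 0 = CPU, gpuA/gpuB = two devices
v.moveto(0);       // gpuA -> CPU is allowed (one end is the CPU)
v.moveto(gpuB);    // CPU -> gpuB is allowed
// moving straight from gpuA to gpuB would trigger laerror("one operand must be in CPU memory")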