*** empty log message ***
This commit is contained in:
parent
074c943862
commit
1b85da3291
@ -1,3 +1,7 @@
|
||||
25.06.2010 Added basic CUBLAS support for NRVec, NRMat, NRSMat
|
||||
24.06.2010 Fixed a memory leak existing when MATPTR was defined
|
||||
18.06.2010 added autoconf support for BLAS+LAPACK compiled with 64-bit integers and for CUBLAS
|
||||
11.06.2010 interface to fortran BLAS+LAPACK compiled with 64-bit integers contributed by L. Veis
|
||||
25.02.2010 linear_solve_x and multiply_by_inverse contributed by M. Sulc
|
||||
17.01.2010 miscellaneous extensions contributed by M. Sulc
|
||||
17.01.2010 bugfix in NRMat::operator *= += -= for non-square matrix
|
||||
|
@ -6,9 +6,11 @@ t_SOURCES = t.cc t2.cc
|
||||
test_SOURCES = test.cc
|
||||
LDADD = .libs/libla.a
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
EXTRA_DIST = LICENSE
|
||||
|
||||
.cu.o:
|
||||
$(NVCC) -o $@ -c $< $(NVCCFLAGS)
|
||||
|
||||
#todo: achieve portability of the fortran calls via autoconf?
|
||||
|
||||
#use ./configure CXXFLAGS="" LDFLAGS="" to avoid defaults
|
||||
|
25
configure.ac
25
configure.ac
@ -33,6 +33,15 @@ AC_CHECK_LIB([lapack], [dgeev_],, [
|
||||
])
|
||||
|
||||
|
||||
MATPTROPT=""
|
||||
AC_ARG_ENABLE([matptr],[ --enable-matptr switch to double** matrix representation (CUDA incompatible) [[default=no]]],
|
||||
[case "${enableval}" in
|
||||
yes) MATPTROPT="-DMATPTR" ;;
|
||||
no) ;;
|
||||
*) AC_MSG_ERROR([bad value ${enableval} for --enable-matptr]) ;;
|
||||
esac],
|
||||
,)
|
||||
|
||||
#check for optional libraries
|
||||
|
||||
#cblas and clapack available?
|
||||
@ -46,10 +55,13 @@ AC_CHECK_HEADER([clapack.h],,[CLAPACKOPT=-DNONCLAPACK], AC_INCLUDES_DEFAULT)
|
||||
AC_SUBST([CLAPACKOPT])
|
||||
|
||||
#CUDA available? link with cublas and avoid cblas and clapack then...
|
||||
AC_CHECK_LIB([cublas], [cublasInit], [CUDALIBS=-lcublas CUDAOPT=-DCUDALA CBLASOPT=-DNONCBLAS CLAPACKOPT=-DNONCLAPACK CBLASLIB=""], [CUDALIB="" CUDAOPT=""])
|
||||
#if libcublas is missing, leave the substituted CUDALIBS and CUDAOPT empty
AC_CHECK_LIB([cublas], [cublasInit], [MATPTROPT="" NVCC=nvcc NVCCFLAGS="-O -arch sm_20" CUDALIBS=-lcublas CUDAOPT=-DCUDALA CBLASOPT=-DNONCBLAS CLAPACKOPT=-DNONCLAPACK CBLASLIB=""], [CUDALIBS="" CUDAOPT=""])
|
||||
AC_CHECK_HEADER([cublas.h],,[CUDAOPT="" CUDALIBS=""], AC_INCLUDES_DEFAULT)
|
||||
AC_SUBST([CUDALIBS])
|
||||
AC_SUBST([CUDAOPT])
|
||||
AC_SUBST([NVCC])
|
||||
AC_SUBST([NVCCFLAGS])
|
||||
AC_SUBST([MATPTROPT])
|
||||
|
||||
|
||||
#the check for traceback needs bfd to be linked in
|
||||
@ -94,16 +106,6 @@ AC_ARG_ENABLE([debug],[ --disable-debug not to perform some range-checking [[
|
||||
,)
|
||||
AC_SUBST([DEBUGOPT])
|
||||
|
||||
MATPTROPT=""
|
||||
AC_ARG_ENABLE([matptr],[ --enable-matptr switch to double** matrix representation [[default=no]]],
|
||||
[case "${enableval}" in
|
||||
yes) MATPTROPT="-DMATPTR" ;;
|
||||
no) ;;
|
||||
*) AC_MSG_ERROR([bad value ${enableval} for --enable-matptr]) ;;
|
||||
esac],
|
||||
,)
|
||||
AC_SUBST([MATPTROPT])
|
||||
|
||||
|
||||
|
||||
|
||||
@ -132,4 +134,5 @@ echo "In addition, similarly you might set include and link paths for Nvidia CUB
|
||||
echo "For usage examples see file t.cc. Do not forget using copyonwrite() before "
|
||||
echo "changing individual matrix/vector elements via l.h.s. operator[] or operator() "
|
||||
echo "**********************************************************************************"
|
||||
#echo "Use ./configure --disable-optimize CXXFLAGS="" LDFLAGS="" for a fast compile "
|
||||
|
||||
|
@ -11,10 +11,12 @@
|
||||
namespace LA {
|
||||
|
||||
#ifdef CUDALA
|
||||
#define CPU_GPU(x,y) {if((x)!=cpu && (y)!=cpu) laerror("one operand must be in CPU memory");}
|
||||
#define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");}
|
||||
#define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
||||
#define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
||||
#else
|
||||
#define CPU_GPU(x,y) {}
|
||||
#define NOT_GPU(x) {}
|
||||
#define SAME_LOC(x,y) {}
|
||||
#define SAME_LOC3(x,y,z) {}
|
||||
|
@ -4,4 +4,10 @@
|
||||
#define FORNAME(x) x
|
||||
#endif
|
||||
|
||||
#define FORTRAN_INT long
|
||||
#ifdef LONG_FORTRAN_INT
|
||||
#define FORINT
|
||||
#define FINT long
|
||||
#else
|
||||
#undef FORINT
|
||||
#define FINT int
|
||||
#endif
|
||||
|
11
mat.cc
11
mat.cc
@ -355,24 +355,29 @@ NRMat<T> & NRMat<T>::operator-=(const T &a)
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
const NRMat<double> NRMat<double>::operator-() const
|
||||
{
|
||||
NRMat<double> result(nn, mm);
|
||||
#ifdef CUDALA
|
||||
NRMat<double> result(nn, mm, location);
|
||||
if(location==cpu)
|
||||
{
|
||||
#else
|
||||
NRMat<double> result(nn, mm);
|
||||
#endif
|
||||
#ifdef MATPTR
|
||||
for (int i=0; i<nn*mm; i++) result.v[0][i]= -v[0][i];
|
||||
#else
|
||||
cblas_dscal(nn*mm, -1., v, 1);
|
||||
memcpy(result.v,v,nn*mm*sizeof(double));
|
||||
cblas_dscal(nn*mm, -1., result.v, 1);
|
||||
#endif
|
||||
#ifdef CUDALA
|
||||
}
|
||||
else
|
||||
{
|
||||
cublasDscal(nn*mm, -1., v, 1);
|
||||
cublasDcopy(nn*mm, v, 1, result.v, 1);
|
||||
cublasDscal(nn*mm, -1., result.v, 1);
|
||||
}
|
||||
#endif
|
||||
return result;
|
||||
|
1
mat.h
1
mat.h
@ -816,6 +816,7 @@ template<typename T>
|
||||
void NRMat<T>::moveto(const GPUID dest)
|
||||
{
|
||||
if(location==dest) return;
|
||||
CPU_GPU(location,dest);
|
||||
location=dest;
|
||||
|
||||
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
||||
|
1
smat.h
1
smat.h
@ -656,6 +656,7 @@ template<typename T>
|
||||
void NRSMat<T>::moveto(const GPUID dest)
|
||||
{
|
||||
if(location==dest) return;
|
||||
CPU_GPU(location,dest);
|
||||
location=dest;
|
||||
|
||||
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
||||
|
11
sparsemat.cc
11
sparsemat.cc
@ -23,6 +23,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include "bitvector.h"
|
||||
#include "sparsemat.h"
|
||||
|
||||
namespace LA {
|
||||
@ -918,14 +919,17 @@ const_cast<SparseMat<T> *>(this)->simplify();
|
||||
|
||||
matel<T> *l=list;
|
||||
typename LA_traits<T>::normtype sum(0);
|
||||
|
||||
if(scalar!=(T)0)
|
||||
{
|
||||
if(nn!=mm) laerror("subtraction of scalar from non-square sparse matrix in norm()");
|
||||
bitvector has_diagonal_element(nn); has_diagonal_element.clear();
|
||||
if(symmetric)
|
||||
while(l)
|
||||
{
|
||||
T hlp=l->elem;
|
||||
bool b=l->row==l->col;
|
||||
if(b) hlp-=scalar;
|
||||
bool b= l->row==l->col;
|
||||
if(b) {hlp-=scalar; has_diagonal_element.set(l->row);}
|
||||
typename LA_traits<T>::normtype tmp=LA_traits<T>::sqrabs(hlp);
|
||||
sum+= tmp;
|
||||
if(!b) sum+=tmp;
|
||||
@ -935,10 +939,11 @@ if(scalar!=(T)0)
|
||||
while(l)
|
||||
{
|
||||
T hlp=l->elem;
|
||||
if(l->row==l->col) hlp-=scalar;
|
||||
if(l->row==l->col) {hlp-=scalar; has_diagonal_element.set(l->row);}
|
||||
sum+= LA_traits<T>::sqrabs(hlp);
|
||||
l=l->next;
|
||||
}
|
||||
sum += (nn-has_diagonal_element.population()) * LA_traits<T>::sqrabs(scalar); //add contribution of the subtracted scalar from zero non-stored diagonal elements
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user