*** empty log message ***
This commit is contained in:
parent
074c943862
commit
1b85da3291
@ -1,3 +1,7 @@
|
|||||||
|
25.06.2010 Added basic CUBLAS support for NRVec, NRMat, NRSMat
|
||||||
|
24.06.2010 Fixed a memory leak that occurred when MATPTR was defined
|
||||||
|
18.06.2010 added autoconf support for BLAS+LAPACK compiled with 64-bit integers and for CUBLAS
|
||||||
|
11.06.2010 interface to Fortran BLAS+LAPACK compiled with 64-bit integers contributed by L. Veis
|
||||||
25.02.2010 linear_solve_x and multiply_by_inverse contributed by M. Sulc
|
25.02.2010 linear_solve_x and multiply_by_inverse contributed by M. Sulc
|
||||||
17.01.2010 miscellaneous extensions contributed by M. Sulc
|
17.01.2010 miscellaneous extensions contributed by M. Sulc
|
||||||
17.01.2010 bugfix in NRMat::operator *= += -= for non-square matrix
|
17.01.2010 bugfix in NRMat::operator *= += -= for non-square matrix
|
||||||
|
@ -6,9 +6,11 @@ t_SOURCES = t.cc t2.cc
|
|||||||
test_SOURCES = test.cc
|
test_SOURCES = test.cc
|
||||||
LDADD = .libs/libla.a
|
LDADD = .libs/libla.a
|
||||||
ACLOCAL_AMFLAGS = -I m4
|
ACLOCAL_AMFLAGS = -I m4
|
||||||
|
|
||||||
EXTRA_DIST = LICENSE
|
EXTRA_DIST = LICENSE
|
||||||
|
|
||||||
|
.cu.o:
|
||||||
|
$(NVCC) -o $@ -c $< $(NVCCFLAGS)
|
||||||
|
|
||||||
#todo: achieve portability of the fortran calls via autoconf?
|
#todo: achieve portability of the fortran calls via autoconf?
|
||||||
|
|
||||||
#use ./configure CXXFLAGS="" LDFLAGS="" to avoid defaults
|
#use ./configure CXXFLAGS="" LDFLAGS="" to avoid defaults
|
||||||
|
25
configure.ac
25
configure.ac
@ -33,6 +33,15 @@ AC_CHECK_LIB([lapack], [dgeev_],, [
|
|||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
|
MATPTROPT=""
|
||||||
|
AC_ARG_ENABLE([matptr],[ --enable-matptr switch to double** matrix representation (CUDA incompatible) [[default=no]]],
|
||||||
|
[case "${enableval}" in
|
||||||
|
yes) MATPTROPT="-DMATPTR" ;;
|
||||||
|
no) ;;
|
||||||
|
*) AC_MSG_ERROR([bad value ${enableval} for --enable-matptr]) ;;
|
||||||
|
esac],
|
||||||
|
,)
|
||||||
|
|
||||||
#check for optional libraries
|
#check for optional libraries
|
||||||
|
|
||||||
#cblas and clapack available?
|
#cblas and clapack available?
|
||||||
@ -46,10 +55,13 @@ AC_CHECK_HEADER([clapack.h],,[CLAPACKOPT=-DNONCLAPACK], AC_INCLUDES_DEFAULT)
|
|||||||
AC_SUBST([CLAPACKOPT])
|
AC_SUBST([CLAPACKOPT])
|
||||||
|
|
||||||
#CUDA available? link with cublas and avoid cblas and clapack then...
|
#CUDA available? link with cublas and avoid cblas and clapack then...
|
||||||
AC_CHECK_LIB([cublas], [cublasInit], [CUDALIBS=-lcublas CUDAOPT=-DCUDALA CBLASOPT=-DNONCBLAS CLAPACKOPT=-DNONCLAPACK CBLASLIB=""], [CUDALIB="" CUDAOPT=""])
|
# If libcublas is found, enable the CUDA build: select nvcc, link with -lcublas, avoid cblas and clapack,
# and clear MATPTROPT since --enable-matptr is CUDA incompatible. If it is not found, clear CUDALIBS,
# which is the variable passed to AC_SUBST, so no stale value reaches the Makefile.
AC_CHECK_LIB([cublas], [cublasInit], [MATPTROPT="" NVCC=nvcc NVCCFLAGS="-O -arch sm_20" CUDALIBS=-lcublas CUDAOPT=-DCUDALA CBLASOPT=-DNONCBLAS CLAPACKOPT=-DNONCLAPACK CBLASLIB=""], [CUDALIBS="" CUDAOPT=""])
|
||||||
AC_CHECK_HEADER([cublas.h],,[CUDAOPT="" CUDALIBS=""], AC_INCLUDES_DEFAULT)
|
AC_CHECK_HEADER([cublas.h],,[CUDAOPT="" CUDALIBS=""], AC_INCLUDES_DEFAULT)
|
||||||
AC_SUBST([CUDALIBS])
|
AC_SUBST([CUDALIBS])
|
||||||
AC_SUBST([CUDAOPT])
|
AC_SUBST([CUDAOPT])
|
||||||
|
AC_SUBST([NVCC])
|
||||||
|
AC_SUBST([NVCCFLAGS])
|
||||||
|
AC_SUBST([MATPTROPT])
|
||||||
|
|
||||||
|
|
||||||
#the check for traceback needs bfd to be linked into
|
#the check for traceback needs bfd to be linked into
|
||||||
@ -94,16 +106,6 @@ AC_ARG_ENABLE([debug],[ --disable-debug not to perform some range-checking [[
|
|||||||
,)
|
,)
|
||||||
AC_SUBST([DEBUGOPT])
|
AC_SUBST([DEBUGOPT])
|
||||||
|
|
||||||
MATPTROPT=""
|
|
||||||
AC_ARG_ENABLE([matptr],[ --enable-matptr switch to double** matrix representation [[default=no]]],
|
|
||||||
[case "${enableval}" in
|
|
||||||
yes) MATPTROPT="-DMATPTR" ;;
|
|
||||||
no) ;;
|
|
||||||
*) AC_MSG_ERROR([bad value ${enableval} for --enable-matptr]) ;;
|
|
||||||
esac],
|
|
||||||
,)
|
|
||||||
AC_SUBST([MATPTROPT])
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -132,4 +134,5 @@ echo "In addition, similarly you might set include and link paths for Nvidia CUB
|
|||||||
echo "For usage examples see file t.cc. Do not forget using copyonwrite() before "
|
echo "For usage examples see file t.cc. Do not forget using copyonwrite() before "
|
||||||
echo "changing individual matrix/vector elements via l.h.s. operator[] or operator() "
|
echo "changing individual matrix/vector elements via l.h.s. operator[] or operator() "
|
||||||
echo "**********************************************************************************"
|
echo "**********************************************************************************"
|
||||||
|
#echo "Use ./configure --disable-optimize CXXFLAGS="" LDFLAGS="" for a fast compile "
|
||||||
|
|
||||||
|
@ -11,10 +11,12 @@
|
|||||||
namespace LA {
|
namespace LA {
|
||||||
|
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
|
#define CPU_GPU(x,y) {if((x)!=cpu && (y)!=cpu) laerror("one operand must be in CPU memory");}
|
||||||
#define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");}
|
#define NOT_GPU(x) {if((x).getlocation()!=cpu) laerror("Operation not implemented on GPU (yet). Use .moveto(0) first.");}
|
||||||
#define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
#define SAME_LOC(x,y) {if((x).getlocation()!=(y).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
||||||
#define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
#define SAME_LOC3(x,y,z) {if((x).getlocation()!=(y).getlocation() || (x).getlocation()!=(z).getlocation()) laerror("Operands have different location. Use .moveto() first.");}
|
||||||
#else
|
#else
|
||||||
|
#define CPU_GPU(x,y) {}
|
||||||
#define NOT_GPU(x) {}
|
#define NOT_GPU(x) {}
|
||||||
#define SAME_LOC(x,y) {}
|
#define SAME_LOC(x,y) {}
|
||||||
#define SAME_LOC3(x,y,z) {}
|
#define SAME_LOC3(x,y,z) {}
|
||||||
|
@ -4,4 +4,10 @@
|
|||||||
#define FORNAME(x) x
|
#define FORNAME(x) x
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define FORTRAN_INT long
|
#ifdef LONG_FORTRAN_INT
|
||||||
|
#define FORINT
|
||||||
|
#define FINT long
|
||||||
|
#else
|
||||||
|
#undef FORINT
|
||||||
|
#define FINT int
|
||||||
|
#endif
|
||||||
|
11
mat.cc
11
mat.cc
@ -355,24 +355,29 @@ NRMat<T> & NRMat<T>::operator-=(const T &a)
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
const NRMat<double> NRMat<double>::operator-() const
|
const NRMat<double> NRMat<double>::operator-() const
|
||||||
{
|
{
|
||||||
NRMat<double> result(nn, mm);
|
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
|
NRMat<double> result(nn, mm, location);
|
||||||
if(location==cpu)
|
if(location==cpu)
|
||||||
{
|
{
|
||||||
|
#else
|
||||||
|
NRMat<double> result(nn, mm);
|
||||||
#endif
|
#endif
|
||||||
#ifdef MATPTR
|
#ifdef MATPTR
|
||||||
for (int i=0; i<nn*mm; i++) result.v[0][i]= -v[0][i];
|
for (int i=0; i<nn*mm; i++) result.v[0][i]= -v[0][i];
|
||||||
#else
|
#else
|
||||||
cblas_dscal(nn*mm, -1., v, 1);
|
memcpy(result.v,v,nn*mm*sizeof(double));
|
||||||
|
cblas_dscal(nn*mm, -1., result.v, 1);
|
||||||
#endif
|
#endif
|
||||||
#ifdef CUDALA
|
#ifdef CUDALA
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cublasDscal(nn*mm, -1., v, 1);
|
cublasDcopy(nn*mm, v, 1, result.v, 1);
|
||||||
|
cublasDscal(nn*mm, -1., result.v, 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return result;
|
return result;
|
||||||
|
1
mat.h
1
mat.h
@ -816,6 +816,7 @@ template<typename T>
|
|||||||
void NRMat<T>::moveto(const GPUID dest)
|
void NRMat<T>::moveto(const GPUID dest)
|
||||||
{
|
{
|
||||||
if(location==dest) return;
|
if(location==dest) return;
|
||||||
|
CPU_GPU(location,dest);
|
||||||
location=dest;
|
location=dest;
|
||||||
|
|
||||||
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
||||||
|
1
smat.h
1
smat.h
@ -656,6 +656,7 @@ template<typename T>
|
|||||||
void NRSMat<T>::moveto(const GPUID dest)
|
void NRSMat<T>::moveto(const GPUID dest)
|
||||||
{
|
{
|
||||||
if(location==dest) return;
|
if(location==dest) return;
|
||||||
|
CPU_GPU(location,dest);
|
||||||
location=dest;
|
location=dest;
|
||||||
|
|
||||||
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#include "bitvector.h"
|
||||||
#include "sparsemat.h"
|
#include "sparsemat.h"
|
||||||
|
|
||||||
namespace LA {
|
namespace LA {
|
||||||
@ -918,14 +919,17 @@ const_cast<SparseMat<T> *>(this)->simplify();
|
|||||||
|
|
||||||
matel<T> *l=list;
|
matel<T> *l=list;
|
||||||
typename LA_traits<T>::normtype sum(0);
|
typename LA_traits<T>::normtype sum(0);
|
||||||
|
|
||||||
if(scalar!=(T)0)
|
if(scalar!=(T)0)
|
||||||
{
|
{
|
||||||
|
if(nn!=mm) laerror("subtraction of scalar from non-square sparse matrix in norm()");
|
||||||
|
bitvector has_diagonal_element(nn); has_diagonal_element.clear();
|
||||||
if(symmetric)
|
if(symmetric)
|
||||||
while(l)
|
while(l)
|
||||||
{
|
{
|
||||||
T hlp=l->elem;
|
T hlp=l->elem;
|
||||||
bool b= l->row==l->col;
|
bool b= l->row==l->col;
|
||||||
if(b) hlp-=scalar;
|
if(b) {hlp-=scalar; has_diagonal_element.set(l->row);}
|
||||||
typename LA_traits<T>::normtype tmp=LA_traits<T>::sqrabs(hlp);
|
typename LA_traits<T>::normtype tmp=LA_traits<T>::sqrabs(hlp);
|
||||||
sum+= tmp;
|
sum+= tmp;
|
||||||
if(!b) sum+=tmp;
|
if(!b) sum+=tmp;
|
||||||
@ -935,10 +939,11 @@ if(scalar!=(T)0)
|
|||||||
while(l)
|
while(l)
|
||||||
{
|
{
|
||||||
T hlp=l->elem;
|
T hlp=l->elem;
|
||||||
if(l->row==l->col) hlp-=scalar;
|
if(l->row==l->col) {hlp-=scalar; has_diagonal_element.set(l->row);}
|
||||||
sum+= LA_traits<T>::sqrabs(hlp);
|
sum+= LA_traits<T>::sqrabs(hlp);
|
||||||
l=l->next;
|
l=l->next;
|
||||||
}
|
}
|
||||||
|
sum += (nn-has_diagonal_element.population()) * LA_traits<T>::sqrabs(scalar); //diagonal elements absent from the list are implicit zeros; after the scalar is subtracted each of them still contributes sqrabs(scalar)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
1
vec.h
1
vec.h
@ -711,6 +711,7 @@ template<typename T>
|
|||||||
void NRVec<T>::moveto(const GPUID dest)
|
void NRVec<T>::moveto(const GPUID dest)
|
||||||
{
|
{
|
||||||
if(location==dest) return;
|
if(location==dest) return;
|
||||||
|
CPU_GPU(location,dest);
|
||||||
location=dest;
|
location=dest;
|
||||||
|
|
||||||
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
||||||
|
Loading…
Reference in New Issue
Block a user