1222 lines
		
	
	
		
			40 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1222 lines
		
	
	
		
			40 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
/*******************************************************************************
    LA: linear algebra C++ interface library
    Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
                  complex versions written by Roman Curik <roman.curik@jh-inst.cas.cz>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*******************************************************************************/
 | 
						|
#include <iostream>
 | 
						|
#include <stdlib.h>
 | 
						|
#include <stdio.h>
 | 
						|
#include <sys/types.h>
 | 
						|
#include <sys/stat.h>
 | 
						|
#include <fcntl.h>
 | 
						|
#include <errno.h>
 | 
						|
#include "vec.h"
 | 
						|
#include <unistd.h>
 | 
						|
#include "vecmat3.h"
 | 
						|
 | 
						|
namespace LA {
 | 
						|
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * conversion constructor interpreting a given matrix with \f$N\f$ rows and 
 | 
						|
 * \f$M\f$ columns of general type <code>T</code> as a vector of \f$N\times{}M\f$
 | 
						|
 * elements
 | 
						|
 * @param[in] rhs matrix being converted
 | 
						|
 * @see NRMat<T>::NRMat()
 | 
						|
 ******************************************************************************/
 | 
						|
#ifndef MATPTR
 | 
						|
template <typename T>
 | 
						|
NRVec<T>::NRVec(const NRMat<T> &rhs) {
 | 
						|
#ifdef CUDALA
 | 
						|
	location = rhs.location;
 | 
						|
#endif
 | 
						|
	nn = rhs.nn*rhs.mm;
 | 
						|
	v = rhs.v;
 | 
						|
	count = rhs.count;
 | 
						|
	(*count)++;
 | 
						|
}
 | 
						|
#endif
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * routine for formatted output via lawritemat
 | 
						|
 * @param[in] file pointer to <tt>FILE</tt> structure representing the output file
 | 
						|
 * @param[in] format format specification in standard printf-like form
 | 
						|
 * @param[in] modulo
 | 
						|
 * @see lawritemat()
 | 
						|
 ******************************************************************************/
 | 
						|
template<typename T>
 | 
						|
void NRVec<T>::fprintf(FILE *file, const char *format, const int modulo) const {
 | 
						|
	NOT_GPU(*this);
 | 
						|
 | 
						|
	lawritemat(file, v, 1, nn, format, 1, modulo, 0);
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * routine for formatted input via fscanf
 | 
						|
 * @param[in] f pointer to <tt>FILE</tt> structure representing the input file
 | 
						|
 * @param[in] format format specification in standard printf-like form
 | 
						|
 ******************************************************************************/
 | 
						|
template <typename T>
 | 
						|
void NRVec<T>::fscanf(FILE *f, const char *format) {
 | 
						|
	int n(0);
 | 
						|
	NOT_GPU(*this);
 | 
						|
 | 
						|
	if(::fscanf(f, "%d", &n) != 1) laerror("can not read vector dimension");
 | 
						|
	resize(n);
 | 
						|
	for(register int i=0; i<n; i++){
 | 
						|
		if (::fscanf(f, format, v + i) != 1){
 | 
						|
			laerror("can not read the vector element");
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * unary minus operator in case of real double-precision vector
 | 
						|
 * @return the modified vector by value
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
const NRVec<double> NRVec<double>::operator-() const {
 | 
						|
	NRVec<double> result(*this);
 | 
						|
	result.copyonwrite();
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_dscal(nn, -1.0, result.v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		cublasDscal(nn, -1.0, result.v, 1);
 | 
						|
		TEST_CUBLAS("cublasDscal");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
	return result;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * unary minus operator in case of complex double-precision vector
 | 
						|
 * @return the modified vector by value
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
const NRVec<std::complex<double> > NRVec<std::complex<double> >::operator-() const {
 | 
						|
	NRVec<std::complex<double> > result(*this);
 | 
						|
	result.copyonwrite();
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_zdscal(nn, -1.0, result.v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		cublasZdscal(nn, -1.0, (cuDoubleComplex*)result.v, 1);
 | 
						|
		TEST_CUBLAS("cublasZdscal");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
	return result;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * unary minus operator for vector of general type
 | 
						|
 * @return the modified vector
 | 
						|
 ******************************************************************************/
 | 
						|
template <typename T>
 | 
						|
const NRVec<T> NRVec<T>::operator-() const {
 | 
						|
	NOT_GPU(*this);
 | 
						|
	NRVec<T> result(nn, getlocation());
 | 
						|
	for(register int i=0; i<nn; i++) result.v[i] = -v[i];
 | 
						|
	return result;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * comparison operator (lexicographical order)
 | 
						|
 * @param[in] rhs vector intended for comparison
 | 
						|
 * @return 
 | 
						|
 *	\li \c true current vector is bigger than vector \c rhs
 | 
						|
 *	\li \c false current vector is smaller than vector \c rhs
 | 
						|
 ******************************************************************************/
 | 
						|
template <typename T>
 | 
						|
const bool NRVec<T>::operator>(const NRVec &rhs) const {
 | 
						|
	int n(nn);
 | 
						|
 | 
						|
	SAME_LOC(*this, rhs);
 | 
						|
	NOT_GPU(*this);
 | 
						|
 | 
						|
	if(rhs.nn < n) n = rhs.nn;
 | 
						|
	for(register int i=0; i<n;++i){
 | 
						|
		if(LA_traits<T>::bigger(v[i], rhs.v[i])) return true;
 | 
						|
		if(LA_traits<T>::smaller(v[i], rhs.v[i])) return false;
 | 
						|
	}
 | 
						|
	return nn>rhs.nn;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * comparison operator (lexicographical order)
 | 
						|
 * @param[in] rhs vector intended for comparison
 | 
						|
 * @return 
 | 
						|
 *	\li \c false current vector is bigger than vector \c rhs
 | 
						|
 *	\li \c true current vector is smaller than vector \c rhs
 | 
						|
 ******************************************************************************/
 | 
						|
template <typename T>
 | 
						|
const bool NRVec<T>::operator<(const NRVec &rhs) const {
 | 
						|
	int n(nn);
 | 
						|
	
 | 
						|
	SAME_LOC(*this, rhs);
 | 
						|
	NOT_GPU(*this);
 | 
						|
 | 
						|
	if(rhs.nn < n) n = rhs.nn;
 | 
						|
	for(register int i=0; i<n;++i){
 | 
						|
		if(LA_traits<T>::smaller(v[i], rhs.v[i])) return true;
 | 
						|
		if(LA_traits<T>::bigger(v[i], rhs.v[i])) return false;
 | 
						|
	}
 | 
						|
	return nn<rhs.nn;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * fill the real vector with pseudorandom numbers generated using uniform distribution
 | 
						|
 * @param[in] x specification of the interval \f$[0,x]\f$ for the random number generator
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<double>::randomize(const double &x){
 | 
						|
	NOT_GPU(*this);
 | 
						|
 | 
						|
	for(register int i=0; i<nn; ++i){
 | 
						|
		v[i] = x*RANDDOUBLESIGNED();
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * fill the complex vector with pseudorandom numbers generated using uniform distribution
 | 
						|
 * the real and imaginary parts are generated independently
 | 
						|
 * @param[in] x specification of the interval \f$[0,x]\f$ for the random number generator
 | 
						|
 * @return 
 | 
						|
 *	\li \c false current vector is bigger than vector \c rhs
 | 
						|
 *	\li \c true current vector is smaller than vector \c rhs
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<std::complex<double> >::randomize(const double &x) {
 | 
						|
	NOT_GPU(*this);
 | 
						|
 | 
						|
	for(register int i=0; i<nn; ++i){
 | 
						|
		v[i] = std::complex<double>(x*RANDDOUBLESIGNED(), x*RANDDOUBLESIGNED());
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * constructor creating complex vector from a real one
 | 
						|
 * @param[in] rhs the real vector being converted into the complex one
 | 
						|
 * @param[in] imagpart 
 | 
						|
 *	\li \c true vector \c rhs is interpreted as the imaginary part of the new complex vector
 | 
						|
 *	\li \c false vector \c rhs is interpreted as the real part of the new complex vector
 | 
						|
 * @return 
 | 
						|
 *	\li \c false current vector is bigger than vector \c rhs
 | 
						|
 *	\li \c true current vector is smaller than vector \c rhs
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
NRVec<std::complex<double> >::NRVec(const NRVec<double> &rhs, bool imagpart): nn(rhs.size()){
 | 
						|
 | 
						|
	 count = new int;
 | 
						|
	*count = 1;
 | 
						|
#ifdef CUDALA
 | 
						|
	location = rhs.getlocation();
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		v = (std::complex<double>*)new std::complex<double>[nn];
 | 
						|
		memset(v, 0, nn*sizeof(std::complex<double>));
 | 
						|
		cblas_dcopy(nn, &rhs[0], 1, ((double *)v) + (imagpart?1:0), 2);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		v = (std::complex<double>*) gpualloc(nn*sizeof(std::complex<double>));
 | 
						|
 | 
						|
		cublasZscal(nn, CUZERO, (cuDoubleComplex*)v, 1);
 | 
						|
		TEST_CUBLAS("cublasZscal");
 | 
						|
 | 
						|
		cublasDcopy(nn, &rhs[0], 1, ((double *)v) + (imagpart?1:0), 2);
 | 
						|
		TEST_CUBLAS("cublasDcopy");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the <b>axpy</b> operation on the current real vector \f$\vec{v}\f$, i.e.
 | 
						|
 * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f]
 | 
						|
 * @param[in] alpha double-precision real parameter \f$\alpha\f$
 | 
						|
 * @param[in] x double-precision real vector \f$\vec{x}\f$
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<double>::axpy(const double alpha, const NRVec<double> &x) {
 | 
						|
#ifdef DEBUG
 | 
						|
	if (nn != x.nn) laerror("incompatible vectors");
 | 
						|
#endif
 | 
						|
	SAME_LOC(*this, x);
 | 
						|
	copyonwrite();
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_daxpy(nn, alpha, x.v, 1, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		cublasDaxpy(nn, alpha, x.v, 1, v, 1);
 | 
						|
		TEST_CUBLAS("cublasDaxpy");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the <b>axpy</b> operation on the current complex vector \f$\vec{v}\f$, i.e.
 | 
						|
 * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f]
 | 
						|
 * @param[in] alpha \f$\alpha\f$ parameter
 | 
						|
 * @param[in] x complex vector \f$\vec{x}\f$
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<std::complex<double> >::axpy(const std::complex<double> alpha, const NRVec<std::complex<double> > &x){
 | 
						|
#ifdef DEBUG
 | 
						|
	if (nn != x.nn) laerror("incompatible vectors");
 | 
						|
#endif
 | 
						|
	SAME_LOC(*this, x);
 | 
						|
	copyonwrite();
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_zaxpy(nn, &alpha, x.v, 1, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
 | 
						|
		cublasZaxpy(nn, _alpha, (cuDoubleComplex*)x.v, 1, (cuDoubleComplex*)v, 1);
 | 
						|
		TEST_CUBLAS("cublasZaxpy");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the <b>axpy</b> operation on the current real vector \f$\vec{v}\f$, i.e.
 | 
						|
 * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f]
 | 
						|
 * @param[in] alpha \f$\alpha\f$ parameter
 | 
						|
 * @param[in] x pointer to double-precision real data
 | 
						|
 * @param[in] stride sets the stride
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<double>::axpy(const double alpha, const double *x, const int stride){
 | 
						|
	NOT_GPU(*this);
 | 
						|
 | 
						|
	copyonwrite();
 | 
						|
	cblas_daxpy(nn, alpha, x, stride, v, 1);
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the <b>axpy</b> operation on the current complex vector \f$\vec{v}\f$, i.e.
 | 
						|
 * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f]
 | 
						|
 * @param[in] alpha double-precision complex parameter \f$\alpha\f$
 | 
						|
 * @param[in] x pointer to double-precision complex data
 | 
						|
 * @param[in] stride sets the stride
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<std::complex<double> >::axpy(const std::complex<double> alpha, const std::complex<double> *x, const int stride){
 | 
						|
	NOT_GPU(*this);
 | 
						|
 | 
						|
	copyonwrite();
 | 
						|
	cblas_zaxpy(nn, &alpha, x, stride, v, 1);
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * assign real scalar value to every element of the current vector
 | 
						|
 * @param[in] a scalar value to be assigned
 | 
						|
 * @return reference to the modified vector
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
NRVec<double>& NRVec<double>::operator=(const double &a){
 | 
						|
copyonwrite();
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_dcopy(nn, &a, 0, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		smart_gpu_set(nn, (double)0, v);
 | 
						|
	}
 | 
						|
#endif	
 | 
						|
	return *this;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * assign complex scalar value to every element of the current vector
 | 
						|
 * @param[in] a scalar value to be assigned
 | 
						|
 * @return reference to the modified vector
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
NRVec<std::complex<double> >& NRVec<std::complex<double> >::operator=(const std::complex<double> &a){
 | 
						|
copyonwrite();
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_zcopy(nn, &a, 0, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		smart_gpu_set(nn, (std::complex<double>)0, v);
 | 
						|
	}
 | 
						|
#endif	
 | 
						|
	return *this;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * assign scalar value to every element of the current vector of general type <code>T</code>
 | 
						|
 * @param[in] a scalar value to be assigned
 | 
						|
 * @return reference to the modified vector
 | 
						|
 ******************************************************************************/
 | 
						|
template <typename T>
 | 
						|
NRVec<T>& NRVec<T>::operator=(const T &a){
 | 
						|
	NOT_GPU(*this);
 | 
						|
	copyonwrite();
 | 
						|
 | 
						|
	if(!LA_traits<T>::is_plaindata() || a != (T)0){
 | 
						|
		for(register int i=0; i<nn; i++) v[i] = a;
 | 
						|
	}else{
 | 
						|
		memset(v, 0, nn*sizeof(T));
 | 
						|
	}
 | 
						|
	return *this;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * normalize current real vector (in the Euclidean norm)
 | 
						|
 * @param[in] norm if not NULL, the norm of this vector is stored into *norm
 | 
						|
 * @return reference to the modified vector
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
NRVec<double>& NRVec<double>::normalize(double *norm){
 | 
						|
	double tmp(0.0);
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		tmp = cblas_dnrm2(nn, v, 1);
 | 
						|
		if(norm) *norm = tmp;
 | 
						|
		#ifdef DEBUG
 | 
						|
			if(!tmp) laerror("attempt to normalize zero vector");
 | 
						|
		#endif
 | 
						|
		copyonwrite();
 | 
						|
		tmp = 1.0 / tmp;
 | 
						|
		cblas_dscal(nn, tmp, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		tmp = cublasDnrm2(nn, v, 1);
 | 
						|
		TEST_CUBLAS("cublasDnrm2");
 | 
						|
 | 
						|
		if(norm) *norm = tmp;
 | 
						|
		#ifdef DEBUG
 | 
						|
			if(!tmp) laerror("attempt to normalize zero vector");
 | 
						|
		#endif
 | 
						|
		copyonwrite();
 | 
						|
		tmp = 1.0 / tmp;
 | 
						|
		cublasDscal(nn, tmp, v, 1);
 | 
						|
		TEST_CUBLAS("cublasDscal");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
	return *this;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * normalize current complex vector (in the Euclidean norm)
 | 
						|
 * @param[in] norm if not NULL, the norm of this vector is stored into *norm
 | 
						|
 * @return reference to the modified vector
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
NRVec<std::complex<double> > & NRVec<std::complex<double> >::normalize(double *norm){
 | 
						|
	double tmp(0.0);
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		tmp = cblas_dznrm2(nn, v, 1);
 | 
						|
		if(norm) *norm = tmp;
 | 
						|
		#ifdef DEBUG
 | 
						|
		if(tmp == 0.0) laerror("attempt to normalize zero vector");
 | 
						|
		#endif
 | 
						|
		copyonwrite();
 | 
						|
		tmp = 1.0 / tmp;
 | 
						|
		cblas_zdscal(nn, tmp, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		tmp = cublasDznrm2(nn, (cuDoubleComplex*)v, 1);
 | 
						|
		TEST_CUBLAS("cublasDznrm2");
 | 
						|
 | 
						|
		if(norm) *norm = tmp;
 | 
						|
		#ifdef DEBUG
 | 
						|
			if(tmp == 0.0) laerror("attempt to normalize zero vector");
 | 
						|
		#endif
 | 
						|
		copyonwrite();
 | 
						|
 | 
						|
		tmp = 1.0 / tmp;
 | 
						|
		cublasZdscal(nn, tmp, (cuDoubleComplex*)v, 1);
 | 
						|
		TEST_CUBLAS("cublasZdscal");
 | 
						|
	}
 | 
						|
#endif		
 | 
						|
	return *this;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the \b gemv operation on this real vector \f$\vec{y}\f$, i.e.
 | 
						|
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 | 
						|
 * @param[in] beta real parameter \f$\beta\f$
 | 
						|
 * @param[in] A real matrix \f$A\f$
 | 
						|
 * @param[in] trans if <code>trans == 'n'</code> use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$
 | 
						|
 * @param[in] alpha real parameter \f$\alpha\f$
 | 
						|
 * @param[in] x real vector \f$\vec{x}\f$
 | 
						|
 * @see NRMat<T>::gemm
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<double>::gemv(const double beta, const NRMat<double> &A, 
 | 
						|
		const char trans, const double alpha, const NRVec &x) {
 | 
						|
#ifdef DEBUG
 | 
						|
	if((tolower(trans) == 'n'?A.ncols():A.nrows()) != x.size()){ laerror("incompatible vectors"); }
 | 
						|
#endif
 | 
						|
	SAME_LOC3(*this, x, A);
 | 
						|
	copyonwrite();
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location==cpu){
 | 
						|
#endif
 | 
						|
		cblas_dgemv(CblasRowMajor, (tolower(trans)=='n' ? CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), alpha, A, A.ncols(), x.v, 1, beta, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		cublasDgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), alpha, A, A.ncols(), x.v, 1, beta, v, 1);
 | 
						|
		TEST_CUBLAS("cublasDgemv");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e.
 | 
						|
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 | 
						|
 * @param[in] beta real parameter \f$\beta\f$
 | 
						|
 * @param[in] A <b>real</b> matrix \f$A\f$
 | 
						|
 * @param[in] trans if <tt>trans == 'n'</tt> use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$
 | 
						|
 * @param[in] alpha real parameter \f$\alpha\f$
 | 
						|
 * @param[in] x real vector \f$\vec{x}\f$
 | 
						|
 * @see gemm
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<std::complex<double> >::gemv(const double beta, const NRMat<double> &A,
 | 
						|
                const char trans, const double alpha, const NRVec<std::complex<double> > &x) {
 | 
						|
#ifdef DEBUG
 | 
						|
	if ((tolower(trans) == 'n'?A.ncols():A.nrows()) != x.size()){ laerror("incompatible vectors"); }
 | 
						|
#endif
 | 
						|
	SAME_LOC3(*this, x, A);
 | 
						|
	copyonwrite();
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location==cpu){
 | 
						|
#endif
 | 
						|
		cblas_dgemv(CblasRowMajor, (tolower(trans)=='n'?CblasNoTrans:CblasTrans),
 | 
						|
				A.nrows(), A.ncols(), alpha, A, A.ncols(), (double *)x.v, 2, beta, (double *)v, 2);
 | 
						|
		cblas_dgemv(CblasRowMajor, (tolower(trans)=='n'?CblasNoTrans:CblasTrans),
 | 
						|
				A.nrows(), A.ncols(), alpha, A, A.ncols(), ((double *)x.v) + 1, 2, beta, ((double *)v)+1, 2);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		cublasDgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), alpha, A, A.ncols(), (double*)(x.v), 2, beta, (double *)v, 2);
 | 
						|
		TEST_CUBLAS("cublasDgemv");
 | 
						|
 | 
						|
		cublasDgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), alpha, A, A.ncols(), ((double *)x.v) + 1, 2, beta, ((double *)v)+1, 2);
 | 
						|
		TEST_CUBLAS("cublasDgemv");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e.
 | 
						|
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 | 
						|
 * @param[in] beta complex parameter \f$\beta\f$
 | 
						|
 * @param[in] A <b>complex</b> matrix \f$A\f$
 | 
						|
 * @param[in] trans if <code>trans == 'n'</code> use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$
 | 
						|
 * @param[in] alpha complex parameter \f$\alpha\f$
 | 
						|
 * @param[in] x real vector \f$\vec{x}\f$
 | 
						|
 * @see gemm
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<std::complex<double> >::gemv(const std::complex<double> beta,
 | 
						|
		const NRMat<std::complex<double> > &A, const char trans, 
 | 
						|
		const std::complex<double> alpha, const NRVec<std::complex<double> > &x) {
 | 
						|
#ifdef DEBUG
 | 
						|
	if ((tolower(trans) == 'n'?A.ncols():A.nrows()) != x.size()){ laerror("incompatible vectors"); }
 | 
						|
#endif
 | 
						|
	SAME_LOC3(*this, x, A);
 | 
						|
	copyonwrite();
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_zgemv(CblasRowMajor, (tolower(trans)=='n'?CblasNoTrans:CblasTrans), 
 | 
						|
				A.nrows(), A.ncols(), &alpha, A, A.ncols(), x.v, 1, &beta, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
 | 
						|
		const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
 | 
						|
		const cuDoubleComplex  _beta = make_cuDoubleComplex(beta.real(), beta.imag());
 | 
						|
 | 
						|
		cublasZgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(),
 | 
						|
				_alpha, (cuDoubleComplex*)(A[0]), A.ncols(), (cuDoubleComplex*)(x.v), 1, _beta, (cuDoubleComplex*)v, 1);
 | 
						|
		TEST_CUBLAS("cublasZgemv");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the \b gemv operation on this real vector \f$\vec{y}\f$, i.e.
 | 
						|
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 | 
						|
 * @param[in] beta real parameter \f$\beta\f$
 | 
						|
 * @param[in] A real symmetric matrix \f$A\f$ stored in packed form
 | 
						|
 * @param[in] trans if <code>trans == 'n'</code> use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$
 | 
						|
 * @param[in] alpha real parameter \f$\alpha\f$
 | 
						|
 * @param[in] x real vector \f$\vec{x}\f$
 | 
						|
 * @see gemm, NRSMat<T>
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<double>::gemv(const double beta, const NRSMat<double> &A,
 | 
						|
		const char trans, const double alpha, const NRVec &x) {
 | 
						|
#ifdef DEBUG
 | 
						|
	if(A.ncols() != x.size()){ laerror("incompatible dimensions"); }
 | 
						|
#endif
 | 
						|
	SAME_LOC3(*this, A, x);
 | 
						|
	copyonwrite();
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location==cpu){
 | 
						|
#endif
 | 
						|
		cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, x.v, 1, beta, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		cublasDspmv('U', A.ncols(), alpha, A, x.v, 1, beta, v, 1);
 | 
						|
		TEST_CUBLAS("cublasDspmv");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the \c gemv operation on this complex vector \f$\vec{y}\f$, i.e.
 | 
						|
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 | 
						|
 * @param[in] beta real parameter \f$\beta\f$
 | 
						|
 * @param[in] A <b>real symmetric</b> matrix \f$A\f$ stored in packed form
 | 
						|
 * @param[in] trans if <code>trans == 'n'</code> use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$
 | 
						|
 * @param[in] alpha real parameter \f$\alpha\f$
 | 
						|
 * @param[in] x complex vector \f$\vec{x}\f$
 | 
						|
 * @see gemm, NRSMat<T>
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<std::complex<double> >::gemv(const double beta, const NRSMat<double> &A,
 | 
						|
		const char trans, const double alpha, const NRVec<std::complex<double> > &x) {
 | 
						|
#ifdef DEBUG
 | 
						|
	if(A.ncols() != x.size()){ laerror("incompatible dimensions"); }
 | 
						|
#endif
 | 
						|
	SAME_LOC3(*this, A, x);
 | 
						|
	copyonwrite();
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, (double *)x.v, 2, beta, (double *)v, 2);
 | 
						|
		cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, ((double *)x.v) + 1, 2, beta, ((double *)v) + 1, 2);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		cublasDspmv('U', A.ncols(), alpha, A, (double*)(x.v), 2, beta, (double*)v, 2);
 | 
						|
		TEST_CUBLAS("cublasDspmv");
 | 
						|
		
 | 
						|
		cublasDspmv('U', A.ncols(), alpha, A, ((double*)(x.v)) + 1, 2, beta, ((double*)v) + 1, 2);
 | 
						|
		TEST_CUBLAS("cublasDspmv");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e.
 | 
						|
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 | 
						|
 * @param[in] beta complex parameter \f$\beta\f$
 | 
						|
 * @param[in] A <b>complex Hermitian</b> matrix \f$A\f$ stored in packed form
 | 
						|
 * @param[in] trans not used
 | 
						|
 * @param[in] alpha complex parameter \f$\alpha\f$
 | 
						|
 * @param[in] x complex vector \f$\vec{x}\f$
 | 
						|
 * @see gemm, NRSMat<T>
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
void NRVec<std::complex<double> >::gemv(const std::complex<double> beta,
 | 
						|
                const NRSMat<std::complex<double> > &A, const char trans,
 | 
						|
                const std::complex<double> alpha, const NRVec<std::complex<double> > &x){
 | 
						|
#ifdef DEBUG
 | 
						|
	if(A.ncols() != x.size()) laerror("incompatible dimensions");
 | 
						|
#endif
 | 
						|
	SAME_LOC3(*this, A, x);
 | 
						|
	copyonwrite();
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_zhpmv(CblasRowMajor, CblasLower, A.ncols(), &alpha, A, x.v, 1, &beta, v, 1);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
 | 
						|
		const cuDoubleComplex  _beta = make_cuDoubleComplex(beta.real(), beta.imag());
 | 
						|
 | 
						|
		cublasZhpmv('U', A.ncols(), _alpha, (cuDoubleComplex*)((const std::complex<double>*)A), (cuDoubleComplex*)(x.v), 1, _beta, (cuDoubleComplex*)(this->v), 1);
 | 
						|
		TEST_CUBLAS("cublasZhpmv");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * computes the outer product of this real vector \f$\vec{a}\f$ with given
 | 
						|
 * real vector \f$\vec{b}\f$ and scales the resulting matrix with factor \f$\alpha\f$, i.e.
 | 
						|
 * the matrix elements of the final matrix \f$A\f$ can be expressed as
 | 
						|
 * \f[A_{i,j} = \alpha\cdot\vec{a}_i\vec{b}_j\f]
 | 
						|
 * @param[in] b real vector \f$\vec{b}\f$
 | 
						|
 * @param[in] conj not used
 | 
						|
 * @param[in] scale real factor \f$\alpha\f$
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
const NRMat<double> NRVec<double>::otimes(const NRVec<double> &b,const bool conj, const double &scale) const {
 | 
						|
 | 
						|
	SAME_LOC(*this, b);
 | 
						|
	NRMat<double> result(0.0, nn, b.nn, this->getlocation());
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_dger(CblasRowMajor, nn, b.nn, scale, v, 1, b.v, 1, result, b.nn);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		cublasDger(b.nn, nn, scale, b.v, 1, v, 1, result[0], b.nn);
 | 
						|
		TEST_CUBLAS("cublasDger");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
	return result;
 | 
						|
}
 | 
						|
 | 
						|
template<>
 | 
						|
const NRVec<double> NRVec<double>::otimes2vec(const NRVec<double> &b,const bool conj, const double &scale) const {
 | 
						|
 | 
						|
	SAME_LOC(*this, b);
 | 
						|
	NRVec<double> result(0.0, nn*b.nn, this->getlocation());
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		cblas_dger(CblasRowMajor, nn, b.nn, scale, v, 1, b.v, 1, result.v, b.nn);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		cublasDger(b.nn, nn, scale, b.v, 1, v, 1, result.v, b.nn);
 | 
						|
		TEST_CUBLAS("cublasDger");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
	return result;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * computes the outer product of this complex vector \f$\vec{a}\f$ with given
 | 
						|
 * complex vector \f$\vec{b}\f$ and scales the resulting matrix with factor \f$\alpha\f$, i.e.
 | 
						|
 * the matrix elements of the final matrix \f$A\f$ can be expressed as
 | 
						|
 * \f[A_{i,j} = \alpha\cdot\vec{a}_i\vec{b}_j\f]
 | 
						|
 * in case <code>conj = true</code>, the result is
 | 
						|
 * \f[A_{i,j} = \alpha\cdot\vec{a}_i\vec{b}_j^{*}\f]
 | 
						|
 * @param[in] b complex vector \f$\vec{b}\f$
 | 
						|
 * @param[in] conj determines whther the vector \f$\vec{b}\f$ is conjugated
 | 
						|
 * @param[in] scale complex scaling factor \f$\alpha\f$
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
const NRMat<std::complex<double> > 
 | 
						|
NRVec<std::complex<double> >::otimes(const NRVec<std::complex<double> > &b, const bool conj, const std::complex<double> &scale) const {
 | 
						|
	
 | 
						|
	SAME_LOC(*this, b);
 | 
						|
	NRMat<std::complex<double> > result(0., nn, b.nn, this->getlocation());
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		if(conj){
 | 
						|
			cblas_zgerc(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result, b.nn);
 | 
						|
		}else{
 | 
						|
			cblas_zgeru(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result, b.nn);
 | 
						|
		}
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		if(conj){
 | 
						|
			const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), -scale.imag());
 | 
						|
 | 
						|
			cublasZgerc(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), b.nn);
 | 
						|
			TEST_CUBLAS("cublasZgerc");
 | 
						|
 | 
						|
			result.conjugateme();
 | 
						|
		}else{
 | 
						|
			const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), +scale.imag());
 | 
						|
 | 
						|
			cublasZgeru(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), b.nn);
 | 
						|
			TEST_CUBLAS("cublasZgeru");
 | 
						|
		}
 | 
						|
	}
 | 
						|
#endif
 | 
						|
	return result;
 | 
						|
}
 | 
						|
 | 
						|
template<>
 | 
						|
const NRVec<std::complex<double> > 
 | 
						|
NRVec<std::complex<double> >::otimes2vec(const NRVec<std::complex<double> > &b, const bool conj, const std::complex<double> &scale) const {
 | 
						|
	
 | 
						|
	SAME_LOC(*this, b);
 | 
						|
	NRVec<std::complex<double> > result(0., nn*b.nn, this->getlocation());
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		if(conj){
 | 
						|
			cblas_zgerc(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result.v, b.nn);
 | 
						|
		}else{
 | 
						|
			cblas_zgeru(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result.v, b.nn);
 | 
						|
		}
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		if(conj){
 | 
						|
			const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), -scale.imag());
 | 
						|
 | 
						|
			cublasZgerc(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result.v), b.nn);
 | 
						|
			TEST_CUBLAS("cublasZgerc");
 | 
						|
 | 
						|
			result.conjugateme();
 | 
						|
		}else{
 | 
						|
			const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), +scale.imag());
 | 
						|
 | 
						|
			cublasZgeru(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result.v), b.nn);
 | 
						|
			TEST_CUBLAS("cublasZgeru");
 | 
						|
		}
 | 
						|
	}
 | 
						|
#endif
 | 
						|
	return result;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
template<>
 | 
						|
NRVec<std::complex<double> > complexify(const NRVec<double> &rhs) {
 | 
						|
	NRVec<std::complex<double> > r(rhs.size(), rhs.getlocation());
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(rhs.getlocation() == cpu){
 | 
						|
#endif
 | 
						|
		cblas_dcopy(rhs.size(), &rhs[0], 1, (double *)(&r[0]), 2);
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		r = 0;
 | 
						|
		cublasDcopy(rhs.size(), rhs.v, 1, (double*)(r.v), 2);
 | 
						|
		TEST_CUBLAS("cublasDcopy");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
	return r;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
template<typename T>
 | 
						|
void NRVec<T>::permuteme(const CyclePerm<int> &p)
 | 
						|
{
 | 
						|
#ifdef DEBUG
 | 
						|
if(!p.is_valid()) laerror("invalid permutation of vector");
 | 
						|
#endif
 | 
						|
if(p.max()>nn) laerror("incompatible permutation and vector");
 | 
						|
#ifdef CUDALA
 | 
						|
        if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory");
 | 
						|
#endif
 | 
						|
copyonwrite();
 | 
						|
for(int cycle=1; cycle<=p.size(); ++cycle)
 | 
						|
	{
 | 
						|
	int length= p[cycle].size();
 | 
						|
	if(length<=1) continue; //trivial cycle
 | 
						|
	T tmp = v[p[cycle][length]-1];
 | 
						|
	for(int i=length; i>1; --i) v[p[cycle][i]-1] = v[p[cycle][i-1]-1];
 | 
						|
	v[p[cycle][1]-1] = tmp;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
template<typename T>
 | 
						|
const int NRVec<T>::find(const T &val) const
 | 
						|
{
 | 
						|
for(int i=0; i<nn; ++i) if(val==v[i]) return i;
 | 
						|
return -1;
 | 
						|
}
 | 
						|
 | 
						|
template<typename T>
 | 
						|
const int NRVec<T>::findthr(const T &val, const  typename LA_traits<T>::normtype &thr) const
 | 
						|
{
 | 
						|
for(int i=0; i<nn; ++i) if(MYABS(val-v[i])<thr) return i;
 | 
						|
return -1;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * extract block subvector
 | 
						|
 * @param[in] from starting position
 | 
						|
 * @param[in] to final position
 | 
						|
 * @return extracted block subvector
 | 
						|
 ******************************************************************************/
 | 
						|
template <typename T>
 | 
						|
const NRVec<T> NRVec<T>::subvector(const int from, const int to) const 
 | 
						|
{
 | 
						|
#ifdef DEBUG
 | 
						|
	if(from<0 || from>=nn|| to<0 || to>=nn || from>to){
 | 
						|
		laerror("invalid subvector specification");
 | 
						|
	}
 | 
						|
#endif
 | 
						|
//trivial case of whole vector for efficiency
 | 
						|
	if(from==0 && to == nn-1) return *this;
 | 
						|
 | 
						|
	const int n = to - from + 1;
 | 
						|
	NRVec<T> r(n, getlocation());
 | 
						|
	if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 | 
						|
 | 
						|
#ifdef CUDALA
 | 
						|
	if(location == cpu){
 | 
						|
#endif
 | 
						|
		memcpy(r.v, v+from, n*sizeof(T));
 | 
						|
#ifdef CUDALA
 | 
						|
	}else{
 | 
						|
		if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
 | 
						|
			cublasScopy(n*sizeof(T)/sizeof(float), (const float *)(v+from), 1, (float*)r.v, 1);
 | 
						|
			TEST_CUBLAS("cublasScopy");
 | 
						|
	}
 | 
						|
#endif	
 | 
						|
	return r;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
template <typename T>
 | 
						|
const NRVec<T> NRVec<T>::subvector(const NRVec<int> &selection) const
 | 
						|
{
 | 
						|
        NOT_GPU(*this);
 | 
						|
        const int n = selection.size();
 | 
						|
        NRVec<T> r(n);
 | 
						|
 | 
						|
        for(int i=0; i<n; ++i)
 | 
						|
                {
 | 
						|
                int ii=selection[i];
 | 
						|
                if(ii<0||ii>=nn) laerror("bad row index in subvector");
 | 
						|
                r[i] = (*this)[ii];
 | 
						|
                }
 | 
						|
return r;
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * places given vector as subvector at given position
 | 
						|
 * @param[in] from coordinate 
 | 
						|
 * @param[in] rhs input vector
 | 
						|
 ******************************************************************************/
 | 
						|
template <typename T>
 | 
						|
void NRVec<T>::storesubvector(const int from, const NRVec &rhs) 
 | 
						|
{
 | 
						|
	const int to = from + rhs.size() - 1;
 | 
						|
#ifdef DEBUG
 | 
						|
	if(from<0 || from>=nn || to>=nn) laerror("bad indices in storesubvector");
 | 
						|
#endif
 | 
						|
	SAME_LOC(*this, rhs);
 | 
						|
	if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
 | 
						|
 | 
						|
	#ifdef CUDALA
 | 
						|
		if(location == cpu){
 | 
						|
	#endif
 | 
						|
			memcpy(v+from, rhs.v, rhs.size()*sizeof(T));
 | 
						|
 | 
						|
	#ifdef CUDALA
 | 
						|
		}else{
 | 
						|
			if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
 | 
						|
			cublasScopy(rhs.size()*sizeof(T)/sizeof(float), (const float *) (rhs.v), 1, (float *)(v + from), 1);
 | 
						|
		}
 | 
						|
	#endif
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
template <typename T>
 | 
						|
void NRVec<T>::storesubvector(const NRVec<int> &selection, const NRVec &rhs)
 | 
						|
{
 | 
						|
        NOT_GPU(*this);
 | 
						|
        const int n = selection.size();
 | 
						|
	if(n!=rhs.size()) laerror("size mismatch in storesubvector");
 | 
						|
 | 
						|
        for(int i=0; i<n; ++i)
 | 
						|
                {
 | 
						|
                int ii=selection[i];
 | 
						|
                if(ii<0||ii>=nn) laerror("bad  index in storesubvector");
 | 
						|
                (*this)[ii] = rhs[i];
 | 
						|
                }
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * conjugate this general vector
 | 
						|
 * @return reference to the (unmodified) matrix
 | 
						|
 ******************************************************************************/
 | 
						|
template<typename T>
 | 
						|
NRVec<T>& NRVec<T>::conjugateme() {
 | 
						|
copyonwrite();
 | 
						|
#ifdef CUDALA
 | 
						|
        if(location != cpu) laerror("general conjugation only on CPU");
 | 
						|
#endif
 | 
						|
	for(int i=0; i<nn; ++i) v[i] =  LA_traits<T>::conjugate(v[i]);
 | 
						|
        return *this;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * conjugate this complex vector 
 | 
						|
 * @return reference to the modified matrix
 | 
						|
 ******************************************************************************/
 | 
						|
template<>
 | 
						|
NRVec<std::complex<double> >& NRVec<std::complex<double> >::conjugateme() {
 | 
						|
        copyonwrite();
 | 
						|
#ifdef CUDALA
 | 
						|
        if(location == cpu){
 | 
						|
#endif
 | 
						|
                cblas_dscal((size_t)nn, -1.0, ((double *)v) + 1, 2);
 | 
						|
#ifdef CUDALA
 | 
						|
        }else{
 | 
						|
                cublasDscal((size_t)nn, -1.0, ((double *)v) + 1, 2);
 | 
						|
        }
 | 
						|
#endif
 | 
						|
        return *this;
 | 
						|
}
 | 
						|
 | 
						|
template<>
 | 
						|
NRVec<std::complex<float> >& NRVec<std::complex<float> >::conjugateme() {
 | 
						|
        copyonwrite();
 | 
						|
#ifdef CUDALA
 | 
						|
        if(location == cpu){
 | 
						|
#endif
 | 
						|
                cblas_sscal((size_t)nn, -1.0, ((float *)v) + 1, 2);
 | 
						|
#ifdef CUDALA
 | 
						|
        }else{
 | 
						|
                cublasSscal((size_t)nn, -1.0, ((float *)v) + 1, 2);
 | 
						|
        }
 | 
						|
#endif
 | 
						|
        return *this;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * sum up the elements of current vector of general type <code>T</code>
 | 
						|
 * @return sum
 | 
						|
 ******************************************************************************/
 | 
						|
template <typename T>
 | 
						|
const T NRVec<T>::sum() const {
 | 
						|
        NOT_GPU(*this);
 | 
						|
        T sum;
 | 
						|
 | 
						|
                sum = (T)0;
 | 
						|
                for(int i=0; i<nn; i++) sum += v[i];
 | 
						|
        return sum;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * sum up the all of the current double-precision real vector
 | 
						|
 * @return sum
 | 
						|
 ******************************************************************************/
 | 
						|
template <>
 | 
						|
const double NRVec<double>::sum() const {
 | 
						|
        double result=0;
 | 
						|
#ifdef CUDALA
 | 
						|
        if(location == cpu){
 | 
						|
#endif
 | 
						|
                        cblas_daxpy(nn, 1.0, v, 1, &result, 0);
 | 
						|
#ifdef CUDALA
 | 
						|
        }else{
 | 
						|
        laerror("not implemented");
 | 
						|
                }
 | 
						|
        }
 | 
						|
#endif
 | 
						|
        return result;
 | 
						|
}
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * sum up the all of the current double-precision complex vector
 | 
						|
 * @return sum
 | 
						|
 ******************************************************************************/
 | 
						|
template <>
 | 
						|
const std::complex<double>  NRVec<std::complex<double> >::sum() const {
 | 
						|
        std::complex<double>  result=0;
 | 
						|
#ifdef CUDALA
 | 
						|
        if(location == cpu){
 | 
						|
#endif
 | 
						|
                        cblas_zaxpy(nn, &CONE, v, 1, &result, 0);
 | 
						|
#ifdef CUDALA
 | 
						|
        }else{
 | 
						|
        laerror("not implemented");
 | 
						|
                }
 | 
						|
        }
 | 
						|
#endif
 | 
						|
        return result;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/***************************************************************************//**
 | 
						|
 * forced instantiation in the corresponding object file
 | 
						|
 ******************************************************************************/
 | 
						|
/*
 | 
						|
 Commented out by Roman for ICC
 | 
						|
 | 
						|
#define INSTANTIZE(T) \
 | 
						|
template void NRVec<T>::put(int fd, bool dim, bool transp) const; \
 | 
						|
template void NRVec<T>::get(int fd, bool dim, bool transp); \
 | 
						|
 | 
						|
INSTANTIZE(double)
 | 
						|
INSTANTIZE(std::complex<double>)
 | 
						|
INSTANTIZE(char)
 | 
						|
INSTANTIZE(short)
 | 
						|
INSTANTIZE(int)
 | 
						|
INSTANTIZE(long)
 | 
						|
INSTANTIZE(long long)
 | 
						|
INSTANTIZE(unsigned char)
 | 
						|
INSTANTIZE(unsigned short)
 | 
						|
INSTANTIZE(unsigned int)
 | 
						|
INSTANTIZE(unsigned long)
 | 
						|
INSTANTIZE(unsigned long long)
 | 
						|
*/
 | 
						|
 | 
						|
#define INSTANTIZE_DUMMY(T) \
 | 
						|
template<> void NRVec<T>::gemv(const T beta, const NRMat<T> &a, const char trans,  const T alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
 | 
						|
template<> void NRVec<T>::gemv(const T beta, const NRSMat<T> &a, const char trans,  const T alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
 | 
						|
template<> void NRVec<T>::gemv(const T beta, const SparseMat<T> &a, const char trans,  const T alpha, const NRVec<T> &x, bool s) { laerror("gemv on unsupported types"); } \
 | 
						|
template<> void NRVec<T>::gemv(const LA_traits_complex<T>::Component_type beta, const  LA_traits_complex<T>::NRMat_Noncomplex_type  &a, const char trans,  const  LA_traits_complex<T>::Component_type alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
 | 
						|
template<> void NRVec<T>::gemv(const  LA_traits_complex<T>::Component_type beta, const  LA_traits_complex<T>::NRSMat_Noncomplex_type  &a, const char trans,  const  LA_traits_complex<T>::Component_type alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
 | 
						|
template<> NRVec<T> & NRVec<T>::normalize(LA_traits<T>::normtype *) {laerror("normalize() impossible for integer types"); return *this;} \
 | 
						|
template<> const NRMat<T> NRVec<T>::otimes(const NRVec<T> &b,const bool conj, const T &scale) const {laerror("otimes presently implemented only for double and complex double"); return NRMat<T> ();}\
 | 
						|
template<> const NRVec<T> NRVec<T>::otimes2vec(const NRVec<T> &b,const bool conj, const T &scale) const {laerror("otimes2vec presently implemented only for double and complex double"); return NRVec<T> ();}\
 | 
						|
 | 
						|
 | 
						|
 | 
						|
INSTANTIZE_DUMMY(char)
 | 
						|
INSTANTIZE_DUMMY(short)
 | 
						|
INSTANTIZE_DUMMY(int)
 | 
						|
INSTANTIZE_DUMMY(long)
 | 
						|
INSTANTIZE_DUMMY(long long)
 | 
						|
INSTANTIZE_DUMMY(unsigned char)
 | 
						|
INSTANTIZE_DUMMY(unsigned short)
 | 
						|
INSTANTIZE_DUMMY(unsigned int)
 | 
						|
INSTANTIZE_DUMMY(unsigned long)
 | 
						|
INSTANTIZE_DUMMY(unsigned long long)
 | 
						|
INSTANTIZE_DUMMY(std::complex<char>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<short>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<int>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<long>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<long long>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<unsigned char>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<unsigned short>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<unsigned int>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<unsigned long>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<unsigned long long>)
 | 
						|
INSTANTIZE_DUMMY(std::complex<std::complex<double> >)
 | 
						|
INSTANTIZE_DUMMY(std::complex<std::complex<float> >)
 | 
						|
 | 
						|
 | 
						|
 | 
						|
//also not supported on gpu
 | 
						|
#define INSTANTIZE_NONCOMPLEX(T) \
 | 
						|
template<>\
 | 
						|
const T NRVec<T>::max() const\
 | 
						|
{\
 | 
						|
if(nn==0) return 0;\
 | 
						|
T m=v[0];\
 | 
						|
for(int i=1; i<nn; ++i) if(v[i]>m) m=v[i];\
 | 
						|
return m;\
 | 
						|
}\
 | 
						|
\
 | 
						|
template<>\
 | 
						|
const T NRVec<T>::min() const\
 | 
						|
{\
 | 
						|
if(nn==0) return 0;\
 | 
						|
T m=v[0];\
 | 
						|
for(int i=1; i<nn; ++i) if(v[i]<m) m=v[i];\
 | 
						|
return m;\
 | 
						|
}\
 | 
						|
 | 
						|
 | 
						|
 | 
						|
INSTANTIZE_NONCOMPLEX(char)
 | 
						|
INSTANTIZE_NONCOMPLEX(unsigned char)
 | 
						|
INSTANTIZE_NONCOMPLEX(short)
 | 
						|
INSTANTIZE_NONCOMPLEX(unsigned short)
 | 
						|
INSTANTIZE_NONCOMPLEX(int)
 | 
						|
INSTANTIZE_NONCOMPLEX(unsigned int)
 | 
						|
INSTANTIZE_NONCOMPLEX(long)
 | 
						|
INSTANTIZE_NONCOMPLEX(unsigned long)
 | 
						|
INSTANTIZE_NONCOMPLEX(long long)
 | 
						|
INSTANTIZE_NONCOMPLEX(unsigned long long)
 | 
						|
INSTANTIZE_NONCOMPLEX(float)
 | 
						|
INSTANTIZE_NONCOMPLEX(double)
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/***************************************************************************
 | 
						|
 *some efficient specializations of concatenations for plain data types
 | 
						|
 ******************************************************************************/
 | 
						|
 | 
						|
#define INSTANTIZE_CONCAT(T) \
 | 
						|
template<> \
 | 
						|
NRVec<T> NRVec<T>::concat(const NRVec<T> &rhs) const \
 | 
						|
{ \
 | 
						|
if(nn==0) return rhs; \
 | 
						|
if(rhs.nn==0) return *this; \
 | 
						|
NOT_GPU(*this); \
 | 
						|
NOT_GPU(rhs); \
 | 
						|
NRVec<T> r(nn+rhs.nn); \
 | 
						|
memcpy(r.v,v,nn*sizeof(T)); \
 | 
						|
memcpy(r.v+nn,rhs.v,rhs.nn*sizeof(T)); \
 | 
						|
return r; \
 | 
						|
} \
 | 
						|
 | 
						|
 | 
						|
INSTANTIZE_CONCAT(char)
 | 
						|
INSTANTIZE_CONCAT(unsigned char)
 | 
						|
INSTANTIZE_CONCAT(short)
 | 
						|
INSTANTIZE_CONCAT(unsigned short)
 | 
						|
INSTANTIZE_CONCAT(int)
 | 
						|
INSTANTIZE_CONCAT(unsigned int)
 | 
						|
INSTANTIZE_CONCAT(long)
 | 
						|
INSTANTIZE_CONCAT(unsigned long)
 | 
						|
INSTANTIZE_CONCAT(long long)
 | 
						|
INSTANTIZE_CONCAT(unsigned long long)
 | 
						|
INSTANTIZE_CONCAT(float)
 | 
						|
INSTANTIZE_CONCAT(double)
 | 
						|
INSTANTIZE_CONCAT(std::complex<float>)
 | 
						|
INSTANTIZE_CONCAT(std::complex<double>)
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
//template class NRVec<float>;
 | 
						|
//template class NRVec<std::complex<float> >;
 | 
						|
template class NRVec<double>;
 | 
						|
template class NRVec<std::complex<double> >;
 | 
						|
template class NRVec<char>;
 | 
						|
template class NRVec<short>;
 | 
						|
template class NRVec<int>;
 | 
						|
template class NRVec<long>;
 | 
						|
template class NRVec<long long>;
 | 
						|
template class NRVec<unsigned char>;
 | 
						|
template class NRVec<unsigned short>;
 | 
						|
template class NRVec<unsigned int>;
 | 
						|
template class NRVec<unsigned long>;
 | 
						|
template class NRVec<unsigned long long>;
 | 
						|
 | 
						|
}//namespace
 |