Compare commits

...

23 Commits

Author SHA1 Message Date
26ed939901 tensor: bugfix in unwind_index and linear_transform of single group 2025-12-15 20:32:01 +01:00
671b924c8c lanczos: little bug fix 2025-12-15 14:36:16 +01:00
1965f4f653 lanczos: first working version 2025-12-12 15:54:32 +01:00
7e90168443 davidson - comment 2025-12-12 14:22:15 +01:00
4500752146 lanczos: skeleton initial commit 2025-12-10 17:12:00 +01:00
62d9e18043 diagonalize3 for n smaller than dimension 2025-12-10 17:11:26 +01:00
6fe5dd8be6 FIXED a recently introduced bug in vec.h 2025-12-09 17:45:38 +01:00
1cb536421f davidson: added root targeting 2025-12-09 16:03:04 +01:00
3d284d544a diagonalize3 wrapper for dsteqr 2025-12-09 15:21:18 +01:00
c70e5c5f18 davidson for complex hermitean 2025-12-05 18:14:55 +01:00
651f614c91 get_git_version bash 2025-12-02 13:16:52 +01:00
b84b4571cb SMat row sums 2025-12-01 17:40:30 +01:00
3a176ddb13 support for Orca format in binary put() and get() 2025-11-28 14:20:05 +01:00
253a554e8e fourindex: added comment/enum 2025-11-27 14:56:22 +01:00
76d00b6b2b fourindex: by default ignore assignments to zero elements due to antisymmetry 2025-11-27 14:11:54 +01:00
831404d08d fourindex: adding set() for compatibility with templated use 2025-11-26 17:46:26 +01:00
b556b06442 twoside_transform for complex and for non-symetric case 2025-11-26 16:57:21 +01:00
3da3efa57c tensor: improved diagnostics 2025-11-20 21:00:33 +01:00
ad1c4ee968 tensor: linear_transform implemented 2025-11-20 18:17:34 +01:00
d136c2314d tensor: index name preservation in tucker 2025-11-20 16:41:57 +01:00
e867a7a8c9 tensor: generalized reinterpretation as matrix 2025-11-20 16:24:17 +01:00
a342032b58 simplified (inverse)Tucker for non-iverting index order 2025-11-20 16:00:53 +01:00
54642a71cc increased storage for precomputed simplicial() 2025-11-20 15:28:17 +01:00
18 changed files with 808 additions and 94 deletions

View File

@@ -1,5 +1,5 @@
lib_LTLIBRARIES = libla.la lib_LTLIBRARIES = libla.la
include_HEADERS = LA_version.h tensor.h miscfunc.h simple.h vecmat3.h quaternion.h fortran.h cuda_la.h auxstorage.h davidson.h laerror.h mat.h qsort.h vec.h bisection.h diis.h la.h noncblas.h smat.h numbers.h bitvector.h fourindex.h la_traits.h la_random.h nonclass.h sparsemat.h sparsesmat.h csrmat.h conjgrad.h gmres.h matexp.h permutation.h polynomial.h contfrac.h graph.h reg.h regsurf.h include_HEADERS = LA_version.h tensor.h miscfunc.h simple.h vecmat3.h quaternion.h fortran.h cuda_la.h auxstorage.h davidson.h lanczos.h laerror.h mat.h qsort.h vec.h bisection.h diis.h la.h noncblas.h smat.h numbers.h bitvector.h fourindex.h la_traits.h la_random.h nonclass.h sparsemat.h sparsesmat.h csrmat.h conjgrad.h gmres.h matexp.h permutation.h polynomial.h contfrac.h graph.h reg.h regsurf.h
libla_la_SOURCES = simple.cc tensor.cc miscfunc.cc quaternion.cc vecmat3.cc vec.cc mat.cc smat.cc sparsemat.cc sparsesmat.cc csrmat.cc laerror.cc noncblas.cc numbers.cc bitvector.cc strassen.cc nonclass.cc cuda_la.cc fourindex.cc permutation.cc polynomial.cc contfrac.cc graph.cc la_random.cc reg.cc regsurf.cc libla_la_SOURCES = simple.cc tensor.cc miscfunc.cc quaternion.cc vecmat3.cc vec.cc mat.cc smat.cc sparsemat.cc sparsesmat.cc csrmat.cc laerror.cc noncblas.cc numbers.cc bitvector.cc strassen.cc nonclass.cc cuda_la.cc fourindex.cc permutation.cc polynomial.cc contfrac.cc graph.cc la_random.cc reg.cc regsurf.cc
nodist_libla_la_SOURCES = version.cc nodist_libla_la_SOURCES = version.cc
check_PROGRAMS = t test tX test_reg test_regsurf check_PROGRAMS = t test tX test_reg test_regsurf

View File

@@ -33,12 +33,13 @@ namespace LA {
//Note that for efficiency in a direct CI case the diagonalof() should cache its result //Note that for efficiency in a direct CI case the diagonalof() should cache its result
//@@@should work for complex hermitian-only too, but was not tested yet (maybe somwhere complex conjugation will have to be added) //works for complex hermitian-only too, but does not converge so well for more than 1 root
//@@@ for large krylov spaces >200 it can occur 'convergence problem in sygv/syev in diagonalize()' //@@@ for large krylov spaces >200 it can occur 'convergence problem in sygv/syev in diagonalize()'
//@@@options: left eigenvectors by matrix transpose, overridesymmetric (for nrmat) //@@@options: left eigenvectors by matrix transpose, overridesymmetric (for nrmat)
//@@@small matrix gdiagonalize - shift complex roots up (option to gdiagonalize?) //@@@small matrix gdiagonalize - shift complex roots up (option to gdiagonalize?)
//@@@test gdiagonalize whether it sorts the roots and what for complex ones //@@@test gdiagonalize whether it sorts the roots and what for complex ones
//@@@implement left eigenvectors for nonsymmetric case //@@@implement left eigenvectors for nonsymmetric case
//@@@implement start from larger Krylov space than a single vector - useful for multiroot solutions
//Davidson algorithm: J. Comp. Phys. 17:817 (1975) //Davidson algorithm: J. Comp. Phys. 17:817 (1975)
@@ -48,7 +49,7 @@ template <typename T, typename Matrix>
extern void davidson(const Matrix &bigmat, NRVec<T> &eivals, NRVec<T> *eivecs, const char *eivecsfile, extern void davidson(const Matrix &bigmat, NRVec<T> &eivals, NRVec<T> *eivecs, const char *eivecsfile,
int nroots=1, const bool verbose=0, const double eps=1e-6, int nroots=1, const bool verbose=0, const double eps=1e-6,
const bool incore=1, int maxit=100, const int maxkrylov = 500, const bool incore=1, int maxit=100, const int maxkrylov = 500,
void (*initguess)(NRVec<T> &)=NULL) void (*initguess)(NRVec<T> &)=NULL, const typename LA_traits<T>::normtype *target=NULL)
{ {
bool flag=0; bool flag=0;
int n=bigmat.nrows(); int n=bigmat.nrows();
@@ -155,15 +156,40 @@ else
for(int i=0; i<=krylovsize; ++i) {r[i]/=LA_traits<T>::realpart(beta[i]); ri[i]/=LA_traits<T>::realpart(beta[i]);} for(int i=0; i<=krylovsize; ++i) {r[i]/=LA_traits<T>::realpart(beta[i]); ri[i]/=LA_traits<T>::realpart(beta[i]);}
} }
if(target) //resort eigenpairs by distance from the target
{
NRVec<typename LA_traits<T>::normtype> key(krylovsize+1);
for(int i=0; i<=krylovsize; ++i) key[i] = abs(r[i] - *target);
NRPerm<int> p(krylovsize+1);
key.sort(0,p);
NRVec<typename LA_traits<T>::normtype> rp(maxkrylov);
NRMat<T> smallVp(maxkrylov,maxkrylov);
for(int i=0; i<=krylovsize; ++i)
{
rp[i]= r[p[i+1]-1];
for(int j=0; j<=krylovsize; ++j) smallVp(j,i) = smallV(j,p[i+1]-1);
}
r = rp;
smallV = smallVp;
}
typename LA_traits<T>::normtype eival_n=r[nroot]; typename LA_traits<T>::normtype eival_n=r[nroot];
oldnroot=nroot; oldnroot=nroot;
typename LA_traits<T>::normtype test=std::abs(smallV(krylovsize,nroot)); typename LA_traits<T>::normtype test=std::abs(smallV(krylovsize,nroot));
//std::cout <<"NROOT = "<<nroot<<" TEST = "<<test<<std::endl;
if(test>100.)
{
std::cout <<"Divergence in Davidson\n";
flag=1;
goto finished;
}
if(test<eps) nroot++; if(test<eps) nroot++;
if(it==0) nroot = 0; if(it==0) nroot = 0;
for(int iroot=0; iroot<=std::min(krylovsize,nroots-1); ++iroot) for(int iroot=0; iroot<=std::min(krylovsize,nroots-1); ++iroot)
{ {
test = std::abs(smallV(krylovsize,iroot)); test = std::abs(smallV(krylovsize,iroot));
if(test>eps) nroot=std::min(nroot,iroot); if(test>eps) nroot=std::min(nroot,iroot); //return to previous root
if(verbose && iroot<=std::max(oldnroot,nroot)) if(verbose && iroot<=std::max(oldnroot,nroot))
{ {
std::cout <<"Davidson: iter="<<it <<" dim="<<krylovsize<<" root="<<iroot<<" eigenvalue="<<r[iroot]<<"\n"; std::cout <<"Davidson: iter="<<it <<" dim="<<krylovsize<<" root="<<iroot<<" eigenvalue="<<r[iroot]<<"\n";
@@ -229,7 +255,8 @@ for(j=0; j<=krylovsize; ++j)
typename LA_traits<T>::normtype vnorm2; typename LA_traits<T>::normtype vnorm2;
if(!incore) s0->get(vec2,j); if(!incore) s0->get(vec2,j);
do { do {
T ab = vec1*(incore?v0[j]:vec2) /smallS(j,j); // For COMPLEX the order matters, the first one gets conjuagted, changed vec1 to the right!
T ab = (incore?v0[j]:vec2)*vec1 /smallS(j,j);
vec1.axpy(-ab,incore?v0[j]:vec2); vec1.axpy(-ab,incore?v0[j]:vec2);
vnorm2 = vec1.norm(); vnorm2 = vec1.norm();
if(vnorm2==0) if(vnorm2==0)

View File

@@ -133,7 +133,7 @@ typedef enum {
T2ijab_unitary=7, T2ijab_unitary=7,
T2IjAb_unitary=8, T2IjAb_unitary=8,
antisymtwoelectronrealdiracAB=9,rdm2AB=9,twoelectroncomplexmullikan_without_conjugation=9, antisymtwoelectronrealdiracAB=9,rdm2AB=9,twoelectroncomplexmullikan_without_conjugation=9,
twoelectronrealmullikanreducedsymAA=10, twoelectronrealmullikanreducedsymAA=10, rdm2SS=10,
twoelectronrealmullikanreducedsymAB=11, twoelectronrealmullikanreducedsymAB=11,
twoelectroncomplexmullikan=12, twoelectroncomplexmullikan=12,
} fourindexsymtype; //only permutation-nonequivalent elements are stored } fourindexsymtype; //only permutation-nonequivalent elements are stored
@@ -155,7 +155,7 @@ static const int fourindex_permutations[fourindex_n_symmetrytypes][max_fourindex
{{0,1,2,3,1},{2,3,0,1,-1}}, //T2IjAb_unitary (antihermitian exponent) {{0,1,2,3,1},{2,3,0,1,-1}}, //T2IjAb_unitary (antihermitian exponent)
{{0,1,2,3,1},{2,3,0,1,1}}, //antisymtwoelectronrealdiracAB {{0,1,2,3,1},{2,3,0,1,1}}, //antisymtwoelectronrealdiracAB
{{0,1,2,3,1},{2,3,0,1,1},{1,0,3,2,1},{3,2,1,0,1}}, //twoelectronrealmullikanreducedsymAA (e.g. unitary downfolded CC eff. integrals) {{0,1,2,3,1},{2,3,0,1,1},{1,0,3,2,1},{3,2,1,0,1}}, //twoelectronrealmullikanreducedsymAA (e.g. unitary downfolded CC eff. integrals)
{{0,1,2,3,1},{1,0,3,2,1}}, //twoelectronrealmullikanreducedsymAB (e.g. unitary downfolded CC eff. integrals) {{0,1,2,3,1},{1,0,3,2,1}}, //twoelectronrealmullikanreducedsymAB (e.g. unitary downfolded CC eff. integrals, or RDM2 spin-summed)
{{0,1,2,3,1},{2,3,0,1,1},{1,0,3,2,2},{3,2,1,0,2}}, //twoelectroncomplexmullikan ... 2 means complex conjugation {{0,1,2,3,1},{2,3,0,1,1},{1,0,3,2,2},{3,2,1,0,2}}, //twoelectroncomplexmullikan ... 2 means complex conjugation
}; };
@@ -1244,7 +1244,7 @@ if (!NRSMat<T>::v) laerror("access to unallocated fourindex_dense");
#endif #endif
return (*this).NRSMat<T>::operator() ((j-1)*nnbas+i-1,(b-1)*nnbas+a-1); return (*this).NRSMat<T>::operator() ((j-1)*nnbas+i-1,(b-1)*nnbas+a-1);
} }
inline void set(unsigned int i, unsigned int j, unsigned int k, unsigned int l, T elem) {this->operator()(i,j,k,l)=elem;};
void print(std::ostream &out) const void print(std::ostream &out) const
{ {
unsigned int i,j,a,b; unsigned int i,j,a,b;
@@ -1385,7 +1385,12 @@ if(i<j) elem = -elem;
if(k<l) elem = -elem; if(k<l) elem = -elem;
int I = ASMat_index_1(i,j); int I = ASMat_index_1(i,j);
int J = ASMat_index_1(k,l); int J = ASMat_index_1(k,l);
if (I<0 || J<0) laerror("assignment to nonexisting element"); if (I<0 || J<0)
#ifdef FORBID_NONEXISTENT_ASSIGNMENTS
laerror("assignment to nonexisting element");
#else
return;
#endif
#ifdef DEBUG #ifdef DEBUG
if (I>=NRSMat<T>::nn || J>=NRSMat<T>::nn) laerror("index out of range"); if (I>=NRSMat<T>::nn || J>=NRSMat<T>::nn) laerror("index out of range");
if (!NRSMat<T>::v) laerror("access to unallocated fourindex_dense"); if (!NRSMat<T>::v) laerror("access to unallocated fourindex_dense");
@@ -1401,7 +1406,12 @@ if(i<j) elem = -elem;
if(k<l) elem = -elem; if(k<l) elem = -elem;
int I = ASMat_index_1(i,j); int I = ASMat_index_1(i,j);
int J = ASMat_index_1(k,l); int J = ASMat_index_1(k,l);
if (I<0 || J<0) laerror("assignment to nonexisting element"); if (I<0 || J<0)
#ifdef FORBID_NONEXISTENT_ASSIGNMENTS
laerror("assignment to nonexisting element");
#else
return;
#endif
#ifdef DEBUG #ifdef DEBUG
if (I>=NRSMat<T>::nn || J>=NRSMat<T>::nn) laerror("index out of range"); if (I>=NRSMat<T>::nn || J>=NRSMat<T>::nn) laerror("index out of range");
if (!NRSMat<T>::v) laerror("access to unallocated fourindex_dense"); if (!NRSMat<T>::v) laerror("access to unallocated fourindex_dense");

View File

@@ -1,4 +1,4 @@
#!/bin/csh -f #!/bin/bash
cat <<! cat <<!
#include "LA_version.h" #include "LA_version.h"
const char LA::version[] = const char LA::version[] =

1
la.h
View File

@@ -28,6 +28,7 @@
#include "bitvector.h" #include "bitvector.h"
#include "conjgrad.h" #include "conjgrad.h"
#include "davidson.h" #include "davidson.h"
#include "lanczos.h"
#include "diis.h" #include "diis.h"
#include "fourindex.h" #include "fourindex.h"
#include "gmres.h" #include "gmres.h"

View File

@@ -392,10 +392,10 @@ static inline bool bigger(const X<C> &x, const X<C> &y) {return x>y;} \
static inline bool smaller(const X<C> &x, const X<C> &y) {return x<y;} \ static inline bool smaller(const X<C> &x, const X<C> &y) {return x<y;} \
static inline normtype norm (const X<C> &x) {return x.norm();} \ static inline normtype norm (const X<C> &x) {return x.norm();} \
static inline void axpy (X<C>&s, const X<C> &x, const C c) {s.axpy(c,x);} \ static inline void axpy (X<C>&s, const X<C> &x, const C c) {s.axpy(c,x);} \
static void put(int fd, const X<C> &x, bool dimensions=1, bool transp=0) {x.put(fd,dimensions,transp);} \ static void put(int fd, const X<C> &x, bool dimensions=1, bool transp=0, bool orcaformat=false) {x.put(fd,dimensions,transp,orcaformat);} \
static void get(int fd, X<C> &x, bool dimensions=1, bool transp=0) {x.get(fd,dimensions,transp);} \ static void get(int fd, X<C> &x, bool dimensions=1, bool transp=0, bool orcaformat=false) {x.get(fd,dimensions,transp,orcaformat);} \
static void multiput(size_t n,int fd, const X<C> *x, bool dimensions=1) {for(size_t i=0; i<n; ++i) x[i].put(fd,dimensions);} \ static void multiput(size_t n,int fd, const X<C> *x, bool dimensions=1, bool orcaformat=false) {for(size_t i=0; i<n; ++i) x[i].put(fd,dimensions,false,orcaformat);} \
static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=0; i<n; ++i) x[i].get(fd,dimensions);} \ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1, bool orcaformat=false) {for(size_t i=0; i<n; ++i) x[i].get(fd,dimensions,false,orcaformat);} \
static void copy(X<C> *dest, X<C> *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \ static void copy(X<C> *dest, X<C> *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
static void clear(X<C> *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();}\ static void clear(X<C> *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();}\
static void copyonwrite(X<C> &x) {x.copyonwrite();}\ static void copyonwrite(X<C> &x) {x.copyonwrite();}\
@@ -435,10 +435,10 @@ static inline bool bigger(const C &x, const C &y) {return x>y;} \
static inline bool smaller(const C &x, const C &y) {return x<y;} \ static inline bool smaller(const C &x, const C &y) {return x<y;} \
static inline normtype norm (const X<C> &x) {return x.norm();} \ static inline normtype norm (const X<C> &x) {return x.norm();} \
static inline void axpy (X<C>&s, const X<C> &x, const C c) {s.axpy(c,x);} \ static inline void axpy (X<C>&s, const X<C> &x, const C c) {s.axpy(c,x);} \
static void put(int fd, const X<C> &x, bool dimensions=1, bool transp=0) {x.put(fd,dimensions);} \ static void put(int fd, const X<C> &x, bool dimensions=1, bool transp=0, bool orcaformat=false) {x.put(fd,dimensions,false,orcaformat);} \
static void get(int fd, X<C> &x, bool dimensions=1, bool transp=0) {x.get(fd,dimensions);} \ static void get(int fd, X<C> &x, bool dimensions=1, bool transp=0, bool orcaformat=false) {x.get(fd,dimensions,false,orcaformat);} \
static void multiput(size_t n,int fd, const X<C> *x, bool dimensions=1) {for(size_t i=0; i<n; ++i) x[i].put(fd,dimensions);} \ static void multiput(size_t n,int fd, const X<C> *x, bool dimensions=1, bool orcaformat=false) {for(size_t i=0; i<n; ++i) x[i].put(fd,dimensions,false,orcaformat);} \
static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1) {for(size_t i=0; i<n; ++i) x[i].get(fd,dimensions);} \ static void multiget(size_t n,int fd, X<C> *x, bool dimensions=1, bool orcaformat=false) {for(size_t i=0; i<n; ++i) x[i].get(fd,dimensions,false,orcaformat);} \
static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \ static void copy(C *dest, C *src, size_t n) {for(size_t i=0; i<n; ++i) dest[i]=src[i];} \
static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();} \ static void clear(C *dest, size_t n) {for(size_t i=0; i<n; ++i) dest[i].clear();} \
static void copyonwrite(X<C> &x) {x.copyonwrite();} \ static void copyonwrite(X<C> &x) {x.copyonwrite();} \

212
lanczos.h Normal file
View File

@@ -0,0 +1,212 @@
/*
LA: linear algebra C++ interface library
Copyright (C) 2025 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _lanczos_h
#define _lanczos_h
#include "vec.h"
#include "smat.h"
#include "mat.h"
#include "sparsemat.h"
#include "nonclass.h"
#include "auxstorage.h"
//TODO:
//@@@implement restart when Krylov space is exceeded
//@@@implement inital guess of more than 1 vector (also in davidson) and block version of the methods
namespace LA {
//Lanczos diagonalization of hermitean matrix
//matrix can be any class which has nrows(), ncols(), diagonalof(), issymmetric(), and gemv() available
//does not even have to be explicitly stored - direct CI
//therefore the whole implementation must be a template in a header
template <typename T, typename Matrix>
extern void lanczos(const Matrix &bigmat, NRVec<T> &eivals, NRVec<T> *eivecs, const char *eivecsfile,
int nroots=1, const bool verbose=0, const double eps=1e-6,
const bool incore=1, int maxit=100, const int maxkrylov = 1000,
void (*initguess)(NRVec<T> &)=NULL, const typename LA_traits<T>::normtype *target=NULL)
{
if(!bigmat.issymmetric()) laerror("lanczos only for hermitean matrices");
bool flag=0;
int n=bigmat.nrows();
if ( n!= (int)bigmat.ncols()) laerror("non-square matrix in lanczos");
if(eivals.size()<nroots) laerror("too small eivals dimension in lanczos");
NRVec<T> vec1(n),vec2(n);
NRVec<T> *v0,*v1;
AuxStorage<T> *s0,*s1;
if(incore)
{
v0 = new NRVec<T>[maxkrylov];
}
else
{
s0 = new AuxStorage<T>;
}
int i,j;
if(nroots>=maxkrylov) nroots =maxkrylov-1;
int nroot=0;
int oldnroot;
//default guess based on lowest diagonal element of the matrix
if(initguess) initguess(vec1);
else
{
const T *diagonal = bigmat.diagonalof(vec2,false,true);
typename LA_traits<T>::normtype t=1e100; int i,j;
vec1=0;
for(i=0, j= -1; i<n; ++i) if(LA_traits<T>::realpart(diagonal[i])<t) {t=LA_traits<T>::realpart(diagonal[i]); j=i;}
vec1[j]=1;
}
NRVec<typename LA_traits<T>::normtype> alpha(maxkrylov),beta(maxkrylov-1);
//initial step
vec1.normalize();
if(incore) v0[0]=vec1; else s0->put(vec1,0);
bigmat.gemv(0,vec2,'n',1,vec1); //avoid bigmat.operator*(vec), since that needs to allocate another n-sized vector
{
T tmp=vec2*vec1;
alpha[0]= LA_traits<T>::realpart(tmp);
if(LA_traits<T>::imagpart(tmp)>1e-6) laerror("matrix probably not hermitian in lanczos");
}
vec2.axpy(-alpha[0],vec1);
NRVec<typename LA_traits<T>::normtype> r(1); r[0]=alpha[0];
NRVec<typename LA_traits<T>::normtype> rold;
NRMat<typename LA_traits<T>::normtype> smallV;
int krylovsize=1;
//iterative Lanczos
int it=0;
for(j=1; j<maxkrylov;++j)
{
++it;
if(it>maxit) {std::cout <<"too many interations in lanczos\n"; flag=1; goto finished;}
++krylovsize;
//extend the Krylov space (last w vector expected in vec2, last v vector in vec1)
beta[j-1] = vec2.norm();
if(beta[j-1] > 0)
{
vec2 /= beta[j-1];
}
else
{
laerror("zero norm in lanczos");
//could generate an arbitrary vector and orthonormalize it
}
if(incore) v0[j]=vec2; else s0->put(vec2,j);
vec1 *= -beta[j-1];
bigmat.gemv(1,vec1,'n',1,vec2);
{
T tmp=vec1*vec2;
alpha[j]= LA_traits<T>::realpart(tmp);
if(LA_traits<T>::imagpart(tmp)>1e-6) laerror("matrix probably not hermitian in lanczos");
}
vec1.axpy(-alpha[j],vec2);
vec1.swap(vec2); //move new w vector to vec2, new v vector to vec1
//diagonalize the tridiagonal
smallV.resize(j+1,j+1);
rold=r;
r=alpha.subvector(0,j);
NRVec<typename LA_traits<T>::normtype> rr=beta.subvector(0,j-1);
diagonalize3(r,rr,&smallV);
if(target) //resort eigenpairs by distance from the target
{
NRVec<typename LA_traits<T>::normtype> key(j+1);
for(int i=0; i<=j; ++i) key[i] = abs(r[i] - *target);
NRPerm<int> p(j+1);
key.sort(0,p);
NRVec<typename LA_traits<T>::normtype> rp(j+1);
NRMat<typename LA_traits<T>::normtype> smallVp(j+1,j+1);
for(int i=0; i<=j; ++i)
{
rp[i]= r[p[i+1]-1];
for(int k=0; k<=j; ++k) smallVp(k,i) = smallV(k,p[i+1]-1);
}
r = rp;
smallV = smallVp;
}
if(verbose)
{
for(int iroot=0; iroot<std::min(krylovsize,nroots); ++iroot)
{
std::cout <<"Lanczos: iter="<<it <<" dim="<<krylovsize<<" root="<<iroot<<" eigenvalue="<<r[iroot]<<"\n";
}
std::cout.flush();
}
//convergence test when we have enough roots even in rold
if(krylovsize>nroots)
{
bool conv=true;
for(int iroot=0; iroot<nroots; ++iroot)
if(abs(r[iroot]-rold[iroot])>eps) conv=false;
if(conv)
{
flag=0;
goto converged;
}
}
}
flag=1;
goto finished;
converged:
AuxStorage<typename LA_traits<T>::elementtype> *ev;
if(eivecsfile) ev = new AuxStorage<typename LA_traits<T>::elementtype>(eivecsfile);
if(verbose) {std::cout << "Lanczos converged in "<<it<<" iterations.\n"; std::cout.flush();}
for(nroot=0; nroot<nroots; ++nroot)
{
eivals[nroot]=r[nroot];
if(eivecs)
{
vec1=0;
for(j=0; j<krylovsize; ++j )
{
if(!incore) s0->get(vec2,j);
vec1.axpy(smallV(j,nroot),incore?v0[j]:vec2);
}
vec1.normalize();
if(eivecs) eivecs[nroot]|=vec1;
if(eivecsfile)
{
ev->put(vec1,nroot);
}
}
}
if(eivecsfile) delete ev;
finished:
if(incore) {delete[] v0;}
else {delete s0;}
if(flag) laerror("no convergence in lanczos");
}
}//namespace
#endif

19
mat.cc
View File

@@ -121,12 +121,12 @@ const NRVec<T> NRMat<T>::row(const int i, int l) const {
* @see NRVec<T>::put() * @see NRVec<T>::put()
******************************************************************************/ ******************************************************************************/
template <typename T> template <typename T>
void NRMat<T>::put(int fd, bool dim, bool transp) const { void NRMat<T>::put(int fd, bool dim, bool transp, bool orcaformat) const {
#ifdef CUDALA #ifdef CUDALA
if(location != cpu) { if(location != cpu) {
NRMat<T> tmp = *this; NRMat<T> tmp = *this;
tmp.moveto(cpu); tmp.moveto(cpu);
tmp.put(fd, dim, transp); tmp.put(fd, dim, transp, orcaformat);
return; return;
} }
#endif #endif
@@ -134,6 +134,11 @@ void NRMat<T>::put(int fd, bool dim, bool transp) const {
if(dim){ if(dim){
if(sizeof(int) != write(fd,&(transp?mm:nn),sizeof(int))) laerror("write failed"); if(sizeof(int) != write(fd,&(transp?mm:nn),sizeof(int))) laerror("write failed");
if(sizeof(int) != write(fd,&(transp?nn:mm),sizeof(int))) laerror("write failed"); if(sizeof(int) != write(fd,&(transp?nn:mm),sizeof(int))) laerror("write failed");
if(orcaformat)
{
int tmp=sizeof(T);
if(sizeof(int) != write(fd,&tmp,sizeof(int))) laerror("write failed");
}
} }
if(transp){ //not particularly efficient if(transp){ //not particularly efficient
@@ -167,12 +172,12 @@ void NRMat<T>::put(int fd, bool dim, bool transp) const {
* @see NRVec<T>::get(), copyonwrite() * @see NRVec<T>::get(), copyonwrite()
******************************************************************************/ ******************************************************************************/
template <typename T> template <typename T>
void NRMat<T>::get(int fd, bool dim, bool transp){ void NRMat<T>::get(int fd, bool dim, bool transp, bool orcaformat){
#ifdef CUDALA #ifdef CUDALA
if(location != cpu){ if(location != cpu){
NRMat<T> tmp; NRMat<T> tmp;
tmp.moveto(cpu); tmp.moveto(cpu);
tmp.get(fd, dim, transp); tmp.get(fd, dim, transp,orcaformat);
tmp.moveto(getlocation()); tmp.moveto(getlocation());
*this = tmp; *this = tmp;
return; return;
@@ -183,6 +188,12 @@ void NRMat<T>::get(int fd, bool dim, bool transp){
if(dim){ if(dim){
if(sizeof(int) != read(fd, &nn0, sizeof(int))) laerror("read failed"); if(sizeof(int) != read(fd, &nn0, sizeof(int))) laerror("read failed");
if(sizeof(int) != read(fd, &mm0, sizeof(int))) laerror("read failed"); if(sizeof(int) != read(fd, &mm0, sizeof(int))) laerror("read failed");
if(orcaformat)
{
int tmp;
if(sizeof(int) != read(fd, &tmp, sizeof(int))) laerror("read failed");
if(tmp!=sizeof(T)) laerror("mismatch in orca format");
}
if(transp) resize(mm0, nn0); else resize(nn0, mm0); if(transp) resize(mm0, nn0); else resize(nn0, mm0);
}else{ }else{
copyonwrite(); copyonwrite();

4
mat.h
View File

@@ -305,9 +305,9 @@ public:
inline size_t size() const; inline size_t size() const;
//! unformatted input //! unformatted input
void get(int fd, bool dimensions = 1, bool transposed = false); void get(int fd, bool dimensions = 1, bool transposed = false, bool orcaformat=false);
//! unformatted output //! unformatted output
void put(int fd, bool dimensions = 1, bool transposed = false) const; void put(int fd, bool dimensions = 1, bool transposed = false, bool orcaformat=false) const;
//! formatted output //! formatted output
void fprintf(FILE *f, const char *format, const int modulo) const; void fprintf(FILE *f, const char *format, const int modulo) const;
//! formatted input //! formatted input

View File

@@ -767,7 +767,7 @@ return(bern[n]);
// //
//enlarge storage size if needed //enlarge storage size if needed
// //
#define SIMPLICIAL_MAXD 8 #define SIMPLICIAL_MAXD 16
#define SIMPLICIAL_MAXN 1024 #define SIMPLICIAL_MAXN 1024
unsigned long long simplicial(int d, int n) unsigned long long simplicial(int d, int n)
@@ -783,7 +783,7 @@ if(d==1) return n;
//resize precomputed part as needed //resize precomputed part as needed
if(n>maxn) if(n>maxn)
{ {
if(n>=SIMPLICIAL_MAXN) laerror("storage overflow in simplicial"); if(n>=SIMPLICIAL_MAXN) laerror("storage overflow in simplicial, increase MAXN");
int newdim=2*n; int newdim=2*n;
if(newdim<2*maxn) newdim=2*maxn; if(newdim<2*maxn) newdim=2*maxn;
if(newdim>=SIMPLICIAL_MAXN) newdim=SIMPLICIAL_MAXN-1; if(newdim>=SIMPLICIAL_MAXN) newdim=SIMPLICIAL_MAXN-1;
@@ -797,7 +797,7 @@ if(n>maxn)
if(d>maxd) if(d>maxd)
{ {
if(d>=SIMPLICIAL_MAXD) laerror("storage overflow in simplicial"); if(d>=SIMPLICIAL_MAXD) laerror("storage overflow in simplicial, increase MAXD");
for(int dd=maxd+1; dd<=d; ++dd) for(int dd=maxd+1; dd<=d; ++dd)
{ {
stored[dd-2][0]=0; stored[dd-2][0]=0;
@@ -817,7 +817,7 @@ int inverse_simplicial(int d, unsigned long long s)
if(s==0) return 0; if(s==0) return 0;
if(d==0 || s==1) return 1; if(d==0 || s==1) return 1;
if(d==1) return (int)s; if(d==1) return (int)s;
if(d>=SIMPLICIAL_MAXD) laerror("storage overflow in inverse_simplicial"); if(d>=SIMPLICIAL_MAXD) laerror("storage overflow in inverse_simplicial - increase MAXD");
static int maxd=0; static int maxd=0;
static Polynomial<double> polynomials[SIMPLICIAL_MAXD]; static Polynomial<double> polynomials[SIMPLICIAL_MAXD];

View File

@@ -944,7 +944,31 @@ extern "C" void FORNAME(zggev)(const char *JOBVL, const char *JOBVR, const FINT
std::complex<double> *VL, const FINT *LDVL, std::complex<double> *VR, const FINT *LDVR, std::complex<double> *VL, const FINT *LDVL, std::complex<double> *VR, const FINT *LDVR,
std::complex<double> *WORK, const FINT *LWORK, double *RWORK, FINT *INFO ); std::complex<double> *WORK, const FINT *LWORK, double *RWORK, FINT *INFO );
//tridiagonal
extern "C" void FORNAME(dsteqr)(const char *compz, const FINT *n, double *d, double *d1, double *z, const FINT *ldz, double *work, FINT *info);
void diagonalize3(NRVec<double> &d, NRVec<double> &d1, NRMat<double> *v, const bool corder, int n0)
{
FINT n = n0? n0: d.size();
if(d1.size()<n-1) laerror("inconsistent dimensions in diagonalize3");
if(v) {if(n!=v->nrows()||n!=v->ncols()) laerror("inconsistent dimensions in diagonalize3");}
d.copyonwrite();
d1.copyonwrite();
if(v) v->copyonwrite();
FINT ldz=n;
FINT info;
int worksize=2*n-2;
if(worksize<1) worksize=1;
double *work = new double[worksize];
char job=v?'i':'n';
FORNAME(dsteqr)(&job,&n,&d[0],&d1[0],(v?&(*v)(0,0):NULL),&ldz,work,&info);
delete[] work;
if (v && corder) v->transposeme();
if (info < 0) laerror("illegal argument in dsteqr() fo diagonalize3");
if (info > 0) laerror("convergence problem in dsteqr() fo diagonalize3");
}
//statics for sorting //statics for sorting

View File

@@ -68,22 +68,43 @@ return positive_power(z,i);
template <class T> template <class T>
const NRSMat<T> twoside_transform(const NRSMat<T> &S, const NRMat<T> &C, bool transp=0) //calculate C^dagger S C const NRSMat<T> twoside_transform(const NRSMat<T> &S, const NRMat<T> &C, bool transp=0) //calculate C^dagger S C for symmetric/hermitian S
{ {
char tchar='t';
if(LA_traits<T>::is_complex()) tchar='c';
if(transp) if(transp)
{ {
NRMat<T> tmp = C * S; NRMat<T> tmp = C * S;
NRMat<T> result(C.nrows(),C.nrows()); NRMat<T> result(C.nrows(),C.nrows());
result.gemm((T)0,tmp,'n',C,'t',(T)1); result.gemm((T)0,tmp,'n',C,tchar,(T)1);
return NRSMat<T>(result); return NRSMat<T>(result);
} }
NRMat<T> tmp = S * C; NRMat<T> tmp = S * C;
NRMat<T> result(C.ncols(),C.ncols()); NRMat<T> result(C.ncols(),C.ncols());
result.gemm((T)0,C,'t',tmp,'n',(T)1); result.gemm((T)0,C,tchar,tmp,'n',(T)1);
return NRSMat<T>(result); return NRSMat<T>(result);
} }
template <class T>
const NRMat<T> twoside_transform(const NRMat<T> &S, const NRMat<T> &C, bool transp=0) //calculate C^dagger S C for non-symmetric S
{
char tchar='t';
if(LA_traits<T>::is_complex()) tchar='c';
if(transp)
{
NRMat<T> tmp = C * S;
NRMat<T> result(C.nrows(),C.nrows());
result.gemm((T)0,tmp,'n',C,tchar,(T)1);
return result;
}
NRMat<T> tmp = S * C;
NRMat<T> result(C.ncols(),C.ncols());
result.gemm((T)0,C,tchar,tmp,'n',(T)1);
return result;
}
template <class T> template <class T>
@@ -162,6 +183,9 @@ extern void gdiagonalize(NRMat<std::complex<double> > &a, NRVec<double> &wr, NRV
NRMat<std::complex<double> > *vl=NULL, NRMat<std::complex<double> > *vr=NULL, const bool corder=1, int n=0, const int sorttype=0, const int biorthonormalize=0, NRMat<std::complex<double> > *vl=NULL, NRMat<std::complex<double> > *vr=NULL, const bool corder=1, int n=0, const int sorttype=0, const int biorthonormalize=0,
NRMat<std::complex<double> > *b=NULL, NRVec<std::complex<double> > *beta=NULL); NRMat<std::complex<double> > *b=NULL, NRVec<std::complex<double> > *beta=NULL);
//diagonalization of symmetric real tridiagonal matrix
extern void diagonalize3(NRVec<double> &d, NRVec<double> &d1, NRMat<double> *v, const bool corder=1, int n0=0);
//complex,real,imaginary parts of various entities //complex,real,imaginary parts of various entities
template<typename T> template<typename T>
extern const typename LA_traits<T>::realtype realpart(const T&); extern const typename LA_traits<T>::realtype realpart(const T&);

19
smat.cc
View File

@@ -41,12 +41,12 @@ namespace LA {
* @see NRMat<T>::get(), NRSMat<T>::copyonwrite() * @see NRMat<T>::get(), NRSMat<T>::copyonwrite()
******************************************************************************/ ******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::put(int fd, bool dim, bool transp) const { void NRSMat<T>::put(int fd, bool dim, bool transp, bool orcaformat) const {
#ifdef CUDALA #ifdef CUDALA
if(location != cpu){ if(location != cpu){
NRSMat<T> tmp= *this; NRSMat<T> tmp= *this;
tmp.moveto(cpu); tmp.moveto(cpu);
tmp.put(fd,dim,transp); tmp.put(fd,dim,transp,orcaformat);
return; return;
} }
#endif #endif
@@ -54,6 +54,11 @@ void NRSMat<T>::put(int fd, bool dim, bool transp) const {
if(dim){ if(dim){
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write");
if(orcaformat)
{
int tmp=sizeof(T);
if(sizeof(int) != write(fd,&tmp,sizeof(int))) laerror("cannot write");
}
} }
LA_traits<T>::multiput((size_t)nn*(nn+1)/2,fd,v,dim); LA_traits<T>::multiput((size_t)nn*(nn+1)/2,fd,v,dim);
} }
@@ -66,12 +71,12 @@ void NRSMat<T>::put(int fd, bool dim, bool transp) const {
* @see NRSMat<T>::put(), NRSMat<T>::copyonwrite() * @see NRSMat<T>::put(), NRSMat<T>::copyonwrite()
******************************************************************************/ ******************************************************************************/
template <typename T> template <typename T>
void NRSMat<T>::get(int fd, bool dim, bool transp) { void NRSMat<T>::get(int fd, bool dim, bool transp, bool orcaformat) {
#ifdef CUDALA #ifdef CUDALA
if(location != cpu){ if(location != cpu){
NRSMat<T> tmp; NRSMat<T> tmp;
tmp.moveto(cpu); tmp.moveto(cpu);
tmp.get(fd,dim,transp); tmp.get(fd,dim,transp,orcaformat);
tmp.moveto(location); tmp.moveto(location);
*this = tmp; *this = tmp;
return; return;
@@ -82,6 +87,12 @@ void NRSMat<T>::get(int fd, bool dim, bool transp) {
errno = 0; errno = 0;
if(dim){ if(dim){
if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("cannot read"); if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("cannot read");
if(orcaformat)
{
int tmp;
if(sizeof(int) != read(fd,&tmp,sizeof(int))) laerror("cannot read");
if(tmp!=sizeof(T)) laerror("mismatch in orca format");
}
resize(nn0[0]); resize(nn0[0]);
}else{ }else{
copyonwrite(); copyonwrite();

17
smat.h
View File

@@ -181,10 +181,23 @@ public:
return s; return s;
} }
const NRVec<T> rsum() const
{
NRVec<T> r(nn);
for(int i=0; i<nn;++i)
{
r[i]=(T)0;
for(int j=0; j<nn; ++j) r[i] += (*this)(i,j);
}
return r;
}
const NRVec<T> csum() const {return rsum();};
const T trace() const; const T trace() const;
void get(int fd, bool dimensions = 1, bool transp = 0); void get(int fd, bool dimensions = 1, bool transp = 0, bool orcaformat=false);
void put(int fd, bool dimensions = 1, bool transp = 0) const; void put(int fd, bool dimensions = 1, bool transp = 0, bool orcaformat=false) const;
void copyonwrite(bool detachonly=false, bool deep=true); void copyonwrite(bool detachonly=false, bool deep=true);

202
t.cc
View File

@@ -1137,7 +1137,7 @@ if(0)
{ {
int n,m; int n,m;
cin>>n >>m; cin>>n >>m;
NRSMat<double> a(n,n); NRSMat<double> a(n);
NRVec<double> rr(n); NRVec<double> rr(n);
for(int i=0;i<n;++i) for(int j=0;j<=i;++j) for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
@@ -3733,6 +3733,7 @@ INDEXGROUP shape;
{ {
shape.number=r; shape.number=r;
shape.symmetry= 1; shape.symmetry= 1;
//shape.symmetry= -1;
shape.range=n; shape.range=n;
shape.offset=0; shape.offset=0;
} }
@@ -4392,7 +4393,7 @@ cout <<"Error = "<<(z-zzz).norm()<<endl;
} }
if(1) if(0)
{ {
//for profiling and timing //for profiling and timing
int nn; int nn;
@@ -4423,5 +4424,202 @@ cout <<z.norm()<<endl;
} }
if(0)
{
//tucker test order
int r,n;
bool inv;
cin>>r>>n>>inv;
NRVec<INDEXGROUP> shape(r);
for(int i=0; i<r; ++i)
{
shape[i].number=1;
shape[i].symmetry=0;
shape[i].range=n+i;
shape[i].offset=0;
}
Tensor<double> x(shape);
x.randomize(1.);
x.defaultnames();
cout<<x;
Tensor<double> x0(x);
x0.copyonwrite();
NRVec<NRMat<double> > dec=x.Tucker(1e-12,inv);
cout<<"Tucker\n"<<x<<endl;
cout<<dec;
Tensor<double> y = x.inverseTucker(dec,inv);
cout <<"invTucker\n"<<y;
cout <<"Error = "<<(x0-y).norm()<<endl;
if(inv==false)
{
Tensor<double> z = x.linear_transform(dec);
cout <<"Error2 = "<<(x0-z).norm()<<endl;
}
}
if(0)
{
//test linear transform
int r=3;
int n,sym;
cin>>n>>sym;
NRVec<INDEXGROUP> shape(2);
{
shape[0].number=r;
shape[0].symmetry=sym;
shape[0].range=n;
shape[0].offset=0;
shape[1].number=1;
shape[1].symmetry=0;
shape[1].range=n;
shape[1].offset=0;
}
Tensor<double> x(shape);
x.randomize(1.);
NRVec<NRMat<double> > t(2);
t[0].resize(n+2,n); t[0].randomize(1.);
t[1].resize(n+2,n); t[1].randomize(1.);
Tensor<double> xt=x.linear_transform(t);
shape.copyonwrite();
shape[0].range=n+2;
shape[1].range=n+2;
Tensor<double> y(shape);
//this is the most stupid way to compute the transform ;-)
for(int i=0; i<n+2; ++i) for(int j=0; j<n+2; ++j) for(int k=0; k<n+2; ++k) for(int l=0; l<n+2; ++l)
{
y.lhs(i,j,k,l)=0;
for(int ii=0; ii<n; ++ii) for(int jj=0; jj<n; ++jj) for(int kk=0; kk<n; ++kk) for(int ll=0; ll<n; ++ll)
y.lhs(i,j,k,l) += x(ii,jj,kk,ll) * t[0](i,ii) * t[0](j,jj) * t[0](k,kk) * t[1](l,ll) ;
}
cout <<"Error = "<<(xt-y).norm()<<endl;
}
if(0)
{
int n,m;
bool which;
cin>>n >>m >>which;
NRSMat<double> a(n);
NRVec<double> rr(n);
for(int i=0;i<n;++i) for(int j=0;j<=i;++j)
{
a(i,j)= RANDDOUBLE();
if(i==j) a(i,i)+= .5*(i-n*.5);
}
NRSMat<double> aa;
NRMat<double> vv(n,n);
aa=a; diagonalize(aa,rr,&vv);
NRVec<double> r(m);
NRVec<double> *eivecs = new NRVec<double>[m];
cout <<"Exact energies " <<rr<<endl;
if(which) lanczos(a,r,eivecs,NULL,m,1,1e-6,1,200,300);
else davidson(a,r,eivecs,NULL,m,1,1e-6,1,200,300);
cout <<"Iterative energies " <<r;
cout <<"Eigenvectors compare:\n";
for(int i=0; i<m; ++i)
{
cout <<eivecs[i];
for(int j=0; j<n;++j) cout <<vv[j][i]<<" ";
cout <<endl;
}
}
if(0)
{
int n,m;
bool which;
cin>>n >>m>>which ;
NRSMat<complex<double> > a(n);
a.randomize(.1);
for(int i=0;i<n;++i)
{
a(i,i)= RANDDOUBLE() + .5*(i-n);
}
//cout <<"test matrix = "<<a;
NRVec<double> rr(n);
NRSMat<complex<double> > aa;
NRMat<complex<double> > vv(n,n);
aa=a; diagonalize(aa,rr,&vv);
cout <<"Exact energies "<<rr;
NRVec<complex<double> > r(m);
NRVec<complex<double> > *eivecs = new NRVec<complex<double> >[m];
if(which) lanczos(a,r,eivecs,NULL,m,true,1e-5,true,10*n,n);
else davidson(a,r,eivecs,NULL,m,true,1e-5,true,10*n,n);
cout <<"Davidson/Lanczos energies " <<r;
cout <<"Exact energies "<<rr;
cout <<"Eigenvectors compare:\n";
for(int i=0; i<m; ++i)
{
cout <<eivecs[i];
for(int j=0; j<n;++j) cout <<vv[j][i]<<" ";
cout <<endl;
}
}
if(1)
{
int r,n,sym;
cin>>r>>n>>sym;
NRVec<INDEXGROUP> shape(3);
shape[0].number=2;
shape[0].symmetry=0;
shape[0].range=n+1;
shape[0].offset=0;
shape[1].number=r;
shape[1].symmetry= sym;
shape[1].range=n;
shape[1].offset=0;
shape[2].number=2;
shape[2].symmetry=0;
shape[2].range=n+2;
shape[2].offset=0;
Tensor<double> x(shape); x.randomize(1.);
x.defaultnames();
cout <<"x= "<<x.shape << " "<<x.names<<endl;
NRVec<INDEXGROUP> yshape(2);
yshape[0].number=1;
yshape[0].symmetry=0;
yshape[0].range=n;
yshape[0].offset=0;
yshape[1].number=1;
yshape[1].symmetry= 0;
yshape[1].range=n+3;
yshape[1].offset=0;
Tensor<double> y(yshape); y.randomize(1.);
INDEX posit(1,0);
//Tensor<double> z=x.unwind_index(1,1);
//Tensor<double> z=x.contraction(INDEX(1,1),y,INDEX(0,0),1,false,false);
Tensor<double> z=x.linear_transform(1,y);
cout <<z.shape;
//check
}
}//main }//main

164
tensor.cc
View File

@@ -713,6 +713,9 @@ return q;
template<typename T> template<typename T>
Tensor<T> Tensor<T>::permute_index_groups(const NRPerm<int> &p) const Tensor<T> Tensor<T>::permute_index_groups(const NRPerm<int> &p) const
{ {
if(p.size()!=shape.size()) laerror("permutation size mismatch in permute_index_groups");
if(p.is_identity()) return *this;
//std::cout <<"permute_index_groups permutation = "<<p<<std::endl; //std::cout <<"permute_index_groups permutation = "<<p<<std::endl;
NRVec<INDEXGROUP> newshape=shape.permuted(p,true); NRVec<INDEXGROUP> newshape=shape.permuted(p,true);
//std::cout <<"permute_index_groups newshape = "<<newshape<<std::endl; //std::cout <<"permute_index_groups newshape = "<<newshape<<std::endl;
@@ -836,6 +839,7 @@ if(group==0 && index==0 && shape[0].symmetry==0) //formal split of 1 index witho
} }
if(shape[group].number==1) return unwind_index_group(group); //single index in the group if(shape[group].number==1) return unwind_index_group(group); //single index in the group
//general case - recalculate the shape and allocate the new tensor //general case - recalculate the shape and allocate the new tensor
NRVec<INDEXGROUP> newshape(shape.size()+1); NRVec<INDEXGROUP> newshape(shape.size()+1);
newshape[0].number=1; newshape[0].number=1;
@@ -856,7 +860,10 @@ for(int i=0; i<shape.size(); ++i)
--newshape[i+1].number; --newshape[i+1].number;
flatindex += index; flatindex += index;
} }
else flatindex += shape[i].number; else
{
if(i<group) flatindex += shape[i].number;
}
} }
@@ -1722,15 +1729,17 @@ if(r==1) //create an analogous output for the trivial case
} }
//loop over all indices; relies on the fact that unwinding does not change order of remaining indices //loop over all indices; relies on the fact that unwinding does not change order of remaining indices
//for inverseorder=false, to avoid inverting order by permute_index_groups we repeatedly unwind the LAST index, and all indices rotate at this position with the first one in the last iteration due to the unwind!
NRVec<INDEXNAME> names_saved = names;
for(int i=0; i<r; ++i) for(int i=0; i<r; ++i)
{ {
INDEX I=indexposition(i,shape); INDEX I= inverseorder? indexposition(i,shape) : indexposition(r-1,shape);
NRMat<T> um; NRMat<T> um;
NRVec<INDEXGROUP> ushape; NRVec<INDEXGROUP> ushape;
{ {
Tensor<T> uu=unwind_index(I); Tensor<T> uu=unwind_index(I);
ushape=uu.shape; //ushape.copyonwrite(); should not be needed ushape=uu.shape;
um=uu.matrix(); um=uu.matrix(1);
} }
int mini=um.nrows(); if(um.ncols()<mini) mini=um.ncols(); //compact SVD, expect descendingly sorted values int mini=um.nrows(); if(um.ncols()<mini) mini=um.ncols(); //compact SVD, expect descendingly sorted values
NRMat<T> u(um.nrows(),mini),vt(mini,um.ncols()); NRMat<T> u(um.nrows(),mini),vt(mini,um.ncols());
@@ -1755,28 +1764,30 @@ for(int i=0; i<r; ++i)
umnew=u.submatrix(0,umnr-1,0,preserve-1); umnew=u.submatrix(0,umnr-1,0,preserve-1);
} }
else umnew=u; else umnew=u;
ret[(inverseorder? r-i-1 : i)]=vt.transpose(true); ret[r-i-1]=vt.transpose(true);
umnew.diagmultr(w); umnew.diagmultr(w);
//rebuild tensor of the preserved shape from matrix //rebuild tensor of the preserved shape from matrix
ushape.copyonwrite();
ushape[0].range=preserve; ushape[0].range=preserve;
{ {
NRVec<T> newdata(umnew); NRVec<T> newdata(umnew);
umnew.resize(0,0);//deallocate umnew.resize(0,0);//deallocate
*this = Tensor(ushape,newdata); *this = Tensor(ushape,newdata);
names=names_saved;
} }
} }
if(!is_flat()) laerror("this should not happen"); if(!is_flat()) laerror("this should not happen");
if(!inverseorder) if(inverseorder)
{ {
NRPerm<int> p(r); NRPerm<int> p(rank()); p.identity();
for(int i=1; i<=r; ++i) p[r-i+1]=i; names=names_saved.permuted(p.reverse());
*this = permute_index_groups(p);
} }
else
names=names_saved;
return ret; return ret;
} }
template<typename T> template<typename T>
Tensor<T> Tensor<T>::inverseTucker(const NRVec<NRMat<T> > &x, bool inverseorder) const Tensor<T> Tensor<T>::inverseTucker(const NRVec<NRMat<T> > &x, bool inverseorder) const
{ {
@@ -1784,9 +1795,11 @@ if(rank()!=x.size()) laerror("input of inverseTucker does not match rank");
Tensor<T> tmp(*this); Tensor<T> tmp(*this);
Tensor<T> r; Tensor<T> r;
if(!is_flat()) laerror("inverseTucker only for flat tensors as produced by Tucker"); if(!is_flat()) laerror("inverseTucker only for flat tensors as produced by Tucker");
if(inverseorder)
{
for(int i=0; i<rank(); ++i) for(int i=0; i<rank(); ++i)
{ {
Tensor<T> mat(x[i],true); Tensor<T> mat(x[i],true); //flat tensor from a matrix
r= tmp.contraction(i,0,mat,0,0,(T)1,false,false); r= tmp.contraction(i,0,mat,0,0,(T)1,false,false);
if(i<rank()-1) if(i<rank()-1)
{ {
@@ -1794,17 +1807,134 @@ for(int i=0; i<rank(); ++i)
r.deallocate(); r.deallocate();
} }
} }
if(!inverseorder) }
else //not inverseroder
{
for(int i=rank()-1; i>=0; --i)
{ {
NRPerm<int> p(r.rank()); Tensor<T> mat(x[i],true); //flat tensor from a matrix
for(int i=1; i<=r.rank(); ++i) p[r.rank()-i+1]=i; r= tmp.contraction(rank()-1,0,mat,0,0,(T)1,false,false); //the current index will be the last after previous contractions
return r.permute_index_groups(p); if(i>0)
{
tmp=r;
r.deallocate();
}
}
}
if(inverseorder)
{
NRPerm<int> p(rank()); p.identity();
r.names=names.permuted(p.reverse());
} }
else else
return r; r.names=names;
return r;
} }
template<typename T>
Tensor<T> Tensor<T>::linear_transform(const NRVec<NRMat<T> > &x) const
{
if(x.size()!=shape.size()) laerror("wrong number of transformation matrices in linear_transform");
for(int i=0; i<shape.size(); ++i) if(x[i].ncols()!=shape[i].range) laerror("mismatch of transformation matrix size in linear_transform");
Tensor<T> tmp(*this);
Tensor<T> r;
//do the groups in reverse order to preserve index ordering
for(int g=shape.size()-1; g>=0; --g)
{
Tensor<T> mat(x[g],true); //flat tensor from a matrix
for(int i=shape[g].number-1; i>=0; --i) //indices in the group in reverse order
{
#ifdef LA_TENSOR_INDEXPOSITION
//what should we do with index position?
//either set upperindex of mat appropriately, or request ignoring that in contractions?
#endif
r= tmp.contraction(indexposition(rank()-1,tmp.shape),mat,INDEX(0,0),(T)1,false,false); //always the last index
if(i>0)
{
tmp=r;
r.deallocate();
}
}
//the group's indices are now individual ones leftmost in tensor r, restore group size and symmetry
if(shape[g].number>1)
{
INDEXLIST il(shape[g].number);
for(int i=0; i<shape[g].number; ++i)
{
il[i].group=i;
il[i].index=0;
}
r = r.merge_indices(il,shape[g].symmetry);
}
if(g>0)
{
tmp=r;
r.deallocate();
}
}
r.names=names;
return r;
}
template<typename T>
Tensor<T> Tensor<T>::linear_transform(const int g, const Tensor<T> &x) const
{
if(g<0||g>=shape.size()) laerror("wrong index group number in linear_transform");
if(x.rank()!=2 || x.shape.size()!=2 || x.shape[0].number!=1||x.shape[1].number!=1) laerror("wrong tensor shape for linear_transform");
if(x.shape[0].range!=shape[g].range) laerror("index range mismatch in linear_transform");
Tensor<T> tmp(*this);
Tensor<T> r;
//contract all indices in the reverse order
int gnow=g;
for(int i=shape[g].number-1; i>=0; --i) //indices in the group in reverse order
{
r= tmp.contraction(INDEX(gnow,i),x,INDEX(0,0),(T)1,false,false);
++gnow; //new group number in r, one index was added left
if(i>0)
{
tmp=r;
r.deallocate();
}
}
//the group's indices are now individual ones leftmost in tensor r, restore group size and symmetry
if(shape[g].number>1)
{
INDEXLIST il(shape[g].number);
for(int i=0; i<shape[g].number; ++i)
{
il[i].group=i;
il[i].index=0;
}
r = r.merge_indices(il,shape[g].symmetry);
}
//permute the group (now 0) to its original position
if(g!=0)
{
NRPerm<int> p(r.shape.size());
for(int i=0; i<g; ++i) p[i+1] = (i+1)+1;
p[g+1]=1;
for(int i=g+1; i<r.shape.size();++i) p[i+1] = i+1;
//std::cout<<"perm "<<p;
r=r.permute_index_groups(p);
}
//preserve names
r.names=names;
return r;
}
template<typename T> template<typename T>
int Tensor<T>::findflatindex(const INDEXNAME nam) const int Tensor<T>::findflatindex(const INDEXNAME nam) const
{ {
@@ -2116,7 +2246,7 @@ NRVec<NRVec_from1<int> > antigroups;
parse_antisymmetrizer(antisymmetrizer,antigroups,antinames); parse_antisymmetrizer(antisymmetrizer,antigroups,antinames);
//check the names make sense and fill in the possibly missing ones as separate group //check the names make sense and fill in the possibly missing ones as separate group
if(antinames.size()>tmpnames.size()) laerror("too many indices in the antisymmetrizet"); if(antinames.size()>tmpnames.size()) laerror("too many indices in the antisymmetrizer");
bitvector isexplicit(tmpnames.size()); bitvector isexplicit(tmpnames.size());
isexplicit.clear(); isexplicit.clear();
for(int i=0; i<antinames.size(); ++i) for(int i=0; i<antinames.size(); ++i)

View File

@@ -83,6 +83,21 @@ if(LA_traits<T>::is_complex()) //condition known at compile time
} }
static bool didwarn=false;
static inline bool ptr_ok(void *ptr)
{
#ifdef DEBUG
if(ptr==NULL && !didwarn)
{
std::cout<<"Warning: write to zero element of antisym tensor ignored\n";
std::cerr<<"Warning: write to zero element of antisym tensor ignored\n";
didwarn=true;
}
#endif
return (bool)ptr;
}
template<typename T> template<typename T>
class Signedpointer class Signedpointer
{ {
@@ -91,11 +106,11 @@ int sgn;
public: public:
Signedpointer(T *p, int s) : ptr(p),sgn(s) {}; Signedpointer(T *p, int s) : ptr(p),sgn(s) {};
//dereferencing *ptr should be ignored for sgn==0 //dereferencing *ptr should be ignored for sgn==0
const T operator=(const T rhs) {*ptr = signeddata(sgn,rhs); return rhs;} const T operator=(const T rhs) {if(ptr_ok(ptr)) *ptr = signeddata(sgn,rhs); return rhs;}
void operator*=(const T rhs) {*ptr *= rhs;} void operator*=(const T rhs) {if(ptr_ok(ptr)) *ptr *= rhs;}
void operator/=(const T rhs) {*ptr /= rhs;} void operator/=(const T rhs) {if(ptr_ok(ptr)) *ptr /= rhs;}
void operator+=(T rhs) {*ptr += signeddata(sgn,rhs);} void operator+=(T rhs) {if(ptr_ok(ptr)) *ptr += signeddata(sgn,rhs);}
void operator-=(T rhs) {*ptr -= signeddata(sgn,rhs);} void operator-=(T rhs) {if(ptr_ok(ptr)) *ptr -= signeddata(sgn,rhs);}
}; };
@@ -182,6 +197,8 @@ struct INDEX
int group; int group;
int index; int index;
bool operator==(const INDEX &rhs) const {return group==rhs.group && index==rhs.index;}; bool operator==(const INDEX &rhs) const {return group==rhs.group && index==rhs.index;};
INDEX() {};
INDEX(int g, int i): group(g), index(i) {};
}; };
typedef NRVec<INDEX> INDEXLIST; //collection of several indices typedef NRVec<INDEX> INDEXLIST; //collection of several indices
@@ -223,13 +240,16 @@ public:
int myrank; int myrank;
NRVec<LA_largeindex> groupsizes; //group sizes of symmetry index groups (a function of shape but precomputed for efficiency) NRVec<LA_largeindex> groupsizes; //group sizes of symmetry index groups (a function of shape but precomputed for efficiency)
NRVec<LA_largeindex> cumsizes; //cumulative sizes of symmetry index groups (a function of shape but precomputed for efficiency); always cumsizes[0]=1, index group 0 is the innermost-loop one NRVec<LA_largeindex> cumsizes; //cumulative sizes of symmetry index groups (a function of shape but precomputed for efficiency); always cumsizes[0]=1, index group 0 is the innermost-loop one
public:
NRVec<INDEXNAME> names; NRVec<INDEXNAME> names;
//indexing
LA_largeindex index(int *sign, const SUPERINDEX &I) const; //map the tensor indices to the position in data LA_largeindex index(int *sign, const SUPERINDEX &I) const; //map the tensor indices to the position in data
LA_largeindex index(int *sign, const FLATINDEX &I) const; //map the tensor indices to the position in data LA_largeindex index(int *sign, const FLATINDEX &I) const; //map the tensor indices to the position in data
LA_largeindex vindex(int *sign, LA_index i1, va_list args) const; //map list of indices to the position in data LA_largeindex vindex(int *sign, LA_index i1, va_list args) const; //map list of indices to the position in data
SUPERINDEX inverse_index(LA_largeindex s) const; //inefficient, but possible if needed SUPERINDEX inverse_index(LA_largeindex s) const; //inefficient, but possible if needed
int myflatposition(int group, int index) const {return flatposition(group,index,shape);};
int myflatposition(const INDEX &i) const {return flatposition(i,shape);};
INDEX myindexposition(int flatindex) const {return indexposition(flatindex,shape);};
//constructors //constructors
Tensor() : myrank(-1) {}; Tensor() : myrank(-1) {};
@@ -248,7 +268,12 @@ public:
explicit Tensor(const NRVec<T> &x); explicit Tensor(const NRVec<T> &x);
explicit Tensor(const NRMat<T> &x, bool flat=false); explicit Tensor(const NRMat<T> &x, bool flat=false);
explicit Tensor(const NRSMat<T> &x); explicit Tensor(const NRSMat<T> &x);
NRMat<T> matrix() const {return NRMat<T>(data,data.size()/groupsizes[0],groupsizes[0],0);}; //reinterpret as matrix with column index being the tensor's leftmost index group (typically the unwound single index) NRMat<T> matrix(int n=1) const //reinterpret as matrix with column index being the tensor's n leftmost index groups (typically the unwound single index)
{
if(n<0||n>shape.size()) laerror("wrong number n in tensor:: matrix()");
LA_largeindex ncol = (n==shape.size()) ? data.size() : cumsizes[n];
return NRMat<T>(data,data.size()/ncol,ncol,0);
};
bool is_named() const {if(names.size()==0) return false; if(names.size()!=myrank) laerror("bad number of index names"); return true;}; bool is_named() const {if(names.size()==0) return false; if(names.size()!=myrank) laerror("bad number of index names"); return true;};
bool is_uniquely_named() const; //no repeated names bool is_uniquely_named() const; //no repeated names
@@ -268,22 +293,16 @@ public:
void deallocate() {data.resize(0); shape.resize(0); groupsizes.resize(0); cumsizes.resize(0); names.resize(0);}; void deallocate() {data.resize(0); shape.resize(0); groupsizes.resize(0); cumsizes.resize(0); names.resize(0);};
inline Signedpointer<T> lhs(const SUPERINDEX &I) {int sign; LA_largeindex i=index(&sign,I); inline Signedpointer<T> lhs(const SUPERINDEX &I) {int sign; LA_largeindex i=index(&sign,I);
#ifdef DEBUG if(sign==0) return Signedpointer<T>(NULL,sign);
if(sign==0) laerror("l-value pointer to nonexistent tensor element"); else return Signedpointer<T>(&data[i],sign);};
#endif
return Signedpointer<T>(&data[i],sign);};
inline T operator()(const SUPERINDEX &I) const {int sign; LA_largeindex i=index(&sign,I); if(sign==0) return 0; else return signeddata(sign,data[i]);}; inline T operator()(const SUPERINDEX &I) const {int sign; LA_largeindex i=index(&sign,I); if(sign==0) return 0; else return signeddata(sign,data[i]);};
inline Signedpointer<T> lhs(const FLATINDEX &I) {int sign; LA_largeindex i=index(&sign,I); inline Signedpointer<T> lhs(const FLATINDEX &I) {int sign; LA_largeindex i=index(&sign,I);
#ifdef DEBUG if(sign==0) return Signedpointer<T>(NULL,sign);
if(sign==0) laerror("l-value pointer to nonexistent tensor element"); else return Signedpointer<T>(&data[i],sign);};
#endif
return Signedpointer<T>(&data[i],sign);};
inline T operator()(const FLATINDEX &I) const {int sign; LA_largeindex i=index(&sign,I); if(sign==0) return 0; else return signeddata(sign,data[i]);}; inline T operator()(const FLATINDEX &I) const {int sign; LA_largeindex i=index(&sign,I); if(sign==0) return 0; else return signeddata(sign,data[i]);};
inline Signedpointer<T> lhs(LA_index i1...) {va_list args; int sign; LA_largeindex i; va_start(args,i1); i= vindex(&sign, i1,args); inline Signedpointer<T> lhs(LA_index i1...) {va_list args; int sign; LA_largeindex i; va_start(args,i1); i= vindex(&sign, i1,args);
#ifdef DEBUG if(sign==0) return Signedpointer<T>(NULL,sign);
if(sign==0) laerror("l-value pointer to nonexistent tensor element"); else return Signedpointer<T>(&data[i],sign); };
#endif
return Signedpointer<T>(&data[i],sign); };
inline T operator()(LA_index i1...) const {va_list args; ; int sign; LA_largeindex i; va_start(args,i1); i= vindex(&sign, i1,args); if(sign==0) return 0; else return signeddata(sign,data[i]);}; inline T operator()(LA_index i1...) const {va_list args; ; int sign; LA_largeindex i; va_start(args,i1); i= vindex(&sign, i1,args); if(sign==0) return 0; else return signeddata(sign,data[i]);};
inline Tensor& operator=(const Tensor &rhs) {myrank=rhs.myrank; shape=rhs.shape; groupsizes=rhs.groupsizes; cumsizes=rhs.cumsizes; data=rhs.data; names=rhs.names; return *this;}; inline Tensor& operator=(const Tensor &rhs) {myrank=rhs.myrank; shape=rhs.shape; groupsizes=rhs.groupsizes; cumsizes=rhs.cumsizes; data=rhs.data; names=rhs.names; return *this;};
@@ -399,8 +418,11 @@ public:
Tensor merge_indices(const INDEXLIST &il, int symmetry=0) const; //opposite to flatten (merging with optional symmetrization/antisymmetrization and compression) Tensor merge_indices(const INDEXLIST &il, int symmetry=0) const; //opposite to flatten (merging with optional symmetrization/antisymmetrization and compression)
Tensor merge_indices(const NRVec<INDEXNAME> &nl, int symmetry=0) const {return merge_indices(findindexlist(nl),symmetry);}; Tensor merge_indices(const NRVec<INDEXNAME> &nl, int symmetry=0) const {return merge_indices(findindexlist(nl),symmetry);};
NRVec<NRMat<T> > Tucker(typename LA_traits<T>::normtype thr=1e-12, bool inverseorder=true); //HOSVD-Tucker decomposition, return core tensor in *this, flattened NRVec<NRMat<T> > Tucker(typename LA_traits<T>::normtype thr=1e-12, bool inverseorder=false); //HOSVD-Tucker decomposition, return core tensor in *this, flattened
Tensor inverseTucker(const NRVec<NRMat<T> > &x, bool inverseorder=true) const; //rebuild the original tensor from Tucker Tensor inverseTucker(const NRVec<NRMat<T> > &x, bool inverseorder=false) const; //rebuild the original tensor from Tucker
Tensor linear_transform(const NRVec<NRMat<T> > &x) const; //linear transform by a different matrix per each index group, preserving group symmetries
Tensor linear_transform(const int g, const Tensor<T> &x) const; //linear transform a single group, preserve its position and symmetry, x must be rank-2 flat tensor (this is useful for raising/lowering indices)
Tensor linear_transform(const int g, const NRMat<T> &x) const {Tensor<T> mat(x,true); return linear_transform(g,mat);}; //linear transform a single group, preserve its position and symmetry
}; };

61
vec.h
View File

@@ -311,10 +311,10 @@ public:
//! compute the Euclidean inner product (with conjugation in complex case) //! compute the Euclidean inner product (with conjugation in complex case)
inline const T operator*(const NRVec &rhs) const; inline const T operator*(const NRVec &rhs) const;
inline const T dot(const NRVec &rhs) const {return *this * rhs;};
//! compute the Euclidean inner product (with conjugation in complex case) with a stride-vector //! compute the Euclidean inner product (with conjugation in complex case) with a stride-vector
inline const T dot(const T *a, const int stride = 1) const; inline const T dot(const T *a, const int stride = 1, bool conjugate=true) const;
inline const T dot(const NRVec &rhs, bool conjugate=true) const {return dot(&(rhs[0]),1,conjugate);};
void gemv(const T beta, const NRMat<T> &a, const char trans, const T alpha, const NRVec &x); void gemv(const T beta, const NRMat<T> &a, const char trans, const T alpha, const NRVec &x);
void gemv(const T beta, const NRSMat<T> &a, const char trans /**< just for compatibility reasons */, const T alpha, const NRVec &x); void gemv(const T beta, const NRSMat<T> &a, const char trans /**< just for compatibility reasons */, const T alpha, const NRVec &x);
@@ -455,12 +455,12 @@ public:
//! routine for formatted output //! routine for formatted output
void fprintf(FILE *f, const char *format, const int modulo) const; void fprintf(FILE *f, const char *format, const int modulo) const;
//! routine for unformatted output //! routine for unformatted output
void put(int fd, bool dimensions=1, bool transp=0) const; void put(int fd, bool dimensions=1, bool transp=0, bool orca_format=false) const;
//! routine for formatted input //! routine for formatted input
void fscanf(FILE *f, const char *format); void fscanf(FILE *f, const char *format);
//! routine for unformatted input //! routine for unformatted input
void get(int fd, bool dimensions=1, bool transp=0); void get(int fd, bool dimensions=1, bool transp=0, bool orca_format=false);
//! constructor creating vector from sparse matrix //! constructor creating vector from sparse matrix
explicit NRVec(const SparseMat<T> &rhs); explicit NRVec(const SparseMat<T> &rhs);
@@ -1073,10 +1073,25 @@ inline const T NRVec<T>::operator*(const NRVec<T> &rhs) const {
NOT_GPU(rhs); NOT_GPU(rhs);
T dot(0); T dot(0);
for(register int i=0; i<nn; ++i) dot += v[i]*rhs.v[i]; if(LA_traits<T>::is_complex()) for(register int i=0; i<nn; ++i) dot += LA_traits<T>::conjugate(v[i])*rhs.v[i];
else for(register int i=0; i<nn; ++i) dot += v[i]*rhs.v[i];
return dot; return dot;
} }
/***************************************************************************
* compute dot product with optional conjugation
*/
template<typename T>
inline const T NRVec<T>::dot(const T *a, const int stride , bool conjugate) const
{
NOT_GPU(*this);
T dot(0);
if(LA_traits<T>::is_complex() && conjugate) for(register int i=0; i<nn; ++i) dot += LA_traits<T>::conjugate(v[i])*a[i*stride];
else for(register int i=0; i<nn; ++i) dot += v[i]*a[i*stride];
return dot;
}
/***************************************************************************//** /***************************************************************************//**
* indexing operator giving the element at given position with range checking in * indexing operator giving the element at given position with range checking in
* the DEBUG mode * the DEBUG mode
@@ -1801,9 +1816,9 @@ inline const std::complex<double> NRVec<std::complex<double> >::operator*(const
* @return \f$\sum_{i=1}^N\vec{x}_{i}\cdot y_{\mathrm{stride}\cdot(i-1) + 1}\f$ * @return \f$\sum_{i=1}^N\vec{x}_{i}\cdot y_{\mathrm{stride}\cdot(i-1) + 1}\f$
******************************************************************************/ ******************************************************************************/
template<> template<>
inline const double NRVec<double>::dot(const double *y, const int stride) const { inline const double NRVec<double>::dot(const double *y, const int stride, bool conjugate) const {
NOT_GPU(*this); NOT_GPU(*this);
return cblas_ddot(nn, y, stride, v, 1); return cblas_ddot(nn,v, 1,y,stride);
} }
/***************************************************************************//** /***************************************************************************//**
@@ -1813,10 +1828,11 @@ inline const double NRVec<double>::dot(const double *y, const int stride) const
* @return \f$\sum_{i=1}^N\vec{x}_{i}\cdot \overbar{y_{\mathrm{stride}\cdot(i-1) + 1}}\f$ * @return \f$\sum_{i=1}^N\vec{x}_{i}\cdot \overbar{y_{\mathrm{stride}\cdot(i-1) + 1}}\f$
******************************************************************************/ ******************************************************************************/
template<> template<>
inline const std::complex<double> NRVec<std::complex<double> >::dot(const std::complex<double> *y, const int stride) const { inline const std::complex<double> NRVec<std::complex<double> >::dot(const std::complex<double> *y, const int stride, bool conjugate) const {
std::complex<double> dot; std::complex<double> dot;
NOT_GPU(*this); NOT_GPU(*this);
cblas_zdotc_sub(nn, y, stride, v, 1, &dot); if(conjugate) cblas_zdotc_sub(nn, v,1,y, stride, &dot);
else cblas_zdotu_sub(nn, v,1,y, stride, &dot);
return dot; return dot;
} }
@@ -2015,12 +2031,12 @@ inline const std::complex<double> NRVec<std::complex<double> >::amin() const {
* @see NRMat<T>::put() * @see NRMat<T>::put()
******************************************************************************/ ******************************************************************************/
template <typename T> template <typename T>
void NRVec<T>::put(int fd, bool dim, bool transp) const { void NRVec<T>::put(int fd, bool dim, bool transp, bool orcaformat) const {
#ifdef CUDALA #ifdef CUDALA
if(location != cpu){ if(location != cpu){
NRVec<T> tmp = *this; NRVec<T> tmp = *this;
tmp.moveto(cpu); tmp.moveto(cpu);
tmp.put(fd,dim,transp); tmp.put(fd,dim,transp,orcaformat);
return; return;
} }
#endif #endif
@@ -2028,7 +2044,15 @@ void NRVec<T>::put(int fd, bool dim, bool transp) const {
int pad(1); //align at least 8-byte int pad(1); //align at least 8-byte
if(dim){ if(dim){
if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("write failed"); if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("write failed");
if(sizeof(int) != write(fd,&pad,sizeof(int))) laerror("write failed"); if(!orcaformat)
{
if(sizeof(int) != write(fd,&pad,sizeof(int))) laerror("write failed");
}
if(orcaformat)
{
int tmp=sizeof(T);
if(sizeof(int) != write(fd,&tmp,sizeof(int))) laerror("write failed");
}
} }
LA_traits<T>::multiput(nn,fd,v,dim); LA_traits<T>::multiput(nn,fd,v,dim);
} }
@@ -2041,12 +2065,12 @@ void NRVec<T>::put(int fd, bool dim, bool transp) const {
* @see NRMat<T>::get(), copyonwrite() * @see NRMat<T>::get(), copyonwrite()
******************************************************************************/ ******************************************************************************/
template <typename T> template <typename T>
void NRVec<T>::get(int fd, bool dim, bool transp) { void NRVec<T>::get(int fd, bool dim, bool transp, bool orcaformat) {
#ifdef CUDALA #ifdef CUDALA
if(location != cpu){ if(location != cpu){
NRVec<T> tmp; NRVec<T> tmp;
tmp.moveto(cpu); tmp.moveto(cpu);
tmp.get(fd,dim,transp); tmp.get(fd,dim,transp,orcaformat);
tmp.moveto(location); tmp.moveto(location);
*this = tmp; *this = tmp;
return; return;
@@ -2055,7 +2079,14 @@ void NRVec<T>::get(int fd, bool dim, bool transp) {
int nn0[2]; //align at least 8-byte int nn0[2]; //align at least 8-byte
errno = 0; errno = 0;
if(dim){ if(dim){
if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("read failed"); int ndims=orcaformat?1:2;
if(ndims*sizeof(int) != read(fd,&nn0,ndims*sizeof(int))) laerror("read failed");
if(orcaformat)
{
int tmp;
if(sizeof(int) != read(fd,&tmp,sizeof(int))) laerror("read failed");
if(tmp!=sizeof(T)) laerror("mismatch in orca format");
}
resize(nn0[0]); resize(nn0[0]);
}else{ }else{
copyonwrite(); copyonwrite();