diff --git a/bitvector.cc b/bitvector.cc
index c650df5..afd8d7f 100644
--- a/bitvector.cc
+++ b/bitvector.cc
@@ -103,7 +103,7 @@ bitvector& bitvector::operator&=(const bitvector &rhs)
 {
 if(size()<rhs.size()) resize(rhs.size(),true);
 copyonwrite();
-for(int i=0; i<nn; ++i) v[i] &= rhs.v[i];
+for(int i=0; i<nn; ++i) v[i] &= (i>=rhs.nn? 0 : rhs.v[i]);
 return *this;
 }
 
@@ -111,7 +111,7 @@ bitvector& bitvector::operator|=(const bitvector &rhs)
 {
 if(size()<rhs.size()) resize(rhs.size(),true);
 copyonwrite();
-for(int i=0; i<nn; ++i) v[i] |= rhs.v[i];
+for(int i=0; i<nn && i<rhs.nn; ++i) v[i] |= rhs.v[i];
 return *this;
 }
 
@@ -119,7 +119,7 @@ bitvector& bitvector::operator^=(const bitvector &rhs)
 {
 if(size()<rhs.size()) resize(rhs.size(),true);
 copyonwrite();
-for(int i=0; i<nn; ++i) v[i] ^= rhs.v[i];
+for(int i=0; i<nn && i<rhs.nn; ++i) v[i] ^= rhs.v[i];
 return *this;
 }
 
@@ -136,7 +136,7 @@ x+= (x>>16);
 return x&0x3f;
 }
 #else
-//@@@@ use an efficient trick
+//@@@@ use an efficient trick too
 static unsigned int word_popul(unsigned long x)
 {
 unsigned int s=0;
@@ -222,7 +222,7 @@ if(modulo)
 return s+word_popul(a);
 }
 
-unsigned int bitvector::operator%(const bitvector &y) const
+unsigned int bitvector::bitdiff(const bitvector &y) const
 {
 if(nn!=y.nn) laerror("incompatible size in bitdifference");
 
@@ -236,6 +236,143 @@ if(modulo)
         a &= ~mask;
         }
 return s+word_popul(a);
+}
+
+static unsigned int nlz64(uint64_t x0)
+{
+int64_t x=x0;
+uint64_t y;
+unsigned int n;
+n=0;
+y=x;
+L: if ( x<0) return n;
+if(y==0) return 64-n;
+++n;
+x<<=1;
+y>>=1;
+goto L;
+}
+
+static unsigned int ntz64(uint64_t x)
+{
+unsigned int n;
+if(x==0) return 64;
+n=1;
+if((x&0xffffffff)==0) {n+=32; x>>=32;}
+if((x&0xffff)==0) {n+=16; x>>=16;}
+if((x&0xff)==0) {n+=8; x>>=8;}
+if((x&0xf)==0) {n+=4; x>>=4;}
+if((x&0x3)==0) {n+=2; x>>=2;}
+return n-(x&1);
+}
+
+unsigned int bitvector::nlz() const
+{
+int leadblock=nn-1;
+unsigned int n=0;
+while(leadblock>0 && v[leadblock] == 0) 
+	{
+	--leadblock;
+	n+=blockbits;
+	}
+n+= nlz64(v[leadblock]);
+if(modulo) n-= blockbits-modulo;
+return n;
+}
+
+unsigned int bitvector::ntz() const
+{
+int tailblock=0;
+unsigned int n=0;
+if(iszero()) return size();
+while(tailblock<nn-1 && v[tailblock] == 0)
+        {
+        ++tailblock;
+        n+=blockbits;
+        }
+n+= ntz64(v[tailblock]);
+return n;
+}
+
+//NOTE: naive algorithm, just for testing
+//does not perform modulo irreducible polynomial, is NOT GF(2^n) multiplication
+bitvector bitvector::operator*(const bitvector &rhs) const
+{
+bitvector r(size()+rhs.size());
+r.clear();
+bitvector tmp(rhs);
+tmp.resize(size()+rhs.size(),true);
+for(int i=0; i<=degree(); ++i)
+	{
+	if((*this)[i]) r+= tmp;
+	tmp.leftshift(1,false);
+	}
+return r;
+}
+
+void bitvector::resize(const unsigned int n, bool preserve)
+{
+int old=size(); 
+NRVec<bitvector_block>::resize((n+blockbits-1)/blockbits,preserve); 
+modulo=n%blockbits; 
+if(preserve)  //clear newly allocated memory
+	{
+	for(int i=old; i<nn*blockbits; ++i) this->reset(i);
+	} 
+else clear();
+}
+
+
+bitvector bitvector::division(const bitvector &rhs,  bitvector &remainder) const
+{
+if(rhs.is_zero()) laerror("division by zero binary polynomial");
+if(is_zero() || rhs.is_one()) {remainder.clear(); return *this;}
+bitvector r(size());
+r.clear();
+remainder= *this;
+remainder.copyonwrite();
+
+int rhsd = rhs.degree();
+int d;
+while((d=remainder.degree()) >= rhsd)
+	{
+	unsigned int pos = d-rhsd;
+	r.set(pos);
+	remainder -= (rhs<<pos);
+	}
+
+return r;
+}
+
+bitvector bitvector::gcd(const bitvector &rhs) const
+{
+bitvector big,small;
+
+if(degree()>=rhs.degree()) 
+	{big= *this; small=rhs;}
+else
+	{big=rhs; small= *this;}
+
+if(big.is_zero())
+	{
+	if(small.is_zero()) laerror("two zero arguments in gcd");
+	return small;
+	}
+
+if(small.is_zero()) return big;
+if(small.is_one()) return small;
+if(big.is_one()) return big;
+
+do      {
+        bitvector help=small;
+        small= big%small;
+        big=help;
+        }
+while(! small.is_zero());
+return big;
+
+
+
 }
 
 void bitvector::read(int fd, bool dimensions, bool transp)
@@ -260,4 +397,5 @@ NRVec<bitvector_block>::put(fd,dimensions,transp);
 
 
 
+
 }//namespace
diff --git a/bitvector.h b/bitvector.h
index a91c461..7edf0b6 100644
--- a/bitvector.h
+++ b/bitvector.h
@@ -25,7 +25,7 @@
 
 namespace LA {
 //compressed storage of large bit vectors
-//let's now use 64-bit blocks exclusively 
+//let's now use 64-bit blocks exclusively  for simplicity
 
 typedef uint64_t bitvector_block; 
 
@@ -48,10 +48,10 @@ public:
 	//operator= seems to be correctly synthetized by the compiler
 	//override dereferencing to address single bits, is however possible
 	//only in the const context (otherwise we would have to define a type which, when assigned to, changes a single bit - possible but probably inefficient)
-	void resize(const unsigned int n, bool preserve=false) {NRVec<bitvector_block>::resize((n+blockbits-1)/blockbits,preserve); modulo=n%blockbits;};
+	void resize(const unsigned int n, bool preserve=false); //preserve data or clear
 	unsigned int size() const {return (nn*blockbits)-blockbits+(modulo?modulo:blockbits);};
 	//arguments must be unsigned to keep the resulting assembly code simple and efficient
-        const bool operator[](const unsigned int i) const {return (v[i/blockbits] >>(i%blockbits))&1UL;};
+        const bool operator[](const unsigned int i) const {return (v[i/blockbits] >>(i%blockbits))&1ULL;};
 	const bool get(const unsigned int i) const {return (*this)[i];};
 	bitvector_block getblock(const unsigned int i) const {return v[i];}; //integer interpretation
 	void setblock(const unsigned int i, const bitvector_block b) {v[i]=b;};
@@ -62,6 +62,8 @@ public:
 	void clear() {copyonwrite(true); memset(v,0,nn*sizeof(bitvector_block));};
 	void fill() {memset(v,0xff,nn*sizeof(bitvector_block));};
 	bool iszero() const {for(int i=0; i<nn; ++i) if(v[i]) return false; return true;};
+	bool is_zero() const {return iszero();};
+	bool is_one() const {if(v[0]!=1) return false; for(int i=1; i<nn; ++i) if(v[i]) return false; return true;};
 	void randomize();
 	bool operator!=(const bitvector &rhs) const;
 	bool operator==(const bitvector &rhs) const {return !(*this != rhs);};
@@ -80,15 +82,24 @@ public:
 	bitvector operator^(const bitvector &rhs) const {return bitvector(*this) ^= rhs;};
 	bitvector operator+(const bitvector &rhs) const {return *this ^ rhs;}; //addition modulo 2
 	bitvector operator-(const bitvector &rhs) const {return *this ^ rhs;}; //subtraction modulo 2
-        unsigned int operator%(const bitvector &y) const; //number of differing bits
+	bitvector operator*(const bitvector &rhs) const; //multiplication of polynomials over GF(2) NOTE: naive algorithm, does not employ CLMUL nor fft-like approach, only for short vectors!!!
+	bitvector division(const bitvector &rhs,  bitvector &remainder) const;
+	bitvector operator/(const bitvector &rhs) const {bitvector rem(rhs.size()); return division(rhs,rem);};
+	bitvector operator%(const bitvector &rhs) const {bitvector rem(rhs.size()); division(rhs,rem); return rem;};
+	bitvector gcd(const bitvector &rhs) const;
+	bitvector lcm(const bitvector &rhs) const {return (*this)*rhs/this->gcd(rhs);};
+        unsigned int bitdiff(const bitvector &y) const; //number of differing bits
 	unsigned int population(const unsigned int before=0) const; //number of 1's
+	unsigned int nlz() const; //number of leading zeroes
+	unsigned int degree() const {if(iszero()) return 0; else return size()-nlz()-1;}; //interprested as a polynomial over GF(2)
+	unsigned int ntz() const;  //number of trailing zeroes
 	//extended, truncated const i.e. not on *this but return new entity, take care of modulo's bits
 	//logical shifts
 	bitvector& operator>>=(unsigned int i);
 	bitvector& leftshift(unsigned int i, bool autoresize=false);
 	bitvector& operator<<=(unsigned int i) {return leftshift(i,true);};
-	bitvector operator>>(unsigned int i) {bitvector r(*this); return r>>=i;};
-	bitvector operator<<(unsigned int i) {bitvector r(*this); return r<<=i;};
+	bitvector operator>>(unsigned int i) const {bitvector r(*this); return r>>=i;};
+	bitvector operator<<(unsigned int i) const {bitvector r(*this); return r<<=i;};
 	//logical rotations not implemented yet
 	//unformatted file IO
 	void read(int fd, bool dimensions=1, bool transp=0);
diff --git a/polynomial.h b/polynomial.h
index f4b21ea..81bdff2 100644
--- a/polynomial.h
+++ b/polynomial.h
@@ -69,7 +69,7 @@ public:
                 }
 	Polynomial operator+(const Polynomial &rhs) const {return Polynomial(*this) += rhs;};
 	Polynomial operator-(const Polynomial &rhs) const {return Polynomial(*this) -= rhs;};
-	Polynomial operator*(const Polynomial &rhs) const //for very long polynomials FFT should be used
+	Polynomial operator*(const Polynomial &rhs) const //NOTE: naive implementation! For very long polynomials FFT-based methods should be used
 		{
 		NOT_GPU(*this); 
 		Polynomial r(degree()+rhs.degree());
diff --git a/t.cc b/t.cc
index 26dc7b7..8ac0a84 100644
--- a/t.cc
+++ b/t.cc
@@ -2852,7 +2852,7 @@ cout <<endl<<"Inverse via svd\n"<<ainv2<<endl;
 cout <<"Difference of inverses = "<<(ainv-ainv2).norm()<<endl;
 }
 
-if(1)
+if(0)
 {
 int seed;
 int f=open("/dev/random",O_RDONLY);
@@ -2864,17 +2864,79 @@ int n;
 cin >>n;
 bitvector v(n);
 v.randomize();
-//do{
-//	cout <<v <<endl;
-//	v>>=1;
-//}while(!v.iszero());
+do{
+	cout <<v << " NLZ "<<v.nlz()<<" DEG "<<v.degree()<<" NTZ "<<v.ntz()<<endl;
+	v>>=1;
+}while(!v.iszero());
+	cout <<v << " NLZ "<<v.nlz()<< " DEG "<<v.degree()<<" NTZ "<<v.ntz()<<endl;
 
+v.randomize();
 
 for(int i=0; i<n; ++i) 
 	{
-	cout <<v <<endl;
+	cout <<v << " size "<<v.size()<<" NLZ "<<v.nlz()<< " DEG "<<v.degree()<<" NTZ "<<v.ntz()<<endl;
         v<<=1;
 	}
+	cout <<v << " size "<<v.size()<<" NLZ "<<v.nlz()<< " DEG "<<v.degree()<<" NTZ "<<v.ntz()<<endl;
+
+v.randomize();
+for(int i=0; i<v.size(); ++i) cout <<(v<<i)<<endl;
+}
+
+if(1)
+{
+int seed;
+int f=open("/dev/random",O_RDONLY);
+if(sizeof(int)!=read(f,&seed,sizeof(int))) laerror("cannot read /dev/random");
+close(f);
+srand(seed);
+
+int n;
+cin >>n;
+bitvector v(n),u(n);
+u.randomize();
+v.randomize();
+bitvector w=u*v;
+bitvector z=v*u;
+cout <<u<<endl;
+cout <<v<<endl;
+cout <<w<<endl;
+if(w!=z) laerror("error in bitvector multiplication");
+if(w.degree()!=v.degree()+u.degree()) laerror("error in degree or multiplication");
+cout <<w/u-v<<endl;
+cout <<w%u<<endl;
+cout <<w/v-u<<endl;
+cout <<w%v<<endl;
+if(!(w/u-v).is_zero()) laerror("error in division");
+if(!(w/v-u).is_zero()) laerror("error in division");
+if(!(w%u).is_zero()) laerror("error in division");
+if(!(w%v).is_zero()) laerror("error in division");
+cout <<w.gcd(u)-u<<endl;
+cout <<w.gcd(v)-v<<endl;
+if(!(w.gcd(u)-u).is_zero()) laerror("error in gcd");
+if(!(w.gcd(v)-v).is_zero()) laerror("error in gcd");
+
+u.randomize();
+v.randomize();
+bitvector g=u.gcd(v);
+bitvector l=u.lcm(v);
+cout <<u<<endl;
+cout <<v<<endl;
+cout <<g<<endl;
+cout <<l<<endl;
+cout <<l/u - v/g<<endl;
+cout <<l/v - u/g<<endl;
+cout <<l%u<<endl;
+cout <<l%v<<endl;
+cout <<u%g<<endl;
+cout <<v%g<<endl;
+if(!(l/u - v/g).is_zero()) laerror("error in gcd");
+if(!(l/v - u/g).is_zero()) laerror("error in gcd");
+if(!(l%u).is_zero()) laerror("error in gcd");
+if(!(l%v).is_zero()) laerror("error in gcd");
+if(!(u%g).is_zero()) laerror("error in gcd");
+if(!(v%g).is_zero()) laerror("error in gcd");
+
 }
 
 }