/*
								+----------------------------------+
								|                                  |
								|    ***  Bit array class   ***    |
								|                                  |
								|   Copyright  -tHE SWINe- 2005   |
								|                                  |
								|            BitArray.h            |
								|                                  |
								+----------------------------------+
*/

#pragma once
#ifndef __BIT_ARRAY_INCLUDED
#define __BIT_ARRAY_INCLUDED

/**
 *	@file BitArray.h
 *	@author -tHE SWINe-
 *	@date 2005
 *	@brief Simple and fast bit array class
 *
 *	@date 2007-03-15
 *
 *	passed code revision, improved to meet berLame quality standards
 *
 *	now using unsigned integers for lengths instead of signed
 *
 *	made CBit owned by CBitArray (as was logical)
 *
 *	fixed hack in constant bit access using operator []
 *
 *	renamed CBitArray::Free() to CBitArray::Erase()
 *
 *	CBitArray::operator ==() now returns whether arrays are equal (bool)
 *	instead of integer equality sign which is now returned by CBitArray::n_Compare()
 *
 *	added inequality comparison operators
 *
 *	removed most of non-operator functions (Set_Bit_High() and Set_Bit_Low()
 *	remain only for performance purposes)
 *
 *	most stuff is now inline, the whole thing is significantly faster
 *
 *	@date 2007-06-26
 *
 *	fixed some bugs in array reallocation using CBitArray::Extend and
 *	in array copy operator CBitArray::operator =
 *
 *	@date 2007-03-04
 *
 *	changed integer types for more safety (now using uint32_t type as array
 *	base unit and unsigned long to store array length)
 *
 *	@date 2008-12-28
 *
 *	exposed internal data buffer for easier and faster serialization
 *
 *	fixed error which occurred when copying bits from one array to another
 *	(CBit instances were copied, no array writes occurred)
 *
 *	@date 2009-04-23
 *
 *	motivation for writing this file:
 *		std::vector<bool> is troublesome and generally two considered options are
 *			to erase (not deprecate) it, or rename it
 *		std::vector<bool> doesn't offer access to its data buffer
 *		std::vector<bool> doesn't offer Set_Bit_Low() and Set_Bit_High() functions,
 *			therefore theoretically offers lower reachable performance
 *
 *	added CBitArray::n_Capacity(), CBitArray::Reserve() functions for more efficient
 *	(re)allocation
 *
 *	renamed CBitArray::CBit to CBitArray::CBitReference
 *
 *	added CBitArray::CBitReference::Raise() and CBitArray::CBitReference::Clear()
 *	functions for more efficient writes to individual bits (note
 *	CBitArray::CBitReference::operator =(bool) contains branch, not present
 *	in either of above functions)
 *
 *	@date 2009-10-26
 *
 *	added CBitReference::Invert(), CBitArray::Invert_Bit()
 *
 *	renamed CBitArray::Set_Bit_High() to CBitArray::Raise_Bit() and CBitArray::Set_Bit_Low()
 *	to CBitArray::Clear_Bit().
 *
 *	renamed CBitArray::Extend() to CBitArray::Resize().
 *
 *	changed CBitArray::n_Buffer_Size() to return size in bytes (to avoid crashing older code,
 *	renamed CBitArray::p_Buffer() to CBitArray::p_Get_Buffer(), CBitArray::FillBuffer() to
 *	CBitArray::Set_Buffer() and added size parameter, so older code had to be rewritten).
 *
 *	@t_odo write CBitArray::transform() (had it somewhere - investigate that), write optimized
 *		versions and test them extensively.
 *	@t_odo specify bit array storage type as CBitArray member (now using uint32_t), for
 *		experiments
 *
 *	@date 2010-11-25
 *
 *	Implemented and tested many new functions, wrote documentation comments. Changes include
 *	the new functions CBitArray::Generate(), CBitArray::ForEach(), CBitArray::Transform() and
 *	their _Constrained versions.
 *
 *	@todo implement CBitIterator (similar to CBitDifference, but allowing change of index,
 *		index difference and comparison), mind VS 2008 safe iterators
 *	@todo write CBitArray::insert(), CBitArray::copy(), CBitArray::erase() and test them extensively.
 *
 *	@date 2012-06-19
 *
 *	Moved multiple inclusion guard before file documentation comment.
 *
 */

#include "Integer.h"
#include "Buffer.h"

/**
 *	@def __BIT_ARRAY_FORCE32
 *	@brief forces bit array to use 32-bit integers (fixes incompatibilities between x86 and x64 data)
 */
#define __BIT_ARRAY_FORCE32

/**
 *	@brief simple bit-array implementation
 *
 *	This is bit array implementation, similar to std::vector<bool>, but
 *	exposing some more of the implementation, so it's theoretically faster.
 *	This also allows accessing its internal buffer contents.
 *
 */
class CBitArray : protected TBuffer {
public:
	/**
	 *	@brief type for bit array size
	 *
	 *	This normally equals the global size_t type; however,
	 *	if __LARGE_BIT_ARRAY__ is defined, uint64_t is used instead.
	 *	In that case __LARGE_BUFFER__ must be defined as well, otherwise
	 *	the compilation is stopped by the error directive below.
	 */
#ifdef __LARGE_BIT_ARRAY__
#ifndef __LARGE_BUFFER__
#error "error: __LARGE_BUFFER__ not defined, while __LARGE_BIT_ARRAY__ is defined"
#endif // __LARGE_BUFFER__
	typedef uint64_t size_t;
#endif // __LARGE_BIT_ARRAY__

	typedef size_t index_t; /**< @brief type for bit array indices (same as the size type) */

	/**
	 *	@brief internal type to store the bit array units
	 *
	 *	If __BIT_ARRAY_FORCE32 is defined, the units are always uint32_t
	 *	(keeps the stored data layout identical between x86 and x64 builds).
	 *	Otherwise unsigned long is used as the unit type.
	 */
#ifdef __BIT_ARRAY_FORCE32
	typedef uint32_t _NativeInt;
#else // __BIT_ARRAY_FORCE32
	typedef unsigned long _NativeInt;
#endif // __BIT_ARRAY_FORCE32

	/**
	 *	@brief bit array internal parameters stored in enum
	 *
	 *	All the values are compile-time constants derived from sizeof(_NativeInt).
	 */
	enum {
		unit_SizeBytes = sizeof(_NativeInt), /**< size of internal type, in bytes (4, or 8) */
		unit_SizeBits = unit_SizeBytes * 8, /**< size of internal type, in bits (32, or 64) */
		index_Shift = n_Log2_Static(unit_SizeBits), /**< bit index shift; base-2 logarithm of unit_SizeBits (5, or 6) */
		index_Mask = unit_SizeBits - 1 /**< bit index mask; selects the bit position inside a unit (0x1f, or 0x3f) */
	};

	/**
	 *	@brief writable bit reference class
	 *
	 *	This is kind of pointer which points to a single bit
	 *	in the array, enabling it to be read or written.
	 *
	 *	@note Note this bit reference gets invalidated on bit array resize!
	 */
	class CBitReference {
	protected:
		_NativeInt &m_r_array; /**< @brief reference to the bit array unit, containing the bit */
		_NativeInt m_n_mask; /**< @brief mask of the bit in the unit (a single bit is set here) */

	public:
		/**
		 *	@brief default constructor; creates reference to a bit
		 *
		 *	@param[in,out] r_array is integer from bit array, containing the bit to be referenced
		 *	@param[in] n_mask is bit mask
		 */
		inline CBitReference(_NativeInt &r_array, _NativeInt n_mask)
			:m_r_array(r_array), m_n_mask(n_mask)
		{}

		/**
		 *	@brief sets a new value of this bit (change reflects in the array)
		 *
		 *	@param[in] b_value is a value to be written
		 *
		 *	@return Returns b_value
		 */
		inline bool operator =(bool b_value)
		{
			if(b_value)
				m_r_array |= m_n_mask;
			else
				m_r_array &= ~m_n_mask;
			return b_value;
		}

		/**
		 *	@brief inverts this bit
		 */
		inline void Invert()
		{
			m_r_array ^= m_n_mask;
		}

		/**
		 *	@brief writes a '1' to this bit
		 *
		 *	This effectively does the same as "bit = true", but operator =() in fact contains
		 *	a branch to decide wheter to clear the bit (use logical and), or raise the bit
		 *	(use logical ot). Using this function can lead to small speed improvement.
		 *
		 *	@note In case speed is critical, it might be more appropriate to use CBitArray::Raise_Bit().
		 */
		inline void Raise()
		{
			m_r_array |= m_n_mask;
		}

		/**
		 *	@brief writes a '0' to this bit
		 *
		 *	This effectively does the same as "bit = false", but operator =() in fact contains
		 *	a branch to decide wheter to clear the bit (use logical and), or raise the bit
		 *	(use logical ot). Using this function can lead to small speed improvement.
		 *
		 *	@note In case speed is critical, it might be more appropriate to use CBitArray::Clear_Bit().
		 */
		inline void Clear()
		{
			m_r_array &= ~m_n_mask;
		}

		/**
		 *	@brief writes value of r_bit to bit in the array
		 *
		 *	Writes value of r_bit to bit in the array, referenced by this
		 *	object, and returns the written value.
		 *
		 *	@param[in] r_bit is reference to a bit containing value to be written
		 *
		 *	@return Returns value of r_bit
		 */
		inline bool operator =(const CBitReference &r_bit)
		{
			return *this = bool(r_bit);
		}

		/**
		 *	@brief bit comparison operator
		 *
		 *	Compares values of two bits (does not compare
		 *	array and index, compares referenced value).
		 *
		 *	@param[in] b_value is reference value to be compared to
		 *
		 *	@return Returns true if bits are equal, false otherwise.
		 */
		inline bool operator ==(const CBitReference &r_bit) const
		{
			return bool(*this) == bool(r_bit);
		}

		/**
		 *	@brief bit comparison operator
		 *
		 *	Compares values of two bits (does not compare
		 *	array and index, compares referenced value).
		 *
		 *	@param[in] b_value is reference value to be compared to
		 *
		 *	@return Returns true if this bit is zero, while b_value is true, false otherwise.
		 */
		inline bool operator <(const CBitReference &r_bit) const
		{
			return !bool(*this) && bool(r_bit);
		}

		/**
		 *	@brief bit comparison operator
		 *
		 *	Compares values of two bits (does not compare
		 *	array and index, compares referenced value).
		 *
		 *	@param[in] b_value is reference value to be compared to
		 *
		 *	@return Returns true if this bit is one, while b_value is false, false otherwise.
		 */
		inline bool operator >(const CBitReference &r_bit) const
		{
			return r_bit < *this;
		}

		/**
		 *	@brief bit comparison operator
		 *
		 *	Compares values of two bits (does not compare
		 *	array and index, compares referenced value).
		 *
		 *	@param[in] b_value is reference value to be compared to
		 *
		 *	@return Returns true if this <= r_bit, false otherwise.
		 */
		inline bool operator <=(const CBitReference &r_bit) const
		{
			return !(r_bit < *this);
		}

		/**
		 *	@brief bit comparison operator
		 *
		 *	Compares values of two bits (does not compare
		 *	array and index, compares referenced value).
		 *
		 *	@param[in] b_value is reference value to be compared to
		 *
		 *	@return Returns true if this >= r_bit, false otherwise.
		 */
		inline bool operator >=(const CBitReference &r_bit) const
		{
			return !(r_bit > *this);
		}

		/**
		 *	@brief bit comparison operator
		 *
		 *	Compares values of two bits (does not compare
		 *		array and index, compares referenced value).
		 *
		 *	@param[in] b_value is reference value to be compared to
		 *
		 *	@return Returns true if this != r_bit, false otherwise.
		 */
		inline bool operator !=(const CBitReference &r_bit) const
		{
			return !(r_bit == *this);
		}

		/**
		 *	@brief gets value of the referenced bit
		 *
		 *	@return Returns value of bit in referenced array.
		 */
		inline operator bool() const
		{
			return (m_r_array & m_n_mask) != 0;
		}
	};

protected:
	/**
	 *	@brief reader for bit sequences starting at a unit boundary
	 *
	 *	The units are handed out one by one without any modification, only
	 *	the final (possibly partial) unit is masked by CBitArray::n_Mask_FirstN().
	 *	The bounds and the call order are checked in debug builds only.
	 */
	class CAlignedReader {
	protected:
		const _NativeInt *m_p_src; /**< @brief the unit to be read next */
#ifdef _DEBUG
		const _NativeInt *m_p_end; /**< @brief end of the readable units (debug only) */
		bool m_b_called_last; /**< @brief raised once the final read took place (debug only) */
#endif // _DEBUG

	public:
#ifdef _DEBUG
		/**
		 *	@brief constructor (debug version)
		 *
		 *	@param[in] p_src is pointer to the unit, containing the first bit
		 *	@param[in] p_end is pointer to the end of the readable units;
		 *		neither this unit nor anything past it must ever be read
		 */
		inline CAlignedReader(const _NativeInt *p_src, const _NativeInt *p_end)
			:m_p_src(p_src), m_p_end(p_end), m_b_called_last(false)
#else // _DEBUG
		/**
		 *	@brief constructor
		 *	@param[in] p_src is pointer to the unit, containing the first bit
		 */
		inline CAlignedReader(const _NativeInt *p_src)
			:m_p_src(p_src)
#endif // _DEBUG
		{}

		/**
		 *	@brief reads a single full unit and advances to the next one
		 *	@return Returns the value of the unit, as stored in the source.
		 */
		inline _NativeInt operator()()
		{
			_ASSERTE(!m_b_called_last); // full reads are not allowed once the final read was done
			_ASSERTE(m_p_src < m_p_end); // the read must stay inside the source

			return *m_p_src ++;
		}

		/**
		 *	@brief performs the final read of an incomplete unit
		 *
		 *	@param[in] n_bit_num is number of valid bits in the unit
		 *
		 *	@return Returns the current unit, masked by CBitArray::n_Mask_FirstN(n_bit_num).
		 *
		 *	@note This is supposed to be the very last call to the reader,
		 *		the read position is therefore left unchanged.
		 */
		inline _NativeInt operator()(size_t n_bit_num)
		{
			_ASSERTE(!m_b_called_last); // the final read happens at most once
#ifdef _DEBUG
			m_b_called_last = true;
#endif // _DEBUG

			const _NativeInt n_last_unit = *m_p_src;
			return n_last_unit & CBitArray::n_Mask_FirstN(n_bit_num);
		}
	};

	/**
	 *	@brief unit non-aligned bit reader
	 *
	 *	Reads a sequence of bits that starts in the middle of a unit and returns
	 *	it re-aligned to full units. The yet unreturned bits of the last read unit
	 *	are kept in a carry accumulator and merged with the leading bits of the
	 *	next unit on each read.
	 *
	 *	@note NOTE(review): the code relies on CBitArray::n_Mask_FirstN(n) selecting
	 *		the n most significant bits of a unit (it is defined outside of this
	 *		part of the file) - confirm.
	 */
	class CReader {
	protected:
		const _NativeInt *m_p_src; /**< @brief the unit to be read next */
#ifdef _DEBUG
		const _NativeInt *m_p_end; /**< @brief end of the readable units (debug only) */
		bool m_b_called_last; /**< @brief raised once the final read took place (debug only) */
#endif // _DEBUG
		_NativeInt m_n_carry; // always contains CBitArray::unit_SizeBits - m_n_shift bits
		size_t m_n_shift; /**< @brief offset of the first bit inside its unit, also the left shift applied to the source units */
		_NativeInt m_n_mask; /**< @brief mask of the bits taken from the next unit to complete the carry */

	public:
#ifdef _DEBUG
		/**
		 *	@brief default constructor (debug); initializes the reader and reads in the first unit
		 *
		 *	@param[in] p_src is pointer to the unit, containing the first bit
		 *	@param[in] n_bit_offset is bit offset of the first significant bit to be read (MSB = 0, LSB = unit_SizeBits-1)
		 *	@param[in] p_end is pointer to the last unit; this pointer must never be read (including values past this pointer)
		 *
		 *	@note The unit at p_src is read immediately, so it must be valid.
		 */
		inline CReader(const _NativeInt *p_src, size_t n_bit_offset, const _NativeInt *p_end)
			:m_p_src(p_src + 1), m_p_end(p_end), m_b_called_last(false), m_n_carry(*p_src << n_bit_offset), m_n_shift(n_bit_offset),
			m_n_mask(CBitArray::n_Mask_FirstN(n_bit_offset))
#else // _DEBUG
		/**
		 *	@brief default constructor; initializes the reader and reads in the first unit
		 *
		 *	@param[in] p_src is pointer to the unit, containing the first bit
		 *	@param[in] n_bit_offset is bit offset of the first significant bit to be read (MSB = 0, LSB = unit_SizeBits-1)
		 *
		 *	@note The unit at p_src is read immediately, so it must be valid.
		 */
		inline CReader(const _NativeInt *p_src, size_t n_bit_offset)
			:m_p_src(p_src + 1), m_n_carry(*p_src << n_bit_offset), m_n_shift(n_bit_offset),
			m_n_mask(CBitArray::n_Mask_FirstN(n_bit_offset))
#endif // _DEBUG
		{
			_ASSERTE(n_bit_offset > 0 && n_bit_offset < CBitArray::unit_SizeBits);
			// make sure this really is unaligned; otherwise the CAlignedReader should be used instead
			// (this also keeps both shift amounts used below in the valid range)
		}

		/**
		 *	@brief reads a full unit worth of bits
		 *
		 *	Reads one more unit from the source, completes the carried bits
		 *	with its leading bits and carries the remaining bits over to the next call.
		 *
		 *	@return Returns unit_SizeBits bits of the source sequence.
		 */
		inline _NativeInt operator()()
		{
			_ASSERTE(!m_b_called_last); // make sure this isn't called after operator()(size_t n_bit_num)
			_ASSERTE(m_p_src < m_p_end); // make sure array bounds aren't crossed

			_NativeInt n_src = *m_p_src;
			++ m_p_src;
			// read one more unit

			_NativeInt n_result = m_n_carry | ((n_src & m_n_mask) >> (CBitArray::unit_SizeBits - m_n_shift));
			// merge with carry to get full unit (the masked bits are moved below the carried ones)

			m_n_carry = n_src << m_n_shift;
			// carry the rest

			return n_result;
		}

		/**
		 *	@brief performs the final read of an incomplete unit
		 *
		 *	The source is only accessed if the carry accumulator
		 *	doesn't hold enough bits already.
		 *
		 *	@param[in] n_bit_num is number of bits to be read
		 *
		 *	@return Returns the remaining bits, masked by CBitArray::n_Mask_FirstN(n_bit_num).
		 *
		 *	@note This is supposed to be the very last call to the reader.
		 */
		inline _NativeInt operator()(size_t n_bit_num)
		{
			_ASSERTE(!m_b_called_last); // make sure this is called exactly once
#ifdef _DEBUG
			m_b_called_last = true;
#endif // _DEBUG
			if(n_bit_num <= CBitArray::unit_SizeBits - m_n_shift)
				return m_n_carry & CBitArray::n_Mask_FirstN(n_bit_num);
			// in case we have enough bits in the carry accumulator

			_ASSERTE(m_p_src < m_p_end); // make sure array bounds aren't crossed

			return (m_n_carry | ((*m_p_src & m_n_mask) >> (CBitArray::unit_SizeBits - m_n_shift))) &
				CBitArray::n_Mask_FirstN(n_bit_num);
			// otherwise there is one last read
		}
	};

	/**
	 *	@brief adaptor applying a unary operation to the units produced by a reader
	 *
	 *	The adaptor has the same two function operators as the readers have,
	 *	so it can be used in place of a reader.
	 *
	 *	@tparam CUnaryOp is unary function object, applied to every unit read
	 *	@tparam _Reader is the reader type (needs both versions of the function operator)
	 */
	template <class CUnaryOp, class _Reader>
	class CTransform {
	private:
		CUnaryOp m_op; /**< @brief the operation applied to the units */
		_Reader m_reader; /**< @brief source of the units */

	public:
		/**
		 *	@brief constructor; stores copies of both arguments
		 *
		 *	@param[in] op is the unary operation
		 *	@param[in] reader is the reader supplying the operands
		 */
		CTransform(CUnaryOp op, _Reader reader)
			:m_op(op),
			m_reader(reader)
		{}

		/**
		 *	@brief reads a full unit and applies the operation to it
		 *	@return Returns result of the operation.
		 */
		CBitArray::_NativeInt operator ()()
		{
			return m_op(m_reader());
		}

		/**
		 *	@brief performs the final partial read and applies the operation to it
		 *	@param[in] n_bit_num is number of bits, passed on to the reader
		 *	@return Returns result of the operation.
		 */
		CBitArray::_NativeInt operator ()(size_t n_bit_num)
		{
			return m_op(m_reader(n_bit_num));
		}

		/**
		 *	@brief gets the operation object
		 *	@return Returns copy of the stored operation (in its current state).
		 */
		operator CUnaryOp() const
		{
			return m_op;
		}
	};

	/**
	 *	@brief adaptor applying a binary operation to the units produced by two readers
	 *
	 *	The adaptor has the same two function operators as the readers have,
	 *	so it can be used in place of a reader. Each call reads one unit from
	 *	each of the readers and combines them using the operation.
	 *
	 *	@tparam CBinaryOp is binary function object, applied to every pair of units read
	 *	@tparam _Reader0 is type of the reader supplying the first operand
	 *	@tparam _Reader1 is type of the reader supplying the second operand
	 *
	 *	@note The order in which the two readers are invoked inside of a single
	 *		call is not specified by the language.
	 */
	template <class CBinaryOp, class _Reader0, class _Reader1>
	class CTransform2 {
		CBinaryOp m_op; /**< @brief the operation applied to the pairs of units */
		_Reader0 m_reader0; /**< @brief source of the first operands */
		_Reader1 m_reader1; /**< @brief source of the second operands (must be stored as _Reader1, the two reader types may differ) */

	public:
		/**
		 *	@brief constructor; stores copies of all the arguments
		 *
		 *	@param[in] op is the binary operation
		 *	@param[in] reader0 is the reader supplying the first operands
		 *	@param[in] reader1 is the reader supplying the second operands
		 */
		inline CTransform2(CBinaryOp op, _Reader0 reader0, _Reader1 reader1)
			:m_op(op), m_reader0(reader0), m_reader1(reader1)
		{}

		/**
		 *	@brief reads a full unit from each reader and combines them
		 *	@return Returns result of the operation.
		 */
		inline CBitArray::_NativeInt operator ()()
		{
			return m_op(m_reader0(), m_reader1());
		}

		/**
		 *	@brief performs the final partial read on each reader and combines the results
		 *	@param[in] n_bit_num is number of bits, passed on to both of the readers
		 *	@return Returns result of the operation.
		 */
		inline CBitArray::_NativeInt operator ()(size_t n_bit_num)
		{
			return m_op(m_reader0(n_bit_num), m_reader1(n_bit_num));
		}

		/**
		 *	@brief gets the operation object
		 *	@return Returns copy of the stored operation (in its current state).
		 */
		inline operator CBinaryOp() const
		{
			return m_op;
		}
	};

protected:
	size_t m_n_used_bits; /**< @brief size of the array, in bits (the value returned by n_Size()) */

public:
	/**
	 *	@brief default constructor; creates an empty bit array
	 */
	CBitArray();

	/**
	 *	@brief constructor; creates bit array with specified length and undefined contents
	 *	@param[in] n_length_bits is length of a new array, in bits
	 *	@note It is advised to call n_Size() afterwards to make sure there was enough memory.
	 *	@note NOTE(review): this constructor is not explicit, so an integer
	 *		converts to a bit array implicitly - confirm that this is intended.
	 */
	CBitArray(size_t n_length_bits);

	/**
	 *	@brief copy-constructor
	 *	@param[in] r_other is the array being copied from
	 *	@note It is advised to call n_Size() afterwards to make sure there was enough memory.
	 */
	CBitArray(const CBitArray &r_other);

	/**
	 *	@brief fills the whole array with a single value
	 *
	 *	@param[in] b_value is the value all the bits in the array are set to
	 *
	 *	@note The return type is intentionally void, as it is unclear whether
	 *		b_value or reference to the array should be returned; either could
	 *		cause bugs when using this operator in multiple assignments.
	 */
	inline void operator =(bool b_value)
	{
		if(!b_value) {
			Clear();
			return;
		}
		// writing zeros

		Raise();
		// writing ones
	}

	/**
	 *	@brief assignment operator
	 *
	 *	@param[in] r_other is the array to copy to this array
	 *
	 *	@note This operator intentionally returns void as it is unclear whether it should
	 *		return a value, or reference to the array, which could cause bugs when using
	 *		this operator in multiple assignments.
	 *	@note In case there was not enough memory to copy the array, the length is set to 0.
	 */
	void operator =(const CBitArray &r_other);

	/**
	 *	@brief allocates array to the specified length
	 *
	 *	@param[in] n_length_bits is new length in bits
	 *
	 *	@return Returns true on success, false on failure (not enough memory).
	 *
	 *	@note The contents of the new array are undefined (use Resize() if the array
	 *		is required to contain (part of) the original data).
	 *	@note This always succeeds if the new size is smaller or equal to the current size.
	 */
	bool Alloc(size_t n_length_bits);

	/**
	 *	@brief resizes array to the specified length
	 *
	 *	@param[in] n_length_bits is new length in bits
	 *
	 *	@return Returns true on success, false on failure (not enough memory).
	 *
	 *	@note The original contents of the array are preserved, unless the new
	 *		length is smaller than the original length; in that case the data
	 *		at the end of the array is effectively erased. If the new length is larger,
	 *		the value of the new bits at the end of the array is undefined.
	 *	@note This always succeeds if the new size is smaller or equal to the current size.
	 */
	bool Resize(size_t n_length_bits);

	/**
	 *	@brief extends the buffer to be able to hold specified length; length of the array doesn't change
	 *	@param[in] n_min_capacity_bits is minimal array capacity in bits
	 *	@return Returns true on success, false on failure (not enough memory).
	 */
	bool Reserve(size_t n_min_capacity_bits);

	/**
	 *	@brief erases the array and frees its memory
	 *	@note Do not confuse this function with Clear(), which doesn't change array length
	 *		but sets all the bits of the array to 0 instead.
	 */
	void Erase();

	/**
	 *	@brief swaps two arrays
	 *	@param[in,out] r_other is the other array
	 */
	void Swap(CBitArray &r_other);

	/**
	 *	@brief tells whether the array holds no bits at all
	 *	@return Returns true if the length of the array is zero, otherwise returns false.
	 */
	inline bool b_Empty() const
	{
		return m_n_used_bits == 0;
	}

	/**
	 *	@brief gets length of the array (an alias of n_Size())
	 *	@return Returns the length of the array in bits.
	 *	@deprecated Kept for backward compatibility only. Use n_Size().
	 */
	inline size_t n_Length() const
	{
		const size_t n_bit_num = n_Size();
		return n_bit_num;
	}

	/**
	 *	@brief gets length of the array
	 *	@return Returns the number of bits stored in the array.
	 */
	inline size_t n_Size() const
	{
		return this->m_n_used_bits;
	}

	/**
	 *	@brief gets capacity of the array
	 *	@return Returns the greatest length in bits the array can be resized to
	 *		without reallocating (eight times the capacity of the underlying buffer).
	 */
	inline size_t n_Capacity() const
	{
		return 8 * TBuffer::n_Capacity();
	}

	/**
	 *	@brief reads a single bit of a constant array
	 *	@param[in] n_index is zero-based index of the bit (must be less than n_Size();
	 *		checked in debug builds only)
	 *	@return Returns value of the bit with the specified index.
	 */
	inline bool operator [](index_t n_index) const
	{
		_ASSERTE(n_index >= 0 && n_index < n_Size());

		const _NativeInt n_unit = p_Data()[n_Index(n_index)];
		// the unit which holds the bit

		return (n_unit & n_Mask(n_index)) != 0;
	}

	/**
	 *	@brief gets read-write access to a single bit
	 *	@param[in] n_index is zero-based index of the bit (must be less than n_Size();
	 *		checked in debug builds only)
	 *	@return Returns writable reference to the bit with the specified index.
	 *	@note The returned reference points to the array storage,
	 *		it must not be used after the array is resized.
	 */
	inline CBitReference operator [](index_t n_index)
	{
		_ASSERTE(n_index >= 0 && n_index < n_Size());

		_NativeInt &r_unit = p_Data()[n_Index(n_index)];
		// the unit which holds the bit

		return CBitReference(r_unit, n_Mask(n_index));
	}

	/**
	 *	@brief counts nonzero bits
	 *
	 *	The fully used units are counted whole, the last partially used unit
	 *	(if there is one) is masked first, so that the unused bits
	 *	past the end of the array don't affect the result.
	 *
	 *	@return Returns the number of nonzero bits in the entire array.
	 *
	 *	@note In debug builds the result is verified by counting the bits one by one.
	 */
	size_t n_Nonzero_Num() const
	{
		size_t n_num = 0;
		size_t n_last_full = n_Index(m_n_used_bits); // number of the fully used units
		size_t n_last_num_used_bits = n_Shift(m_n_used_bits); // number of bits used in the last unit

		const _NativeInt *p_data = p_Data();
		for(const _NativeInt *p_end = p_data + n_last_full; p_data != p_end; ++ p_data)
			n_num += n_SetBit_Num(*p_data);
		// count bits in each fully used unit of the array; the loop must run even if
		// there is just a single full unit (a zero-length loop is harmless on its own)

		if(n_last_num_used_bits) {
			// in case the array is aligned, the last number was already processed in the loop above

			_ASSERTE(p_data == p_Data() + n_last_full);
			n_num += n_SetBit_Num(*p_data & n_Mask_FirstN(n_last_num_used_bits));
			// add bits at the end (must mask it)
		}
		// last number

#ifdef _DEBUG
		size_t n_num_ref = 0;
		for(size_t i = 0, n = m_n_used_bits; i < n; ++ i) {
			if(b_GetBit(i))
				++ n_num_ref;
		}
		_ASSERTE(n_num_ref == n_num);
#endif // _DEBUG
		// double-check the result

		return n_num;
	}

	/**
	 *	@brief reads a single bit (named equivalent of the constant operator [])
	 *	@param[in] n_index is zero-based index of the bit
	 *	@return Returns value of the bit with the specified index.
	 */
	inline bool b_GetBit(index_t n_index) const
	{
		return operator [](n_index);
		// this is a const function, the read-only version of the operator is selected
	}

	/**
	 *	@brief writes the specified value to a selected bit
	 *
	 *	@param[in] n_index is zero-based index of the bit
	 *	@param[in] b_value is the new value of the bit
	 *
	 *	@note If the value is known beforehand, calling Raise() or Clear()
	 *		directly avoids the branch.
	 */
	inline void Set(index_t n_index, bool b_value)
	{
		if(!b_value) {
			Clear(n_index);
			return;
		}
		// writing a zero

		Raise(n_index);
		// writing a one
	}

	/**
	 *	@brief writes zero to a selected bit
	 *	@param[in] n_index is zero-based index of the bit
	 *	@note The index is not checked here.
	 */
	inline void Clear(index_t n_index)
	{
		_NativeInt &r_unit = p_Data()[n_Index(n_index)];
		// the unit which holds the bit

		r_unit &= ~n_Mask(n_index);
	}

	/**
	 *	@brief writes one to a selected bit
	 *	@param[in] n_index is zero-based index of the bit
	 *	@note The index is not checked here.
	 */
	inline void Raise(index_t n_index)
	{
		_NativeInt &r_unit = p_Data()[n_Index(n_index)];
		// the unit which holds the bit

		r_unit |= n_Mask(n_index);
	}

	/**
	 *	@brief inverts value of a selected bit
	 *	@param[in] n_index is zero-based index of the bit
	 *	@note The index is not checked here.
	 */
	inline void Invert(index_t n_index)
	{
		_NativeInt &r_unit = p_Data()[n_Index(n_index)];
		// the unit which holds the bit

		r_unit ^= n_Mask(n_index);
	}

	/**
	 *	@brief sets values of all the bits in the array to the specified value
	 *	@param[in] b_value is a new value of the array bits
	 */
	void Set(bool b_value);

	/**
	 *	@brief sets all the bits in the array to zero
	 *	@note Do not confuse this with Erase(), which doesn't change value of the bits
	 *		but sets length of the array to 0 instead.
	 */
	void Clear();

	/**
	 *	@brief sets all the bits in the array to one
	 */
	void Raise();

	/**
	 *	@brief inverts all the bits in the array
	 */
	void Invert();

	/**
	 *	@brief writes the specified value to all the bits in a range
	 *
	 *	@param[in] n_begin is zero-based index of the first bit
	 *	@param[in] n_end is zero-based index of the last bit (non-inclusive)
	 *	@param[in] b_value is the new value of the selected bits
	 *
	 *	@note If the value is known beforehand, calling the range versions
	 *		of Raise() or Clear() directly avoids the branch.
	 */
	inline void Set(size_t n_begin, size_t n_end, bool b_value)
	{
		if(!b_value) {
			Clear(n_begin, n_end);
			return;
		}
		// writing zeros

		Raise(n_begin, n_end);
		// writing ones
	}

	/**
	 *	@brief sets values of the bits in the specified range to zero
	 *
	 *	The first and the last unit of the range are modified using masks, so the bits
	 *	outside of the range keep their values; the units in between are overwritten whole.
	 *
	 *	@param[in] n_begin is zero-based index of the first bit
	 *	@param[in] n_end is zero-based index of the last bit (non-inclusive)
	 *
	 *	@note The range must not be reversed (n_begin <= n_end is checked
	 *		in debug builds only); an empty range is a no-op.
	 */
	inline void Clear(size_t n_begin, size_t n_end)
	{
		_ASSERTE(n_begin <= n_end);
		// a reversed range would make the range length below wrap around

		if(n_begin == n_end)
			return;
		// nothing to clear (must be here to prevent ei underflowing below)

		index_t bi = n_Index(n_begin);
		size_t bo = n_Shift(n_begin); // bits bo to lsb (32 - bo bits)
		index_t ei = n_Index(n_end);
		size_t eo = n_Shift(n_end); // bits msb to eo (eo bits)

		if(!eo) {
			_ASSERTE(ei > bi); // !!
			-- ei;
			eo = unit_SizeBits;
		}
		// handle fill ending at native integer boundary (caused by fact the end isn't
		// part of filled data); from now on, eo is in range 1 to unit_SizeBits

		_NativeInt *p_dest = p_Data() + bi;
		// pointer to destination data

		if(n_end - n_begin <= unit_SizeBits) { // the range spans two units at most
			if(bi == ei) {
				_NativeInt n_mask = n_Mask_Range(bo, eo);
				// calculate mask

				*p_dest &= ~n_mask;
				// clear the masked bits in the array
			} else {
				_NativeInt n_mask = n_Mask_Range(bo, unit_SizeBits);
				// calculate mask

				p_dest[0] &= ~n_mask;
				// clear the tail of the first unit

				_NativeInt n_mask_e = n_Mask_FirstN(eo);
				// calculate mask

				p_dest[1] &= ~n_mask_e;
				// clear the head of the second unit
			}
		} else {
			if(bo != 0) { // unaligned begin (most probable, least favorable)
				_NativeInt n_mask = n_Mask_Range(bo, unit_SizeBits);
				// calculate mask

				*p_dest &= ~n_mask;
				++ p_dest;
				// clear the tail of the first unit
			}

			const _NativeInt *p_end = p_Data() + ei;
			// get the end of the fully covered units

			for(; p_dest != p_end; ++ p_dest)
				*p_dest = 0;
			// write all nulls

			_NativeInt n_mask_e = n_Mask_FirstN(eo);
			// calculate mask (eo is never zero here)

			*p_dest &= ~n_mask_e;
			// clear the head of the last unit
		}
	}

	/**
	 *	@brief sets values of the bits in the specified range to one
	 *
	 *	The first and the last unit of the range are modified using masks, so the bits
	 *	outside of the range keep their values; the units in between are overwritten whole.
	 *
	 *	@param[in] n_begin is zero-based index of the first bit
	 *	@param[in] n_end is zero-based index of the last bit (non-inclusive)
	 *
	 *	@note The range must not be reversed (n_begin <= n_end is checked
	 *		in debug builds only); an empty range is a no-op.
	 */
	inline void Raise(size_t n_begin, size_t n_end)
	{
		_ASSERTE(n_begin <= n_end);
		// a reversed range would make the range length below wrap around

		if(n_begin == n_end)
			return;
		// nothing to raise (must be here to prevent ei underflowing below)

		index_t bi = n_Index(n_begin);
		size_t bo = n_Shift(n_begin); // bits bo to lsb (32 - bo bits)
		index_t ei = n_Index(n_end);
		size_t eo = n_Shift(n_end); // bits msb to eo (eo bits)

		if(!eo) {
			_ASSERTE(ei > bi); // !!
			-- ei;
			eo = unit_SizeBits;
		}
		// handle fill ending at native integer boundary (caused by fact the end isn't
		// part of filled data); from now on, eo is in range 1 to unit_SizeBits

		_NativeInt *p_dest = p_Data() + bi;
		// pointer to destination data

		if(n_end - n_begin <= unit_SizeBits) { // the range spans two units at most
			if(bi == ei) {
				_NativeInt n_mask = n_Mask_Range(bo, eo);
				// calculate mask

				*p_dest |= n_mask;
				// raise the masked bits in the array
			} else {
				_NativeInt n_mask = n_Mask_Range(bo, unit_SizeBits);
				// calculate mask

				p_dest[0] |= n_mask;
				// raise the tail of the first unit

				_NativeInt n_mask_e = n_Mask_FirstN(eo);
				// calculate mask

				p_dest[1] |= n_mask_e;
				// raise the head of the second unit
			}
		} else {
			if(bo != 0) { // unaligned begin (most probable, least favorable)
				_NativeInt n_mask = n_Mask_Range(bo, unit_SizeBits);
				// calculate mask

				*p_dest |= n_mask;
				++ p_dest;
				// raise the tail of the first unit
			}

			const _NativeInt *p_end = p_Data() + ei;
			// get the end of the fully covered units

			for(; p_dest != p_end; ++ p_dest)
				*p_dest = _NativeInt(-1);
			// write all ones

			_NativeInt n_mask_e = n_Mask_FirstN(eo);
			// calculate mask (eo is never zero here)

			*p_dest |= n_mask_e;
			// raise the head of the last unit
		}
	}

	/**
	 *	@brief inverts values of the bits in the specified range
	 *
	 *	This is implemented by passing n_Inverse to Transform().
	 *
	 *	@param[in] n_begin is zero-based index of the first bit
	 *	@param[in] n_end is zero-based index of the last bit (non-inclusive)
	 *
	 *	@note NOTE(review): neither Transform() nor n_Inverse is defined in this
	 *		part of the file; presumably n_Inverse returns bitwise negation
	 *		of a unit - confirm (the original author left a question here as well).
	 */
	inline void Invert(size_t n_begin, size_t n_end)
	{
		Transform(n_begin, n_end, n_Inverse); // @t_odo - is this what i meant?
	}

	/**
	 *	@brief uses function object to generate values of the selected bits
	 *
	 *	A simple generator model is used here. Generator is a function or a function object
	 *	with overloaded function operator, which generates unit_SizeBits bits and returns them
	 *	as a _NativeInt (where MSB is the first bit, LSB is the last one). This is more
	 *	efficient than generating the bits one by one.
	 *	The generator is called exactly ceil((n_end - n_begin) / unit_SizeBits) times;
	 *	the superfluous bits of the last call are discarded.
	 *
	 *	@tparam COp is a simple generator model
	 *
	 *	@param[in] n_begin is zero-based index of the first bit to generate
	 *	@param[in] n_end is zero-based index of the last bit to generate (non-inclusive)
	 *	@param[in] op is generator instance
	 *
	 *	@return Returns value of op, after generating all the bits.
	 *
	 *	@note The range must not be reversed (n_begin <= n_end is checked in debug
	 *		builds only); for an empty range, the generator is not called at all.
	 *	@note The bits outside of the range keep their values.
	 */
	template <class COp>
	COp Generate(size_t n_begin, size_t n_end, COp op)
	{
		_ASSERTE(n_begin <= n_end);
		// a reversed range would make the range length below wrap around

		if(n_begin == n_end)
			return op;
		// nothing to generate (must be here to prevent ei underflowing below)

		index_t bi = n_Index(n_begin);
		size_t bo = n_Shift(n_begin); // bits bo to lsb (32 - bo bits)
		index_t ei = n_Index(n_end);
		size_t eo = n_Shift(n_end); // bits msb to eo (eo bits)

		if(!eo) {
			_ASSERTE(ei > bi); // !!
			-- ei;
			eo = unit_SizeBits;
		}
		// handle fill ending at native integer boundary (caused by fact the end isn't
		// part of filled data); from now on, eo is in range 1 to unit_SizeBits

		_NativeInt *p_dest = p_Data() + bi;
		// pointer to destination data

		if(n_end - n_begin <= unit_SizeBits) { // copy inside a single native integer (single call to op())
			_NativeInt n_data = op(); // generate first and last few bits
			// get data to write to the array

			if(bi == ei) {
				_NativeInt n_mask = n_Mask_Range(bo, eo);
				// calculate mask

				*p_dest = (*p_dest & ~n_mask) | ((n_data >> bo) & n_mask);
				// write masked data to the array
			} else {
				_NativeInt n_mask = n_Mask_Range(bo, unit_SizeBits);
				// calculate mask

				p_dest[0] = (p_dest[0] & ~n_mask) | ((n_data >> bo) & n_mask);
				// the first unit_SizeBits - bo generated bits go to the tail of the first unit

				_NativeInt n_mask_e = n_Mask_FirstN(eo);
				// calculate mask

				p_dest[1] = (p_dest[1] & ~n_mask_e) | ((n_data << (unit_SizeBits - bo)) & n_mask_e);
				// the rest goes to the head of the second unit (bo is never zero in this
				// branch, a short range starting at unit boundary fits in a single unit)
			}
		} else {
			if(bo != 0) { // unaligned copy (most probable, least favorable)
				_NativeInt n_mask = n_Mask_Range(bo, unit_SizeBits);
				// calculate mask

				_NativeInt n_data = op();
				// get data to write to the array

				*p_dest = (*p_dest & ~n_mask) | ((n_data >> bo) & n_mask);
				++ p_dest;
				// write masked data to the array

				n_data <<= unit_SizeBits - bo;
				// shift data to contain the rest of unused word

				const _NativeInt *p_end = p_Data() + ei;
				// get end of the fully covered units

				for(; p_dest != p_end; ++ p_dest) {
					_NativeInt n_more = op();
					// get more data

					_ASSERTE(!(n_data & n_mask));
					_ASSERTE(!((n_more >> bo) & ~n_mask));
					*p_dest = n_data | (n_more >> bo);
					// write

					n_data = n_more << (unit_SizeBits - bo);
					// save the rest
				}
				// write the full units, each merged from two generated words

				_NativeInt n_mask_e = n_Mask_FirstN(eo);
				// calculate mask (eo is never zero here)

				if(eo > bo)
					n_data |= op() >> bo;
				// get more data, if needed (the carry holds only bo bits)

				*p_dest = (*p_dest & ~n_mask_e) | (n_data & n_mask_e);
				// write last partial unit
			} else { // aligned copy
				const _NativeInt *p_end = p_Data() + ei;
				// get the end of the fully covered units

				for(; p_dest != p_end; ++ p_dest)
					*p_dest = op();
				// write aligned data

				_NativeInt n_mask_e = n_Mask_FirstN(eo);
				// calculate mask (eo is never zero here)

				_NativeInt n_data = op();
				// get data to write to the array

				*p_dest = (*p_dest & ~n_mask_e) | (n_data & n_mask_e);
				// write last partial unit
			}
		}

		return op;
		// return op (may come in handy for generating more data later)
	}

	/**
	 *	@brief generates a range of bits
	 *
	 *	This works similarly to Generate(), but care is taken that the number
	 *	of generated bits doesn't exceed n_end - n_begin. For that purpose, the
	 *	CConstrainedOp must have two versions of operator (). The first version
	 *	has got no arguments and always generates unit_SizeBits bits. The second
	 *	version has got a single argument, specifying number of bits to generate.
	 *	It is guaranteed that the second version is called only once at most, and
	 *	that the number of bits it needs to generate is less than unit_SizeBits.
	 *	The returned bits are MSB-aligned.
	 *
	 *	@param[in] CConstrainedOp is constrained generator model
	 *	@param[in] n_begin is zero-based index of the first bit to generate
	 *	@param[in] n_end is zero-based index of the last bit to generate (not inclusive)
	 *	@param[in] op is constrained generator instance
	 *
	 *	@return Returns value of op, after generating all the bits.
	 */
	template <class CConstrainedOp>
	CConstrainedOp Generate_Constrained(size_t n_begin, size_t n_end, CConstrainedOp op)
	{
		// overwrites bits [n_begin, n_end) of this array with data obtained from op;
		// op() is called for each whole unit of data and op(n) is called at most once,
		// as the very last call, to get the remaining n bits (n < unit_SizeBits)

		if(n_begin == n_end)
			return op;
		// nothing to copy (must be here to prevent ei underflowing below)

		index_t bi = n_Index(n_begin);
		size_t bo = n_Shift(n_begin); // bit offset of the first written bit inside unit bi
		index_t ei = n_Index(n_end);
		size_t eo = n_Shift(n_end); // number of bits written to unit ei (counted from msb)

		if(!eo) {
			_ASSERTE(ei > bi); // !!
			-- ei;
			eo = unit_SizeBits;
		}
		// handle fill ending at native integer boundary (caused by fact the end isn't
		// part of filled data (could change that as well, i guess)); from now on,
		// ei is the index of the last written unit and eo is in the [1, unit_SizeBits] range

		_NativeInt *p_dest = p_Data() + bi;
		// pointer to destination data

		if(n_end - n_begin <= unit_SizeBits) { // copy inside a single native integer (single call to op())
			_NativeInt n_data = (n_end - n_begin < unit_SizeBits)? op(n_end - n_begin) : op(); // generate first and last few bits
			// get data to write to the array (the constrained version of op
			// is only used if less than a whole unit of bits is needed)

			if(bi == ei) {
				_NativeInt n_mask = n_Mask_Range(bo, eo);
				// calculate mask of the bits [bo, eo)

				*p_dest = (*p_dest & ~n_mask) | ((n_data >> bo) & n_mask);
				// write masked data to the array (all the bits lie in a single unit)
			} else {
				_NativeInt n_mask = n_Mask_Range(bo, unit_SizeBits);
				// calculate mask of the bits from bo to the end of the first unit

				p_dest[0] = (p_dest[0] & ~n_mask) | ((n_data >> bo) & n_mask);
				// write the first part of the data to the end of the first unit

				_NativeInt n_mask_e = n_Mask_FirstN(eo);
				// calculate mask of the first eo bits of the second unit

				p_dest[1] = (p_dest[1] & ~n_mask_e) | ((n_data << (unit_SizeBits - bo)) & n_mask_e);
				// write the rest of the data to the beginning of the second unit
			}
		} else {
			if(bo != 0) { // unaligned copy (most probable, least favorable)
				_NativeInt n_mask = n_Mask_Range(bo, unit_SizeBits);
				size_t n_size = unit_SizeBits - bo; // NOTE(review): n_size is not used anywhere below
				// calculate mask

				_NativeInt n_data = op();
				// get data to write to the array

				*p_dest = (*p_dest & ~n_mask) | ((n_data >> bo) & n_mask);
				++ p_dest;
				// write masked data to the array

				n_data <<= unit_SizeBits - bo;
				// shift data to contain the rest of unused word
				// (n_data now carries bo pending bits, msb-aligned)

				if(eo > bo) {
					// the bo pending bits do not suffice for the last unit,
					// eo - bo more bits are requested after the loop

					const _NativeInt *p_end = p_Data() + ei; // ((eo != 0)? ei : ei + 1); // eo is never zero
					// get end of data

					for(; p_dest != p_end; ++ p_dest) {
						_NativeInt n_more = op();
						// get more data

						_ASSERTE(!(n_data & n_mask));
						_ASSERTE(!((n_more >> bo) & ~n_mask));
						*p_dest = n_data | (n_more >> bo);
						// write

						n_data = n_more << (unit_SizeBits - bo);
						// save the rest
					}
					// write aligned data

					_NativeInt n_mask_e = n_Mask_FirstN(eo);
					// calculate mask

					_NativeInt n_more = op(eo - bo);
					// get more data (the only constrained call, eo - bo bits)

					_ASSERTE(!(n_data & n_mask));
					_ASSERTE(!((n_more >> bo) & ~n_mask));

					*p_dest = (*p_dest & ~n_mask_e) | ((n_data | (n_more >> bo)) & n_mask_e);
					// write the first eo bits of the last unit, keep the rest of it
				} else /*if(eo <= bo)*/ {
					const _NativeInt *p_end = p_Data() + ei; // ((eo != 0)? ei : ei + 1); // eo is never zero
					// get end of data

					if(eo < bo) {
						-- p_end;
						// last loop iteration will be different

						for(; p_dest != p_end; ++ p_dest) {
							_NativeInt n_more = op();
							// get more data

							_ASSERTE(!(n_data & n_mask));
							_ASSERTE(!((n_more >> bo) & ~n_mask));
							*p_dest = n_data | (n_more >> bo);
							// write

							n_data = n_more << (unit_SizeBits - bo);
							// save the rest
						}
						// write aligned data

						{
							_NativeInt n_more = op(unit_SizeBits - bo + eo);
							// get more data (less than unit_SizeBits)

							_ASSERTE(!(n_data & n_mask));
							_ASSERTE(!((n_more >> bo) & ~n_mask));
							*p_dest = n_data | (n_more >> bo);
							++ p_dest;
							// write

							n_data = n_more << (unit_SizeBits - bo);
							// save the rest
						}
						// last iteration gets (unit_SizeBits - bo + eo) bits of data
					} else /*if(eo == bo)*/ {
						for(; p_dest != p_end; ++ p_dest) {
							_NativeInt n_more = op();
							// get more data

							_ASSERTE(!(n_data & n_mask));
							_ASSERTE(!((n_more >> bo) & ~n_mask));
							*p_dest = n_data | (n_more >> bo);
							// write

							n_data = n_more << (unit_SizeBits - bo);
							// save the rest
						}
						// write aligned data (the constrained version of op is not called at all)
					}

					if(eo) { // we have enough data in n_data
						_NativeInt n_mask_e = n_Mask_FirstN(eo);
						// calculate mask

						*p_dest = (*p_dest & ~n_mask_e) | (n_data & n_mask_e);
						// write the pending bits to the beginning of the last unit
					}
				}
			} else { // aligned copy
				const _NativeInt *p_end = p_Data() + ei; // ((eo != 0)? ei : ei + 1); // eo is never zero
				// get end of data

				for(; p_dest != p_end; ++ p_dest)
					*p_dest = op();
				// write aligned data (whole units are overwritten, no masking needed)

				if(eo != 0) {
					_NativeInt n_mask_e = n_Mask_FirstN(eo);
					// calculate mask

					_NativeInt n_data = (eo < unit_SizeBits)? op(eo) : op();
					// get data to write to the array (constrained only if the last unit is partial)

					*p_dest = (*p_dest & ~n_mask_e) | (n_data & n_mask_e);
					// write masked data to the array
				}
				// write last partial unit
			}
		}

		return op;
		// return op (may come in handy for generating more data later)
	}

	/**
	 *	@brief iterates over selected range of the bits
	 *
	 *	This iterates over the selected range of bits, while passing
	 *	groups of unit_SizeBits, stored as _NativeInt to the unary function
	 *	(where MSB is the first bit, LSB is the last one). In case n_end - n_begin
	 *	is not integer multiple of unit_SizeBits, the last call to the unary
	 *	function contains the remainder bits, MSB aligned, while the rest of the
	 *	bits in it's _NativeInt argument is set to zeros.
	 *
	 *	@param[in] CUnaryOp is unary function model
	 *	@param[in] n_begin is zero-based index of the first bit
	 *	@param[in] n_end is zero-based index of the last bit (not inclusive)
	 *	@param[in] op is an unary function
	 *
	 *	@return Returns value of op, after processing all the bits.
	 */
	template <class CUnaryOp>
	CUnaryOp ForEach(size_t n_begin, size_t n_end, CUnaryOp op) const
	{
		if(n_begin == n_end)
			return op;
		// empty range, op is not invoked at all

		const size_t n_bit_num = n_end - n_begin;
		size_t n_units_left = n_bit_num / unit_SizeBits;
		const size_t n_tail_bits = n_bit_num % unit_SizeBits;
		// split the range to a number of whole units and a (possibly empty) tail

		const size_t n_first_offset = n_Shift(n_begin);
		const _NativeInt *p_first = p_Data() + n_Index(n_begin);
		// locate the first bit of the range in the data buffer

#ifdef _DEBUG
		const _NativeInt *p_limit = p_Data() + ((n_Shift(n_end))? n_Index(n_end) + 1 : n_Index(n_end));
		// pointer one past the last unit holding bits of the range (handed to the readers in debug only)
#endif // _DEBUG

		if(n_first_offset) {
#ifdef _DEBUG
			CReader reader(p_first, n_first_offset, p_limit);
#else // _DEBUG
			CReader reader(p_first, n_first_offset);
#endif // _DEBUG
			// the range begins inside of a unit, use the unaligned reader

			while(n_units_left) {
				op(reader());
				-- n_units_left;
			}
			// pass the whole units to op

			if(n_tail_bits)
				op(reader(n_tail_bits));
			// pass the remainder to op, if there is any
		} else {
#ifdef _DEBUG
			CAlignedReader reader(p_first, p_limit);
#else // _DEBUG
			CAlignedReader reader(p_first);
#endif // _DEBUG
			// the range begins at a unit boundary, use the aligned reader

			while(n_units_left) {
				op(reader());
				-- n_units_left;
			}
			// pass the whole units to op

			if(n_tail_bits)
				op(reader(n_tail_bits));
			// pass the remainder to op, if there is any
		}

		return op;
		// hand the (possibly stateful) function object back to the caller
	}

	/**
	 *	@brief iterates over selected range of the bits
	 *
	 *	This iterates over the selected range of bits, while passing
	 *	groups of unit_SizeBits, stored as _NativeInt to the function object
	 *	(where MSB is the first bit, LSB is the last one). In case n_end - n_begin
	 *	is not integer multiple of unit_SizeBits, the last call to the function
	 *	object is binary. The first argument contains the remainder bits, MSB aligned,
	 *	while the rest of the bits in this _NativeInt argument is set to zeros.
	 *	The second argument is the number of the remainder bits.
	 *
	 *	@param[in] CConstrainedUnaryOp is function object model, callable with a single
	 *		argument (a whole unit) and with two arguments (the remainder bits and their number)
	 *	@param[in] n_begin is zero-based index of the first bit
	 *	@param[in] n_end is zero-based index of the last bit (not inclusive)
	 *	@param[in] op is the function object
	 *
	 *	@return Returns value of op, after processing all the bits.
	 */
	template <class CConstrainedUnaryOp>
	CConstrainedUnaryOp ForEach_Constrained(size_t n_begin, size_t n_end, CConstrainedUnaryOp op) const
	{
		if(n_begin == n_end)
			return op;
		// empty range, op is not invoked at all

		const size_t n_bit_num = n_end - n_begin;
		size_t n_units_left = n_bit_num / unit_SizeBits;
		const size_t n_tail_bits = n_bit_num % unit_SizeBits;
		// split the range to a number of whole units and a (possibly empty) tail

		const size_t n_first_offset = n_Shift(n_begin);
		const _NativeInt *p_first = p_Data() + n_Index(n_begin);
		// locate the first bit of the range in the data buffer

#ifdef _DEBUG
		const _NativeInt *p_limit = p_Data() + ((n_Shift(n_end))? n_Index(n_end) + 1 : n_Index(n_end));
		// pointer one past the last unit holding bits of the range (handed to the readers in debug only)
#endif // _DEBUG

		if(n_first_offset) {
#ifdef _DEBUG
			CReader reader(p_first, n_first_offset, p_limit);
#else // _DEBUG
			CReader reader(p_first, n_first_offset);
#endif // _DEBUG
			// the range begins inside of a unit, use the unaligned reader

			while(n_units_left) {
				op(reader());
				-- n_units_left;
			}
			// whole units go to the unary version of op

			if(n_tail_bits)
				op(reader(n_tail_bits), n_tail_bits);
			// the remainder (if any) goes to the binary version of op, along with its size
		} else {
#ifdef _DEBUG
			CAlignedReader reader(p_first, p_limit);
#else // _DEBUG
			CAlignedReader reader(p_first);
#endif // _DEBUG
			// the range begins at a unit boundary, use the aligned reader

			while(n_units_left) {
				op(reader());
				-- n_units_left;
			}
			// whole units go to the unary version of op

			if(n_tail_bits)
				op(reader(n_tail_bits), n_tail_bits);
			// the remainder (if any) goes to the binary version of op, along with its size
		}

		return op;
		// hand the (possibly stateful) function object back to the caller
	}

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief transforms values in the selected range
	 *
	 *	This iterates over the selected range of bits, while passing
	 *	groups of unit_SizeBits, stored as _NativeInt to the unary function
	 *	(where MSB is the first bit, LSB is the last one). In case n_end - n_begin
	 *	is not integer multiple of unit_SizeBits, the last call to the unary
	 *	function contains the remainder bits, MSB aligned, while the rest of the
	 *	bits in it's _NativeInt argument is set to zeros.
	 *
	 *	The unary function, in turn, returns transformed bits to be written back
	 *	to the bit array. The last unaligned group of bits doesn't need to be masked.
	 *
	 *	@param[in] CUnaryOp is unary function model
	 *	@param[in] n_begin is zero-based index of the first bit
	 *	@param[in] n_end is zero-based index of the last bit (not inclusive)
	 *	@param[in] op is an unary function
	 *
	 *	@return Returns value of op, after generating all the bits.
	 */
	template <class CUnaryOp>
	inline CUnaryOp Transform(size_t n_begin, size_t n_end, CUnaryOp op)
	{
		return Transform(n_begin, n_end, n_begin, *this, op);
	}

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief transforms values in the selected range
	 *
	 *	This iterates over the selected range of bits, while passing
	 *	groups of unit_SizeBits, stored as _NativeInt to the unary function
	 *	(where MSB is the first bit, LSB is the last one). In case n_end - n_begin
	 *	is not integer multiple of unit_SizeBits, the last call to the unary
	 *	function contains the remainder bits, MSB aligned, while the rest of the
	 *	bits in it's _NativeInt argument is set to zeros.
	 *
	 *	The unary function, in turn, returns transformed bits to be written
	 *	to the destination array. The last unaligned group of bits doesn't
	 *	need to be masked.
	 *
	 *	The destination array needs to be sufficiently large, so the [n_begin_dest,
	 *	n_begin_dest + n_end - n_begin) is a valid range. In case the destination
	 *	array is reference to this, n_begin_dest must not lie within this interval,
	 *	with the exception of n_begin.
	 *
	 *	@param[in] CUnaryOp is unary function model
	 *	@param[in] n_begin is zero-based index of the first bit
	 *	@param[in] n_end is zero-based index of the last bit (not inclusive)
	 *	@param[in] n_begin_dest is zero-based index of the first destination bit
	 *	@param[out] r_dest_array is the destination array
	 *	@param[in] op is an unary function
	 *
	 *	@return Returns value of op, after generating all the bits.
	 */
	template <class CUnaryOp>
	CUnaryOp Transform(size_t n_begin, size_t n_end, size_t n_begin_dest, CBitArray &r_dest_array, CUnaryOp op) const
	{
		_ASSERTE(&r_dest_array != this || n_begin_dest <= n_begin || n_begin_dest >= n_end);
		// writing to another array, or to this array at a position accepted by the condition above

		if(n_begin == n_end)
			return op;
		// empty range, nothing is read or written

		const size_t n_dest_limit = n_begin_dest + n_end - n_begin;
		// the destination range is exactly as long as the source range

		const size_t n_src_offset = n_Shift(n_begin);
		const _NativeInt *p_first = p_Data() + n_Index(n_begin);
		// locate the first source bit in the data buffer

#ifdef _DEBUG
		const _NativeInt *p_limit = p_Data() + ((n_Shift(n_end))? n_Index(n_end) + 1 : n_Index(n_end));
		// pointer one past the last unit holding source bits (handed to the readers in debug only)
#endif // _DEBUG

		if(n_src_offset) {
#ifdef _DEBUG
			CReader reader(p_first, n_src_offset, p_limit);
#else // _DEBUG
			CReader reader(p_first, n_src_offset);
#endif // _DEBUG
			// the source begins inside of a unit, use the unaligned reader

			return r_dest_array.Generate_Constrained(n_begin_dest, n_dest_limit, CTransform<CUnaryOp, CReader>(op, reader));
			// the destination is filled by the constrained generator
		}

#ifdef _DEBUG
		CAlignedReader reader(p_first, p_limit);
#else // _DEBUG
		CAlignedReader reader(p_first);
#endif // _DEBUG
		// the source begins at a unit boundary, use the aligned reader

		return r_dest_array.Generate(n_begin_dest, n_dest_limit, CTransform<CUnaryOp, CAlignedReader>(op, reader));
		// the destination is filled by the plain generator
	}

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief transforms values in the selected ranges
	 *
	 *	This iterates over the two selected ranges of bits, while passing
	 *	groups of unit_SizeBits from both ranges, stored as _NativeInt to the binary function
	 *	(where MSB is the first bit, LSB is the last one). In case n_end - n_begin
	 *	is not integer multiple of unit_SizeBits, the last call to the binary
	 *	function contains the remainder bits, MSB aligned, while the rest of the
	 *	bits in it's _NativeInt argument is set to zeros.
	 *
	 *	The binary function, in turn, returns transformed bits to be written back
	 *	to the second array. The last unaligned group of bits doesn't need to be masked.
	 *
	 *	The destination array needs to be sufficiently large, so the [n_begin_dest,
	 *	n_begin_dest + n_end - n_begin) is a valid range.
	 *
	 *	@param[in] CBinaryOp is binary function model
	 *	@param[in] n_begin is zero-based index of the first bit in this array
	 *	@param[in] n_end is zero-based index of the last bit in this array (not inclusive)
	 *	@param[in] n_begin_dest is zero-based index of the first bit in the second array
	 *	@param[in,out] r_dest_array is reference to the second array
	 *	@param[in] op is a binary function
	 *
	 *	@return Returns value of op, after generating all the bits.
	 */
	template <class CBinaryOp>
	inline CBinaryOp Transform2(size_t n_begin, size_t n_end,
		size_t n_begin_dest, CBitArray &r_dest_array, CBinaryOp op) const
	{
		_ASSERTE(&r_dest_array != this); // use unary version instead, this would be inefficient
		return Transform2(n_begin, n_end, n_begin_dest, r_dest_array, n_begin_dest, r_dest_array, op);
	}

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief transforms values in the selected ranges
	 *
	 *	This iterates over the two selected ranges of bits, while passing
	 *	groups of unit_SizeBits from both ranges, stored as _NativeInt to the binary function
	 *	(where MSB is the first bit, LSB is the last one). The second array needs
	 *	to be sufficiently large, so the [n_begin2, n_begin2 + n_end - n_begin)
	 *	is a valid range. In case n_end - n_begin is not integer multiple of unit_SizeBits,
	 *	the last call to the binary function contains the remainder bits, MSB aligned, while
	 *	the rest of the bits in it's _NativeInt argument is set to zeros.
	 *
	 *	The binary function, in turn, returns transformed bits to be written
	 *	to the third range in the third array. The last unaligned group of bits doesn't need to be masked.
	 *
	 *	The destination array needs to be sufficiently large, so the [n_begin_dest,
	 *	n_begin_dest + n_end - n_begin) is a valid range.
	 *
	 *	In case the destination array is reference to this, n_begin_dest must not lie within
	 *	the [n_begin, n_end) interval, with the exception of n_begin. Similarly in case the
	 *	destination array is reference to r_array2, n_begin_dest must not lie within
	 *	the [n_begin2, n_begin2 + n_end - n_begin) interval, with the exception of n_begin2.
	 *
	 *	@param[in] CBinaryOp is binary function model
	 *	@param[in] n_begin is zero-based index of the first bit in this array
	 *	@param[in] n_end is zero-based index of the last bit in this array (not inclusive)
	 *	@param[in] n_begin2 is zero-based index of the first bit in the second array
	 *	@param[in] r_array2 is reference to the second array
	 *	@param[in] n_begin_dest is zero-based index of the first bit in the third array
	 *	@param[out] r_dest_array is reference to the third array
	 *	@param[in] op is a binary function
	 *
	 *	@return Returns value of op, after generating all the bits.
	 */
	template <class CBinaryOp>
	CBinaryOp Transform2(size_t n_begin, size_t n_end, size_t n_begin2, const CBitArray &r_array2,
		size_t n_begin_dest, CBitArray &r_dest_array, CBinaryOp op) const
	{
		_ASSERTE(&r_dest_array != this || n_begin_dest <= n_begin || n_begin_dest >= n_end); // either modify different array, modify completely different range or modify source range in this array
		_ASSERTE(&r_dest_array != &r_array2 || n_begin_dest <= n_begin2 ||
			n_begin_dest >= (n_begin2 + n_end - n_begin)); // either modify different array, modify completely different range or modify source range in the second array
		// makes sure the processed ranges do not overlap (the bounds are derived
		// from n_end; the checks used to refer to an undeclared name "end")

		if(n_begin == n_end)
			return op;
		// nothing to copy (must be here to prevent ei underflowing below)

		size_t n_end_dest = n_begin_dest + n_end - n_begin;
		// calculate destination end

		index_t bi = n_Index(n_begin);
		size_t bo = n_Shift(n_begin);
		// calculate starting index and bit offset

		const _NativeInt *p_src = p_Data() + bi;
		// pointer to source data

		index_t bi2 = n_Index(n_begin2);
		size_t bo2 = n_Shift(n_begin2);
		// calculate starting index and bit offset for the second array

		const _NativeInt *p_src2 = r_array2.p_Data() + bi2;
		// pointer to source data in the second array

#ifdef _DEBUG
		const _NativeInt *p_src_end = p_Data() + ((n_Shift(n_end))? n_Index(n_end) + 1 : n_Index(n_end));
		// pointer one past the last unit holding bits of the first source range

		const size_t n_end2 = n_begin2 + n_end - n_begin;
		const _NativeInt *p_src2_end = r_array2.p_Data() + ((n_Shift(n_end2))? n_Index(n_end2) + 1 : n_Index(n_end2));
		// pointer one past the last unit holding bits of the second source range
		// (must be calculated from n_end2, the second range has its own position)
#endif // _DEBUG
		// the end pointers are only passed to the readers in debug builds

		if(!bo) {
#ifdef _DEBUG
			CAlignedReader reader(p_src, p_src_end);
#else // _DEBUG
			CAlignedReader reader(p_src);
#endif // _DEBUG
			// use aligned reader

			if(!bo2) {
#ifdef _DEBUG
				CAlignedReader reader2(p_src2, p_src2_end);
#else // _DEBUG
				CAlignedReader reader2(p_src2);
#endif // _DEBUG
				// use aligned reader

				return r_dest_array.Generate(n_begin_dest, n_end_dest,
					CTransform2<CBinaryOp, CAlignedReader, CAlignedReader>(op, reader, reader2));
				// both sources are aligned, the plain generator is used
			} else {
#ifdef _DEBUG
				CReader reader2(p_src2, bo2, p_src2_end);
#else // _DEBUG
				CReader reader2(p_src2, bo2);
#endif // _DEBUG
				// use unaligned reader

				return r_dest_array.Generate_Constrained(n_begin_dest, n_end_dest,
					CTransform2<CBinaryOp, CAlignedReader, CReader>(op, reader, reader2));
			}
		} else {
#ifdef _DEBUG
			CReader reader(p_src, bo, p_src_end);
#else // _DEBUG
			CReader reader(p_src, bo);
#endif // _DEBUG
			// use unaligned reader

			if(!bo2) {
#ifdef _DEBUG
				CAlignedReader reader2(p_src2, p_src2_end);
#else // _DEBUG
				CAlignedReader reader2(p_src2);
#endif // _DEBUG
				// use aligned reader

				return r_dest_array.Generate_Constrained(n_begin_dest, n_end_dest,
					CTransform2<CBinaryOp, CReader, CAlignedReader>(op, reader, reader2));
			} else {
#ifdef _DEBUG
				CReader reader2(p_src2, bo2, p_src2_end);
#else // _DEBUG
				CReader reader2(p_src2, bo2);
#endif // _DEBUG
				// use unaligned reader

				return r_dest_array.Generate_Constrained(n_begin_dest, n_end_dest,
					CTransform2<CBinaryOp, CReader, CReader>(op, reader, reader2));
			}
		}
		// every branch above returns, no fall-through is possible
	}

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief 
	 */
	void Copy(size_t n_begin, size_t n_begin_src, size_t n_end_src, const CBitArray &r_array);

	/**
	 *	@todo This function hasn't been implemented yet. Do not use. Use the below functions with caution as some of them call this function.
	 *	@brief 
	 */
	void Inplace_Copy(size_t n_begin, size_t n_begin_src, size_t n_end_src);

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief 
	 */
	bool Uninitialized_Gap(size_t n_begin, size_t n_size);

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief 
	 */
	bool Insert(size_t n_begin, size_t n_size, bool b_value);

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief 
	 */
	bool Insert(size_t n_begin, size_t n_begin2, size_t n_end2, const CBitArray &r_array);

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief 
	 */
	void Erase(size_t n_begin, size_t n_end);

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief 
	 */
	void Erase(size_t n_index);

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief
	 *	@note This is rather inefficient if used frequently.
	 */
	bool PushBack(bool b_value);

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief shifts the whole array left (direction of decreasing indices) by n_shift bits;
	 *		bits, shifted-out from array are dropped; bits at the end of array are filled with zeros
	 */
	void ShiftLeft(size_t n_shift);

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief shifts the whole array right (direction of increasing indices) by n_shift bits;
	 *		bits, shifted-out from array are dropped; bits at the beginning of array are filled with zeros
	 */
	void ShiftRight(size_t n_shift);

	/**
	 *	@todo This function hasn't been tested yet. Use with caution.
	 *	@brief array concatenation
	 *		- return true in case there was enough memory and arrays
	 *		  were concatenated, otherwise false
	 */
	bool operator +=(const CBitArray &r_array);

	/**
	 *	@brief compares all bits in the array to a boolean value
	 *
	 *	@param[in] b_value is the value to compare to
	 *
	 *	@return Returns true in case all bits in array are equal to b_value, otherwise returns false.
	 *	@return Always returns true for an empty array.
	 */
	bool operator ==(bool b_value) const;

	/**
	 *	@brief array equality operator
	 *	@param[in] r_array is the other array to compare to
	 *	@return Returns true in case array lengths and contents are equal, otherwise returns false.
	 */
	bool operator ==(const CBitArray &r_array) const;

	/**
	 *	@brief less-than operator for bit arrays (lexicographic comparison)
	 *	@param[in] r_array is the other array to compare to
	 *	@return Returns true if the first position at which there is a different
	 *		bit in the two arrays contains null in this array. In case all the
	 *		bits at positions up to the end of the shorter array are equal,
	 *		returns true if length of this array is smaller than the length of r_array.
	 *		Otherwise returns false.
	 */
	bool operator <(const CBitArray &r_array) const;

	/**
	 *	@brief greater-than operator for bit arrays (lexicographic comparison)
	 *	@param[in] r_array is the other array to compare to
	 *	@return Returns true if the first position at which there is a different
	 *		bit in the two arrays contains one in this array. In case all the
	 *		bits at positions up to the end of the shorter array are equal,
	 *		returns true if length of this array is greater than the length of r_array.
	 *		Otherwise returns false.
	 */
	inline bool operator >(const CBitArray &r_array) const
	{
		const CBitArray &r_self = *this;
		return r_array < r_self;
		// "this > other" is evaluated as "other < this"
	}

	/**
	 *	@brief greater-than or equal operator for bit arrays (lexicographic comparison)
	 *	@param[in] r_array is the other array to compare to
	 *	@return Returns true if the first position at which there is a different
	 *		bit in the two arrays does not contain null in this array. In case all the
	 *		bits at positions up to the end of the shorter array are equal,
	 *		returns true if length of this array is not smaller than the length of r_array.
	 *		Otherwise returns false.
	 */
	inline bool operator >=(const CBitArray &r_array) const
	{
		const bool b_other_is_greater = r_array > *this;
		return !b_other_is_greater;
		// "this >= other" holds exactly when "other > this" does not
	}

	/**
	 *	@brief less-than or equal operator for bit arrays (lexicographic comparison)
	 *	@param[in] r_array is the other array to compare to
	 *	@return Returns true if the first position at which there is a different
	 *		bit in the two arrays does not contain one in this array. In case all the
	 *		bits at positions up to the end of the shorter array are equal,
	 *		returns true if length of this array is not greater than the length of r_array.
	 *		Otherwise returns false.
	 */
	inline bool operator <=(const CBitArray &r_array) const
	{
		const bool b_other_is_less = r_array < *this;
		return !b_other_is_less;
		// "this <= other" holds exactly when "other < this" does not
	}

	/**
	 *	@brief array inequality operator
	 *	@param[in] r_array is the other array to compare to
	 *	@return Returns false in case array lengths and contents are equal, otherwise returns true.
	 */
	inline bool operator !=(const CBitArray &r_array) const
	{
		const bool b_equal = (r_array == *this);
		return !b_equal;
		// inequality is the negation of the array equality operator
	}

	/**
	 *	@brief compares all bits in the array to a boolean value
	 *
	 *	@param[in] b_value is the value to compare to
	 *
	 *	@return Returns true in case some of bits in array are not equal to b_value, otherwise returns false.
	 *	@return Always returns false for an empty array.
	 */
	inline bool operator !=(bool b_value) const
	{
		const bool b_all_bits_match = (*this == b_value);
		return !b_all_bits_match;
		// negation of the comparison of all the bits to b_value
	}

	/**
	 *	@brief gets internal buffer size
	 *	@return Returns size of the internal data buffer, in bytes.
	 */
	inline size_t n_Buffer_Size() const
	{
		const size_t n_unit_num = n_BufferSize_Units(m_n_used_bits);
		// number of native integers needed for the bits in use

		return n_unit_num * unit_SizeBytes;
		// convert units to bytes
	}

	/**
	 *	@brief gets internal buffer pointer
	 *	@return Returns const pointer to the data buffer.
	 *	@note The buffer is n_Buffer_Size() bytes long.
	 */
	inline const uint8_t *p_Buffer() const
	{
		const uint8_t *p_bytes = TBuffer::p_Data();
		// read-only view of the storage held by the TBuffer base

		return p_bytes;
	}

	/**
	 *	@brief gets the internal buffer pointer
	 *
	 *	@return Returns pointer to the data buffer.
	 *
	 *	@note The buffer is n_Buffer_Size() bytes long.
	 *	@note This enables writing to the internal buffer. Do not free or realloc the buffer,
	 *		pay attention not to cross array boundaries.
	 */
	inline uint8_t *p_Buffer()
	{
		uint8_t *p_bytes = TBuffer::p_Data();
		// writable view of the storage held by the TBuffer base

		return p_bytes;
	}

	/**
	 *	@brief copies the internal data buffer to the specified location
	 *
	 *	@param[in] n_buffer_size is size of the destination buffer, in bytes (in case this size is shorter,
	 *		only the part of the internal data buffer is copied; in case it's larger, the end of the destination
	 *		buffer is kept intact)
	 *	@param[out] p_dest is the destination pointer for the data
	 */
	void GetBuffer(size_t n_buffer_size, void *p_dest) const;

	/**
	 *	@brief fills the internal data buffer from the specified location
	 *
	 *	@param[in] n_buffer_size is size of the source buffer, in bytes (in case this size is larger,
	 *		only the part of the source buffer is copied; in case it's smaller, the end of the internal data
	 *		buffer is kept intact)
	 *	@param[in] p_src is the source pointer containing the data
	 */
	void SetBuffer(size_t n_buffer_size, const void *p_src);

	/**
	 *	@brief gets maximal array size
	 *	@return Returns the greatest size possible, in bits, any bit array
	 *		can be allocated to regardless of the amount of available memory.
	 */
	static size_t n_Max_Size();

	/**
	 *	@brief gets mask for first n bits
	 *	@param[in] n is number of bits set in the mask (must be greater than zero and smaller or equal to unit_SizeBits)
	 *	@return Returns _NativeInt mask with the n most significant bits set.
	 */
	static inline _NativeInt n_Mask_FirstN(size_t n)
	{
		_ASSERTE(n > 0 && n <= unit_SizeBits);
		_ASSERTE(!n || n_Mask(n - 1) == ((_NativeInt(1) << index_Mask) >> (n - 1))); // in case n > 0, this should assert
		// sanity checks on the argument and on the single-bit mask used below

		const _NativeInt n_nth_bit = (_NativeInt(1) << index_Mask) >> (n - 1);
		// single bit at the position n - 1, counting from the most significant bit

		return ~(n_nth_bit - 1);
		// subtracting one sets all the bits below it, the inverse then keeps
		// only that bit and all the bits above it
	}

	/**
	 *	@brief gets mask for selected range bits
	 *	@param[in] b is zero-based index of the first bit set in the mask (must be greater or equal to zero and smaller than unit_SizeBits)
	 *	@param[in] e is zero-based index of the last bit (not inclusive) set in the mask (must be greater than b and smaller or equal to unit_SizeBits)
	 *	@return Returns _NativeInt mask with the bits [b, e) set (counting from the most significant bit).
	 */
	static inline _NativeInt n_Mask_Range(size_t b, size_t e)
	{
		_ASSERTE(b >= 0 && b < unit_SizeBits);
		_ASSERTE(e > 0 && e <= unit_SizeBits);
		_ASSERTE(b < e);
		// the range must be non-empty and must fit in a single unit

		const _NativeInt n_left_aligned = n_Mask_FirstN(e - b);
		// mask of the proper width, starting at the most significant bit

		return n_left_aligned >> b;
		// move the mask so that it begins at the bit b
	}

protected:
	static inline _NativeInt n_Identity(_NativeInt x);

	/**
	 *	@brief _NativeInt bit-wise inverse function
	 */
	static inline _NativeInt n_Inverse(_NativeInt x)
	{
		const _NativeInt n_flipped = ~x;
		// every bit of the argument is inverted

		return n_flipped;
	}

	/**
	 *	@return Returns pointer to data buffer in native integer units.
	 */
	inline _NativeInt *p_Data()
	{
		_NativeInt *p_units = (_NativeInt*)TBuffer::p_Data();
		// the storage of the TBuffer base, viewed as native integers

		return p_units;
	}

	/**
	 *	@return Returns const pointer to data buffer in native integer units.
	 */
	inline const _NativeInt *p_Data() const
	{
		const _NativeInt *p_units = (const _NativeInt*)TBuffer::p_Data();
		// the storage of the TBuffer base, viewed as read-only native integers

		return p_units;
	}

	/**
	 *	@return Returns index of native integer, containing bit with zero-based index n_index.
	 */
	static inline index_t n_Index(index_t n_index)
	{
		const index_t n_unit = n_index >> index_Shift;
		// dropping the low index_Shift bits leaves the index of the unit

		return n_unit;
	}

	/**
	 *	@return Returns bit-shift inside native integer to get bit with zero-based index n_index.
	 */
	static inline size_t n_Shift(index_t n_index)
	{
		const size_t n_bit_offset = n_index & index_Mask;
		// only the bits selected by index_Mask address a bit inside of a unit

		return n_bit_offset;
	}

	/**
	 *	@return Returns mask with a single bit set, selecting the bit with zero-based index n_index inside its native integer (MSB is the first bit).
	 */
	static inline _NativeInt n_Mask(index_t n_index)
	{
		const size_t n_bit_offset = n_Shift(n_index);
		// position of the bit inside of its unit

		const _NativeInt n_first_bit = _NativeInt(1) << index_Mask;
		// mask of the bit with offset zero (bits are stored with MSB being the first one)

		return n_first_bit >> n_bit_offset;
		// move the mask to the requested position
	}

	/**
	 *	@return Returns minimal size of buffer, in native integers to accommodate n_bits_size bits.
	 */
	static inline size_t n_BufferSize_Units(size_t n_bits_size)
	{
		const size_t n_whole_units = n_bits_size / unit_SizeBits;
		// units that are filled completely

		const size_t n_partial_units = (n_bits_size % unit_SizeBits)? 1 : 0;
		// one more unit is needed if there are some bits left over

		return n_whole_units + n_partial_units;
		// same result as (n_bits_size + unit_SizeBits - 1) / unit_SizeBits, except that
		// the sum can not wrap around for n_bits_size close to the maximum of size_t
	}

	inline size_t n_WholeUnitsUsed() const;
	static inline size_t n_WholeUnitsUsed(size_t n_bits_used);
	inline bool b_LastUnitUsedPartialy() const;
	static inline bool b_LastUnitUsedPartialy(size_t n_bits_used);
	inline _NativeInt n_LastUnitMask() const;
	static inline _NativeInt n_LastUnitMask(size_t n_bits_used);
};

/**
 *	@brief swaps values of two bit references (not the references themselves)
 *
 *	Overrides global ::swap to swap bits two CBitArray::CBitReference-s
 *	point to, instead of swapping their contents.
 *
 *	@param[in] a is the first bit reference
 *	@param[in] b is the second bit reference
 */
inline void swap(CBitArray::CBitReference a, CBitArray::CBitReference b)
{
	bool b_saved_first = a;
	// remember the value of the first bit, it is about to be overwritten

	a = b;
	// the first bit receives the value of the second bit

	b = b_saved_first;
	// the second bit receives the remembered value
}

namespace std {

/**
 *	@brief exchanges values of the bits, referenced by two bit references
 *
 *	Overload of std::swap for CBitArray::CBitReference, exchanging the bits
 *	the references refer to rather than the reference objects themselves.
 *
 *	@param[in] a is the first bit reference
 *	@param[in] b is the second bit reference
 *
 *	@note NOTE(review): the C++ standard permits adding template specializations
 *		to namespace std, but not new overloads - confirm this overload is still
 *		required alongside the global swap().
 */
inline void swap(CBitArray::CBitReference a, CBitArray::CBitReference b)
{
	bool b_saved_first = a;
	// remember the value of the first bit, it is about to be overwritten

	a = b;
	// the first bit receives the value of the second bit

	b = b_saved_first;
	// the second bit receives the remembered value
}

} // ~std

#endif // !__BIT_ARRAY_INCLUDED
