/*
								+----------------------------------+
								|                                  |
								|  *** Basic compression algs ***  |
								|                                  |
								|   Copyright  -tHE SWINe- 2008   |
								|                                  |
								|            Compress.h            |
								|                                  |
								+----------------------------------+
*/

#pragma once
#ifndef __SIMPLE_COMPRESSION_INCLUDED
#define __SIMPLE_COMPRESSION_INCLUDED

/**
 *	@file Compress.h
 *	@author -tHE SWINe-
 *	@date 2008
 *	@brief Simple experimental data compression framework, focused on Burrows-Wheeler methods.
 *
 *	@date 2007-02-25
 *
 *	this is the first beta version of the file. todo - rewrite TBuffer so it can realloc
 *	itself the way std::vector can and try to avoid any (re)allocations while (un)packing
 *
 *	@date 2008-03-13
 *
 *	TBuffer was rewritten as requested, fixed some signed / unsigned mismatches for gcc
 *
 *	@date 2008-11-20
 *
 *	TBuffer unit is no longer unsigned char, but uint8_t instead, this should avoid
 *	incompatibility with some extended character encoding in newer versions of visual studio
 *
 *	@date 2009-05-23
 *
 *	removed all instances of std::vector::reserve and replaced them by stl_ut::Reserve_*
 *
 *	@date 2009-10-08
 *
 *	slightly improved CHuffmanCodec, fixed a bug in canonical Huffman code generation for
 *	trees where there are no symbols of length n, but there are both shorter and longer
 *	symbols (codes got shifted too much, got too long, had to be regenerated). this was
 *	hurting compression and so it had to be fixed, but the bug was also in the decompression
 *	code, so this sadly breaks backward compatibility.
 *
 *	@date 2009-10-11
 *
 *	replaced stl container ::resize() by stl_ut::Resize_*() to avoid unhandled
 *	std::bad_alloc
 *
 *	optimized CBurrowsWheelerTransform::CWrapMemCmp() wrap-around memory comparator by
 *	calculating lengths of blocks that do not wrap and comparing them in shorter loops
 *
 *	added __BWT_ENABLE_THREADED_ENCODE macro
 *
 *	@date 2009-10-20
 *
 *	fixed some warnings when compiling under VC 2005, implemented "Security
 *	Enhancements in the CRT" for VC 2008. compare against MyProjects_2009-10-19_
 *
 *	@date 2012-06-19
 *
 *	Moved multiple inclusion guard before file documentation comment.
 *
 *	@date 2012-06-30
 *
 *	Changed the function for symbol lookup in CHuffmanTree to use a predicate instead
 *	of defining yet another comparison function for comparing symbols with frequencies.
 *
 *	@date 2014-09-12
 *
 *	Moved definitions of inline and template functions to Compress.inl.
 *	Changed CModifiedRLECodec to implement bit-wise RLE-EXP.
 *	Implemented CInversionFrequenciesCodec::ModifiedEncode() and ModifiedDecode(),
 *	attaining reasonable compression ratios (it shares bulk of code with
 *	CInversionFrequenciesCodec::Encode() and Decode(), thus it resides in the same class).
 *	Added reusable bit encoding routines in CBitEncoder / CBitDecoder.
 *	Added simple integer encoding routines in CEmitInt / CDecodeInt and
 *	CEmitVarLength / CDecodeVarLength.
 *	Added CCRC32CheckCodec, a simple CRC-32 based integrity check.
 *
 */

#include "Buffer.h"
#include "MinMax.h"

/**
 *	@def __BWT_ENABLE_THREADED_ENCODE
 *
 *	@brief enables CBurrowsWheelerTransform::ThreadedEncode()
 *
 *	This enables the multi-threaded implementation of Burrows-Wheeler transform. While
 *		it may be faster than the single-threaded implementation, it's far from perfect
 *		(threads are not loaded equally). Therefore, for high-performance
 *		implementations, parallelism should be achieved another way.
 */
//#define __BWT_ENABLE_THREADED_ENCODE

/**
 *	@brief simple BWT encoder / decoder
 *
 *	Simple Burrows-Wheeler transform implementation. Uses indices instead
 *		of string copies to minimize memory usage. Also uses a somewhat optimized
 *		version of memcpy, only available under Windows (it is written in MASM).
 *
 *	@todo Create a GAS port of the assembly part, so that this is equally fast under Linux.
 */
class CBurrowsWheelerTransform {
private:
	// helper classes; only forward-declared in this header, the definitions live elsewhere
	class CWrapMemCmp; // wrap-around memory comparator (named so in the file changelog)
	class CIota; // NOTE(review): presumably generates the initial index sequence - confirm in the implementation
	class CSorter; // NOTE(review): presumably a sorting helper - confirm in the implementation
	class CMerger; // NOTE(review): presumably a merging helper - confirm in the implementation

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_src, TBuffer &r_t_dest);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note This doesn't work (is going to return false) with an empty input buffer.
	 */
	static bool Encode(const TBuffer &r_t_src, TBuffer &r_t_dest);

#ifdef __BWT_ENABLE_THREADED_ENCODE

	/**
	 *	@brief parallel encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest, works in parallel. While
	 *		this may be faster than the single-threaded implementation, it's far from
	 *		perfect (threads are not loaded equally). Therefore, for high-performance
	 *		implementations, parallelism should be achieved another way.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *	@param[in] n_thread_num is number of worker threads (must be power of two).
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note Doesn't work (is going to return false) with an empty input buffer.
	 *	@note This only gets compiled if the __BWT_ENABLE_THREADED_ENCODE macro
	 *		is defined (not by default).
	 */
	static bool ThreadedEncode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_thread_num);

#endif // __BWT_ENABLE_THREADED_ENCODE
};

/**
 *	@brief basic move to front transformation implementation
 *
 *	Implements MTF as originally proposed, and its variant MTF-1.
 */
class CMoveToFrontTransform {
public:
	/**
	 *	@brief MTF algorithm names
	 */
	enum {
		algo_MTF, /**< original MTF */
		algo_MTF_1 /**< MTF-1 */
	};

	/**
	 *	@brief in-place decoding function
	 *
	 *	Decodes data in r_t_buffer.
	 *
	 *	@param[in,out] r_t_buffer is both source and destination data buffer
	 *	@param[in] n_algorithm is MTF algorithm, one of algo_MTF, algo_MTF_1
	 *
	 *	@note This overload is declared void; it has no return value to signal failure.
	 *	@note Using different algorithm than the one used when encoding yields
	 *		different results.
	 */
	static void Decode(TBuffer &r_t_buffer, int n_algorithm = algo_MTF);

	/**
	 *	@brief in-place encoding function
	 *
	 *	Encodes data in r_t_buffer.
	 *
	 *	@param[in,out] r_t_buffer is both source and destination data buffer
	 *	@param[in] n_algorithm is MTF algorithm, one of algo_MTF, algo_MTF_1
	 *
	 *	@note This overload is declared void; it has no return value to signal failure.
	 */
	static void Encode(TBuffer &r_t_buffer, int n_algorithm = algo_MTF);

	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *	@param[in] n_algorithm is MTF algorithm, one of algo_MTF, algo_MTF_1
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note Using different algorithm than the one used when encoding yields
	 *		different results.
	 */
	static bool Decode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_algorithm = algo_MTF);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *	@param[in] n_algorithm is MTF algorithm, one of algo_MTF, algo_MTF_1
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_algorithm = algo_MTF);

private:
	// internal workers, declared here and defined elsewhere; they take no default algorithm
	// and return no status.
	// NOTE(review): presumably the public overloads forward to these - confirm in the implementation
	static void _Encode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_algorithm);
	static void _Decode(const TBuffer &r_t_src, TBuffer &r_t_dest, int n_algorithm);
};

/**
 *	@brief simple run length coder
 *
 *	Implementation of RLE, optimized for packing MTF outputs (the compression flag bit is LSB so,
 *		in theory, symbols with lower values are generated, in hope not to disturb symbol
 *		probabilities after MTF too much). It actually works with the Calgary corpus.
 */
class CRunLengthCodec {
public:
	/**
	 *	@brief RLE configuration, stored as enum
	 */
	enum {
		min_RunLength = 3 /**< @brief minimal run length setting */
	};

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_src, TBuffer &r_t_dest);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_src, TBuffer &r_t_dest);
};

/**
 *	@brief modified dual-stream run length coder
 *
 *	Simple RLE, optimized for packing IF outputs (run lengths are stored in a second buffer).
 *		This implementation is similar to the RLE-EXP algorithm.
 */
class CModifiedRLECodec {
public:
	/**
	 *	@brief configuration stored as enum
	 */
	enum {
		min_RunLength = 3, /**< @brief minimal run length to be encoded as compressed data */
		expCode_BitGranularity = 1, /**< @brief granularity of encoded lengths, in bits */
		expCode_MinWidth = 2 /**< @brief minimal run length encode size, in granules */
	};

protected:
	/**
	 *	@brief run length encoding helper (forward declaration only, defined elsewhere)
	 *
	 *	@tparam _n_min_run_length is minimal run length (default min_RunLength)
	 *	@tparam _n_bit_granularity is granularity of encoded lengths (default expCode_BitGranularity)
	 *	@tparam _n_min_width is minimal encode size (default expCode_MinWidth)
	 */
	template <const int _n_min_run_length = min_RunLength,
		const int _n_bit_granularity = expCode_BitGranularity,
		const int _n_min_width = expCode_MinWidth>
	struct TExpEncoding;

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_src, r_t_src_runs and outputs to r_t_dest.
	 *
	 *	@param[in] r_t_src is source data buffer, containing encoded symbols
	 *	@param[in] r_t_src_runs is source data buffer, containing run lengths
	 *	@param[out] r_t_dest is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_src, const TBuffer &r_t_src_runs, TBuffer &r_t_dest);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_src, outputs to r_t_dest and r_t_dest_runs.
	 *
	 *	@param[in] r_t_src is source data buffer
	 *	@param[out] r_t_dest is destination data buffer, containing encoded symbols
	 *		(original contents will be lost)
	 *	@param[out] r_t_dest_runs is destination data buffer, containing run lengths
	 *		(original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_src, TBuffer &r_t_dest, TBuffer &r_t_dest_runs);
};

/**
 *	@brief a simple Huffman tree template; features simple interface for common cases,
 *		and static interface for more advanced use
 *
 *	@tparam CSymbol is encoded symbol data type, it should be an unsigned integer
 *	@tparam n_max_code_bit_num is maximal length of Huffman code
 *
 *	@note This facilitates encoding only (decoding is supposed to be very simple).
 */
template <class CSymbol = uint8_t, const int n_max_code_bit_num = 16>
class CHuffmanTree {
public:
	/**
	 *	@brief configuration, stored as enum
	 */
	enum {
		max_CodeBitNum = n_max_code_bit_num /**< @brief maximal length of Huffman code */
	};

	typedef uint32_t _TyCodeWord; /**< @brief code word storage */
	typedef CSymbol _TySymbol; /**< @brief symbol data type */
	typedef size_t _TyFrequency; /**< @brief symbol frequency data type */

	/**
	 *	@brief structure, holding a symbol, its frequency and eventually also its code
	 */
	struct TFrequency {
		_TySymbol n_symbol; // valid for leaf nodes only
		_TyFrequency n_frequency; // frequency of the symbol

		_TyCodeWord n_code_word; // code word, starts as 0
		uint32_t n_code_length; // code length, starts as uint32_t(-1)
		// NOTE(review): the all-ones length presumably marks "no code assigned yet" - confirm in the implementation

		/**
		 *	@brief constructor; stores the symbol and its frequency, clears the code
		 *
		 *	@param[in] n_sym is symbol value
		 *	@param[in] n_freq is symbol frequency (default 0)
		 *
		 *	@note This is intentionally left non-explicit (as in the original),
		 *		code elsewhere may rely on the implicit conversion from a symbol.
		 */
		inline TFrequency(_TySymbol n_sym, _TyFrequency n_freq = 0)
			:n_symbol(n_sym), n_frequency(n_freq), n_code_word(0),
			n_code_length(uint32_t(-1)) // explicit cast, avoids implicit signed to unsigned conversion
		{}

		inline bool operator <(const TFrequency &r_t_freq) const // lower frequency comparison (reversed)
		{
			return n_frequency > r_t_freq.n_frequency; // orders the higher frequencies first
		}

		inline bool operator ==(_TySymbol n_sym) const // symbol equality comparison
		{
			return n_symbol == n_sym;
		}

		inline bool operator <(_TySymbol n_sym) const // symbol ordering comparison (not reversed)
		{
			return n_symbol < n_sym;
		}

		inline operator _TySymbol() const // get symbol
		{
			return n_symbol;
		}
	};

protected:
	/**
	 *	@brief Huffman tree node
	 */
	struct TNode {
		_TyFrequency n_frequency; // frequency stored in this node
		const TNode *p_left, *p_right; // children; both null or both non-null (asserted in b_Leaf())

		/**
		 *	@brief constructor; stores the frequency and the child pointers
		 *
		 *	@param[in] n_freq is node frequency (default 0)
		 *	@param[in] p_l is the left child (default null)
		 *	@param[in] p_r is the right child (default null)
		 */
		inline TNode(_TyFrequency n_freq = 0, const TNode *p_l = 0, const TNode *p_r = 0)
			:n_frequency(n_freq), p_left(p_l), p_right(p_r)
		{}

		/**
		 *	@brief determines whether this node is a leaf
		 *	@return Returns true if the node has no children, otherwise returns false.
		 */
		inline bool b_Leaf() const
		{
			_ASSERTE(!p_left == !p_right);
			return !p_left; // && !p_right
		}

		inline bool operator <(const TNode &r_t_node) const // lower frequency comparison (reversed)
		{
			return n_frequency > r_t_node.n_frequency; // orders the higher frequencies first
		}

		/**
		 *	@brief the same (reversed) frequency comparison, for nodes referenced by pointers
		 *
		 *	@param[in] p_a is pointer to the first node (must not be null)
		 *	@param[in] p_b is pointer to the second node (must not be null)
		 *
		 *	@return Returns true if the frequency of *p_a is greater than that of *p_b.
		 */
		static inline bool CompareFreq(const TNode *p_a, const TNode *p_b)
		{
			return *p_a < *p_b;
		}
	};

	/**
	 *	@brief a simple function object, converting node references to pointers
	 */
	class CMakePtr {
	public:
		inline const TNode *operator ()(const TNode &r_t_node) const
		{
			return &r_t_node;
		}
	};

	/**
	 *	@brief a simple function object that copies pointers to children of specified nodes to a given vector
	 *
	 *	Leaf nodes are not copied anywhere, they are only counted.
	 */
	class CGetChildren {
	protected:
		std::vector<const TNode*> &m_r_dest; // receives the children of the visited inner nodes
		uint32_t &m_r_n_leaf_counter; // incremented once per visited leaf node

	public:
		inline CGetChildren(std::vector<const TNode*> &r_dest, uint32_t &r_n_leaf_counter)
			:m_r_dest(r_dest), m_r_n_leaf_counter(r_n_leaf_counter)
		{}

		inline void operator ()(const TNode *p_node)
		{
			if(p_node->b_Leaf())
				++ m_r_n_leaf_counter;
			else {
				_ASSERTE(m_r_dest.capacity() >= m_r_dest.size() + 2);
				// the caller must have the space reserved
				// NOTE(review): presumably so that push_back() never reallocates - confirm at the call site

				m_r_dest.push_back(p_node->p_left);
				m_r_dest.push_back(p_node->p_right);
			}
		}
	};

#ifdef _DEBUG
	/**
	 *	@brief debugging predicate, finds the first node that breaks the ordering
	 *
	 *	@note This is a stateful predicate, it remembers the previously visited node.
	 */
	class CFindUnsorted {
	protected:
		const TNode *m_p_last; // the previously visited node

	public:
		inline CFindUnsorted(const TNode *p_last)
			:m_p_last(p_last)
		{}

		inline bool operator ()(const TNode *p_next)
		{
			if(*p_next < *m_p_last) // TNode comparison is reversed: the next node has greater frequency
				return true; // unsorted
			m_p_last = p_next;
			return false;
		}

		/**
		 *	@brief checks whether the frequencies of the nodes in a queue do not increase
		 *	@param[in] r_queue is the list of node pointers to be checked (not modified)
		 *	@return Returns true if no node has greater frequency than
		 *		the node preceding it (also for queues of less than two nodes).
		 */
		static inline bool b_IsSorted(const std::vector<const TNode*> &r_queue)
		{
			return r_queue.size() < 2 || std::find_if(r_queue.begin() + 1,
				r_queue.end(), CFindUnsorted(r_queue.front())) == r_queue.end();
		}
	};
#endif // _DEBUG

protected:
	std::vector<TFrequency> m_freq_list; /**< @brief list of symbols and their frequencies */
	uint32_t m_p_code_num[max_CodeBitNum]; /**< @brief numbers of symbols per each code length @note The first element (the one with index 0) is the number of 1-bit codes. */
	unsigned int m_n_max_code_bit_num; /**< @brief maximum symbol length, set at runtime (not more than max_CodeBitNum) */

public:
	/**
	 *	@brief default constructor; sets number of code bits
	 *
	 *	@param[in] n_code_bit_limit is maximum number of code bits
	 *		(default max_CodeBitNum; must not exceed max_CodeBitNum)
	 *
	 *	@note The parameter must not be named n_max_code_bit_num, that would redeclare
	 *		the template parameter of the same name (ill-formed, rejected by gcc and clang).
	 */
	inline CHuffmanTree(unsigned int n_code_bit_limit = max_CodeBitNum);

	/**
	 *	@brief calculates symbol frequencies from a sample of input data
	 *
	 *	@param[in] p_begin is the first element of input data
	 *	@param[in] p_end is one past last element of input data
	 *
	 *	@return Returns true on success, false on failure (not enough memory).
	 *
	 *	@note This does not necessarily allocate space for all the symbols
	 *		but only for those present (would be unfeasible for large symbol types).
	 */
	bool CalculateSymbolFrequencies(const _TySymbol *p_begin, const _TySymbol *p_end);

	/**
	 *	@brief copies symbol frequencies
	 *
	 *	@param[in,out] r_freq_list is list of symbol frequencies (must contain frequencies
	 *		for all the encoded symbols, otherwise the encoding will fail)
	 *	@param[in] b_allow_swap is swap flag (if set, the frequencies are swapped, destroying
	 *		r_freq_list; if not set, the frequencies are copied, requiring memory allocation)
	 *
	 *	@return Returns true on success, false on failure (not enough memory).
	 */
	bool Use_SymbolFrequencies(std::vector<TFrequency> &r_freq_list, bool b_allow_swap);

	/**
	 *	@brief builds Huffman tree and assigns symbols and lengths to frequencies
	 *	@return Returns true on success, false on failure.
	 *	@note This is only valid after CalculateSymbolFrequencies() was called.
	 */
	inline bool Assign_CodeWords();

	/**
	 *	@brief gets number of elements of code counts table
	 *	@return Returns size of code counts table, in elements.
	 *	@note This is only valid after Assign_CodeWords() was called.
	 */
	inline unsigned int n_CodeTable_Size() const;

	/**
	 *	@brief gets code counts table
	 *	@return Returns const pointer to the code counts table.
	 *	@note This is only valid after Assign_CodeWords() was called.
	 */
	inline const uint32_t *p_CodeTable() const;

	/**
	 *	@brief gets size of symbols table
	 *	@return Returns size of symbols table.
	 *	@note This is only valid after Assign_CodeWords() was called.
	 */
	inline size_t n_SymbolTable_Size() const;

	/**
	 *	@brief gets a copy of table of symbols
	 *
	 *	This writes n_SymbolTable_Size() * sizeof(_TySymbol) bytes,
	 *	containing the table of symbols (a table of all encoded symbols,
	 *	sorted by canonical codeword assignment) into the destination.
	 *
	 *	@param[out] p_dest is destination for the table of symbols (allocated by caller)
	 *	@param[in] n_space_bytes is free space in p_dest, in bytes
	 *		(must be at least n_SymbolTable_Size() * sizeof(_TySymbol))
	 *
	 *	@note This is only valid after Assign_CodeWords() was called.
	 */
	void Get_SymbolTable(_TySymbol *p_dest, size_t UNUSED(n_space_bytes)) const;

	/**
	 *	@brief sorts frequencies by symbol for fast lookup
	 *	@note This is only valid after Assign_CodeWords() was called.
	 *	@note This must not be called between Assign_CodeWords() and
	 *		Get_SymbolTable() as it will disrupt symbol order.
	 */
	inline void SortFrequencies_BySymbol();

	/**
	 *	@brief gets symbol information (codeword)
	 *
	 *	@param[in] t_sym is value of the symbol
	 *
	 *	@return Returns const reference to the symbol information.
	 *
	 *	@note This is only valid after Assign_CodeWords() was called.
	 *	@note This assumes that the symbol is present in the table,
	 *		errors are not handled.
	 */
	const TFrequency &r_LookupSymbol(const _TySymbol t_sym) const;

	/**
	 *	@brief builds Huffman tree and assigns symbols and lengths to frequencies
	 *
	 *	@param[in,out] r_freq_list is list of symbol frequencies
	 *		(will remove zero-freq symbols and will fill code words upon successful return)
	 *	@param[out] p_code_num is filled with number of Huffman codes of every length
	 *		from 0 up to n_code_bit_limit; must be allocated by caller to n_code_bit_limit
	 *	@param[in] n_code_bit_limit is maximum code bit num (default max_CodeBitNum)
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note The parameter must not be named n_max_code_bit_num, that would redeclare
	 *		the template parameter of the same name (ill-formed, rejected by gcc and clang).
	 */
	static bool Assign_CodeWords(std::vector<TFrequency> &r_freq_list,
		uint32_t *p_code_num, unsigned int n_code_bit_limit = max_CodeBitNum);

	/**
	 *	@brief sorts frequencies by symbol for fast lookup
	 *	@param[in,out] freq_list is the list of symbol frequencies
	 */
	static inline void SortFrequencies_BySymbol(std::vector<TFrequency> &freq_list);

	/**
	 *	@brief gets symbol information (codeword)
	 *
	 *	@param[in] t_sym is value of the symbol
	 *	@param[in] freq_list is the list of symbol frequencies (not modified)
	 *
	 *	@return Returns const reference to the symbol information.
	 *
	 *	@note This assumes that the symbol is present in the table,
	 *		errors are not handled.
	 */
	static inline const TFrequency &r_LookupSymbol(const _TySymbol t_sym, const std::vector<TFrequency> &freq_list);

protected:
	// predicates, declared here and defined elsewhere
	// NOTE(review): the descriptions below are inferred from the names - confirm in the implementation
	static bool b_LowerSymbol(_TySymbol n_sym, const TFrequency &r_freq); // symbol comparison for r_LookupSymbol()
	static inline bool FindZeroFreq(const TFrequency &r_t_freq); // presumably matches the zero-frequency symbols
	static inline bool CompareSymbol(const TFrequency &r_freq_a, const TFrequency &r_freq_b); // presumably orders by symbol value
};

/**
 *	@brief canonical Huffman table reader and Huffman decoder
 *	@todo Implement lookahead table for even faster decoding.
 *
 *	@tparam CSymbolType is encoded symbol data type, it should be an unsigned integer
 *	@tparam n_max_tree_size is maximal length of Huffman code
 *
 *	@note This needs to be in the global namespace instead of inside
 *		CHuffmanUtil, otherwise MSVC 6.0 runs into trouble.
 */
template <class CSymbolType, int n_max_tree_size>
class CHuffmanUtil_DecodeTable {
protected:
	uint8_t m_n_byte; /**< @brief current decoded byte */
	int m_n_bit_num; /**< @brief number of valid bits in the current byte */
	uint32_t m_p_max_code[n_max_tree_size + 1]; /**< @brief table of maximum code values for each code length (one more as sentinel) */
	// touched every time a bit is read

	int32_t m_p_table_off[n_max_tree_size]; /**< @brief offset from code value to symbol table for each code length */
	const CSymbolType *m_p_symbol; /**< @brief list of symbols (points to input data) */
	unsigned int m_n_code_bit_num; /**< @brief maximum code length */
	// touched every time a symbol is decoded

	const uint8_t *m_p_input; /**< @brief pointer to the next byte to read */
	const uint8_t *m_p_end; /**< @brief pointer to the end of the input buffer */
	// touched every time a byte is read (if compressed well, less frequent than symbol reads)

	size_t m_n_symbol_num; /**< @brief number of different symbols encoded by this table */
	const uint32_t *m_p_code_num; /**< @brief number of assigned codes for each code length (points to input data) */
	// used only in debugging / getting information about the table

	// note that the members above are grouped by how often they are accessed (see the comments
	// following each group); several of them point to the input data rather than to a copy

public:
	/**
	 *	@brief constructor; initializes an empty table
	 *
	 *	@param[in] p_input is pointer to the first byte to read
	 *	@param[in] p_end is pointer to the end of the buffer
	 *	@param[in] n_tree_size is maximum code length (default n_max_tree_size)
	 *
	 *	@note This does not perform any reading, one needs to call Initialize().
	 */
	inline CHuffmanUtil_DecodeTable(const uint8_t *p_input, const uint8_t *p_end,
		unsigned int n_tree_size = n_max_tree_size);

	/**
	 *	@brief reads the serialized table from the buffer, reconstructs Huffman tree
	 *	@return Returns true on success, false on failure.
	 */
	bool Initialize();

	/**
	 *	@brief gets maximum code length
	 *	@return Returns maximum code length, in bits.
	 */
	inline int n_Code_Bit_Num() const;

	/**
	 *	@brief gets minimum code value
	 *	@param[in] n_code_bit_num is number of bits of the code
	 *	@return Returns minimum code value for the given code length.
	 */
	inline uint32_t n_Min_Code(int n_code_bit_num) const;

	/**
	 *	@brief gets maximum code value
	 *	@param[in] n_code_bit_num is number of bits of the code
	 *	@return Returns maximum code value for the given code length.
	 */
	inline uint32_t n_Max_Code(int n_code_bit_num) const;

	/**
	 *	@brief gets minimum symbol value
	 *	@return Returns the minimum symbol contained in the
	 *		table (not necessarily in the decoded data).
	 */
	CSymbolType n_Min_Symbol() const;

	/**
	 *	@brief gets maximum symbol value
	 *	@return Returns the maximum symbol contained in the
	 *		table (not necessarily in the decoded data).
	 */
	CSymbolType n_Max_Symbol() const;

	/**
	 *	@brief decodes a single symbol
	 *	@param[out] r_n_value is decoded value
	 *	@return Returns true on success, false on failure.
	 */
	inline bool Decode_Symbol(CSymbolType &r_n_value);

	/**
	 *	@brief decodes a single symbol using external accumulator
	 *
	 *	@param[out] r_n_value is decoded value
	 *	@param[in,out] r_n_byte is value of the current decoded byte
	 *	@param[in,out] r_n_bit_num is number of valid bits in r_n_byte
	 *	@param[in,out] r_p_input is pointer to the next byte to read
	 *	@param[in] p_end is pointer to the end of the buffer
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note This is used if reading values from multiple interleaved Huffman streams.
	 *	@note This overload is const, the read state is kept in the arguments.
	 */
	inline bool Decode_Symbol(CSymbolType &r_n_value, uint8_t &r_n_byte, int &r_n_bit_num,
		const uint8_t *&r_p_input, const uint8_t *p_end) const;

	/**
	 *	@brief determines whether the end of the buffer was reached
	 *	@return Returns true if the end of the buffer was reached, otherwise returns false.
	 */
	inline bool b_Finished() const;

	/**
	 *	@brief gets pointer to the next byte to be decoded
	 *	@return Returns pointer to the next byte to be decoded.
	 */
	inline const uint8_t *p_Pointer() const;
};

/**
 *	@brief canonical Huffman table writer
 *
 *	@tparam CSymbolType is encoded symbol data type, it should be an unsigned integer
 *	@tparam n_max_tree_size is maximal length of Huffman code
 *
 *	@note This needs to be in the global namespace instead of inside
 *		CHuffmanUtil, otherwise MSVC 6.0 runs into trouble.
 */
template <class CSymbolType, int n_max_tree_size>
class CHuffmanUtil_EncodeTable {
public:
	typedef CHuffmanTree<CSymbolType, n_max_tree_size> CHuff; /**< @brief compatible Huffman tree type */

protected:
	const CHuff &m_r_huffman_tree; /**< @brief reference to the Huffman tree */
	// NOTE(review): only a reference is stored; presumably it is bound to the constructor
	// argument, which then must outlive this object - confirm in the implementation

public:
	/**
	 *	@brief constructor
	 *	@param[in] r_huffman_tree is reference to the Huffman tree
	 */
	inline CHuffmanUtil_EncodeTable(const CHuff &r_huffman_tree);

	/**
	 *	@brief calculates table size
	 *	@return Returns serialized table size, in bytes.
	 */
	inline size_t n_Table_Size() const;

	/**
	 *	@brief writes Huffman table to a buffer
	 *	@param[in,out] r_t_buffer is buffer to write the table to (appended at the end)
	 *	@return Returns true on success, false on failure (not enough memory).
	 */
	inline bool Write_Table(TBuffer &r_t_buffer) const;

	/**
	 *	@brief writes Huffman table to a buffer
	 *
	 *	@param[in,out] r_p_output is pointer to the write position in the buffer
	 *		(upon successful return points to the buffer after the table data)
	 *	@param[in,out] r_t_buffer is buffer to write the table to
	 *		(used to allocate more data, if needed)
	 *
	 *	@return Returns true on success, false on failure (not enough memory).
	 */
	inline bool Write_Table(uint8_t *&r_p_output, TBuffer &r_t_buffer) const;

	/**
	 *	@brief calculates table size
	 *	@param[in] r_huffman_tree is reference to the Huffman tree
	 *	@return Returns serialized table size, in bytes.
	 */
	static size_t n_Table_Size(const CHuff &r_huffman_tree);

	/**
	 *	@brief writes Huffman table to a buffer
	 *
	 *	@param[in,out] r_p_output is pointer to the write position in the buffer
	 *		(upon successful return points to the buffer after the table data)
	 *	@param[in] p_end is pointer to the end of the buffer (must be large
	 *		enough to accommodate the table, the size is only checked in debug)
	 *	@param[in] r_huffman_tree is reference to the Huffman tree
	 *
	 *	@note This overload is declared void; it has no return value to signal failure.
	 */
	static void Write_Table(uint8_t *&r_p_output, const uint8_t *p_end, const CHuff &r_huffman_tree);

	/**
	 *	@brief writes Huffman table to a buffer
	 *
	 *	@param[in,out] r_p_output is pointer to the write position in the buffer
	 *		(upon successful return points to the buffer after the table data)
	 *	@param[in,out] r_t_buffer is buffer to write the table to
	 *		(used to allocate more data, if needed)
	 *	@param[in] r_huffman_tree is reference to the Huffman tree
	 *
	 *	@return Returns true on success, false on failure (not enough memory).
	 */
	static bool Write_Table(uint8_t *&r_p_output, TBuffer &r_t_buffer, const CHuff &r_huffman_tree);
};

/**
 *	@brief Huffman tree (de)serialization utilities
 *
 *	@tparam CSymbolType is encoded symbol data type, it should be an unsigned integer
 *	@tparam n_max_tree_size is maximal length of Huffman code
 */
template <class CSymbolType, int n_max_tree_size>
class CHuffmanUtil {
public:
	typedef CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size> CDecodeTable; /**< @brief Huffman table decoder */
	typedef CHuffmanUtil_EncodeTable<CSymbolType, n_max_tree_size> CEncodeTable; /**< @brief Huffman table encoder */

public:
	/**
	 *	@brief decodes a Huffman-encoded data packet
	 *
	 *	@param[in] r_t_in_buffer is Huffman-encoded buffer
	 *	@param[out] p_max_symbol is filled with the maximum symbol that can be coded (can be null)
	 *	@param[out] r_t_out_buffer is buffer to store the decompressed data
	 *	@param[in] n_max_code_bit_num is maximum code length (default n_max_tree_size)
	 *
	 *	@return Returns pointer to where it stopped reading on success, or null on failure.
	 */
	static const uint8_t *p_Decode(const TBuffer &r_t_in_buffer,
		CSymbolType *p_max_symbol, TBuffer &r_t_out_buffer,
		unsigned int n_max_code_bit_num = n_max_tree_size);

	/**
	 *	@brief encodes data to a Huffman-encoded data packet
	 *
	 *	@param[in] r_t_in_buffer is buffer to be compressed
	 *	@param[out] r_t_out_buffer is buffer to store the Huffman-encoded data
	 *	@param[in] n_max_code_bit_num is maximum code length (default n_max_tree_size)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer,
		unsigned int n_max_code_bit_num = n_max_tree_size);
};

/**
 *	@brief Huffman coder
 *
 *	Very simple and easy to use canonical Huffman (de)coder, operating on bytes.
 *	Stores (uncompressed) data length, canonical Huffman tree (symbol counts and
 *	symbols), followed by the bitstream. The last byte is padded with zeros.
 */
class CHuffmanCodec {
public:
	/**
	 *	@brief Huffman configuration, stored as enum
	 */
	enum {
		max_CodeBitNum = 16 /**< @brief Huffman code length limit (can be 1 to 31) */
	};

	typedef CHuffmanTree<uint8_t, max_CodeBitNum> CHuff8; /**< @brief Huffman tree over 8-bit symbols, limited to max_CodeBitNum bits per code */

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);
};

/**
 *	@brief RLE-0 Huffman coder
 *
 *	Very simple and easy to use canonical Huffman (de)coder, operating on bytes.
 *	Stores (uncompressed) data length, canonical Huffman tree (symbol counts and
 *	symbols), followed by the bitstream. The last byte is padded with zeros.
 *
 *	This version actually builds two Huffman trees to facilitate RLE-0 compression.
 *	The first one encodes symbols, and the other one encodes 16-bit run lengths,
 *	if the run length is over 2^16, it is encoded as several runs, each initiated
 *	by a new zero symbol (note that the next zero symbol is either not necessary
 *	and the zero runs can be added up in the decoder automatically upon detection
 *	of run of maximal length, or it can be used to facilitate exponential run
 *	length encoding, none of which is done here).
 */
class CRLE0_HuffmanCodec {
public:
	/**
	 *	@brief Huffman configuration enums
	 */
	enum {
		max_CodeBitNum = 16 /**< @brief Huffman code length limit (can be 1 to 31) */
	};

	typedef CHuffmanTree<uint16_t, max_CodeBitNum> CHuff16; /**< @brief Huffman tree */
	typedef CHuffmanTree<uint8_t, max_CodeBitNum> CHuff8; /**< @brief Huffman tree */

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);
};

/**
 *	@brief RLE-0 Huffman coder
 *
 *	Very simple and easy to use canonical Huffman (de)coder, operating on bytes.
 *	Stores (uncompressed) data length, canonical Huffman tree (symbol counts and
 *	symbols), followed by bitstream. The last byte is padded with zeros.
 *
 *	This version actually builds three Huffman trees to facilitate RLE-0 compression
 *	and context-sensitive encoding for symbols that come after the zero run, much
 *	as in BW94. Note that the run lengths are encoded as 1-based, not 0-based, but
 *	changing it breaks backward compatibility while not improving compression at all.
 *	Zero runs longer than 2^16 - 1 are not chained, a separate zero run is encoded
 *	instead.
 *
 *	Added run_Chaining to switch between run chaining methods: either explicit, with
 *	the zero symbol always encoded, or implicit, with the symbol encoded only before
 *	the first word of the run length (with the possibility of representing a long run
 *	by adding multiple words), or exponential, where the words are not added but form
 *	an integer, starting from the least significant 16 bits and going up (each
 *	consecutive word adds 16 bits).
 *
 *	Added the CHuffRL type, which is the setting of the Huffman tree, used for
 *	encoding the run lengths. It can be either CHuff16 (backward compatible), or
 *	CHuff8.
 *
 *	On the Canterbury corpus, if CHuffRL is CHuff16:
 *
 *		- mode 0 yields 142 B advantage
 *		- mode 1 yields 146 B advantage
 *		- mode 2 yields 147 B advantage
 *		- mode 3 yields -559 B advantage
 *
 *	while if CHuffRL is CHuff8:
 *
 *		- mode 0 yields -1.868k B advantage (the worst)
 *		- mode 1 yields 66 B advantage
 *		- mode 2 yields +1.532k B advantage (the best)
 *		- mode 3 yields 844 B advantage
 *
 *	when compared with an unspecified baseline codec (the relative difference
 *	is important here).
 *
 *	The backward-compatible setting is then run_Chaining = 0, CHuffRL = CHuff16.
 *	The alternative "best" setting is run_Chaining = 2, CHuffRL = CHuff8.
 *
 *	@note The values compiled in below are run_Chaining = 2 and CHuffRL = CHuff8,
 *		i.e. the "best" setting, not the backward-compatible one.
 */
class CRLE0_HuffmanCodec_1 {
public:
	/**
	 *	@brief Huffman configuration enums
	 */
	enum {
		max_CodeBitNum = 16, /**< @brief Huffman code length limit (can be 1 to 31) */
		run_Chaining = 2 /**< @brief run chaining method (0: explicit additive (backward compatible), 1: implicit additive, 2: explicit exponential, 3: explicit exponential with second tree for encoding higher words) */
	};

	typedef CHuffmanTree<uint16_t, max_CodeBitNum> CHuff16; /**< @brief Huffman tree, instantiated for uint16_t */
	typedef CHuffmanTree<uint8_t, max_CodeBitNum> CHuff8; /**< @brief Huffman tree, instantiated for uint8_t */
	typedef CHuff8 CHuffRL; /**< @brief Huffman tree used to encode run lengths (CHuff16 is the backward compatible choice) */

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the encoded data)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the data to be encoded)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);
};

/**
 *	@brief RLE-0 Huffman coder
 *
 *	Very simple and easy to use canonical Huffman (de)coder, operating on bytes.
 *	Stores (uncompressed) data length, canonical Huffman tree (symbol counts and
 *	symbols), followed by bitstream. The last byte is padded with zeros.
 *
 *	This version actually builds two Huffman trees to facilitate RLE-0 compression
 *	and context-sensitive encoding for symbols that come after the zero run.
 *	In this version (in contrast with CRLE0_HuffmanCodec_1), the run-lengths are not
 *	Huffman encoded, and are stored as raw 16 bit numbers instead. The run-lengths
 *	are not chained, runs of more than 2^16 - 1 zeroes are encoded as several
 *	separate runs.
 *
 *	Note that this model gives poor performance, and therefore a third Huffman
 *	tree was added to encode the length of the raw numbers (1 to 32), which
 *	is enabled by setting use_LengthCoding to true. This, however, breaks backward
 *	compatibility. If enabled, long zero runs are chained instead of encoding
 *	an extra zero symbol in between.
 *
 *	@note use_LengthCoding is set to true below, so the variant that is compiled in
 *		is the one described above as breaking backward compatibility.
 */
class CRLE0_HuffmanCodec_2 {
public:
	/**
	 *	@brief Huffman configuration enums
	 */
	enum {
		max_CodeBitNum = 16, /**< @brief Huffman code length limit (can be 1 to 31) */
		use_LengthCoding = true /**< @brief use additional Huffman tree to encode number of bits of the run length */
	};

	//typedef CHuffmanTree<uint16_t, max_CodeBitNum> CHuff16; // not used here
	typedef CHuffmanTree<uint8_t, max_CodeBitNum> CHuff8; /**< @brief Huffman tree, instantiated for uint8_t */

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the encoded data)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the data to be encoded)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);
};

/**
 *	@brief RLE-0 Huffman coder
 *
 *	Very simple and easy to use canonical Huffman (de)coder, operating on bytes.
 *	Stores (uncompressed) data length, canonical Huffman tree (symbol counts and
 *	symbols), followed by bitstream. The last byte is padded with zeros.
 *
 *	This version actually builds two Huffman trees to facilitate RLE-0 compression
 *	(symbols and zero run lengths are both in the same 16-bit tree) and context-
 *	sensitive encoding for symbols that come after the zero run. Very long zero
 *	runs are chained to avoid encoding zero symbol several times.
 */
class CRLE0_HuffmanCodec_3 {
public:
	/**
	 *	@brief Huffman configuration enums
	 */
	enum {
		max_CodeBitNum = 16 /**< @brief Huffman code length limit (can be 1 to 31) */
	}; // no comma after the last enumerator: a trailing comma is not valid before C++11

	typedef CHuffmanTree<uint16_t, max_CodeBitNum> CHuff16; /**< @brief Huffman tree, instantiated for uint16_t */
	typedef CHuffmanTree<uint8_t, max_CodeBitNum> CHuff8; /**< @brief Huffman tree, instantiated for uint8_t */

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the encoded data)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the data to be encoded)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);
};

/**
 *	@brief block-wise Huffman coder
 *
 *	Codec with the same Encode() / Decode() interface as the other codecs in this file,
 *	parametrized by the codec that is used to compress the individual blocks and by
 *	the size of the blocks.
 *
 *	@tparam _TyHuffmanCodec is the codec used for the individual blocks
 *		(exposed as CBlockCodec; CHuffmanCodec by default)
 *	@tparam n_block_size is size of the individual blocks (exposed as block_Size;
 *		16384 by default; NOTE(review): presumably in bytes - confirm in the implementation)
 */
template <class _TyHuffmanCodec = CHuffmanCodec, const size_t n_block_size = 16384>
class CBlockyHuffmanCodec {
public:
	typedef _TyHuffmanCodec CBlockCodec; /**< @brief block codec type */

	/**
	 *	@brief configuration, stored as enum
	 */
	enum {
		block_Size = n_block_size /**< @brief size of the individual blocks, compressed with Huffman codec */
	};

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the encoded data)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the data to be encoded)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);
};

/**
 *	@brief inversion frequencies coder
 *
 *	Naive (sorted) inversion frequencies implementation.
 */
class CInversionFrequenciesCodec {
public:
	/**
	 *	@brief symbol sorting names for encoding
	 */
	enum {
		sort_NoSort = 0, /**< @brief symbols are not sorted */
		sort_FreqAscending, /**< @brief symbols are sorted with increasing frequency before encoding */
		sort_FreqDescending /**< @brief symbols are sorted with decreasing frequency before encoding */
	};

	/**
	 *	@brief configuration stored as enum
	 */
	enum {
		symbol_FreqThresh = 8, /**< @brief used in ModifiedEncode(), symbols with lower frequency are escaped */
		max_CodeBitNum8 = 16, /**< @brief maximal length of Huffman code for 8-bit frequencies */
		max_CodeBitNum16 = 17, /**< @brief maximal length of Huffman code for 16-bit frequencies */ // compression decreases with longer trees, may need to increase for large blocks with very high entropy in case encoding fails
		max_CodeBitNum32 = 24 /**< @brief maximal length of Huffman code for 32-bit frequencies */ // untested, would require large data
	};

private:
	class CSortAsc; /**< @brief comparator object for ascending sort of symbols by frequency */
	class CSortDesc; /**< @brief comparator object for descending sort of symbols by frequency */

public:
	/**
	 *	@brief decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the encoded data)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer (the data to be encoded)
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *	@param[in] n_permutation_type is type of permutation, applied to symbols before encoding
	 *		(one of sort_NoSort, sort_FreqAscending or sort_FreqDescending;
	 *		sort_FreqDescending by default)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer,
		TBuffer &r_t_out_buffer, int n_permutation_type = sort_FreqDescending);

	/**
	 *	@brief modified decoding function
	 *
	 *	Decodes data from r_t_in_buffer, outputs to r_t_out_buffer.
	 *
	 *	@param[in] r_t_in_buffer is primary data buffer
	 *	@param[in] r_t_table_buffer is table data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *	@param[in,out] r_t_temp_buffer is temporary buffer for decompress
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool ModifiedDecode(const TBuffer &r_t_in_buffer,
		const TBuffer &r_t_table_buffer, TBuffer &r_t_out_buffer, TBuffer &r_t_temp_buffer);

	/**
	 *	@brief modified encoding function
	 *
	 *	Encodes data from r_t_in_buffer, outputs to r_t_out_buffer and r_t_table_buffer.
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is primary data buffer (contains compressed data, original contents will be lost)
	 *	@param[out] r_t_table_buffer is table buffer (contains compressible data, original contents will be lost)
	 *	@param[in,out] r_t_temp_buffer is temporary work buffer (it can point to the same buffer as r_t_table_buffer)
	 *	@param[in] n_permutation_type is type of permutation, applied to symbols before encoding
	 *		(one of sort_NoSort, sort_FreqAscending or sort_FreqDescending;
	 *		sort_FreqDescending by default)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool ModifiedEncode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer,
		TBuffer &r_t_table_buffer, TBuffer &r_t_temp_buffer,
		int n_permutation_type = sort_FreqDescending);

private:
	/**
	 *	@brief inversion frequencies encoding helper
	 *
	 *	@tparam CEmitObject is type of the object passed as emit (NOTE(review): presumably
	 *		a functor receiving the encoded values, such as CEmitVarLength or CEmitInt -
	 *		confirm in the implementation)
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[in] n_permutation_type is type of permutation (one of sort_NoSort,
	 *		sort_FreqAscending or sort_FreqDescending)
	 *	@param p_frequency is array of 256 symbol frequencies
	 *	@param p_perm_table is array of 256 entries of the symbol permutation table
	 *	@param emit is the emit object
	 *
	 *	@return Returns true on success, false on failure.
	 *
	 *	@note NOTE(review): the direction (in / out) of p_frequency and p_perm_table is not
	 *		visible from this declaration - confirm in the implementation.
	 */
	template <class CEmitObject>
	static bool Encode_IF(const TBuffer &r_t_in_buffer, int n_permutation_type,
		uint32_t p_frequency[256], uint8_t p_perm_table[256], CEmitObject emit);

	/**
	 *	@brief permutation table encoding helper
	 *
	 *	@param[in] n_permutation_type is type of permutation (one of sort_NoSort,
	 *		sort_FreqAscending or sort_FreqDescending)
	 *	@param[in] p_perm_table is array of 256 entries of the symbol permutation table
	 *	@param[in] p_frequency is array of 256 symbol frequencies
	 *	@param r_t_out_buffer is destination data buffer (NOTE(review): whether the data
	 *		is appended or the buffer is overwritten is not visible here - confirm)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Encode_PermTable(int n_permutation_type, const uint8_t p_perm_table[256],
		const uint32_t p_frequency[256], TBuffer &r_t_out_buffer);

	/**
	 *	@brief inversion frequencies decoding helper
	 *
	 *	@tparam CDecodeObject is type of the object passed as decode (NOTE(review): presumably
	 *		a functor yielding the encoded values, such as CDecodeVarLength or CDecodeInt -
	 *		confirm in the implementation)
	 *
	 *	@param[in] n_output_size is size of the output
	 *	@param[in,out] decode is the decode object
	 *	@param[out] r_t_out_buffer is destination data buffer
	 *
	 *	@return Returns true on success, false on failure.
	 */
	template <class CDecodeObject>
	static bool Decode_IF(uint32_t n_output_size, CDecodeObject &decode,
		TBuffer &r_t_out_buffer);

	/**
	 *	@brief permutation table decoding helper
	 *
	 *	@param[in] p_src is pointer to the data to be decoded
	 *	@param[in] p_end is pointer to the end of the data
	 *	@param r_t_out_buffer is data buffer (NOTE(review): the exact role of this buffer
	 *		is not visible from this declaration - confirm in the implementation)
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Decode_PermTable(const uint8_t *p_src,
		const uint8_t *p_end, TBuffer &r_t_out_buffer);
};

/**
 *	@brief a simple CRC-32 check wrapper
 *
 *	This appends CRC-32 of the input when "encoding"; when "decoding", the CRC
 *	is recalculated, compared and stripped. The "decoding" fails if the CRC
 *	changed. Otherwise no "coding" takes place here.
 */
class CCRC32CheckCodec {
public:
	/**
	 *	@brief encoding function
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure (not enough memory).
	 *
	 *	@note The input and the output buffer may be the same.
	 */
	static bool Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief decoding function
	 *
	 *	@param[in] r_t_in_buffer is source data buffer
	 *	@param[out] r_t_out_buffer is destination data buffer (original contents will be lost)
	 *
	 *	@return Returns true on success, false on failure (not enough memory or CRC check failed).
	 *
	 *	@note The input and the output buffer may be the same.
	 */
	static bool Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer);

	/**
	 *	@brief inplace encoding function
	 *	@param[in,out] r_t_in_out_buffer is source and destination data buffer
	 *	@return Returns true on success, false on failure (not enough memory).
	 *	@note This only calls the two-buffer Encode() with the same buffer as both
	 *		the input and the output.
	 */
	static inline bool Encode(TBuffer &r_t_in_out_buffer)
	{
		return Encode(r_t_in_out_buffer, r_t_in_out_buffer);
	}

	/**
	 *	@brief inplace decoding function
	 *	@param[in,out] r_t_in_out_buffer is source and destination data buffer
	 *	@return Returns true on success, false on failure (not enough memory or CRC check failed).
	 *	@note This only calls the two-buffer Decode() with the same buffer as both
	 *		the input and the output.
	 */
	static inline bool Decode(TBuffer &r_t_in_out_buffer)
	{
		return Decode(r_t_in_out_buffer, r_t_in_out_buffer);
	}
};

template <class CSymbolType>
class CBitCoder; // forward declaration (CBitDecoder::Decode_Symbol() and CBitEncoder::Encode_Symbol() below call its static functions)

/**
 *	@brief bit decoder (reads numbers of specified bit width from a byte buffer)
 *
 *	This only holds the reading state (the current byte, the number of valid bits
 *	in it and the read / end pointers); the bit manipulation itself is done
 *	by CBitCoder::Decode_Symbol().
 */
class CBitDecoder {
protected:
	uint8_t m_n_byte; /**< @brief the current byte */
	int m_n_bit_num; /**< @brief number of valid bits in the current byte */
	const uint8_t *m_p_input; /**< @brief pointer to the next byte to read */
	const uint8_t *m_p_end; /**< @brief pointer to the end of the buffer */

public:
	/**
	 *	@brief constructor
	 *	@param[in] r_t_in_buffer is buffer with data to be decoded
	 */
	inline CBitDecoder(const TBuffer &r_t_in_buffer);

	/**
	 *	@brief constructor
	 *
	 *	@param[in] p_src is pointer to a buffer with data to be decoded
	 *	@param[in] p_end is pointer to the end of the buffer
	 */
	inline CBitDecoder(const uint8_t *p_src, const uint8_t *p_end);

	/**
	 *	@brief decodes a single symbol
	 *
	 *	Forwards to CBitCoder<CSymbolType>::Decode_Symbol(), passing it the state
	 *	of this decoder.
	 *
	 *	@tparam CSymbolType is data type of a symbol to be decoded
	 *
	 *	@param[out] r_n_value is decoded value
	 *	@param[in] n_symbol_bit_num is number of bits of the symbol
	 *
	 *	@return Returns true on success, false on failure (reached the end of the buffer).
	 */
	template <class CSymbolType>
	inline bool Decode_Symbol(CSymbolType &r_n_value, int n_symbol_bit_num) // unfortunately, msvc 6.0 requires the implementation here
	{
		return CBitCoder<CSymbolType>::Decode_Symbol(r_n_value, n_symbol_bit_num,
			m_n_byte, m_n_bit_num, m_p_input, m_p_end);
	}

	/**
	 *	@brief determines whether the end of the buffer was reached
	 *	@return Returns true if the end of the buffer was reached, otherwise returns false.
	 */
	inline bool b_Finished() const;

	/**
	 *	@brief gets pointer to the next byte to be decoded
	 *	@return Returns pointer to the next byte to be decoded.
	 */
	inline const uint8_t *p_Pointer() const;
};

/**
 *	@brief bit encoder (writes numbers of specified bit width to a byte buffer)
 *
 *	This writes raw bits to a byte buffer. It is not possible to determine boundaries
 *	of the written numbers without additional info (such as Huffman tree or a list of
 *	lengths in bits). To make a decodable stream, use CEmitVarLength instead.
 */
class CBitEncoder {
protected:
	uint8_t m_n_byte; /**< @brief the current byte */
	int m_n_bit_num_left; /**< @brief number of remaining bits in the current byte */
	uint8_t *m_p_output; /**< @brief pointer to the next byte to write */
	uint8_t *m_p_out_end; /**< @brief pointer to the end of the buffer */
	TBuffer &m_r_t_out_buffer; /**< @brief the output buffer (used for allocating more space, if needed) */

public:
	/**
	 *	@brief constructor
	 *	@param[in] r_t_out_buffer is output buffer (the encoded data is appended at the end of the buffer)
	 */
	inline CBitEncoder(TBuffer &r_t_out_buffer);

	/**
	 *	@brief constructor
	 *
	 *	@param[in] p_output is pointer to the first write position inside the buffer
	 *	@param[in] r_t_out_buffer is output buffer (contents get overwritten by the encoded data)
	 */
	inline CBitEncoder(uint8_t *p_output, TBuffer &r_t_out_buffer);

	/**
	 *	@brief constructor
	 *
	 *	@param[in] p_output is pointer to the first write position inside the buffer
	 *	@param[in] p_out_end is pointer to the end of the buffer
	 *	@param[in] r_t_out_buffer is output buffer (contents get overwritten by the encoded data)
	 */
	inline CBitEncoder(uint8_t *p_output, uint8_t *p_out_end, TBuffer &r_t_out_buffer);

	/**
	 *	@brief constructor
	 *
	 *	@param[in] n_byte is partially filled byte
	 *	@param[in] n_bit_num_left is number of remaining bits in the byte
	 *	@param[in] p_output is pointer to the first write position inside the buffer
	 *	@param[in] p_out_end is pointer to the end of the buffer
	 *	@param[in] r_t_out_buffer is output buffer (contents get overwritten by the encoded data)
	 *
	 *	@note This is used to resume bit encoding.
	 */
	inline CBitEncoder(uint8_t n_byte, int n_bit_num_left,
		uint8_t *p_output, uint8_t *p_out_end, TBuffer &r_t_out_buffer);

	/**
	 *	@brief encodes a single symbol
	 *
	 *	Forwards to CBitCoder<CSymbolType>::Encode_Symbol(), passing it the state
	 *	of this encoder.
	 *
	 *	@tparam CSymbolType is data type of a symbol to be encoded
	 *
	 *	@param[in] n_value is encoded value
	 *	@param[in] n_bit_num is number of bits of the symbol
	 *
	 *	@return Returns true on success, false on failure (not enough memory).
	 */
	template <class CSymbolType>
	inline bool Encode_Symbol(CSymbolType n_value, int n_bit_num)
	{
		return CBitCoder<CSymbolType>::Encode_Symbol(n_value, n_bit_num, m_n_byte,
			m_n_bit_num_left, m_p_output, m_p_out_end, m_r_t_out_buffer);
	}

	/**
	 *	@brief encodes a single symbol
	 *	@tparam CSymbolType is data type of a symbol to be encoded
	 *	@param[in] n_value is encoded value (the number of bits of the symbol
	 *		is determined automatically, using n_Bit_Width())
	 *	@return Returns true on success, false on failure (not enough memory).
	 */
	template <class CSymbolType>
	inline bool Encode_Symbol(CSymbolType n_value)
	{
		return Encode_Symbol(n_value, n_Bit_Width(n_value));
	}

	/**
	 *	@brief writes any remaining bits that do not make a whole byte
	 *	@return Returns true on success, false on failure (not enough memory).
	 *	@note Calling Flush() multiple times has no effect.
	 */
	inline bool Flush();

	/**
	 *	@brief gets pointer to the next byte to be written
	 *	@return Returns pointer to the next byte to be written.
	 */
	inline uint8_t *p_Pointer();

	/**
	 *	@brief gets pointer to the next byte to be written
	 *	@return Returns const pointer to the next byte to be written.
	 */
	inline const uint8_t *p_Pointer() const;
};

/**
 *	@brief utility object for encoding variable-length integers
 *
 *	This uses a UTF-8 style of encoding, which can also be decoded without
 *	any further information (unlike the output of CBitEncoder).
 */
class CEmitVarLength {
protected:
	TBuffer &m_r_dest; /**< @brief destination buffer */

public:
	/**
	 *	@brief constructor
	 *	@param[in] r_t_out_buffer is output buffer (the encoded data is appended at the end)
	 */
	inline CEmitVarLength(TBuffer &r_t_out_buffer);

	/**
	 *	@brief encodes a single symbol
	 *	@param[in] n_value is value of the symbol to be encoded
	 *	@return Returns true on success, false on failure (not enough memory).
	 *	@note A single symbol is encoded as 1 to 5 bytes, depending on
	 *		its value. Smaller (unsigned) values encode shorter.
	 */
	inline bool operator ()(uint32_t n_value);
};

/**
 *	@brief utility object for decoding variable-length integers, encoded by CEmitVarLength
 */
class CDecodeVarLength {
protected:
	const uint8_t *m_p_src; /**< @brief pointer to the next byte to read */
	const uint8_t *m_p_end; /**< @brief pointer to the end of the buffer */

public:
	/**
	 *	@brief constructor
	 *
	 *	@param[in] p_src is pointer to a buffer with data to be decoded
	 *	@param[in] p_end is pointer to the end of the buffer
	 */
	inline CDecodeVarLength(const uint8_t *p_src, const uint8_t *p_end);

	/**
	 *	@brief decodes a single symbol
	 *	@param[out] r_n_value is filled with the decoded symbol
	 *	@return Returns true on success, false on failure.
	 *	@note The failure cause used to be documented as "not enough memory"; for a reader
	 *		this presumably means not enough input data (TODO confirm in the implementation).
	 */
	inline bool operator ()(uint32_t &r_n_value);

	/**
	 *	@brief gets pointer to the next byte to be read
	 *	@return Returns const pointer to the next byte to be read.
	 */
	inline const uint8_t *p_Pointer() const;

	/**
	 *	@brief determines whether the end of the buffer was reached
	 *	@return Returns true if the end of the buffer was reached, otherwise returns false.
	 */
	inline bool b_Finished() const;

};

/**
 *	@brief utility object for encoding fixed-length integers
 *	@tparam CIntType is integer (or any POD) data type
 *
 *	This is a simple utility that can be used to append integers at the end of a buffer.
 */
template <class CIntType>
class CEmitInt {
protected:
	TBuffer &m_r_dest; /**< @brief destination buffer */

public:
	/**
	 *	@brief constructor
	 *	@param[in] r_t_out_buffer is output buffer (the encoded data is appended at the end)
	 */
	inline CEmitInt(TBuffer &r_t_out_buffer);

	/**
	 *	@brief encodes a single symbol
	 *	@param[in] n_value is value of the symbol to be encoded
	 *	@return Returns true on success, false on failure (not enough memory).
	 */
	inline bool operator ()(CIntType n_value);
};

/**
 *	@brief utility object for decoding fixed-length integers
 *	@tparam CIntType is integer (or any POD) data type
 *
 *	This is a simple utility that can be used to read integers from a buffer.
 */
template <class CIntType>
class CDecodeInt {
protected: // same access as in CDecodeVarLength, so that both decoders can be extended the same way
	const uint8_t *m_p_src; /**< @brief pointer to the next byte to read */
	const uint8_t *m_p_end; /**< @brief pointer to the end of the buffer */

public:
	/**
	 *	@brief constructor
	 *
	 *	@param[in] p_src is pointer to a buffer with data to be decoded
	 *	@param[in] p_end is pointer to the end of the buffer
	 */
	inline CDecodeInt(const uint8_t *p_src, const uint8_t *p_end);

	/**
	 *	@brief decodes a single symbol
	 *	@param[out] r_n_value is filled with the decoded symbol
	 *	@return Returns true on success, false on failure.
	 *	@note The failure cause used to be documented as "not enough memory"; for a reader
	 *		this presumably means not enough input data (TODO confirm in the implementation).
	 */
	inline bool operator ()(CIntType &r_n_value);

	/**
	 *	@brief gets pointer to the next byte to be read
	 *	@return Returns const pointer to the next byte to be read.
	 */
	inline const uint8_t *p_Pointer() const;

	/**
	 *	@brief determines whether the end of the buffer was reached
	 *	@return Returns true if the end of the buffer was reached, otherwise returns false.
	 */
	inline bool b_Finished() const;
};

#include "Compress.inl"
// implementations of inline functions

#endif // !__SIMPLE_COMPRESSION_INCLUDED
