/*
								+---------------------------------+
								|                                 |
								|   ***  Document splitter  ***   |
								|                                 |
								|  Copyright   -tHE SWINe- 2010  |
								|                                 |
								|         DocSplitter.cpp         |
								|                                 |
								+---------------------------------+
*/

#include "../../UberLame_src/NewFix.h"
#include "../../UberLame_src/CallStack.h"
#include <stdio.h> // debug
#include <math.h> // debug
#include <vector>
#include <string>
#include <numeric>
#include <algorithm>
#include <hash_map>
#include <map>
#include "../../UberLame_src/Integer.h"
#include "../../UberLame_src/StlUtils.h"
#include "../../UberLame_src/MinMax.h"
#include "../../UberLame_src/Dir.h"
#include "../../UberLame_src/Timer.h"
#include "DocSplitter.h"

#if defined(_MSC_VER) && !defined(__MWERKS__) && !defined(for)
#define for if(0) {} else for
#endif
// msvc 'for' scoping hack

#if defined(_MSC_VER) && !defined(__MWERKS__) && _MSC_VER < 1400
#define stdext std
#endif //_MSC_VER && !__MWERKS__ && _MSC_VER < 1400
// msvc60 doesn't have stdext::hash map, but std::hash_map instead

/*
 *								=== CDocStreamSplitter::TRange ===
 */

/**
 *	@brief default constructor; creates a range with zero offset and zero length
 */
CDocStreamSplitter::TRange::TRange()
	:n_offset(0),
	n_length(0)
{
}

/**
 *	@brief constructor; creates a range with the given offset and length
 *
 *	@param[in] _n_offset is value stored to n_offset
 *	@param[in] _n_length is value stored to n_length
 */
CDocStreamSplitter::TRange::TRange(chunk_off_t _n_offset, chunk_off_t _n_length)
	:n_offset(_n_offset),
	n_length(_n_length)
{
}

/**
 *	@brief less-than comparison of two ranges; only the lengths are compared
 *
 *	@param[in] r_t_other is the range on the right-hand side of the comparison
 *
 *	@return Returns true if this range is shorter than r_t_other, otherwise returns false.
 */
bool CDocStreamSplitter::TRange::operator <(const TRange &r_t_other) const
{
	const bool b_other_is_longer = r_t_other.n_length > n_length;
	return b_other_is_longer;
}

/**
 *	@brief greater-than comparison of two ranges; only the lengths are compared
 *
 *	@param[in] r_t_other is the range on the right-hand side of the comparison
 *
 *	@return Returns true if this range is longer than r_t_other, otherwise returns false.
 */
bool CDocStreamSplitter::TRange::operator >(const TRange &r_t_other) const
{
	const bool b_other_is_shorter = r_t_other.n_length < n_length;
	return b_other_is_shorter;
}

/*
 *								=== ~CDocStreamSplitter::TRange ===
 */

/*
 *								=== CDocStreamSplitter::TTermRange ===
 */

/**
 *	@brief default constructor; creates an empty range and sets term id to -1
 */
CDocStreamSplitter::TTermRange::TTermRange()
	:TRange(),
	n_term_id(-1)
{
}

/**
 *	@brief constructor; creates a range with the given offset, length and term id
 *
 *	@param[in] _n_offset is value passed to the TRange base as offset
 *	@param[in] _n_length is value passed to the TRange base as length
 *	@param[in] _n_term_id is value stored to n_term_id
 */
CDocStreamSplitter::TTermRange::TTermRange(chunk_off_t _n_offset,
	chunk_off_t _n_length, term_id_t _n_term_id)
	:TRange(_n_offset, _n_length),
	n_term_id(_n_term_id)
{
}

/*
 *								=== ~CDocStreamSplitter::TTermRange ===
 */

/*
 *								=== CDocStreamSplitter::TPass ===
 */

/**
 *	@brief default constructor; creates an empty range, flagged as a primary pass
 *
 *	@note If DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS is defined,
 *		the slice-aligned flag is cleared as well.
 */
CDocStreamSplitter::TPass::TPass()
	:TRange(),
	b_primary(true)
#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
	,
	b_slice_aligned(false)
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
{
}

/**
 *	@brief constructor; creates a pass with the given offset, length and primary flag
 *
 *	@param[in] _n_offset is value passed to the TRange base as offset
 *	@param[in] _n_length is value passed to the TRange base as length
 *	@param[in] _b_primary is value stored to b_primary
 *
 *	@note If DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS is defined,
 *		the slice-aligned flag is cleared as well.
 */
CDocStreamSplitter::TPass::TPass(chunk_off_t _n_offset,
	chunk_off_t _n_length, bool _b_primary)
	:TRange(_n_offset, _n_length),
	b_primary(_b_primary)
#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
	,
	b_slice_aligned(false)
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
{
}

/*
 *								=== ~CDocStreamSplitter::TPass ===
 */

/*
 *								=== CDocStreamSplitter::CGetIndexFrequencyPair ===
 */

/**
 *	@brief a small function object for building the term occurrence permutation table
 *
 *	Each call yields a pair of (running index of the call, number of occurrences
 *	of the term passed to the call).
 */
class CDocStreamSplitter::CGetIndexFrequencyPair {
protected:
	chunk_off_t m_n_index; /**< @brief term occurrence index counter (index handed out by the next call) */

public:
	/**
	 *	@brief default constructor; the first converted entry gets index 0
	 */
	inline CGetIndexFrequencyPair()
		:m_n_index(0)
	{}

	/**
	 *	@brief converts a term occurrence table entry to an (index, frequency) pair
	 *
	 *	@param[in] term_occurence is term occurrence table entry; entries need to be
	 *		passed in the order they are stored in the table (e.g. via std::transform)
	 *		for the generated indices to match the table positions
	 *
	 *	@return Returns std::pair where first is the index of the entry (the first call
	 *		returns 0, every following call returns index one larger) and second
	 *		is the number of occurrences stored in the entry (the term frequency).
	 */
	inline std::pair<chunk_off_t, chunk_off_t> operator ()(const TTermOccurence &term_occurence)
	{
		_ASSERTE(m_n_index < UINT32_MAX);
		_ASSERTE(term_occurence.second.size() <= UINT32_MAX);
		// both values must fit the pair without overflowing

		const chunk_off_t n_frequency = chunk_off_t(term_occurence.second.size());
		const chunk_off_t n_entry_index = m_n_index;
		++ m_n_index;
		// take the current index, the next call gets the following one

		return std::pair<chunk_off_t, chunk_off_t>(n_entry_index, n_frequency);
	}
};

/*
 *								=== ~CDocStreamSplitter::CGetIndexFrequencyPair ===
 */

/*
 *								=== CDocStreamSplitter ===
 */

/**
 *	@brief constructor; binds the splitter to a document storage and stores chunking parameters
 *
 *	@param[in] r_doc_storage is document storage to read the documents from (it is reset here)
 *	@param[in] n_dummy_term is id of the dummy term, used as padding between documents
 *	@param[in] n_max_chunk_size is maximal chunk size, in terms
 *		(must be greater than 2 * n_halfwindow_size)
 *	@param[in] n_halfwindow_size is half-window size, in terms
 *
 *	@note This does not build the first chunk, it only resets the counters.
 */
CDocStreamSplitter::CDocStreamSplitter(CDocumentStorage &r_doc_storage,
	term_id_t n_dummy_term, size_t n_max_chunk_size, size_t n_halfwindow_size)
	:m_r_doc_storage(r_doc_storage),
	m_n_max_chunk_size(chunk_off_t(n_max_chunk_size)),
	m_n_window_size(chunk_off_t(n_halfwindow_size)),
	m_n_dummy_term(n_dummy_term),
	m_n_current_chunk(0),
	m_n_current_doc_off(0)
{
	_ASSERTE(n_max_chunk_size <= CHUNK_OFF_MAX);
	// chunk size must be representable as chunk offset

	_ASSERTE(n_halfwindow_size <= CHUNK_OFF_MAX / 2);
	// 2 * window size must not overflow

	_ASSERTE(n_max_chunk_size > 2 * n_halfwindow_size);
	// chunk size must be greater than restart size

	r_doc_storage.Reset();
	_ASSERTE(r_doc_storage.b_HaveMoreDocuments());
	// rewind the storage (needed for the check to work properly); at least a single
	// document is needed to ensure correct amount of padding is inserted
}

/**
 *	@brief rewinds the splitter to the state before the first chunk was built
 *
 *	Drops the current chunk and the current document, zeroes the chunk counter
 *	and the document offset, and resets the document storage.
 */
void CDocStreamSplitter::Reset()
{
	m_chunk.clear();
	m_n_current_chunk = 0;
	// no chunk was built yet

	m_current_doc.clear();
	m_n_current_doc_off = 0;
	// no document was read yet

	m_r_doc_storage.Reset();
	// start reading the documents from the beginning
}

bool CDocStreamSplitter::Build_NextChunk()
{
	_ASSERTE(m_n_current_doc_off <= m_current_doc.size() + m_n_window_size);
	if(m_n_current_doc_off == m_current_doc.size() + m_n_window_size && !m_r_doc_storage.b_HaveMoreDocuments())
		return false;
	// no more chunks

	const size_t n_restart_size = 2 * m_n_window_size;
	const size_t n_relevant_restart_size = m_n_window_size;

	if(m_n_current_chunk) {
		_ASSERTE(m_chunk.capacity() >= m_n_max_chunk_size);
		_ASSERTE(m_chunk.size() == m_n_max_chunk_size);
		m_chunk.erase(m_chunk.begin(), m_chunk.begin() + m_n_max_chunk_size - n_restart_size);
		// leave part of previous chunk to be processed in this chunk as well
	} else {
		_ASSERTE(m_chunk.empty());
		if(!stl_ut::Reserve_N(m_chunk, m_n_max_chunk_size))
			return false;
		//m_chunk.insert(m_chunk.begin(), m_n_window_size, m_n_dummy_term); // this is added all by itself in the loop below
		// allocate the chunk
	}

	for(size_t n_space = m_n_max_chunk_size - m_chunk.size(); n_space > 0;) {
		_ASSERTE(m_n_current_doc_off <= m_current_doc.size() + m_n_window_size);
		if(m_n_current_doc_off == m_current_doc.size() + m_n_window_size) {
			if(!m_r_doc_storage.b_HaveMoreDocuments())
				break; // leave m_n_current_doc_off == m_current_doc.size() + m_n_window_size !!
			m_n_current_doc_off = 0;
			if(!m_r_doc_storage.GetNextDocument(m_current_doc))
				return false;
		}
		// make sure there is a document

		size_t n_doc_size = m_current_doc.size();
		_ASSERTE(n_doc_size <= SIZE_MAX - m_n_window_size);
		// get current document size

		if(m_n_current_doc_off < n_doc_size) {
			size_t n_doc_copy = min(n_doc_size - m_n_current_doc_off, n_space);
			// calculate amount of terms to be inserted

			m_chunk.insert(m_chunk.end(), m_current_doc.begin() + m_n_current_doc_off,
				m_current_doc.begin() + (m_n_current_doc_off + n_doc_copy));
			// copy term indices

			m_n_current_doc_off += n_doc_copy;
			n_space -= n_doc_copy;

			if(!n_space)
				break;
			// detect full buffer
		}
		if(m_n_current_doc_off >= n_doc_size) {
			size_t n_padding = min(m_n_window_size - (m_n_current_doc_off - n_doc_size), n_space);
			// calculate amount of terms to be inserted

			m_chunk.insert(m_chunk.end(), n_padding, m_n_dummy_term);
			// insert dummy terms to separate documents

			m_n_current_doc_off += n_padding;
			n_space -= n_padding;
		}
	}
	// fill chunk with term indices

	++ m_n_current_chunk;

	_ASSERTE(!m_chunk.empty());
	_ASSERTE(m_chunk.size() == m_n_max_chunk_size || (!m_r_doc_storage.b_HaveMoreDocuments() &&
		m_chunk.size() <= m_n_max_chunk_size));
	_ASSERTE(m_chunk.size() >= m_n_window_size);
	// the chunk must exceed max chunk size, unless it's the last one

	return true;
}

bool CDocStreamSplitter::Build_TermOccurenceTable_v2()
{
	size_t n_expected_term_num = m_chunk.size() / 4; // ...
	// quickly assess expected number of terms

	m_term_occurence_table.clear();
	if(!stl_ut::Reserve_N(m_term_occurence_table, n_expected_term_num))
		return false;
	// pre-alloc term occurence table

#ifdef DOC_SPLITTER2_USE_HASH_CONTAINERS
	stdext::hash_map<term_id_t, chunk_off_t> table_indexer;
#else //DOC_SPLITTER2_USE_HASH_CONTAINERS
	std::map<term_id_t, chunk_off_t> table_indexer;
#endif //DOC_SPLITTER2_USE_HASH_CONTAINERS
	// map of term id -> term occurence table index

	for(chunk_off_t i = m_n_window_size, n = chunk_off_t(m_chunk.size()) - m_n_window_size; i < n; ++ i) {
		term_id_t n_term = m_chunk[i];
		if(n_term == m_n_dummy_term)
			continue; // do not need dummy occurences
		// get term from chunk

		chunk_off_t n_index;
#ifdef DOC_SPLITTER2_USE_HASH_CONTAINERS
		stdext::hash_map<term_id_t, chunk_off_t>::const_iterator p_hash_it;
#else //DOC_SPLITTER2_USE_HASH_CONTAINERS
		std::map<term_id_t, chunk_off_t>::const_iterator p_hash_it;
#endif //DOC_SPLITTER2_USE_HASH_CONTAINERS
		if((p_hash_it = table_indexer.find(n_term)) != table_indexer.end()) {
			n_index = (*p_hash_it).second;
			// get term index from hash-map
		} else {
			_ASSERTE(m_term_occurence_table.size() < CHUNK_OFF_MAX); // !!
			n_index = chunk_off_t(m_term_occurence_table.size());

			try {
				m_term_occurence_table.push_back(std::make_pair(n_term, std::vector<chunk_off_t>()));
				table_indexer.insert(std::make_pair(n_term, n_index));
			} catch(std::bad_alloc&) {
				return false;
			}
			// put new term to the end of the table, put index to hash-map
		}
		_ASSERTE(n_index >= 0 && n_index < m_term_occurence_table.size() &&
			m_term_occurence_table[n_index].first == n_term);
		// get index of that term

		if(!stl_ut::Resize_Add_1More(m_term_occurence_table[n_index].second, i)) // 0.07 sec
			return false;
		// add occurence of this term to the table
	}
	// fill term occurence table by scanning over the chunk (note that could be
	// done right away, but it would involve copying complex term occurence table items)
	// t_odo - take offset list, split work-items under slice-size, generate task passes

	m_work_item_list_v2.clear();
	m_work_item_list_v2.clear();
	m_pass_list.clear();
	// clears the other structures so they aren't used accidentally

	return true;
}

/**
 *	@brief greater-than comparator for std::lower_bound on work-item list
 *
 *	In debug builds this is a function object with all the argument order combinations
 *	(vc80 stl tests predicate sanity and needs the reversed operator as well),
 *	b_WorkItemLenght_Above is then a macro creating an instance of it.
 *	In release builds this is a plain function.
 *
 *	@param[in] r_t_work_item is work-item whose length is compared
 *	@param[in] n_thresh is threshold
 *
 *	@return Returns true if r_t_work_item.n_length > n_thresh, otherwise returns false.
 */
#ifdef _DEBUG
class CDocStreamSplitter::CCompareWorkItemLenght {
public:
	/** @brief work-item against threshold; refer to CDocStreamSplitter::b_WorkItemLenght_Above() */
	inline bool operator ()(const TWorkItem &r_t_work_item, chunk_off_t n_thresh) const
	{
		return n_thresh < r_t_work_item.n_length;
	}

	/** @brief threshold against work-item (the reversed form); refer to CDocStreamSplitter::b_WorkItemLenght_Above() */
	inline bool operator ()(chunk_off_t n_thresh, const TWorkItem &r_t_work_item) const
	{
		return r_t_work_item.n_length < n_thresh;
	}

	/** @brief work-item against work-item; refer to CDocStreamSplitter::b_WorkItemLenght_Above() */
	inline bool operator ()(const TWorkItem &r_t_work_item, const TWorkItem &r_t_work_item2) const
	{
		return r_t_work_item2.n_length < r_t_work_item.n_length;
	}
};
#define b_WorkItemLenght_Above CCompareWorkItemLenght()
#else //_DEBUG
inline bool CDocStreamSplitter::b_WorkItemLenght_Above(const TWorkItem &r_t_work_item, chunk_off_t n_thresh)
{
	return n_thresh < r_t_work_item.n_length;
}
#endif //_DEBUG

/**
 *	@brief builds the work-item list, the occurrence list and the pass list
 *		from the term occurrence table
 *
 *	The terms are ordered by descending frequency. Each term becomes a work-item,
 *	referencing a contiguous range in the occurrence list. Work-items longer than
 *	n_max_slice_length are split to slices. While a group of work-items is larger than
 *	n_min_primary_pass_size, it is cut to slices and processed by primary passes
 *	(the slice remainders form the next group). A smaller group is split to consecutive
 *	slices and processed by secondary passes, which carry summation steps (runs of slices
 *	of the same term); the term id of the work-items in a secondary pass is overwritten
 *	by the index of the work-item in that pass.
 *
 *	@param[in] n_max_slice_length is maximal length of a work-item (longer ones are split)
 *	@param[in] n_max_pass_size is maximal number of work-items in a single pass
 *	@param[in] n_min_primary_pass_size is number of work-items a group must exceed
 *		in order to be processed by primary passes
 *	@param[in] n_min_last_primary_pass_size is minimal number of trailing work-items
 *		of the last group (the ones following the last slice of a split term) which
 *		are processed by leftover primary passes rather than by secondary passes
 *	@param[in] n_dummy_vector_bank_num is upper bound of the number of work-items
 *		in a single secondary pass (the smaller of this and n_max_pass_size is used)
 *
 *	@return Returns true on success, false on failure (a container could not be grown,
 *		or one of the limits is zero while there are work-items to be processed).
 */
bool CDocStreamSplitter::Build_PassList(chunk_off_t n_max_slice_length,
	chunk_off_t n_max_pass_size, chunk_off_t n_min_primary_pass_size,
	chunk_off_t n_min_last_primary_pass_size, size_t n_dummy_vector_bank_num)
{
	std::vector<std::pair<chunk_off_t, chunk_off_t> > tot_permutation;
	if(!stl_ut::Resize_To_N(tot_permutation, m_term_occurence_table.size()))
		return false;
	std::transform(m_term_occurence_table.begin(), m_term_occurence_table.end(),
		tot_permutation.begin(), CGetIndexFrequencyPair());
	// build permutation table

	std::sort(tot_permutation.begin(), tot_permutation.end(), b_HasGreaterFrequency_Perm);
	// sort permutation table by frequency (in descending order)

	tot_permutation.erase(std::lower_bound(tot_permutation.begin(), tot_permutation.end(),
		std::pair<chunk_off_t, chunk_off_t>(0, 0), b_HasGreaterFrequency_Perm), tot_permutation.end());
	// throw away items with zero frequency (use binary search)

	_ASSERTE(tot_permutation.size() <= SIZE_MAX / 2);
	_ASSERTE(m_chunk.size() <= SIZE_MAX - 2 * tot_permutation.size());
	if(!stl_ut::Resize_To_N(m_work_item_list_v2, tot_permutation.size()) ||
	   !stl_ut::Resize_To_N(m_occurence_list_v2, m_chunk.size()))
		return false;
	// allocates list of offsets and list of occurrences (low-level algorithm inputs)

	chunk_off_t n_offset = 0;
	for(size_t i = 0, n = tot_permutation.size(); i < n; ++ i) {
		const chunk_off_t n_idx = tot_permutation[i].first;
		// get permutated index into the real table

		_ASSERTE(m_term_occurence_table[n_idx].second.size() <= CHUNK_OFF_MAX);
		m_work_item_list_v2[i] = TWorkItem(n_offset, chunk_off_t(m_term_occurence_table[n_idx].second.size()),
			m_term_occurence_table[n_idx].first);
		// store offset into the second table in the list

		const std::vector<chunk_off_t> &r_vec = m_term_occurence_table[n_idx].second;
		std::copy(r_vec.begin(), r_vec.end(), m_occurence_list_v2.begin() + n_offset);
		_ASSERTE(r_vec.size() <= CHUNK_OFF_MAX - n_offset);
		n_offset += chunk_off_t(r_vec.size());
		// list of occurrences goes last
	}
	_ASSERTE(n_offset <= m_occurence_list_v2.size());
	m_occurence_list_v2.erase(m_occurence_list_v2.begin() + n_offset, m_occurence_list_v2.end()); // cut-off unused space (dummy term occurrences)
	// fills list of offsets and list of occurrences (low-level algorithm inputs)

	m_pass_list.clear();
	// !!

	if(m_work_item_list_v2.empty())
		return true;
	// it seldom happens, but in extreme case, chunk can contain just dummy items, offset list is then empty

	if(!n_max_slice_length || !n_max_pass_size)
		return false;
	// there are work-items, but they can't be split to zero-length slices
	// or put to zero-size passes (this would divide by zero below)

#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
	bool b_slice_remainders = true;
	// puts all slices smaller than slice length to the first pass

	bool b_slice_aligned_pass = false;
	// no easy way of determining that for any pass in general. work-items are sorted by length
	// at the beginning, but their length modulo n_max_slice_length isn't sorted anymore
#else //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
	const bool b_slice_aligned_pass = false;
	// Generate_PrimaryPasses() takes the flag in any configuration,
	// but only stores it if slice remainders are enabled
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS

	size_t n_first_work_item = 0;
	size_t n_last_work_item = m_work_item_list_v2.size();
	do {
		if(n_last_work_item - n_first_work_item > n_min_primary_pass_size/* || (n_last_work_item > n_first_work_item &&
		   m_work_item_list_v2[(n_first_work_item + n_last_work_item) / 2].n_length <= n_max_slice_length)*/) {
			// either there's enough work-items, or there are still slices which can be processed without
			// direct parallelization (todo - design better metric)

			for(size_t i = n_first_work_item; i < n_last_work_item; ++ i) {
				TWorkItem &r_item = m_work_item_list_v2[i];
				if(r_item.n_length > n_max_slice_length) {
					chunk_off_t n_slice = n_max_slice_length;
#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					if(b_slice_remainders) {
						n_slice = r_item.n_length % n_max_slice_length;
#ifdef DOC_SPLITTER2_ALIGNED_SLICES_IN_FIRST_PASS
						if(!n_slice)
							n_slice = n_max_slice_length;
						// in case slice is aligned, use full size
#endif //DOC_SPLITTER2_ALIGNED_SLICES_IN_FIRST_PASS
					}
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					// determine current slice length

					chunk_off_t n_remainder_length = r_item.n_length - n_slice;
					// calculate remainder length

#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					_ASSERTE(!b_slice_aligned_pass || n_slice == n_max_slice_length);
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					r_item.n_length = n_slice;
					// trim the original work-item

					if(!stl_ut::Resize_Add_1More(m_work_item_list_v2, TWorkItem(r_item.n_offset +
					   n_slice, n_remainder_length, r_item.n_term_id)))
						return false;
					// add second part of this work-item to the back
					// (r_item may be invalid from now on, the list could have been reallocated)

#if defined(DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS) && !defined(DOC_SPLITTER2_ALIGNED_SLICES_IN_FIRST_PASS)
					if(!n_slice) {
						m_work_item_list_v2.erase(m_work_item_list_v2.begin() + i);
						-- i;
						-- n_last_work_item; // !!
					}
					// in case the slice is aligned, do not include it in the first pass (it lowers variance
					// of first pass slice length, making computation potentially slightly more efficient)
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS && !DOC_SPLITTER2_ALIGNED_SLICES_IN_FIRST_PASS
				}
			}
			// there's enough work-items, running in parallel, just chop them up to slices

#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			if(b_slice_remainders && n_last_work_item - n_first_work_item > n_min_primary_pass_size) {
				// this is the first pass, containing slice remainders

				std::stable_sort(m_work_item_list_v2.begin() + n_first_work_item,
					m_work_item_list_v2.begin() + n_last_work_item, std::greater<TWorkItem>());
				// sort work-items by length (they have modulo lengths, which are no longer sorted)

				std::vector<std::pair<size_t, size_t> > peeloff_range_list;
				size_t n_peeloff_last_work_item = n_last_work_item;
				do {
					chunk_off_t n_peeloff_length = m_work_item_list_v2[n_peeloff_last_work_item - 1].n_length;
					// we're peeling-off this length

					size_t n_peeloff_first_work_item = std::lower_bound(m_work_item_list_v2.begin() +
						n_first_work_item, m_work_item_list_v2.begin() + n_peeloff_last_work_item,
						n_peeloff_length, b_WorkItemLenght_Above) - m_work_item_list_v2.begin();
					// how much work-items of this length are there?

					size_t n_round_length = n_peeloff_last_work_item - n_peeloff_first_work_item;
					// calculate number of work-items in this pass

					if(n_round_length < n_min_primary_pass_size)
						n_peeloff_first_work_item = n_first_work_item;
					// in case there's not enough of them, finish them as a single pass

					if(!stl_ut::Resize_Add_1More(peeloff_range_list,
					   std::make_pair(n_peeloff_first_work_item, n_peeloff_last_work_item)))
						return false;
					// put it to the list

					n_peeloff_last_work_item = n_peeloff_first_work_item;
					// next turn processes preceding work-items
				} while(n_peeloff_last_work_item > n_first_work_item);
				// find long enough ranges of work-items with the same length

				for(size_t i = peeloff_range_list.size(); i > 0; -- i) {
					std::pair<size_t, size_t> range = peeloff_range_list[i - 1];
					if(!Generate_PrimaryPasses(range.first, range.second, n_max_pass_size, b_slice_aligned_pass))
						return false;
				}
				// generate simple primary passes in reverse order so they're sorted
			} else {
				if(!Generate_PrimaryPasses(n_first_work_item, n_last_work_item, n_max_pass_size, b_slice_aligned_pass))
					return false;
				// generate simple primary passes
			}
#else //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			if(!Generate_PrimaryPasses(n_first_work_item, n_last_work_item, n_max_pass_size, b_slice_aligned_pass))
				return false;
			// generate simple primary passes
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS

			n_first_work_item = n_last_work_item;
			n_last_work_item = m_work_item_list_v2.size();
			// new group of work-items

#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			if(b_slice_remainders) {
				b_slice_aligned_pass = true;
				b_slice_remainders = false;
			}
			// there are no remainders anymore
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
		} else {
			// there's a few, potentially long work-items. those will be chopped to slices different way

#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			if(b_slice_remainders)
				b_slice_remainders = false;
			// secondary passes do not know how to align slices
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS

			for(size_t i = n_first_work_item; i < m_work_item_list_v2.size(); ++ i) {
				TWorkItem &r_item = m_work_item_list_v2[i];
				if(r_item.n_length > n_max_slice_length) {
					size_t n_slice_num = (r_item.n_length + n_max_slice_length - 1) / n_max_slice_length;
					size_t n_new_slice_num = n_slice_num - 1; // the first one is in the list already
					// calculate number of slices

					_ASSERTE(r_item.n_length >= n_new_slice_num * n_max_slice_length);
					_ASSERTE(r_item.n_length - n_new_slice_num * n_max_slice_length <= n_max_slice_length);
					chunk_off_t n_last_work_item_length = chunk_off_t(r_item.n_length - n_new_slice_num * n_max_slice_length);
					_ASSERTE(n_new_slice_num * n_max_slice_length + n_last_work_item_length == r_item.n_length);
#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					_ASSERTE(!b_slice_aligned_pass || n_last_work_item_length == n_max_slice_length);
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					// calculate size of the last slice

					r_item.n_length = n_max_slice_length;
					// trim the original work-item

					TWorkItem t_item = r_item;
					// copy trimmed item (r_item may be invalid once the list is grown)

					if(!stl_ut::Reserve_NMore(m_work_item_list_v2, n_new_slice_num))
						return false;
					m_work_item_list_v2.insert(m_work_item_list_v2.begin() + (i + 1), n_new_slice_num, t_item); // insert n_new_slice_num trimmed items, this saves time when copying offset list, compare to calling insert for each item
					// make sure all the slices fit there

					for(size_t j = 1, n = n_new_slice_num; j < n; ++ j) {
						_ASSERTE(t_item.n_offset + j * n_max_slice_length <= CHUNK_OFF_MAX);
						m_work_item_list_v2[i + j].n_offset = chunk_off_t(t_item.n_offset + j * n_max_slice_length);
					}
					// fix offsets of n_slice_num - 2 slices (+ original slice + last slice is written right below)

					_ASSERTE(t_item.n_offset + n_new_slice_num * n_max_slice_length <= CHUNK_OFF_MAX);
					m_work_item_list_v2[i + n_new_slice_num].n_offset = chunk_off_t(t_item.n_offset + n_new_slice_num * n_max_slice_length);
					m_work_item_list_v2[i + n_new_slice_num].n_length = n_last_work_item_length;
					// fix offset and size of the last slice
				}
			}
			// subdivide work-items to consecutive slices

			size_t n_leftover_primary_pass_size = m_work_item_list_v2.size() - n_first_work_item;
			if(!m_work_item_list_v2.empty()) { // m_work_item_list_v2.size() - 1 would underflow
				for(size_t j = m_work_item_list_v2.size() - 1; j > n_first_work_item; -- j) {
					if(m_work_item_list_v2[j - 1].n_term_id == m_work_item_list_v2[j].n_term_id) {
						n_leftover_primary_pass_size = m_work_item_list_v2.size() - (j + 1);
						break;
					}
				}
			}
			if(n_leftover_primary_pass_size < n_min_last_primary_pass_size)
				n_leftover_primary_pass_size = 0;
			// there might be some work-items which are not split to multiple consecutive items,
			// those could be processed directly in one (or more) last primary passes

			size_t n_round_length = m_work_item_list_v2.size() - n_first_work_item - n_leftover_primary_pass_size;
			// calculate number of work-items in this pass

			n_last_work_item = n_first_work_item + n_round_length;
			_ASSERTE(n_last_work_item <= CHUNK_OFF_MAX);
			// this is the last work-item, which will be processed by secondary passes

			{
				chunk_off_t n_secondary_max_pass_size = chunk_off_t(min(size_t(n_max_pass_size), n_dummy_vector_bank_num));
				// calculate secondary pass size (this implementation decides to keep things
				// simple, it's expected that n_dummy_vector_bank_num <= n_max_pass_size)

				if(n_round_length && !n_secondary_max_pass_size)
					return false;
				// there are work-items for the secondary passes, but there are no dummy
				// vector banks to process them in (this would divide by zero below)

				size_t n_pass_num = (n_round_length)? (n_round_length +
					n_secondary_max_pass_size - 1) / n_secondary_max_pass_size : 0;
				// calculate number of passes to avoid exceeding n_secondary_max_pass_size

				if(n_pass_num) { // division by n_pass_num
					size_t n_pass_size = n_round_length / n_pass_num;
					size_t n_longer_pass_num = n_round_length % n_pass_num;
					_ASSERTE(n_pass_size + ((n_longer_pass_num)? 1 : 0) <= n_secondary_max_pass_size);
					_ASSERTE(n_pass_num * n_pass_size + n_longer_pass_num == n_round_length);
					// the last n_longer_pass_num passes get one work-item more than the others.
					// putting the whole remainder to the last pass could exceed the size limit
					// (e.g. 11 work-items with limit 4 would give passes of 3, 3 and 5 work-items).

					size_t n_first_pass = m_pass_list.size();
					if(!stl_ut::Resize_Add_NMore(m_pass_list, n_pass_num))
						return false;
					size_t n_pass_offset = n_first_work_item;
					for(size_t j = 0; j < n_pass_num; ++ j) {
						TPass &r_t_pass = m_pass_list[n_first_pass + j];

						size_t n_cur_pass_size = n_pass_size + ((j >= n_pass_num - n_longer_pass_num)? 1 : 0);
						r_t_pass.n_offset = chunk_off_t(n_pass_offset);
						r_t_pass.n_length = chunk_off_t(n_cur_pass_size);
						r_t_pass.b_primary = false;
#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
						r_t_pass.b_slice_aligned = b_slice_aligned_pass;
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
						n_pass_offset += n_cur_pass_size;
						// set the pass up, the next pass starts right after it

						for(chunk_off_t k = 0; k < r_t_pass.n_length; ++ k) {
							TWorkItem &r_item = m_work_item_list_v2[r_t_pass.n_offset + k];
							// take work-item

							if(r_t_pass.summation_list.empty() || r_t_pass.summation_list.back().n_term_id != r_item.n_term_id) {
								if(!stl_ut::Resize_Add_1More(r_t_pass.summation_list, TPass::TSummationStep(/*r_t_pass.n_offset +*/ k, 1, r_item.n_term_id))) /*+++*/
									return false;
							} else
								++ r_t_pass.summation_list.back().n_length;
							// add slice to summation step if it's still the same term / create a new summation step

							r_item.n_term_id = k;
							// set slice number for term-id (we're writing to dummy vector slots)
						}
					}
					_ASSERTE(n_pass_offset == n_last_work_item);
				}
				// add passes, set up summation steps (greedy merging approach, todo - might want to look into it)
			}
			// build secondary passes

			n_first_work_item = n_last_work_item;
			n_last_work_item = m_work_item_list_v2.size();
			_ASSERTE(n_last_work_item - n_first_work_item == n_leftover_primary_pass_size);
			// group of work-items marked for leftover primary processing

			if(!Generate_PrimaryPasses(n_first_work_item, n_last_work_item, n_max_pass_size, b_slice_aligned_pass))
				return false;
			// generate simple primary passes

			n_first_work_item = n_last_work_item;
			n_last_work_item = m_work_item_list_v2.size();
			// new group of work-items
		}
		// split long slices to work-items, either for primary or for secondary processing
	} while(n_first_work_item < n_last_work_item);
	// split work-items to slices, build passes

	// t_odo - try to put all slices smaller than slice length to the first pass
	// (todo - try taking them from beginning of all the slices / from the end of all the slices)

	return true;
}

/**
 *	@brief greater-than comparator for sorting term occurence permutatuion table by frequency
 *
 *	@param[in] a is first compared term occurence permutatuion table item
 *	@param[in] b is second compared term occurence permutatuion table item
 *
 *	@return Returns true if first term has greater frequency than the other one, otherwise returns false.
 */
inline bool CDocStreamSplitter::b_HasGreaterFrequency_Perm(const std::pair<chunk_off_t, chunk_off_t> &a,
	const std::pair<chunk_off_t, chunk_off_t> &b)
{
	return a.second > b.second;
}

/**
 *	@brief generates primary passes, processing given range of work-items
 *
 *	The range is divided to the smallest number of passes that respects n_max_pass_size.
 *	The work-items are distributed evenly: pass sizes differ by one at most, the longer
 *	passes go last. The passes are appended to m_pass_list.
 *
 *	@param[in] n_first_work_item is index of the first work-item
 *	@param[in] n_last_work_item is 1 + index of the last work-item
 *	@param[in] n_max_pass_size is maximal number of work-items in a single pass
 *	@param[in] b_slice_aligned_pass is set if all the work-items have length n_max_slice_length
 *		(only stored if DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS is defined)
 *
 *	@return Returns true on success (also if the range is empty), false on failure
 *		(the pass list could not be grown, or n_max_pass_size is zero for a non-empty range).
 */
bool CDocStreamSplitter::Generate_PrimaryPasses(size_t n_first_work_item,
	size_t n_last_work_item, size_t n_max_pass_size, bool b_slice_aligned_pass)
{
	_ASSERTE(n_max_pass_size <= CHUNK_OFF_MAX);
	_ASSERTE(n_last_work_item <= CHUNK_OFF_MAX);
	_ASSERTE(n_first_work_item <= n_last_work_item);
	_ASSERTE(n_first_work_item == n_last_work_item || n_first_work_item < m_work_item_list_v2.size()); // in case they're equal, then it doesn't matter how much they are
	_ASSERTE(n_last_work_item <= m_work_item_list_v2.size()); // but be polite

	const size_t n_round_length = n_last_work_item - n_first_work_item;
	// calculate number of work-items in this round

	if(!n_round_length)
		return true;
	// nothing to do (no passes are generated for an empty range)

	if(!n_max_pass_size)
		return false;
	// can't put work-items to zero-size passes (this would divide by zero below)

	const size_t n_pass_num = (n_round_length + n_max_pass_size - 1) / n_max_pass_size;
	// calculate number of passes to avoid exceeding n_max_pass_size

	const size_t n_pass_size = n_round_length / n_pass_num;
	const size_t n_longer_pass_num = n_round_length % n_pass_num;
	_ASSERTE(n_pass_size + ((n_longer_pass_num)? 1 : 0) <= n_max_pass_size);
	_ASSERTE(n_pass_num * n_pass_size + n_longer_pass_num == n_round_length);
	// the last n_longer_pass_num passes get one work-item more than the others.
	// putting the whole remainder to the last pass could exceed n_max_pass_size
	// (e.g. 11 work-items with limit 4 would give passes of 3, 3 and 5 work-items).

	const size_t n_first_pass = m_pass_list.size();
	if(!stl_ut::Resize_Add_NMore(m_pass_list, n_pass_num))
		return false;
	size_t n_pass_offset = n_first_work_item;
	for(size_t j = 0; j < n_pass_num; ++ j) {
		const size_t n_cur_pass_size = n_pass_size + ((j >= n_pass_num - n_longer_pass_num)? 1 : 0);
		// size of this pass

		TPass &r_t_pass = m_pass_list[n_first_pass + j];
		r_t_pass.n_offset = chunk_off_t(n_pass_offset);
		r_t_pass.n_length = chunk_off_t(n_cur_pass_size);
		_ASSERTE(r_t_pass.b_primary == true); // by default
#ifdef DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
		r_t_pass.b_slice_aligned = b_slice_aligned_pass;
#endif //DOC_SPLITTER2_SLICE_REMAINDERS_TO_THE_FIRST_PASS
		// set a single pass up

		n_pass_offset += n_cur_pass_size;
		// the next pass starts right after this one
	}
	_ASSERTE(n_pass_offset == n_last_work_item);
	// add passes

	return true;
}

/*
 *								=== ~CDocStreamSplitter ===
 */
