/*
								+---------------------------------+
								|                                 |
								|   ***  Document splitter  ***   |
								|                                 |
								|  Copyright   -tHE SWINe- 2010  |
								|                                 |
								|         DocSplitter.cpp         |
								|                                 |
								+---------------------------------+
*/

#include "../../UberLame_src/NewFix.h"
#include "../../UberLame_src/CallStack.h"
#include <stdio.h> // debug
#include <math.h> // debug
#include <vector>
#include <string>
#include <numeric>
#include <algorithm>
#include <hash_map>
#include "../../UberLame_src/Integer.h"
#include "../../UberLame_src/StlUtils.h"
#include "../../UberLame_src/MinMax.h"
#include "../../UberLame_src/Dir.h"
#include "../../UberLame_src/Timer.h"
#include "DocStorage.h"
#include "DocSplitter.h"

#if defined(_MSC_VER) && !defined(__MWERKS__) && !defined(for)
#define for if(0) {} else for
#endif
// msvc 'for' scoping hack

#if defined(_MSC_VER) && !defined(__MWERKS__) && _MSC_VER < 1400
#define stdext std
#endif //_MSC_VER && !__MWERKS__ && _MSC_VER < 1400
// msvc60 doesn't have stdext::hash map, but std::hash_map instead

/*
 *								=== CDocumentSplitter::CSliceLengthBelow ===
 */

/**
 *	@brief constructs the predicate, bound to a particular occurence list
 *
 *	@param[in] n_ref is reference value that will be passed to operator ()
 *		(only stored in debug builds, where operator () checks it against its argument)
 *	@param[in] r_occurence_list is the occurence list (only a reference is kept,
 *		the list must stay alive for as long as this object is used)
 */
CDocumentSplitter::CSliceLengthBelow::CSliceLengthBelow(size_t n_ref,
	const std::vector<size_t> &r_occurence_list)
	:m_r_occurence_list(r_occurence_list)
{
#if defined(_DEBUG)
	// the reference value is only needed for the argument order check in operator ()
	m_n_ref = n_ref;
#endif // _DEBUG
}

/**
 *	@brief compares value stored in the occurence list one position after a given offset
 *		to a reference value
 *
 *	@param[in] n_ref is the reference value (in debug builds it must equal
 *		the value passed to the constructor)
 *	@param[in] n_offset is offset into the occurence list; the element
 *		at n_offset + 1 is the one being read
 *
 *	@return Returns true if the element at n_offset + 1 is less than n_ref, otherwise returns false.
 */
bool CDocumentSplitter::CSliceLengthBelow::operator ()(size_t n_ref, size_t n_offset) const
{
#ifdef _DEBUG
	_ASSERTE(m_n_ref == n_ref);
	// make sure argument order is correct (we're comparing offset to occurence list with
	// number of occurences, which are two different quantities. argument order is important here).
#endif //_DEBUG

	_ASSERTE(!m_r_occurence_list.empty() &&
		/*n_offset >= 0 &&*/ n_offset < m_r_occurence_list.size() - 1); // size_t is unsigned
	// the list must not be empty, otherwise size() - 1 wraps around
	// and the range check above would pass for (almost) any offset

	return m_r_occurence_list[n_offset + 1] < n_ref;
}

/*
 *								=== ~CDocumentSplitter::CSliceLengthBelow ===
 */

/*
 *								=== CDocumentSplitter::TRange ===
 */

/**
 *	@brief default constructor; creates an empty range (zero offset, zero length)
 */
CDocumentSplitter::TRange::TRange()
	:n_offset(0),
	n_length(0)
{
	// nothing else to do
}

/**
 *	@brief constructor; creates a range with given offset and length
 *
 *	@param[in] _n_offset is value stored to n_offset
 *	@param[in] _n_length is value stored to n_length
 */
CDocumentSplitter::TRange::TRange(size_t _n_offset, size_t _n_length)
	:n_offset(_n_offset),
	n_length(_n_length)
{
	// nothing else to do
}

/**
 *	@brief less-than comparison; ranges are ordered by their length only (offset is ignored)
 *
 *	@param[in] r_t_other is the range to compare to
 *
 *	@return Returns true if this range is shorter than r_t_other, otherwise returns false.
 */
bool CDocumentSplitter::TRange::operator <(const TRange &r_t_other) const
{
	const size_t n_other_length = r_t_other.n_length;
	return n_other_length > n_length;
}

/**
 *	@brief greater-than comparison; ranges are ordered by their length only (offset is ignored)
 *
 *	@param[in] r_t_other is the range to compare to
 *
 *	@return Returns true if this range is longer than r_t_other, otherwise returns false.
 */
bool CDocumentSplitter::TRange::operator >(const TRange &r_t_other) const
{
	const size_t n_other_length = r_t_other.n_length;
	return n_other_length < n_length;
}

/*
 *								=== ~CDocumentSplitter::TRange ===
 */

/*
 *								=== CDocumentSplitter::TTermRange ===
 */

/**
 *	@brief default constructor; creates an empty range with term id initialized to -1
 */
CDocumentSplitter::TTermRange::TTermRange()
	:TRange(),
	n_term_id(-1)
{
	// nothing else to do
}

/**
 *	@brief constructor; creates a range with given offset and length, associated with a term
 *
 *	@param[in] _n_offset is value passed to TRange as offset
 *	@param[in] _n_length is value passed to TRange as length
 *	@param[in] _n_term_id is value stored to n_term_id
 */
CDocumentSplitter::TTermRange::TTermRange(size_t _n_offset,
	size_t _n_length, size_t _n_term_id)
	:TRange(_n_offset, _n_length),
	n_term_id(_n_term_id)
{
	// nothing else to do
}

/*
 *								=== ~CDocumentSplitter::TTermRange ===
 */

/*
 *								=== CDocumentSplitter::TPass ===
 */

/**
 *	@brief default constructor; creates an empty range, flagged as primary pass
 *
 *	@note If DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS is defined,
 *		the b_slice_aligned flag is cleared as well.
 */
CDocumentSplitter::TPass::TPass()
	:TRange(),
	b_primary(true)
#if defined(DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS)
	, b_slice_aligned(false)
#endif // DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
{
	// nothing else to do
}

/**
 *	@brief constructor; creates a pass with given offset, length and primary flag
 *
 *	@param[in] _n_offset is value passed to TRange as offset
 *	@param[in] _n_length is value passed to TRange as length
 *	@param[in] _b_primary is value stored to b_primary
 *
 *	@note If DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS is defined,
 *		the b_slice_aligned flag is cleared as well.
 */
CDocumentSplitter::TPass::TPass(size_t _n_offset,
	size_t _n_length, bool _b_primary)
	:TRange(_n_offset, _n_length),
	b_primary(_b_primary)
#if defined(DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS)
	, b_slice_aligned(false)
#endif // DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
{
	// nothing else to do
}

/*
 *								=== ~CDocumentSplitter::TPass ===
 */

/*
 *								=== CDocumentSplitter ===
 */

/**
 *	@brief constructor; stores the configuration and precalculates sequence lengths and number of chunks
 *
 *	@param[in] r_document_list is list of documents to be split (only a reference is kept; must not be empty)
 *	@param[in] n_dummy_term is term id, used as padding around the documents
 *	@param[in] n_max_chunk_size is maximal chunk size, in terms (must be greater than 2 * n_halfwindow_size)
 *	@param[in] n_halfwindow_size is half-window size, in terms (stored as m_n_window_size)
 *
 *	@note This only resets the chunk counters, it does not build the first chunk;
 *		Prepare_FirstChunk() needs to be called before the chunk data are accessed.
 */
CDocumentSplitter::CDocumentSplitter(const std::vector <TDocument> &r_document_list,
	size_t n_dummy_term, size_t n_max_chunk_size, size_t n_halfwindow_size)
	:m_r_document_list(r_document_list),
	m_n_dummy_term(n_dummy_term),
	m_n_max_chunk_size(n_max_chunk_size),
	m_n_window_size(n_halfwindow_size),
	m_n_length_of_all_docs(n_LengthOfAllDocuments(r_document_list)),
	m_n_length_of_concat(n_LengthOfConcatSequence(m_n_length_of_all_docs,
		r_document_list.size(), n_halfwindow_size)),
	m_n_chunk_num(n_Chunk_Num(m_n_length_of_concat, n_max_chunk_size, n_halfwindow_size))
{
	_ASSERTE(!r_document_list.empty());
	// at least a single document is needed to ensure correct amount of padding is inserted

	_ASSERTE(r_document_list.size() < SIZE_MAX);
	// r_document_list.size() + 1 must not overflow

	_ASSERTE(n_halfwindow_size <= SIZE_MAX / 2);
	// 2 * window size must not overflow

	_ASSERTE(n_max_chunk_size > 2 * n_halfwindow_size);
	// chunk size must be greater than restart size

	m_chunk.clear();
	m_n_current_doc_off = 0;
	m_n_current_doc = 0;
	m_n_current_chunk = 0;
#if defined(DOC_SPLITTER_TRACK_CHUNK_DOCS)
	m_n_last_chunk_doc = 0;
	m_n_first_chunk_doc = 0;
	m_n_next_chunk_first_doc = 0;
#endif // DOC_SPLITTER_TRACK_CHUNK_DOCS
	// Prepare_FirstChunk() is intentionally not called here (the caller has to call it anyway),
	// resetting all the counters is sufficient
}

/**
 *	@brief calculates relative overhead of padding
 *
 *	@return Returns length of the concatenated sequence (documents with padding),
 *		divided by length of all the documents alone, minus one.
 *
 *	@note The result is not finite if the total length of all documents is zero.
 */
double CDocumentSplitter::f_Padding_Overhead() const
{
	const double f_padded_length = double(m_n_length_of_concat);
	const double f_ratio = f_padded_length / m_n_length_of_all_docs;
	return f_ratio - 1;
}

/**
 *	@brief calculates relative overhead of chunk restarts
 *
 *	@return Returns number of chunks times restart size (2 * window size),
 *		divided by length of all the documents.
 *
 *	@note The result is not finite if the total length of all documents is zero.
 */
double CDocumentSplitter::f_ChunkRestart_Overhead() const
{
	const double f_restart_terms = double(m_n_chunk_num) * 2 * double(m_n_window_size);
	// carry out the multiplication in floating point; the integer product
	// could wrap around where size_t is only 32 bits wide

	return f_restart_terms / m_n_length_of_all_docs;
}

/**
 *	@brief rewinds the splitter to the beginning and builds the first chunk
 *
 *	@return Returns the result of Prepare_NextChunk(), or false
 *		if reserving storage for the chunk failed.
 */
bool CDocumentSplitter::Prepare_FirstChunk()
{
	m_chunk.clear();
	m_n_current_doc_off = 0;
	m_n_current_doc = 0;
	m_n_current_chunk = 0;
#if defined(DOC_SPLITTER_TRACK_CHUNK_DOCS)
	m_n_last_chunk_doc = 0;
	m_n_first_chunk_doc = 0;
	m_n_next_chunk_first_doc = 0;
#endif // DOC_SPLITTER_TRACK_CHUNK_DOCS
	// reset all the counters and drop contents of the previous chunk

	_ASSERTE(m_n_chunk_num >= 0);

	if(!stl_ut::Reserve_N(m_chunk, m_n_max_chunk_size))
		return false;
	// make sure there is enough space for the biggest chunk

	return Prepare_NextChunk();
}

/**
 *	@brief builds the next chunk in m_chunk
 *
 *	For the first chunk, m_n_window_size dummy terms are inserted at the beginning.
 *	For any later chunk, the last 2 * m_n_window_size terms of the previous chunk
 *	are kept at the beginning. The rest of the chunk is then filled with document terms,
 *	each document being followed by m_n_window_size dummy terms, until the chunk holds
 *	m_n_max_chunk_size terms or until the last document (including its padding) was written.
 *
 *	If DOC_SPLITTER_TRACK_CHUNK_DOCS is defined, m_n_first_chunk_doc, m_n_last_chunk_doc
 *	and m_n_next_chunk_first_doc are updated as well.
 *
 *	@return Returns true if a chunk was built, or false if all m_n_chunk_num chunks
 *		have been built already (m_chunk is left unchanged in that case).
 */
bool CDocumentSplitter::Prepare_NextChunk()
{
	if(m_n_current_chunk == m_n_chunk_num) {
		_ASSERTE(m_n_current_doc == m_r_document_list.size());
		// make sure all the documents have been processed

		_ASSERTE(!m_n_current_doc_off);
		// should be zero

		return false;
	}
	// no next chunks

	const size_t n_restart_size = 2 * m_n_window_size;
	// number of terms at the end of the previous chunk, repeated at the beginning of this one
	const size_t n_relevant_restart_size = m_n_window_size;
	// size of the area at the end of the chunk, used below to decide
	// which document the next chunk starts with

	if(m_n_current_chunk) {
		_ASSERTE(m_chunk.size() == m_n_max_chunk_size);
		m_chunk.erase(m_chunk.begin(), m_chunk.begin() + m_n_max_chunk_size - n_restart_size);
		// leave part of previous chunk to be processed in this chunk as well
	} else {
		_ASSERTE(m_chunk.empty());
		m_chunk.insert(m_chunk.begin(), m_n_window_size, m_n_dummy_term);
		// insert dummy terms before the first document
	}

#ifdef DOC_SPLITTER_TRACK_CHUNK_DOCS
	m_n_first_chunk_doc = m_n_next_chunk_first_doc;
	// remember first document in this chunk

#ifdef _DEBUG
	bool b_next_chunk_first_doc_written = false;
	// debug-only flag, makes sure m_n_next_chunk_first_doc is decided at most once per chunk
#endif //_DEBUG

	if(m_n_max_chunk_size - m_chunk.size() < n_relevant_restart_size) {
		_ASSERTE(m_n_next_chunk_first_doc == m_n_first_chunk_doc);
		//m_n_next_chunk_first_doc = m_n_first_chunk_doc; // void expression
#ifdef _DEBUG
		b_next_chunk_first_doc_written = true;
#endif //_DEBUG
	}
	// in case padding writes into restart interval, we can't really tell document number (extreme case)
#endif //DOC_SPLITTER_TRACK_CHUNK_DOCS

	for(size_t n_space = m_n_max_chunk_size - m_chunk.size(); n_space > 0;) {
		const TDocument &r_doc = m_r_document_list[m_n_current_doc];
		size_t n_doc_size = r_doc.term_position_list.size();
		// m_n_current_doc_off below n_doc_size points to a document term,
		// values in [n_doc_size, n_doc_size + m_n_window_size) point to the padding after it
		if(m_n_current_doc_off < n_doc_size) {
			size_t n_doc_copy = min(n_doc_size - m_n_current_doc_off, n_space);
			// calculate amount of terms to be inserted

			m_chunk.insert(m_chunk.end(), r_doc.term_position_list.begin() + m_n_current_doc_off,
				r_doc.term_position_list.begin() + m_n_current_doc_off + n_doc_copy);
			// copy term indices

#ifdef DOC_SPLITTER_TRACK_CHUNK_DOCS
			if(n_space >= n_relevant_restart_size && n_space - n_doc_copy < n_relevant_restart_size) {
				m_n_next_chunk_first_doc = m_n_current_doc;
#ifdef _DEBUG
				_ASSERTE(!b_next_chunk_first_doc_written);
				b_next_chunk_first_doc_written = true;
#endif //_DEBUG
			}
			// in case this copy crossed into the restart area, remember document number
#endif //DOC_SPLITTER_TRACK_CHUNK_DOCS

			m_n_current_doc_off += n_doc_copy;
			n_space -= n_doc_copy;

			if(!n_space)
				break;
			// detect full buffer
		}
		if(m_n_current_doc_off >= n_doc_size) {
			size_t n_padding = min(m_n_window_size - (m_n_current_doc_off - n_doc_size), n_space);
			// calculate amount of dummy terms to be inserted (the remaining part of the padding, or less if the chunk gets full)

			m_chunk.insert(m_chunk.end(), n_padding, m_n_dummy_term);
			// insert dummy terms to separate documents

#ifdef DOC_SPLITTER_TRACK_CHUNK_DOCS
			if(n_space >= n_relevant_restart_size && n_space - n_padding < n_relevant_restart_size) {
				m_n_next_chunk_first_doc = min(m_r_document_list.size(), m_n_current_doc + 1); // this document only wrote dummy items, it won't contribute new terms
#ifdef _DEBUG
				_ASSERTE(!b_next_chunk_first_doc_written);
				b_next_chunk_first_doc_written = true;
#endif //_DEBUG
			}
			// in case the padding crossed into the restart area, remember number of the next document
#endif //DOC_SPLITTER_TRACK_CHUNK_DOCS

			m_n_current_doc_off += n_padding;
			n_space -= n_padding;

			_ASSERTE(n_doc_size <= SIZE_MAX - m_n_window_size);
			if(m_n_current_doc_off == n_doc_size + m_n_window_size) {
				m_n_current_doc_off = 0;
				++ m_n_current_doc;

				if(m_n_current_doc == m_r_document_list.size())
					break;
			}
			// detect end of current document (including its padding)
		}
	}
	// fill chunk with term indices

#ifdef DOC_SPLITTER_TRACK_CHUNK_DOCS
	m_n_last_chunk_doc = (m_n_current_doc_off)? min(m_r_document_list.size(), m_n_current_doc + 1) : m_n_current_doc; // so it returns valid last after processing the last document
	// remember last document in this chunk (one past the last, it is used as an exclusive loop bound)
#endif //DOC_SPLITTER_TRACK_CHUNK_DOCS

	++ m_n_current_chunk;

#ifdef DOC_SPLITTER_TRACK_CHUNK_DOCS
#ifdef _DEBUG
	_ASSERTE(b_next_chunk_first_doc_written || m_n_current_chunk == m_n_chunk_num);
#endif //_DEBUG
	// first document writing to chunk restart area must be detected, unless we're doing the last chunk
	// (restart area will be part of the next chunk, we need to know indices of all documents,
	// comprising the chunk)
#endif //DOC_SPLITTER_TRACK_CHUNK_DOCS

	_ASSERTE(!m_chunk.empty());
	_ASSERTE(m_chunk.size() == m_n_max_chunk_size || m_n_current_chunk == m_n_chunk_num);
	// chunk must be filled to exactly max chunk size, unless it's the last one

	return true;
}

const std::vector<CDocumentSplitter::size_t> &CDocumentSplitter::Get_Chunk() const
{
	return m_chunk;
}

/**
 *	@brief accumulation step for summing up document lengths
 *
 *	@param[in] n_length is sum of lengths of the preceding documents
 *	@param[in] r_doc is the document to be added
 *
 *	@return Returns n_length, increased by the number of term positions in r_doc.
 */
inline uint64_t CDocumentSplitter::SumDocLengths(uint64_t n_length, const TDocument &r_doc)
{
	const uint64_t n_doc_terms = r_doc.term_position_list.size();
	_ASSERTE(n_doc_terms <= UINT64_MAX - n_length); // the sum must not overflow
	return n_doc_terms + n_length;
}

uint64_t CDocumentSplitter::n_LengthOfAllDocuments(const std::vector <TDocument> &r_document_list)
{
	/*uint64_t n_length = 0;
	for(size_t i = 0, n = r_document_list.size(); i < n; ++ i) {
		size_t n_document_i_length = r_document_list[i].term_position_list.size();
		_ASSERTE(n_length <= UINT64_MAX - n_document_i_length); // overflow check
		n_length += n_document_i_length;
	}
	return n_length;*/
	return std::accumulate(r_document_list.begin(), r_document_list.end(), uint64_t(0), SumDocLengths);
}

/**
 *	@brief calculates length of the sequence of concatenated documents, including the padding
 *
 *	The padding amounts to n_halfwindow_size terms for each document, plus
 *	n_halfwindow_size terms once more (i.e. (n_document_num + 1) * n_halfwindow_size in total).
 *
 *	@param[in] n_length_of_all_documents is sum of lengths of all the documents
 *	@param[in] n_document_num is number of documents
 *	@param[in] n_halfwindow_size is half-window size, in terms
 *
 *	@return Returns n_length_of_all_documents, increased by the length of the padding.
 */
uint64_t CDocumentSplitter::n_LengthOfConcatSequence(uint64_t n_length_of_all_documents, size_t n_document_num, size_t n_halfwindow_size)
{
	_ASSERTE(n_document_num < SIZE_MAX && n_halfwindow_size > 0 && (n_document_num + 1) < UINT64_MAX / n_halfwindow_size); // overflow check
	uint64_t n_dummy_length = (uint64_t(n_document_num) + 1) * n_halfwindow_size;
	// widen to 64 bits before multiplying; the product would otherwise be
	// calculated in size_t, which may be only 32 bits wide and could wrap around
	_ASSERTE(n_length_of_all_documents < UINT64_MAX - n_dummy_length); // overflow check
	return n_length_of_all_documents + n_dummy_length;
}

/**
 *	@brief calculates number of chunks, required to process a sequence of a given length
 *
 *	Every chunk repeats 2 * n_halfwindow_size terms of the preceding chunk,
 *	so it advances by n_max_chunk_size - 2 * n_halfwindow_size terms only.
 *
 *	@param[in] n_length_of_concat_sequence is length of the concatenated sequence (with padding)
 *	@param[in] n_max_chunk_size is maximal chunk size, in terms (must be greater than 2 * n_halfwindow_size)
 *	@param[in] n_halfwindow_size is half-window size, in terms
 *
 *	@return Returns 1 if the sequence is shorter than 2 * n_halfwindow_size, otherwise
 *		returns (n_length_of_concat_sequence - 2 * n_halfwindow_size) divided
 *		by (n_max_chunk_size - 2 * n_halfwindow_size), rounded up.
 */
CDocumentSplitter::size_t CDocumentSplitter::n_Chunk_Num(uint64_t n_length_of_concat_sequence, size_t n_max_chunk_size, size_t n_halfwindow_size)
{
	_ASSERTE(n_halfwindow_size <= SIZE_MAX / 2); // 2 * n_halfwindow_size must not overflow
	const size_t n_overlap_size = 2 * n_halfwindow_size;
	// number of terms, shared by two consecutive chunks

	if(n_length_of_concat_sequence >= n_overlap_size) {
		_ASSERTE(n_max_chunk_size > n_overlap_size);
		// in case overlap size equals maximal chunk size, no data can be progressively processed

		const uint64_t n_remaining_length = n_length_of_concat_sequence - n_overlap_size;
		const uint64_t n_result = (n_remaining_length + n_max_chunk_size - n_overlap_size - 1) /
			(n_max_chunk_size - n_overlap_size);
		// division, rounding up

		_ASSERTE(n_result < SIZE_MAX);
		// overflow check (this one might actually trigger on large datasets while max chunk size is too small)

		return size_t(n_result);
	}

	return 1;
	// the whole sequence is shorter than the overlap
}

bool CDocumentSplitter::Build_TermOccurenceTable()
{
#ifdef DOC_SPLITTER_BUILD_CHUNK_TERM_LIST
	// build tables for GPU from chunk term list

#ifndef DOC_SPLITTER_TRACK_CHUNK_DOCS
#error "DOC_SPLITTER_TRACK_CHUNK_DOCS must be defined for DOC_SPLITTER_BUILD_CHUNK_TERM_LIST"
#endif //DOC_SPLITTER_TRACK_CHUNK_DOCS

#if 0
	{
		m_chunk_term_list.clear();

		std::vector<size_t> doc_term_list, tmp_term_list;
		for(size_t i = m_n_first_chunk_doc; i < m_n_last_chunk_doc; ++ i) {
			const TDocument &r_doc = m_r_document_list[i];

			if(!stl_ut::Resize_To_N(doc_term_list, r_doc.term_frequency_map.size()))
				return false;
			std::transform(r_doc.term_frequency_map.begin(), r_doc.term_frequency_map.end(),
				doc_term_list.begin(), n_GetTerm_Id);
			std::sort(doc_term_list.begin(), doc_term_list.end());
			// get sorted list of terms, ocuring in a document

			if(i > m_n_first_chunk_doc) {
				_ASSERTE(m_chunk_term_list.size() < SIZE_MAX - doc_term_list.size());
				if(!stl_ut::Resize_To_N(tmp_term_list, m_chunk_term_list.size() + doc_term_list.size()))
					return false;
				std::merge(m_chunk_term_list.begin(), m_chunk_term_list.end(), doc_term_list.begin(), doc_term_list.end(), tmp_term_list.begin());
				tmp_term_list.erase(std::unique(tmp_term_list.begin(), tmp_term_list.end()), tmp_term_list.end());
				m_chunk_term_list.swap(tmp_term_list);
			} else
				m_chunk_term_list.swap(doc_term_list); // possibly faster
			// merge with terms, occurring in other documents in this chunk
		}
	}
	// variant with merging after each document
#elif 1
	{
		m_chunk_term_list.clear();

		for(size_t i = m_n_first_chunk_doc; i < m_n_last_chunk_doc; ++ i) {
			const TDocument &r_doc = m_r_document_list[i];

			size_t n_off = m_chunk_term_list.size();
			if(!stl_ut::Resize_Add_NMore(m_chunk_term_list, r_doc.term_frequency_map.size()))
				return false;
			std::transform(r_doc.term_frequency_map.begin(), r_doc.term_frequency_map.end(),
				m_chunk_term_list.begin() + n_off, n_GetTerm_Id);
		}
		// get list of terms, occuring in all documents, note there may be duplicates

		std::sort(m_chunk_term_list.begin(), m_chunk_term_list.end());
		// sort items

		m_chunk_term_list.erase(std::unique(m_chunk_term_list.begin(), m_chunk_term_list.end()), m_chunk_term_list.end());
		// remove duplicate items
	}
	// variant with merging after all documents are connected
	// (faster than code above, and for reasonable (big enough) chunk sizes also faster than code below)
#elif 0
	m_chunk_term_list.clear();
	if(!stl_ut::Reserve_N(m_chunk_term_list, m_chunk.size()))
		return false;
	m_chunk_term_list.insert(m_chunk_term_list.end(), m_chunk.begin(), m_chunk.end());
	// get list of terms, occuring in all documents (ie. contents of current chunk; note there are duplicates)

	std::sort(m_chunk_term_list.begin(), m_chunk_term_list.end());
	// sort items

	m_chunk_term_list.erase(std::unique(m_chunk_term_list.begin(), m_chunk_term_list.end()), m_chunk_term_list.end());
	// remove duplicate items
#endif
	// just under 0.5 secs
	// 0.47922

#if 1
	stdext::hash_map<size_t, size_t> table_indexer; // default hash function for size_t works just well
	try {
		for(size_t i = 0, n = m_chunk_term_list.size(); i < n; ++ i)
			table_indexer.insert(std::hash_map<size_t, size_t>::value_type(m_chunk_term_list[i], i));
	} catch(std::bad_alloc&) {
		return false;
	}
	// implementation using stdext::hash_map, saving 0.5 sec here + 0.7 sec when reading it, that totals 1.2 sec
	// 1.40497
	// this is still quite slow (more than 0.9 sec on reasonable buffer size)
#else
	std::map<size_t, size_t> table_indexer;
	try {
		for(size_t i = 0, n = m_chunk_term_list.size(); i < n; ++ i) {
			table_indexer.insert(std::map<size_t, size_t>::value_type(m_chunk_term_list[i], i)); // this is slightly faster
			//table_indexer[m_chunk_term_list[i]] = i;
		}
	} catch(std::bad_alloc&) {
		return false;
	}
	// 1.93429
	// this is *very* slow (more than 1.5 sec on reasonable buffer size)
#endif
	// create map of term id -> term occurence table index

	if(!stl_ut::Resize_To_N(m_term_occurence_table, m_chunk_term_list.size()))
		return false;
	for(size_t i = 0, n = m_chunk_term_list.size(); i < n; ++ i) {
		m_term_occurence_table[i].first = m_chunk_term_list[i];
		m_term_occurence_table[i].second.clear(); // !!
	}
	// 1.60619
	// alloc term occurence table and assign term id's (0.2 secs on reasonable buffer size)

	for(size_t i = m_n_window_size, n = m_chunk.size() - m_n_window_size; i < n; ++ i) {
		size_t n_term = m_chunk[i];
		if(n_term == m_n_dummy_term)
			continue; // do not need dummy occurences
		_ASSERTE(table_indexer.find(n_term) != table_indexer.end());
		// get term from chunk

		size_t n_index = table_indexer[n_term]; // this is still slow (about 0.2 sec)
		_ASSERTE(n_index >= 0 && n_index < m_term_occurence_table.size() &&
			m_term_occurence_table[n_index].first == n_term);
		// get index of that term

		if(!stl_ut::Resize_Add_1More(m_term_occurence_table[n_index].second, i)) // 0.07 sec
			return false;
	}
	// 1.93130
	// fill term occurence table by scanning over the chunk (note that could be
	// done right away, but it would involve copying complex term occurence table items)
	// this is still slow (0.3 sec on optimal buffer size)

#else //DOC_SPLITTER_BUILD_CHUNK_TERM_LIST
	// build tables for GPU without chunk term list

	size_t n_expected_term_num = 0;
#if 1
#ifdef DOC_SPLITTER_TRACK_CHUNK_DOCS
	for(size_t i = m_n_first_chunk_doc; i < m_n_last_chunk_doc; ++ i) {
		size_t n_doc_term_num = m_r_document_list[i].term_frequency_map.size();
		if(n_expected_term_num < n_doc_term_num)
			n_expected_term_num = n_doc_term_num;
	}
	/*n_expected_term_num = max(n_expected_term_num,
		size_t(n_expected_term_num * log(m_n_last_chunk_doc - m_n_first_chunk_doc)));*/ // makes it slightly slower
#else //DOC_SPLITTER_TRACK_CHUNK_DOCS
	n_expected_term_num = m_chunk.size() / 4; // ...
#endif //DOC_SPLITTER_TRACK_CHUNK_DOCS
	// quickly assess expected number of terms
#endif

	m_term_occurence_table.clear();
	if(!stl_ut::Reserve_N(m_term_occurence_table, n_expected_term_num))
		return false;
	// pre-alloc term occurence table

	stdext::hash_map<size_t, size_t> table_indexer;
	// map of term id -> term occurence table index

	for(size_t i = m_n_window_size, n = m_chunk.size() - m_n_window_size; i < n; ++ i) {
		size_t n_term = m_chunk[i];
		if(n_term == m_n_dummy_term)
			continue; // do not need dummy occurences
		// get term from chunk

		size_t n_index;
		stdext::hash_map<size_t, size_t>::const_iterator p_hash_it;
		if((p_hash_it = table_indexer.find(n_term)) != table_indexer.end()) {
			n_index = (*p_hash_it).second;
			// get term index from hash-map
		} else {
			n_index = m_term_occurence_table.size();

			try {
				m_term_occurence_table.push_back(std::make_pair(n_term, std::vector<size_t>()));
				table_indexer.insert(stdext::hash_map<size_t, size_t>::value_type(n_term, n_index));
			} catch(std::bad_alloc&) {
				return false;
			}
			// put new term to the end of the table, put index to hash-map
		}
		_ASSERTE(n_index >= 0 && n_index < m_term_occurence_table.size() &&
			m_term_occurence_table[n_index].first == n_term);
		// get index of that term

		if(!stl_ut::Resize_Add_1More(m_term_occurence_table[n_index].second, i)) // 0.07 sec
			return false;
		// add occurence of this term to the table
	}
	// fill term occurence table by scanning over the chunk (note that could be
	// done right away, but it would involve copying complex term occurence table items)
	// 1.58879 (saves over 0.3 sec)

#endif //DOC_SPLITTER_BUILD_CHUNK_TERM_LIST

#ifdef DOC_SPLITTER_USE_TERM_OCCURENCE_PERMUTATION_TABLE
	std::vector<std::pair<size_t, size_t> > tot_permutation;
	if(!stl_ut::Resize_To_N(tot_permutation, m_term_occurence_table.size()))
		return false;
	std::transform(m_term_occurence_table.begin(), m_term_occurence_table.end(),
		tot_permutation.begin(), CGetIndexFrequencyPair());
	// build permutation table
	// 1.95679

	std::sort(tot_permutation.begin(), tot_permutation.end(), b_HasGreaterFrequency_Perm);
	// sort permutation table by frequency (in descending order)
	// 2.40161 (takes 0.5 sec)

#if 1
	//size_t n_total_size = tot_permutation.size(); // debug
	tot_permutation.erase(std::lower_bound(tot_permutation.begin(), tot_permutation.end(),
		std::pair<size_t, size_t>(0, 0), b_HasGreaterFrequency_Perm), tot_permutation.end());
	//printf("erased %d items of %d\n", n_total_size - tot_permutation.size(), n_total_size); // debug
	// use binary search
	// 2.47412 (takes 0.07 sec)
#else
	{
		std::vector<std::pair<size_t, size_t> >::iterator p_back = tot_permutation.end();
		std::vector<std::pair<size_t, size_t> >::iterator p_rend = tot_permutation.begin() - 1;
		while(p_back != p_rend && !(*(p_back - 1)).second)
			-- p_back;
		//printf("erased %d items of %d\n", tot_permutation.end() - p_back, tot_permutation.size()); // debug
		tot_permutation.erase(p_back, tot_permutation.end());
	}
	// 2.58537 (takes 0.2 sec)
#endif
	// throw away items with zero frequency

	_ASSERTE(tot_permutation.size() <= SIZE_MAX / 2);
	_ASSERTE(m_chunk.size() <= SIZE_MAX - 2 * tot_permutation.size());
	if(!stl_ut::Resize_To_N(m_offset_list, tot_permutation.size()) ||
	   !stl_ut::Resize_To_N(m_occurence_list, m_chunk.size() + 2 * tot_permutation.size()))
		return false;
	// allocates list of offsets and list of occurences (low-level algorithm inputs)
	// 2.54276 (takes 0.07 sec)

	size_t n_offset = 0;
	for(size_t i = 0, n = tot_permutation.size(); i < n; ++ i) {
		std::pair<size_t, size_t> t_perm = tot_permutation[i];
		size_t n_idx = t_perm.first;
		size_t n_size = t_perm.second;
		// get permutated index into the real table

		m_offset_list[i] = n_offset;
		// store offset into the second table in the list

		m_occurence_list[n_offset ++] = m_term_occurence_table[n_idx].first;
		// term id goes first

		_ASSERTE(n_size == m_term_occurence_table[n_idx].second.size());
		m_occurence_list[n_offset ++] = n_size;
		// number of occurences goes second

		const std::vector<size_t> &r_vec = m_term_occurence_table[n_idx].second;
		std::copy(r_vec.begin(), r_vec.end(), m_occurence_list.begin() + n_offset);
		n_offset += r_vec.size();
		// list of occurences goes last
	}
	_ASSERTE(n_offset <= m_occurence_list.size());
	m_occurence_list.erase(m_occurence_list.begin() + n_offset, m_occurence_list.end()); // cut-off unused space (dummy term occurences)
	// fills list of offsets and list of occurences (low-level algorithm inputs)
	// 2.78937 (takes 0.25 sec)
#else //DOC_SPLITTER_USE_TERM_OCCURENCE_PERMUTATION_TABLE
	std::sort(m_term_occurence_table.begin(), m_term_occurence_table.end(), b_HasGreaterFrequency);
	// sort table by frequency (in descending order)
	// this is *very* slow (about 3.3 secs)
	// 5.03042

	{
		std::vector<TTermOccurence>::iterator p_back = m_term_occurence_table.end() - 1;
		std::vector<TTermOccurence>::iterator p_begin = m_term_occurence_table.begin();
		while(p_back != p_begin && (*p_back).second.empty())
			-- p_back;
		m_term_occurence_table.erase(p_back + 1, m_term_occurence_table.end());
	}
	// throw away items with zero frequency (note this always leaves first item, but it doesn't really matter)
	// 5.11731 (0.07 sec)

	_ASSERTE(m_term_occurence_table.size() <= SIZE_MAX / 2);
	_ASSERTE(m_chunk.size() <= SIZE_MAX - 2 * m_term_occurence_table.size());
	if(!stl_ut::Resize_To_N(m_offset_list, m_term_occurence_table.size()) ||
	   !stl_ut::Resize_To_N(m_occurence_list, m_chunk.size() + 2 * m_term_occurence_table.size()))
		return false;
	// allocates list of offsets and list of occurences (low-level algorithm inputs)
	// 5.11887

	size_t n_offset = 0;
	for(size_t i = 0, n = m_term_occurence_table.size(); i < n; ++ i) {
		m_offset_list[i] = n_offset;
		// store offset into the second table in the list

		m_occurence_list[n_offset] = m_term_occurence_table[i].first;
		// term id goes first

		const std::vector<size_t> &r_vec = m_term_occurence_table[i].second;
		m_occurence_list[++ n_offset] = r_vec.size();
		// number of occurences goes second

		std::copy(r_vec.begin(), r_vec.end(), m_occurence_list.begin() + ++ n_offset);
		n_offset += r_vec.size();
		// list of occurences goes last
	}
	_ASSERTE(n_offset <= m_occurence_list.size());
	m_occurence_list.erase(m_occurence_list.begin() + n_offset, m_occurence_list.end()); // cut-off unused space (dummy term occurences)
	// fills list of offsets and list of occurences (low-level algorithm inputs)
	// 5.20950 (takes 0.1 sec)
#endif //DOC_SPLITTER_USE_TERM_OCCURENCE_PERMUTATION_TABLE

	//printf("offset-list: %d, occurence-list: %d\n", m_offset_list.size(), m_occurence_list.size());
	// debug

	return true;
}

bool CDocumentSplitter::Build_TermOccurenceTable_v2(size_t n_max_slice_length,
	size_t n_max_pass_size, size_t n_min_primary_pass_size,
	size_t n_min_last_primary_pass_size, size_t n_dummy_vector_bank_num)
{
	size_t n_expected_term_num = 0;

#ifdef DOC_SPLITTER_TRACK_CHUNK_DOCS
	for(size_t i = m_n_first_chunk_doc; i < m_n_last_chunk_doc; ++ i) {
		size_t n_doc_term_num = m_r_document_list[i].term_frequency_map.size();
		if(n_expected_term_num < n_doc_term_num)
			n_expected_term_num = n_doc_term_num;
	}
	/*n_expected_term_num = max(n_expected_term_num,
		size_t(n_expected_term_num * log(m_n_last_chunk_doc - m_n_first_chunk_doc)));*/ // makes it slightly slower
#else //DOC_SPLITTER_TRACK_CHUNK_DOCS
	n_expected_term_num = m_chunk.size() / 4; // ...
#endif //DOC_SPLITTER_TRACK_CHUNK_DOCS
	// quickly assess expected number of terms

	m_term_occurence_table.clear();
	if(!stl_ut::Reserve_N(m_term_occurence_table, n_expected_term_num))
		return false;
	// pre-alloc term occurence table

	stdext::hash_map<size_t, size_t> table_indexer;
	// map of term id -> term occurence table index

	for(size_t i = m_n_window_size, n = m_chunk.size() - m_n_window_size; i < n; ++ i) {
		size_t n_term = m_chunk[i];
		if(n_term == m_n_dummy_term)
			continue; // do not need dummy occurences
		// get term from chunk

		size_t n_index;
		stdext::hash_map<size_t, size_t>::const_iterator p_hash_it;
		if((p_hash_it = table_indexer.find(n_term)) != table_indexer.end()) {
			n_index = (*p_hash_it).second;
			// get term index from hash-map
		} else {
			n_index = m_term_occurence_table.size();

			try {
				m_term_occurence_table.push_back(std::make_pair(n_term, std::vector<size_t>()));
				table_indexer.insert(stdext::hash_map<size_t, size_t>::value_type(n_term, n_index));
			} catch(std::bad_alloc&) {
				return false;
			}
			// put new term to the end of the table, put index to hash-map
		}
		_ASSERTE(n_index >= 0 && n_index < m_term_occurence_table.size() &&
			m_term_occurence_table[n_index].first == n_term);
		// get index of that term

		if(!stl_ut::Resize_Add_1More(m_term_occurence_table[n_index].second, i)) // 0.07 sec
			return false;
		// add occurence of this term to the table
	}
	// fill term occurence table by scanning over the chunk (note that could be
	// done right away, but it would involve copying complex term occurence table items)

	std::vector<std::pair<size_t, size_t> > tot_permutation;
	if(!stl_ut::Resize_To_N(tot_permutation, m_term_occurence_table.size()))
		return false;
	std::transform(m_term_occurence_table.begin(), m_term_occurence_table.end(),
		tot_permutation.begin(), CGetIndexFrequencyPair());
	// build permutation table

	std::sort(tot_permutation.begin(), tot_permutation.end(), b_HasGreaterFrequency_Perm);
	// sort permutation table by frequency (in descending order)

	tot_permutation.erase(std::lower_bound(tot_permutation.begin(), tot_permutation.end(),
		std::pair<size_t, size_t>(0, 0), b_HasGreaterFrequency_Perm), tot_permutation.end());
	// throw away items with zero frequency (use binary search)

	_ASSERTE(tot_permutation.size() <= SIZE_MAX / 2);
	_ASSERTE(m_chunk.size() <= SIZE_MAX - 2 * tot_permutation.size());
	if(!stl_ut::Resize_To_N(m_work_item_list_v2, tot_permutation.size()) ||
	   !stl_ut::Resize_To_N(m_occurence_list_v2, m_chunk.size()))
		return false;
	// allocates list of offsets and list of occurences (low-level algorithm inputs)

	size_t n_offset = 0;
	for(size_t i = 0, n = tot_permutation.size(); i < n; ++ i) {
		std::pair<size_t, size_t> t_perm = tot_permutation[i];
		size_t n_idx = t_perm.first;
		size_t n_size = t_perm.second;
		// get permutated index into the real table

		m_work_item_list_v2[i] = TWorkItem(n_offset, m_term_occurence_table[n_idx].second.size(),
			m_term_occurence_table[n_idx].first);
		// store offset into the second table in the list

		/*m_occurence_list_v2[n_offset ++] = m_term_occurence_table[n_idx].first;
		// term id goes first

		_ASSERTE(n_size == m_term_occurence_table[n_idx].second.size());
		m_occurence_list_v2[n_offset ++] = n_size;*/
		// number of occurences goes second

		const std::vector<size_t> &r_vec = m_term_occurence_table[n_idx].second;
		std::copy(r_vec.begin(), r_vec.end(), m_occurence_list_v2.begin() + n_offset);
		n_offset += r_vec.size();
		// list of occurences goes last
	}
	_ASSERTE(n_offset <= m_occurence_list_v2.size());
	m_occurence_list_v2.erase(m_occurence_list_v2.begin() + n_offset, m_occurence_list_v2.end()); // cut-off unused space (dummy term occurences)
	// fills list of offsets and list of occurences (low-level algorithm inputs)

	// t_odo - take offset list, split work-items under slice-size, generate task passes

	m_pass_list.clear();
	// !!

	if(m_work_item_list_v2.empty())
		return true;
	// it seldom happens, but in extreme case, chunk can contain just dummy items, offset list is then empty

#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
	bool b_slice_remainders = true;
	// puts all slices smaller than slice length to the first pass

	bool b_slice_aligned_pass = false;
	// no easy way of determining that for any pass in general. work-items are sorted by lenght
	// at the beginning, but their length modulo n_max_slice_length isn't sorted anymore
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS

	size_t n_first_work_item = 0;
	size_t n_last_work_item = m_work_item_list_v2.size();
	do {
		if(n_last_work_item - n_first_work_item > n_min_primary_pass_size/* || (n_last_work_item > n_first_work_item &&
		   m_work_item_list_v2[(n_first_work_item + n_last_work_item) / 2].n_length <= n_max_slice_length)*/) {
			// either there's enough work-items, or there are still slices which can be processed without
			// direct parallelization (todo - design better metric)

			for(size_t i = n_first_work_item; i < n_last_work_item; ++ i) {
				TWorkItem &r_item = m_work_item_list_v2[i];
				if(r_item.n_length > n_max_slice_length) {
					size_t n_slice = n_max_slice_length;
#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					if(b_slice_remainders) {
						n_slice = r_item.n_length % n_max_slice_length;
#ifdef DOC_SPLITTER_ALIGNED_SLICES_IN_FIRST_PASS
						if(!n_slice)
							n_slice = n_max_slice_length;
						// in case slice is aligned, use full size
#endif //DOC_SPLITTER_ALIGNED_SLICES_IN_FIRST_PASS
					}
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					// determine current slice length

					size_t n_remainder_length = r_item.n_length - n_slice;
					// calculate remainder length

#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					_ASSERTE(!b_slice_aligned_pass || n_slice == n_max_slice_length);
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					r_item.n_length = n_slice;
					// trim the original work-item

					if(!stl_ut::Resize_Add_1More(m_work_item_list_v2, TWorkItem(r_item.n_offset +
					   n_slice, n_remainder_length, r_item.n_term_id)))
						return false;
					// add second part of this work-item to the back

#if defined(DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS) && !defined(DOC_SPLITTER_ALIGNED_SLICES_IN_FIRST_PASS)
					if(!n_slice) {
						m_work_item_list_v2.erase(m_work_item_list_v2.begin() + i);
						-- i;
						-- n_last_work_item; // !!
					}
					// in case the slice is aligned, do not include it in the first pass (it lowers variance
					// of first pass slice lenght, making computation potentialy slightly more efficient)
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS && !DOC_SPLITTER_ALIGNED_SLICES_IN_FIRST_PASS
				}
			}
			// there's enough work-items, running in parallel, just chop them up to slices

#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			if(b_slice_remainders && n_last_work_item - n_first_work_item > n_min_primary_pass_size) {
				// this is the first pass, containing slice remainders

				std::stable_sort(m_work_item_list_v2.begin() + n_first_work_item,
					m_work_item_list_v2.begin() + n_last_work_item, std::greater<TWorkItem>());
				// sort work-items by length (they have modulo lengths, which are no longer sorted)

				std::vector<std::pair<size_t, size_t> > peeloff_range_list;
				size_t n_peeloff_last_work_item = n_last_work_item;
				do {
					size_t n_peeloff_length = m_work_item_list_v2[n_peeloff_last_work_item - 1].n_length;
					// we're peeling-off this length

					size_t n_peeloff_first_work_item = std::lower_bound(m_work_item_list_v2.begin() +
						n_first_work_item, m_work_item_list_v2.begin() + n_peeloff_last_work_item,
						n_peeloff_length, b_WorkItemLenght_Above) - m_work_item_list_v2.begin();
					// how much work-items of this length are there?

					size_t n_round_length = n_peeloff_last_work_item - n_peeloff_first_work_item;
					// calculate number of work-items in this pass

					if(n_round_length < n_min_primary_pass_size)
						n_peeloff_first_work_item = n_first_work_item;
					// in case there's not enough of them, finish them as a single pass

					if(!stl_ut::Resize_Add_1More(peeloff_range_list,
					   std::make_pair(n_peeloff_first_work_item, n_peeloff_last_work_item)))
						return false;
					// put it to the list

					n_peeloff_last_work_item = n_peeloff_first_work_item;
					// next turn processess preceding work-items
				} while(n_peeloff_last_work_item > n_first_work_item);
				// find long enough ranges of work-items with the same length

				for(size_t i = peeloff_range_list.size(); i > 0; -- i) {
					std::pair<size_t, size_t> range = peeloff_range_list[i - 1];
					if(!Generate_PrimaryPasses(range.first, range.second, n_max_pass_size, b_slice_aligned_pass))
						return false;
				}
				// generate simple primary passess in reverse order so they're sorted
			} else {
				if(!Generate_PrimaryPasses(n_first_work_item, n_last_work_item, n_max_pass_size, b_slice_aligned_pass))
					return false;
				// generate simple primary passess
			}
#else //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			if(!Generate_PrimaryPasses(n_first_work_item, n_last_work_item, n_max_pass_size, b_slice_aligned_pass))
				return false;
			// generate simple primary passess
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS

			n_first_work_item = n_last_work_item;
			n_last_work_item = m_work_item_list_v2.size();
			// new group of work-items

#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			if(b_slice_remainders) {
				b_slice_aligned_pass = true;
				b_slice_remainders = false;
			}
			// there are no remainders anymore
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
		} else {
			// there's a few, potentialy long work-items. those will be chopped to slices different way

#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			if(b_slice_remainders)
				b_slice_remainders = false;
			// secondary passess do not know how to align slices
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS

			for(size_t i = n_first_work_item; i < m_work_item_list_v2.size(); ++ i) {
				TWorkItem &r_item = m_work_item_list_v2[i];
				if(r_item.n_length > n_max_slice_length) {
					size_t n_slice_num = (r_item.n_length + n_max_slice_length - 1) / n_max_slice_length;
					size_t n_new_slice_num = n_slice_num - 1; // the first one is in the list already
					// calculate number of slices

					size_t n_last_work_item_length = r_item.n_length - n_new_slice_num * n_max_slice_length;
					_ASSERTE(n_last_work_item_length <= n_max_slice_length);
					_ASSERTE(n_new_slice_num * n_max_slice_length + n_last_work_item_length == r_item.n_length);
#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					_ASSERTE(!b_slice_aligned_pass || n_last_work_item_length == n_max_slice_length);
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
					// calculate size of the last slice

					r_item.n_length = n_max_slice_length;
					// trim the original work-item

					TWorkItem t_item = r_item;
					// copy trimmed item

					if(!stl_ut::Reserve_NMore(m_work_item_list_v2, n_new_slice_num))
						return false;
					m_work_item_list_v2.insert(m_work_item_list_v2.begin() + (i + 1), n_new_slice_num, t_item); // insert n_new_slice_num trimmed items, this saves time when copying offset list, compare to calling insert for each item
					// make sure all the slices fit there

					for(size_t j = 1, n = n_new_slice_num; j < n; ++ j)
						m_work_item_list_v2[i + j].n_offset = t_item.n_offset + j * n_max_slice_length;
					// fix offsets of n_slice_num - 2 slices (+ original slice + last slice is written right below)

					m_work_item_list_v2[i + n_new_slice_num].n_offset = t_item.n_offset + n_new_slice_num * n_max_slice_length;
					m_work_item_list_v2[i + n_new_slice_num].n_length = n_last_work_item_length;
					// fix offset and size of the last slice
				}
			}
			// subdivide work-items to consecutive slices

			size_t n_leftover_primary_pass_size = m_work_item_list_v2.size() - n_first_work_item;
			if(!m_work_item_list_v2.empty()) { // m_work_item_list_v2.size() - 1 would underflow
				for(size_t j = m_work_item_list_v2.size() - 1; j > n_first_work_item; -- j) {
					if(m_work_item_list_v2[j - 1].n_term_id == m_work_item_list_v2[j].n_term_id) {
						n_leftover_primary_pass_size = m_work_item_list_v2.size() - (j + 1);
						break;
					}
				}
			}
			if(n_leftover_primary_pass_size < n_min_last_primary_pass_size)
				n_leftover_primary_pass_size = 0;
			// there might be some work-items which are not split to multiple consecutive items,
			// those could be processed directly in one (or more) last primary passes

			size_t n_round_length = m_work_item_list_v2.size() - n_first_work_item - n_leftover_primary_pass_size;
			// calculate number of work-items in this pass

			n_last_work_item = n_first_work_item + n_round_length;
			// this is the last work-item, which will be processed by secondary passess

			{
				size_t n_secondary_max_pass_size = min(n_max_pass_size, n_dummy_vector_bank_num);
				// calculate secondary pass size (this implementation decides to keep things
				// simple, it's expected that n_dummy_vector_bank_num <= n_max_pass_size)

				size_t n_pass_num = (n_round_length + n_secondary_max_pass_size - 1) / n_secondary_max_pass_size;
				// calculate number of passes to avoid exceeding n_max_pass_size

				if(n_pass_num) { // n_pass_num - 1 would underflow
					size_t n_pass_size = n_round_length / n_pass_num;
					size_t n_last_pass_size = n_round_length - (n_pass_num - 1) * n_pass_size/*n_secondary_max_pass_size*/;
					_ASSERTE(n_last_pass_size <= n_secondary_max_pass_size);
					_ASSERTE((n_pass_num - 1) * n_pass_size/*n_secondary_max_pass_size*/ + n_last_pass_size == n_round_length);
					// calculate size of the last slice

					size_t n_first_pass = m_pass_list.size();
					if(!stl_ut::Resize_Add_NMore(m_pass_list, n_pass_num))
						return false;
					for(size_t j = 0; j < n_pass_num; ++ j) {
						TPass &r_t_pass = m_pass_list[n_first_pass + j];

						r_t_pass.n_offset = n_first_work_item + j * n_pass_size/*n_secondary_max_pass_size*/;
						r_t_pass.n_length = (j + 1 < n_pass_num)? n_pass_size/*n_secondary_max_pass_size*/ : n_last_pass_size;
						r_t_pass.b_primary = false;
#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
						r_t_pass.b_slice_aligned = b_slice_aligned_pass;
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS

						for(size_t k = 0; k < r_t_pass.n_length; ++ k) {
							TWorkItem &r_item = m_work_item_list_v2[r_t_pass.n_offset + k];
							// take work-item

							if(r_t_pass.summation_list.empty() || r_t_pass.summation_list.back().n_term_id != r_item.n_term_id) {
								if(!stl_ut::Resize_Add_1More(r_t_pass.summation_list, TPass::TSummationStep(/*r_t_pass.n_offset +*/ k, 1, r_item.n_term_id))) /*+++*/
									return false;
							} else
								++ r_t_pass.summation_list.back().n_length;
							// add slice to summation step if it's still the same term / create a new summation step

							r_item.n_term_id = k;
							// set slice number for term-id (we're writing to dummy vector slots)
						}
					}
				}
				// add passes, set up summation steps (greedy merging approach, todo - might want to look into it)
			}
			// build secondary passes

			n_first_work_item = n_last_work_item;
			n_last_work_item = m_work_item_list_v2.size();
			_ASSERTE(n_last_work_item - n_first_work_item == n_leftover_primary_pass_size);
			// group of work-items marked for leftover primary processing
			
			if(!Generate_PrimaryPasses(n_first_work_item, n_last_work_item, n_max_pass_size, b_slice_aligned_pass))
				return false;
			// generate simple primary passess

			n_first_work_item = n_last_work_item;
			n_last_work_item = m_work_item_list_v2.size();
			// new group of work-items
		}
		// split long slices to work-items, either for primary or for secondary processing
	} while(n_first_work_item < n_last_work_item);
	// split work-items to slices, build passes

	// t_odo - try to put all slices smaller than slice length to the first pass
	// (todo - try taking them from beginning of all the slices / from the end of all the slices)

	return true;
}

bool CDocumentSplitter::Generate_PrimaryPasses(size_t n_first_work_item,
	size_t n_last_work_item, size_t n_max_pass_size, bool b_slice_aligned_pass)
{
	_ASSERTE(n_first_work_item <= n_last_work_item);
	_ASSERTE(n_first_work_item == n_last_work_item || n_first_work_item < m_work_item_list_v2.size()); // in case they're equal, then it doesn't matter how much they are
	_ASSERTE(n_last_work_item <= m_work_item_list_v2.size()); // but be polite

	size_t n_round_length = n_last_work_item - n_first_work_item;
	// calculate number of work-items in this pass

	size_t n_pass_num = (n_round_length + n_max_pass_size - 1) / n_max_pass_size;
	// calculate number of passes to avoid exceeding n_max_pass_size

	if(n_pass_num) { // n_pass_num - 1 would underflow
		size_t n_pass_size = n_round_length / n_pass_num;
		size_t n_last_pass_size = n_round_length - (n_pass_num - 1) * n_pass_size/*n_max_pass_size*/;
		_ASSERTE(n_last_pass_size <= n_max_pass_size);
		_ASSERTE((n_pass_num - 1) * n_pass_size/*n_max_pass_size*/ + n_last_pass_size == n_round_length);
		// calculate size of the last slice

		size_t n_first_pass = m_pass_list.size();
		if(!stl_ut::Resize_Add_NMore(m_pass_list, n_pass_num))
			return false;
		for(size_t j = 0, n = n_pass_num - 1; j < n; ++ j) {
			TPass &r_t_pass = m_pass_list[n_first_pass + j];
			r_t_pass.n_offset = n_first_work_item + j * n_pass_size/*n_max_pass_size*/;
			r_t_pass.n_length = n_pass_size/*n_max_pass_size*/;
			_ASSERTE(r_t_pass.b_primary == true); // by default
#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			r_t_pass.b_slice_aligned = b_slice_aligned_pass;
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			// set a single pass up
		}
		{
			TPass &r_t_pass = m_pass_list[n_first_pass + n_pass_num - 1];
			r_t_pass.n_offset = n_first_work_item + (n_pass_num - 1) * n_pass_size/*n_max_pass_size*/;
			r_t_pass.n_length = n_last_pass_size;
			_ASSERTE(r_t_pass.b_primary == true); // by default
#ifdef DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			r_t_pass.b_slice_aligned = b_slice_aligned_pass;
#endif //DOC_SPLITTER_SLICE_REMAINDERS_TO_THE_FIRST_PASS
			// set last pass up
		}
		// add passes
	}

	return true;
}

/*
 *								=== ~CDocumentSplitter ===
 */

/*
 *								=== CSplitterTester ===
 */

bool CSplitterTester::PrepareDocs(size_t n_doc_num, size_t n_doc_size,
	size_t n_doc_size_variation, bool b_contiguous_docs)
{
	m_b_contiguous_docs = b_contiguous_docs;
	try {
		const size_t n_dummy_term = 0;
		size_t n_term = 1;
		// first term is always 1

		size_t n_random_term = ((n_doc_num * (n_doc_size + n_doc_size_variation) + 3) / 4) + 999;
		n_random_term -= n_random_term % 1000;
		// random terms start at integer multiple of 1000 to be easily spotted

		m_document_list.clear();
		for(size_t i = 0; i < n_doc_num; ++ i) {
			size_t n_size = size_t(n_doc_size + (float(rand()) / RAND_MAX) * n_doc_size_variation);
			TDocument t_doc;
			t_doc.s_name = L"somedoc";
			t_doc.s_date = L"2010-08-09";
			if(m_b_contiguous_docs) {
				for(size_t j = 0; j < n_size; ++ j, ++ n_term) {
					t_doc.term_position_list.push_back(n_term);
					t_doc.term_frequency_map[n_term] = 1; // each term once
				}
			} else {
				for(size_t j = 0; j < n_size; ++ j) {
					if(j < n_size / 4) {
						t_doc.term_position_list.push_back(n_term);
						t_doc.term_frequency_map[n_term] = 1; // each term once
						 ++ n_term;
						// each document introduces some new terms
					} else {
						size_t n_repeating_term = n_random_term + size_t(pow(float(rand()) / RAND_MAX, 4) * 1000);
						// each document contains some potentially repeating terms

						t_doc.term_position_list.push_back(n_repeating_term);

						if(t_doc.term_frequency_map.find(n_repeating_term) == t_doc.term_frequency_map.end())
							t_doc.term_frequency_map[n_repeating_term] = 1;
						else
							++ t_doc.term_frequency_map[n_repeating_term];
						// count term frequencies
					}
				}
			}
			m_document_list.push_back(t_doc);
		}

		_ASSERTE(m_b_contiguous_docs || n_term < n_random_term); // make sure they do not collide

		m_n_dummy_term = n_dummy_term;
		m_n_term_num = n_term;
	} catch(std::bad_alloc&) {
		return false;
	}

	return true;
}

/**
 *	@brief ordering predicate for work-items; compares term id's only
 *
 *	@param[in] a is the first work-item
 *	@param[in] b is the second work-item
 *
 *	@return Returns true if term id of a is smaller than term id of b, otherwise returns false.
 */
static inline bool b_IsTermSmaller(const CDocumentSplitter::TWorkItem &a, const CDocumentSplitter::TWorkItem &b)
{
	return b.n_term_id > a.n_term_id;
}

/**
 *	@brief ordering predicate for work-items; compares term id's first, offsets second
 *
 *	@param[in] a is the first work-item
 *	@param[in] b is the second work-item
 *
 *	@return Returns true if a precedes b when ordering by term id and then
 *		by offset (for equal term id's), otherwise returns false.
 */
static inline bool b_IsTermSmaller_IsOffsetSmaller(const CDocumentSplitter::TWorkItem &a, const CDocumentSplitter::TWorkItem &b)
{
	if(a.n_term_id != b.n_term_id)
		return a.n_term_id < b.n_term_id;
	// different terms, term id decides

	return a.n_offset < b.n_offset;
	// the same term, offset breaks the tie
}

/**
 *	@brief runs the document splitter over the prepared documents and cross-checks its outputs
 *
 *	For every chunk the splitter yields, this checks (for contiguous documents only) contents
 *	of the chunk and of the windows around each focused term, then it checks the term occurence
 *	table against the chunk, and finally it checks the v2 work-item list, occurence list and
 *	pass list, which must reconstruct the contents of the original term occurence table.
 *
 *	@param[in] n_chunk_size is chunk size (passed to the splitter)
 *	@param[in] n_halfwindow_size is half-window size (passed to the splitter); each chunk
 *		is expected to contain this much padding at both its ends
 *
 *	@return Returns true if all the checks passed, false on the first failed check
 *		or if any of the splitter functions failed.
 */
bool CSplitterTester::Test_SplitterIntegrity(size_t n_chunk_size, size_t n_halfwindow_size) const
{
	CDocumentSplitter splitter(m_document_list, m_n_dummy_term, n_chunk_size, n_halfwindow_size);

	if(!splitter.Prepare_FirstChunk())
		return false;

	size_t n_focused = 1;
	size_t n_index = 0;
	// id of the next term, expected in focus (contiguous docs only), and number of terms focused so far

	do {
		const std::vector<CDocumentSplitter::size_t> &chunk = splitter.Get_Chunk();

		if(chunk.size() < 2 * n_halfwindow_size)
			return false;
		// the chunk must contain padding at both ends; a shorter chunk could not pass the term
		// count check below, and the unsigned "chunk.size() - n_halfwindow_size" expressions
		// and "chunk.end() - n_halfwindow_size" iterators used below would not be valid for it

		if(m_b_contiguous_docs) {
			for(size_t i = n_halfwindow_size; i < chunk.size() - n_halfwindow_size; ++ i) {
				size_t n_window_terms = 0, n_left_terms = 0, n_right_terms = 0;
				size_t n_focused_term_id = chunk[i];

				if(n_focused_term_id != m_n_dummy_term) {
					if(n_focused_term_id != n_focused)
						return false;
					++ n_focused;
				}
				// make sure we're focusing all the terms

				for(size_t wnd = i - n_halfwindow_size; wnd <= i + n_halfwindow_size; ++ wnd) {
					++ n_window_terms;
					// count 

					if(wnd == i)
						continue;

					size_t n_co_occuring_term_id = chunk[wnd];

					if(n_focused_term_id != m_n_dummy_term && n_co_occuring_term_id != m_n_dummy_term) {
						if(wnd < i)
							++ n_left_terms;
						else
							++ n_right_terms;
						// count co-occuring terms

						if(n_co_occuring_term_id != n_focused_term_id + wnd - i)
							return false;
						// terms are supposed to be contiguous
					}

					// here we update term vectors for n_focused_term_id, based on n_co_occuring_term_id
				}

				_ASSERTE(n_window_terms == 2 * n_halfwindow_size + 1);
				// elementary check

				if(n_focused_term_id != m_n_dummy_term) {
					size_t n_doc = 0, n_idx = n_index;
					while(n_doc < m_document_list.size() && m_document_list[n_doc].term_position_list.size() <= n_idx) {
						n_idx -= m_document_list[n_doc].term_position_list.size();
						++ n_doc;
					}
					// find document (n_idx becomes position of the focused term inside of it)

					if(n_doc == m_document_list.size())
						return false;
					// more terms were focused than there are in all the documents;
					// must not index the document list with n_doc below

					if(n_left_terms != min(n_halfwindow_size, max(size_t(0), n_idx)) ||
					   n_right_terms != min(n_halfwindow_size, max(size_t(0), m_document_list[n_doc].term_position_list.size() - n_idx - 1)))
						return false;
					// make sure there's right count of (non-dummy) terms on the left and right side of focused term

					++ n_index;
					_ASSERTE(n_index == n_focused - 1);
				}
			}
			// makes sure chunk is filled with correct data

#ifdef DOC_SPLITTER_TRACK_CHUNK_DOCS
			size_t n_min_term = m_n_dummy_term, n_max_term = m_n_dummy_term;
			size_t n_min_min_term = m_n_dummy_term, n_max_max_term = m_n_dummy_term;
			// dummy term means "not found" (the loops below only ever store non-dummy terms)

			for(size_t i = n_halfwindow_size, n = chunk.size(); i < n; ++ i) {
				if(chunk[i] != m_n_dummy_term) {
					n_min_term = chunk[i];
					break;
				}
			}
			for(size_t i = chunk.size() - n_halfwindow_size; i > 0; -- i) { // i is one past the tested index; "i >= 0" is always true for unsigned i
				if(chunk[i - 1] != m_n_dummy_term) {
					n_max_term = chunk[i - 1];
					break;
				}
			}
			for(size_t i = 0, n = chunk.size(); i < n; ++ i) {
				if(chunk[i] != m_n_dummy_term) {
					n_min_min_term = chunk[i];
					break;
				}
			}
			for(size_t i = chunk.size(); i > 0; -- i) { // i is one past the tested index; "i >= 0" is always true for unsigned i
				if(chunk[i - 1] != m_n_dummy_term) {
					n_max_max_term = chunk[i - 1];
					break;
				}
			}
			// find min / max term id's (chunk contents are monotonicaly increasing, except for dummy items)

			if(n_min_term != m_n_dummy_term && n_max_term != m_n_dummy_term &&
			   n_min_min_term != m_n_dummy_term && n_max_max_term != m_n_dummy_term) {
				// the document range checks only make sense if the terms were found

				size_t n_min_doc = 0, n_max_doc = 0;
				size_t n_min_min_doc = 0, n_max_max_doc = 0;
				{
					size_t n_idx = n_min_term - 1;
					while(n_min_doc < m_document_list.size() && m_document_list[n_min_doc].term_position_list.size() <= n_idx) {
						n_idx -= m_document_list[n_min_doc].term_position_list.size();
						++ n_min_doc;
					}
				}
				{
					size_t n_idx = n_max_term - 1;
					while(n_max_doc < m_document_list.size() && m_document_list[n_max_doc].term_position_list.size() <= n_idx) {
						n_idx -= m_document_list[n_max_doc].term_position_list.size();
						++ n_max_doc;
					}
				}
				{
					size_t n_idx = n_min_min_term - 1;
					while(n_min_min_doc < m_document_list.size() && m_document_list[n_min_min_doc].term_position_list.size() <= n_idx) {
						n_idx -= m_document_list[n_min_min_doc].term_position_list.size();
						++ n_min_min_doc;
					}
				}
				{
					size_t n_idx = n_max_max_term - 1;
					while(n_max_max_doc < m_document_list.size() && m_document_list[n_max_max_doc].term_position_list.size() <= n_idx) {
						n_idx -= m_document_list[n_max_max_doc].term_position_list.size();
						++ n_max_max_doc;
					}
				}
				// find indices of min / max documents, present in the chunk (of relevant / of all)
				// (contiguous terms are numbered from 1, so term id minus one is its global position)

				if(splitter.n_First_ChunkDocument() > n_min_doc ||
				   splitter.n_Last_ChunkDocument() <= n_max_doc)
					return false;
				if(splitter.n_First_ChunkDocument() != n_min_doc) { // relevant docs
					if(splitter.n_First_ChunkDocument() != n_min_min_doc) { // all docs
						if(n_halfwindow_size * 3 < n_chunk_size) {
							fprintf(stderr, "warning: splitter undershoots by %lu\n",
								(unsigned long)(n_min_doc - splitter.n_First_ChunkDocument())); // the difference is not an int, "%d" would be wrong
						} else {
							// in this case, we can't effectively track which document(s) are repeated in restart interval
						}
					}
				}
				if(splitter.n_Last_ChunkDocument() != n_max_doc + 1) { // relevant docs
					if(splitter.n_Last_ChunkDocument() != n_max_max_doc + 1) { // all docs
						fprintf(stderr, "warning: splitter overshoots by %lu\n",
							(unsigned long)(splitter.n_Last_ChunkDocument() - (n_max_doc + 1))); // the difference is not an int, "%d" would be wrong
					}
				}
				// make sure splitter is aware of all the documents, present in the chunk (doesn't really matter if there's more of them)
			}
#else //DOC_SPLITTER_TRACK_CHUNK_DOCS
			static bool b_warned = false;
			if(!b_warned) {
				b_warned = true;
				fprintf(stderr, "warning: DOC_SPLITTER_TRACK_CHUNK_DOCS not defined,"
					" unable to fully check splitter integrity\n");
			}
#endif //DOC_SPLITTER_TRACK_CHUNK_DOCS
		}
		// above tests only work for documents, filled with contiguous terms

		if(!splitter.Build_TermOccurenceTable())
			return false;
		// build term occurence table

		const std::vector<CDocumentSplitter::size_t> &offset_list = splitter.Get_TermOccurence_OffsetList();
		const std::vector<CDocumentSplitter::size_t> &occurence_list = splitter.Get_TermOccurenceList();
		// get lists

		{
			size_t n_counted = 0;
			for(size_t i = 0, n = offset_list.size(); i < n; ++ i) {
				size_t n_offset = offset_list[i];
				// get offset to occurence list

				size_t n_term = occurence_list[n_offset ++];
				if(n_term == m_n_dummy_term)
					return false; // there are not supposed to be dummy terms
				size_t n_occurence_num = occurence_list[n_offset ++];
				if(!n_occurence_num)
					return false; // there are not supposed to be zero-length records
				const CDocumentSplitter::size_t *p_position = &occurence_list[n_offset];
				// get occurence list (the record is term id, number of occurences and then the positions)

				for(size_t j = 0; j < n_occurence_num; ++ j) {
					size_t n_position = p_position[j];
					if(n_position < n_halfwindow_size || n_position >= chunk.size() - n_halfwindow_size)
						return false;
					if(chunk[n_position] != n_term)
						return false;
				}
				// make sure all occurences are correct (not in the padding, and pointing to the right term)

				for(size_t j = 1; j < n_occurence_num; ++ j) {
					if(p_position[j - 1] >= p_position[j])
						return false;
				}
				// make sure position list is ordered and doesn't contain repetitions

				size_t n_correct_count = 0;
#if defined(_MSC_VER) && !defined(__MWERKS__) && _MSC_VER >= 1400
				n_correct_count = std::count(chunk.begin() + n_halfwindow_size,
					chunk.end() - n_halfwindow_size, n_term);
#else //_MSC_VER && !__MWERKS__ && _MSC_VER >= 1400
				std::count(chunk.begin() + n_halfwindow_size,
					chunk.end() - n_halfwindow_size, n_term, n_correct_count);
#endif //_MSC_VER && !__MWERKS__ && _MSC_VER >= 1400
				if(n_occurence_num != n_correct_count)
					return false;
				// make sure no occurence is missed

				n_counted += n_occurence_num;
				// counts terms
			}

			n_counted += 2 * n_halfwindow_size;
			size_t n_dummy_count = 0;
#if defined(_MSC_VER) && !defined(__MWERKS__) && _MSC_VER >= 1400
			n_dummy_count = std::count(chunk.begin() + n_halfwindow_size,
				chunk.end() - n_halfwindow_size, m_n_dummy_term);
#else //_MSC_VER && !__MWERKS__ && _MSC_VER >= 1400
			std::count(chunk.begin() + n_halfwindow_size,
				chunk.end() - n_halfwindow_size, m_n_dummy_term, n_dummy_count);
#endif //_MSC_VER && !__MWERKS__ && _MSC_VER >= 1400
			n_counted += n_dummy_count;
			// count dummy terms & padding

			if(n_counted != chunk.size())
				return false;
			// makes sure no terms are missed
		}
		// test term occurence table integrity

		const size_t n_max_slice_length = 256;
		const size_t n_max_pass_size = 10000;
		const size_t n_min_primary_pass_size = 64;
		const size_t n_dummy_vector_bank_num = 8192;
		// term occurence table v2 config

		if(!splitter.Build_TermOccurenceTable_v2(n_max_slice_length,
		   n_max_pass_size, n_min_primary_pass_size, n_min_primary_pass_size, n_dummy_vector_bank_num))
			return false;
		// build term occurence table v2

		const std::vector<CDocumentSplitter::TWorkItem> &offset_list_v2 = splitter.Get_WorkItemList_v2();
		const std::vector<CDocumentSplitter::size_t> &occurence_list_v2 = splitter.Get_TermOccurenceList_v2();
		const std::vector<CDocumentSplitter::TPass> &pass_list_v2 = splitter.Get_PassList_v2();
		// get v2 lists

		std::vector<CDocumentSplitter::TWorkItem> offset_list_v2_reconst;
		for(size_t i = 0, n = pass_list_v2.size(); i < n; ++ i) {
			const CDocumentSplitter::TPass &r_pass = pass_list_v2[i];

			_ASSERTE(r_pass.n_offset <= SIZE_MAX && r_pass.n_length <= SIZE_MAX &&
				r_pass.n_offset <= SIZE_MAX - r_pass.n_length);
			if(r_pass.n_offset >= offset_list_v2.size() || r_pass.n_offset + r_pass.n_length > offset_list_v2.size())
				return false;
			// make sure pass points to valid slices

			{
				std::vector<size_t> term_presence;
				for(size_t j = r_pass.n_offset, m = r_pass.n_offset + r_pass.n_length; j < m; ++ j)
					term_presence.push_back(offset_list_v2[j].n_term_id);
				// collect term id's of all the slices in this pass

				std::sort(term_presence.begin(), term_presence.end());
				if(std::adjacent_find(term_presence.begin(), term_presence.end()) != term_presence.end())
					return false;
				// terms must not repeat, conflicts would occur (once sorted, repeated terms are adjacent)
			}
			// make sure no two threads in the pass process the same term

			if(r_pass.b_primary) {
				if(!r_pass.summation_list.empty())
					return false;
				// primary passes do not have summation step

				offset_list_v2_reconst.insert(offset_list_v2_reconst.end(), offset_list_v2.begin() + r_pass.n_offset,
					offset_list_v2.begin() + (r_pass.n_offset + r_pass.n_length));
				// in case it's primary pass, no dummy remapping occurs, just copy processed slices to reconstructed offset list
			} else {
				if(r_pass.summation_list.empty())
					return false;
				// secondary passes should have summation step

				for(size_t j = r_pass.n_offset, m = r_pass.n_offset + r_pass.n_length; j < m; ++ j) {
					if(offset_list_v2[j].n_term_id >= n_dummy_vector_bank_num)
						return false;
				}
				// term id's in secondary passes point to dummy vector banks

				if(r_pass.summation_list.front().n_offset != 0)
					return false;
				// first summation step should take dummy vector bank 0

				size_t n_slice_num = 0;
				for(size_t j = 0, m = r_pass.summation_list.size(); j < m; ++ j) {
					CDocumentSplitter::TPass::TSummationStep t_sum = r_pass.summation_list[j];

					_ASSERTE(t_sum.n_offset <= SIZE_MAX && t_sum.n_length <= SIZE_MAX &&
						t_sum.n_offset <= SIZE_MAX - t_sum.n_length);
					if(t_sum.n_offset >= r_pass.n_length || t_sum.n_offset + t_sum.n_length > r_pass.n_length)
						return false;
					// make sure summation points to valid slices (summation offsets are relative
					// to the pass, they must stay inside of the pass, which was checked to be
					// inside the work-item list above)

					for(size_t k = t_sum.n_offset, o = t_sum.n_offset + t_sum.n_length; k < o; ++ k) {
						CDocumentSplitter::TWorkItem t_wi = offset_list_v2[k + r_pass.n_offset]; /*+++*/
						if(t_wi.n_term_id != k /*- r_pass.n_offset*/) /*+++*/
							return false;
						// terms should map to dummy vector banks

						t_wi.n_term_id = t_sum.n_term_id;
						// put there correct term id where vector go after summation

						offset_list_v2_reconst.push_back(t_wi);
						// add it to the list

						++ n_slice_num;
						// count added slices
					}
				}
				// check slice to dummy vector bank mapping, add slices with correct term id to reconstructed offset list

				if(n_slice_num != r_pass.n_length)
					return false;
				// number of slices processed in the pass should equal slices processed in summation passes
			}
		}
		// check all the passes, build reconstructed offset list

		std::sort(offset_list_v2_reconst.begin(), offset_list_v2_reconst.end(), b_IsTermSmaller_IsOffsetSmaller);
		// sort reconstructed offset list by terms and offsets

		for(size_t i = 1; i < offset_list_v2_reconst.size(); ++ i) {
			if(offset_list_v2_reconst[i - 1].n_term_id == offset_list_v2_reconst[i].n_term_id) {
				CDocumentSplitter::TWorkItem &a = offset_list_v2_reconst[i - 1];
				const CDocumentSplitter::TWorkItem &b = offset_list_v2_reconst[i];

				if(a.n_offset + a.n_length != b.n_offset)
					return false;
				// if sorted, slices should cover continuous segment in occurence list

				a.n_length += b.n_length;
				offset_list_v2_reconst.erase(offset_list_v2_reconst.begin() + i);
				-- i; // repeat for connected slice
				// join slices, erase the second one
			}
		}
		// join slices, forming continuous segments in term occurence list

		if(offset_list_v2_reconst.size() != offset_list.size())
			return false;
		// this list is in fact offset list

		for(size_t i = 0, n = offset_list.size(); i < n; ++ i) {
			size_t n_offset = offset_list[i];
			size_t n_term_id = occurence_list[n_offset];
			size_t n_length = occurence_list[n_offset + 1];
			const CDocumentSplitter::size_t *p_occurences = &occurence_list[n_offset + 2];
			// read original span from offset list

			bool b_found = false;
			for(size_t j = 0; j < n; ++ j) { // both lists have n elements (checked above)
				if(offset_list_v2_reconst[j].n_term_id == n_term_id) {
					size_t n_offset2 = offset_list_v2_reconst[j].n_offset;
					size_t n_length2 = offset_list_v2_reconst[j].n_length;
					const CDocumentSplitter::size_t *p_occurences2 = &occurence_list_v2[n_offset2];
					// read span from the reconstructed list

					if(n_length != n_length2 || memcmp(p_occurences, p_occurences2, n_length * sizeof(*p_occurences)))
						return false;
					// make sure spans have same length and contents (the size in bytes is calculated
					// from the element type, the lists contain CDocumentSplitter::size_t)

					b_found = true;
					break;
				}
			}
			if(!b_found)
				return false;
			// each span must map to identical span in v2
		}
		if(occurence_list.size() - 2 * offset_list.size() != occurence_list_v2.size())
			return false;
		// check contents of occurence list (v2 doesn't contain the two-element record headers)
	} while(splitter.Prepare_NextChunk());

	if(m_b_contiguous_docs) {
		if(n_focused != m_n_term_num)
			return false;
		// make sure we're focusing all the terms, and not more
	}

	return true;
}

/**
 *	@brief runs splitter integrity tests on fixed and on randomly generated configurations
 *
 *	A few hand-picked configurations are tested first (a single document and ten
 *	short documents), followed by three batches of random configurations. Each batch
 *	generates several random document sets and for each of them runs
 *	Test_SplitterIntegrity() with several random buffer and half-window sizes.
 *	Progress and results are printed to stdout, failures to stderr.
 *
 *	@return Returns true if all the tests succeeded. Returns false if any test failed,
 *		or immediately if PrepareDocs() failed (the remaining tests are not run then).
 *
 *	@note The random configurations are drawn using rand(), which is not seeded here.
 */
bool CSplitterTester::DoTests()
{
	CSplitterTester test;

	bool b_result =  true;

	if(!test.PrepareDocs(1, 100, 0)) {
		fprintf(stderr, "error: test.PrepareDocs(1, 100, 0) failed\n");
		return false;
	}
	printf("test with a single document, 100 terms long\n");

	if(!test.Test_SplitterIntegrity(256, 5)) {
		fprintf(stderr, "error: test.Test_SplitterIntegrity(256, 5) failed\n");
		b_result = false;
	} else
		printf("\ttest.Test_SplitterIntegrity(256, 5) succeeded\n");

	if(!test.Test_SplitterIntegrity(64, 5)) {
		fprintf(stderr, "error: test.Test_SplitterIntegrity(64, 5) failed\n");
		b_result = false;
	} else
		printf("\ttest.Test_SplitterIntegrity(64, 5) succeeded\n");
	// fixed tests on a single document

	if(!test.PrepareDocs(10, 10, 5)) {
		fprintf(stderr, "error: test.PrepareDocs(10, 10, 5) failed\n");
		return false;
	}
	printf("test with 10 documents, 10 to 15 terms long\n");

	if(!test.Test_SplitterIntegrity(256, 5)) {
		fprintf(stderr, "error: test.Test_SplitterIntegrity(256, 5) failed\n");
		b_result = false;
	} else
		printf("\ttest.Test_SplitterIntegrity(256, 5) succeeded\n");

	if(!test.Test_SplitterIntegrity(64, 5)) {
		fprintf(stderr, "error: test.Test_SplitterIntegrity(64, 5) failed\n");
		b_result = false;
	} else
		printf("\ttest.Test_SplitterIntegrity(64, 5) succeeded\n");

	if(!test.Test_SplitterIntegrity(64, 10)) {
		fprintf(stderr, "error: test.Test_SplitterIntegrity(64, 10) failed\n");
		b_result = false;
	} else
		printf("\ttest.Test_SplitterIntegrity(64, 10) succeeded\n");

	if(!test.Test_SplitterIntegrity(64, 25)) {
		fprintf(stderr, "error: test.Test_SplitterIntegrity(64, 25) failed\n");
		b_result = false;
	} else
		printf("\ttest.Test_SplitterIntegrity(64, 25) succeeded\n");
	// fixed tests on ten short documents

	const size_t n_document_configuration_num = 10;
	const size_t n_splitter_configuration_num = 10;

	struct TRandomBatch {
		const char *p_s_banner; // text printed before the batch starts
		int n_doc_size_min; // the smallest document size generated
		int n_doc_size_span; // document size is n_doc_size_min + rand() % n_doc_size_span
		bool b_pass_false; // if set, false is passed as the fourth argument of PrepareDocs()
		size_t n_halfwnd_limit; // half-window size is drawn below this limit
	};
	static const TRandomBatch p_batch_list[] = {
		{"do some test with random contiguous docs ...\n", 1, 1000, false, 1024},
		{"and now with non-contiguous docs ...\n", 1, 1000, true, 1024},
		{"do some test with some large docs ...\n", 4000, 4000, true, 20}
	};
	const size_t n_batch_num = sizeof(p_batch_list) / sizeof(p_batch_list[0]);
	// the three random batches only differ in these parameters

	for(size_t n_batch = 0; n_batch < n_batch_num; ++ n_batch) {
		const TRandomBatch &r_batch = p_batch_list[n_batch];

		printf("%s", r_batch.p_s_banner);
		for(size_t i = 0; i < n_document_configuration_num; ++ i) {
			size_t n_doc_num = 1 + rand() % 100;
			size_t n_doc_size = r_batch.n_doc_size_min + rand() % r_batch.n_doc_size_span;
			size_t n_doc_size_var = (rand() & 1)? 0 : n_doc_size / 2;
			// generate random document set configuration (the order of rand() calls matters)

			bool b_prepared = (r_batch.b_pass_false)?
				test.PrepareDocs(n_doc_num, n_doc_size, n_doc_size_var, false) :
				test.PrepareDocs(n_doc_num, n_doc_size, n_doc_size_var);
			if(!b_prepared) {
				fprintf(stderr, "error: test.PrepareDocs(%lu, %lu, %lu%s) failed\n",
					static_cast<unsigned long>(n_doc_num), static_cast<unsigned long>(n_doc_size),
					static_cast<unsigned long>(n_doc_size_var), (r_batch.b_pass_false)? ", false" : "");
				return false;
			}
			// prepare the documents

			if(n_doc_size_var) {
				printf("test with %lu documents, %lu to %lu terms long (~" PRIsizeB " terms)\n",
					static_cast<unsigned long>(n_doc_num), static_cast<unsigned long>(n_doc_size),
					static_cast<unsigned long>(n_doc_size + n_doc_size_var),
					PRIsizeBparams(n_doc_num * (n_doc_size + n_doc_size_var / 2)));
			} else {
				printf("test with %lu documents, %lu terms long (" PRIsizeB " terms)\n",
					static_cast<unsigned long>(n_doc_num), static_cast<unsigned long>(n_doc_size),
					PRIsizeBparams(n_doc_num * n_doc_size));
			}
			// size_t must not be printed using "%d", the values are small enough to fit unsigned long

			size_t n_succeeded = 0;
			for(size_t n_tested = 0; n_tested < n_splitter_configuration_num;) {
				size_t n_buffer_size = 1 << (5 + rand() % 16); // 32B - 1MB
				if(i >= 50)
					n_buffer_size += rand() % n_buffer_size; // NPOT
				// NOTE(review): with n_document_configuration_num = 10 the condition above
				// is never met, so non-power-of-two buffer sizes are not being tested

				size_t n_halfwnd_size = max(size_t(1), rand() % min(size_t(r_batch.n_halfwnd_limit),
					n_buffer_size / 2)); // do not blow window size too much!
				if(n_halfwnd_size * 2 >= n_buffer_size)
					continue; // draw again, rejected configurations do not count as tests

				unsigned long n_buffer_size_ul = static_cast<unsigned long>(n_buffer_size);
				unsigned long n_halfwnd_size_ul = static_cast<unsigned long>(n_halfwnd_size);
				if(!test.Test_SplitterIntegrity(n_buffer_size, n_halfwnd_size)) {
					fprintf(stderr, "error: test.Test_SplitterIntegrity(%lu, %lu) failed           \n",
						n_buffer_size_ul, n_halfwnd_size_ul);
					b_result = false;
				} else {
					printf("\ttest.Test_SplitterIntegrity(%lu, %lu) succeeded        \r",
						n_buffer_size_ul, n_halfwnd_size_ul);
					++ n_succeeded;
				}
				++ n_tested;
			}
			// run a fixed number of random splitter configurations on these documents

			if(n_succeeded != n_splitter_configuration_num) {
				fprintf(stderr, "%lu tests succeeded, %lu failed          \n",
					static_cast<unsigned long>(n_succeeded),
					static_cast<unsigned long>(n_splitter_configuration_num - n_succeeded));
			} else
				printf("%-79s\n", "\tall tests succeeded");
			// the padding overwrites the progress line, which was printed using '\r'
		}
	}
	// do some random tests

	if(b_result)
		printf("all tests succeeded\n");
	else
		printf("some tests failed\n");

	return b_result;
}

/**
 *	@brief measures and prints the time taken by the individual splitter stages
 *
 *	Three cumulative measurements are taken, each repeated n_repeat_num times:
 *	chunk preparation alone, chunk preparation with Build_TermOccurenceTable(),
 *	and both of those with Build_TermOccurenceTable_v2() on top. Times of the two
 *	table building stages are obtained by subtracting consecutive measurements.
 *	The averages per repetition are printed to stdout.
 *
 *	@param[in] n_chunk_size is chunk size, passed to the splitter
 *		(it is multiplied by sizeof(size_t) when printed in bytes)
 *	@param[in] n_halfwindow_size is half-window size, passed to the splitter
 *	@param[in] n_repeat_num is number of repetitions of each measurement; it is also
 *		the divisor used for averaging, so the printed values are only meaningful
 *		if it is positive
 *
 *	@return Returns true on success, false if any of the splitter
 *		calls failed (no timing is printed in that case).
 */
bool CSplitterTester::Test_SplitterTiming(size_t n_chunk_size, size_t n_halfwindow_size, int n_repeat_num) const
{
	CDocumentSplitter splitter(m_document_list, m_n_dummy_term, n_chunk_size, n_halfwindow_size);

	CTimer timer;
	timer.ResetTimer();
	double f_chunks_time;
	{
		double f_chunks_start = timer.f_Time();
		for(int i = 0; i < n_repeat_num; ++ i) {
			if(!splitter.Prepare_FirstChunk())
				return false;
			while(splitter.Prepare_NextChunk())
				;
		}
		f_chunks_time = timer.f_Time() - f_chunks_start;
	}
	// measure chunk preparation only
	double f_subtotal_time;
	{
		double f_subtotal_start = timer.f_Time();
		for(int i = 0; i < n_repeat_num; ++ i) {
			if(!splitter.Prepare_FirstChunk())
				return false;
			do {
				if(!splitter.Build_TermOccurenceTable())
					return false;
			} while(splitter.Prepare_NextChunk());
		}
		f_subtotal_time = timer.f_Time() - f_subtotal_start;
	}
	// measure chunk preparation and building of term occurence table
	double f_total_time;
	{
		double f_total_start = timer.f_Time();
		for(int i = 0; i < n_repeat_num; ++ i) {
			if(!splitter.Prepare_FirstChunk())
				return false;
			do {
				if(!splitter.Build_TermOccurenceTable())
					return false;

				const size_t n_max_slice_length = 256;
				const size_t n_max_pass_size = 10000;
				const size_t n_min_primary_pass_size = 64;
				const size_t n_dummy_vector_bank_num = 8192;
				// term occurence table v2 config

				// NOTE(review): n_min_primary_pass_size is passed as both the third and the fourth
				// argument - confirm against the declaration of Build_TermOccurenceTable_v2()
				if(!splitter.Build_TermOccurenceTable_v2(n_max_slice_length,
				   n_max_pass_size, n_min_primary_pass_size, n_min_primary_pass_size, n_dummy_vector_bank_num))
					return false;
			} while(splitter.Prepare_NextChunk());
		}
		f_total_time = timer.f_Time() - f_total_start;
	}
	// measure chunk preparation and building of both term occurence tables
	double f_occurences_time = f_subtotal_time - f_chunks_time;
	double f_occurences_v2_time = f_total_time - f_subtotal_time;
	// measure times

	printf("splitter timing for chunk size " PRIsizeB "B and half-window size %lu:\n",
		PRIsizeBparams(n_chunk_size * sizeof(size_t)), static_cast<unsigned long>(n_halfwindow_size));
	// size_t must not be printed using "%d" (the argument sizes differ where size_t is wider than int)
	printf("\ttotal time: " PRItimeprecise " sec\n", PRItimeparams(f_total_time / n_repeat_num));
	printf("\tchunk time: " PRItimeprecise " sec\n", PRItimeparams(f_chunks_time / n_repeat_num));
	printf("\toccur time: " PRItimeprecise " sec\n", PRItimeparams(f_occurences_time / n_repeat_num));
	printf("\toccv2 time: " PRItimeprecise " sec\n", PRItimeparams(f_occurences_v2_time / n_repeat_num));
	// display times

	return true;
}

bool CSplitterTester::DoSpeedTests(int n_repeat_num)
{
	CSplitterTester test;

	if(!test.PrepareDocs(1024, 4096, 4096)) {
		fprintf(stderr, "error: test.PrepareDocs(1024, 4096, 4096) failed\n");
		return false;
	}
	printf("test with 1024 documents, 4096 - 8192 terms long\n");

	for(int n_shift = 10; n_shift <= 20; ++ n_shift) {
		size_t n_chunk_size = 1 << n_shift;
		for(int n_window_size = 10; n_window_size <= 100; n_window_size += 10) {
			if(!test.Test_SplitterTiming(n_chunk_size, n_window_size, n_repeat_num))
				return false;
		}
	}

	return true;
}

/*
 *								=== ~CSplitterTester ===
 */
