/**
 *	@file ChunkProducer.h
 *	@brief Encapsulation of code, required to generate TChunkData,
 *		possibly in two-threaded overlapping producer-consumer manner.
 *	@author -tHE SWINe-
 *	@date 2010-12-06
 *	@note This is not a standalone, reusable file; it is an include-once file for this particular application,
 *		intended to reduce clutter in ProcessTermVectors/Main.cpp.
 */

/**
 *	@brief adapter of CLuceneIndexReaderInterface for CDocStreamSplitter
 */
class CLuceneDocStorage : public CDocStreamSplitter::CDocumentStorage {
protected:
	size_t m_n_cur_doc; /**< @brief zero-based index of the document that the next GetNextDocument() call reads */
	::wstring m_s_doc_file, m_s_doc_date; /**< @brief file name and date strings, handed to the reader in GetNextDocument() (verbose) */
	CLuceneIndexReaderInterface &m_r_reader; /**< @brief the index reader, documents are fetched from */

public:
	/**
	 *	@brief default constructor; starts at the document with index zero
	 *	@param[in] r_reader is reader with opened and lemmatized index
	 *		(only a reference is stored, the reader must outlive this object)
	 */
	CLuceneDocStorage(CLuceneIndexReaderInterface &r_reader)
		:m_n_cur_doc(0), m_r_reader(r_reader)
	{}

	/**
	 *	@brief gets current document index
	 *	@return Returns zero-based index of the next document that will be returned by GetNextDocument().
	 */
	inline size_t n_Current_Doc() const
	{
		return m_n_cur_doc;
	}

	/**
	 *	@brief gets the file name string of the current document
	 *	@return Returns const reference to the wide string that was passed to the reader
	 *		in the most recent GetNextDocument() call (empty until then).
	 *	@note See n_Current_Doc() function documentation for more information to what the current document means.
	 */
	inline const ::wstring &r_s_Current_DocFile() const
	{
		return m_s_doc_file;
	}

	/**
	 *	@brief gets the last modification date string of the current document
	 *	@return Returns const reference to the wide string that was passed to the reader
	 *		in the most recent GetNextDocument() call (empty until then).
	 *	@note See n_Current_Doc() function documentation for more information to what the current document means.
	 */
	inline const ::wstring &r_s_Current_DocDate() const
	{
		return m_s_doc_date;
	}

	/**
	 *	@brief rewinds the storage, so that the next document read is the first one
	 *	@note Only the document index is reset, the file name and date strings are left untouched.
	 */
	virtual void Reset()
	{
		m_n_cur_doc = 0;
	}

	/**
	 *	@brief determines whether there are more documents
	 *	@return Returns true if the document index is less than the document count
	 *		reported by the reader, otherwise returns false.
	 */
	virtual bool b_HaveMoreDocuments() const
	{
		return m_r_reader.n_Document_Num() > m_n_cur_doc;
	}

	/**
	 *	@brief gets list of terms in the current document, and increments the document counter
	 *	@param[out] r_term_vector is list of document terms
	 *	@return Returns true on success, false on failure or if there are no more documents.
	 *	@note The document counter is incremented before the reader is invoked,
	 *		it is therefore advanced even if reading the document fails.
	 */
	virtual bool GetNextDocument(std::vector<CDocStreamSplitter::term_id_t> &r_term_vector)
	{
		if(b_HaveMoreDocuments()) {
			const size_t n_doc = m_n_cur_doc ++;
			// remember which document to read, and advance right away

			return m_r_reader.Get_Document(n_doc, true, r_term_vector, m_s_doc_file, m_s_doc_date);
		}
		return false;
		// ran out of documents
	}
};

/**
 *	@brief chunk producer class
 */
class CChunkProducer {
public:
	/**
	 *	@brief collection of all the tables, required to process a single chunk
	 */
	struct TChunkData {
		std::vector<CDocStreamSplitter::term_id_t> chunk; /**< @brief copy of the chunk, with original term id's */
		std::vector<CDocStreamSplitter::chunk_off_t> occurence_list; /**< @brief list of occurences of distinct terms in the chunk */
		std::vector<CDocStreamSplitter::TWorkItem> work_item_list; /**< @brief list of work-items, term id's are permutated so as to index gpu allocation */
		std::vector<CDocStreamSplitter::TPass> pass_list; /**< @brief list of processing passes, term id's are permutated so as to index gpu allocation */

		std::vector<CGPUTermVectorAllocator::TTermAllocation> allocation_list; /**< @brief list of term vector uploads */
		std::vector<CGPUTermVectorAllocator::TTermVacation> vacation_list; /**< @brief list of term vector downloads */
		std::vector<CGPUTermVectorAllocator::TTermVacation> post_vacation_list; /**< @brief list of final term vector downloads after the pass (only if not lazy producer) */
	};

protected:
	/**
	 *	@brief a simple single value substitution function object
	 */
	template <class _Ty>
	class CSubstitution {
	protected:
		_Ty m_substituted, m_substitute;

	public:
		inline CSubstitution(_Ty substituted, _Ty substitute) // "what, by what"
			:m_substituted(substituted), m_substitute(substitute)
		{}

		inline _Ty operator ()(_Ty x) const
		{
			return (x != m_substituted)? x : m_substitute;
		}
	};

	/**
	 *	@brief a simple table lookup function object
	 */
	class CFindGPUAllocation {
	protected:
		const std::vector<uint32_t> &m_r_term_alloc_table;

	public:
		inline CFindGPUAllocation(const std::vector<uint32_t> &r_term_alloc_table)
			:m_r_term_alloc_table(r_term_alloc_table)
		{}

		inline uint32_t operator ()(term_id_t n_term) const
		{
			_ASSERTE(n_term >= 0 && n_term < m_r_term_alloc_table.size()); // this should be a valid term id, no dummy, no no-terms
			_ASSERTE(m_r_term_alloc_table[n_term] != -1); // this term should be allocated
			return m_r_term_alloc_table[n_term];
		}

		inline CDocStreamSplitter::TTermRange operator ()(const CDocStreamSplitter::TTermRange &r_t_trange) const
		{
			_ASSERTE(r_t_trange.n_term_id >= 0 && r_t_trange.n_term_id < m_r_term_alloc_table.size()); // this should be a valid term id, no dummy, no no-terms
			_ASSERTE(m_r_term_alloc_table[r_t_trange.n_term_id] != -1); // this term should be allocated
			return CDocStreamSplitter::TTermRange(r_t_trange.n_offset, r_t_trange.n_length,
				m_r_term_alloc_table[r_t_trange.n_term_id]);
		}
	};

	CGPUTermVectorAllocator m_gpu_allocator; /**< @brief gpu allocator */
	CDocStreamSplitter m_doc_splitter; /**< @brief document splitter instance */

	const CDocStreamSplitter::chunk_off_t m_n_dummy_term; /**< @brief dummy term id (in the input data) */
	const CDocStreamSplitter::chunk_off_t m_n_max_slice_length; /**< @brief maximum slice length */
	const CDocStreamSplitter::chunk_off_t m_n_max_pass_size; /**< @brief maximum pass size */
	const CDocStreamSplitter::chunk_off_t m_n_min_primary_pass_size; /**< @brief minimal primary pass size */
	const CDocStreamSplitter::chunk_off_t m_n_min_last_primary_pass_size; /**< @brief minimal size of the last primary pass */
	const CDocStreamSplitter::chunk_off_t m_n_dummy_term_seed_id; /**< @brief id of dummy seed vector */
	const size_t m_n_dummy_vector_bank_num; /**< @brief number of dummy vector banks */
	const bool m_b_lazy_gpu_deallocation; /**< @brief use lazy gpu deallocation */

	size_t m_n_peak_term_usage; /**< @brief peak term usage (stats) */

public:
	/**
	 *	@brief default constructor
	 *
	 *	@param[in] n_gpu_pool_size is size of the GPU pool, in term vectors
	 *	@param[in] r_term_list is list of terms (pairs of term text and frequency
	 *		(or SIZE_MAX if the frequency is not known))
	 *	@param[in] r_doc_storage is document storage provider
	 *	@param[in] n_dummy_term is index of dummy term vector (vector, containing nulls, used to separate documents)
	 *	@param[in] n_chunk_size is maximal chunk size (in term indices)
	 *	@param[in] n_halfwindow_size is half-size of window in the semantic term vector calculation algorithm
	 *	@param[in] n_max_slice_length is number of term occurences, processed by one GPU thread in one pass (default 256)
	 *	@param[in] n_max_pass_size is maximal number of GPU threads, running in parallel in one pass (default 10000)
	 *	@param[in] n_min_primary_pass_size is minimal number of GPU threads, running in primary pass (default 1000)
	 *	@param[in] n_min_last_primary_pass_size is minimal number of GPU threads,
	 *		running in the last primary pass (default 500)
	 *	@param[in] n_dummy_vector_bank_num is number of dummy term vector banks;
	 *		this should be no more than n_max_pass_size, otherwise all the banks
	 *		above that number will be left unused (default 8192)
	 *	@param[in] b_lazy_gpu_deallocation is set to true if the deallocation of term vectors
	 *		shall not be enforced after the last term occurence
	 *
	 *	@note This may fail on low memory, it is therefore recommended to call b_Status() afterwards.
	 *	@note See also CDocStreamSplitter::Build_PassList() function documentstion.
	 */
	inline CChunkProducer(size_t n_gpu_pool_size, const std::vector<std::pair<::wstring, size_t> > &r_term_list,
		CDocStreamSplitter::CDocumentStorage &r_doc_storage, CDocStreamSplitter::term_id_t n_dummy_term,
		size_t n_chunk_size, size_t n_halfwindow_size, CDocStreamSplitter::chunk_off_t n_max_slice_length,
		CDocStreamSplitter::chunk_off_t n_max_pass_size, CDocStreamSplitter::chunk_off_t n_min_primary_pass_size,
		CDocStreamSplitter::chunk_off_t n_min_last_primary_pass_size, size_t n_dummy_vector_bank_num,
		bool b_lazy_gpu_deallocation = true)
		:m_gpu_allocator(n_gpu_pool_size, n_chunk_size, r_term_list),
		m_doc_splitter(r_doc_storage, n_dummy_term, n_chunk_size, n_halfwindow_size), m_n_dummy_term(n_dummy_term),
		m_n_max_slice_length(n_max_slice_length), m_n_max_pass_size(n_max_pass_size),
		m_n_min_primary_pass_size(n_min_primary_pass_size), m_n_min_last_primary_pass_size(n_min_last_primary_pass_size),
		m_n_dummy_vector_bank_num(n_dummy_vector_bank_num),
		m_n_dummy_term_seed_id(CDocStreamSplitter::chunk_off_t(r_term_list.size())),
		m_b_lazy_gpu_deallocation(b_lazy_gpu_deallocation),

		m_n_peak_term_usage(0)
	{}

	/**
	 *	@brief checks constructor success
	 *	@return Returns true if the constructor succeeded, otherwise returns false.
	 */
	bool b_Status() const
	{
		return m_gpu_allocator.b_Status();
	}	

	/**
	 *	@brief gets peak term usage
	 *	@return Returns maximal number of terms occuring in a single chunk so far.
	 */
	inline size_t n_Peak_Term_Usage() const
	{
		return m_n_peak_term_usage;
	}

	/**
	 *	@brief gets term usage
	 *	@return Returns number of terms occuring in the last chunk processed (not valid until operator ()() is called).
	 */
	inline size_t n_Term_Usage() const
	{
		return m_doc_splitter.Get_TermOccurenceTable().size();
	}

	/**
	 *	@brief chunk producing operator
	 *
	 *	@param[out] r_b_have_chunk is set to true if there is a next chunk, otherwise it's set to false (end of data)
	 *	@param[out] r_t_chunk_data is filled with the chunk and all associated data needed for processing
	 *		(the original contents are erased if not empty)
	 *
	 *	@return Returns true on success (even if no chunk was produced), otherwise returns false.
	 */
	bool operator ()(bool &r_b_have_chunk, TChunkData &r_t_chunk_data)
	{
		if(!m_doc_splitter.b_Have_NextChunk()) {
			r_b_have_chunk = false;
			return true;
		}
		r_b_have_chunk = true;
		// is there next chunk?

		if(!m_doc_splitter.Build_NextChunk()) {
			fprintf(stderr, "error: doc_splitter.Build_NextChunk() failed\n");
			return false;
		}
		// build the chunk

		if(!m_doc_splitter.Build_TermOccurenceTable_v2()) {
			fprintf(stderr, "error: doc_splitter.Build_TermOccurenceTable_v2() failed\n");
			return false;
		}
		const std::vector<CDocStreamSplitter::TTermOccurence> &r_occ_list = m_doc_splitter.Get_TermOccurenceTable();
		// get the list of term vector occurences, (and of the seed vector occurences if retraining - todo)

		if(m_n_peak_term_usage < r_occ_list.size())
			m_n_peak_term_usage = r_occ_list.size();
		// track peak usage of terms per chunk

		if(!m_gpu_allocator.Plan_ChunkAllocations(r_occ_list, r_t_chunk_data.vacation_list, r_t_chunk_data.allocation_list)) {
			fprintf(stderr, "error: page allocator failed (try to increase pool size or decrease chunk size)\n");
			return false;
		}
		// build plan of allocations / deallocations for this chunk

		if(!m_doc_splitter.Build_PassList(m_n_max_slice_length, m_n_max_pass_size,
		   m_n_min_primary_pass_size, m_n_min_last_primary_pass_size, m_n_dummy_vector_bank_num)) {
			fprintf(stderr, "error: doc_splitter.Build_PassList() failed\n");
			return false;
		}
		// build list of processing passes

		try {
			const std::vector<CDocStreamSplitter::term_id_t> &r_chunk = m_doc_splitter.Get_Chunk();
			r_t_chunk_data.chunk.resize(r_chunk.size());
			std::transform(r_chunk.begin(), r_chunk.end(), r_t_chunk_data.chunk.begin(),
				CSubstitution<term_id_t>(m_n_dummy_term, m_n_dummy_term_seed_id));
			// copy the chunk, perform the substitution of the dummy term for it's real id

			const std::vector<CDocStreamSplitter::chunk_off_t> &r_occ_list = m_doc_splitter.Get_TermOccurenceList_v2();
			r_t_chunk_data.occurence_list.resize(r_occ_list.size());
			std::copy(r_occ_list.begin(), r_occ_list.end(), r_t_chunk_data.occurence_list.begin());
			// copy the occurence list (no changes in here)

			const std::vector<uint32_t> &r_term_alloc_table = m_gpu_allocator.r_GPU_Mapping_Table();
			const std::vector<CDocStreamSplitter::TWorkItem> &r_wi_list = m_doc_splitter.Get_WorkItemList_v2();
			r_t_chunk_data.work_item_list.resize(r_wi_list.size());
			const std::vector<CDocStreamSplitter::TPass> &r_pass_list = m_doc_splitter.Get_PassList_v2();
			r_t_chunk_data.pass_list.resize(r_pass_list.size());
#ifdef _DEBUG
			size_t n_copied_work_item_num = 0; // debug
#endif //_DEBUG
			for(size_t i = 0, n = r_pass_list.size(); i < n; ++ i) {
				const CDocStreamSplitter::TPass &r_t_pass = r_pass_list[i];
				if(!r_t_pass.b_primary) {
					std::copy(r_wi_list.begin() + r_t_pass.n_offset,
						r_wi_list.begin() + (r_t_pass.n_offset + r_t_pass.n_length),
						r_t_chunk_data.work_item_list.begin() + r_t_pass.n_offset);
					// in case it's not a primary pass, just copy the work-items, the terms point to dummy vector bank
				} else {
					std::transform(r_wi_list.begin() + r_t_pass.n_offset,
						r_wi_list.begin() + (r_t_pass.n_offset + r_t_pass.n_length),
						r_t_chunk_data.work_item_list.begin() + r_t_pass.n_offset,
						CFindGPUAllocation(r_term_alloc_table));
					// in case it's a primary pass, transform term id in the work-items
					// to point to correct slots in the vector pool
				}
#ifdef _DEBUG
				n_copied_work_item_num += r_t_pass.n_length; // debug
#endif //_DEBUG
				// copy the work-items

				CDocStreamSplitter::TPass &r_t_dest_pass = r_t_chunk_data.pass_list[i];
				r_t_dest_pass.n_offset = r_t_pass.n_offset;
				r_t_dest_pass.n_length = r_t_pass.n_length;
				r_t_dest_pass.b_primary = r_t_pass.b_primary;
				r_t_dest_pass.b_slice_aligned = r_t_pass.b_slice_aligned;
				// copy the pass

				r_t_dest_pass.summation_list.resize(r_t_pass.summation_list.size());
				std::transform(r_t_pass.summation_list.begin(), r_t_pass.summation_list.end(),
					r_t_dest_pass.summation_list.begin(), CFindGPUAllocation(r_term_alloc_table));
				// copy the summation steps, update the terms
			}
			_ASSERTE(n_copied_work_item_num == r_wi_list.size());
			// copy the work-item list and the pass list
		} catch(std::bad_alloc&) {
			fprintf(stderr, "error: not enough memory\n");
			return false;
		}
		// copy the chunk, the occurence list, the work-item list and the pass list

		if(!m_gpu_allocator.Plan_PostChunkDeallocations(r_occ_list,
			r_t_chunk_data.post_vacation_list, m_b_lazy_gpu_deallocation, !m_doc_splitter.b_Have_NextChunk())) {
			fprintf(stderr, "error: page allocator failed (try to increase pool size or decrease chunk size)\n");
			return false;
		}
		// build plan of post-chunk deallocations for this chunk (after filling the TChunkData
		// structure, as it requires current gpu mapping table which is altered by this function)

		return true;
	}
};
