/**
 *	@file GPU_RI_Memory.h
 *	@brief GPU memory balance for random indexing
 *	@author -tHE SWINe-
 *	@date 2010-12-06
 *	@note This is not a standalone, reusable file; it is an include-once file for this particular
 *		application, intended to reduce clutter in ProcessTermVectors/Main.cpp.
 */

/**
 *	@brief GPU random indexing memory balance (a proposed allocation of the available GPU memory to the various buffers)
 */
struct TGPUMemoryBilance {
	// inputs, stored verbatim from the constructor arguments
	size_t n_term_num; /**< @brief number of terms */
	size_t n_seed_length; /**< @brief number of 16-bit elements in a single seed */
	size_t n_vector_length; /**< @brief number of 32-bit elements in a single term vector */
	size_t n_chunk_size; /**< @brief number of elements in a single chunk */
	size_t n_dummy_vector_bank_num; /**< @brief number of vectors in the dummy vector bank */
	size_t n_gpu_memory_megs; /**< @brief total GPU memory size, in megabytes */
	size_t n_gpu_memory_reserve_megs; /**< @brief GPU memory to be left unused, in megabytes */
	size_t n_max_gpu_pool_size_megs; /**< @brief upper limit of the term vector pool size, in megabytes */

	// values derived in the constructor
	size_t n_seed_size; /**< @brief size of the seed buffer in bytes (n_term_num + 1 seeds) */
	size_t n_dummy_vector_bank_size; /**< @brief size of the dummy vector bank, in bytes */
	size_t n_chunk_buffer_size; /**< @brief size of the chunk buffer, in bytes */
	size_t n_occurence_buffer_size; /**< @brief size of the occurence buffer in bytes (same as the chunk buffer) */
	size_t n_work_item_buffer_size; /**< @brief size of the work-item buffer, in bytes */
	size_t n_free_gpu_space; /**< @brief GPU memory minus the reserve in bytes (zero if the reserve exceeds the memory) */
	size_t n_gpu_buffers_size; /**< @brief sum of sizes of all the fixed buffers above, in bytes */
	size_t n_gpu_pool_size; /**< @brief number of term vectors in the GPU pool */
	size_t n_gpu_pool_size_bytes; /**< @brief size of the GPU pool in bytes (a whole multiple of the vector size) */
	size_t n_total_terms_to_store_on_gpu; /**< @brief number of term vectors wanted on the GPU (equals n_term_num) */
	size_t n_max_terms_chunk; /**< @brief the smaller of the chunk size and the number of terms */

	/**
	 *	@brief calculates the GPU memory layout
	 *
	 *	The fixed buffers (seeds, dummy vector bank, chunk, occurence and work-item buffers)
	 *	are accounted for first; whatever remains of the non-reserved memory is given to the
	 *	term vector pool, limited by _n_max_gpu_pool_size_megs and by the number of terms.
	 *
	 *	@param[in] _n_term_num is number of terms
	 *	@param[in] _n_seed_length is number of (16-bit) elements of a seed
	 *	@param[in] _n_vector_length is number of (32-bit) elements of a term vector
	 *	@param[in] _n_chunk_size is number of elements in a chunk
	 *	@param[in] _n_dummy_vector_bank_num is number of vectors in the dummy vector bank
	 *	@param[in] _n_gpu_memory_megs is GPU memory size in megabytes (default 790)
	 *	@param[in] _n_gpu_memory_reserve_megs is size of GPU memory to leave alone, in megabytes (default 100)
	 *	@param[in] _n_max_gpu_pool_size_megs is maximal term vector pool size in megabytes (default 4095)
	 *
	 *	@note This does not fail; call b_Status() to see whether the fixed buffers fit.
	 *		If they do not (or if _n_vector_length is zero), the pool size is set to zero.
	 */
	TGPUMemoryBilance(size_t _n_term_num, size_t _n_seed_length, size_t _n_vector_length, size_t _n_chunk_size,
		size_t _n_dummy_vector_bank_num, size_t _n_gpu_memory_megs = 790,
		size_t _n_gpu_memory_reserve_megs = 100, size_t _n_max_gpu_pool_size_megs = 4095)
		:n_term_num(_n_term_num), n_seed_length(_n_seed_length), n_vector_length(_n_vector_length),
		n_chunk_size(_n_chunk_size), n_dummy_vector_bank_num(_n_dummy_vector_bank_num),
		n_gpu_memory_megs(_n_gpu_memory_megs), n_gpu_memory_reserve_megs(_n_gpu_memory_reserve_megs),
		n_max_gpu_pool_size_megs(_n_max_gpu_pool_size_megs)
	{
		const size_t n_vector_size = _n_vector_length * sizeof(int32_t);
		// size of a single term vector, in bytes

		n_seed_size = _n_seed_length * sizeof(uint16_t) * (_n_term_num + 1);
		n_dummy_vector_bank_size = n_vector_size * _n_dummy_vector_bank_num;
		n_chunk_buffer_size = _n_chunk_size * sizeof(uint32_t);
		n_occurence_buffer_size = n_chunk_buffer_size;
		n_work_item_buffer_size = _n_chunk_size * sizeof(CDocStreamSplitter::TWorkItem);
		n_gpu_buffers_size = n_seed_size + n_dummy_vector_bank_size + n_chunk_buffer_size +
			n_occurence_buffer_size + n_work_item_buffer_size;
		// sizes of the fixed buffers

		n_free_gpu_space = (_n_gpu_memory_megs > _n_gpu_memory_reserve_megs)?
			(_n_gpu_memory_megs - _n_gpu_memory_reserve_megs) * 1048576 : 0;
		// unsigned arithmetic; a reserve larger than the memory must not wrap around

		const size_t n_unassigned_space = (n_free_gpu_space > n_gpu_buffers_size)?
			n_free_gpu_space - n_gpu_buffers_size : 0;
		// what is left for the pool; zero if the fixed buffers do not fit (b_Status() is false then)

		n_gpu_pool_size_bytes = ::min(n_unassigned_space, _n_max_gpu_pool_size_megs * 1048576);
		n_gpu_pool_size = (n_vector_size)? n_gpu_pool_size_bytes / n_vector_size : 0;
		n_gpu_pool_size_bytes = n_gpu_pool_size * n_vector_size;
		// only whole vectors (no leftovers), and no division by zero for zero-length vectors

		n_total_terms_to_store_on_gpu = _n_term_num; // without the extra (n_term_num + 1) seed entry
		n_max_terms_chunk = ::min(_n_chunk_size, n_total_terms_to_store_on_gpu);
		// maximal limit of terms needed when processing a single chunk

		if(n_gpu_pool_size > n_total_terms_to_store_on_gpu) {
			n_gpu_pool_size = n_total_terms_to_store_on_gpu;
			n_gpu_pool_size_bytes = n_gpu_pool_size * n_vector_size;
		}
		// cut the pool down to the required number of vectors, no need to waste the space
	}

	/**
	 *	@brief checks whether the fixed buffers fit in the non-reserved GPU memory
	 *	@return Returns true if n_free_gpu_space is at least n_gpu_buffers_size, otherwise returns false.
	 */
	bool b_Status() const
	{
		return n_free_gpu_space >= n_gpu_buffers_size;
	}

	/**
	 *	@brief prints the memory layout to stdout
	 *	@note A warning is printed to stderr if the pool holds less vectors than n_max_terms_chunk.
	 */
	void Dump() const
	{
		typedef unsigned long long _TyPrintable;
		// size_t has no portable printf conversion here; convert explicitly and print with %llu

		if(n_gpu_pool_size < n_max_terms_chunk)
			fprintf(stderr, "warning: gpu space for term vectors may be insufficient\n");
		// make sure the GPU pool is large enough to hold all possible terms in the chunk
		// (but there will usually be less terms so this is only a warning)

		const uint64_t n_all_vectors_size = n_term_num * uint64_t(n_vector_length * sizeof(int32_t));
		printf("host memory usage: max " PRIsizeB "B, estimated " PRIsizeB "B (%llu vectors)\n",
			PRIsizeBparams(n_all_vectors_size),
			PRIsizeBparams(.25 * (n_all_vectors_size - n_gpu_pool_size_bytes)),
			static_cast<_TyPrintable>((n_term_num - n_gpu_pool_size) / 4));
		// a crude estimate (one quarter) of how much memory should be required
		// to hold all the term vectors that aren't on the gpu

		const size_t n_assigned_space = n_gpu_buffers_size + n_gpu_pool_size_bytes;
		const size_t n_spare_space = (n_free_gpu_space > n_assigned_space)?
			n_free_gpu_space - n_assigned_space : 0;
		// do not let the spare space wrap around if the layout does not fit

		const size_t n_chunk_item_size = 2 * sizeof(uint32_t) + sizeof(CDocStreamSplitter::TWorkItem);
		// bytes per chunk element over the chunk, occurence and work-item buffers

		printf("GPU memory size " PRIsizeB "B, leaving " PRIsizeB "B free. Layout:\n"
			"\t" PRIsizeB "B seed vectors (%llu x %llu)\n"
			"\t" PRIsizeB "B dummy vector bank (%llu x %llu)\n"
			"\t" PRIsizeB "B chunk and work buffers (%llu x %llu)\n"
			"\t" PRIsizeB "B term vector pool (%llu x %llu)\n"
			"\t" PRIsizeB "B spare memory (+ %llu.00 MB reserved memory)\n\n",
			PRIsizeBparams(n_gpu_memory_megs * 1048576), PRIsizeBparams(n_gpu_memory_reserve_megs * 1048576),
			PRIsizeBparams(n_seed_size), static_cast<_TyPrintable>(n_seed_length),
			static_cast<_TyPrintable>(n_term_num + 1),
			PRIsizeBparams(n_dummy_vector_bank_size), static_cast<_TyPrintable>(n_vector_length),
			static_cast<_TyPrintable>(n_dummy_vector_bank_num),
			PRIsizeBparams(n_chunk_buffer_size + n_occurence_buffer_size + n_work_item_buffer_size),
			static_cast<_TyPrintable>(n_chunk_size), static_cast<_TyPrintable>(n_chunk_item_size),
			PRIsizeBparams(n_gpu_pool_size_bytes), static_cast<_TyPrintable>(n_vector_length),
			static_cast<_TyPrintable>(n_gpu_pool_size),
			PRIsizeBparams(n_spare_space), static_cast<_TyPrintable>(n_gpu_memory_reserve_megs));
		// verbose
	}
};

