/*
								+--------------------------------+
								|                                |
								|  ***   Simple threading   ***  |
								|                                |
								|  Copyright  -tHE SWINe- 2008  |
								|                                |
								|            Thread.h            |
								|                                |
								+--------------------------------+
*/

#pragma once
#ifndef __LAME_THREADS_INCLUDED
#define __LAME_THREADS_INCLUDED

/**
 *	@file Thread.h
 *	@brief simple multithreading primitives
 *	@date 2008
 *	@author -tHE SWINe-
 *
 *	@date 2008-12-21
 *
 *	removed some warnings under g++; note this only works on BSD, linux gives error 12
 *	(ENOMEM) on pthread_create. t_odo. (guess i'm not returning something)
 *
 *	@date 2009-06-01
 *
 *	added CMutex class for primitive thread synchronisation
 *
 *	@date 2009-07-07
 *
 *	added CMutex copy-constructor and copy-operator to protected functions (mutex instances
 *	shouldn't be copied. use pointers / references)
 *
 *	@date 2009-11-12
 *
 *	added CThread::Suspend() and CThread::Resume()
 *
 *	changed CThread::Run() in linux version (simplified it)
 *
 *	@todo - debug linux version
 *	http://www.linuxquestions.org/questions/programming-9/resume-and-suspend-pthreads-184535/
 *
 *	@date 2010-02-19
 *
 *	added CCurrentThreadHandle class, it currently doesn't implement any functions
 *	@todo - need to debug SuspendThread() under linux first, then implement it in CCurrentThreadHandle
 *	@todo - implement some unified thread priority control functions
 *	@todo - think about NUMA
 *
 *	@date 2010-10-25
 *
 *	Added Unused.h, decorated some function parameters as UNUSED().
 *
 *	@date 2010-10-29
 *
 *	Unified windows detection macro to "\#if defined(_WIN32) || defined(_WIN64)".
 *
 *	@date 2010-11-03
 *
 *	Added CSemaphore.
 *	Reformatted comments so they can be parsed by doxygen.
 *
 *	@date 2010-11-10
 *
 *	Added CProducerConsumerQueue.
 *
 *	@date 2011-11-01
 *
 *	Fixed a typo in CCurrentThreadHandle::Set_*Priority().
 *
 *	@date 2012-06-19
 *
 *	Moved multiple inclusion guard before file documentation comment.
 *
 *	@date 2013-08-18
 *
 *	Added CSemaphore::TimedWait(), CSemaphore::n_Value() and Signal(int, int&),
 *	all of them remain mostly untested (seem to work on windows).
 *
 */

#include "Unused.h"
#include "Integer.h"

/**
 *	@brief abstract interface for a thread-attached runable object
 *
 *	Implementations supply the code to be executed in Run().
 */
class CRunable {
public:
	/**
	 *	@brief virtual destructor; makes it well-defined to delete
	 *		an implementation through a pointer to CRunable
	 */
	virtual ~CRunable()
	{}

	/**
	 *	@brief this function is called from within the thread; thread code comes here
	 */
	virtual void Run() = 0;
};

#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#else // _WIN32 || _WIN64
#include <pthread.h>
#include <semaphore.h>
#include <sched.h>
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
#include <pthread_np.h>
#endif // __FreeBSD__ || __NetBSD__ || __OpenBSD__
#ifdef __FreeBSD__
#include <sys/param.h>
#include <sys/cpuset.h>
#endif // __FreeBSD__
#endif // _WIN32 || _WIN64

/**
 *	@brief simple thread class
 *
 *	Holds a pointer to a CRunable object and a platform thread (a HANDLE on windows,
 *	a pthread_t elsewhere). The member functions are only declared here.
 */
class CThread {
protected:
	CRunable *m_p_runable; // the attached runable object

#if defined(_WIN32) || defined(_WIN64)
	mutable HANDLE m_h_thread; // thread handle
#else // _WIN32 || _WIN64
	pthread_t m_t_thread; // thread
	pthread_mutex_t m_t_running_mutex, m_t_suspend_mutex; // presumably guard the running state and the suspend condition (see the implementation)
	pthread_cond_t m_t_suspend_cond; // presumably the condition Suspend() waits on (see the implementation)
	bool m_b_running; // thread state
#endif // _WIN32 || _WIN64

	/**
	 *	@brief runable which does nothing
	 */
	class CEmptyRunable : public CRunable {
		virtual void Run() {}
	};
	static CEmptyRunable empty_runable; // shared do-nothing runable instance

public:
	/**
	 *	@brief default constructor; attaches empty runable object to a thread
	 */
	CThread();

	/**
	 *	@brief constructor, attaches runable object to a thread
	 *	@param[in] r_runable is reference to the runable object to be executed in the thread
	 */
	CThread(CRunable &r_runable);

	/**
	 *	@brief destructor
	 */
	~CThread();

	/**
	 *	@brief attaches a new runable, the thread must not be running
	 *	@param[in] r_runable is reference to the runable object to be executed in the thread
	 *	@return Returns true on success, false on failure.
	 */
	bool AttachRunable(CRunable &r_runable);

	/**
	 *	@brief gets current runable object
	 *	@return Returns reference to current runable object.
	 */
	CRunable &r_Runable();

	/**
	 *	@brief gets current runable object
	 *	@return Returns const reference to current runable object.
	 */
	const CRunable &r_Runable() const;

	/**
	 *	@brief starts the thread and returns immediately
	 *	@return Returns true on success, false on failure.
	 *	@note This fails in case the thread is already running.
	 */
	bool Start();

	/**
	 *	@brief starts the thread and waits until it finishes
	 *	@return Returns true on success, false on failure.
	 *	@note This fails in case the thread is already running. Also note this
	 *		function is not thread-safe (the thread object doesn't change
	 *		its state to running while in this function, and it can't be stopped).
	 */
	bool Run();

	/**
	 *	@brief suspends the thread (must be called from within the thread!)
	 *
	 *	Suspends the thread indefinitely. Use Resume() to resume thread execution.
	 *		Suspended thread may be killed using Stop(); but waiting for suspended
	 *		thread to finish causes deadlock.
	 *
	 *	@return Returns true on success, false on failure.
	 *	@return Always returns false in case thread is not running.
	 *	@return Always returns true in case thread is running and is already suspended.
	 *
	 *	@note Must be called from within the thread!
	 */
	bool Suspend();

	/**
	 *	@brief resumes the thread
	 *
	 *	Resumes execution of thread, previously stopped using Suspend().
	 *		Resuming running thread has no effect.
	 *
	 *	@return Returns true on success, false on failure.
	 *	@return Always returns false in case thread is not running.
	 *	@return Always returns true in case thread is running, and is not suspended.
	 *
	 *	@note For windows programmers - thread, executing Sleep() is woken up as well.
	 */
	bool Resume();

	/**
	 *	@brief gets current thread state
	 *	@return Returns true if thread is running, otherwise returns false.
	 */
	bool b_IsRunning() const;

	/**
	 *	@brief waits for the thread to end, if b_force_kill is set, it's ended forcibly
	 *	@param[in] b_force_kill specifies whether to forcibly end the thread (true),
	 *		or whether to wait until the thread finishes (false; default behavior)
	 *	@return Returns true if thread is stopped, returns false if thread didn't stop in given time, or on failure.
	 */
	bool Stop(bool b_force_kill = false);

	/**
	 *	@brief utility function; gets number of logical CPUs on the system
	 *	@return Returns number of logical CPUs, in case CPU count cannot be determined, returns -1
	 *		(the return type is unsigned, so this value arrives as size_t(-1)).
	 */
	static size_t n_CPU_Num();

protected:
#if defined(_WIN32) || defined(_WIN64)
	static unsigned long __stdcall _run(void *p_arg); // thread entry point for windows
#else // _WIN32 || _WIN64
	static void *_run(void *p_arg); // thread entry point for pthreads
#endif // _WIN32 || _WIN64
	CThread(const CThread &UNUSED(r_thread)) {} /**< @brief can't copy threads this way, use pointers instead */
	const CThread &operator =(const CThread &UNUSED(r_thread)) { return *this; } /**< @brief can't copy threads this way, use pointers instead (does nothing, returns this object) */
};

/**
 *	@brief abstract runable object which contains the thread it is attached to
 *
 *	Inheriting classes implement Run(); the contained thread is attached
 *	to this object in the constructor.
 */
class CRunable_Thread : public CRunable {
protected:
	CThread m_thread; /**< @brief thread for the runable */

public:
	/**
	 *	@brief default constructor; attaches this runable to the thread it contains
	 *	@note The result of CThread::AttachRunable() is not checked.
	 */
	CRunable_Thread()
	{
		m_thread.AttachRunable(*this);
	}

	/**
	 *	@brief simple start function; starts the associated thread
	 *	@return Returns true on success, false on failure. If the thread is already running, fails.
	 *	@note Inheriting classes may provide their own version (add run parameters, ...);
	 *		this function is not virtual, the new version hides it.
	 */
	bool Start()
	{
		const bool b_started = m_thread.Start();
		return b_started;
	}

	/**
	 *	@brief simple stop function; waits for the associated thread to finish
	 *	@return Returns true on success, false on failure. If the thread is not running, succeeds.
	 *	@note Inheriting classes may provide their own version (add error checking, ...);
	 *		this function is not virtual, the new version hides it.
	 */
	bool WaitForFinish()
	{
		const bool b_stopped = m_thread.Stop(false); // false = wait, do not kill
		return b_stopped;
	}
};

/**
 *	@brief abstract runable object which contains the thread it is attached to, supports shallow copy
 *
 *	These objects can be e.g. stored in std::vector and the task parameters can be distributed
 *	via the copy constructor. This must be used with caution, as the thread itself is not copied:
 *	every instance has its own thread object. Therefore copying over a running runable might
 *	have unexpected consequences.
 *
 *	@remark Use only if you know what you are doing.
 */
class CRunable_Thread_ShallowCopy : public CRunable {
protected:
	CThread m_thread; /**< @brief thread for the runable */

public:
	/**
	 *	@brief default constructor; attaches this runable to the thread it contains
	 *	@note The result of CThread::AttachRunable() is not checked.
	 */
	CRunable_Thread_ShallowCopy()
	{
		m_thread.AttachRunable(*this);
	}

	/**
	 *	@brief copy-constructor; attaches this runable to its own (new) thread object
	 *	@param[in] r_other is unused (the thread is not copied)
	 */
	CRunable_Thread_ShallowCopy(const CRunable_Thread_ShallowCopy UNUSED(&r_other))
	{
		m_thread.AttachRunable(*this);
	}

	/**
	 *	@brief copy-operator; does nothing, this runable stays with its own thread object
	 *	@param[in] r_other is unused (the thread is not copied)
	 *	@return Returns reference to this.
	 */
	CRunable_Thread_ShallowCopy &operator =(const CRunable_Thread_ShallowCopy UNUSED(&r_other))
	{
		return *this;
	}

	/**
	 *	@brief simple start function; starts the associated thread
	 *	@return Returns true on success, false on failure. If the thread is already running, fails.
	 *	@note Inheriting classes may provide their own version (add run parameters, ...);
	 *		this function is not virtual, the new version hides it.
	 */
	bool Start()
	{
		const bool b_started = m_thread.Start();
		return b_started;
	}

	/**
	 *	@brief simple stop function; waits for the associated thread to finish
	 *	@return Returns true on success, false on failure. If the thread is not running, succeeds.
	 *	@note Inheriting classes may provide their own version (add error checking, ...);
	 *		this function is not virtual, the new version hides it.
	 */
	bool WaitForFinish()
	{
		const bool b_stopped = m_thread.Stop(false); // false = wait, do not kill
		return b_stopped;
	}
};

/**
 *	@brief wrapper for current thread handle
 *
 *	Allows users to execute operations on current thread, without having
 *		it's CThread (such as process first thread).
 */
class CCurrentThreadHandle {
protected:
#if defined(_WIN32) || defined(_WIN64)
	mutable HANDLE m_h_thread;
#else // _WIN32 || _WIN64
	pthread_t m_t_thread; // thread
#endif // _WIN32 || _WIN64

public:
	/**
	 *	@brief default constructor
	 */
	CCurrentThreadHandle()
	{
#if defined(_WIN32) || defined(_WIN64)
		DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
			GetCurrentProcess(), &m_h_thread, 0, TRUE, DUPLICATE_SAME_ACCESS);
		// get thread handle (must duplicate, handle returned by
		// GetCurrentThread() can't be used to do some things)
#else // _WIN32 || _WIN64
		m_t_thread = pthread_self();
#endif // _WIN32 || _WIN64
	}

	/**
	 *	@brief destructor
	 */
	~CCurrentThreadHandle()
	{
#if defined(_WIN32) || defined(_WIN64)
		CloseHandle(m_h_thread);
		// must return duplicated thread handle
#endif // _WIN32 || _WIN64
	}

	/**
	 *	@brief gets current thread id (can be used as means of thread identification) as an integer
	 *	@return Returns current thread id.
	 *	@note Actual range of values of thread id's is system-dependent.
	 */
	static int n_Get_Id()
	{
#if defined(_WIN32) || defined(_WIN64)
		return GetCurrentThreadId();
#else // _WIN32 || _WIN64
		return (long)pthread_self(); // cast to long required for FreeBSD
#endif // _WIN32 || _WIN64
	}

	bool Set_AffinityMask32(uint32_t n_mask)
	{
#if defined(_WIN32) || defined(_WIN64)
		return SetThreadAffinityMask(m_h_thread, n_mask) != 0;
#else // _WIN32 || _WIN64
#if defined(__FreeBSD__)
		cpuset_t t_cpuset;
		CPU_ZERO(&t_cpuset);
		for(int i = 0; i < 32; ++ i) {
			if((n_mask >> i) & 1)
				CPU_SET(i, &t_cpuset); // todo - try to get rid of this
		}
		return !pthread_setaffinity_np(m_t_thread, sizeof(cpuset_t), &t_cpuset);
#elif defined(__NetBSD__)
		cpuset_t *cmask = cpuset_create();
		cpuset_zero(cmask);
		for(int i = 0; i < 32; ++ i) {
			if((n_mask >> i) & 1)
				cpuset_set(bitnum, cmask);
		}
		bool b_result = !pthread_setaffinity_np(m_t_thread, sizeof(cpuset_t), &t_cpuset);
		cpuset_destroy(cmask);
		return b_result;
#else // __FreeBSD__
		cpu_set_t t_cpuset;
		CPU_ZERO(&t_cpuset);
		for(int i = 0; i < 32; ++ i) {
			if((n_mask >> i) & 1)
				CPU_SET(i, &t_cpuset); // todo - try to get rid of this
		}
		return !pthread_setaffinity_np(m_t_thread, sizeof(cpu_set_t), &t_cpuset);
#endif // __FreeBSD__
#endif // _WIN32 || _WIN64
	}

	bool Set_HighPriority()
	{
#if defined(_WIN32) || defined(_WIN64)
		return SetThreadPriority(m_h_thread, THREAD_PRIORITY_HIGHEST) != 0;
#else // _WIN32 || _WIN64
		int n_policy;
		struct sched_param t_param;
		if(pthread_getschedparam(m_t_thread, &n_policy, &t_param))
			return false;
		// get scheduling policy for the thread

		int n_prio = sched_get_priority_max(n_policy);
		if(n_prio == -1)
			return false;
		// get maximal priority for that policy

#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
		return true;
		// This function is missing on some platforms: glibc 2.3.6, MacOS X 10.3, FreeBSD 6.0,
		// NetBSD 3.0, OpenBSD 3.8, AIX 5.1, HP-UX 11, IRIX 6.5, OSF/1 5.1, Solaris 9, Cygwin, mingw, BeOS.
#else //  __FreeBSD__ || __NetBSD__ || __OpenBSD__
		return !pthread_setschedprio(m_t_thread, n_prio - 1); // set priority one lower
		// set minimal priority
#endif // __FreeBSD__ || __NetBSD__ || __OpenBSD__
#endif // _WIN32 || _WIN64
	}

	bool Set_LowPriority()
	{
#if defined(_WIN32) || defined(_WIN64)
		return SetThreadPriority(m_h_thread, THREAD_PRIORITY_LOWEST) != 0;
#else // _WIN32 || _WIN64
		int n_policy;
		struct sched_param t_param;
		if(pthread_getschedparam(m_t_thread, &n_policy, &t_param))
			return false;
		// get scheduling policy for the thread

		int n_prio = sched_get_priority_min(n_policy);
		if(n_prio == -1)
			return false;
		// get minimal priority for that policy

#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
		return true;
		// This function is missing on some platforms: glibc 2.3.6, MacOS X 10.3, FreeBSD 6.0,
		// NetBSD 3.0, OpenBSD 3.8, AIX 5.1, HP-UX 11, IRIX 6.5, OSF/1 5.1, Solaris 9, Cygwin, mingw, BeOS.
#else //  __FreeBSD__ || __NetBSD__ || __OpenBSD__
		return !pthread_setschedprio(m_t_thread, n_prio + 1); // set priority one higher
		// set minimal priority
#endif // __FreeBSD__ || __NetBSD__ || __OpenBSD__
#endif // _WIN32 || _WIN64
	}

	bool Set_NormalPriority()
	{
#if defined(_WIN32) || defined(_WIN64)
		return SetThreadPriority(m_h_thread, THREAD_PRIORITY_NORMAL) != 0;
#else // _WIN32 || _WIN64
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
		return true;
		// This function is missing on some platforms: glibc 2.3.6, MacOS X 10.3, FreeBSD 6.0,
		// NetBSD 3.0, OpenBSD 3.8, AIX 5.1, HP-UX 11, IRIX 6.5, OSF/1 5.1, Solaris 9, Cygwin, mingw, BeOS.
#else //  __FreeBSD__ || __NetBSD__ || __OpenBSD__
		return !pthread_setschedprio(m_t_thread, 0); // set default priority (fixme?)
		// set minimal priority
#endif // __FreeBSD__ || __NetBSD__ || __OpenBSD__
#endif // _WIN32 || _WIN64
	}

protected:
	CCurrentThreadHandle(const UNUSED(CCurrentThreadHandle) &r_cth) {} /**< @brief do not copy thread handles this way. use references / pointers */
	void operator =(const UNUSED(CCurrentThreadHandle) &r_cth) {} /**< @brief do not copy thread handles this way. use references / pointers */
};

#ifdef _OPENMP
#include <omp.h>

/**
 *	@def __MUTEX_USE_OPENMP_LOCK
 *	@brief if defined, enables the use of OpenMP lock instead of windows mutex (where available)
 */
//#define __MUTEX_USE_OPENMP_LOCK
#endif // _OPENMP

/**
 *	@brief simple mutex class
 *
 *	Wraps a windows mutex handle (or an OpenMP lock, if __MUTEX_USE_OPENMP_LOCK
 *	is defined) on windows, and a pthread mutex elsewhere. The member functions
 *	are only declared here.
 */
class CMutex {
protected:
#if defined(_WIN32) || defined(_WIN64)
#ifdef __MUTEX_USE_OPENMP_LOCK
	omp_lock_t m_t_lock;
#else // __MUTEX_USE_OPENMP_LOCK
	HANDLE m_h_mutex;
	//bool m_b_status;
#endif // __MUTEX_USE_OPENMP_LOCK
#else // _WIN32 || _WIN64
	pthread_mutex_t m_t_mutex;
	bool m_b_status; // presumably the constructor result reported by b_Status() (see the implementation)
#endif // _WIN32 || _WIN64

public:
	/**
	 *	@brief default constructor; creates a new mutex
	 *	@note The mutex is initially unlocked.
	 *	@note It is advised to call b_Status() to see if mutex
	 *		was really created (might fail due to OS resource limits).
	 */
	CMutex();

	/**
	 *	@brief destructor; deletes mutex
	 */
	~CMutex();

	/**
	 *	@brief gets constructor result
	 *	@return Returns true if the mutex was successfully created and can be used.
	 *	@note This doesn't reflect actual mutex state (locked / unlocked).
	 */
	bool b_Status() const;

	/**
	 *	@brief attempts to lock the mutex. in case mutex is already locked,
	 *		the calling thread is suspended until mutex owner calls Unlock()
	 *	@return Returns true on success, false on failure (doesn't reflect whether
	 *		the thread had to wait, or not. might fail because mutex was
	 *		deleted by another thread, ... shouldn't really happen).
	 */
	bool Lock();

	/**
	 *	@brief attempts to lock the mutex. in case mutex is already locked, returns immediately
	 *	@return Returns true in case mutex was successfully locked, false in case the mutex
	 *		is already locked, or on failure (shouldn't really happen).
	 */
	bool TryLock();

	/**
	 *	@brief unlocks the mutex
	 *	@return Returns true on success, false on failure (such as mutex was locked by different
	 *		thread, and therefore cannot be unlocked, or if the mutex wasn't locked).
	 */
	bool Unlock();

protected:
	CMutex(const CMutex &UNUSED(r_mutex)) {} /**< @brief can't copy mutexes this way, use pointers (protected to prevent copying; leaves the members uninitialized) */
	const CMutex &operator =(const CMutex &UNUSED(r_mutex)) { return *this; } /**< @brief can't copy mutexes this way, use pointers (does nothing, returns this object) */
};

/**
 *	@brief simple semaphore class
 *
 *	Wraps a windows semaphore handle on windows and a sem_t elsewhere.
 *	Except for the millisecond version of TimedWait(), the member functions
 *	are only declared here.
 *
 *	@note This hasn't been tested on linux / bsd yet.
 */
class CSemaphore {
protected:
#if defined(_WIN32) || defined(_WIN64)
	HANDLE m_h_semaphore;
#else // _WIN32 || _WIN64
	sem_t m_t_semaphore;
	bool m_b_status; // presumably the constructor result reported by b_Status() (see the implementation)
#endif // _WIN32 || _WIN64

public:
	/**
	 *	@brief default constructor; creates a new semaphore
	 *	@param[in] n_initial_count is the initial semaphore value
	 *	@note It is advised to call b_Status() to see if the semaphore
	 *		was really created (might fail due to OS resource limits).
	 */
	CSemaphore(int n_initial_count);

	/**
	 *	@brief destructor
	 */
	~CSemaphore();

	/**
	 *	@brief gets constructor result
	 *	@return Returns true if the semaphore was successfully created and can be used, otherwise returns false.
	 *	@note This doesn't reflect semaphore state (signaled / unsignaled).
	 */
	bool b_Status() const;

	/**
	 *	@brief gets the current value of the semaphore
	 *	@return Returns the value of the semaphore on success, or INT_MAX on failure.
	 *	@note In Windows, this relies on some internal functionality. Where possible, this
	 *		should be replaced by Signal(int, int&) function (but that changes semaphore value).
	 */
	int n_Value() const;

	/**
	 *	@brief attempts to enter the semaphore. in case its value is zero,
	 *		the calling thread is suspended until someone calls Signal()
	 *	@return Returns true on success, false on failure (doesn't reflect whether
	 *		the thread had to wait, or not. might fail because the semaphore was
	 *		deleted by another thread, ... shouldn't really happen).
	 */
	bool Wait();

	/**
	 *	@brief attempts to enter the semaphore. in case its value is zero,
	 *		the calling thread is suspended until someone calls Signal()
	 *		or until the specified time elapses
	 *
	 *	@param[out] r_b_locked is set if the semaphore was successfully locked,
	 *		or cleared if the timeout elapsed
	 *	@param[in] n_wait_miliseconds is time to wait, in milliseconds
	 *
	 *	@return Returns true on success, false on failure (doesn't reflect whether
	 *		the thread had to wait, or not. might fail because the semaphore was
	 *		deleted by another thread, ... shouldn't really happen).
	 *
	 *	@note A timeout is not a failure; this returns true with r_b_locked cleared.
	 */
	inline bool TimedWait(bool &r_b_locked, int n_wait_miliseconds)
	{
		return TimedWait(r_b_locked, n_wait_miliseconds / 1000,
			(n_wait_miliseconds % 1000) * 1000000L);
		// split to whole seconds and the remainder; the remainder is in milliseconds
		// and needs to be converted to nanoseconds (1 ms = 1000000 ns)
	}

	/**
	 *	@brief attempts to enter the semaphore. in case its value is zero,
	 *		the calling thread is suspended until someone calls Signal()
	 *		or until the specified time elapses
	 *
	 *	@param[out] r_b_locked is set if the semaphore was successfully locked,
	 *		or cleared if the timeout elapsed
	 *	@param[in] n_wait_seconds is time to wait, in seconds
	 *	@param[in] n_wait_nanoseconds is time to wait, in nanoseconds
	 *
	 *	@return Returns true on success, false on failure (doesn't reflect whether
	 *		the thread had to wait, or not. might fail because the semaphore was
	 *		deleted by another thread, ... shouldn't really happen).
	 */
	bool TimedWait(bool &r_b_locked, int n_wait_seconds, long n_wait_nanoseconds);

	/**
	 *	@brief attempts to enter the semaphore, in case its value is zero (would block), returns immediately
	 *	@return Returns true in case the semaphore was successfully entered,
	 *		otherwise returns false; also returns false on failure (shouldn't really happen).
	 */
	bool TryWait();

	/**
	 *	@brief Unlocks the semaphore, increments its value by 1, returns immediately
	 *	@return Returns true on success, false on failure (such as the semaphore being deleted by another thread).
	 */
	bool Signal();

	/**
	 *	@brief Unlocks the semaphore, increments its value by a given count, returns immediately
	 *	@param[in] n_signal_num is number of pending threads to signal (must be nonzero positive)
	 *	@return Returns true on success, false on failure (such as the semaphore being deleted by another thread).
	 */
	bool Signal(int n_signal_num);

	/**
	 *	@brief Unlocks the semaphore, increments its value by a given count, returns immediately
	 *
	 *	@param[in] n_signal_num is number of pending threads to signal (must be nonzero positive)
	 *	@param[out] r_n_previous_value is filled with the semaphore value before the operation
	 *
	 *	@return Returns true on success, false on failure (such as the semaphore being deleted by another thread).
	 */
	bool Signal(int n_signal_num, int &r_n_previous_value);

protected:
	CSemaphore(const CSemaphore &UNUSED(r_sema)) {} /**< @brief can't copy semaphores this way, use pointers */
	const CSemaphore &operator =(const CSemaphore &UNUSED(r_sema)) { return *this; } /**< @brief can't copy semaphores this way, use pointers (does nothing, returns this object) */
};

/**
 *	@brief a simple thread-safe queue for producer-consumer scenarios
 *	@param TWorkItem is data type of a single unit of work
 *	@note The correct usage is as follows:
 *	@code
 *	void Producer(CProducerConsumerQueue<TWorkItem> &r_queue)
 *	{
 *		TWorkItem t_wi;
 *		while(GetMoreWork(&t_wi))
 *			r_queue.Put(t_wi); // mind some error-checking here
 *		r_queue.Signal_Finished(); // and here
 *	}
 *
 *	void Consumer(CProducerConsumerQueue<TWorkItem> &r_queue)
 *	{
 *		TWorkItem t_wi;
 *		while(r_queue.Get(t_wi))
 *			ProcessWorkItem(t_wi);
 *		if(!r_queue.b_Finished())
 *			Error(); // there are still data in the queue, but Get() failed for some reason ...
 *	}@endcode
 *	@note There may be multiple producers or consumers, Get() and Put() are both thread-safe.
 */
template <class TWorkItem>
class CProducerConsumerQueue {
protected:
	CSemaphore m_full_count_sema, m_free_count_sema;
	CMutex m_buffer_access_mutex;

	size_t m_n_free_index; // index of first free work-item
	size_t m_n_full_index; // index of first full work-item
	std::vector<TWorkItem> m_queue_data;
	size_t m_n_queue_size;

	bool m_b_finished;

public:
	/**
	 *	@brief default constructor; initializes the queue
	 *	@param[in] n_queue_size is size of the queue, in work-items
	 *	@note This constructor may fail (due to OS limits, or low free memory),
	 *		it is therefore reccommended to call b_Status() afterwards.
	 */
	CProducerConsumerQueue(size_t n_queue_size)
		:m_full_count_sema(0), m_free_count_sema(int(n_queue_size)), m_n_free_index(0), m_n_full_index(0), m_b_finished(false)
	{
		_ASSERTE(n_queue_size < INT_MAX); // CSemaphore constructor
		try {
			_ASSERTE(n_queue_size < SIZE_MAX);
			m_queue_data.resize(n_queue_size + 1); // +1
			m_n_queue_size = m_queue_data.size();
		} catch(std::bad_alloc&) {
			m_n_queue_size = 0;
		}
	}

	/**
	 *	@brief gets constructor result
	 *	@return Returns true if constructor succeeded and the queue is ready to be used, otherwise returns false.
	 *	@note This doesn't reflect any kind of queue state (eg. empty / full / finished queue).
	 */
	bool b_Status() const
	{
		_ASSERTE(m_n_queue_size == m_queue_data.size());
		return m_n_queue_size && m_full_count_sema.b_Status() &&
			m_free_count_sema.b_Status() && m_buffer_access_mutex.b_Status();
	}

	/**
	 *	@brief gets queue size
	 *	@return returns queue size, in work-items
	 */
	inline size_t n_Size() const
	{
		return m_n_queue_size - 1;
	}

	/**
	 *	@brief gets number of items, currently in the queue
	 *	@return Returns number of items, currently in the queue.
	 *	@note This function is not thread-safe, and the result may thus be inaccurate.
	 */
	inline size_t n_EnqueuedItem_Num() const
	{
		size_t n_head = m_n_full_index, n_tail = m_n_free_index;
		return (n_tail >= n_head)? n_tail - n_head : n_tail + m_n_queue_size - n_head; // @t_odo - debug this
		// queue item num:
		//
		//		0123456789
		// tail * 
		// head * 0 = tail - head
		// tail  *
		// head * 1 = tail - head
		// tail *
		// head          * 1 = tail + 10 - head = 0+10-9
		// tail  *
		// head         * 3 = tail + 10 - head = 1+10-8
		// tail  *
		// head   * 9 = tail + 10 - head = 1 + 10 - 2
	}

	/**
	 *	@brief determines whether is the queue empty
	 *	@return Returns true if the queue is empty, otherwise returns false.
	 *	@note This does not signal consumers there will be no more data, b_Finished() is used to do that.
	 *	@note This function always returns immediately.
	 */
	inline bool b_Empty() const
	{
		return m_n_full_index == m_n_free_index;
	}

	/**
	 *	@brief determines whether is the queue full
	 *	@return Returns true if the queue is full, otherwise returns false.
	 *	@note This function always returns immediately.
	 */
	inline bool b_Full() const
	{
		return m_n_full_index == n_NextIndex(m_n_free_index); // f_ixme? yes, this is correct (full = head!)
	}

	/**
	 *	@brief signals the producer is done generating data
	 *	@return Returns true on success, false on failure (shouldn't really happen,
	 *		provided the queue was successfuly initialized and hasn't been deleted).
	 *	@note This function should only be called by the producer, and only once.
	 */
	bool Signal_Finished()
	{
		m_b_finished = true;
		return m_full_count_sema.Signal();
	}

	/**
	 *	@brief determines whether the finished flag was raised by the producer
	 *		(there will be no more data once the queue is empty)
	 *	@return Returns true in case the producer raised the finished flag (by calling Signal_Finished()),
	 *		otherwise returns false.
	 *	@note There still may be data in the queue which must be processed. The correct way to use this queue
	 *		is to call Get() until it fails, then calling b_Finished(). If it returns true, consumer can quit.
	 *		If, on the other hand, it returns false, there was an error.
	 */
	inline bool b_Finished() const
	{
		return b_Empty() && m_b_finished;
	}

	/**
	 *	@brief puts a single work-item to the queue
	 *	@param[in] t_wi is the work-item
	 *	@return Returns true on success, false on failure (shouldn't really happen, provided the queue
	 *		was successfuly initialized and hasn't been deleted).
	 *	@note Returns immediately if the queue is not full, or blocks until a consumer calls Get() and makes some space.
	 */
	bool Put(TWorkItem t_wi)
	{
		if(!m_free_count_sema.Wait())
			return false;
		// enter the "empty" semaphore

		if(!m_buffer_access_mutex.Lock())
			return false;
		// lock buffer access mutex

		_ASSERTE(!b_Full());
		_ASSERTE(m_n_free_index >= 0 && m_n_free_index < m_queue_data.size());
		m_queue_data[m_n_free_index] = t_wi;
		m_n_free_index = n_NextIndex(m_n_free_index);
		// add work-item to the queue

		return m_buffer_access_mutex.Unlock() && m_full_count_sema.Signal();
		// unlock buffer access mutex and signal the "fill" semaphore
	}

	/**
	 *	@brief tries to put a single work-item to the queue, in the specified time
	 *
	 *	@param[out] r_b_put_work_item is the result of the put (set if put, cleared if timed out)
	 *	@param[in] t_wi is the work-item
	 *	@param[in] n_timeout_msec is time to put the item in the queue, in milliseconds
	 *
	 *	@return Returns true on success, false on failure (shouldn't really happen, provided the queue
	 *		was successfuly initialized and hasn't been deleted).
	 *
	 *	@note Returns immediately if the queue is not full, or blocks until either a consumer calls Get()
	 *		and makes some space, or until the timeout is exceeded.
	 */
	bool TimedPut(bool &r_b_put_work_item, TWorkItem t_wi, int n_timeout_msec)
	{
		if(!m_free_count_sema.TimedWait(r_b_put_work_item, n_timeout_msec))
			return false;
		// enter the "empty" semaphore

		if(!m_buffer_access_mutex.Lock()) {
			r_b_put_work_item = false;
			return false;
		}
		// lock buffer access mutex

		_ASSERTE(!b_Full());
		_ASSERTE(m_n_free_index >= 0 && m_n_free_index < m_queue_data.size());
		m_queue_data[m_n_free_index] = t_wi;
		m_n_free_index = n_NextIndex(m_n_free_index);
		r_b_put_work_item = true;
		// add work-item to the queue

		return m_buffer_access_mutex.Unlock() && m_full_count_sema.Signal();
		// unlock buffer access mutex and signal the "fill" semaphore
	}

	/**
	 *	@brief tries to put a single work-item to the queue
	 *	@param[in] t_wi is the work-item
	 *	@return Returns true in case the work-item was successfuly enqueued, false in case the queue was full,
	 *		or on failure (shouldn't really happen, provided the queue was successfuly initialized and hasn't been deleted).
	 *	@note Returns immediately if the queue is not full, or blocks until a consumer calls Get() and makes some space.
	 */
	bool TryPut(TWorkItem t_wi)
	{
		if(!m_free_count_sema.TryWait())
			return false;
		// enter the "empty" semaphore

		if(!m_buffer_access_mutex.Lock())
			return false;
		// lock buffer access mutex

		_ASSERTE(!b_Full());
		_ASSERTE(m_n_free_index >= 0 && m_n_free_index < m_queue_data.size());
		m_queue_data[m_n_free_index] = t_wi;
		m_n_free_index = n_NextIndex(m_n_free_index);
		// add work-item to the queue

		return m_buffer_access_mutex.Unlock() && m_full_count_sema.Signal();
		// unlock buffer access mutex and signal the "fill" semaphore
	}

	/**
	 *	@brief gets a single work-item from the queue
	 *	@param[out] r_t_wi is place for the work-item to be written
	 *	@return Returns true on success, false if there's no more work (call b_Finished() to determine the cause)
	 *		or on failure (shouldn't really happen, provided the queue was successfuly initialized and hasn't been deleted).
	 *	@note Returns immediately if the queue is not empty, or blocks until a producer calls Put() and makes some work to do.
	 */
	bool Get(TWorkItem &r_t_wi)
	{
		if(!m_full_count_sema.Wait())
			return false;
		// enter the "fill" semaphore

		if(!m_buffer_access_mutex.Lock())
			return false;
		// lock buffer access mutex

		if(m_b_finished && b_Empty()) {
			m_buffer_access_mutex.Unlock();
			m_full_count_sema.Signal();
			// signal the "fill" semaphore in case there is more consumers accessing the queue to free the next one
			return false;
		}
		// handle finished state

		_ASSERTE(m_n_full_index >= 0 && m_n_full_index < m_queue_data.size());
		r_t_wi = m_queue_data[m_n_full_index];
		m_n_full_index = n_NextIndex(m_n_full_index);
		// get work-item from the queue

		return m_buffer_access_mutex.Unlock() && m_free_count_sema.Signal();
		// unlock buffer access mutex and signal the "empty" semaphore
	}

	/**
	 *	@brief tries to get a single work-item from the queue, in the specified time
	 *
	 *	@param[out] r_b_received_work_item is the result of the get (set if got one, cleared if timed out)
	 *	@param[out] r_t_wi is place for the work-item to be written
	 *	@param[in] n_timeout_msec is time to put the item in the queue, in milliseconds
	 *
	 *	@return Returns true on success, false on failure (shouldn't really happen, provided the queue
	 *		was successfuly initialized and hasn't been deleted).
	 *
	 *	@note Returns immediately if the queue is not empty, or blocks until either
	 *		a producer calls Put() and enqueues at least one item, or until the timeout is exceeded.
	 */
	bool TimedGet(bool &r_b_received_work_item, TWorkItem &r_t_wi, int n_timeout_msec)
	{
		if(!m_full_count_sema.TimedWait(r_b_received_work_item, n_timeout_msec))
			return false;
		// enter the "fill" semaphore

		if(!m_buffer_access_mutex.Lock()) {
			r_b_received_work_item = false;
			return false;
		}
		// lock buffer access mutex

		if(m_b_finished && b_Empty()) {
			m_buffer_access_mutex.Unlock();
			m_full_count_sema.Signal();
			// signal the "fill" semaphore in case there is more consumers accessing the queue to free the next one
			r_b_received_work_item = false;
			return true;
		}
		// handle finished state

		_ASSERTE(m_n_full_index >= 0 && m_n_full_index < m_queue_data.size());
		r_t_wi = m_queue_data[m_n_full_index];
		m_n_full_index = n_NextIndex(m_n_full_index);
		r_b_received_work_item = true;
		// get work-item from the queue

		return m_buffer_access_mutex.Unlock() && m_free_count_sema.Signal();
		// unlock buffer access mutex and signal the "empty" semaphore
	}

	/**
	 *	@brief tries to get a single work-item from the queue
	 *	@param[out] r_t_wi is place for the work-item to be written
	 *	@return Returns true in case work-item was successfuly obtained, false in case the queue was empty,
	 *		if there's no more work (call b_Finished() to determine the cause), or on failure (shouldn't
	 *		really happen, provided the queue was successfuly initialized and hasn't been deleted).
	 *	@note Always returns immediately.
	 */
	bool TryGet(TWorkItem &r_t_wi)
	{
		if(!m_full_count_sema.TryWait())
			return false;
		// enter the "fill" semaphore

		if(!m_buffer_access_mutex.Lock())
			return false;
		// lock buffer access mutex

		if(m_b_finished && b_Empty()) {
			m_buffer_access_mutex.Unlock();
			m_full_count_sema.Signal();
			// signal the "fill" semaphore in case there is more consumers accessing the queue to free the next one
			return false;
		}
		// handle finished state

		_ASSERTE(m_n_full_index >= 0 && m_n_full_index < m_queue_data.size());
		r_t_wi = m_queue_data[m_n_full_index];
		m_n_full_index = n_NextIndex(m_n_full_index);
		// get work-item from the queue

		return m_buffer_access_mutex.Unlock() && m_free_count_sema.Signal();
		// unlock buffer access mutex and signal the "empty" semaphore
	}

protected:
	/**
	 *	@brief indexing function for a circular buffer
	 *	@param[in] n_index is index of element in the queue
	 *	@return Returns index of the next element in the queue.
	 */
	inline size_t n_NextIndex(size_t n_index) const
	{
		_ASSERTE(n_index < SIZE_MAX);
		return (n_index + 1) % m_n_queue_size;
	}
};

#endif // __LAME_THREADS_INCLUDED
