/*
								+--------------------------------+
								|                                |
								|   ***  Document storage  ***   |
								|                                |
								|  Copyright  -tHE SWINe- 2010  |
								|                                |
								|          DocStorage.h          |
								|                                |
								+--------------------------------+
*/

/**
 *	@file DocStorage.h
 *	@author -tHE SWINe-
 *	@brief Document storage
 *	@date 2010-08-05
 */

#ifndef __DOCUMENT_STORAGE_INCLUDED
#define __DOCUMENT_STORAGE_INCLUDED

#include <stddef.h>
#include <stdio.h>

#include <map>
#include <string>
#include <vector>

/*
 *	Fallback definition of wchar_t for compilers that do not provide it as a built-in type.
 *
 *	In standard C++ wchar_t is a keyword (a distinct fundamental type), so it must not be
 *	redeclared there - conforming compilers reject the typedef (g++ reports "redeclaration
 *	of C++ built-in type 'wchar_t'"). The typedef is therefore emitted only for Metrowerks
 *	compilers and for Visual C++ older than _MSC_VER 1400 that was not told to use the
 *	native type (_NATIVE_WCHAR_T_DEFINED is predefined by Visual C++ under /Zc:wchar_t).
 *
 *	NOTE(review): where wchar_t is built in, its width is implementation-defined and is not
 *	necessarily 16 bits - verify that the code reading wide strings from files does not
 *	rely on a two-byte wchar_t.
 */
#if defined(__MWERKS__) || (defined(_MSC_VER) && _MSC_VER < 1400 && !defined(_NATIVE_WCHAR_T_DEFINED))
typedef unsigned short wchar_t; /**< @brief wide character data type */
#endif // __MWERKS__ || (_MSC_VER < 1400 && !_NATIVE_WCHAR_T_DEFINED)
// visual studio 2008 (and any standard-conforming C++ compiler) treats wchar_t as built-in type

/**
 *	@brief plain container for the data of a single document (simple wrapper of CLucene document data)
 *
 *	Holds the document name and date, the per-term occurrence counts and the sequence
 *	of term id's. Documents are loaded from an intermediate binary file using the static Read().
 */
struct TDocument {
#ifdef __x64__
	// NOTE(review): uint32_t is not declared by any header included in this file;
	// presumably a header included earlier by the client code provides it - confirm
	typedef uint32_t size_t; /**< @brief index / counter type (32-bit if __x64__ is defined) */
#else // __x64__
	typedef ::size_t size_t; /**< @brief index / counter type (the global size_t) */
#endif // __x64__

	/**
	 *	@brief document filename
	 */
	std::basic_string<wchar_t> s_name;

	/**
	 *	@brief last modification date of the document file
	 */
	std::basic_string<wchar_t> s_date;

	/**
	 *	@brief table of occurrence counts per term (indexed by global term id's)
	 */
	std::map<size_t, size_t> term_frequency_map;

	/**
	 *	@brief list of global term id's, in the order in which they occur in the document
	 */
	std::vector<size_t> term_position_list;

	/**
	 *	@brief loads documents from a (proprietary) intermediate binary file
	 *
	 *	@param[out] r_terms receives the unique terms (ordered by global term id's) upon successful return
	 *	@param[out] r_documents receives the documents upon successful return
	 *	@param[in] p_s_infile is name of the input file
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool Read(
		std::vector<std::basic_string<wchar_t> > &r_terms,
		std::vector<TDocument> &r_documents,
		const char *p_s_infile);

protected:
	/**
	 *	@brief loads a single wide string from a file
	 *
	 *	@param[out] r_s_dest receives the string that was read
	 *	@param[in] p_fr is the file to read from
	 *
	 *	@return Returns true on success, false on failure.
	 */
	static bool ReadWString(
		std::basic_string<wchar_t> &r_s_dest,
		FILE *p_fr);

	/**
	 *	@brief loads this document from a file
	 *
	 *	@param[in] p_fr is the file to read from
	 *
	 *	@return Returns true on success, false on failure.
	 */
	bool Read(FILE *p_fr);
};

#endif //__DOCUMENT_STORAGE_INCLUDED
