/*
								+---------------------------------+
								|                                 |
								|  ***   Tiny jpeg encoder   ***  |
								|                                 |
								|  Copyright   -tHE SWINe- 2006  |
								|                                 |
								|            JpegEnc.h            |
								|                                 |
								+---------------------------------+
*/

#pragma once
#ifndef __JPEG_ENC_INCLUDED
#define __JPEG_ENC_INCLUDED

/**
 *	@file JpegEnc.h
 *	@author -tHE SWINe-
 *	@date 2006
 *	@brief simple jpeg encoder
 *
 *	@date 2007-12-24
 *
 *	improved linux compatibility by adding posix integer types
 *
 *	@date 2008-03-04
 *
 *	using Integer.h header, using CallStack.h instead of crtdbg.h
 *	changed size of zigZag table back to 64 and its type to int (speed / obfuscation purposes)
 *
 *	@date 2009-05-04
 *
 *	fixed mixed windows / linux line endings
 *
 *	@date 2012-06-19
 *
 *	Moved multiple inclusion guard before file documentation comment.
 *
 */

/*
 *		=== config section ===
 */

/**
 *	@def __JPEG_ENC_VERBOSE
 *
 *	@brief If defined, prints tables, header info, ... (to stdout).
 */
//#define __JPEG_ENC_VERBOSE

/**
 *	@def __JPEG_ENC_ENCODE_VERBOSE
 *
 *	@brief If defined, prints encoding progress to stdout (source image window,
 *		DCT + quantized, RLE compressed).
 */
//#define __JPEG_ENC_ENCODE_VERBOSE

/**
 *	@def __JPEG_ENC_HUFF_ENCODE_VERBOSE
 *
 *	@brief If defined, prints huffman encoding stage progress to
 *		stdout (RLE data + its huffman representation).
 */
//#define __JPEG_ENC_HUFF_ENCODE_VERBOSE

/**
 *	@def __JPEG_ENC_FAST_CHROMINANCE_SAMPLING
 *
 *	@brief if defined, chrominance will be sampled just from a single pixel
 *
 *	It can save some time when encoding images with 2x2 or 4x4 sampling,
 *	but the quality can be lower, that depends on type of the image.
 *	Resulting code is smaller if enabled, of course.
 */
//#define __JPEG_ENC_FAST_CHROMINANCE_SAMPLING

/**
 *	@def __JPEG_ENC_LOOKUP_CHUFFCODER
 *
 *	@brief uses lookup table for symbol encoding
 *
 *	A bit (circa 9%) faster, but uses some more memory on the stack and probably
 *	more memory on the heap; the code is also a few bytes larger.
 */
//#define __JPEG_ENC_LOOKUP_CHUFFCODER

/**
 *	@def __JPEG_ENC_USE_REFERENCE_DCT
 *
 *	@brief enables DCT debugging (do not use in conjunction with
 *		__JPEG_ENC_INTEGER_FDCT, it won't compile).
 */
//#define __JPEG_ENC_USE_REFERENCE_DCT

#ifndef __JPEG_ENC_USE_REFERENCE_DCT
/**
 *	@def __JPEG_ENC_INTEGER_FDCT
 *
 *	@brief use faster integer DCT (on my machine, it's actually not faster,
 *		but it should be - I guess).
 */
//#define __JPEG_ENC_INTEGER_FDCT
#endif

/**
 *	@def __JPEG_ENC_ENABLE_LOSSLESS
 *
 *	@brief if defined, lossless compression algorithm is also available
 */
#define __JPEG_ENC_ENABLE_LOSSLESS

#ifdef __JPEG_ENC_ENABLE_LOSSLESS
/**
 *	@def __JPEG_ENC_USE_LOSSLESS_NONSTD_RLE
 *
 *	@brief if defined, jpeg lossless algorithm is further enhanced by RLE
 *		compression
 *
 *	@note This is a non-standard extension which would probably make a standard
 *		decoder crash or at least decode rubbish. Never use it in programs where your
 *		jpeg files are available in standalone-file form, so they could potentially be
 *		copied away! Do not breed nor allow anyone to breed non-standard jpegs
 *		away from your app (note there is no record in the image file header whether
 *		this extension was used or not).
 */
#define __JPEG_ENC_USE_LOSSLESS_NONSTD_RLE
#endif

/**
 *	@def __JPEG_ENC_INTEGER_QUANT
 *
 *	@brief Integer quant table coeffs (this actually is faster; lower image quality,
 *		introduces quite a lot of noise; can be used without __JPEG_ENC_INTEGER_FDCT).
 */
//#define __JPEG_ENC_INTEGER_QUANT

/**
 *	@def __JPEG_ENC_WRITE_SINGLE_QUANT_TABLE_PER_DQT_BLOCK
 *
 *	@brief if defined, every quantization table is written into separate data-block
 *
 *	@note: Specs allow both separate and single-block versions, it's here only for
 *		compatibility reasons.
 */
//#define __JPEG_ENC_WRITE_SINGLE_QUANT_TABLE_PER_DQT_BLOCK

/**
 *	@def __JPEG_ENC_WRITE_SINGLE_HUFF_TABLE_PER_DQT_BLOCK
 *
 *	@brief if defined, every huffman table is written into separate data-block
 *
 *	@note: Specs allow both separate and single-block versions, it's here only for
 *		compatibility reasons.
 */
//#define __JPEG_ENC_WRITE_SINGLE_HUFF_TABLE_PER_DQT_BLOCK

/**
 *	@def __JPEG_ENC_ENABLE_CMYK
 *
 *	@brief if defined, it's possible to save jpegs, containing CMYK values instead of Y[CbCr]
 */
//#define __JPEG_ENC_ENABLE_CMYK

/**
 *	@def __JPEG_ENC_ENABLE_YCCK
 *
 *	@brief if defined, it's possible to save jpegs, containing YCCK values instead of Y[CbCr]
 */
//#define __JPEG_ENC_ENABLE_YCCK

/**
 *	@def __JPEG_ENC_BGR
 *
 *	@brief if defined, swap color channels while encoding colors
 */
#define __JPEG_ENC_BGR

/*
 *		=== ~config section ===
 */

#include "Integer.h"

/**
 *	@brief generates quantization tables, based on quality
 *
 *	Stateless helper; the only member is a static function, so no instance is needed.
 */
class CQuantTableFactory {
public:
	/**
	 *	@brief calculates 8-bit quantization table, based on simple formula
	 *
	 *	@param[out] p_output is pointer to array of 64 unsigned bytes where table
	 *		coefficients are to be output (in natural left-to-right top-to-bottom
	 *		order, no zig-zag); the caller provides the storage
	 *	@param[in] b_luminance controls whether the output table should be used for quantizing
	 *		luminance plane (higher quality) or chrominance plane (lower quality)
	 *	@param[in] f_quality is quality in range 0 - 1.1 (0% - 110%)
	 *
	 *	@note The output uses the same layout that CJpegEncoder::Set_QuantTable()
	 *		documents for its input (64 values, natural order).
	 */
	static void Calc_QuantTable(uint8_t *p_output, bool b_luminance, float f_quality);
};

#include "Huffman.h"
#include "Bitmap.h"

/**
 *	@brief jpeg encoder class
 *
 *	Encodes a bitmap held in memory to an already opened FILE stream, either as
 *	a DCT-based jpeg (Encode_Jpeg()) or, if __JPEG_ENC_ENABLE_LOSSLESS is defined,
 *	as a lossless jpeg (Encode_LosslessJpeg()). The encoder owns two quantization
 *	tables which can be replaced using Set_QuantTable().
 *
 *	@note Only declarations are present in this header; all behavior described for
 *		private members below is inferred from names and existing comments and
 *		should be confirmed against the implementation file.
 */
class CJpegEncoder {
private:
	static const int m_p_zig_indices[64]; // zig-zag index table, one entry per coefficient of a 8x8 block (values are defined in the implementation file)

	/*
	 *	thin wrapper around the destination FILE*; presumably does not own the
	 *	stream (no destructor is declared) - confirm in the implementation
	 */
	class COutputFile {
	protected:
		FILE *m_p_fw; // destination stream, supplied to the constructor

	public:
		COutputFile(FILE *p_fw);
		bool Write_Byte(uint8_t n_value); // writes a single byte; the bool result presumably signals write success
		bool Write_Short(int16_t n_value); // writes a 16-bit value; NOTE(review): byte order is not visible here (jpeg markers are big-endian) - confirm
	};

	/*
	 *	accumulates individual bits and emits them through a COutputFile;
	 *	used for writing the entropy-coded data
	 */
	class CBitWriter {
	protected:
		uint8_t m_n_byte; // presumably the partially filled output byte
		int m_n_bit_num; // presumably the number of bits currently held in m_n_byte

	public:
		CBitWriter();
		bool WriteBit(bool b_bit, COutputFile *p_output_file);
		bool WriteNumber(int n_bit_num, int n_value, COutputFile *p_output_file); // presumably writes n_bit_num bits of n_value - confirm bit order in the implementation
		bool PaddByte(COutputFile *p_output_file); // presumably pads the unfinished byte so the output ends on a byte boundary
	};

	struct TQuantTable { // quantization table
		bool b_write; // whether to write this quant table to the file
#ifdef __JPEG_ENC_INTEGER_QUANT
		int p_value[64]; // 16:16 fixed-point
#else
		float p_value[64]; // pre-scaled values for AA&N FDCT
#endif
		uint8_t p_store_value[64]; // original values to be written to file
	} m_p_quant_table[2];
	// two tables; according to the Encode_Jpeg() documentation, table 0 is used by the
	// densely sampled components and table 1 by the subsampled ones

	struct THuffmanTable {
		uint8_t p_code_num[16]; // number of codes for all possible 16 lengths
		uint8_t *p_code[16]; // 16 pointers into p_code_table where the actual data are
		uint8_t p_code_table[256]; // table for all huffman codes
	};
	// huffman table in the form in which it is passed to Write_HuffmanTables()
	// NOTE(review): p_code points into p_code_table of the same object, so a plain copy of
	// this struct leaves the pointers referring to the source object (t_HuffmanTable()
	// returns it by value) - confirm that copies are handled safely

	struct TRLEBlockData {
		struct TRLEPair {
			uint8_t n_code_word;
			// upper nibble is zero num, lower nibble is n_unsigned_coeff bit num
			int16_t n_coeff;

			inline TRLEPair() {} // leaves both members uninitialized
			inline TRLEPair(int16_t n_x); // set n_code_word to code length (preceding zero num has yet to be shifted << 4 and or-ed)
			static inline uint8_t n_SignificantBit_Num(int16_t n_x); // return number of significant bits in n_x (n_x as-is, no sign-dependent ops involved)
		};
		TRLEPair p_pair[64]; // storage for up to 64 pairs (one block has 64 coefficients)
		uint8_t n_pair_num; // number of valid entries in p_pair
	};
	// data of a single compressed block, output by DCT and ready for huffman coding
	// (need to gather all codewords to generate optimized huffman table)

	struct TMacroBlock {
		TRLEBlockData *p_block;
	};
	// macro-block, can contain multiple blocks, pointers point to a single long
	// array -> do not free, free the array

	struct TComponent {
		uint8_t n_component_id;
		uint8_t n_sampling_horiz, n_sampling_vert;
		uint8_t n_scale_horiz, n_scale_vert;
		uint8_t n_quant_table_index; // index of the quantization table used by this component
		uint8_t n_dc_huff_coder_index, n_ac_huff_coder_index; // indices of the huffman coders used for DC / AC coefficients
	};
	// component info (generated from encode color type)

	typedef CHuffmanCoder<16, uint8_t> CJpegHuffCoder; // shorthand for the huffman coder type used throughout the encoder

	struct TBlockInfo {
		const TComponent *p_component;
		uint8_t n_offset_x, n_offset_y;
		TQuantTable *p_quant_table;
		CJpegHuffCoder *p_ac_huff_coder, *p_dc_huff_coder;
		int *p_ac_huff_code_table, *p_dc_huff_code_table; // presumably only meaningful with __JPEG_ENC_LOOKUP_CHUFFCODER - confirm
	};
	// block info (used to speed-up compression, array with TBlockInfo for every block in
	// macro-block is generated, this array is then looped through during encoding process)

public:
	/**
	 *	@brief encode color type
	 *
	 *	@note NOTE(review): the color_RG description here and the one in the Encode_Jpeg()
	 *		documentation disagree on which of the two channels is subsampled - confirm
	 *		against Get_ComponentInfo().
	 */
	enum {
		color_Gray,		/**< grayscale (Y = 0.299R + 0.587G + 0.114B, single huffman and quant table) */
		color_RG,		/**< red-green (no color transform, single huffman and quant table, red is sampled as required, green is always sampled 1x1 (i.e. there are potentially fewer green samples)) */
		color_RGB		/**< RGB (stored as YCbCr, two huffman and quant tables, one for Y, second for CbCr) */
#ifdef __JPEG_ENC_ENABLE_CMYK
		, color_CMYK	/**< RGB (stored as CMYK, two tables, one for C and K, second for M and Y; note 4-component JPEG uses YCCK) */
#endif
#ifdef __JPEG_ENC_ENABLE_YCCK
		, color_YCCK	/**< RGB (stored as YCbCrK, two tables, one for Y and K, second for Cb and Cr) */
#endif
	};

	/**
	 *	@brief default constructor
	 *
	 *	Initializes encoder with quality setting 1.0.
	 */
	CJpegEncoder();

	/**
	 *	@brief controls whether the specified quantization table is written
	 *
	 *	@param[in] n_index is zero-based quantization table index (0 or 1)
	 *	@param[in] b_write specifies whether the selected table should be written, or not
	 *
	 *	@note By default, both quantization tables are written (default for RGB images).
	 */
	void Write_QuantTable(int n_index, bool b_write);

	/**
	 *	@brief sets quantization table data
	 *
	 *	@param[in] n_index is zero-based quantization table index (0 or 1)
	 *	@param[in] p_quant_table is pointer to 64 element array, containing quantization coeffs
	 *		  (coming in natural left-to-right, top-to-bottom order, no zig-zag)
	 *
	 *	@note CQuantTableFactory::Calc_QuantTable() produces tables in this layout.
	 */
	void Set_QuantTable(int n_index, const uint8_t p_quant_table[64]);

	/**
	 *	@brief encodes jpeg image from bitmap in memory to a file
	 *
	 *	@param[in] p_fw is pointer to destination file (has to be opened for binary
	 *		writing; fwrite() only is used)
	 *	@param[in] p_bitmap is pointer to image to be encoded
	 *	@param[in] n_encode_color is one of color_Gray, color_RG, color_RGB, color_CMYK or color_YCCK,
	 *		determines way of storing color in jpeg file (source color is RGB (A is never used)).
	 *		For color_RG, R has dense 1 x 1 samples, G has samples of size n_color_sampling_horiz x
	 *		n_color_sampling_vert, only quant table 0 is used. color_RGB sets storing color as YCbCr,
	 *		Y has dense samples and uses quant table 0, Cb and Cr have normal samples and both use
	 *		quant table 1. color_CMYK (valid only if __JPEG_ENC_ENABLE_CMYK is defined) sets storing
	 *		color information as CMYK, C and K have dense samples and use quant table 0, M and Y have
	 *		normal samples and use quant table 1. color_YCCK (valid only if __JPEG_ENC_ENABLE_YCCK
	 *		is defined) is used for standard 4-component jpegs (YCCK means YCbCrK), Y and K have
	 *		dense samples and use quant table 0, Cb and Cr have normal samples and use quant table 1.
	 *	@param[in] n_color_sampling_horiz is color horizontal sampling (has to be 1, 2 or 4)
	 *	@param[in] n_color_sampling_vert is color vertical sampling (has to be 1, 2 or 4)
	 *
	 *	@return Returns true on success and false on failure (not enough memory for temporary data
	 *		  or not enough space on dest drive).
	 *
	 *	@note Bitmap resolution in jpeg is written as unsigned short so it's limited to 65535 x 65535.
	 *	@note For n_encode_color = color_Gray, color sampling is forced to 1 x 1 (recommended
	 *		in jpeg specification) and only quant table 0 is used.
	 *	@note NOTE(review): the color_RG description above contradicts the enum documentation
	 *		(which says green is always sampled 1x1) - confirm against Get_ComponentInfo().
	 *	@note NOTE(review): the color_YCCK description originally repeated the CMYK component
	 *		names (C, K / M, Y); it was corrected here by analogy with the enum documentation,
	 *		confirm against Get_ComponentInfo().
	 */
	bool Encode_Jpeg(FILE *p_fw, const TBmp *p_bitmap, int n_encode_color = color_RGB,
		int n_color_sampling_horiz = 2, int n_color_sampling_vert = 2);

#ifdef __JPEG_ENC_ENABLE_LOSSLESS
	/**
	 *	@brief encode lossless jpeg image from bitmap in memory to a file
	 *
	 *	@param[in] p_fw is pointer to destination file (has to be opened for binary
	 *		writing; fwrite() only is used)
	 *	@param[in] p_bitmap is pointer to image to be encoded
	 *	@param[in] n_sample_precission is destination data precision and can be 2 - 12 (but source
	 *		  data are 8 bpp so reasonable range is 2 - 8 bits per sample)
	 *	@param[in] b_separate_entropy decides whether every component has its own entropy table
	 *		(true). otherwise color components (if present) share one table and alpha channel
	 *		(if present) is compressed using another one (false)
	 *	@param[in] n_channel_num is number of channels (can be 1 to 4)
	 *	@param[in] n_channel0_index is zero-based index of source RGBA channel
	 *		to be stored in jpeg channel 0 (channel<i> = (n_rgba >> (8 * n_channel<i>_index)) & 0xff)
	 *	@param[in] n_channel1_index is zero-based index of source RGBA channel
	 *		to be stored in jpeg channel 1, it is only used if n_channel_num > 1
	 *	@param[in] n_channel2_index is zero-based index of source RGBA channel
	 *		to be stored in jpeg channel 2, it is only used if n_channel_num > 2
	 *	@param[in] n_channel3_index is zero-based index of source RGBA channel
	 *		to be stored in jpeg channel 3, it is only used if n_channel_num > 3
	 *
	 *	@return Presumably returns true on success and false on failure, the same as
	 *		Encode_Jpeg() (not stated in the original documentation - confirm).
	 *
	 *	@note Bitmap resolution in jpeg is written as unsigned short so it's limited to 65535 x 65535.
	 *	@note In case __JPEG_ENC_BGR is defined, index with value 0 is set to 2 and vice-versa.
	 *	@note If __JPEG_ENC_USE_LOSSLESS_NONSTD_RLE is defined, the output uses a non-standard
	 *		extension (see the documentation of that macro).
	 */
	bool Encode_LosslessJpeg(FILE *p_fw, const TBmp *p_bitmap,
		int n_sample_precission = 8, bool b_separate_entropy = false, int n_channel_num = 3,
		int n_channel0_index = 0, int n_channel1_index = 1, int n_channel2_index = 2,
		int n_channel3_index = 3);
#endif

private:
	// fills p_component_info and the two output counts from the encode color type and
	// the requested color sampling (p_component_info is caller-provided storage)
	static void Get_ComponentInfo(int n_encode_color, int n_color_sampling_horiz,
		int n_color_sampling_vert, TComponent *p_component_info,
		int &r_n_component_num, int &r_n_huff_coder_num);
	// fills p_block_info with one TBlockInfo per block of a macro-block and
	// stores their count in r_n_block_num (see the TBlockInfo comment)
	void Get_BlockInfo(const TComponent *p_component, int n_component_num,
		CJpegHuffCoder *p_huff_coder_base, int *p_huff_code_table_base,
		TBlockInfo *p_block_info, int &r_n_block_num);

	// presumably extracts one block of samples from the source image, converting
	// colors using r_conv and averaging n_sample_width x n_sample_height pixels
	// per sample - confirm in the implementation
	template <class TPixelType, class CColorConvertor>
	static void Get_ImageBlock(TPixelType *p_dest_block, const uint32_t *p_image,
		int n_scanline_width, int n_residual_width, int n_residual_height, int n_sample_width,
		int n_sample_height, const CColorConvertor &r_conv);

	// forward DCT of p_image_block followed by quantization using p_quant_table,
	// the result is stored in p_quantized_block
	template <class TPixelType>
	static void FDCT_Quantize_Block(int16_t *p_quantized_block,
		const TPixelType *p_image_block, const TQuantTable *p_quant_table);

	// run-length encodes quantized coefficients into r_block
	static void RLE_Encode_Block(TRLEBlockData &r_block, const int16_t *p_quantized_block);

	// functions writing the individual parts of the jpeg file; the bool results
	// presumably signal success of the write operations - confirm in the implementation
	static bool Write_JFIFHeader(COutputFile *p_output_file);
	bool Write_QuantTables(COutputFile *p_output_file); // non-static, reads m_p_quant_table
	static bool Write_HuffmanTables(COutputFile *p_output_file,
		const THuffmanTable *p_huffman_table, int n_table_num, bool b_write_dc_only = false);
	static bool Write_FrameHeader(COutputFile *p_output_file, uint16_t n_marker,
		int n_width, int n_height, int n_sample_precission, int n_component_num,
		const TComponent *p_component);
	static bool Write_StartScan(COutputFile *p_output_file, int n_component_num,
		const TComponent *p_component, uint8_t n_start_of_spectrum_predictor = 0,
		uint8_t n_end_of_spectrum = 63);
#ifndef __JPEG_ENC_LOOKUP_CHUFFCODER
	// variant without lookup tables: symbols are encoded directly using the huffman coders
	template <class CJpegHuffCoderClass>
	static THuffmanTable t_HuffmanTable(CJpegHuffCoderClass &r_huff_coder);
	static inline bool HuffCompress_Write_BlockData(const TRLEBlockData &r_t_block,
		CJpegHuffCoder *p_cur_dc_huff_coder, CJpegHuffCoder *p_cur_ac_huff_coder,
		CBitWriter &r_bit_writer, COutputFile *p_out_file);
	// sorted-tree symbols variant (sometimes doesn't work, don't know why yet)
#else
	// lookup table variant: t_HuffmanTable() takes an extra p_huff_code_table argument
	// (presumably filled by it) and block data are encoded using the tables
	// instead of the coder objects
	template <class CJpegHuffCoderClass>
	static THuffmanTable t_HuffmanTable(CJpegHuffCoderClass &r_huff_coder, int *p_huff_code_table);
	static inline bool HuffCompress_Write_BlockData(const TRLEBlockData &r_t_block,
		const int *p_cur_dc_huff_code_table, const int *p_cur_ac_huff_code_table,
		CBitWriter &r_bit_writer, COutputFile *p_out_file);
#endif
};

#endif // __JPEG_ENC_INCLUDED
