#include "../UberLame_src/NewFix.h"
#include "../UberLame_src/CallStack.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <vector>
#include <string>
#include "../UberLame_src/Jpeg.h"
#include "../UberLame_src/JpegEnc.h"
#include "../UberLame_src/Dir.h"
#include "../UberLame_src/Timer.h"
#include "../UberLame_src/Tga.h"
#include "../UberLame_src/iface/PNGLoad.h"
#include "../UberLame_src/iface/JpegLoad.h"
#include "../UberLame_src/Tga.h"
#include "Matrix.h"
#ifdef _OPENMP
#include <omp.h> // required by MSVC's manifest tool (wha?)
#ifdef for
#undef for
#endif // for
#endif // _OPENMP
//#include "BilFilFast.h"

/**
 *	@def __SNAPPER_HDR_PROCESSING
 *	@brief if defined, the .cr2 raw images found in the camera folders are merged to HDR; otherwise only the last .jpg (in sorted order) of each folder is used
 */
#define __SNAPPER_HDR_PROCESSING

/**
 *	@def __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY
 *	@brief if defined, raw data (data from cr2) are passed through a buffer; otherwise a temp file is created
 */
#define __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY

/**
 *	@def __SNAPPER_USE_EXR
 *	@brief if defined, OpenEXR is used to write output image
 */
#define __SNAPPER_USE_EXR

/**
 *	@brief directory traversal callback; collects paths of the image files to be processed
 *
 *	Directories are skipped. Only files with the "cr2" extension (if
 *	__SNAPPER_HDR_PROCESSING is defined) or with the "jpg" extension (otherwise)
 *	are accepted; the extension is compared using _stricmp().
 */
class CFileList_Listener {
protected:
	std::vector<std::string> &m_r_image_list; /**< @brief reference to the output list of file paths */

public:
	/**
	 *	@brief constructor; binds the listener to the output list
	 *	@param[in,out] r_image_list is the list the accepted file paths are appended to
	 *		(only a reference is stored, the list must outlive this object)
	 */
	CFileList_Listener(std::vector<std::string> &r_image_list)
		:m_r_image_list(r_image_list)
	{}

	/**
	 *	@brief file callback; appends path of the file to the list if it is an accepted image
	 *	@param[in] r_t_file is information about the file or directory being visited
	 */
	void operator ()(const TFileInfo &r_t_file)
	{
#ifdef __SNAPPER_HDR_PROCESSING
		const char *p_s_accepted_ext = "cr2"; // only raws
#else // __SNAPPER_HDR_PROCESSING
		const char *p_s_accepted_ext = "jpg"; // only jpegs
#endif // __SNAPPER_HDR_PROCESSING

		if(!r_t_file.b_directory && !_stricmp(r_t_file.p_s_Extension(), p_s_accepted_ext))
			m_r_image_list.push_back(r_t_file.p_s_Path());
		// keep regular files with the accepted extension, ignore everything else
	}
};

#include "Calibration.h"

// interface to the dcraw decoder (C linkage; presumably defined in the bundled dcraw source - confirm)

extern "C" float flash_used, canon_ev, iso_speed, shutter, aperture, focal_len;
// shot metadata globals; p_FloatLoadImage() sets shutter to -1 before calling dcraw_main(),
// expects it to be overwritten by the call, and reads iso_speed and aperture afterwards

extern "C" int dcraw_main(int argc, const char **argv);
// decoder entry point, takes a command-line style argument list
#ifdef __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY
extern "C" int __drawc_write_inhibit; // set to true by p_FloatLoadImage() so that no .ppm file is written
extern "C" int __drawc_image_size[2]; // width and height of the decoded image (read as [0] = width, [1] = height)
extern "C" uint16_t *__drawc_image_pointer; // decoded 16-bit image data; p_FloatLoadImage() takes the pointer over and resets it to null
#endif // __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY

TBmp *p_FloatLoadImage(const std::vector<std::string> &camera_image_list, bool b_verbose) // throw(std::bad_alloc)
{
	std::vector<std::pair<TBmp*, float> > raw_image_list;

	const char *p_arg_list[] = {
		"./dcraw", "-w", "-4", "-W", "-h", /*"-t", "5",*/ camera_image_list.front().c_str(), "[padding]"
	};

	const size_t n_arg_num = sizeof(p_arg_list) / sizeof(p_arg_list[0]) - 1;
	for(size_t i = 0, n = camera_image_list.size(); i < n; ++ i) {
		p_arg_list[n_arg_num - 1] = camera_image_list[i].c_str();
		if(b_verbose)
			printf("\t%s\n", p_arg_list[n_arg_num - 1]);
		shutter = -1;
#if 1
#ifdef __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY
		__drawc_write_inhibit = true; // don't write .ppm file
#endif // __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY
		dcraw_main(n_arg_num, (const char**)p_arg_list);
#else
#ifdef __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY
#error "please undefine __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY"
		// won't work without files
#endif // __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY
		const float p_saved_shutter[] = {1.0f/4096, 1.0f/2048, 1.0f/256, 1.0f/32, 1.0f/4, 2.0f};
		shutter = p_saved_shutter[i]; // skip decoding for debugging ... read it from the ppm file
		iso_speed = 100;
		aperture = 11;
#endif
		_ASSERTE(shutter != -1);

#ifdef __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY
		{
			if(!__drawc_image_pointer || __drawc_image_size[0] < 0 || __drawc_image_size[1] < 0) {
				fprintf(stderr, "error: failed to open\n");
				return 0;
			}
			int w = __drawc_image_size[0], h = __drawc_image_size[1];
			if(b_verbose) {
				printf("\t\tgot %d x %d image, shutter: 1/%g, ISO %g, aperture f/%g\n",
					w, h, 1 / shutter, iso_speed, aperture);
			}
			TBmp *p_raw_image = new TBmp;
			p_raw_image->n_width = w;
			p_raw_image->n_height = h;
			p_raw_image->b_alpha = false;
			p_raw_image->n_former_bpc = 16;
			p_raw_image->b_grayscale = false;
			p_raw_image->p_buffer = (uint32_t*)__drawc_image_pointer;
			// fill bitmap header

			__drawc_image_size[0] = -1;
			__drawc_image_size[1] = -1;
			__drawc_write_inhibit = false;
			__drawc_image_pointer = 0;
			// reset settings

			raw_image_list.push_back(std::make_pair(p_raw_image,
				shutter * (iso_speed / 100) * (11 / aperture)));
			// add it to the list
		}
#else // __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY
		{
			std::string s_filename = camera_image_list[i];
			s_filename.erase(s_filename.rfind('.'));
			s_filename += ".ppm";
			// get output filename // todo - render directly to a buffer

			FILE *p_fr = fopen(s_filename.c_str(), "rb");
			if(!p_fr) {
				fprintf(stderr, "error: failed to open\n");
				return 0;
			}
			int n_colors, w, h, n_maxval;
			fscanf(p_fr, "P%d\n%d %d\n%d\n", &n_colors, &w, &h, &n_maxval);
			if(b_verbose) {
				printf("\t\tgot %d x %d image, shutter: 1/%g, ISO %g, aperture f/%g\n",
					w, h, 1 / shutter, iso_speed, aperture);
			}
			// P6
			// 3178 4770
			// 65535

			_ASSERTE(n_colors == 6);
			_ASSERTE(n_maxval == 65535);

			TBmp *p_raw_image = TBmp::p_Alloc((w * (3 * sizeof(uint16_t)) + sizeof(uint32_t) - 1) / sizeof(uint32_t), h);
			p_raw_image->n_width = w; // hack - want to store RGB 16-bit data
			if(i) {
				_ASSERTE(w == raw_image_list.front().first->n_width);
				_ASSERTE(h == raw_image_list.front().first->n_height);
				// make sure they are same size
			}
			fread(p_raw_image->p_buffer, sizeof(uint16_t), w * h * 3, p_fr);
			if(htons(0x55aa) != 0x55aa) {
				uint16_t *p_data = (uint16_t*)p_raw_image->p_buffer;
				for(size_t i = 0, n = w * h * 3; i < n; ++ i, ++ p_data)
					*p_data = (*p_data << 8) | (*p_data >> 8);
				// could use swab(), but that is undefined behavior if working inplace
			}
			fclose(p_fr);

			raw_image_list.push_back(std::make_pair(p_raw_image,
				shutter * (iso_speed / 100) * (11 / aperture)));
			// add it to the list
		}
		// load list of 48bpp images and associated shutter speeds
#endif // __SNAPPER_PASS_RAW_DATA_THROUGH_MEMORY
	}
	// should write a ppm image

	if(b_verbose)
		printf("\tbuilding HDR ...\n\n");

	int w = raw_image_list.front().first->n_width,
		h = raw_image_list.front().first->n_height;
	float *p_image = new float[size_t(w) * h * 3], f_max = 0;
	{const int n = w * h /** 3*/;
	#pragma omp parallel for
	for(int i = 0; i < n; ++ i) { // for each component ...
		float p_sum[3] = {0}, f_weights = 0;
		for(int j = 0, m = raw_image_list.size(); j < m; ++ j) {
			const TBmp *p_bitmap = raw_image_list[j].first;
			const uint16_t *p_buffer = (const uint16_t*)p_bitmap->p_buffer;
			float f_shutter = raw_image_list[j].second;

			int x = i % h, y = w - 1 - i / h;
			int ii = y + x * w; // rotate 90 CCW

			float k = /*1 /*/ f_shutter;
			float Ir = p_buffer[ii * 3 + 0] / 65535.0f; // linear intensity 0 to 1
			float Ig = p_buffer[ii * 3 + 1] / 65535.0f; // linear intensity 0 to 1
			float Ib = p_buffer[ii * 3 + 2] / 65535.0f; // linear intensity 0 to 1
			float I = (Ir + Ig + Ib) / 3;
			//float I = max(Ir, max(Ig, Ib));
			float f_weight = float((j + 1 == m)? (1 - fabs(I - .5f) * 2) + 1.0f / 65536 :
				((I < .5f)? I * 2 : 1)); // weight function (saw / truncated saw)
			p_sum[0] += f_weight * Ir * k;
			p_sum[1] += f_weight * Ig * k;
			p_sum[2] += f_weight * Ib * k;
			f_weights += f_weight * k;
		}
		if(f_weights == 0) {
			p_image[i * 3 + 0] = 0;
			p_image[i * 3 + 1] = 0;
			p_image[i * 3 + 2] = 0;
		} else {
			p_image[i * 3 + 0] = p_sum[0] / f_weights;
			p_image[i * 3 + 1] = p_sum[1] / f_weights;
			p_image[i * 3 + 2] = p_sum[2] / f_weights;
		}
	}}
	/*for(size_t i = 0, n = size_t(w) * h * 3; i < n; ++ i)
		f_max = max(f_max, p_image[i]);*/ // so the first loop can run in parallel
	// assembe float HDR

	TBmp *p_bmp_image = new TBmp;
	p_bmp_image->n_width = h;
	p_bmp_image->n_height = w; // rotate 90 CCW
	p_bmp_image->p_buffer = (uint32_t*)p_image; // hack
	p_bmp_image->b_alpha = false;
	p_bmp_image->b_grayscale = false;
	p_bmp_image->n_former_bpc = sizeof(float);
	// make a float image

	for(int j = 0, m = raw_image_list.size(); j < m; ++ j)
		raw_image_list[j].first->Delete();
	raw_image_list.clear(); // no-op
	// cleanup the bitmaps

	//delete[] p_image; // not this one

	return p_bmp_image;
}

#if 0
/**
 *	@brief disabled (compiled out using if 0) experiment with HDR range compression
 *
 *	The live part scales the float image by the reciprocal of f_max, applies
 *	power of 0.3 and saves the result as "myHDR_cam1.png". The commented-out part
 *	is a tone mapping based on a fast bilateral filter of the log intensity.
 *
 *	@note NOTE(review): this refers to w, h, p_image and f_max which are not declared
 *		in this function, and it returns a value from a void function; it looks like
 *		a fragment cut out of another function and would not compile if enabled.
 */
void HDRDemo()
{
	
		printf("compressing range ...\n");

		/*TFloatBmp Bitmapa;
		{
			Bitmapa.New(w, h, 1);

			float maxI = 0, minI = 1e37f;
			f_max /= 1000;

			float *P = Bitmapa.Pixel(0,0);
			for (int y=0; y<Bitmapa.MaxY; y++) {
				for (int x=0; x<Bitmapa.MaxX; x++,P++) {
					int i = x + w * y;
					p_image[i * 3 + 0] /= f_max;
					p_image[i * 3 + 1] /= f_max;
					p_image[i * 3 + 2] /= f_max;
					float r = p_image[i * 3 + 0];
					float g = p_image[i * 3 + 1];
					float b = p_image[i * 3 + 2];
					float I = 1 + (r + g + b) / 3;
					*P = I;
					maxI = max(maxI, I);
					minI = min(minI, I);
					p_image[i * 3 + 0] /= I;
					p_image[i * 3 + 1] /= I;
					p_image[i * 3 + 2] /= I;
				}
			}
			// put image in bitmap, divide by intensity

			printf("minI: %g, maxI: %g\n", minI, maxI);
			const float DIVLOG2 = 1.0/log(2.0);

			P = Bitmapa.Pixel(0,0);
			for (int y=0; y<Bitmapa.MaxY; y++) {
				for (int x=0; x<Bitmapa.MaxX; x++,P++)
					*P = log( *P ) * DIVLOG2;
			}

			FastBilateralFilter FBF;

			FBF.RadiusFrac = 0.2; // filter radius relative to the image size
			FBF.RadiusPixels = 0; // and in pixels. The two are summed, they can be combined

			// Ledda (0.02) and Durand (1.32) recommend different values here. Both are
			// relative to the BINARY logarithm; if natural or decimal logarithm
			// is used instead, the value needs to be converted

			FBF.IntensityRadiusFrac = 0; // radius in intensity - here relative to the range of the image
			FBF.IntensityRadius = 0.02;  // here as an absolute value, again the two are summed and can be combined


			// this calculates it
			FBF.ComputeBilateralPrecompile(Bitmapa);
			FBF.ComputeBilateralFinish(Bitmapa);

			// the filtered result is now in
			// FBF.FilterOutput
			// it can be read simply using Pixel(x,y), which returns a pointer to the pixel in memory
			// In this case the result is allocated, so the floats are tightly packed regardless
			// of whether the size is divisible by anything.


			// The tone mapping is done by subtracting the filtered image from the original one:

			float SilaEfektu = 0.6;  // (strength of the effect) between 0 and 1, 0.8 is already very strong
			float Gamma = 1.0 / 2.2;       // in case a gamma-corrected result is wanted
			//2.2;

			P = Bitmapa.Pixel(0,0);
			float *POut = FBF.FilterOutput.Pixel(0,0);
			for (int y=0; y<Bitmapa.MaxY; y++) {
				for (int x=0; x<Bitmapa.MaxX; x++,P++,POut++)
				{
					*P = (*P) - SilaEfektu * (*POut);

					*P *= Gamma;       // while still in the logarithmic domain, it is good to take advantage of it
					// and apply the gamma correction, as a multiplication is all it takes

					*P = pow(2.0f,*P);  // and convert back from the logarithm
				}
			}

			f_max = 0;
			P = Bitmapa.Pixel(0,0);
			for (int y=0; y<Bitmapa.MaxY; y++) {
				for (int x=0; x<Bitmapa.MaxX; x++,P++) {
					int i = x + w * y;
					float I = *P;
					p_image[i * 3 + 0] *= I;
					p_image[i * 3 + 1] *= I;
					p_image[i * 3 + 2] *= I;
					f_max = max(f_max, max(max(p_image[i * 3 + 2], p_image[i * 3 + 1]), p_image[i * 3 + 0]));
				}
			}
			// multiply RGB back by intensity

			// Bitmapa now contains the tone-mapped image

			// this would be freed automatically when FBF goes off the stack, it is only needed to free
			// some memory early; otherwise one can go on and filter another image without freeing
			FBF.ComputeBilateralFreeMem();
		}*/

		TBmp *p_bitmap = TBmp::p_Alloc(w, h);
		f_max = 1 / f_max;
		{const int n = w * h;
		#pragma omp parallel for
		for(int i = 0; i < n; ++ i) {
			float _f_r = p_image[i * 3 + 0] * f_max;
			float _f_g = p_image[i * 3 + 1] * f_max;
			float _f_b = p_image[i * 3 + 2] * f_max;
			int f_r = max(0, min(255, /**/pow/**/(_f_r/**/, .3f/**/) * 255));
			int f_g = max(0, min(255, /**/pow/**/(_f_g/**/, .3f/**/) * 255));
			int f_b = max(0, min(255, /**/pow/**/(_f_b/**/, .3f/**/) * 255));
			p_bitmap->p_buffer[i] = 0xff000000U | int(f_r) | (int(f_g) << 8) | (int(f_b) << 16);
		}}
		// "tone map" convert to 8 bit

		printf("saving ...\n");

		CPngCodec::Save_PNG("myHDR_cam1.png", *p_bitmap);

		p_bitmap->Delete();

		return 0;
}
#endif // 0

/**
 *	@brief converts a float RGB image to an 8-bit image ("tone mapping")
 *
 *	The float values are divided by the maximum value found in the image,
 *	raised to the power of 0.5, scaled to the 0 - 255 range and clamped. Each
 *	output pixel is written as 0xff000000 | r | (g << 8) | (b << 16).
 *
 *	@param[out] p_bitmap is the destination bitmap; its n_width and n_height
 *		determine the number of pixels processed
 *	@param[in] p_destf is the source image; its p_buffer must contain three floats
 *		per each pixel of p_bitmap (its own dimensions are not used)
 */
void ToneMap(TBmp *p_bitmap, const TBmp *p_destf)
{
	const int w = p_bitmap->n_width;
	const int h = p_bitmap->n_height;
	const float *p_image = (const float*)p_destf->p_buffer;

	float f_max = 0;
	for(size_t i = 0, n = size_t(w) * h * 3; i < n; ++ i)
		f_max = max(f_max, p_image[i]);
	// get maximum over all the components

	const float f_scale = (f_max > 0)? 1 / f_max : 0;
	// normalization factor; an all-black (or empty) image would otherwise lead to
	// division by zero, and 0 * inf = NaN would then be converted to int below

	{const int n = w * h;
	#pragma omp parallel for
	for(int i = 0; i < n; ++ i) {
		double _f_r = p_image[i * 3 + 0] * f_scale;
		double _f_g = p_image[i * 3 + 1] * f_scale;
		double _f_b = p_image[i * 3 + 2] * f_scale;
		int f_r = max(0, min(255, int(pow(_f_r, .5) * 255)));
		int f_g = max(0, min(255, int(pow(_f_g, .5) * 255)));
		int f_b = max(0, min(255, int(pow(_f_b, .5) * 255)));
		p_bitmap->p_buffer[i] = 0xff000000U | int(f_r) | (int(f_g) << 8) | (int(f_b) << 16);
	}}
	// "tone map" convert to 8 bit
}

// pack to one byte so that the structure has no padding; arrays of float3 are
// obtained by casting plain float buffers with three floats per pixel
#pragma pack(1)

/**
 *	@brief a triplet of floats, used as a single pixel of the float (HDR) images
 */
struct float3 {
	float f[3]; /**< @brief the three components of the pixel */
};

#pragma pack()
// end of the packed section

/**
 *	@brief draws outline of an axis-aligned rectangle to a float3 raster, with clipping
 *
 *	@param[in,out] p_buffer is pointer to the raster (n_width * n_height pixels, stored by rows)
 *	@param[in] n_width is width of the raster, in pixels
 *	@param[in] n_height is height of the raster, in pixels
 *	@param[in] n_x0 is horizontal coordinate of the first corner
 *	@param[in] n_y0 is vertical coordinate of the first corner
 *	@param[in] n_x1 is horizontal coordinate of the opposite corner (inclusive)
 *	@param[in] n_y1 is vertical coordinate of the opposite corner (inclusive)
 *	@param[in] n_color is the value written to the outline pixels
 *
 *	@note The corners may be given in any order and may lie outside of the raster;
 *		only the parts of the edges which are inside the raster are written.
 */
void DrawRect_F3(float3 *p_buffer, int n_width, int n_height, int n_x0, int n_y0, int n_x1, int n_y1, float3 n_color)
{
	if(n_x1 < n_x0)
		std::swap(n_x0, n_x1);
	if(n_y1 < n_y0)
		std::swap(n_y0, n_y1);
	// order the corners so that (x0, y0) is the minimum and (x1, y1) is the maximum

	const bool b_top_inside = n_y0 >= 0 && n_y0 < n_height;
	const bool b_bottom_inside = n_y1 >= 0 && n_y1 < n_height;
	const bool b_left_inside = n_x0 >= 0 && n_x0 < n_width;
	const bool b_right_inside = n_x1 >= 0 && n_x1 < n_width;
	// see which of the four edges lie inside the raster

	_ASSERTE(!b_top_inside || n_y1 >= 0);
	_ASSERTE(!b_left_inside || n_x1 >= 0);
	// follows from the ordering above

	const int n_first_col = max(0, n_x0), n_end_col = min(n_width, n_x1 + 1);
	if(b_top_inside) {
		float3 *p_row = p_buffer + n_y0 * n_width;
		for(int x = n_first_col; x < n_end_col; ++ x)
			p_row[x] = n_color;
	}
	if(b_bottom_inside) {
		float3 *p_row = p_buffer + n_y1 * n_width;
		for(int x = n_first_col; x < n_end_col; ++ x)
			p_row[x] = n_color;
	}
	// draw horizontal lines, clipped to the raster width

	const int n_first_row = max(0, n_y0), n_end_row = min(n_height, n_y1 + 1);
	if(b_left_inside) {
		for(int y = n_first_row; y < n_end_row; ++ y)
			p_buffer[n_x0 + y * n_width] = n_color;
	}
	if(b_right_inside) {
		for(int y = n_first_row; y < n_end_row; ++ y)
			p_buffer[n_x1 + y * n_width] = n_color;
	}
	// draw vertical lines, clipped to the raster height
}

#if 0
/**
 *	@brief disabled (compiled out using if 0) test of color channel order in the image codecs
 *
 *	Loads "test.tga" and saves it using the TGA, PNG and JPEG codecs, with the last
 *	argument of the save functions set to both true and false (annotated as "no flip"
 *	and "bgr flip" below), as color, color with checkerboard alpha, grayscale and
 *	grayscale with alpha. Finally, it loads several PNG and JPEG test images and
 *	saves them as PNG. The results are meant to be inspected by hand.
 */
void ColorspaceTest()
{
	TBmp *p_test_image = CTgaCodec::p_Load_TGA("test.tga");
	// BGR(A) image

	CTgaCodec::Save_TGA("test_savetga_ok.tga", *p_test_image, true); // no flip
	CTgaCodec::Save_TGA("test_savetga_flip.tga", *p_test_image, false); // bgr flip
	CPngCodec::Save_PNG("test_savepng_ok.png", *p_test_image, true); // no flip
	CPngCodec::Save_PNG("test_savepng_flip.png", *p_test_image, false); // bgr flip
	CJpegCodec::Save_JPEG("test_savejpg_ok.jpg", *p_test_image, true); // no flip
	CJpegCodec::Save_JPEG("test_savejpg_flip.jpg", *p_test_image, false); // bgr flip
	{
#ifdef __JPEG_ENC_BGR
		FILE *p_fw = fopen("test_savelamejpg_ok.jpg", "wb"); // if BGR, will save ok
#else // __JPEG_ENC_BGR
		FILE *p_fw = fopen("test_savelamejpg_flip.jpg", "wb"); // otherwise will flip
#endif // __JPEG_ENC_BGR
		CJpegEncoder().Encode_Jpeg(p_fw, p_test_image);
		fclose(p_fw);
	}

	for(int y = 0, w = p_test_image->n_width, h = p_test_image->n_height; y < h; ++ y) {
		for(int x = 0; x < w; ++ x) {
			if((x / 10 + y / 10) & 1) {
				p_test_image->p_buffer[x + w * y] &= 0xffffff;
				p_test_image->p_buffer[x + w * y] |= 0x80000000U;
			} else
				p_test_image->p_buffer[x + w * y] |= 0xff000000U;
		}
	}
	p_test_image->b_alpha = true;
	// make alpha (checkerboard of 10 x 10 pixel squares with alpha 0x80 and 0xff)

	CTgaCodec::Save_TGA("test_savetgaalpha_ok.tga", *p_test_image, true); // no flip
	CTgaCodec::Save_TGA("test_savetgaalpha_flip.tga", *p_test_image, false); // bgr flip
	CPngCodec::Save_PNG("test_savepngalpha_ok.png", *p_test_image, true); // no flip
	CPngCodec::Save_PNG("test_savepngalpha_flip.png", *p_test_image, false); // bgr flip
	// try with alpha

	p_test_image->b_alpha = false;
	p_test_image->Make_Grayscale();
	CTgaCodec::Save_TGA("test_savetgagray_ok.tga", *p_test_image, true); // no flip
	CTgaCodec::Save_TGA("test_savetgagray_flip.tga", *p_test_image, false); // bgr flip
	CPngCodec::Save_PNG("test_savepnggray_ok.png", *p_test_image, true); // no flip
	CPngCodec::Save_PNG("test_savepnggray_flip.png", *p_test_image, false); // bgr flip
	CJpegCodec::Save_JPEG("test_savejpggray_ok.jpg", *p_test_image, true); // no flip
	CJpegCodec::Save_JPEG("test_savejpggray_flip.jpg", *p_test_image, false); // bgr flip
	// try with grayscale

	for(int y = 0, w = p_test_image->n_width, h = p_test_image->n_height; y < h; ++ y) {
		for(int x = 0; x < w; ++ x) {
			if((x / 10 + y / 10) & 1) {
				p_test_image->p_buffer[x + w * y] &= 0xffffff;
				p_test_image->p_buffer[x + w * y] |= 0x80000000U;
			} else
				p_test_image->p_buffer[x + w * y] |= 0xff000000U;
		}
	}
	p_test_image->b_alpha = true;
	// make alpha
	CTgaCodec::Save_TGA("test_savetgagrayalpha_ok.tga", *p_test_image, true); // no flip
	CTgaCodec::Save_TGA("test_savetgagrayalpha_flip.tga", *p_test_image, false); // bgr flip
	CPngCodec::Save_PNG("test_savepnggrayalpha_ok.png", *p_test_image, true); // no flip
	CPngCodec::Save_PNG("test_savepnggrayalpha_flip.png", *p_test_image, false); // bgr flip
	CJpegCodec::Save_JPEG("test_savejpggrayalpha_ok.jpg", *p_test_image, true); // no flip
	CJpegCodec::Save_JPEG("test_savejpggrayalpha_flip.jpg", *p_test_image, false); // bgr flip
	// try with grayscale and alpha

	p_test_image->Delete();

	// test the loaders, each of the loaded images is saved as PNG
	p_test_image = CPngCodec::p_Load_PNG("test.png");
	CPngCodec::Save_PNG("test_loadpng.png", *p_test_image, true); // no flip
	p_test_image->Delete();
	p_test_image = CPngCodec::p_Load_PNG("test_rgba.png");
	CPngCodec::Save_PNG("test_loadpng_rgba.png", *p_test_image, true); // no flip
	p_test_image->Delete();
	p_test_image = CPngCodec::p_Load_PNG("test_256c.png");
	CPngCodec::Save_PNG("test_loadpng_256c.png", *p_test_image, true); // no flip
	p_test_image->Delete();
	p_test_image = CJpegCodec::p_Load_JPEG("test.jpg");
	CPngCodec::Save_PNG("test_loadjpg.png", *p_test_image, true); // no flip
	p_test_image->Delete();
	p_test_image = p_LoadJpegImage("test.jpg");
	CPngCodec::Save_PNG("test_loadlamejpg.png", *p_test_image, true); // no flip
	p_test_image->Delete();
}
#endif // 0

/**
 *	@brief prints help to stdout
 */
void PrintHelp(const char *p_s_module_name)
{
	printf("use: %s [-h|--help] [-q|--quiet|-v|--verbose]\n\t", p_s_module_name);
	// the synopsis line is the only part which contains the module name

	fputs("[-sb|--skip-brackets N] [-i|--input-path <path-to-cam>]\n"
		"\n"
		"where:\n"
		"\tN is number of bracketed pictures to skip (speeds processing up)\n"
		"\tpath-to-cam is name of folder, containing camera1 - camera4 subfolders\n"
		"\t\t(default is current working directory)\n", stdout);
	// the rest of the synopsis and the description of the arguments is a fixed text
}

#ifdef __SNAPPER_USE_EXR
#include "OpenEXR.h"
#endif // __SNAPPER_USE_EXR

int main(int n_arg_num, const char **p_arg_list)
{
	bool b_verbose = true;
	const char *p_s_path = ".";
	int n_skip_brackets = 0;

	for(int i = 1; i < n_arg_num; ++ i) {
		if(!strcmp(p_arg_list[i], "--help") || !strcmp(p_arg_list[i], "-h")) {
			PrintHelp(*p_arg_list);
			return 0;
		} else if(!strcmp(p_arg_list[i], "--quiet") || !strcmp(p_arg_list[i], "-q")) {
			b_verbose = false;
		} else if(!strcmp(p_arg_list[i], "--verbose") || !strcmp(p_arg_list[i], "-v")) {
			b_verbose = true;
		} else if(i + 1 == n_arg_num) {
			fprintf(stderr, "error: argument \'%s\': missing value or an unknown argument\n", p_arg_list[i]);
			return -1;
		} else if(!strcmp(p_arg_list[i], "--skip-brackets") || !strcmp(p_arg_list[i], "-sb")) {
			n_skip_brackets = max(0, atoi(p_arg_list[++ i]));
			// must be nonnegative number
		} else if(!strcmp(p_arg_list[i], "--input-path") || !strcmp(p_arg_list[i], "-i")) {
			p_s_path = p_arg_list[++ i];
			// load images from this folder
		} else {
			fprintf(stderr, "error: argument \'%s\': an unknown argument\n", p_arg_list[i]);
			return -1;
		}
	}
	// "parse" cmdline

	try {
		/*EXRTest();
		return 0;*/
		/*ColorspaceTest();
		return 0;*/
		/*printf("calibrating ...\n");
		Calibrate();
		return 0;*/

		CTimer t;
		std::vector<std::pair<double, const char*> > time_seg;
		// timing and benchmarking

		const float K[] = {0.250403979510877f, 0.277885983845201f, 0.3071973231629f}, xc = 0, yc = 0;
		// fitted distortion parametrs

		std::vector<std::string> camera_image_list[4];
		if(!CDirTraversal::Traverse((std::string(p_s_path) + "/camera1").c_str(),
		   CFileList_Listener(camera_image_list[0]), false) ||
		   !CDirTraversal::Traverse((std::string(p_s_path) + "/camera2").c_str(),
		   CFileList_Listener(camera_image_list[1]), false) ||
		   !CDirTraversal::Traverse((std::string(p_s_path) + "/camera3").c_str(),
		   CFileList_Listener(camera_image_list[2]), false) ||
		   !CDirTraversal::Traverse((std::string(p_s_path) + "/camera4").c_str(),
		   CFileList_Listener(camera_image_list[3]), false)) {
			fprintf(stderr, "error: can't find camera1 - 4 folders\n");
			return -1;
		}
		for(int i = 0; i < 4; ++ i) {
			std::sort(camera_image_list[i].begin(), camera_image_list[i].end());
			if(camera_image_list[i].empty()) {
#ifndef __SNAPPER_HDR_PROCESSING
				fprintf(stderr, "error: the camera1 - 4 folders are empty (expect .jpg inside)\n");
#else // !__SNAPPER_HDR_PROCESSING
				fprintf(stderr, "error: the camera1 - 4 folders are empty (expect .cr2 inside)\n");
#endif // !__SNAPPER_HDR_PROCESSING
				return -1;
			}
			if(camera_image_list[i].size() <= unsigned(n_skip_brackets)) {
				fprintf(stderr, "error: the camera%d contains only %d files, can't skip %d brackets\n",
					i + 1, camera_image_list[i].size(), n_skip_brackets);
				return -1;
			}
			camera_image_list[i].erase(camera_image_list[i].begin(),
				camera_image_list[i].begin() + n_skip_brackets);
		}
		// get file lists

		time_seg.push_back(std::make_pair(t.f_Time(), "directory lookup"));

		if(b_verbose)
			printf("loading images ...\n");

		TBmp *p_cam[4] = {
#ifndef __SNAPPER_HDR_PROCESSING
			p_LoadJpegImage(camera_image_list[0].back().c_str()),
			p_LoadJpegImage(camera_image_list[1].back().c_str()),
			p_LoadJpegImage(camera_image_list[2].back().c_str()),
			p_LoadJpegImage(camera_image_list[3].back().c_str())
#else // !__SNAPPER_HDR_PROCESSING
			p_FloatLoadImage(camera_image_list[0], b_verbose),
			p_FloatLoadImage(camera_image_list[1], b_verbose),
			p_FloatLoadImage(camera_image_list[2], b_verbose),
			p_FloatLoadImage(camera_image_list[3], b_verbose)
#endif // !__SNAPPER_HDR_PROCESSING
		};
		if(!p_cam[0] || !p_cam[1] || !p_cam[2] || !p_cam[3]) {
			fprintf(stderr, "error: failed to load all four camera images\n");
			return -1;
		}
		// load images // todo - load one at a time, minimize memory use

		time_seg.push_back(std::make_pair(t.f_Time(), "image loading"));

		if(b_verbose)
			printf("processing images ...\n");

		const int sw = p_cam[0]->n_width, sh = p_cam[0]->n_height;

		TBmp *p_corcam[4];
		for(int i = 0; i < 4; ++ i) {
#ifndef __SNAPPER_HDR_PROCESSING
			p_corcam[i] = p_cam[i]->p_Clone(true);
#else // !__SNAPPER_HDR_PROCESSING
			p_corcam[i] = new TBmp;
			*p_corcam[i] = *p_cam[i];
			p_corcam[i]->p_buffer = (uint32_t*)(new float[3 * p_cam[i]->n_width * p_cam[i]->n_height]);
			// contents ignored
#endif // !__SNAPPER_HDR_PROCESSING
		}
		const int ss = max(sw, sh), xoff = (ss - sw) / 2, yoff = (ss - sh) / 2;
		for(int n_pass = 0; n_pass < 2; ++ n_pass) {
			for(int y = 0; y < sh; ++ y) {
				for(int x = 0; x < sw; ++ x) {
					float xd = float(x + xoff) / ss * 2 - 1;
					float yd = float(y + yoff) / ss * 2 - 1;
					// get the coordiantes of the original (distorted) point

					float r = sqrt((xd - xc) * (xd - xc) + (yd - yc) * (yd - yc));
					float r2 = r * r, r4 = r2 * r2, r8 = r4 * r4;
					float xu = (xd - xc) * (1 + K[0] * r2 + K[1] * r4 + K[2] * r8);
					float yu = (yd - yc) * (1 + K[0] * r2 + K[1] * r4 + K[2] * r8);
					//float xu = xd, yu = yd, r; // no corr
					// calculate the corrected point

					float z = .8f; // focal length
					float r0 = sqrt(xu * xu + yu * yu + z * z);
					float r1 = sqrt(yu * yu + z * z);
					float theta = acos(r1 / r0) * ((xu < 0)? -1 : 1);
					float psi = acos(z / r1) * ((yu < 0)? -1 : 1);
					psi *= float(cosh(fabs((theta - .4f) * 0.9))); // fudge factor, compensate for misaligned lenses
					xu = theta * .85f; // stretch vertically in the resulting image
					yu = psi;

					int nxu = /*max*/(/*0, min*/(sw - 1, int((xu * .5f + .5f) * ss - xoff)));
					int nyu = /*max*/(/*0, min*/(sh - 1, int((yu * .5f + .5f) * ss - yoff)));
					// transform back to pixel coords

					if(nxu < 0 || nxu >= sw || nyu < 0 || nyu >= sh)
						continue;

					for(int i = 0; i < 4; ++ i) {
#ifndef __SNAPPER_HDR_PROCESSING
						uint32_t n_src = p_cam[i]->p_buffer[x + y * sw];
						if(n_pass)
							p_corcam[i]->p_buffer[nxu + nyu * sw] = p_cam[i]->p_buffer[x + y * sw]; // need to lerp it
						else
							p_corcam[i]->DrawRect(nxu - 2, nyu - 2, nxu + 2, nyu + 2, n_src); // fake it with "splatting" for now
#else // !__SNAPPER_HDR_PROCESSING
						const float3 *p_src = &((const float3*)p_cam[i]->p_buffer)[x + y * sw];
						float3 *p_dst = &((float3*)p_corcam[i]->p_buffer)[nxu + nyu * sw];
						if(n_pass)
							*p_dst = *p_src;
						else {
							DrawRect_F3((float3*)p_corcam[i]->p_buffer, p_corcam[i]->n_width, p_corcam[i]->n_height,
								nxu - 2, nyu - 2, nxu + 2, nyu + 2, *p_src);
						}
#endif // !__SNAPPER_HDR_PROCESSING
					}
				}
			}
		}

		time_seg.push_back(std::make_pair(t.f_Time(), "image deformation"));

#ifndef __SNAPPER_HDR_PROCESSING
		TBmp *p_dest = TBmp::p_Alloc(sh * 4, sw);
#else // !__SNAPPER_HDR_PROCESSING
		TBmp *p_dest = TBmp::p_Alloc(sh * 4, sw);
		TBmp *p_destf = TBmp::p_Alloc(sh * 4 * sizeof(float3) / sizeof(uint32_t), sw);
		p_destf->n_width = sh;
#endif // !__SNAPPER_HDR_PROCESSING

		for(int i = 0; i < 4; ++ i) {
			_ASSERTE(p_cam[i]->n_width == sw && p_cam[i]->n_height == sh);
			for(int y = 0; y < sw; ++ y) {
				for(int x = 0; x < sh; ++ x) {
#ifndef __SNAPPER_HDR_PROCESSING
					p_dest->p_buffer[x + i * sh + y * (sh * 4)] = p_corcam[i]->p_buffer[y + sw * (sh - 1 - x)];
#else // !__SNAPPER_HDR_PROCESSING
					((float3*)p_destf->p_buffer)[x + i * sh + y * (sh * 4)] =
						((float3*)p_corcam[i]->p_buffer)[y + sw * (sh - 1 - x)];
#endif // !__SNAPPER_HDR_PROCESSING
				}
			}
		}
		// merge the images

		time_seg.push_back(std::make_pair(t.f_Time(), "image stitching"));

#ifdef __SNAPPER_HDR_PROCESSING
		ToneMap(p_dest, p_destf);

		time_seg.push_back(std::make_pair(t.f_Time(), "tone mapping"));
#endif // __SNAPPER_HDR_PROCESSING

		if(b_verbose)
			printf("writing output ...\n");

#ifndef __SNAPPER_HDR_PROCESSING
		//CPngCodec::Save_PNG((std::string(p_s_path) + "/processed.png").c_str(), *p_dest, true);
		CJpegCodec::Save_JPEG((std::string(p_s_path) + "/processed.jpg").c_str(), *p_dest, true);
#else // !__SNAPPER_HDR_PROCESSING
		//CPngCodec::Save_PNG((std::string(p_s_path) + "/processed-hdr.png").c_str(), *p_dest, true);
		CJpegCodec::Save_JPEG((std::string(p_s_path) + "/processed-hdr.jpg").c_str(), *p_dest, true);
#ifdef __SNAPPER_USE_EXR
		EXRWrite((std::string(p_s_path) + "/processed-hdr.exr").c_str(),
			p_destf->n_width, p_destf->n_height, (const float*)p_destf->p_buffer);
#endif // __SNAPPER_USE_EXR
#endif // !__SNAPPER_HDR_PROCESSING
		// save

		time_seg.push_back(std::make_pair(t.f_Time(), "writing result"));

#ifdef __SNAPPER_HDR_PROCESSING
		p_destf->Delete();
#endif // __SNAPPER_HDR_PROCESSING
		p_dest->Delete();
		for(int i = 0; i < 4; ++ i) {
			if(p_cam[i])
				p_cam[i]->Delete();
			if(p_corcam[i])
				p_corcam[i]->Delete();
		}
		// free the memory

		time_seg.push_back(std::make_pair(t.f_Time(), "memory cleanup"));

		if(b_verbose) {
			printf("\ndone. it took " PRItime "\n", PRItimeparams(time_seg.back().first));

			printf("\ndebug:");
			double f_prev_t = 0;
			for(size_t i = 0, n = time_seg.size(); i < n; ++ i) {
				double f_time = time_seg[i].first - f_prev_t;
				f_prev_t = time_seg[i].first;
				const char *p_s_label = time_seg[i].second;
				printf("\tcounter \'%s\': %.5f sec\n", p_s_label, f_time);
			}
		}
	} catch(std::exception &r_exc) {
		fprintf(stderr, "error: uncaught exception: \'%s\'\n", r_exc.what());
		return -1;
	}

	return 0;
}
