#include "system.h"

/**
 * @brief Load a 2&times;2 block of pixels into four consecutive floats.
 *
 * No bounds handling is performed here; the pixel is fetched directly
 * through imageptr_pixel_const().
 *
 * @param[out] t     destination, at least four elements; t[0..3] receive the
 *                   pixels at vec2_offset_0_0, vec2_offset_1_0,
 *                   vec2_offset_0_1 and vec2_offset_1_1 of @p local
 * @param[in]  data  image to read from
 * @param[in]  local position of the block inside @p data
 */
static
void load_2x2_fast(
	float *t,
	const struct imageptr_t *data,
	const struct vec2_t local
)
{
	t[0] = *imageptr_pixel_const(data, vec2_offset_0_0(local));
	t[1] = *imageptr_pixel_const(data, vec2_offset_1_0(local));
	t[2] = *imageptr_pixel_const(data, vec2_offset_0_1(local));
	t[3] = *imageptr_pixel_const(data, vec2_offset_1_1(local));
}

/**
 * @brief Store the LL coefficient of a 2&times;2 block.
 *
 * @param[in]  t         block of four coefficients; only t[0] is read
 * @param[out] codeblock destination image for the LL band
 * @param[in]  local     position inside @p codeblock to write to
 */
static
void save_2x2_llband_fast(
	float *t,
	struct imageptr_t *codeblock,
	const struct vec2_t local
)
{
	*imageptr_pixel(codeblock, local) = t[0];
}

/**
 * @brief Store the HL coefficient of a 2&times;2 block.
 *
 * @param[in]  t         block of four coefficients; only t[1] is read
 * @param[out] codeblock destination image for the HL band
 * @param[in]  local     position inside @p codeblock to write to
 */
static
void save_2x2_hlband_fast(
	float *t,
	struct imageptr_t *codeblock,
	const struct vec2_t local
)
{
	*imageptr_pixel(codeblock, local) = t[1];
}

/**
 * @brief Store the LH coefficient of a 2&times;2 block.
 *
 * @param[in]  t         block of four coefficients; only t[2] is read
 * @param[out] codeblock destination image for the LH band
 * @param[in]  local     position inside @p codeblock to write to
 */
static
void save_2x2_lhband_fast(
	float *t,
	struct imageptr_t *codeblock,
	const struct vec2_t local
)
{
	*imageptr_pixel(codeblock, local) = t[2];
}

/**
 * @brief Store the HH coefficient of a 2&times;2 block.
 *
 * @param[in]  t         block of four coefficients; only t[3] is read
 * @param[out] codeblock destination image for the HH band
 * @param[in]  local     position inside @p codeblock to write to
 */
static
void save_2x2_hhband_fast(
	float *t,
	struct imageptr_t *codeblock,
	const struct vec2_t local
)
{
	*imageptr_pixel(codeblock, local) = t[3];
}

/**
 * @brief Push two samples through a four-stage filter with persistent state.
 *
 * Each stage combines one value kept from the previous call with the value
 * freshly produced by the stage before it, so the two outputs depend on
 * samples supplied in earlier calls as well as on the current pair.
 *
 * NOTE(review): judging by the CDF97_* constant names this is presumably one
 * step of the CDF 9/7 lifting scheme -- confirm against the definitions.
 *
 * @param[in,out] t0   first sample; replaced by the first output
 * @param[in,out] t1   second sample; replaced by the second output
 * @param[in,out] buff four floats of state carried between calls
 */
static
void vert_2x1(
	float *t0,
	float *t1,
	float *buff
)
{
	// stage weights, listed in the order the stages are evaluated
	const float p1 = +CDF97_P1_S;
	const float u1 = +CDF97_U1_S;
	const float p2 = +CDF97_P2_S;
	const float u2 = +CDF97_U2_S;

	// everything is read before anything is written
	const float prev0 = buff[0];
	const float prev1 = buff[1];
	const float prev2 = buff[2];
	const float prev3 = buff[3];

	const float in0 = *t0;
	const float in1 = *t1;

	// each stage feeds the next one
	const float next3 = in1;
	const float next2 = in0 + p1 * ( prev3 + next3 );
	const float next1 = prev3 + u1 * ( prev2 + next2 );
	const float next0 = prev2 + p2 * ( prev1 + next1 );

	const float out0 = prev1 + u2 * ( prev0 + next0 );
	const float out1 = next0;

	*t0 = out0;
	*t1 = out1;

	// the freshly computed stage values become the state for the next call
	buff[0] = next0;
	buff[1] = next1;
	buff[2] = next2;
	buff[3] = next3;
}

/**
 * @brief Scale 2&times;2 block of coefficients.
 *
 * Only the first and the last coefficient are touched: t[0] is multiplied by
 * CDF97_S2_S and t[3] by CDF97_S1_S. t[1] and t[2] are left as they are.
 *
 * @param[in,out] t block of four coefficients
 */
static
void scale_2x2(
	float *t
)
{
	t[0] *= CDF97_S2_S;
	t[3] *= CDF97_S1_S;
}

/**
 * @brief Filter a 2&times;2 block in both directions and scale the result.
 *
 * The block is laid out row by row: t[0], t[1] form the upper row and
 * t[2], t[3] the lower one.
 *
 * @param[in,out] t        block of four samples, transformed in place
 * @param[in,out] buffer_x state for the vertical pass; two consecutive
 *                         4-float records, one per column of the block
 * @param[in,out] buffer_y state for the horizontal pass; two consecutive
 *                         4-float records, one per row of the block
 */
static
void core_2x2(
	float *t,
	float *buffer_x,
	float *buffer_y
)
{
	const int state_len = 4;

	float *upper_row_state = buffer_y;
	float *lower_row_state = buffer_y + state_len;
	float *left_col_state  = buffer_x;
	float *right_col_state = buffer_x + state_len;

	// along the rows
	vert_2x1(&t[0], &t[1], upper_row_state);
	vert_2x1(&t[2], &t[3], lower_row_state);

	// along the columns
	vert_2x1(&t[0], &t[2], left_col_state);
	vert_2x1(&t[1], &t[3], right_col_state);

	// final scaling of the block
	scale_2x2(t);
}

/**
 * @brief Load a 2&times;2 block of pixels through imageptr_pixel_ext_limited2().
 *
 * The block is stored row by row, i.e. the pixel at offset (x,y) from
 * @p local ends up in t[2*y + x].
 *
 * NOTE(review): the accessor name suggests that coordinates outside @p size
 * are handled by some form of extension -- confirm in its definition.
 *
 * @param[out] t     destination, at least four elements
 * @param[in]  input image to read from
 * @param[in]  local position of the upper-left pixel of the block
 * @param[in]  size  passed unchanged to imageptr_pixel_ext_limited2()
 */
static
void load_2x2_limited2(
	float *t,
	const struct imageptr_t *input,
	const struct vec2_t local,
	const struct vec2_t size
)
{
	// a single pass over the flat index visits (0,0), (1,0), (0,1), (1,1)
	for(int i = 0; i < 4; i++)
	{
		const int dx = i % 2;
		const int dy = i / 2;

		t[i] = *imageptr_pixel_ext_limited2(input, vec2_add(local, vec2_create(dx,dy)), size);
	}
}

/**
 * @brief Horizontal-only variant of the 2&times;2 core.
 *
 * Runs just the row pass on both rows of the block; there is no column pass
 * and no scaling.
 *
 * @param[in,out] t        block of four samples (t[0], t[1] upper row,
 *                         t[2], t[3] lower row), transformed in place
 * @param[in]     buffer_x unused, never dereferenced
 * @param[in,out] buffer_y state for the row pass; two consecutive 4-float
 *                         records, one per row of the block
 */
static
void core_2x2_h(
	float *t,
	float *buffer_x,
	float *buffer_y
)
{
	UNUSED(buffer_x);

	const int state_len = 4;

	float *upper_row_state = buffer_y;
	float *lower_row_state = buffer_y + state_len;

	// along the rows only
	vert_2x1(&t[0], &t[1], upper_row_state);
	vert_2x1(&t[2], &t[3], lower_row_state);
}

/**
 * @brief Transform one 8&times;8 tile and store the four 4&times;4 subbands.
 *
 * The tile is walked in 2&times;2 blocks, row by row. Every block is loaded
 * with load_2x2_fast(), run through core_2x2() and its four coefficients are
 * written to the same position (block index) of the four output bands.
 *
 * @param[in,out] buffer_x per-column filter state; records of 4 floats for
 *                         columns 0..7 are accessed
 * @param[in,out] buffer_y per-row filter state; records of 4 floats for
 *                         rows 0..7 are accessed
 * @param[in]     input    source tile, read at coordinates 0..7
 * @param[out]    llband   destination of t[0], written at coordinates 0..3
 * @param[out]    hlband   destination of t[1], written at coordinates 0..3
 * @param[out]    lhband   destination of t[2], written at coordinates 0..3
 * @param[out]    hhband   destination of t[3], written at coordinates 0..3
 */
void load_core_save_8x8_fast(
	float *buffer_x,
	float *buffer_y,
	const struct imageptr_t *input,
	struct imageptr_t *llband,
	struct imageptr_t *hlband,
	struct imageptr_t *lhband,
	struct imageptr_t *hhband
)
{
	const int state_len = 4;

	// (bx,by) is the block index, which is also the position in the bands
	for(int by = 0; by < 4; by++)
	{
		const int y = 2 * by;
		float *row_state = buffer_y + y*state_len;

		for(int bx = 0; bx < 4; bx++)
		{
			const int x = 2 * bx;
			float *col_state = buffer_x + x*state_len;
			float t[4];

			load_2x2_fast(t, input, vec2_create(x,y));

			core_2x2(t, col_state, row_state);

			save_2x2_llband_fast(t, llband, vec2_create(bx,by));
			save_2x2_hlband_fast(t, hlband, vec2_create(bx,by));
			save_2x2_lhband_fast(t, lhband, vec2_create(bx,by));
			save_2x2_hhband_fast(t, hhband, vec2_create(bx,by));
		}
	}
}

/**
 * @brief Transform one 8&times;8 tile located at @p local and store the four
 *        4&times;4 subbands.
 *
 * Same processing as the fast variant, except that the pixels are fetched
 * with load_2x2_limited2() at @p local + (x,y), with @p size passed along.
 *
 * @param[in,out] buffer_x per-column filter state; records of 4 floats for
 *                         columns 0..7 are accessed
 * @param[in,out] buffer_y per-row filter state; records of 4 floats for
 *                         rows 0..7 are accessed
 * @param[in]     input    source image
 * @param[in]     local    position of the tile inside @p input
 * @param[in]     size     forwarded to load_2x2_limited2()
 * @param[out]    llband   destination of t[0], written at coordinates 0..3
 * @param[out]    hlband   destination of t[1], written at coordinates 0..3
 * @param[out]    lhband   destination of t[2], written at coordinates 0..3
 * @param[out]    hhband   destination of t[3], written at coordinates 0..3
 */
void load_core_save_8x8_slow(
	float *buffer_x,
	float *buffer_y,
	const struct imageptr_t *input,
	const struct vec2_t local,
	const struct vec2_t size,
	struct imageptr_t *llband,
	struct imageptr_t *hlband,
	struct imageptr_t *lhband,
	struct imageptr_t *hhband
)
{
	const int state_len = 4;

	// (bx,by) is the block index, which is also the position in the bands
	for(int by = 0; by < 4; by++)
	{
		const int y = 2 * by;
		float *row_state = buffer_y + y*state_len;

		for(int bx = 0; bx < 4; bx++)
		{
			const int x = 2 * bx;
			float *col_state = buffer_x + x*state_len;
			float t[4];

			// fetch the block relative to the tile origin
			load_2x2_limited2(t, input, vec2_add(local, vec2_create(x,y)), size);

			// filter and scale
			core_2x2(t, col_state, row_state);

			// scatter the coefficients into the bands
			save_2x2_llband_fast(t, llband, vec2_create(bx,by));
			save_2x2_hlband_fast(t, hlband, vec2_create(bx,by));
			save_2x2_lhband_fast(t, lhband, vec2_create(bx,by));
			save_2x2_hhband_fast(t, hhband, vec2_create(bx,by));
		}
	}
}

/**
 * @brief Run only the horizontal pass over one 8&times;8 tile.
 *
 * The tile is walked in 2&times;2 blocks, row by row. The filtered
 * coefficients are discarded; the only lasting effect is the update of the
 * row state in @p buffer_y.
 *
 * @param[in,out] buffer_y per-row filter state; records of 4 floats for
 *                         rows 0..7 are accessed
 * @param[in]     input    source tile, read at coordinates 0..7
 */
void load_core_8x8_honly_fast(
	float *buffer_y,
	const struct imageptr_t *input
)
{
	const int state_len = 4;

	for(int by = 0; by < 4; by++)
	{
		const int y = 2 * by;
		float *row_state = buffer_y + y*state_len;

		for(int bx = 0; bx < 4; bx++)
		{
			const int x = 2 * bx;
			float t[4];

			load_2x2_fast(t, input, vec2_create(x,y));

			// no column state is needed by the horizontal-only core
			core_2x2_h(t, 0, row_state);
		}
	}
}

/**
 * @brief Run only the horizontal pass over one 8&times;8 tile located at
 *        @p local.
 *
 * Pixels are fetched with load_2x2_limited2() at @p local + (x,y). The
 * filtered coefficients are discarded; the only lasting effect is the update
 * of the row state in @p buffer_y.
 *
 * @param[in,out] buffer_y per-row filter state; records of 4 floats for
 *                         rows 0..7 are accessed
 * @param[in]     input    source image
 * @param[in]     local    position of the tile inside @p input
 * @param[in]     size     forwarded to load_2x2_limited2()
 */
void load_core_8x8_honly_slow(
	float *buffer_y,
	const struct imageptr_t *input,
	const struct vec2_t local,
	const struct vec2_t size
)
{
	const int state_len = 4;

	for(int by = 0; by < 4; by++)
	{
		const int y = 2 * by;
		float *row_state = buffer_y + y*state_len;

		for(int bx = 0; bx < 4; bx++)
		{
			const int x = 2 * bx;
			float t[4];

			// fetch the block relative to the tile origin
			load_2x2_limited2(t, input, vec2_add(local, vec2_create(x,y)), size);

			// no column state is needed by the horizontal-only core
			core_2x2_h(t, 0, row_state);
		}
	}
}

/**
 * @brief Interleave four 4&times;4 subbands into an 8&times;8 area of @p debug.
 *
 * The coefficient at (x,y) of a band is written to @p local + (2x+dx, 2y+dy),
 * where (dx,dy) is (0,0) for LL, (1,0) for HL, (0,1) for LH and (1,1) for HH.
 * The bands are copied one after another in that order.
 *
 * @param[in]  llband source of the LL coefficients, read at coordinates 0..3
 * @param[in]  hlband source of the HL coefficients, read at coordinates 0..3
 * @param[in]  lhband source of the LH coefficients, read at coordinates 0..3
 * @param[in]  hhband source of the HH coefficients, read at coordinates 0..3
 * @param[out] debug  destination image
 * @param[in]  local  position of the 8&times;8 area inside @p debug
 */
void save_8x8_debug(
	struct imageptr_t *llband,
	struct imageptr_t *hlband,
	struct imageptr_t *lhband,
	struct imageptr_t *hhband,
	struct imageptr_t *debug,
	const struct vec2_t local
)
{
	// band b lands on the pixels with offset (b % 2, b / 2) inside each 2x2 cell
	struct imageptr_t *const bands[4] = { llband, hlband, lhband, hhband };

	for(int b = 0; b < 4; b++)
	{
		const int dx = b % 2;
		const int dy = b / 2;

		for(int y = 0; y < 4; y++)
		{
			for(int x = 0; x < 4; x++)
			{
				*imageptr_pixel(debug, vec2_add(local, vec2_create(2*x+dx,2*y+dy)))
					= *imageptr_pixel_const(bands[b], vec2_create(x,y));
			}
		}
	}
}
