#include "ImageTools2.h"


/* Default 5-tap weights: the binomial row 1 4 6 4 1 scaled by 1/16 (sum = 1).
   The 5x5 routines below apply these taps along x and then along y. */
float gauss_5x5_kernel[5] = {0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f };
/* Default 7-tap weights, symmetric around the centre tap (sum = 1). */
float gauss_7x7_kernel[7] = {0.006f, 0.061f, 0.242f, 0.382f, 0.242f , 0.061f , 0.006f};

/* Taps used instead of the defaults when a routine is called with USER_5x5 /
   USER_7x7. Both start as NULL; no code visible here assigns them, so
   presumably the caller installs them before requesting a USER_* kernel. */
 float* user_5x5 = NULL;
 float* user_7x7 = NULL;

/* Taps selected by the most recent 5-tap / 7-tap call; each routine sets the
   matching pointer on entry. The GAUSS_7x7_* macros read act_kernel_7x7. */
float* act_kernel_5x5 = NULL;
float* act_kernel_7x7 = NULL;

 /* Mirror index i back into [0, n-1] without repeating the edge sample
    (... 2 1 0 1 2 ...). Loops so that images narrower than the kernel
    still resolve to a valid index. */
static int image_down_5x5_reflect(int i, int n)
{
    if ( n < 2 ) {
        return 0;
    }
    while ( i < 0 || i >= n ) {
        i = (i < 0) ? -i : 2 * (n - 1) - i;
    }
    return i;
}

/* Apply the 5 taps horizontally to one source row, at the even columns
   0, 2, ... below x_end only; odd columns of out are left untouched. */
static void image_down_5x5_filter_row(const float* row, int w, int x_end,
                                      const float* k, float* out)
{
    int x;

    for ( x = 0; x < x_end; x += 2 ) {
        if ( x >= 2 && x + 2 < w ) {
            out[x] = row[x-2]*k[0] + row[x-1]*k[1] + row[x+1]*k[3] + row[x]*k[2] + row[x+2]*k[4];
        }
        else {
            /* Border column: fetch the taps through the mirror rule. */
            float l2 = row[image_down_5x5_reflect(x - 2, w)];
            float l1 = row[image_down_5x5_reflect(x - 1, w)];
            float r1 = row[image_down_5x5_reflect(x + 1, w)];
            float r2 = row[image_down_5x5_reflect(x + 2, w)];

            out[x] = l2*k[0] + l1*k[1] + r1*k[3] + row[x]*k[2] + r2*k[4];
        }
    }
}

/*
 * Smooth src with a separable 5-tap kernel and keep every second sample in
 * both directions.
 *
 * src     row-major image, w_src floats per row, h_src rows
 * dst     receives (w_src / 2) values for each even source row 0, 2, ...,
 *         written back to back
 * kernel  USER_5x5 selects user_5x5; any other value selects gauss_5x5_kernel
 *
 * Side effect: act_kernel_5x5 is set to the selected taps.
 * Nothing is written when a pointer (including a NULL user kernel) is NULL,
 * when the output would be empty, or when the scratch allocation fails.
 *
 * Changes relative to the previous implementation:
 *  - the last two rows now combine the ring slots that really hold the last
 *    source rows (fixed slot numbers were used before);
 *  - every border uses the same mirror rule; the outer right tap of column
 *    w_src-2 previously differed between row 1 and the other rows;
 *  - for odd w_src the trailing column is simply not emitted, instead of
 *    being written and stepped back over on every row, which also happened
 *    on rows that produced no output and made output rows overlap;
 *  - the scratch allocation is checked.
 */
 void image_down_5x5(float* src , int w_src , int h_src , float* dst , KERNEL kernel )
{
    const float* k;
    const float* tap_row[5];
    float*       ring;
    int          out_w = w_src / 2;
    int          x_end = out_w * 2;     /* first column that is not emitted */
    int          filtered = 0;          /* source rows already in the ring */
    int          x, y, i;

    if ( kernel == USER_5x5 ) {
        act_kernel_5x5 = user_5x5;
    }
    else {
        act_kernel_5x5 = (float*)gauss_5x5_kernel;
    }
    k = act_kernel_5x5;

    if ( src == NULL || dst == NULL || k == NULL || out_w < 1 || h_src < 1 ) {
        return;
    }

    /* Ring of 5 horizontally filtered rows; source row r lives in slot r % 5. */
    ring = (float*)malloc(sizeof(float) * (size_t)w_src * 5);
    if ( ring == NULL ) {
        return;
    }

    for ( y = 0; y < h_src; y += 2 ) {
        int newest = (y + 2 < h_src) ? y + 2 : h_src - 1;

        /* Filter the rows this output row needs and that are not buffered yet. */
        while ( filtered <= newest ) {
            image_down_5x5_filter_row(src + (size_t)filtered * w_src, w_src, x_end, k,
                                      ring + (size_t)(filtered % 5) * w_src);
            filtered++;
        }

        /* Rows y-2 .. y+2, mirrored at the top and bottom edges. */
        for ( i = 0; i < 5; i++ ) {
            int r = image_down_5x5_reflect(y + i - 2, h_src);
            tap_row[i] = ring + (size_t)(r % 5) * w_src;
        }

        for ( x = 0; x < x_end; x += 2 ) {
            *dst = tap_row[0][x]*k[0]
                 + tap_row[1][x]*k[1]
                 + tap_row[2][x]*k[2]
                 + tap_row[3][x]*k[3]
                 + tap_row[4][x]*k[4];
            dst++;
        }
    }

    free(ring);
}

 /* Mirror index i back into [0, n-1] without repeating the edge sample
    (... 2 1 0 1 2 ...). Loops so that images narrower than the kernel
    still resolve to a valid index. */
static int convolution_5x5_reflect(int i, int n)
{
    if ( n < 2 ) {
        return 0;
    }
    while ( i < 0 || i >= n ) {
        i = (i < 0) ? -i : 2 * (n - 1) - i;
    }
    return i;
}

/* Apply the 5 taps horizontally to every column of one source row. */
static void convolution_5x5_filter_row(const float* row, int w,
                                       const float* k, float* out)
{
    int x;

    for ( x = 0; x < w; x++ ) {
        if ( x >= 2 && x + 2 < w ) {
            out[x] = row[x-2]*k[0] + row[x-1]*k[1] + row[x+1]*k[3] + row[x]*k[2] + row[x+2]*k[4];
        }
        else {
            /* Border column: fetch the taps through the mirror rule. */
            float l2 = row[convolution_5x5_reflect(x - 2, w)];
            float l1 = row[convolution_5x5_reflect(x - 1, w)];
            float r1 = row[convolution_5x5_reflect(x + 1, w)];
            float r2 = row[convolution_5x5_reflect(x + 2, w)];

            out[x] = l2*k[0] + l1*k[1] + r1*k[3] + row[x]*k[2] + r2*k[4];
        }
    }
}

/*
 * Smooth src with a separable 5-tap kernel; the output has the same size as
 * the input.
 *
 * src     row-major image, w_src floats per row, h_src rows
 * dst     receives w_src * h_src values, row by row
 * kernel  USER_5x5 selects user_5x5; any other value selects gauss_5x5_kernel
 *
 * Side effect: act_kernel_5x5 is set to the selected taps.
 * Nothing is written when a pointer (including a NULL user kernel) is NULL,
 * when a dimension is below 1, or when the scratch allocation fails.
 *
 * Changes relative to the previous implementation:
 *  - the last two rows now combine the ring slots that really hold the last
 *    source rows (fixed slot numbers were used before);
 *  - every border uses the same mirror rule; the outer right tap of column
 *    w_src-2 previously differed between row 1 and the other rows;
 *  - the scratch allocation is checked.
 */
 void convolution_5x5( float* src , int w_src , int h_src , float* dst , KERNEL kernel )
{
    const float* k;
    const float* tap_row[5];
    float*       ring;
    int          filtered = 0;          /* source rows already in the ring */
    int          x, y, i;

    if ( kernel == USER_5x5 ) {
        act_kernel_5x5 = user_5x5;
    }
    else {
        act_kernel_5x5 = (float*)gauss_5x5_kernel;
    }
    k = act_kernel_5x5;

    if ( src == NULL || dst == NULL || k == NULL || w_src < 1 || h_src < 1 ) {
        return;
    }

    /* Ring of 5 horizontally filtered rows; source row r lives in slot r % 5. */
    ring = (float*)malloc(sizeof(float) * (size_t)w_src * 5);
    if ( ring == NULL ) {
        return;
    }

    for ( y = 0; y < h_src; y++ ) {
        int newest = (y + 2 < h_src) ? y + 2 : h_src - 1;

        /* Filter the rows this output row needs and that are not buffered yet. */
        while ( filtered <= newest ) {
            convolution_5x5_filter_row(src + (size_t)filtered * w_src, w_src, k,
                                       ring + (size_t)(filtered % 5) * w_src);
            filtered++;
        }

        /* Rows y-2 .. y+2, mirrored at the top and bottom edges. */
        for ( i = 0; i < 5; i++ ) {
            int r = convolution_5x5_reflect(y + i - 2, h_src);
            tap_row[i] = ring + (size_t)(r % 5) * w_src;
        }

        for ( x = 0; x < w_src; x++ ) {
            *dst = tap_row[0][x]*k[0]
                 + tap_row[1][x]*k[1]
                 + tap_row[2][x]*k[2]
                 + tap_row[3][x]*k[3]
                 + tap_row[4][x]*k[4];
            dst++;
        }
    }

    free(ring);
}



/*
 * Seven-tap weighted sums. Every macro multiplies seven samples by
 * act_kernel_7x7[0..6] (which must be in scope and non-NULL at the point of
 * use) and adds the products from tap 0 to tap 6.
 *
 * All parameters are parenthesized in the expansions so that expression
 * arguments (for example "w - 1" as max_ind) keep their meaning. Index
 * arguments may be evaluated more than once: pass side-effect-free values.
 */

/* Samples arr[|i|]: negative indices mirror around element 0. Needs abs(). */
#define GAUSS_7x7_IREGULAR_LEFT(arr,x_3,x_2,x_1,x0,x1,x2,x3)\
    ( ((arr)[abs(x_3)] * act_kernel_7x7[0]) \
    + ((arr)[abs(x_2)] * act_kernel_7x7[1]) \
    + ((arr)[abs(x_1)] * act_kernel_7x7[2]) \
    + ((arr)[abs(x0)]  * act_kernel_7x7[3]) \
    + ((arr)[abs(x1)]  * act_kernel_7x7[4]) \
    + ((arr)[abs(x2)]  * act_kernel_7x7[5]) \
    + ((arr)[abs(x3)]  * act_kernel_7x7[6]) )

/* Same as above for an array of row pointers: samples arr[|i|][xdst]. */
#define GAUSS_7x7_IREGULAR_BOTTOM(arr, xdst ,x_3,x_2,x_1,x0,x1,x2,x3)\
    ( ((arr)[abs(x_3)][(xdst)] * act_kernel_7x7[0]) \
    + ((arr)[abs(x_2)][(xdst)] * act_kernel_7x7[1]) \
    + ((arr)[abs(x_1)][(xdst)] * act_kernel_7x7[2]) \
    + ((arr)[abs(x0)][(xdst)]  * act_kernel_7x7[3]) \
    + ((arr)[abs(x1)][(xdst)]  * act_kernel_7x7[4]) \
    + ((arr)[abs(x2)][(xdst)]  * act_kernel_7x7[5]) \
    + ((arr)[abs(x3)][(xdst)]  * act_kernel_7x7[6]) )

/* Index used on the right edge: i itself up to max_ind, otherwise
   max_ind - i % max_ind (max_ind+1 -> max_ind-1, max_ind+2 -> max_ind-2, ...). */
#define GAUSS_7x7_MIRROR_HIGH(i, max_ind)\
    ( (i) > (max_ind) ? (max_ind) - (i) % (max_ind) : (i) )

/* Samples arr[i] with indices above max_ind mirrored back (see above). */
#define GAUSS_7x7_IREGULAR_RIGHT(arr, max_ind ,x_3,x_2,x_1,x0,x1,x2,x3)\
    ( ((arr)[GAUSS_7x7_MIRROR_HIGH(x_3, max_ind)] * act_kernel_7x7[0]) \
    + ((arr)[GAUSS_7x7_MIRROR_HIGH(x_2, max_ind)] * act_kernel_7x7[1]) \
    + ((arr)[GAUSS_7x7_MIRROR_HIGH(x_1, max_ind)] * act_kernel_7x7[2]) \
    + ((arr)[GAUSS_7x7_MIRROR_HIGH(x0, max_ind)]  * act_kernel_7x7[3]) \
    + ((arr)[GAUSS_7x7_MIRROR_HIGH(x1, max_ind)]  * act_kernel_7x7[4]) \
    + ((arr)[GAUSS_7x7_MIRROR_HIGH(x2, max_ind)]  * act_kernel_7x7[5]) \
    + ((arr)[GAUSS_7x7_MIRROR_HIGH(x3, max_ind)]  * act_kernel_7x7[6]) )

/* Samples arr[i] with the indices used exactly as given. */
#define GAUSS_7x7_REGULAR(arr,x_3,x_2,x_1,x0,x1,x2,x3)\
    ( ((arr)[(x_3)] * act_kernel_7x7[0]) \
    + ((arr)[(x_2)] * act_kernel_7x7[1]) \
    + ((arr)[(x_1)] * act_kernel_7x7[2]) \
    + ((arr)[(x0)]  * act_kernel_7x7[3]) \
    + ((arr)[(x1)]  * act_kernel_7x7[4]) \
    + ((arr)[(x2)]  * act_kernel_7x7[5]) \
    + ((arr)[(x3)]  * act_kernel_7x7[6]) )
/* Same for an array of row pointers: samples arr[i][xdst]. */
#define GAUSS_7x7_REGULAR_Y(arr,xdst,x_3,x_2,x_1,x0,x1,x2,x3)\
    ( ((arr)[(x_3)][(xdst)] * act_kernel_7x7[0]) \
    + ((arr)[(x_2)][(xdst)] * act_kernel_7x7[1]) \
    + ((arr)[(x_1)][(xdst)] * act_kernel_7x7[2]) \
    + ((arr)[(x0)][(xdst)]  * act_kernel_7x7[3]) \
    + ((arr)[(x1)][(xdst)]  * act_kernel_7x7[4]) \
    + ((arr)[(x2)][(xdst)]  * act_kernel_7x7[5]) \
    + ((arr)[(x3)][(xdst)]  * act_kernel_7x7[6]) )

 /* Mirror index i back into [0, n-1] without repeating the edge sample
    (... 2 1 0 1 2 ...). Loops so that images narrower than the kernel
    still resolve to a valid index. */
static int image_down_7x7_reflect(int i, int n)
{
    if ( n < 2 ) {
        return 0;
    }
    while ( i < 0 || i >= n ) {
        i = (i < 0) ? -i : 2 * (n - 1) - i;
    }
    return i;
}

/* Apply the 7 taps horizontally to one source row, at the even columns
   0, 2, ... below x_end only; odd columns of out are left untouched. */
static void image_down_7x7_filter_row(const float* row, int w, int x_end,
                                      const float* k, float* out)
{
    int x, t;

    for ( x = 0; x < x_end; x += 2 ) {
        float acc;

        if ( x >= 3 && x + 3 < w ) {
            acc = row[x - 3] * k[0];
            for ( t = 1; t < 7; t++ ) {
                acc += row[x - 3 + t] * k[t];
            }
        }
        else {
            /* Border column: fetch the taps through the mirror rule. */
            acc = row[image_down_7x7_reflect(x - 3, w)] * k[0];
            for ( t = 1; t < 7; t++ ) {
                acc += row[image_down_7x7_reflect(x - 3 + t, w)] * k[t];
            }
        }
        out[x] = acc;
    }
}

/*
 * Smooth src with a separable 7-tap kernel and keep every second sample in
 * both directions.
 *
 * src     row-major image, w_src floats per row, h_src rows
 * dst     receives (w_src / 2) values for each even source row 0, 2, ...,
 *         written back to back
 * kernel  USER_7x7 selects user_7x7; any other value selects gauss_7x7_kernel
 *
 * Side effect: act_kernel_7x7 is set to the selected taps.
 * Nothing is written when a pointer (including a NULL user kernel) is NULL,
 * when the output would be empty, or when the scratch allocation fails.
 *
 * Changes relative to the previous implementation:
 *  - the vertical pass now weights rows y-3 .. y+3 with taps 0 .. 6; the old
 *    tap list was rotated by one ring slot, which put the centre weight on
 *    row y-1 and weighted row y+3 with tap 0;
 *  - rows near the top and bottom are mirrored around the output row itself
 *    (the old code reused the y == 0 pattern for y < 3 and mirrored the
 *    bottom rows around a stale slot);
 *  - for odd w_src the trailing column is simply not emitted, instead of
 *    being written and stepped back over on every row, which also happened
 *    on rows that produced no output and made output rows overlap;
 *  - the scratch allocation is checked.
 * The horizontal pass yields the same values as before.
 */
 void image_down_7x7( float* src , int w_src , int h_src , float* dst , KERNEL kernel )
{
    const float* k;
    const float* tap_row[7];
    float*       ring;
    int          out_w = w_src / 2;
    int          x_end = out_w * 2;     /* first column that is not emitted */
    int          filtered = 0;          /* source rows already in the ring */
    int          x, y, i;

    if ( kernel == USER_7x7 ) {
        act_kernel_7x7 = user_7x7;
    }
    else {
        act_kernel_7x7 = (float*)gauss_7x7_kernel;
    }
    k = act_kernel_7x7;

    if ( src == NULL || dst == NULL || k == NULL || out_w < 1 || h_src < 1 ) {
        return;
    }

    /* Ring of 7 horizontally filtered rows; source row r lives in slot r % 7. */
    ring = (float*)malloc(sizeof(float) * (size_t)w_src * 7);
    if ( ring == NULL ) {
        return;
    }

    for ( y = 0; y < h_src; y += 2 ) {
        int newest = (y + 3 < h_src) ? y + 3 : h_src - 1;

        /* Filter the rows this output row needs and that are not buffered yet. */
        while ( filtered <= newest ) {
            image_down_7x7_filter_row(src + (size_t)filtered * w_src, w_src, x_end, k,
                                      ring + (size_t)(filtered % 7) * w_src);
            filtered++;
        }

        /* Rows y-3 .. y+3, mirrored at the top and bottom edges. */
        for ( i = 0; i < 7; i++ ) {
            int r = image_down_7x7_reflect(y + i - 3, h_src);
            tap_row[i] = ring + (size_t)(r % 7) * w_src;
        }

        for ( x = 0; x < x_end; x += 2 ) {
            float acc = tap_row[0][x] * k[0];

            for ( i = 1; i < 7; i++ ) {
                acc += tap_row[i][x] * k[i];
            }
            *dst = acc;
            dst++;
        }
    }

    free(ring);
}


 /* Mirror index i back into [0, n-1] without repeating the edge sample
    (... 2 1 0 1 2 ...). Loops so that images narrower than the kernel
    still resolve to a valid index. */
static int convolution_7x7_reflect(int i, int n)
{
    if ( n < 2 ) {
        return 0;
    }
    while ( i < 0 || i >= n ) {
        i = (i < 0) ? -i : 2 * (n - 1) - i;
    }
    return i;
}

/* Apply the 7 taps horizontally to every column of one source row. */
static void convolution_7x7_filter_row(const float* row, int w,
                                       const float* k, float* out)
{
    int x, t;

    for ( x = 0; x < w; x++ ) {
        float acc;

        if ( x >= 3 && x + 3 < w ) {
            acc = row[x - 3] * k[0];
            for ( t = 1; t < 7; t++ ) {
                acc += row[x - 3 + t] * k[t];
            }
        }
        else {
            /* Border column: fetch the taps through the mirror rule. */
            acc = row[convolution_7x7_reflect(x - 3, w)] * k[0];
            for ( t = 1; t < 7; t++ ) {
                acc += row[convolution_7x7_reflect(x - 3 + t, w)] * k[t];
            }
        }
        out[x] = acc;
    }
}

/*
 * Smooth src with a separable 7-tap kernel; the output has the same size as
 * the input.
 *
 * src     row-major image, w_src floats per row, h_src rows
 * dst     receives w_src * h_src values, row by row
 * kernel  USER_7x7 selects user_7x7; any other value selects gauss_7x7_kernel
 *
 * Side effect: act_kernel_7x7 is set to the selected taps.
 * Nothing is written when a pointer (including a NULL user kernel) is NULL,
 * when a dimension is below 1, or when the scratch allocation fails.
 *
 * Changes relative to the previous implementation:
 *  - the vertical pass now weights rows y-3 .. y+3 with taps 0 .. 6; the old
 *    tap list was rotated by one ring slot, which put the centre weight on
 *    row y-1 and weighted row y+3 with tap 0;
 *  - rows near the top and bottom are mirrored around the output row itself
 *    (the old code reused the y == 0 pattern for rows 1 and 2 and mirrored
 *    the bottom rows around a stale slot, with a different cutoff for the
 *    first three columns);
 *  - the scratch allocation is checked.
 * The horizontal pass yields the same values as before.
 */
 void convolution_7x7( float* src , int w_src , int h_src , float* dst , KERNEL kernel )
{
    const float* k;
    const float* tap_row[7];
    float*       ring;
    int          filtered = 0;          /* source rows already in the ring */
    int          x, y, i;

    if ( kernel == USER_7x7 ) {
        act_kernel_7x7 = user_7x7;
    }
    else {
        act_kernel_7x7 = (float*)gauss_7x7_kernel;
    }
    k = act_kernel_7x7;

    if ( src == NULL || dst == NULL || k == NULL || w_src < 1 || h_src < 1 ) {
        return;
    }

    /* Ring of 7 horizontally filtered rows; source row r lives in slot r % 7. */
    ring = (float*)malloc(sizeof(float) * (size_t)w_src * 7);
    if ( ring == NULL ) {
        return;
    }

    for ( y = 0; y < h_src; y++ ) {
        int newest = (y + 3 < h_src) ? y + 3 : h_src - 1;

        /* Filter the rows this output row needs and that are not buffered yet. */
        while ( filtered <= newest ) {
            convolution_7x7_filter_row(src + (size_t)filtered * w_src, w_src, k,
                                       ring + (size_t)(filtered % 7) * w_src);
            filtered++;
        }

        /* Rows y-3 .. y+3, mirrored at the top and bottom edges. */
        for ( i = 0; i < 7; i++ ) {
            int r = convolution_7x7_reflect(y + i - 3, h_src);
            tap_row[i] = ring + (size_t)(r % 7) * w_src;
        }

        for ( x = 0; x < w_src; x++ ) {
            float acc = tap_row[0][x] * k[0];

            for ( i = 1; i < 7; i++ ) {
                acc += tap_row[i][x] * k[i];
            }
            *dst = acc;
            dst++;
        }
    }

    free(ring);
}

/*
 * 3x3 derivative helpers. The nine arguments are the window samples in
 * row-major order:
 *     x0 x1 x2
 *     x3 x4 x5
 *     x6 x7 x8
 * Arguments a macro does not use are discarded without being evaluated.
 * Every used argument is parenthesized before negation, so an expression
 * such as "a + b" is negated as a whole.
 */

/* Horizontal Sobel response: right column minus left column, middle row doubled. */
#define SOBEL_3x3_X_FLT(x0,x1,x2,x3,x4,x5,x6,x7,x8) \
    (( (-(x0))+(x2)+ 2.0f*( (-(x3))+(x5) ) + (-(x6))+(x8) ))

/* Vertical Sobel response: bottom row minus top row, middle column doubled. */
#define SOBEL_3x3_Y_FLT(x0,x1,x2,x3,x4,x5,x6,x7,x8) \
    (( (-(x0))+(x6)+ 2.0f*( (-(x1))+(x7) ) + (-(x2))+(x8) ))



/* Two-row variant: diagonal difference x4 - x0. */
#define SOBEL_3x3_X_C_FLT(x0,x1,x2,x3,x4,x5,x6,x7,x8)\
    ((-(x0))+(x4))

/* Two-row variant: anti-diagonal difference x3 - x1. */
#define SOBEL_3x3_Y_C_FLT(x0,x1,x2,x3,x4,x5,x6,x7,x8)\
    ((-(x1))+(x3))

/* Whichever of dx, dy has the larger magnitude (sign kept); dy on a tie.
   Both arguments are evaluated twice. Needs fabs() from <math.h>. */
#define MAX_DIFF(dx,dy) ( fabs(dx)>fabs(dy)? (dx) : (dy) )



 /*
  * Edge response of src: for every pixel, whichever of the x- and
  * y-derivative has the larger magnitude (sign kept, dy on a tie).
  *
  * src   row-major image, w_src floats per row, h_src rows
  * dest  receives w_src * h_src values
  *
  * Layout of the result:
  *  - interior pixels: 3x3 Sobel dx / dy;
  *  - top row, columns 0 .. w_src-2: diagonal differences between rows 0 and
  *    1; the last column repeats its left neighbour;
  *  - bottom row, columns 0 .. w_src-2: the same differences between the last
  *    two rows; the last column is 0;
  *  - first and last column of the interior rows: 0.
  * With TESTING defined every value is replaced by its magnitude.
  *
  * Nothing is written for NULL pointers or dimensions below 1; images with
  * fewer than 2 rows or columns are filled with 0.
  *
  * Changes relative to the previous implementation: the bottom row used to be
  * computed from rows h_src-3 / h_src-2 shifted two columns to the left (which
  * reads in front of src when h_src == 3), and dest[w_src] was never written.
  */
 void sobel_3x3( float* src , int w_src , int h_src , float* dest)
{
    const float* above;
    const float* mid;
    const float* below;
    float*       out;
    float        dx , dy;
    size_t       count , i;
    int          x , y;

    if ( src == NULL || dest == NULL || w_src < 1 || h_src < 1 ) {
        return;
    }
    count = (size_t)w_src * (size_t)h_src;

    if ( w_src < 2 || h_src < 2 ) {
        /* No neighbouring row/column to difference against. */
        for ( i = 0; i < count; i++ ) {
            dest[i] = 0.0f;
        }
        return;
    }

    /* Top row: two-row diagonal differences. */
    above = src;
    mid = src + w_src;
    for ( x = 0; x < w_src - 1; x++ ) {
        dx = (-above[x]) + mid[x+1];
        dy = (-above[x+1]) + mid[x];
        dest[x] = (float)( fabs(dx) > fabs(dy) ? dx : dy );
    }
    dest[w_src-1] = dest[w_src-2];

    /* Interior rows: full 3x3 Sobel, border columns set to 0. */
    for ( y = 1; y < h_src - 1; y++ ) {
        above = src + (size_t)(y - 1) * w_src;
        mid = above + w_src;
        below = mid + w_src;
        out = dest + (size_t)y * w_src;

        out[0] = 0.0f;
        for ( x = 1; x < w_src - 1; x++ ) {
            dx = (-above[x-1]) + above[x+1] + 2.0f*( (-mid[x-1]) + mid[x+1] ) + (-below[x-1]) + below[x+1];
            dy = (-above[x-1]) + below[x-1] + 2.0f*( (-above[x]) + below[x] ) + (-above[x+1]) + below[x+1];
            out[x] = (float)( fabs(dx) > fabs(dy) ? dx : dy );
        }
        out[w_src-1] = 0.0f;
    }

    /* Bottom row: same two-row differences, taken from the last two rows. */
    above = src + (size_t)(h_src - 2) * w_src;
    mid = above + w_src;
    out = dest + (size_t)(h_src - 1) * w_src;
    for ( x = 0; x < w_src - 1; x++ ) {
        dx = (-above[x]) + mid[x+1];
        dy = (-above[x+1]) + mid[x];
        out[x] = (float)( fabs(dx) > fabs(dy) ? dx : dy );
    }
    out[w_src-1] = 0.0f;

    #ifdef TESTING
        for ( i = 0; i < count; i++ ) {
            dest[i] = (float)fabs(dest[i]);
        }
    #endif
}


/*
 * Per-pixel x- and y-derivatives of src.
 *
 * src    row-major image, w_src floats per row, h_src rows
 * gradx  receives w_src * h_src x-derivatives
 * grady  receives w_src * h_src y-derivatives
 *
 * Layout of both results:
 *  - interior pixels: 3x3 Sobel responses;
 *  - top row, columns 0 .. w_src-2: diagonal (gradx) and anti-diagonal
 *    (grady) differences between rows 0 and 1;
 *  - bottom row, columns 0 .. w_src-2: the same differences between the last
 *    two rows;
 *  - last column of the top and bottom rows, and the first and last column
 *    of the interior rows: 0.
 * With TESTING defined every value is replaced by its magnitude.
 *
 * Nothing is written for NULL pointers or dimensions below 1; images with
 * fewer than 2 rows or columns are filled with 0.
 *
 * Changes relative to the previous implementation: the bottom row used to be
 * computed from rows h_src-3 / h_src-2 shifted two columns to the left (which
 * reads in front of src when h_src == 3), and the border elements listed
 * above as 0 (plus the bottom-left one, now computed) were never written.
 */
void sobel( float* src , int w_src , int h_src , float* gradx , float* grady)
{
    const float* above;
    const float* mid;
    const float* below;
    float*       gx;
    float*       gy;
    size_t       count , i;
    int          x , y;

    if ( src == NULL || gradx == NULL || grady == NULL || w_src < 1 || h_src < 1 ) {
        return;
    }
    count = (size_t)w_src * (size_t)h_src;

    if ( w_src < 2 || h_src < 2 ) {
        /* No neighbouring row/column to difference against. */
        for ( i = 0; i < count; i++ ) {
            gradx[i] = 0.0f;
            grady[i] = 0.0f;
        }
        return;
    }

    /* Top row: two-row diagonal differences. */
    above = src;
    mid = src + w_src;
    for ( x = 0; x < w_src - 1; x++ ) {
        gradx[x] = (-above[x]) + mid[x+1];
        grady[x] = (-above[x+1]) + mid[x];
    }
    gradx[w_src-1] = 0.0f;
    grady[w_src-1] = 0.0f;

    /* Interior rows: full 3x3 Sobel, border columns set to 0. */
    for ( y = 1; y < h_src - 1; y++ ) {
        above = src + (size_t)(y - 1) * w_src;
        mid = above + w_src;
        below = mid + w_src;
        gx = gradx + (size_t)y * w_src;
        gy = grady + (size_t)y * w_src;

        gx[0] = 0.0f;
        gy[0] = 0.0f;
        for ( x = 1; x < w_src - 1; x++ ) {
            gx[x] = (-above[x-1]) + above[x+1] + 2.0f*( (-mid[x-1]) + mid[x+1] ) + (-below[x-1]) + below[x+1];
            gy[x] = (-above[x-1]) + below[x-1] + 2.0f*( (-above[x]) + below[x] ) + (-above[x+1]) + below[x+1];
        }
        gx[w_src-1] = 0.0f;
        gy[w_src-1] = 0.0f;
    }

    /* Bottom row: same two-row differences, taken from the last two rows. */
    above = src + (size_t)(h_src - 2) * w_src;
    mid = above + w_src;
    gx = gradx + (size_t)(h_src - 1) * w_src;
    gy = grady + (size_t)(h_src - 1) * w_src;
    for ( x = 0; x < w_src - 1; x++ ) {
        gx[x] = (-above[x]) + mid[x+1];
        gy[x] = (-above[x+1]) + mid[x];
    }
    gx[w_src-1] = 0.0f;
    gy[w_src-1] = 0.0f;

    #ifdef TESTING
        for ( i = 0; i < count; i++ ) {
            gradx[i] = (float)fabs(gradx[i]);
            grady[i] = (float)fabs(grady[i]);
        }
    #endif
}

