
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_SIGNED.ALL;

-- Wavelet: AXI-Stream style wrapper that buffers incoming 64-bit words in two
-- line RAMs and feeds 2x2 groups of 16-bit samples to a kernel component.
--
-- Generics
--   KERNEL_TYPE : selects the instantiated component (1 -> kernel, 2 -> kernel2).
--                 Any other value instantiates neither.
--   FRAME_WIDTH : declared for the interface; the Behavioral architecture does
--                 not reference it (its line-length constants are literals).
entity Wavelet is
    generic (
        KERNEL_TYPE : integer := 1;
        FRAME_WIDTH : integer := 1920
    );
    port (
        -- Clock and synchronous, active-low reset.
        aclk          : in  std_logic;
        resetn        : in  std_logic;

        -- Slave (input) stream.
        s_axis_tdata  : in  std_logic_vector(63 downto 0);
        s_axis_tvalid : in  std_logic;
        s_axis_tready : out std_logic;

        -- Master (output) stream: four 16-bit kernel results per word.
        -- NOTE(review): m_axis_tready is not read by the Behavioral
        -- architecture, so downstream back-pressure is not honoured.
        m_axis_tdata  : out std_logic_vector(63 downto 0);
        m_axis_tvalid : out std_logic;
        m_axis_tready : in  std_logic
    );
end entity Wavelet;

architecture Behavioral of Wavelet is

-- Component declaration for the kernel selected by KERNEL_TYPE = 1.
-- Takes four 16-bit inputs with a valid strobe and returns four 16-bit
-- outputs with their own valid strobe.
component kernel is
    generic (
        FRAME_WIDTH : integer := 1920
    );
    port (
        aclk      : in  std_logic;
        resetn    : in  std_logic;

        -- 2x2 input samples and their qualifier.
        in_00     : in  std_logic_vector(15 downto 0);
        in_01     : in  std_logic_vector(15 downto 0);
        in_10     : in  std_logic_vector(15 downto 0);
        in_11     : in  std_logic_vector(15 downto 0);
        in_valid  : in  std_logic;

        -- 2x2 output samples and their qualifier.
        out_00    : out std_logic_vector(15 downto 0);
        out_01    : out std_logic_vector(15 downto 0);
        out_10    : out std_logic_vector(15 downto 0);
        out_11    : out std_logic_vector(15 downto 0);
        out_valid : out std_logic
    );
end component kernel;

-- Component declaration for the kernel selected by KERNEL_TYPE = 2.
-- Its port list is identical to that of `kernel`.
component kernel2 is
    generic (
        FRAME_WIDTH : integer := 1920
    );
    port (
        aclk      : in  std_logic;
        resetn    : in  std_logic;

        -- 2x2 input samples and their qualifier.
        in_00     : in  std_logic_vector(15 downto 0);
        in_01     : in  std_logic_vector(15 downto 0);
        in_10     : in  std_logic_vector(15 downto 0);
        in_11     : in  std_logic_vector(15 downto 0);
        in_valid  : in  std_logic;

        -- 2x2 output samples and their qualifier.
        out_00    : out std_logic_vector(15 downto 0);
        out_01    : out std_logic_vector(15 downto 0);
        out_10    : out std_logic_vector(15 downto 0);
        out_11    : out std_logic_vector(15 downto 0);
        out_valid : out std_logic
    );
end component kernel2;

-- Storage types: a 2048 x 32-bit line RAM (four banks of 512 words, selected
-- by line number mod 4) and a group of four 16-bit samples.
type array_linex32b is array (0 to 2047) of std_logic_vector(31 downto 0);
type array_4x16b is array (0 to 3) of std_logic_vector(15 downto 0);

-- Two line RAMs. Each input word is split into two 32-bit halves, one half
-- per RAM; which half goes where alternates with the parity of the line.
signal ramA : array_linex32b := (others => (others =>'0'));
signal ramB : array_linex32b := (others => (others =>'0'));

-- Write-side position (word within the line, line number) and read-side
-- position (read step within the line, line number).
signal ram_in_pixel : integer range 0 to 511 := 0;
signal ram_in_line  : integer range 0 to 1079 := 0;
signal ram_out_pixel: integer range 0 to 1023 := 0;
signal ram_out_line : integer range 0 to 1079 := 0;

-- Registered read data of the two RAMs.
signal ramA_out : std_logic_vector(31 downto 0);
signal ramB_out : std_logic_vector(31 downto 0);

-- ram_data is not driven anywhere in this architecture.
signal ram_data : array_4x16b;
-- Samples presented to the kernel and results returned by it.
signal kernel_data : array_4x16b := (others => (others =>'0'));
signal out_data : array_4x16b;

-- valid        : registered qualifier for kernel_data (kernel in_valid).
--                Starts at '0' so the kernel never sees an uninitialised
--                strobe before the first clocked assignment.
-- kernel_valid : kernel out_valid.
-- out_valid    : gated kernel_valid, drives m_axis_tvalid.
signal valid    : std_logic := '0';
signal kernel_valid : std_logic;
signal out_valid: std_logic;

-- Read addresses. Both are always produced by a "mod 2048" expression, so they
-- are constrained to the RAM depth: this gives an 11-bit address instead of a
-- full-width integer and a legal array index as the initial value.
signal ramA_addr    : integer range 0 to 2047 := 0;
signal ramB_addr    : integer range 0 to 2047 := 0;

-- Parity of ram_out_pixel delayed by one clock, so it lines up with the
-- registered RAM read data.
signal next_ram_out_pixel   : std_logic;

-- Control state: st0 = prefill, st1 = start-up read passes, st2 = streaming.
type tstate is (st0, st1, st2);
signal state    : tstate;


begin

-- sync: clocked control for the line buffer.
--
--   * state machine   st0 (prefill) -> st1 (two start-up read passes, input
--                     stalled) -> st2 (steady-state streaming);
--   * read position   ram_out_pixel / ram_out_line;
--   * write position  ram_in_pixel / ram_in_line;
--   * valid           registered qualifier handed to the kernel;
--   * next_ram_out_pixel  parity of ram_out_pixel delayed by one clock.
--
-- An input word is accepted only when s_axis_tvalid = '1' AND the interface is
-- ready. s_axis_tready is driven by out_proc as '0' in st1 and '1' otherwise,
-- so "ready" is expressed here as (state /= st1). Without this qualification a
-- master that correctly holds TVALID/TDATA while TREADY is low would have its
-- stalled word counted on every cycle of st1.
--
-- NOTE(review): the read side free-runs in st1/st2; it does not check that the
-- input has kept up, and m_axis_tready is never consulted.
sync: process(aclk)
begin
    if aclk'event and aclk = '1' then
        if resetn = '0' then
            ram_in_pixel <= 0;
            ram_in_line  <= 0;
            state <= st0;
            ram_out_pixel <= 0;
            -- Also bring the remaining registers to a defined value so the
            -- kernel never sees an undefined in_valid out of reset.
            ram_out_line <= 2;          -- same value st0 holds it at
            valid <= '0';
            next_ram_out_pixel <= '0';
        else
            case state is
                when st0 =>
                    -- Prefill: leave once the last word of line 3 is actually
                    -- being accepted (four complete lines are then buffered).
                    if s_axis_tvalid = '1' and ram_in_line = 3 and ram_in_pixel = 481 then
                        state <= st1;
                    end if;
                    ram_out_line <= 2;
                    valid <= '0';
                when st1 =>
                    -- Start-up: one pass with ram_out_line = 2, one pass with
                    -- ram_out_line = 0, then continue in st2 from line 1.
                    if ram_out_pixel = 963 then
                        ram_out_pixel <= 0;
                        if ram_out_line = 0 then
                            state <= st2;
                            ram_out_line <= 1;
                        else
                            ram_out_line <= 0;
                        end if;
                    else
                        ram_out_pixel <= ram_out_pixel + 1;
                    end if;
                    valid <= '1';
                when st2 =>
                    -- Streaming: advance two lines per pass.
                    if ram_out_pixel = 963 then
                        ram_out_pixel <= 0;
                        -- Wrap modulo 1080 instead of running past the declared
                        -- range. 1080 is a multiple of 4, so the bank selected
                        -- by (line mod 4) and the line parity are unchanged.
                        if ram_out_line >= 1078 then
                            ram_out_line <= ram_out_line - 1078;
                        else
                            ram_out_line <= ram_out_line + 2;
                        end if;
                    else
                        ram_out_pixel <= ram_out_pixel + 1;
                    end if;
                    valid <= '1';
            end case;

            -- Write position: advance only on a completed handshake.
            if s_axis_tvalid = '1' and state /= st1 then
                if ram_in_pixel = 481 then
                    ram_in_pixel <= 0;
                    -- Wrap modulo 1080 (see the note on ram_out_line above).
                    if ram_in_line = 1079 then
                        ram_in_line <= 0;
                    else
                        ram_in_line <= ram_in_line + 1;
                    end if;
                else
                    ram_in_pixel <= ram_in_pixel + 1;
                end if;
            end if;

            -- Delay the read-step parity by one clock to match the registered
            -- RAM outputs.
            if (ram_out_pixel mod 2) = 0 then
                next_ram_out_pixel <= '0';
            else
                next_ram_out_pixel <= '1';
            end if;
        end if;
    end if;
end process;


-- out_proc: combinational outputs derived from the state.
--
--   s_axis_tready : '0' in st1 (input stalled), '1' in st0 and st2.
--   kernel_data   : the two 32-bit RAM words split into four 16-bit samples;
--                   which RAM supplies samples 0/1 and which supplies 2/3 is
--                   chosen by the delayed read-step parity.
--   out_valid     : kernel_valid, passed through only in st2 once
--                   ram_out_pixel has reached 7.
--
-- Every output is assigned on every path. kernel_data defaults to zero (the
-- value it is initialised with) in st0, so no latch is inferred for it.
out_proc: process(state, kernel_valid, ram_out_pixel, next_ram_out_pixel, ramA_out, ramB_out)
begin

    out_valid <= '0';
    kernel_data <= (others => (others => '0'));

    case state is
        when st0 =>
            s_axis_tready <= '1';
        when st1 | st2 =>
            if state = st1 then
                s_axis_tready <= '0';
            else
                s_axis_tready <= '1';
            end if;

            -- Same sample routing in both reading states.
            if next_ram_out_pixel = '0' then
                kernel_data(0) <= ramB_out(15 downto 0);
                kernel_data(1) <= ramB_out(31 downto 16);
                kernel_data(2) <= ramA_out(15 downto 0);
                kernel_data(3) <= ramA_out(31 downto 16);
            else
                kernel_data(0) <= ramA_out(15 downto 0);
                kernel_data(1) <= ramA_out(31 downto 16);
                kernel_data(2) <= ramB_out(15 downto 0);
                kernel_data(3) <= ramB_out(31 downto 16);
            end if;

            -- Results are only forwarded in the streaming state, and not for
            -- the first seven read steps of a pass.
            if state = st2 and ram_out_pixel >= 7 then
                out_valid <= kernel_valid;
            end if;
    end case;

end process;

-- bramA_wr: synchronous write port of ramA.
-- Even lines store the low half of the input word, odd lines the high half.
-- The bank is (ram_in_line mod 4); the offset inside the bank is ram_in_pixel.
-- The write is enabled only for an accepted word (tvalid while the interface
-- is ready, i.e. state /= st1), so a word held by the master during the st1
-- stall cannot overwrite buffered data that is still to be read.
bramA_wr: process(aclk)
begin
    if(aclk'event and aclk = '1') then
        if(s_axis_tvalid = '1' and state /= st1) then
            if (ram_in_line mod 2) = 0 then
                ramA(ram_in_pixel + (ram_in_line mod 4)*512) <= s_axis_tdata(31 downto 0);
            else
                ramA(ram_in_pixel + (ram_in_line mod 4)*512) <= s_axis_tdata(63 downto 32);
            end if;
        end if;
    end if;
end process;

-- bramB_wr: synchronous write port of ramB.
-- Mirror image of bramA_wr: even lines store the high half of the input word,
-- odd lines the low half. Same address and same write-enable condition.
bramB_wr: process(aclk)
begin
    if(aclk'event and aclk = '1') then
        if(s_axis_tvalid = '1' and state /= st1) then
            if (ram_in_line mod 2) = 0 then
                ramB(ram_in_pixel + (ram_in_line mod 4)*512) <= s_axis_tdata(63 downto 32);
            else
                ramB(ram_in_pixel + (ram_in_line mod 4)*512) <= s_axis_tdata(31 downto 0);
            end if;
        end if;
    end if;
end process;


-- Read addresses for the two line RAMs.
-- One RAM is addressed in the bank of ram_out_line and the other in the bank of
-- the following line (512 words further, wrapping at 2048). When the parities
-- of ram_out_pixel and ram_out_line differ, ramA takes the following line's
-- bank and ramB the current one; when they match it is the other way round.
ramA_addr <= (((ram_out_line + 1)*512 + ram_out_pixel/2) mod 2048) when (ram_out_pixel mod 2) /= (ram_out_line mod 2)
        else ((ram_out_line*512 + ram_out_pixel/2) mod 2048);
ramB_addr <= ((ram_out_line*512 + ram_out_pixel/2) mod 2048) when (ram_out_pixel mod 2) /= (ram_out_line mod 2)
        else (((ram_out_line + 1)*512 + ram_out_pixel/2) mod 2048);
                


-- bramA_rd: registered read port of ramA.
-- ramA_out takes the word at ramA_addr on each rising edge of aclk, so read
-- data appears one clock after the address. There is no read enable and no
-- reset on this register.
bramA_rd: process(aclk)
begin
    if(aclk'event and aclk = '1') then
        ramA_out <= ramA(ramA_addr);        
    end if;
end process;

-- bramB_rd: registered read port of ramB.
-- ramB_out takes the word at ramB_addr on each rising edge of aclk, so read
-- data appears one clock after the address. There is no read enable and no
-- reset on this register.
bramB_rd: process(aclk)
begin
    if(aclk'event and aclk = '1') then
        ramB_out <= ramB(ramB_addr); 
    end if;
end process;


-- Instantiate `kernel` when KERNEL_TYPE = 1.
-- No generic map is given, so the component's FRAME_WIDTH keeps its default.
gen1: if KERNEL_TYPE = 1 generate
    kernel_map: kernel
    port map (
        aclk      => aclk,
        resetn    => resetn,

        -- Four samples taken from the line RAMs, qualified by `valid`.
        in_00     => kernel_data(0),
        in_01     => kernel_data(1),
        in_10     => kernel_data(2),
        in_11     => kernel_data(3),
        in_valid  => valid,

        -- Four results, packed into m_axis_tdata by the architecture.
        out_00    => out_data(0),
        out_01    => out_data(1),
        out_10    => out_data(2),
        out_11    => out_data(3),
        out_valid => kernel_valid
    );
end generate gen1;
    
-- Instantiate `kernel2` when KERNEL_TYPE = 2.
-- No generic map is given, so the component's FRAME_WIDTH keeps its default.
gen2: if KERNEL_TYPE = 2 generate
    kernel_map: kernel2
    port map (
        aclk      => aclk,
        resetn    => resetn,

        -- Four samples taken from the line RAMs, qualified by `valid`.
        in_00     => kernel_data(0),
        in_01     => kernel_data(1),
        in_10     => kernel_data(2),
        in_11     => kernel_data(3),
        in_valid  => valid,

        -- Four results, packed into m_axis_tdata by the architecture.
        out_00    => out_data(0),
        out_01    => out_data(1),
        out_10    => out_data(2),
        out_11    => out_data(3),
        out_valid => kernel_valid
    );
end generate gen2;
        
-- Pack the four 16-bit kernel results into the output word:
-- out_data(0) occupies the least significant 16 bits, out_data(3) the most
-- significant 16 bits.
m_axis_tdata(15 downto 0)  <= out_data(0);
m_axis_tdata(31 downto 16) <= out_data(1);
m_axis_tdata(47 downto 32) <= out_data(2);
m_axis_tdata(63 downto 48) <= out_data(3);

-- Output qualifier produced by out_proc.
m_axis_tvalid <= out_valid;

end Behavioral;
