-- hw_rxbuf.vhd - Hardware TX DMA Buffer
-- Copyright (C) 2013 CESNET
-- Author(s): Martin Spinler <spinler@cesnet.cz>
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
-- 1. Redistributions of source code must retain the above copyright
--    notice, this list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright
--    notice, this list of conditions and the following disclaimer in
--    the documentation and/or other materials provided with the
--    distribution.
-- 3. Neither the name of the Company nor the names of its contributors
--    may be used to endorse or promote products derived from this
--    software without specific prior written permission.
--
-- This software is provided ``as is'', and any express or implied
-- warranties, including, but not limited to, the implied warranties of
-- merchantability and fitness for a particular purpose are disclaimed.
-- In no event shall the company or contributors be liable for any
-- direct, indirect, incidental, special, exemplary, or consequential
-- damages (including, but not limited to, procurement of substitute
-- goods or services; loss of use, data, or profits; or business
-- interruption) however caused and on any theory of liability, whether
-- in contract, strict liability, or tort (including negligence or
-- otherwise) arising in any way out of the use of this software, even
-- if advised of the possibility of such damage.
--

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_unsigned.all;
use IEEE.std_logic_arith.all;
use IEEE.numeric_std.all;
use work.math_pack.all;

-- ----------------------------------------------------------------------
--                            Entity declaration
-- ----------------------------------------------------------------------
-- Multi-channel transmit buffer: data written over the DMA bus interface is
-- stored per channel and sent out as packets on the FrameLink Unaligned (FLU)
-- interface; every sent packet is reported on the PACKET_* ports.
entity DMA_BUFFER_TX is
   generic(
      --! Number of channels (sizes ENABLE and all channel-index ports)
      CHANNELS          : integer := 8;
      --! Size of each buffer in bytes
      BUFFER_SIZE       : integer := 8192;
      --! Data align in bytes
      DATA_ALIGN        : integer := 8;
      --! Data width of module
      FLU_WIDTH         : integer := 512;
      --! FLU Specifics: width of TX_SOP_POS in bits
      SOP_WIDTH         : integer := 1;
      --! Remove 4B header with packet size
      REMOVE_HEADER     : boolean := true
   );
   port(
      --! Common interface
      CLK               : in  std_logic;
      RESET             : in  std_logic;

      --! Enable specific channel (one bit per channel)
      ENABLE            : in  std_logic_vector(CHANNELS-1 downto 0);

      --! FrameLink Unaligned interface
      TX_CHANNEL        : out std_logic_vector(log2(CHANNELS)-1 downto 0);
      TX_DATA           : out std_logic_vector(FLU_WIDTH-1 downto 0);
      TX_SOP_POS        : out std_logic_vector(SOP_WIDTH-1 downto 0);
      TX_EOP_POS        : out std_logic_vector(log2(FLU_WIDTH/8)-1 downto 0);
      TX_SOP            : out std_logic;
      TX_EOP            : out std_logic;
      TX_SRC_RDY        : out std_logic;
      TX_DST_RDY        : in  std_logic;

      --! Interface to DMA Controller
      --! PACKET_SENT is asserted while a word flagged as end-of-packet is
      --! being read from the output FIFO; PACKET_LENGTH and PACKET_CHANNEL
      --! are taken from the flags stored alongside that word.
      PACKET_SENT       : out std_logic;
      PACKET_LENGTH     : out std_logic_vector(log2(BUFFER_SIZE) downto 0);
      PACKET_CHANNEL    : out std_logic_vector(log2(CHANNELS)-1 downto 0);

      --! FIFO-like interface to DMA Bus, without VALID signal
      --! Write Address
      DMA_ADDRESS       : in  std_logic_vector(log2(BUFFER_SIZE/DATA_ALIGN)-1 downto 0);
      --! Write n WORDs (converted internally to a per-lane write mask)
      DMA_WRITE         : in  std_logic_vector(log2(FLU_WIDTH/8 / DATA_ALIGN) downto 0);
      --! Channel to write
      DMA_CHANNEL       : in  std_logic_vector(log2(CHANNELS)-1 downto 0);
      --! Incoming data
      DMA_DATA          : in  std_logic_vector(FLU_WIDTH-1 downto 0);

      --! Completion strobe: queues DMA_DONE_SIZE and DMA_DONE_CHANNEL; the
      --! size is later added to the written counter of that channel.
      DMA_DONE          : in  std_logic;
      DMA_DONE_SIZE     : in  std_logic_vector(log2(BUFFER_SIZE/DATA_ALIGN) downto 0);
      DMA_DONE_CHANNEL  : in  std_logic_vector(log2(CHANNELS)-1 downto 0)

   );
end entity;

-- ----------------------------------------------------------------------
--                      Architecture declaration
-- ----------------------------------------------------------------------
architecture behavioral of DMA_BUFFER_TX is

   -- Number of DATA_ALIGN-byte lanes that make up one FLU word; one FIFO is
   -- instantiated per lane and per channel.
   constant BUFFERS           : integer := (FLU_WIDTH/8) / DATA_ALIGN;
   -- Width of one lane in bits (used as DATA_WIDTH of the lane FIFOs).
   constant BUFFER_WIDTH      : integer := (FLU_WIDTH / BUFFERS);
   -- Number of items in each lane FIFO (used as ITEMS of the lane FIFOs).
   constant BLOCK_COUNT       : integer := (BUFFER_SIZE / DATA_ALIGN) / BUFFERS;

   -- States of the packet parse FSM.
   type t_state               is (S_IDLE, S_LOAD, S_PACKET);
   -- One bit per lane, for every channel (read/write strobes).
   type t_buffer              is array(CHANNELS-1 downto 0) of std_logic_vector(BUFFERS-1 downto 0);
   -- One lane-FIFO address per lane.
   type t_addr                is array(BUFFERS-1 downto 0) of std_logic_vector(log2(BLOCK_COUNT)-1 downto 0);
   -- One FLU word split into lanes.
   type t_data                is array(BUFFERS-1 downto 0) of std_logic_vector(FLU_WIDTH / BUFFERS-1 downto 0);
   -- One lane-split word per channel.
   type t_datach              is array(CHANNELS-1 downto 0) of t_data;
   -- Low 16 bits of every lane (candidate packet-length fields).
   type t_datachlen           is array(BUFFERS-1 downto 0) of std_logic_vector(15 downto 0);

   -- Bit ranges of the channel and size fields inside the DMA-done FIFO word
   -- (size in the upper bits, channel in the lower bits).
   subtype r_done_channel is natural range log2(CHANNELS)-1 downto 0;
   subtype r_done_size    is natural range log2(BUFFER_SIZE/DATA_ALIGN)+log2(CHANNELS) downto log2(CHANNELS);

   -- Parse FSM: registered state and combinational next state
   signal present_state       : t_state;
   signal next_state          : t_state;

   -- Write buffer signals
   -- Registered copies of the DMA bus inputs; reg_dma_write is the per-lane
   -- write mask derived from DMA_WRITE.
   signal reg_dma_data        : std_logic_vector(FLU_WIDTH-1 downto 0);
   signal reg_dma_address     : std_logic_vector(log2(BUFFER_SIZE/DATA_ALIGN)-1 downto 0);
   signal reg_dma_channel     : std_logic_vector(log2(CHANNELS)-1 downto 0);
   signal reg_dma_write       : std_logic_vector(BUFFERS-1 downto 0);

   -- Per-lane write strobe (per channel), address and data after rotation
   signal buffer_write        : t_buffer;
   signal buffer_write_address: t_addr;
   signal buffer_write_data   : t_data;

   -- DMA Done
   -- FIFO word layout: DMA_DONE_SIZE in the upper bits, DMA_DONE_CHANNEL in
   -- the lower bits. reg_dma_done is a shift register fed by DMA_DONE; bit 0
   -- is DMA_DONE delayed by five clock edges.
   signal dma_done_fifo_in    : std_logic_vector(log2(BUFFER_SIZE/DATA_ALIGN)+log2(CHANNELS) downto 0);
   signal dma_done_fifo_out   : std_logic_vector(log2(BUFFER_SIZE/DATA_ALIGN)+log2(CHANNELS) downto 0);
   signal reg_dma_done_size   : std_logic_vector(log2(BUFFER_SIZE/DATA_ALIGN) downto 0);
   signal reg_dma_done_channel: std_logic_vector(log2(CHANNELS)-1 downto 0);
   signal reg_dma_done        : std_logic_vector(4 downto 0) := (others => '0');

   -- Buffer usage
   -- buffer_usage is the difference between the written and read counters of
   -- the channel currently selected by parse_channel.
   signal buffer_usage        : std_logic_vector(log2(BUFFER_SIZE / DATA_ALIGN) downto 0);

   -- Per-channel "written" counter: *_wr is the value to store, *_rd the value
   -- read at the DMA-done channel, "written" the value read at parse_channel.
   signal written_wr          : std_logic_vector(log2(BUFFER_SIZE / DATA_ALIGN) downto 0);
   signal written_rd          : std_logic_vector(log2(BUFFER_SIZE / DATA_ALIGN) downto 0);
   signal written             : std_logic_vector(log2(BUFFER_SIZE / DATA_ALIGN) downto 0);
   signal written_we          : std_logic;

   -- Per-channel "read" counter: *_wr is the value to store, *_rd the value
   -- read at parse_channel, "readen" the value read at length_channel
   -- (the latter is not used elsewhere in this architecture).
   signal readen_wr           : std_logic_vector(log2(BUFFER_SIZE / DATA_ALIGN) downto 0);
   signal readen_rd           : std_logic_vector(log2(BUFFER_SIZE / DATA_ALIGN) downto 0);
   signal readen              : std_logic_vector(log2(BUFFER_SIZE / DATA_ALIGN) downto 0);
   signal readen_we           : std_logic;

   -- Read buffer signals
   -- Per-lane read strobes and read data for every channel, plus the low
   -- 16 bits of every lane of the channel selected by length_channel1.
   signal buffer_read         : t_buffer;
   signal buffer_read_data    : t_datach;
   signal length_read_channel : t_datachlen;

   -- Packet length process signals
   -- length_channel is a free-running channel counter; length_channel1 is
   -- loaded with length_channel + 2 on every clock edge.
   signal length_channel      : std_logic_vector(log2(CHANNELS)-1 downto 0) := (others => '0');
   signal length_channel1     : std_logic_vector(log2(CHANNELS)-1 downto 0) := (others => '0');
   signal length_valid        : std_logic_vector(CHANNELS-1 downto 0) := (others => '0');
   signal length_buffer_busy  : std_logic_vector(CHANNELS-1 downto 0) := (others => '0');
   -- length_we has no driver in this architecture (its assignment is
   -- commented out), so it keeps the initial value '1'.
   signal length_we           : std_logic := '1';
   -- length_in: raw 16-bit length field; length_just: the same value rounded
   -- up to a multiple of DATA_ALIGN.
   signal length_in           : std_logic_vector(15 downto 0);
   signal length_just         : std_logic_vector(15 downto 0);
   signal length_last_read    : std_logic_vector(log2(BUFFERS)-1 downto 0);

   -- Ports of the per-channel last-read lane memory on the length side
   signal length_last_read_o  : std_logic_vector(log2(BUFFERS)-1 downto 0);
   signal length_last_read_w  : std_logic_vector(log2(BUFFERS)-1 downto 0);
   signal length_last_read_we : std_logic;

   -- Parse process
   -- parse_len / parse_len_na: stored length and its low (unaligned) bits for
   -- the channel selected by parse_channel.
   signal parse_len_na        : std_logic_vector(log2(FLU_WIDTH/8)-1 downto 0);
   signal parse_len           : std_logic_vector(15 downto 0);
   -- parse_sent_len: running byte count for the packet in flight;
   -- parse_length: length latched while the FSM is idle.
   signal parse_sent_len      : std_logic_vector(15 downto 0);
   signal parse_length        : std_logic_vector(15 downto 0);
   signal parse_channel       : std_logic_vector(log2(CHANNELS)-1 downto 0) := (others => '0');
   signal parse_sop           : std_logic;
   signal parse_eop           : std_logic;
   signal parse_eop_pos       : std_logic_vector(log2(FLU_WIDTH/8)-1 downto 0);
   signal parse_last_read     : std_logic_vector(log2(BUFFERS)-1 downto 0);
   signal parse_last_read_w   : std_logic_vector(log2(BUFFERS)-1 downto 0);
   signal parse_last_read_we  : std_logic;
   -- parse_ocp is written by the register process but not read elsewhere in
   -- this architecture.
   signal parse_ocp           : std_logic;
   -- Number of lanes read in the current cycle and the matching lane masks
   -- (plain and rotated by parse_last_read).
   signal parse_read_count    : integer range 0 to BUFFERS;
   signal parse_read          : std_logic_vector(BUFFERS-1 downto 0);
   signal parse_read_rotated  : std_logic_vector(BUFFERS-1 downto 0);
   signal parse_valid         : std_logic;
   signal parse_dst_rdy       : std_logic;

   -- Buffer in stage
   -- First register stage after the parse FSM outputs.
   -- bufferin_sop is registered but not read elsewhere in this architecture.
   signal bufferin_read_count : integer range 0 to BUFFERS;
   signal bufferin_valid      : std_logic;
   signal bufferin_sop        : std_logic;
   signal bufferin_eop        : std_logic;
   signal bufferin_channel    : std_logic_vector(log2(CHANNELS)-1 downto 0);
   signal bufferin_last_read  : integer range 0 to BUFFERS-1 := 0;

   -- Buffer out stage
   -- Second register stage; buffer_channel selects the channel whose read
   -- data is captured into mux_data.
   signal buffer_valid        : std_logic;
   signal buffer_eop          : std_logic;
   signal buffer_channel      : std_logic_vector(log2(CHANNELS)-1 downto 0);
   signal buffer_last_read    : integer range 0 to BUFFERS-1 := 0;

   -- Channel mux stage
   -- Third register stage; mux_last_read is the lane rotation applied when
   -- mux_data is copied into rotate_data.
   signal mux_valid           : std_logic;
   signal mux_last_read       : integer range 0 to BUFFERS-1 := 0;
   signal mux_data            : t_data;
   signal mux_eop             : std_logic;
   signal mux_channel         : std_logic_vector(log2(CHANNELS)-1 downto 0);

   -- Rotate stage
   -- rotate_data / rotate_valid feed the output data FIFO.
   signal rotate_valid        : std_logic;
   signal rotate_data         : std_logic_vector(FLU_WIDTH-1 downto 0);

   -- Packet fifo stage
   -- Flags word layout, MSB to LSB:
   -- parse_eop_pos & parse_channel & parse_length(16 bits) & parse_eop & parse_sop
   signal packet_fifo_flags_in   : std_logic_vector(16+2+log2(CHANNELS)+log2(FLU_WIDTH/8)-1 downto 0);
   signal packet_fifo_flags_out  : std_logic_vector(16+2+log2(CHANNELS)+log2(FLU_WIDTH/8)-1 downto 0);
   signal packet_fifo_flags_read : std_logic;

   signal packet_fifo_empty   : std_logic;
   signal packet_fifo_status  : std_logic_vector(3 downto 0);

   -- All-zero vector; slices of it are used as zero literals of generic width
   constant zeros             : std_logic_vector(511 downto 0) := (others => '0');

   -- Build a mask of `count` bits in which the ARG lowest bits are '1' and
   -- all remaining bits are '0'. If ARG >= count every bit is '1'.
   function fill(ARG:natural; count : natural) return std_logic_vector is
      variable mask : std_logic_vector(count-1 downto 0) := (others => '0');
   begin
      -- Start from all zeros and raise only the bits below ARG.
      for bit_idx in mask'range loop
         if (bit_idx < ARG) then
            mask(bit_idx) := '1';
         end if;
      end loop;
      return mask;
   end;

begin

   -- ------------------------------------
   -- Write to buffer

   -- Input register stage of the DMA bus: data, address and channel are
   -- registered as they are; the word count DMA_WRITE is turned into a
   -- per-lane write mask with that many low bits set.
   reg_dma_writep : process(CLK)
      variable lane_count : integer;
   begin
      if (CLK'event and CLK = '1') then
         lane_count        := conv_integer(DMA_WRITE);

         reg_dma_write     <= fill(lane_count, BUFFERS);
         reg_dma_channel   <= DMA_CHANNEL;
         reg_dma_address   <= DMA_ADDRESS;
         reg_dma_data      <= DMA_DATA;
      end if;
   end process;

   -- Per-lane write path. For every lane i the registered DMA word is rotated
   -- so that the lane addressed by the low bits of reg_dma_address receives
   -- the lowest part of the word, and one FIFO per (lane, channel) pair is
   -- instantiated.
   gen_signals_write: for i in 0 to BUFFERS-1 generate
      writep: process(CLK)
         -- Lane addressed by the low bits of the registered write address
         variable first_lane : natural;
         -- Lane of the incoming word that is stored into lane i
         variable src_lane   : natural;
      begin
         if (CLK'event and CLK = '1') then
            first_lane := conv_integer(reg_dma_address(log2(BUFFERS)-1 downto 0));
            src_lane   := (i - first_lane) mod BUFFERS;

            --! Address compute: lanes below the first written lane belong to
            --! the following row, so their row address is incremented.
            if (first_lane > i) then
               buffer_write_address(i) <= reg_dma_address(log2(BUFFER_SIZE/DATA_ALIGN)-1 downto log2(BUFFERS)) + 1;
            else
               buffer_write_address(i) <= reg_dma_address(log2(BUFFER_SIZE/DATA_ALIGN)-1 downto log2(BUFFERS));
            end if;

            --! Shift input data. The slice is BUFFER_WIDTH bits wide so that it
            --! always matches the lane element width (it was fixed at 64 bits,
            --! which is only correct for DATA_ALIGN = 8).
            buffer_write_data(i)       <= reg_dma_data((src_lane+1)*BUFFER_WIDTH-1 downto src_lane*BUFFER_WIDTH);
         end if;
      end process;

      gen_buffer_write_rotated: for j in 0 to CHANNELS-1 generate
         -- Write strobe of lane i for channel j: the rotated write mask bit
         -- when channel j is addressed, otherwise inactive.
         write_rotatedp: process(CLK)
         begin
            if (CLK'event and CLK = '1') then
               if(j = reg_dma_channel) then
                  buffer_write(j)(i)   <= reg_dma_write((i - conv_integer(reg_dma_address(log2(BUFFERS)-1 downto 0))) mod BUFFERS);
               else
                  buffer_write(j)(i)   <= '0';
               end if;
            end if;
         end process;

         -- Generate 2D array of buffers
         fifo: entity work.DMA_BUFFER_TX_FIFO
         generic map(
            DATA_WIDTH           => BUFFER_WIDTH,
            ITEMS                => BLOCK_COUNT
         )
         port map(
            CLK                  => CLK,
            ENABLE               => ENABLE(j),

            IN_WRITE             => buffer_write(j)(i),
            IN_DATA              => buffer_write_data(i),
            IN_ADDR              => buffer_write_address(i),

            OUT_READ             => buffer_read(j)(i),
            OUT_DATA             => buffer_read_data(j)(i)
         );
      end generate;

   end generate;

   -- ------------------------------------
   -- Update buffer usage process

   -- Delay line for DMA_DONE: the strobe enters at the top bit and moves one
   -- position towards bit 0 on every clock edge.
   reg_dma_donep: process(CLK)
   begin
      if (CLK'event AND CLK = '1') then
         reg_dma_done(reg_dma_done'high) <= DMA_DONE;

         for stage in reg_dma_done'high - 1 downto reg_dma_done'low loop
            reg_dma_done(stage) <= reg_dma_done(stage + 1);
         end loop;
      end if;
   end process;

   -- Pack the completion record: size in the upper field, channel in the
   -- lower field of the FIFO word.
   dma_done_fifo_in(r_done_size)    <= DMA_DONE_SIZE;
   dma_done_fifo_in(r_done_channel) <= DMA_DONE_CHANNEL;

   -- Queue of completion records. A record is written on DMA_DONE and read
   -- when the delayed strobe reg_dma_done(0) fires.
   done_fifo: entity work.FIFO
   generic map(
      ITEMS             => 8,
      DATA_WIDTH        => log2(BUFFER_SIZE/DATA_ALIGN) + 1 + log2(CHANNELS)
   )
   port map(
      CLK               => CLK,
      RESET             => RESET,

      WRITE_REQ         => DMA_DONE,
      DATA_IN           => dma_done_fifo_in,
      FULL              => open,
      LSTBLK            => open,

      READ_REQ          => reg_dma_done(0),
      DATA_OUT          => dma_done_fifo_out,
      EMPTY             => open
   );

   -- Unpack the record at the FIFO output.
   reg_dma_done_size    <= dma_done_fifo_out(r_done_size);
   reg_dma_done_channel <= dma_done_fifo_out(r_done_channel);

   -- Buffer usage bookkeeping
   -- Two per-channel counters are kept in distributed memories: the number of
   -- words written (updated from the DMA-done records) and the number of
   -- words read (updated by the parse FSM). Their difference for the channel
   -- selected by parse_channel is the current buffer usage.
   buffer_usage      <= written - readen_rd;

   -- Written counter update: add the completed size on the delayed done strobe
   written_we        <= reg_dma_done(0);
   written_wr        <= written_rd + reg_dma_done_size;

   -- Read counter update: add the lanes read in this cycle, if any
   readen_we         <= '0' when parse_read_count = 0 else '1';
   readen_wr         <= readen_rd + parse_read_count;

   reg_written_i: entity work.DP_DISTMEM
   generic map(
      ITEMS          => CHANNELS,
      DATA_WIDTH     => log2(BUFFER_SIZE/DATA_ALIGN)+1,
      DISTMEM_TYPE   => 16
   )
   port map(
      RESET          => RESET,
      WCLK           => CLK,

      -- Port A: read-modify-write at the channel of the done record
      ADDRA          => reg_dma_done_channel,
      WE             => written_we,
      DI             => written_wr,
      DOA            => written_rd,

      -- Port B: read at the channel being parsed
      ADDRB          => parse_channel,
      DOB            => written
   );

   reg_readen_i: entity work.DP_DISTMEM
   generic map(
      ITEMS          => CHANNELS,
      DATA_WIDTH     => log2(BUFFER_SIZE/DATA_ALIGN)+1,
      DISTMEM_TYPE   => 16
   )
   port map(
      RESET          => RESET,
      WCLK           => CLK,

      -- Port A: read-modify-write at the channel being parsed
      ADDRA          => parse_channel,
      WE             => readen_we,
      DI             => readen_wr,
      DOA            => readen_rd,

      -- Port B: read at the channel scanned by the length logic
      ADDRB          => length_channel,
      DOB            => readen
   );

   -- ------------------------------------
   -- Get packet length process

   -- Aligned packet length per channel: written at length_channel with the
   -- length rounded up to DATA_ALIGN, read at parse_channel.
   reg_packet_length_i: entity work.DP_DISTMEM
   generic map(
      ITEMS          => CHANNELS,
      DATA_WIDTH     => 16,
      DISTMEM_TYPE   => 16
   )
   port map(
      RESET          => RESET,
      WCLK           => CLK,

      ADDRA          => length_channel,
      WE             => length_we,
      DI             => length_just,
      DOA            => open,

      ADDRB          => parse_channel,
      DOB            => parse_len
   );

   -- Low bits of the raw (not rounded) packet length per channel: written at
   -- length_channel, read at parse_channel.
   reg_packet_length_na_i: entity work.DP_DISTMEM
   generic map(
      ITEMS          => CHANNELS,
      DATA_WIDTH     => log2(FLU_WIDTH/8),
      DISTMEM_TYPE   => 16
   )
   port map(
      RESET          => RESET,
      WCLK           => CLK,

      ADDRA          => length_channel,
      WE             => length_we,
      DI             => length_in(log2(FLU_WIDTH/8)-1 downto 0),
      DOA            => open,

      ADDRB          => parse_channel,
      DOB            => parse_len_na
   );

   -- Last-read lane pointer per channel as seen by the length logic: updated
   -- at bufferin_channel, read at length_channel.
   reg_len_last_read_i: entity work.DP_DISTMEM
   generic map(
      ITEMS          => CHANNELS,
      DATA_WIDTH     => log2(BUFFERS),
      DISTMEM_TYPE   => 16
   )
   port map(
      RESET          => RESET,
      WCLK           => CLK,

      ADDRA          => bufferin_channel,
      WE             => length_last_read_we,
      DI             => length_last_read_w,
      DOA            => length_last_read_o,

      ADDRB          => length_channel,
      DOB            => length_last_read
   );

   -- The last-read lane pointer is rewritten on every cycle outside reset:
   -- it advances by the number of lanes read for an enabled channel and is
   -- cleared for a disabled one.
   length_last_read_we  <= not RESET;
   length_last_read_w   <= (length_last_read_o + bufferin_read_count) when ENABLE(conv_integer(bufferin_channel)) = '1' else (others =>'0');

   --length_we         <= not length_buffer_busy(conv_integer(length_channel));
   -- Raw length field: low 16 bits of the lane selected by the last-read pointer.
   length_in         <= length_read_channel(conv_integer(length_last_read));
   -- Length rounded up to the next multiple of DATA_ALIGN. The zero operand of
   -- the alignment test is sized from DATA_ALIGN, like the slice it is
   -- compared with, instead of a fixed 3-bit literal.
   length_just       <= length_in when (length_in(log2(DATA_ALIGN)-1 downto 0) = zeros(log2(DATA_ALIGN)-1 downto 0)) else (length_in(15 downto log2(DATA_ALIGN))+1) & zeros(log2(DATA_ALIGN)-1 downto 0);

   -- Per-channel length bookkeeping.
   --  * length_valid(i) is refreshed whenever the scan counter length_channel
   --    points at channel i: it is set only if the channel is enabled, not
   --    busy, and no packet is starting on it in this very cycle.
   --  * length_buffer_busy(i) is set when a packet starts on channel i and
   --    cleared when its end-of-packet reaches the mux stage.
   gen_lengthp: for i in 0 to CHANNELS-1 generate
      lengthp : process(CLK)
         variable packet_starting : boolean;
      begin
         if (CLK'event AND CLK = '1') then
            packet_starting := (i = parse_channel) and (parse_sop = '1');

            if (i = length_channel) then
               if (packet_starting or length_buffer_busy(i) /= '0' or ENABLE(i) /= '1') then
                  length_valid(i) <= '0';
               else
                  length_valid(i) <= '1';
               end if;
            end if;

            -- A starting packet takes priority over a finishing one.
            if (packet_starting) then
               length_buffer_busy(i) <= '1';
            elsif (mux_eop = '1' and mux_channel = i) then
               length_buffer_busy(i) <= '0';
            end if;
         end if;
      end process;
   end generate;

   -- Capture the low 16 bits of every lane of the channel selected by
   -- length_channel1; one of these words is later picked as the length field.
   gen_length_read_channelp: for i in 0 to BUFFERS-1 generate
      length_read_channelp: process(CLK)
         variable src_channel : integer;
      begin
         if (CLK'event AND CLK = '1') then
            src_channel            := conv_integer(length_channel1);
            length_read_channel(i) <= buffer_read_data(src_channel)(i)(15 downto 0);
         end if;
      end process;
   end generate;

   -- Free-running channel scan: length_channel advances by one every clock
   -- edge, and length_channel1 is loaded with the current length_channel
   -- plus two.
   length_channelp : process(CLK)
      variable current_channel : std_logic_vector(log2(CHANNELS)-1 downto 0);
   begin
      if (CLK'event AND CLK = '1') then
         current_channel  := length_channel;

         length_channel1  <= current_channel + 2;
         length_channel   <= current_channel + 1;
      end if;
   end process;

   -- ------------------------------------
   -- FSM and parse processes

   -- Space check towards the output FIFO: parsing proceeds only while the
   -- FIFO status value is above 4.
   parse_dst_rdy              <= '0' when packet_fifo_status <= 4 else '1';

   -- Lane read mask: the parse_read_count lowest bits are set.
   parse_read                 <= fill(parse_read_count, BUFFERS);

   gen_parse_read : for i in 0 to BUFFERS-1 generate
      --! Rotate the read mask so that reading continues at the lane that
      --! follows the last lane read for this channel.
      parse_read_rotated(i)   <= parse_read((i - conv_integer(parse_last_read)) mod BUFFERS);
   end generate;

   -- The last-read lane pointer is rewritten on every cycle outside reset:
   -- it advances by the number of lanes read for an enabled channel and is
   -- cleared for a disabled one.
   parse_last_read_we         <= not RESET;
   parse_last_read_w          <= (others =>'0') when ENABLE(conv_integer(parse_channel)) /= '1' else (parse_last_read + parse_read_count);

   -- Last-read lane pointer per channel as seen by the parse FSM.
   reg_parse_last_read_i: entity work.SP_DISTMEM
   generic map(
      ITEMS          => CHANNELS,
      DATA_WIDTH     => log2(BUFFERS),
      DISTMEM_TYPE   => 16
   )
   port map(
      RESET          => RESET,
      WCLK           => CLK,

      ADDR           => parse_channel,
      WE             => parse_last_read_we,
      DI             => parse_last_read_w,
      DO             => parse_last_read
   );

   -- State register of the parse FSM with synchronous reset to S_IDLE.
   sync_logic : process(CLK)
   begin
      if (CLK'event AND CLK = '1') then
         present_state        <= next_state;

         -- The reset assignment comes last and therefore overrides the
         -- regular state update.
         if (RESET = '1') then
            present_state     <= S_IDLE;
         end if;
      end if;
   end process sync_logic;

   -- Next-state function of the parse FSM.
   --   S_IDLE   -> S_LOAD   when the current channel holds data and its length
   --                        entry is valid
   --   S_LOAD   -> S_PACKET when the whole packet is already buffered,
   --               S_IDLE   otherwise
   --   S_PACKET -> S_IDLE   once the last word has been issued
   -- The sensitivity list names exactly the signals read below.
   next_state_logic : process (present_state, parse_length, parse_sent_len, buffer_usage, parse_dst_rdy, length_valid, parse_channel)
   begin
      next_state              <= present_state;

      case (present_state) is
         -- ---------------------------------------------
         when S_IDLE =>
            if(buffer_usage > 0 and length_valid(conv_integer(parse_channel)) = '1') then
               next_state     <= S_LOAD;
            else
               next_state     <= S_IDLE;
            end if;
         -- ---------------------------------------------
         when S_LOAD =>
            -- buffer_usage counts DATA_ALIGN-byte words while parse_length is
            -- in bytes: scale the usage by appending log2(DATA_ALIGN) zero
            -- bits (previously a fixed "000", valid only for DATA_ALIGN = 8).
            if(parse_length <= (buffer_usage & zeros(log2(DATA_ALIGN)-1 downto 0))) then
               next_state     <= S_PACKET;
            else
               next_state     <= S_IDLE;
            end if;
         -- ---------------------------------------------
         when S_PACKET =>
            if(parse_dst_rdy = '1' and parse_sent_len >= parse_length) then
               next_state     <= S_IDLE;
            end if;
         -- ---------------------------------------------
         when others =>
            next_state        <= S_IDLE;
         -- ---------------------------------------------
      end case;
   end process;

   -- Output function of the parse FSM. Outputs are active only in S_PACKET:
   --  * while the output FIFO has room, a word is issued and all lanes are read
   --  * start-of-packet is flagged while the running byte count still equals
   --    one full word, i.e. on the first word of the packet
   --  * on the last word end-of-packet is flagged and only the lanes that the
   --    remaining length occupies are read (all lanes if it fills the word)
   -- The sensitivity list names exactly the signals read below.
   output_logic : process (present_state, parse_length, parse_sent_len, parse_dst_rdy)
      -- Bytes carried by one FLU word (replaces the literal 64)
      constant WORD_BYTES : integer := FLU_WIDTH/8;
   begin
      parse_read_count        <=  0;
      parse_valid             <= '0';
      parse_sop               <= '0';
      parse_eop               <= '0';

      case (present_state) is
         -- ---------------------------------------------
         when S_PACKET =>

            if(parse_dst_rdy = '1') then
               parse_valid       <= '1';
               parse_read_count  <= BUFFERS;
            end if;

            if(parse_sent_len = WORD_BYTES) then
               parse_sop         <= '1';
            end if;

            if(parse_sent_len >= parse_length and parse_dst_rdy = '1') then
               parse_eop         <= '1';

               -- Lane count of the last word: the word-offset bits of the
               -- length, where zero means that the last word is full.
               if(parse_length(log2(DATA_ALIGN*BUFFERS)-1 downto log2(DATA_ALIGN)) = zeros(log2(BUFFERS)-1 downto 0)) then
                  parse_read_count  <= BUFFERS;
               else
                  parse_read_count  <= conv_integer(parse_length(log2(DATA_ALIGN*BUFFERS)-1 downto log2(DATA_ALIGN)));
               end if;
            end if;
         -- ---------------------------------------------
         when others =>
            -- S_IDLE and S_LOAD keep the default (inactive) outputs.
            null;
         -- ---------------------------------------------
      end case;
   end process;


   -- Registers of the parse FSM:
   --  * in S_IDLE the length information of the current channel is latched
   --    and the running byte count is preset to one full word
   --  * in S_PACKET the running byte count grows by the bytes read per cycle
   --  * whenever the FSM is about to be idle, the next channel is selected
   regp : process(CLK)
      -- Bytes carried by one FLU word (replaces the literal 64)
      constant WORD_BYTES : integer := FLU_WIDTH/8;
   begin
      if (CLK'event and CLK = '1') then

         -- packet length register
         if(present_state = S_IDLE) then
            parse_sent_len    <= conv_std_logic_vector(WORD_BYTES, 16);

            parse_length      <= parse_len;
            parse_eop_pos     <= parse_len_na - 1;

            -- Packet fits into a single word
            if(parse_len <= WORD_BYTES) then
               parse_ocp      <= '1';
            else
               parse_ocp      <= '0';
            end if;
         end if;

         if(present_state = S_PACKET) then
            if(parse_dst_rdy = '1') then
               -- Each lane read holds DATA_ALIGN bytes (replaces the literal 8)
               parse_sent_len <= parse_sent_len + parse_read_count*DATA_ALIGN;
            end if;
         end if;

         -- Every state advanced the channel under this same condition, so
         -- the three identical branches are merged into one.
         if(next_state = S_IDLE) then
            parse_channel     <= parse_channel + 1;
         end if;

      end if;
   end process;

   -- Control pipeline that accompanies the data through the read path:
   -- parse -> bufferin -> buffer -> mux -> rotate. Every signal is delayed by
   -- one clock per stage; the assignments are grouped per signal.
   reg_stagesp : process(CLK)
   begin
      if (CLK'event AND CLK = '1') then
         -- Valid flag: four stages
         bufferin_valid      <= parse_valid;
         buffer_valid        <= bufferin_valid;
         mux_valid           <= buffer_valid;
         rotate_valid        <= mux_valid;

         -- End of packet: three stages
         bufferin_eop        <= parse_eop;
         buffer_eop          <= bufferin_eop;
         mux_eop             <= buffer_eop;

         -- Channel number: three stages
         bufferin_channel    <= parse_channel;
         buffer_channel      <= bufferin_channel;
         mux_channel         <= buffer_channel;

         -- Last-read lane pointer: three stages
         bufferin_last_read  <= conv_integer(parse_last_read);
         buffer_last_read    <= bufferin_last_read;
         mux_last_read       <= buffer_last_read;

         -- Start of packet and read count: first stage only
         bufferin_sop        <= parse_sop;
         bufferin_read_count <= parse_read_count;
      end if;
   end process;

   -- ----------------------------------------------
   -- Buffer stage

   -- Registered read strobes: the rotated lane mask is applied to the channel
   -- being parsed, every other channel gets no read strobe.
   gen_bufferp: for i in 0 to CHANNELS-1 generate
      bufferp : process(CLK)
      begin
         if(CLK'event and CLK = '1') then
            -- Default: no lane is read; overridden for the selected channel.
            buffer_read(i)    <= (others => '0');

            if(parse_channel = i) then
               buffer_read(i) <= parse_read_rotated;
            end if;
         end if;
      end process;
   end generate;

   -- ----------------------------------------------
   -- Two register stages per lane:
   --  1. channel mux - take lane i from the channel selected by buffer_channel
   --  2. rotation    - place the lane that is mux_last_read positions ahead
   --                   into position i of the output word
   gen_rotatep: for i in 0 to BUFFERS-1 generate
      rotate_datap : process(CLK)
      begin
         if (CLK'event AND CLK = '1') then
            -- Mux stage - select data from channel
            mux_data(i)    <= buffer_read_data(conv_integer(buffer_channel))(i);

            -- Rotate stage - rotate data. The destination slice is
            -- BUFFER_WIDTH bits wide so that it always matches the lane
            -- element width (it was fixed at 64 bits, which is only correct
            -- for DATA_ALIGN = 8).
            rotate_data((i+1)*BUFFER_WIDTH-1 downto i*BUFFER_WIDTH) <= mux_data((i + mux_last_read) mod BUFFERS);
         end if;
      end process;
   end generate;

   -- ------------------------------------
   -- Send packet process

   -- Output data FIFO: rotated words are written when rotate_valid is set and
   -- read by the FLU destination; its status value drives parse_dst_rdy and
   -- its empty flag drives TX_SRC_RDY.
   packet_fifo : entity work.FIFO_STATUS
   generic map(
      ITEMS             => 8,
      DATA_WIDTH        => FLU_WIDTH
   )
   port map(
      CLK               => CLK,
      RESET             => RESET,

      WRITE_REQ         => rotate_valid,
      DATA_IN           => rotate_data,
      FULL              => open,
      LSTBLK            => open,
      STATUS            => packet_fifo_status,

      READ_REQ          => TX_DST_RDY,
      DATA_OUT          => TX_DATA,
      EMPTY             => packet_fifo_empty
   );

   -- Flags FIFO: one flags word is written for every word issued by the parse
   -- FSM (parse_valid) and read together with the matching data word.
   packet_fifo_flags : entity work.FIFO
   generic map(
      ITEMS             => 8,
      DATA_WIDTH        => 16+2+log2(CHANNELS)+log2(FLU_WIDTH/8)
   )
   port map(
      CLK               => CLK,
      RESET             => RESET,

      WRITE_REQ         => parse_valid,
      DATA_IN           => packet_fifo_flags_in,
      FULL              => open,
      LSTBLK            => open,

      READ_REQ          => packet_fifo_flags_read,
      DATA_OUT          => packet_fifo_flags_out,
      EMPTY             => open
   );

   -- A flags word is consumed whenever a data word is actually transferred.
   packet_fifo_flags_read <= TX_DST_RDY and not packet_fifo_empty;
   -- Flags word layout, MSB to LSB:
   -- parse_eop_pos & parse_channel & parse_length(16 bits) & parse_eop & parse_sop
   packet_fifo_flags_in <= parse_eop_pos & parse_channel & parse_length & parse_eop & parse_sop;

   TX_SRC_RDY           <= not packet_fifo_empty;
   TX_SOP               <= packet_fifo_flags_out(0);
   TX_EOP               <= packet_fifo_flags_out(1);
   -- Start position 1 when the header is removed, 0 otherwise. The value is
   -- sized to SOP_WIDTH; the former 3-bit literals "001"/"000" only matched
   -- the port for SOP_WIDTH = 3 and mismatched the default SOP_WIDTH = 1.
   -- NOTE(review): position 1 maps to a different byte offset for other
   -- SOP_WIDTH values - confirm the intended header offset.
   TX_SOP_POS           <= conv_std_logic_vector(1, SOP_WIDTH) when REMOVE_HEADER = true else (others => '0');
   TX_EOP_POS           <= packet_fifo_flags_out(16+2+log2(CHANNELS)+log2(FLU_WIDTH/8)-1 downto 16+2+log2(CHANNELS));
   TX_CHANNEL           <= packet_fifo_flags_out(16+2+log2(CHANNELS)-1 downto 16+2);

   -- Packet report: strobe while the word flagged as end-of-packet is being
   -- transferred, with length and channel taken from the same flags word.
   PACKET_SENT          <= TX_DST_RDY and not packet_fifo_empty and packet_fifo_flags_out(1);
   PACKET_LENGTH        <= packet_fifo_flags_out(log2(BUFFER_SIZE)+2 downto 2);
   PACKET_CHANNEL       <= packet_fifo_flags_out(16+2+log2(CHANNELS)-1 downto 16+2);

end architecture;
-- ----------------------------------------------------------------------------
