-- block_merge.vhd : Merge 20 input lanes into 8 output lanes
--                     
-- Copyright (C) 2012 CESNET
-- Author(s): Stepan Friedl <friedl@cesnet.cz>
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
-- 1. Redistributions of source code must retain the above copyright
--    notice, this list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright
--    notice, this list of conditions and the following disclaimer in
--    the documentation and/or other materials provided with the
--    distribution.
-- 3. Neither the name of the Company nor the names of its contributors
--    may be used to endorse or promote products derived from this
--    software without specific prior written permission.
--
-- This software is provided ``as is'', and any express or implied
-- warranties, including, but not limited to, the implied warranties of
-- merchantability and fitness for a particular purpose are disclaimed.
-- In no event shall the company or contributors be liable for any
-- direct, indirect, incidental, special, exemplary, or consequential
-- damages (including, but not limited to, procurement of substitute
-- goods or services; loss of use, data, or profits; or business
-- interruption) however caused and on any theory of liability, whether
-- in contract, strict liability, or tort (including negligence or
-- otherwise) arising in any way out of the use of this software, even
-- if advised of the possibility of such damage.
--
-- $Id: $
--
-- NOTES:

library ieee;
use ieee.std_logic_1164.all;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;

-- block_merge_20x8: width converter between two clock domains.
-- Port D accepts 20 words of 66 bits per transfer in the CLK_D domain;
-- port Q delivers 8 words of 66 bits per valid cycle in the CLK_Q domain.
-- Word k of either bus occupies bits 66*(k+1)-1 downto 66*k.
entity block_merge_20x8 is
   port (
      -- Input port D - 20 word data, low frequency clock domain
      RST_D : in std_logic; -- D clock domain reset (drives the write-side reset of every lane FIFO)
      CLK_D : in std_logic;  -- D clock, CLK_Q/2.5, edge aligned
      CE_D  : in std_logic;  -- D clock enable (write strobe for all 20 lanes). Two consecutive back-to-back data transfers are not allowed!
      D     : in std_logic_vector(20*66-1 downto 0);  -- Input data, 20 x 66 bits
      -- Output port Q - 8 word data, high frequency clock domain
      RST_Q : in std_logic; -- Q clock domain reset (synchronous reset of the output sequencer, read-side reset of the FIFOs)
      CLK_Q : in std_logic; -- Q clock
      CE_Q  : out std_logic := '0'; -- Port Q clock enable: registered flag marking Q as valid
      Q     : out std_logic_vector(8*66-1 downto 0)  -- Output data, 8 x 66 bits, registered
   );
end block_merge_20x8;

architecture behavioral of block_merge_20x8 is

-- Simulation-only assignment delay. No active statement in this architecture
-- applies it.
constant DLY : time := 500 ps;

-- Declaration of a vendor FIFO core handling one 66-bit lane. The active
-- code instantiates work.asfifo instead; the declaration is kept so the
-- core can be swapped in without touching the declarative part.
COMPONENT blockfifo
  PORT (
    wr_clk : IN STD_LOGIC;
    rd_clk : IN STD_LOGIC;
    din : IN STD_LOGIC_VECTOR(65 DOWNTO 0);
    wr_en : IN STD_LOGIC;
    rd_en : IN STD_LOGIC;
    dout : OUT STD_LOGIC_VECTOR(65 DOWNTO 0);
    full : OUT STD_LOGIC;
    empty : OUT STD_LOGIC
  );
END COMPONENT;

-- Mark the core as a black box for synthesis.
ATTRIBUTE SYN_BLACK_BOX : BOOLEAN;
ATTRIBUTE SYN_BLACK_BOX OF blockfifo : COMPONENT IS TRUE;
ATTRIBUTE BLACK_BOX_PAD_PIN : STRING;
ATTRIBUTE BLACK_BOX_PAD_PIN OF blockfifo : COMPONENT IS "wr_clk,rd_clk,din[65:0],wr_en,rd_en,dout[65:0],full,empty";

attribute keep : string;
attribute dont_touch : string;

-- Output phase: five CLK_Q cycles emit 5 x 8 = 40 words, i.e. two complete
-- 20-word input transfers.
type t_cycle is (ONE, TWO, THREE, FOUR, FIVE);

signal cycle, next_cycle    : t_cycle;  -- current / following output phase
signal ce_q_i  : std_logic;                          -- output valid, before the output register
signal q_i     : std_logic_vector(8*66-1 downto 0);  -- output data, before the output register

signal fifo_do     : std_logic_vector(20*66-1 downto 0);  -- read data of all lane FIFOs
signal fifo_do_reg : std_logic_vector(20*66-1 downto 0);  -- most recently captured FIFO word set
signal fifo_do_reg2: std_logic_vector(20*66-1 downto 0);  -- word set captured one read earlier
signal fifo_rd     : std_logic_vector(19 downto 0);       -- per-lane read strobes
-- Keep one read strobe per lane instead of letting the tools merge the
-- 20 identical nets.
attribute keep of fifo_rd : signal is "true";
attribute dont_touch of fifo_rd : signal is "true";
signal fifo_full   : std_logic_vector(19 downto 0);       -- per-lane full flags (not evaluated)
signal fifo_empty  : std_logic_vector(19 downto 0);       -- per-lane empty flags (only lane 0 is evaluated)
signal fifo_status : std_logic_vector(2*20-1 downto 0);   -- per-lane 2-bit status (not evaluated)

begin

-- One asynchronous FIFO per 66-bit input lane moves the data from the CLK_D
-- domain into the CLK_Q domain. All lanes are written by the common CE_D
-- strobe; lane k is read by fifo_rd(k).
GEN_FIFOS: for lane in 0 to 19 generate

   -- The generic ASFIFO is used here so the design does not depend on the
   -- Xilinx FIFO core. To use the vendor core instead, instantiate the
   -- "blockfifo" component with the equivalent connections:
   --    wr_clk => CLK_D, wr_en => CE_D, din  => the D slice of this lane,
   --    rd_clk => CLK_Q, rd_en => fifo_rd(lane), dout => the fifo_do slice,
   --    full   => fifo_full(lane), empty => fifo_empty(lane)
   FIFO: entity work.asfifo
   generic map (
      DATA_WIDTH   => 66,   -- one lane word
      ITEMS        => 8,
      STATUS_WIDTH => 2
   )
   port map (
      -- Read side (CLK_Q domain)
      CLK_RD   => CLK_Q,
      RST_RD   => RST_Q,
      RD       => fifo_rd(lane),
      DO       => fifo_do(66*lane + 65 downto 66*lane),
      EMPTY    => fifo_empty(lane),
      -- Write side (CLK_D domain)
      CLK_WR   => CLK_D,
      RST_WR   => RST_D,
      WR       => CE_D,
      DI       => D(66*lane + 65 downto 66*lane),
      FULL     => fifo_full(lane),
      STATUS   => fifo_status(2*lane + 1 downto 2*lane)
   );

end generate;

-- CYCLE_SEQ: CLK_Q-clocked state of the merger.
--   * cycle        - output phase; synchronously reset to ONE by RST_Q and
--                    advanced to next_cycle only while ce_q_i is asserted,
--                    so the phase stalls when no valid output is produced.
--   * fifo_do_reg  - FIFO read data captured on every accepted read.
--   * fifo_do_reg2 - the previous content of fifo_do_reg, i.e. the word set
--                    of the read before.
-- The data registers have no reset.
CYCLE_SEQ: process(CLK_Q)
begin
   -- rising_edge() only accepts a '0' -> '1' transition, whereas the
   -- 'event form also fires on e.g. 'U' -> '1' at simulation start.
   if rising_edge(CLK_Q) then
      if RST_Q = '1' then
         cycle <= ONE;
      elsif ce_q_i = '1' then
         cycle <= next_cycle;
      end if;
      -- Lane 0's read strobe and empty flag qualify the capture for all
      -- 20 lanes.
      -- NOTE(review): this presumes every lane FIFO leaves the empty state
      -- in the same CLK_Q cycle as lane 0 - confirm against asfifo.
      if (fifo_rd(0) = '1') and (fifo_empty(0) = '0') then
         fifo_do_reg  <= fifo_do;
         fifo_do_reg2 <= fifo_do_reg;
      end if;
   end if;
end process;

-- READ_CONTROL: combinational decode of the output phase.
-- For every phase it selects the 8-word slice presented to the output
-- register, names the following phase and decides whether the lane FIFOs
-- are popped. In the popping phases (ONE, THREE) the output is only valid
-- when lane 0's FIFO is not empty; in all other phases it is always valid.
READ_CONTROL: process(cycle, fifo_empty, fifo_do_reg, fifo_do_reg2)
   variable pop : boolean;  -- true in the phases that read the FIFOs
begin
   pop := false;

   case cycle is
      when ONE =>
         -- words 0..7 of the most recently captured word set
         pop        := true;
         q_i        <= fifo_do_reg(8*66-1 downto 0);
         next_cycle <= TWO;
      when TWO =>
         -- words 8..15 of the older word set
         q_i        <= fifo_do_reg2(16*66-1 downto 8*66);
         next_cycle <= THREE;
      when THREE =>
         -- lower half: words 16..19 of the older word set,
         -- upper half: words 0..3 of the newer word set
         pop        := true;
         q_i        <= fifo_do_reg(4*66-1 downto 0) & fifo_do_reg2(20*66-1 downto 16*66);
         next_cycle <= FOUR;
      when FOUR =>
         -- words 4..11 of the older word set
         q_i        <= fifo_do_reg2(12*66-1 downto 4*66);
         next_cycle <= FIVE;
      when others =>
         -- FIVE: words 12..19 of the older word set
         -- (original note: output data will be sampled here, with the
         -- next clock edge)
         q_i        <= fifo_do_reg2(20*66-1 downto 12*66);
         next_cycle <= ONE;
   end case;

   if pop then
      fifo_rd <= (others => '1');
      ce_q_i  <= not fifo_empty(0);
   else
      fifo_rd <= (others => '0');
      ce_q_i  <= '1';
   end if;
end process;

-- OUTPUT_REG: output register stage in the CLK_Q domain.
-- Q and CE_Q are the values of q_i and ce_q_i delayed by one CLK_Q cycle.
-- Neither register is reset by RST_Q; CE_Q relies on its declared initial
-- value '0'.
OUTPUT_REG: process(CLK_Q)
begin
   -- rising_edge() only accepts a '0' -> '1' transition, whereas the
   -- 'event form also fires on e.g. 'U' -> '1' at simulation start.
   if rising_edge(CLK_Q) then
      Q    <= q_i;
      CE_Q <= ce_q_i;
   end if;
end process;

end behavioral;
