/*******************************************************************************
 * This file is part of the WAL library developed
 * within the EU Artemis project SMECY (Smart Multicore Embedded Systems)
 * Artemis JU 100230 and MSMT 7H10001,             http://www.smecy.eu
 * Copyright (C) 2010, 2011 UTIA AV CR, v.v.i.     http://sp.utia.cz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even 
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 *  PURPOSE.
 *
 * This file has been released within the SMECY project
 * consortium for the requirements of the SMECY project.
 * Any use outside the SMECY consortium and/or for any
 * developments outside the scope of the SMECY project is prohibited.
 *
 * For more details contact Roman Bartosinski <bartosr@utia.cas.cz>.
 *******************************************************************************
 * Filename  : wal_bce_jk.h
 * Authors   : Roman Bartosinski <bartosr@utia.cas.cz>
 * Project   : SMECY
 * Purpose   : Header file for the BCE JK group of families
 * Release   : 
 * Version   : 1.0
 * Date      : 2010/04/13
 *
 * Long Description:
 *  Functions, constants and definitions specific for the BCE JK group of
 *  families within the WAL (Worker Abstraction Layer) library.
 *  This version contains only one family of workers, for 32bit floating point
 *  with a 24bit mantissa (FP32M24).
 * 
 *******************************************************************************
 * Modifications:
 *  Date: 2010/04/20
 *  Author: Roman Bartosinski
 *  Modified: Added comments and documentation
 *  --
 *  
 ******************************************************************************/

/*
 * Include guard: the body of this header is skipped when the guard macro is
 * already defined, i.e. when the header was included before.
 * NOTE(review): identifiers that start with an underscore followed by an
 * uppercase letter are reserved for the implementation in ISO C; consider
 * renaming the guard in a coordinated change.
 */
#ifndef _WAL_BCE_JK_HEADER_FILE_
#define _WAL_BCE_JK_HEADER_FILE_

/*
 * The following have to be defined for the BCE_JK_FP32M24 worker family:
 * WAL_BCE_JK_FP32M24_MAX_SIMD
 * WAL_BCE_JK_FP32M24_DMEM_NUM_MEMORIES
 * WAL_BCE_JK_FP32M24_CMEM_NUM_MEMORIES
 * WAL_BCE_JK_FP32M24_SMEM_NUM_MEMORIES
 * wal_BCE_JK_FP32M24_description_structure
 */

/* IDs of the worker families */
/** D
 * enum wal_bce_jk_family_ids - identification of the families which belong
 *                              to the BCE_JK group of families
 * @WAL_BCE_JK_ID_UNKNOWN: the family is not known in the BCE_JK group (value 0)
 * @WAL_BCE_JK_ID_FP32M24: the original family - 32bit floating point with
 *                         a 24bit mantissa (value 1)
 */
enum wal_bce_jk_family_ids {
	WAL_BCE_JK_ID_UNKNOWN,		/* 0 */
	WAL_BCE_JK_ID_FP32M24		/* 1 */
};

/* control memories */
/*
 * Number of control memories of the FP32M24 family. The macro is only an
 * alias of WAL_CMEM_NUM_MEMORIES, which is not defined in this header and
 * has to be visible wherever this macro is expanded.
 */
#define WAL_BCE_JK_FP32M24_CMEM_NUM_MEMORIES	WAL_CMEM_NUM_MEMORIES

/* data memories */

/* Maximal SIMD value for the BCE_JK group of families. */
#define WAL_BCE_JK_MAX_SIMD		8

/*
 * Maximal SIMD value of the FP32M24 family (alias of the group-wide limit).
 * It must be defined: it is used for checking that a registered worker
 * doesn't want more than this.
 */
#define WAL_BCE_JK_FP32M24_MAX_SIMD	WAL_BCE_JK_MAX_SIMD

/** D
 * enum wal_bce_jk_data_memories - indices of the data memories in BCE_JK
 * @WAL_BCE_JK_DMEM_A: data memory A, index 0
 * @WAL_BCE_JK_DMEM_B: data memory B, index 1
 * @WAL_BCE_JK_DMEM_Z: data memory Z, index 2
 * @WAL_BCE_JK_DMEM_NUM_MEMORIES: count of the data memories in the BCE_JK
 *                                families (3); it is the last enumerator, so
 *                                its value follows the last memory index
 */
enum wal_bce_jk_data_memories {
	WAL_BCE_JK_DMEM_A = 0,
	WAL_BCE_JK_DMEM_B = 1,
	WAL_BCE_JK_DMEM_Z = 2,
	WAL_BCE_JK_DMEM_NUM_MEMORIES
};

/* The FP32M24 family has the same number of data memories as the group. */
#define WAL_BCE_JK_FP32M24_DMEM_NUM_MEMORIES	WAL_BCE_JK_DMEM_NUM_MEMORIES

/* support memories */
#define WAL_BCE_JK_SMEM_NUM_MEMORIES	4
/** D
 * enum wal_bce_jk_support_memories - indices to BCE_JK support memories
 * @WAL_BCE_JK_SMEM_MB2SM1: index to the first support memory for the BCE_JK families
 *                          family. It is the first index after indices to control memories
 *                          (@WAL_BCE_JK_SMEM_MB2SM1 = @WAL_CMEM_NUM_MEMORIES)
 * @WAL_BCE_JK_SMEM_MB2SM2: index to the second support memory for the BCE_JK families
 * @WAL_BCE_JK_SMEM_MB2SM3: index to the third support memory for the BCE_JK families
 * @WAL_BCE_JK_SMEM_MB2SM4: index to the fourth support memory for the BCE_JK families
 *
 * All BCE_JK support memories are used for cosimulation only.
 */
enum wal_bce_jk_support_memories {
	WAL_BCE_JK_SMEM_MB2SM1 = WAL_CMEM_NUM_MEMORIES,
	WAL_BCE_JK_SMEM_MB2SM2,
	WAL_BCE_JK_SMEM_MB2SM3,
	WAL_BCE_JK_SMEM_MB2SM4
};
#define WAL_BCE_JK_FP32M24_SMEM_NUM_MEMORIES	WAL_BCE_JK_SMEM_NUM_MEMORIES

/******************************************************************************/
/* BCE_JK worker specific operations and capabilities */
/** D
 * enum wal_bce_jk_operation_codes - codes of the worker operations known to
 *                                   the BCE_JK group of families
 * @WAL_BCE_JK_VVER: (0x00) return version of HW (capabilities) in the Z data
 *                   memory (simdID=0)
 * @WAL_BCE_JK_VZ2A: (0x01) vector copy      a[i] <= z[j]
 * @WAL_BCE_JK_VB2A: (0x02) vector copy      a[i] <= b[j]
 * @WAL_BCE_JK_VZ2B: (0x03) vector copy      b[i] <= z[j]
 * @WAL_BCE_JK_VA2B: (0x04) vector copy      b[i] <= a[j]
 * @WAL_BCE_JK_VADD: (0x05) vector add       z[i] <= a[j] + b[k]
 * @WAL_BCE_JK_VADD_BZ2A: (0x06) vector add       a[i] <= b[j] + z[k]
 * @WAL_BCE_JK_VADD_AZ2B: (0x07) vector add       b[i] <= a[j] + z[k]
 * @WAL_BCE_JK_VSUB: (0x08) vector subtract  z[i] <= a[j] - b[k]
 * @WAL_BCE_JK_VSUB_BZ2A: (0x09) vector subtract  a[i] <= b[j] - z[k]
 * @WAL_BCE_JK_VSUB_AZ2B: (0x0A) vector subtract  b[i] <= a[j] - z[k]
 * @WAL_BCE_JK_VMULT: (0x0B) vector multiply  z[i] <= a[j] * b[k]
 * @WAL_BCE_JK_VMULT_BZ2A: (0x0C) vector multiply  a[i] <= b[j] * z[k]
 * @WAL_BCE_JK_VMULT_AZ2B: (0x0D) vector multiply  b[i] <= a[j] * z[k]
 * @WAL_BCE_JK_VPROD: (0x0E) vector product   z <= a'[i..i+nn]*b[i..i+nn]
 * @WAL_BCE_JK_VMAC: (0x0F) vector MAC       z[i] <= z[i] + a[j]*b[k] 1..13.
 *                   NOTE(review): the earlier text showed '-' here, which is
 *                   identical to VMSUBAC; '+' is presumed for a
 *                   multiply-accumulate - confirm against the HW description.
 * @WAL_BCE_JK_VMSUBAC: (0x10) vector MSUBAC    z[i] <= z[i] - a[j]*b[k] 1..13.
 * @WAL_BCE_JK_VPROD_S2: (0x11) vector product extended
 *                    z <= (a1'[i..i+nn]*b1[i..i+nn] + a2'[i..i+nn]*b2[i..i+nn])
 * @WAL_BCE_JK_VFP2SP: (0x12) vector conversion from proprietary FP to 32m24
 *                     single precision FP
 * @WAL_BCE_JK_VSP2FP: (0x13) vector conversion from 32m24 single precision FP
 *                     to proprietary FP
 * @WAL_BCE_JK_VDIV: (0x14) vector division
 *
 * The macros WAL_BCE_JK_VPROD_S4 and WAL_BCE_JK_VPROD_S8 expand to
 * WAL_BCE_JK_VPROD_S2, so all three operations have the same code.
 * WAL_BCE_JK_VPROD_S4:
 *                   z <= (a1'[i..i+nn]*b1[i..i+nn] + a2'[i..i+nn]*b2[i..i+nn])+
 *                        (a3'[i..i+nn]*b3[i..i+nn] + a4'[i..i+nn]*b4[i..i+nn])
 * WAL_BCE_JK_VPROD_S8:
 *                   z <= ((a1'[i..i+nn]*b1[i..i+nn]+a2'[i..i+nn]*b2[i..i+nn])+
 *                         (a3'[i..i+nn]*b3[i..i+nn]+a4'[i..i+nn]*b4[i..i+nn]))
 *                         +
 *                        ((a5'[i..i+nn]*b5[i..i+nn]+a6'[i..i+nn]*b6[i..i+nn])+
 *                         (a7'[i..i+nn]*b7[i..i+nn]+a8'[i..i+nn]*b8[i..i+nn]))
 */
enum wal_bce_jk_operation_codes {
	/* version of HW */
	WAL_BCE_JK_VVER       = 0x00,

	/* vector copies */
	WAL_BCE_JK_VZ2A       = 0x01,
	WAL_BCE_JK_VB2A       = 0x02,
	WAL_BCE_JK_VZ2B       = 0x03,
	WAL_BCE_JK_VA2B       = 0x04,

	/* element-wise addition */
	WAL_BCE_JK_VADD       = 0x05,
	WAL_BCE_JK_VADD_BZ2A  = 0x06,
	WAL_BCE_JK_VADD_AZ2B  = 0x07,

	/* element-wise subtraction */
	WAL_BCE_JK_VSUB       = 0x08,
	WAL_BCE_JK_VSUB_BZ2A  = 0x09,
	WAL_BCE_JK_VSUB_AZ2B  = 0x0A,

	/* element-wise multiplication */
	WAL_BCE_JK_VMULT      = 0x0B,
	WAL_BCE_JK_VMULT_BZ2A = 0x0C,
	WAL_BCE_JK_VMULT_AZ2B = 0x0D,

	/* vector product, MAC and MSUBAC */
	WAL_BCE_JK_VPROD      = 0x0E,
	WAL_BCE_JK_VMAC       = 0x0F,
	WAL_BCE_JK_VMSUBAC    = 0x10,
	WAL_BCE_JK_VPROD_S2   = 0x11,

	/* conversions between proprietary FP and 32m24 single precision FP */
	WAL_BCE_JK_VFP2SP     = 0x12,
	WAL_BCE_JK_VSP2FP     = 0x13,

	/* vector division */
	WAL_BCE_JK_VDIV       = 0x14
};

/* Extended vector products with four and eight partial products; same code as _S2. */
#define WAL_BCE_JK_VPROD_S4	(WAL_BCE_JK_VPROD_S2)
#define WAL_BCE_JK_VPROD_S8	(WAL_BCE_JK_VPROD_S2)


/* BCE_JK worker capabilities */
/** D
 * enum wal_bce_jk_capabilities - capability flags a BCE_JK worker can have
 * @WAL_BCE_JK_CAP_VVER: worker can return version of HW (capabilities)
 * @WAL_BCE_JK_CAP_VZ2A: worker can do vector copy      a[i] <= z[j]
 * @WAL_BCE_JK_CAP_VB2A: worker can do vector copy      a[i] <= b[j]
 * @WAL_BCE_JK_CAP_VZ2B: worker can do vector copy      b[i] <= z[j]
 * @WAL_BCE_JK_CAP_VA2B: worker can do vector copy      b[i] <= a[j]
 * @WAL_BCE_JK_CAP_VADD: worker can do vector add       z[i] <= a[j] + b[k]
 * @WAL_BCE_JK_CAP_VADD_BZ2A: worker can do vector add       a[i] <= b[j] + z[k]
 * @WAL_BCE_JK_CAP_VADD_AZ2B: worker can do vector add       b[i] <= a[j] + z[k]
 * @WAL_BCE_JK_CAP_VSUB: worker can do vector subtract  z[i] <= a[j] - b[k]
 * @WAL_BCE_JK_CAP_VSUB_BZ2A: worker can do vector subtract  a[i] <= b[j] - z[k]
 * @WAL_BCE_JK_CAP_VSUB_AZ2B: worker can do vector subtract  b[i] <= a[j] - z[k]
 * @WAL_BCE_JK_CAP_VMULT: worker can do vector multiply  z[i] <= a[j] * b[k]
 * @WAL_BCE_JK_CAP_VMULT_BZ2A: worker can do vector multiply  a[i] <= b[j] * z[k]
 * @WAL_BCE_JK_CAP_VMULT_AZ2B: worker can do vector multiply  b[i] <= a[j] * z[k]
 * @WAL_BCE_JK_CAP_VPROD: worker can do vector product   z <= a'[i..i+nn]*b[i..i+nn]
 * @WAL_BCE_JK_CAP_VMAC: worker can do vector MAC       z[i] <= z[i] + a[j]*b[k] 1..13.
 *                       NOTE(review): the earlier text showed '-' here, which
 *                       is identical to VMSUBAC; '+' is presumed for a
 *                       multiply-accumulate - confirm against the HW description.
 * @WAL_BCE_JK_CAP_VMSUBAC: worker can do vector MSUBAC    z[i] <= z[i] - a[j]*b[k] 1..13.
 * @WAL_BCE_JK_CAP_VPROD_S2: worker can do vector product extended
 *                           z <= (a1'[i..i+nn]*b1[i..i+nn] + a2'[i..i+nn]*b2[i..i+nn])
 *                           the same flag is for SIMD 4 (WAL_BCE_JK_CAP_VPROD_S4)
 *                           and the same flag is for SIMD 8 (WAL_BCE_JK_CAP_VPROD_S8)
 * @WAL_BCE_JK_CAP_VFP2SP: worker can do vector conversion from proprietary FP
 *                         to 32m24 single precision FP
 * @WAL_BCE_JK_CAP_VSP2FP: worker can do vector conversion from 32m24 single
 *                         precision FP to proprietary FP
 * @WAL_BCE_JK_CAP_VDIV: worker can do vector division
 *
 * Every flag is a single bit. The bit position of a flag is equal to the
 * value of the matching code in enum wal_bce_jk_operation_codes
 * (e.g. WAL_BCE_JK_VDIV = 0x14 and WAL_BCE_JK_CAP_VDIV = 1 << 20).
 */
enum wal_bce_jk_capabilities {
	WAL_BCE_JK_CAP_VVER       = (1 << 0),	/* 0x00000001 */
	WAL_BCE_JK_CAP_VZ2A       = (1 << 1),	/* 0x00000002 */
	WAL_BCE_JK_CAP_VB2A       = (1 << 2),	/* 0x00000004 */
	WAL_BCE_JK_CAP_VZ2B       = (1 << 3),	/* 0x00000008 */
	WAL_BCE_JK_CAP_VA2B       = (1 << 4),	/* 0x00000010 */
	WAL_BCE_JK_CAP_VADD       = (1 << 5),	/* 0x00000020 */
	WAL_BCE_JK_CAP_VADD_BZ2A  = (1 << 6),	/* 0x00000040 */
	WAL_BCE_JK_CAP_VADD_AZ2B  = (1 << 7),	/* 0x00000080 */
	WAL_BCE_JK_CAP_VSUB       = (1 << 8),	/* 0x00000100 */
	WAL_BCE_JK_CAP_VSUB_BZ2A  = (1 << 9),	/* 0x00000200 */
	WAL_BCE_JK_CAP_VSUB_AZ2B  = (1 << 10),	/* 0x00000400 */
	WAL_BCE_JK_CAP_VMULT      = (1 << 11),	/* 0x00000800 */
	WAL_BCE_JK_CAP_VMULT_BZ2A = (1 << 12),	/* 0x00001000 */
	WAL_BCE_JK_CAP_VMULT_AZ2B = (1 << 13),	/* 0x00002000 */
	WAL_BCE_JK_CAP_VPROD      = (1 << 14),	/* 0x00004000 */
	WAL_BCE_JK_CAP_VMAC       = (1 << 15),	/* 0x00008000 */
	WAL_BCE_JK_CAP_VMSUBAC    = (1 << 16),	/* 0x00010000 */
	WAL_BCE_JK_CAP_VPROD_S2   = (1 << 17),	/* 0x00020000 */
	WAL_BCE_JK_CAP_VFP2SP     = (1 << 18),	/* 0x00040000 */
	WAL_BCE_JK_CAP_VSP2FP     = (1 << 19),	/* 0x00080000 */
	WAL_BCE_JK_CAP_VDIV       = (1 << 20)	/* 0x00100000 */
};

/* The SIMD 4 and SIMD 8 extended vector products use the flag of _S2. */
#define WAL_BCE_JK_CAP_VPROD_S4	(WAL_BCE_JK_CAP_VPROD_S2)
#define WAL_BCE_JK_CAP_VPROD_S8	(WAL_BCE_JK_CAP_VPROD_S2)



/*
 * Two bit-fields of one 32bit value:
 *   bits  0..29 - selected by WAL_BCE_JK_MASK_CAP, the field begins at bit
 *                 WAL_BCE_JK_SHIFT_CAP
 *   bits 30..31 - selected by WAL_BCE_JK_MASK_LIC, the field begins at bit
 *                 WAL_BCE_JK_SHIFT_LIC
 * NOTE(review): CAP is presumably the field with the capability flags and
 * LIC presumably a licence field; the value they are applied to is not
 * visible in this header - confirm.
 */
#define WAL_BCE_JK_SHIFT_CAP	(0)
#define WAL_BCE_JK_MASK_CAP	(0x3FFFFFFF)

#define WAL_BCE_JK_SHIFT_LIC	(30)
#define WAL_BCE_JK_MASK_LIC	(0xC0000000)
/******************************************************************************/
/* BCE_JK specific functions */

/*
 * wal_bce_jk_create_operation - declaration only, the function is defined
 * outside of this header.
 *
 * NOTE(review): nothing below is visible in this header; the meanings are
 * guessed from the parameter names and have to be confirmed against the
 * implementation:
 *   wrk              - the worker the operation is created for
 *   pbid             - TODO confirm
 *   op               - presumably a code from enum wal_bce_jk_operation_codes
 *   a, b, z          - presumably related to the A, B and Z data memories
 *   as, bs, zs       - TODO confirm
 *   ah, bh, zh       - TODO confirm
 *   inca, incb, incz - presumably increments for a, b and z
 *   nn               - presumably the "nn" used in the operation descriptions
 * The meaning of the returned int is not visible here either.
 */
int wal_bce_jk_create_operation(wal_worker_t *wrk,
                                unsigned int pbid,
                                unsigned int op,
                                unsigned int a,
                                unsigned int as,
                                unsigned int b,
                                unsigned int bs,
                                unsigned int z,
                                unsigned int zs,
                                unsigned int ah,
                                unsigned int bh,
                                unsigned int zh,
                                unsigned int inca,
                                unsigned int incb,
                                unsigned int incz,
                                unsigned int nn);

/*
 * wal_bce_jk_create_op - declaration only, the function is defined outside
 * of this header. It takes only the wrk, pbid, op and nn parameters of
 * wal_bce_jk_create_operation.
 * NOTE(review): presumably a shorter variant of wal_bce_jk_create_operation;
 * how the remaining values are chosen and what the returned int means is not
 * visible here - confirm against the implementation.
 */
int wal_bce_jk_create_op(wal_worker_t *wrk, unsigned int pbid, unsigned int op, unsigned int nn);
/*
 * wal_bce_jk_sync_operation - declaration only, the function is defined
 * outside of this header.
 * NOTE(review): presumably synchronizes with an operation of the worker wrk;
 * the exact behaviour and the meaning of the returned int are not visible
 * here - confirm against the implementation.
 */
int wal_bce_jk_sync_operation(wal_worker_t *wrk);

/*
 * Description structure of the BCE_JK FP32M24 family. It is only declared
 * here; the definition is outside of this header.
 * The name of a description structure must have the form
 * wal_<name of CE>_description_structure.
 */
extern const struct wal_family_desc wal_BCE_JK_FP32M24_description_structure;

#endif /* _WAL_BCE_JK_HEADER_FILE_ */
