//==============================================================================
/*! \file
 * Medical Data Segmentation Toolkit (MDSTk)    \n
 * Copyright (c) 2003-2006 by Michal Spanel     \n
 *
 * Authors: Michal Spanel, spanel@fit.vutbr.cz  \n
 * File:    mdsEM.hxx                           \n
 * Section: libAlgorithm                        \n
 * Date:    2006/08/29                          \n
 *
 * $Id: mdsEM.hxx 331 2007-04-17 14:43:06Z spanel $
 *
 * Description:
 * - Gaussian Mixture Model (GMM) optimization using maximum likelihood
 *   estimation via the Expectation-Maximization (EM) algorithm.
 */


//==============================================================================
/*
 * Data members of the CMaxLikelihoodByEM class.
 */

// Default value of the minimal-change threshold.
// NOTE(review): presumably the default argument of the constructor in the
// class declaration, which is not visible in this file - confirm.
template <class C, tSize N>
const double CMaxLikelihoodByEM<C,N>::DEFAULT_MIN_ADD_CHANGE  = 0.001;

// Convergence threshold handed to iterateEM() by both execute() methods:
// the inner EM loop stops once the relative change of the log-likelihood
// drops below this value.
template <class C, tSize N>
const double CMaxLikelihoodByEM<C,N>::MIN_CHANGE              = 1.0e-6;
//template <class C, tSize N>
//const double CMaxLikelihoodByEM<C,N>::MIN_CHANGE              = 1.0e-3;

// Scale of the mean shift applied by splitComponent(): the shift equals
// this coefficient times the square root of the largest covariance entry.
template <class C, tSize N>
const double CMaxLikelihoodByEM<C,N>::SPLITTING_COEFF         = 0.1;

// Maximal number of E-step/M-step iterations of the inner loop in iterateEM().
template <class C, tSize N>
const tSize CMaxLikelihoodByEM<C,N>::MAX_ITERS                = 10;
//template <class C, tSize N>
//const tSize CMaxLikelihoodByEM<C,N>::MAX_ITERS                = 7;

// Number of passes of the outer loop in iterateEM(); every pass perturbs
// the component means with random noise and runs the inner loop again.
//template <class C, tSize N>
//const tSize CMaxLikelihoodByEM<C,N>::MAX_ITERS2               = 10;
//template <class C, tSize N>
//const tSize CMaxLikelihoodByEM<C,N>::MAX_ITERS2               = 7;
template <class C, tSize N>
const tSize CMaxLikelihoodByEM<C,N>::MAX_ITERS2               = 5;

// Random noise coefficient used by iterateEM(): it is the coefficient of
// the first pass and also the factor by which the coefficient is
// multiplied after every pass.
template <class C, tSize N>
const double CMaxLikelihoodByEM<C,N>::RAND_NOISE_COEFF        = 0.5;
//template <class C, tSize N>
//const double CMaxLikelihoodByEM<C,N>::RAND_NOISE_COEFF        = 0.3;


//==============================================================================
/*
 * Implementation of the CMaxLikelihoodByEM class.
 */
// Constructor.
// - Stores the given minimal-change threshold in m_dMinAddChange.
// - The threshold must be strictly positive (verified by MDS_ASSERT).
// NOTE(review): within this file m_dMinAddChange is referenced only here
// and in commented-out code of execute() - confirm whether it is still needed.
template <class C, tSize N>
CMaxLikelihoodByEM<C,N>::CMaxLikelihoodByEM(double dMinAddChange)
    : m_dMinAddChange(dMinAddChange)
{
    MDS_ASSERT(m_dMinAddChange > 0.0);
}


// Fits the mixture model to the input samples and chooses the number of
// components automatically.
// - Starts with a single component estimated from all samples.
// - Repeatedly runs the EM, evaluates the MDL criterion (smaller is better)
//   and, as long as the criterion has not grown, remembers the current
//   model, adds one component by splitting the one selected by
//   findComponent() and tries again.
// - Once the criterion grows, the last remembered model is restored.
// - Returns false if the input is empty or the samples provide fewer than
//   NUM_OF_DIMENSIONS values; returns true otherwise.
template <class C, tSize N>
bool CMaxLikelihoodByEM<C,N>::execute(const tContainer& Input)
{
    // Count the input samples
    tConstIterator itFirst = Input.getBegin();
    tSize Count = itFirst.getDistance(Input.getEnd());

    // There must be something to process
    MDS_CHECK(Count > 0, return false);

    // Every sample has to provide enough values
    MDS_CHECK(NUM_OF_DIMENSIONS <= makeVector(*itFirst).getSize(), return false);

    // Current number of mixture components
    tSize K = 1;

    // The lowest MDL accepted so far
    double dBestMDL = DBL_MAX;
    MDS_LOG_NOTE("Initial MDL = " << dBestMDL);

    // Allocate the model having a single component
    m_Maps.create(K, Count);
    m_Components.create(K);
    initFirstComponent(Input);

    // Snapshot of the last accepted model
    tMaps SavedMaps;
    tComponents SavedComponents;

    // Keep adding components until the MDL gets worse
    for( ;; )
    {
        // Train the current model and score it
        double dLogLikelihood = iterateEM(Input, MIN_CHANGE);
        double dCurrentMDL = computeMDL(dLogLikelihood, Count);
        MDS_LOG_NOTE("MDL = " << dCurrentMDL);

        // The previous model was better, stop here
        if( dCurrentMDL > dBestMDL )
        {
            break;
        }

        // Accept the current model
        SavedMaps.create(K, Count);
        SavedMaps = m_Maps;
        SavedComponents.create(m_Components);
        dBestMDL = dCurrentMDL;

        // Choose the component to be split (before the model is resized)
        tSize ToSplit = findComponent();

        // Grow the model by one component
        ++K;
        m_Maps.create(K, Count);
        m_Components.create(K);
        m_Components = SavedComponents;

        // The new component occupies the last position
        splitComponent(ToSplit, K - 1);
    }

    // Restore the last accepted model
    m_Maps.create(SavedMaps);
    m_Components.create(SavedComponents);

    return true;
}


// Fits a mixture model having the requested number of components.
// - Starts with a single component estimated from all samples.
// - Alternates EM training with splitting of the component selected by
//   findComponent() until NumOfClasses components have been trained.
// - If NumOfClasses is less than one, no training is done and only the
//   initial single component remains.
// - Returns false if the input is empty or the samples provide fewer than
//   NUM_OF_DIMENSIONS values; returns true otherwise.
template <class C, tSize N>
bool CMaxLikelihoodByEM<C,N>::execute(const tContainer& Input, tSize NumOfClasses)
{
    // Count the input samples
    tConstIterator itFirst = Input.getBegin();
    tSize Count = itFirst.getDistance(Input.getEnd());

    // There must be something to process
    MDS_CHECK(Count > 0, return false);

    // Every sample has to provide enough values
    MDS_CHECK(NUM_OF_DIMENSIONS <= makeVector(*itFirst).getSize(), return false);

    // Current number of mixture components
    tSize K = 1;

    // Allocate the model having a single component
    m_Maps.create(K, Count);
    m_Components.create(K);
    initFirstComponent(Input);

    // Train models of growing size up to the requested one
    while( K <= NumOfClasses )
    {
        // Training of the current model
        iterateEM(Input, MIN_CHANGE);

        // The requested size has been reached, no further splitting
        if( K == NumOfClasses )
        {
            break;
        }

        // Choose the component to be split (before the model is resized)
        tSize ToSplit = findComponent();

        // Grow the model by one component
        ++K;
        m_Maps.create(K, Count);

        // Keep the trained components while the vector is resized
        tComponents Trained(m_Components);
        m_Components.create(K);
        m_Components = Trained;

        // The new component occupies the last position
        splitComponent(ToSplit, K - 1);
    }

    return true;
}


// Retrieves the memberships of the i-th sample in all components.
// - The output vector is created from the i-th column of the support maps.
// - Returns false if the index is outside of the support maps.
template <class C, tSize N>
bool CMaxLikelihoodByEM<C,N>::getMembership(tSize i, tVector& Membership)
{
    MDS_CHECK(i >= 0 && i < m_Maps.getNumOfCols(), return false);

    // Column of the support maps constructed with the COL_REFERENCE flag
    tVector Column(m_Maps, i, COL_REFERENCE);

    // Hand the column values over to the caller
    Membership.create(Column);

    return true;
}


// Returns index of the component having the largest value in the i-th
// column of the support maps.
// - If several components share the largest value, the lowest index wins.
// - Returns -1 if the sample index is outside of the support maps.
template <class C, tSize N>
tSize CMaxLikelihoodByEM<C,N>::getMembership(tSize i)
{
    MDS_CHECK(i >= 0 && i < m_Maps.getNumOfCols(), return -1);

    // Start with the first component and look for a strictly better one
    tSize Winner = 0;
    tSize Row = 1;
    while( Row < m_Maps.getNumOfRows() )
    {
        if( m_Maps(Winner, i) < m_Maps(Row, i) )
        {
            Winner = Row;
        }
        ++Row;
    }

    return Winner;
}


// Resets the support maps by calling zeros() on the underlying matrix.
// - Called by processEStep() before new values are accumulated.
template <class C, tSize N>
void CMaxLikelihoodByEM<C,N>::clearSupportMaps()
{
    m_Maps.zeros();
}


// Initializes mean of the i-th component from a randomly chosen run of
// consecutive input samples.
// - A random start position and a random run length between
//   MIN_NUM_OF_SAMPLES and MAX_NUM_OF_SAMPLES are drawn; the length is
//   clipped so that the run does not exceed the number of samples.
// - The component mean is set to the average of the samples in the run.
// - The number of samples is taken from the support maps (number of
//   columns), the input container is expected to hold as many samples.
// - Nothing is done if the support maps hold no samples.
template <class C, tSize N>
void CMaxLikelihoodByEM<C,N>::initComponentMean(tSize i, const tContainer& Input)
{
    static const tSize MIN_NUM_OF_SAMPLES = 100;
    static const tSize MAX_NUM_OF_SAMPLES = 1000;

    tSize NumOfSamples = m_Maps.getNumOfCols();

    // An empty run would lead to a division by zero below
    if( NumOfSamples < 1 )
    {
        return;
    }

    // Randomly choose a small subset of samples
    tSize First = round2Int(m_Uniform.random(0, NumOfSamples -1 ));
    tSize Size = getMin(round2Int(m_Uniform.random(MIN_NUM_OF_SAMPLES, MAX_NUM_OF_SAMPLES)), NumOfSamples - First);

    // Accumulated mean
    tMean Mean(0.0);

    // Find first sample of the subset
    tConstIterator it = Input.getBegin();
    for( tSize j = 0; j < First; ++j )
    {
        ++it;
    }

    // Sum all samples of the subset
    // - The iterator must advance together with the counter, otherwise
    //   the first sample would be added Size times
    for( tSize k = 0; k < Size; ++k, ++it )
    {
        inplaceAdd(Mean, makeVector(*it));
    }

    // Average of the subset
    Mean *= 1.0 / Size;

    // Initialize the mixture component
    m_Components(i).setMean(Mean);
}


// Initializes covariance of the i-th component from the distance to the
// nearest other component.
// - For every other component the value getProd<double>(d, d) is computed,
//   where d is the difference of both means (presumably the squared
//   distance of the means).
// - The smallest value divided by 2 * NUM_OF_DIMENSIONS is assigned to
//   every entry of the component covariance.
// - If there is no other component, the value is derived from DBL_MAX.
template <class C, tSize N>
void CMaxLikelihoodByEM<C,N>::initComponentCov(tSize i)
{
    static const double dScale = 1.0 / (2 * tComponent::NUM_OF_DIMENSIONS);

    tComponent& Target = m_Components(i);

    // Look for the nearest other component
    double dNearest = DBL_MAX;
    for( tSize Other = 0; Other < m_Components.getSize(); ++Other )
    {
        // Skip the component itself
        if( Other == i )
        {
            continue;
        }

        tMean Diff = Target.getMean();
        Diff -= m_Components(Other).getMean();

        double dDistance = getProd<double>(Diff, Diff);
        if( dDistance < dNearest )
        {
            dNearest = dDistance;
        }
    }

    // The same value is used in all dimensions
    double dVariance = dNearest * dScale;
    for( tSize Dim = 0; Dim < tComponent::NUM_OF_DIMENSIONS; ++Dim )
    {
        Target.setCov(Dim, dVariance);
    }
}


// Sets weight of the i-th mixture component to the given value.
// - The index is not checked in this function.
template <class C, tSize N>
void CMaxLikelihoodByEM<C,N>::initComponentWeight(tSize i, double dWeight)
{
    m_Components(i).setWeight(dWeight);
}


// Initializes the only mixture component from all input samples.
// - Mean is the average of all samples, covariance holds the average
//   squared deviation from the mean in every dimension and weight is 1.
// - Both sums are divided by the number of columns of the support maps,
//   which is expected to equal the number of input samples.
// - Returns false unless there is exactly one component and the support
//   maps have at least one column.
// NOTE(review): computeInverse() is not called here although processMStep()
// calls it after setCov() - confirm that it is not required before the
// first E-step.
template <class C, tSize N>
bool CMaxLikelihoodByEM<C,N>::initFirstComponent(const tContainer& Input)
{
    MDS_CHECK(m_Components.getSize() == 1 && m_Maps.getNumOfCols() > 0, return false);

    // Normalization by the number of samples
    double dInvCount = 1.0 / m_Maps.getNumOfCols();

    tConstIterator itEnd = Input.getEnd();

    // First pass: mean value of all samples
    tMean Center(0.0);
    tConstIterator it = Input.getBegin();
    while( it != itEnd )
    {
        inplaceAdd(Center, makeVector(*it));
        ++it;
    }
    Center *= dInvCount;

    // Second pass: average squared deviation in every dimension
    tCov Variance(0.0);
    it = Input.getBegin();
    while( it != itEnd )
    {
        tMean Delta(makeVector(*it));
        Delta -= Center;

        for( tSize Dim = 0; Dim < tComponent::NUM_OF_DIMENSIONS; ++Dim )
        {
            Variance(Dim) += Delta(Dim) * Delta(Dim);
        }
        ++it;
    }
    Variance *= dInvCount;

    // The component covers all the data
    tComponent& First = m_Components(0);
    First.setMean(Center);
    First.setCov(Variance);
    First.setWeight(1.0);

    return true;
}


// Perturbs mean of every mixture component by random noise.
// - In every dimension a value drawn by m_Normal.random(0.0, s) is added
//   to the mean, where s is the noise coefficient multiplied by the square
//   root of the corresponding covariance entry.
// - Covariances and weights stay untouched.
template <class C, tSize N>
void CMaxLikelihoodByEM<C,N>::addRandomNoise(double dNoiseCoeff)
{
    for( tSize c = 0; c < m_Components.getSize(); ++c )
    {
        tComponent& Component = m_Components(c);
        tMean& Center = Component.getMean();
        tCov& Variance = Component.getCov();

        for( tSize Dim = 0; Dim < tComponent::NUM_OF_DIMENSIONS; ++Dim )
        {
            // Noise scaled by the spread of the component in this dimension
            Center(Dim) += m_Normal.random(0.0, dNoiseCoeff * std::sqrt(Variance(Dim)));
        }
    }
}


// Evaluates the log-likelihood of the input samples for the current
// mixture model.
// - For every sample the weighted values of all components are summed.
// - The sums are multiplied together in a CLogNum<double> accumulator
//   (a value kept in the logarithmic space) starting from 1.0.
// - Returns the accumulator value obtained by get(LOG_VALUE).
template <class C, tSize N>
double CMaxLikelihoodByEM<C,N>::computeLogLikelihood(const tContainer& Input)
{
    // Value stored in the logarithmic space
    CLogNum<double> Result = 1.0;

    // Traverse input data
    tConstIterator itEnd = Input.getEnd();
    for( tConstIterator it = Input.getBegin(); it != itEnd; ++it )
    {
        // Sum of the weighted component values for the current sample
        double dSum = 0.0;
        for( tSize j = 0; j < m_Components.getSize(); ++j )
        {
            dSum += m_Components(j).getWeightedValue(makeVector(*it));
        }

        // Product over all samples
        Result *= dSum;
    }

    return Result.get(LOG_VALUE);
}


// Evaluates the MDL criterion of the current model (smaller is better).
// - MDL = -2 * log-likelihood + (k - 1 + 2 * k * d) * log(number of samples),
//   where k is the number of components and d the number of dimensions.
// - The penalty factor corresponds to k - 1 weights plus d mean values
//   and d covariance entries per component.
template <class C, tSize N>
double CMaxLikelihoodByEM<C,N>::computeMDL(double dLogLikelihood, tSize NumOfSamples)
{
    tSize NumOfComponents = getNumOfComponents();
    tSize NumOfDims = getNumOfDimensions();

    // Penalty for the complexity of the model
    double dPenalty = (NumOfComponents - 1 + 2 * NumOfComponents * NumOfDims) * std::log(double(NumOfSamples));

    return -2.0 * dLogLikelihood + dPenalty;
}


// Checks whether two mixture components are separated well enough.
// - The components are separated if the distance of their means (square
//   root of getSumOfSquares of the difference) is at least c times the
//   square root of the larger of both covariance sums.
// - Returns false if any of the indexes is out of range.
template <class C, tSize N>
bool CMaxLikelihoodByEM<C,N>::areSeparated(tSize i, tSize j, int c)
{
    MDS_CHECK(i >= 0 && i < m_Components.getSize(), return false);
    MDS_CHECK(j >= 0 && j < m_Components.getSize(), return false);

    tComponent& First = m_Components(i);
    tComponent& Second = m_Components(j);

    // Required distance derived from the larger covariance sum
    double dLargerSum = getMax(getSum<double>(First.getCov()), getSum<double>(Second.getCov()));
    double dRequired = c * std::sqrt(dLargerSum);

    // Real distance of both means
    tMean Diff = First.getMean();
    Diff -= Second.getMean();
    double dDistance = std::sqrt(getSumOfSquares<double>(Diff));

#ifdef EM_LOGGING_ENABLED
    MDS_LOG_NOTE("CMaxLikelihoodByEM::areSeparated(): " << dDistance << " >= " << dRequired);
#endif // EM_LOGGING_ENABLED

    return (dDistance >= dRequired);
}


// Returns the degree of separation used for the given number of components.
// - The degree is 1 for up to four components.
// - For more components it is 1 + (NumOfComponents - 4) / 2 (integer
//   division), i.e. it grows by one with every two further components.
template <class C, tSize N>
int CMaxLikelihoodByEM<C,N>::getDegreeOfSeparation(tSize NumOfComponents)
{
    static const int BASE_COUNT = 4;
    static const int STEP = 2;

    if( NumOfComponents > BASE_COUNT )
    {
        return 1 + (NumOfComponents - BASE_COUNT) / STEP;
    }

    return 1;
}


// Finds the component which should be split next.
// - Components are compared by getMult<double>() of their covariance
//   (presumably the product of all covariance entries) and the one having
//   the largest value is chosen; the lowest index wins on ties.
// - There must be at least one component.
template <class C, tSize N>
tSize CMaxLikelihoodByEM<C,N>::findComponent()
{
    MDS_ASSERT(m_Components.getSize() > 0);

    // Nothing to compare
    if( m_Components.getSize() == 1 )
    {
        return 0;
    }

    // The first component is the initial candidate
    tSize Largest = 0;
    double dLargestValue = getMult<double>(m_Components(0).getCov());

    tSize c = 1;
    while( c < m_Components.getSize() )
    {
        double dCandidate = getMult<double>(m_Components(c).getCov());
        if( dCandidate > dLargestValue )
        {
            dLargestValue = dCandidate;
            Largest = c;
        }
        ++c;
    }

    return Largest;
}


// Splits the component Index into two; the second one is stored at the
// position NewIndex and overwrites whatever has been there.
// - Both components share the original covariance and each of them gets
//   half of the original weight.
// - The means are moved apart along the dimension having the largest
//   covariance entry: the original mean is shifted by
//   +SPLITTING_COEFF * sqrt(largest entry), the new one by the same
//   amount in the opposite direction.
// - Both indexes must be less than the number of components.
// NOTE(review): computeInverse() is not called for the new component
// although processMStep() calls it after setCov() - confirm that it is not
// required before the next E-step.
template <class C, tSize N>
void CMaxLikelihoodByEM<C,N>::splitComponent(tSize Index, tSize NewIndex)
{
    MDS_ASSERT(Index < m_Components.getSize() && NewIndex < m_Components.getSize());

    tComponent& Source = m_Components(Index);
    tComponent& Target = m_Components(NewIndex);

    tMean& SourceMean = Source.getMean();
    tCov& SourceCov = Source.getCov();

    // Each part gets half of the original weight
    double dHalfWeight = 0.5 * Source.getWeight();

    // Dimension having the largest covariance entry
    tSize Axis = 0;
    double dLargest = SourceCov(0);
    for( tSize Dim = 1; Dim < tComponent::NUM_OF_DIMENSIONS; ++Dim )
    {
        if( SourceCov(Dim) > dLargest )
        {
            dLargest = SourceCov(Dim);
            Axis = Dim;
        }
    }

    // Shift of the means along the chosen dimension
    double dShift = SPLITTING_COEFF * std::sqrt(dLargest);

    // The new component starts as a copy of the original one
    Target.setMean(SourceMean);
    Target.setCov(SourceCov);

    // Move the original component in one direction...
    SourceMean(Axis) += dShift;
    Source.setWeight(dHalfWeight);

    // ...and the new component in the opposite one
    Target.getMean().get(Axis) -= dShift;
    Target.setWeight(dHalfWeight);
}


// Looks for a pair of components which are not separated well enough and
// re-initializes one of them.
// - Pairs (i, j), i < j, are tested by areSeparated() with the degree
//   given by getDegreeOfSeparation().
// - For the first insufficiently separated pair the component chosen by
//   findComponent() is split; the new part replaces the component j, or
//   the component i if j itself is the one being split.
// - At most one component is re-initialized per call.
// - Returns true if a component has been re-initialized, false otherwise
//   (also if there are fewer than two components).
template <class C, tSize N>
bool CMaxLikelihoodByEM<C,N>::checkForSimilarComponents()
{
    tSize Count = m_Components.getSize();
    if( Count < 2 )
    {
        return false;
    }

    // Required degree of separation
    int iDegree = getDegreeOfSeparation(Count);

    // Test all pairs of components
    for( tSize i = 0; i < Count; ++i )
    {
        for( tSize j = i + 1; j < Count; ++j )
        {
            if( areSeparated(i, j, iDegree) )
            {
                continue;
            }

            // Replace one component of the pair by a half of the largest one
            tSize Largest = findComponent();
            tSize Replaced = (Largest != j) ? j : i;
            splitComponent(Largest, Replaced);

            // Just one component is re-initialized at a time
            return true;
        }
    }

    // All pairs are separated
    return false;
}


// E-step of the algorithm: fills the support maps.
// - The maps are cleared first.
// - For the j-th sample and the i-th component, the weighted value of the
//   component is stored in m_Maps(i, j).
// - Every column is then divided by its sum, so that the values of one
//   sample sum to one; a column whose sum is not positive is left as it is.
template <class C, tSize N>
void CMaxLikelihoodByEM<C,N>::processEStep(const tContainer& Input)
{
    // Start from empty support maps
    clearSupportMaps();

    tSize Sample = 0;
    tConstIterator itEnd = Input.getEnd();
    for( tConstIterator it = Input.getBegin(); it != itEnd; ++it, ++Sample )
    {
        // Weighted values of all components for the current sample
        double dTotal = 0.0;
        for( tSize c = 0; c < m_Components.getSize(); ++c )
        {
            double dValue = m_Components(c).getWeightedValue(makeVector(*it));
            m_Maps(c, Sample) += dValue;
            dTotal += dValue;
        }

        // Normalization factor guarded against a division by zero
        double dScale = 1.0;
        if( dTotal > 0.0 )
        {
            dScale = 1.0 / dTotal;
        }

        // Normalize the column of the current sample
        for( tSize c = 0; c < m_Components.getSize(); ++c )
        {
            m_Maps(c, Sample) *= dScale;
        }
    }
}


// M-step of the algorithm: re-estimates parameters of all components from
// the support maps.
// - New mean is the average of the samples weighted by the support map
//   values of the component.
// - New covariance holds the weighted average of squared deviations in
//   every dimension; the deviations are measured from the mean the
//   component had before this step, not from the new one.
// - New weight is the sum of the support map values divided by the
//   number of columns of the support maps.
// - If the sum of the support map values is not positive, the accumulated
//   mean and covariance are used without normalization.
// - The inverse of the covariance is recomputed at the end.
template <class C, tSize N>
void CMaxLikelihoodByEM<C,N>::processMStep(const tContainer& Input)
{
    // Normalization by the number of samples
    double dInvCount = 1.0 / m_Maps.getNumOfCols();

    tConstIterator itEnd = Input.getEnd();

    for( tSize c = 0; c < m_Components.getSize(); ++c )
    {
        tComponent& Component = m_Components(c);

        // Mean of the component as it was before this step
        tMean& OldMean = Component.getMean();

        // Accumulators
        tMean MeanSum(0.0);
        tCov CovSum(0.0);
        double dSupport = 0.0;

        // Traverse all samples together with their support map values
        tSize Sample = 0;
        for( tConstIterator it = Input.getBegin(); it != itEnd; ++it, ++Sample )
        {
            dSupport += m_Maps(c, Sample);

            inplaceMultAdd(MeanSum, makeVector(*it), m_Maps(c, Sample));

            tMean Delta = makeVector(*it);
            Delta -= OldMean;
            for( tSize Dim = 0; Dim < tComponent::NUM_OF_DIMENSIONS; ++Dim )
            {
                CovSum(Dim) += Delta(Dim) * Delta(Dim) * m_Maps(c, Sample);
            }
        }

        // Normalization factor guarded against a division by zero
        double dInvSupport = 1.0;
        if( dSupport > 0.0 )
        {
            dInvSupport = 1.0 / dSupport;
        }
        MeanSum *= dInvSupport;
        CovSum *= dInvSupport;

        // Store the new parameters
        Component.setWeight(dSupport * dInvCount);
        Component.setMean(MeanSum);
        Component.setCov(CovSum);

        // Keep the inverse of the covariance up to date
        Component.computeInverse();
    }
}


// Trains the current mixture model by the EM algorithm.
// - The training is restarted MAX_ITERS2 times. Every restart adds random
//   noise to the component means; the noise coefficient starts at
//   RAND_NOISE_COEFF and is multiplied by RAND_NOISE_COEFF after every
//   restart.
// - Within a restart, up to MAX_ITERS pairs of E-step and M-step are
//   performed. The iteration stops earlier if the relative change of the
//   log-likelihood drops below dMinChange. The reference value is reset
//   to 1.0 at the beginning of every restart.
// - Returns the log-likelihood evaluated after the last EM iteration that
//   has been performed.
template <class C, tSize N>
double CMaxLikelihoodByEM<C,N>::iterateEM(const tContainer& Input, double dMinChange)
{
    // Initial value of the log-likelihood function
    double dLogLikelihoodFunc = 1.0;

    // Initial coefficient of the random noise
    double dRandNoise = RAND_NOISE_COEFF;

    // Second level iterations
    for( tSize j = 0; j < MAX_ITERS2; ++j )
    {
        // Add random noise to every mixture component
        addRandomNoise(dRandNoise);

        // Modify the random noise coefficient
        dRandNoise *= RAND_NOISE_COEFF;

        // Re-initialization of similar components is currently disabled
//        checkForSimilarComponents();

        // Initial value of the log-likelihood function
        dLogLikelihoodFunc = 1.0;

        // Iterate while the function converges
        for( tSize i = 0; i < MAX_ITERS; ++i )
        {
            // E-step
            processEStep(Input);

            // M-step
            processMStep(Input);

            // Evaluate the log-likelihood function
            double dNewValue = computeLogLikelihood(Input);

            // Estimate change of the log-likelihood function
            double dDelta = getAbs(dNewValue / dLogLikelihoodFunc - 1.0);

#ifdef EM_LOGGING_ENABLED
            MDS_LOG_NOTE("CMaxLikelihoodByEM::iterateEM()");
            MDS_LOG_NOTE("  Log-likelihood Function = " << dNewValue);
            MDS_LOG_NOTE("  Delta = " << dDelta);
#endif // EM_LOGGING_ENABLED

            // Update the current value
            // - Done before the convergence test so that the returned
            //   value always corresponds to the current model parameters
            dLogLikelihoodFunc = dNewValue;

            // Stop if the function has converged
            if( dDelta < dMinChange /*|| dNewValue < dMinChange*/ )
            {
                break;
            }
        }
    }

#ifdef EM_LOGGING_ENABLED
    MDS_LOG_NOTE("CMaxLikelihoodByEM::iterateEM()");
    for( tSize k = 0; k < m_Components.getSize(); ++k )
    {
        MDS_LOG_NOTE("  Component " << k << ":");
        MDS_LOG_NOTE("    Weight = " << m_Components(k).getWeight());
        MDS_LOG_NOTE("    Mean = " << m_Components(k).getMean());
        MDS_LOG_NOTE("    Cov = " << m_Components(k).getCov());
    }
#endif // EM_LOGGING_ENABLED

    // Final log-likelihood function value
    return dLogLikelihoodFunc;
}

