//
// $Id$
//
// OpenCV interface for LBPDetector
//

#include "lbpdetector_cv.h"
#include "common.h"
#include <ThreadPool.h>
#include <algorithm>
#include <list>
#include <cmath>

using namespace std;
using namespace LBPDetector;

// Calculate triangle kernel for smoothing.
//
// Returns the normalized 1-D kernel
//     K = [1 2 ... k+1 ... 2 1] / S
// of length 2*k + 1, where S = (k+1)^2 so that sum(K) = 1.
// For k <= 0 the identity kernel {1} is returned (no smoothing).
static std::vector<float> triangleKernel(int k)
{
    if (k <= 0) return {1.0f};

    const int peak = k + 1;
    // Sum of 1 + 2 + ... + (k+1) + ... + 2 + 1 is (k+1)^2.
    const float sum = static_cast<float>(peak) * static_cast<float>(peak);

    std::vector<float> K(2 * k + 1);
    for (int i = 0; i <= k; ++i)
    {
        // Floating-point division: integer division here would yield 0.
        const float v = static_cast<float>(i + 1) / sum;
        K[i] = v;           // rising edge (and the peak at i == k)
        K[2 * k - i] = v;   // mirrored falling edge
    }
    return K;
}

// Run the detector over a multi-scale pyramid built from an image.
//
// The image is converted to grayscale float, a pyramid is built with
// pChns.nPerOct levels per octave, and every level is scanned in tiles on a
// thread pool. Detections scoring at least thr are collected in original
// image coordinates, optionally merged by non-maxima suppression and
// optionally rescaled about their centers.
//
// im        Input image; converted with CV_RGB2GRAY, then to CV_32F.
// D         Detector; D->wdim0 / D->wdim1 bound the scan area and the
//           smallest usable pyramid level.
// pChns     Pyramid parameters (nPerOct, colorSmooth, shrink, pyrSmooth).
// thr       Detections with a score below this value are dropped.
// stride    Forwarded to LBPDetector::scalarScanImage.
// group     If > 0, nmsMax(bbs, hs, overlap, group) is applied.
// overlap   Forwarded to nmsMax.
// scaleX,
// scaleY    Factors applied to each box width/height, keeping its center.
// nThreads  Requested worker count; clamped to 1..hardware_concurrency().
// tile      Tile side (in scanned-level pixels) of one task; values below 1
//           are treated as 1.
// useCache  Forwarded to LBPDetector::scalarScanImage.
//
// Returns {boxes, scores, nf, ns, cacheEffeciency}, where nf / ns are the
// sums reported by the scan tasks and cacheEffeciency is their mean
// (0 when nothing was scanned).
Detections lbpDetectImage(
    cv::Mat & im,
    LBPDetector::Detector * D, LBPDetector::ChnsParams & pChns, 
    float thr, int stride,
    int group, float overlap, float scaleX, float scaleY,
    unsigned int nThreads, int tile, bool useCache)
{
    /// Init image
    cv::Mat image;
    cv::Mat image_32f;
    cvtColor(im, image, CV_RGB2GRAY);
    image.convertTo(image_32f, CV_32F);
    const cv::Size orig_sz = image.size();

    // Image Pyramid
    // pyramid / pyramidShrink keep the cv::Mat buffers alive; pyramid1 wraps
    // the shrunk buffers for the scanner. A std::list is used so references
    // handed to the worker tasks stay valid while further levels are added.
    std::vector<cv::Mat> pyramid;
    std::vector<cv::Mat> pyramidShrink;
    std::list<Image> pyramid1;
    const float scaleFactor = pow(2, -1/float(pChns.nPerOct));
    const int maxLevels = 48; // Hard upper bound on pyramid levels

    // Results
    vector<cv::Rect> bbs; vector<float> hs;
    unsigned long long nf = 0;
    unsigned long long ns = 0;
    float cacheEffeciency = 0.0f;

    auto colorKernel = triangleKernel(ceil(pChns.colorSmooth)); // Smoothing kernel
    auto pyrKernel = triangleKernel(ceil(pChns.pyrSmooth)); // Smoothing kernel

    // Limit threads to 1..max (hardware_concurrency() may report 0)
    nThreads = std::max(1U, std::min(std::thread::hardware_concurrency(), nThreads));

    // A non-positive tile would never advance the tiling loops below
    tile = std::max(1, tile);

    // Scoped so that the pool is torn down before post-processing
    {
        ThreadPool pool(nThreads);
        std::list< std::future<ScalarResult> > results;
        for (int k = 0; k < maxLevels; ++k)
        {
            cv::Mat img, imgShrink;

            int w, h;
            if (k < pChns.nPerOct)
            {
                // First octave: resample directly from the source image
                const double s = pow(scaleFactor, k);
                w = ceil(s * orig_sz.width);
                h = ceil(s * orig_sz.height);
                // Interpolation is the 6th argument of cv::resize (fx, fy come first)
                cv::resize(image_32f, img, cv::Size(w,h), 0, 0, cv::INTER_LINEAR);
            } 
            else
            {
                // Further octaves: halve the level one octave above
                cv::pyrDown(pyramid[k-pChns.nPerOct], img);
                cv::Size sz = img.size();
                w = sz.width;
                h = sz.height;
            }
            // Stop once the level cannot hold a detection window
            if (w <= D->wdim0 || h <= D->wdim1) break;
            if (pChns.colorSmooth > 0)
            {
                cv::sepFilter2D(img, img, -1, colorKernel, colorKernel);
            }
            if (pChns.shrink > 0)
            {
                // NOTE(review): if pChns.shrink is an integer type the division
                // truncates before ceil() is applied - confirm intended size.
                cv::resize(img, imgShrink, cv::Size(ceil(w/pChns.shrink),ceil(h/pChns.shrink)), 0, 0, cv::INTER_AREA);
            }
            else
            {
                imgShrink = img;
            }
            if (pChns.pyrSmooth > 0)
            {
                cv::sepFilter2D(imgShrink, imgShrink, -1, pyrKernel, pyrKernel);
            }
            pyramid.push_back(img);
            pyramidShrink.push_back(imgShrink);
            pyramid1.push_back(Image((float*)(imgShrink.data), imgShrink.cols, imgShrink.rows));
            auto & im_to_scan = pyramid1.back();
            int max_x = im_to_scan.dims[0] - D->wdim0 - 1;
            int max_y = im_to_scan.dims[1] - D->wdim1 - 1;
            // Factor mapping coordinates of this level back to the original image
            float scale = orig_sz.width / float(im_to_scan.dims[0]);
            for (int y = 0; y < max_y; y+=tile)
            {
                for (int x = 0; x < max_x; x+=tile)
                {
                    results.emplace_back(pool.enqueue(
                        LBPDetector::scalarScanImage, std::cref(im_to_scan), x, y, min(x+tile, max_x), min(y+tile, max_y), stride, std::cref(*D), scale, useCache));
                } // Image loop
            }
        } // Pyramid loop

        // Collect task results, keeping detections that reach the threshold
        for (auto && result: results)
        {
            ScalarResult r = result.get();
            nf += r.nf; ns += r.ns; cacheEffeciency += r.cacheEffeciency;
            for (size_t i = 0; i < r.bbs.size(); ++i)
            {
                if (r.hs[i] < thr) continue;
                const BB & b = r.bbs[i];
                float scale = b.scale;
                bbs.push_back(cv::Rect(b.x*scale, b.y*scale, b.width*scale, b.height*scale));
                hs.push_back(r.hs[i]);
            }
        }
        // Average over tasks; skip when nothing was scheduled to avoid 0/0 (NaN)
        if (!results.empty())
        {
            cacheEffeciency /= results.size();
        }
    }

    // Non-maxima suppression
    if (group > 0)
    {
        nmsMax(bbs, hs, overlap, group);
    }

    // Optionally rescale detection windows around their centers
    if (scaleX != 1.0f || scaleY != 1.0f)
    {
        for (auto && bb: bbs)
        {
            bb.x += (bb.width - scaleX * bb.width)/2;
            bb.y += (bb.height - scaleY * bb.height)/2;
            bb.width *= scaleX;
            bb.height *= scaleY;
        }
    }
    return {bbs, hs, nf, ns, cacheEffeciency};
}