#include "WaveletOpenclParamComb.h"

// Lifting coefficient tables and normalization constants, one pair per wavelet.
// getFirDefs() selects a pair by wavelet type and walks the coefficient table
// stage by stage, filter_length values per stage; getFirStepBody() names the
// first value of a step the "predict" and the value filter_length further the
// "update" coefficient.

// CDF 9/7: read by getFirDefs() as 2 steps with filter length 1 (4 values).
float wavelet_type_cdf97_coef[] = { -1.58613434342059f, -0.0529801185729f, 0.8829110755309f, 0.4435068520439f };
// CDF 9/7 normalization constant; its square scales LL and divides HH.
float wavelet_type_cdf97_norm = 1.1496043988602f;

// CDF 5/3: read by getFirDefs() as 1 step with filter length 1 (2 values).
float wavelet_type_cdf53_coef[] = { -0.5f, 0.25f };
// CDF 5/3 normalization constant; its square scales LL and divides HH.
float wavelet_type_cdf53_norm = 1.4142135623730f;

// CDF 13/7: read by getFirDefs() as 1 step with filter length 2 (4 values).
float wavelet_type_cdf137_coef[] = { -0.5625f, 0.0625f, -0.28125f, 0.03125f };
// CDF 13/7 normalization constant; 1.0 means no scaling code is generated.
float wavelet_type_cdf137_norm = 1.0f;

/**
  * Create parameters for the combined OpenCL engine and reset them
  * to their initial values via clear().
  */
WaveletOpenclParamComb::WaveletOpenclParamComb()
  : WaveletOpenclParam(ENGINE_TYPE_OPENCL_COMB)
{
  clear();
}
/**
  * Set initial undefined value.
  */
void WaveletOpenclParamComb::clear()
  {
    this->WaveletOpenclParam::clear();
    this->comb_sizes = proc_dim(32,8);
    this->comb_kernel = std::string("wavelet_block_in_explosive_improved");
    this->comb_hor_corners_proc = 0;
  }
/**
  * Get program build parameters.
	* @param block_size threads in block count
	* @param stat_creating statistics creating type
	* @return parameters
  */
std::string WaveletOpenclParamComb::createBuildParam()
  {
    std::ostringstream str;

    str << this->WaveletOpenclParam::createBuildParam()
        << " -D COMB_BLOCK_SIZE=" << this->comb_sizes.count()
        << " -D COMB_BLOCK_SIZE_X=" << this->comb_sizes.x
        << " -D COMB_HOR_CORNERS_PROC=" << this->comb_hor_corners_proc
        << " -D " << this->comb_kernel
        << this->getFirDefs();
    #if DEBUG_LEVEL == DEBUG_ALL
      this->printDebug();
    #endif
    //fprintf(stderr, str.str().c_str());
    return str.str();
  }
/**
  * Print debug information
  */
void WaveletOpenclParamComb::printDebug()
  {
    fprintf(stderr, " comb kernel name:        %s\n"
                    " comb kernel local sizes: %dx%d\n"
                    " comb corners process:    %s\n", this->comb_kernel.c_str(), this->comb_sizes.x, this->comb_sizes.y, (this->comb_hor_corners_proc == 1) ? "yes" : "no");
  }

/**
  * Format a coefficient array as a build-option macro definition.
  * The result has the form " -D <filter_name>={c0f,c1f,...}".
  * @param fir coefficients to print (not modified)
  * @param filter_name name of the defined macro (not modified)
  * @return build-option fragment
  */
std::string WaveletOpenclParamComb::getFirDef(std::vector<float> &fir, std::string &filter_name)
{
  std::stringstream out_data;
  // showpoint forces a decimal point, so whole numbers still form a valid
  // float literal once the "f" suffix is appended
  // NOTE(review): values are printed with the default stream precision
  // (6 significant digits) - confirm this is enough for the kernels
  out_data.setf(std::ios_base::showpoint);
  out_data << " -D " << filter_name << "={";
  // index with the container's own unsigned size type to avoid a
  // signed/unsigned comparison against fir.size()
  for(std::vector<float>::size_type i = 0; i < fir.size(); i++)
  {
    if(i != 0) out_data << ",";
    out_data << fir[i] << "f";
  }
  out_data << "}";
  return out_data.str();
}

/*void WaveletOpenclParamComb::createFirFilter(int stages, int filter_length, float *coef, cv::Mat &fir_ll_m, cv::Mat &fir_hl_m, cv::Mat &fir_lh_m, cv::Mat &fir_hh_m, bool improved)
{
  int half_filter_size = 2 * stages * (2 * filter_length - 1);
  int filter_size = (1 + 2 * half_filter_size);
  int matrix_size = filter_size + 1;
  int filter_start = filter_length * 2 - 1;

  cv::Mat fir_1D = cv::Mat::eye(matrix_size, matrix_size, CV_32F);

  //std::cerr << fir_1D << std::endl;
  for(int j = filter_start, coef_id = 0; j <= half_filter_size; j += filter_length * 2 - 1, coef_id++)
  {
    cv::Mat fir_1D_stage = cv::Mat::eye(matrix_size, matrix_size, CV_32F);
    for(int i = j; i < filter_size - j; i += 2)
    {
      for(int k = 0; k < filter_length; k++)
      {
        if((!improved) || (k != 0) || (j != half_filter_size)) fir_1D_stage.at<float>(i + (k * 2 + 1), i) = coef[coef_id*filter_length + k];
        if((!improved) || (k != 0) || (j != filter_start)) fir_1D_stage.at<float>(i - (k * 2 + 1), i) = coef[coef_id*filter_length + k];
      }
    }
    std::cerr << std::endl << fir_1D_stage << std::endl;
    fir_1D *= fir_1D_stage;
    std::cerr << std::endl << fir_1D << std::endl;
  }
  fir_ll_m = fir_1D.col(half_filter_size) * fir_1D.col(half_filter_size).t();
  fir_hl_m = fir_1D.col(half_filter_size + 1) * fir_1D.col(half_filter_size).t();
  fir_lh_m = fir_1D.col(half_filter_size) * fir_1D.col(half_filter_size + 1).t();
  fir_hh_m = fir_1D.col(half_filter_size + 1) * fir_1D.col(half_filter_size + 1).t();
  std::cerr << std::endl << fir_ll_m << std::endl;
  std::cerr << std::endl << fir_hl_m << std::endl;
  std::cerr << std::endl << fir_lh_m << std::endl;
  std::cerr << std::endl << fir_hh_m << std::endl;
  int proc_count = 0;
  for(int j = 0; j < fir_ll_m.rows; j++)
    for(int i = 0; i < fir_ll_m.cols; i++)
      {
       if((fir_ll_m.at<float>(i, j) != 0.0f) && (i != half_filter_size || j != half_filter_size || fir_ll_m.at<float>(i, j) != 1.0f)) proc_count++;
       if((fir_hl_m.at<float>(i, j) != 0.0f) && (i != half_filter_size+1 || j != half_filter_size || fir_hl_m.at<float>(i, j) != 1.0f)) proc_count++;
       if((fir_lh_m.at<float>(i, j) != 0.0f) && (i != half_filter_size || j != half_filter_size+1 || fir_lh_m.at<float>(i, j) != 1.0f)) proc_count++;
       if((fir_hh_m.at<float>(i, j) != 0.0f) && (i != half_filter_size+1 || j != half_filter_size+1 || fir_hh_m.at<float>(i, j) != 1.0f)) proc_count++;
      }
    if(improved) proc_count += 8;
  fprintf(stderr, "\n nonzero coef count: %d\n", proc_count);
}*/

void WaveletOpenclParamComb::createFirFilter(int stages, int filter_length, float *coef, cv::Mat &fir_ll_m, cv::Mat &fir_hl_m, cv::Mat &fir_lh_m, cv::Mat &fir_hh_m, bool improved)
{
  int half_filter_size = 2 * stages * (2 * filter_length - 1);
  int filter_size = (1 + 2 * half_filter_size);
  int matrix_size = filter_size + 1;
  int filter_start = filter_length * 2 - 1;

  cv::Mat fir_1D = cv::Mat::eye(matrix_size, matrix_size, CV_32F);

  //fprintf(stderr, "\nMatrix calculation info:\n");
  //fprintf(stderr,   " horizontal matrix info:\n");
  //std::cerr << fir_1D << std::endl;
  for(int j = filter_start, coef_id = 0; j <= half_filter_size; j += filter_length * 2 - 1, coef_id++)
  {
    cv::Mat fir_1D_stage = cv::Mat::eye(matrix_size, matrix_size, CV_32F);
    for(int i = j; i < filter_size - j; i += 2)
    {
      for(int k = 0; k < filter_length; k++)
      {
        if((!improved) || (k != 0) || (j != half_filter_size)) {
        //if(((!improved) || (k != 0) || (j != half_filter_size)) && ((!improved) || j != filter_start)) {
          fir_1D_stage.at<float>(i + (k * 2 + 1), i) = coef[coef_id*filter_length + k];
        }
        if((!improved) || (k != 0) || (j != filter_start)) {
        //if(((!improved) || (k != 0) || (j != filter_start)) && ((!improved) || j != half_filter_size)) {
          fir_1D_stage.at<float>(i - (k * 2 + 1), i) = coef[coef_id*filter_length + k];
        }
        
      }
    }
    //std::cerr << std::endl << fir_1D_stage << std::endl;
    fir_1D *= fir_1D_stage;
    //std::cerr << std::endl << fir_1D << std::endl;
  }
  
  
  //fprintf(stderr, " vertical matrix info:\n");
  cv::Mat fir_1D2 = cv::Mat::eye(matrix_size, matrix_size, CV_32F);
  //std::cerr << fir_1D << std::endl;
  for(int j = filter_start, coef_id = 0; j <= half_filter_size; j += filter_length * 2 - 1, coef_id++)
  {
    cv::Mat fir_1D_stage = cv::Mat::eye(matrix_size, matrix_size, CV_32F);
    for(int i = j; i < filter_size - j; i += 2)
    {
      for(int k = 0; k < filter_length; k++)
      {
        if((!improved) || (k != 0) || (j != half_filter_size)) fir_1D_stage.at<float>(i + (k * 2 + 1), i) = coef[coef_id*filter_length + k];
        if((!improved) || (k != 0) || (j != filter_start)) fir_1D_stage.at<float>(i - (k * 2 + 1), i) = coef[coef_id*filter_length + k];
      }
    }
    //std::cerr << std::endl << fir_1D_stage << std::endl;
    fir_1D2 *= fir_1D_stage;
    //std::cerr << std::endl << fir_1D2 << std::endl;
  }

  fir_ll_m = fir_1D.col(half_filter_size) * fir_1D2.col(half_filter_size).t();
  fir_hl_m = fir_1D.col(half_filter_size + 1) * fir_1D2.col(half_filter_size).t();
  fir_lh_m = fir_1D.col(half_filter_size) * fir_1D2.col(half_filter_size + 1).t();
  fir_hh_m = fir_1D.col(half_filter_size + 1) * fir_1D2.col(half_filter_size + 1).t();
  //std::cerr << std::endl << fir_ll_m << std::endl;
  //std::cerr << std::endl << fir_hl_m << std::endl;
  //std::cerr << std::endl << fir_lh_m << std::endl;
  //std::cerr << std::endl << fir_hh_m << std::endl;
  int proc_count = 0;
  for(int j = 0; j < fir_ll_m.rows; j++)
    for(int i = 0; i < fir_ll_m.cols; i++)
      {
       if((fir_ll_m.at<float>(i, j) != 0.0f) && (i != half_filter_size || j != half_filter_size || fir_ll_m.at<float>(i, j) != 1.0f)) proc_count++;
       if((fir_hl_m.at<float>(i, j) != 0.0f) && (i != half_filter_size+1 || j != half_filter_size || fir_hl_m.at<float>(i, j) != 1.0f)) proc_count++;
       if((fir_lh_m.at<float>(i, j) != 0.0f) && (i != half_filter_size || j != half_filter_size+1 || fir_lh_m.at<float>(i, j) != 1.0f)) proc_count++;
       if((fir_hh_m.at<float>(i, j) != 0.0f) && (i != half_filter_size+1 || j != half_filter_size+1 || fir_hh_m.at<float>(i, j) != 1.0f)) proc_count++;
      }
    //if(improved) proc_count += 8;
  //fprintf(stderr, "\n nonzero coef count: %d\n", proc_count);
}

void WaveletOpenclParamComb::getFirData(int stages, int filter_length, float *coef, float norm_coef, std::vector<float> &fir_ll, std::vector<float> &fir_hl, std::vector<float> &fir_lh, std::vector<float> &fir_hh, bool improved)
{
  cv::Mat fir_ll_m;
  cv::Mat fir_hl_m;
  cv::Mat fir_lh_m;
  cv::Mat fir_hh_m;

  int half_filter_size = 2 * stages * (2 * filter_length - 1);
  int filter_size = (1 + 2 * half_filter_size);
  int matrix_size = filter_size + 1;
  int fir_filter_size = matrix_size >> 1;

  this->createFirFilter(stages, filter_length, coef, fir_ll_m, fir_hl_m, fir_lh_m, fir_hh_m, improved);

  fir_ll.resize(matrix_size * matrix_size);
  fir_hl.resize(matrix_size * matrix_size);
  fir_lh.resize(matrix_size * matrix_size);
  fir_hh.resize(matrix_size * matrix_size);

  for(int j = 0; j < fir_filter_size; j++)
  {
    for(int i = 0; i < fir_filter_size; i++)
    {
      for(int k = 0; k < 4; k++)
      {
        fir_ll[i + j * fir_filter_size + fir_filter_size * fir_filter_size * k] = fir_ll_m.at<float>(i * 2 + (k % 2), j * 2 + (k / 2)) * ((improved) ? 1.0f : (norm_coef * norm_coef));
        fir_hl[i + j * fir_filter_size + fir_filter_size * fir_filter_size * k] = fir_hl_m.at<float>(i * 2 + (k % 2), j * 2 + (k / 2));
        fir_lh[i + j * fir_filter_size + fir_filter_size * fir_filter_size * k] = fir_lh_m.at<float>(i * 2 + (k % 2), j * 2 + (k / 2));
        fir_hh[i + j * fir_filter_size + fir_filter_size * fir_filter_size * k] = fir_hh_m.at<float>(i * 2 + (k % 2), j * 2 + (k / 2)) / ((improved) ? 1.0f : (norm_coef * norm_coef));
      }
    }
  }
}

/*std::string WaveletOpenclParamComb::getFirStepBody(int stages, int filter_length, float *coef, float norm_coef, int iter_id, std::vector<float> *fir_ll, std::vector<float> *fir_hl, std::vector<float> *fir_lh, std::vector<float> *fir_hh, bool improved)
{
  float first_predict = coef[0];
  float last_update = coef[(stages - 1) * filter_length * 2 + filter_length];
  int half_filter_size = stages * (2 * filter_length - 1);
  int fir_filter_size = (1 + 2 * half_filter_size);

  int comb_block_loc_r = 1;
  int comb_block_loc_b = this->comb_sizes.x;
  int coef_part_size = fir_filter_size * fir_filter_size;
  std::vector<float> *fir[4] = {fir_ll, fir_hl, fir_lh, fir_hh};
  std::ostringstream step_body;
  step_body.setf(std::ios_base::showpoint);
  step_body << "{\n";
  step_body << "float tmp_val;\n";
  
  // calculation of predict for per-thread values
  if(improved)
    {
      step_body << "act_data.y += " << first_predict << " * act_data.x;\n"
                << "act_data.w += " << first_predict << " * act_data.z;\n\n"
                << "act_data.z += " << first_predict << " * act_data.x;\n"
                << "act_data.w += " << first_predict << " * act_data.y;\n\n";
    }
  
  // copy data to shared memory
  if((!this->double_buffering) && (iter_id != 0))
    {
      step_body << "barrier(CLK_LOCAL_MEM_FENCE);\n";
    }
  for(int l = 0; l < 4; l++)
    {
      step_body << "temp_image[" << ((4 * (iter_id % 2) + l) * this->comb_sizes.count()) << "] = act_data.s" << l << ";\n";
    }

  step_body << "barrier(CLK_LOCAL_MEM_FENCE);\n\n";

  // check necessity of temporary buffer
  bool temp_buffer_use = false;
  for(int l = 0; l < 4; l++)
    {
      int tmp_index = (0 + half_filter_size) + fir_filter_size * (0 + half_filter_size) + l * coef_part_size;
      for(int m = 0; m < 4; m++)
        {
          if(m == l) continue;
          float act_coef = (*(fir[m]))[tmp_index];
          temp_buffer_use = temp_buffer_use | (act_coef != 0.0f);
        }
    }
  // calculate per-thread values
  if(temp_buffer_use)
    {
      step_body << "float4 act_data2;\n"
                << "act_data2 = act_data;\n\n";

      for(int l = 0; l < 4; l++)
        {
          int tmp_index = (0 + half_filter_size) + fir_filter_size * (0 + half_filter_size) + l * coef_part_size;
          float act_coef = (*(fir[l]))[tmp_index];
          if(act_coef != 1.0f) step_body << "act_data.s" << l << " = " << act_coef << " * act_data2.s" << l << ";\n";
          else step_body << "act_data.s" << l << " = act_data2.s" << l << ";\n";
        }
      step_body << "\n";
      for(int l = 0; l < 4; l++)
        {
          int tmp_index = (0 + half_filter_size) + fir_filter_size * (0 + half_filter_size) + l * coef_part_size;
          for(int m = 0; m < 4; m++)
            {
              if(m == l) continue;
              float act_coef = (*(fir[m]))[tmp_index];
              if(act_coef != 0.0f) step_body << "act_data.s" << m << " += " << act_coef << " * act_data2.s" << l << ";\n";
            }
        }
      step_body << "\n";
    }
  else
    {
      for(int l = 0; l < 4; l++)
        {
          int tmp_index = (0 + half_filter_size) + fir_filter_size * (0 + half_filter_size) + l * coef_part_size;
          float act_coef = (*(fir[l]))[tmp_index];
          if(act_coef != 1.0f) step_body << "act_data.s" << l << " *= " << act_coef << ";\n";
        }
    }
  // inter thread calculation
  for(int j = -half_filter_size; j <= half_filter_size; j++)
    {
      for(int i = -half_filter_size; i <= half_filter_size; i++)
        {
          if((j == 0) && (i == 0)) continue;
          for(int l = 0; l < 4; l++)
            {
              int tmp_index = (i + half_filter_size) + fir_filter_size * (j + half_filter_size) + l * coef_part_size;
              if(((*fir_ll)[tmp_index] != 0.0f) || ((*fir_hl)[tmp_index] != 0.0f) || ((*fir_lh)[tmp_index] != 0.0f) || ((*fir_hh)[tmp_index] != 0.0f))
                {
                  step_body << "tmp_val = temp_image[" << ((int((4 * (iter_id % 2) + l) * this->comb_sizes.count())) + comb_block_loc_r * i + comb_block_loc_b * j) << "];\n";
                  for(int m = 0; m < 4; m++)
                    {
                      float act_coef = (*(fir[m]))[tmp_index];
                      if(act_coef != 0.0f) step_body << "act_data.s" << m << " += tmp_val * " << act_coef << ";\n";
                    }
                }
            }
        }
    }
  // calculation of update for per-thread values
  if(improved)
    {
      step_body << "act_data.x += " << last_update << " * act_data.y;\n"
                << "act_data.z += " << last_update << " * act_data.w;\n\n"
                << "act_data.x += " << last_update << " * act_data.z;\n"
                << "act_data.y += " << last_update << " * act_data.w;\n\n";
      // normalization constant
      if(norm_coef != 1.0f)
        {
          step_body << "act_data.x *= " << (norm_coef * norm_coef) << ";\n"
                    << "act_data.w *= " << (1.0 / (norm_coef * norm_coef)) << ";\n\n";
        }
    }
  step_body << "}\n";
  return step_body.str();
}*/

/**
  * Generate the source text of one filtering step with all coefficients
  * written out as constants.
  * The generated code works on a four-component value act_data, publishes it
  * through temp_image between barriers and accumulates every non-zero filter
  * coefficient. With improved set, the first predict and the last update are
  * emitted as explicit per-thread statements, followed by the normalization.
  * All spaces and newlines are removed from the result; getFirDefs() places it
  * after " -D WAVELET_FILTER_BODY()=", presumably so the macro value stays a
  * single build-option token.
  * @param stages number of lifting steps covered by the filters
  * @param filter_length number of coefficients per lifting stage
  * @param coef lifting coefficients of this step
  * @param norm_coef normalization constant (1.0 emits no scaling)
  * @param iter_id step index; its parity selects the temp_image half used
  * @param fir_ll LL coefficients as produced by getFirData()
  * @param fir_hl HL coefficients as produced by getFirData()
  * @param fir_lh LH coefficients as produced by getFirData()
  * @param fir_hh HH coefficients as produced by getFirData()
  * @param improved emit first predict / last update as per-thread code
  * @return generated code without spaces and newlines
  */
std::string WaveletOpenclParamComb::getFirStepBody(int stages, int filter_length, float *coef, float norm_coef, int iter_id, std::vector<float> *fir_ll, std::vector<float> *fir_hl, std::vector<float> *fir_lh, std::vector<float> *fir_hh, bool improved)
{
  float first_predict = coef[0];
  float last_update = coef[(stages - 1) * filter_length * 2 + filter_length];
  int half_filter_size = stages * (2 * filter_length - 1);
  int fir_filter_size = (1 + 2 * half_filter_size);

  int comb_block_loc_r = 1;
  int comb_block_loc_b = this->comb_sizes.x;
  int coef_part_size = fir_filter_size * fir_filter_size;
  // index of the (0, 0) offset inside one coefficient part
  int center_index = half_filter_size + fir_filter_size * half_filter_size;
  std::vector<float> *fir[4] = {fir_ll, fir_hl, fir_lh, fir_hh};
  std::ostringstream step_body;
  step_body.setf(std::ios_base::showpoint);
  step_body << "{\n";
  step_body << "DECLARE_VAR(float,tmp_val);\n";

  // calculation of predict for per-thread values
  if(improved)
    {
      step_body << "act_data.y += " << first_predict << "f * act_data.x;\n"
                << "act_data.w += " << first_predict << "f * act_data.z;\n\n"
                << "act_data.z += " << first_predict << "f * act_data.x;\n"
                << "act_data.w += " << first_predict << "f * act_data.y;\n\n";
    }

  // copy data to shared memory
  if((!this->double_buffering) && (iter_id != 0))
    {
      step_body << "barrier(CLK_LOCAL_MEM_FENCE);\n";
    }
  for(int l = 0; l < 4; l++)
    {
      step_body << "temp_image[" << ((4 * (iter_id % 2) + l) * this->comb_sizes.count()) << "] = act_data.s" << l << ";\n";
    }

  step_body << "barrier(CLK_LOCAL_MEM_FENCE);\n\n";

  // check necessity of temporary buffer: it is needed when any component
  // contributes to a different component at offset (0, 0)
  bool temp_buffer_use = false;
  for(int l = 0; l < 4; l++)
    {
      int tmp_index = center_index + l * coef_part_size;
      for(int m = 0; m < 4; m++)
        {
          if(m == l) continue;
          if((*(fir[m]))[tmp_index] != 0.0f) temp_buffer_use = true;
        }
    }
  // calculate per-thread values
  if(temp_buffer_use)
    {
      step_body << "DECLARE_VAR(float4,act_data2);\n";
      step_body << "act_data2 = act_data;\n";

      for(int l = 0; l < 4; l++)
        {
          float act_coef = (*(fir[l]))[center_index + l * coef_part_size];
          if(act_coef != 1.0f) step_body << "act_data.s" << l << " = " << act_coef << "f * act_data2.s" << l << ";\n";
          else step_body << "act_data.s" << l << " = act_data2.s" << l << ";\n";
        }
      step_body << "\n";
      for(int l = 0; l < 4; l++)
        {
          int tmp_index = center_index + l * coef_part_size;
          for(int m = 0; m < 4; m++)
            {
              if(m == l) continue;
              float act_coef = (*(fir[m]))[tmp_index];
              if(act_coef != 0.0f) step_body << "act_data.s" << m << " += " << act_coef << "f * act_data2.s" << l << ";\n";
            }
        }
      step_body << "\n";
    }
  else
    {
      for(int l = 0; l < 4; l++)
        {
          float act_coef = (*(fir[l]))[center_index + l * coef_part_size];
          if(act_coef != 1.0f) step_body << "act_data.s" << l << " *= " << act_coef << "f;\n";
        }
    }
  step_body << "\n";
  // inter thread calculation
  for(int j = -half_filter_size; j <= half_filter_size; j++)
    {
      for(int i = -half_filter_size; i <= half_filter_size; i++)
        {
          if((j == 0) && (i == 0)) continue;
          for(int l = 0; l < 4; l++)
            {
              int tmp_index = (i + half_filter_size) + fir_filter_size * (j + half_filter_size) + l * coef_part_size;
              // read the neighbour value only when at least one filter uses it
              if(((*fir_ll)[tmp_index] != 0.0f) || ((*fir_hl)[tmp_index] != 0.0f) || ((*fir_lh)[tmp_index] != 0.0f) || ((*fir_hh)[tmp_index] != 0.0f))
                {
                  step_body << "tmp_val = temp_image[" << ((int((4 * (iter_id % 2) + l) * this->comb_sizes.count())) + comb_block_loc_r * i + comb_block_loc_b * j) << "];\n";
                  for(int m = 0; m < 4; m++)
                    {
                      float act_coef = (*(fir[m]))[tmp_index];
                      if(act_coef != 0.0f) step_body << "act_data.s" << m << " += tmp_val * " << act_coef << "f;\n";
                    }
                }
            }
        }
    }
  step_body << "\n";
  // calculation of update for per-thread values
  if(improved)
    {
      step_body << "act_data.x += " << last_update << "f * act_data.y;\n"
                << "act_data.z += " << last_update << "f * act_data.w;\n"
                << "act_data.x += " << last_update << "f * act_data.z;\n"
                << "act_data.y += " << last_update << "f * act_data.w;\n";
      // normalization constant
      if(norm_coef != 1.0f)
        {
          step_body << "\n";
          step_body << "act_data.x *= " << (norm_coef * norm_coef) << "f;\n"
                    << "act_data.w *= " << (1.0 / (norm_coef * norm_coef)) << "f;\n";
        }
    }
  step_body << "}\n";

  // remove all spaces and newlines in one pass; the index uses the string's
  // own size type instead of an int compared against npos
  const std::string formatted = step_body.str();
  std::string step_body_str;
  step_body_str.reserve(formatted.size());
  for(std::string::size_type pos = 0; pos < formatted.size(); ++pos)
    {
      const char act_char = formatted[pos];
      if((act_char != ' ') && (act_char != '\n')) step_body_str += act_char;
    }
  return step_body_str;
}

/**
  * Build the wavelet-specific part of the program build parameters.
  * A " -D WAVELET_TYPE_..." define is always emitted for a known wavelet.
  * Filter data is added only for kernels whose name contains "_polyphase"
  * (one filter set per lifting step, concatenated) or "_convolution" (one
  * filter set for all steps). Depending on gen_filter_body the filters are
  * emitted either as generated code in WAVELET_FILTER_BODY() or as the four
  * coefficient arrays ACTUAL_FIR_LL1/HL1/LH1/HH1.
  * For an unhandled wavelet type nothing is emitted.
  * @return build parameter fragment (may be empty)
  */
std::string WaveletOpenclParamComb::getFirDefs()
{
  std::vector<float> fir_ll;
  std::vector<float> fir_hl;
  std::vector<float> fir_lh;
  std::vector<float> fir_hh;

  std::vector<float> fir_ll_tmp;
  std::vector<float> fir_hl_tmp;
  std::vector<float> fir_lh_tmp;
  std::vector<float> fir_hh_tmp;

  std::string out_data;
  int steps_count = 0;
  int filter_length = 0;
  float *coef = NULL;
  float norm_coef = 1.0f;

  switch(wavelet)
  {
    case WAVELET_TYPE_CDF53:
      out_data += " -D WAVELET_TYPE_CDF53";
      steps_count = 1;
      filter_length = 1;
      coef = wavelet_type_cdf53_coef;
      norm_coef = wavelet_type_cdf53_norm;
    break;
    case WAVELET_TYPE_CDF97:
      out_data += " -D WAVELET_TYPE_CDF97";
      steps_count = 2;
      filter_length = 1;
      coef = wavelet_type_cdf97_coef;
      norm_coef = wavelet_type_cdf97_norm;
    break;
    case WAVELET_TYPE_CDF137:
      out_data += " -D WAVELET_TYPE_CDF137";
      steps_count = 1;
      filter_length = 2;
      coef = wavelet_type_cdf137_coef;
      norm_coef = wavelet_type_cdf137_norm;
    break;
    default:
      // no coefficient table for this wavelet: never reach the filter
      // generation below without valid coefficients
      return out_data;
  }
  bool improved = this->comb_kernel.find("_improved") != std::string::npos;
  std::string fir_body;
  if(this->comb_kernel.find("_polyphase") != std::string::npos)
    {
      for(int i = 0; i < steps_count; i++)
        {
          // each step consumes 2 * filter_length coefficients; only the last
          // step applies the normalization
          float *step_coef = coef + i * 2 * filter_length;
          float act_norm = (i == steps_count - 1) ? norm_coef : 1.0f;
          getFirData(1, filter_length, step_coef, act_norm, fir_ll_tmp, fir_hl_tmp, fir_lh_tmp, fir_hh_tmp, improved);
          if(this->gen_filter_body) fir_body += getFirStepBody(1, filter_length, step_coef, act_norm, i, &fir_ll_tmp, &fir_hl_tmp, &fir_lh_tmp, &fir_hh_tmp, improved);
          fir_ll.insert(fir_ll.end(), fir_ll_tmp.begin(), fir_ll_tmp.end());
          fir_hl.insert(fir_hl.end(), fir_hl_tmp.begin(), fir_hl_tmp.end());
          fir_lh.insert(fir_lh.end(), fir_lh_tmp.begin(), fir_lh_tmp.end());
          fir_hh.insert(fir_hh.end(), fir_hh_tmp.begin(), fir_hh_tmp.end());
        }
    }
  else if(this->comb_kernel.find("_convolution") != std::string::npos)
    {
      getFirData(steps_count, filter_length, coef, norm_coef, fir_ll, fir_hl, fir_lh, fir_hh, improved);
      if(this->gen_filter_body) fir_body += getFirStepBody(steps_count, filter_length, coef, norm_coef, 0, &fir_ll, &fir_hl, &fir_lh, &fir_hh, improved);
    }
  else
    {
      return out_data;
    }
  if(this->gen_filter_body)
    {
      out_data += std::string(" -D WAVELET_FILTER_BODY()=") + fir_body;
    }
  else
    {
      // getFirDef() takes the name by non-const reference, so it needs named
      // objects; a temporary cannot bind to such a parameter in standard C++
      std::string name_ll("ACTUAL_FIR_LL1");
      std::string name_hl("ACTUAL_FIR_HL1");
      std::string name_lh("ACTUAL_FIR_LH1");
      std::string name_hh("ACTUAL_FIR_HH1");
      out_data += getFirDef(fir_ll, name_ll);
      out_data += getFirDef(fir_hl, name_hl);
      out_data += getFirDef(fir_lh, name_lh);
      out_data += getFirDef(fir_hh, name_hh);
    }
  return out_data;
}