
More than 5 years have passed since last update.


Last updated at Posted at 2017-11-23





  • 時間計測は,realizeの呼び出しなど,スケジュールを記述して実際に計算を実行する箇所で行うこと.
  • Funcの構築を計測区間に含めると,JITコンパイラによるコンパイル時間まで計測結果に含まれてしまう.


  • Funcの構築を含めて計測したBF(バイラテラルフィルタ)は1.5秒
  • Funcの構築を含めずに計測したBFは1秒
  • Funcの構築を含めて計測した加算は80ms
  • Funcの構築を含めずに計測した加算は0.3ms
#include <opencv2/opencv.hpp>
#pragma comment(lib, "opencv_core320.lib")
#pragma comment(lib, "opencv_imgcodecs320.lib")
#pragma comment(lib, "opencv_highgui320.lib")

#include "include/Halide.h"
#pragma comment(lib, "Halide.lib")

using namespace Halide;

Buffer<uint8_t> load_image(cv::String name, int flags = 1);

// Builds a Halide pipeline that adds a constant offset to every 8-bit sample
// of `src` and saturates the result to [0, 255].
//
// src: input image, indexed as (x, y, c).
// add: signed offset applied to each sample.
// Returns the Func describing the computation; nothing is realized or
// scheduled here.
Func add(Buffer<uint8_t>& src, int add)
{
    // An offset beyond +/-255 always saturates an 8-bit sample, so limiting it
    // here yields the same output while keeping the value representable in the
    // 16-bit intermediate below (a plain narrowing cast would wrap around).
    const int offset = add < -255 ? -255 : (add > 255 ? 255 : add);

    Var x("x"), y("y"), c("c");
    Func output("output");
    // Widen to 16 bits so the sum cannot overflow before it is clamped.
    output(x, y, c) = cast<uint8_t>(
        clamp(cast<int16_t>(src(x, y, c)) + static_cast<int16_t>(offset), 0, 255));
    return output;
}

// Builds a Halide pipeline for a bilateral filter on a 3-channel image.
//
// src:     input image, indexed as (x, y, c); channels 0..2 are read.
// rad:     window radius; the window spans (2 * rad + 1) samples per axis.
// sigma_s: sigma of the spatial (distance) weight.
// sigma_c: sigma of the colour (range) weight.
// Returns the Func describing the filter; nothing is realized or scheduled
// here.
Func bilateral_filter_color(Buffer<uint8_t>& src, int rad, float sigma_s, float sigma_c)
{
    Var x("x"), y("y"), c("c");
    // Repeat the border pixels so the window may reach outside the image.
    Func clamped = BoundaryConditions::repeat_edge(src);

    Func input_float("input_float");
    input_float(x, y, c) = cast<float>(clamped(x, y, c));

    // Exponent coefficients: exp(-d^2 / (2 * sigma^2)).
    Expr ds = -1.f / (2.f*sigma_s*sigma_s);
    Expr dc = -1.f / (2.f*sigma_c*sigma_c);
    RDom r(-rad, 2 * rad + 1, -rad, 2 * rad + 1);

    // Per-channel difference between the neighbour and the centre pixel.
    Expr diff0 = input_float(x + r.x, y + r.y, 0) - input_float(x, y, 0);
    Expr diff1 = input_float(x + r.x, y + r.y, 1) - input_float(x, y, 1);
    Expr diff2 = input_float(x + r.x, y + r.y, 2) - input_float(x, y, 2);

    // NOTE(review): the listing this was taken from had lost the token joining
    // the spatial term to the colour-distance term (and `dc` in the numerator).
    // The product of two exponentials below is the standard bilateral weight;
    // confirm against the original source.
    Expr weight = fast_exp((r.x*r.x + r.y*r.y)*ds)
        * fast_exp((diff0*diff0 + diff1*diff1 + diff2*diff2)*dc);

    // Normalisation term: sum of the weights over the window.
    Expr total = sum(weight);

    // Weighted sum of the neighbourhood, using the same weight as `total` so
    // that the quotient below is a proper weighted average.
    Func blur("blur");
    blur(x, y, c) += weight * input_float(x + r.x, y + r.y, c);

    Func output("output");
    output(x, y, c) = cast<uint8_t>(blur(x, y, c) / total);

    return output;
}

int main(int argc, char **argv)
    Buffer<uint8_t> input = load_image("rgb.png");
    int64 start, end;
    const int iteration = 5;

    for (int i = 0; i < iteration; i++)
        start = cv::getTickCount();
        Func output1 = bilateral_filter_color(input, 3, 4.f, 30.f);
        Buffer<uint8_t> result1 = output1.realize(input.width(), input.height(), 3);
        end = cv::getTickCount();
        std::cout <<"with Func: "<< (end - start) * 1000 / cv::getTickFrequency() << "[ms]" << std::endl;

    Func output2 = bilateral_filter_color(input, 3, 4.f, 30.f);
    for (int i = 0; i < iteration; i++)
        start = cv::getTickCount();
        Buffer<uint8_t> result2 = output2.realize(input.width(), input.height(), 3);
        end = cv::getTickCount();
        std::cout <<"without Func: "<< (end - start) * 1000 / cv::getTickFrequency() << "[ms]" << std::endl;

    for (int i = 0; i < iteration; i++)
        start = cv::getTickCount();
        Func output3 = add(input, 30);
        Buffer<uint8_t> result3 = output3.realize(input.width(), input.height(), 3);
        end = cv::getTickCount();
        std::cout << "with Func: " << (end - start) * 1000 / cv::getTickFrequency() << "[ms]" << std::endl;

    Func output4 = add(input, 30);
    for (int i = 0; i < iteration; i++)
        start = cv::getTickCount();
        Buffer<uint8_t> result4 = output4.realize(input.width(), input.height(), 3);
        end = cv::getTickCount();
        std::cout << "without Func: " << (end - start) * 1000 / cv::getTickFrequency() << "[ms]" << std::endl;

    return 0;

//utility function with OpenCV
// Copies the 8-bit samples of an OpenCV matrix into a Halide buffer.
//
// src:  source matrix with 1 or 3 channels; other channel counts are ignored.
// dest: destination buffer, written as (x, y) for 1 channel and (x, y, c) for
//       3 channels. It is not resized here, so the caller must have allocated
//       it with at least src.cols x src.rows (x 3) elements.
void convertMat2Halide(cv::Mat& src, Buffer<uint8_t>& dest)
{
    const int ch = src.channels();
    if (ch == 1)
    {
        for (int j = 0; j < src.rows; j++)
        {
            for (int i = 0; i < src.cols; i++)
            {
                dest(i, j) = src.at<uchar>(j, i);
            }
        }
    }
    else if (ch == 3)
    {
        for (int j = 0; j < src.rows; j++)
        {
            for (int i = 0; i < src.cols; i++)
            {
                // The row is addressed byte-wise, so pixel i starts at byte
                // 3 * i and its three channel samples follow each other.
                dest(i, j, 0) = src.at<uchar>(j, 3 * i);
                dest(i, j, 1) = src.at<uchar>(j, 3 * i + 1);
                dest(i, j, 2) = src.at<uchar>(j, 3 * i + 2);
            }
        }
    }
}

// Reads an image file with OpenCV and returns its samples as a Halide buffer.
//
// name:  path of the image file.
// flags: passed through to cv::imread.
// Returns a buffer of size (cols, rows, channels). If the image cannot be
// read, a message is printed and the buffer is built from the empty matrix's
// dimensions.
Buffer<uint8_t> load_image(cv::String name, int flags)
{
    cv::Mat a = cv::imread(name, flags);
    if (a.empty())
    {
        std::cout << name << " is empty" << std::endl;
    }

    Buffer<uint8_t> ret(a.cols, a.rows, a.channels());
    convertMat2Halide(a, ret);

    return ret;
}

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up