Posted at 2014-01-07


  • OpenMP
  • Intel TBB (Threading Building Blocks)
  • Microsoft PPL (Parallel Patterns Library)


#include <omp.h>

// Approximates pi (OpenMP) by midpoint-rule integration of 4 / (1 + x^2)
// over the interval [0, 1].
//
// num_of_partitions: number of equal-width sub-intervals; must be >= 1.
// Returns the approximation of pi, or -1.0 when num_of_partitions < 1.
double compute_pi_by_openmp(const int num_of_partitions)
{
    if (num_of_partitions < 1) return -1.;

    const double width = 1. / static_cast<double>(num_of_partitions);
    double sum = 0.;

    // reduction(+:sum) gives every thread its own partial sum and adds the
    // partial sums together when the loop finishes, so no locking is needed.
#pragma omp parallel for reduction(+:sum)
    for (int index = 0; index < num_of_partitions; index++) {
        // Declared inside the loop body, so each iteration (and therefore
        // each thread) has its own copy; no private(...) clause is required.
        const double middle = (index + 0.5) * width;
        const double height = 4. / (1. + middle * middle);
        sum += height;
    }

    // Sum of rectangle heights times the common width is the integral.
    return width * sum;
}

## Intel TBB
### parallel_reduce() の場合

#include <tbb/parallel_reduce.h>
#include <tbb/blocked_range.h>
#include <tbb/partitioner.h>
#include <functional>

// PI近似計算 (Intel TBB)
double compute_pi_by_tbb(const int num_of_partitions)
    if (num_of_partitions < 1) return -1.;
    const double width = 1. / static_cast<double>(num_of_partitions);
    double sum = 0.;
    sum = tbb::parallel_reduce(
            tbb::blocked_range<int>(0, num_of_partitions),
            [width](const tbb::blocked_range<int>& range, double value)->double {
                for (int i = range.begin(); i != range.end(); i++) {
                    double middle = (i + 0.5) * width;
                    double height = 4. / (1. + middle * middle);
                    value += height;
                return value;
    double pi = sum * width;
    return pi;

### parallel_for() の場合

#include <tbb/parallel_for.h>
#include <tbb/combinable.h>
#include <tbb/blocked_range.h>
#include <functional>

// PI近似計算 (Intel TBB)
double compute_pi_by_tbb2(const int num_of_partitions)
    if (num_of_partitions < 1) return -1.;
    const double width = 1. / static_cast<double>(num_of_partitions);

    tbb::combinable<double> sum;
    tbb::parallel_for(tbb::blocked_range<int>(0, num_of_partitions),
                      [width, &sum](const tbb::blocked_range<int>& range) {
                          double sum_local = 0.;
                          for (int i = range.begin(); i != range.end(); i++) {
                              double middle = (i + 0.5) * width;
                              double height = 4. / (1. + middle * middle);
                              sum_local += height;
                          sum.local() += sum_local;
    double pi = sum.combine(std::plus<double>()) * width;
    return pi;

(2014/1/12) TBB 2.2 以降は明示的な初期化 tbb::task_scheduler_init が不要とのことですので、コードから削除しました。また、下記の PPL での例と同等となるように tbb::parallel_for() の例も追加しました。

## Microsoft PPL

#include <ppl.h>

// PI近似計算 (Microsoft PPL)
double compute_pi_by_ppl(const int num_of_partitions)
    if (num_of_partitions < 1) return -1.;

    const double width = 1. / static_cast<double>(num_of_partitions);

    concurrency::combinable<double> sum;
    concurrency::parallel_for<int>(0, num_of_partitions, [&sum, width](int i){
        double middle = (i + 0.5) * width;
        double height = 4. / (1. + middle * middle);
        sum.local() += height;

    double pi = sum.combine(std::plus<double>()) * width;
    return pi;



