OpenMP

  • 包含头文件

    #include<omp.h>

  • 声明并行段开始

    #pragma omp paralled;
    {
      int ID=omp_get_thread_num();
      printf("hello(%d)\n",ID);
      printf("world(%d)\n",ID)
    }
  • 编译运行

    gcc paralle.c -fopenmp -o paralle

  • 完整代码结构

      #include <stdio.h>
      #include <stdlib.h>
      #include <omp.h>
      int main(){
        int ID,nthread;
        omp_set_num_threads(2);
        #pragma omp parallel 
        {
          ID=omp_get_thread_num(); //获取每个线程的id编号
          printf("hello(%d)\n",ID);
          printf("world(%d)\n",ID);
          if(ID==0){
            nthread=omp_get_num_threads(); //获取开辟的线程数目,主进程的线程id默认为0
            printf("Number of threads %d\n",nthread);
          }
        }
        return 0;
      }
  • 输出结果

    hello(1)
    world(1)
    hello(0)
    world(1)

:waring: 这是由于线程编号ID定义在公共内存区域,当进行并发时两个线程同时进行默认是ID=0的主线程先执行;所以当执行到if判断语句时,两个线程对ID的赋值语句都已经结束;所以不会进入if语句

  • 改变 ID的作用域

    当将ID放在parallel作用域时,只对单个线程可见;所用最终结果会进入到if判断语句

      hello(0)
      world(0)
      Number of threads 2
      hello(1)
      world(1)
  • 使用并发求PI

    #include <time.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "omp.h"
    #define NUM_THREADS 5
    //#define NUM_THREADS atoi(getenv("THREAD")) 使用环境变量进行声明
    #define PAD 8 //跟CPU内存分配有关
    double step;
    static long num_steps;
    int main()
    {
      clock_t start_t, end_t;
      double total_t;
      start_t = clock();
      num_steps = 100000000;
      double  pi, sum[NUM_THREADS][PAD];
      pi=0.0;
      step = 1.0 / (double)num_steps; //将1平分成100000步
      omp_set_num_threads(NUM_THREADS); //设置要使用的线程数目
    #pragma omp parallel  //开始并发执行
      {
        double x;
        int id , i;
        id = omp_get_thread_num(); //获取每个线程的id编号
        sum[id][0] = 0.0;
        for (i=id; i < num_steps; i = i + NUM_THREADS)
        {
          x = (i - 0.5) * step; 
          sum[id][0] += 4.0 / (1.0 + x * x);
        }
      }
        int i;
      for ( i = 0; i < NUM_THREADS; i++)
      {
        pi += sum[i][0] * step;
      }
      end_t = clock();
      total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC;
      printf("运行时间为%fs\t%f\n", total_t, pi);
      return 0;
    }

PAD模式代码

  • NUM_THREADS为对应的线程数

  • num_steps 为步长

  • 数组的读取可以同步进行,没一行是一个数组指针

#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"
#define NUM_THREADS 6
#define PAD 8 //跟CPU内存分配有关
double step;
static long num_steps;
int main()
{
  clock_t start_t, end_t;
  double total_t;
  start_t = clock();
  num_steps = 100000000;
  double  pi, sum[NUM_THREADS][PAD];
  pi=0.0;
  step = 1.0 / (double)num_steps; //将1平分成100000步
  omp_set_num_threads(NUM_THREADS); //设置要使用的线程数目
#pragma omp parallel  //开始并发执行
  {
    double x;
    int id , i;
    id = omp_get_thread_num(); //获取每个线程的id编号
    sum[id][0] = 0.0;
    for (i=id; i < num_steps; i = i + NUM_THREADS)
    {
      x = (i - 0.5) * step; 
      sum[id][0] += 4.0 / (1.0 + x * x);
    }
  }
int i;
  for (i = 0; i < NUM_THREADS; i++)
  {
    pi += sum[i][0] * step;
  }
  end_t = clock();
  total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC;
  printf("运行时间为%fs\t%f\n", total_t, pi);
  return 0;
}

并行域代码

  • 伪共享模式,当使用一纬数组时,数组的读取被锁定,因为数组只有一个数组指针

#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"
#define NUM_THREADS 4
#define PAD 8 //跟CPU内存分配有关
double step;
static long num_steps;
int main()
{
  clock_t start_t, end_t;
  double total_t;
  start_t = clock();
  num_steps = 100000000;
  double  pi, sum[NUM_THREADS];
  pi=0.0;
  step = 1.0 / (double)num_steps; //将1平分成100000步
  omp_set_num_threads(NUM_THREADS); //设置要使用的线程数目
#pragma omp parallel  //开始并发执行
  {
    double x;
    int id , i;
    id = omp_get_thread_num(); //获取每个线程的id编号
    sum[id]= 0.0;
    for (i=id; i < num_steps; i = i + NUM_THREADS)
    {
      x = (i - 0.5) * step; 
      sum[id] += 4.0 / (1.0 + x * x);
    }
  }
int i;
  for (i = 0; i < NUM_THREADS; i++)
  {
    pi += sum[i] * step;
  }
  end_t = clock();
  total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC;
  printf("运行时间为%fs\t%f\n", total_t, pi);
  return 0;
}

窜行模式

  • 输入同样的步长进行比较

#include <time.h>
#include <stdio.h>
double step;
int main()
{
    static long num_steps;
    // printf("请输入一个1000以上的数字求取PI值\n>>");
    //scanf("%ld",&num_steps);
    for (num_steps = 100000000; num_steps <= 100000000; num_steps += 100000)
    {
        int index;
        index = ((double)num_steps - 1000000) / 100000 + 4;
        clock_t start_t, end_t;
        double total_t;
        int i;
        double x, pi, sum = 0.0;
        start_t = clock();
        step = 1.0 / (double)num_steps; //将1平分成100000步
        for (i = 1; i <= num_steps; i++)
        {
            x = (i - 0.5) * step; //获得0-1的连续值
            sum += 4.0 / (1.0 + x * x);
        }
        pi = step * sum; //
        end_t = clock();
        total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC;
        printf("运行时间为%fs\t%.*f\r\n", total_t, pi, index);
    }
    return 0;
}

Last updated