04test

  • 固定步数 num_steps=100000000

不同模式求pi

1.PAD模式下

    #include <time.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "omp.h"
    #include <unistd.h>
    #define NUM_THREADS atoi(getenv("THREAD"))
    #define PAD 8 //跟CPU内存分配有关
    double step;
    static long num_steps;
    /*
     * PAD variant: approximates pi as the midpoint-rule integral of
     * 4 / (1 + x^2) over [0, 1], split into num_steps slices.
     *
     * Each thread accumulates into column 0 of its own row of a
     * [threads][PAD] array, so neighbouring accumulators sit PAD doubles
     * apart (presumably to keep them on separate cache lines -- verify for
     * the target CPU). The rows are then summed serially.
     *
     * The thread count is read from the THREAD environment variable; when it
     * is unset the OpenMP default is used. Prints the elapsed wall-clock time
     * and the estimate, and returns 0.
     */
    int main(int argc ,char *argv[])
    {
      double start_t, end_t;
      double total_t;
      start_t = omp_get_wtime();
      num_steps = 100000000;

      // Parse THREAD once: getenv() returns NULL when the variable is unset,
      // and the value is needed both as an array bound and for the runtime.
      const char *thread_env = getenv("THREAD");
      int requested = thread_env ? atoi(thread_env) : omp_get_max_threads();
      if (requested < 1)
      {
        requested = 1;
      }

      double pi = 0.0;
      double sum[requested][PAD];
      // A variable-length array cannot take an initializer, so clear the one
      // column that is used before any thread adds to it.
      for (int t = 0; t < requested; t++)
      {
        sum[t][0] = 0.0;
      }
      step = 1.0 / (double)num_steps; // width of one slice
      omp_set_num_threads(requested);
    #pragma omp parallel
      {
        int id = omp_get_thread_num();
        // Stride by the size of the team actually created; the runtime may
        // grant fewer threads than requested.
        int team = omp_get_num_threads();
        for (long i = id; i < num_steps; i += team)
        {
          double x = (i + 0.5) * step; // midpoint of slice i
          sum[id][0] += 4.0 / (1.0 + x * x);
        }
      }
      // Rows of threads that were never created stay at 0.0 and add nothing.
      for (int t = 0; t < requested; t++)
      {
        pi += sum[t][0] * step;
      }
      end_t = omp_get_wtime();
      total_t = end_t - start_t;
      printf("运行时间为%fs\t%.10f\n", total_t, pi);
      return 0;
    }

2.并行域

    #include <time.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "omp.h"
    #include <unistd.h>
    #define NUM_THREADS atoi(getenv("THREAD"))
    //#define PAD 8 //跟CPU内存分配有关
    double step;
    static long num_steps;
    /*
     * Parallel-region variant: approximates pi as the midpoint-rule integral
     * of 4 / (1 + x^2) over [0, 1], split into num_steps slices.
     *
     * Each thread accumulates into its own slot of a plain 1-D array (no
     * padding between slots), and the slots are summed serially afterwards.
     *
     * The thread count is read from the THREAD environment variable; when it
     * is unset the OpenMP default is used. Prints the elapsed wall-clock time
     * and the estimate, and returns 0.
     */
    int main(int argc ,char *argv[])
    {
      double start_t, end_t;
      double total_t;
      start_t = omp_get_wtime();
      num_steps = 100000000;

      // Parse THREAD once: getenv() returns NULL when the variable is unset,
      // and the value is needed both as an array bound and for the runtime.
      const char *thread_env = getenv("THREAD");
      int requested = thread_env ? atoi(thread_env) : omp_get_max_threads();
      if (requested < 1)
      {
        requested = 1;
      }

      double pi = 0.0;
      double sum[requested];
      // Clear every slot up front so that slots belonging to threads the
      // runtime did not create still contribute a well-defined 0.0.
      for (int t = 0; t < requested; t++)
      {
        sum[t] = 0.0;
      }
      step = 1.0 / (double)num_steps; // width of one slice
      omp_set_num_threads(requested);
    #pragma omp parallel
      {
        int id = omp_get_thread_num();
        // Stride by the size of the team actually created; the runtime may
        // grant fewer threads than requested.
        int team = omp_get_num_threads();
        for (long i = id; i < num_steps; i += team)
        {
          double x = (i + 0.5) * step; // midpoint of slice i
          sum[id] += 4.0 / (1.0 + x * x);
        }
      }
      for (int t = 0; t < requested; t++)
      {
        pi += sum[t] * step;
      }
      end_t = omp_get_wtime();
      total_t = end_t - start_t;
      printf("运行时间为%fs\t%.10f\n", total_t, pi);
      return 0;
    }

3.critical 制导

    #include <time.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "omp.h"
    #include <unistd.h>
    #define NUM_THREADS atoi(getenv("THREAD"))
    //#define PAD 8 //跟CPU内存分配有关
    double step;
    static long num_steps;
    /*
     * Critical-section variant: approximates pi as the midpoint-rule integral
     * of 4 / (1 + x^2) over [0, 1], split into num_steps slices.
     *
     * Each thread keeps a private partial sum and adds it to the shared
     * result exactly once, inside an `omp critical` section.
     *
     * The thread count is read from the THREAD environment variable; when it
     * is unset the OpenMP default is used. Prints the elapsed wall-clock time
     * and the estimate, and returns 0.
     */
    int main(int argc ,char *argv[])
    {
      double start_t, end_t;
      double total_t;
      start_t = omp_get_wtime();
      num_steps = 100000000;

      // Parse THREAD once: getenv() returns NULL when the variable is unset.
      const char *thread_env = getenv("THREAD");
      int requested = thread_env ? atoi(thread_env) : omp_get_max_threads();
      if (requested < 1)
      {
        requested = 1;
      }

      double pi = 0.0;
      step = 1.0 / (double)num_steps; // width of one slice
      omp_set_num_threads(requested);
    #pragma omp parallel
      {
        double partial = 0.0;
        int id = omp_get_thread_num();
        // Stride by the size of the team actually created; the runtime may
        // grant fewer threads than requested.
        int team = omp_get_num_threads();
        for (long i = id; i < num_steps; i += team)
        {
          double x = (i + 0.5) * step; // midpoint of slice i
          partial += 4.0 / (1.0 + x * x);
        }
        // pi is shared: serialise the single update each thread performs.
        #pragma omp critical
        pi += partial * step;
      }
      end_t = omp_get_wtime();
      total_t = end_t - start_t;
      printf("运行时间为%fs\t%.10f\n", total_t, pi);
      return 0;
    }

4.reduction制导

    #include <time.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "omp.h"
    #include <unistd.h>
    #define NUM_THREADS atoi(getenv("THREAD"))
    //#define PAD 8 //跟CPU内存分配有关
    double step;
    static long num_steps;
    /*
     * Reduction variant: approximates pi as the midpoint-rule integral of
     * 4 / (1 + x^2) over [0, 1], split into num_steps slices.
     *
     * A single combined `parallel for` distributes the slices across the
     * team, and `reduction(+:sum)` gives every thread a zero-initialised
     * private sum that OpenMP adds together when the loop ends.
     *
     * The thread count is read from the THREAD environment variable; when it
     * is unset the OpenMP default is used. Prints the elapsed wall-clock time
     * and the estimate, and returns 0.
     */
    int main(int argc ,char *argv[])
    {
      double start_t, end_t;
      double total_t;
      start_t = omp_get_wtime();
      num_steps = 100000000;

      // Parse THREAD once: getenv() returns NULL when the variable is unset.
      const char *thread_env = getenv("THREAD");
      int requested = thread_env ? atoi(thread_env) : omp_get_max_threads();
      if (requested < 1)
      {
        requested = 1;
      }

      double pi;
      double sum = 0.0;
      step = 1.0 / (double)num_steps; // width of one slice
      omp_set_num_threads(requested);
      // The work-sharing loop must cover the whole range exactly once. Nesting
      // `parallel for` inside another parallel region would make each outer
      // thread fork its own inner team and leave the final update unprotected.
    #pragma omp parallel for reduction(+:sum)
      for (long i = 0; i < num_steps; i++)
      {
        double x = (i + 0.5) * step; // midpoint of slice i
        sum += 4.0 / (1.0 + x * x);
      }
      pi = sum * step;
      end_t = omp_get_wtime();
      total_t = end_t - start_t;
      printf("运行时间为%fs\t%.10f\n", total_t, pi);
      return 0;
    }

5.lastprivate制导

    #include <time.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "omp.h"
    #include <unistd.h>
    #define NUM_THREADS atoi(getenv("THREAD"))
    //#define PAD 8 //跟CPU内存分配有关
    double step;
    static long num_steps;
    /*
     * Private-partial-sum variant: approximates pi as the midpoint-rule
     * integral of 4 / (1 + x^2) over [0, 1], split into num_steps slices.
     *
     * `lastprivate` is deliberately not used for the accumulator: it creates
     * an uninitialised private copy and copies back only the value from the
     * sequentially last iteration, so it cannot carry a running sum. Each
     * thread instead keeps a block-local partial sum and folds it into the
     * shared result with one atomic update.
     *
     * The thread count is read from the THREAD environment variable; when it
     * is unset the OpenMP default is used. Prints the elapsed wall-clock time
     * and the estimate, and returns 0.
     */
    int main(int argc ,char *argv[])
    {
      double start_t, end_t;
      double total_t;
      start_t = omp_get_wtime();
      num_steps = 100000000;

      // Parse THREAD once: getenv() returns NULL when the variable is unset.
      const char *thread_env = getenv("THREAD");
      int requested = thread_env ? atoi(thread_env) : omp_get_max_threads();
      if (requested < 1)
      {
        requested = 1;
      }

      double pi = 0.0;
      step = 1.0 / (double)num_steps; // width of one slice
      omp_set_num_threads(requested);
    #pragma omp parallel
      {
        double partial = 0.0; // declared inside the region, hence per-thread
        int id = omp_get_thread_num();
        // Stride by the size of the team actually created; the runtime may
        // grant fewer threads than requested.
        int team = omp_get_num_threads();
        for (long i = id; i < num_steps; i += team)
        {
          double x = (i + 0.5) * step; // midpoint of slice i
          partial += 4.0 / (1.0 + x * x);
        }
        // pi is shared between threads; the update must not race.
        #pragma omp atomic
        pi += partial * step;
      }
      end_t = omp_get_wtime();
      total_t = end_t - start_t;
      printf("运行时间为%fs\t%.10f\n", total_t, pi);
      return 0;
    }

计算数列加和

两者之间效率差别的原因

  • PAD方法

    • 虽然使用了二维数组,防止了各线程写入数据时的相互阻塞(伪共享);但在求最终结果时使用了for循环

  • lastprivate制导

    • lastprivate会为每个线程创建私有的sum副本,在退出for循环时,把执行最后一次迭代的线程的副本值拷回原来的sum变量;随后各个线程分别对共享变量pi做累加

1.PAD方法

#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"
#include <unistd.h>
#define NUM_THREADS atoi(getenv("THREAD"))
#define PAD 8 //跟CPU内存分配有关
double step;
static long num_steps;
/*
 * PAD variant of the series sum: adds up (3*i + 3) for i = 1..num_steps and
 * prints half of the total together with the elapsed wall-clock time.
 *
 * Each thread accumulates into column 0 of its own row of a [threads][PAD]
 * array, so neighbouring accumulators sit PAD doubles apart (presumably to
 * keep them on separate cache lines -- verify for the target CPU). The rows
 * are then summed serially.
 *
 * The thread count is read from the THREAD environment variable; when it is
 * unset the OpenMP default is used. Returns 0.
 */
int main(int argc, char *argv[])
{
  double start_t, end_t;
  double total_t;
  start_t = omp_get_wtime();
  num_steps = 1000000;

  // Parse THREAD once: getenv() returns NULL when the variable is unset,
  // and the value is needed both as an array bound and for the runtime.
  const char *thread_env = getenv("THREAD");
  int requested = thread_env ? atoi(thread_env) : omp_get_max_threads();
  if (requested < 1)
  {
    requested = 1;
  }

  double pi = 0.0;
  double sum[requested][PAD];
  // A variable-length array cannot take an initializer, so clear the one
  // column that is used before any thread adds to it.
  for (int t = 0; t < requested; t++)
  {
    sum[t][0] = 0.0;
  }
  omp_set_num_threads(requested);
#pragma omp parallel
  {
    int id = omp_get_thread_num();
    // Stride by the size of the team actually created; the runtime may grant
    // fewer threads than requested.
    int team = omp_get_num_threads();
    for (long i = id + 1; i <= num_steps; i += team)
    {
      sum[id][0] += (double)(3 * i + 3);
    }
  }
  // Rows of threads that were never created stay at 0.0 and add nothing.
  for (int t = 0; t < requested; t++)
  {
    pi += sum[t][0];
  }
  end_t = omp_get_wtime();
  total_t = end_t - start_t;
  printf("运行时间为%fs\t%.10f\n", total_t, pi/2.0);
  return 0;
}

2.lastprivate制导

#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"
#include <unistd.h>
#define NUM_THREADS atoi(getenv("THREAD"))
double step;
static long num_steps;
/*
 * Private-partial-sum variant of the series sum: adds up (3*i + 3) for
 * i = 1..num_steps and prints half of the total together with the elapsed
 * wall-clock time.
 *
 * `lastprivate` is deliberately not used for the accumulator: it creates an
 * uninitialised private copy and copies back only the value from the
 * sequentially last iteration, so it cannot carry a running sum. Each thread
 * instead keeps a block-local partial sum and folds it into the shared total
 * with one atomic update.
 *
 * The thread count is read from the THREAD environment variable; when it is
 * unset the OpenMP default is used. Returns 0.
 */
int main(int argc, char *argv[])
{
  double start_t, end_t;
  double total_t;
  start_t = omp_get_wtime();
  num_steps = 1000000;

  // Parse THREAD once: getenv() returns NULL when the variable is unset.
  const char *thread_env = getenv("THREAD");
  int requested = thread_env ? atoi(thread_env) : omp_get_max_threads();
  if (requested < 1)
  {
    requested = 1;
  }

  double pi = 0.0;
  omp_set_num_threads(requested);
#pragma omp parallel
  {
    double partial = 0.0; // declared inside the region, hence per-thread
    int id = omp_get_thread_num();
    // Stride by the size of the team actually created; the runtime may grant
    // fewer threads than requested.
    int team = omp_get_num_threads();
    for (long i = id + 1; i <= num_steps; i += team)
    {
      partial += (double)(3 * i + 3);
    }
    // pi is shared between threads; the update must not race.
    #pragma omp atomic
    pi += partial;
  }
  end_t = omp_get_wtime();
  total_t = end_t - start_t;
  printf("运行时间为%fs\t%.10f\n", total_t, pi/2.0);
  return 0;
}

计算二维矩阵的累加和累乘

数组指针的理解

*符号表示获取指针指向的值,具体的来讲就是下面这张图的理解

  // Define a 4x4 array; the flat initializer list fills it row by row.
  int Array[4][4]={1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4};
  int i,j;
  // NOTE(review): the printed expression does not depend on i or j, so the
  // same element is printed 16 times -- confirm whether i*4+j was intended.
  for(i=0;i<4;i++){
   for(j=0;j<4;j++){
    printf("%d\t",*(*Array+5)); // *Array points at the first int of row 0; +5 moves five ints forward, i.e. to row 1, column 1 (0-based); the outer * reads the value stored there
   }
   printf("\n");
  }

二维数据的累加

  • 在命令行使用export THREAD=10指定使用的线程数

  • 当数组变大时可能会超出栈空间,使用ulimit -s 10000临时提升栈空间

#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"
#include <unistd.h>
#define DIMENSION 10000                       //定义正交数组,可以修改栈空间来提高数组大小
#define random(x) (rand() % x)             //定义随机数函数
#define NUM_THREADS atoi(getenv("THREAD")) //定义线程数
//#define PAD 8 //跟CPU内存分配有关
double step;
static long num_steps;

/*
 * Prints a matrix held in a flat, row-major int buffer.
 *
 * Array: pointer to the first element.
 * row:   number of rows to print.
 * col:   number of elements per row.
 *
 * Every element is followed by a tab and every row by a newline.
 */
void printMatric(int *Array, int row, int col)
{
  const int *cursor = Array; // walks the buffer in storage order
  for (int r = 0; r < row; ++r)
  {
    for (int c = 0; c < col; ++c)
    {
      printf("%d\t", *cursor);
      ++cursor;
    }
    printf("\n");
  }
}
  // The matrices are defined at file scope so they are not placed on main's stack.
  int Array1[DIMENSION][DIMENSION];
  int Array2[DIMENSION][DIMENSION];
  int Sum[DIMENSION][DIMENSION];
/*
 * Element-wise matrix addition: fills Array1 and Array2 with pseudo-random
 * values in [0, 100) using fixed seeds, then computes Sum = Array1 + Array2
 * with the rows dealt out round-robin to the OpenMP threads.
 *
 * The thread count is read from the THREAD environment variable; when it is
 * unset the OpenMP default is used. Prints the elapsed wall-clock time
 * (which includes the fill phase) and returns 0.
 */
int main(int argc, char *argv[])
{
  double start_t, end_t;
  double total_t;
  start_t = omp_get_wtime();

  // Fixed seeds make both inputs reproducible between runs.
  srand(2019);
  for (int i = 0; i < DIMENSION; i++)
  {
    for (int j = 0; j < DIMENSION; j++)
    {
      Array1[i][j] = random(100);
    }
  }
  srand(2018);
  for (int i = 0; i < DIMENSION; i++)
  {
    for (int j = 0; j < DIMENSION; j++)
    {
      Array2[i][j] = random(100);
    }
  }

  // Parse THREAD once: getenv() returns NULL when the variable is unset.
  const char *thread_env = getenv("THREAD");
  int requested = thread_env ? atoi(thread_env) : omp_get_max_threads();
  if (requested < 1)
  {
    requested = 1;
  }
  omp_set_num_threads(requested);
#pragma omp parallel
  {
    int id = omp_get_thread_num();
    // Stride by the size of the team actually created: striding by the
    // requested count would skip rows if the runtime grants fewer threads.
    // Threads whose id is >= DIMENSION simply find no row to process.
    int team = omp_get_num_threads();
    for (int r = id; r < DIMENSION; r += team)
    {
      for (int col = 0; col < DIMENSION; col++)
      {
        // Plain 2-D indexing keeps every access inside its own row object.
        Sum[r][col] = Array1[r][col] + Array2[r][col];
      }
    }
  }
  end_t = omp_get_wtime();
  total_t = end_t - start_t;
  // Uncomment to inspect the operands and the result:
  // printf("Array1!\n");
  // printMatric(*Array1, DIMENSION, DIMENSION);
  // printf("\nArray2!\n");
  // printMatric(*Array2, DIMENSION, DIMENSION);
  // printf("\nSum!\n");
  // printMatric(*Sum, DIMENSION, DIMENSION);
  printf("\n运行时间为%fs\n", total_t);
  return 0;
}

数组的累乘

  • 在纸上比划一下就ok啦

#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"
#include <unistd.h>
#define DIMENSION 2                       //定义正交数组,可以修改栈空间来提高数组大小
#define random(x) (rand() % x)             //定义随机数函数
#define NUM_THREADS atoi(getenv("THREAD")) //定义线程数
//#define PAD 8 //跟CPU内存分配有关
double step;
static long num_steps;

/*
 * Writes a row-major matrix stored in a flat int buffer to stdout.
 *
 * Array: pointer to the first element.
 * row:   number of rows to print.
 * col:   number of elements per row.
 *
 * Each value is followed by a tab; each row ends with a newline.
 */
void printMatric(int *Array, int row, int col)
{
  int r = 0;
  while (r < row)
  {
    const int *line = Array + r * col; // start of row r
    int c = 0;
    while (c < col)
    {
      printf("%d\t", line[c]);
      c++;
    }
    printf("\n");
    r++;
  }
}
  // The matrices are defined at file scope so they are not placed on main's stack.
  int Array1[DIMENSION][DIMENSION];
  int Array2[DIMENSION][DIMENSION];
  int Sum[DIMENSION][DIMENSION];
/*
 * Matrix multiplication: fills Array1 and Array2 with pseudo-random values
 * in [0, 100) using fixed seeds, then stores the product Array1 x Array2 in
 * Sum, with the result rows dealt out round-robin to the OpenMP threads.
 *
 * The thread count is read from the THREAD environment variable; when it is
 * unset the OpenMP default is used. Prints the elapsed wall-clock time
 * (which includes the fill phase) and returns 0.
 */
int main(int argc, char *argv[])
{
  double start_t, end_t;
  double total_t;
  start_t = omp_get_wtime();

  // Fixed seeds make both inputs reproducible between runs.
  srand(2019);
  for (int i = 0; i < DIMENSION; i++)
  {
    for (int j = 0; j < DIMENSION; j++)
    {
      Array1[i][j] = random(100);
    }
  }
  srand(2018);
  for (int i = 0; i < DIMENSION; i++)
  {
    for (int j = 0; j < DIMENSION; j++)
    {
      Array2[i][j] = random(100);
    }
  }

  // Parse THREAD once: getenv() returns NULL when the variable is unset.
  const char *thread_env = getenv("THREAD");
  int requested = thread_env ? atoi(thread_env) : omp_get_max_threads();
  if (requested < 1)
  {
    requested = 1;
  }
  omp_set_num_threads(requested);
#pragma omp parallel
  {
    int id = omp_get_thread_num();
    // Stride by the size of the team actually created: striding by the
    // requested count would skip rows if the runtime grants fewer threads.
    int team = omp_get_num_threads();
    for (int r = id; r < DIMENSION; r += team)
    {
      // r selects a row of Array1 and the matching row of the result.
      for (int col = 0; col < DIMENSION; col++)
      {
        int acc = 0;
        for (int k = 0; k < DIMENSION; k++)
        {
          acc += Array1[r][k] * Array2[k][col];
        }
        // Write to row r, not to the thread id: a thread that owns several
        // rows would otherwise fold all of them into a single result row.
        Sum[r][col] = acc;
      }
    }
  }
  end_t = omp_get_wtime();
  total_t = end_t - start_t;
  // Uncomment to inspect the operands and the result:
  // printf("Array1!\n");
  // printMatric(*Array1, DIMENSION, DIMENSION);
  // printf("\nArray2!\n");
  // printMatric(*Array2, DIMENSION, DIMENSION);
  // printf("\nSum!\n");
  // printMatric(*Sum, DIMENSION, DIMENSION);
  printf("\n运行时间为%fs\n", total_t);
  return 0;
}

Last updated