OpenMP
包含头文件
#include<omp.h>
声明并行段开始
#pragma omp paralled; { int ID=omp_get_thread_num(); printf("hello(%d)\n",ID); printf("world(%d)\n",ID) }
编译运行
gcc paralle.c -fopenmp -o paralle
完整代码结构
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Minimal OpenMP "hello world" with two requested threads.
 *
 * NOTE(review): ID is declared outside the parallel region, so a single
 * copy is shared by every thread and the unsynchronized write below is a
 * data race. This is presumably kept on purpose to show what goes wrong
 * when the thread id is stored in a shared variable.
 */
int main()
{
    int ID;
    int nthread;

    omp_set_num_threads(2);

#pragma omp parallel
    {
        /* Store this thread's id in the shared variable. */
        ID = omp_get_thread_num();

        printf("hello(%d)\n", ID);
        printf("world(%d)\n", ID);

        if (0 == ID) {
            /* Query the size of the current team and report it. */
            nthread = omp_get_num_threads();
            printf("Number of threads %d\n", nthread);
        }
    }

    return 0;
}
输出结果
hello(1) world(1) hello(0) world(1)
:warning: 这是由于线程编号ID定义在并行域之外,位于所有线程共享的内存区域;并发执行时两个线程都会对同一个ID赋值,后写入的值会覆盖先写入的值(数据竞争)。上面的输出中,主线程(编号0)执行到if判断语句时,ID已经被另一个线程改写为1,所以不会进入if语句
改变 ID的作用域
当将ID的定义放在parallel作用域内部时,每个线程拥有自己的ID副本,只对单个线程可见;所以最终结果会进入到if判断语句
hello(0) world(0) Number of threads 2 hello(1) world(1)
使用并发求PI
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"

#define NUM_THREADS 5 /* number of threads requested from the OpenMP runtime */
/* Alternative (not enabled): take the thread count from the THREAD
 * environment variable, e.g. atoi(getenv("THREAD")). Note that `sum` would
 * then become a variable-length array, which cannot take an initializer. */
#define PAD 8 /* doubles per accumulator row; with 8-byte doubles that is 64 bytes,
                 presumably chosen to match the cache-line size -- confirm per CPU */

double step;           /* width of one integration slice */
static long num_steps; /* number of slices the interval [0, 1] is cut into */

/*
 * Approximates pi as the integral of 4 / (1 + x^2) over [0, 1] with the
 * midpoint rule. Slices are dealt out cyclically to the threads of one
 * parallel region; each thread accumulates into its own row of `sum`, and
 * the rows are combined after the region ends.
 * Prints the elapsed wall-clock time in seconds and the computed value.
 */
int main(void)
{
    double start_t, end_t, total_t;
    double pi = 0.0;
    /* Zero-initialized so that rows of threads the runtime did not start
     * contribute nothing to the final reduction. */
    double sum[NUM_THREADS][PAD] = {{0.0}};

    /* Wall-clock timer: clock() would add up the CPU time of all threads. */
    start_t = omp_get_wtime();

    num_steps = 100000000;
    step = 1.0 / (double)num_steps; /* split [0, 1] into num_steps slices */
    omp_set_num_threads(NUM_THREADS);

#pragma omp parallel
    {
        int id = omp_get_thread_num();
        /* Stride by the team size actually granted, which may be smaller
         * than NUM_THREADS; otherwise some slices would be skipped. */
        int team = omp_get_num_threads();
        long i;
        double x;

        for (i = id; i < num_steps; i += team) {
            x = (i + 0.5) * step; /* midpoint of slice i, for i = 0 .. num_steps-1 */
            sum[id][0] += 4.0 / (1.0 + x * x);
        }
    }

    for (int t = 0; t < NUM_THREADS; t++) {
        pi += sum[t][0] * step;
    }

    end_t = omp_get_wtime();
    total_t = end_t - start_t;
    printf("运行时间为%fs\t%f\n", total_t, pi);
    return 0;
}
PAD模式代码
NUM_THREADS为对应的线程数
num_steps 为步数(区间 [0,1] 被划分的份数),步长为 step = 1/num_steps
每个线程只更新自己那一行的第 0 个元素;每一行占 PAD 个 double(8×8=64 字节,通常正好是一条缓存行),因此各线程的累加互不干扰,可以同时进行
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"

#define NUM_THREADS 6 /* number of threads requested from the OpenMP runtime */
#define PAD 8 /* doubles per accumulator row; with 8-byte doubles that is 64 bytes,
                 presumably chosen to match the cache-line size -- confirm per CPU */

double step;           /* width of one integration slice */
static long num_steps; /* number of slices the interval [0, 1] is cut into */

/*
 * Padded-accumulator variant: approximates pi as the integral of
 * 4 / (1 + x^2) over [0, 1] with the midpoint rule. Slices are dealt out
 * cyclically to the threads of one parallel region; each thread accumulates
 * into element 0 of its own row of `sum`, and the rows are combined after
 * the region ends.
 * Prints the elapsed wall-clock time in seconds and the computed value.
 */
int main(void)
{
    double start_t, end_t, total_t;
    double pi = 0.0;
    /* Zero-initialized so that rows of threads the runtime did not start
     * contribute nothing to the final reduction. */
    double sum[NUM_THREADS][PAD] = {{0.0}};

    /* Wall-clock timer: clock() would add up the CPU time of all threads. */
    start_t = omp_get_wtime();

    num_steps = 100000000;
    step = 1.0 / (double)num_steps; /* split [0, 1] into num_steps slices */
    omp_set_num_threads(NUM_THREADS);

#pragma omp parallel
    {
        int id = omp_get_thread_num();
        /* Stride by the team size actually granted, which may be smaller
         * than NUM_THREADS; otherwise some slices would be skipped. */
        int team = omp_get_num_threads();
        long i;
        double x;

        for (i = id; i < num_steps; i += team) {
            x = (i + 0.5) * step; /* midpoint of slice i, for i = 0 .. num_steps-1 */
            sum[id][0] += 4.0 / (1.0 + x * x);
        }
    }

    for (int t = 0; t < NUM_THREADS; t++) {
        pi += sum[t][0] * step;
    }

    end_t = omp_get_wtime();
    total_t = end_t - start_t;
    printf("运行时间为%fs\t%f\n", total_t, pi);
    return 0;
}
并行域代码
伪共享模式:使用一维数组时,各线程的累加器 sum[id] 在内存中紧挨着存放,往往落在同一条缓存行上;一个线程的写入会使其他核心上的这条缓存行失效,线程之间需要反复同步缓存,因此速度变慢
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"

#define NUM_THREADS 4 /* number of threads requested from the OpenMP runtime */

double step;           /* width of one integration slice */
static long num_steps; /* number of slices the interval [0, 1] is cut into */

/*
 * Unpadded variant: approximates pi as the integral of 4 / (1 + x^2) over
 * [0, 1] with the midpoint rule. Slices are dealt out cyclically to the
 * threads of one parallel region; each thread accumulates directly into its
 * own element of the one-dimensional array `sum`.
 *
 * The per-thread accumulators are deliberately left adjacent in memory so
 * this listing can be timed against a padded layout.
 * Prints the elapsed wall-clock time in seconds and the computed value.
 */
int main(void)
{
    double start_t, end_t, total_t;
    double pi = 0.0;
    /* Zero-initialized so that slots of threads the runtime did not start
     * contribute nothing to the final reduction. */
    double sum[NUM_THREADS] = {0.0};

    /* Wall-clock timer: clock() would add up the CPU time of all threads. */
    start_t = omp_get_wtime();

    num_steps = 100000000;
    step = 1.0 / (double)num_steps; /* split [0, 1] into num_steps slices */
    omp_set_num_threads(NUM_THREADS);

#pragma omp parallel
    {
        int id = omp_get_thread_num();
        /* Stride by the team size actually granted, which may be smaller
         * than NUM_THREADS; otherwise some slices would be skipped. */
        int team = omp_get_num_threads();
        long i;
        double x;

        for (i = id; i < num_steps; i += team) {
            x = (i + 0.5) * step; /* midpoint of slice i, for i = 0 .. num_steps-1 */
            sum[id] += 4.0 / (1.0 + x * x);
        }
    }

    for (int t = 0; t < NUM_THREADS; t++) {
        pi += sum[t] * step;
    }

    end_t = omp_get_wtime();
    total_t = end_t - start_t;
    printf("运行时间为%fs\t%f\n", total_t, pi);
    return 0;
}
串行模式
输入同样的步长进行比较
#include <time.h>
#include <stdio.h>

double step; /* width of one integration slice */

/*
 * Serial reference: approximates pi as the integral of 4 / (1 + x^2) over
 * [0, 1] with the midpoint rule, once per value of num_steps produced by
 * the outer loop (currently a single value), and prints the CPU time from
 * clock() together with the result.
 */
int main(void)
{
    /* A double carries about 15 significant decimal digits, so printing
     * more than that only shows noise. */
    enum { MAX_PI_DIGITS = 15 };
    static long num_steps;

    /* The step count is fixed here; it could instead be read from stdin. */
    for (num_steps = 100000000; num_steps <= 100000000; num_steps += 100000)
    {
        /* Number of decimals to print grows with the step count:
         * 4 at 1,000,000 steps, plus one per additional 100,000 steps. */
        int index = (int)(((double)num_steps - 1000000) / 100000 + 4);
        if (index > MAX_PI_DIGITS)
        {
            index = MAX_PI_DIGITS;
        }

        clock_t start_t, end_t;
        double total_t;
        long i;
        double x, pi, sum = 0.0;

        start_t = clock();
        step = 1.0 / (double)num_steps; /* split [0, 1] into num_steps slices */
        for (i = 1; i <= num_steps; i++)
        {
            x = (i - 0.5) * step; /* midpoint of slice i, for i = 1 .. num_steps */
            sum += 4.0 / (1.0 + x * x);
        }
        pi = step * sum;
        end_t = clock();
        total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC;

        /* "%.*f" consumes the int precision first and the double second. */
        printf("运行时间为%fs\t%.*f\r\n", total_t, index, pi);
    }
    return 0;
}
Last updated