lastTest
学号:2019301110060 姓名:刘振平 院系:植物科学技术学院
1.二维数组乘积
#include "mpi.h"
#include "omp.h"
#include <malloc.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
int rank, numprocess, dimension;
double startTime, endTime;
float *Matrix1;
float *Matrix2;
float *sendBuf, *revBuf;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &numprocess);
MPI_Barrier(MPI_COMM_WORLD);
startTime = MPI_Wtime();
if (rank == 0)
{
printf("请输入矩阵的维度·:\n");
scanf("%d", &dimension);
Matrix1 = (float *)malloc(dimension * dimension * sizeof(float));
Matrix2 = (float *)malloc(dimension * dimension * sizeof(float));
for (int i = 0; i < dimension; i++)
{
// srand(rank*i+1);
for (int j = 0; j < dimension; j++)
{
Matrix1[i * dimension + j] = 100.0 * rand() / RAND_MAX;
Matrix2[i * dimension + j] = 100.0 * rand() / RAND_MAX;
}
}
revBuf = (float *)malloc(dimension * dimension * sizeof(float));
}
/*
广播二维数组
*/
MPI_Bcast(&dimension, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&Matrix1, dimension * dimension, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(&Matrix2, dimension * dimension, MPI_FLOAT, 0, MPI_COMM_WORLD);
sendBuf = (float *)malloc(dimension * sizeof(float));
for (int i = rank; i < dimension; i += numprocess)
{
for (int j = 0; j < dimension; j++)
{
for (int k = 0; k < dimension; k++)
{
sendBuf[j] = Matrix1[i * dimension + k] * Matrix2[k * dimension + j];
}
}
MPI_Gather(sendBuf, dimension, MPI_FLOAT, revBuf, dimension, MPI_FLOAT, 0, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
endTime = MPI_Wtime();
if (rank == 0)
{
for (int i = 0; i < dimension; i++)
{
// srand(rank*i+1);
for (int j = 0; j < dimension; j++)
{
printf("%f\t",revBuf[i*dimension+j]);
}
printf("\n");
}
printf("time= %g(s)\n", endTime - startTime);
}
free(Matrix1);
free(Matrix2);
MPI_Finalize();
return 0;
}
2.计算数列加和值
2.1MPI模式
根据进程数将N拆分成多个部分
// MPI
#include "mpi.h"
#include "omp.h"
#include <math.h>
#include <stdio.h>
#define N 1000000
int main(int argc, char *argv[])
{
int rank, nproc;
int i, low, up;
double local = 0.0, pi, t0, t1;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
t0 = MPI_Wtime();
//low = rank * (N / nproc) + 1;
//up = low + N / nproc ;
for (i = rank + 1; i <= N; i += nproc)
{
local += (double)(3 * i + 3) / 2.0;
}
MPI_Reduce(&local, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0)
printf("pi = %f\n", pi);
t1 = MPI_Wtime();
if (rank == 0)
printf("time used = %.2f\n", t1 - t0);
MPI_Finalize();
}
2.2MPi+OpenMp模式
根据进程ID拆分每个进程的计算范围
在每个范围中使用多线程进行计算
#include <mpi.h>
#include <omp.h>
#include <math.h>
#include <stdio.h>
// #define N 10000000000
int main(int argc, char *argv[])
{
int N;
int rank, i, nproc;
int low, up;
double t0, t1;
double local = 0.0, pi;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank==0){
printf("请输入一个数组长度:\n");
scanf("%d",&N);
}
MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);//发送每个进程维度
t0 = MPI_Wtime();
low = rank * (N / nproc) + 1;
up = low + N / nproc;
// for (i = rank + 1; i <= N; i += nproc)
// {
// local += (double)(3 * i + 3) / 2.0;
// }
if(rank==nproc-1&&N%nproc!=0){
up=N+1;
}
// printf("process:%d\tmax_thread:%d\n", rank,omp_get_max_threads());
#pragma omp parllel for reduction(+ : local) private(i)
for (i = low+omp_get_thread_num(); i <up; i += omp_get_max_threads())
{
local += (3 * i + 3) / 2.0;
}
MPI_Barrier(MPI_COMM_WORLD);
MPI_Reduce(&local, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0)
{
printf("pi = %lf\n", pi);
t1 = MPI_Wtime();
printf("time used = %.2f\n", t1 - t0);
}
MPI_Finalize();
return 0;
}
2.3并行效率和加速比
取N=10000000000
线程数
时间
加速比
并行效率
1
5.42
1
1
2
5.36
1.01
0.505
4
5.54
0.97
0.2425
6
5.38
1.0
0.16
8
5.46
0.99
0.123
3.所有计算的PI
3.1OMP多线程计算PI
3.1.1PAD模式
PAD模式进行多线程计算PI,利用二维数组存储计算结果
在多线程结束后,遍历数组获取对应的pi值
防止了多线程在输入输出时,造成的阻塞
#pragma omp parallel //开始并发执行
{
double x;
int id , i;
id = omp_get_thread_num(); //获取每个线程的id编号
//#pragma omp parallel for reduction(+:sum)
for (i=id; i < num_steps; i = i + NUM_THREADS)
{
x = (i - 0.5) * step;
sum[id][0] += 4.0 / (1.0 + x * x);
}
//printf("%f\n",sum);
//#pragma omp critical
//pi+=sum*step;
}
int i;
for (i = 0; i < NUM_THREADS; i++)
{
pi += sum[i][0] * step;
}
3.1.2并行域模式
使用一维数组存储计算结果,所以会造成数据读写时的阻塞
omp_set_num_threads(NUM_THREADS); //设置要使用的线程数目
#pragma omp parallel //开始并发执行
{
double x;
int id , i;
id = omp_get_thread_num(); //获取每个线程的id编号
sum[id]=0.0;
//#pragma omp parallel for reduction(+:sum)
for (i=id; i < num_steps; i = i + NUM_THREADS)
{
x = (i - 0.5) * step;
sum[id]+= 4.0 / (1.0 + x * x);
}
}
int i;
for (i = 0; i < NUM_THREADS; i++)
{
pi += sum[i] * step;
}
3.1.3reduction 制导
在并行域中,使用reduction制导语句将最终结果直接累加到sum变量
省去了for循环的遍历,以及多个线程对数据的读写
#pragma omp parallel //开始并发执行
{
double x,sum=0.0;
int id , i;
id = omp_get_thread_num(); //获取每个线程的id编号
#pragma omp parallel for reduction(+:sum)
for (i=id; i < num_steps; i = i + NUM_THREADS)
{
x = (i - 0.5) * step;
sum += 4.0 / (1.0 + x * x);
}
//printf("%f\n",sum);
//#pragma omp critical
pi+=sum*step;
}
3.2MPI多进程模式
使用MPI_Scatter将要计算的范围分发给各个进行
使用MPI_Gather将每个进程的结果汇总到0号进程
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include "mpi.h"
#define _NUM_STEP 10000
double step = 1.0 / (double)_NUM_STEP;
/*
定义每个进程使用的函数
*/
double LocalSum(int point, int numprocess)
{
double sum = 0.0, x;
for (int i = point; i <= _NUM_STEP; i += numprocess)
{
x = step * (i - 0.5);
sum += 4.0 / (1.0 + x * x);
}
return sum;
}
int main(int argc, char *argv[])
{
int numprocess, rank;
int *sendbuf; //定义接收缓冲区
double *revbuf = NULL; //定义接收缓冲区
int point; //定义每个进程开始计算的节点
double sum, pi; //sum得到每个进程的计算结果
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &numprocess);
if (rank == 0)
{
sendbuf = (int *)malloc(numprocess * 1 * sizeof(int));//开辟发送缓存
for (int i = 0; i < numprocess; i++)
{
*(sendbuf + i) = i; //构造缓冲区数据
}
}
MPI_Scatter(sendbuf, 1, MPI_INT, &point, 1, MPI_INT, 0, MPI_COMM_WORLD);//向各个进程发送数据
sum = LocalSum(point, numprocess); //计算每个进程的结果
// printf("rank= %d Results: %f\t%d\t%d\n", rank, sum, point, numprocess);
if (rank == 0)
{
revbuf = (double *)malloc(numprocess * 1 * sizeof(double));//开辟接收结果的缓存
}
MPI_Gather(&sum, 1, MPI_DOUBLE, revbuf, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);//将结果收集
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 0)
{
for (int i = 0; i < numprocess; i++)
{
pi += revbuf[i] * step;//计算最后结果
}
printf("pi:%f\n", pi);
/* code */
}
MPI_Finalize();
return 0;
}
3.3MPI和OpenMPI混合模式
根据进程ID计算每个进程所要计算的范围
每个进行使用多线程进行计算,使用reduction制导语句将多个线程的结果汇总
使用MPI_Reduce将多个进程的计算结果进行汇总
#include "mpi.h"
#include "omp.h"
#include <math.h>
#include <stdio.h>
#define N 1000000000
int main(int argc, char *argv[])
{
int rank, nproc;
int i, low, up;
double local = 0.0, pi, w, temp, t0, t1;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc); //进程数
MPI_Comm_rank(MPI_COMM_WORLD, &rank); //进程身份
t0 = MPI_Wtime();
w = 1.0 / N;
low = rank * (N / nproc); //每个进程计算的范围
up = low + N / nproc - 1;
#pragma omp parallel for reduction(+ : local) private(i,tmp) //线程间通信计算
for (i = low; i < up; i++)
{
temp = (i + 0.5) * w;
local = local + 4.0 / (1.0 + temp * temp);
}
MPI_Reduce(&local, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); //进程间通信获取结果,发送给0号进程的pi
if (rank == 0)
printf("pi = %.20f\n", pi * w); //0号进程获取最终结果
t1 = MPI_Wtime();
if (rank == 0)
printf("time used = %.2f\n", t1 - t0); //打印时间
MPI_Finalize();
}
Last updated