无意中发现了一个Python下的mpi教程《A Python Introduction to Parallel Programming with MPI 1.0.2 documentation》
Point-to-Point Communication
The Trapezoidal Rule
# trapSerial.py
# example to run: python trapSerial.py 0.0 1.0 10000
import numpy
import sys
import time
# Command-line arguments, in order: lower bound a, upper bound b, and the
# number of trapezoids n.
a, b, n = float(sys.argv[1]), float(sys.argv[2]), int(sys.argv[3])
def f(x):
    """Return the integrand x**2; also works elementwise on numpy arrays."""
    return x * x


def integrateRange(a, b, n):
    """Numerically integrate f with the trapezoid rule.

    Args:
        a: lower bound of the interval.
        b: upper bound of the interval.
        n: number of trapezoids (must be non-zero, the result is divided
            by it).

    Returns:
        The trapezoid-rule estimate of the integral of f from a to b.
    """
    # n + 1 equally spaced endpoints delimit n trapezoids; f is evaluated on
    # all of them in one vectorized call instead of a Python loop.
    samples = f(numpy.linspace(a, b, n + 1))
    # The two outer endpoints only count with weight 1/2, so half of each is
    # taken off the plain sum.
    integral = numpy.sum(samples) - (f(a) + f(b)) / 2.0
    # Scale by the width of one trapezoid.
    return integral * (b - a) / n
# Time only the numerical integration itself.
begin_time = time.time()
integral = integrateRange(a, b, n)
end_time = time.time()

# The message is split with implicit continuation inside the parentheses; a
# backslash inside the string literal would glue "integral" and "from"
# together in the output.
print("With n =", n, "trapezoids, our estimate of the integral from",
      a, "to", b, "is", integral)
print("total run time :", end_time - begin_time)
# trapParallel_1.py
# example to run: mpiexec -n 4 python trapParallel_1.py 0.0 1.0 10000
import numpy
import sys
import time
from mpi4py import MPI
from mpi4py.MPI import ANY_SOURCE
# Communicator containing every process of this MPI job; it must be bound
# before rank and size can be queried.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# Command-line arguments, in order: lower bound a, upper bound b, and the
# total number of trapezoids n.
a = float(sys.argv[1])
b = float(sys.argv[2])
n = int(sys.argv[3])
# we arbitrarily define a function to integrate
def f(x):
    """Return the integrand x**2; also works elementwise on numpy arrays."""
    return x * x


# This is the serial version of the trapezoidal rule; parallelization occurs
# by dividing the range among processes.
def integrateRange(a, b, n):
    """Estimate the integral of f over [a, b] with n trapezoids.

    Args:
        a: lower bound of the interval.
        b: upper bound of the interval.
        n: number of trapezoids (must be non-zero, the result is divided
            by it).

    Returns:
        The trapezoid-rule estimate of the integral.
    """
    # n + 1 endpoints, but n trapezoids; evaluate f on all endpoints at once.
    samples = f(numpy.linspace(a, b, n + 1))
    # The first and last endpoint carry weight 1/2 in the trapezoid rule.
    integral = numpy.sum(samples) - (f(a) + f(b)) / 2.0
    return integral * (b - a) / n
# local_n is the number of trapezoids each process will calculate.
# NOTE: size must divide n. If it does not, the n % size leftover trapezoids
# are dropped: h below is derived from local_n * size, so [a, b] is still
# covered completely, but with fewer trapezoids than the n that is printed.
# Integer division keeps the count exact for large n.
local_n = n // size

# h is the step size (the width of one trapezoid).
h = (b - a) / (local_n * size)

# The interval each process handles: local_a is the starting point and
# local_b is the endpoint.
local_a = a + rank * local_n * h
local_b = local_a + local_n * h

# The upper-case Send/Recv calls work on buffers, so numpy arrays are used;
# slot i of recv_buffer receives one partial result on the root.
recv_buffer = numpy.zeros(size)

if rank == 0:
    begin_time = time.time()

# Local computation: each process integrates its own interval.
integral = integrateRange(local_a, local_b, local_n)

# Communication: the root receives one result from every other process and
# sums them; all other processes send their result to the root.
if rank == 0:
    recv_buffer[0] = integral
    for i in range(1, size):
        # Messages are accepted in arrival order; the sum does not depend on
        # which slot a given partial result lands in.
        comm.Recv(recv_buffer[i:i+1], ANY_SOURCE)
    total = numpy.sum(recv_buffer)
else:
    # Wrap the scalar in a one-element float64 array so that it matches the
    # element type of recv_buffer on the receiving side.
    send_buffer = numpy.array([integral], dtype=numpy.float64)
    comm.Send(send_buffer, dest=0)

# Only the root knows the total and the timing, so only the root prints.
if comm.rank == 0:
    end_time = time.time()
    print("With n =", n, "trapezoids, our estimate of the integral from",
          a, "to", b, "is", total)
    print("total run time :", end_time - begin_time)
    print("total size: ", size)
mpiexec -np 4 python trapSerial_1.py 0 1000000 100000000
# trapParallel_2.py
# example to run: mpiexec -n 4 python trapParallel_2.py 0.0 1.0 10000
import numpy
import sys
import time
from mpi4py import MPI
from mpi4py.MPI import ANY_SOURCE
# Communicator containing every process of this MPI job; it must be bound
# before rank and size can be queried.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# Command-line arguments, in order: lower bound a, upper bound b, and the
# total number of trapezoids n.
a = float(sys.argv[1])
b = float(sys.argv[2])
n = int(sys.argv[3])
# we arbitrarily define a function to integrate
def f(x):
    """Return the integrand x**2; also works elementwise on numpy arrays."""
    return x * x


# This is the serial version of the trapezoidal rule; parallelization occurs
# by dividing the range among processes.
def integrateRange(a, b, n):
    """Estimate the integral of f over [a, b] with n trapezoids.

    Args:
        a: lower bound of the interval.
        b: upper bound of the interval.
        n: number of trapezoids; 0 is allowed and yields 0.0.

    Returns:
        The trapezoid-rule estimate of the integral as a numpy float64.
    """
    # When there are fewer trapezoids than processes, some processes are
    # handed zero trapezoids. The general formula would divide by zero and
    # produce nan, which would then poison the global sum, so an empty share
    # contributes exactly nothing. A numpy scalar is returned so the result
    # can still be used as an MPI buffer.
    if n == 0:
        return numpy.float64(0.0)
    # n + 1 endpoints, but n trapezoids; evaluate f on all endpoints at once.
    samples = f(numpy.linspace(a, b, n + 1))
    # The first and last endpoint carry weight 1/2 in the trapezoid rule.
    integral = numpy.sum(samples) - (f(a) + f(b)) / 2.0
    return integral * (b - a) / n
# h is the step size; n is the total number of trapezoids.
h = (b - a) / n

# local_n[r] is the number of trapezoids process r will calculate. size does
# not have to divide n: every process gets n // size trapezoids and the
# n % size leftover ones go, one each, to the highest-ranked processes.
base, extra = divmod(n, size)
local_n = numpy.zeros(size, dtype=numpy.int32)
local_n[:] = base
if extra != 0:
    local_n[-extra:] += 1

# The interval each process handles: it starts at a, shifted by the
# trapezoids owned by all lower ranks, and spans this process's own share.
local_a = a + numpy.sum(local_n[:rank]) * h
local_b = local_a + local_n[rank] * h

# The upper-case Send/Recv calls work on buffers, so numpy arrays are used;
# slot i of recv_buffer receives one partial result on the root.
recv_buffer = numpy.zeros(size)

if rank == 0:
    begin_time = time.time()

# Local computation: each process integrates its own interval.
integral = integrateRange(local_a, local_b, local_n[rank])

# Communication: the root receives one result from every other process and
# sums them; all other processes send their result to the root.
if rank == 0:
    recv_buffer[0] = integral
    for i in range(1, size):
        # Messages are accepted in arrival order; the sum does not depend on
        # which slot a given partial result lands in.
        comm.Recv(recv_buffer[i:i+1], ANY_SOURCE)
    total = numpy.sum(recv_buffer)
else:
    # Wrap the scalar in a one-element float64 array so that it matches the
    # element type of recv_buffer on the receiving side.
    send_buffer = numpy.array([integral], dtype=numpy.float64)
    comm.Send(send_buffer, dest=0)

# Only the root knows the total and the timing, so only the root prints.
if comm.rank == 0:
    end_time = time.time()
    print("With n =", n, "trapezoids, our estimate of the integral from",
          a, "to", b, "is", total)
    print("total run time :", end_time - begin_time)
    print("total size: ", size)
# h is the step size. n is the total number of trapezoids
h = (b - a) / n
# local_n[r] is the number of trapezoids process r will calculate.
# size does not have to divide n: the n % size leftover trapezoids are
# handed, one each, to the last n % size processes.
local_n = numpy.zeros(size, dtype=numpy.int32)
local_n[:] = n // size
if n % size != 0:
    local_n[-(n % size):] += 1
# we calculate the interval that each process handles
# local_a is the starting point (offset from a by the trapezoids owned by
# all lower ranks) and local_b is the endpoint
local_a = a + numpy.sum(local_n[:rank]) * h
local_b = local_a + local_n[rank] * h
mpiexec --oversubscribe -np 100 python trapSerial_2.py 0 1000000 1099
Collective Communication
The Parallel Trapezoidal Rule 2.0
改进方法1对应的 trapParallel_1.py 改进:
# trapParallel_1.py
# example to run: mpiexec -n 4 python trapParallel_1.py 0.0 1.0 10000
import numpy
import sys
import time
from mpi4py import MPI
from mpi4py.MPI import ANY_SOURCE
# Communicator containing every process of this MPI job; it must be bound
# before rank and size can be queried.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# Command-line arguments, in order: lower bound a, upper bound b, and the
# total number of trapezoids n.
a = float(sys.argv[1])
b = float(sys.argv[2])
n = int(sys.argv[3])
# we arbitrarily define a function to integrate
def f(x):
    """Return the integrand x**2; also works elementwise on numpy arrays."""
    return x * x


# This is the serial version of the trapezoidal rule; parallelization occurs
# by dividing the range among processes.
def integrateRange(a, b, n):
    """Estimate the integral of f over [a, b] with n trapezoids.

    Args:
        a: lower bound of the interval.
        b: upper bound of the interval.
        n: number of trapezoids (must be non-zero, the result is divided
            by it).

    Returns:
        The trapezoid-rule estimate of the integral.
    """
    # n + 1 endpoints, but n trapezoids; evaluate f on all endpoints at once.
    samples = f(numpy.linspace(a, b, n + 1))
    # The first and last endpoint carry weight 1/2 in the trapezoid rule.
    integral = numpy.sum(samples) - (f(a) + f(b)) / 2.0
    return integral * (b - a) / n
# local_n is the number of trapezoids each process will calculate.
# NOTE: size must divide n. If it does not, the n % size leftover trapezoids
# are dropped: h below is derived from local_n * size, so [a, b] is still
# covered completely, but with fewer trapezoids than the n that is printed.
# Integer division keeps the count exact for large n.
local_n = n // size

# h is the step size (the width of one trapezoid).
h = (b - a) / (local_n * size)

# The interval each process handles: local_a is the starting point and
# local_b is the endpoint.
local_a = a + rank * local_n * h
local_b = local_a + local_n * h

# Receive buffer for the reduction; the upper-case Reduce call works on
# buffers, so a one-element numpy array is used.
total = numpy.zeros(1)

if rank == 0:
    begin_time = time.time()

# Local computation: each process integrates its own interval.
integral = integrateRange(local_a, local_b, local_n)

# Communication: a collective "reduce" sums every process's partial result
# into total on the root. The scalar is wrapped in a one-element float64
# array so the send buffer has the same element type as total.
send_buffer = numpy.array([integral], dtype=numpy.float64)
comm.Reduce(send_buffer, total, op=MPI.SUM, root=0)

# Only the root holds the reduced total and the timing, so only it prints.
if comm.rank == 0:
    end_time = time.time()
    print("With n =", n, "trapezoids, our estimate of the integral from",
          a, "to", b, "is", total)
    print("total run time :", end_time - begin_time)
    print("total size: ", size)
mpiexec --oversubscribe -np 100 python trapSerial_1.py 0 1000000 1099
改进方法2 对应的 trapParallel_2.py 改进:
# trapParallel_2.py
# example to run: mpiexec -n 4 python26 trapParallel_2.py 0.0 1.0 10000
import numpy
import sys
import time
from mpi4py import MPI
from mpi4py.MPI import ANY_SOURCE
# Communicator containing every process of this MPI job; it must be bound
# before rank and size can be queried.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# Command-line arguments, in order: lower bound a, upper bound b, and the
# total number of trapezoids n.
a = float(sys.argv[1])
b = float(sys.argv[2])
n = int(sys.argv[3])
# we arbitrarily define a function to integrate
def f(x):
    """Return the integrand x**2; also works elementwise on numpy arrays."""
    return x * x


# This is the serial version of the trapezoidal rule; parallelization occurs
# by dividing the range among processes.
def integrateRange(a, b, n):
    """Estimate the integral of f over [a, b] with n trapezoids.

    Args:
        a: lower bound of the interval.
        b: upper bound of the interval.
        n: number of trapezoids; 0 is allowed and yields 0.0.

    Returns:
        The trapezoid-rule estimate of the integral as a numpy float64.
    """
    # When there are fewer trapezoids than processes, some processes are
    # handed zero trapezoids. The general formula would divide by zero and
    # produce nan, which would then poison the reduced total, so an empty
    # share contributes exactly nothing. A numpy scalar is returned so the
    # result can still be used as an MPI buffer.
    if n == 0:
        return numpy.float64(0.0)
    # n + 1 endpoints, but n trapezoids; evaluate f on all endpoints at once.
    samples = f(numpy.linspace(a, b, n + 1))
    # The first and last endpoint carry weight 1/2 in the trapezoid rule.
    integral = numpy.sum(samples) - (f(a) + f(b)) / 2.0
    return integral * (b - a) / n
# h is the step size; n is the total number of trapezoids.
h = (b - a) / n

# local_n[r] is the number of trapezoids process r will calculate. size does
# not have to divide n: every process gets n // size trapezoids and the
# n % size leftover ones go, one each, to the highest-ranked processes.
base, extra = divmod(n, size)
local_n = numpy.zeros(size, dtype=numpy.int32)
local_n[:] = base
if extra != 0:
    local_n[-extra:] += 1

# The interval each process handles: it starts at a, shifted by the
# trapezoids owned by all lower ranks, and spans this process's own share.
local_a = a + numpy.sum(local_n[:rank]) * h
local_b = local_a + local_n[rank] * h

# Receive buffer for the reduction; the upper-case Reduce call works on
# buffers, so a one-element numpy array is used.
total = numpy.zeros(1)

if rank == 0:
    begin_time = time.time()

# Local computation: each process integrates its own interval.
integral = integrateRange(local_a, local_b, local_n[rank])

# Communication: a collective "reduce" sums every process's partial result
# into total on the root. The scalar is wrapped in a one-element float64
# array so the send buffer has the same element type as total.
send_buffer = numpy.array([integral], dtype=numpy.float64)
comm.Reduce(send_buffer, total, op=MPI.SUM, root=0)

# Only the root holds the reduced total and the timing, so only it prints.
if comm.rank == 0:
    end_time = time.time()
    print("With n =", n, "trapezoids, our estimate of the integral from",
          a, "to", b, "is", total)
    print("total run time :", end_time - begin_time)
    print("total size: ", size)
mpiexec --oversubscribe -np 100 python trapSerial_2.py 0 1000000 1099