这篇文章的测试不准确,可能是minpy和numpy同时用出的问题,现在最新的测试在下面这篇文章中
因为觉得这是整个测试过程,就没有删除这篇文章.
测试minpy 调用gpu 加速numpy的矩阵相乘. 小矩阵相乘
小矩阵相乘,行数在1-1000.测试用的都是方阵.所以元素数木在1到一百万.
测试元素数目一到100万的矩阵相乘.
上一篇中可以看到在行数超过1000的时候,gpu相对于cpu就会有绝对的加速效果.但是在行数1000以前会看到,gpu并不一定能够起到加速计算的效果.
这里我们针对1-1000来看下.
#!/usr/bin/python
# -*- coding: utf-8 -*-
#####################################
# File name : main.py
# Create date : 2019-01-05 17:11
# Modified date : 2019-01-08 18:01
# Author : DARREN
# Describe : not set
# Email : lzygzh@126.com
#####################################
from __future__ import division
from __future__ import print_function
import time
import numpy as np
import numpy.random as random
import minpy.numpy as mnp
import matplotlib.pyplot as plt
def _plot_record(record):
_plot_a_key(record, "numpy", "minpy")
_plot_key(record,"acceleration")
def _plot_a_key(record, name1, name2):
numpy_lt = []
minpy_lt = []
steps = []
for key in record:
steps.append([key])
steps.sort()
for i in range(len(steps)):
step_dic = record[steps[i][0]]
numpy_value = step_dic[name1]
numpy_lt.append(numpy_value)
minpy_value = step_dic[name2]
minpy_lt.append(minpy_value)
numpy_lt = np.array(numpy_lt)
minpy_lt = np.array(minpy_lt)
steps = np.array(steps)
numpy_line, = plt.plot(steps, numpy_lt)
minpy_line, = plt.plot(steps, minpy_lt)
plt.legend(handles=[numpy_line,minpy_line],labels=['use numpy','use minpy'],loc='best')
full_path_name = "./%s_%s.jpg" % (name1, name2)
# plt.show()
plt.savefig(full_path_name)
plt.close()
def _plot_key(record, name):
acceleration_lt= []
steps = []
for key in record:
steps.append([key])
steps.sort()
for i in range(len(steps)):
step_dic = record[steps[i][0]]
acceleration_value = step_dic[name]
acceleration_lt.append(acceleration_value)
acceleration_lt = np.array(acceleration_lt)
steps = np.array(steps)
acceleration_line, = plt.plot(steps, acceleration_lt)
plt.legend(handles=[acceleration_line],labels=['acceleartion'],loc='best')
full_path_name = "./%s.jpg" % (name)
# plt.show()
plt.savefig(full_path_name)
plt.close()
def test_numpy(A,B,i):
s = time.time()
np.dot(A,B)
e = time.time()
take_time = e - s
return take_time
def test_minpy(A,B,i):
s = time.time()
mnp.dot(A,B)
e = time.time()
take_time = e - s
return take_time
def _write_status(file_obj, A, B, i, numpy_take_time, minpy_take_time):
acceleration = numpy_take_time / minpy_take_time
shape_str = "%s : %s matmul %s" % (A.dtype, A.shape, B.shape)
numpy_str = "i:%s use numpy:%s" % (i, numpy_take_time)
minpy_str = "i:%s use minpy:%s" % (i, minpy_take_time)
acceleration_str = "acceleration:%s" % acceleration
file_obj.write("%s\n" % shape_str)
file_obj.write("%s\n" % numpy_str)
file_obj.write("%s\n" % minpy_str)
file_obj.write("%s\n" % acceleration_str)
print(shape_str)
print(numpy_str)
print(minpy_str)
print(acceleration_str)
def _record_status(record, i, numpy_take_time, minpy_take_time):
dic = {}
dic["numpy"] = numpy_take_time
dic["minpy"] = minpy_take_time
dic["acceleration"] = numpy_take_time / minpy_take_time
record[i] = dic
def test_numpy_and_minpy():
# random.seed(0)
file_obj = open("./output","w")
record = {}
for i in range(1,1000):
A = random.randn(i, i)
#A = np.array(A,dtype=np.float32)
B = random.randn(i, i)
#B = np.array(B,dtype=np.float32)
numpy_take_time = test_numpy(A,B,i)
minpy_take_time = test_minpy(A,B,i)
_write_status(file_obj,A,B,i,numpy_take_time,minpy_take_time)
_record_status(record, i, numpy_take_time, minpy_take_time)
file_obj.close()
_plot_record(record)
if __name__ == '__main__':
test_numpy_and_minpy()
下面是我机器中的cpu和gpu型号
31.4 GiB
Intel® Core™ i7-8700K CPU @ 3.70GHz × 12
GeForce GTX 1080 Ti/PCIe/SSE2
64-bit
先看下整体的输出效果
float64_1_1000
numpy minpy 耗时对比图
这个图看的不是很清楚,图上他们的耗时几乎好像是重叠的.
所以下面是numpy 与 minpy 耗时之比的图,看一下加速效果图.
这个图上值如果大于1就说明有加速效果.能够看到最高加速能达到80多,但是随着矩阵的增大这个最大加速效果会降低.但是总的来说,在600之前能够看到明显的加速区域.
因为运行时输出太长了,影响文章阅读. 就不放了float32_1_1000
numpy 与 minpy 耗时对比图
然后是加速效果图
最高能有100多倍.float32 比float64 的加速效果要好一些.
下面逐一展开
float64_1_100
float32_1_100
float64_100_200
float32_100_200
float64_200_300
float32_200_300
float64_300_400
float32_300_400
float64_400_500
float32_400_500
float64_500_600
float32_500_600
float64_600_700
float32_600_700
float64_700_800
float32_700_800
float64_800_900
float32_800_900
float64_900_1000
float32_900_1000