1. What Is DMA
DMA stands for Direct Memory Access. A DMA engine can move data from one place to another without CPU involvement during the transfer. The simplest use of DMA is to move data from one region of memory to another. DMA can also move data from a peripheral (such as an ADC) into memory, or from memory to a peripheral (such as a DAC).
The DMA controller on the PS side of Zynq-7000 devices is implemented with the ARM DMA-330 (PL330) IP core.
Development environment:
- Windows 10 64-bit
- Vivado 2018.2
- XC7Z010-1-CLG400
1.1 Architectural Features
The DMA controller has the following features:
- Eight independent channels, four of which can be used to manage PL-PS data movement; each channel has a 1024-byte MFIFO;
- Data is moved on the CPU_2x clock, where CPU_2x = (CPU frequency/6)*2 (for example, about 222 MHz for a 667 MHz CPU clock);
- The DMA runs by executing DMA instructions stored in a user-defined memory region;
- The control registers (accessed over APB) support secure and non-secure modes;
- Each channel has a built-in 4-word cache;
- It can access the following mapped physical addresses in the SoC: DDR, OCM, PL, linear QSPI read, SMC, and M_AXI_GP devices; the interconnect structure used to reach these devices is shown in Figure 1.
Figure 1. PS DMA structure diagram
1.2 Zynq Interconnect Structure
As Figure 1 shows, the DMA controller can access every device attached to the Central Interconnect, and it provides a four-channel peripheral management interface that can be used to control data movement to and from the PL.
The DMA controller in Zynq devices is the ARM PL330 IP, revision r1p1; its block diagram is shown in Figure 2.
As shown in Figure 2, the DMA controller consists of an instruction execution engine, an AXI master data interface, an APB register access interface, peripheral request interfaces that can connect to the PL, a data buffer FIFO, and control/status generation logic.
Figure 2 also illustrates the design idea behind the PL330: the DMA controller executes its own instructions through its instruction execution engine and reports execution status back to the CPU over the APB bus and through interrupts, so data movement does not occupy the CPU.
The DMA controller has eight channels in total. Four channels handle data movement for the memory elements attached to the Central Interconnect; the other four act as peripheral request interfaces that can be used to manage data accesses over the PL AXI interconnect.
Each DMA channel executes its own instructions as an independent thread, so the channels do not interfere with one another. The instruction execution engine has its own dedicated cache lines.
2. Example Tests
First, build the hardware environment used by the DMA example, shown in Figure 3: the ZYNQ PS reads Block RAM data through the GP0 port.
2.1 Verifying Hardware Integrity
First, use the SDK to verify the hardware integrity; the following code tests reading and writing the BRAM.
#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "xtime_l.h"
#include "xparameters.h"
void TC_BRAM();
#define RAM_W XPAR_AXI_BRAM_CTRL_0_S_AXI_BASEADDR
#define RAM_R XPAR_AXI_BRAM_CTRL_1_S_AXI_BASEADDR
int main()
{
init_platform();
TC_BRAM();
cleanup_platform();
return 0;
}
void TC_BRAM()
{
printf("test for block RAM\n");
XTime tb, te;
double dt = 0.0;
int pass = 1;
XTime_SetTime(0);
/* fill the write-side BRAM with an incrementing pattern */
for(int i=0; i<4*1024; i++)
{
*(int *)(RAM_W+4*i) = i;
}
XTime_GetTime(&tb);
/* read the pattern back through the second BRAM controller */
for(int i=0; i<4*1024; i++)
{
if(*(int *)(RAM_R+4*i) != i)
{
printf("Test Failed\n");
pass = 0;
break;
}
}
XTime_GetTime(&te);
if(pass)
printf("Test pass\n");
dt = (double)(te-tb)*1000000/COUNTS_PER_SECOND;
printf("%fus\n",dt);
printf("test for block RAM end!\n");
}
If the serial terminal does not print "Test Failed", the hardware design is correct.
2.2 Measuring Memory Read Speed
Before using the DMA, first measure the memory read speed without it. The test writes the values 0 to 4095 and then reads them all back.
Pointer loop access:
void TC_PointerSpeed()
{
XTime tb, te;
double dt = 0.0;
int a[4*1024];
XTime_SetTime(0);
for(int i=0; i<4*1024; i++)
{
*(int *)(RAM_W+4*i) = i;
}
XTime_GetTime(&tb);
/* read all 4K words back one by one through a pointer */
for(int i=0; i<4*1024; i++)
{
a[i] = *(int *)(RAM_R+4*i);
}
XTime_GetTime(&te);
dt = (double)(te-tb)*1000000/COUNTS_PER_SECOND;
printf("%fus\n",dt);
}
memcpy (add #include <string.h> to the headers above):
void TC_MemcpySpeed()
{
XTime tb, te;
double dt = 0.0;
int a[4*1024];
XTime_SetTime(0);
for(int i=0; i<4*1024; i++)
{
*(int *)(RAM_W+4*i) = i;
}
XTime_GetTime(&tb);
/* copy all 16 KB with a single memcpy call */
memcpy(a, (void*)RAM_R, 4*1024*4);
XTime_GetTime(&te);
dt = (double)(te-tb)*1000000/COUNTS_PER_SECOND;
printf("%fus\n",dt);
}
The measured speeds are shown in the table below.

| Access method | Test data size | Average time (us) |
| --- | --- | --- |
| Pointer loop | 16 KB / 32-bit accesses | 3276 |
| memcpy | 16 KB / 32-bit accesses | 1597 |
| PS DMA | 16 KB / 32-bit accesses | 180 |

It is clear that memory copies performed by the CPU are very inefficient.
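For reference, these averages correspond to an effective throughput of roughly 16384 B / 3276 us ≈ 5 MB/s for the pointer loop, 16384 B / 1597 us ≈ 10 MB/s for memcpy, and 16384 B / 180 us ≈ 91 MB/s for the PS DMA result measured in Section 3.2.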
3. PS DMA Application
3.1 Programming Model
Peripheral request interfaces are not considered in this article. Programming the DMA controller breaks down into the following parts (a condensed sketch of the call sequence follows this list):
- Initialize the DMA controller;
- Assemble the code that the DMA engine will execute;
- Start or stop DMA transfers;
- Handle exceptions.
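Drawing only on the driver calls used in the full example below, a condensed sketch of the bare-metal flow might look like the following (DoneFlag is a hypothetical completion flag set by the Done handler; error handling omitted):

/* Sketch of the xdmaps call sequence; the interrupt-driven example
 * below is the complete, authoritative version. */
XDmaPs_Config *Cfg = XDmaPs_LookupConfig(DMA_DEVICE_ID);   /* locate the controller */
XDmaPs_CfgInitialize(&DmaInstance, Cfg, Cfg->BaseAddress); /* initialize the driver */

XDmaPs_Cmd Cmd;                 /* describe the transfer; the driver builds the DMA program from this */
memset(&Cmd, 0, sizeof(Cmd));
Cmd.ChanCtrl.SrcBurstSize = 4;
Cmd.ChanCtrl.SrcBurstLen = 4;
Cmd.ChanCtrl.SrcInc = 1;
Cmd.ChanCtrl.DstBurstSize = 4;
Cmd.ChanCtrl.DstBurstLen = 4;
Cmd.ChanCtrl.DstInc = 1;
Cmd.BD.SrcAddr = (u32)Src;
Cmd.BD.DstAddr = (u32)Dst;
Cmd.BD.Length = DMA_LENGTH * sizeof(int);

XDmaPs_SetDoneHandler(&DmaInstance, 0, DmaDoneHandler, (void *)&DoneFlag); /* register the Done callback */
XDmaPs_Start(&DmaInstance, 0, &Cmd, 0);                                    /* start channel 0 */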
The official example ships with the Vivado installation at:
Vivado2018.2\SDK\2018.2\data\embeddedsw\XilinxProcessorIPLib\drivers\dmaps_v2_3\examples
#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "sleep.h"
#include "xparameters.h"
#include "xil_types.h"
#include "xil_assert.h"
#include "xil_io.h"
#include "xil_exception.h"
#include "xil_cache.h"
#include "xil_printf.h"
#include "xscugic.h"
#include "xdmaps.h"
/************************** Constant Definitions *****************************/
/*
* The following constants map to the XPAR parameters created in the
* xparameters.h file. They are defined here such that a user can easily
* change all the needed parameters in one place.
*/
#define DMA_DEVICE_ID XPAR_XDMAPS_1_DEVICE_ID
#define INTC_DEVICE_ID XPAR_SCUGIC_SINGLE_DEVICE_ID
#define DMA_DONE_INTR_0 XPAR_XDMAPS_0_DONE_INTR_0
#define DMA_DONE_INTR_1 XPAR_XDMAPS_0_DONE_INTR_1
#define DMA_DONE_INTR_2 XPAR_XDMAPS_0_DONE_INTR_2
#define DMA_DONE_INTR_3 XPAR_XDMAPS_0_DONE_INTR_3
#define DMA_DONE_INTR_4 XPAR_XDMAPS_0_DONE_INTR_4
#define DMA_DONE_INTR_5 XPAR_XDMAPS_0_DONE_INTR_5
#define DMA_DONE_INTR_6 XPAR_XDMAPS_0_DONE_INTR_6
#define DMA_DONE_INTR_7 XPAR_XDMAPS_0_DONE_INTR_7
#define DMA_FAULT_INTR XPAR_XDMAPS_0_FAULT_INTR
#define TEST_ROUNDS 1 /* Number of loops that the Dma transfers run.*/
#define DMA_LENGTH 1024 /* Length of the Dma Transfers */
#define TIMEOUT_LIMIT 0x2000 /* Loop count for timeout */
/************************** Function Prototypes ******************************/
int XDmaPs_Example_W_Intr(XScuGic *GicPtr, u16 DeviceId);
int SetupInterruptSystem(XScuGic *GicPtr, XDmaPs *DmaPtr);
void DmaDoneHandler(unsigned int Channel, XDmaPs_Cmd *DmaCmd,
void *CallbackRef);
/************************** Variable Definitions *****************************/
#ifdef __ICCARM__
#pragma data_alignment=32
static int Src[DMA_LENGTH];
static int Dst[DMA_LENGTH];
#pragma data_alignment=4
#else
static int Src[DMA_LENGTH] __attribute__ ((aligned (32)));
static int Dst[DMA_LENGTH] __attribute__ ((aligned (32)));
#endif
XDmaPs DmaInstance;
#ifndef TESTAPP_GEN
XScuGic GicInstance;
#endif
#ifndef TESTAPP_GEN
int main(void)
{
int Status;
Status = XDmaPs_Example_W_Intr(&GicInstance,DMA_DEVICE_ID);
if (Status != XST_SUCCESS) {
xil_printf("Error: XDMaPs_Example_W_Intr failed\r\n");
return XST_FAILURE;
}
xil_printf("Successfully ran XDMaPs_Example_W_Intr\r\n");
return XST_SUCCESS;
}
#endif
/*****************************************************************************/
/**
*
* Interrupt Example to test the DMA.
*
* @param GicPtr is the GIC instance pointer.
* @param DeviceId is the Device ID of the DMA controller.
*
* @return XST_SUCCESS to indicate success, otherwise XST_FAILURE.
*
* @note None.
*
****************************************************************************/
int XDmaPs_Example_W_Intr(XScuGic *GicPtr, u16 DeviceId)
{
int Index;
unsigned int Channel = 0;
int Status;
int TestStatus;
int TestRound;
int TimeOutCnt;
volatile int Checked[XDMAPS_CHANNELS_PER_DEV];
XDmaPs_Config *DmaCfg;
XDmaPs *DmaInst = &DmaInstance;
XDmaPs_Cmd DmaCmd;
memset(&DmaCmd, 0, sizeof(XDmaPs_Cmd));
DmaCmd.ChanCtrl.SrcBurstSize = 4; /* 4 bytes per data beat */
DmaCmd.ChanCtrl.SrcBurstLen = 4; /* 4 beats per burst */
DmaCmd.ChanCtrl.SrcInc = 1; /* increment the source address */
DmaCmd.ChanCtrl.DstBurstSize = 4;
DmaCmd.ChanCtrl.DstBurstLen = 4;
DmaCmd.ChanCtrl.DstInc = 1; /* increment the destination address */
DmaCmd.BD.SrcAddr = (u32) Src;
DmaCmd.BD.DstAddr = (u32) Dst;
DmaCmd.BD.Length = DMA_LENGTH * sizeof(int);
/*
* Initialize the DMA Driver
*/
DmaCfg = XDmaPs_LookupConfig(DeviceId);
if (DmaCfg == NULL) {
return XST_FAILURE;
}
Status = XDmaPs_CfgInitialize(DmaInst,
DmaCfg,
DmaCfg->BaseAddress);
if (Status != XST_SUCCESS) {
return XST_FAILURE;
}
/*
* Setup the interrupt system.
*/
Status = SetupInterruptSystem(GicPtr, DmaInst);
if (Status != XST_SUCCESS) {
return XST_FAILURE;
}
TestStatus = XST_SUCCESS;
for (TestRound = 0; TestRound < TEST_ROUNDS; TestRound++) {
xil_printf("Test round %d\r\n", TestRound);
for (Channel = 0;
Channel < XDMAPS_CHANNELS_PER_DEV;
Channel++) {
/* Initialize source */
for (Index = 0; Index < DMA_LENGTH; Index++)
Src[Index] = DMA_LENGTH - Index;
/* Clear destination */
for (Index = 0; Index < DMA_LENGTH; Index++)
Dst[Index] = 0;
Checked[Channel] = 0;
/* Set the Done interrupt handler */
XDmaPs_SetDoneHandler(DmaInst,
Channel,
DmaDoneHandler,
(void *)Checked);
Status = XDmaPs_Start(DmaInst, Channel, &DmaCmd, 0);
if (Status != XST_SUCCESS) {
return XST_FAILURE;
}
TimeOutCnt = 0;
/* Busy-wait until the Done handler sets Checked[Channel] */
while (!Checked[Channel]
&& TimeOutCnt < TIMEOUT_LIMIT) {
TimeOutCnt++;
}
if (TimeOutCnt >= TIMEOUT_LIMIT) {
TestStatus = XST_FAILURE;
}
if (Checked[Channel] < 0) {
/* DMA controller failed */
TestStatus = XST_FAILURE;
}
}
}
return TestStatus;
}
/******************************************************************************/
/**
*
* This function connects the interrupt handler of the interrupt controller to
* the processor. This function is separate to allow it to be customized for
* each application. Each processor or RTOS may require unique processing to
* connect the interrupt handler.
*
* @param GicPtr is the GIC instance pointer.
* @param DmaPtr is the DMA instance pointer.
*
* @return None.
*
* @note None.
*
****************************************************************************/
int SetupInterruptSystem(XScuGic *GicPtr, XDmaPs *DmaPtr)
{
int Status;
#ifndef TESTAPP_GEN
XScuGic_Config *GicConfig;
Xil_ExceptionInit();
/*
* Initialize the interrupt controller driver so that it is ready to
* use.
*/
GicConfig = XScuGic_LookupConfig(INTC_DEVICE_ID);
if (NULL == GicConfig) {
return XST_FAILURE;
}
Status = XScuGic_CfgInitialize(GicPtr, GicConfig,
GicConfig->CpuBaseAddress);
if (Status != XST_SUCCESS) {
return XST_FAILURE;
}
/*
* Connect the interrupt controller interrupt handler to the hardware
* interrupt handling logic in the processor.
*/
Xil_ExceptionRegisterHandler(XIL_EXCEPTION_ID_IRQ_INT,
(Xil_ExceptionHandler)XScuGic_InterruptHandler,
GicPtr);
#endif
/*
* Connect the device driver handlers that will be called when an interrupt
* for the device occurs, the device driver handler performs the specific
* interrupt processing for the device
*/
/*
* Connect the Fault ISR
*/
Status = XScuGic_Connect(GicPtr,
DMA_FAULT_INTR,
(Xil_InterruptHandler)XDmaPs_FaultISR,
(void *)DmaPtr);
if (Status != XST_SUCCESS) {
return XST_FAILURE;
}
/*
* Connect the Done ISR for all 8 channels of DMA 0
*/
Status = XScuGic_Connect(GicPtr,
DMA_DONE_INTR_0,
(Xil_InterruptHandler)XDmaPs_DoneISR_0,
(void *)DmaPtr);
Status |= XScuGic_Connect(GicPtr,
DMA_DONE_INTR_1,
(Xil_InterruptHandler)XDmaPs_DoneISR_1,
(void *)DmaPtr);
Status |= XScuGic_Connect(GicPtr,
DMA_DONE_INTR_2,
(Xil_InterruptHandler)XDmaPs_DoneISR_2,
(void *)DmaPtr);
Status |= XScuGic_Connect(GicPtr,
DMA_DONE_INTR_3,
(Xil_InterruptHandler)XDmaPs_DoneISR_3,
(void *)DmaPtr);
Status |= XScuGic_Connect(GicPtr,
DMA_DONE_INTR_4,
(Xil_InterruptHandler)XDmaPs_DoneISR_4,
(void *)DmaPtr);
Status |= XScuGic_Connect(GicPtr,
DMA_DONE_INTR_5,
(Xil_InterruptHandler)XDmaPs_DoneISR_5,
(void *)DmaPtr);
Status |= XScuGic_Connect(GicPtr,
DMA_DONE_INTR_6,
(Xil_InterruptHandler)XDmaPs_DoneISR_6,
(void *)DmaPtr);
Status |= XScuGic_Connect(GicPtr,
DMA_DONE_INTR_7,
(Xil_InterruptHandler)XDmaPs_DoneISR_7,
(void *)DmaPtr);
if (Status != XST_SUCCESS)
return XST_FAILURE;
/*
* Enable the interrupts for the device
*/
XScuGic_Enable(GicPtr, DMA_DONE_INTR_0);
XScuGic_Enable(GicPtr, DMA_DONE_INTR_1);
XScuGic_Enable(GicPtr, DMA_DONE_INTR_2);
XScuGic_Enable(GicPtr, DMA_DONE_INTR_3);
XScuGic_Enable(GicPtr, DMA_DONE_INTR_4);
XScuGic_Enable(GicPtr, DMA_DONE_INTR_5);
XScuGic_Enable(GicPtr, DMA_DONE_INTR_6);
XScuGic_Enable(GicPtr, DMA_DONE_INTR_7);
XScuGic_Enable(GicPtr, DMA_FAULT_INTR);
Xil_ExceptionEnable();
return XST_SUCCESS;
}
/*****************************************************************************/
/**
*
* DmaDoneHandler.
*
* @param Channel is the Channel number.
* @param DmaCmd is the Dma Command.
* @param CallbackRef is the callback reference data.
*
* @return None.
*
* @note None.
*
******************************************************************************/
void DmaDoneHandler(unsigned int Channel, XDmaPs_Cmd *DmaCmd, void *CallbackRef)
{
/* done handler */
volatile int *Checked = (volatile int *)CallbackRef;
int Index;
int Status = 1;
int *Src;
int *Dst;
Src = (int *)DmaCmd->BD.SrcAddr;
Dst = (int *)DmaCmd->BD.DstAddr;
/* DMA successful */
/* compare the src and dst buffer */
for (Index = 0; Index < DMA_LENGTH; Index++) {
if ((Src[Index] != Dst[Index]) ||
(Dst[Index] != DMA_LENGTH - Index)) {
Status = -XST_FAILURE;
}
}
Checked[Channel] = Status;
}
3.2 Modifying the PS DMA Transfer
Modify the DMA configuration so that it moves data from the PL into memory.
Change the DMA source address:
DmaCmd.BD.SrcAddr = (u32) RAM_R;
Measured this way, a 16 KB DMA transfer takes about 180 us, far less time than memcpy.
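The 180 us figure can be obtained by bracketing the transfer with XTime_GetTime(), just as the CPU tests were timed. A minimal sketch, assuming a volatile Done flag that the registered Done handler sets to 1:

/* Sketch: time one 16 KB DMA transfer from the PL Block RAM (RAM_R)
 * into the Dst buffer. Assumes DmaCmd is set up as in the example and
 * the Done handler sets the hypothetical flag Done to 1. */
volatile int Done = 0;
XTime tb, te;

DmaCmd.BD.SrcAddr = (u32) RAM_R;  /* source: PL Block RAM */
DmaCmd.BD.DstAddr = (u32) Dst;    /* destination: buffer in DDR */
DmaCmd.BD.Length = 16 * 1024;     /* 16 KB */

XTime_GetTime(&tb);
XDmaPs_Start(DmaInst, Channel, &DmaCmd, 0);
while (!Done);                    /* spin until the Done interrupt fires */
XTime_GetTime(&te);
printf("%fus\n", (double)(te - tb) * 1000000 / COUNTS_PER_SECOND);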
4. Linux DMA Driver
4.1 Programming Approach
Configure the DMA:
void dma_init(u32 s, int size)
{
dma_cap_mask_t mask;
//record the source bus address and allocate a coherent destination buffer
dma_src = s;
printk(KERN_INFO "dma_src = 0x%x\n", dma_src);
//src = dma_alloc_coherent(NULL, MM_SIZE, &dma_src, GFP_KERNEL);
dst = dma_alloc_coherent(NULL, size, &dma_dst, GFP_KERNEL);
printk(KERN_INFO "dst = 0x%p, dma_dst = 0x%x\n", dst, dma_dst);
dma_cap_zero(mask);
dma_cap_set(DMA_MEMCPY, mask);//direction: memory to memory
chan = dma_request_channel(mask, NULL, NULL); //request a dma channel
printk(KERN_INFO "dma channel id = %d\n", chan->chan_id);
flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
dev = chan->device;
}
Release the DMA:
void dma_del(void)
{
//free memory and dma channel
dma_free_coherent(NULL, MM_SIZE, dst, dma_dst);
dma_release_channel(chan);
}
Submit a transfer request to the DMA engine:
void dma_read(u32 dst,u32 src,int size)
{
//allocate a descriptor and set the destination address, source address, and size
/* record the start time */
do_gettimeofday(&tb);
tx = dev->device_prep_dma_memcpy(chan, dst, src, size, flags);
if (!tx){
printk(KERN_INFO "Failed to prepare DMA memcpy");
}
tx->callback = dma_callback_func;//set call back function
tx->callback_param = NULL;
cookie = tx->tx_submit(tx); //submit the desc
if (dma_submit_error(cookie)){
printk(KERN_INFO "Failed to do DMA tx_submit");
}
dma_async_issue_pending(chan);//begin dma transfer
}
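Note that dma_read() returns as soon as the descriptor has been issued; the transfer only finishes when the callback runs. The example in the next section does all of its checking inside the callback, but if a caller needs to block until the DMA completes, one common dmaengine pattern is a struct completion. A minimal sketch (dma_done and dma_done_cb are hypothetical names introduced here):

#include <linux/completion.h>

static DECLARE_COMPLETION(dma_done);

/* variant callback that only signals completion */
static void dma_done_cb(void *param)
{
complete(&dma_done);
}

/* caller side, after dma_async_issue_pending(chan): */
wait_for_completion(&dma_done);   /* sleep until the DMA engine runs the callback */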
4.2 Example Code
Map the Block RAM with ioremap to the virtual address src and write a test pattern of characters into it, then use the DMA to transfer MM_SIZE bytes from the Block RAM into the allocated buffer dst. When the transfer completes, dma_callback_func is called; it compares the transferred data against what was written and measures the time consumed by the DMA.
#include<linux/dmaengine.h>
#include<linux/dma-mapping.h>
#include<linux/types.h>
#include<linux/slab.h>
#include<linux/module.h>
#include<linux/init.h>
#include<linux/fs.h>
#include<linux/sched.h>
#include <linux/miscdevice.h>
#include<linux/device.h>
#include<linux/string.h>
#include<linux/errno.h>
#include<asm/uaccess.h>
#include <asm/delay.h>
#define DEVICE_NAME "dma_driver"
#define ImageReadAddress0 0x40000000
volatile unsigned int *CaptureReadAddr0;
struct timeval tb, te;
#define MM_SIZE (1440*10)
void dma_callback_func(void *dma_async_param);
void dma_read(u32 dma_dst,u32 dma_src,int size);
void dma_init(u32 s, int size);
void dma_del(void);
struct dma_chan *chan;
//bus address
dma_addr_t dma_src;
dma_addr_t dma_dst;
//virtual address
char *src = NULL;
char *dst = NULL ;
struct dma_device *dev;
struct dma_async_tx_descriptor *tx = NULL;
enum dma_ctrl_flags flags;
dma_cookie_t cookie;
//Called when the DMA transfer has finished.
void dma_callback_func(void *dma_async_param)
{
int i=0;
do_gettimeofday(&te);
printk("DMA\n");
printk("T:%ld, %ld\n", tb.tv_sec, tb.tv_usec);
printk("T2:%ld, %ld\n", te.tv_sec, te.tv_usec);
printk(KERN_ALERT "time use:%ld, %ld\n",
(te.tv_sec-tb.tv_sec),
(te.tv_usec-tb.tv_usec));
printk("memcpy\n");
do_gettimeofday(&tb);
memcpy(dst ,src, MM_SIZE);
do_gettimeofday(&te);
printk("T:%ld, %ld\n", tb.tv_sec, tb.tv_usec);
printk("T2:%ld, %ld\n", te.tv_sec, te.tv_usec);
printk(KERN_ALERT "time use:%ld, %ld\n",
(te.tv_sec-tb.tv_sec),
(te.tv_usec-tb.tv_usec));
printk("DMA transfer finished!\n\r");
for(i=0; i<MM_SIZE; i++)
{
if(*(dst + i) != (char)('a' + i%26))
{
printk("Failed\n");
return;
}
}
printk("PASS\n");
}
void dma_read(u32 dst,u32 src,int size)
{
//allocate a descriptor and set the destination address, source address, and size
/* record the start time */
do_gettimeofday(&tb);
tx = dev->device_prep_dma_memcpy(chan, dst, src, size, flags);
if (!tx){
printk(KERN_INFO "Failed to prepare DMA memcpy");
}
tx->callback = dma_callback_func;//set call back function
tx->callback_param = NULL;
cookie = tx->tx_submit(tx); //submit the desc
if (dma_submit_error(cookie)){
printk(KERN_INFO "Failed to do DMA tx_submit");
}
dma_async_issue_pending(chan);//begin dma transfer
}
void dma_init(u32 s, int size)
{
dma_cap_mask_t mask;
//record the source bus address and allocate a coherent destination buffer
dma_src = s;
printk(KERN_INFO "dma_src = 0x%x\n", dma_src);
//src = dma_alloc_coherent(NULL, MM_SIZE, &dma_src, GFP_KERNEL);
dst = dma_alloc_coherent(NULL, size, &dma_dst, GFP_KERNEL);
printk(KERN_INFO "dst = 0x%x, dma_dst = 0x%x\n",dst, dma_dst);
dma_cap_zero(mask);
dma_cap_set(DMA_MEMCPY, mask);//direction: memory to memory
chan = dma_request_channel(mask,NULL,NULL); //request a dma channel
printk(KERN_INFO "dma channel id = %d\n",chan->chan_id);
flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
dev = chan->device;
}
void dma_del(void)
{
//free memory and dma channel
dma_free_coherent(NULL, MM_SIZE, dst, dma_dst);
dma_release_channel(chan);
}
static int device_open(struct inode *inode, struct file *file)
{
return 0;
}
static int device_close(struct inode *inode, struct file *file)
{
printk("device close\n");
return 0;
}
static ssize_t device_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
int ret = 0;
dma_read(dma_dst, dma_src, MM_SIZE);
return ret;
}
static struct file_operations device_fops =
{
.owner = THIS_MODULE,
.open = device_open,
.release = device_close,
.read = device_read,
};
static struct miscdevice MMAP_misc =
{
.minor = MISC_DYNAMIC_MINOR,
.name = DEVICE_NAME,
.fops = &device_fops,
};
static int __init char_device_init( void )
{
int ret=0;
int i = 0;
printk("init module\n");
ret = misc_register(&MMAP_misc);
if(ret)
{
printk("Error:misc_register failed!\n");
return 0;
}
CaptureReadAddr0 = (volatile unsigned int*)ioremap(ImageReadAddress0, 1440*10);
printk("init module\n");
dma_init(ImageReadAddress0, MM_SIZE);
src = (char*)CaptureReadAddr0;
for (i = 0; i < MM_SIZE; i++){
*(src + i) = (char)('a' + i%26);
}
return 0;
}
static void __exit char_device_exit( void )
{
printk(KERN_ALERT"module exit\n");
misc_deregister(&MMAP_misc);
iounmap(CaptureReadAddr0);
dma_del();
}
MODULE_LICENSE("GPL");
MODULE_AUTHOR("DMA_test");
module_init(char_device_init);//module load entry
module_exit(char_device_exit);//module unload entry
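The user-space program that produced the log below is not included; a minimal sketch that just opens the misc device and issues one read() to trigger dma_read() might look like this (the node name /dev/dma_driver follows DEVICE_NAME above, and the sleep() simply gives the callback time to print):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
char buf[4];
int fd = open("/dev/dma_driver", O_RDONLY);
if (fd < 0) {
perror("open /dev/dma_driver");
return 1;
}
printf("Test for dma\n");
read(fd, buf, sizeof(buf));   /* device_read() submits the DMA transfer */
sleep(1);                     /* let the callback finish and print its results */
close(fd);
return 0;
}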
A comparison of moving data out of the PL with the DMA versus memcpy is shown below:
Z-turn# ./test
Test for dma
DMA
T:34, 358179
T2:34, 358290
time use:0, 111
memcpy
T:34, 364372
T2:34, 364796
time use:0, 424
DMA transfer finished!
PASS
The DMA transfer took 111 us while memcpy needed 424 us, so the DMA moves the data much faster than the CPU can.