1. 什么是DMA

DMA是直接内存访问(Direct Memory Access),DMA引擎可以将数据从一个地方传输到另一个地方,在传输过程中不经过CPU的控制。最简单的DMA用法是将数据从内存的一个区域搬运到另一个区域。DMA也可以将外设的数据(如ADC)搬运到内存中,或者将内存数据搬运到外设中(如DAC)。

Zynq-7000系列器件PS端的DMA控制器采用ARM的IP核DMA-330(PL-330)实现。

开发环境

  • Windows 10 64位
  • Vivado 2018.2
  • XC7Z010-1-CLG400

1.1 结构特点

DMA控制器具有以下的特点:

  1. 8个独立的通道,4个可用于PL—PS间数据管理,每个通道有1024Byte的MFIFO;
  2. 使用CPU_2x 时钟搬运数据,CPU_2x = (CPU frq/6)*2;
  3. 执行自定义内存区域内的DMA指令运行DMA;
  4. AHB控制寄存器支持安全和非安全模式;
  5. 每个通道内置4字Cache;
  6. 可以访问SoC的以下映射物理地址:

 DDR、OCM、PL、Linear QSPI Read、SMC和M_AXI_GP设备,访问设备的互联结构如图1所示。

[图1:原文插图缺失]

                                                     图1 DMA PS结构示意图

  1.2 Zynq 访问互联结构图

从图1可以看出DMA控制器可以访问连接到Central Interconnect上的所有设备,并提供了四个通道的外设管理接口可用于控制PL的数据搬运。

 Zynq系列器件中的DMA控制器采用ARM PL-330 IP核的r1p1版本,结构框图如图2所示。

[图2:原文插图缺失]

       如图2所示,DMA控制器由指令加速引擎,AXI Master数据接口,AXI APB寄存器访问接口以及可以连接到PL的外设请求接口,数据缓冲FIFO和控制及状态产生单元组成。

      从图2可以看到,DMA PL330的设计思想是:DMA控制器通过DMA指令执行引擎执行自己的指令,并将执行状态通过APB总线和中断等形式反馈给CPU,达到数据搬运不占用CPU的目的。

       DMA控制器共有八个通道,其中四个通道负责互联到Central Interconnect的存储单元上的数据搬运;另外四个通道为外设请求接口,可用于PL AXI互联接口的数据访问管理。

       每个DMA通道都执行自己的指令,拥有自己的独立线程,通道间互不影响。指令执行引擎有自己独立的Cache线。

2. 实例测试

     首先构建AXI DMA例程使用的硬件环境,如图3所示,ZYNQ通过GP0端口读取Block RAM数据。

[图3:原文插图缺失]

2.1 测试硬件完整性

首先使用SDK测试硬件的完整性,编写如下代码测试BRAM读写情况。

#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "xtime_l.h"
#include "xparameters.h"
 
void TC_BRAM();
#define RAM_W	XPAR_AXI_BRAM_CTRL_0_S_AXI_BASEADDR
#define RAM_R	XPAR_AXI_BRAM_CTRL_1_S_AXI_BASEADDR
 
/* Entry point of the BRAM self-test: bring the platform up, run the test,
 * then tear the platform down again. */
int main()
{
    /* Platform bring-up must precede any test output. */
    init_platform();

    /* Write/read-back check of the PL block RAM. */
    TC_BRAM();

    cleanup_platform();

    return 0;
}
 
/**
 * Self-test of the PL block RAM: fill it through RAM_W, read it back through
 * RAM_R, and time the read-back.
 *
 * Writes the values 0..4095 as 32-bit words starting at RAM_W, then reads the
 * same number of words starting at RAM_R and compares each word with its
 * index. "Test Failed" is printed at the first mismatch and "Test pass" only
 * when every word matched; the elapsed read time (in microseconds) is printed
 * in both cases.
 */
void TC_BRAM()
{
	enum { BRAM_WORDS = 4 * 1024 };	/* 4096 words * 4 bytes = 16 KB */

	printf("test for block RAM\n");

	/* volatile: every access must really reach the BRAM and must not be
	 * merged, reordered or dropped by the optimizer. */
	volatile int *wr = (volatile int *)RAM_W;
	volatile int *rd = (volatile int *)RAM_R;
	XTime tb, te;
	double dt = 0.0;
	int failed = 0;

	XTime_SetTime(0);

	for (int i = 0; i < BRAM_WORDS; i++) {
		wr[i] = i;
	}

	/* Only the read-back phase is timed. */
	XTime_GetTime(&tb);
	for (int i = 0; i < BRAM_WORDS; i++) {
		if (rd[i] != i) {
			printf("Test Failed\n");
			failed = 1;
			break;
		}
	}
	XTime_GetTime(&te);

	/* Report success only when no mismatch was seen. */
	if (!failed) {
		printf("Test pass\n");
	}

	/* Convert in floating point so sub-microsecond resolution is kept. */
	dt = (double)(te - tb) * 1000000.0 / COUNTS_PER_SECOND;
	printf("%fus\n",dt);
	printf("test for block RAM end!\n");
}

 在串口终端中如果没有输出"Test Failed"则说明硬件设计无误。

2.2 测试内存读取速度

       在使用DMA之前,首先在不使用DMA的情况下测试内存读取的速度。本例程首先写入0~4095,然后全部读取出来。

指针循环访问:

/**
 * Measure how long the CPU needs to read 16 KB from the block RAM with a
 * plain word-by-word pointer loop.
 *
 * Fills 4096 words through RAM_W with 0..4095, then times a loop that copies
 * the same number of words from RAM_R into a local buffer, and prints the
 * elapsed time in microseconds.
 */
void TC_PointerSpeed()
{
	enum { WORDS = 4 * 1024 };	/* 4096 words * 4 bytes = 16 KB */

	/* static: keeps the 16 KB buffer off the stack.
	 * NOTE(review): SDK-generated linker scripts often default to a stack
	 * smaller than 16 KB - confirm against lscript.ld. */
	static int a[WORDS];

	/* volatile: the reads are the thing being measured, so the compiler
	 * must not elide them even though the copied data is never used. */
	volatile int *wr = (volatile int *)RAM_W;
	volatile int *rd = (volatile int *)RAM_R;
	XTime tb, te;
	double dt = 0.0;

	XTime_SetTime(0);
	for (int i = 0; i < WORDS; i++) {
		wr[i] = i;
	}

	XTime_GetTime(&tb);
	for (int i = 0; i < WORDS; i++) {
		a[i] = rd[i];
	}
	XTime_GetTime(&te);

	/* Convert in floating point so sub-microsecond resolution is kept. */
	dt = (double)(te - tb) * 1000000.0 / COUNTS_PER_SECOND;
	printf("%fus\n",dt);
}

memcpy:

/**
 * Measure how long memcpy() needs to copy 16 KB out of the block RAM.
 *
 * Fills 4096 words through RAM_W with 0..4095, then times a single memcpy()
 * of the same amount of data from RAM_R into a local buffer, and prints the
 * elapsed time in microseconds.
 */
void TC_MemcpySpeed()
{
	enum { WORDS = 4 * 1024 };	/* 4096 words * 4 bytes = 16 KB */

	/* static: keeps the 16 KB buffer off the stack.
	 * NOTE(review): SDK-generated linker scripts often default to a stack
	 * smaller than 16 KB - confirm against lscript.ld. */
	static int a[WORDS];

	/* volatile: the fill must actually be written out to the BRAM. */
	volatile int *wr = (volatile int *)RAM_W;
	XTime tb, te;
	double dt = 0.0;

	XTime_SetTime(0);

	for (int i = 0; i < WORDS; i++) {
		wr[i] = i;
	}

	XTime_GetTime(&tb);
	memcpy(a, (void*)RAM_R, sizeof(a));
	XTime_GetTime(&te);

	/* Convert in floating point so sub-microsecond resolution is kept. */
	dt = (double)(te - tb) * 1000000.0 / COUNTS_PER_SECOND;
	printf("%fus\n",dt);
}

 速度如下表所示。

访问方法

测试数据量

平均时间/us

指针

16KB/32位

3276

memcpy

16KB/32位

1597

DMA PS

16KB/32位

180


 可以看出使用CPU进行的内存复制效率非常低

3. DMAPS应用

3.1 编程模型

本文不考虑外设请求接口,DMA控制器编程分为以下几个部分:

  1. DMA控制器初始化;
  2. 组织DMA引擎执行代码;
  3. 启动或停止DMA传输;
  4. 异常处理。

 

官方例程在Vivado安装路径下:

Vivado2018.2\SDK\2018.2\data\embeddedsw\XilinxProcessorIPLib\drivers\dmaps_v2_3\examples

#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "sleep.h"
#include "xparameters.h"
#include "xil_types.h"
#include "xil_assert.h"
#include "xil_io.h"
#include "xil_exception.h"
#include "xil_cache.h"
#include "xil_printf.h"
#include "xscugic.h"
#include "xdmaps.h"
 
/************************** Constant Definitions *****************************/
/*
 * The following constants map to the XPAR parameters created in the
 * xparameters.h file. They are defined here such that a user can easily
 * change all the needed parameters in one place.
 */
#define DMA_DEVICE_ID 			XPAR_XDMAPS_1_DEVICE_ID
#define INTC_DEVICE_ID			XPAR_SCUGIC_SINGLE_DEVICE_ID
 
#define DMA_DONE_INTR_0			XPAR_XDMAPS_0_DONE_INTR_0
#define DMA_DONE_INTR_1			XPAR_XDMAPS_0_DONE_INTR_1
#define DMA_DONE_INTR_2			XPAR_XDMAPS_0_DONE_INTR_2
#define DMA_DONE_INTR_3			XPAR_XDMAPS_0_DONE_INTR_3
#define DMA_DONE_INTR_4			XPAR_XDMAPS_0_DONE_INTR_4
#define DMA_DONE_INTR_5			XPAR_XDMAPS_0_DONE_INTR_5
#define DMA_DONE_INTR_6			XPAR_XDMAPS_0_DONE_INTR_6
#define DMA_DONE_INTR_7			XPAR_XDMAPS_0_DONE_INTR_7
#define DMA_FAULT_INTR			XPAR_XDMAPS_0_FAULT_INTR
 
#define TEST_ROUNDS	1	/* Number of loops that the Dma transfers run.*/
#define DMA_LENGTH	1024	/* Length of the Dma Transfers */
#define TIMEOUT_LIMIT 	0x2000	/* Loop count for timeout */
 
/************************** Function Prototypes ******************************/
 
int XDmaPs_Example_W_Intr(XScuGic *GicPtr, u16 DeviceId);
int SetupInterruptSystem(XScuGic *GicPtr, XDmaPs *DmaPtr);
void DmaDoneHandler(unsigned int Channel, XDmaPs_Cmd *DmaCmd,
			void *CallbackRef);
 
/************************** Variable Definitions *****************************/
/*
 * Source and destination buffers of the test transfer, DMA_LENGTH ints each,
 * aligned to 32 bytes for both supported toolchains (IAR pragma or GCC
 * attribute).
 * NOTE(review): 32 bytes presumably matches the data-cache line size so that
 * cache maintenance covers whole lines - confirm.
 */
#ifdef __ICCARM__
#pragma data_alignment=32
static int Src[DMA_LENGTH];
static int Dst[DMA_LENGTH];
#pragma data_alignment=4
#else
static int Src[DMA_LENGTH] __attribute__ ((aligned (32)));
static int Dst[DMA_LENGTH] __attribute__ ((aligned (32)));
#endif
 
/* DMA driver instance used by the example. */
XDmaPs DmaInstance;
/* Interrupt controller instance; only defined when TESTAPP_GEN is not set. */
#ifndef TESTAPP_GEN
XScuGic GicInstance;
#endif
 
#ifndef TESTAPP_GEN
/* Runs the interrupt-driven DMA example once and reports the outcome on the
 * console. Returns XST_SUCCESS or XST_FAILURE accordingly. */
int main(void)
{
	if (XDmaPs_Example_W_Intr(&GicInstance, DMA_DEVICE_ID) != XST_SUCCESS) {
		xil_printf("Error: XDMaPs_Example_W_Intr failed\r\n");
		return XST_FAILURE;
	}

	xil_printf("Successfully ran XDMaPs_Example_W_Intr\r\n");
	return XST_SUCCESS;
}
#endif
 
/*****************************************************************************/
/**
 *
 * Interrupt Example to test the DMA.
 *
 * Initializes the DMA driver and the interrupt system, then, for each test
 * round and each of the XDMAPS_CHANNELS_PER_DEV channels, refills Src,
 * clears Dst, starts one Src -> Dst transfer of DMA_LENGTH ints and
 * busy-waits until DmaDoneHandler has recorded a result for that channel or
 * the wait loop has run TIMEOUT_LIMIT times.
 *
 * @param	GicPtr is the interrupt controller instance pointer; it is
 *		passed on to SetupInterruptSystem().
 * @param	DeviceId is the Device ID of the DMA controller.
 *
 * @return	XST_SUCCESS to indicate success, otherwise XST_FAILURE
 *		(initialization error, failed start, timeout, or a negative
 *		result reported by the done handler).
 *
 * @note	A timeout or a failed comparison does not stop the test; the
 *		remaining channels are still exercised and the failure is
 *		reported through the return value.
 *
 ****************************************************************************/
int XDmaPs_Example_W_Intr(XScuGic *GicPtr, u16 DeviceId)
{
	int Index;
	unsigned int Channel = 0;
	int Status;
	int TestStatus;
	int TestRound;
	int TimeOutCnt;
	/* Per-channel result written by DmaDoneHandler from interrupt context:
	 * 0 = not finished yet, > 0 = data verified, < 0 = verification failed. */
	volatile int Checked[XDMAPS_CHANNELS_PER_DEV];
	XDmaPs_Config *DmaCfg;
	XDmaPs *DmaInst = &DmaInstance;
	XDmaPs_Cmd DmaCmd;

	memset(&DmaCmd, 0, sizeof(XDmaPs_Cmd));

	/*
	 * One command, reused for every channel: same burst size/length on the
	 * source and destination side, both addresses incrementing, copying the
	 * whole Src array into Dst. (Units of the burst fields are defined by
	 * the XDmaPs driver.)
	 */
	DmaCmd.ChanCtrl.SrcBurstSize = 4;
	DmaCmd.ChanCtrl.SrcBurstLen = 4;
	DmaCmd.ChanCtrl.SrcInc = 1;
	DmaCmd.ChanCtrl.DstBurstSize = 4;
	DmaCmd.ChanCtrl.DstBurstLen = 4;
	DmaCmd.ChanCtrl.DstInc = 1;
	DmaCmd.BD.SrcAddr = (u32) Src;
	DmaCmd.BD.DstAddr = (u32) Dst;
	DmaCmd.BD.Length = DMA_LENGTH * sizeof(int);


	/*
	 * Initialize the DMA Driver
	 */
	DmaCfg = XDmaPs_LookupConfig(DeviceId);
	if (DmaCfg == NULL) {
		return XST_FAILURE;
	}

	Status = XDmaPs_CfgInitialize(DmaInst,
				   DmaCfg,
				   DmaCfg->BaseAddress);
	if (Status != XST_SUCCESS) {
		return XST_FAILURE;
	}


	/*
	 * Setup the interrupt system.
	 */
	Status = SetupInterruptSystem(GicPtr, DmaInst);
	if (Status != XST_SUCCESS) {
		return XST_FAILURE;
	}


	TestStatus = XST_SUCCESS;

	for (TestRound = 0; TestRound < TEST_ROUNDS; TestRound++) {
		xil_printf("Test round %d\r\n", TestRound);
		for (Channel = 0;
		     Channel < XDMAPS_CHANNELS_PER_DEV;
		     Channel++) {


			/* Initialize source with the descending pattern that
			 * DmaDoneHandler checks for (DMA_LENGTH - Index) */
			for (Index = 0; Index < DMA_LENGTH; Index++)
				Src[Index] = DMA_LENGTH - Index;

			/* Clear destination */
			for (Index = 0; Index < DMA_LENGTH; Index++)
				Dst[Index] = 0;

			/* Mark this channel as "not finished" before starting */
			Checked[Channel] = 0;

			/* Set the Done interrupt handler; it receives the
			 * Checked array as its callback reference */
			XDmaPs_SetDoneHandler(DmaInst,
					       Channel,
					       DmaDoneHandler,
					       (void *)Checked);


			Status = XDmaPs_Start(DmaInst, Channel, &DmaCmd, 0);
			if (Status != XST_SUCCESS) {
				return XST_FAILURE;
			}

			TimeOutCnt = 0;

			/* Busy-wait until the done handler stores a non-zero
			 * result for this channel or the loop count reaches
			 * TIMEOUT_LIMIT */
			while (!Checked[Channel]
			       && TimeOutCnt < TIMEOUT_LIMIT) {
				TimeOutCnt++;
			}

			/* No completion seen within the loop budget */
			if (TimeOutCnt >= TIMEOUT_LIMIT) {
				TestStatus = XST_FAILURE;
			}

			if (Checked[Channel] < 0) {
				/* DMA controller failed */
				TestStatus = XST_FAILURE;
			}
		}
	}

	return TestStatus;

}
 
 
/******************************************************************************/
/**
 *
 * Wires the DMA controller's interrupts into the interrupt controller and
 * enables them.
 *
 * Unless TESTAPP_GEN is defined, the interrupt controller driver is first
 * initialized and its handler is registered for the processor IRQ exception.
 * Afterwards the DMA fault ISR and the eight per-channel done ISRs are
 * connected, all nine interrupts are enabled and processor exceptions are
 * switched on.
 *
 * @param	GicPtr is the GIC instance pointer.
 * @param	DmaPtr is the DMA instance pointer; it is handed to every ISR
 *		as its callback argument.
 *
 * @return	XST_SUCCESS when everything was set up, XST_FAILURE when the
 *		GIC lookup/initialization or any connect call failed.
 *
 * @note	All eight done ISRs are always attempted; their status codes
 *		are OR-ed together and checked once afterwards.
 *
 ****************************************************************************/
int SetupInterruptSystem(XScuGic *GicPtr, XDmaPs *DmaPtr)
{
	/* Done interrupt of each channel paired with the driver ISR serving it. */
	static const struct {
		u32 IntrId;
		Xil_InterruptHandler Isr;
	} DoneIsrs[] = {
		{ DMA_DONE_INTR_0, (Xil_InterruptHandler)XDmaPs_DoneISR_0 },
		{ DMA_DONE_INTR_1, (Xil_InterruptHandler)XDmaPs_DoneISR_1 },
		{ DMA_DONE_INTR_2, (Xil_InterruptHandler)XDmaPs_DoneISR_2 },
		{ DMA_DONE_INTR_3, (Xil_InterruptHandler)XDmaPs_DoneISR_3 },
		{ DMA_DONE_INTR_4, (Xil_InterruptHandler)XDmaPs_DoneISR_4 },
		{ DMA_DONE_INTR_5, (Xil_InterruptHandler)XDmaPs_DoneISR_5 },
		{ DMA_DONE_INTR_6, (Xil_InterruptHandler)XDmaPs_DoneISR_6 },
		{ DMA_DONE_INTR_7, (Xil_InterruptHandler)XDmaPs_DoneISR_7 },
	};
	const unsigned int DoneIsrCount = sizeof(DoneIsrs) / sizeof(DoneIsrs[0]);
	unsigned int Slot;
	int Status;
#ifndef TESTAPP_GEN
	XScuGic_Config *GicConfig;

	Xil_ExceptionInit();

	/* Bring up the interrupt controller driver. */
	GicConfig = XScuGic_LookupConfig(INTC_DEVICE_ID);
	if (GicConfig == NULL) {
		return XST_FAILURE;
	}

	if (XScuGic_CfgInitialize(GicPtr, GicConfig,
				  GicConfig->CpuBaseAddress) != XST_SUCCESS) {
		return XST_FAILURE;
	}

	/* Route the processor IRQ exception to the GIC dispatcher. */
	Xil_ExceptionRegisterHandler(XIL_EXCEPTION_ID_IRQ_INT,
			     (Xil_ExceptionHandler)XScuGic_InterruptHandler,
			     GicPtr);
#endif

	/* Fault ISR: a failure here aborts immediately. */
	Status = XScuGic_Connect(GicPtr,
				 DMA_FAULT_INTR,
				 (Xil_InterruptHandler)XDmaPs_FaultISR,
				 (void *)DmaPtr);
	if (Status != XST_SUCCESS) {
		return XST_FAILURE;
	}

	/* Done ISRs: attempt every channel, accumulate the status codes. */
	Status = XScuGic_Connect(GicPtr, DoneIsrs[0].IntrId,
				 DoneIsrs[0].Isr, (void *)DmaPtr);
	for (Slot = 1; Slot < DoneIsrCount; Slot++) {
		Status |= XScuGic_Connect(GicPtr, DoneIsrs[Slot].IntrId,
					  DoneIsrs[Slot].Isr, (void *)DmaPtr);
	}

	if (Status != XST_SUCCESS) {
		return XST_FAILURE;
	}

	/* Enable the done interrupts in channel order, then the fault one. */
	for (Slot = 0; Slot < DoneIsrCount; Slot++) {
		XScuGic_Enable(GicPtr, DoneIsrs[Slot].IntrId);
	}
	XScuGic_Enable(GicPtr, DMA_FAULT_INTR);

	Xil_ExceptionEnable();
	return XST_SUCCESS;
}
 
 
/*****************************************************************************/
/**
*
* Done callback of a DMA channel: verifies the transferred data and records
* the verdict for the waiting test loop.
*
* Every one of the DMA_LENGTH elements must be equal in the source and the
* destination buffer of the command and must hold DMA_LENGTH - index.
*
* @param	Channel is the Channel number; it selects the slot of the
*		result array that is written.
* @param	DmaCmd is the Dma Command whose source/destination addresses
*		are compared.
* @param	CallbackRef is the callback reference data, used here as an
*		array of per-channel int results.
*
* @return	None. The slot for Channel is set to 1 when all elements match,
*		to -XST_FAILURE otherwise.
*
* @note		The whole buffer is always scanned, even after a mismatch.
*
******************************************************************************/
void DmaDoneHandler(unsigned int Channel, XDmaPs_Cmd *DmaCmd, void *CallbackRef)
{
	volatile int *Results = (volatile int *)CallbackRef;
	int *From = (int *)DmaCmd->BD.SrcAddr;
	int *To = (int *)DmaCmd->BD.DstAddr;
	int Verdict = 1;
	int Pos = 0;

	while (Pos < DMA_LENGTH) {
		/* Element must be copied intact and carry the test pattern. */
		if (!((From[Pos] == To[Pos]) &&
				(To[Pos] == DMA_LENGTH - Pos))) {
			Verdict = -XST_FAILURE;
		}
		Pos++;
	}

	Results[Channel] = Verdict;
}

3.2 修改DMA PS

修改DMA配置,使其将PL中的数据传输到内存中。

修改DMA的源地址

     DmaCmd.BD.SrcAddr = (u32) RAM_R;

 测量DMA传输16KB数据,耗时约为180us,速度远远高于memcpy(约1597us)。

4. Linux DMA驱动

4.1 编程方法

配置DMA

/*
 * Prepare a memory-to-memory DMA transfer.
 *
 * @s:    bus address of the transfer source; stored in the global dma_src.
 * @size: size in bytes of the coherent destination buffer to allocate.
 *
 * Allocates the destination buffer (dst / dma_dst), requests a channel with
 * the DMA_MEMCPY capability and caches the channel's device and the
 * descriptor flags in the globals chan, dev and flags.
 *
 * The function cannot return an error. On failure it logs the reason,
 * releases what it had acquired and leaves dst and/or chan NULL, so callers
 * can test those globals.
 */
void dma_init(u32 s, int size)
{
	dma_cap_mask_t mask;

	dma_src = s;
	/* Print the bus address that was just stored, not the CPU pointer. */
	printk(KERN_INFO "dma_src = %pad\n", &dma_src);

	//src = dma_alloc_coherent(NULL, MM_SIZE, &dma_src, GFP_KERNEL);
	/* NOTE(review): a NULL device is only accepted by older kernels. */
	dst = dma_alloc_coherent(NULL, size, &dma_dst, GFP_KERNEL);
	if (!dst) {
		printk(KERN_ERR "dma_init: failed to allocate %d bytes\n", size);
		return;
	}
	printk(KERN_INFO "dst = %p, dma_dst = %pad\n", dst, &dma_dst);

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);//direction:memory to memory
	chan = dma_request_channel(mask,NULL,NULL); //request a dma channel
	if (!chan) {
		/* No channel: undo the allocation so nothing is leaked. */
		printk(KERN_ERR "dma_init: no DMA_MEMCPY channel available\n");
		dma_free_coherent(NULL, size, dst, dma_dst);
		dst = NULL;
		return;
	}
	printk(KERN_INFO "dma channel id = %d\n",chan->chan_id);

	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
	dev = chan->device;
}

释放DMA

/*
 * Release the resources acquired by dma_init(): the coherent destination
 * buffer and the DMA channel. Safe to call when either was never obtained.
 */
void dma_del(void)
{
    if (dst) {
        /* The last argument is the bus address itself, not a pointer to it. */
        dma_free_coherent(NULL, MM_SIZE, dst, dma_dst);
        dst = NULL;
    }
    if (chan) {
        dma_release_channel(chan);
        chan = NULL;
    }
}

向DMA引擎发起一个传输请求

/*
 * Queue one asynchronous memcpy transfer on the global channel and start it.
 *
 * @dst:  bus address of the destination.
 * @src:  bus address of the source.
 * @size: number of bytes to copy.
 *
 * The start time is stored in the global tb. dma_callback_func is installed
 * as the descriptor's completion callback. If the descriptor cannot be
 * prepared or submitted, the error is logged and nothing is started.
 */
void dma_read(u32 dst,u32 src,int size)
{
	/* Timestamp taken before preparing the descriptor. */
	do_gettimeofday(&tb);

	tx = dev->device_prep_dma_memcpy(chan, dst, src, size, flags);
	if (!tx) {
		/* No descriptor: bail out instead of dereferencing NULL below. */
		printk(KERN_INFO "Failed to prepare DMA memcpy\n");
		return;
	}

	tx->callback = dma_callback_func;//set call back function
	tx->callback_param = NULL;

	cookie = tx->tx_submit(tx); 	//submit the desc
	if (dma_submit_error(cookie)) {
		printk(KERN_INFO "Failed to do DMA tx_submit\n");
		return;
	}

	dma_async_issue_pending(chan);//begin dma transfer
}

   4.2 实例代码

      将Block RAM中的数据先使用ioremap映射的地址src,写入一些字符,然后使用DMA从Block RAM中传输16KB数据到分配的内存dst中。传输完成后调用dma_callback_func函数,在该函数中比较传输的数据和发送的数据是否相同,并测量DMA消耗的时间。

#include<linux/dmaengine.h>
#include<linux/dma-mapping.h>
#include<linux/types.h>
#include<linux/slab.h>
#include<linux/module.h>
#include<linux/init.h>
#include<linux/fs.h>
#include<linux/sched.h>
#include <linux/miscdevice.h>
#include<linux/device.h>
#include<linux/string.h>
#include<linux/errno.h>
#include<linux/types.h>
#include<linux/slab.h>
 
#include<asm/uaccess.h>
#include <asm/delay.h>
 
/* Name under which the misc device is registered. */
#define DEVICE_NAME "dma_driver"
/* Physical address that is ioremap()ed and also used as the DMA source. */
#define ImageReadAddress0			0x40000000
/* CPU mapping of ImageReadAddress0, set up in char_device_init(). */
volatile unsigned int *CaptureReadAddr0;
 
/* Start / end timestamps shared by dma_read() and dma_callback_func(). */
struct timeval tb, te;
 
/* Transfer size in bytes: 1440 * 10 = 14400. */
#define MM_SIZE (1440*10)
 
void dma_callback_func(void *dma_async_param);
void dma_read(u32 dma_dst,u32 dma_src,int size);
void dma_init(u32 s, int size);
void dma_del(void);
 
/* DMA channel obtained in dma_init(). */
struct dma_chan *chan;
 //bus address
dma_addr_t dma_src;
dma_addr_t dma_dst;
//virtual address
char *src = NULL;
char *dst = NULL ;
/* Device owning chan; provides the prepare operation used by dma_read(). */
struct dma_device *dev;
/* Descriptor of the most recently prepared transfer. */
struct dma_async_tx_descriptor *tx = NULL;
/* Descriptor flags passed to the prepare operation. */
enum dma_ctrl_flags flags;
/* Cookie returned when the descriptor was submitted. */
dma_cookie_t cookie;
 
//When dma transfer finished,this function will be called.
void dma_callback_func(void *dma_async_param)
{
	int i=0;
	do_gettimeofday(&te);
	printk("DMA\n");
	printk("T:%ld, %ld\n", tb.tv_sec, tb.tv_usec);
	printk("T2:%ld, %ld\n", te.tv_sec, te.tv_usec);
	printk(KERN_ALERT "time use:%ld, %ld\n",
			(te.tv_sec-tb.tv_sec),
			(te.tv_usec-tb.tv_usec));
 
	printk("memcpy\n");
	do_gettimeofday(&tb);
	memcpy(dst ,src,  MM_SIZE);
	do_gettimeofday(&te);
	printk("T:%ld, %ld\n", tb.tv_sec, tb.tv_usec);
	printk("T2:%ld, %ld\n", te.tv_sec, te.tv_usec);
	printk(KERN_ALERT "time use:%ld, %ld\n",
			(te.tv_sec-tb.tv_sec),
			(te.tv_usec-tb.tv_usec));
 
    printk("DMA transfer finished!\n\r");
    for(i=0; i<MM_SIZE; i++)
    {
    	if(*(dst + i) != (char)('a' + i%26))
    	{
    		printk("Failed\n");
    		return;
    	}
    }
    printk("PASS\n");
}
 
/*
 * Queue one asynchronous memcpy transfer on the global channel and start it.
 *
 * @dst:  bus address of the destination.
 * @src:  bus address of the source.
 * @size: number of bytes to copy.
 *
 * The start time is stored in the global tb. dma_callback_func is installed
 * as the descriptor's completion callback. If the descriptor cannot be
 * prepared or submitted, the error is logged and nothing is started.
 */
void dma_read(u32 dst,u32 src,int size)
{
	/* Timestamp taken before preparing the descriptor. */
	do_gettimeofday(&tb);

	tx = dev->device_prep_dma_memcpy(chan, dst, src, size, flags);
	if (!tx) {
		/* No descriptor: bail out instead of dereferencing NULL below. */
		printk(KERN_INFO "Failed to prepare DMA memcpy\n");
		return;
	}

	tx->callback = dma_callback_func;//set call back function
	tx->callback_param = NULL;

	cookie = tx->tx_submit(tx); 	//submit the desc
	if (dma_submit_error(cookie)) {
		printk(KERN_INFO "Failed to do DMA tx_submit\n");
		return;
	}

	dma_async_issue_pending(chan);//begin dma transfer
}
 
/*
 * Prepare a memory-to-memory DMA transfer.
 *
 * @s:    bus address of the transfer source; stored in the global dma_src.
 * @size: size in bytes of the coherent destination buffer to allocate.
 *
 * Allocates the destination buffer (dst / dma_dst), requests a channel with
 * the DMA_MEMCPY capability (the transfer is issued with the memcpy prepare
 * operation) and caches the channel's device and the descriptor flags in the
 * globals chan, dev and flags.
 *
 * The function cannot return an error. On failure it logs the reason,
 * releases what it had acquired and leaves dst and/or chan NULL, so callers
 * can test those globals.
 */
void dma_init(u32 s, int size)
{
	dma_cap_mask_t mask;

	dma_src = s;
	/* Print the bus address that was just stored, not the CPU pointer. */
	printk(KERN_INFO "dma_src = %pad\n", &dma_src);

	//src = dma_alloc_coherent(NULL, MM_SIZE, &dma_src, GFP_KERNEL);
	/* NOTE(review): a NULL device is only accepted by older kernels. */
	dst = dma_alloc_coherent(NULL, size, &dma_dst, GFP_KERNEL);
	if (!dst) {
		printk(KERN_ERR "dma_init: failed to allocate %d bytes\n", size);
		return;
	}
	printk(KERN_INFO "dst = %p, dma_dst = %pad\n", dst, &dma_dst);

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);//direction:memory to memory
	chan = dma_request_channel(mask,NULL,NULL); //request a dma channel
	if (!chan) {
		/* No channel: undo the allocation so nothing is leaked. */
		printk(KERN_ERR "dma_init: no DMA_MEMCPY channel available\n");
		dma_free_coherent(NULL, size, dst, dma_dst);
		dst = NULL;
		return;
	}
	printk(KERN_INFO "dma channel id = %d\n",chan->chan_id);

	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
	dev = chan->device;
}
 
/*
 * Release the resources acquired by dma_init(): the coherent destination
 * buffer and the DMA channel. Safe to call when either was never obtained.
 */
void dma_del(void)
{
    if (dst) {
        /* The last argument is the bus address itself, not a pointer to it. */
        dma_free_coherent(NULL, MM_SIZE, dst, dma_dst);
        dst = NULL;
    }
    if (chan) {
        dma_release_channel(chan);
        chan = NULL;
    }
}
 
/* open() handler: nothing to set up per file, always succeeds. */
static int device_open(struct inode *inode, struct file *file)
{
    /* No per-open state is kept. */
    return 0;
}
 
/* release() handler: logs the close and always succeeds. */
static int device_close(struct inode *inode, struct file *file)
{
    printk("device close\n");

    return 0;
}
 
/*
 * read() handler: used purely as a trigger. It starts one DMA transfer of
 * MM_SIZE bytes from dma_src to dma_dst and reports 0 bytes read; nothing is
 * copied to the user buffer.
 */
static ssize_t device_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
    /* Kick off the transfer; completion is reported by the DMA callback. */
    dma_read(dma_dst, dma_src, MM_SIZE);

    return 0;
}
 
/* File operations exposed through the misc device node. */
static struct file_operations device_fops =
{
    .read = device_read,
    .open  = device_open,
    .release = device_close,
    .owner = THIS_MODULE,
};
 
/* Misc device descriptor: dynamically assigned minor, named DEVICE_NAME. */
static struct miscdevice MMAP_misc =
{
	.name = DEVICE_NAME,
	.fops = &device_fops,
	.minor = MISC_DYNAMIC_MINOR,
};
 
/*
 * Module entry point.
 *
 * Registers the misc device, maps MM_SIZE bytes at ImageReadAddress0, sets up
 * the DMA channel and destination buffer, and fills the mapped region with
 * the repeating 'a'..'z' pattern that the DMA callback later checks.
 *
 * Returns 0 on success or a negative errno. On failure everything acquired
 * so far is released again, so the module is never left half-initialized.
 */
static int __init char_device_init( void )
{
	int ret;
	int i;

	printk("init module\n");
	ret = misc_register(&MMAP_misc);
	if (ret) {
		printk("Error:misc_register failed!\n");
		/* Propagate the error so the module load actually fails. */
		return ret;
	}

	CaptureReadAddr0 = (volatile unsigned int *)ioremap(ImageReadAddress0, MM_SIZE);
	if (!CaptureReadAddr0) {
		printk("Error:ioremap failed!\n");
		misc_deregister(&MMAP_misc);
		return -ENOMEM;
	}

	printk("init module\n");
	dma_init(ImageReadAddress0, MM_SIZE);
	/* dma_init() returns nothing; a usable setup needs both globals. */
	if (!dst || !chan) {
		printk("Error:dma_init failed!\n");
		if (chan) {
			dma_release_channel(chan);
			chan = NULL;
		}
		if (dst) {
			dma_free_coherent(NULL, MM_SIZE, dst, dma_dst);
			dst = NULL;
		}
		iounmap(CaptureReadAddr0);
		misc_deregister(&MMAP_misc);
		return -ENODEV;
	}

	/* Fill the mapped source region with the pattern checked after DMA. */
	src = (char*)CaptureReadAddr0;
	for (i = 0; i < MM_SIZE; i++){
		*(src + i) = (char)('a' + i%26);
	}

	return 0;
}
 
/*
 * Module exit point: removes the misc device, drops the register mapping and
 * releases the DMA buffer and channel, in that order.
 */
static void __exit char_device_exit( void )
{
    printk(KERN_ALERT"module exit\n");

    /* Remove the device node first. */
    misc_deregister(&MMAP_misc);

    /* Undo the ioremap() done at load time. */
    iounmap(CaptureReadAddr0);

    /* Free the DMA resources. */
    dma_del();
}
 
MODULE_LICENSE("GPL");
MODULE_AUTHOR("DMA_test");
 
module_init(char_device_init);//模块加载
module_exit(char_device_exit);//模块退出

使用DMA搬运和memcpy搬运PL中的数据速度对比如下

Z-turn# ./test
Test for dma
DMA
T:34, 358179
T2:34, 358290
time use:0, 111
memcpy
T:34, 364372
T2:34, 364796
time use:0, 424
DMA transfer finished!
PASS

 DMA搬运消耗了111us,而memcpy需要使用424us,可见DMA速度远高于CPU对数据的搬运。