• Comparing in-kernel memory data copy speeds


    Preface

        I am not sure whether this approach is reasonable; if any experts pass by, please point me in the right direction.

        This experiment carries out the following tasks:

        1 Allocate two kernel buffers, abuf and bbuf, each 1280*800 bytes.

        2 Run memcpy(abuf, bbuf, 1280*800) and measure how long it takes.

        3 Run memcpy(abuf, bbuf, 1280*800) in a timer handler and measure how long it takes.

        4 Copy the data between the two buffers with dmaengine and measure how long each run takes; this is likewise done ten times.

    1. Functions involved

    Allocating memory with __get_free_pages:

    #define CSS_DMA_IMAGE_SIZE (1280*800)

    css_dev->buf_size = CSS_DMA_IMAGE_SIZE;

    css_dev->buf_size_order = get_order(CSS_DMA_IMAGE_SIZE);

    p = (void*)__get_free_pages(GFP_KERNEL|GFP_DMA,css_dev->buf_size_order);
    if(p == NULL){    /* __get_free_pages() returns 0 on failure */
        DEBUG_CSS("__get_free_pages error");
        return -ENOMEM;
    }
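
    Memory obtained from __get_free_pages() has to be handed back with free_pages() using the same order; a one-line sketch of the matching cleanup, reusing the fields above:

    free_pages((unsigned long)p, css_dev->buf_size_order);   /* same order as at allocation */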

    Copying memory with memcpy in the driver's initialization function:

    memcpy(css_dev->dst_buf,css_dev->src_buf,css_dev->buf_size);

    Measuring the time:

    struct timeval {
        __kernel_time_t        tv_sec;     /* seconds */
        __kernel_suseconds_t   tv_usec;    /* microseconds */
    };

    struct timeval tv_start,tv_end;

    do_gettimeofday(&tv_start);
    memcpy(css_dev->dst_buf,css_dev->src_buf,css_dev->buf_size);
    do_gettimeofday(&tv_end);

    DEBUG_CSS("used time = %ld us",
        (tv_end.tv_sec - tv_start.tv_sec)*1000000 +
        (tv_end.tv_usec - tv_start.tv_usec));
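
    A side note for newer kernels: do_gettimeofday() has since been removed from mainline, so on recent kernels the same measurement would use ktime_get() instead. A minimal sketch, assuming the same css_dev buffers and DEBUG_CSS macro:

    /* Sketch: timing the same memcpy with ktime (needs <linux/ktime.h>);
     * ktime_get()/ktime_us_delta() replace do_gettimeofday(). */
    ktime_t start, end;

    start = ktime_get();
    memcpy(css_dev->dst_buf, css_dev->src_buf, css_dev->buf_size);
    end = ktime_get();
    DEBUG_CSS("used time = %lld us", ktime_us_delta(end, start));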

    Test results, shown in the figure below: the minimum was 1683 µs and the maximum 2315 µs.
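
    As a rough sanity check, 1280*800 = 1,024,000 bytes copied in 1683–2315 µs works out to roughly 440–610 MB/s of plain CPU memcpy throughput on this platform.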

    2. Measuring the time consumed by memcpy in a timer

    Copying memory with memcpy in a timer function:

    static void __maybe_unused css_timer_function(unsigned long arg)
    {
        struct _css_dev_ *css_dev = (struct _css_dev_ *)arg;
        static int count = 0;
        struct timeval tv_start,tv_end;

        if(count < 10){
            do_gettimeofday(&tv_start);
            memcpy(css_dev->dst_buf,css_dev->src_buf,css_dev->buf_size);
            do_gettimeofday(&tv_end);
            DEBUG_CSS("used time = %ld us",
                (tv_end.tv_sec - tv_start.tv_sec)*1000000 + (tv_end.tv_usec - tv_start.tv_usec));
            count++;
            mod_timer(&css_dev->timer,jiffies + msecs_to_jiffies(1000));
        }
    }

    /* timer setup in the init path */
    init_timer(&css_dev->timer);
    css_dev->timer.function = css_timer_function;
    css_dev->timer.data = (unsigned long)css_dev;
    mod_timer(&css_dev->timer,jiffies + msecs_to_jiffies(1000));

    The results are shown below. They show that running memcpy from the timer gives fairly stable timings.
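
    For reference, kernels from v4.15 onward replace init_timer() and the .data field with timer_setup(). A rough sketch of the equivalent setup, assuming struct _css_dev_ has a struct timer_list member named timer as above:

    /* Sketch for newer kernels: timer_setup() replaces init_timer() and
     * .data; the callback recovers its container with from_timer(). */
    static void css_timer_function(struct timer_list *t)
    {
        struct _css_dev_ *css_dev = from_timer(css_dev, t, timer);
        /* ... same memcpy + timing body as above ... */
        mod_timer(&css_dev->timer, jiffies + msecs_to_jiffies(1000));
    }

    /* timer setup in the init path */
    timer_setup(&css_dev->timer, css_timer_function, 0);
    mod_timer(&css_dev->timer, jiffies + msecs_to_jiffies(1000));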

    3. Measuring the time consumed by dmaengine

    Source code:

        The experiment does the equivalent of memcpy(css_dev->dst_buf, css_dev->src_buf, css_dev->buf_size);

        The first time I used dmaengine, testing showed that the src and dst contents did not match after the copy; it took several rounds of changes before the data finally came out right (the most likely culprit was CPU cache coherence, which is why the code below brackets every transfer with dma_map_single()/dma_unmap_single()).

    /* kernel headers required by this driver */
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/init.h>
    #include <linux/platform_device.h>
    #include <linux/of.h>
    #include <linux/miscdevice.h>
    #include <linux/fs.h>
    #include <linux/mm.h>
    #include <linux/gfp.h>
    #include <linux/spinlock.h>
    #include <linux/mutex.h>
    #include <linux/completion.h>
    #include <linux/time.h>
    #include <linux/dma-mapping.h>
    #include <linux/dmaengine.h>
    #include <linux/platform_data/dma-imx.h>

    #define DEBUG_CSS(format,...)\
        printk(KERN_INFO"info:%s:%s:%d: "format"\n",\
        __FILE__,__func__,__LINE__,\
        ##__VA_ARGS__)
    #define DEBUG_CSS_ERR(format,...)\
        printk("\001" "1" "error:%s:%s:%d: "format"\n",\
        __FILE__,__func__,__LINE__,\
        ##__VA_ARGS__)
    #define CSS_DMA_IMAGE_SIZE (1280*800)
    #define CSI_DMA_SET_CUR_MAP_BUF_TYPE_IOCTL 0x1001

    enum _css_dev_buf_type{
        _CSS_DEV_READ_BUF = 0,
        _CSS_DEV_WRITE_BUF,
        _CSS_DEV_UNKNOWN_BUF_TYPE,
        _CSS_DEV_MAX_BUF_TYPE,
    };

    struct _css_dev_{
        struct file_operations _css_fops;
        struct miscdevice misc;
        int buf_size;
        int buf_size_order;
        char *src_buf;
        char *dst_buf;
        char *user_src_buf_vaddr;
        char *user_dst_buf_vaddr;
        dma_addr_t src_addr;
        dma_addr_t dst_addr;
        struct spinlock slock;
        struct mutex open_lock;
        char name[10];
        enum _css_dev_buf_type buf_type;
        struct device *dev;
        struct dma_chan *dma_m2m_chan;
        struct completion dma_m2m_ok;
        struct imx_dma_data m2m_dma_data;
        struct timeval tv_start,tv_end;
    };
    #define _to_css_dev_(file) (struct _css_dev_ *)container_of(file->f_op,struct _css_dev_,_css_fops)

    static int _css_open(struct inode *inode, struct file *file)
    {
        struct _css_dev_ *css_dev = _to_css_dev_(file);
        DEBUG_CSS("css_dev->name = %s",css_dev->name);
        return 0;
    }

    static ssize_t _css_read(struct file *file, char __user *ubuf, size_t size, loff_t *ppos)
    {
        struct _css_dev_ *css_dev = _to_css_dev_(file);
        DEBUG_CSS("css_dev->name = %s",css_dev->name);
        return 0;
    }

    /* Map the src or dst buffer into user space, depending on the buffer
     * type previously selected through the ioctl. */
    static int _css_mmap (struct file *file, struct vm_area_struct *vma)
    {
        struct _css_dev_ *css_dev = _to_css_dev_(file);
        char *p = NULL;
        char **user_addr;

        switch(css_dev->buf_type){
        case _CSS_DEV_READ_BUF:
            p = css_dev->src_buf;
            user_addr = (char**)&css_dev->user_src_buf_vaddr;
            break;
        case _CSS_DEV_WRITE_BUF:
            p = css_dev->dst_buf;
            user_addr = (char**)&css_dev->user_dst_buf_vaddr;
            break;
        default:
            p = NULL;
            return -EINVAL;
            break;
        }
        if (remap_pfn_range(vma, vma->vm_start, virt_to_phys(p) >> PAGE_SHIFT,
                vma->vm_end-vma->vm_start, vma->vm_page_prot)) {
            DEBUG_CSS_ERR("remap_pfn_range error\n");
            return -EAGAIN;
        }
        css_dev->buf_type = _CSS_DEV_UNKNOWN_BUF_TYPE;
        *user_addr = (void*)vma->vm_start;
        DEBUG_CSS("mmap ok user_addr = %p kernel addr = %p",*user_addr,p);
        return 0;
    }

    static ssize_t _css_write(struct file *file, const char __user *ubuf, size_t size, loff_t *ppos)
    {
        struct _css_dev_ *css_dev = _to_css_dev_(file);
        DEBUG_CSS("css_dev->name = %s",css_dev->name);
        return size;
    }

    static int _css_release (struct inode *inode, struct file *file)
    {
        struct _css_dev_ *css_dev = _to_css_dev_(file);
        DEBUG_CSS("css_dev->name = %s",css_dev->name);
        DEBUG_CSS("css_dev->src_buf[%d] = %c",css_dev->buf_size - 1,
            ((char*)css_dev->src_buf)[css_dev->buf_size - 1]);
        DEBUG_CSS("css_dev->dst_buf[%d] = %c",css_dev->buf_size - 1,
            ((char*)css_dev->dst_buf)[css_dev->buf_size - 1]);
        return 0;
    }

    /* Record which buffer the next mmap() call should map. */
    static int _css_set_buf_type(struct file *file, enum _css_dev_buf_type buf_type)
    {
        unsigned long flags;
        struct _css_dev_ *css_dev = _to_css_dev_(file);

        DEBUG_CSS("buf_type=%d",buf_type);
        if(buf_type >= _CSS_DEV_MAX_BUF_TYPE){
            DEBUG_CSS_ERR("invalid buf type");
            return -EINVAL;
        }
        spin_lock_irqsave(&css_dev->slock,flags);
        css_dev->buf_type = buf_type;
        spin_unlock_irqrestore(&css_dev->slock,flags);
        return 0;
    }

    static long _css_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
    {
        int ret = 0;

        switch(cmd){
        case CSI_DMA_SET_CUR_MAP_BUF_TYPE_IOCTL:
            ret = _css_set_buf_type(file,(enum _css_dev_buf_type)arg);
            break;
        default:
            DEBUG_CSS_ERR("unknown cmd = %x",cmd);
            ret = -EINVAL;
            break;
        }
        return ret;
    }
    static struct _css_dev_ _global_css_dev = {
        .name = "lkmao",
        ._css_fops = {
            .owner = THIS_MODULE,
            .mmap = _css_mmap,
            .open = _css_open,
            .release = _css_release,
            .read = _css_read,
            .write = _css_write,
            .unlocked_ioctl = _css_unlocked_ioctl,
        },
        .misc = {
            .minor = MISC_DYNAMIC_MINOR,
            .name = "css_dma",
        },
        .buf_type = _CSS_DEV_UNKNOWN_BUF_TYPE,
        .user_src_buf_vaddr = NULL,
        .user_dst_buf_vaddr = NULL,
        .m2m_dma_data = {
            .peripheral_type = IMX_DMATYPE_MEMORY,
            .priority = DMA_PRIO_HIGH,
        },
    };
    /* Allocate a DMA-capable buffer and map it for streaming DMA. */
    static int css_dev_get_dma_addr(struct _css_dev_ *css_dev,char **vaddr,dma_addr_t *phys, int direction)
    {
        char *p;
        dma_addr_t dma_addr;

        p = (char*)__get_free_pages(GFP_KERNEL|GFP_DMA,css_dev->buf_size_order);
        if(p == NULL){    /* __get_free_pages() returns 0 on failure */
            DEBUG_CSS("__get_free_pages error");
            return -ENOMEM;
        }
        dma_addr = dma_map_single(css_dev->dev, p, css_dev->buf_size, direction);
        *vaddr = p;
        *phys = dma_addr;
        DEBUG_CSS("32bit:p = %p,dma_addr = %x",p,dma_addr);
        return 0;
    }

    /* Only accept a general-purpose SDMA channel and hand it our imx_dma_data. */
    static bool css_dma_filter_fn(struct dma_chan *chan, void *filter_param)
    {
        if(!imx_dma_is_general_purpose(chan)){
            DEBUG_CSS("css_dma_filter_fn error");
            return false;
        }
        chan->private = filter_param;
        return true;
    }

    static void css_dma_async_tx_callback(void *dma_async_param)
    {
        struct _css_dev_ *css_dev = (struct _css_dev_ *)dma_async_param;

        complete(&css_dev->dma_m2m_ok);
    }

    /* Request a DMA_MEMCPY-capable channel and apply a basic slave config. */
    static int css_dmaengine_init(struct _css_dev_ *css_dev)
    {
        dma_cap_mask_t dma_m2m_mask;
        struct dma_slave_config dma_m2m_config = {0};

        css_dev->m2m_dma_data.peripheral_type = IMX_DMATYPE_MEMORY;
        css_dev->m2m_dma_data.priority = DMA_PRIO_HIGH;
        dma_cap_zero(dma_m2m_mask);
        dma_cap_set(DMA_MEMCPY,dma_m2m_mask);
        css_dev->dma_m2m_chan = dma_request_channel(dma_m2m_mask,css_dma_filter_fn,&css_dev->m2m_dma_data);
        if(!css_dev->dma_m2m_chan){
            DEBUG_CSS("dma_request_channel error");
            return -EINVAL;
        }
        dma_m2m_config.direction = DMA_MEM_TO_MEM;
        dma_m2m_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
        dmaengine_slave_config(css_dev->dma_m2m_chan,&dma_m2m_config);
        return 0;
    }
    /* One timed DMA memcpy: map both buffers, submit the descriptor, wait
     * for completion, unmap, and print the elapsed time. */
    static int css_dmaengine_test(unsigned long arg)
    {
        struct _css_dev_ *css_dev = (struct _css_dev_ *)arg;
        struct dma_async_tx_descriptor *dma_m2m_desc;
        struct dma_device *dma_dev;
        struct device *chan_dev;
        dma_cookie_t cookie;
        enum dma_status dma_status;

        do_gettimeofday(&css_dev->tv_start);
        dma_dev = css_dev->dma_m2m_chan->device;
        chan_dev = css_dev->dma_m2m_chan->device->dev;
        css_dev->src_addr = dma_map_single(chan_dev, css_dev->src_buf, css_dev->buf_size, DMA_TO_DEVICE);
        css_dev->dst_addr = dma_map_single(chan_dev, css_dev->dst_buf, css_dev->buf_size, DMA_FROM_DEVICE);
        //DEBUG_CSS("32bit:css_dev->src_addr = %x,css_dev->dst_addr = %x",css_dev->src_addr,css_dev->dst_addr);
        dma_m2m_desc = dma_dev->device_prep_dma_memcpy(css_dev->dma_m2m_chan,
                css_dev->dst_addr,
                css_dev->src_addr,
                css_dev->buf_size,0);
        dma_m2m_desc->callback = css_dma_async_tx_callback;
        dma_m2m_desc->callback_param = css_dev;
        init_completion(&css_dev->dma_m2m_ok);
        cookie = dmaengine_submit(dma_m2m_desc);
        if(dma_submit_error(cookie)){
            DEBUG_CSS("dmaengine_submit error");
            return -EINVAL;
        }
        dma_async_issue_pending(css_dev->dma_m2m_chan);
        wait_for_completion(&css_dev->dma_m2m_ok);
        dma_status = dma_async_is_tx_complete(css_dev->dma_m2m_chan,cookie,NULL,NULL);
        if(DMA_COMPLETE != dma_status){
            DEBUG_CSS("dma_status = %d",dma_status);
        }
        dma_unmap_single(chan_dev,css_dev->src_addr,css_dev->buf_size,DMA_TO_DEVICE);
        dma_unmap_single(chan_dev,css_dev->dst_addr,css_dev->buf_size,DMA_FROM_DEVICE);
        do_gettimeofday(&css_dev->tv_end);
        DEBUG_CSS("used time = %ld us",
            (css_dev->tv_end.tv_sec - css_dev->tv_start.tv_sec)*1000000
            + (css_dev->tv_end.tv_usec - css_dev->tv_start.tv_usec));
        return 0;
    }
    static int css_dev_init(struct platform_device *pdev,struct _css_dev_ *css_dev)
    {
        int i = 0,j = 0;

        css_dev->misc.fops = &css_dev->_css_fops;
        pr_debug("css_init init ok");
        mutex_init(&css_dev->open_lock);
        spin_lock_init(&css_dev->slock);
        printk("KERN_ALERT = %s",KERN_ALERT);
        css_dev->dev = &pdev->dev;
        css_dev->buf_size = CSS_DMA_IMAGE_SIZE;
        css_dev->buf_size_order = get_order(css_dev->buf_size);
        if(css_dev_get_dma_addr(css_dev,&css_dev->src_buf,&css_dev->src_addr,DMA_TO_DEVICE)){
            return -ENOMEM;
        }
        /* note: src_buf is reallocated below, replacing the buffer that
         * css_dev_get_dma_addr() just set up */
        css_dev->src_buf = (char*)__get_free_pages(GFP_KERNEL|GFP_DMA,css_dev->buf_size_order);
        if(css_dev->src_buf == NULL){
            DEBUG_CSS("__get_free_pages error");
            return -ENOMEM;
        }
        css_dev->dst_buf = (char*)__get_free_pages(GFP_KERNEL|GFP_DMA,css_dev->buf_size_order);
        if(css_dev->dst_buf == NULL){
            DEBUG_CSS("__get_free_pages error");
            return -ENOMEM;
        }
        DEBUG_CSS("32bit:css_dev->src_buf = %p,css_dev->dst_buf = %p",css_dev->src_buf,css_dev->dst_buf);
        if(misc_register(&css_dev->misc) != 0){
            DEBUG_CSS("misc_register error");
            return -EINVAL;
        }
        platform_set_drvdata(pdev,css_dev);
        css_dmaengine_init(css_dev);
        /* run the copy ten times and verify the destination contents each time */
        for(i = 0;i < 10;i++){
            memset(css_dev->src_buf,5,css_dev->buf_size);
            memset(css_dev->dst_buf,6,css_dev->buf_size);
    #if 1
            css_dmaengine_test((unsigned long)css_dev);
    #else
            memcpy(css_dev->dst_buf,css_dev->src_buf,css_dev->buf_size);
    #endif
            for(j = 0;j < css_dev->buf_size;j++){
                if(css_dev->dst_buf[j] != 5){
                    DEBUG_CSS("css_dev->dst_buf[%d] = %d",j,css_dev->dst_buf[j]);
                    DEBUG_CSS("css_dev->src_buf[%d] = %d",j,css_dev->src_buf[j]);
                    break;
                }
            }
            if(j != css_dev->buf_size){
                DEBUG_CSS("i = %d,css_dmaengine_test error",i);
                return -EINVAL;
            }else{
                DEBUG_CSS("copy ok %d times",i+1);
            }
        }
        return 0;
    }
    static int css_probe(struct platform_device *pdev)
    {
        struct _css_dev_ *css_dev = (struct _css_dev_ *)&_global_css_dev;

        if(css_dev_init(pdev,css_dev)){
            return -EINVAL;
        }
        DEBUG_CSS("init ok");
        return 0;
    }

    static int css_remove(struct platform_device *pdev)
    {
        struct _css_dev_ *css_dev = &_global_css_dev;

        dma_unmap_single(css_dev->dev,css_dev->src_addr,css_dev->buf_size,DMA_TO_DEVICE);
        dma_unmap_single(css_dev->dev,css_dev->dst_addr,css_dev->buf_size,DMA_FROM_DEVICE);
        /* multi-page buffers must be released with free_pages() and the
         * original allocation order */
        free_pages((unsigned long)css_dev->dst_buf,css_dev->buf_size_order);
        free_pages((unsigned long)css_dev->src_buf,css_dev->buf_size_order);
        misc_deregister(&css_dev->misc);
        dma_release_channel(css_dev->dma_m2m_chan);
        DEBUG_CSS("exit ok");
        return 0;
    }

    static const struct of_device_id css_of_ids[] = {
        {.compatible = "css_dma"},
        {},
    };
    MODULE_DEVICE_TABLE(of,css_of_ids);

    static struct platform_driver css_platform_driver = {
        .probe = css_probe,
        .remove = css_remove,
        .driver = {
            .name = "css_dma",
            .of_match_table = css_of_ids,
            .owner = THIS_MODULE,
        },
    };

    static int __init css_init(void)
    {
        int ret_val;

        ret_val = platform_driver_register(&css_platform_driver);
        if(ret_val != 0){
            DEBUG_CSS("platform_driver_register error");
            return ret_val;
        }
        DEBUG_CSS("platform_driver_register ok");
        return 0;
    }

    static void __exit css_exit(void)
    {
        platform_driver_unregister(&css_platform_driver);
    }

    module_init(css_init);
    module_exit(css_exit);
    MODULE_LICENSE("GPL");
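
    The misc device also exposes both buffers to user space through the ioctl and mmap handlers above. The article does not include a user-space test, but a minimal, hypothetical sketch could look like this (assuming the device node shows up as /dev/css_dma and using the ioctl command and buffer-type values from the driver):

    /* Hypothetical user-space test: map the source and destination buffers
     * exported by the driver and compare them after the kernel-side copy. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #define CSI_DMA_SET_CUR_MAP_BUF_TYPE_IOCTL 0x1001
    #define CSS_DMA_IMAGE_SIZE (1280 * 800)

    int main(void)
    {
        int fd = open("/dev/css_dma", O_RDWR);
        if (fd < 0)
            return 1;

        /* 0 = _CSS_DEV_READ_BUF (src), 1 = _CSS_DEV_WRITE_BUF (dst) */
        ioctl(fd, CSI_DMA_SET_CUR_MAP_BUF_TYPE_IOCTL, 0);
        char *src = mmap(NULL, CSS_DMA_IMAGE_SIZE, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);
        ioctl(fd, CSI_DMA_SET_CUR_MAP_BUF_TYPE_IOCTL, 1);
        char *dst = mmap(NULL, CSS_DMA_IMAGE_SIZE, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);
        if (src == MAP_FAILED || dst == MAP_FAILED) {
            close(fd);
            return 1;
        }

        printf("src[0]=%d dst[0]=%d identical=%d\n", src[0], dst[0],
               memcmp(src, dst, CSS_DMA_IMAGE_SIZE) == 0);

        munmap(src, CSS_DMA_IMAGE_SIZE);
        munmap(dst, CSS_DMA_IMAGE_SIZE);
        close(fd);
        return 0;
    }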

    Device tree

    sdma_m2m{
        compatible = "css_dma";
    };

    Test result: this one is a bit alarming. It takes more than 20 ms, over ten times longer than memcpy. The gap is most likely dominated by per-transfer overhead (cache maintenance in dma_map_single()/dma_unmap_single() over the 1 MB buffers, descriptor setup, and the interrupt-driven completion), on top of the SDMA engine simply moving data more slowly than the CPU.

    Summary

            On this platform a plain CPU memcpy of 1280*800 bytes takes roughly 1.7–2.3 ms and is quite stable when run from a timer, while the same copy through the SDMA dmaengine takes more than 20 ms per run. For pure memory-to-memory copies of this size, memcpy is clearly the faster option; offloading to DMA would probably only pay off if the goal is to keep the CPU free for other work during the transfer.

  • Original article: https://blog.csdn.net/yueni_zhao/article/details/127730383