1. 程式人生 > >CUDA實例練習(九):頁鎖定主機內存

CUDA實例練習(九):頁鎖定主機內存

png blog 分配 pydev art event http tdi dev

malloc()分配的內存與cudaHostAlloc()分配的內存之間存在著一個重要差異。C庫函數malloc()將分配標準的,可分頁的(Pageable)主機內存,而cudaHostAlloc()將分配頁鎖定的主機內存。頁鎖定內存也稱為固定內存(Pinned Memory)或者不可分頁內存,它有一個重要的屬性:操作系統將不會對這塊內存分頁並交換到磁盤上,從而確保了該內存始終駐留在物理內存中。因此,操作系統能夠安全地使某個應用程序訪問該內存的物理地址,因為這塊內存將不會被破壞或者重新定位。

 1 #include <stdio.h>
 2 #include <cuda_runtime.h>
 3
#include <device_launch_parameters.h> 4 #include "book.h" 5 6 #define SIZE (10*1024*1024) 7 8 float cuda_malloc_test(int size, bool up){ 9 cudaEvent_t start, stop; 10 int *a, *dev_a; 11 float elapsedTime; 12 13 HANDLE_ERROR(cudaEventCreate(&start)); 14 HANDLE_ERROR(cudaEventCreate(&stop));
15 16 a = (int *)malloc(size * sizeof(*a)); 17 HANDLE_NULL(a); 18 HANDLE_ERROR(cudaMalloc((void**)&dev_a, size * sizeof(*dev_a))); 19 HANDLE_ERROR(cudaEventRecord(start, 0)); 20 for (int i = 0; i < 100; i++){ 21 if (up) 22 HANDLE_ERROR(cudaMemcpy(dev_a, a, size * sizeof
(*dev_a), cudaMemcpyHostToDevice)); 23 else 24 HANDLE_ERROR(cudaMemcpy(a, dev_a, size * sizeof(*dev_a), cudaMemcpyDeviceToHost)); 25 } 26 HANDLE_ERROR(cudaEventRecord(stop, 0)); 27 HANDLE_ERROR(cudaEventSynchronize(stop)); 28 HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime, start, stop)); 29 free(a); 30 HANDLE_ERROR(cudaFree(dev_a)); 31 HANDLE_ERROR(cudaEventDestroy(start)); 32 HANDLE_ERROR(cudaEventDestroy(stop)); 33 34 return elapsedTime; 35 } 36 37 float cuda_host_alloc_test(int size, bool up){ 38 cudaEvent_t start, stop; 39 int *a, *dev_a; 40 float elapsedTime; 41 42 HANDLE_ERROR(cudaEventCreate(&start)); 43 HANDLE_ERROR(cudaEventCreate(&stop)); 44 45 HANDLE_ERROR(cudaHostAlloc((void **)&a, size * sizeof(*a), cudaHostAllocDefault)); 46 HANDLE_ERROR(cudaMalloc((void**)&dev_a, size * sizeof(*dev_a))); 47 48 HANDLE_ERROR(cudaEventRecord(start, 0)); 49 for (int i = 0; i < 100; i++){ 50 if (up) 51 HANDLE_ERROR(cudaMemcpy(dev_a, a, size * sizeof(*a), cudaMemcpyHostToDevice)); 52 else 53 HANDLE_ERROR(cudaMemcpy(a, dev_a, size * sizeof(*a), cudaMemcpyDeviceToHost)); 54 } 55 HANDLE_ERROR(cudaEventRecord(stop, 0)); 56 HANDLE_ERROR(cudaEventSynchronize(stop)); 57 HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime, start, stop)); 58 59 HANDLE_ERROR(cudaFreeHost(a)); 60 HANDLE_ERROR(cudaFree(dev_a)); 61 HANDLE_ERROR(cudaEventDestroy(start)); 62 HANDLE_ERROR(cudaEventDestroy(stop)); 63 64 return elapsedTime; 65 } 66 67 int main(void){ 68 float elapsedTime; 69 float MB = (float)100 * SIZE*sizeof(int) / 1024 / 1024; 70 elapsedTime = cuda_malloc_test(SIZE, true); 71 printf("Time using cudaMalloc: %3.1f ms\n", elapsedTime); 72 printf("\tMB/s during copy up: %3.1f\n", MB / (elapsedTime / 1000)); 73 74 elapsedTime = cuda_malloc_test(SIZE, false); 75 printf("Time using cudaMalloc: %3.1f ms\n", elapsedTime); 76 printf("\tMB/s during copy down: %3.1f\n", MB / (elapsedTime / 1000)); 77 78 elapsedTime = cuda_host_alloc_test(SIZE, true); 79 printf("Time using cudaHostAlloc: %3.1f ms\n", 
elapsedTime); 80 printf("\tMB/s during copy up: %3.1f\n", MB / (elapsedTime / 1000)); 81 82 elapsedTime = cuda_host_alloc_test(SIZE, false); 83 printf("Time using cudaHostAlloc: %3.1f ms\n", elapsedTime); 84 printf("\tMB/s during copy down: %3.1f\n", MB / (elapsedTime / 1000)); 85 }

技術分享

CUDA實例練習(九):頁鎖定主機內存