1. 程式人生 > >CUDA實現影象二次線性插值縮放

CUDA實現影象二次線性插值縮放

這裡寫圖片描述
(Sx-0)/(SW-0)=(Dx-0)/(DW-0) (Sy-0)/(SH-0)=(Dy-0)/(DH-0)
=> Sx=Dx*SW/DW Sy=Dy*SH/DH
聚焦看看(Sx,Sy)座標點(Sx,Sy為浮點數)附近的情況;
對於近鄰取樣插值的縮放演算法,直接取Color0顏色作為縮放後點的顏色;
二次線性插值需要考慮(Sx,Sy)座標點周圍的4個顏色值Color0/Color1/Color2/Color3,
把(Sx,Sy)到A/B/C/D座標點的距離作為係數來把4個顏色混合出縮放後點的顏色;
( u=Sx-floor(Sx); v=Sy-floor(Sy); 說明:floor函式的返回值為小於等於引數的最大整數 )
二次線性插值公式為:
tmpColor0=Color0*(1-u) + Color2*u;
tmpColor1=Color1*(1-u) + Color3*u;
DstColor =tmpColor0*(1-v) + tmpColor1*v;

這裡寫圖片描述
展開公式為:
pm0=(1-u)*(1-v);
pm1=v*(1-u);
pm2=u*(1-v);
pm3=u*v;
則顏色混合公式為:
DstColor = Color0*pm0 + Color1*pm1 + Color2*pm2 + Color3*pm3;

CUDA實現:

// Blends four 8-bit samples with their bilinear weights and rounds the result
// to the nearest integer in [0, 255]. Rounding (instead of truncating) avoids
// losing one level when float error leaves the weighted sum just below an
// integer, e.g. a uniform 255 area evaluating to 254.99998.
static __device__ __forceinline__ int cudaTransformBlend(Uint8 a, Uint8 b, Uint8 c, Uint8 d,
                                                         float wa, float wb, float wc, float wd)
{
    const float v = a * wa + b * wb + c * wc + d * wd + 0.5f;
    return (v >= 255.0f) ? 255 : (int)v;
}

// Bilinear resampling kernel.
//
// Launch layout: one block per destination pixel (gridDim.x = destination
// width, gridDim.y = destination height) with a single thread per block; the
// kernel indexes by blockIdx only and uses no shared memory.
// NOTE(review): one thread per block leaves most of each warp idle. Moving to
// 2D thread blocks needs the destination size as extra kernel arguments for a
// bounds check, which would change this signature.
//
// Parameters:
//   output / input           device pointers to the destination / source image
//   pitchOutput / pitchInput row stride in bytes of each image
//   bytesPerPixelOutput      byte step between destination pixels; each pixel is
//                            written as one 32-bit word, so the destination
//                            address must be 4-byte aligned
//   bytesPerPixelInput       byte step between source pixels; bytes 0..2 of each
//                            source pixel are read
//   xRatio / yRatio          source-coordinate step per destination pixel
//
// Output word layout: bits 0-7 come from source byte 0, bits 8-15 from source
// byte 1, bits 16-23 from source byte 2, bits 24-31 are set to 0xFF.
__global__ void cudaTransform(Uint8 *output, Uint8 *input, Uint32 pitchOutput, Uint32 pitchInput, Uint8 bytesPerPixelOutput, Uint8 bytesPerPixelInput, float xRatio, float yRatio)
{
    // Source position of this destination pixel, split into the integer
    // top-left sample (x, y) and the fractional distances from it.
    const float srcX = xRatio * blockIdx.x;
    const float srcY = yRatio * blockIdx.y;
    const int x = (int)srcX;
    const int y = (int)srcY;
    const float xDist = srcX - x;
    const float yDist = srcY - y;

    // A neighbour whose weight is exactly zero is never dereferenced; its
    // pointer is folded back onto the top-left sample instead. The result is
    // unchanged (the term is multiplied by 0), but this keeps the kernel from
    // reading past the row / image when the source is one pixel wide or high.
    const size_t stepX = (xDist > 0.0f) ? (size_t)bytesPerPixelInput : (size_t)0;
    const size_t stepY = (yDist > 0.0f) ? (size_t)pitchInput : (size_t)0;

    // Offsets are computed in size_t so large images do not wrap at 32 bits.
    const Uint8 *a = input + (size_t)y * pitchInput + (size_t)x * bytesPerPixelInput; // (x,   y)
    const Uint8 *b = a + stepX;                                                       // (x+1, y)
    const Uint8 *c = a + stepY;                                                       // (x,   y+1)
    const Uint8 *d = c + stepX;                                                       // (x+1, y+1)

    // Bilinear weights of the four samples.
    const float wa = (1.0f - xDist) * (1.0f - yDist);
    const float wb = xDist * (1.0f - yDist);
    const float wc = yDist * (1.0f - xDist);
    const float wd = xDist * yDist;

    // Channel names follow the original code: "red" is source byte 0 and
    // "blue" is source byte 2.
    const int red   = cudaTransformBlend(a[0], b[0], c[0], d[0], wa, wb, wc, wd);
    const int green = cudaTransformBlend(a[1], b[1], c[1], d[1], wa, wb, wc, wd);
    const int blue  = cudaTransformBlend(a[2], b[2], c[2], d[2], wa, wb, wc, wd);

    Uint8 *p = output + (size_t)blockIdx.y * pitchOutput + (size_t)blockIdx.x * bytesPerPixelOutput;
    *(Uint32 *)p = 0xff000000 | (blue << 16) | (green << 8) | red;
}

// Scales a 3-byte-per-pixel host image to a 4-byte-per-pixel host image using
// bilinear interpolation on the GPU.
//
// Parameters:
//   src         host source image; (w * 3 rounded up to a multiple of 4) * h
//               bytes are read, i.e. rows are expected to be padded to 4 bytes
//   dst         host destination buffer; dstw * 4 * dsth bytes are written
//   w, h        source size in pixels
//   dstw, dsth  destination size in pixels
//
// Returns without doing anything when a pointer is null or any dimension is
// not positive. CUDA errors are reported through cudasafe().
void RGB24_resize32(uint8_t* src, uint8_t*dst, int w, int h, int dstw, int dsth)
{
    if (!src || !dst || w <= 0 || h <= 0 || dstw <= 0 || dsth <= 0)
        return;

    const int src_nb_component = 3;
    const int dst_nb_component = 4;

    // Row strides in bytes, each rounded up to a multiple of 4.
    const uint32_t src_row_btyes = (uint32_t)((w * src_nb_component + 3) & ~3);
    const uint32_t dst_row_bytes = (uint32_t)((dstw * dst_nb_component + 3) & ~3);

    // Total sizes in size_t so pitch * height cannot wrap at 32 bits.
    const size_t src_size = (size_t)src_row_btyes * (size_t)h;
    const size_t dst_size = (size_t)dst_row_bytes * (size_t)dsth;

    // Using (size - 1) / dstSize keeps the sampled coordinate strictly below
    // the last source pixel, so the +1 neighbours stay inside the image.
    const float x_ratio = ((float)(w - 1)) / dstw;
    const float y_ratio = ((float)(h - 1)) / dsth;

    uint8_t* device_src = 0;
    uint8_t* device_dst = 0;

    // Copy original image
    cudasafe(cudaMalloc((void **)&device_src, src_size), "Original image allocation ", __FILE__, __LINE__);
    cudasafe(cudaMemcpy(device_src, src, src_size, cudaMemcpyHostToDevice), "Copy original image to device ", __FILE__, __LINE__);
    cudasafe(cudaMalloc((void **)&device_dst, dst_size), "New image allocation ", __FILE__, __LINE__);

    // One block per destination pixel, one thread per block (what the kernel expects).
    dim3 grid(dstw, dsth);
    cudaTransform<<<grid, 1>>>(device_dst, device_src, dst_row_bytes, src_row_btyes, dst_nb_component, src_nb_component, x_ratio, y_ratio);
    // A launch does not return an error itself; without this check an invalid
    // configuration (e.g. a grid larger than the device allows) would pass
    // unnoticed and uninitialised memory would be copied back below.
    cudasafe(cudaGetLastError(), "Kernel launch ", __FILE__, __LINE__);

    // Copy scaled image to host (blocking copy, so it also waits for the kernel)
    cudasafe(cudaMemcpy(dst, device_dst, dst_size, cudaMemcpyDeviceToHost), "from device to host", __FILE__, __LINE__);

    cudasafe(cudaFree(device_src), "Free original image ", __FILE__, __LINE__);
    cudasafe(cudaFree(device_dst), "Free new image ", __FILE__, __LINE__);
}