1. 程式人生 > >CUDA學習筆記(4)- 簡單的影象處理

CUDA學習筆記(4)- 簡單的影象處理

使用GPU處理影象的速度比使用CPU快很多。下面是使用GPU與使用CPU做影象處理的速度對比:對同一張影象與黑色做簡單的混色(將影象資料的每個位元組乘以係數alpha)。(示意圖:BlendImage)

可以看出,使用GPU處理大約需要耗時10毫秒,而使用CPU處理大約需要耗時50毫秒,CUDA處理影象的速度大約是CPU處理的5倍。這裡使用的是1維的紋理記憶體,建立並繫結紋理記憶體的步驟如下:

// File-scope 1D texture reference; elements are fetched as raw unsigned char values.
texture<unsigned char, cudaTextureType1D, cudaReadModeElementType> rT;

// Ask the runtime for the textureReference that backs rT.
textureReference *texRefPtr = nullptr;
cudaGetTextureReference((const textureReference**)&texRefPtr, &rT);

// Describe the element layout (one unsigned char per texel) ...
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned char>();

// ... and bind `size` bytes of the device buffer pDevSrc to the texture.
cudaBindTexture(0, &rT, pDevSrc, &channelDesc, size);

其中,pDevSrc為裝置記憶體指標,size為記憶體的大小。

完整程式碼如下: 介面顯示相關,CUDAMainWindow.h

#ifndef CUDA_MAINWINDOW_H
#define CUDA_MAINWINDOW_H

#include "UIBase/UIBaseWindow.h"
#include <QWidget>
#include <QLabel>
#include <QSlider>
#include <QImage>

/**
 * Demo window that blends the same image once with CUDA and once on the CPU
 * and shows both results, together with the time each path took.
 *
 * The slider value (0..100) is turned into the blend factor passed to
 * cudaBlendImage() and normalBlendImage().
 */
class CUDAMainWindow : public UIBaseWindow
{
	Q_OBJECT

public:
	CUDAMainWindow(QWidget* parent = nullptr);
	~CUDAMainWindow();

private:
	QLabel *m_LeftImage = nullptr;    // preview of the CUDA result
	QLabel *m_LeftTag = nullptr;      // "CUDA Delay" timing label
	QLabel *m_RightImage = nullptr;   // preview of the CPU result
	QLabel *m_RightTag = nullptr;     // "CPU Delay" timing label
	QSlider *m_Slider = nullptr;      // selects the blend factor

	QImage m_MainImage;               // unscaled source image

	// Host output buffers, allocated on first use by the blend functions.
	unsigned char* pCUDAImageData = nullptr;
	unsigned char* pCPUImageData = nullptr;

	// Blend the image using CUDA.
	QImage cudaBlendImage(float alpha);
	// Blend the image using the CPU.
	QImage normalBlendImage(float alpha);

private slots:
	void onSliderValueChanged(int);
};

#endif

介面顯示相關,CUDAMainWindow.cpp

#include "CUDAMainWindow.h"
#include <QVBoxLayout>
#include <QHBoxLayout>
#include <QTime>
#include <QDebug>
#include "CUDACore/CUDAImageDispose.cuh"

// Builds the UI: two image previews on top, two timing labels below them,
// and a 0..100 slider at the bottom that drives the blend factor.
CUDAMainWindow::CUDAMainWindow(QWidget* parent)
	:UIBaseWindow(parent)
{
	QVBoxLayout *mainLayout = new QVBoxLayout(this);
	mainLayout->addSpacing(30);

	QHBoxLayout *topLayout = new QHBoxLayout;
	m_LeftImage = new QLabel;

	// Image Display
	QImage image("./test.jpg");
	m_MainImage = image;
	if (image.isNull())
	{
		// Without this guard width() is 0, the scale factor becomes 0.0 and
		// the 0 / 0.0 below yields NaN sizes for scaled().
		qWarning() << "CUDAMainWindow: failed to load ./test.jpg";
	}
	else
	{
		// Shrink/grow the preview so that it is 400 pixels wide.
		qreal scaleValue = image.width() * 1.0 / 400;
		image = image.scaled(image.width() * 1.0 / scaleValue, image.height() * 1.0 / scaleValue);
	}
	m_LeftImage->setPixmap(QPixmap::fromImage(image));

	m_RightImage = new QLabel;
	m_RightImage->setPixmap(QPixmap::fromImage(image));

	topLayout->addWidget(m_LeftImage);
	topLayout->addWidget(m_RightImage);

	// Speed Display
	m_LeftTag = new QLabel("CUDA Delay: ");
	m_RightTag = new QLabel("CPU Delay: ");
	QHBoxLayout *midLayout = new QHBoxLayout;
	midLayout->addWidget(m_LeftTag);
	midLayout->addWidget(m_RightTag);

	// Slider Display
	m_Slider = new QSlider(Qt::Horizontal);
	m_Slider->setMaximum(100);
	m_Slider->setMinimum(0);
	// The initial value is set before connecting, so the slot is not invoked here.
	m_Slider->setValue(100);
	QObject::connect(m_Slider, SIGNAL(valueChanged(int)), this, SLOT(onSliderValueChanged(int)));

	mainLayout->addLayout(topLayout);
	mainLayout->addLayout(midLayout);
	mainLayout->addStretch();
	mainLayout->addWidget(m_Slider);
}

// Releases the host output buffers that cudaBlendImage() / normalBlendImage()
// allocate with new[] on first use.
CUDAMainWindow::~CUDAMainWindow()
{
	// delete[] on a null pointer is a no-op, so buffers that were never
	// allocated need no special handling.
	delete[] pCUDAImageData;
	pCUDAImageData = nullptr;

	delete[] pCPUImageData;
	pCPUImageData = nullptr;
}

// Blend the image using CUDA.
// Multiplies every byte of m_MainImage by `alpha` via blendCudaImage(), shows
// the elapsed time in m_LeftTag and returns an image built on pCUDAImageData.
// NOTE(review): the returned QImage is constructed directly on pCUDAImageData;
// per the Qt docs that constructor does not copy, so the buffer must outlive it.
QImage CUDAMainWindow::cudaBlendImage(float alpha)
{
	// The output buffer is created once, on the first call.
	if (!pCUDAImageData)
		pCUDAImageData = new unsigned char[m_MainImage.byteCount()];

	QTime stopwatch;
	stopwatch.start();
	unsigned char* srcBytes = (unsigned char*)m_MainImage.constBits();
	blendCudaImage(pCUDAImageData, srcBytes, m_MainImage.byteCount(), alpha);

	// Report how long the GPU path took.
	const int elapsedMs = stopwatch.elapsed();
	m_LeftTag->setText(QString("CUDA Delay: %1 ms").arg(elapsedMs));

	return QImage(pCUDAImageData, m_MainImage.width(), m_MainImage.height(), m_MainImage.format());
}

// Blend the image using the CPU.
// Multiplies every byte of m_MainImage by `alpha` in a plain loop, shows the
// elapsed time in m_RightTag and returns an image built on pCPUImageData.
// NOTE(review): the returned QImage is constructed directly on pCPUImageData;
// per the Qt docs that constructor does not copy, so the buffer must outlive it.
QImage CUDAMainWindow::normalBlendImage(float alpha)
{
	const int byteTotal = m_MainImage.byteCount();

	// The output buffer is created once, on the first call.
	if (pCPUImageData == nullptr)
		pCPUImageData = new unsigned char[byteTotal];

	QTime time;
	time.start();
	// Fetch the source pointer and the byte count once instead of calling
	// constBits()/byteCount() again for every single byte.
	const unsigned char* srcBytes = m_MainImage.constBits();
	for (int i = 0; i < byteTotal; ++i)
		pCPUImageData[i] = srcBytes[i] * alpha;

	// Report how long the CPU path took.
	QString str = "CPU Delay: %1 ms";
	str = str.arg(time.elapsed());
	m_RightTag->setText(str);

	QImage::Format format = m_MainImage.format();
	QImage image(pCPUImageData, m_MainImage.width(), m_MainImage.height(), format);

	return image;
}

void CUDAMainWindow::onSliderValueChanged(int value)
{
	qreal alpha = value * 1.0 / 100;


	QImage image1 = cudaBlendImage(alpha);
	qreal scaleValue = image1.width() * 1.0 / 400;
	image1 = image1.scaled(image1.width() * 1.0 / scaleValue, image1.height() * 1.0 / scaleValue);
	m_LeftImage->setPixmap(QPixmap::fromImage(image1));

	QImage image2 = normalBlendImage(alpha);
	scaleValue = image2.width() * 1.0 / 400;
	image2 = image1.scaled(image2.width() * 1.0 / scaleValue, image2.height() * 1.0 / scaleValue);
	m_RightImage->setPixmap(QPixmap::fromImage(image2));
}

CUDA處理,CUDAImageDispose.cuh

#ifndef CUDA_IMAGE_DISPOSE_H
#define CUDA_IMAGE_DISPOSE_H

#include "cuda_runtime.h"
#include "stdio.h"
#include "device_launch_parameters.h"

// Host entry point, exported with C linkage.
// Multiplies each of the `size` bytes at pSrc by `alpha` on the GPU and writes
// the results to pDest. Both pointers are host buffers: the implementation in
// this file copies pSrc to the device and copies the result back into pDest.
extern "C" void blendCudaImage(unsigned char* pDest, unsigned char* pSrc, int size, float alpha);

#endif

#include "CUDACore/CUDAImageDispose.cuh"

// Number of threads per block used when launching blendTexture.
#define BLOCK_DIM 512
// File-scope 1D texture reference over unsigned char elements. The host code
// binds the device source buffer to it, and blendTexture reads it with tex1Dfetch.
texture<unsigned char, cudaTextureType1D, cudaReadModeElementType> rT;

// Kernel: each thread scales one source byte, read through the texture rT,
// by `alpha` and stores it at the same index in pDest.
// Expects a 1D launch (only the .x components are used) with at least `size`
// threads in total; surplus threads exit without touching memory.
__global__ void blendTexture(unsigned char* pDest, int size, float alpha)
{
	const int byteIdx = blockIdx.x * blockDim.x + threadIdx.x;

	// The last block may extend past the end of the buffer.
	if (byteIdx >= size)
		return;

	pDest[byteIdx] = tex1Dfetch(rT, byteIdx) * alpha;
}

// Logs a failed CUDA runtime call to stderr; returns true when `status` is cudaSuccess.
static bool blendCallSucceeded(cudaError_t status, const char* what)
{
	if (status == cudaSuccess)
		return true;

	fprintf(stderr, "blendTextureFunction: %s failed: %s\n", what, cudaGetErrorString(status));
	return false;
}

// Multiplies `size` bytes of the host buffer pSrc by `alpha` on the GPU and
// writes the results to the host buffer pDest.
//
// Steps: upload pSrc, bind it to the texture rT, run blendTexture with one
// thread per byte, download the result, then unbind and free everything.
// If any CUDA call fails, the error is logged to stderr, the remaining steps
// are skipped (pDest is then left unmodified) and all resources acquired so
// far are still released. Null pointers and non-positive sizes are a no-op.
void blendTextureFunction(unsigned char* pDest, unsigned char* pSrc, int size, float alpha)
{
	if (pDest == nullptr || pSrc == nullptr || size <= 0)
		return;

	unsigned char *pDevSrc = nullptr;
	unsigned char *pDevDest = nullptr;
	bool textureBound = false;

	bool ok = blendCallSucceeded(cudaMalloc(&pDevSrc, size), "cudaMalloc(source)")
		&& blendCallSucceeded(cudaMalloc(&pDevDest, size), "cudaMalloc(destination)")
		&& blendCallSucceeded(cudaMemcpy(pDevSrc, pSrc, size, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

	if (ok)
	{
		cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned char>();
		ok = blendCallSucceeded(cudaBindTexture(0, &rT, pDevSrc, &channelDesc, size), "cudaBindTexture");
		textureBound = ok;
	}

	if (ok)
	{
		// Integer ceil-division. The previous ceil((float)size / BLOCK_DIM)
		// rounds `size` to the nearest float first, which above 2^24 can
		// round down and leave the grid one block short of covering the buffer.
		// Written as (size - 1) / N + 1 so it cannot overflow for large sizes.
		const int blockCount = (size - 1) / BLOCK_DIM + 1;
		blendTexture<<<blockCount, BLOCK_DIM>>>(pDevDest, size, alpha);

		// A launch reports configuration errors via cudaGetLastError() and
		// execution errors at the next synchronization.
		ok = blendCallSucceeded(cudaGetLastError(), "blendTexture launch")
			&& blendCallSucceeded(cudaDeviceSynchronize(), "blendTexture execution");
	}

	if (ok)
		blendCallSucceeded(cudaMemcpy(pDest, pDevDest, size, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");

	if (textureBound)
		cudaUnbindTexture(rT);

	// cudaFree accepts a null pointer, so this is safe after a failed allocation.
	cudaFree(pDevSrc);
	cudaFree(pDevDest);
}

// Public entry point declared in CUDAImageDispose.cuh.
// Forwards all arguments unchanged to blendTextureFunction, which performs
// the texture-based blend of `size` bytes from pSrc into pDest.
void blendCudaImage(unsigned char* pDest, unsigned char* pSrc, int size, float alpha)
{
	blendTextureFunction(pDest, pSrc, size, alpha);
}