Pages: 1 2
CUDA code
// This code originates from supercomputingblog.com
// This code is provided as is without any warranty of any kind
// Feel free to use this code however you'd like, as long as
// due credit is given, and this header remains intact.

#define C_MAX_INTENSITIES 256 // 8 bits per color, 256 intensities is fine.

// PaintCu
//
// This CUDA kernel effectively transforms an image into an image that looks
// like it's been painted. Each thread will calculate exactly 1 final pixel.
//
// For its pixel, the thread scans the (2*radius+1) x (2*radius+1) window
// centred on it (cut off at the image borders), sorts every window pixel into
// an intensity bin, and writes the average colour of the most populated bin.
// When several bins tie, the bin that reached the highest count first wins.
//
// The processing of each pixel requires many computations, and the use of
// 4 kilobytes of memory (four per-thread arrays of C_MAX_INTENSITIES ints).
// This code will work well on Fermi architecture and beyond due to the cache
// structure of the GPU.
//
// Launch layout: 2D; x covers columns, y covers rows. Threads that fall
// outside width x height exit immediately, so the grid may overshoot.
//
// Parameters:
//   width, height   image size in pixels.
//   stride          row pitch; indexing uses stride/4 elements per row, i.e.
//                   presumably bytes per row of 32-bit pixels -- confirm
//                   against the caller.
//   pRawBitmapOrig  destination image. Red is read/written at bits 16-23,
//                   green at 8-15, blue at 0-7; the top byte is forced to 0xff.
//   pBitmapCopy     source image, same layout. NOTE(review): neighbouring
//                   threads read pixels that other threads write, so this
//                   must not alias pRawBitmapOrig.
//   radius          half-width of the window. Negative values are treated
//                   as 0 (the pixel itself).
//   nBins           highest intensity-bin index (bins 0..nBins are used).
//                   Clamped to [0, C_MAX_INTENSITIES - 1] so the fixed-size
//                   histograms can never be indexed out of bounds.
void __global__ PaintCu(int width, int height, int stride, unsigned int *pRawBitmapOrig, unsigned int *pBitmapCopy, int radius, int nBins)
{
    int i = blockIdx.y * blockDim.y + threadIdx.y;  // row of this thread's pixel
    int j = blockIdx.x * blockDim.x + threadIdx.x;  // column of this thread's pixel

    // Test to see if we're testing a valid pixel
    if (i >= height || j >= width) return;  // Don't bother doing the calculation. We're not in a valid pixel location

    // Sanitize the tuning parameters. Bin indices run from 0 to bins
    // inclusive, so bins may be at most C_MAX_INTENSITIES - 1.
    const int bins = (nBins < 0) ? 0
                   : (nBins > C_MAX_INTENSITIES - 1) ? C_MAX_INTENSITIES - 1
                   : nBins;
    // A non-negative reach guarantees the window contains at least pixel
    // (i, j), so the winning bin's count below is never zero.
    const int reach = (radius < 0) ? 0 : radius;

    int intensityCount[C_MAX_INTENSITIES];
    int avgR[C_MAX_INTENSITIES];
    int avgG[C_MAX_INTENSITIES];
    int avgB[C_MAX_INTENSITIES];

    // Only bins 0..bins are ever touched, so only those need clearing.
    for (int k = 0; k <= bins; k++)
    {
        intensityCount[k] = 0;
        avgR[k] = 0;
        avgG[k] = 0;
        avgB[k] = 0;
    }

    // Clamp the window to the image once instead of testing every iteration.
    // Written so that neither bound can overflow, even for a huge radius.
    const int rowFirst = (reach > i) ? 0 : i - reach;
    const int rowLast  = (reach > height - 1 - i) ? height - 1 : i + reach;
    const int colFirst = (reach > j) ? 0 : j - reach;
    const int colLast  = (reach > width - 1 - j) ? width - 1 : j + reach;

    int maxIntensityCount = 0;
    int maxIntensityCountIndex = 0;

    for (int k = rowFirst; k <= rowLast; k++)
    {
        for (int l = colFirst; l <= colLast; l++)
        {
            int curPixel = pBitmapCopy[k*stride/4 + l];
            int r = ((curPixel & 0x00ff0000) >> 16);
            int g = ((curPixel & 0x0000ff00) >> 8);
            int b = ((curPixel & 0x000000ff) >> 0);

            // Mean channel value (0..255) scaled into 0..bins.
            int curIntensity = (int)((float)((r+g+b)/3*bins)/255.0f);

            intensityCount[curIntensity]++;
            if (intensityCount[curIntensity] > maxIntensityCount)
            {
                maxIntensityCount = intensityCount[curIntensity];
                maxIntensityCountIndex = curIntensity;
            }

            // Running per-bin channel sums; turned into averages below.
            avgR[curIntensity] += r;
            avgG[curIntensity] += g;
            avgB[curIntensity] += b;
        }
    }

    // maxIntensityCount >= 1 here because the window always holds pixel (i, j).
    int finalR = avgR[maxIntensityCountIndex] / maxIntensityCount;
    int finalG = avgG[maxIntensityCountIndex] / maxIntensityCount;
    int finalB = avgB[maxIntensityCountIndex] / maxIntensityCount;

    pRawBitmapOrig[i*stride/4 + j] = 0xff000000 | ((finalR << 16) + (finalG << 8) + finalB);
}
CPU code
#pragma omp parallel for (int i=0; i < height; i++) { #define C_MAX_INTENSITIES 256 // 8 bits per color, 256 intensities is fine. int intensityCount[C_MAX_INTENSITIES]; int avgR[C_MAX_INTENSITIES]; int avgG[C_MAX_INTENSITIES]; int avgB[C_MAX_INTENSITIES]; for (int j=0; j < width; j++) { // reset to zero for (int k=0; k <= nBins; k++) { intensityCount[k] = 0; avgR[k] = 0; avgG[k] = 0; avgB[k] = 0; } // we have a radius r int maxIntensityCount = 0; int maxIntensityCountIndex = 0; for (int k=i-radius; k <= i+radius;k++) { if (k < 0 || k >= height) continue; for (int l=j-radius; l <= j+radius; l++) { if (l < 0 || l >= width) continue; int curPixel = pBitmapCopy[k*bitmapData.Stride/4 + l]; int r = ((curPixel & 0x00ff0000) >> 16); int g = ((curPixel & 0x0000ff00) >> 8); int b = ((curPixel & 0x000000ff) >> 0); int curIntensity = (int)((float)((r+g+b)/3*nBins)/255.0f); intensityCount[curIntensity]++; if (intensityCount[curIntensity] > maxIntensityCount) { maxIntensityCount = intensityCount[curIntensity]; maxIntensityCountIndex = curIntensity; } avgR[curIntensity] += r; avgG[curIntensity] += g; avgB[curIntensity] += b; } } int finalR = avgR[maxIntensityCountIndex] / maxIntensityCount; int finalG = avgG[maxIntensityCountIndex] / maxIntensityCount; int finalB = avgB[maxIntensityCountIndex] / maxIntensityCount; pRawBitmapOrig[i*bitmapData.Stride/4 + j] = 0xff000000 | ((finalR << 16) + (finalG << 8) + finalB); } }
Pages: 1 2