Julia集中的元素都是經(jīng)過(guò)簡(jiǎn)單的迭代計(jì)算得到的,很適合用CUDA進(jìn)行加速。對(duì)一個(gè)600*600的圖像,需要進(jìn)行360000次迭代計(jì)算,所以在CUDA中創(chuàng)建了600*600個(gè)線程塊(block),每個(gè)線程塊包含1個(gè)線程,并行執(zhí)行360000次運(yùn)行,圖像的創(chuàng)建和顯示通過(guò)OpenCV實(shí)現(xiàn):
#include "cuda_runtime.h"#include <highgui.hpp>using namespace cv;#define DIM 600 //圖像長(zhǎng)寬struct cuComplex{ float r; float i; __device__ cuComplex(float a, float b) : r(a), i(b) {} __device__ float magnitude2(void) { return r * r + i * i; } __device__ cuComplex Operator*(const cuComplex& a) { return cuComplex(r*a.r - i*a.i, i*a.r + r*a.i); } __device__ cuComplex operator+(const cuComplex& a) { return cuComplex(r + a.r, i + a.i); }};__device__ int julia(int x, int y){ const float scale = 1.5; float jx = scale * (float)(DIM / 2 - x) / (DIM / 2); float jy = scale * (float)(DIM / 2 - y) / (DIM / 2); cuComplex c(0.25, 0.010); cuComplex a(jx, jy); int i = 0; for (i = 0; i < 200; i++) { a = a * a + c; if (a.magnitude2() > 1000) return 0; } return 1;}__global__ void kernel(unsigned char *ptr){ // map from blockIdx to pixel position int x = blockIdx.x; int y = blockIdx.y; int offset = x + y * gridDim.x; // now calculate the value at that position int juliaValue = julia(x, y); ptr[offset * 3 + 0] = 0; ptr[offset * 3 + 1] = 0; ptr[offset * 3 + 2] = 255 * juliaValue;}// globals needed by the update routinestruct DataBlock{ unsigned char *dev_bitmap;};int main(void){ DataBlock data; cudaError_t error; Mat image = Mat(DIM, DIM, CV_8UC3, Scalar::all(0)); data.dev_bitmap = image.data; unsigned char *dev_bitmap; error = cudaMalloc((void**)&dev_bitmap, 3 * image.cols*image.rows); data.dev_bitmap = dev_bitmap; dim3 grid(DIM, DIM); kernel << <grid, 1 >> > (dev_bitmap); error = cudaMemcpy(image.data, dev_bitmap, 3 * image.cols*image.rows, cudaMemcpyDeviceToHost); error = cudaFree(dev_bitmap); imshow("CUDA For Julia | c(0.25, 0.010)", image); waitKey();}c(-0.8,0.156):
c(-0.85,0.06):
c(-0.305,0.60):
c(0.25,0.010):
新聞熱點(diǎn)
疑難解答
圖片精選
網(wǎng)友關(guān)注