/*
 * node-webcl
 * Version:
 * A WebCL implementation for desktops with NodeJS
 * 238 lines (203 loc) • 11.1 kB
 * JavaScript
 */
// Adapted from: OpenCL Programming by Example, Ravishekhar Banger, Packt Publishing 2013
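/*
* This test only works with images whose pixel count is a multiple of
* binSize * groupSize (for example a 512x512 power-of-two image); the kernel
* does not bounds-check its reads for other sizes.
*/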
var nodejs = (typeof window === 'undefined');
if(nodejs) {
webcl = require('../webcl');
clu = require('../lib/clUtils');
util = require("util"),
fs = require("fs");
Image = require("node-image").Image;
log = console.log;
}
else
webcl = window.webcl;
//First check if the webcl extension is installed at all
if (webcl == undefined) {
alert("Unfortunately your system does not support webcl. " +
"Make sure that you have the webcl extension installed.");
process.exit(-1);
}
// OpenCL C source of the histogram kernel, kept as an array of lines that is
// joined with "\n" into the single string handed to the program builder.
//
// BIN_SIZE is not defined in this source; histogram() supplies it at build
// time through the "-DBIN_SIZE=" compiler option.
//
// Kernel arguments:
//   data        - packed pixels, one uint each; bits 0-7, 8-15 and 16-23 are
//                 used as the R, G and B values respectively.
//   sharedArray - local scratch memory, split into three consecutive regions
//                 (R, G, B) of groupSize * BIN_SIZE uchar counters each, so
//                 the host must size it as 3 * groupSize * BIN_SIZE bytes.
//   binResultR/G/B - one BIN_SIZE-entry sub-histogram per work-group and
//                 channel, written at offset groupId * BIN_SIZE.
//
// NOTE(review): the per-work-item counters are uchar. A work-item that sees
// the same channel value in all BIN_SIZE of its pixels increments one counter
// BIN_SIZE times, which wraps to 0 when BIN_SIZE is 256 - confirm whether
// this is acceptable for the intended inputs.
// NOTE(review): reads from `data` are not bounds-checked; every work-item
// reads BIN_SIZE consecutive elements starting at globalId * BIN_SIZE.
var histogram_kernel = [
"#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable ",
"__kernel ",
"void histogram_kernel(__global const uint* data, ",
" __local uchar* sharedArray, ",
" __global uint* binResultR, ",
" __global uint* binResultG, ",
" __global uint* binResultB) ",
"{ ",
" size_t localId = get_local_id(0); ",
" size_t globalId = get_global_id(0); ",
" size_t groupId = get_group_id(0); ",
" size_t groupSize = get_local_size(0); ",
// Carve the single local allocation into one counter region per channel.
" __local uchar* sharedArrayR = sharedArray; ",
" __local uchar* sharedArrayG = sharedArray + groupSize * BIN_SIZE; ",
" __local uchar* sharedArrayB = sharedArray + 2 * groupSize * BIN_SIZE; ",
" ",
// Step 1: each work-item clears its own BIN_SIZE counters in all three regions.
" /* initialize shared array to zero */ ",
" for(int i = 0; i < BIN_SIZE; ++i) ",
" { ",
" sharedArrayR[localId * BIN_SIZE + i] = 0; ",
" sharedArrayG[localId * BIN_SIZE + i] = 0; ",
" sharedArrayB[localId * BIN_SIZE + i] = 0; ",
" } ",
" ",
" barrier(CLK_LOCAL_MEM_FENCE); ",
" ",
// Step 2: each work-item counts the channel values of its BIN_SIZE pixels
// into its private slice of the local counters.
" /* calculate thread-histograms */ ",
" for(int i = 0; i < BIN_SIZE; ++i) ",
" { ",
" uint value = data[globalId * BIN_SIZE + i]; ",
" uint valueR = value & 0xFF; ",
" uint valueG = (value & 0xFF00) >> 8; ",
" uint valueB = (value & 0xFF0000) >> 16; ",
" sharedArrayR[localId * BIN_SIZE + valueR]++; ",
" sharedArrayG[localId * BIN_SIZE + valueG]++; ",
" sharedArrayB[localId * BIN_SIZE + valueB]++; ",
" } ",
" ",
" barrier(CLK_LOCAL_MEM_FENCE); ",
" ",
// Step 3: each work-item handles BIN_SIZE / groupSize bins (bin index
// i * groupSize + localId), summing that bin over every work-item's slice and
// storing the sum in this work-group's sub-histogram.
" /* merge all thread-histograms into block-histogram */ ",
" for(int i = 0; i < BIN_SIZE / groupSize; ++i) ",
" { ",
" uint binCountR = 0; ",
" uint binCountG = 0; ",
" uint binCountB = 0; ",
" for(int j = 0; j < groupSize; ++j) ",
" { ",
" binCountR += sharedArrayR[j * BIN_SIZE + i * groupSize + localId]; ",
" binCountG += sharedArrayG[j * BIN_SIZE + i * groupSize + localId]; ",
" binCountB += sharedArrayB[j * BIN_SIZE + i * groupSize + localId]; ",
" } ",
" ",
" binResultR[groupId * BIN_SIZE + i * groupSize + localId] = binCountR; ",
" binResultG[groupId * BIN_SIZE + i * groupSize + localId] = binCountG; ",
" binResultB[groupId * BIN_SIZE + i * groupSize + localId] = binCountB; ",
" } ",
"}"].join("\n");
// Histogram configuration; histogram() reads both values as free variables.
var binSize = 256; // number of histogram bins
var groupSize = 16; // local workgroup size

// Load the sample picture stored next to this script and convert it to 32 bits per pixel.
var file = __dirname + '/lenaRGB.jpg';
log('Loading image ' + file);
var img = Image.load(file);
var image = img.convertTo32Bits();
// log('Image '+file+': \n'+util.inspect(image));

// histogram() uploads image.size bytes, computed here as height * pitch.
image.size = image.pitch * image.height;
var pixelCount = image.width * image.height;
log('Total number of pixels: ' + pixelCount);

var binsRGB = histogram(image);

// Sanity check of the result: every pixel lands in exactly one bin per
// channel, so each channel's bins should add up to the number of pixels.
log('Validation: total number of pixels should be: ' + pixelCount);
var channelTotals = [0, 0, 0]; // running sums for R, G, B
for (var bin = 0; bin < binSize; ++bin) {
  for (var channel = 0; channel < channelTotals.length; ++channel) {
    channelTotals[channel] += binsRGB[channel][bin];
  }
}
log ("Total Number of Red Pixels = ", channelTotals[0]);
log ("Total Number of Green Pixels = ", channelTotals[1]);
log ("Total Number of Blue Pixels = ", channelTotals[2]);

// cleanup
image.unload();
/**
 * Computes the red, green and blue histograms of an image with WebCL.
 *
 * The image is uploaded to the first device of a GPU context, the
 * "histogram_kernel" kernel produces one sub-histogram per work-group and
 * channel, and the sub-histograms are summed on the host.
 *
 * Reads the file-level values binSize, groupSize, histogram_kernel, webcl,
 * clu and log. On a WebCL failure (buffer creation, program build) the error
 * is logged and the process exits with code -1.
 *
 * @param {Object} image - pixel source; this function reads `width`,
 *     `height`, `size` (number of bytes uploaded) and `buffer` (the data
 *     passed to enqueueWriteBuffer).
 * @returns {Uint32Array[]} three arrays of binSize counts, in the order
 *     [red, green, blue].
 */
function histogram(image) {
  var pixelCount = image.width * image.height;
  var pixelsPerGroup = binSize * groupSize; // each work-item consumes binSize pixels

  // One sub-histogram (and one work-group) per pixelsPerGroup pixels, rounded up.
  var subHistgCnt = clu.DivUp(pixelCount, pixelsPerGroup);
  if (pixelCount % pixelsPerGroup !== 0) {
    // The kernel does not bounds-check: the rounded-up ND-range makes the last
    // work-group read past the end of the uploaded pixels.
    log('Warning: pixel count '+pixelCount+' is not a multiple of '+pixelsPerGroup+
        '; the kernel will read beyond the image data and the histogram will be inaccurate.');
  }

  var szIntermediateHist = binSize * subHistgCnt;
  var szBytesIntermediateHist = Uint32Array.BYTES_PER_ELEMENT * szIntermediateHist;

  // Host-side copies of the per-work-group sub-histograms.
  var midDeviceBinR = new Uint32Array(szIntermediateHist);
  var midDeviceBinG = new Uint32Array(szIntermediateHist);
  var midDeviceBinB = new Uint32Array(szIntermediateHist);

  // Final histograms (typed arrays start out zero-filled).
  var deviceBinR = new Uint32Array(binSize);
  var deviceBinG = new Uint32Array(binSize);
  var deviceBinB = new Uint32Array(binSize);

  // create GPU context for this platform
  var context = webcl.createContext(webcl.DEVICE_TYPE_GPU);

  // find the device for this context
  var devices = context.getInfo(webcl.CONTEXT_DEVICES);
  var device = devices[0];

  // Report the device vendor and device name
  var vendor_name = device.getInfo(webcl.DEVICE_VENDOR);
  var device_name = device.getInfo(webcl.DEVICE_NAME);
  log("Connecting to: "+vendor_name+" "+device_name);

  // create device buffers
  var /*cl_mem*/ imageBuffer;
  try {
    imageBuffer = context.createBuffer(webcl.MEM_READ_ONLY | webcl.MEM_ALLOC_HOST_PTR, image.size);
  }
  catch(err) {
    log('error creating input image buffer. '+err);
    process.exit(-1);
  }

  // A single command queue, local to this call, serves the upload, the kernel
  // launch and the read-back.
  var queue = context.createCommandQueue(device, 0);

  // Write our data set into the input array in device memory asynchronously
  queue.enqueueWriteBuffer(imageBuffer, false, 0, image.size, image.buffer);

  // wait for image data transfered
  queue.finish();

  // Intermediate image histogram buffers, one per channel
  var /*cl_mem*/ intermediateHistR, intermediateHistG, intermediateHistB;
  try {
    intermediateHistR = context.createBuffer(webcl.MEM_WRITE_ONLY, szBytesIntermediateHist);
    intermediateHistG = context.createBuffer(webcl.MEM_WRITE_ONLY, szBytesIntermediateHist);
    intermediateHistB = context.createBuffer(webcl.MEM_WRITE_ONLY, szBytesIntermediateHist);
  }
  catch(err) {
    log('error creating output buffers of size '+szBytesIntermediateHist+' bytes. '+err);
    process.exit(-1);
  }

  var program;
  var kernel;
  try {
    // Create and program from source
    program = context.createProgram(histogram_kernel);

    // Build program; BIN_SIZE is injected here because the kernel source does not define it
    program.build(device,"-cl-kernel-arg-info -DBIN_SIZE="+binSize);

    // Create kernel object
    kernel = program.createKernel("histogram_kernel");
  }
  catch(err) {
    log('Error: '+err.name);
    if (program) {
      log("Error building program. "+program.getBuildInfo(device,webcl.PROGRAM_BUILD_LOG));
    }
    else {
      // createProgram itself failed, so there is no program to ask for a build log
      log("Error creating program. "+err);
    }
    process.exit(-1);
  }

  // Set the arguments to our compute kernel
  kernel.setArg(0, imageBuffer);
  // Size in bytes of the __local array: three channel regions of
  // groupSize * binSize one-byte counters each.
  kernel.setArg(1, new Uint32Array([3 * groupSize * binSize * 1]));
  kernel.setArg(2, intermediateHistR);
  kernel.setArg(3, intermediateHistG);
  kernel.setArg(4, intermediateHistB);

  // Init ND-range: one work-group of groupSize work-items per sub-histogram
  var globalWS = [ subHistgCnt * groupSize ];
  var localWS = [ groupSize ];

  log("Global work item size: " + globalWS);
  log("Local work item size: " + localWS);

  // Execute (enqueue) kernel
  queue.enqueueNDRangeKernel(kernel, 1,
      null,
      globalWS,
      localWS);

  queue.finish(); //Finish all the operations

  // read histograms (non-blocking), then wait for all three transfers
  var readEvt = [new WebCLEvent(), new WebCLEvent(), new WebCLEvent()];
  queue.enqueueReadBuffer(intermediateHistR, false, 0, szBytesIntermediateHist, midDeviceBinR, null, readEvt[0]);
  queue.enqueueReadBuffer(intermediateHistG, false, 0, szBytesIntermediateHist, midDeviceBinG, null, readEvt[1]);
  queue.enqueueReadBuffer(intermediateHistB, false, 0, szBytesIntermediateHist, midDeviceBinB, null, readEvt[2]);
  webcl.waitForEvents(readEvt);

  // Calculate final histogram bin: sum bin j over every work-group's sub-histogram
  for(var i = 0; i < subHistgCnt; ++i)
  {
    var base = i * binSize;
    for(var j = 0; j < binSize; ++j)
    {
      deviceBinR[j] += midDeviceBinR[base + j];
      deviceBinG[j] += midDeviceBinG[base + j];
      deviceBinB[j] += midDeviceBinB[base + j];
    }
  }

  return [deviceBinR, deviceBinG, deviceBinB];
}