webcl-nodep
Version:
A fork of node-webcl without dependencies other than OpenCL
214 lines (177 loc) • 8.34 kB
JavaScript
// Copyright (c) 2011-2012, Motorola Mobility, Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the Motorola Mobility, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
var nodejs = (typeof window === 'undefined');
if(nodejs) {
WebCL = require('../webcl');
clu = require('../lib/clUtils');
util = require('util');
fs = require('fs');
Image = require('node-image').Image;
log = console.log;
}
//First check if the webcl extension is installed at all
if (WebCL == undefined) {
alert("Unfortunately your system does not support WebCL. " +
"Make sure that you have the WebCL extension installed.");
return;
}
process.on('exit',function() {
log('Exiting app');
log(util.inspect(process.memoryUsage()));
})
// ---- Box filter parameters ----

// Default number of output pixels per work-group (used as the row kernel's
// local work size); may be modified depending on HW/OpenCL capabilities.
var uiNumOutputPix = 64;

// Initial radius of the 2D box filter mask.
var iRadius = 10;

// Precalculated rescaling value: reciprocal of the mask width (2 * radius + 1).
var fScale = 1 / (iRadius * 2 + 1);

// ---- OpenCL objects (declared here, assigned later in the script) ----

var ckBoxRowsTex;  // kernel for the row sum (2D image/texture version)
var ckBoxColumns;  // kernel for the column sum and normalization
var cmDevBufIn;    // device memory object (buffer or 2D image) for input data
var cmDevBufTemp;  // device memory object for the temporary result
var cmDevBufOut;   // device memory object for the output

// ---- Sizes ----

var szBuffBytes;                // size of the main image buffers
var szGlobalWorkSize = [0, 0];  // global number of work items
var szLocalWorkSize = [0, 0];   // number of work items per work-group
var szMaxWorkgroupSize = 512;   // initial maximum number of work items
// Load the source picture that sits next to this script and convert it to a
// 32-bit image; `image` is what the rest of the script works on.
var file = __dirname + '/lenaRGB.jpg';
log('Loading image ' + file);

var img = Image.load(file);
var image = img.convertTo32Bits();

// Size of the image buffers: number of rows times the row pitch
// (presumably bytes per row -- confirm against node-image).
var szBuffBytes = image.pitch * image.height;

// NOTE(review): the original keeps a disabled `img.unload()` call at this
// point; whether `img` can be released once `image` exists is not shown here.
//img.unload();

var imageDescription = util.inspect(image);
log('Image ' + file + ': \n' + imageDescription);
//Pick platform
var platformList=WebCL.getPlatforms();
var platform=platformList[0];
//Query the set of GPU devices on this platform
var devices = platform.getDevices(WebCL.DEVICE_TYPE_ALL);
log(" # of Devices Available = "+devices.length);
var uiTargetDevice = clu.clamp(uiTargetDevice, 0, (devices.length - 1));
var device=devices[uiTargetDevice];
log(" Using Device "+ uiTargetDevice+": "+device.getInfo(WebCL.DEVICE_NAME));
var hasImageSupport=device.getInfo(WebCL.DEVICE_IMAGE_SUPPORT);
if(hasImageSupport != WebCL.TRUE) {
log("No image support");
return;
}
var numComputeUnits=device.getInfo(WebCL.DEVICE_MAX_COMPUTE_UNITS);
log(' # of Compute Units = '+numComputeUnits);
log(' createContext...');
context=WebCL.createContext({
devices: device,
platform: platform
});
// Create a command-queue
queue=context.createCommandQueue(device, 0);
// Describe the source data: RGBA with 8-bit unsigned channels, sized and
// pitched like the loaded image.
var InputFormat = {};
InputFormat.order = WebCL.RGBA;
InputFormat.data_type = WebCL.UNSIGNED_INT8;
InputFormat.size = [ image.width, image.height ];
InputFormat.rowPitch = image.pitch;

// 2D image (texture) on the device, created read-only over the host pixels.
var inputImageFlags = WebCL.MEM_READ_ONLY | WebCL.MEM_USE_HOST_PTR;
cmDevBufIn = context.createImage(inputImageFlags, InputFormat, image.buffer);

// Sampler handed to the row kernel: clamp addressing, nearest filtering.
RowSampler = context.createSampler(false, WebCL.ADDRESS_CLAMP, WebCL.FILTER_NEAREST);

// Intermediate and result buffer objects, both created with szBuffBytes.
cmDevBufTemp = context.createBuffer(WebCL.MEM_READ_WRITE, szBuffBytes);
cmDevBufOut = context.createBuffer(WebCL.MEM_WRITE_ONLY, szBuffBytes);

// Create the program from the kernel source stored next to this script.
var kernelSourcePath = __dirname + '/BoxFilter.cl';
sourceCL = fs.readFileSync(kernelSourcePath, 'ascii');
cpProgram = context.createProgram(sourceCL);

// Build it for the selected device.
// NOTE(review): the value returned by build() is stored in ciErrNum but is
// never inspected in this file.
sBuildOpts = "-cl-fast-relaxed-math";
ciErrNum = cpProgram.build(device, sBuildOpts);

// Create one kernel object per filter pass.
ckBoxRowsTex = cpProgram.createKernel("BoxRowsTex");
ckBoxColumns = cpProgram.createKernel("BoxColumns");

// Bind the arguments of both kernels.
ResetKernelArgs(image.width, image.height, iRadius, fScale);
// Run the filter twice: the first pass is a warm-up to make sure the OpenCL
// driver is awake, the second is the actual processing on the GPU. Each pass
// is followed by a wait on the command queue.
for (var pass = 0; pass < 2; pass++) {
  BoxFilterGPU(image, cmDevBufOut, iRadius, fScale);
  queue.finish();
}

// Copy results back to host memory, block until complete.
var uiOutput = new Uint8Array(szBuffBytes);
queue.enqueueReadBuffer(cmDevBufOut, WebCL.TRUE, 0, szBuffBytes, uiOutput);

// PNG uses 32-bit images, JPG can only work on 24-bit images, hence PNG here.
// The three trailing constants are presumably the channel masks expected by
// Image.save -- confirm against node-image.
var outputName = 'out_' + iRadius + '.png';
var saved = Image.save(outputName, uiOutput, image.width, image.height,
                       image.pitch, image.bpp, 0xFF0000, 0x00FF00, 0xFF);
if (!saved) {
  log("Error saving image");
}

// Report memory usage; no OpenCL objects are released explicitly here.
log(util.inspect(process.memoryUsage()));
// Binds all arguments of the row kernel (image/texture version) and of the
// column kernel.
//   width, height : image dimensions, passed to both kernels as UINT
//   r             : filter radius, passed to both kernels as INT
//   fScale        : rescaling value, passed to both kernels as FLOAT
// The memory objects and the sampler are taken from the file-level variables
// cmDevBufIn, cmDevBufTemp, cmDevBufOut and RowSampler.
function ResetKernelArgs(width, height, r, fScale)
{
  var kinds = WebCL.type;

  // Calls kernel.setArg(index, ...entry) for every entry, in list order.
  function bindArgs(kernel, argList) {
    for (var index = 0; index < argList.length; index++) {
      kernel.setArg.apply(kernel, [index].concat(argList[index]));
    }
  }

  // Scalar tail shared by both kernels: each entry is [value, type tag].
  var scalarArgs = [
    [width, kinds.UINT],
    [height, kinds.UINT],
    [r, kinds.INT],
    [fScale, kinds.FLOAT]
  ];

  // Row kernel: input image, temp buffer, sampler, then the scalars.
  bindArgs(ckBoxRowsTex, [[cmDevBufIn], [cmDevBufTemp], [RowSampler]].concat(scalarArgs));

  // Column kernel: temp buffer, output buffer, then the scalars.
  bindArgs(ckBoxColumns, [[cmDevBufTemp], [cmDevBufOut]].concat(scalarArgs));
}
//OpenCL computation function for GPU:
//Writes the host image into the input image object, then launches the row
//kernel followed by the column kernel and waits for the queue to drain.
//Reading the result back to the host is left to the caller.
//  image          : object providing width, height and buffer
//  cmOutputBuffer : memory object bound as argument 1 of the column kernel
//  r, fScale      : accepted for signature compatibility; not read in here
//*****************************************************************************
function BoxFilterGPU(image, cmOutputBuffer, r, fScale)
{
  // Fills the shared work-size arrays for a 1D launch: `groupSize` items per
  // work-group, and a global size rounded up to cover `extent`.
  function setWorkSizes(groupSize, extent) {
    szLocalWorkSize[0] = groupSize;
    szLocalWorkSize[1] = 1;
    szGlobalWorkSize[0] = szLocalWorkSize[0] * clu.DivUp(extent, szLocalWorkSize[0]);
    szGlobalWorkSize[1] = 1;
  }

  // Point the column kernel at the requested output buffer.
  ckBoxColumns.setArg(1, cmOutputBuffer);

  // Copy input data from host to device.
  var origin = [0, 0, 0];                        // offset of the texture origin in the host image
  var region = [image.width, image.height, 1];   // size of the texture region to operate on
  log('enqueue image: origin='+origin+", region="+region);
  queue.enqueueWriteImage(cmDevBufIn, WebCL.TRUE, origin, region, 0, 0, image.buffer);

  // Row kernel: global size derived from the image height.
  setWorkSizes(uiNumOutputPix, image.height);
  log("row kernel work sizes: global="+szGlobalWorkSize+" local="+szLocalWorkSize);

  // Sync host before launching.
  queue.finish();
  queue.enqueueNDRangeKernel(ckBoxRowsTex, null, szGlobalWorkSize, szLocalWorkSize);

  // Column kernel: fixed group size of 64, global size derived from the width.
  setWorkSizes(64, image.width);
  log("column kernel work sizes: global="+szGlobalWorkSize+" local="+szLocalWorkSize);
  queue.enqueueNDRangeKernel(ckBoxColumns, null, szGlobalWorkSize, szLocalWorkSize);

  // Sync host: wait until both kernels are done.
  queue.finish();
}