JOCL FAQ: Difference between revisions
Jump to navigation
Jump to search
Created page with '== Get the Source Code == Create a local copy/branch of the git repository, either anonymous: * git clone git://github.com/mbien/gluegen.git gluegen * git clone git://github.co…' |
added rudimentary getting started page |
||
| Line 6: | Line 6: | ||
* git clone git://github.com/mbien/gluegen.git gluegen | * git clone git://github.com/mbien/gluegen.git gluegen | ||
* git clone git://github.com/mbien/jocl.git jocl | * git clone git://github.com/mbien/jocl.git jocl | ||
* git clone git://github.com/mbien/jocl-demos.git jocl-demos | |||
* git clone git://github.com/mbien/jogl.git jogl | * git clone git://github.com/mbien/jogl.git jogl | ||
| Line 13: | Line 14: | ||
* git clone git@github.com:username/gluegen.git gluegen | * git clone git@github.com:username/gluegen.git gluegen | ||
* git clone git@github.com:username/jocl.git jocl | * git clone git@github.com:username/jocl.git jocl | ||
* git clone git@github.com:username/jocl-demos.git jocl-demos | |||
* git clone git@github.com:username/jogl.git jogl | * git clone git@github.com:username/jogl.git jogl | ||
== Getting Started == | |||
Hello JOCL host program: | |||
<pre> | |||
import com.mbien.opencl.*; | |||
import java.io.IOException; | |||
import java.nio.FloatBuffer; | |||
import java.util.Random; | |||
import static java.lang.System.*; | |||
import static com.mbien.opencl.CLMemory.Mem.*; | |||
/** | |||
* Hello Java OpenCL example. Adds all elements of buffer A to buffer B | |||
* and stores the result in buffer C.<br/> | |||
* Sample was inspired by the Nvidia VectorAdd example written in C/C++ | |||
* which is bundled in the Nvidia OpenCL SDK. | |||
* @author Michael Bien | |||
*/ | |||
public class HelloJOCL { | |||
public static void main(String[] args) throws IOException { | |||
int elementCount = 11444777; // Length of arrays to process | |||
int localWorkSize = 256; // Local work size | |||
int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize | |||
// set up | |||
CLContext context = CLContext.create(); | |||
CLProgram program = context.createProgram(HelloJOCL.class.getResourceAsStream("VectorAdd.cl")).build(); | |||
CLBuffer<FloatBuffer> clBufferA = context.createFloatBuffer(globalWorkSize, READ_ONLY); | |||
CLBuffer<FloatBuffer> clBufferB = context.createFloatBuffer(globalWorkSize, READ_ONLY); | |||
CLBuffer<FloatBuffer> clBufferC = context.createFloatBuffer(globalWorkSize, WRITE_ONLY); | |||
// fill read buffers with random numbers. | |||
fillBuffer(clBufferA.getBuffer(), 12345); | |||
fillBuffer(clBufferB.getBuffer(), 67890); | |||
// get a reference to the kernel function with the name 'VectorAdd' | |||
// and map the buffers to its input parameters. | |||
CLKernel kernel = program.createCLKernel("VectorAdd"); | |||
kernel.putArgs(clBufferA, clBufferB, clBufferC).putArg(elementCount); | |||
// create command queue on fastest device. | |||
CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue(); | |||
// asynchronous write of data to GPU device, | |||
// blocking read later to get the computed results back. | |||
long time = nanoTime(); | |||
queue.putWriteBuffer(clBufferA, false) | |||
.putWriteBuffer(clBufferB, false) | |||
.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize) | |||
.putReadBuffer(clBufferC, true); | |||
time = nanoTime() - time; | |||
// cleanup all resources associated with this context. | |||
context.release(); | |||
// print first few elements of the resulting buffer to the console. | |||
out.println("a+b=c results snapshot: "); | |||
for(int i = 0; i < 10; i++) | |||
out.print(clBufferC.getBuffer().get() + ", "); | |||
out.println("...; " + clBufferC.getBuffer().remaining() + " more"); | |||
out.println("computation took: "+(time/1000000)+"ms"); | |||
} | |||
/* utilities */ | |||
private static void fillBuffer(FloatBuffer buffer, int seed) { | |||
Random rnd = new Random(seed); | |||
while(buffer.remaining() != 0) | |||
buffer.put(rnd.nextFloat()*100); | |||
buffer.rewind(); | |||
} | |||
private static int roundUp(int groupSize, int globalSize) { | |||
int r = globalSize % groupSize; | |||
if (r == 0) return globalSize; | |||
else return globalSize + groupSize - r; | |||
} | |||
} | |||
</pre> | |||
Hello JOCL Kernel | |||
<pre> | |||
// OpenCL Kernel Function for element by element vector addition | |||
kernel void VectorAdd(global const float* a, global const float* b, global float* c, int numElements) { | |||
// get index into global data array | |||
int iGID = get_global_id(0); | |||
// bound check, equivalent to the limit on a 'for' loop | |||
if (iGID >= numElements) { | |||
return; | |||
} | |||
// add the vector elements | |||
c[iGID] = a[iGID] + b[iGID]; | |||
} | |||
</pre> | |||
Revision as of 19:46, 18 March 2010
Get the Source Code
Create a local copy/branch of the git repository, either anonymously:
- git clone git://github.com/mbien/gluegen.git gluegen
- git clone git://github.com/mbien/jocl.git jocl
- git clone git://github.com/mbien/jocl-demos.git jocl-demos
- git clone git://github.com/mbien/jogl.git jogl
or via SSH with your user credentials, so you can easily push your changes back to the GitHub server:
- git clone git@github.com:username/gluegen.git gluegen
- git clone git@github.com:username/jocl.git jocl
- git clone git@github.com:username/jocl-demos.git jocl-demos
- git clone git@github.com:username/jogl.git jogl
Getting Started
Hello JOCL host program:
import com.mbien.opencl.*;
import java.io.IOException;
import java.nio.FloatBuffer;
import java.util.Random;
import static java.lang.System.*;
import static com.mbien.opencl.CLMemory.Mem.*;
/**
* Hello Java OpenCL example. Adds all elements of buffer A to buffer B
* and stores the result in buffer C.<br/>
* Sample was inspired by the Nvidia VectorAdd example written in C/C++
* which is bundled in the Nvidia OpenCL SDK.
* @author Michael Bien
*/
public class HelloJOCL {

    /**
     * Runs the vector addition sample: fills two input buffers with random
     * numbers, enqueues the 'VectorAdd' kernel, reads the result back and
     * prints a short snapshot together with the elapsed time.
     *
     * @param args ignored
     * @throws IOException if the kernel source 'VectorAdd.cl' cannot be found or read
     */
    public static void main(String[] args) throws IOException {

        int elementCount = 11444777;                                // Length of arrays to process
        int localWorkSize = 256;                                    // Local work size
        int globalWorkSize = roundUp(localWorkSize, elementCount);  // rounded up to the nearest multiple of the localWorkSize

        // set up
        CLContext context = CLContext.create();
        try {
            CLProgram program = buildProgram(context, "VectorAdd.cl");

            CLBuffer<FloatBuffer> clBufferA = context.createFloatBuffer(globalWorkSize, READ_ONLY);
            CLBuffer<FloatBuffer> clBufferB = context.createFloatBuffer(globalWorkSize, READ_ONLY);
            CLBuffer<FloatBuffer> clBufferC = context.createFloatBuffer(globalWorkSize, WRITE_ONLY);

            // fill read buffers with random numbers.
            fillBuffer(clBufferA.getBuffer(), 12345);
            fillBuffer(clBufferB.getBuffer(), 67890);

            // get a reference to the kernel function with the name 'VectorAdd'
            // and map the buffers to its input parameters.
            CLKernel kernel = program.createCLKernel("VectorAdd");
            kernel.putArgs(clBufferA, clBufferB, clBufferC).putArg(elementCount);

            // create command queue on fastest device.
            CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue();

            // asynchronous write of data to GPU device,
            // blocking read later to get the computed results back.
            long time = nanoTime();
            queue.putWriteBuffer(clBufferA, false)
                 .putWriteBuffer(clBufferB, false)
                 .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
                 .putReadBuffer(clBufferC, true);
            time = nanoTime() - time;

            // print first few elements of the resulting buffer to the console.
            out.println("a+b=c results snapshot: ");
            for (int i = 0; i < 10; i++) {
                out.print(clBufferC.getBuffer().get() + ", ");
            }
            out.println("...; " + clBufferC.getBuffer().remaining() + " more");

            out.println("computation took: "+(time/1000000)+"ms");
        } finally {
            // cleanup all resources associated with this context,
            // also when one of the steps above failed.
            context.release();
        }
    }

    /* utilities */

    /**
     * Loads the kernel source from a classpath resource located relative to
     * this class and builds it in the given context.
     *
     * @param context  context the program is created in
     * @param resource resource name of the kernel source file
     * @return the built program
     * @throws IOException if the resource does not exist or cannot be read
     */
    private static CLProgram buildProgram(CLContext context, String resource) throws IOException {
        java.io.InputStream source = HelloJOCL.class.getResourceAsStream(resource);
        if (source == null) {
            // getResourceAsStream signals a missing resource with null;
            // report it clearly instead of passing null on.
            throw new IOException("kernel source '" + resource + "' not found on the classpath");
        }
        try {
            return context.createProgram(source).build();
        } finally {
            // closing an already closed stream has no effect
            source.close();
        }
    }

    /**
     * Fills the buffer from its current position up to its limit with
     * pseudo random values in the range [0, 100) and rewinds it afterwards.
     *
     * @param buffer buffer to fill
     * @param seed   seed for the random number generator, makes runs reproducible
     */
    private static void fillBuffer(FloatBuffer buffer, int seed) {
        Random rnd = new Random(seed);
        while (buffer.hasRemaining()) {
            buffer.put(rnd.nextFloat()*100);
        }
        buffer.rewind();
    }

    /**
     * Rounds globalSize up to the nearest multiple of groupSize.
     *
     * @param groupSize  work group size, must not be zero
     * @param globalSize number of elements to cover
     * @return globalSize if it already is a multiple of groupSize,
     *         otherwise the next larger multiple
     */
    private static int roundUp(int groupSize, int globalSize) {
        int r = globalSize % groupSize;
        if (r == 0) {
            return globalSize;
        }
        return globalSize + groupSize - r;
    }

}
Hello JOCL Kernel
// OpenCL kernel: element-wise vector addition, c[i] = a[i] + b[i].
kernel void VectorAdd(global const float* a, global const float* b, global float* c, int numElements) {

    // position of this work-item in the global data array
    int idx = get_global_id(0);

    // bound check, equivalent to the limit on a 'for' loop:
    // work-items at or beyond numElements do nothing
    if (idx < numElements) {
        // add the vector elements
        c[idx] = a[idx] + b[idx];
    }
}