JOCL Tutorial

From JogampWiki
Revision as of 16:36, 25 April 2010 by Mbien (talk | contribs) (moved hello world from faq to tutorial)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

Hello JOCL host program:

import com.jogamp.opencl.*;
import java.io.IOException;
import java.nio.FloatBuffer;
import java.util.Random;

import static java.lang.System.*;
import static com.jogamp.opencl.CLMemory.Mem.*;

/**
 * Hello Java OpenCL example. Adds all elements of buffer A to buffer B
 * and stores the result in buffer C.<br/>
 * Sample was inspired by the Nvidia VectorAdd example written in C/C++
 * which is bundled in the Nvidia OpenCL SDK.
 * @author Michael Bien
 */
public class HelloJOCL {

    public static void main(String[] args) throws IOException {

        int elementCount = 11444777;    // Length of arrays to process
        int localWorkSize = 256;        // Local work size        
        int globalWorkSize = roundUp(localWorkSize, elementCount);  // rounded up to the nearest multiple of the localWorkSize

        // set up
        CLContext context = CLContext.create();
        CLProgram program = context.createProgram(HelloJOCL.class.getResourceAsStream("VectorAdd.cl")).build();

        CLBuffer<FloatBuffer> clBufferA = context.createFloatBuffer(globalWorkSize, READ_ONLY);
        CLBuffer<FloatBuffer> clBufferB = context.createFloatBuffer(globalWorkSize, READ_ONLY);
        CLBuffer<FloatBuffer> clBufferC = context.createFloatBuffer(globalWorkSize, WRITE_ONLY);

        // fill read buffers with random numbers.
        fillBuffer(clBufferA.getBuffer(), 12345);
        fillBuffer(clBufferB.getBuffer(), 67890);

        // get a reference to the kernel functon with the name 'VectorAdd'
        // and map the buffers to its input parameters.
        CLKernel kernel = program.createCLKernel("VectorAdd");
        kernel.putArgs(clBufferA, clBufferB, clBufferC).putArg(elementCount);

        // create command queue on fastest device.
        CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue();

        // asynchronous write of data to GPU device,
        // blocking read later to get the computed results back.
        long time = nanoTime();
        queue.putWriteBuffer(clBufferA, false)
             .putWriteBuffer(clBufferB, false)
             .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
             .putReadBuffer(clBufferC, true);
        time = nanoTime() - time;

        // cleanup all resources associated with this context.
        context.release();

        // print first few elements of the resulting buffer to the console.
        out.println("a+b=c results snapshot: ");
        for(int i = 0; i < 10; i++)
            out.print(clBufferC.getBuffer().get() + ", ");
        out.println("...; " + clBufferC.getBuffer().remaining() + " more");

        out.println("computation took: "+(time/1000000)+"ms");
    }

    /* utilities */
    private static void fillBuffer(FloatBuffer buffer, int seed) {
        Random rnd = new Random(seed);
        while(buffer.remaining() != 0)
            buffer.put(rnd.nextFloat()*100);
        buffer.rewind();
    }

    private static int roundUp(int groupSize, int globalSize) {
        int r = globalSize % groupSize;
        if (r == 0) return globalSize;
        else        return globalSize + groupSize - r;
    }
}

Hello JOCL Kernel:

    // OpenCL Kernel Function for element by element vector addition
    kernel void VectorAdd(global const float* a, global const float* b, global float* c, int numElements) {

        // get index into global data array
        int iGID = get_global_id(0);

        // bound check, equivalent to the limit on a 'for' loop
        if (iGID >= numElements)  {
            return;
        }

        // add the vector elements
        c[iGID] = a[iGID] + b[iGID];
    }