JOCL v2.6.0-rc-20250722
JOCL, OpenCL® API Binding for Java™ (public API).
CLKernel.java
Go to the documentation of this file.
1/*
2 * Copyright (c) 2009 JogAmp Community. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are
5 * permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this list of
8 * conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
11 * of conditions and the following disclaimer in the documentation and/or other materials
12 * provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY JogAmp Community ``AS IS'' AND ANY EXPRESS OR IMPLIED
15 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
16 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JogAmp Community OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
20 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
21 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
22 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 * The views and conclusions contained in the software and documentation are those of the
25 * authors and should not be interpreted as representing official policies, either expressed
26 * or implied, of JogAmp Community.
27 */
28
29package com.jogamp.opencl;
30
31import com.jogamp.opencl.util.CLUtil;
32import com.jogamp.common.nio.Buffers;
33import com.jogamp.common.nio.PointerBuffer;
34import com.jogamp.opencl.llb.CL;
35import java.nio.Buffer;
36import java.nio.ByteBuffer;
37
38import static com.jogamp.opencl.CLException.*;
39import static com.jogamp.opencl.llb.CL.*;
40import static com.jogamp.common.os.Platform.*;
41
42/**
43 * High level abstraction for an OpenCL Kernel.
44 * A kernel is a function declared in a program. A kernel is identified by the <code>kernel</code> qualifier
45 * applied to any function in a program. A kernel object encapsulates the specific <code>kernel</code>
46 * function declared in a program and the argument values to be used when executing this
47 * <code>kernel</code> function.
48 * CLKernel is not threadsafe.
49 * @see CLProgram#createCLKernel(java.lang.String)
50 * @see CLProgram#createCLKernels()
51 * @author Michael Bien, et al.
52 */
53public class CLKernel extends CLObjectResource implements Cloneable {
54
55 public final String name;
56 public final int numArgs;
57
58 private final CLProgram program;
59 private final CL binding;
60
61 private final ByteBuffer buffer;
62
63 private int argIndex;
64 private boolean force32BitArgs;
65
66 CLKernel(final CLProgram program, final long id) {
67 this(program, null, id);
68 }
69
70 CLKernel(final CLProgram program, final String name, final long id) {
71 super(program.getContext(), id);
72
73 this.program = program;
74 this.buffer = Buffers.newDirectByteBuffer((is32Bit()?4:8)*3);
75
76 binding = program.getPlatform().getCLBinding();
77
78 if(name == null) {
79 // get function name
80 final PointerBuffer size = PointerBuffer.wrap(buffer);
81 int ret = binding.clGetKernelInfo(ID, CL_KERNEL_FUNCTION_NAME, 0, null, size);
82 checkForError(ret, "error while asking for kernel function name");
83
84 final ByteBuffer bb = Buffers.newDirectByteBuffer((int)size.get(0));
85
86 ret = binding.clGetKernelInfo(ID, CL_KERNEL_FUNCTION_NAME, bb.capacity(), bb, null);
87 checkForError(ret, "error while asking for kernel function name");
88
89 this.name = CLUtil.clString2JavaString(bb, bb.capacity());
90 }else{
91 this.name = name;
92 }
93
94 // get number of arguments
95 final int ret = binding.clGetKernelInfo(ID, CL_KERNEL_NUM_ARGS, buffer.capacity(), buffer, null);
96 checkForError(ret, "error while asking for number of function arguments.");
97
98 numArgs = buffer.getInt(0);
99
100 }
101
102// public CLKernel putArg(Buffer value) {
103// setArg(argIndex++, value);
104// return this;
105// }
106
107 public CLKernel putArg(final CLMemory<?> value) {
108 setArg(argIndex, value);
109 argIndex++;
110 return this;
111 }
112
113 public CLKernel putArg(final short value) {
114 setArg(argIndex, value);
115 argIndex++;
116 return this;
117 }
118
119 public CLKernel putArg(final int value) {
120 setArg(argIndex, value);
121 argIndex++;
122 return this;
123 }
124
125 public CLKernel putArg(final long value) {
126 setArg(argIndex, value);
127 argIndex++;
128 return this;
129 }
130
131 public CLKernel putArg(final float value) {
132 setArg(argIndex, value);
133 argIndex++;
134 return this;
135 }
136
137 public CLKernel putArg(final double value) {
138 setArg(argIndex, value);
139 argIndex++;
140 return this;
141 }
142
143 public CLKernel putNullArg(final int size) {
144 setNullArg(argIndex, size);
145 argIndex++;
146 return this;
147 }
148
149 public CLKernel putArgs(final CLMemory<?>... values) {
150 setArgs(argIndex, values);
151 argIndex += values.length;
152 return this;
153 }
154
155 /**
156 * Resets the argument index to 0.
157 */
158 public CLKernel rewind() {
159 argIndex = 0;
160 return this;
161 }
162
163 /**
164 * Returns the argument index used in the relative putArt(...) methods.
165 */
166 public int position() {
167 return argIndex;
168 }
169
170// public CLKernel setArg(int argumentIndex, Buffer value) {
171// setArgument(argumentIndex, CLMemory.sizeOfBufferElem(value)*value.capacity(), value);
172// return this;
173// }
174
175 public CLKernel setArg(final int argumentIndex, final CLMemory<?> value) {
176 setArgument(argumentIndex, is32Bit()?4:8, wrap(value.ID));
177 return this;
178 }
179
180 public CLKernel setArg(final int argumentIndex, final short value) {
181 setArgument(argumentIndex, 2, wrap(value));
182 return this;
183 }
184
185 public CLKernel setArg(final int argumentIndex, final int value) {
186 setArgument(argumentIndex, 4, wrap(value));
187 return this;
188 }
189
190 public CLKernel setArg(final int argumentIndex, final long value) {
191 if(force32BitArgs) {
192 setArgument(argumentIndex, 4, wrap((int)value));
193 }else{
194 setArgument(argumentIndex, 8, wrap(value));
195 }
196 return this;
197 }
198
199 public CLKernel setArg(final int argumentIndex, final float value) {
200 setArgument(argumentIndex, 4, wrap(value));
201 return this;
202 }
203
204 public CLKernel setArg(final int argumentIndex, final double value) {
205 if(force32BitArgs) {
206 setArgument(argumentIndex, 4, wrap((float)value));
207 }else{
208 setArgument(argumentIndex, 8, wrap(value));
209 }
210 return this;
211 }
212
213 public CLKernel setNullArg(final int argumentIndex, final int size) {
214 setArgument(argumentIndex, size, null);
215 return this;
216 }
217
218 public CLKernel setArgs(final CLMemory<?>... values) {
219 setArgs(0, values);
220 return this;
221 }
222
223 public CLKernel setArgs(final Object... values) {
224 if(values == null || values.length == 0) {
225 throw new IllegalArgumentException("values array was empty or null.");
226 }
227 for (int i = 0; i < values.length; i++) {
228 final Object value = values[i];
229 if(value instanceof CLMemory<?>) {
230 setArg(i, (CLMemory<?>)value);
231 }else if(value instanceof Short) {
232 setArg(i, (Short)value);
233 }else if(value instanceof Integer) {
234 setArg(i, (Integer)value);
235 }else if(value instanceof Long) {
236 setArg(i, (Long)value);
237 }else if(value instanceof Float) {
238 setArg(i, (Float)value);
239 }else if(value instanceof Double) {
240 setArg(i, (Double)value);
241 }else{
242 throw new IllegalArgumentException(value + " is not a valid argument.");
243 }
244 }
245 return this;
246 }
247
248 private void setArgs(final int startIndex, final CLMemory<?>... values) {
249 for (int i = 0; i < values.length; i++) {
250 setArg(i+startIndex, values[i]);
251 }
252 }
253
254 private void setArgument(final int argumentIndex, final int size, final Buffer value) {
255 if(argumentIndex >= numArgs || argumentIndex < 0) {
256 throw new IndexOutOfBoundsException("kernel "+ this +" has "+numArgs+
257 " arguments, can not set argument with index "+argumentIndex);
258 }
259 if(!program.isExecutable()) {
260 throw new IllegalStateException("can not set program" +
261 " arguments for a not executable program. "+program);
262 }
263
264 final int ret = binding.clSetKernelArg(ID, argumentIndex, size, value);
265 if(ret != CL_SUCCESS) {
266 throw newException(ret, "error setting arg "+argumentIndex+" to value "+value+" of size "+size+" of "+this);
267 }
268 }
269
270 /**
271 * Forces double and long arguments to be passed as float and int to the OpenCL kernel.
272 * This can be used in applications which want to mix kernels with different floating point precision.
273 */
274 public CLKernel setForce32BitArgs(final boolean force) {
275 this.force32BitArgs = force;
276 return this;
277 }
278
280 return program;
281 }
282
283 /**
284 * @see #setForce32BitArgs(boolean)
285 */
286 public boolean isForce32BitArgsEnabled() {
287 return force32BitArgs;
288 }
289
290 private Buffer wrap(final float value) {
291 return buffer.putFloat(0, value);
292 }
293
294 private Buffer wrap(final double value) {
295 return buffer.putDouble(0, value);
296 }
297
298 private Buffer wrap(final short value) {
299 return buffer.putShort(0, value);
300 }
301
302 private Buffer wrap(final int value) {
303 return buffer.putInt(0, value);
304 }
305
306 private Buffer wrap(final long value) {
307 return buffer.putLong(0, value);
308 }
309
310 /**
311 * Returns the amount of local memory in bytes being used by a kernel.
312 * This includes local memory that may be needed by an implementation to execute the kernel,
313 * variables declared inside the kernel with the <code>__local</code> address qualifier and local memory
314 * to be allocated for arguments to the kernel declared as pointers with the <code>__local</code> address
315 * qualifier and whose size is specified with clSetKernelArg.
316 * If the local memory size, for any pointer argument to the kernel declared with
317 * the <code>__local</code> address qualifier, is not specified, its size is assumed to be 0.
318 * @version 1.0
319 */
320 public long getLocalMemorySize(final CLDevice device) {
321 return getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE);
322 }
323
324 /**
325 * Returns the work group size for this kernel on the given device.
326 * This provides a mechanism for the application to query the work-group size
327 * that can be used to execute a kernel on a specific device given by device.
328 * The OpenCL implementation uses the resource requirements of the kernel
329 * (register usage etc.) to determine what this work-group size should be.
330 * @version 1.0
331 */
332 public long getWorkGroupSize(final CLDevice device) {
333 return getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE);
334 }
335
336 /**
337 * Returns the work-group size specified by the <code>__attribute__((reqd_work_group_size(X, Y, Z)))</code> qualifier in kernel sources.
338 * If the work-group size is not specified using the above attribute qualifier <code>new long[]{(0, 0, 0)}</code> is returned.
339 * The returned array has always three elements.
340 * @version 1.0
341 */
342 public long[] getCompileWorkGroupSize(final CLDevice device) {
343 final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, (is32Bit()?4:8)*3, buffer, null);
344 if(ret != CL_SUCCESS) {
345 throw newException(ret, "error while asking for CL_KERNEL_COMPILE_WORK_GROUP_SIZE of "+this+" on "+device);
346 }
347
348 if(is32Bit()) {
349 return new long[] { buffer.getInt(0), buffer.getInt(4), buffer.getInt(8) };
350 }else {
351 return new long[] { buffer.getLong(0), buffer.getLong(8), buffer.getLong(16) };
352 }
353 }
354
355 /**
356 * Returns the preferred multiple of workgroup size to use for kernel launch. This is only a performance hint; enqueueing
357 * with other sizes will still work, unless the size is more than the maximum allowed.
358 * @version 1.1
359 */
360 public long getPreferredWorkGroupSizeMultiple(final CLDevice device) {
361 return getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE);
362 }
363
364 /**
365 * Returns the number of bytes of private memory used by each work item in the kernel.
366 * This includes private memory declared with the <code>__private</code> qualifier, as
367 * well as other private memory used by the implementation.
368 * @version 1.1
369 */
370 public long getPrivateMemSize(final CLDevice device) {
371 return getWorkGroupInfo(device, CL_KERNEL_PRIVATE_MEM_SIZE);
372 }
373
374 private long getWorkGroupInfo(final CLDevice device, final int flag) {
375 final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, flag, 8, buffer, null);
376 if(ret != CL_SUCCESS) {
377 throw newException(ret, "error while asking for clGetKernelWorkGroupInfo of "+this+" on "+device);
378 }
379 return buffer.getLong(0);
380 }
381
382 /**
383 * Releases all resources of this kernel from its context.
384 */
385 @Override
386 public void release() {
387 super.release();
388 final int ret = binding.clReleaseKernel(ID);
389 program.onKernelReleased(this);
390 if(ret != CL_SUCCESS) {
391 throw newException(ret, "can not release "+this);
392 }
393 }
394
395 @Override
396 public String toString() {
397 return "CLKernel [id: " + ID
398 + " name: " + name+"]";
399 }
400
401 @Override
402 public boolean equals(final Object obj) {
403 if (obj == null) {
404 return false;
405 }
406 if (getClass() != obj.getClass()) {
407 return false;
408 }
409 final CLKernel other = (CLKernel) obj;
410 if (this.ID != other.ID) {
411 return false;
412 }
413 if (!this.program.equals(other.program)) {
414 return false;
415 }
416 return true;
417 }
418
419 @Override
420 public int hashCode() {
421 int hash = 7;
422 hash = 43 * hash + (int) (this.ID ^ (this.ID >>> 32));
423 hash = 43 * hash + (this.program != null ? this.program.hashCode() : 0);
424 return hash;
425 }
426
427 /**
428 * Returns a new instance of this kernel with uninitialized arguments.
429 */
430 @Override
431 public CLKernel clone() {
432 return program.createCLKernel(name).setForce32BitArgs(force32BitArgs);
433 }
434
435}
This object represents an OpenCL device.
Definition: CLDevice.java:53
High level abstraction for an OpenCL Kernel.
Definition: CLKernel.java:53
void release()
Releases all resources of this kernel from its context.
Definition: CLKernel.java:386
boolean isForce32BitArgsEnabled()
Definition: CLKernel.java:286
CLKernel putArgs(final CLMemory<?>... values)
Definition: CLKernel.java:149
CLKernel rewind()
Resets the argument index to 0.
Definition: CLKernel.java:158
long[] getCompileWorkGroupSize(final CLDevice device)
Returns the work-group size specified by the attribute((reqd_work_group_size(X, Y,...
Definition: CLKernel.java:342
int position()
Returns the argument index used in the relative putArg(...) methods.
Definition: CLKernel.java:166
CLKernel setArg(final int argumentIndex, final CLMemory<?> value)
Definition: CLKernel.java:175
CLKernel setArg(final int argumentIndex, final long value)
Definition: CLKernel.java:190
CLKernel putArg(final int value)
Definition: CLKernel.java:119
CLKernel putArg(final CLMemory<?> value)
Definition: CLKernel.java:107
CLKernel clone()
Returns a new instance of this kernel with uninitialized arguments.
Definition: CLKernel.java:431
CLKernel putArg(final short value)
Definition: CLKernel.java:113
boolean equals(final Object obj)
Definition: CLKernel.java:402
CLKernel setArg(final int argumentIndex, final float value)
Definition: CLKernel.java:199
long getWorkGroupSize(final CLDevice device)
Returns the work group size for this kernel on the given device.
Definition: CLKernel.java:332
long getPrivateMemSize(final CLDevice device)
Returns the number of bytes of private memory used by each work item in the kernel.
Definition: CLKernel.java:370
CLKernel setArgs(final CLMemory<?>... values)
Definition: CLKernel.java:218
CLKernel putNullArg(final int size)
Definition: CLKernel.java:143
long getPreferredWorkGroupSizeMultiple(final CLDevice device)
Returns the preferred multiple of workgroup size to use for kernel launch.
Definition: CLKernel.java:360
CLKernel setForce32BitArgs(final boolean force)
Forces double and long arguments to be passed as float and int to the OpenCL kernel.
Definition: CLKernel.java:274
CLKernel putArg(final long value)
Definition: CLKernel.java:125
CLKernel putArg(final float value)
Definition: CLKernel.java:131
CLKernel putArg(final double value)
Definition: CLKernel.java:137
CLKernel setArg(final int argumentIndex, final int value)
Definition: CLKernel.java:185
CLKernel setArg(final int argumentIndex, final double value)
Definition: CLKernel.java:204
CLKernel setArg(final int argumentIndex, final short value)
Definition: CLKernel.java:180
long getLocalMemorySize(final CLDevice device)
Returns the amount of local memory in bytes being used by a kernel.
Definition: CLKernel.java:320
CLKernel setArgs(final Object... values)
Definition: CLKernel.java:223
CLKernel setNullArg(final int argumentIndex, final int size)
Definition: CLKernel.java:213
Common superclass for all OpenCL memory types.
Definition: CLMemory.java:49
CLContext getContext()
Returns the context for this OpenCL object.
Definition: CLObject.java:58
final long ID
The OpenCL object handle.
Definition: CLObject.java:41
CLPlatform getPlatform()
Returns the platform for this OpenCL object.
Definition: CLObject.java:65
Represents an OpenCL program executed on one or more CLDevices.
Definition: CLProgram.java:64
boolean isExecutable()
Returns true if the build status 'BUILD_SUCCESS' for at least one device of this program exists.
Definition: CLProgram.java:570
boolean equals(final Object obj)
Definition: CLProgram.java:705
CLKernel createCLKernel(final String kernelName)
Creates a kernel with the specified kernel name.
Definition: CLProgram.java:410
static String clString2JavaString(final byte[] chars, int clLength)
Definition: CLUtil.java:51
Java bindings to OpenCL, the Open Computing Language.
Definition: CL.java:26
int clGetKernelInfo(long kernel, int param_name, long param_value_size, Buffer param_value, PointerBuffer param_value_size_ret)
Interface to C language function: cl_int {@native clGetKernelInfo}(cl_kernel kernel,...
int clReleaseKernel(long kernel)
Interface to C language function: cl_int {@native clReleaseKernel}(cl_kernel kernel)
int clSetKernelArg(long kernel, int arg_index, long arg_size, Buffer arg_value)
Interface to C language function: cl_int {@native clSetKernelArg}(cl_kernel kernel,...
int clGetKernelWorkGroupInfo(long kernel, long device, int param_name, long param_value_size, Buffer param_value, PointerBuffer param_value_size_ret)
Interface to C language function: cl_int {@native clGetKernelWorkGroupInfo}(cl_kernel kernel,...