summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Rhys Weatherley <rhys.weatherley@nokia.com> 2010-06-24 12:32:06 +1000
committer Rhys Weatherley <rhys.weatherley@nokia.com> 2010-06-24 12:32:06 +1000
commit 493815ce47f3db7cb6c02735ea8ec465406f6e32 (patch)
tree f4b6cd761a29cef267b112da1168fe7601d52ac5
parent 7235aca62bc03bc05b1c73ee8e303dae9010a8e6 (diff)
Inline kernel argument setting for performance.
operator() makes a lot of calls to setArg(), each of which then calls through to clSetKernelArg() from the .cpp file. This causes the overhead of two function calls for each argument. Reduce it to a single function call per argument by defining setArg() inline in the header, to be consistent with using the raw OpenCL API.
-rw-r--r-- src/opencl/qclkernel.cpp 130
-rw-r--r-- src/opencl/qclkernel.h 89
2 files changed, 121 insertions, 98 deletions
diff --git a/src/opencl/qclkernel.cpp b/src/opencl/qclkernel.cpp
index aceaa9f..23a9e94 100644
--- a/src/opencl/qclkernel.cpp
+++ b/src/opencl/qclkernel.cpp
@@ -225,7 +225,7 @@ public:
Constructs a null OpenCL kernel object.
*/
QCLKernel::QCLKernel()
- : d_ptr(new QCLKernelPrivate(0, 0))
+ : d_ptr(new QCLKernelPrivate(0, 0)), m_kernelId(0)
{
}
@@ -235,7 +235,7 @@ QCLKernel::QCLKernel()
ownership of \a id and release it in the destructor.
*/
QCLKernel::QCLKernel(QCLContext *context, cl_kernel id)
- : d_ptr(new QCLKernelPrivate(context, id))
+ : d_ptr(new QCLKernelPrivate(context, id)), m_kernelId(id)
{
}
@@ -244,6 +244,7 @@ QCLKernel::QCLKernel(QCLContext *context, cl_kernel id)
*/
QCLKernel::QCLKernel(const QCLKernel &other)
: d_ptr(new QCLKernelPrivate(other.d_ptr.data()))
+ , m_kernelId(other.m_kernelId)
{
}
@@ -261,6 +262,7 @@ QCLKernel::~QCLKernel()
QCLKernel &QCLKernel::operator=(const QCLKernel &other)
{
d_ptr->copy(other.d_ptr.data());
+ m_kernelId = other.m_kernelId;
return *this;
}
@@ -530,112 +532,77 @@ size_t QCLKernel::preferredWorkSizeMultiple() const
}
/*!
+ \fn void QCLKernel::setArg(int index, cl_int value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c int.
*/
-void QCLKernel::setArg(int index, cl_int value)
-{
- Q_D(const QCLKernel);
- clSetKernelArg(d->id, index, sizeof(value), &value);
-}
/*!
+ \fn void QCLKernel::setArg(int index, cl_uint value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c uint.
*/
-void QCLKernel::setArg(int index, cl_uint value)
-{
- Q_D(const QCLKernel);
- clSetKernelArg(d->id, index, sizeof(value), &value);
-}
/*!
+ \fn void QCLKernel::setArg(int index, cl_long value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c long.
*/
-void QCLKernel::setArg(int index, cl_long value)
-{
- Q_D(const QCLKernel);
- clSetKernelArg(d->id, index, sizeof(value), &value);
-}
/*!
+ \fn void QCLKernel::setArg(int index, cl_ulong value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c ulong.
*/
-void QCLKernel::setArg(int index, cl_ulong value)
-{
- Q_D(const QCLKernel);
- clSetKernelArg(d->id, index, sizeof(value), &value);
-}
/*!
+ \fn void QCLKernel::setArg(int index, float value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c float.
*/
-void QCLKernel::setArg(int index, float value)
-{
- Q_D(const QCLKernel);
- clSetKernelArg(d->id, index, sizeof(value), &value);
-}
/*!
+ \fn void QCLKernel::setArg(int index, const QVector2D &value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c float2.
*/
-void QCLKernel::setArg(int index, const QVector2D &value)
-{
- Q_D(const QCLKernel);
- if (sizeof(value) == (sizeof(float) * 2)) {
- clSetKernelArg(d->id, index, sizeof(value), &value);
- } else {
- float values[2] = {value.x(), value.y()};
- clSetKernelArg(d->id, index, sizeof(values), values);
- }
-}
/*!
+ \fn void QCLKernel::setArg(int index, const QVector3D &value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c float4 (OpenCL does not have a \c float3 type).
The value will be passed to the kernel as (x, y, z, 1).
*/
-void QCLKernel::setArg(int index, const QVector3D &value)
-{
- Q_D(const QCLKernel);
- float values[4] = {value.x(), value.y(), value.z(), 1.0f};
- clSetKernelArg(d->id, index, sizeof(values), values);
-}
/*!
+ \fn void QCLKernel::setArg(int index, const QVector4D &value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c float4.
*/
-void QCLKernel::setArg(int index, const QVector4D &value)
-{
- Q_D(const QCLKernel);
- if (sizeof(value) == (sizeof(float) * 4)) {
- clSetKernelArg(d->id, index, sizeof(value), &value);
- } else {
- float values[4] = {value.x(), value.y(), value.z(), value.w()};
- clSetKernelArg(d->id, index, sizeof(values), values);
- }
-}
/*!
Sets argument \a index for this kernel to \a value.
@@ -647,10 +614,9 @@ void QCLKernel::setArg(int index, const QVector4D &value)
*/
void QCLKernel::setArg(int index, const QColor &value)
{
- Q_D(const QCLKernel);
float values[4] =
{value.redF(), value.greenF(), value.blueF(), value.alphaF()};
- clSetKernelArg(d->id, index, sizeof(values), values);
+ clSetKernelArg(m_kernelId, index, sizeof(values), values);
}
/*!
@@ -663,42 +629,29 @@ void QCLKernel::setArg(int index, const QColor &value)
*/
void QCLKernel::setArg(int index, Qt::GlobalColor value)
{
- Q_D(const QCLKernel);
QColor color(value);
float values[4] =
{color.redF(), color.greenF(), color.blueF(), color.alphaF()};
- clSetKernelArg(d->id, index, sizeof(values), values);
+ clSetKernelArg(m_kernelId, index, sizeof(values), values);
}
/*!
+ \fn void QCLKernel::setArg(int index, const QPoint &value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c int2.
*/
-void QCLKernel::setArg(int index, const QPoint &value)
-{
- Q_D(const QCLKernel);
- cl_int values[2] = {value.x(), value.y()};
- clSetKernelArg(d->id, index, sizeof(values), values);
-}
/*!
+ \fn void QCLKernel::setArg(int index, const QPointF &value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c float2.
*/
-void QCLKernel::setArg(int index, const QPointF &value)
-{
- Q_D(const QCLKernel);
- if (sizeof(value) == (sizeof(float) * 2)) {
- clSetKernelArg(d->id, index, sizeof(value), &value);
- } else {
- float values[2] = {value.x(), value.y()};
- clSetKernelArg(d->id, index, sizeof(values), values);
- }
-}
/*!
Sets argument \a index for this kernel to \a value.
@@ -708,30 +661,25 @@ void QCLKernel::setArg(int index, const QPointF &value)
*/
void QCLKernel::setArg(int index, const QMatrix4x4 &value)
{
- Q_D(const QCLKernel);
if (sizeof(qreal) == sizeof(float)) {
- clSetKernelArg(d->id, index, sizeof(float) * 16, value.constData());
+ clSetKernelArg(m_kernelId, index, sizeof(float) * 16, value.constData());
} else {
float values[16];
for (int posn = 0; posn < 16; ++posn)
values[posn] = float(value.constData()[posn]);
- clSetKernelArg(d->id, index, sizeof(values), values);
+ clSetKernelArg(m_kernelId, index, sizeof(values), values);
}
}
/*!
+ \fn void QCLKernel::setArg(int index, const QCLMemoryObject &value)
+
Sets argument \a index for this kernel to \a value.
The argument is assumed to have been declared with the
type \c image2d_t, \c image3d_t, or be a pointer to a buffer,
according to the type of memory object represented by \a value.
*/
-void QCLKernel::setArg(int index, const QCLMemoryObject &value)
-{
- Q_D(const QCLKernel);
- cl_mem id = value.memoryId();
- clSetKernelArg(d->id, index, sizeof(id), &id);
-}
#if defined(qdoc)
@@ -750,9 +698,8 @@ void QCLKernel::setArg(int index, const QCLMemoryObject &value)
void QCLKernel::setArg(int index, const QCLVectorBase &value)
{
- Q_D(const QCLKernel);
cl_mem id = value.kernelArg();
- clSetKernelArg(d->id, index, sizeof(id), &id);
+ clSetKernelArg(m_kernelId, index, sizeof(id), &id);
}
#endif
@@ -771,13 +718,10 @@ void QCLKernel::setArg(int index, const QCLSampler &value)
}
/*!
+ \fn void QCLKernel::setArg(int index, const void *data, size_t size)
+
Sets argument \a index to the \a size bytes at \a data.
*/
-void QCLKernel::setArg(int index, const void *data, size_t size)
-{
- Q_D(const QCLKernel);
- clSetKernelArg(d->id, index, size, data);
-}
/*!
Requests that this kernel instance be run on globalWorkSize() items,
@@ -794,11 +738,11 @@ QCLEvent QCLKernel::run()
Q_D(const QCLKernel);
cl_event event;
cl_int error = clEnqueueNDRangeKernel
- (d->context->activeQueue(), d->id, d->globalWorkSize.dimensions(),
+ (d->context->activeQueue(), m_kernelId, d->globalWorkSize.dimensions(),
0, d->globalWorkSize.sizes(),
(d->localWorkSize.width() ? d->localWorkSize.sizes() : 0),
0, 0, &event);
- context()->reportError("QCLKernel::run:", error);
+ d->context->reportError("QCLKernel::run:", error);
if (error != CL_SUCCESS)
return QCLEvent();
else
@@ -823,11 +767,11 @@ QCLEvent QCLKernel::run(const QCLEventList &after)
Q_D(const QCLKernel);
cl_event event;
cl_int error = clEnqueueNDRangeKernel
- (d->context->activeQueue(), d->id, d->globalWorkSize.dimensions(),
+ (d->context->activeQueue(), m_kernelId, d->globalWorkSize.dimensions(),
0, d->globalWorkSize.sizes(),
(d->localWorkSize.width() ? d->localWorkSize.sizes() : 0),
after.size(), after.eventData(), &event);
- context()->reportError("QCLKernel::run:", error);
+ d->context->reportError("QCLKernel::run:", error);
if (error != CL_SUCCESS)
return QCLEvent();
else
diff --git a/src/opencl/qclkernel.h b/src/opencl/qclkernel.h
index f0721c0..249e880 100644
--- a/src/opencl/qclkernel.h
+++ b/src/opencl/qclkernel.h
@@ -45,9 +45,14 @@
#include "qclglobal.h"
#include "qclevent.h"
#include "qclworksize.h"
+#include "qclmemoryobject.h"
#include <QtCore/qstring.h>
#include <QtCore/qscopedpointer.h>
#include <QtCore/qtconcurrentrun.h>
+#include <QtCore/qpoint.h>
+#include <QtGui/qvector2d.h>
+#include <QtGui/qvector3d.h>
+#include <QtGui/qvector4d.h>
QT_BEGIN_HEADER
@@ -61,12 +66,7 @@ class QCLMemoryObject;
class QCLVectorBase;
class QCLSampler;
class QCLDevice;
-class QVector2D;
-class QVector3D;
-class QVector4D;
class QMatrix4x4;
-class QPoint;
-class QPointF;
class QColor;
class QCLKernelPrivate;
@@ -279,6 +279,7 @@ public:
private:
QScopedPointer<QCLKernelPrivate> d_ptr;
+ cl_kernel m_kernelId;
Q_DECLARE_PRIVATE(QCLKernel)
};
@@ -303,6 +304,84 @@ inline void QCLKernel::setLocalWorkSize(size_t width, size_t height, size_t dept
setLocalWorkSize(QCLWorkSize(width, height, depth));
}
+inline void QCLKernel::setArg(int index, cl_int value)
+{
+ clSetKernelArg(m_kernelId, index, sizeof(value), &value);
+}
+
+inline void QCLKernel::setArg(int index, cl_uint value)
+{
+ clSetKernelArg(m_kernelId, index, sizeof(value), &value);
+}
+
+inline void QCLKernel::setArg(int index, cl_long value)
+{
+ clSetKernelArg(m_kernelId, index, sizeof(value), &value);
+}
+
+inline void QCLKernel::setArg(int index, cl_ulong value)
+{
+ clSetKernelArg(m_kernelId, index, sizeof(value), &value);
+}
+
+inline void QCLKernel::setArg(int index, float value)
+{
+ clSetKernelArg(m_kernelId, index, sizeof(value), &value);
+}
+
+inline void QCLKernel::setArg(int index, const QVector2D &value)
+{
+ if (sizeof(value) == (sizeof(float) * 2)) {
+ clSetKernelArg(m_kernelId, index, sizeof(value), &value);
+ } else {
+ float values[2] = {value.x(), value.y()};
+ clSetKernelArg(m_kernelId, index, sizeof(values), values);
+ }
+}
+
+inline void QCLKernel::setArg(int index, const QVector3D &value)
+{
+ float values[4] = {value.x(), value.y(), value.z(), 1.0f};
+ clSetKernelArg(m_kernelId, index, sizeof(values), values);
+}
+
+inline void QCLKernel::setArg(int index, const QVector4D &value)
+{
+ if (sizeof(value) == (sizeof(float) * 4)) {
+ clSetKernelArg(m_kernelId, index, sizeof(value), &value);
+ } else {
+ float values[4] = {value.x(), value.y(), value.z(), value.w()};
+ clSetKernelArg(m_kernelId, index, sizeof(values), values);
+ }
+}
+
+inline void QCLKernel::setArg(int index, const QPoint &value)
+{
+ cl_int values[2] = {value.x(), value.y()};
+ clSetKernelArg(m_kernelId, index, sizeof(values), values);
+}
+
+inline void QCLKernel::setArg(int index, const QPointF &value)
+{
+ if (sizeof(value) == (sizeof(float) * 2)) {
+ clSetKernelArg(m_kernelId, index, sizeof(value), &value);
+ } else {
+ float values[2] = {value.x(), value.y()};
+ clSetKernelArg(m_kernelId, index, sizeof(values), values);
+ }
+}
+
+inline void QCLKernel::setArg(int index, const QCLMemoryObject &value)
+{
+ cl_mem id = value.memoryId();
+ clSetKernelArg(m_kernelId, index, sizeof(id), &id);
+}
+
+inline void QCLKernel::setArg(int index, const void *data, size_t size)
+{
+ clSetKernelArg(m_kernelId, index, size, data);
+}
+
#ifndef QT_NO_CONCURRENT
// Convenience function definitions that make it possible to say