summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rhys Weatherley <rhys.weatherley@nokia.com> 2010-06-10 08:08:40 +1000
committer Rhys Weatherley <rhys.weatherley@nokia.com> 2010-06-10 08:08:40 +1000
commit 5a09f29da57f415ef8d776e497fd571c65a76e25 (patch)
tree 38a57cde4e7603ba136478353f9286b41f9a656c
parent 8d2ac078b7a867c5232a2c4d1b9baa762b360f59 (diff)
parent 81bf568b6fa1f023bbbccee381a6deeb413578bf (diff)
Merge branch 'master' of scm.dev.nokia.troll.no:research/opencl
-rw-r--r-- src/opencl/opencl.pro | 3
-rw-r--r-- src/opencl/qcldevice.cpp | 6
-rw-r--r-- src/opencl/qcldevice.h | 2
-rw-r--r-- src/opencl/qclevent.cpp | 30
-rw-r--r-- src/opencl/qclevent.h | 1
-rw-r--r-- src/opencl/qclplatform.cpp | 2
-rw-r--r-- src/opencl/qclvector.cpp | 70
-rw-r--r-- src/opencl/qclvector.h | 40
-rw-r--r-- src/opencl/qclworksize.cpp (renamed from src/opencl/qclworksize.qdoc) | 69
-rw-r--r-- src/opencl/qclworksize.h | 14
-rw-r--r-- src/openclgl/qclcontextgl.cpp | 2
-rw-r--r-- tests/auto/qcl/tst_qcl.cpp | 30
-rw-r--r-- util/clinfo/clinfo.cpp | 3
13 files changed, 125 insertions, 147 deletions
diff --git a/src/opencl/opencl.pro b/src/opencl/opencl.pro
index c7dd01e..00f4c0e 100644
--- a/src/opencl/opencl.pro
+++ b/src/opencl/opencl.pro
@@ -56,7 +56,8 @@ SOURCES += \
qclplatform.cpp \
qclprogram.cpp \
qclsampler.cpp \
- qclvector.cpp
+ qclvector.cpp \
+ qclworksize.cpp
HEADERS += $$PRIVATE_HEADERS
DEFINES += QT_BUILD_CL_LIB
diff --git a/src/opencl/qcldevice.cpp b/src/opencl/qcldevice.cpp
index 4c138b1..fe120c9 100644
--- a/src/opencl/qcldevice.cpp
+++ b/src/opencl/qcldevice.cpp
@@ -361,9 +361,9 @@ QCLWorkSize QCLDevice::maximumWorkItemSize() const
\sa maximumWorkItemSize()
*/
-int QCLDevice::maximumWorkItemsPerGroup() const
+size_t QCLDevice::maximumWorkItemsPerGroup() const
{
- return int(qt_cl_paramSize(m_id, CL_DEVICE_MAX_WORK_GROUP_SIZE));
+ return qt_cl_paramSize(m_id, CL_DEVICE_MAX_WORK_GROUP_SIZE);
}
/*!
@@ -872,7 +872,7 @@ bool qt_cl_has_extension(const char *list, size_t listLen, const char *name)
}
/*!
- Returns this if this device has an extension called \a name;
+ Returns true if this device has an extension called \a name;
false otherwise.
This function is more efficient than checking for \a name
diff --git a/src/opencl/qcldevice.h b/src/opencl/qcldevice.h
index 335114b..241496e 100644
--- a/src/opencl/qcldevice.h
+++ b/src/opencl/qcldevice.h
@@ -87,7 +87,7 @@ public:
QSysInfo::Endian byteOrder() const;
QCLWorkSize maximumWorkItemSize() const;
- int maximumWorkItemsPerGroup() const;
+ size_t maximumWorkItemsPerGroup() const;
bool hasImage2D() const;
bool hasImage3D() const;
diff --git a/src/opencl/qclevent.cpp b/src/opencl/qclevent.cpp
index f751186..e7d7d1e 100644
--- a/src/opencl/qclevent.cpp
+++ b/src/opencl/qclevent.cpp
@@ -236,6 +236,22 @@ cl_int QCLEvent::status() const
}
/*!
+ Returns the type of command that generated this event.
+*/
+cl_command_type QCLEvent::commandType() const
+{
+    // Null events and failed queries both report a command type of 0.
+    cl_command_type command = 0;
+    if (m_id) {
+        const cl_int result = clGetEventInfo
+            (m_id, CL_EVENT_COMMAND_TYPE, sizeof(command), &command, 0);
+        // Discard whatever the failed call may have written.
+        if (result != CL_SUCCESS)
+            command = 0;
+    }
+    return command;
+}
+
+/*!
Waits for this event to be signalled as finished. The calling thread
is blocked until the event is signalled. This function returns
immediately if the event is null.
@@ -426,18 +442,8 @@ QDebug operator<<(QDebug dbg, const QCLEvent &event)
dbg << "QCLEvent()";
return dbg;
}
- cl_command_type command;
- cl_int status;
- if (clGetEventInfo(id, CL_EVENT_COMMAND_TYPE,
- sizeof(command), &command, 0) != CL_SUCCESS) {
- dbg << "QCLEvent(invalid)";
- return dbg;
- }
- if (clGetEventInfo(id, CL_EVENT_COMMAND_EXECUTION_STATUS,
- sizeof(status), &status, 0) != CL_SUCCESS) {
- dbg << "QCLEvent(invalid)";
- return dbg;
- }
+ cl_command_type command = event.commandType();
+ cl_int status = event.status();
const char *commandName;
switch (command) {
case CL_COMMAND_NDRANGE_KERNEL:
diff --git a/src/opencl/qclevent.h b/src/opencl/qclevent.h
index ce6da59..3755846 100644
--- a/src/opencl/qclevent.h
+++ b/src/opencl/qclevent.h
@@ -73,6 +73,7 @@ public:
bool isErrored() const { return status() < 0; }
cl_int status() const;
+ cl_command_type commandType() const;
void waitForFinished();
diff --git a/src/opencl/qclplatform.cpp b/src/opencl/qclplatform.cpp
index 742e1f7..3739f0e 100644
--- a/src/opencl/qclplatform.cpp
+++ b/src/opencl/qclplatform.cpp
@@ -214,7 +214,7 @@ QStringList QCLPlatform::extensions() const
bool qt_cl_has_extension(const char *list, size_t listLen, const char *name);
/*!
- Returns this if this platform has an extension called \a name;
+ Returns true if this platform has an extension called \a name;
false otherwise.
This function is more efficient than checking for \a name
diff --git a/src/opencl/qclvector.cpp b/src/opencl/qclvector.cpp
index b53c3da..10b60c0 100644
--- a/src/opencl/qclvector.cpp
+++ b/src/opencl/qclvector.cpp
@@ -55,11 +55,10 @@ QT_BEGIN_NAMESPACE
OpenCL buffer object to make it appear as a host-accessible array
of elements of type T.
- Whenever the host CPU calls operator[]() or map(), the
- array's contents are mapped into host-accessible memory for
- direct access. When the host calls unmap() or sets the vector
- on a QCLKernel as an argument, the data is copied back to the
- OpenCL compute device (e.g., the GPU).
+ Whenever the host CPU calls operator[](), the array's contents
+ are copied into host-accessible memory for direct access. When the
+ host sets the vector on a QCLKernel as an argument, the data is
+ copied back to the OpenCL compute device (e.g., the GPU).
The type T is restricted to primitive and movable types that do
not require explicit construction, destruction, or operator=().
@@ -390,60 +389,22 @@ cl_mem QCLVectorBase::kernelArg() const
\fn T &QCLVector::operator[](int index)
Returns a reference to the element at \a index in this OpenCL vector.
- The vector will be mapped into host memory if necessary.
-
- \sa map()
+ The vector will be copied to host memory if necessary.
*/
/*!
\fn const T &QCLVector::operator[](int index) const
Returns a const reference to the element at \a index in this
- OpenCL vector. The vector will be mapped into host memory
+ OpenCL vector. The vector will be copied to host memory
if necessary.
-
- \sa map()
-*/
-
-/*!
- \fn void QCLVector::map()
-
- Maps this OpenCL vector into the host CPU's address space so that
- its contents can be read or written. Once the host CPU no longer
- needs to access the vector's contents, it should call unmap().
-
- This function does nothing if the vector is already mapped.
-
- The vector will be implicitly unmapped if it is set on a QCLKernel
- as an argument.
-
- \sa unmap(), isMapped()
-*/
-
-/*!
- \fn void QCLVector::unmap()
-
- Unmaps this OpenCL vector from the host CPU's address space.
- The data can then be accessed by a QCLKernel running on the
- OpenCL compute device.
-
- \sa map(), isMapped()
-*/
-
-/*!
- \fn bool QCLVector::isMapped() const
-
- Returns true if this vector is mapped into the host CPU's
- address space; false otherwise.
-
- \sa map(), unmap()
*/
/*!
\fn void QCLVector::read(T *data, int count, int offset)
Reads the \a count elements starting \a offset in this vector
- into \a data. The vector does not need to be mapped.
+ into \a data.
\sa write()
*/
@@ -452,7 +413,6 @@ cl_mem QCLVectorBase::kernelArg() const
\fn void QCLVector::write(const T *data, int count, int offset)
Writes the \a count elements from \a data to \a offset in this vector.
- The vector does not need to be mapped.
\sa read()
*/
@@ -462,22 +422,6 @@ cl_mem QCLVectorBase::kernelArg() const
\overload
Writes the contents of \a data to \a offset in this vector.
- The vector does not need to be mapped.
-*/
-
-/*!
- \fn T *QCLVector::data() const
-
- Returns a pointer to the first element in the vector.
- The vector will be mapped into host memory if necessary.
-
- \sa map()
-*/
-
-/*!
- \fn cl_mem QCLVector::memoryId() const
-
- Returns the native OpenCL memory buffer identifier for this vector.
*/
/*!
diff --git a/src/opencl/qclvector.h b/src/opencl/qclvector.h
index cd95d80..1864e85 100644
--- a/src/opencl/qclvector.h
+++ b/src/opencl/qclvector.h
@@ -107,19 +107,11 @@ public:
T &operator[](int index);
const T &operator[](int index) const;
- void map();
- void unmap();
- bool isMapped() const;
-
void read(T *data, int count, int offset = 0);
void write(const T *data, int count, int offset = 0);
void write(const QVector<T> &data, int offset = 0);
- T *data() const;
-
- cl_mem memoryId() const;
QCLContext *context() const;
-
QCLBuffer toBuffer() const;
private:
@@ -189,24 +181,6 @@ Q_INLINE_TEMPLATE const T &QCLVector<T>::operator[](int index) const
}
template <typename T>
-Q_INLINE_TEMPLATE void QCLVector<T>::map()
-{
- QCLVectorBase::map();
-}
-
-template <typename T>
-Q_INLINE_TEMPLATE void QCLVector<T>::unmap()
-{
- QCLVectorBase::unmap();
-}
-
-template <typename T>
-Q_INLINE_TEMPLATE bool QCLVector<T>::isMapped() const
-{
- return m_mapped != 0;
-}
-
-template <typename T>
Q_INLINE_TEMPLATE void QCLVector<T>::write
(const T *data, int count, int offset)
{
@@ -230,20 +204,6 @@ Q_INLINE_TEMPLATE void QCLVector<T>::write
}
template <typename T>
-Q_INLINE_TEMPLATE T *QCLVector<T>::data() const
-{
- if (!m_mapped)
- map();
- return reinterpret_cast<T *>(m_mapped);
-}
-
-template <typename T>
-Q_INLINE_TEMPLATE cl_mem QCLVector<T>::memoryId() const
-{
- return QCLVectorBase::memoryId();
-}
-
-template <typename T>
Q_INLINE_TEMPLATE QCLContext *QCLVector<T>::context() const
{
return QCLVectorBase::context();
diff --git a/src/opencl/qclworksize.qdoc b/src/opencl/qclworksize.cpp
index 122694d..79a351a 100644
--- a/src/opencl/qclworksize.qdoc
+++ b/src/opencl/qclworksize.cpp
@@ -39,6 +39,11 @@
**
****************************************************************************/
+#include "qclworksize.h"
+#include "qcldevice.h"
+
+QT_BEGIN_NAMESPACE
+
/*!
\class QCLWorkSize
\brief The QCLWorkSize class defines the size of an item of work for an OpenCL kernel.
@@ -84,18 +89,21 @@
\fn QCLWorkSize::QCLWorkSize(size_t size)
Constructs a single-dimensional work size with width() set to \a size.
+ The height() and depth() will be set to 1.
*/
/*!
\fn QCLWorkSize::QCLWorkSize(size_t width, size_t height)
Constructs a two-dimensional work size of \a width x \a height.
+ The depth() will be set to 1.
*/
/*!
\fn QCLWorkSize::QCLWorkSize(const QSize &size)
Constructs a two-dimensional work size set to \a size.
+ The depth() will be set to 1.
*/
/*!
@@ -152,3 +160,64 @@
\sa operator==()
*/
+
+// Returns the greatest common divisor of x and y using Euclid's
+// algorithm.  gcd(x, 0) is defined as x, which also avoids the
+// division by zero that "x % y" would perform when y is zero.
+static size_t qt_gcd_of_size(size_t x, size_t y)
+{
+    while (y != 0) {
+        size_t remainder = x % y;
+        x = y;
+        y = remainder;
+    }
+    return x;
+}
+
+// Returns the smallest factor of value that is greater than 1.
+// Dividing a size by this factor always yields another divisor of
+// that size.  The caller must pass a value greater than 1.
+static size_t qt_smallest_factor_of_size(size_t value)
+{
+    if ((value % 2) == 0)
+        return 2;
+    // "factor <= value / factor" is "factor * factor <= value"
+    // written so that it cannot overflow.
+    for (size_t factor = 3; factor <= value / factor; factor += 2) {
+        if ((value % factor) == 0)
+            return factor;
+    }
+    return value;   // value is prime.
+}
+
+/*!
+    Returns the best-fit local work size that evenly divides this work
+    size and fits within the maximums defined by \a maxWorkItemSize
+    and \a maxItemsPerGroup.  If \a maxItemsPerGroup is zero, then
+    no limit is applied to the total number of items in the group.
+
+    This function is typically used to convert an arbitrary global
+    work size on a QCLKernel into a compatible local work size.
+
+    \sa QCLKernel::setLocalWorkSize()
+*/
+QCLWorkSize QCLWorkSize::toLocalWorkSize
+    (const QCLWorkSize &maxWorkItemSize, size_t maxItemsPerGroup) const
+{
+    // Adjust for the maximum work item size in each dimension.
+    // The greatest common divisor evenly divides the global size and
+    // does not exceed the (non-zero) maximum.
+    size_t width = m_dim >= 1 ? qt_gcd_of_size(m_sizes[0], maxWorkItemSize.width()) : 1;
+    size_t height = m_dim >= 2 ? qt_gcd_of_size(m_sizes[1], maxWorkItemSize.height()) : 1;
+    size_t depth = m_dim >= 3 ? qt_gcd_of_size(m_sizes[2], maxWorkItemSize.depth()) : 1;
+
+    // Shrink every dimension by its smallest factor until underneath
+    // the maximum group size.  Even sizes are halved; odd sizes are
+    // divided by an odd factor rather than being halved, because a
+    // truncating "size / 2" would no longer evenly divide this work size.
+    while (maxItemsPerGroup && (width * height * depth) > maxItemsPerGroup) {
+        if (width > 1)
+            width /= qt_smallest_factor_of_size(width);
+        if (height > 1)
+            height /= qt_smallest_factor_of_size(height);
+        if (depth > 1)
+            depth /= qt_smallest_factor_of_size(depth);
+    }
+
+    // Return the final result with the same dimensionality as this object.
+    if (m_dim >= 3)
+        return QCLWorkSize(width, height, depth);
+    else if (m_dim >= 2)
+        return QCLWorkSize(width, height);
+    else
+        return QCLWorkSize(width);
+}
+
+/*!
+ Returns the best-fit local work size that evenly divides this
+ work size and fits within the maximum work group size of \a device.
+
+ This function is typically used to convert an arbitrary global
+ work size on a QCLKernel into a compatible local work size.
+
+ \sa QCLKernel::setLocalWorkSize()
+*/
+QCLWorkSize QCLWorkSize::toLocalWorkSize(const QCLDevice &device) const
+{
+    // Fetch the limits from the device and defer to the overload
+    // that takes them explicitly.
+    const QCLWorkSize maxItemSize = device.maximumWorkItemSize();
+    const size_t maxGroupItems = device.maximumWorkItemsPerGroup();
+    return toLocalWorkSize(maxItemSize, maxGroupItems);
+}
+
+QT_END_NAMESPACE
diff --git a/src/opencl/qclworksize.h b/src/opencl/qclworksize.h
index 719338c..0c5541e 100644
--- a/src/opencl/qclworksize.h
+++ b/src/opencl/qclworksize.h
@@ -51,17 +51,19 @@ QT_BEGIN_NAMESPACE
QT_MODULE(CL)
+class QCLDevice;
+
class Q_CL_EXPORT QCLWorkSize
{
public:
QCLWorkSize()
- : m_dim(1) { m_sizes[0] = 1; m_sizes[1] = 0; m_sizes[2] = 0; }
+ : m_dim(1) { m_sizes[0] = 1; m_sizes[1] = 1; m_sizes[2] = 1; }
QCLWorkSize(size_t size)
- : m_dim(1) { m_sizes[0] = size; m_sizes[1] = 0; m_sizes[2] = 0; }
+ : m_dim(1) { m_sizes[0] = size; m_sizes[1] = 1; m_sizes[2] = 1; }
QCLWorkSize(size_t width, size_t height)
- : m_dim(2) { m_sizes[0] = width; m_sizes[1] = height; m_sizes[2] = 0; }
+ : m_dim(2) { m_sizes[0] = width; m_sizes[1] = height; m_sizes[2] = 1; }
QCLWorkSize(const QSize &size)
- : m_dim(2) { m_sizes[0] = size.width(); m_sizes[1] = size.height(); m_sizes[2] = 0; }
+ : m_dim(2) { m_sizes[0] = size.width(); m_sizes[1] = size.height(); m_sizes[2] = 1; }
QCLWorkSize(size_t width, size_t height, size_t depth)
: m_dim(3)
{ m_sizes[0] = width; m_sizes[1] = height; m_sizes[2] = depth; }
@@ -76,6 +78,10 @@ public:
bool operator==(const QCLWorkSize &other) const;
bool operator!=(const QCLWorkSize &other) const;
+ QCLWorkSize toLocalWorkSize
+ (const QCLWorkSize &maxWorkItemSize, size_t maxItemsPerGroup) const;
+ QCLWorkSize toLocalWorkSize(const QCLDevice &device) const;
+
private:
size_t m_dim;
size_t m_sizes[3];
diff --git a/src/openclgl/qclcontextgl.cpp b/src/openclgl/qclcontextgl.cpp
index d6235e2..53bcbb5 100644
--- a/src/openclgl/qclcontextgl.cpp
+++ b/src/openclgl/qclcontextgl.cpp
@@ -225,7 +225,7 @@ bool QCLContextGL::create(const QCLPlatform &platform)
(q_PFNCLGETGLCONTEXTINFOKHR)clGetExtensionFunctionAddress
("clGetGLContextInfoKHR");
if (getGLContextInfo && hasSharing) {
- cl_uint size;
+ size_t size;
cl_device_id currentDev;
if(getGLContextInfo(properties.data(),
CL_DEVICES_FOR_GL_CONTEXT_KHR,
diff --git a/tests/auto/qcl/tst_qcl.cpp b/tests/auto/qcl/tst_qcl.cpp
index 066ff88..c0f3c4e 100644
--- a/tests/auto/qcl/tst_qcl.cpp
+++ b/tests/auto/qcl/tst_qcl.cpp
@@ -377,46 +377,37 @@ void tst_QCL::vectorBuffer()
QVERIFY(vector1.isNull());
QVERIFY(vector1.isEmpty());
QCOMPARE(vector1.size(), 0);
- QVERIFY(vector1.memoryId() == 0);
+ QVERIFY(vector1.toBuffer().memoryId() == 0);
QVERIFY(vector1.context() == 0);
- QVERIFY(!vector1.isMapped());
vector1 = context.createVector<float>(100);
QVERIFY(!vector1.isNull());
QVERIFY(!vector1.isEmpty());
QCOMPARE(vector1.size(), 100);
- QVERIFY(vector1.memoryId() != 0);
+ QVERIFY(vector1.toBuffer().memoryId() != 0);
QVERIFY(vector1.context() == &context);
- QVERIFY(!vector1.isMapped());
for (int index = 0; index < 100; ++index)
vector1[index] = float(index);
- QVERIFY(vector1.isMapped());
- vector1.unmap();
- QVERIFY(!vector1.isMapped());
for (int index = 0; index < 100; ++index)
QCOMPARE(vector1[index], float(index));
- QVERIFY(vector1.isMapped());
QCLKernel addToVector = program.createKernel("addToVector");
addToVector.setGlobalWorkSize(vector1.size());
addToVector(vector1, 42.0f);
- QVERIFY(!vector1.isMapped());
for (int index = 0; index < 100; ++index) {
QCOMPARE(constVectorAt(vector1, index), float(index + 42));
QCOMPARE(vector1[index], float(index + 42));
}
- QVERIFY(vector1.isMapped());
vector1.release();
QVERIFY(vector1.isNull());
QVERIFY(vector1.isEmpty());
QCOMPARE(vector1.size(), 0);
- QVERIFY(vector1.memoryId() == 0);
+ QVERIFY(vector1.toBuffer().memoryId() == 0);
QVERIFY(vector1.context() == 0);
- QVERIFY(!vector1.isMapped());
}
void tst_QCL::eventProfiling()
@@ -429,7 +420,6 @@ void tst_QCL::eventProfiling()
QCLVector<float> vector1 = context.createVector<float>(20000);
for (int index = 0; index < vector1.size(); ++index)
vector1[index] = float(index);
- vector1.unmap();
QCLKernel addToVector = program.createKernel("addToVector");
addToVector.setGlobalWorkSize(vector1.size());
@@ -531,26 +521,26 @@ void tst_QCL::workSize()
QCLWorkSize size;
QVERIFY(size.dimensions() == 1);
QVERIFY(size.width() == 1);
- QVERIFY(size.height() == 0);
- QVERIFY(size.depth() == 0);
+ QVERIFY(size.height() == 1);
+ QVERIFY(size.depth() == 1);
QCLWorkSize size1(42);
QVERIFY(size1.dimensions() == 1);
QVERIFY(size1.width() == 42);
- QVERIFY(size1.height() == 0);
- QVERIFY(size1.depth() == 0);
+ QVERIFY(size1.height() == 1);
+ QVERIFY(size1.depth() == 1);
QCLWorkSize size2(42, 63);
QVERIFY(size2.dimensions() == 2);
QVERIFY(size2.width() == 42);
QVERIFY(size2.height() == 63);
- QVERIFY(size2.depth() == 0);
+ QVERIFY(size2.depth() == 1);
QCLWorkSize size2b(QSize(63, 42));
QVERIFY(size2b.dimensions() == 2);
QVERIFY(size2b.width() == 63);
QVERIFY(size2b.height() == 42);
- QVERIFY(size2b.depth() == 0);
+ QVERIFY(size2b.depth() == 1);
QCLWorkSize size3(42, 63, 12);
QVERIFY(size3.dimensions() == 3);
@@ -577,7 +567,7 @@ void tst_QCL::workSize()
QVERIFY(size4.dimensions() == 2);
QVERIFY(size4.width() == 42);
QVERIFY(size4.height() == 63);
- QVERIFY(size4.depth() == 0);
+ QVERIFY(size4.depth() == 1);
QVERIFY(size4.width() == size4.sizes()[0]);
QVERIFY(size4.height() == size4.sizes()[1]);
diff --git a/util/clinfo/clinfo.cpp b/util/clinfo/clinfo.cpp
index 73443cd..aa7519a 100644
--- a/util/clinfo/clinfo.cpp
+++ b/util/clinfo/clinfo.cpp
@@ -100,7 +100,8 @@ int main(int argc, char *argv[])
printf(" Max Work Size : %ux%ux%u\n",
uint(size.width()), uint(size.height()),
uint(size.depth()));
- printf(" Max Items/Group : %d\n", dev.maximumWorkItemsPerGroup());
+ printf(" Max Items/Group : %u\n",
+ uint(dev.maximumWorkItemsPerGroup()));
printf(" Local Memory : ");
printMemorySize(dev.localMemorySize());
printf(" Global Memory : ");