Merge branch 'master' of scm.dev.nokia.troll.no:research/opencl

author: Rhys Weatherley <rhys.weatherley@nokia.com> 2010-06-10 08:08:40 +1000
committer: Rhys Weatherley <rhys.weatherley@nokia.com> 2010-06-10 08:08:40 +1000
commit: 5a09f29da57f415ef8d776e497fd571c65a76e25 (patch)
tree: 38a57cde4e7603ba136478353f9286b41f9a656c
parent: 8d2ac078b7a867c5232a2c4d1b9baa762b360f59 (diff)
parent: 81bf568b6fa1f023bbbccee381a6deeb413578bf (diff)
13 files changed, 125 insertions, 147 deletions
diff --git a/src/opencl/opencl.pro b/src/opencl/opencl.pro
index c7dd01e..00f4c0e 100644
--- a/src/opencl/opencl.pro
+++ b/src/opencl/opencl.pro
@@ -56,7 +56,8 @@ SOURCES += \
     qclplatform.cpp \
     qclprogram.cpp \
     qclsampler.cpp \
-    qclvector.cpp
+    qclvector.cpp \
+    qclworksize.cpp
 
 HEADERS += $$PRIVATE_HEADERS
 DEFINES += QT_BUILD_CL_LIB
diff --git a/src/opencl/qcldevice.cpp b/src/opencl/qcldevice.cpp
index 4c138b1..fe120c9 100644
--- a/src/opencl/qcldevice.cpp
+++ b/src/opencl/qcldevice.cpp
@@ -361,9 +361,9 @@ QCLWorkSize QCLDevice::maximumWorkItemSize() const
 
     \sa maximumWorkItemSize()
 */
-int QCLDevice::maximumWorkItemsPerGroup() const
+size_t QCLDevice::maximumWorkItemsPerGroup() const
 {
-    return int(qt_cl_paramSize(m_id, CL_DEVICE_MAX_WORK_GROUP_SIZE));
+    return qt_cl_paramSize(m_id, CL_DEVICE_MAX_WORK_GROUP_SIZE);
 }
 
 /*!
@@ -872,7 +872,7 @@ bool qt_cl_has_extension(const char *list, size_t listLen, const char *name)
 }
 
 /*!
-    Returns this if this device has an extension called \a name;
+    Returns true if this device has an extension called \a name;
     false otherwise.
 
     This function is more efficient than checking for \a name
diff --git a/src/opencl/qcldevice.h b/src/opencl/qcldevice.h
index 335114b..241496e 100644
--- a/src/opencl/qcldevice.h
+++ b/src/opencl/qcldevice.h
@@ -87,7 +87,7 @@ public:
     QSysInfo::Endian byteOrder() const;
 
     QCLWorkSize maximumWorkItemSize() const;
-    int maximumWorkItemsPerGroup() const;
+    size_t maximumWorkItemsPerGroup() const;
 
     bool hasImage2D() const;
     bool hasImage3D() const;
diff --git a/src/opencl/qclevent.cpp b/src/opencl/qclevent.cpp
index f751186..e7d7d1e 100644
--- a/src/opencl/qclevent.cpp
+++ b/src/opencl/qclevent.cpp
@@ -236,6 +236,22 @@ cl_int QCLEvent::status() const
 }
 
 /*!
+    Returns the type of command that generated this event.
+*/
+cl_command_type QCLEvent::commandType() const
+{
+    // A null event has no originating command to report.
+    if (!m_id)
+        return 0;
+
+    // A failed query is reported as 0 as well.
+    cl_command_type kind;
+    const bool queried = clGetEventInfo
+        (m_id, CL_EVENT_COMMAND_TYPE, sizeof(kind), &kind, 0) == CL_SUCCESS;
+    return queried ? kind : 0;
+}
+
+/*!
     Waits for this event to be signalled as finished.  The calling thread
     is blocked until the event is signalled.  This function returns
     immediately if the event is null.
@@ -426,18 +442,8 @@ QDebug operator<<(QDebug dbg, const QCLEvent &event)
         dbg << "QCLEvent()";
         return dbg;
     }
-    cl_command_type command;
-    cl_int status;
-    if (clGetEventInfo(id, CL_EVENT_COMMAND_TYPE,
-                       sizeof(command), &command, 0) != CL_SUCCESS) {
-        dbg << "QCLEvent(invalid)";
-        return dbg;
-    }
-    if (clGetEventInfo(id, CL_EVENT_COMMAND_EXECUTION_STATUS,
-                       sizeof(status), &status, 0) != CL_SUCCESS) {
-        dbg << "QCLEvent(invalid)";
-        return dbg;
-    }
+    cl_command_type command = event.commandType();
+    cl_int status = event.status();
     const char *commandName;
     switch (command) {
     case CL_COMMAND_NDRANGE_KERNEL:
diff --git a/src/opencl/qclevent.h b/src/opencl/qclevent.h
index ce6da59..3755846 100644
--- a/src/opencl/qclevent.h
+++ b/src/opencl/qclevent.h
@@ -73,6 +73,7 @@ public:
     bool isErrored() const { return status() < 0; }
 
     cl_int status() const;
+    cl_command_type commandType() const;
 
     void waitForFinished();
 
diff --git a/src/opencl/qclplatform.cpp b/src/opencl/qclplatform.cpp
index 742e1f7..3739f0e 100644
--- a/src/opencl/qclplatform.cpp
+++ b/src/opencl/qclplatform.cpp
@@ -214,7 +214,7 @@ QStringList QCLPlatform::extensions() const
 bool qt_cl_has_extension(const char *list, size_t listLen, const char *name);
 
 /*!
-    Returns this if this platform has an extension called \a name;
+    Returns true if this platform has an extension called \a name;
     false otherwise.
 
     This function is more efficient than checking for \a name
diff --git a/src/opencl/qclvector.cpp b/src/opencl/qclvector.cpp
index b53c3da..10b60c0 100644
--- a/src/opencl/qclvector.cpp
+++ b/src/opencl/qclvector.cpp
@@ -55,11 +55,10 @@ QT_BEGIN_NAMESPACE
     OpenCL buffer object to make it appear as a host-accessible array
     of elements of type T.
 
-    Whenever the host CPU calls operator[]() or map(), the
-    array's contents are mapped into host-accessible memory for
-    direct access.  When the host calls unmap() or sets the vector
-    on a QCLKernel as an argument, the data is copied back to the
-    OpenCL compute device (e.g., the GPU).
+    Whenever the host CPU calls operator[](), the array's contents
+    are copied into host-accessible memory for direct access.  When the
+    host sets the vector on a QCLKernel as an argument, the data is
+    copied back to the OpenCL compute device (e.g., the GPU).
 
     The type T is restricted to primitive and movable types that do
     not require explicit construction, destruction, or operator=().
@@ -390,60 +389,22 @@ cl_mem QCLVectorBase::kernelArg() const
     \fn T &QCLVector::operator[](int index)
 
     Returns a reference to the element at \a index in this OpenCL vector.
-    The vector will be mapped into host memory if necessary.
-
-    \sa map()
+    The vector will be copied to host memory if necessary.
 */
 
 /*!
     \fn const T &QCLVector::operator[](int index) const
 
     Returns a const reference to the element at \a index in this
-    OpenCL vector.  The vector will be mapped into host memory
+    OpenCL vector.  The vector will be copied to host memory
     if necessary.
-
-    \sa map()
-*/
-
-/*!
-    \fn void QCLVector::map()
-
-    Maps this OpenCL vector into the host CPU's address space so that
-    its contents can be read or written.  Once the host CPU no longer
-    needs to access the vector's contents, it should call unmap().
-
-    This function does nothing if the vector is already mapped.
-
-    The vector will be implicitly unmapped if it is set on a QCLKernel
-    as an argument.
-
-    \sa unmap(), isMapped()
-*/
-
-/*!
-    \fn void QCLVector::unmap()
-
-    Unmaps this OpenCL vector from the host CPU's address space.
-    The data can then be accessed by a QCLKernel running on the
-    OpenCL compute device.
-
-    \sa map(), isMapped()
-*/
-
-/*!
-    \fn bool QCLVector::isMapped() const
-
-    Returns true if this vector is mapped into the host CPU's
-    address space; false otherwise.
-
-    \sa map(), unmap()
 */
 
 /*!
     \fn void QCLVector::read(T *data, int count, int offset)
 
     Reads the \a count elements starting \a offset in this vector
-    into \a data.  The vector does not need to be mapped.
+    into \a data.
 
     \sa write()
 */
@@ -452,7 +413,6 @@ cl_mem QCLVectorBase::kernelArg() const
     \fn void QCLVector::write(const T *data, int count, int offset)
 
     Writes the \a count elements from \a data to \a offset in this vector.
-    The vector does not need to be mapped.
 
     \sa read()
 */
@@ -462,22 +422,6 @@ cl_mem QCLVectorBase::kernelArg() const
     \overload
 
     Writes the contents of \a data to \a offset in this vector.
-    The vector does not need to be mapped.
-*/
-
-/*!
-    \fn T *QCLVector::data() const
-
-    Returns a pointer to the first element in the vector.
-    The vector will be mapped into host memory if necessary.
-
-    \sa map()
-*/
-
-/*!
-    \fn cl_mem QCLVector::memoryId() const
-
-    Returns the native OpenCL memory buffer identifier for this vector.
 */
 
 /*!
diff --git a/src/opencl/qclvector.h b/src/opencl/qclvector.h
index cd95d80..1864e85 100644
--- a/src/opencl/qclvector.h
+++ b/src/opencl/qclvector.h
@@ -107,19 +107,11 @@ public:
     T &operator[](int index);
     const T &operator[](int index) const;
 
-    void map();
-    void unmap();
-    bool isMapped() const;
-
     void read(T *data, int count, int offset = 0);
     void write(const T *data, int count, int offset = 0);
     void write(const QVector<T> &data, int offset = 0);
 
-    T *data() const;
-
-    cl_mem memoryId() const;
     QCLContext *context() const;
-
     QCLBuffer toBuffer() const;
 
 private:
@@ -189,24 +181,6 @@ Q_INLINE_TEMPLATE const T &QCLVector<T>::operator[](int index) const
 }
 
 template <typename T>
-Q_INLINE_TEMPLATE void QCLVector<T>::map()
-{
-    QCLVectorBase::map();
-}
-
-template <typename T>
-Q_INLINE_TEMPLATE void QCLVector<T>::unmap()
-{
-    QCLVectorBase::unmap();
-}
-
-template <typename T>
-Q_INLINE_TEMPLATE bool QCLVector<T>::isMapped() const
-{
-    return m_mapped != 0;
-}
-
-template <typename T>
 Q_INLINE_TEMPLATE void QCLVector<T>::write
     (const T *data, int count, int offset)
 {
@@ -230,20 +204,6 @@ Q_INLINE_TEMPLATE void QCLVector<T>::write
 }
 
 template <typename T>
-Q_INLINE_TEMPLATE T *QCLVector<T>::data() const
-{
-    if (!m_mapped)
-        map();
-    return reinterpret_cast<T *>(m_mapped);
-}
-
-template <typename T>
-Q_INLINE_TEMPLATE cl_mem QCLVector<T>::memoryId() const
-{
-    return QCLVectorBase::memoryId();
-}
-
-template <typename T>
 Q_INLINE_TEMPLATE QCLContext *QCLVector<T>::context() const
 {
     return QCLVectorBase::context();
diff --git a/src/opencl/qclworksize.qdoc b/src/opencl/qclworksize.cpp
index 122694d..79a351a 100644
--- a/src/opencl/qclworksize.qdoc
+++ b/src/opencl/qclworksize.cpp
@@ -39,6 +39,11 @@
 **
 ****************************************************************************/
 
+#include "qclworksize.h"
+#include "qcldevice.h"
+
+QT_BEGIN_NAMESPACE
+
 /*!
     \class QCLWorkSize
     \brief The QCLWorkSize class defines the size of an item of work for an OpenCL kernel.
@@ -84,18 +89,21 @@
     \fn QCLWorkSize::QCLWorkSize(size_t size)
 
     Constructs a single-dimensional work size with width() set to \a size.
+    The height() and depth() will be set to 1.
 */
 
 /*!
     \fn QCLWorkSize::QCLWorkSize(size_t width, size_t height)
 
     Constructs a two-dimensional work size of \a width x \a height.
+    The depth() will be set to 1.
 */
 
 /*!
     \fn QCLWorkSize::QCLWorkSize(const QSize &size)
 
     Constructs a two-dimensional work size set to \a size.
+    The depth() will be set to 1.
 */
 
 /*!
@@ -152,3 +160,64 @@
 
     \sa operator==()
 */
+
+// Euclid's algorithm; gcd(x, 0) == x, so "x % 0" is never evaluated.
+static size_t qt_gcd_of_size(size_t x, size_t y)
+{
+    while (y != 0) {
+        const size_t remainder = x % y;
+        x = y;
+        y = remainder;
+    }
+    return x;
+}
+
+/*!
+    Returns the best-fit local work size that evenly divides this work
+    size and fits within the maximums defined by \a maxWorkItemSize
+    and \a maxItemsPerGroup.  A maximum of zero is treated as "no limit".
+
+    This function is typically used to convert an arbitrary global
+    work size on a QCLKernel into a compatible local work size.
+
+    \sa QCLKernel::setLocalWorkSize()
+*/
+QCLWorkSize QCLWorkSize::toLocalWorkSize
+    (const QCLWorkSize &maxWorkItemSize, size_t maxItemsPerGroup) const
+{
+    // Adjust for the maximum work item size in each dimension.
+    size_t width = m_dim >= 1 ? qt_gcd_of_size(m_sizes[0], maxWorkItemSize.width()) : 1;
+    size_t height = m_dim >= 2 ? qt_gcd_of_size(m_sizes[1], maxWorkItemSize.height()) : 1;
+    size_t depth = m_dim >= 3 ? qt_gcd_of_size(m_sizes[2], maxWorkItemSize.depth()) : 1;
+
+    // Shrink until underneath the maximum group size.  Only even sizes are
+    // halved; an odd size drops to 1, so every result still divides its gcd.
+    while (maxItemsPerGroup && (width * height * depth) > maxItemsPerGroup) {
+        width = (width % 2 == 0) ? (width / 2) : 1;
+        height = (height % 2 == 0) ? (height / 2) : 1;
+        depth = (depth % 2 == 0) ? (depth / 2) : 1;
+    }
+
+    if (m_dim >= 3)
+        return QCLWorkSize(width, height, depth);
+    if (m_dim >= 2)
+        return QCLWorkSize(width, height);
+    return QCLWorkSize(width);
+}
+
+/*!
+    Returns the best-fit local work size for this work size, using the
+    maximum work item size and maximum items per group of \a device.
+
+    This function is typically used to convert an arbitrary global
+    work size on a QCLKernel into a compatible local work size.
+
+    \sa QCLKernel::setLocalWorkSize()
+*/
+QCLWorkSize QCLWorkSize::toLocalWorkSize(const QCLDevice &device) const
+{
+    const QCLWorkSize maxItemSize = device.maximumWorkItemSize();
+    return toLocalWorkSize(maxItemSize, device.maximumWorkItemsPerGroup());
+}
+
+QT_END_NAMESPACE
diff --git a/src/opencl/qclworksize.h b/src/opencl/qclworksize.h
index 719338c..0c5541e 100644
--- a/src/opencl/qclworksize.h
+++ b/src/opencl/qclworksize.h
@@ -51,17 +51,19 @@ QT_BEGIN_NAMESPACE
 
 QT_MODULE(CL)
 
+class QCLDevice;
+
 class Q_CL_EXPORT QCLWorkSize
 {
 public:
     QCLWorkSize()
-        : m_dim(1) { m_sizes[0] = 1; m_sizes[1] = 0; m_sizes[2] = 0; }
+        : m_dim(1) { m_sizes[0] = 1; m_sizes[1] = 1; m_sizes[2] = 1; }
     QCLWorkSize(size_t size)
-        : m_dim(1) { m_sizes[0] = size; m_sizes[1] = 0; m_sizes[2] = 0; }
+        : m_dim(1) { m_sizes[0] = size; m_sizes[1] = 1; m_sizes[2] = 1; }
     QCLWorkSize(size_t width, size_t height)
-        : m_dim(2) { m_sizes[0] = width; m_sizes[1] = height; m_sizes[2] = 0; }
+        : m_dim(2) { m_sizes[0] = width; m_sizes[1] = height; m_sizes[2] = 1; }
     QCLWorkSize(const QSize &size)
-        : m_dim(2) { m_sizes[0] = size.width(); m_sizes[1] = size.height(); m_sizes[2] = 0; }
+        : m_dim(2) { m_sizes[0] = size.width(); m_sizes[1] = size.height(); m_sizes[2] = 1; }
     QCLWorkSize(size_t width, size_t height, size_t depth)
         : m_dim(3)
         { m_sizes[0] = width; m_sizes[1] = height; m_sizes[2] = depth; }
@@ -76,6 +78,10 @@ public:
     bool operator==(const QCLWorkSize &other) const;
     bool operator!=(const QCLWorkSize &other) const;
 
+    QCLWorkSize toLocalWorkSize
+        (const QCLWorkSize &maxWorkItemSize, size_t maxItemsPerGroup) const;
+    QCLWorkSize toLocalWorkSize(const QCLDevice &device) const;
+
 private:
     size_t m_dim;
     size_t m_sizes[3];
diff --git a/src/openclgl/qclcontextgl.cpp b/src/openclgl/qclcontextgl.cpp
index d6235e2..53bcbb5 100644
--- a/src/openclgl/qclcontextgl.cpp
+++ b/src/openclgl/qclcontextgl.cpp
@@ -225,7 +225,7 @@ bool QCLContextGL::create(const QCLPlatform &platform)
         (q_PFNCLGETGLCONTEXTINFOKHR)clGetExtensionFunctionAddress
             ("clGetGLContextInfoKHR");
     if (getGLContextInfo && hasSharing) {
-        cl_uint size;
+        size_t size;
         cl_device_id currentDev;
         if(getGLContextInfo(properties.data(),
                             CL_DEVICES_FOR_GL_CONTEXT_KHR,
diff --git a/tests/auto/qcl/tst_qcl.cpp b/tests/auto/qcl/tst_qcl.cpp
index 066ff88..c0f3c4e 100644
--- a/tests/auto/qcl/tst_qcl.cpp
+++ b/tests/auto/qcl/tst_qcl.cpp
@@ -377,46 +377,37 @@ void tst_QCL::vectorBuffer()
     QVERIFY(vector1.isNull());
     QVERIFY(vector1.isEmpty());
     QCOMPARE(vector1.size(), 0);
-    QVERIFY(vector1.memoryId() == 0);
+    QVERIFY(vector1.toBuffer().memoryId() == 0);
     QVERIFY(vector1.context() == 0);
-    QVERIFY(!vector1.isMapped());
 
     vector1 = context.createVector<float>(100);
     QVERIFY(!vector1.isNull());
     QVERIFY(!vector1.isEmpty());
     QCOMPARE(vector1.size(), 100);
-    QVERIFY(vector1.memoryId() != 0);
+    QVERIFY(vector1.toBuffer().memoryId() != 0);
     QVERIFY(vector1.context() == &context);
-    QVERIFY(!vector1.isMapped());
 
     for (int index = 0; index < 100; ++index)
         vector1[index] = float(index);
-    QVERIFY(vector1.isMapped());
 
-    vector1.unmap();
-    QVERIFY(!vector1.isMapped());
     for (int index = 0; index < 100; ++index)
         QCOMPARE(vector1[index], float(index));
-    QVERIFY(vector1.isMapped());
 
     QCLKernel addToVector = program.createKernel("addToVector");
     addToVector.setGlobalWorkSize(vector1.size());
     addToVector(vector1, 42.0f);
-    QVERIFY(!vector1.isMapped());
 
     for (int index = 0; index < 100; ++index) {
         QCOMPARE(constVectorAt(vector1, index), float(index + 42));
         QCOMPARE(vector1[index], float(index + 42));
     }
-    QVERIFY(vector1.isMapped());
 
     vector1.release();
     QVERIFY(vector1.isNull());
     QVERIFY(vector1.isEmpty());
     QCOMPARE(vector1.size(), 0);
-    QVERIFY(vector1.memoryId() == 0);
+    QVERIFY(vector1.toBuffer().memoryId() == 0);
     QVERIFY(vector1.context() == 0);
-    QVERIFY(!vector1.isMapped());
 }
 
 void tst_QCL::eventProfiling()
@@ -429,7 +420,6 @@ void tst_QCL::eventProfiling()
     QCLVector<float> vector1 = context.createVector<float>(20000);
     for (int index = 0; index < vector1.size(); ++index)
         vector1[index] = float(index);
-    vector1.unmap();
 
     QCLKernel addToVector = program.createKernel("addToVector");
     addToVector.setGlobalWorkSize(vector1.size());
@@ -531,26 +521,26 @@ void tst_QCL::workSize()
     QCLWorkSize size;
     QVERIFY(size.dimensions() == 1);
     QVERIFY(size.width() == 1);
-    QVERIFY(size.height() == 0);
-    QVERIFY(size.depth() == 0);
+    QVERIFY(size.height() == 1);
+    QVERIFY(size.depth() == 1);
 
     QCLWorkSize size1(42);
     QVERIFY(size1.dimensions() == 1);
     QVERIFY(size1.width() == 42);
-    QVERIFY(size1.height() == 0);
-    QVERIFY(size1.depth() == 0);
+    QVERIFY(size1.height() == 1);
+    QVERIFY(size1.depth() == 1);
 
     QCLWorkSize size2(42, 63);
     QVERIFY(size2.dimensions() == 2);
     QVERIFY(size2.width() == 42);
     QVERIFY(size2.height() == 63);
-    QVERIFY(size2.depth() == 0);
+    QVERIFY(size2.depth() == 1);
 
     QCLWorkSize size2b(QSize(63, 42));
     QVERIFY(size2b.dimensions() == 2);
     QVERIFY(size2b.width() == 63);
     QVERIFY(size2b.height() == 42);
-    QVERIFY(size2b.depth() == 0);
+    QVERIFY(size2b.depth() == 1);
 
     QCLWorkSize size3(42, 63, 12);
     QVERIFY(size3.dimensions() == 3);
@@ -577,7 +567,7 @@ void tst_QCL::workSize()
     QVERIFY(size4.dimensions() == 2);
     QVERIFY(size4.width() == 42);
     QVERIFY(size4.height() == 63);
-    QVERIFY(size4.depth() == 0);
+    QVERIFY(size4.depth() == 1);
 
     QVERIFY(size4.width() == size4.sizes()[0]);
     QVERIFY(size4.height() == size4.sizes()[1]);
diff --git a/util/clinfo/clinfo.cpp b/util/clinfo/clinfo.cpp
index 73443cd..aa7519a 100644
--- a/util/clinfo/clinfo.cpp
+++ b/util/clinfo/clinfo.cpp
@@ -100,7 +100,8 @@ int main(int argc, char *argv[])
         printf("    Max Work Size     : %ux%ux%u\n",
                uint(size.width()), uint(size.height()),
                uint(size.depth()));
-        printf("    Max Items/Group   : %d\n", dev.maximumWorkItemsPerGroup());
+        printf("    Max Items/Group   : %u\n",
+               uint(dev.maximumWorkItemsPerGroup()));
         printf("    Local Memory      : ");
         printMemorySize(dev.localMemorySize());
         printf("    Global Memory     : ");
author	Rhys Weatherley <rhys.weatherley@nokia.com>	2010-06-10 08:08:40 +1000
committer	Rhys Weatherley <rhys.weatherley@nokia.com>	2010-06-10 08:08:40 +1000
commit	5a09f29da57f415ef8d776e497fd571c65a76e25 (patch)
tree	38a57cde4e7603ba136478353f9286b41f9a656c
parent	8d2ac078b7a867c5232a2c4d1b9baa762b360f59 (diff)
parent	81bf568b6fa1f023bbbccee381a6deeb413578bf (diff)