diff options
author | Rhys Weatherley <rhys.weatherley@nokia.com> | 2010-06-10 08:08:40 +1000 |
---|---|---|
committer | Rhys Weatherley <rhys.weatherley@nokia.com> | 2010-06-10 08:08:40 +1000 |
commit | 5a09f29da57f415ef8d776e497fd571c65a76e25 (patch) | |
tree | 38a57cde4e7603ba136478353f9286b41f9a656c | |
parent | 8d2ac078b7a867c5232a2c4d1b9baa762b360f59 (diff) | |
parent | 81bf568b6fa1f023bbbccee381a6deeb413578bf (diff) |
Merge branch 'master' of scm.dev.nokia.troll.no:research/opencl
-rw-r--r-- | src/opencl/opencl.pro | 3 | ||||
-rw-r--r-- | src/opencl/qcldevice.cpp | 6 | ||||
-rw-r--r-- | src/opencl/qcldevice.h | 2 | ||||
-rw-r--r-- | src/opencl/qclevent.cpp | 30 | ||||
-rw-r--r-- | src/opencl/qclevent.h | 1 | ||||
-rw-r--r-- | src/opencl/qclplatform.cpp | 2 | ||||
-rw-r--r-- | src/opencl/qclvector.cpp | 70 | ||||
-rw-r--r-- | src/opencl/qclvector.h | 40 | ||||
-rw-r--r-- | src/opencl/qclworksize.cpp (renamed from src/opencl/qclworksize.qdoc) | 69 | ||||
-rw-r--r-- | src/opencl/qclworksize.h | 14 | ||||
-rw-r--r-- | src/openclgl/qclcontextgl.cpp | 2 | ||||
-rw-r--r-- | tests/auto/qcl/tst_qcl.cpp | 30 | ||||
-rw-r--r-- | util/clinfo/clinfo.cpp | 3 |
13 files changed, 125 insertions, 147 deletions
diff --git a/src/opencl/opencl.pro b/src/opencl/opencl.pro index c7dd01e..00f4c0e 100644 --- a/src/opencl/opencl.pro +++ b/src/opencl/opencl.pro @@ -56,7 +56,8 @@ SOURCES += \ qclplatform.cpp \ qclprogram.cpp \ qclsampler.cpp \ - qclvector.cpp + qclvector.cpp \ + qclworksize.cpp HEADERS += $$PRIVATE_HEADERS DEFINES += QT_BUILD_CL_LIB diff --git a/src/opencl/qcldevice.cpp b/src/opencl/qcldevice.cpp index 4c138b1..fe120c9 100644 --- a/src/opencl/qcldevice.cpp +++ b/src/opencl/qcldevice.cpp @@ -361,9 +361,9 @@ QCLWorkSize QCLDevice::maximumWorkItemSize() const \sa maximumWorkItemSize() */ -int QCLDevice::maximumWorkItemsPerGroup() const +size_t QCLDevice::maximumWorkItemsPerGroup() const { - return int(qt_cl_paramSize(m_id, CL_DEVICE_MAX_WORK_GROUP_SIZE)); + return qt_cl_paramSize(m_id, CL_DEVICE_MAX_WORK_GROUP_SIZE); } /*! @@ -872,7 +872,7 @@ bool qt_cl_has_extension(const char *list, size_t listLen, const char *name) } /*! - Returns this if this device has an extension called \a name; + Returns true if this device has an extension called \a name; false otherwise. This function is more efficient than checking for \a name diff --git a/src/opencl/qcldevice.h b/src/opencl/qcldevice.h index 335114b..241496e 100644 --- a/src/opencl/qcldevice.h +++ b/src/opencl/qcldevice.h @@ -87,7 +87,7 @@ public: QSysInfo::Endian byteOrder() const; QCLWorkSize maximumWorkItemSize() const; - int maximumWorkItemsPerGroup() const; + size_t maximumWorkItemsPerGroup() const; bool hasImage2D() const; bool hasImage3D() const; diff --git a/src/opencl/qclevent.cpp b/src/opencl/qclevent.cpp index f751186..e7d7d1e 100644 --- a/src/opencl/qclevent.cpp +++ b/src/opencl/qclevent.cpp @@ -236,6 +236,22 @@ cl_int QCLEvent::status() const } /*! + Returns the type of command that generated this event. +*/ +cl_command_type QCLEvent::commandType() const +{ + if (!m_id) + return 0; + cl_command_type type; + cl_int error = clGetEventInfo + (m_id, CL_EVENT_COMMAND_TYPE, sizeof(type), &type, 0); + if (error != CL_SUCCESS) + return 0; + else + return type; +} + +/*! Waits for this event to be signalled as finished. The calling thread is blocked until the event is signalled. This function returns immediately if the event is null. @@ -426,18 +442,8 @@ QDebug operator<<(QDebug dbg, const QCLEvent &event) dbg << "QCLEvent()"; return dbg; } - cl_command_type command; - cl_int status; - if (clGetEventInfo(id, CL_EVENT_COMMAND_TYPE, - sizeof(command), &command, 0) != CL_SUCCESS) { - dbg << "QCLEvent(invalid)"; - return dbg; - } - if (clGetEventInfo(id, CL_EVENT_COMMAND_EXECUTION_STATUS, - sizeof(status), &status, 0) != CL_SUCCESS) { - dbg << "QCLEvent(invalid)"; - return dbg; - } + cl_command_type command = event.commandType(); + cl_int status = event.status(); const char *commandName; switch (command) { case CL_COMMAND_NDRANGE_KERNEL: diff --git a/src/opencl/qclevent.h b/src/opencl/qclevent.h index ce6da59..3755846 100644 --- a/src/opencl/qclevent.h +++ b/src/opencl/qclevent.h @@ -73,6 +73,7 @@ public: bool isErrored() const { return status() < 0; } cl_int status() const; + cl_command_type commandType() const; void waitForFinished(); diff --git a/src/opencl/qclplatform.cpp b/src/opencl/qclplatform.cpp index 742e1f7..3739f0e 100644 --- a/src/opencl/qclplatform.cpp +++ b/src/opencl/qclplatform.cpp @@ -214,7 +214,7 @@ QStringList QCLPlatform::extensions() const bool qt_cl_has_extension(const char *list, size_t listLen, const char *name); /*! - Returns this if this platform has an extension called \a name; + Returns true if this platform has an extension called \a name; false otherwise. This function is more efficient than checking for \a name diff --git a/src/opencl/qclvector.cpp b/src/opencl/qclvector.cpp index b53c3da..10b60c0 100644 --- a/src/opencl/qclvector.cpp +++ b/src/opencl/qclvector.cpp @@ -55,11 +55,10 @@ QT_BEGIN_NAMESPACE OpenCL buffer object to make it appear as a host-accessible array of elements of type T. - Whenever the host CPU calls operator[]() or map(), the - array's contents are mapped into host-accessible memory for - direct access. When the host calls unmap() or sets the vector - on a QCLKernel as an argument, the data is copied back to the - OpenCL compute device (e.g., the GPU). + Whenever the host CPU calls operator[](), the array's contents + are copied into host-accessible memory for direct access. When the + host sets the vector on a QCLKernel as an argument, the data is + copied back to the OpenCL compute device (e.g., the GPU). The type T is restricted to primitive and movable types that do not require explicit construction, destruction, or operator=(). @@ -390,60 +389,22 @@ cl_mem QCLVectorBase::kernelArg() const \fn T &QCLVector::operator[](int index) Returns a reference to the element at \a index in this OpenCL vector. - The vector will be mapped into host memory if necessary. - - \sa map() + The vector will be copied to host memory if necessary. */ /*! \fn const T &QCLVector::operator[](int index) const Returns a const reference to the element at \a index in this - OpenCL vector. The vector will be mapped into host memory + OpenCL vector. The vector will be copied to host memory if necessary. - - \sa map() -*/ - -/*! - \fn void QCLVector::map() - - Maps this OpenCL vector into the host CPU's address space so that - its contents can be read or written. Once the host CPU no longer - needs to access the vector's contents, it should call unmap(). - - This function does nothing if the vector is already mapped. - - The vector will be implicitly unmapped if it is set on a QCLKernel - as an argument. - - \sa unmap(), isMapped() -*/ - -/*! - \fn void QCLVector::unmap() - - Unmaps this OpenCL vector from the host CPU's address space. - The data can then be accessed by a QCLKernel running on the - OpenCL compute device. - - \sa map(), isMapped() -*/ - -/*! - \fn bool QCLVector::isMapped() const - - Returns true if this vector is mapped into the host CPU's - address space; false otherwise. - - \sa map(), unmap() */ /*! \fn void QCLVector::read(T *data, int count, int offset) Reads the \a count elements starting \a offset in this vector - into \a data. The vector does not need to be mapped. + into \a data. \sa write() */ @@ -452,7 +413,6 @@ cl_mem QCLVectorBase::kernelArg() const \fn void QCLVector::write(const T *data, int count, int offset) Writes the \a count elements from \a data to \a offset in this vector. - The vector does not need to be mapped. \sa read() */ @@ -462,22 +422,6 @@ cl_mem QCLVectorBase::kernelArg() const \overload Writes the contents of \a data to \a offset in this vector. - The vector does not need to be mapped. -*/ - -/*! - \fn T *QCLVector::data() const - - Returns a pointer to the first element in the vector. - The vector will be mapped into host memory if necessary. - - \sa map() -*/ - -/*! - \fn cl_mem QCLVector::memoryId() const - - Returns the native OpenCL memory buffer identifier for this vector. */ /*! diff --git a/src/opencl/qclvector.h b/src/opencl/qclvector.h index cd95d80..1864e85 100644 --- a/src/opencl/qclvector.h +++ b/src/opencl/qclvector.h @@ -107,19 +107,11 @@ public: T &operator[](int index); const T &operator[](int index) const; - void map(); - void unmap(); - bool isMapped() const; - void read(T *data, int count, int offset = 0); void write(const T *data, int count, int offset = 0); void write(const QVector<T> &data, int offset = 0); - T *data() const; - - cl_mem memoryId() const; QCLContext *context() const; - QCLBuffer toBuffer() const; private: @@ -189,24 +181,6 @@ Q_INLINE_TEMPLATE const T &QCLVector<T>::operator[](int index) const } template <typename T> -Q_INLINE_TEMPLATE void QCLVector<T>::map() -{ - QCLVectorBase::map(); -} - -template <typename T> -Q_INLINE_TEMPLATE void QCLVector<T>::unmap() -{ - QCLVectorBase::unmap(); -} - -template <typename T> -Q_INLINE_TEMPLATE bool QCLVector<T>::isMapped() const -{ - return m_mapped != 0; -} - -template <typename T> Q_INLINE_TEMPLATE void QCLVector<T>::write (const T *data, int count, int offset) { @@ -230,20 +204,6 @@ Q_INLINE_TEMPLATE void QCLVector<T>::write } template <typename T> -Q_INLINE_TEMPLATE T *QCLVector<T>::data() const -{ - if (!m_mapped) - map(); - return reinterpret_cast<T *>(m_mapped); -} - -template <typename T> -Q_INLINE_TEMPLATE cl_mem QCLVector<T>::memoryId() const -{ - return QCLVectorBase::memoryId(); -} - -template <typename T> Q_INLINE_TEMPLATE QCLContext *QCLVector<T>::context() const { return QCLVectorBase::context(); diff --git a/src/opencl/qclworksize.qdoc b/src/opencl/qclworksize.cpp index 122694d..79a351a 100644 --- a/src/opencl/qclworksize.qdoc +++ b/src/opencl/qclworksize.cpp @@ -39,6 +39,11 @@ ** ****************************************************************************/ +#include "qclworksize.h" +#include "qcldevice.h" + +QT_BEGIN_NAMESPACE + /*! \class QCLWorkSize \brief The QCLWorkSize class defines the size of an item of work for an OpenCL kernel. @@ -84,18 +89,21 @@ \fn QCLWorkSize::QCLWorkSize(size_t size) Constructs a single-dimensional work size with width() set to \a size. + The height() and depth() will be set to 1. */ /*! \fn QCLWorkSize::QCLWorkSize(size_t width, size_t height) Constructs a two-dimensional work size of \a width x \a height. + The depth() will be set to 1. */ /*! \fn QCLWorkSize::QCLWorkSize(const QSize &size) Constructs a two-dimensional work size set to \a size. + The depth() will be set to 1. */ /*! @@ -152,3 +160,64 @@ \sa operator==() */ + +static size_t qt_gcd_of_size(size_t x, size_t y) +{ + size_t remainder; + while ((remainder = x % y) != 0) { + x = y; + y = remainder; + } + return y; +} + +/*! + Returns the best-fit local work size that evenly divides this work + size and fits within the maximums defined by \a maxWorkItemSize + and \a maxItemsPerGroup. + + This function is typically used to convert an arbitrary global + work size on a QCLKernel into a compatible local work size. + + \sa QCLKernel::setLocalWorkSize() +*/ +QCLWorkSize QCLWorkSize::toLocalWorkSize + (const QCLWorkSize &maxWorkItemSize, size_t maxItemsPerGroup) const +{ + // Adjust for the maximum work item size in each dimension. + size_t width = m_dim >= 1 ? qt_gcd_of_size(m_sizes[0], maxWorkItemSize.width()) : 1; + size_t height = m_dim >= 2 ? qt_gcd_of_size(m_sizes[1], maxWorkItemSize.height()) : 1; + size_t depth = m_dim >= 3 ? qt_gcd_of_size(m_sizes[2], maxWorkItemSize.depth()) : 1; + + // Reduce in size by a factor of 2 until underneath the maximum group size. + while (maxItemsPerGroup && (width * height * depth) > maxItemsPerGroup) { + width = (width > 1) ? (width / 2) : 1; + height = (height > 1) ? (height / 2) : 1; + depth = (depth > 1) ? (depth / 2) : 1; + } + + // Return the final result. + if (m_dim >= 3) + return QCLWorkSize(width, height, depth); + else if (m_dim >= 2) + return QCLWorkSize(width, height); + else + return QCLWorkSize(width); +} + +/*! + Returns the best-fit local work size that evenly divides this + work size and fits within the maximum work group size of \a device. + + This function is typically used to convert an arbitrary global + work size on a QCLKernel into a compatible local work size. + + \sa QCLKernel::setLocalWorkSize() +*/ +QCLWorkSize QCLWorkSize::toLocalWorkSize(const QCLDevice &device) const +{ + return toLocalWorkSize(device.maximumWorkItemSize(), + device.maximumWorkItemsPerGroup()); +} + +QT_END_NAMESPACE diff --git a/src/opencl/qclworksize.h b/src/opencl/qclworksize.h index 719338c..0c5541e 100644 --- a/src/opencl/qclworksize.h +++ b/src/opencl/qclworksize.h @@ -51,17 +51,19 @@ QT_BEGIN_NAMESPACE QT_MODULE(CL) +class QCLDevice; + class Q_CL_EXPORT QCLWorkSize { public: QCLWorkSize() - : m_dim(1) { m_sizes[0] = 1; m_sizes[1] = 0; m_sizes[2] = 0; } + : m_dim(1) { m_sizes[0] = 1; m_sizes[1] = 1; m_sizes[2] = 1; } QCLWorkSize(size_t size) - : m_dim(1) { m_sizes[0] = size; m_sizes[1] = 0; m_sizes[2] = 0; } + : m_dim(1) { m_sizes[0] = size; m_sizes[1] = 1; m_sizes[2] = 1; } QCLWorkSize(size_t width, size_t height) - : m_dim(2) { m_sizes[0] = width; m_sizes[1] = height; m_sizes[2] = 0; } + : m_dim(2) { m_sizes[0] = width; m_sizes[1] = height; m_sizes[2] = 1; } QCLWorkSize(const QSize &size) - : m_dim(2) { m_sizes[0] = size.width(); m_sizes[1] = size.height(); m_sizes[2] = 0; } + : m_dim(2) { m_sizes[0] = size.width(); m_sizes[1] = size.height(); m_sizes[2] = 1; } QCLWorkSize(size_t width, size_t height, size_t depth) : m_dim(3) { m_sizes[0] = width; m_sizes[1] = height; m_sizes[2] = depth; } @@ -76,6 +78,10 @@ public: bool operator==(const QCLWorkSize &other) const; bool operator!=(const QCLWorkSize &other) const; + QCLWorkSize toLocalWorkSize + (const QCLWorkSize &maxWorkItemSize, size_t maxItemsPerGroup) const; + QCLWorkSize toLocalWorkSize(const QCLDevice &device) const; + private: size_t m_dim; size_t m_sizes[3]; diff --git a/src/openclgl/qclcontextgl.cpp b/src/openclgl/qclcontextgl.cpp index d6235e2..53bcbb5 100644 --- a/src/openclgl/qclcontextgl.cpp +++ b/src/openclgl/qclcontextgl.cpp @@ -225,7 +225,7 @@ bool QCLContextGL::create(const QCLPlatform &platform) (q_PFNCLGETGLCONTEXTINFOKHR)clGetExtensionFunctionAddress ("clGetGLContextInfoKHR"); if (getGLContextInfo && hasSharing) { - cl_uint size; + size_t size; cl_device_id currentDev; if(getGLContextInfo(properties.data(), CL_DEVICES_FOR_GL_CONTEXT_KHR, diff --git a/tests/auto/qcl/tst_qcl.cpp b/tests/auto/qcl/tst_qcl.cpp index 066ff88..c0f3c4e 100644 --- a/tests/auto/qcl/tst_qcl.cpp +++ b/tests/auto/qcl/tst_qcl.cpp @@ -377,46 +377,37 @@ void tst_QCL::vectorBuffer() QVERIFY(vector1.isNull()); QVERIFY(vector1.isEmpty()); QCOMPARE(vector1.size(), 0); - QVERIFY(vector1.memoryId() == 0); + QVERIFY(vector1.toBuffer().memoryId() == 0); QVERIFY(vector1.context() == 0); - QVERIFY(!vector1.isMapped()); vector1 = context.createVector<float>(100); QVERIFY(!vector1.isNull()); QVERIFY(!vector1.isEmpty()); QCOMPARE(vector1.size(), 100); - QVERIFY(vector1.memoryId() != 0); + QVERIFY(vector1.toBuffer().memoryId() != 0); QVERIFY(vector1.context() == &context); - QVERIFY(!vector1.isMapped()); for (int index = 0; index < 100; ++index) vector1[index] = float(index); - QVERIFY(vector1.isMapped()); - vector1.unmap(); - QVERIFY(!vector1.isMapped()); for (int index = 0; index < 100; ++index) QCOMPARE(vector1[index], float(index)); - QVERIFY(vector1.isMapped()); QCLKernel addToVector = program.createKernel("addToVector"); addToVector.setGlobalWorkSize(vector1.size()); addToVector(vector1, 42.0f); - QVERIFY(!vector1.isMapped()); for (int index = 0; index < 100; ++index) { QCOMPARE(constVectorAt(vector1, index), float(index + 42)); QCOMPARE(vector1[index], float(index + 42)); } - QVERIFY(vector1.isMapped()); vector1.release(); QVERIFY(vector1.isNull()); QVERIFY(vector1.isEmpty()); QCOMPARE(vector1.size(), 0); - QVERIFY(vector1.memoryId() == 0); + QVERIFY(vector1.toBuffer().memoryId() == 0); QVERIFY(vector1.context() == 0); - QVERIFY(!vector1.isMapped()); } void tst_QCL::eventProfiling() @@ -429,7 +420,6 @@ void tst_QCL::eventProfiling() QCLVector<float> vector1 = context.createVector<float>(20000); for (int index = 0; index < vector1.size(); ++index) vector1[index] = float(index); - vector1.unmap(); QCLKernel addToVector = program.createKernel("addToVector"); addToVector.setGlobalWorkSize(vector1.size()); @@ -531,26 +521,26 @@ void tst_QCL::workSize() QCLWorkSize size; QVERIFY(size.dimensions() == 1); QVERIFY(size.width() == 1); - QVERIFY(size.height() == 0); - QVERIFY(size.depth() == 0); + QVERIFY(size.height() == 1); + QVERIFY(size.depth() == 1); QCLWorkSize size1(42); QVERIFY(size1.dimensions() == 1); QVERIFY(size1.width() == 42); - QVERIFY(size1.height() == 0); - QVERIFY(size1.depth() == 0); + QVERIFY(size1.height() == 1); + QVERIFY(size1.depth() == 1); QCLWorkSize size2(42, 63); QVERIFY(size2.dimensions() == 2); QVERIFY(size2.width() == 42); QVERIFY(size2.height() == 63); - QVERIFY(size2.depth() == 0); + QVERIFY(size2.depth() == 1); QCLWorkSize size2b(QSize(63, 42)); QVERIFY(size2b.dimensions() == 2); QVERIFY(size2b.width() == 63); QVERIFY(size2b.height() == 42); - QVERIFY(size2b.depth() == 0); + QVERIFY(size2b.depth() == 1); QCLWorkSize size3(42, 63, 12); QVERIFY(size3.dimensions() == 3); @@ -577,7 +567,7 @@ void tst_QCL::workSize() QVERIFY(size4.dimensions() == 2); QVERIFY(size4.width() == 42); QVERIFY(size4.height() == 63); - QVERIFY(size4.depth() == 0); + QVERIFY(size4.depth() == 1); QVERIFY(size4.width() == size4.sizes()[0]); QVERIFY(size4.height() == size4.sizes()[1]); diff --git a/util/clinfo/clinfo.cpp b/util/clinfo/clinfo.cpp index 73443cd..aa7519a 100644 --- a/util/clinfo/clinfo.cpp +++ b/util/clinfo/clinfo.cpp @@ -100,7 +100,8 @@ int main(int argc, char *argv[]) printf(" Max Work Size : %ux%ux%u\n", uint(size.width()), uint(size.height()), uint(size.depth())); - printf(" Max Items/Group : %d\n", dev.maximumWorkItemsPerGroup()); + printf(" Max Items/Group : %u\n", + uint(dev.maximumWorkItemsPerGroup())); printf(" Local Memory : "); printMemorySize(dev.localMemorySize()); printf(" Global Memory : "); |