diff options
author | Rhys Weatherley <rhys.weatherley@nokia.com> | 2010-11-18 09:11:17 +1000 |
---|---|---|
committer | Rhys Weatherley <rhys.weatherley@nokia.com> | 2010-11-18 09:11:17 +1000 |
commit | af723d7ca0902618dc6da1509b79a815d96e9382 (patch) | |
tree | 538f99cb59038339cdc6183296241267cb380ee0 | |
parent | 866e29b8ad73f876cbba1001c8766b96ce63d9e0 (diff) |
Adjust best local size for small max-work-sizes
-rw-r--r-- | src/opencl/qclkernel.cpp | 22 |
1 files changed, 16 insertions, 6 deletions
diff --git a/src/opencl/qclkernel.cpp b/src/opencl/qclkernel.cpp index 942ef23..58a0db8 100644 --- a/src/opencl/qclkernel.cpp +++ b/src/opencl/qclkernel.cpp @@ -484,26 +484,36 @@ void QCLKernel::setLocalWorkSize(const QCLWorkSize &size) /*! Returns the recommended best local work size for 2D image processing - on this kernel. Default value is 8x8. + on this kernel. Default value is 8x8 unless the maximum work size + is not large enough to accommodate 8x8 items. \sa bestLocalWorkSizeImage3D() */ QCLWorkSize QCLKernel::bestLocalWorkSizeImage2D() const { - // TODO - need some way to determine this from the driver. - return QCLWorkSize(8, 8); + QList<QCLDevice> devices = program().devices(); + size_t maxItems = devices.isEmpty() ? 1 : devices.at(0). maximumWorkItemsPerGroup(); + size_t size = 8; + while (size > 1 && (size * size) > maxItems) + size /= 2; + return QCLWorkSize(size, size); } /*! Returns the recommended best local work size for 3D image processing - on this kernel. Default value is 8x8x8. + on this kernel. Default value is 8x8x8 unless the maximum work size + is not large enough to accommodate 8x8x8 items. \sa bestLocalWorkSizeImage2D() */ QCLWorkSize QCLKernel::bestLocalWorkSizeImage3D() const { - // TODO - need some way to determine this from the driver. - return QCLWorkSize(8, 8, 8); + QList<QCLDevice> devices = program().devices(); + size_t maxItems = devices.isEmpty() ? 1 : devices.at(0). maximumWorkItemsPerGroup(); + size_t size = 8; + while (size > 1 && (size * size * size) > maxItems) + size /= 2; + return QCLWorkSize(size, size, size); } /*! |