// b3RadixSort32CL.h
  1. #ifndef B3_RADIXSORT32_H
  2. #define B3_RADIXSORT32_H
  3. #include "b3OpenCLArray.h"
  4. struct b3SortData
  5. {
  6. union
  7. {
  8. unsigned int m_key;
  9. unsigned int x;
  10. };
  11. union
  12. {
  13. unsigned int m_value;
  14. unsigned int y;
  15. };
  16. };
  17. #include "b3BufferInfoCL.h"
  18. class b3RadixSort32CL
  19. {
  20. b3OpenCLArray<unsigned int>* m_workBuffer1;
  21. b3OpenCLArray<unsigned int>* m_workBuffer2;
  22. b3OpenCLArray<b3SortData>* m_workBuffer3;
  23. b3OpenCLArray<b3SortData>* m_workBuffer4;
  24. b3OpenCLArray<unsigned int>* m_workBuffer3a;
  25. b3OpenCLArray<unsigned int>* m_workBuffer4a;
  26. cl_command_queue m_commandQueue;
  27. cl_kernel m_streamCountSortDataKernel;
  28. cl_kernel m_streamCountKernel;
  29. cl_kernel m_prefixScanKernel;
  30. cl_kernel m_sortAndScatterSortDataKernel;
  31. cl_kernel m_sortAndScatterKernel;
  32. bool m_deviceCPU;
  33. class b3PrefixScanCL* m_scan;
  34. class b3FillCL* m_fill;
  35. public:
  36. struct b3ConstData
  37. {
  38. int m_n;
  39. int m_nWGs;
  40. int m_startBit;
  41. int m_nBlocksPerWG;
  42. };
  43. enum
  44. {
  45. DATA_ALIGNMENT = 256,
  46. WG_SIZE = 64,
  47. BLOCK_SIZE = 256,
  48. ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE/WG_SIZE),
  49. BITS_PER_PASS = 4,
  50. NUM_BUCKET=(1<<BITS_PER_PASS),
  51. // if you change this, change nPerWI in kernel as well
  52. NUM_WGS = 20*6, // cypress
  53. // NUM_WGS = 24*6, // cayman
  54. // NUM_WGS = 32*4, // nv
  55. };
  56. private:
  57. public:
  58. b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity =0);
  59. virtual ~b3RadixSort32CL();
  60. void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn,
  61. b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32);
  62. ///keys only
  63. void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32 );
  64. void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32 );
  65. void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
  66. void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32);
  67. };
  68. #endif //B3_RADIXSORT32_H