mali_kbase_gpuprops.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. /*
  2. *
  3. * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
  4. *
  5. * This program is free software and is provided to you under the terms of the
  6. * GNU General Public License version 2 as published by the Free Software
  7. * Foundation, and any use by you of this program is subject to the terms
  8. * of such GNU licence.
  9. *
  10. * A copy of the licence is included with the program, and can also be obtained
  11. * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  12. * Boston, MA 02110-1301, USA.
  13. *
  14. */
  15. /*
  16. * Base kernel property query APIs
  17. */
  18. #include <mali_kbase.h>
  19. #include <mali_midg_regmap.h>
  20. #include <mali_kbase_gpuprops.h>
  21. #include <mali_kbase_config_defaults.h>
  22. #include <mali_kbase_hwaccess_gpuprops.h>
  23. #include <linux/clk.h>
  /**
   * KBASE_UBFX32 - Unsigned bitfield extract from a 32-bit value.
   * @value:  Source value; it is cast to u32 before being shifted.
   * @offset: Index of the lowest bit to extract (bit 0 is the LSB).
   * @size:   Width of the extracted field, in bits.
   *
   * Context: callers must keep @offset + @size <= 32.
   *
   * The mask is derived from an unsigned long long constant so that a @size of
   * 32 still yields an all-ones 32-bit mask.
   *
   * Return: Bits [@offset, @offset + @size) of @value, right-aligned.
   */
  /* from mali_cdsb.h */
  #define KBASE_UBFX32(value, offset, size) \
      ((u32)((1ULL << (u32)(size)) - 1) & ((u32)(value) >> (u32)(offset)))
  37. int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
  38. {
  39. kbase_gpu_clk_speed_func get_gpu_speed_mhz;
  40. u32 gpu_speed_mhz;
  41. int rc = 1;
  42. KBASE_DEBUG_ASSERT(kctx != NULL);
  43. KBASE_DEBUG_ASSERT(kbase_props != NULL);
  44. /* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
  45. * If that function fails, or the function is not provided by the system integrator, we report the maximum
  46. * GPU speed as specified by GPU_FREQ_KHZ_MAX.
  47. */
  48. get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
  49. if (get_gpu_speed_mhz != NULL) {
  50. rc = get_gpu_speed_mhz(&gpu_speed_mhz);
  51. #ifdef CONFIG_MALI_DEBUG
  52. /* Issue a warning message when the reported GPU speed falls outside the min/max range */
  53. if (rc == 0) {
  54. u32 gpu_speed_khz = gpu_speed_mhz * 1000;
  55. if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
  56. gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
  57. dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
  58. (unsigned long)gpu_speed_khz,
  59. (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
  60. (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
  61. }
  62. #endif // ifdef CONFIG_MALI_DEBUG
  63. }
  64. if (kctx->kbdev->clock) {
  65. gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
  66. rc = 0;
  67. }
  68. if (rc != 0)
  69. gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
  70. kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
  71. memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
  72. /* Before API 8.2 they expect L3 cache info here, which was always 0 */
  73. if (kctx->api_version < KBASE_API_VERSION(8, 2))
  74. kbase_props->props.raw_props.suspend_size = 0;
  75. return 0;
  76. }
  77. static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
  78. {
  79. struct mali_base_gpu_coherent_group *current_group;
  80. u64 group_present;
  81. u64 group_mask;
  82. u64 first_set, first_set_prev;
  83. u32 num_groups = 0;
  84. KBASE_DEBUG_ASSERT(props != NULL);
  85. props->coherency_info.coherency = props->raw_props.mem_features;
  86. props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
  87. if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
  88. /* Group is l2 coherent */
  89. group_present = props->raw_props.l2_present;
  90. } else {
  91. /* Group is l1 coherent */
  92. group_present = props->raw_props.shader_present;
  93. }
  94. /*
  95. * The coherent group mask can be computed from the l2 present
  96. * register.
  97. *
  98. * For the coherent group n:
  99. * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
  100. * where first_set is group_present with only its nth set-bit kept
  101. * (i.e. the position from where a new group starts).
  102. *
  103. * For instance if the groups are l2 coherent and l2_present=0x0..01111:
  104. * The first mask is:
  105. * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
  106. * = (0x0..010 - 1) & ~(0x0..01 - 1)
  107. * = 0x0..00f
  108. * The second mask is:
  109. * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
  110. * = (0x0..100 - 1) & ~(0x0..010 - 1)
  111. * = 0x0..0f0
  112. * And so on until all the bits from group_present have been cleared
  113. * (i.e. there is no group left).
  114. */
  115. current_group = props->coherency_info.group;
  116. first_set = group_present & ~(group_present - 1);
  117. while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
  118. group_present -= first_set; /* Clear the current group bit */
  119. first_set_prev = first_set;
  120. first_set = group_present & ~(group_present - 1);
  121. group_mask = (first_set - 1) & ~(first_set_prev - 1);
  122. /* Populate the coherent_group structure for each group */
  123. current_group->core_mask = group_mask & props->raw_props.shader_present;
  124. current_group->num_cores = hweight64(current_group->core_mask);
  125. num_groups++;
  126. current_group++;
  127. }
  128. if (group_present != 0)
  129. pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
  130. props->coherency_info.num_groups = num_groups;
  131. }
  132. /**
  133. * kbase_gpuprops_get_props - Get the GPU configuration
  134. * @gpu_props: The &base_gpu_props structure
  135. * @kbdev: The &struct kbase_device structure for the device
  136. *
  137. * Fill the &base_gpu_props structure with values from the GPU configuration
  138. * registers. Only the raw properties are filled in this function
  139. */
  140. static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
  141. {
  142. struct kbase_gpuprops_regdump regdump;
  143. int i;
  144. KBASE_DEBUG_ASSERT(kbdev != NULL);
  145. KBASE_DEBUG_ASSERT(gpu_props != NULL);
  146. /* Dump relevant registers */
  147. kbase_backend_gpuprops_get(kbdev, &regdump);
  148. gpu_props->raw_props.gpu_id = regdump.gpu_id;
  149. gpu_props->raw_props.tiler_features = regdump.tiler_features;
  150. gpu_props->raw_props.mem_features = regdump.mem_features;
  151. gpu_props->raw_props.mmu_features = regdump.mmu_features;
  152. gpu_props->raw_props.l2_features = regdump.l2_features;
  153. gpu_props->raw_props.suspend_size = regdump.suspend_size;
  154. gpu_props->raw_props.as_present = regdump.as_present;
  155. gpu_props->raw_props.js_present = regdump.js_present;
  156. gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
  157. gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
  158. gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
  159. for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
  160. gpu_props->raw_props.js_features[i] = regdump.js_features[i];
  161. for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
  162. gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
  163. gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
  164. gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
  165. gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
  166. gpu_props->raw_props.thread_features = regdump.thread_features;
  167. }
  168. /**
  169. * kbase_gpuprops_calculate_props - Calculate the derived properties
  170. * @gpu_props: The &base_gpu_props structure
  171. * @kbdev: The &struct kbase_device structure for the device
  172. *
  173. * Fill the &base_gpu_props structure with values derived from the GPU
  174. * configuration registers
  175. */
  176. static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
  177. {
  178. int i;
  179. /* Populate the base_gpu_props structure */
  180. gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
  181. gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
  182. gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
  183. gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
  184. gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
  185. gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
  186. for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
  187. gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
  188. gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
  189. gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
  190. /* Field with number of l2 slices is added to MEM_FEATURES register
  191. * since t76x. Below code assumes that for older GPU reserved bits will
  192. * be read as zero. */
  193. gpu_props->l2_props.num_l2_slices =
  194. KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
  195. gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
  196. gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
  197. if (gpu_props->raw_props.thread_max_threads == 0)
  198. gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
  199. else
  200. gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
  201. if (gpu_props->raw_props.thread_max_workgroup_size == 0)
  202. gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
  203. else
  204. gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
  205. if (gpu_props->raw_props.thread_max_barrier_size == 0)
  206. gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
  207. else
  208. gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
  209. gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
  210. gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
  211. gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
  212. gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
  213. /* If values are not specified, then use defaults */
  214. if (gpu_props->thread_props.max_registers == 0) {
  215. gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
  216. gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
  217. gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
  218. }
  219. /* Initialize the coherent_group structure for each group */
  220. kbase_gpuprops_construct_coherent_groups(gpu_props);
  221. }
  222. void kbase_gpuprops_set(struct kbase_device *kbdev)
  223. {
  224. struct kbase_gpu_props *gpu_props;
  225. struct gpu_raw_gpu_props *raw;
  226. KBASE_DEBUG_ASSERT(kbdev != NULL);
  227. gpu_props = &kbdev->gpu_props;
  228. raw = &gpu_props->props.raw_props;
  229. /* Initialize the base_gpu_props structure from the hardware */
  230. kbase_gpuprops_get_props(&gpu_props->props, kbdev);
  231. /* Populate the derived properties */
  232. kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
  233. /* Populate kbase-only fields */
  234. gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
  235. gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
  236. gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
  237. gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
  238. gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
  239. gpu_props->num_cores = hweight64(raw->shader_present);
  240. gpu_props->num_core_groups = hweight64(raw->l2_present);
  241. gpu_props->num_address_spaces = hweight32(raw->as_present);
  242. gpu_props->num_job_slots = hweight32(raw->js_present);
  243. }
  244. void kbase_gpuprops_set_features(struct kbase_device *kbdev)
  245. {
  246. base_gpu_props *gpu_props;
  247. struct kbase_gpuprops_regdump regdump;
  248. gpu_props = &kbdev->gpu_props.props;
  249. /* Dump relevant registers */
  250. kbase_backend_gpuprops_get_features(kbdev, &regdump);
  251. /*
  252. * Copy the raw value from the register, later this will get turned
  253. * into the selected coherency mode.
  254. */
  255. gpu_props->raw_props.coherency_mode = regdump.coherency_features;
  256. }