/*
 * Copyright (c) 2022, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "aom_dsp/pyramid.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/bitops.h"
#include "aom_util/aom_thread.h"

// TODO(rachelbarker): Move needed code from av1/ to aom_dsp/
#include "av1/common/resize.h"

#include <assert.h>
#include <string.h>
// Lifecycle:
// * Frame buffer alloc code calls aom_get_pyramid_alloc_size()
//   to work out how much space is needed for a given number of pyramid
//   levels. This is counted in the size checked against the max allocation
//   limit
// * Then calls aom_alloc_pyramid() to actually create the pyramid
// * Pyramid is initially marked as invalid (no data)
// * Whenever pyramid is needed, we check the valid flag. If set, use existing
//   data. If not set, compute full pyramid
// * Whenever frame buffer is reused, clear the valid flag
// * Whenever frame buffer is resized, reallocate pyramid
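//
// A minimal usage sketch (illustrative only; `w`, `h`, `n_levels`,
// `is_16bit`, `frame` and `bit_depth` are assumed to come from the caller):
//
//   size_t extra = aom_get_pyramid_alloc_size(w, h, n_levels, is_16bit);
//   // ... count `extra` against the max allocation limit ...
//   ImagePyramid *pyr = aom_alloc_pyramid(w, h, n_levels, is_16bit);
//   if (!pyr) return AOM_CODEC_MEM_ERROR;
//   aom_compute_pyramid(frame, bit_depth, pyr);  // computed on first use
//   aom_invalidate_pyramid(pyr);                 // frame buffer reused
//   aom_free_pyramid(pyr);                       // frame buffer destroyed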
size_t aom_get_pyramid_alloc_size(int width, int height, int n_levels,
                                  bool image_is_16bit) {
  // Limit number of levels on small frames
  const int msb = get_msb(AOMMIN(width, height));
  const int max_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);
  n_levels = AOMMIN(n_levels, max_levels);

  size_t alloc_size = 0;
  alloc_size += sizeof(ImagePyramid);
  alloc_size += n_levels * sizeof(PyramidLayer);

  // Calculate how much memory is needed for downscaled frame buffers
  size_t buffer_size = 0;
  // Work out if we need to allocate a few extra bytes for alignment.
  // aom_memalign() will ensure that the start of the allocation is aligned
  // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel
  // to be aligned, not the first byte of the allocation.
  //
  // In the loop below, we ensure that the stride of every image is a multiple
  // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will
  // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the
  // first pixel in the first pyramid layer aligned properly, that will
  // automatically mean that the first pixel of every row of every layer is
  // properly aligned too.
  //
  // Thus all we need to consider is the first pixel in the first layer.
  // This is located at offset
  //   extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING
  // bytes into the buffer. Since level_stride is a multiple of
  // PYRAMID_ALIGNMENT, we can ignore that. So we need
  //   extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT
  //
  // To solve this, we can round PYRAMID_PADDING up to the next multiple
  // of PYRAMID_ALIGNMENT, then subtract the original value to calculate
  // how many extra bytes are needed.
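  //
  // Worked example (illustrative values only; the real constants live in
  // aom_dsp/pyramid.h): if PYRAMID_PADDING were 16 and PYRAMID_ALIGNMENT
  // were 32, then
  //   first_px_offset = (16 + 31) & ~31 = 32
  //   extra_bytes     = 32 - 16        = 16
  // i.e. 16 bytes of slack are enough to land the first image pixel on a
  // 32-byte boundary.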
  size_t first_px_offset =
      (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);
  size_t extra_bytes = first_px_offset - PYRAMID_PADDING;
  buffer_size += extra_bytes;

  // If the original image is stored in an 8-bit buffer, then we can point the
  // lowest pyramid level at that buffer rather than allocating a new one.
  int first_allocated_level = image_is_16bit ? 0 : 1;

  for (int level = first_allocated_level; level < n_levels; level++) {
    int level_width = width >> level;
    int level_height = height >> level;

    // Allocate padding for each layer
    int padded_width = level_width + 2 * PYRAMID_PADDING;
    int padded_height = level_height + 2 * PYRAMID_PADDING;

    // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT
    // This ensures that, as long as the top-left pixel in this pyramid level
    // is properly aligned, then so will the leftmost pixel in every row of
    // the pyramid level.
    int level_stride =
        (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);

    buffer_size += level_stride * padded_height;
  }

  alloc_size += buffer_size;
  return alloc_size;
}
ImagePyramid *aom_alloc_pyramid(int width, int height, int n_levels,
                                bool image_is_16bit) {
  // Limit number of levels on small frames
  const int msb = get_msb(AOMMIN(width, height));
  const int max_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);
  n_levels = AOMMIN(n_levels, max_levels);

  ImagePyramid *pyr = aom_calloc(1, sizeof(*pyr));
  if (!pyr) {
    return NULL;
  }

  pyr->layers = aom_calloc(n_levels, sizeof(PyramidLayer));
  if (!pyr->layers) {
    aom_free(pyr);
    return NULL;
  }

  pyr->valid = false;
  pyr->n_levels = n_levels;

  // Compute sizes and offsets for each pyramid level
  // These are gathered up first, so that we can allocate all pyramid levels
  // in a single buffer
  size_t buffer_size = 0;
  size_t *layer_offsets = aom_calloc(n_levels, sizeof(size_t));
  if (!layer_offsets) {
    // Free the layers array before the pyramid struct which owns it,
    // to avoid a use-after-free
    aom_free(pyr->layers);
    aom_free(pyr);
    return NULL;
  }
  // Work out if we need to allocate a few extra bytes for alignment.
  // aom_memalign() will ensure that the start of the allocation is aligned
  // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel
  // to be aligned, not the first byte of the allocation.
  //
  // In the loop below, we ensure that the stride of every image is a multiple
  // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will
  // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the
  // first pixel in the first pyramid layer aligned properly, that will
  // automatically mean that the first pixel of every row of every layer is
  // properly aligned too.
  //
  // Thus all we need to consider is the first pixel in the first layer.
  // This is located at offset
  //   extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING
  // bytes into the buffer. Since level_stride is a multiple of
  // PYRAMID_ALIGNMENT, we can ignore that. So we need
  //   extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT
  //
  // To solve this, we can round PYRAMID_PADDING up to the next multiple
  // of PYRAMID_ALIGNMENT, then subtract the original value to calculate
  // how many extra bytes are needed.
  size_t first_px_offset =
      (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);
  size_t extra_bytes = first_px_offset - PYRAMID_PADDING;
  buffer_size += extra_bytes;

  // If the original image is stored in an 8-bit buffer, then we can point the
  // lowest pyramid level at that buffer rather than allocating a new one.
  int first_allocated_level = image_is_16bit ? 0 : 1;

  for (int level = first_allocated_level; level < n_levels; level++) {
    PyramidLayer *layer = &pyr->layers[level];

    int level_width = width >> level;
    int level_height = height >> level;

    // Allocate padding for each layer
    int padded_width = level_width + 2 * PYRAMID_PADDING;
    int padded_height = level_height + 2 * PYRAMID_PADDING;

    // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT
    // This ensures that, as long as the top-left pixel in this pyramid level
    // is properly aligned, then so will the leftmost pixel in every row of
    // the pyramid level.
    int level_stride =
        (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);

    size_t level_alloc_start = buffer_size;
    size_t level_start =
        level_alloc_start + PYRAMID_PADDING * level_stride + PYRAMID_PADDING;

    buffer_size += level_stride * padded_height;

    layer_offsets[level] = level_start;
    layer->width = level_width;
    layer->height = level_height;
    layer->stride = level_stride;
  }
  pyr->buffer_alloc = aom_memalign(
      PYRAMID_ALIGNMENT, buffer_size * sizeof(*pyr->buffer_alloc));
  if (!pyr->buffer_alloc) {
    // Free in reverse order of allocation, freeing `pyr` itself last so
    // that `pyr->layers` is not read after `pyr` has been freed
    aom_free(layer_offsets);
    aom_free(pyr->layers);
    aom_free(pyr);
    return NULL;
  }
  // Fill in pointers for each level
  // If image is 8-bit, then the lowest level is left unconfigured for now,
  // and will be set up properly when the pyramid is filled in
  for (int level = first_allocated_level; level < n_levels; level++) {
    PyramidLayer *layer = &pyr->layers[level];
    layer->buffer = pyr->buffer_alloc + layer_offsets[level];
  }

#if CONFIG_MULTITHREAD
  pthread_mutex_init(&pyr->mutex, NULL);
#endif  // CONFIG_MULTITHREAD

  aom_free(layer_offsets);
  return pyr;
}
// Fill the border region of a pyramid frame.
// This must be called after the main image area is filled out.
// `img_buf` should point to the first pixel in the image area,
// i.e. it should be pyr->layers[level].buffer.
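//
// Illustrative sketch of the padded layout produced below (not to scale):
//
//   +---------------------------------+
//   |  top border: copies of the      |
//   |  first row (with side padding)  |
//   +------+-----------------+--------+
//   | left |   image area    | right  |  left/right: each row's edge
//   | edge |                 | edge   |  pixel replicated sideways
//   +------+-----------------+--------+
//   |  bottom border: copies of the   |
//   |  last row (with side padding)   |
//   +---------------------------------+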
static INLINE void fill_border(uint8_t *img_buf, const int width,
                               const int height, const int stride) {
  // Fill left and right areas
  for (int row = 0; row < height; row++) {
    uint8_t *row_start = &img_buf[row * stride];
    uint8_t left_pixel = row_start[0];
    memset(row_start - PYRAMID_PADDING, left_pixel, PYRAMID_PADDING);
    uint8_t right_pixel = row_start[width - 1];
    memset(row_start + width, right_pixel, PYRAMID_PADDING);
  }

  // Fill top area
  for (int row = -PYRAMID_PADDING; row < 0; row++) {
    uint8_t *row_start = &img_buf[row * stride];
    memcpy(row_start - PYRAMID_PADDING, img_buf - PYRAMID_PADDING,
           width + 2 * PYRAMID_PADDING);
  }

  // Fill bottom area
  uint8_t *last_row_start = &img_buf[(height - 1) * stride];
  for (int row = height; row < height + PYRAMID_PADDING; row++) {
    uint8_t *row_start = &img_buf[row * stride];
    memcpy(row_start - PYRAMID_PADDING, last_row_start - PYRAMID_PADDING,
           width + 2 * PYRAMID_PADDING);
  }
}
// Compute coarse to fine pyramids for a frame
// This must only be called while holding frame_pyr->mutex
static INLINE void fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
                                ImagePyramid *frame_pyr) {
  int n_levels = frame_pyr->n_levels;
  const int frame_width = frame->y_crop_width;
  const int frame_height = frame->y_crop_height;
  const int frame_stride = frame->y_stride;
  assert((frame_width >> n_levels) >= 0);
  assert((frame_height >> n_levels) >= 0);

  PyramidLayer *first_layer = &frame_pyr->layers[0];
  if (frame->flags & YV12_FLAG_HIGHBITDEPTH) {
    // For frames stored in a 16-bit buffer, we need to downconvert to 8 bits
    assert(first_layer->width == frame_width);
    assert(first_layer->height == frame_height);

    uint16_t *frame_buffer = CONVERT_TO_SHORTPTR(frame->y_buffer);
    uint8_t *pyr_buffer = first_layer->buffer;
    int pyr_stride = first_layer->stride;
    for (int y = 0; y < frame_height; y++) {
      uint16_t *frame_row = frame_buffer + y * frame_stride;
      uint8_t *pyr_row = pyr_buffer + y * pyr_stride;
      for (int x = 0; x < frame_width; x++) {
        pyr_row[x] = frame_row[x] >> (bit_depth - 8);
      }
    }

    fill_border(pyr_buffer, frame_width, frame_height, pyr_stride);
  } else {
    // For frames stored in an 8-bit buffer, we need to configure the first
    // pyramid layer to point at the original image buffer
    first_layer->buffer = frame->y_buffer;
    first_layer->width = frame_width;
    first_layer->height = frame_height;
    first_layer->stride = frame_stride;
  }

  // Fill in the remaining levels through progressive downsampling
  for (int level = 1; level < n_levels; ++level) {
    PyramidLayer *prev_layer = &frame_pyr->layers[level - 1];
    uint8_t *prev_buffer = prev_layer->buffer;
    int prev_stride = prev_layer->stride;

    PyramidLayer *this_layer = &frame_pyr->layers[level];
    uint8_t *this_buffer = this_layer->buffer;
    int this_width = this_layer->width;
    int this_height = this_layer->height;
    int this_stride = this_layer->stride;
    // Compute this pyramid level by downsampling the previous level.
    //
    // We downsample by a factor of exactly 2, clipping the rightmost and
    // bottommost pixel off of the previous level if needed. We do this for
    // two main reasons:
    //
    // 1) In the disflow code, when stepping from a higher pyramid level to a
    //    lower pyramid level, we need to not just interpolate the flow field
    //    but also to scale each flow vector by the upsampling ratio.
    //    So it is much more convenient if this ratio is simply 2.
    //
    // 2) Up/downsampling by a factor of 2 can be implemented much more
    //    efficiently than up/downsampling by a generic ratio.
    // TODO(rachelbarker): Use optimized downsample-by-2 function
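    //
    // As an illustrative example: if the previous level is 101 x 57, this
    // level is 50 x 28, and the av1_resize_plane() call below reads only
    // the top-left 100 x 56 region of the previous level, clipping the
    // rightmost column and bottommost row.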
    av1_resize_plane(prev_buffer, this_height << 1, this_width << 1,
                     prev_stride, this_buffer, this_height, this_width,
                     this_stride);
    fill_border(this_buffer, this_width, this_height, this_stride);
  }
}
// Fill out a downsampling pyramid for a given frame.
//
// The top level (index 0) will always be an 8-bit copy of the input frame,
// regardless of the input bit depth. Additional levels are then downscaled
// by powers of 2.
//
// For small input frames, the number of levels actually constructed
// will be limited so that the smallest image is at least MIN_PYRAMID_SIZE
// pixels along each side.
//
// However, if the input frame has a side of length < MIN_PYRAMID_SIZE,
// we will still construct the top level.
void aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
                         ImagePyramid *pyr) {
  assert(pyr);

  // Per the comments in the ImagePyramid struct, we must take this mutex
  // before reading or writing the "valid" flag, and hold it while computing
  // the pyramid, to ensure proper behaviour if multiple threads call this
  // function simultaneously
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD

  if (!pyr->valid) {
    fill_pyramid(frame, bit_depth, pyr);
    pyr->valid = true;
  }

  // At this point, the pyramid is guaranteed to be valid, and can be safely
  // read from without holding the mutex any more
#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD
}
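// A hedged caller sketch (`source_frame` and `run_estimation_on_layer` are
// hypothetical names; only the aom_* calls above and below are real). Because
// the compute call is idempotent, concurrent callers are safe: whichever
// thread takes the mutex first fills the pyramid, and the rest see
// valid == true and return immediately.
//
//   aom_compute_pyramid(source_frame, bit_depth, pyr);
//   assert(aom_is_pyramid_valid(pyr));
//   for (int level = pyr->n_levels - 1; level >= 0; level--) {
//     run_estimation_on_layer(&pyr->layers[level]);  // coarse to fine
//   }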
#ifndef NDEBUG
// Check if a pyramid has already been computed.
// This is mostly a debug helper - as it is necessary to hold pyr->mutex
// while reading the valid flag, we cannot just write:
//   assert(pyr->valid);
// This function allows the check to be correctly written as:
//   assert(aom_is_pyramid_valid(pyr));
bool aom_is_pyramid_valid(ImagePyramid *pyr) {
  assert(pyr);

  // Per the comments in the ImagePyramid struct, we must take this mutex
  // before reading or writing the "valid" flag, and hold it while computing
  // the pyramid, to ensure proper behaviour if multiple threads call this
  // function simultaneously
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD

  bool valid = pyr->valid;

#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD

  return valid;
}
#endif  // NDEBUG
// Mark a pyramid as no longer containing valid data.
// This must be done whenever the corresponding frame buffer is reused
void aom_invalidate_pyramid(ImagePyramid *pyr) {
  if (pyr) {
#if CONFIG_MULTITHREAD
    pthread_mutex_lock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD
    pyr->valid = false;
#if CONFIG_MULTITHREAD
    pthread_mutex_unlock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD
  }
}
// Release the memory associated with a pyramid
void aom_free_pyramid(ImagePyramid *pyr) {
  if (pyr) {
#if CONFIG_MULTITHREAD
    pthread_mutex_destroy(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD
    aom_free(pyr->buffer_alloc);
    aom_free(pyr->layers);
    aom_free(pyr);
  }
}