noise_model.c 67 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693
  1. /*
  2. * Copyright (c) 2017, Alliance for Open Media. All rights reserved
  3. *
  4. * This source code is subject to the terms of the BSD 2 Clause License and
  5. * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
  6. * was not distributed with this source code in the LICENSE file, you can
  7. * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  8. * Media Patent License 1.0 was not distributed with this source code in the
  9. * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  10. */
  11. #include <math.h>
  12. #include <stdio.h>
  13. #include <stdlib.h>
  14. #include <string.h>
  15. #include "aom_dsp/aom_dsp_common.h"
  16. #include "aom_dsp/mathutils.h"
  17. #include "aom_dsp/noise_model.h"
  18. #include "aom_dsp/noise_util.h"
  19. #include "aom_mem/aom_mem.h"
  20. #define kLowPolyNumParams 3
  21. static const int kMaxLag = 4;
  22. // Defines a function that can be used to obtain the mean of a block for the
  23. // provided data type (uint8_t, or uint16_t)
  24. #define GET_BLOCK_MEAN(INT_TYPE, suffix) \
  25. static double get_block_mean_##suffix(const INT_TYPE *data, int w, int h, \
  26. int stride, int x_o, int y_o, \
  27. int block_size) { \
  28. const int max_h = AOMMIN(h - y_o, block_size); \
  29. const int max_w = AOMMIN(w - x_o, block_size); \
  30. double block_mean = 0; \
  31. for (int y = 0; y < max_h; ++y) { \
  32. for (int x = 0; x < max_w; ++x) { \
  33. block_mean += data[(y_o + y) * stride + x_o + x]; \
  34. } \
  35. } \
  36. return block_mean / (max_w * max_h); \
  37. }
  38. GET_BLOCK_MEAN(uint8_t, lowbd)
  39. GET_BLOCK_MEAN(uint16_t, highbd)
  40. static INLINE double get_block_mean(const uint8_t *data, int w, int h,
  41. int stride, int x_o, int y_o,
  42. int block_size, int use_highbd) {
  43. if (use_highbd)
  44. return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o,
  45. block_size);
  46. return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
  47. }
  48. // Defines a function that can be used to obtain the variance of a block
  49. // for the provided data type (uint8_t, or uint16_t)
  50. #define GET_NOISE_VAR(INT_TYPE, suffix) \
  51. static double get_noise_var_##suffix( \
  52. const INT_TYPE *data, const INT_TYPE *denoised, int stride, int w, \
  53. int h, int x_o, int y_o, int block_size_x, int block_size_y) { \
  54. const int max_h = AOMMIN(h - y_o, block_size_y); \
  55. const int max_w = AOMMIN(w - x_o, block_size_x); \
  56. double noise_var = 0; \
  57. double noise_mean = 0; \
  58. for (int y = 0; y < max_h; ++y) { \
  59. for (int x = 0; x < max_w; ++x) { \
  60. double noise = (double)data[(y_o + y) * stride + x_o + x] - \
  61. denoised[(y_o + y) * stride + x_o + x]; \
  62. noise_mean += noise; \
  63. noise_var += noise * noise; \
  64. } \
  65. } \
  66. noise_mean /= (max_w * max_h); \
  67. return noise_var / (max_w * max_h) - noise_mean * noise_mean; \
  68. }
  69. GET_NOISE_VAR(uint8_t, lowbd)
  70. GET_NOISE_VAR(uint16_t, highbd)
  71. static INLINE double get_noise_var(const uint8_t *data, const uint8_t *denoised,
  72. int w, int h, int stride, int x_o, int y_o,
  73. int block_size_x, int block_size_y,
  74. int use_highbd) {
  75. if (use_highbd)
  76. return get_noise_var_highbd((const uint16_t *)data,
  77. (const uint16_t *)denoised, w, h, stride, x_o,
  78. y_o, block_size_x, block_size_y);
  79. return get_noise_var_lowbd(data, denoised, w, h, stride, x_o, y_o,
  80. block_size_x, block_size_y);
  81. }
  82. static void equation_system_clear(aom_equation_system_t *eqns) {
  83. const int n = eqns->n;
  84. memset(eqns->A, 0, sizeof(*eqns->A) * n * n);
  85. memset(eqns->x, 0, sizeof(*eqns->x) * n);
  86. memset(eqns->b, 0, sizeof(*eqns->b) * n);
  87. }
  88. static void equation_system_copy(aom_equation_system_t *dst,
  89. const aom_equation_system_t *src) {
  90. const int n = dst->n;
  91. memcpy(dst->A, src->A, sizeof(*dst->A) * n * n);
  92. memcpy(dst->x, src->x, sizeof(*dst->x) * n);
  93. memcpy(dst->b, src->b, sizeof(*dst->b) * n);
  94. }
  95. static int equation_system_init(aom_equation_system_t *eqns, int n) {
  96. eqns->A = (double *)aom_malloc(sizeof(*eqns->A) * n * n);
  97. eqns->b = (double *)aom_malloc(sizeof(*eqns->b) * n);
  98. eqns->x = (double *)aom_malloc(sizeof(*eqns->x) * n);
  99. eqns->n = n;
  100. if (!eqns->A || !eqns->b || !eqns->x) {
  101. fprintf(stderr, "Failed to allocate system of equations of size %d\n", n);
  102. aom_free(eqns->A);
  103. aom_free(eqns->b);
  104. aom_free(eqns->x);
  105. memset(eqns, 0, sizeof(*eqns));
  106. return 0;
  107. }
  108. equation_system_clear(eqns);
  109. return 1;
  110. }
  111. static int equation_system_solve(aom_equation_system_t *eqns) {
  112. const int n = eqns->n;
  113. double *b = (double *)aom_malloc(sizeof(*b) * n);
  114. double *A = (double *)aom_malloc(sizeof(*A) * n * n);
  115. int ret = 0;
  116. if (A == NULL || b == NULL) {
  117. fprintf(stderr, "Unable to allocate temp values of size %dx%d\n", n, n);
  118. aom_free(b);
  119. aom_free(A);
  120. return 0;
  121. }
  122. memcpy(A, eqns->A, sizeof(*eqns->A) * n * n);
  123. memcpy(b, eqns->b, sizeof(*eqns->b) * n);
  124. ret = linsolve(n, A, eqns->n, b, eqns->x);
  125. aom_free(b);
  126. aom_free(A);
  127. if (ret == 0) {
  128. return 0;
  129. }
  130. return 1;
  131. }
  132. static void equation_system_add(aom_equation_system_t *dest,
  133. aom_equation_system_t *src) {
  134. const int n = dest->n;
  135. int i, j;
  136. for (i = 0; i < n; ++i) {
  137. for (j = 0; j < n; ++j) {
  138. dest->A[i * n + j] += src->A[i * n + j];
  139. }
  140. dest->b[i] += src->b[i];
  141. }
  142. }
  143. static void equation_system_free(aom_equation_system_t *eqns) {
  144. if (!eqns) return;
  145. aom_free(eqns->A);
  146. aom_free(eqns->b);
  147. aom_free(eqns->x);
  148. memset(eqns, 0, sizeof(*eqns));
  149. }
  150. static void noise_strength_solver_clear(aom_noise_strength_solver_t *solver) {
  151. equation_system_clear(&solver->eqns);
  152. solver->num_equations = 0;
  153. solver->total = 0;
  154. }
  155. static void noise_strength_solver_add(aom_noise_strength_solver_t *dest,
  156. aom_noise_strength_solver_t *src) {
  157. equation_system_add(&dest->eqns, &src->eqns);
  158. dest->num_equations += src->num_equations;
  159. dest->total += src->total;
  160. }
  161. // Return the number of coefficients required for the given parameters
  162. static int num_coeffs(const aom_noise_model_params_t params) {
  163. const int n = 2 * params.lag + 1;
  164. switch (params.shape) {
  165. case AOM_NOISE_SHAPE_DIAMOND: return params.lag * (params.lag + 1);
  166. case AOM_NOISE_SHAPE_SQUARE: return (n * n) / 2;
  167. }
  168. return 0;
  169. }
  170. static int noise_state_init(aom_noise_state_t *state, int n, int bit_depth) {
  171. const int kNumBins = 20;
  172. if (!equation_system_init(&state->eqns, n)) {
  173. fprintf(stderr, "Failed initialization noise state with size %d\n", n);
  174. return 0;
  175. }
  176. state->ar_gain = 1.0;
  177. state->num_observations = 0;
  178. return aom_noise_strength_solver_init(&state->strength_solver, kNumBins,
  179. bit_depth);
  180. }
  181. static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
  182. const double kTolerance = 1e-6;
  183. const int last = eqns->n - 1;
  184. // Set all of the AR coefficients to zero, but try to solve for correlation
  185. // with the luma channel
  186. memset(eqns->x, 0, sizeof(*eqns->x) * eqns->n);
  187. if (fabs(eqns->A[last * eqns->n + last]) > kTolerance) {
  188. eqns->x[last] = eqns->b[last] / eqns->A[last * eqns->n + last];
  189. }
  190. }
  191. int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points) {
  192. if (!lut) return 0;
  193. if (num_points <= 0) return 0;
  194. lut->num_points = 0;
  195. lut->points = (double(*)[2])aom_malloc(num_points * sizeof(*lut->points));
  196. if (!lut->points) return 0;
  197. lut->num_points = num_points;
  198. memset(lut->points, 0, sizeof(*lut->points) * num_points);
  199. return 1;
  200. }
  201. void aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut) {
  202. if (!lut) return;
  203. aom_free(lut->points);
  204. memset(lut, 0, sizeof(*lut));
  205. }
  206. double aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
  207. double x) {
  208. int i = 0;
  209. // Constant extrapolation for x < x_0.
  210. if (x < lut->points[0][0]) return lut->points[0][1];
  211. for (i = 0; i < lut->num_points - 1; ++i) {
  212. if (x >= lut->points[i][0] && x <= lut->points[i + 1][0]) {
  213. const double a =
  214. (x - lut->points[i][0]) / (lut->points[i + 1][0] - lut->points[i][0]);
  215. return lut->points[i + 1][1] * a + lut->points[i][1] * (1.0 - a);
  216. }
  217. }
  218. // Constant extrapolation for x > x_{n-1}
  219. return lut->points[lut->num_points - 1][1];
  220. }
  221. static double noise_strength_solver_get_bin_index(
  222. const aom_noise_strength_solver_t *solver, double value) {
  223. const double val =
  224. fclamp(value, solver->min_intensity, solver->max_intensity);
  225. const double range = solver->max_intensity - solver->min_intensity;
  226. return (solver->num_bins - 1) * (val - solver->min_intensity) / range;
  227. }
  228. static double noise_strength_solver_get_value(
  229. const aom_noise_strength_solver_t *solver, double x) {
  230. const double bin = noise_strength_solver_get_bin_index(solver, x);
  231. const int bin_i0 = (int)floor(bin);
  232. const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
  233. const double a = bin - bin_i0;
  234. return (1.0 - a) * solver->eqns.x[bin_i0] + a * solver->eqns.x[bin_i1];
  235. }
  236. void aom_noise_strength_solver_add_measurement(
  237. aom_noise_strength_solver_t *solver, double block_mean, double noise_std) {
  238. const double bin = noise_strength_solver_get_bin_index(solver, block_mean);
  239. const int bin_i0 = (int)floor(bin);
  240. const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
  241. const double a = bin - bin_i0;
  242. const int n = solver->num_bins;
  243. solver->eqns.A[bin_i0 * n + bin_i0] += (1.0 - a) * (1.0 - a);
  244. solver->eqns.A[bin_i1 * n + bin_i0] += a * (1.0 - a);
  245. solver->eqns.A[bin_i1 * n + bin_i1] += a * a;
  246. solver->eqns.A[bin_i0 * n + bin_i1] += a * (1.0 - a);
  247. solver->eqns.b[bin_i0] += (1.0 - a) * noise_std;
  248. solver->eqns.b[bin_i1] += a * noise_std;
  249. solver->total += noise_std;
  250. solver->num_equations++;
  251. }
  252. int aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver) {
  253. // Add regularization proportional to the number of constraints
  254. const int n = solver->num_bins;
  255. const double kAlpha = 2.0 * (double)(solver->num_equations) / n;
  256. int result = 0;
  257. double mean = 0;
  258. // Do this in a non-destructive manner so it is not confusing to the caller
  259. double *old_A = solver->eqns.A;
  260. double *A = (double *)aom_malloc(sizeof(*A) * n * n);
  261. if (!A) {
  262. fprintf(stderr, "Unable to allocate copy of A\n");
  263. return 0;
  264. }
  265. memcpy(A, old_A, sizeof(*A) * n * n);
  266. for (int i = 0; i < n; ++i) {
  267. const int i_lo = AOMMAX(0, i - 1);
  268. const int i_hi = AOMMIN(n - 1, i + 1);
  269. A[i * n + i_lo] -= kAlpha;
  270. A[i * n + i] += 2 * kAlpha;
  271. A[i * n + i_hi] -= kAlpha;
  272. }
  273. // Small regularization to give average noise strength
  274. mean = solver->total / solver->num_equations;
  275. for (int i = 0; i < n; ++i) {
  276. A[i * n + i] += 1.0 / 8192.;
  277. solver->eqns.b[i] += mean / 8192.;
  278. }
  279. solver->eqns.A = A;
  280. result = equation_system_solve(&solver->eqns);
  281. solver->eqns.A = old_A;
  282. aom_free(A);
  283. return result;
  284. }
  285. int aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
  286. int num_bins, int bit_depth) {
  287. if (!solver) return 0;
  288. memset(solver, 0, sizeof(*solver));
  289. solver->num_bins = num_bins;
  290. solver->min_intensity = 0;
  291. solver->max_intensity = (1 << bit_depth) - 1;
  292. solver->total = 0;
  293. solver->num_equations = 0;
  294. return equation_system_init(&solver->eqns, num_bins);
  295. }
  296. void aom_noise_strength_solver_free(aom_noise_strength_solver_t *solver) {
  297. if (!solver) return;
  298. equation_system_free(&solver->eqns);
  299. }
  300. double aom_noise_strength_solver_get_center(
  301. const aom_noise_strength_solver_t *solver, int i) {
  302. const double range = solver->max_intensity - solver->min_intensity;
  303. const int n = solver->num_bins;
  304. return ((double)i) / (n - 1) * range + solver->min_intensity;
  305. }
  306. // Computes the residual if a point were to be removed from the lut. This is
  307. // calculated as the area between the output of the solver and the line segment
  308. // that would be formed between [x_{i - 1}, x_{i + 1}).
  309. static void update_piecewise_linear_residual(
  310. const aom_noise_strength_solver_t *solver,
  311. const aom_noise_strength_lut_t *lut, double *residual, int start, int end) {
  312. const double dx = 255. / solver->num_bins;
  313. for (int i = AOMMAX(start, 1); i < AOMMIN(end, lut->num_points - 1); ++i) {
  314. const int lower = AOMMAX(0, (int)floor(noise_strength_solver_get_bin_index(
  315. solver, lut->points[i - 1][0])));
  316. const int upper = AOMMIN(solver->num_bins - 1,
  317. (int)ceil(noise_strength_solver_get_bin_index(
  318. solver, lut->points[i + 1][0])));
  319. double r = 0;
  320. for (int j = lower; j <= upper; ++j) {
  321. const double x = aom_noise_strength_solver_get_center(solver, j);
  322. if (x < lut->points[i - 1][0]) continue;
  323. if (x >= lut->points[i + 1][0]) continue;
  324. const double y = solver->eqns.x[j];
  325. const double a = (x - lut->points[i - 1][0]) /
  326. (lut->points[i + 1][0] - lut->points[i - 1][0]);
  327. const double estimate_y =
  328. lut->points[i - 1][1] * (1.0 - a) + lut->points[i + 1][1] * a;
  329. r += fabs(y - estimate_y);
  330. }
  331. residual[i] = r * dx;
  332. }
  333. }
  334. int aom_noise_strength_solver_fit_piecewise(
  335. const aom_noise_strength_solver_t *solver, int max_output_points,
  336. aom_noise_strength_lut_t *lut) {
  337. // The tolerance is normalized to be give consistent results between
  338. // different bit-depths.
  339. const double kTolerance = solver->max_intensity * 0.00625 / 255.0;
  340. if (!aom_noise_strength_lut_init(lut, solver->num_bins)) {
  341. fprintf(stderr, "Failed to init lut\n");
  342. return 0;
  343. }
  344. for (int i = 0; i < solver->num_bins; ++i) {
  345. lut->points[i][0] = aom_noise_strength_solver_get_center(solver, i);
  346. lut->points[i][1] = solver->eqns.x[i];
  347. }
  348. if (max_output_points < 0) {
  349. max_output_points = solver->num_bins;
  350. }
  351. double *residual = (double *)aom_malloc(solver->num_bins * sizeof(*residual));
  352. if (!residual) {
  353. aom_noise_strength_lut_free(lut);
  354. return 0;
  355. }
  356. memset(residual, 0, sizeof(*residual) * solver->num_bins);
  357. update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);
  358. // Greedily remove points if there are too many or if it doesn't hurt local
  359. // approximation (never remove the end points)
  360. while (lut->num_points > 2) {
  361. int min_index = 1;
  362. for (int j = 1; j < lut->num_points - 1; ++j) {
  363. if (residual[j] < residual[min_index]) {
  364. min_index = j;
  365. }
  366. }
  367. const double dx =
  368. lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
  369. const double avg_residual = residual[min_index] / dx;
  370. if (lut->num_points <= max_output_points && avg_residual > kTolerance) {
  371. break;
  372. }
  373. const int num_remaining = lut->num_points - min_index - 1;
  374. memmove(lut->points + min_index, lut->points + min_index + 1,
  375. sizeof(lut->points[0]) * num_remaining);
  376. lut->num_points--;
  377. update_piecewise_linear_residual(solver, lut, residual, min_index - 1,
  378. min_index + 1);
  379. }
  380. aom_free(residual);
  381. return 1;
  382. }
  383. int aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
  384. int block_size, int bit_depth, int use_highbd) {
  385. const int n = block_size * block_size;
  386. aom_equation_system_t eqns;
  387. double *AtA_inv = 0;
  388. double *A = 0;
  389. int x = 0, y = 0, i = 0, j = 0;
  390. block_finder->A = NULL;
  391. block_finder->AtA_inv = NULL;
  392. if (!equation_system_init(&eqns, kLowPolyNumParams)) {
  393. fprintf(stderr, "Failed to init equation system for block_size=%d\n",
  394. block_size);
  395. return 0;
  396. }
  397. AtA_inv = (double *)aom_malloc(kLowPolyNumParams * kLowPolyNumParams *
  398. sizeof(*AtA_inv));
  399. A = (double *)aom_malloc(kLowPolyNumParams * n * sizeof(*A));
  400. if (AtA_inv == NULL || A == NULL) {
  401. fprintf(stderr, "Failed to alloc A or AtA_inv for block_size=%d\n",
  402. block_size);
  403. aom_free(AtA_inv);
  404. aom_free(A);
  405. equation_system_free(&eqns);
  406. return 0;
  407. }
  408. block_finder->A = A;
  409. block_finder->AtA_inv = AtA_inv;
  410. block_finder->block_size = block_size;
  411. block_finder->normalization = (1 << bit_depth) - 1;
  412. block_finder->use_highbd = use_highbd;
  413. for (y = 0; y < block_size; ++y) {
  414. const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
  415. for (x = 0; x < block_size; ++x) {
  416. const double xd = ((double)x - block_size / 2.) / (block_size / 2.);
  417. const double coords[3] = { yd, xd, 1 };
  418. const int row = y * block_size + x;
  419. A[kLowPolyNumParams * row + 0] = yd;
  420. A[kLowPolyNumParams * row + 1] = xd;
  421. A[kLowPolyNumParams * row + 2] = 1;
  422. for (i = 0; i < kLowPolyNumParams; ++i) {
  423. for (j = 0; j < kLowPolyNumParams; ++j) {
  424. eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
  425. }
  426. }
  427. }
  428. }
  429. // Lazy inverse using existing equation solver.
  430. for (i = 0; i < kLowPolyNumParams; ++i) {
  431. memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
  432. eqns.b[i] = 1;
  433. equation_system_solve(&eqns);
  434. for (j = 0; j < kLowPolyNumParams; ++j) {
  435. AtA_inv[j * kLowPolyNumParams + i] = eqns.x[j];
  436. }
  437. }
  438. equation_system_free(&eqns);
  439. return 1;
  440. }
  441. void aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder) {
  442. if (!block_finder) return;
  443. aom_free(block_finder->A);
  444. aom_free(block_finder->AtA_inv);
  445. memset(block_finder, 0, sizeof(*block_finder));
  446. }
  447. void aom_flat_block_finder_extract_block(
  448. const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
  449. int w, int h, int stride, int offsx, int offsy, double *plane,
  450. double *block) {
  451. const int block_size = block_finder->block_size;
  452. const int n = block_size * block_size;
  453. const double *A = block_finder->A;
  454. const double *AtA_inv = block_finder->AtA_inv;
  455. double plane_coords[kLowPolyNumParams];
  456. double AtA_inv_b[kLowPolyNumParams];
  457. int xi, yi, i;
  458. if (block_finder->use_highbd) {
  459. const uint16_t *const data16 = (const uint16_t *const)data;
  460. for (yi = 0; yi < block_size; ++yi) {
  461. const int y = clamp(offsy + yi, 0, h - 1);
  462. for (xi = 0; xi < block_size; ++xi) {
  463. const int x = clamp(offsx + xi, 0, w - 1);
  464. block[yi * block_size + xi] =
  465. ((double)data16[y * stride + x]) / block_finder->normalization;
  466. }
  467. }
  468. } else {
  469. for (yi = 0; yi < block_size; ++yi) {
  470. const int y = clamp(offsy + yi, 0, h - 1);
  471. for (xi = 0; xi < block_size; ++xi) {
  472. const int x = clamp(offsx + xi, 0, w - 1);
  473. block[yi * block_size + xi] =
  474. ((double)data[y * stride + x]) / block_finder->normalization;
  475. }
  476. }
  477. }
  478. multiply_mat(block, A, AtA_inv_b, 1, n, kLowPolyNumParams);
  479. multiply_mat(AtA_inv, AtA_inv_b, plane_coords, kLowPolyNumParams,
  480. kLowPolyNumParams, 1);
  481. multiply_mat(A, plane_coords, plane, n, kLowPolyNumParams, 1);
  482. for (i = 0; i < n; ++i) {
  483. block[i] -= plane[i];
  484. }
  485. }
  486. typedef struct {
  487. int index;
  488. float score;
  489. } index_and_score_t;
  490. static int compare_scores(const void *a, const void *b) {
  491. const float diff =
  492. ((index_and_score_t *)a)->score - ((index_and_score_t *)b)->score;
  493. if (diff < 0)
  494. return -1;
  495. else if (diff > 0)
  496. return 1;
  497. return 0;
  498. }
  499. int aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
  500. const uint8_t *const data, int w, int h,
  501. int stride, uint8_t *flat_blocks) {
  502. // The gradient-based features used in this code are based on:
  503. // A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
  504. // correlation for improved video denoising," 2012 19th, ICIP.
  505. // The thresholds are more lenient to allow for correct grain modeling
  506. // if extreme cases.
  507. const int block_size = block_finder->block_size;
  508. const int n = block_size * block_size;
  509. const double kTraceThreshold = 0.15 / (32 * 32);
  510. const double kRatioThreshold = 1.25;
  511. const double kNormThreshold = 0.08 / (32 * 32);
  512. const double kVarThreshold = 0.005 / (double)n;
  513. const int num_blocks_w = (w + block_size - 1) / block_size;
  514. const int num_blocks_h = (h + block_size - 1) / block_size;
  515. int num_flat = 0;
  516. double *plane = (double *)aom_malloc(n * sizeof(*plane));
  517. double *block = (double *)aom_malloc(n * sizeof(*block));
  518. index_and_score_t *scores = (index_and_score_t *)aom_malloc(
  519. num_blocks_w * num_blocks_h * sizeof(*scores));
  520. if (plane == NULL || block == NULL || scores == NULL) {
  521. fprintf(stderr, "Failed to allocate memory for block of size %d\n", n);
  522. aom_free(plane);
  523. aom_free(block);
  524. aom_free(scores);
  525. return -1;
  526. }
  527. #ifdef NOISE_MODEL_LOG_SCORE
  528. fprintf(stderr, "score = [");
  529. #endif
  530. for (int by = 0; by < num_blocks_h; ++by) {
  531. for (int bx = 0; bx < num_blocks_w; ++bx) {
  532. // Compute gradient covariance matrix.
  533. aom_flat_block_finder_extract_block(block_finder, data, w, h, stride,
  534. bx * block_size, by * block_size,
  535. plane, block);
  536. double Gxx = 0, Gxy = 0, Gyy = 0;
  537. double mean = 0;
  538. double var = 0;
  539. for (int yi = 1; yi < block_size - 1; ++yi) {
  540. for (int xi = 1; xi < block_size - 1; ++xi) {
  541. const double gx = (block[yi * block_size + xi + 1] -
  542. block[yi * block_size + xi - 1]) /
  543. 2;
  544. const double gy = (block[yi * block_size + xi + block_size] -
  545. block[yi * block_size + xi - block_size]) /
  546. 2;
  547. Gxx += gx * gx;
  548. Gxy += gx * gy;
  549. Gyy += gy * gy;
  550. const double value = block[yi * block_size + xi];
  551. mean += value;
  552. var += value * value;
  553. }
  554. }
  555. mean /= (block_size - 2) * (block_size - 2);
  556. // Normalize gradients by block_size.
  557. Gxx /= ((block_size - 2) * (block_size - 2));
  558. Gxy /= ((block_size - 2) * (block_size - 2));
  559. Gyy /= ((block_size - 2) * (block_size - 2));
  560. var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
  561. {
  562. const double trace = Gxx + Gyy;
  563. const double det = Gxx * Gyy - Gxy * Gxy;
  564. const double e1 = (trace + sqrt(trace * trace - 4 * det)) / 2.;
  565. const double e2 = (trace - sqrt(trace * trace - 4 * det)) / 2.;
  566. const double norm = e1; // Spectral norm
  567. const double ratio = (e1 / AOMMAX(e2, 1e-6));
  568. const int is_flat = (trace < kTraceThreshold) &&
  569. (ratio < kRatioThreshold) &&
  570. (norm < kNormThreshold) && (var > kVarThreshold);
  571. // The following weights are used to combine the above features to give
  572. // a sigmoid score for flatness. If the input was normalized to [0,100]
  573. // the magnitude of these values would be close to 1 (e.g., weights
  574. // corresponding to variance would be a factor of 10000x smaller).
  575. // The weights are given in the following order:
  576. // [{var}, {ratio}, {trace}, {norm}, offset]
  577. // with one of the most discriminative being simply the variance.
  578. const double weights[5] = { -6682, -0.2056, 13087, -12434, 2.5694 };
  579. double sum_weights = weights[0] * var + weights[1] * ratio +
  580. weights[2] * trace + weights[3] * norm +
  581. weights[4];
  582. // clamp the value to [-25.0, 100.0] to prevent overflow
  583. sum_weights = fclamp(sum_weights, -25.0, 100.0);
  584. const float score = (float)(1.0 / (1 + exp(-sum_weights)));
  585. flat_blocks[by * num_blocks_w + bx] = is_flat ? 255 : 0;
  586. scores[by * num_blocks_w + bx].score = var > kVarThreshold ? score : 0;
  587. scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
  588. #ifdef NOISE_MODEL_LOG_SCORE
  589. fprintf(stderr, "%g %g %g %g %g %d ", score, var, ratio, trace, norm,
  590. is_flat);
  591. #endif
  592. num_flat += is_flat;
  593. }
  594. }
  595. #ifdef NOISE_MODEL_LOG_SCORE
  596. fprintf(stderr, "\n");
  597. #endif
  598. }
  599. #ifdef NOISE_MODEL_LOG_SCORE
  600. fprintf(stderr, "];\n");
  601. #endif
  602. // Find the top-scored blocks (most likely to be flat) and set the flat blocks
  603. // be the union of the thresholded results and the top 10th percentile of the
  604. // scored results.
  605. qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
  606. const int top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
  607. const float score_threshold = scores[top_nth_percentile].score;
  608. for (int i = 0; i < num_blocks_w * num_blocks_h; ++i) {
  609. if (scores[i].score >= score_threshold) {
  610. num_flat += flat_blocks[scores[i].index] == 0;
  611. flat_blocks[scores[i].index] |= 1;
  612. }
  613. }
  614. aom_free(block);
  615. aom_free(plane);
  616. aom_free(scores);
  617. return num_flat;
  618. }
  619. int aom_noise_model_init(aom_noise_model_t *model,
  620. const aom_noise_model_params_t params) {
  621. const int n = num_coeffs(params);
  622. const int lag = params.lag;
  623. const int bit_depth = params.bit_depth;
  624. int x = 0, y = 0, i = 0, c = 0;
  625. memset(model, 0, sizeof(*model));
  626. if (params.lag < 1) {
  627. fprintf(stderr, "Invalid noise param: lag = %d must be >= 1\n", params.lag);
  628. return 0;
  629. }
  630. if (params.lag > kMaxLag) {
  631. fprintf(stderr, "Invalid noise param: lag = %d must be <= %d\n", params.lag,
  632. kMaxLag);
  633. return 0;
  634. }
  635. if (!(params.bit_depth == 8 || params.bit_depth == 10 ||
  636. params.bit_depth == 12)) {
  637. return 0;
  638. }
  639. memcpy(&model->params, &params, sizeof(params));
  640. for (c = 0; c < 3; ++c) {
  641. if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
  642. fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
  643. aom_noise_model_free(model);
  644. return 0;
  645. }
  646. if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
  647. fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
  648. aom_noise_model_free(model);
  649. return 0;
  650. }
  651. }
  652. model->n = n;
  653. model->coords = (int(*)[2])aom_malloc(sizeof(*model->coords) * n);
  654. if (!model->coords) {
  655. aom_noise_model_free(model);
  656. return 0;
  657. }
  658. for (y = -lag; y <= 0; ++y) {
  659. const int max_x = y == 0 ? -1 : lag;
  660. for (x = -lag; x <= max_x; ++x) {
  661. switch (params.shape) {
  662. case AOM_NOISE_SHAPE_DIAMOND:
  663. if (abs(x) <= y + lag) {
  664. model->coords[i][0] = x;
  665. model->coords[i][1] = y;
  666. ++i;
  667. }
  668. break;
  669. case AOM_NOISE_SHAPE_SQUARE:
  670. model->coords[i][0] = x;
  671. model->coords[i][1] = y;
  672. ++i;
  673. break;
  674. default:
  675. fprintf(stderr, "Invalid shape\n");
  676. aom_noise_model_free(model);
  677. return 0;
  678. }
  679. }
  680. }
  681. assert(i == n);
  682. return 1;
  683. }
  684. void aom_noise_model_free(aom_noise_model_t *model) {
  685. int c = 0;
  686. if (!model) return;
  687. aom_free(model->coords);
  688. for (c = 0; c < 3; ++c) {
  689. equation_system_free(&model->latest_state[c].eqns);
  690. equation_system_free(&model->combined_state[c].eqns);
  691. equation_system_free(&model->latest_state[c].strength_solver.eqns);
  692. equation_system_free(&model->combined_state[c].strength_solver.eqns);
  693. }
  694. memset(model, 0, sizeof(*model));
  695. }
  696. // Extracts the neighborhood defined by coords around point (x, y) from
  697. // the difference between the data and denoised images. Also extracts the
  698. // entry (possibly downsampled) for (x, y) in the alt_data (e.g., luma).
  699. #define EXTRACT_AR_ROW(INT_TYPE, suffix) \
  700. static double extract_ar_row_##suffix( \
  701. int(*coords)[2], int num_coords, const INT_TYPE *const data, \
  702. const INT_TYPE *const denoised, int stride, int sub_log2[2], \
  703. const INT_TYPE *const alt_data, const INT_TYPE *const alt_denoised, \
  704. int alt_stride, int x, int y, double *buffer) { \
  705. for (int i = 0; i < num_coords; ++i) { \
  706. const int x_i = x + coords[i][0], y_i = y + coords[i][1]; \
  707. buffer[i] = \
  708. (double)data[y_i * stride + x_i] - denoised[y_i * stride + x_i]; \
  709. } \
  710. const double val = \
  711. (double)data[y * stride + x] - denoised[y * stride + x]; \
  712. \
  713. if (alt_data && alt_denoised) { \
  714. double avg_data = 0, avg_denoised = 0; \
  715. int num_samples = 0; \
  716. for (int dy_i = 0; dy_i < (1 << sub_log2[1]); dy_i++) { \
  717. const int y_up = (y << sub_log2[1]) + dy_i; \
  718. for (int dx_i = 0; dx_i < (1 << sub_log2[0]); dx_i++) { \
  719. const int x_up = (x << sub_log2[0]) + dx_i; \
  720. avg_data += alt_data[y_up * alt_stride + x_up]; \
  721. avg_denoised += alt_denoised[y_up * alt_stride + x_up]; \
  722. num_samples++; \
  723. } \
  724. } \
  725. buffer[num_coords] = (avg_data - avg_denoised) / num_samples; \
  726. } \
  727. return val; \
  728. }
  729. EXTRACT_AR_ROW(uint8_t, lowbd)
  730. EXTRACT_AR_ROW(uint16_t, highbd)
  731. static int add_block_observations(
  732. aom_noise_model_t *noise_model, int c, const uint8_t *const data,
  733. const uint8_t *const denoised, int w, int h, int stride, int sub_log2[2],
  734. const uint8_t *const alt_data, const uint8_t *const alt_denoised,
  735. int alt_stride, const uint8_t *const flat_blocks, int block_size,
  736. int num_blocks_w, int num_blocks_h) {
  737. const int lag = noise_model->params.lag;
  738. const int num_coords = noise_model->n;
  739. const double normalization = (1 << noise_model->params.bit_depth) - 1;
  740. double *A = noise_model->latest_state[c].eqns.A;
  741. double *b = noise_model->latest_state[c].eqns.b;
  742. double *buffer = (double *)aom_malloc(sizeof(*buffer) * (num_coords + 1));
  743. const int n = noise_model->latest_state[c].eqns.n;
  744. if (!buffer) {
  745. fprintf(stderr, "Unable to allocate buffer of size %d\n", num_coords + 1);
  746. return 0;
  747. }
  748. for (int by = 0; by < num_blocks_h; ++by) {
  749. const int y_o = by * (block_size >> sub_log2[1]);
  750. for (int bx = 0; bx < num_blocks_w; ++bx) {
  751. const int x_o = bx * (block_size >> sub_log2[0]);
  752. if (!flat_blocks[by * num_blocks_w + bx]) {
  753. continue;
  754. }
  755. int y_start =
  756. (by > 0 && flat_blocks[(by - 1) * num_blocks_w + bx]) ? 0 : lag;
  757. int x_start =
  758. (bx > 0 && flat_blocks[by * num_blocks_w + bx - 1]) ? 0 : lag;
  759. int y_end = AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
  760. block_size >> sub_log2[1]);
  761. int x_end = AOMMIN(
  762. (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]) - lag,
  763. (bx + 1 < num_blocks_w && flat_blocks[by * num_blocks_w + bx + 1])
  764. ? (block_size >> sub_log2[0])
  765. : ((block_size >> sub_log2[0]) - lag));
  766. for (int y = y_start; y < y_end; ++y) {
  767. for (int x = x_start; x < x_end; ++x) {
  768. const double val =
  769. noise_model->params.use_highbd
  770. ? extract_ar_row_highbd(noise_model->coords, num_coords,
  771. (const uint16_t *const)data,
  772. (const uint16_t *const)denoised,
  773. stride, sub_log2,
  774. (const uint16_t *const)alt_data,
  775. (const uint16_t *const)alt_denoised,
  776. alt_stride, x + x_o, y + y_o, buffer)
  777. : extract_ar_row_lowbd(noise_model->coords, num_coords, data,
  778. denoised, stride, sub_log2, alt_data,
  779. alt_denoised, alt_stride, x + x_o,
  780. y + y_o, buffer);
  781. for (int i = 0; i < n; ++i) {
  782. for (int j = 0; j < n; ++j) {
  783. A[i * n + j] +=
  784. (buffer[i] * buffer[j]) / (normalization * normalization);
  785. }
  786. b[i] += (buffer[i] * val) / (normalization * normalization);
  787. }
  788. noise_model->latest_state[c].num_observations++;
  789. }
  790. }
  791. }
  792. }
  793. aom_free(buffer);
  794. return 1;
  795. }
  796. static void add_noise_std_observations(
  797. aom_noise_model_t *noise_model, int c, const double *coeffs,
  798. const uint8_t *const data, const uint8_t *const denoised, int w, int h,
  799. int stride, int sub_log2[2], const uint8_t *const alt_data, int alt_stride,
  800. const uint8_t *const flat_blocks, int block_size, int num_blocks_w,
  801. int num_blocks_h) {
  802. const int num_coords = noise_model->n;
  803. aom_noise_strength_solver_t *noise_strength_solver =
  804. &noise_model->latest_state[c].strength_solver;
  805. const aom_noise_strength_solver_t *noise_strength_luma =
  806. &noise_model->latest_state[0].strength_solver;
  807. const double luma_gain = noise_model->latest_state[0].ar_gain;
  808. const double noise_gain = noise_model->latest_state[c].ar_gain;
  809. for (int by = 0; by < num_blocks_h; ++by) {
  810. const int y_o = by * (block_size >> sub_log2[1]);
  811. for (int bx = 0; bx < num_blocks_w; ++bx) {
  812. const int x_o = bx * (block_size >> sub_log2[0]);
  813. if (!flat_blocks[by * num_blocks_w + bx]) {
  814. continue;
  815. }
  816. const int num_samples_h =
  817. AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
  818. block_size >> sub_log2[1]);
  819. const int num_samples_w =
  820. AOMMIN((w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]),
  821. (block_size >> sub_log2[0]));
  822. // Make sure that we have a reasonable amount of samples to consider the
  823. // block
  824. if (num_samples_w * num_samples_h > block_size) {
  825. const double block_mean = get_block_mean(
  826. alt_data ? alt_data : data, w, h, alt_data ? alt_stride : stride,
  827. x_o << sub_log2[0], y_o << sub_log2[1], block_size,
  828. noise_model->params.use_highbd);
  829. const double noise_var = get_noise_var(
  830. data, denoised, stride, w >> sub_log2[0], h >> sub_log2[1], x_o,
  831. y_o, block_size >> sub_log2[0], block_size >> sub_log2[1],
  832. noise_model->params.use_highbd);
  833. // We want to remove the part of the noise that came from being
  834. // correlated with luma. Note that the noise solver for luma must
  835. // have already been run.
  836. const double luma_strength =
  837. c > 0 ? luma_gain * noise_strength_solver_get_value(
  838. noise_strength_luma, block_mean)
  839. : 0;
  840. const double corr = c > 0 ? coeffs[num_coords] : 0;
  841. // Chroma noise:
  842. // N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
  843. // The uncorrelated component:
  844. // uncorr_var = noise_var - (corr * luma_strength)^2
  845. // But don't allow fully correlated noise (hence the max), since the
  846. // synthesis cannot model it.
  847. const double uncorr_std = sqrt(
  848. AOMMAX(noise_var / 16, noise_var - pow(corr * luma_strength, 2)));
  849. // After we've removed correlation with luma, undo the gain that will
  850. // come from running the IIR filter.
  851. const double adjusted_strength = uncorr_std / noise_gain;
  852. aom_noise_strength_solver_add_measurement(
  853. noise_strength_solver, block_mean, adjusted_strength);
  854. }
  855. }
  856. }
  857. }
  858. // Return true if the noise estimate appears to be different from the combined
  859. // (multi-frame) estimate. The difference is measured by checking whether the
  860. // AR coefficients have diverged (using a threshold on normalized cross
  861. // correlation), or whether the noise strength has changed.
  862. static int is_noise_model_different(aom_noise_model_t *const noise_model) {
  863. // These thresholds are kind of arbitrary and will likely need further tuning
  864. // (or exported as parameters). The threshold on noise strength is a weighted
  865. // difference between the noise strength histograms
  866. const double kCoeffThreshold = 0.9;
  867. const double kStrengthThreshold =
  868. 0.005 * (1 << (noise_model->params.bit_depth - 8));
  869. for (int c = 0; c < 1; ++c) {
  870. const double corr =
  871. aom_normalized_cross_correlation(noise_model->latest_state[c].eqns.x,
  872. noise_model->combined_state[c].eqns.x,
  873. noise_model->combined_state[c].eqns.n);
  874. if (corr < kCoeffThreshold) return 1;
  875. const double dx =
  876. 1.0 / noise_model->latest_state[c].strength_solver.num_bins;
  877. const aom_equation_system_t *latest_eqns =
  878. &noise_model->latest_state[c].strength_solver.eqns;
  879. const aom_equation_system_t *combined_eqns =
  880. &noise_model->combined_state[c].strength_solver.eqns;
  881. double diff = 0;
  882. double total_weight = 0;
  883. for (int j = 0; j < latest_eqns->n; ++j) {
  884. double weight = 0;
  885. for (int i = 0; i < latest_eqns->n; ++i) {
  886. weight += latest_eqns->A[i * latest_eqns->n + j];
  887. }
  888. weight = sqrt(weight);
  889. diff += weight * fabs(latest_eqns->x[j] - combined_eqns->x[j]);
  890. total_weight += weight;
  891. }
  892. if (diff * dx / total_weight > kStrengthThreshold) return 1;
  893. }
  894. return 0;
  895. }
  896. static int ar_equation_system_solve(aom_noise_state_t *state, int is_chroma) {
  897. const int ret = equation_system_solve(&state->eqns);
  898. state->ar_gain = 1.0;
  899. if (!ret) return ret;
  900. // Update the AR gain from the equation system as it will be used to fit
  901. // the noise strength as a function of intensity. In the Yule-Walker
  902. // equations, the diagonal should be the variance of the correlated noise.
  903. // In the case of the least squares estimate, there will be some variability
  904. // in the diagonal. So use the mean of the diagonal as the estimate of
  905. // overall variance (this works for least squares or Yule-Walker formulation).
  906. double var = 0;
  907. const int n = state->eqns.n;
  908. for (int i = 0; i < (state->eqns.n - is_chroma); ++i) {
  909. var += state->eqns.A[i * n + i] / state->num_observations;
  910. }
  911. var /= (n - is_chroma);
  912. // Keep track of E(Y^2) = <b, x> + E(X^2)
  913. // In the case that we are using chroma and have an estimate of correlation
  914. // with luma we adjust that estimate slightly to remove the correlated bits by
  915. // subtracting out the last column of a scaled by our correlation estimate
  916. // from b. E(y^2) = <b - A(:, end)*x(end), x>
  917. double sum_covar = 0;
  918. for (int i = 0; i < state->eqns.n - is_chroma; ++i) {
  919. double bi = state->eqns.b[i];
  920. if (is_chroma) {
  921. bi -= state->eqns.A[i * n + (n - 1)] * state->eqns.x[n - 1];
  922. }
  923. sum_covar += (bi * state->eqns.x[i]) / state->num_observations;
  924. }
  925. // Now, get an estimate of the variance of uncorrelated noise signal and use
  926. // it to determine the gain of the AR filter.
  927. const double noise_var = AOMMAX(var - sum_covar, 1e-6);
  928. state->ar_gain = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
  929. return ret;
  930. }
  931. aom_noise_status_t aom_noise_model_update(
  932. aom_noise_model_t *const noise_model, const uint8_t *const data[3],
  933. const uint8_t *const denoised[3], int w, int h, int stride[3],
  934. int chroma_sub_log2[2], const uint8_t *const flat_blocks, int block_size) {
  935. const int num_blocks_w = (w + block_size - 1) / block_size;
  936. const int num_blocks_h = (h + block_size - 1) / block_size;
  937. int y_model_different = 0;
  938. int num_blocks = 0;
  939. int i = 0, channel = 0;
  940. if (block_size <= 1) {
  941. fprintf(stderr, "block_size = %d must be > 1\n", block_size);
  942. return AOM_NOISE_STATUS_INVALID_ARGUMENT;
  943. }
  944. if (block_size < noise_model->params.lag * 2 + 1) {
  945. fprintf(stderr, "block_size = %d must be >= %d\n", block_size,
  946. noise_model->params.lag * 2 + 1);
  947. return AOM_NOISE_STATUS_INVALID_ARGUMENT;
  948. }
  949. // Clear the latest equation system
  950. for (i = 0; i < 3; ++i) {
  951. equation_system_clear(&noise_model->latest_state[i].eqns);
  952. noise_model->latest_state[i].num_observations = 0;
  953. noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
  954. }
  955. // Check that we have enough flat blocks
  956. for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
  957. if (flat_blocks[i]) {
  958. num_blocks++;
  959. }
  960. }
  961. if (num_blocks <= 1) {
  962. fprintf(stderr, "Not enough flat blocks to update noise estimate\n");
  963. return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
  964. }
  965. for (channel = 0; channel < 3; ++channel) {
  966. int no_subsampling[2] = { 0, 0 };
  967. const uint8_t *alt_data = channel > 0 ? data[0] : 0;
  968. const uint8_t *alt_denoised = channel > 0 ? denoised[0] : 0;
  969. int *sub = channel > 0 ? chroma_sub_log2 : no_subsampling;
  970. const int is_chroma = channel != 0;
  971. if (!data[channel] || !denoised[channel]) break;
  972. if (!add_block_observations(noise_model, channel, data[channel],
  973. denoised[channel], w, h, stride[channel], sub,
  974. alt_data, alt_denoised, stride[0], flat_blocks,
  975. block_size, num_blocks_w, num_blocks_h)) {
  976. fprintf(stderr, "Adding block observation failed\n");
  977. return AOM_NOISE_STATUS_INTERNAL_ERROR;
  978. }
  979. if (!ar_equation_system_solve(&noise_model->latest_state[channel],
  980. is_chroma)) {
  981. if (is_chroma) {
  982. set_chroma_coefficient_fallback_soln(
  983. &noise_model->latest_state[channel].eqns);
  984. } else {
  985. fprintf(stderr, "Solving latest noise equation system failed %d!\n",
  986. channel);
  987. return AOM_NOISE_STATUS_INTERNAL_ERROR;
  988. }
  989. }
  990. add_noise_std_observations(
  991. noise_model, channel, noise_model->latest_state[channel].eqns.x,
  992. data[channel], denoised[channel], w, h, stride[channel], sub, alt_data,
  993. stride[0], flat_blocks, block_size, num_blocks_w, num_blocks_h);
  994. if (!aom_noise_strength_solver_solve(
  995. &noise_model->latest_state[channel].strength_solver)) {
  996. fprintf(stderr, "Solving latest noise strength failed!\n");
  997. return AOM_NOISE_STATUS_INTERNAL_ERROR;
  998. }
  999. // Check noise characteristics and return if error.
  1000. if (channel == 0 &&
  1001. noise_model->combined_state[channel].strength_solver.num_equations >
  1002. 0 &&
  1003. is_noise_model_different(noise_model)) {
  1004. y_model_different = 1;
  1005. }
  1006. // Don't update the combined stats if the y model is different.
  1007. if (y_model_different) continue;
  1008. noise_model->combined_state[channel].num_observations +=
  1009. noise_model->latest_state[channel].num_observations;
  1010. equation_system_add(&noise_model->combined_state[channel].eqns,
  1011. &noise_model->latest_state[channel].eqns);
  1012. if (!ar_equation_system_solve(&noise_model->combined_state[channel],
  1013. is_chroma)) {
  1014. if (is_chroma) {
  1015. set_chroma_coefficient_fallback_soln(
  1016. &noise_model->combined_state[channel].eqns);
  1017. } else {
  1018. fprintf(stderr, "Solving combined noise equation system failed %d!\n",
  1019. channel);
  1020. return AOM_NOISE_STATUS_INTERNAL_ERROR;
  1021. }
  1022. }
  1023. noise_strength_solver_add(
  1024. &noise_model->combined_state[channel].strength_solver,
  1025. &noise_model->latest_state[channel].strength_solver);
  1026. if (!aom_noise_strength_solver_solve(
  1027. &noise_model->combined_state[channel].strength_solver)) {
  1028. fprintf(stderr, "Solving combined noise strength failed!\n");
  1029. return AOM_NOISE_STATUS_INTERNAL_ERROR;
  1030. }
  1031. }
  1032. return y_model_different ? AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE
  1033. : AOM_NOISE_STATUS_OK;
  1034. }
  1035. void aom_noise_model_save_latest(aom_noise_model_t *noise_model) {
  1036. for (int c = 0; c < 3; c++) {
  1037. equation_system_copy(&noise_model->combined_state[c].eqns,
  1038. &noise_model->latest_state[c].eqns);
  1039. equation_system_copy(&noise_model->combined_state[c].strength_solver.eqns,
  1040. &noise_model->latest_state[c].strength_solver.eqns);
  1041. noise_model->combined_state[c].strength_solver.num_equations =
  1042. noise_model->latest_state[c].strength_solver.num_equations;
  1043. noise_model->combined_state[c].num_observations =
  1044. noise_model->latest_state[c].num_observations;
  1045. noise_model->combined_state[c].ar_gain =
  1046. noise_model->latest_state[c].ar_gain;
  1047. }
  1048. }
  1049. int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
  1050. aom_film_grain_t *film_grain) {
  1051. if (noise_model->params.lag > 3) {
  1052. fprintf(stderr, "params.lag = %d > 3\n", noise_model->params.lag);
  1053. return 0;
  1054. }
  1055. uint16_t random_seed = film_grain->random_seed;
  1056. memset(film_grain, 0, sizeof(*film_grain));
  1057. film_grain->random_seed = random_seed;
  1058. film_grain->apply_grain = 1;
  1059. film_grain->update_parameters = 1;
  1060. film_grain->ar_coeff_lag = noise_model->params.lag;
  1061. // Convert the scaling functions to 8 bit values
  1062. aom_noise_strength_lut_t scaling_points[3];
  1063. if (!aom_noise_strength_solver_fit_piecewise(
  1064. &noise_model->combined_state[0].strength_solver, 14,
  1065. scaling_points + 0)) {
  1066. return 0;
  1067. }
  1068. if (!aom_noise_strength_solver_fit_piecewise(
  1069. &noise_model->combined_state[1].strength_solver, 10,
  1070. scaling_points + 1)) {
  1071. aom_noise_strength_lut_free(scaling_points + 0);
  1072. return 0;
  1073. }
  1074. if (!aom_noise_strength_solver_fit_piecewise(
  1075. &noise_model->combined_state[2].strength_solver, 10,
  1076. scaling_points + 2)) {
  1077. aom_noise_strength_lut_free(scaling_points + 0);
  1078. aom_noise_strength_lut_free(scaling_points + 1);
  1079. return 0;
  1080. }
  1081. // Both the domain and the range of the scaling functions in the film_grain
  1082. // are normalized to 8-bit (e.g., they are implicitly scaled during grain
  1083. // synthesis).
  1084. const double strength_divisor = 1 << (noise_model->params.bit_depth - 8);
  1085. double max_scaling_value = 1e-4;
  1086. for (int c = 0; c < 3; ++c) {
  1087. for (int i = 0; i < scaling_points[c].num_points; ++i) {
  1088. scaling_points[c].points[i][0] =
  1089. AOMMIN(255, scaling_points[c].points[i][0] / strength_divisor);
  1090. scaling_points[c].points[i][1] =
  1091. AOMMIN(255, scaling_points[c].points[i][1] / strength_divisor);
  1092. max_scaling_value =
  1093. AOMMAX(scaling_points[c].points[i][1], max_scaling_value);
  1094. }
  1095. }
  1096. // Scaling_shift values are in the range [8,11]
  1097. const int max_scaling_value_log2 =
  1098. clamp((int)floor(log2(max_scaling_value) + 1), 2, 5);
  1099. film_grain->scaling_shift = 5 + (8 - max_scaling_value_log2);
  1100. const double scale_factor = 1 << (8 - max_scaling_value_log2);
  1101. film_grain->num_y_points = scaling_points[0].num_points;
  1102. film_grain->num_cb_points = scaling_points[1].num_points;
  1103. film_grain->num_cr_points = scaling_points[2].num_points;
  1104. int(*film_grain_scaling[3])[2] = {
  1105. film_grain->scaling_points_y,
  1106. film_grain->scaling_points_cb,
  1107. film_grain->scaling_points_cr,
  1108. };
  1109. for (int c = 0; c < 3; c++) {
  1110. for (int i = 0; i < scaling_points[c].num_points; ++i) {
  1111. film_grain_scaling[c][i][0] = (int)(scaling_points[c].points[i][0] + 0.5);
  1112. film_grain_scaling[c][i][1] = clamp(
  1113. (int)(scale_factor * scaling_points[c].points[i][1] + 0.5), 0, 255);
  1114. }
  1115. }
  1116. aom_noise_strength_lut_free(scaling_points + 0);
  1117. aom_noise_strength_lut_free(scaling_points + 1);
  1118. aom_noise_strength_lut_free(scaling_points + 2);
  1119. // Convert the ar_coeffs into 8-bit values
  1120. const int n_coeff = noise_model->combined_state[0].eqns.n;
  1121. double max_coeff = 1e-4, min_coeff = -1e-4;
  1122. double y_corr[2] = { 0, 0 };
  1123. double avg_luma_strength = 0;
  1124. for (int c = 0; c < 3; c++) {
  1125. aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
  1126. for (int i = 0; i < n_coeff; ++i) {
  1127. max_coeff = AOMMAX(max_coeff, eqns->x[i]);
  1128. min_coeff = AOMMIN(min_coeff, eqns->x[i]);
  1129. }
  1130. // Since the correlation between luma/chroma was computed in an already
  1131. // scaled space, we adjust it in the un-scaled space.
  1132. aom_noise_strength_solver_t *solver =
  1133. &noise_model->combined_state[c].strength_solver;
  1134. // Compute a weighted average of the strength for the channel.
  1135. double average_strength = 0, total_weight = 0;
  1136. for (int i = 0; i < solver->eqns.n; ++i) {
  1137. double w = 0;
  1138. for (int j = 0; j < solver->eqns.n; ++j) {
  1139. w += solver->eqns.A[i * solver->eqns.n + j];
  1140. }
  1141. w = sqrt(w);
  1142. average_strength += solver->eqns.x[i] * w;
  1143. total_weight += w;
  1144. }
  1145. if (total_weight == 0)
  1146. average_strength = 1;
  1147. else
  1148. average_strength /= total_weight;
  1149. if (c == 0) {
  1150. avg_luma_strength = average_strength;
  1151. } else {
  1152. y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
  1153. max_coeff = AOMMAX(max_coeff, y_corr[c - 1]);
  1154. min_coeff = AOMMIN(min_coeff, y_corr[c - 1]);
  1155. }
  1156. }
  1157. // Shift value: AR coeffs range (values 6-9)
  1158. // 6: [-2, 2), 7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
  1159. film_grain->ar_coeff_shift =
  1160. clamp(7 - (int)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))),
  1161. 6, 9);
  1162. double scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
  1163. int *ar_coeffs[3] = {
  1164. film_grain->ar_coeffs_y,
  1165. film_grain->ar_coeffs_cb,
  1166. film_grain->ar_coeffs_cr,
  1167. };
  1168. for (int c = 0; c < 3; ++c) {
  1169. aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
  1170. for (int i = 0; i < n_coeff; ++i) {
  1171. ar_coeffs[c][i] =
  1172. clamp((int)round(scale_ar_coeff * eqns->x[i]), -128, 127);
  1173. }
  1174. if (c > 0) {
  1175. ar_coeffs[c][n_coeff] =
  1176. clamp((int)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
  1177. }
  1178. }
  1179. // At the moment, the noise modeling code assumes that the chroma scaling
  1180. // functions are a function of luma.
  1181. film_grain->cb_mult = 128; // 8 bits
  1182. film_grain->cb_luma_mult = 192; // 8 bits
  1183. film_grain->cb_offset = 256; // 9 bits
  1184. film_grain->cr_mult = 128; // 8 bits
  1185. film_grain->cr_luma_mult = 192; // 8 bits
  1186. film_grain->cr_offset = 256; // 9 bits
  1187. film_grain->chroma_scaling_from_luma = 0;
  1188. film_grain->grain_scale_shift = 0;
  1189. film_grain->overlap_flag = 1;
  1190. return 1;
  1191. }
  1192. static void pointwise_multiply(const float *a, float *b, int n) {
  1193. for (int i = 0; i < n; ++i) {
  1194. b[i] *= a[i];
  1195. }
  1196. }
  1197. static float *get_half_cos_window(int block_size) {
  1198. float *window_function =
  1199. (float *)aom_malloc(block_size * block_size * sizeof(*window_function));
  1200. if (!window_function) return NULL;
  1201. for (int y = 0; y < block_size; ++y) {
  1202. const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
  1203. for (int x = 0; x < block_size; ++x) {
  1204. const double cos_xd = cos((.5 + x) * PI / block_size - PI / 2);
  1205. window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
  1206. }
  1207. }
  1208. return window_function;
  1209. }
  1210. #define DITHER_AND_QUANTIZE(INT_TYPE, suffix) \
  1211. static void dither_and_quantize_##suffix( \
  1212. float *result, int result_stride, INT_TYPE *denoised, int w, int h, \
  1213. int stride, int chroma_sub_w, int chroma_sub_h, int block_size, \
  1214. float block_normalization) { \
  1215. for (int y = 0; y < (h >> chroma_sub_h); ++y) { \
  1216. for (int x = 0; x < (w >> chroma_sub_w); ++x) { \
  1217. const int result_idx = \
  1218. (y + (block_size >> chroma_sub_h)) * result_stride + x + \
  1219. (block_size >> chroma_sub_w); \
  1220. INT_TYPE new_val = (INT_TYPE)AOMMIN( \
  1221. AOMMAX(result[result_idx] * block_normalization + 0.5f, 0), \
  1222. block_normalization); \
  1223. const float err = \
  1224. -(((float)new_val) / block_normalization - result[result_idx]); \
  1225. denoised[y * stride + x] = new_val; \
  1226. if (x + 1 < (w >> chroma_sub_w)) { \
  1227. result[result_idx + 1] += err * 7.0f / 16.0f; \
  1228. } \
  1229. if (y + 1 < (h >> chroma_sub_h)) { \
  1230. if (x > 0) { \
  1231. result[result_idx + result_stride - 1] += err * 3.0f / 16.0f; \
  1232. } \
  1233. result[result_idx + result_stride] += err * 5.0f / 16.0f; \
  1234. if (x + 1 < (w >> chroma_sub_w)) { \
  1235. result[result_idx + result_stride + 1] += err * 1.0f / 16.0f; \
  1236. } \
  1237. } \
  1238. } \
  1239. } \
  1240. }
  1241. DITHER_AND_QUANTIZE(uint8_t, lowbd)
  1242. DITHER_AND_QUANTIZE(uint16_t, highbd)
  1243. int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
  1244. int w, int h, int stride[3], int chroma_sub[2],
  1245. float *noise_psd[3], int block_size, int bit_depth,
  1246. int use_highbd) {
  1247. float *plane = NULL, *block = NULL, *window_full = NULL,
  1248. *window_chroma = NULL;
  1249. double *block_d = NULL, *plane_d = NULL;
  1250. struct aom_noise_tx_t *tx_full = NULL;
  1251. struct aom_noise_tx_t *tx_chroma = NULL;
  1252. const int num_blocks_w = (w + block_size - 1) / block_size;
  1253. const int num_blocks_h = (h + block_size - 1) / block_size;
  1254. const int result_stride = (num_blocks_w + 2) * block_size;
  1255. const int result_height = (num_blocks_h + 2) * block_size;
  1256. float *result = NULL;
  1257. int init_success = 1;
  1258. aom_flat_block_finder_t block_finder_full;
  1259. aom_flat_block_finder_t block_finder_chroma;
  1260. const float kBlockNormalization = (float)((1 << bit_depth) - 1);
  1261. if (chroma_sub[0] != chroma_sub[1]) {
  1262. fprintf(stderr,
  1263. "aom_wiener_denoise_2d doesn't handle different chroma "
  1264. "subsampling\n");
  1265. return 0;
  1266. }
  1267. init_success &= aom_flat_block_finder_init(&block_finder_full, block_size,
  1268. bit_depth, use_highbd);
  1269. result = (float *)aom_malloc((num_blocks_h + 2) * block_size * result_stride *
  1270. sizeof(*result));
  1271. plane = (float *)aom_malloc(block_size * block_size * sizeof(*plane));
  1272. block =
  1273. (float *)aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
  1274. block_d = (double *)aom_malloc(block_size * block_size * sizeof(*block_d));
  1275. plane_d = (double *)aom_malloc(block_size * block_size * sizeof(*plane_d));
  1276. window_full = get_half_cos_window(block_size);
  1277. tx_full = aom_noise_tx_malloc(block_size);
  1278. if (chroma_sub[0] != 0) {
  1279. init_success &= aom_flat_block_finder_init(&block_finder_chroma,
  1280. block_size >> chroma_sub[0],
  1281. bit_depth, use_highbd);
  1282. window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
  1283. tx_chroma = aom_noise_tx_malloc(block_size >> chroma_sub[0]);
  1284. } else {
  1285. window_chroma = window_full;
  1286. tx_chroma = tx_full;
  1287. }
  1288. init_success &= (tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
  1289. (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
  1290. (window_full != NULL) && (window_chroma != NULL) &&
  1291. (result != NULL);
  1292. for (int c = init_success ? 0 : 3; c < 3; ++c) {
  1293. float *window_function = c == 0 ? window_full : window_chroma;
  1294. aom_flat_block_finder_t *block_finder = &block_finder_full;
  1295. const int chroma_sub_h = c > 0 ? chroma_sub[1] : 0;
  1296. const int chroma_sub_w = c > 0 ? chroma_sub[0] : 0;
  1297. struct aom_noise_tx_t *tx =
  1298. (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
  1299. if (!data[c] || !denoised[c]) continue;
  1300. if (c > 0 && chroma_sub[0] != 0) {
  1301. block_finder = &block_finder_chroma;
  1302. }
  1303. memset(result, 0, sizeof(*result) * result_stride * result_height);
  1304. // Do overlapped block processing (half overlapped). The block rows can
  1305. // easily be done in parallel
  1306. for (int offsy = 0; offsy < (block_size >> chroma_sub_h);
  1307. offsy += (block_size >> chroma_sub_h) / 2) {
  1308. for (int offsx = 0; offsx < (block_size >> chroma_sub_w);
  1309. offsx += (block_size >> chroma_sub_w) / 2) {
  1310. // Pad the boundary when processing each block-set.
  1311. for (int by = -1; by < num_blocks_h; ++by) {
  1312. for (int bx = -1; bx < num_blocks_w; ++bx) {
  1313. const int pixels_per_block =
  1314. (block_size >> chroma_sub_w) * (block_size >> chroma_sub_h);
  1315. aom_flat_block_finder_extract_block(
  1316. block_finder, data[c], w >> chroma_sub_w, h >> chroma_sub_h,
  1317. stride[c], bx * (block_size >> chroma_sub_w) + offsx,
  1318. by * (block_size >> chroma_sub_h) + offsy, plane_d, block_d);
  1319. for (int j = 0; j < pixels_per_block; ++j) {
  1320. block[j] = (float)block_d[j];
  1321. plane[j] = (float)plane_d[j];
  1322. }
  1323. pointwise_multiply(window_function, block, pixels_per_block);
  1324. aom_noise_tx_forward(tx, block);
  1325. aom_noise_tx_filter(tx, noise_psd[c]);
  1326. aom_noise_tx_inverse(tx, block);
  1327. // Apply window function to the plane approximation (we will apply
  1328. // it to the sum of plane + block when composing the results).
  1329. pointwise_multiply(window_function, plane, pixels_per_block);
  1330. for (int y = 0; y < (block_size >> chroma_sub_h); ++y) {
  1331. const int y_result =
  1332. y + (by + 1) * (block_size >> chroma_sub_h) + offsy;
  1333. for (int x = 0; x < (block_size >> chroma_sub_w); ++x) {
  1334. const int x_result =
  1335. x + (bx + 1) * (block_size >> chroma_sub_w) + offsx;
  1336. result[y_result * result_stride + x_result] +=
  1337. (block[y * (block_size >> chroma_sub_w) + x] +
  1338. plane[y * (block_size >> chroma_sub_w) + x]) *
  1339. window_function[y * (block_size >> chroma_sub_w) + x];
  1340. }
  1341. }
  1342. }
  1343. }
  1344. }
  1345. }
  1346. if (use_highbd) {
  1347. dither_and_quantize_highbd(result, result_stride, (uint16_t *)denoised[c],
  1348. w, h, stride[c], chroma_sub_w, chroma_sub_h,
  1349. block_size, kBlockNormalization);
  1350. } else {
  1351. dither_and_quantize_lowbd(result, result_stride, denoised[c], w, h,
  1352. stride[c], chroma_sub_w, chroma_sub_h,
  1353. block_size, kBlockNormalization);
  1354. }
  1355. }
  1356. aom_free(result);
  1357. aom_free(plane);
  1358. aom_free(block);
  1359. aom_free(plane_d);
  1360. aom_free(block_d);
  1361. aom_free(window_full);
  1362. aom_noise_tx_free(tx_full);
  1363. aom_flat_block_finder_free(&block_finder_full);
  1364. if (chroma_sub[0] != 0) {
  1365. aom_flat_block_finder_free(&block_finder_chroma);
  1366. aom_free(window_chroma);
  1367. aom_noise_tx_free(tx_chroma);
  1368. }
  1369. return init_success;
  1370. }
  1371. struct aom_denoise_and_model_t {
  1372. int block_size;
  1373. int bit_depth;
  1374. float noise_level;
  1375. // Size of current denoised buffer and flat_block buffer
  1376. int width;
  1377. int height;
  1378. int y_stride;
  1379. int uv_stride;
  1380. int num_blocks_w;
  1381. int num_blocks_h;
  1382. // Buffers for image and noise_psd allocated on the fly
  1383. float *noise_psd[3];
  1384. uint8_t *denoised[3];
  1385. uint8_t *flat_blocks;
  1386. aom_flat_block_finder_t flat_block_finder;
  1387. aom_noise_model_t noise_model;
  1388. };
  1389. struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
  1390. int block_size,
  1391. float noise_level) {
  1392. struct aom_denoise_and_model_t *ctx =
  1393. (struct aom_denoise_and_model_t *)aom_malloc(
  1394. sizeof(struct aom_denoise_and_model_t));
  1395. if (!ctx) {
  1396. fprintf(stderr, "Unable to allocate denoise_and_model struct\n");
  1397. return NULL;
  1398. }
  1399. memset(ctx, 0, sizeof(*ctx));
  1400. ctx->block_size = block_size;
  1401. ctx->noise_level = noise_level;
  1402. ctx->bit_depth = bit_depth;
  1403. ctx->noise_psd[0] =
  1404. (float *)aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size);
  1405. ctx->noise_psd[1] =
  1406. (float *)aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size);
  1407. ctx->noise_psd[2] =
  1408. (float *)aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size);
  1409. if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) {
  1410. fprintf(stderr, "Unable to allocate noise PSD buffers\n");
  1411. aom_denoise_and_model_free(ctx);
  1412. return NULL;
  1413. }
  1414. return ctx;
  1415. }
  1416. void aom_denoise_and_model_free(struct aom_denoise_and_model_t *ctx) {
  1417. aom_free(ctx->flat_blocks);
  1418. for (int i = 0; i < 3; ++i) {
  1419. aom_free(ctx->denoised[i]);
  1420. aom_free(ctx->noise_psd[i]);
  1421. }
  1422. aom_noise_model_free(&ctx->noise_model);
  1423. aom_flat_block_finder_free(&ctx->flat_block_finder);
  1424. aom_free(ctx);
  1425. }
  1426. static int denoise_and_model_realloc_if_necessary(
  1427. struct aom_denoise_and_model_t *ctx, YV12_BUFFER_CONFIG *sd) {
  1428. if (ctx->width == sd->y_width && ctx->height == sd->y_height &&
  1429. ctx->y_stride == sd->y_stride && ctx->uv_stride == sd->uv_stride)
  1430. return 1;
  1431. const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
  1432. const int block_size = ctx->block_size;
  1433. ctx->width = sd->y_width;
  1434. ctx->height = sd->y_height;
  1435. ctx->y_stride = sd->y_stride;
  1436. ctx->uv_stride = sd->uv_stride;
  1437. for (int i = 0; i < 3; ++i) {
  1438. aom_free(ctx->denoised[i]);
  1439. ctx->denoised[i] = NULL;
  1440. }
  1441. aom_free(ctx->flat_blocks);
  1442. ctx->flat_blocks = NULL;
  1443. ctx->denoised[0] =
  1444. (uint8_t *)aom_malloc((sd->y_stride * sd->y_height) << use_highbd);
  1445. ctx->denoised[1] =
  1446. (uint8_t *)aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
  1447. ctx->denoised[2] =
  1448. (uint8_t *)aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
  1449. if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) {
  1450. fprintf(stderr, "Unable to allocate denoise buffers\n");
  1451. return 0;
  1452. }
  1453. ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size;
  1454. ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size;
  1455. ctx->flat_blocks =
  1456. (uint8_t *)aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h);
  1457. if (!ctx->flat_blocks) {
  1458. fprintf(stderr, "Unable to allocate flat_blocks buffer\n");
  1459. return 0;
  1460. }
  1461. aom_flat_block_finder_free(&ctx->flat_block_finder);
  1462. if (!aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
  1463. ctx->bit_depth, use_highbd)) {
  1464. fprintf(stderr, "Unable to init flat block finder\n");
  1465. return 0;
  1466. }
  1467. const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
  1468. ctx->bit_depth, use_highbd };
  1469. aom_noise_model_free(&ctx->noise_model);
  1470. if (!aom_noise_model_init(&ctx->noise_model, params)) {
  1471. fprintf(stderr, "Unable to init noise model\n");
  1472. return 0;
  1473. }
  1474. // Simply use a flat PSD (although we could use the flat blocks to estimate
  1475. // PSD) those to estimate an actual noise PSD)
  1476. const float y_noise_level =
  1477. aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
  1478. const float uv_noise_level = aom_noise_psd_get_default_value(
  1479. ctx->block_size >> sd->subsampling_x, ctx->noise_level);
  1480. for (int i = 0; i < block_size * block_size; ++i) {
  1481. ctx->noise_psd[0][i] = y_noise_level;
  1482. ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
  1483. }
  1484. return 1;
  1485. }
  1486. // TODO(aomedia:3151): Handle a monochrome image (sd->u_buffer and sd->v_buffer
  1487. // are null pointers) correctly.
  1488. int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
  1489. YV12_BUFFER_CONFIG *sd,
  1490. aom_film_grain_t *film_grain, int apply_denoise) {
  1491. const int block_size = ctx->block_size;
  1492. const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
  1493. uint8_t *raw_data[3] = {
  1494. use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->y_buffer) : sd->y_buffer,
  1495. use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->u_buffer) : sd->u_buffer,
  1496. use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->v_buffer) : sd->v_buffer,
  1497. };
  1498. const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
  1499. int strides[3] = { sd->y_stride, sd->uv_stride, sd->uv_stride };
  1500. int chroma_sub_log2[2] = { sd->subsampling_x, sd->subsampling_y };
  1501. if (!denoise_and_model_realloc_if_necessary(ctx, sd)) {
  1502. fprintf(stderr, "Unable to realloc buffers\n");
  1503. return 0;
  1504. }
  1505. aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->y_width,
  1506. sd->y_height, strides[0], ctx->flat_blocks);
  1507. if (!aom_wiener_denoise_2d(data, ctx->denoised, sd->y_width, sd->y_height,
  1508. strides, chroma_sub_log2, ctx->noise_psd,
  1509. block_size, ctx->bit_depth, use_highbd)) {
  1510. fprintf(stderr, "Unable to denoise image\n");
  1511. return 0;
  1512. }
  1513. const aom_noise_status_t status = aom_noise_model_update(
  1514. &ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
  1515. sd->y_width, sd->y_height, strides, chroma_sub_log2, ctx->flat_blocks,
  1516. block_size);
  1517. int have_noise_estimate = 0;
  1518. if (status == AOM_NOISE_STATUS_OK) {
  1519. have_noise_estimate = 1;
  1520. } else if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
  1521. aom_noise_model_save_latest(&ctx->noise_model);
  1522. have_noise_estimate = 1;
  1523. } else {
  1524. // Unable to update noise model; proceed if we have a previous estimate.
  1525. have_noise_estimate =
  1526. (ctx->noise_model.combined_state[0].strength_solver.num_equations > 0);
  1527. }
  1528. film_grain->apply_grain = 0;
  1529. if (have_noise_estimate) {
  1530. if (!aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
  1531. fprintf(stderr, "Unable to get grain parameters.\n");
  1532. return 0;
  1533. }
  1534. if (!film_grain->random_seed) {
  1535. film_grain->random_seed = 7391;
  1536. }
  1537. if (apply_denoise) {
  1538. memcpy(raw_data[0], ctx->denoised[0],
  1539. (strides[0] * sd->y_height) << use_highbd);
  1540. if (!sd->monochrome) {
  1541. memcpy(raw_data[1], ctx->denoised[1],
  1542. (strides[1] * sd->uv_height) << use_highbd);
  1543. memcpy(raw_data[2], ctx->denoised[2],
  1544. (strides[2] * sd->uv_height) << use_highbd);
  1545. }
  1546. }
  1547. }
  1548. return 1;
  1549. }