// mm_main.c
  1. // See LICENSE for license details.
  2. #include "common.h"
  3. #include <assert.h>
  4. #include <stdlib.h>
  5. #include <stdio.h>
  6. #include "util.h"
  7. #pragma GCC optimize ("unroll-loops")
  8. void thread_entry(int cid, int nc)
  9. {
  10. const int R = 8;
  11. int m, n, p;
  12. uint64_t s = 0xdeadbeefU;
  13. m = CBM;
  14. n = CBN;
  15. p = CBK;
  16. t a[m*p];
  17. t b[p*n];
  18. t c[m*n];
  19. for (size_t i = 0; i < m; i++)
  20. for (size_t j = 0; j < p; j++)
  21. a[i*p+j] = (t)(s = lfsr(s));
  22. for (size_t i = 0; i < p; i++)
  23. for (size_t j = 0; j < n; j++)
  24. b[i*n+j] = (t)(s = lfsr(s));
  25. memset(c, 0, m*n*sizeof(c[0]));
  26. size_t instret, cycles;
  27. for (int i = 0; i < R; i++)
  28. {
  29. instret = -read_csr(minstret);
  30. cycles = -read_csr(mcycle);
  31. mm(m, n, p, a, p, b, n, c, n);
  32. instret += read_csr(minstret);
  33. cycles += read_csr(mcycle);
  34. }
  35. asm volatile("fence");
  36. printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
  37. cid, RBM, RBN, RBK, CBM, CBN, CBK);
  38. printf("C%d: %d instructions\n", cid, (int)(instret));
  39. printf("C%d: %d cycles\n", cid, (int)(cycles));
  40. printf("C%d: %d flops\n", cid, 2*m*n*p);
  41. printf("C%d: %d Mflops @ 1 GHz\n", cid, 2000*m*n*p/(cycles));
  42. #if 1
  43. for (size_t i = 0; i < m; i++)
  44. {
  45. for (size_t j = 0; j < n; j++)
  46. {
  47. t s = 0;
  48. for (size_t k = 0; k < p; k++)
  49. s += a[i*p+k] * b[k*n+j];
  50. s *= R;
  51. if (fabs(c[i*n+j]-s) > fabs(1e-6*s))
  52. {
  53. printf("C%d: c[%lu][%lu] %f != %f\n", cid, i, j, c[i*n+j], s);
  54. exit(1);
  55. }
  56. }
  57. }
  58. #endif
  59. barrier(nc);
  60. exit(0);
  61. }