# test_rolling_percentile.py
  1. import pytest
  2. from .utils import MockedTime
  3. from .sample_data import sample_data_holder_1, sample_data_holder_2
  4. from hystrix.rolling_percentile import RollingPercentile, PercentileSnapshot
  5. def test_rolling():
  6. time = MockedTime()
  7. percentile = RollingPercentile(time, 60000, 12, 1000, True)
  8. percentile.add_value(1000)
  9. percentile.add_value(1000)
  10. percentile.add_value(1000)
  11. percentile.add_value(2000)
  12. assert percentile.buckets.size == 1
  13. # No bucket turnover yet so percentile not yet generated
  14. assert percentile.percentile(50) == 0
  15. time.increment(6000)
  16. # Still only 1 bucket until we touch it again
  17. assert percentile.buckets.size == 1
  18. # A bucket has been created so we have a new percentile
  19. assert percentile.percentile(50) == 1000
  20. # Now 2 buckets since getting a percentile causes bucket retrieval
  21. assert percentile.buckets.size == 2
  22. percentile.add_value(1000)
  23. percentile.add_value(500)
  24. assert percentile.buckets.size == 2
  25. percentile.add_value(200)
  26. percentile.add_value(200)
  27. percentile.add_value(1600)
  28. percentile.add_value(200)
  29. percentile.add_value(1600)
  30. percentile.add_value(1600)
  31. # We haven't progressed to a new bucket so the percentile should be the
  32. # same and ignore the most recent bucket
  33. assert percentile.percentile(50) == 1000
  34. # Increment to another bucket so we include all of the above in the
  35. # PercentileSnapshot
  36. time.increment(6000)
  37. # The rolling version should have the same data as creating a snapshot
  38. # like this
  39. snapshot = PercentileSnapshot(1000, 1000, 1000, 2000, 1000, 500,
  40. 200, 200, 1600, 200, 1600, 1600)
  41. assert snapshot.percentile(0.15) == percentile.percentile(0.15)
  42. assert snapshot.percentile(0.50) == percentile.percentile(0.50)
  43. assert snapshot.percentile(0.90) == percentile.percentile(0.90)
  44. assert snapshot.percentile(0.995) == percentile.percentile(0.995)
  45. # mean = 1000+1000+1000+2000+1000+500+200+200+1600+200+1600+1600/12
  46. assert snapshot.mean() == 991
  47. def test_value_is_zero_after_rolling_window_passes_and_no_traffic():
  48. time = MockedTime()
  49. percentile = RollingPercentile(time, 60000, 12, 1000, True)
  50. percentile.add_value(1000)
  51. percentile.add_value(1000)
  52. percentile.add_value(1000)
  53. percentile.add_value(2000)
  54. percentile.add_value(4000)
  55. assert percentile.buckets.size == 1
  56. # No bucket turnover yet so percentile not yet generated
  57. assert percentile.percentile(50) == 0
  58. time.increment(6000)
  59. # Still only 1 bucket until we touch it again
  60. assert percentile.buckets.size == 1
  61. # A bucket has been created so we have a new percentile
  62. assert percentile.percentile(50) == 1500
  63. # Let 1 minute pass
  64. time.increment(60000)
  65. # No data in a minute should mean all buckets are empty (or reset) so we
  66. # should not have any percentiles
  67. assert percentile.percentile(50) == 0
  68. def test_sample_data_over_time_1():
  69. time = MockedTime()
  70. percentile = RollingPercentile(time, 60000, 12, 1000, True)
  71. previous_time = 0
  72. for time_millis, latency in sample_data_holder_1:
  73. time.increment(time_millis - previous_time)
  74. previous_time = time_millis
  75. percentile.add_value(latency)
  76. print('0.01', percentile.percentile(0.01))
  77. print('Median', percentile.percentile(50))
  78. print('90th', percentile.percentile(90))
  79. print('99th', percentile.percentile(99))
  80. print('99.5th', percentile.percentile(99.5))
  81. print('99.99', percentile.percentile(99.99))
  82. print('Median', percentile.percentile(50))
  83. print('Median', percentile.percentile(50))
  84. print('Median', percentile.percentile(50))
  85. # In a loop as a use case was found where very different values were
  86. # calculated in subsequent requests.
  87. for _ in range(10):
  88. percentile50 = percentile.percentile(50)
  89. if percentile50 > 5:
  90. pytest.fail('We expect around 2 but got: {}'.format(percentile50))
  91. percentile995 = percentile.percentile(99.5)
  92. if percentile995 < 20:
  93. msg = 'We expect to see some high values over 20 but got: {}'
  94. pytest.fail(msg.format(percentile995))
  95. def test_sample_data_over_time_2():
  96. time = MockedTime()
  97. percentile = RollingPercentile(time, 60000, 12, 1000, True)
  98. previous_time = 0
  99. for time_millis, latency in sample_data_holder_2:
  100. time.increment(time_millis - previous_time)
  101. previous_time = time_millis
  102. percentile.add_value(latency)
  103. print('0.01', percentile.percentile(0.01))
  104. print('Median', percentile.percentile(50))
  105. print('90th', percentile.percentile(90))
  106. print('99th', percentile.percentile(99))
  107. print('99.5th', percentile.percentile(99.5))
  108. print('99.99', percentile.percentile(99.99))
  109. percentile50 = percentile.percentile(50)
  110. if percentile50 > 90 or percentile50 < 50:
  111. pytest.fail('We expect around 60-70 but got: {}'.format(percentile50))
  112. percentile99 = percentile.percentile(99)
  113. if percentile99 < 400:
  114. msg = 'We expect to see some high values over 400 but got: {}'
  115. pytest.fail(msg.format(percentile99))
  116. def test_percentile_algorithm_media1():
  117. snapshot = PercentileSnapshot(100, 100, 100, 100, 200, 200,
  118. 200, 300, 300, 300, 300)
  119. assert snapshot.percentile(50) == 200
  120. def test_percentile_algorithm_media2():
  121. snapshot = PercentileSnapshot(100, 100, 100, 100, 100, 100,
  122. 100, 100, 100, 100, 500)
  123. assert snapshot.percentile(50) == 100
  124. def test_percentile_algorithm_media3():
  125. snapshot = PercentileSnapshot(50, 75, 100, 125, 160, 170,
  126. 180, 200, 210, 300, 500)
  127. assert snapshot.percentile(50) == 175
  128. def test_percentile_algorithm_media4():
  129. ''' Unsorted so it is expected to sort it for us. '''
  130. snapshot = PercentileSnapshot(300, 75, 125, 500, 100, 160,
  131. 180, 200, 210, 50, 170)
  132. assert snapshot.percentile(50) == 175
  133. def test_percentile_algorithm_extremes():
  134. ''' Unsorted so it is expected to sort it for us. '''
  135. snapshot = PercentileSnapshot(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  136. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  137. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  138. 800, 768, 657, 700, 867)
  139. print('0.01', snapshot.percentile(0.01))
  140. print('10th', snapshot.percentile(10))
  141. print('Median', snapshot.percentile(50))
  142. print('75th', snapshot.percentile(75))
  143. print('90th', snapshot.percentile(90))
  144. print('99th', snapshot.percentile(99))
  145. print('99.5th', snapshot.percentile(99.5))
  146. print('99.99', snapshot.percentile(99.99))
  147. assert snapshot.percentile(50) == 2
  148. assert snapshot.percentile(10) == 2
  149. assert snapshot.percentile(75) == 2
  150. if snapshot.percentile(95) < 600:
  151. msg = 'We expect 90th to be over 600 to show the extremes but got: {}'
  152. pytest.fail(msg.format(snapshot.percentile(95)))
  153. if snapshot.percentile(99) < 600:
  154. msg = 'We expect 99th to be over 600 to show the extremes but got: {}'
  155. pytest.fail(msg.format(snapshot.percentile(99)))
  156. def percentile_for_values(*values):
  157. return PercentileSnapshot(*values)
  158. def test_percentile_algorithm_high_percentile():
  159. snapshot = percentile_for_values(1, 2, 3)
  160. assert snapshot.percentile(50) == 2
  161. assert snapshot.percentile(75) == 3
  162. def test_percentile_algorithm_low_percentile():
  163. snapshot = percentile_for_values(1, 2)
  164. assert snapshot.percentile(25) == 1
  165. assert snapshot.percentile(75) == 2
  166. def test_percentile_algorithm_percentiles():
  167. snapshot = percentile_for_values(10, 30, 20, 40)
  168. assert snapshot.percentile(30) == 22
  169. assert snapshot.percentile(25) == 20
  170. assert snapshot.percentile(75) == 40
  171. assert snapshot.percentile(50) == 30
  172. assert snapshot.percentile(-1) == 10
  173. assert snapshot.percentile(101) == 40
  174. def test_percentile_algorithm_NIST_example():
  175. snapshot = percentile_for_values(951772, 951567, 951937, 951959, 951442,
  176. 950610, 951591, 951195, 951772, 950925,
  177. 951990, 951682)
  178. assert snapshot.percentile(90) == 951983
  179. assert snapshot.percentile(100) == 951990
  180. def test_does_nothing_when_disabled():
  181. time = MockedTime()
  182. percentile = RollingPercentile(time, 60000, 12, 1000, False)
  183. previous_time = 0
  184. for time_millis, latency in sample_data_holder_2:
  185. time.increment(time_millis - previous_time)
  186. previous_time = time_millis
  187. percentile.add_value(latency)
  188. assert percentile.percentile(50) == -1
  189. assert percentile.percentile(75) == -1
  190. assert percentile.mean() == -1