# Copyright (C) 2010 Google Inc. All rights reserved.
# Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import logging

from webkitpy.layout_tests.models import test_expectations
from webkitpy.layout_tests.models import test_failures


_log = logging.getLogger(__name__)


class TestRunResults(object):
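    """Accumulates the results of a single test run, bucketing tests by
    expectation, timeline, and whether each outcome was expected."""
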
    def __init__(self, expectations, num_tests):
        self.total = num_tests
        self.remaining = self.total
        self.expectations = expectations
        self.expected = 0
        self.unexpected = 0
        self.unexpected_failures = 0
        self.unexpected_crashes = 0
        self.unexpected_timeouts = 0
        self.tests_by_expectation = {}
        self.tests_by_timeline = {}
        self.results_by_name = {}  # Map of test name to the last result for the test.
        self.all_results = []  # All results from a run, including every iteration of every test.
        self.unexpected_results_by_name = {}
        self.failures_by_name = {}
        self.total_failures = 0
        self.expected_skips = 0
        for expectation in test_expectations.TestExpectations.EXPECTATIONS.values():
            self.tests_by_expectation[expectation] = set()
        for timeline in test_expectations.TestExpectations.TIMELINES.values():
            self.tests_by_timeline[timeline] = expectations.get_tests_with_timeline(timeline)
        self.slow_tests = set()
        self.interrupted = False

    def add(self, test_result, expected, test_is_slow):
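        """Records one TestResult and updates the aggregate counters.

        |expected| indicates whether the result matched its expectation;
        |test_is_slow| adds the test to self.slow_tests.
        """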
        self.tests_by_expectation[test_result.type].add(test_result.test_name)
        self.results_by_name[test_result.test_name] = test_result
        if test_result.type != test_expectations.SKIP:
            self.all_results.append(test_result)
        self.remaining -= 1
        if len(test_result.failures):
            self.total_failures += 1
            self.failures_by_name[test_result.test_name] = test_result.failures
        if expected:
            self.expected += 1
            if test_result.type == test_expectations.SKIP:
                self.expected_skips += 1
        else:
            self.unexpected_results_by_name[test_result.test_name] = test_result
            self.unexpected += 1
            if len(test_result.failures):
                self.unexpected_failures += 1
            if test_result.type == test_expectations.CRASH:
                self.unexpected_crashes += 1
            elif test_result.type == test_expectations.TIMEOUT:
                self.unexpected_timeouts += 1
        if test_is_slow:
            self.slow_tests.add(test_result.test_name)
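
# Illustrative use of TestRunResults (an editorial sketch, not code from this
# module): the test runner owns one instance per pass and calls add() once per
# finished test. `a_test_result` and `was_expected` are hypothetical names.
#
#   run_results = TestRunResults(expectations, num_tests=len(test_names))
#   run_results.add(a_test_result, expected=was_expected, test_is_slow=False)
#   assert run_results.remaining == len(test_names) - 1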


class RunDetails(object):
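    """Plain value object bundling everything produced by a complete run."""
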
    def __init__(self, exit_code, summarized_results=None, initial_results=None, retry_results=None, enabled_pixel_tests_in_retry=False):
        self.exit_code = exit_code
        self.summarized_results = summarized_results
        self.initial_results = initial_results
        self.retry_results = retry_results
        self.enabled_pixel_tests_in_retry = enabled_pixel_tests_in_retry


def _interpret_test_failures(failures):
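    """Maps a list of TestFailure objects to JSON-friendly flags for a test_dict.

    For example, failures containing FailureMissingResult and
    FailureImageHashMismatch map to
    {'is_missing_text': True, 'image_diff_percent': <that failure's diff_percent>}.
    """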
    test_dict = {}
    failure_types = [type(failure) for failure in failures]
    # FIXME: get rid of all these is_* values once there is a 1:1 map between
    # TestFailure type and test_expectations.EXPECTATION.
    if test_failures.FailureMissingAudio in failure_types:
        test_dict['is_missing_audio'] = True

    if test_failures.FailureMissingResult in failure_types:
        test_dict['is_missing_text'] = True

    if test_failures.FailureMissingImage in failure_types or test_failures.FailureMissingImageHash in failure_types:
        test_dict['is_missing_image'] = True

    if 'image_diff_percent' not in test_dict:
        for failure in failures:
            if isinstance(failure, test_failures.FailureImageHashMismatch) or isinstance(failure, test_failures.FailureReftestMismatch):
                test_dict['image_diff_percent'] = failure.diff_percent

    return test_dict


def summarize_results(port_obj, expectations, initial_results, retry_results, enabled_pixel_tests_in_retry):
    """Returns a dictionary containing a summary of the test runs, with the following fields:
        'version': a version indicator
        'fixable': The number of fixable tests (NOW - PASS)
        'skipped': The number of skipped tests (NOW & SKIPPED)
        'num_regressions': The number of non-flaky failures
        'num_flaky': The number of flaky failures
        'num_missing': The number of tests with missing results
        'num_passes': The number of unexpected passes
        'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
    """
    results = {}
    results['version'] = 3

    tbe = initial_results.tests_by_expectation
    tbt = initial_results.tests_by_timeline
    results['fixable'] = len(tbt[test_expectations.NOW] - tbe[test_expectations.PASS])
    results['skipped'] = len(tbt[test_expectations.NOW] & tbe[test_expectations.SKIP])

    num_passes = 0
    num_flaky = 0
    num_missing = 0
    num_regressions = 0
    keywords = {}
    for expectation_string, expectation_enum in test_expectations.TestExpectations.EXPECTATIONS.iteritems():
        keywords[expectation_enum] = expectation_string.upper()

    for modifier_string, modifier_enum in test_expectations.TestExpectations.MODIFIERS.iteritems():
        keywords[modifier_enum] = modifier_string.upper()
    tests = {}

    for test_name, result in initial_results.results_by_name.iteritems():
        # Note that if a test crashed in the original run, we ignore
        # whether or not it crashed when we retried it (if we retried it),
        # and always consider the result not flaky.
        expected = expectations.get_expectations_string(test_name)
        result_type = result.type
        actual = [keywords[result_type]]

        if result_type == test_expectations.SKIP:
            continue

        test_dict = {}
        if result.has_stderr:
            test_dict['has_stderr'] = True

        if result.reftest_type:
            test_dict.update(reftest_type=list(result.reftest_type))

        if expectations.has_modifier(test_name, test_expectations.WONTFIX):
            test_dict['wontfix'] = True

        if result_type == test_expectations.PASS:
            num_passes += 1
            # FIXME: include passing tests that have stderr output.
            if expected == 'PASS':
                continue
        elif result_type == test_expectations.CRASH:
            if test_name in initial_results.unexpected_results_by_name:
                num_regressions += 1
        elif result_type == test_expectations.MISSING:
            if test_name in initial_results.unexpected_results_by_name:
                num_missing += 1
        elif test_name in initial_results.unexpected_results_by_name:
            if retry_results and test_name not in retry_results.unexpected_results_by_name:
                actual.extend(expectations.get_expectations_string(test_name).split(" "))
                num_flaky += 1
            elif retry_results:
                retry_result_type = retry_results.unexpected_results_by_name[test_name].type
                if result_type != retry_result_type:
                    if enabled_pixel_tests_in_retry and result_type == test_expectations.TEXT and retry_result_type == test_expectations.IMAGE_PLUS_TEXT:
                        num_regressions += 1
                    else:
                        num_flaky += 1
                    actual.append(keywords[retry_result_type])
                else:
                    num_regressions += 1
            else:
                num_regressions += 1
        test_dict['expected'] = expected
        test_dict['actual'] = " ".join(actual)

        test_dict.update(_interpret_test_failures(result.failures))

        if retry_results:
            retry_result = retry_results.unexpected_results_by_name.get(test_name)
            if retry_result:
                test_dict.update(_interpret_test_failures(retry_result.failures))

        # Store test hierarchically by directory. e.g.
        #   foo/bar/baz.html: test_dict
        #   foo/bar/baz1.html: test_dict
        #
        # becomes
        #   foo: {
        #       bar: {
        #           baz.html: test_dict,
        #           baz1.html: test_dict
        #       }
        #   }
        parts = test_name.split('/')
        current_map = tests
        for i, part in enumerate(parts):
            if i == (len(parts) - 1):
                current_map[part] = test_dict
                break
            if part not in current_map:
                current_map[part] = {}
            current_map = current_map[part]

    results['tests'] = tests
    results['num_passes'] = num_passes
    results['num_flaky'] = num_flaky
    results['num_missing'] = num_missing
    results['num_regressions'] = num_regressions
    results['uses_expectations_file'] = port_obj.uses_test_expectations_file()
    # FIXME: Does results.html have enough information to compute this itself,
    # e.g. by comparing the number of results against the total number of tests?
    results['interrupted'] = initial_results.interrupted
    results['layout_tests_dir'] = port_obj.layout_tests_dir()
    results['has_wdiff'] = port_obj.wdiff_available()
    results['has_pretty_patch'] = port_obj.pretty_patch_available()
    results['pixel_tests_enabled'] = port_obj.get_option('pixel_tests')
    try:
        # We only use the svn revision for trac links in the results.html file,
        # so don't compute it by default since it takes >100ms.
        # FIXME: Do we really need to populate this both here and in the json_results_generator?
        if port_obj.get_option("builder_name"):
            port_obj.host.initialize_scm()
            results['revision'] = port_obj.host.scm().head_svn_revision()
    except Exception as e:
        _log.warn("Failed to determine svn revision for checkout (cwd: %s, webkit_base: %s), leaving 'revision' key blank in full_results.json.\n%s" % (port_obj._filesystem.getcwd(), port_obj.path_from_webkit_base(), e))
        # Handle cases where we're running outside of version control.
        import traceback
        _log.debug('Failed to learn head svn revision:')
        _log.debug(traceback.format_exc())
        results['revision'] = ""

    return results
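
# Illustrative end-to-end flow (an editorial sketch under assumed names;
# `port`, `expectations`, `initial_results`, and `retry_results` stand in for
# objects the layout-test runner would normally construct):
#
#   summary = summarize_results(port, expectations, initial_results,
#                               retry_results, enabled_pixel_tests_in_retry=False)
#   details = RunDetails(exit_code=initial_results.unexpected,
#                        summarized_results=summary,
#                        initial_results=initial_results,
#                        retry_results=retry_results)
#
# The exit-code convention shown (number of unexpected results) is an
# assumption for the sketch, not something this module defines.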