diff.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. # Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
  2. # All rights reserved.
  3. # This component and the accompanying materials are made available
  4. # under the terms of "Eclipse Public License v1.0"
  5. # which accompanies this distribution, and is available
  6. # at the URL "http://www.eclipse.org/legal/epl-v10.html".
  7. #
  8. # Initial Contributors:
  9. # Nokia Corporation - initial contribution.
  10. #
  11. # Contributors:
  12. #
  13. # Description:
  14. """
  15. Compare the raptor XML logs from multiple builds.
  16. """
  17. import csv
  18. import os
  19. import sys
  20. import allo.utils
  21. import generic_path
  22. import plugins.filter_csv
  23. # we don't want to create a Raptor object just for these 2 variables
  24. sbs_home = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..")
  25. cfg_path = os.path.join("lib", "config")
  26. class NotADiffableLog(Exception):
  27. pass
  28. class CSVFilterParams(object):
  29. """The minimal parameter set required for filter_csv."""
  30. def __init__(self, csv_file):
  31. self.logFileName = generic_path.Path(csv_file)
  32. self.timestring = ""
  33. self.configPath = [ generic_path.Path(cfg_path) ]
  34. self.home = generic_path.Path(sbs_home)
  35. class DiffableLog(object):
  36. """Represents a raptor log, or set of logs, in a way that can be compared
  37. to another nominally similar log (or set of logs) from a different build."""
  38. def __init__(self, dir_or_file, force=False, limit=0, verbose=False):
  39. """dir_or_file is the location of the build logs. For a directory all
  40. the files it contains are examined to see if they are Raptor log files.
  41. If force is True the class will not reuse any cached information that
  42. it finds in the specified directory: instead it will re-read the original
  43. logs and generate a new cache.
  44. If limit is greater than zero then it is used to reset the maximum
  45. allowed CSV record size. This is sometimes needed for builds with
  46. particularly huge error or warning messages.
  47. If verbose is True then progress information is printed as we work
  48. through the logs."""
  49. self.location = dir_or_file
  50. self.force = force
  51. self.limit = limit
  52. self.verbose = verbose
  53. self.logs = []
  54. # find all the raptor logs that are in the running
  55. if os.path.isfile(dir_or_file):
  56. self.add_file(dir_or_file)
  57. elif os.path.isdir(dir_or_file):
  58. for file in os.listdir(dir_or_file):
  59. self.add_file(os.path.join(dir_or_file, file))
  60. else:
  61. raise NotADiffableLog("'{0}' is not a file or a directory\n".format(dir_or_file))
  62. if len(self.logs) > 0:
  63. if self.verbose:
  64. print("found {0} raptor logs".format(len(self.logs)))
  65. else:
  66. raise NotADiffableLog("no raptor logs found in '{0}'\n".format(dir_or_file))
  67. # generate all the .csv files that are missing or out of date
  68. new_csv_files = False
  69. all_csv_files = []
  70. for log_file in self.logs:
  71. csv_file = log_file + ".csv"
  72. all_csv_files.append(csv_file)
  73. if self.force or not os.path.isfile(csv_file) \
  74. or os.path.getmtime(log_file) > os.path.getmtime(csv_file):
  75. self.generate_csv(log_file, csv_file)
  76. new_csv_files = True
  77. # combine multiple .csv files into one big one
  78. all_csv_files.sort()
  79. csv_cat = all_csv_files[0] + ".cat"
  80. if new_csv_files or not os.path.isfile(csv_cat):
  81. allo.utils.cat(all_csv_files, csv_cat)
  82. # sort the big .csv file
  83. csv_sort = csv_cat + ".sort"
  84. if new_csv_files or not os.path.isfile(csv_sort):
  85. allo.utils.sort(csv_cat, csv_sort, allo.utils.linecount(csv_cat))
  86. # remove duplicate lines from the big .csv file
  87. self.csv = csv_sort + ".uniq"
  88. if new_csv_files or not os.path.isfile(self.csv):
  89. allo.utils.uniq(csv_sort, self.csv)
  90. if self.verbose:
  91. print("combined log " + self.csv)
  92. # add up the per-component and per-event totals
  93. self.summarise()
  94. def add_file(self, path):
  95. if allo.utils.is_raptor_log(path):
  96. self.logs.append(path)
  97. if self.verbose:
  98. print(path + " is a raptor log")
  99. def generate_csv(self, log_file, csv_file):
  100. """run the CSV filter on log_file to produce csv_file."""
  101. if self.verbose:
  102. print("generating " + csv_file)
  103. filter = plugins.filter_csv.CSV(["ok"]) # ignore "ok" recipes
  104. filter_params = CSVFilterParams(csv_file)
  105. try:
  106. filter.open(filter_params)
  107. with open(log_file, "rb") as file:
  108. for line in file:
  109. filter.write(line)
  110. filter.summary()
  111. filter.close()
  112. except Exception,e:
  113. raise NotADiffableLog("problem filtering '{0}' : {1}\n".format(log_file, str(e)))
  114. def summarise(self):
  115. """scan the combined CSV file and total up the number of error, warning etc.
  116. also record the total number of "events" per component."""
  117. self.events = {}
  118. self.components = {}
  119. if self.limit > 0:
  120. csv.field_size_limit(self.limit)
  121. reader = csv.reader(open(self.csv, "rb"))
  122. for row in reader:
  123. event = row[0]
  124. if event == "info" and row[2] == "version":
  125. self.raptor_version = row[3]
  126. continue
  127. if event in self.events:
  128. self.events[event] += 1
  129. else:
  130. self.events[event] = 1
  131. bldinf = row[1]
  132. if bldinf in self.components:
  133. self.components[bldinf] += 1
  134. else:
  135. self.components[bldinf] = 1
  136. if self.verbose:
  137. for (event, count) in self.events.items():
  138. print("{0} : {1}".format(event, count))
  139. print("{0} components".format(len(self.components)))
  140. class LogDiff(object):
  141. """Comparison between two DiffableLog objects.
  142. The result is a "components" dictionary and an "events" dictionary which
  143. provide a useful summary of the differences. In components the key is the
  144. bld.inf path and the data is the total number of events that appear for
  145. that component. In events the key is the event type (error, warning etc.)
  146. and the data is the total number of those events that appear in the whole
  147. build.
  148. The object can also be iterated over, providing a sequence of tuples
  149. (line, flag) where "line" is a single line from the combined CSV files
  150. and "flag" is either FIRST, SECOND or BOTH to indicate which build(s) the
  151. line appears in."""
  152. FIRST = 1
  153. SECOND = 2
  154. BOTH = 3
  155. def __init__(self, log_a, log_b):
  156. """take two DiffableLog objects."""
  157. self.log_a = log_a
  158. self.log_b = log_b
  159. # compare the summaries
  160. # component totals
  161. bldinfs = set(log_a.components.keys()) | set(log_b.components.keys())
  162. self.components = {}
  163. for bldinf in bldinfs:
  164. if bldinf in log_a.components:
  165. na = log_a.components[bldinf]
  166. else:
  167. na = 0
  168. if bldinf in log_b.components:
  169. nb = log_b.components[bldinf]
  170. else:
  171. nb = 0
  172. self.components[bldinf] = (na, nb)
  173. # event totals
  174. events = set(log_a.events.keys()) | set(log_b.events.keys())
  175. self.events = {}
  176. for event in events:
  177. if event in log_a.events:
  178. na = log_a.events[event]
  179. else:
  180. na = 0
  181. if event in log_b.events:
  182. nb = log_b.events[event]
  183. else:
  184. nb = 0
  185. self.events[event] = (na, nb)
  186. def __iter__(self):
  187. """an iterator for stepping through the detailed differences."""
  188. return LogDiffIterator(self)
  189. def dump_to_files(self, filename1, filename2):
  190. """take the detailed differences and create a pair of files which
  191. should be manageable by a graphical diff tool. we trim the size by
  192. replacing blocks of matching lines with "== block 1", "== block 2" etc.
  193. returns the number of lines that differ."""
  194. different = 0
  195. sameblock = False # are we on a run of matching lines
  196. block = 0
  197. with open(filename1, "wb") as file_a:
  198. with open(filename2, "wb") as file_b:
  199. for (line, flag) in self:
  200. if flag == LogDiff.FIRST:
  201. file_a.write(line)
  202. sameblock = False
  203. different += 1
  204. elif flag == LogDiff.SECOND:
  205. file_b.write(line)
  206. sameblock = False
  207. different += 1
  208. elif not sameblock: # LogDiff.BOTH
  209. sameblock = True
  210. block += 1
  211. file_a.write("== block {0}\n".format(block))
  212. file_b.write("== block {0}\n".format(block))
  213. return different
  214. class LogDiffIterator(object):
  215. """Iterate over a LogDiff object.
  216. The sequence values are tuples (line, flag) where "line" is a line of text
  217. from one or both CSV files, and "flag" is either FIRST or SECOND or BOTH
  218. to show which."""
  219. def __init__(self, log_diff):
  220. """It should be OK to create multiple iterators for the same data."""
  221. self.file_a = open(log_diff.log_a.csv, "rb")
  222. self.file_b = open(log_diff.log_b.csv, "rb")
  223. self.line_a = self.file_a.readline()
  224. self.line_b = self.file_b.readline()
  225. def __iter__(self):
  226. return self
  227. def next(self):
  228. if self.line_a:
  229. if self.line_b:
  230. if self.line_a == self.line_b:
  231. value_pair = (self.line_a, LogDiff.BOTH)
  232. self.line_a = self.file_a.readline()
  233. self.line_b = self.file_b.readline()
  234. elif self.line_a < self.line_b:
  235. value_pair = (self.line_a, LogDiff.FIRST)
  236. self.line_a = self.file_a.readline()
  237. else:
  238. value_pair = (self.line_b, LogDiff.SECOND)
  239. self.line_b = self.file_b.readline()
  240. else:
  241. # file_b is finished
  242. value_pair = (self.line_a, LogDiff.FIRST)
  243. self.line_a = self.file_a.readline()
  244. elif self.line_b:
  245. # file_a is finished
  246. value_pair = (self.line_b, LogDiff.SECOND)
  247. self.line_b = self.file_b.readline()
  248. else:
  249. # both files are finished
  250. self.file_a.close()
  251. self.file_b.close()
  252. raise StopIteration
  253. return value_pair