# Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
# All rights reserved.
# This component and the accompanying materials are made available
# under the terms of "Eclipse Public License v1.0"
# which accompanies this distribution, and is available
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
#
# Initial Contributors:
# Nokia Corporation - initial contribution.
#
# Contributors:
#
# Description:

"""
Compare the raptor XML logs from multiple builds.
"""

import csv
import os
import sys

import allo.utils
import generic_path
import plugins.filter_csv

# we don't want to create a Raptor object just for these 2 variables
sbs_home = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..")
cfg_path = os.path.join("lib", "config")
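# sbs_home is the directory two levels above this file (presumably the
# SBS_HOME of the installation); cfg_path is the lib/config directory,
# given relative to that home.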


class NotADiffableLog(Exception):
    pass


class CSVFilterParams(object):
    """The minimal parameter set required for filter_csv."""

    def __init__(self, csv_file):
        self.logFileName = generic_path.Path(csv_file)
        self.timestring = ""
        self.configPath = [ generic_path.Path(cfg_path) ]
        self.home = generic_path.Path(sbs_home)

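# The attribute names above (logFileName, timestring, configPath, home)
# appear to be the subset of Raptor build-object attributes that the CSV
# filter plugin reads; this is inferred from the filter.open() call in
# generate_csv() below.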


class DiffableLog(object):
    """Represents a raptor log, or set of logs, in a way that can be compared
    to another nominally similar log (or set of logs) from a different build."""

    def __init__(self, dir_or_file, force=False, limit=0, verbose=False):
        """dir_or_file is the location of the build logs. For a directory, all
        the files it contains are examined to see if they are Raptor log files.

        If force is True the class will not reuse any cached information that
        it finds in the specified directory: instead it will re-read the
        original logs and generate a new cache.

        If limit is greater than zero then it is used to set the maximum
        allowed CSV record size. This is sometimes needed for builds with
        particularly huge error or warning messages.

        If verbose is True then progress information is printed as we work
        through the logs."""

        self.location = dir_or_file
        self.force = force
        self.limit = limit
        self.verbose = verbose
        self.logs = []

        # find all the raptor logs that are in the running
        if os.path.isfile(dir_or_file):
            self.add_file(dir_or_file)

        elif os.path.isdir(dir_or_file):
            for file in os.listdir(dir_or_file):
                self.add_file(os.path.join(dir_or_file, file))
        else:
            raise NotADiffableLog("'{0}' is not a file or a directory\n".format(dir_or_file))

        if len(self.logs) > 0:
            if self.verbose:
                print("found {0} raptor logs".format(len(self.logs)))
        else:
            raise NotADiffableLog("no raptor logs found in '{0}'\n".format(dir_or_file))

        # generate all the .csv files that are missing or out of date
        new_csv_files = False
        all_csv_files = []
        for log_file in self.logs:
            csv_file = log_file + ".csv"
            all_csv_files.append(csv_file)

            if self.force or not os.path.isfile(csv_file) \
                    or os.path.getmtime(log_file) > os.path.getmtime(csv_file):
                self.generate_csv(log_file, csv_file)
                new_csv_files = True

        # combine multiple .csv files into one big one
        all_csv_files.sort()
        csv_cat = all_csv_files[0] + ".cat"
        if new_csv_files or not os.path.isfile(csv_cat):
            allo.utils.cat(all_csv_files, csv_cat)

        # sort the big .csv file
        csv_sort = csv_cat + ".sort"
        if new_csv_files or not os.path.isfile(csv_sort):
            allo.utils.sort(csv_cat, csv_sort, allo.utils.linecount(csv_cat))

        # remove duplicate lines from the big .csv file
        self.csv = csv_sort + ".uniq"
        if new_csv_files or not os.path.isfile(self.csv):
            allo.utils.uniq(csv_sort, self.csv)
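
        # Taken together, the cat, sort and uniq steps above appear to mirror
        # the shell pipeline "cat *.csv | sort | uniq": one combined, sorted,
        # duplicate-free file. The sorted order matters later because
        # LogDiffIterator walks two of these files in a merge-style comparison.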

        if self.verbose:
            print("combined log " + self.csv)

        # add up the per-component and per-event totals
        self.summarise()

    def add_file(self, path):
        if allo.utils.is_raptor_log(path):
            self.logs.append(path)
            if self.verbose:
                print(path + " is a raptor log")

    def generate_csv(self, log_file, csv_file):
        """run the CSV filter on log_file to produce csv_file."""

        if self.verbose:
            print("generating " + csv_file)

        filter = plugins.filter_csv.CSV(["ok"])  # ignore "ok" recipes
        filter_params = CSVFilterParams(csv_file)

        try:
            filter.open(filter_params)

            with open(log_file, "rb") as file:
                for line in file:
                    filter.write(line)

            filter.summary()
            filter.close()
        except Exception as e:
            raise NotADiffableLog("problem filtering '{0}' : {1}\n".format(log_file, str(e)))
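
    # The CSV rows produced by the filter are consumed by summarise() below,
    # which expects the first field to be the event type (error, warning,
    # info, ...) and the second field to be the component's bld.inf; that
    # layout is inferred from the summarise() code rather than from the
    # filter itself.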

    def summarise(self):
        """scan the combined CSV file and total up the number of errors, warnings etc.

        also record the total number of "events" per component."""

        self.events = {}
        self.components = {}

        if self.limit > 0:
            csv.field_size_limit(self.limit)

        reader = csv.reader(open(self.csv, "rb"))
        for row in reader:

            event = row[0]
            if event == "info" and row[2] == "version":
                self.raptor_version = row[3]
                continue

            if event in self.events:
                self.events[event] += 1
            else:
                self.events[event] = 1

            bldinf = row[1]
            if bldinf in self.components:
                self.components[bldinf] += 1
            else:
                self.components[bldinf] = 1

        if self.verbose:
            for (event, count) in self.events.items():
                print("{0} : {1}".format(event, count))
            print("{0} components".format(len(self.components)))


class LogDiff(object):
    """Comparison between two DiffableLog objects.

    The result is a "components" dictionary and an "events" dictionary which
    provide a useful summary of the differences. In components the key is the
    bld.inf path and the data is a pair giving the total number of events that
    appear for that component in each of the two builds. In events the key is
    the event type (error, warning etc.) and the data is a pair giving the
    total number of those events in each whole build.

    The object can also be iterated over, providing a sequence of tuples
    (line, flag) where "line" is a single line from the combined CSV files
    and "flag" is either FIRST, SECOND or BOTH to indicate which build(s) the
    line appears in."""

    FIRST = 1
    SECOND = 2
    BOTH = 3

    def __init__(self, log_a, log_b):
        """take two DiffableLog objects."""

        self.log_a = log_a
        self.log_b = log_b

        # compare the summaries

        # component totals
        bldinfs = set(log_a.components.keys()) | set(log_b.components.keys())
        self.components = {}
        for bldinf in bldinfs:
            if bldinf in log_a.components:
                na = log_a.components[bldinf]
            else:
                na = 0

            if bldinf in log_b.components:
                nb = log_b.components[bldinf]
            else:
                nb = 0
            self.components[bldinf] = (na, nb)

        # event totals
        events = set(log_a.events.keys()) | set(log_b.events.keys())
        self.events = {}
        for event in events:
            if event in log_a.events:
                na = log_a.events[event]
            else:
                na = 0

            if event in log_b.events:
                nb = log_b.events[event]
            else:
                nb = 0
            self.events[event] = (na, nb)
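
        # For illustration only (values invented): self.events might contain
        # {"error": (0, 3)}, meaning the second build has 3 errors where the
        # first build had none.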

    def __iter__(self):
        """an iterator for stepping through the detailed differences."""
        return LogDiffIterator(self)

    def dump_to_files(self, filename1, filename2):
        """take the detailed differences and create a pair of files which
        should be manageable by a graphical diff tool. we trim the size by
        replacing blocks of matching lines with "== block 1", "== block 2" etc.
        returns the number of lines that differ."""

        different = 0
        sameblock = False  # are we on a run of matching lines
        block = 0
        with open(filename1, "wb") as file_a:
            with open(filename2, "wb") as file_b:
                for (line, flag) in self:
                    if flag == LogDiff.FIRST:
                        file_a.write(line)
                        sameblock = False
                        different += 1
                    elif flag == LogDiff.SECOND:
                        file_b.write(line)
                        sameblock = False
                        different += 1
                    elif not sameblock:  # LogDiff.BOTH
                        sameblock = True
                        block += 1
                        file_a.write("== block {0}\n".format(block))
                        file_b.write("== block {0}\n".format(block))
        return different
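
    # Illustrative only: iterating a LogDiff directly yields (line, flag)
    # tuples, so a caller could count the lines unique to the first build with
    #   sum(1 for (line, flag) in diff if flag == LogDiff.FIRST)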


class LogDiffIterator(object):
    """Iterate over a LogDiff object.

    The sequence values are tuples (line, flag) where "line" is a line of text
    from one or both CSV files, and "flag" is either FIRST or SECOND or BOTH
    to show which."""

    def __init__(self, log_diff):
        """It should be OK to create multiple iterators for the same data."""

        self.file_a = open(log_diff.log_a.csv, "rb")
        self.file_b = open(log_diff.log_b.csv, "rb")

        self.line_a = self.file_a.readline()
        self.line_b = self.file_b.readline()

    def __iter__(self):
        return self

    def next(self):
        if self.line_a:
            if self.line_b:
                if self.line_a == self.line_b:
                    value_pair = (self.line_a, LogDiff.BOTH)
                    self.line_a = self.file_a.readline()
                    self.line_b = self.file_b.readline()
                elif self.line_a < self.line_b:
                    value_pair = (self.line_a, LogDiff.FIRST)
                    self.line_a = self.file_a.readline()
                else:
                    value_pair = (self.line_b, LogDiff.SECOND)
                    self.line_b = self.file_b.readline()
            else:
                # file_b is finished
                value_pair = (self.line_a, LogDiff.FIRST)
                self.line_a = self.file_a.readline()
        elif self.line_b:
            # file_a is finished
            value_pair = (self.line_b, LogDiff.SECOND)
            self.line_b = self.file_b.readline()
        else:
            # both files are finished
            self.file_a.close()
            self.file_b.close()
            raise StopIteration

        return value_pair
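
    # Note: next() follows the Python 2 iterator protocol (Python 3 would use
    # __next__). The comparison above is a merge of two files that are assumed
    # to be sorted, which DiffableLog guarantees by sorting each combined CSV
    # file before removing duplicates.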