#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#------------------------------------------------------------------------------
# Python script to analyze cpp-ethereum commits, and filter out duplicates
#
# The documentation for cpp-ethereum is hosted at http://cpp-ethereum.org
#
# ------------------------------------------------------------------------------
# This file is part of cpp-ethereum.
#
# cpp-ethereum is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# cpp-ethereum is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>
#
# (c) 2016 cpp-ethereum contributors.
#------------------------------------------------------------------------------
"""Count commits per author from a saved log, skipping duplicate commits.

The script reads ``log.txt`` line by line, looking for lines that start with
``Author: <name> <<email>>`` and ``Date: <date>``.  Author names are mapped
through ``authorAliases`` so that several spellings of one person are counted
together.  A commit whose (author, date) pair has already been seen is
reported as filtered out and is not counted again.  Finally the per-author
totals are printed in ascending order of commit count.
"""

import operator
import re

# Both patterns are applied with ``match``, i.e. anchored at the start of the
# line, so indented text inside commit messages is never treated as a header.
authorRegex = re.compile(r'Author: (.*) <(.*)>')
dateRegex = re.compile(r'Date: (.*)')

# Maps an author name as it appears in the log to the canonical name used
# for counting.
authorAliases = {
    'arkpar': 'Arkadiy Paronyan',
    # NOTE(review): this key is an e-mail address, but only the author *name*
    # (regex group 1) is ever looked up, so it matches only when the name
    # field itself holds this address -- confirm that is the intent.
    'arkady.paronyan@gmail.com': 'Arkadiy Paronyan',
    'Arkady Paronyan': 'Arkadiy Paronyan',
    'artur-zawlocki': 'Artur Zawłocki',
    'Artur Zawlocki': 'Artur Zawłocki',
    'Artur Zawłocki': 'Artur Zawłocki',
    'caktux': 'Vincent Gariepy',
    'chriseth': 'Christian Reitwiessner',
    'Christian': 'Christian Reitwiessner',
    'CJentzsch': 'Christoph Jentzsch',
    'debris': 'Marek Kotewicz',
    'debris-berlin': 'Marek Kotewicz',
    'Dimitry': 'Dimitry Khokhlov',
    'Dmitry K': 'Dimitry Khokhlov',
    'ethdev': 'Marek Kotewicz',
    'gluk256': 'Vlad Gluhovsky',
    'Greg': 'Greg Colvin',
    'Marian OANCΞA': 'Marian Oancea',
    'ethdev zug': 'Marek Kotewicz',
    'Gav Wood': 'Gavin Wood',
    # The backslash is escaped explicitly; the key is still ``U-SVZ13\Arkady``.
    'U-SVZ13\\Arkady': 'Arkadiy Paronyan',
    'liana': 'Liana Husikyan',
    'LianaHus': 'Liana Husikyan',
    'subtly': 'Alex Leverington',
    'unknown': 'Marek Kotewicz',
    'vbuterin': 'Vitalik Buterin',
    'winsvega': 'Dimitry Khokhlov',
    'yann300': 'Yann Levreau',
}

# Filled in by main(): canonical author name -> number of distinct commits.
commitCounts = {}
# Filled in by main(): (author, date) pairs that have already been counted.
commitAlreadySeen = {}


def canonical_author(name):
    """Return the canonical name for *name*, or *name* itself if unaliased."""
    return authorAliases.get(name, name)


def count_commits(lines, counts=None, seen=None):
    """Tally distinct commits per author from an iterable of log lines.

    Args:
        lines: iterable of text lines (for example an open text file).
        counts: optional dict to accumulate ``author -> commit count`` into;
            a new dict is used when omitted.
        seen: optional dict recording the ``(author, date)`` pairs already
            counted; a new dict is used when omitted.

    Returns:
        A ``(counts, duplicates)`` tuple, where ``duplicates`` is the list of
        ``(author, date)`` pairs that were skipped, in the order encountered.
    """
    counts = {} if counts is None else counts
    seen = {} if seen is None else seen
    duplicates = []
    # A "Date:" line that precedes any "Author:" line is attributed to "".
    author = ""
    for line in lines:
        match = authorRegex.match(line)
        if match:
            author = canonical_author(match.group(1))
        match = dateRegex.match(line)
        if match:
            date = match.group(1)
            # A tuple key keeps author and date separate, unlike string
            # concatenation, which could merge two different pairs.
            key = (author, date)
            if key in seen:
                duplicates.append(key)
            else:
                seen[key] = 1
                counts[author] = counts.get(author, 0) + 1
    return counts, duplicates


def main(log_path='log.txt'):
    """Read *log_path*, report filtered duplicates, then print the totals."""
    # The alias table holds non-ASCII names, so the log is decoded as UTF-8
    # regardless of locale; undecodable bytes are replaced instead of
    # aborting the whole run.
    with open(log_path, encoding='utf-8', errors='replace') as logFile:
        _, duplicates = count_commits(logFile, commitCounts, commitAlreadySeen)
    for author, date in duplicates:
        print("Filtering out .... " + author + " - " + date)
    # Ascending by commit count.
    for key in sorted(commitCounts, key=commitCounts.get):
        print(key + " has " + str(commitCounts[key]) + " commits")


if __name__ == '__main__':
    main()