tracker_tasks.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. # Copyright 2013 The Distro Tracker Developers
  2. # See the COPYRIGHT file at the top-level directory of this distribution and
  3. # at http://deb.li/DTAuthors
  4. #
  5. # This file is part of Distro Tracker. It is subject to the license terms
  6. # in the LICENSE file found in the top-level directory of this
  7. # distribution and at http://deb.li/DTLicense. No part of Distro Tracker,
  8. # including this file, may be copied, modified, propagated, or distributed
  9. # except according to the terms contained in the LICENSE file.
  10. """
  11. Implements the Distro Tracker tasks necessary for extracting interesting
  12. package source files.
  13. """
  14. from __future__ import unicode_literals
  15. from distro_tracker.core.tasks import BaseTask
  16. from distro_tracker.core.utils.packages import AptCache
  17. from distro_tracker.core.models import ExtractedSourceFile
  18. from distro_tracker.core.models import SourcePackage
  19. from django.core.files import File
  20. import os
  21. import logging
  22. logger = logging.getLogger('distro_tracker.core.tasks')
  23. class ExtractSourcePackageFiles(BaseTask):
  24. """
  25. A task which extracts some files from a new source package version.
  26. The extracted files are:
  27. - debian/changelog
  28. - debian/copyright
  29. - debian/rules
  30. - debian/control
  31. - debian/watch
  32. """
  33. DEPENDS_ON_EVENTS = (
  34. 'new-source-package-version',
  35. )
  36. PRODUCES_EVENTS = (
  37. 'source-files-extracted',
  38. )
  39. ALL_FILES_TO_EXTRACT = (
  40. 'changelog',
  41. 'copyright',
  42. 'rules',
  43. 'control',
  44. 'watch',
  45. )
  46. def __init__(self, *args, **kwargs):
  47. super(ExtractSourcePackageFiles, self).__init__(*args, **kwargs)
  48. self.cache = None
  49. def extract_files(self, source_package, files_to_extract=None):
  50. """
  51. Extract files for just the given source package.
  52. :type source_package: :class:`SourcePackage
  53. <distro_tracker.core.models.SourcePackage>`
  54. :type files_to_extract: An iterable of file names which should be
  55. extracted
  56. """
  57. if self.cache is None:
  58. self.cache = AptCache()
  59. source_directory = self.cache.retrieve_source(
  60. source_package.source_package_name.name,
  61. source_package.version,
  62. debian_directory_only=True)
  63. debian_directory = os.path.join(source_directory, 'debian')
  64. if files_to_extract is None:
  65. files_to_extract = self.ALL_FILES_TO_EXTRACT
  66. for file_name in files_to_extract:
  67. file_path = os.path.join(debian_directory, file_name)
  68. if not os.path.exists(file_path):
  69. continue
  70. with open(file_path, 'r') as f:
  71. extracted_file = File(f)
  72. ExtractedSourceFile.objects.create(
  73. source_package=source_package,
  74. extracted_file=extracted_file,
  75. name=file_name)
  76. def _execute_initial(self):
  77. """
  78. When the task is directly ran, instead of relying on events to know
  79. which packages' source files should be retrieved, the task scans all
  80. existing packages and adds any missing source packages for each of
  81. them.
  82. """
  83. # First remove all source files which are no longer to be included.
  84. qs = ExtractedSourceFile.objects.exclude(
  85. name__in=self.ALL_FILES_TO_EXTRACT)
  86. qs.delete()
  87. # Retrieves the packages and all the associated files with each of them
  88. # in only two db queries.
  89. source_packages = SourcePackage.objects.all()
  90. source_packages.prefetch_related('extracted_source_files')
  91. # Find the difference of packages and extract only those for each
  92. # package
  93. for srcpkg in source_packages:
  94. extracted_files = [
  95. extracted_file.name
  96. for extracted_file in srcpkg.extracted_source_files.all()
  97. ]
  98. files_to_extract = [
  99. file_name
  100. for file_name in self.ALL_FILES_TO_EXTRACT
  101. if file_name not in extracted_files
  102. ]
  103. if files_to_extract:
  104. try:
  105. self.extract_files(srcpkg, files_to_extract)
  106. except:
  107. logger.exception(
  108. 'Problem extracting source files for'
  109. ' {pkg} version {ver}'.format(
  110. pkg=srcpkg, ver=srcpkg.version))
  111. def execute(self):
  112. if self.is_initial_task():
  113. return self._execute_initial()
  114. # When the task is not the initial task, then all the packages it
  115. # should process should come from received events.
  116. new_version_pks = [
  117. event.arguments['pk']
  118. for event in self.get_all_events()
  119. ]
  120. source_packages = SourcePackage.objects.filter(pk__in=new_version_pks)
  121. source_packages = source_packages.select_related()
  122. for source_package in source_packages:
  123. try:
  124. self.extract_files(source_package)
  125. except:
  126. logger.exception(
  127. 'Problem extracting source files for'
  128. ' {pkg} version {ver}'.format(
  129. pkg=source_package, ver=source_package.version))
  130. self.raise_event('source-files-extracted')