housekeeping.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. # nm.debian.org website housekeeping
  2. # pymode:lint_ignore=E501
  3. #
  4. # Copyright (C) 2012--2014 Enrico Zini <enrico@debian.org>
  5. #
  6. # This program is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU Affero General Public License as
  8. # published by the Free Software Foundation, either version 3 of the
  9. # License, or (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU Affero General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU Affero General Public License
  17. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. from __future__ import print_function
  19. from __future__ import absolute_import
  20. from __future__ import division
  21. from __future__ import unicode_literals
  22. from django.utils.timezone import now
  23. from django.conf import settings
  24. import django_housekeeping as hk
  25. from django.db import connection, transaction
  26. from django.contrib.sites.models import Site
  27. from backend import const
  28. from . import models as bmodels
  29. from . import utils, const
  30. import gzip
  31. import datetime
  32. import json
  33. import os.path
  34. import logging
  # Logger for this module, named after the module itself
  log = logging.getLogger(__name__)
  # Value of the BACKUP_DIR Django setting, or None when it is not set
  BACKUP_DIR = getattr(settings, "BACKUP_DIR", None)
  # NOTE(review): presumably the housekeeping stages, in run order; the tasks in
  # this file define run_backup and run_main methods -- confirm against
  # django_housekeeping
  STAGES = ["backup", "main", "stats"]
  class Housekeeper(hk.Task):
      """
      Make the __housekeeping__ Person available to other tasks as self.user
      """
      NAME = "housekeeper"

      def __init__(self, *args, **kw):
          super(Housekeeper, self).__init__(*args, **kw)
          people = bmodels.Person.objects
          # Reuse the __housekeeping__ user if it is already in the database,
          # otherwise create it
          try:
              robot = people.get(username="__housekeeping__")
          except bmodels.Person.DoesNotExist:
              robot = people.create_user(
                  username="__housekeeping__",
                  is_staff=False,
                  cn="nm.debian.org Housekeeping",
                  sn="Robot",
                  email="nm@debian.org",
                  bio="I am the robot that runs the automated tasks in the site",
                  uid=None,
                  status=const.STATUS_DC,
                  audit_skip=True)
          self.user = robot
  class MakeLink(hk.Task):
      """
      Turn objects into full URLs on the current Site
      """
      NAME = "link"

      def __init__(self, *args, **kw):
          super(MakeLink, self).__init__(*args, **kw)
          # Look up the current Site once, when the task is created
          self.site = Site.objects.get_current()

      def __call__(self, obj):
          """
          Return the full URL for obj, built from the site domain and
          obj.get_absolute_url()
          """
          domain = self.site.domain
          # Every domain except localhost gets an https link
          if domain != "localhost":
              return "https://%s%s" % (domain, obj.get_absolute_url())
          # localhost gets plain http on port 8000
          return "http://localhost:8000" + obj.get_absolute_url()
  class BackupDB(hk.Task):
      """
      Backup of the whole database
      """
      def run_backup(self, stage):
          """
          Export the database with bmodels.export_db(full=True) and write it
          as gzip-compressed JSON to db-full.json.gz in the housekeeping
          output directory.

          Nothing is written if there is no output directory, or on a dry run.
          """
          if self.hk.outdir is None:
              log.info("HOUSEKEEPING_ROOT is not set: skipping backups")
              return

          people = list(bmodels.export_db(full=True))

          class Serializer(json.JSONEncoder):
              """
              JSON encoder that renders any object with a strftime method as
              a "YYYY-MM-DD HH:MM:SS" string
              """
              def default(self, o):
                  if hasattr(o, "strftime"):
                      return o.strftime("%Y-%m-%d %H:%M:%S")
                  return json.JSONEncoder.default(self, o)

          # Base filename for the backup
          basedir = self.hk.outdir.path()
          fname = os.path.join(basedir, "db-full.json.gz")
          log.info("%s: backing up to %s", self.IDENTIFIER, fname)
          if self.hk.dry_run:
              return

          # Write the backup file. The mode is spelled 0o640 because the old
          # 0640 spelling is a syntax error on Python 3; 0o640 is accepted by
          # Python 2.6+ as well.
          with utils.atomic_writer(fname, 0o640) as fd:
              # Create the gzip stream before entering the try block: if the
              # constructor fails there is nothing to close, and the finally
              # clause must not refer to a name that was never bound
              gzfd = gzip.GzipFile(filename=fname[:-3], mode="w", compresslevel=9, fileobj=fd)
              try:
                  json.dump(people, gzfd, cls=Serializer, indent=2)
              finally:
                  gzfd.close()
  class ComputeAMCTTE(hk.Task):
      """
      Compute AM Committee membership
      """
      @transaction.atomic
      def run_main(self, stage):
          """
          Recompute AM.is_am_ctte: clear the flag on every AM, then set it on
          the AMs returned by the query below
          """
          ams = bmodels.AM.objects

          # Start with nobody in the committee
          ams.update(is_am_ctte=False)

          # Only log entries newer than 180 days ago count
          cutoff = now() - datetime.timedelta(days=30 * 6)

          # Select AMs that are neither FD nor DAM and manage a done or
          # cancelled process with a log entry after the cutoff
          cursor = connection.cursor()
          cursor.execute("""
              SELECT am.id
                FROM am
                JOIN process p ON p.manager_id=am.id AND p.progress IN (%s, %s)
                JOIN log ON log.process_id=p.id AND log.logdate > %s
               WHERE am.is_am AND NOT am.is_fd AND NOT am.is_dam
               GROUP BY am.id
          """, (const.PROGRESS_DONE, const.PROGRESS_CANCELLED, cutoff))
          member_ids = [row[0] for row in cursor]

          ams.filter(id__in=member_ids).update(is_am_ctte=True)

          member_count = ams.filter(is_am_ctte=True).count()
          log.info("%s: %d CTTE members", self.IDENTIFIER, member_count)
  class ComputeProcessActiveFlag(hk.Task):
      """
      Compute Process.is_active from Process.progress
      """
      @transaction.atomic
      def run_main(self, stage):
          """
          Set is_active on every row of the process table: true unless the
          progress is done or cancelled
          """
          inactive_progresses = (const.PROGRESS_DONE, const.PROGRESS_CANCELLED)
          cursor = connection.cursor()
          cursor.execute("""
              UPDATE process SET is_active=(progress NOT IN (%s, %s))
          """, inactive_progresses)

          # Log active processes over the number of rows the UPDATE touched
          active_count = bmodels.Process.objects.filter(is_active=True).count()
          log.info("%s: %d/%d active processes",
                   self.IDENTIFIER,
                   active_count,
                   cursor.rowcount)
  class PersonExpires(hk.Task):
      """
      Expire old Person records
      """
      DEPENDS = [MakeLink, Housekeeper]

      @transaction.atomic
      def run_main(self, stage):
          """
          Handle every Person whose expiration date is before today.

          The expiration date is removed if the person's status is no longer
          STATUS_DC, or if the person has any process; otherwise the Person
          is deleted.
          """
          import process.models as pmodels

          expired = bmodels.Person.objects.filter(expires__lt=datetime.date.today())
          for person in expired:
              # Status changed: keep the record and drop the expiration
              if person.status != const.STATUS_DC:
                  log.info("%s: removing expiration date for %s who has become %s",
                           self.IDENTIFIER, self.hk.link(person), person.status)
                  person.expires = None
                  person.save(
                      audit_author=self.hk.housekeeper.user,
                      audit_notes="user became {}: removing expiration date".format(const.ALL_STATUS_DESCS[person.status]))
                  continue

              # Processes exist in either process model: keep the record
              if person.processes.exists() or pmodels.Process.objects.filter(person=person).exists():
                  log.info("%s: removing expiration date for %s who now has process history",
                           self.IDENTIFIER, self.hk.link(person))
                  person.expires = None
                  person.save(
                      audit_author=self.hk.housekeeper.user,
                      audit_notes="process detected: removing expiration date")
                  continue

              # Still STATUS_DC and no processes: delete the record
              log.info("%s: deleting expired Person %s", self.IDENTIFIER, person)
              person.delete()
  class CheckOneProcessPerPerson(hk.Task):
      """
      Check that one does not have more than one open process at the current time
      """
      DEPENDS = [MakeLink]

      def run_main(self, stage):
          """
          Log a warning for each Person with more than one active process
          """
          from django.db.models import Count
          # Filter on active processes first, then count them per person
          multiple = (bmodels.Person.objects.filter(processes__is_active=True)
                      .annotate(num_processes=Count("processes"))
                      .filter(num_processes__gt=1))
          for p in multiple:
              # log.warning rather than the deprecated log.warn alias
              log.warning("%s: %s has %d open processes", self.IDENTIFIER, self.hk.link(p), p.num_processes)
  class CheckAMMustHaveUID(hk.Task):
      """
      Check that AMs have a Debian login
      """
      def run_main(self, stage):
          """
          Log a warning for each AM whose person has no uid
          """
          without_uid = bmodels.AM.objects.filter(person__uid=None)
          for am in without_uid:
              log.warning(
                  "%s: AM %d (person %d %s) has no uid",
                  self.IDENTIFIER, am.id, am.person.id, am.person.email)
  class CheckStatusProgressMatch(hk.Task):
      """
      Check that the last completed process of each person has the same
      'applying_for' as the person status.

      Completed processes are the closed backend processes with progress
      'done' and the closed processes from process.models that have been
      approved.
      """
      DEPENDS = [MakeLink]

      @staticmethod
      def _keep_latest(process_byperson, processes):
          """
          Update process_byperson so that each person in processes maps to
          the process with the latest 'closed' value seen so far
          """
          for p in processes:
              existing = process_byperson.get(p.person, None)
              if existing is None or existing.closed < p.closed:
                  process_byperson[p.person] = p

      def run_main(self, stage):
          """
          Log a warning for each person whose status differs from the
          'applying_for' of their last completed process
          """
          import process.models as pmodels
          process_byperson = {}

          # Backend processes first, then the ones from process.models: both
          # feed the same per-person map
          self._keep_latest(
              process_byperson,
              bmodels.Process.objects.filter(closed__isnull=False, progress=const.PROGRESS_DONE).select_related("person"))
          self._keep_latest(
              process_byperson,
              pmodels.Process.objects.filter(closed__isnull=False, approved_by__isnull=False).select_related("person"))

          for person, process in process_byperson.items():
              if person.status != process.applying_for:
                  # log.warning rather than the deprecated log.warn alias
                  log.warning("%s: %s has status %s but the last completed process was applying for %s",
                              self.IDENTIFIER, self.hk.link(person), person.status, process.applying_for)
  class CheckLogProgressMatch(hk.Task):
      """
      Check that the progress of each active process is the same as the
      progress of its most recent log entry
      """
      DEPENDS = [MakeLink]

      def run_main(self, stage):
          """
          Log a warning for each active process that has no log entries, or
          whose progress differs from that of its latest log entry
          """
          for p in bmodels.Process.objects.filter(is_active=True):
              # Latest entry first; indexing an empty result raises IndexError
              try:
                  last_log = p.log.order_by("-logdate")[0]
              except IndexError:
                  log.warning("%s: %s (%s) has no log entries", self.IDENTIFIER, self.hk.link(p), repr(p))
                  continue
              if p.progress != last_log.progress:
                  log.warning("%s: %s (%s) has progress %s but the last log entry has progress %s",
                              self.IDENTIFIER, self.hk.link(p), repr(p), p.progress, last_log.progress)
  class CheckEnums(hk.Task):
      """
      Consistency check of enum values
      """
      DEPENDS = [MakeLink]

      def run_main(self, stage):
          """
          Log a warning for each Person, Process or Log row whose status,
          applying_for or progress is not a tag from const.ALL_STATUS or
          const.ALL_PROGRESS
          """
          valid_statuses = [status.tag for status in const.ALL_STATUS]
          valid_progresses = [progress.tag for progress in const.ALL_PROGRESS]

          bad_people = bmodels.Person.objects.exclude(status__in=valid_statuses)
          for person in bad_people:
              log.warning("%s: %s: invalid status %s", self.IDENTIFIER, self.hk.link(person), person.status)

          bad_targets = bmodels.Process.objects.exclude(applying_for__in=valid_statuses)
          for proc in bad_targets:
              log.warning("%s: %s: invalid applying_for %s", self.IDENTIFIER, self.hk.link(proc), proc.applying_for)

          bad_progress = bmodels.Process.objects.exclude(progress__in=valid_progresses)
          for proc in bad_progress:
              log.warning("%s: %s: invalid progress %s", self.IDENTIFIER, self.hk.link(proc), proc.progress)

          bad_entries = bmodels.Log.objects.exclude(progress__in=valid_progresses)
          for entry in bad_entries:
              log.warning("%s: %s: log entry %d has invalid progress %s",
                          self.IDENTIFIER, self.hk.link(entry.process), entry.id, entry.progress)
  class CheckCornerCases(hk.Task):
      """
      Check for known corner cases, to be fixed somehow eventually maybe in case
      they give trouble
      """
      def run_main(self, stage):
          """
          Report how many Person records have no process, and how many have
          no status_changed date
          """
          people = bmodels.Person.objects

          without_process = people.filter(processes__isnull=True).count()
          if without_process > 0:
              log.info("%s: %d Great Ancients found who have no Process entry", self.IDENTIFIER, without_process)

          without_date = people.filter(status_changed__isnull=True).count()
          if without_date > 0:
              log.warning("%s: %d entries still have a NULL status_changed date", self.IDENTIFIER, without_date)
  class CheckDjangoPermissions(hk.Task):
      """
      Check consistency between Django permissions and flags in the AM model
      """
      DEPENDS = [MakeLink]

      def run_main(self, stage):
          """
          Log a warning for each person who is staff or superuser without
          being FD or DAM in the AM table, and for each person who is FD or
          DAM without being staff or superuser
          """
          from django.db.models import Q

          # Get the list of users that django thinks are powerful. Every
          # Person is remembered by id, so that reporting a mismatch below
          # does not need a further query per person.
          people_by_id = {}
          person_power_users = set()
          for p in bmodels.Person.objects.all():
              people_by_id[p.id] = p
              if p.is_staff or p.is_superuser:
                  person_power_users.add(p.id)

          # Get the list of users that we think are powerful
          am_power_users = set()
          for a in bmodels.AM.objects.filter(Q(is_fd=True) | Q(is_dam=True)):
              person = a.person
              people_by_id[person.id] = person
              am_power_users.add(person.id)

          # The loop variable is person_id, to avoid shadowing the builtin id
          for person_id in (person_power_users - am_power_users):
              log.warning("%s: bmodels.Person.id %d (%s) has powers that bmodels.AM does not know about",
                          self.IDENTIFIER, person_id, people_by_id[person_id].lookup_key)

          for person_id in (am_power_users - person_power_users):
              log.warning("%s: bmodels.Person.id %d (%s) has powers in bmodels.AM that bmodels.Person does not know about",
                          self.IDENTIFIER, person_id, people_by_id[person_id].lookup_key)
  class DDUsernames(hk.Task):
      """
      Make sure that people with a DD status have a DD SSO username
      """
      DEPENDS = [MakeLink, Housekeeper]

      @transaction.atomic
      def run_main(self, stage):
          """
          For each person with one of the statuses listed below, set the
          username to uid@debian.org unless it already ends with @debian.org.

          People without a uid are reported with a warning and left alone.
          """
          dd_statuses = (
              const.STATUS_DD_U, const.STATUS_DD_NU,
              const.STATUS_EMERITUS_DD, const.STATUS_EMERITUS_DM,
              const.STATUS_REMOVED_DD, const.STATUS_REMOVED_DM,
          )

          for person in bmodels.Person.objects.filter(status__in=dd_statuses):
              # Without a uid there is nothing to build the username from
              if person.uid is None:
                  log.warning("%s: %s has status %s but uid is empty",
                              self.IDENTIFIER, self.hk.link(person), person.status)
                  continue

              # Already a debian.org username: nothing to do
              if person.username.endswith("@debian.org"):
                  continue

              sso_username = person.uid + "@debian.org"
              log.info("%s: %s has status %s but an alioth username: setting username to %s",
                       self.IDENTIFIER, self.hk.link(person), person.status, sso_username)
              person.username = sso_username
              person.save(
                  audit_author=self.hk.housekeeper.user,
                  audit_notes="updated SSO username to @debian.org version")
  class CheckOneActiveKeyPerPerson(hk.Task):
      """
      Check that one does not have more than one active key at the current time
      """
      DEPENDS = [MakeLink]

      def run_main(self, stage):
          """
          Log a warning for each Person with more than one active fingerprint
          """
          from django.db.models import Count
          # Filter on active fingerprints first, then count them per person
          multiple = (bmodels.Person.objects.filter(fprs__is_active=True)
                      .annotate(num_fprs=Count("fprs"))
                      .filter(num_fprs__gt=1))
          for p in multiple:
              # log.warning rather than the deprecated log.warn alias
              log.warning("%s: %s has %d active keys", self.IDENTIFIER, self.hk.link(p), p.num_fprs)