legacy_import.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. # nm.debian.org website maintenance
  2. #
  3. # Copyright (C) 2012 Enrico Zini <enrico@debian.org>
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU Affero General Public License as
  7. # published by the Free Software Foundation, either version 3 of the
  8. # License, or (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU Affero General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU Affero General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. from django.core.management.base import BaseCommand, CommandError
  18. import django.db
  19. from django.conf import settings
  20. import optparse
  21. import sys
  22. import logging
  23. import json
  24. import ldap
  25. from backend import models as bmodels
  26. from backend import const
  27. import keyring.models as kmodels
  28. log = logging.getLogger(__name__)
  29. class Importer(object):
  30. def __init__(self):
  31. self.people_cache_by_email = dict()
  32. self.people_cache_by_uid = dict()
  33. self.todo_advocates = dict()
  34. log.info("Importing dm keyring...")
  35. self.dm = frozenset(kmodels.list_dm())
  36. log.info("Importing dd_u keyring...")
  37. self.dd_u = frozenset(kmodels.list_dd_u())
  38. log.info("Importing dd_nu keyring...")
  39. self.dd_nu = frozenset(kmodels.list_dd_nu())
  40. log.info("Importing emeritus_dd keyring...")
  41. self.emeritus_dd = frozenset(kmodels.list_emeritus_dd())
  42. log.info("Importing removed_dd keyring...")
  43. self.removed_dd = frozenset(kmodels.list_removed_dd())
  44. def import_person(self, person):
  45. p = bmodels.Person(
  46. cn=person["cn"],
  47. mn=person["mn"],
  48. sn=person["sn"],
  49. uid=person["accountname"],
  50. email=person["mail"],
  51. status=person["status"])
  52. p.save()
  53. self.people_cache_by_email[p.email] = p
  54. if p.uid: self.people_cache_by_uid[p.uid] = p
  55. #print "Person:", repr(p)
  56. if person["am"]:
  57. src = person["am"]
  58. am = bmodels.AM(
  59. person=p,
  60. slots=src["slots"],
  61. is_am=src["is_am"],
  62. is_fd=src["is_fd"],
  63. is_dam=src["is_dam"])
  64. am.save()
  65. log.info("AM: %s", repr(am))
  66. def import_processes(self, person):
  67. p = self.people_cache_by_email[person["mail"]]
  68. by_target = dict()
  69. for proc in person["processes"]:
  70. if proc["manager"] is None:
  71. am = None
  72. else:
  73. if proc["manager"] not in self.people_cache_by_uid:
  74. log.warning("%s manager of %s is not in the person table", proc["manager"], p)
  75. m = self.people_cache_by_uid[proc["manager"]]
  76. if not m.am:
  77. log.warning("%s manager of %s is not in the AM table", proc["manager"], p)
  78. am = m.am
  79. pr = bmodels.Process(
  80. person=p,
  81. applying_for=proc["applying_for"],
  82. progress=proc["progress"],
  83. manager=am,
  84. )
  85. pr.save()
  86. self.todo_advocates[pr.id] = proc["advocates"]
  87. by_target[pr.applying_for] = pr
  88. def get_person(uid):
  89. if uid is None:
  90. return None
  91. return self.people_cache_by_uid[uid]
  92. import re
  93. re_date = re.compile("^\d+-\d+-\d+$")
  94. re_datetime = re.compile("^\d+-\d+-\d+ \d+:\d+:\d+$")
  95. def get_date(s):
  96. import datetime
  97. import rfc822
  98. if re_date.match(s):
  99. try:
  100. return datetime.datetime.strptime(s, "%Y-%m-%d")
  101. except ValueError:
  102. date = rfc822.parsedate(s)
  103. elif re_datetime.match(s):
  104. try:
  105. return datetime.datetime.strptime(s, "%Y-%m-%d %H:%M:%S")
  106. except ValueError:
  107. date = rfc822.parsedate(s)
  108. else:
  109. date = rfc822.parsedate(s)
  110. if date is None:
  111. return None
  112. return datetime.datetime(*date)
  113. for logentry in person["log"]:
  114. if logentry["applying_for"] not in by_target:
  115. log.warning("%s not in %s for %s", logentry["applying_for"], by_target.keys(), p)
  116. if logentry["logdate"] is None:
  117. log.warning("Skipping '%s' log entry for %s because of a missing date", logentry["logtext"], repr(p))
  118. continue
  119. # FIXME: move this to export
  120. date = get_date(logentry["logdate"])
  121. if date is None:
  122. log.warning("Skipping '%s' log entry: cannot parse date: %s", logentry["logtext"], logentry["logdate"])
  123. continue
  124. l = bmodels.Log(
  125. changed_by=get_person(logentry["changed_by"]),
  126. process=by_target[logentry["applying_for"]],
  127. progress=logentry["logtype"],
  128. logdate=date,
  129. logtext=logentry["logtext"],
  130. )
  131. l.save()
  132. def import_ldap(self, server):
  133. """
  134. Perform initial data import from LDAP
  135. Imports cn, sn, nm, fpr, email for DDs and guest accounts.
  136. Does not set status, that will be taken from keyrings
  137. """
  138. # enrico> Hi. Can you give me an official procedure to check if one is a DD from LDAP info?
  139. # @weasel> enrico: not really, that's your decision.
  140. # @weasel> enrico: for one, you can filter on gid 800. and then filter for having a
  141. # fingerprint. that's usually right
  142. # enrico> weasel: what are person accounts without fingerprints for?
  143. # @weasel> people who screwed up their keys
  144. # @weasel> we've had that on occasion
  145. # enrico> weasel: ack
  146. # @weasel> enrico: and of course retired people
  147. # @weasel> we try to set ldap's account status nowadays, but no idea if
  148. # that applies to all that ever retired
  149. search_base = "dc=debian,dc=org"
  150. l = ldap.initialize(server)
  151. l.simple_bind_s("","")
  152. fpr_seq = 0
  153. for dn, attrs in l.search_s(search_base, ldap.SCOPE_SUBTREE, "objectclass=inetOrgPerson"):
  154. def get_field(f):
  155. if f not in attrs:
  156. return None
  157. f = attrs[f]
  158. if not f:
  159. return None
  160. return f[0]
  161. # Try to match the person using uid
  162. uid = get_field("uid")
  163. fpr = get_field("keyFingerPrint")
  164. if not fpr:
  165. fpr = "FIXME-removed-key-%04d" % fpr_seq
  166. fpr_seq += 1
  167. log.warning("%s has empty keyFingerPrint in LDAP. Setting it to %s", uid, fpr)
  168. try:
  169. person = bmodels.Person.objects.get(uid=uid)
  170. person.fpr = fpr
  171. if person.status == const.STATUS_DC:
  172. person.status = const.STATUS_DC_GA
  173. person.save()
  174. continue
  175. except bmodels.Person.DoesNotExist:
  176. pass
  177. # Try to match the person using emails
  178. try:
  179. person = bmodels.Person.objects.get(email=uid + "@debian.org")
  180. person.uid = uid
  181. person.fpr = fpr
  182. if person.status == const.STATUS_DC:
  183. person.status = const.STATUS_DC_GA,
  184. person.save()
  185. continue
  186. except bmodels.Person.DoesNotExist:
  187. pass
  188. email = get_field("emailForward")
  189. try:
  190. person = bmodels.Person.objects.get(email=email)
  191. person.uid = uid
  192. person.fpr = fpr
  193. if person.status == const.STATUS_DC:
  194. person.status = const.STATUS_DC_GA,
  195. person.save()
  196. continue
  197. except bmodels.Person.DoesNotExist:
  198. pass
  199. # Try to match the person using fingerprints
  200. try:
  201. # This should never be needed, but I have seen duplicate
  202. # fingerprints in the create person case below, so it's useful
  203. # to have this here to keep an eye on what happens
  204. person = bmodels.Person.objects.get(fpr=fpr)
  205. log.warning("Person %s has uid %s in ldap and oddly matches by fingerprint '%s'", person.uid, uid, fpr)
  206. person.uid = uid
  207. person.save()
  208. continue
  209. except bmodels.Person.DoesNotExist:
  210. pass
  211. person = bmodels.Person(
  212. cn=get_field("cn"),
  213. mn=get_field("mn"),
  214. sn=get_field("sn"),
  215. fpr=fpr,
  216. uid=uid,
  217. # Default to MM_GA: if they are in LDAP, they have at least a
  218. # guest account
  219. status=const.STATUS_DC_GA,
  220. )
  221. if get_field("gidNumber") == '800':
  222. person.email = uid + "@debian.org"
  223. else:
  224. person.email = email
  225. if person.email is None:
  226. log.warning("UID %s from LDAP does not look like a DD and has no email address: skipping import as Person", uid)
  227. continue
  228. person.save()
  229. def import_ldap_pass2(self, server):
  230. search_base = "dc=debian,dc=org"
  231. l = ldap.initialize(server)
  232. l.simple_bind_s("","")
  233. for dn, attrs in l.search_s(search_base, ldap.SCOPE_SUBTREE, "objectclass=inetOrgPerson"):
  234. uid = attrs["uid"][0]
  235. try:
  236. person = bmodels.Person.objects.get(uid=uid)
  237. except bmodels.Person.DoesNotExist:
  238. log.warning("Person %s exists in LDAP but not in NM database", uid)
  239. continue
  240. def get_field(f):
  241. if f not in attrs:
  242. return None
  243. f = attrs[f]
  244. if not f:
  245. return None
  246. return f[0]
  247. # Move one-name people from sn to cn
  248. if get_field("cn") == "-":
  249. log.info("swapping cn (%s) and sn (%s) for %s", get_field("cn"), get_field("sn"), person.uid)
  250. attrs["cn"] = attrs["sn"]
  251. del attrs["sn"]
  252. changed = False
  253. for field in ("cn", "mn", "sn"):
  254. val = get_field(field)
  255. if val is not None:
  256. for encoding in ("utf8", "latin1"):
  257. try:
  258. val = val.decode(encoding)
  259. good = True
  260. break
  261. except (UnicodeDecodeError, UnicodeEncodeError):
  262. good = False
  263. if not good:
  264. log.warning("Field %s=%s for %s has invalid unicode information: skipping", field, repr(val), uid)
  265. continue
  266. old = getattr(person, field)
  267. if old is not None:
  268. for encoding in ("utf8", "latin1"):
  269. try:
  270. old = old.decode(encoding)
  271. good = True
  272. except (UnicodeDecodeError, UnicodeEncodeError):
  273. good = False
  274. if not good:
  275. old = "<invalid encoding>"
  276. if val != old:
  277. try:
  278. log.info("Person %s changed %s from %s to %s", uid, field, old, val)
  279. except UnicodeDecodeError:
  280. log.warning("Problems with %s", uid)
  281. continue
  282. setattr(person, field, val)
  283. changed = True
  284. if changed:
  285. person.save()
  286. def import_advocates(self):
  287. # Clear the uid cache
  288. self.people_cache_by_uid = dict()
  289. for id, advocates in self.todo_advocates.iteritems():
  290. proc = bmodels.Process.objects.get(id=id)
  291. for adv in advocates:
  292. a = self.people_cache_by_uid.get(adv, None)
  293. if a is None:
  294. try:
  295. a = bmodels.Person.objects.get(uid=adv)
  296. self.people_cache_by_uid[adv] = a
  297. except bmodels.Person.DoesNotExist:
  298. log.warning("advocate %s not found: skipping the DB association and leaving it just in the logs", adv)
  299. continue
  300. proc.advocates.add(a)
  301. def import_keyrings(self):
  302. """
  303. Perform initial import from keyring.d.o
  304. Detects status by checking what keyring contains the fingerprint
  305. """
  306. for person in bmodels.Person.objects.all():
  307. if not person.fpr:
  308. log.info("%s/%s has no fingerprint: skipped", person.uid, person.email)
  309. continue
  310. old_status = person.status
  311. if person.fpr in self.dm:
  312. # If we have a fingerprint in the Person during the initial import,
  313. # it means they come from LDAP, so they have a guest account
  314. person.status = const.STATUS_DM_GA
  315. if person.fpr in self.dd_u:
  316. person.status = const.STATUS_DD_U
  317. if person.fpr in self.dd_nu:
  318. person.status = const.STATUS_DD_NU
  319. if person.fpr in self.emeritus_dd:
  320. person.status = const.STATUS_EMERITUS_DD
  321. if person.fpr in self.removed_dd:
  322. person.status = const.STATUS_REMOVED_DD
  323. if old_status != person.status:
  324. log.info("%s: status changed from %s to %s", person.uid, old_status, person.status)
  325. person.save()
  326. class Command(BaseCommand):
  327. help = 'Import a JSON database dump'
  328. option_list = BaseCommand.option_list + (
  329. optparse.make_option("--quiet", action="store_true", dest="quiet", default=None, help="Disable progress reporting"),
  330. optparse.make_option("--ldap", action="store", default="ldap://db.debian.org", help="LDAP server to use. Default: %default"),
  331. #l = ldap.initialize("ldap://localhost:3389")
  332. )
  333. def handle(self, *fnames, **opts):
  334. FORMAT = "%(asctime)-15s %(levelname)s %(message)s"
  335. if opts["quiet"]:
  336. logging.basicConfig(level=logging.WARNING, stream=sys.stderr, format=FORMAT)
  337. else:
  338. logging.basicConfig(level=logging.INFO, stream=sys.stderr, format=FORMAT)
  339. if not fnames:
  340. print >>sys.stderr, "please provide a JSON dump file name"
  341. sys.exit(1)
  342. with open(fnames[0]) as fd:
  343. people = json.load(fd)
  344. importer = Importer()
  345. for k, v in people.iteritems():
  346. importer.import_person(v)
  347. for k, v in people.iteritems():
  348. importer.import_processes(v)
  349. importer.import_ldap(opts["ldap"])
  350. importer.import_ldap_pass2(opts["ldap"])
  351. importer.import_advocates()
  352. importer.import_keyrings()
  353. #log.info("%d patch(es) applied", len(fnames))