email_messages.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. # Copyright 2013-2015 The Distro Tracker Developers
  2. # See the COPYRIGHT file at the top-level directory of this distribution and
  3. # at http://deb.li/DTAuthors
  4. #
  5. # This file is part of Distro Tracker. It is subject to the license terms
  6. # in the LICENSE file found in the top-level directory of this
  7. # distribution and at http://deb.li/DTLicense. No part of Distro Tracker,
  8. # including this file, may be copied, modified, propagated, or distributed
  9. # except according to the terms contained in the LICENSE file.
  10. """
  11. Module including some utility functions and classes for manipulating email.
  12. """
  13. from __future__ import unicode_literals
  14. from django.core.mail import EmailMessage
  15. from django.utils import six
  16. from django.utils.encoding import force_bytes
  17. from email.mime.base import MIMEBase
  18. import re
  19. import copy
  20. import email
  21. import types
  22. def extract_email_address_from_header(header):
  23. """
  24. Extracts the email address from the From email header.
  25. >>> str(extract_email_address_from_header('Real Name <foo@domain.com>'))
  26. 'foo@domain.com'
  27. >>> str(extract_email_address_from_header('foo@domain.com'))
  28. 'foo@domain.com'
  29. """
  30. from email.utils import parseaddr
  31. real_name, from_address = parseaddr(header)
  32. return from_address
  33. def name_and_address_from_string(content):
  34. """
  35. Takes an address in almost-RFC822 format and turns it into a dict
  36. {'name': real_name, 'email': email_address}
  37. The difference with email.utils.parseaddr and rfc822.parseaddr
  38. is that this routine allows unquoted commas to appear in the real name
  39. (in violation of RFC822).
  40. """
  41. from email.utils import parseaddr
  42. hacked_content = content.replace(",", "WEWANTNOCOMMAS")
  43. name, mail = parseaddr(hacked_content)
  44. if mail:
  45. return {
  46. 'name': name.replace("WEWANTNOCOMMAS", ","),
  47. 'email': mail.replace("WEWANTNOCOMMAS", ",")
  48. }
  49. else:
  50. return None
  51. def names_and_addresses_from_string(content):
  52. """
  53. Takes a string with addresses in RFC822 format and returns a list of dicts
  54. {'name': real_name, 'email': email_address}
  55. It tries to be forgiving about unquoted commas in addresses.
  56. """
  57. all_parts = [
  58. name_and_address_from_string(part)
  59. for part in re.split('(?<=>)\s*,\s*', content)
  60. ]
  61. return [
  62. part
  63. for part in all_parts
  64. if part is not None
  65. ]
  66. def get_decoded_message_payload(message, default_charset='utf-8'):
  67. """
  68. Extracts the payload of the given ``email.message.Message`` and returns it
  69. decoded based on the Content-Transfer-Encoding and charset.
  70. """
  71. # If the message is multipart there is nothing to decode so None is
  72. # returned
  73. if message.is_multipart():
  74. return None
  75. # Decodes the message based on transfer encoding and returns bytes
  76. payload = message.get_payload(decode=True)
  77. # The charset defaults to ascii if none is given
  78. charset = message.get_content_charset(default_charset)
  79. try:
  80. return payload.decode(charset)
  81. except (UnicodeDecodeError, LookupError):
  82. # If we did not get the charset right, assume it's latin1 and make
  83. # sure to not fail furter
  84. return payload.decode('latin1', 'replace')
  85. def patch_message_for_django_compat(message):
  86. """
  87. Live patch the :py:class:`email.message.Message` object passed as
  88. parameter so that:
  89. - the as_string() method return the same set of bytes it has been parsed
  90. from (to preserve as much as possible the original message)
  91. - the as_bytes() is added too (this method is expected by Django's SMTP
  92. backend)
  93. """
  94. # Django expects patched versions of as_string/as_bytes, see
  95. # django/core/mail/message.py
  96. def as_string(self, unixfrom=False, maxheaderlen=0, linesep='\n'):
  97. """
  98. Returns the payload of the message encoded as bytes.
  99. """
  100. if six.PY3:
  101. from email.generator import BytesGenerator as Generator
  102. else:
  103. from email.generator import Generator
  104. fp = six.BytesIO()
  105. g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
  106. if six.PY3:
  107. g.flatten(self, unixfrom=unixfrom, linesep=linesep)
  108. else:
  109. g.flatten(self, unixfrom=unixfrom)
  110. return force_bytes(fp.getvalue(), 'utf-8')
  111. message.as_string = types.MethodType(as_string, message)
  112. message.as_bytes = message.as_string
  113. return message
  114. def message_from_bytes(message_bytes):
  115. """
  116. Returns a live-patched :class:`email.Message` object from the given
  117. bytes.
  118. The changes ensure that parsing the message's bytes with this method
  119. and then returning them by using the returned object's as_string
  120. method is an idempotent operation.
  121. An as_bytes method is also created since Django's SMTP backend relies
  122. on this method (which is usually brought by its own
  123. :class:`django.core.mail.SafeMIMEText` object but that we don't use
  124. in our :class:`CustomEmailMessage`).
  125. """
  126. if six.PY3:
  127. from email import message_from_bytes as email_message_from_bytes
  128. else:
  129. from email import message_from_string as email_message_from_bytes
  130. message = email_message_from_bytes(message_bytes)
  131. return patch_message_for_django_compat(message)
  132. class CustomEmailMessage(EmailMessage):
  133. """
  134. A subclass of :class:`django.core.mail.EmailMessage` which can be fed
  135. an :class:`email.message.Message` instance to define the body of the
  136. message.
  137. If :attr:`msg` is set, the :attr:`body <django.core.mail.EmailMessage.body>`
  138. attribute is ignored.
  139. If the user wants to attach additional parts to the message, the
  140. :meth:`attach` method can be used but the user must ensure that the given
  141. ``msg`` instance is a multipart message before doing so.
  142. Effectively, this is also a wrapper which allows sending instances of
  143. :class:`email.message.Message` via Django email backends.
  144. """
  145. def __init__(self, msg=None, *args, **kwargs):
  146. """
  147. Use the keyword argument ``msg`` to set the
  148. :class:`email.message.Message` instance which should be used to define
  149. the body of the message. The original object is copied.
  150. If no ``msg`` is set, the object's behaviour is identical to
  151. :class:`django.core.mail.EmailMessage`
  152. """
  153. super(CustomEmailMessage, self).__init__(*args, **kwargs)
  154. self.msg = msg
  155. def message(self):
  156. """
  157. Returns the underlying :class:`email.message.Message` object.
  158. In case the user did not set a :attr:`msg` attribute for this instance
  159. the parent :meth:`EmailMessage.message
  160. <django.core.mail.EmailMessage.message>` method is used.
  161. """
  162. if self.msg:
  163. msg = self._attach_all()
  164. return msg
  165. else:
  166. return EmailMessage.message(self)
  167. def _attach_all(self):
  168. """
  169. Attaches all existing attachments to the given message ``msg``.
  170. """
  171. msg = self.msg
  172. if self.attachments:
  173. assert self.msg.is_multipart()
  174. msg = copy.deepcopy(self.msg)
  175. for attachment in self.attachments:
  176. if isinstance(attachment, MIMEBase):
  177. msg.attach(attachment)
  178. else:
  179. msg.attach(self._create_attachment(*attachment))
  180. return msg
  181. def decode_header(header, default_encoding='utf-8'):
  182. """
  183. Decodes an email message header and returns it coded as a unicode
  184. string.
  185. This is necessary since it is possible that a header is made of multiple
  186. differently encoded parts which makes :func:`email.header.decode_header`
  187. insufficient.
  188. """
  189. if header is None:
  190. return None
  191. decoded_header = email.header.decode_header(header)
  192. # Join all the different parts of the header into a single unicode string
  193. result = ''
  194. for part, encoding in decoded_header:
  195. if encoding == 'unknown-8bit':
  196. # Python 3 returns unknown-8bit instead of None when you have 8bit
  197. # characters without any encoding information
  198. encoding = 'iso-8859-1'
  199. if isinstance(part, six.binary_type):
  200. encoding = encoding if encoding else default_encoding
  201. try:
  202. result += part.decode(encoding)
  203. except UnicodeDecodeError:
  204. result += part.decode('iso-8859-1', 'replace')
  205. else:
  206. result += part
  207. return result
  208. def unfold_header(header):
  209. """
  210. Unfolding is the process to remove the line wrapping added by mail agents.
  211. An header is a single logical line and they are not allowed to be multi-line
  212. values.
  213. We need to unfold their values in particular when we want to reuse the
  214. values to compose a reply message as Python's email API chokes on those
  215. newline characters.
  216. If header is None, the return value is None as well.
  217. :param:header: the header value to unfold
  218. :type param: str
  219. :returns: the unfolded version of the header.
  220. :rtype: str
  221. """
  222. if header is None:
  223. return None
  224. return re.sub(r'\r?\n(\s)', r'\1', header, 0, re.MULTILINE)