#
# urlutils.py - Simplified urllib handling
#
#   Written by Chris Lawrence <lawrencc@debian.org>
#   (C) 1999-2008 Chris Lawrence
#   Copyright (C) 2008-2016 Sandro Tosi <morph@debian.org>
#
# This program is freely distributable per the following license:
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appears in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation.
#
# I DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL I
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
import commands
import getpass
import httplib
import os
import re
import socket
import subprocess
import sys
import urllib
import urllib2
import webbrowser

from exceptions import (
    NoNetwork,
)
from __init__ import VERSION_NUMBER
  37. UA_STR = 'reportbug/' + VERSION_NUMBER + ' (Debian)'
  38. def decode(page):
  39. "gunzip or deflate a compressed page"
  40. # print page.info().headers
  41. encoding = page.info().get("Content-Encoding")
  42. if encoding in ('gzip', 'x-gzip', 'deflate'):
  43. from cStringIO import StringIO
  44. # cannot seek in socket descriptors, so must get content now
  45. content = page.read()
  46. if encoding == 'deflate':
  47. import zlib
  48. fp = StringIO(zlib.decompress(content))
  49. else:
  50. import gzip
  51. fp = gzip.GzipFile('', 'rb', 9, StringIO(content))
  52. # remove content-encoding header
  53. headers = httplib.HTTPMessage(StringIO(""))
  54. ceheader = re.compile(r"(?i)content-encoding:")
  55. for h in page.info().keys():
  56. if not ceheader.match(h):
  57. headers[h] = page.info()[h]
  58. newpage = urllib.addinfourl(fp, headers, page.geturl())
  59. # Propagate code, msg through
  60. if hasattr(page, 'code'):
  61. newpage.code = page.code
  62. if hasattr(page, 'msg'):
  63. newpage.msg = page.msg
  64. return newpage
  65. return page
  66. class HttpWithGzipHandler(urllib2.HTTPHandler):
  67. "support gzip encoding"
  68. def http_open(self, req):
  69. return decode(urllib2.HTTPHandler.http_open(self, req))
  70. if hasattr(httplib, 'HTTPS'):
  71. class HttpsWithGzipHandler(urllib2.HTTPSHandler):
  72. "support gzip encoding"
  73. def https_open(self, req):
  74. return decode(urllib2.HTTPSHandler.https_open(self, req))
  75. class handlepasswd(urllib2.HTTPPasswordMgrWithDefaultRealm):
  76. def find_user_password(self, realm, authurl):
  77. user, password = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(self, realm, authurl)
  78. if user is not None:
  79. return user, password
  80. user = raw_input('Enter username for %s at %s: ' % (realm, authurl))
  81. password = getpass.getpass(
  82. "Enter password for %s in %s at %s: " % (user, realm, authurl))
  83. self.add_password(realm, authurl, user, password)
  84. return user, password
  85. _opener = None
  86. def urlopen(url, proxies=None, timeout=60, data=None):
  87. global _opener
  88. if not proxies:
  89. proxies = urllib.getproxies()
  90. headers = {'User-Agent': UA_STR,
  91. 'Accept-Encoding': 'gzip;q=1.0, deflate;q=0.9, identity;q=0.5'}
  92. req = urllib2.Request(url, data, headers)
  93. proxy_support = urllib2.ProxyHandler(proxies)
  94. if _opener is None:
  95. pwd_manager = handlepasswd()
  96. handlers = [proxy_support,
  97. urllib2.UnknownHandler, HttpWithGzipHandler,
  98. urllib2.HTTPBasicAuthHandler(pwd_manager),
  99. urllib2.ProxyBasicAuthHandler(pwd_manager),
  100. urllib2.HTTPDigestAuthHandler(pwd_manager),
  101. urllib2.ProxyDigestAuthHandler(pwd_manager),
  102. urllib2.HTTPDefaultErrorHandler, urllib2.HTTPRedirectHandler,
  103. ]
  104. if hasattr(httplib, 'HTTPS'):
  105. handlers.append(HttpsWithGzipHandler)
  106. _opener = urllib2.build_opener(*handlers)
  107. # print _opener.handlers
  108. urllib2.install_opener(_opener)
  109. return _opener.open(req, timeout=timeout)
  110. # Global useful URL opener; returns None if the page is absent, otherwise
  111. # like urlopen
  112. def open_url(url, http_proxy=None, timeout=60):
  113. # Set timeout to 60 secs (1 min), cfr bug #516449
  114. # in #572316 we set a user-configurable timeout
  115. socket.setdefaulttimeout(timeout)
  116. proxies = urllib.getproxies()
  117. if http_proxy:
  118. proxies['http'] = http_proxy
  119. try:
  120. page = urlopen(url, proxies, timeout)
  121. except urllib2.HTTPError, x:
  122. if x.code in (404, 500, 503):
  123. return None
  124. else:
  125. raise
  126. except (socket.gaierror, socket.error, urllib2.URLError), x:
  127. raise NoNetwork
  128. except IOError, data:
  129. if data and data[0] == 'http error' and data[1] == 404:
  130. return None
  131. else:
  132. raise NoNetwork
  133. except TypeError:
  134. print >> sys.stderr, "http_proxy environment variable must be formatted as a valid URI"
  135. raise NoNetwork
  136. except httplib.HTTPException, exc:
  137. exc_name = exc.__class__.__name__
  138. message = "Failed to open %(url)r (%(exc_name)s: %(exc)s)" % vars()
  139. raise NoNetwork(message)
  140. return page
  141. def launch_browser(url):
  142. if not os.system('command -v xdg-open >/dev/null 2>&1'):
  143. cmd = 'xdg-open' + commands.mkarg(url)
  144. os.system(cmd)
  145. return
  146. if webbrowser:
  147. webbrowser.open(url)
  148. return