useragents.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. # -*- coding: utf-8 -*-
  2. """
  3. werkzeug.useragents
  4. ~~~~~~~~~~~~~~~~~~~
  5. This module provides a helper to inspect user agent strings. This module
  6. is far from complete but should work for most of the currently available
  7. browsers.
  8. :copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
  9. :license: BSD, see LICENSE for more details.
  10. """
  11. import re
  class UserAgentParser(object):
      """A simple user agent parser.  Used by the `UserAgent`.

      Calling an instance with a user agent string returns a
      ``(platform, browser, version, language)`` tuple.  Every element is
      ``None`` when the corresponding detail could not be detected.
      """

      # ``(pattern, name)`` pairs.  Patterns are compiled case-insensitively
      # and tried in order; the first one that matches wins, so more specific
      # entries must come before more generic ones.
      platforms = (
          # Anchored on word boundaries: a bare ``cros`` also matches inside
          # "Microsoft", which made such agents report as Chrome OS.
          (r'\bcros\b', 'chromeos'),
          ('iphone|ios', 'iphone'),
          ('ipad', 'ipad'),
          (r'darwin|mac|os\s*x', 'macos'),
          ('win', 'windows'),
          (r'android', 'android'),
          ('netbsd', 'netbsd'),
          ('openbsd', 'openbsd'),
          ('freebsd', 'freebsd'),
          ('dragonfly', 'dragonflybsd'),
          ('(sun|i86)os', 'solaris'),
          # NOTE(review): the trailing group is optional, so this matches any
          # string containing "lin"; left unchanged to preserve behavior.
          (r'x11|lin(\b|ux)?', 'linux'),
          (r'nintendo\s+wii', 'wii'),
          ('irix', 'irix'),
          ('hp-?ux', 'hpux'),
          ('aix', 'aix'),
          ('sco|unix_sv', 'sco'),
          ('bsd', 'bsd'),
          ('amiga', 'amiga'),
          ('blackberry|playbook', 'blackberry'),
          ('symbian', 'symbian'),
      )

      # ``(pattern, name)`` pairs, same first-match-wins rule as above.  Each
      # pattern is embedded into ``_browser_version_re`` before compilation.
      browsers = (
          ('googlebot', 'google'),
          ('msnbot', 'msn'),
          ('yahoo', 'yahoo'),
          ('ask jeeves', 'ask'),
          (r'aol|america\s+online\s+browser', 'aol'),
          ('opera', 'opera'),
          ('edge', 'edge'),
          ('chrome', 'chrome'),
          ('seamonkey', 'seamonkey'),
          ('firefox|firebird|phoenix|iceweasel', 'firefox'),
          ('galeon', 'galeon'),
          ('safari|version', 'safari'),
          ('webkit', 'webkit'),
          ('camino', 'camino'),
          ('konqueror', 'konqueror'),
          ('k-meleon', 'kmeleon'),
          ('netscape', 'netscape'),
          (r'msie|microsoft\s+internet\s+explorer|trident/.+? rv:', 'msie'),
          ('lynx', 'lynx'),
          ('links', 'links'),
          ('Baiduspider', 'baidu'),
          ('bingbot', 'bing'),
          ('mozilla', 'mozilla'),
      )

      # Template wrapped around every browser pattern: the browser token,
      # optional separator characters, then an optional version captured as
      # group 1.
      _browser_version_re = r'(?:%s)[/\sa-z(]*(\d+[.\da-z]+)?'

      # Matches a delimited two-character token, optionally followed by a
      # hyphen and two more characters (e.g. ``en`` or ``en-US``).  The token
      # is captured in group 1 or group 2 depending on the alternative.
      _language_re = re.compile(
          r'(?:;\s*|\s+)(\b\w{2}\b(?:-\b\w{2}\b)?)\s*;|'
          r'(?:\(|\[|;)\s*(\b\w{2}\b(?:-\b\w{2}\b)?)\s*(?:\]|\)|;)'
      )

      def __init__(self):
          """Compile the class-level tables into ``(name, regex)`` lists.

          The compiled lists are stored on the instance under the same
          attribute names, shadowing the class-level pattern tuples.
          """
          self.platforms = [(b, re.compile(a, re.I)) for a, b in self.platforms]
          self.browsers = [(b, re.compile(self._browser_version_re % a, re.I))
                           for a, b in self.browsers]

      def __call__(self, user_agent):
          """Parse `user_agent` into its components.

          :param user_agent: the raw user agent string.
          :return: a ``(platform, browser, version, language)`` tuple.  Any
                   element is ``None`` if it was not detected; `version` may
                   be ``None`` even when a browser was recognized.
          """
          for platform, regex in self.platforms:
              match = regex.search(user_agent)
              if match is not None:
                  break
          else:
              # Loop ran to completion without a hit.
              platform = None

          for browser, regex in self.browsers:
              match = regex.search(user_agent)
              if match is not None:
                  version = match.group(1)
                  break
          else:
              browser = version = None

          match = self._language_re.search(user_agent)
          if match is not None:
              # Only one of the two alternatives (and thus groups) matched.
              language = match.group(1) or match.group(2)
          else:
              language = None

          return platform, browser, version, language
  class UserAgent(object):
      """Represents a user agent.  Pass it a WSGI environment or a user agent
      string and you can inspect some of the details from the user agent
      string via the attributes.  The following attributes exist:

      .. attribute:: string

         the raw user agent string

      .. attribute:: platform

         the browser platform.  The following platforms are currently
         recognized:

         -   `aix`
         -   `amiga`
         -   `android`
         -   `blackberry`
         -   `bsd`
         -   `chromeos`
         -   `dragonflybsd`
         -   `freebsd`
         -   `hpux`
         -   `ipad`
         -   `iphone`
         -   `irix`
         -   `linux`
         -   `macos`
         -   `netbsd`
         -   `openbsd`
         -   `sco`
         -   `solaris`
         -   `symbian`
         -   `wii`
         -   `windows`

      .. attribute:: browser

         the name of the browser.  The following browsers are currently
         recognized:

         -   `aol` *
         -   `ask` *
         -   `baidu` *
         -   `bing` *
         -   `camino`
         -   `chrome`
         -   `edge`
         -   `firefox`
         -   `galeon`
         -   `google` *
         -   `kmeleon`
         -   `konqueror`
         -   `links`
         -   `lynx`
         -   `mozilla`
         -   `msie`
         -   `msn`
         -   `netscape`
         -   `opera`
         -   `safari`
         -   `seamonkey`
         -   `webkit`
         -   `yahoo` *

         (Browsers marked with a star (``*``) are crawlers.)

      .. attribute:: version

         the version of the browser

      .. attribute:: language

         the language of the browser
      """

      # One parser instance shared by all `UserAgent` objects.
      _parser = UserAgentParser()

      def __init__(self, environ_or_string):
          # A dict is treated as a WSGI environ; anything else is used as the
          # user agent string itself.
          raw = environ_or_string
          if isinstance(raw, dict):
              raw = raw.get('HTTP_USER_AGENT', '')
          self.string = raw
          details = self._parser(raw)
          self.platform, self.browser, self.version, self.language = details

      def to_header(self):
          return self.string

      def __str__(self):
          return self.string

      def __nonzero__(self):
          # Truthy only when a browser was recognized.
          return bool(self.browser)

      __bool__ = __nonzero__

      def __repr__(self):
          cls_name = self.__class__.__name__
          return '<%s %r/%s>' % (cls_name, self.browser, self.version)
  # conceptually this belongs in this module but because we want to lazily
  # load the user agent module (which happens in wrappers.py) we have to import
  # it afterwards.  The class itself has the module set to this module so
  # pickle, inspect and similar modules treat the object as if it were really
  # implemented here.
  177. from werkzeug.wrappers import UserAgentMixin # noqa