http_header_util.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. # Copyright 2011, Google Inc.
  2. # All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are
  6. # met:
  7. #
  8. # * Redistributions of source code must retain the above copyright
  9. # notice, this list of conditions and the following disclaimer.
  10. # * Redistributions in binary form must reproduce the above
  11. # copyright notice, this list of conditions and the following disclaimer
  12. # in the documentation and/or other materials provided with the
  13. # distribution.
  14. # * Neither the name of Google Inc. nor the names of its
  15. # contributors may be used to endorse or promote products derived from
  16. # this software without specific prior written permission.
  17. #
  18. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. """Utilities for parsing and formatting headers that follow the grammar defined
  30. in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt.
  31. """
  32. import urlparse
  33. _SEPARATORS = '()<>@,;:\\"/[]?={} \t'
  34. def _is_char(c):
  35. """Returns true iff c is in CHAR as specified in HTTP RFC."""
  36. return ord(c) <= 127
  37. def _is_ctl(c):
  38. """Returns true iff c is in CTL as specified in HTTP RFC."""
  39. return ord(c) <= 31 or ord(c) == 127
  40. class ParsingState(object):
  41. def __init__(self, data):
  42. self.data = data
  43. self.head = 0
  44. def peek(state, pos=0):
  45. """Peeks the character at pos from the head of data."""
  46. if state.head + pos >= len(state.data):
  47. return None
  48. return state.data[state.head + pos]
  49. def consume(state, amount=1):
  50. """Consumes specified amount of bytes from the head and returns the
  51. consumed bytes. If there's not enough bytes to consume, returns None.
  52. """
  53. if state.head + amount > len(state.data):
  54. return None
  55. result = state.data[state.head:state.head + amount]
  56. state.head = state.head + amount
  57. return result
  58. def consume_string(state, expected):
  59. """Given a parsing state and a expected string, consumes the string from
  60. the head. Returns True if consumed successfully. Otherwise, returns
  61. False.
  62. """
  63. pos = 0
  64. for c in expected:
  65. if c != peek(state, pos):
  66. return False
  67. pos += 1
  68. consume(state, pos)
  69. return True
  70. def consume_lws(state):
  71. """Consumes a LWS from the head. Returns True if any LWS is consumed.
  72. Otherwise, returns False.
  73. LWS = [CRLF] 1*( SP | HT )
  74. """
  75. original_head = state.head
  76. consume_string(state, '\r\n')
  77. pos = 0
  78. while True:
  79. c = peek(state, pos)
  80. if c == ' ' or c == '\t':
  81. pos += 1
  82. else:
  83. if pos == 0:
  84. state.head = original_head
  85. return False
  86. else:
  87. consume(state, pos)
  88. return True
  89. def consume_lwses(state):
  90. """Consumes *LWS from the head."""
  91. while consume_lws(state):
  92. pass
  93. def consume_token(state):
  94. """Consumes a token from the head. Returns the token or None if no token
  95. was found.
  96. """
  97. pos = 0
  98. while True:
  99. c = peek(state, pos)
  100. if c is None or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
  101. if pos == 0:
  102. return None
  103. return consume(state, pos)
  104. else:
  105. pos += 1
  106. def consume_token_or_quoted_string(state):
  107. """Consumes a token or a quoted-string, and returns the token or unquoted
  108. string. If no token or quoted-string was found, returns None.
  109. """
  110. original_head = state.head
  111. if not consume_string(state, '"'):
  112. return consume_token(state)
  113. result = []
  114. expect_quoted_pair = False
  115. while True:
  116. if not expect_quoted_pair and consume_lws(state):
  117. result.append(' ')
  118. continue
  119. c = consume(state)
  120. if c is None:
  121. # quoted-string is not enclosed with double quotation
  122. state.head = original_head
  123. return None
  124. elif expect_quoted_pair:
  125. expect_quoted_pair = False
  126. if _is_char(c):
  127. result.append(c)
  128. else:
  129. # Non CHAR character found in quoted-pair
  130. state.head = original_head
  131. return None
  132. elif c == '\\':
  133. expect_quoted_pair = True
  134. elif c == '"':
  135. return ''.join(result)
  136. elif _is_ctl(c):
  137. # Invalid character %r found in qdtext
  138. state.head = original_head
  139. return None
  140. else:
  141. result.append(c)
  142. def quote_if_necessary(s):
  143. """Quotes arbitrary string into quoted-string."""
  144. quote = False
  145. if s == '':
  146. return '""'
  147. result = []
  148. for c in s:
  149. if c == '"' or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
  150. quote = True
  151. if c == '"' or _is_ctl(c):
  152. result.append('\\' + c)
  153. else:
  154. result.append(c)
  155. if quote:
  156. return '"' + ''.join(result) + '"'
  157. else:
  158. return ''.join(result)
  159. def parse_uri(uri):
  160. """Parse absolute URI then return host, port and resource."""
  161. parsed = urlparse.urlsplit(uri)
  162. if parsed.scheme != 'wss' and parsed.scheme != 'ws':
  163. # |uri| must be a relative URI.
  164. # TODO(toyoshim): Should validate |uri|.
  165. return None, None, uri
  166. if parsed.hostname is None:
  167. return None, None, None
  168. port = None
  169. try:
  170. port = parsed.port
  171. except ValueError, e:
  172. # port property cause ValueError on invalid null port description like
  173. # 'ws://host:/path'.
  174. return None, None, None
  175. if port is None:
  176. if parsed.scheme == 'ws':
  177. port = 80
  178. else:
  179. port = 443
  180. path = parsed.path
  181. if not path:
  182. path += '/'
  183. if parsed.query:
  184. path += '?' + parsed.query
  185. if parsed.fragment:
  186. path += '#' + parsed.fragment
  187. return parsed.hostname, port, path
  188. try:
  189. urlparse.uses_netloc.index('ws')
  190. except ValueError, e:
  191. # urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries.
  192. urlparse.uses_netloc.append('ws')
  193. urlparse.uses_netloc.append('wss')
  194. # vi:sts=4 sw=4 et