url.lua 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. -----------------------------------------------------------------------------
  2. -- URI parsing, composition and relative URL resolution
  3. -- LuaSocket toolkit.
  4. -- Author: Diego Nehab
  5. -----------------------------------------------------------------------------
  6. -----------------------------------------------------------------------------
  7. -- Declare module
  8. -----------------------------------------------------------------------------
  9. local string = require("string")
  10. local base = _G
  11. local table = require("table")
  12. local socket = require("socket")
  13. socket.url = {}
  14. local _M = socket.url
  15. -----------------------------------------------------------------------------
  16. -- Module version
  17. -----------------------------------------------------------------------------
  18. _M._VERSION = "URL 1.0.3"
  19. -----------------------------------------------------------------------------
  20. -- Encodes a string into its escaped hexadecimal representation
  21. -- Input
  22. -- s: binary string to be encoded
  23. -- Returns
  24. -- escaped representation of string binary
  25. -----------------------------------------------------------------------------
  26. function _M.escape(s)
  27. return (string.gsub(s, "([^A-Za-z0-9_])", function(c)
  28. return string.format("%%%02x", string.byte(c))
  29. end))
  30. end
  31. -----------------------------------------------------------------------------
  32. -- Protects a path segment, to prevent it from interfering with the
  33. -- url parsing.
  34. -- Input
  35. -- s: binary string to be encoded
  36. -- Returns
  37. -- escaped representation of string binary
  38. -----------------------------------------------------------------------------
  39. local function make_set(t)
  40. local s = {}
  41. for i,v in base.ipairs(t) do
  42. s[t[i]] = 1
  43. end
  44. return s
  45. end
  46. -- these are allowed within a path segment, along with alphanum
  47. -- other characters must be escaped
  48. local segment_set = make_set {
  49. "-", "_", ".", "!", "~", "*", "'", "(",
  50. ")", ":", "@", "&", "=", "+", "$", ",",
  51. }
  52. local function protect_segment(s)
  53. return string.gsub(s, "([^A-Za-z0-9_])", function (c)
  54. if segment_set[c] then return c
  55. else return string.format("%%%02X", string.byte(c)) end
  56. end)
  57. end
  58. -----------------------------------------------------------------------------
  59. -- Unencodes a escaped hexadecimal string into its binary representation
  60. -- Input
  61. -- s: escaped hexadecimal string to be unencoded
  62. -- Returns
  63. -- unescaped binary representation of escaped hexadecimal binary
  64. -----------------------------------------------------------------------------
  65. function _M.unescape(s)
  66. return (string.gsub(s, "%%(%x%x)", function(hex)
  67. return string.char(base.tonumber(hex, 16))
  68. end))
  69. end
  70. -----------------------------------------------------------------------------
  71. -- Removes '..' and '.' components appropriately from a path.
  72. -- Input
  73. -- path
  74. -- Returns
  75. -- dot-normalized path
  76. local function remove_dot_components(path)
  77. local marker = string.char(1)
  78. repeat
  79. local was = path
  80. path = path:gsub('//', '/'..marker..'/', 1)
  81. until path == was
  82. repeat
  83. local was = path
  84. path = path:gsub('/%./', '/', 1)
  85. until path == was
  86. repeat
  87. local was = path
  88. path = path:gsub('[^/]+/%.%./([^/]+)', '%1', 1)
  89. until path == was
  90. path = path:gsub('[^/]+/%.%./*$', '')
  91. path = path:gsub('/%.%.$', '/')
  92. path = path:gsub('/%.$', '/')
  93. path = path:gsub('^/%.%./', '/')
  94. path = path:gsub(marker, '')
  95. return path
  96. end
  97. -----------------------------------------------------------------------------
  98. -- Builds a path from a base path and a relative path
  99. -- Input
  100. -- base_path
  101. -- relative_path
  102. -- Returns
  103. -- corresponding absolute path
  104. -----------------------------------------------------------------------------
  105. local function absolute_path(base_path, relative_path)
  106. if string.sub(relative_path, 1, 1) == "/" then
  107. return remove_dot_components(relative_path) end
  108. base_path = base_path:gsub("[^/]*$", "")
  109. if not base_path:find'/$' then base_path = base_path .. '/' end
  110. local path = base_path .. relative_path
  111. path = remove_dot_components(path)
  112. return path
  113. end
  114. -----------------------------------------------------------------------------
  115. -- Parses a url and returns a table with all its parts according to RFC 2396
  116. -- The following grammar describes the names given to the URL parts
  117. -- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
  118. -- <authority> ::= <userinfo>@<host>:<port>
  119. -- <userinfo> ::= <user>[:<password>]
  120. -- <path> :: = {<segment>/}<segment>
  121. -- Input
  122. -- url: uniform resource locator of request
  123. -- default: table with default values for each field
  124. -- Returns
  125. -- table with the following fields, where RFC naming conventions have
  126. -- been preserved:
  127. -- scheme, authority, userinfo, user, password, host, port,
  128. -- path, params, query, fragment
  129. -- Obs:
  130. -- the leading '/' in {/<path>} is considered part of <path>
  131. -----------------------------------------------------------------------------
  132. function _M.parse(url, default)
  133. -- initialize default parameters
  134. local parsed = {}
  135. for i,v in base.pairs(default or parsed) do parsed[i] = v end
  136. -- empty url is parsed to nil
  137. if not url or url == "" then return nil, "invalid url" end
  138. -- remove whitespace
  139. -- url = string.gsub(url, "%s", "")
  140. -- get scheme
  141. url = string.gsub(url, "^([%w][%w%+%-%.]*)%:",
  142. function(s) parsed.scheme = s; return "" end)
  143. -- get authority
  144. url = string.gsub(url, "^//([^/]*)", function(n)
  145. parsed.authority = n
  146. return ""
  147. end)
  148. -- get fragment
  149. url = string.gsub(url, "#(.*)$", function(f)
  150. parsed.fragment = f
  151. return ""
  152. end)
  153. -- get query string
  154. url = string.gsub(url, "%?(.*)", function(q)
  155. parsed.query = q
  156. return ""
  157. end)
  158. -- get params
  159. url = string.gsub(url, "%;(.*)", function(p)
  160. parsed.params = p
  161. return ""
  162. end)
  163. -- path is whatever was left
  164. if url ~= "" then parsed.path = url end
  165. local authority = parsed.authority
  166. if not authority then return parsed end
  167. authority = string.gsub(authority,"^([^@]*)@",
  168. function(u) parsed.userinfo = u; return "" end)
  169. authority = string.gsub(authority, ":([^:%]]*)$",
  170. function(p) parsed.port = p; return "" end)
  171. if authority ~= "" then
  172. -- IPv6?
  173. parsed.host = string.match(authority, "^%[(.+)%]$") or authority
  174. end
  175. local userinfo = parsed.userinfo
  176. if not userinfo then return parsed end
  177. userinfo = string.gsub(userinfo, ":([^:]*)$",
  178. function(p) parsed.password = p; return "" end)
  179. parsed.user = userinfo
  180. return parsed
  181. end
  182. -----------------------------------------------------------------------------
  183. -- Rebuilds a parsed URL from its components.
  184. -- Components are protected if any reserved or unallowed characters are found
  185. -- Input
  186. -- parsed: parsed URL, as returned by parse
  187. -- Returns
  188. -- a stringing with the corresponding URL
  189. -----------------------------------------------------------------------------
  190. function _M.build(parsed)
  191. --local ppath = _M.parse_path(parsed.path or "")
  192. --local url = _M.build_path(ppath)
  193. local url = parsed.path or ""
  194. if parsed.params then url = url .. ";" .. parsed.params end
  195. if parsed.query then url = url .. "?" .. parsed.query end
  196. local authority = parsed.authority
  197. if parsed.host then
  198. authority = parsed.host
  199. if string.find(authority, ":") then -- IPv6?
  200. authority = "[" .. authority .. "]"
  201. end
  202. if parsed.port then authority = authority .. ":" .. base.tostring(parsed.port) end
  203. local userinfo = parsed.userinfo
  204. if parsed.user then
  205. userinfo = parsed.user
  206. if parsed.password then
  207. userinfo = userinfo .. ":" .. parsed.password
  208. end
  209. end
  210. if userinfo then authority = userinfo .. "@" .. authority end
  211. end
  212. if authority then url = "//" .. authority .. url end
  213. if parsed.scheme then url = parsed.scheme .. ":" .. url end
  214. if parsed.fragment then url = url .. "#" .. parsed.fragment end
  215. -- url = string.gsub(url, "%s", "")
  216. return url
  217. end
  218. -----------------------------------------------------------------------------
  219. -- Builds a absolute URL from a base and a relative URL according to RFC 2396
  220. -- Input
  221. -- base_url
  222. -- relative_url
  223. -- Returns
  224. -- corresponding absolute url
  225. -----------------------------------------------------------------------------
  226. function _M.absolute(base_url, relative_url)
  227. local base_parsed
  228. if base.type(base_url) == "table" then
  229. base_parsed = base_url
  230. base_url = _M.build(base_parsed)
  231. else
  232. base_parsed = _M.parse(base_url)
  233. end
  234. local result
  235. local relative_parsed = _M.parse(relative_url)
  236. if not base_parsed then
  237. result = relative_url
  238. elseif not relative_parsed then
  239. result = base_url
  240. elseif relative_parsed.scheme then
  241. result = relative_url
  242. else
  243. relative_parsed.scheme = base_parsed.scheme
  244. if not relative_parsed.authority then
  245. relative_parsed.authority = base_parsed.authority
  246. if not relative_parsed.path then
  247. relative_parsed.path = base_parsed.path
  248. if not relative_parsed.params then
  249. relative_parsed.params = base_parsed.params
  250. if not relative_parsed.query then
  251. relative_parsed.query = base_parsed.query
  252. end
  253. end
  254. else
  255. relative_parsed.path = absolute_path(base_parsed.path or "",
  256. relative_parsed.path)
  257. end
  258. end
  259. result = _M.build(relative_parsed)
  260. end
  261. return remove_dot_components(result)
  262. end
  263. -----------------------------------------------------------------------------
  264. -- Breaks a path into its segments, unescaping the segments
  265. -- Input
  266. -- path
  267. -- Returns
  268. -- segment: a table with one entry per segment
  269. -----------------------------------------------------------------------------
  270. function _M.parse_path(path)
  271. local parsed = {}
  272. path = path or ""
  273. --path = string.gsub(path, "%s", "")
  274. string.gsub(path, "([^/]+)", function (s) table.insert(parsed, s) end)
  275. for i = 1, #parsed do
  276. parsed[i] = _M.unescape(parsed[i])
  277. end
  278. if string.sub(path, 1, 1) == "/" then parsed.is_absolute = 1 end
  279. if string.sub(path, -1, -1) == "/" then parsed.is_directory = 1 end
  280. return parsed
  281. end
  282. -----------------------------------------------------------------------------
  283. -- Builds a path component from its segments, escaping protected characters.
  284. -- Input
  285. -- parsed: path segments
  286. -- unsafe: if true, segments are not protected before path is built
  287. -- Returns
  288. -- path: corresponding path stringing
  289. -----------------------------------------------------------------------------
  290. function _M.build_path(parsed, unsafe)
  291. local path = ""
  292. local n = #parsed
  293. if unsafe then
  294. for i = 1, n-1 do
  295. path = path .. parsed[i]
  296. path = path .. "/"
  297. end
  298. if n > 0 then
  299. path = path .. parsed[n]
  300. if parsed.is_directory then path = path .. "/" end
  301. end
  302. else
  303. for i = 1, n-1 do
  304. path = path .. protect_segment(parsed[i])
  305. path = path .. "/"
  306. end
  307. if n > 0 then
  308. path = path .. protect_segment(parsed[n])
  309. if parsed.is_directory then path = path .. "/" end
  310. end
  311. end
  312. if parsed.is_absolute then path = "/" .. path end
  313. return path
  314. end
  315. return _M