urlUtils.cpp 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. #include "urlUtils.h"
  2. #include "stl/stringUtils.h"
  3. #include <set>
  4. #include <ctype.h>
  5. #ifdef _WIN32
  6. #include <crtdbg.h>
  7. #define snprintf _snprintf
  8. #define unused_attribute
  9. #else
  10. #include <stdio.h>
  11. #include <string.h>
  12. #include <assert.h>
  13. #define _ASSERTE assert
  14. #define unused_attribute __attribute__((unused))
  15. #endif
  16. using namespace std;
  17. using namespace stringUtil;
  // Character classes used by the escaping routines, one string per class.
  // NOTE(review): the first six look like the RFC 2396 character classes
  // (excluded / reserved / mark / digit / alpha) -- confirm before relying on it.
  static std::string gExcluded(" <>#%\"{}|\\^[]`");
  static std::string gReserved(";/?:@&=+$,");
  static std::string gMark("-_.!~*'()");
  static std::string gDigit("0123456789");
  static std::string gLowAlpha("abcdefghijklmnopqrstuvwxyz");
  static std::string gHiAlpha("ABCDEFGHIJKLMNOPQRSTUVWXYZ");

  // Unreserved and reserved character lists named after RFC 3986.
  static std::string gUnreserved_RFC3986("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~");
  static std::string gReserved_RFC3986("!*'();:@&=+$,/?%#[]");
  // Build a new set holding every element that appears in s1, in s2, or in
  // both. Neither argument is modified.
  template<class T>
  static std::set<T> setunion(const std::set<T> &s1, const std::set<T> &s2)
  {
      std::set<T> merged;
      merged.insert(s1.begin(), s1.end());
      merged.insert(s2.begin(), s2.end());
      return merged;
  }
  33. static set<char> gSetExcluded(gExcluded.begin(), gExcluded.end());
  34. static set<char> gSetReserved(gReserved.begin(), gReserved.end());
  35. static set<char> gSetExcludedAndReserved(setunion(gSetExcluded, gSetReserved));
  36. static set<char> gSetMark(gMark.begin(), gMark.end());
  37. static set<char> gSetLowAlpha(gLowAlpha.begin(), gLowAlpha.end());
  38. static set<char> gSetHiAlpha(gHiAlpha.begin(), gHiAlpha.end());
  39. static set<char> gSetDigit(gDigit.begin(), gDigit.end());
  40. static set<char> gSetAlpha(setunion(gSetLowAlpha, gSetHiAlpha));
  41. static set<char> gSetAlphanum(setunion(gSetAlpha, gSetDigit));
  42. static set<char> gSetUnreserved(setunion(gSetAlphanum, gSetMark));
  43. static set<char> gSetUnreserved_RFC3986(gUnreserved_RFC3986.begin(), gUnreserved_RFC3986.end());
  44. static set<char> gSetReserved_RFC3986(gReserved_RFC3986.begin(), gReserved_RFC3986.end());
  45. ////////////////////////////////////////////////
  46. ////////// Escaping Funcs //////////////////////
  47. ///////////////////////////////////////////////
  48. // convert a character to its hex representation
  49. static string escapeChar(__uint8 c) throw()
  50. {
  51. char buf[8] = {0};
  52. int len unused_attribute = snprintf(buf, sizeof(buf), "%%%02X", (int)c);
  53. _ASSERTE(len == 3);
  54. return buf;
  55. }
  56. // convert the char if it's in the set
  57. static string escapeIfInSet(char c,const set<char> &st) throw()
  58. {
  59. return (st.find(c) == st.end() ? string(1, c) : escapeChar(c));
  60. }
  61. static string escapeIfNotInSet(char c, const set<char> &st) throw()
  62. {
  63. return (st.find(c) == st.end() ? escapeChar(c) : string(1, c));
  64. }
  // Apply func to every element of s, in order, and concatenate the results.
  //   s    - any sequence exposing const_iterator, begin() and end()
  //   st   - character set handed through to func on every call
  //   func - callable invoked as func(element, st); its result is appended
  //          to the output string
  template<typename ESC, typename STYPE>
  static std::string escapeString(const STYPE &s, const std::set<char> &st, ESC func) throw()
  {
      std::string escaped;
      typename STYPE::const_iterator cur = s.begin();
      const typename STYPE::const_iterator last = s.end();
      while (cur != last)
      {
          escaped += func(*cur, st);
          ++cur;
      }
      return escaped;
  }
  75. string urlUtils::escape(const std::string &s) throw()
  76. {
  77. return escapeString(s, gSetExcludedAndReserved, escapeIfInSet);
  78. }
  79. string urlUtils::escapeURI_RFC3986(const uniString::utf8 &s) throw()
  80. {
  81. return escapeString(s, gSetUnreserved_RFC3986, escapeIfNotInSet);
  82. }
  83. /////////////////////////////////////////////////////
  84. //////////// Unescaping funcs //////////////////////
  85. ////////////////////////////////////////////////////
  86. // convert %xx to a character
  87. inline char unescapeSequence(const string &s) throw()
  88. {
  89. _ASSERTE(s.size() == 3);
  90. _ASSERTE(s[0] == '%' && isxdigit(s[1]) && isxdigit(s[2]));
  91. unsigned int v = 0;
  92. sscanf(s.c_str(), "%%%02x", &v);
  93. return (char)v;
  94. }
  95. uniString::utf8 urlUtils::unescapeString(const string &s) throw()
  96. {
  97. string result;
  98. string escTok;
  99. int ccnt(0);
  100. for (string::const_iterator i = s.begin(); i != s.end(); ++i)
  101. {
  102. bool escChar(false);
  103. switch (ccnt)
  104. {
  105. case 0:
  106. {
  107. escChar = ((*i) == '%');
  108. break;
  109. }
  110. case 1:
  111. {
  112. escChar = (isxdigit(*i) ? true : false);
  113. break;
  114. }
  115. case 2:
  116. {
  117. escChar = (isxdigit(*i) ? true : false);
  118. break;
  119. }
  120. }
  121. if (escChar)
  122. {
  123. escTok += (*i);
  124. ++ccnt;
  125. }
  126. else
  127. {
  128. result += escTok;
  129. ccnt = 0;
  130. if ((*i) == '+')
  131. {
  132. result += " ";
  133. }
  134. else
  135. {
  136. result += (*i);
  137. }
  138. }
  139. if (ccnt == 3)
  140. {
  141. result += unescapeSequence(escTok);
  142. escTok = "";
  143. ccnt = 0;
  144. }
  145. }
  146. result += escTok;
  147. return uniString::utf8(result);
  148. }
  149. ////////////////////////////////////////////
  150. /////////// Classes ////////////////////////
  151. string urlUtils::urlQueryEntry::escape() const throw()
  152. {
  153. string result(escapeString(m_entry.first, gSetExcludedAndReserved, escapeIfInSet));
  154. if (!m_entry.second.empty())
  155. {
  156. result += "=" + escapeString(m_entry.second, gSetExcludedAndReserved, escapeIfInSet);
  157. }
  158. return result;
  159. }
  160. urlUtils::urlQueryEntry urlUtils::urlQueryEntry::parse(const string &s) throw()
  161. {
  162. urlUtils::urlQueryEntry result;
  163. if (!s.empty())
  164. {
  165. string::size_type pos = s.find("=");
  166. result.m_entry.first = unescapeString(s.substr(0,pos));
  167. result.m_entry.second = (pos == string::npos ? "" : unescapeString(s.substr(pos+1)));
  168. }
  169. return result;
  170. }
  171. string urlUtils::urlQuery::escape() const throw()
  172. {
  173. string result;
  174. for (vector<urlQueryEntry>::const_iterator i = m_query.begin(); i != m_query.end(); ++i)
  175. {
  176. result += (*i).escape();
  177. if (i + 1 != m_query.end())
  178. {
  179. result += "&";
  180. }
  181. }
  182. return result;
  183. }
  184. urlUtils::urlQuery urlUtils::urlQuery::parse(const std::string &sin) throw()
  185. {
  186. urlUtils::urlQuery result;
  187. parse(sin, result);
  188. return result;
  189. }
  190. void urlUtils::urlQuery::parse(const std::string &sin, urlQuery &q) throw()
  191. {
  192. q.clear();
  193. string s(sin);
  194. while (s != "")
  195. {
  196. if (!s.empty())
  197. {
  198. string::size_type pos = s.find("&");
  199. q.m_query.push_back(urlUtils::urlQueryEntry::parse(s.substr(0, pos)));
  200. if (pos == string::npos)
  201. {
  202. break;
  203. }
  204. s = s.substr(pos + 1);
  205. }
  206. else
  207. {
  208. break;
  209. }
  210. }
  211. }