URLEncoder.java 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  /* URLEncoder.java -- Class to convert strings to a properly encoded URL
     Copyright (C) 1998, 1999, 2001, 2002, 2003 Free Software Foundation, Inc.
  This file is part of GNU Classpath.
  GNU Classpath is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2, or (at your option)
  any later version.
  GNU Classpath is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with GNU Classpath; see the file COPYING. If not, write to the
  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  02110-1301 USA.
  Linking this library statically or dynamically with other modules is
  making a combined work based on this library. Thus, the terms and
  conditions of the GNU General Public License cover the whole
  combination.
  As a special exception, the copyright holders of this library give you
  permission to link this library with independent modules to produce an
  executable, regardless of the license terms of these independent
  modules, and to copy and distribute the resulting executable under
  terms of your choice, provided that you also meet, for each linked
  independent module, the terms and conditions of the license of that
  module. An independent module is a module which is not derived from
  or based on this library. If you modify this library, you may extend
  this exception to your version of the library, but you are not
  obligated to do so. If you do not wish to do so, delete this
  exception statement from your version. */
  31. package java.net;
  32. import gnu.java.lang.CPStringBuilder;
  33. import java.io.UnsupportedEncodingException;
  /*
   * Written using on-line Java Platform 1.2/1.4 API Specification, as well
   * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
   * Status: Believed complete and correct.
   */
  39. /**
  40. * This utility class contains static methods that converts a
  41. * string into a fully encoded URL string in x-www-form-urlencoded
  42. * format. This format replaces certain disallowed characters with
  43. * encoded equivalents. All upper case and lower case letters in the
  44. * US alphabet remain as is, the space character (' ') is replaced with
  45. * '+' sign, and all other characters are converted to a "%XX" format
  46. * where XX is the hexadecimal representation of that character in a
  47. * certain encoding (by default, the platform encoding, though the
  48. * standard is "UTF-8").
  49. * <p>
  50. * This method is very useful for encoding strings to be sent to CGI scripts
  51. *
  52. * @author Aaron M. Renn (arenn@urbanophile.com)
  53. * @author Warren Levy (warrenl@cygnus.com)
  54. * @author Mark Wielaard (mark@klomp.org)
  55. */
  56. public class URLEncoder
  57. {
  58. /**
  59. * This method translates the passed in string into x-www-form-urlencoded
  60. * format using the default encoding. The standard encoding is
  61. * "UTF-8", and the two-argument form of this method should be used
  62. * instead.
  63. *
  64. * @param s The String to convert
  65. *
  66. * @return The converted String
  67. *
  68. * @deprecated
  69. */
  70. public static String encode(String s)
  71. {
  72. try
  73. {
  74. // We default to 8859_1 for compatibility with the same
  75. // default elsewhere in the library.
  76. return encode(s, System.getProperty("file.encoding", "8859_1"));
  77. }
  78. catch (UnsupportedEncodingException uee)
  79. {
  80. // Should never happen since default should always be supported
  81. return s;
  82. }
  83. }
  84. /**
  85. * This method translates the passed in string into x-www-form-urlencoded
  86. * format using the character encoding to hex-encode the unsafe characters.
  87. *
  88. * @param s The String to convert
  89. * @param encoding The encoding to use for unsafe characters
  90. *
  91. * @return The converted String
  92. *
  93. * @exception UnsupportedEncodingException If the named encoding is not
  94. * supported
  95. *
  96. * @since 1.4
  97. */
  98. public static String encode(String s, String encoding)
  99. throws UnsupportedEncodingException
  100. {
  101. int length = s.length();
  102. int start = 0;
  103. int i = 0;
  104. CPStringBuilder result = new CPStringBuilder(length);
  105. while (true)
  106. {
  107. while (i < length && isSafe(s.charAt(i)))
  108. i++;
  109. // Safe character can just be added
  110. result.append(s.substring(start, i));
  111. // Are we done?
  112. if (i >= length)
  113. return result.toString();
  114. else if (s.charAt(i) == ' ')
  115. {
  116. result.append('+'); // Replace space char with plus symbol.
  117. i++;
  118. }
  119. else
  120. {
  121. // Get all unsafe characters
  122. start = i;
  123. char c;
  124. while (i < length && (c = s.charAt(i)) != ' ' && ! isSafe(c))
  125. i++;
  126. // Convert them to %XY encoded strings
  127. String unsafe = s.substring(start, i);
  128. byte[] bytes = unsafe.getBytes(encoding);
  129. for (int j = 0; j < bytes.length; j++)
  130. {
  131. result.append('%');
  132. int val = bytes[j];
  133. result.append(hex.charAt((val & 0xf0) >> 4));
  134. result.append(hex.charAt(val & 0x0f));
  135. }
  136. }
  137. start = i;
  138. }
  139. }
  140. /**
  141. * Private static method that returns true if the given char is either
  142. * a uppercase or lowercase letter from 'a' till 'z', or a digit froim
  143. * '0' till '9', or one of the characters '-', '_', '.' or '*'. Such
  144. * 'safe' character don't have to be url encoded.
  145. */
  146. private static boolean isSafe(char c)
  147. {
  148. return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
  149. || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.'
  150. || c == '*');
  151. }
  152. /**
  153. * Private constructor that does nothing. Included to avoid a default
  154. * public constructor being created by the compiler.
  155. */
  156. private URLEncoder()
  157. {
  158. }
  159. /**
  160. * Used to convert to hex. We don't use Integer.toHexString, since
  161. * it converts to lower case (and the Sun docs pretty clearly
  162. * specify upper case here), and because it doesn't provide a
  163. * leading 0.
  164. */
  165. private static final String hex = "0123456789ABCDEF";
  166. }