1 /*
2 * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util/URIUtil.java,v 1.21 2003/06/29 21:34:06 olegk Exp $
3 * $Revision: 1.21 $
4 * $Date: 2003/06/29 21:34:06 $
5 *
6 * ====================================================================
7 *
8 * The Apache Software License, Version 1.1
9 *
10 * Copyright (c) 2002-2003 The Apache Software Foundation. All rights
11 * reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 *
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 *
25 * 3. The end-user documentation included with the redistribution, if
26 * any, must include the following acknowlegement:
27 * "This product includes software developed by the
28 * Apache Software Foundation (http://www.apache.org/)."
29 * Alternately, this acknowlegement may appear in the software itself,
30 * if and wherever such third-party acknowlegements normally appear.
31 *
32 * 4. The names "The Jakarta Project", "Commons", and "Apache Software
33 * Foundation" must not be used to endorse or promote products derived
34 * from this software without prior written permission. For written
35 * permission, please contact apache@apache.org.
36 *
37 * 5. Products derived from this software may not be called "Apache"
38 * nor may "Apache" appear in their names without prior written
39 * permission of the Apache Group.
40 *
41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
42 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
44 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
45 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
48 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
49 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
50 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
51 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 * ====================================================================
54 *
55 * This software consists of voluntary contributions made by many
56 * individuals on behalf of the Apache Software Foundation. For more
57 * information on the Apache Software Foundation, please see
58 * <http://www.apache.org/>.
59 *
60 * [Additional notices, if required by prior licensing conditions]
61 *
62 */
63
64 package org.apache.commons.httpclient.util;
65
66 import java.io.UnsupportedEncodingException;
67 import java.util.BitSet;
68 import org.apache.commons.httpclient.URI;
69 import org.apache.commons.httpclient.URIException;
70
71 /***
72 * The URI escape and character encoding and decoding utility.
73 * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather
74 * than {@link org.apache.commons.httpclient.URI}.
75 *
76 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
77 * @version $Revision: 1.21 $ $Date: 2002/03/14 15:14:01
78 */
79
80 public class URIUtil {
81
82 // ----------------------------------------------------- Instance variables
83
84 protected static final BitSet empty = new BitSet(1);
85
86 // ---------------------------------------------------------- URI utilities
87
88 /***
89 * Get the basename of an URI. It's possibly an empty string.
90 *
91 * @param uri a string regarded an URI
92 * @return the basename string; an empty string if the path ends with slash
93 */
94 public static String getName(String uri) {
95 if (uri == null || uri.length() == 0) { return uri; }
96 String path = URIUtil.getPath(uri);
97 int at = path.lastIndexOf("/");
98 int to = path.length();
99 return (at >= 0) ? path.substring(at + 1, to) : path;
100 }
101
102
103 /***
104 * Get the query of an URI.
105 *
106 * @param uri a string regarded an URI
107 * @return the query string; <code>null</code> if empty or undefined
108 */
109 public static String getQuery(String uri) {
110 if (uri == null || uri.length() == 0) { return null; }
111 // consider of net_path
112 int at = uri.indexOf("//");
113 int from = uri.indexOf(
114 "/",
115 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
116 );
117 // the authority part of URI ignored
118 int to = uri.length();
119 // reuse the at and from variables to consider the query
120 at = uri.indexOf("?", from);
121 if (at >= 0) {
122 from = at + 1;
123 } else {
124 return null;
125 }
126 // check the fragment
127 if (uri.lastIndexOf("#") > from) {
128 to = uri.lastIndexOf("#");
129 }
130 // get the path and query.
131 return (from < 0 || from == to) ? null : uri.substring(from, to);
132 }
133
134
135 /***
136 * Get the path of an URI.
137 *
138 * @param uri a string regarded an URI
139 * @return the path string
140 */
141 public static String getPath(String uri) {
142 if (uri == null) {
143 return null;
144 }
145 // consider of net_path
146 int at = uri.indexOf("//");
147 int from = uri.indexOf(
148 "/",
149 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
150 );
151 // the authority part of URI ignored
152 int to = uri.length();
153 // check the query
154 if (uri.indexOf('?', from) != -1) {
155 to = uri.indexOf('?', from);
156 }
157 // check the fragment
158 if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) {
159 to = uri.lastIndexOf("#");
160 }
161 // get only the path.
162 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
163 }
164
165
166 /***
167 * Get the path and query of an URI.
168 *
169 * @param uri a string regarded an URI
170 * @return the path and query string
171 */
172 public static String getPathQuery(String uri) {
173 if (uri == null) {
174 return null;
175 }
176 // consider of net_path
177 int at = uri.indexOf("//");
178 int from = uri.indexOf(
179 "/",
180 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
181 );
182 // the authority part of URI ignored
183 int to = uri.length();
184 // Ignore the '?' mark so to ignore the query.
185 // check the fragment
186 if (uri.lastIndexOf("#") > from) {
187 to = uri.lastIndexOf("#");
188 }
189 // get the path and query.
190 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
191 }
192
193
194 /***
195 * Get the path of an URI and its rest part.
196 *
197 * @param uri a string regarded an URI
198 * @return the string from the path part
199 */
200 public static String getFromPath(String uri) {
201 if (uri == null) {
202 return null;
203 }
204 // consider of net_path
205 int at = uri.indexOf("//");
206 int from = uri.indexOf(
207 "/",
208 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
209 );
210 // get the path and its rest.
211 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from);
212 }
213
214 // ----------------------------------------------------- Encoding utilities
215
216 /***
217 * Get the all escaped and encoded string with the default protocl charset.
218 * It's the same function to use <code>encode(String unescaped, Bitset
219 * empty, URI.getDefaultProtocolCharset())</code>.
220 *
221 * @param unescaped an unescaped string
222 * @return the escaped string
223 *
224 * @throws URIException if the default protocol charset is not supported
225 *
226 * @see URI#getDefaultProtocolCharset
227 * @see #encode
228 */
229 public static String encodeAll(String unescaped) throws URIException {
230 return encodeAll(unescaped, URI.getDefaultProtocolCharset());
231 }
232
233
234 /***
235 * Get the all escaped and encoded string with a given charset.
236 * It's the same function to use <code>encode(String unescaped, Bitset
237 * empty, String charset)</code>.
238 *
239 * @param unescaped an unescaped string
240 * @param charset the charset
241 * @return the escaped string
242 *
243 * @throws URIException if the charset is not supported
244 *
245 * @see #encode
246 */
247 public static String encodeAll(String unescaped, String charset)
248 throws URIException {
249
250 return encode(unescaped, empty, charset);
251 }
252
253
254 /***
255 * Escape and encode a string regarded as within the authority component of
256 * an URI with the default protocol charset.
257 * Within the authority component, the characters ";", ":", "@", "?", and
258 * "/" are reserved.
259 *
260 * @param unescaped an unescaped string
261 * @return the escaped string
262 *
263 * @throws URIException if the default protocol charset is not supported
264 *
265 * @see URI#getDefaultProtocolCharset
266 * @see #encode
267 */
268 public static String encodeWithinAuthority(String unescaped)
269 throws URIException {
270
271 return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset());
272 }
273
274
275 /***
276 * Escape and encode a string regarded as within the authority component of
277 * an URI with a given charset.
278 * Within the authority component, the characters ";", ":", "@", "?", and
279 * "/" are reserved.
280 *
281 * @param unescaped an unescaped string
282 * @param charset the charset
283 * @return the escaped string
284 *
285 * @throws URIException if the charset is not supported
286 *
287 * @see #encode
288 */
289 public static String encodeWithinAuthority(String unescaped, String charset)
290 throws URIException {
291
292 return encode(unescaped, URI.allowed_within_authority, charset);
293 }
294
295
296 /***
297 * Escape and encode a string regarded as the path and query components of
298 * an URI with the default protocol charset.
299 *
300 * @param unescaped an unescaped string
301 * @return the escaped string
302 *
303 * @throws URIException if the default protocol charset is not supported
304 *
305 * @see URI#getDefaultProtocolCharset
306 * @see #encode
307 */
308 public static String encodePathQuery(String unescaped) throws URIException {
309 return encodePathQuery(unescaped, URI.getDefaultProtocolCharset());
310 }
311
312
313 /***
314 * Escape and encode a string regarded as the path and query components of
315 * an URI with a given charset.
316 *
317 * @param unescaped an unescaped string
318 * @param charset the charset
319 * @return the escaped string
320 *
321 * @throws URIException if the charset is not supported
322 *
323 * @see #encode
324 */
325 public static String encodePathQuery(String unescaped, String charset)
326 throws URIException {
327
328 int at = unescaped.indexOf('?');
329 if (at < 0) {
330 return encode(unescaped, URI.allowed_abs_path, charset);
331 }
332 // else
333 return encode(unescaped.substring(0, at), URI.allowed_abs_path, charset)
334 + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset);
335 }
336
337
338 /***
339 * Escape and encode a string regarded as within the path component of an
340 * URI with the default protocol charset.
341 * The path may consist of a sequence of path segments separated by a
342 * single slash "/" character. Within a path segment, the characters
343 * "/", ";", "=", and "?" are reserved.
344 *
345 * @param unescaped an unescaped string
346 * @return the escaped string
347 *
348 * @throws URIException if the default protocol charset is not supported
349 *
350 * @see URI#getDefaultProtocolCharset
351 * @see #encode
352 */
353 public static String encodeWithinPath(String unescaped)
354 throws URIException {
355
356 return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset());
357 }
358
359
360 /***
361 * Escape and encode a string regarded as within the path component of an
362 * URI with a given charset.
363 * The path may consist of a sequence of path segments separated by a
364 * single slash "/" character. Within a path segment, the characters
365 * "/", ";", "=", and "?" are reserved.
366 *
367 * @param unescaped an unescaped string
368 * @param charset the charset
369 * @return the escaped string
370 *
371 * @throws URIException if the charset is not supported
372 *
373 * @see #encode
374 */
375 public static String encodeWithinPath(String unescaped, String charset)
376 throws URIException {
377
378 return encode(unescaped, URI.allowed_within_path, charset);
379 }
380
381
382 /***
383 * Escape and encode a string regarded as the path component of an URI with
384 * the default protocol charset.
385 *
386 * @param unescaped an unescaped string
387 * @return the escaped string
388 *
389 * @throws URIException if the default protocol charset is not supported
390 *
391 * @see URI#getDefaultProtocolCharset
392 * @see #encode
393 */
394 public static String encodePath(String unescaped) throws URIException {
395 return encodePath(unescaped, URI.getDefaultProtocolCharset());
396 }
397
398
399 /***
400 * Escape and encode a string regarded as the path component of an URI with
401 * a given charset.
402 *
403 * @param unescaped an unescaped string
404 * @param charset the charset
405 * @return the escaped string
406 *
407 * @throws URIException if the charset is not supported
408 *
409 * @see #encode
410 */
411 public static String encodePath(String unescaped, String charset)
412 throws URIException {
413
414 return encode(unescaped, URI.allowed_abs_path, charset);
415 }
416
417
418 /***
419 * Escape and encode a string regarded as within the query component of an
420 * URI with the default protocol charset.
421 * When a query comprise the name and value pairs, it is used in order
422 * to encode each name and value string. The reserved special characters
423 * within a query component are being included in encoding the query.
424 *
425 * @param unescaped an unescaped string
426 * @return the escaped string
427 *
428 * @throws URIException if the default protocol charset is not supported
429 *
430 * @see URI#getDefaultProtocolCharset
431 * @see #encode
432 */
433 public static String encodeWithinQuery(String unescaped)
434 throws URIException {
435
436 return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset());
437 }
438
439
440 /***
441 * Escape and encode a string regarded as within the query component of an
442 * URI with a given charset.
443 * When a query comprise the name and value pairs, it is used in order
444 * to encode each name and value string. The reserved special characters
445 * within a query component are being included in encoding the query.
446 *
447 * @param unescaped an unescaped string
448 * @param charset the charset
449 * @return the escaped string
450 *
451 * @throws URIException if the charset is not supported
452 *
453 * @see #encode
454 */
455 public static String encodeWithinQuery(String unescaped, String charset)
456 throws URIException {
457
458 return encode(unescaped, URI.allowed_within_query, charset);
459 }
460
461
462 /***
463 * Escape and encode a string regarded as the query component of an URI with
464 * the default protocol charset.
465 * When a query string is not misunderstood the reserved special characters
466 * ("&", "=", "+", ",", and "$") within a query component, this method
467 * is recommended to use in encoding the whole query.
468 *
469 * @param unescaped an unescaped string
470 * @return the escaped string
471 *
472 * @throws URIException if the default protocol charset is not supported
473 *
474 * @see URI#getDefaultProtocolCharset
475 * @see #encode
476 */
477 public static String encodeQuery(String unescaped) throws URIException {
478 return encodeQuery(unescaped, URI.getDefaultProtocolCharset());
479 }
480
481
482 /***
483 * Escape and encode a string regarded as the query component of an URI with
484 * a given charset.
485 * When a query string is not misunderstood the reserved special characters
486 * ("&", "=", "+", ",", and "$") within a query component, this method
487 * is recommended to use in encoding the whole query.
488 *
489 * @param unescaped an unescaped string
490 * @param charset the charset
491 * @return the escaped string
492 *
493 * @throws URIException if the charset is not supported
494 *
495 * @see #encode
496 */
497 public static String encodeQuery(String unescaped, String charset)
498 throws URIException {
499
500 return encode(unescaped, URI.allowed_query, charset);
501 }
502
503
504 /***
505 * Escape and encode a given string with allowed characters not to be
506 * escaped and the default protocol charset.
507 *
508 * @param unescaped a string
509 * @param allowed allowed characters not to be escaped
510 * @return the escaped string
511 *
512 * @throws URIException if the default protocol charset is not supported
513 *
514 * @see URI#getDefaultProtocolCharset
515 * @see Coder#encode
516 */
517 public static String encode(String unescaped, BitSet allowed)
518 throws URIException {
519
520 return encode(unescaped, allowed, URI.getDefaultProtocolCharset());
521 }
522
523
524 /***
525 * Escape and encode a given string with allowed characters not to be
526 * escaped and a given charset.
527 *
528 * @param unescaped a string
529 * @param allowed allowed characters not to be escaped
530 * @param charset the charset
531 * @return the escaped string
532 *
533 * @throws URIException if the charset is not supported
534 *
535 * @see Coder#encode
536 */
537 public static String encode(String unescaped, BitSet allowed,
538 String charset) throws URIException {
539
540 return new String(Coder.encode(unescaped, allowed, charset));
541 }
542
543
544 /***
545 * Unescape and decode a given string regarded as an escaped string with the
546 * default protocol charset.
547 *
548 * @param escaped a string
549 * @return the unescaped string
550 *
551 * @throws URIException if the default protocol charset is not supported
552 *
553 * @see URI#getDefaultProtocolCharset
554 * @see Coder#decode
555 */
556 public static String decode(String escaped) throws URIException {
557 return Coder.decode(escaped.toCharArray(), URI.getDefaultProtocolCharset());
558 }
559
560
561 /***
562 * Unescape and decode a given string regarded as an escaped string.
563 *
564 * @param escaped a string
565 * @param charset the charset
566 * @return the unescaped string
567 *
568 * @throws URIException if the charset is not supported
569 *
570 * @see Coder#decode
571 */
572 public static String decode(String escaped, String charset)
573 throws URIException {
574
575 return Coder.decode(escaped.toCharArray(), charset);
576 }
577
578 // --------------------------------- transforming a string between charsets
579
580 /***
581 * Convert a target string to the specified character encoded string with
582 * the default protocol charset.
583 *
584 * @param target a target string
585 * @return the protocol character encoded string
586 *
587 * @throws URIException if the default protocol charset is not supported
588 *
589 * @see URI#getDefaultProtocolCharset
590 *
591 * @deprecated Do not use. To be removed
592 */
593 public static String toProtocolCharset(String target) throws URIException {
594 return toUsingCharset(
595 target,
596 URI.getDefaultDocumentCharset(),
597 URI.getDefaultProtocolCharset());
598 }
599
600
601 /***
602 * Convert a target string to the specified character encoded string with
603 * a given protocol charset.
604 *
605 * @param target a target string
606 * @param charset the transformed protocol charset
607 * @return the protocol character encoded string
608 *
609 * @throws URIException if the charset is not supported
610 *
611 * @deprecated Do not use. To be removed
612 */
613 public static String toProtocolCharset(String target, String charset)
614 throws URIException {
615
616 return toUsingCharset(target, URI.getDefaultDocumentCharset(), charset);
617 }
618
619
620 /***
621 * Convert a target string to the specified character encoded string with
622 * the default document charset.
623 *
624 * @param target a target string
625 * @return the document character encoded string
626 *
627 * @throws URIException if the default protocol charset is not supported
628 *
629 * @see URI#getDefaultDocumentCharset
630 *
631 * @deprecated Do not use. To be removed
632 */
633 public static String toDocumentCharset(String target) throws URIException {
634 return toUsingCharset(target, URI.getDefaultProtocolCharset(),
635 URI.getDefaultDocumentCharset());
636 }
637
638
639 /***
640 * Convert a target string to the specified character encoded string with
641 * a given document charset.
642 *
643 * @param target a target string
644 * @param charset the transformed document charset
645 * @return the document character encoded string
646 *
647 * @throws URIException if the charset is not supported
648 *
649 * @deprecated Do not use. To be removed
650 */
651 public static String toDocumentCharset(String target, String charset)
652 throws URIException {
653
654 return toUsingCharset(target, URI.getDefaultProtocolCharset(), charset);
655 }
656
657
658 /***
659 * Convert a target string from the <code>fromCharset</code> charset to
660 * the <code>toCharset</code> charset.
661 * <p>
662 * What if the document charset is ISO-8859-1 and the protocol charset is
663 * UTF-8, when it's read from the document part and is used in the protocol
664 * part, the use of the method will be <code>toUsingCharset(the string,
665 * "ISO-8859-1", "UTF-8")</code>.
666 *
667 * @param target a target string
668 * @param fromCharset the previous charset
669 * @param toCharset the changing charset
670 * @return the document character encoded string
671 *
672 * @throws URIException if either of the charsets are not supported
673 *
674 * @deprecated Do not use. To be removed
675 */
676
677 public static String toUsingCharset(String target, String fromCharset,
678 String toCharset) throws URIException {
679
680 try {
681 return new String(target.getBytes(fromCharset), toCharset);
682 } catch (UnsupportedEncodingException error) {
683 throw new URIException(URIException.UNSUPPORTED_ENCODING,
684 error.getMessage());
685 }
686 }
687
688 // ---------------------------------------------------------- Inner classes
689
690 /***
691 * The basic and internal utility for URI escape and character encoding and
692 * decoding.
693 */
694 protected static class Coder extends URI {
695
696 /***
697 * Escape and encode a given string with allowed characters not to be
698 * escaped.
699 *
700 * @param unescapedComponent an unescaped component
701 * @param allowed allowed characters not to be escaped
702 * @param charset the charset to encode
703 * @return the escaped and encoded string
704 *
705 * @throws URIException if the charset is not supported
706 */
707 public static char[] encode(String unescapedComponent, BitSet allowed, String charset)
708 throws URIException {
709
710 return URI.encode(unescapedComponent, allowed, charset);
711 }
712
713
714 /***
715 * Unescape and decode a given string.
716 *
717 * @param escapedComponent an being-unescaped component
718 * @param charset the charset to decode
719 * @return the escaped and encoded string
720 *
721 * @throws URIException if the charset is not supported
722 */
723 public static String decode(char[] escapedComponent, String charset)
724 throws URIException {
725
726 return URI.decode(escapedComponent, charset);
727 }
728
729
730 /***
731 * Verify whether a given string is escaped or not
732 *
733 * @param original given characters
734 * @return true if the given character array is 7 bit ASCII-compatible.
735 */
736 public static boolean verifyEscaped(char[] original) {
737 for (int i = 0; i < original.length; i++) {
738 int c = original[i];
739 if (c > 128) {
740 return false;
741 } else if (c == '%') {
742 if (Character.digit(original[++i], 16) == -1
743 || Character.digit(original[++i], 16) == -1) {
744 return false;
745 }
746 }
747 }
748 return true;
749 }
750
751
752 /***
753 * Replace from a given character to given character in an array order
754 * for a given string.
755 *
756 * @param original a given string
757 * @param from a replacing character array
758 * @param to a replaced character array
759 * @return the replaced string
760 */
761 public static String replace(String original, char[] from, char[] to) {
762 for (int i = from.length; i > 0; --i) {
763 original = replace(original, from[i], to[i]);
764 }
765 return original.toString();
766 }
767
768
769 /***
770 * Replace from a given character to given character for a given string.
771 *
772 * @param original a given string
773 * @param from a replacing character array
774 * @param to a replaced character array
775 * @return the replaced string
776 */
777 public static String replace(String original, char from, char to) {
778 StringBuffer result = new StringBuffer(original.length());
779 int at, saved = 0;
780 do {
781 at = original.indexOf(from);
782 if (at >= 0) {
783 result.append(original.substring(0, at));
784 result.append(to);
785 } else {
786 result.append(original.substring(saved));
787 }
788 saved = at;
789 } while (at >= 0);
790 return result.toString();
791 }
792 }
793
794 }
795
This page was automatically generated by Maven