1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 package org.apache.commons.httpclient.util;
33
34 import java.io.UnsupportedEncodingException;
35 import java.util.BitSet;
36 import org.apache.commons.httpclient.URI;
37 import org.apache.commons.httpclient.URIException;
38
39 /***
40 * The URI escape and character encoding and decoding utility.
41 * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather
42 * than {@link org.apache.commons.httpclient.URI}.
43 *
44 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
45 * @version $Revision: 1.21.2.1 $ $Date: 2002/03/14 15:14:01
46 */
47
48 public class URIUtil {
49
50
51
/**
 * A bit set with no bits set. It is passed as the "allowed" set by
 * {@link #encodeAll(String, String)}, so that no character is exempted
 * from escaping.
 */
52 protected static final BitSet empty = new BitSet(1);
53
54
55
56 /***
57 * Get the basename of an URI. It's possibly an empty string.
58 *
59 * @param uri a string regarded an URI
60 * @return the basename string; an empty string if the path ends with slash
61 */
62 public static String getName(String uri) {
63 if (uri == null || uri.length() == 0) { return uri; }
64 String path = URIUtil.getPath(uri);
65 int at = path.lastIndexOf("/");
66 int to = path.length();
67 return (at >= 0) ? path.substring(at + 1, to) : path;
68 }
69
70
71 /***
72 * Get the query of an URI.
73 *
74 * @param uri a string regarded an URI
75 * @return the query string; <code>null</code> if empty or undefined
76 */
77 public static String getQuery(String uri) {
78 if (uri == null || uri.length() == 0) { return null; }
79
80 int at = uri.indexOf("//");
81 int from = uri.indexOf(
82 "/",
83 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
84 );
85
86 int to = uri.length();
87
88 at = uri.indexOf("?", from);
89 if (at >= 0) {
90 from = at + 1;
91 } else {
92 return null;
93 }
94
95 if (uri.lastIndexOf("#") > from) {
96 to = uri.lastIndexOf("#");
97 }
98
99 return (from < 0 || from == to) ? null : uri.substring(from, to);
100 }
101
102
103 /***
104 * Get the path of an URI.
105 *
106 * @param uri a string regarded an URI
107 * @return the path string
108 */
109 public static String getPath(String uri) {
110 if (uri == null) {
111 return null;
112 }
113
114 int at = uri.indexOf("//");
115 int from = uri.indexOf(
116 "/",
117 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
118 );
119
120 int to = uri.length();
121
122 if (uri.indexOf('?', from) != -1) {
123 to = uri.indexOf('?', from);
124 }
125
126 if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) {
127 to = uri.lastIndexOf("#");
128 }
129
130 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
131 }
132
133
134 /***
135 * Get the path and query of an URI.
136 *
137 * @param uri a string regarded an URI
138 * @return the path and query string
139 */
140 public static String getPathQuery(String uri) {
141 if (uri == null) {
142 return null;
143 }
144
145 int at = uri.indexOf("//");
146 int from = uri.indexOf(
147 "/",
148 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
149 );
150
151 int to = uri.length();
152
153
154 if (uri.lastIndexOf("#") > from) {
155 to = uri.lastIndexOf("#");
156 }
157
158 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
159 }
160
161
162 /***
163 * Get the path of an URI and its rest part.
164 *
165 * @param uri a string regarded an URI
166 * @return the string from the path part
167 */
168 public static String getFromPath(String uri) {
169 if (uri == null) {
170 return null;
171 }
172
173 int at = uri.indexOf("//");
174 int from = uri.indexOf(
175 "/",
176 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
177 );
178
179 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from);
180 }
181
182
183
184 /***
185 * Get the all escaped and encoded string with the default protocl charset.
186 * It's the same function to use <code>encode(String unescaped, Bitset
187 * empty, URI.getDefaultProtocolCharset())</code>.
188 *
189 * @param unescaped an unescaped string
190 * @return the escaped string
191 *
192 * @throws URIException if the default protocol charset is not supported
193 *
194 * @see URI#getDefaultProtocolCharset
195 * @see #encode
196 */
197 public static String encodeAll(String unescaped) throws URIException {
198 return encodeAll(unescaped, URI.getDefaultProtocolCharset());
199 }
200
201
202 /***
203 * Get the all escaped and encoded string with a given charset.
204 * It's the same function to use <code>encode(String unescaped, Bitset
205 * empty, String charset)</code>.
206 *
207 * @param unescaped an unescaped string
208 * @param charset the charset
209 * @return the escaped string
210 *
211 * @throws URIException if the charset is not supported
212 *
213 * @see #encode
214 */
215 public static String encodeAll(String unescaped, String charset)
216 throws URIException {
217
218 return encode(unescaped, empty, charset);
219 }
220
221
222 /***
223 * Escape and encode a string regarded as within the authority component of
224 * an URI with the default protocol charset.
225 * Within the authority component, the characters ";", ":", "@", "?", and
226 * "/" are reserved.
227 *
228 * @param unescaped an unescaped string
229 * @return the escaped string
230 *
231 * @throws URIException if the default protocol charset is not supported
232 *
233 * @see URI#getDefaultProtocolCharset
234 * @see #encode
235 */
236 public static String encodeWithinAuthority(String unescaped)
237 throws URIException {
238
239 return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset());
240 }
241
242
243 /***
244 * Escape and encode a string regarded as within the authority component of
245 * an URI with a given charset.
246 * Within the authority component, the characters ";", ":", "@", "?", and
247 * "/" are reserved.
248 *
249 * @param unescaped an unescaped string
250 * @param charset the charset
251 * @return the escaped string
252 *
253 * @throws URIException if the charset is not supported
254 *
255 * @see #encode
256 */
257 public static String encodeWithinAuthority(String unescaped, String charset)
258 throws URIException {
259
260 return encode(unescaped, URI.allowed_within_authority, charset);
261 }
262
263
264 /***
265 * Escape and encode a string regarded as the path and query components of
266 * an URI with the default protocol charset.
267 *
268 * @param unescaped an unescaped string
269 * @return the escaped string
270 *
271 * @throws URIException if the default protocol charset is not supported
272 *
273 * @see URI#getDefaultProtocolCharset
274 * @see #encode
275 */
276 public static String encodePathQuery(String unescaped) throws URIException {
277 return encodePathQuery(unescaped, URI.getDefaultProtocolCharset());
278 }
279
280
281 /***
282 * Escape and encode a string regarded as the path and query components of
283 * an URI with a given charset.
284 *
285 * @param unescaped an unescaped string
286 * @param charset the charset
287 * @return the escaped string
288 *
289 * @throws URIException if the charset is not supported
290 *
291 * @see #encode
292 */
293 public static String encodePathQuery(String unescaped, String charset)
294 throws URIException {
295
296 int at = unescaped.indexOf('?');
297 if (at < 0) {
298 return encode(unescaped, URI.allowed_abs_path, charset);
299 }
300
301 return encode(unescaped.substring(0, at), URI.allowed_abs_path, charset)
302 + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset);
303 }
304
305
306 /***
307 * Escape and encode a string regarded as within the path component of an
308 * URI with the default protocol charset.
309 * The path may consist of a sequence of path segments separated by a
310 * single slash "/" character. Within a path segment, the characters
311 * "/", ";", "=", and "?" are reserved.
312 *
313 * @param unescaped an unescaped string
314 * @return the escaped string
315 *
316 * @throws URIException if the default protocol charset is not supported
317 *
318 * @see URI#getDefaultProtocolCharset
319 * @see #encode
320 */
321 public static String encodeWithinPath(String unescaped)
322 throws URIException {
323
324 return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset());
325 }
326
327
328 /***
329 * Escape and encode a string regarded as within the path component of an
330 * URI with a given charset.
331 * The path may consist of a sequence of path segments separated by a
332 * single slash "/" character. Within a path segment, the characters
333 * "/", ";", "=", and "?" are reserved.
334 *
335 * @param unescaped an unescaped string
336 * @param charset the charset
337 * @return the escaped string
338 *
339 * @throws URIException if the charset is not supported
340 *
341 * @see #encode
342 */
343 public static String encodeWithinPath(String unescaped, String charset)
344 throws URIException {
345
346 return encode(unescaped, URI.allowed_within_path, charset);
347 }
348
349
350 /***
351 * Escape and encode a string regarded as the path component of an URI with
352 * the default protocol charset.
353 *
354 * @param unescaped an unescaped string
355 * @return the escaped string
356 *
357 * @throws URIException if the default protocol charset is not supported
358 *
359 * @see URI#getDefaultProtocolCharset
360 * @see #encode
361 */
362 public static String encodePath(String unescaped) throws URIException {
363 return encodePath(unescaped, URI.getDefaultProtocolCharset());
364 }
365
366
367 /***
368 * Escape and encode a string regarded as the path component of an URI with
369 * a given charset.
370 *
371 * @param unescaped an unescaped string
372 * @param charset the charset
373 * @return the escaped string
374 *
375 * @throws URIException if the charset is not supported
376 *
377 * @see #encode
378 */
379 public static String encodePath(String unescaped, String charset)
380 throws URIException {
381
382 return encode(unescaped, URI.allowed_abs_path, charset);
383 }
384
385
386 /***
387 * Escape and encode a string regarded as within the query component of an
388 * URI with the default protocol charset.
389 * When a query comprise the name and value pairs, it is used in order
390 * to encode each name and value string. The reserved special characters
391 * within a query component are being included in encoding the query.
392 *
393 * @param unescaped an unescaped string
394 * @return the escaped string
395 *
396 * @throws URIException if the default protocol charset is not supported
397 *
398 * @see URI#getDefaultProtocolCharset
399 * @see #encode
400 */
401 public static String encodeWithinQuery(String unescaped)
402 throws URIException {
403
404 return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset());
405 }
406
407
408 /***
409 * Escape and encode a string regarded as within the query component of an
410 * URI with a given charset.
411 * When a query comprise the name and value pairs, it is used in order
412 * to encode each name and value string. The reserved special characters
413 * within a query component are being included in encoding the query.
414 *
415 * @param unescaped an unescaped string
416 * @param charset the charset
417 * @return the escaped string
418 *
419 * @throws URIException if the charset is not supported
420 *
421 * @see #encode
422 */
423 public static String encodeWithinQuery(String unescaped, String charset)
424 throws URIException {
425
426 return encode(unescaped, URI.allowed_within_query, charset);
427 }
428
429
430 /***
431 * Escape and encode a string regarded as the query component of an URI with
432 * the default protocol charset.
433 * When a query string is not misunderstood the reserved special characters
434 * ("&", "=", "+", ",", and "$") within a query component, this method
435 * is recommended to use in encoding the whole query.
436 *
437 * @param unescaped an unescaped string
438 * @return the escaped string
439 *
440 * @throws URIException if the default protocol charset is not supported
441 *
442 * @see URI#getDefaultProtocolCharset
443 * @see #encode
444 */
445 public static String encodeQuery(String unescaped) throws URIException {
446 return encodeQuery(unescaped, URI.getDefaultProtocolCharset());
447 }
448
449
450 /***
451 * Escape and encode a string regarded as the query component of an URI with
452 * a given charset.
453 * When a query string is not misunderstood the reserved special characters
454 * ("&", "=", "+", ",", and "$") within a query component, this method
455 * is recommended to use in encoding the whole query.
456 *
457 * @param unescaped an unescaped string
458 * @param charset the charset
459 * @return the escaped string
460 *
461 * @throws URIException if the charset is not supported
462 *
463 * @see #encode
464 */
465 public static String encodeQuery(String unescaped, String charset)
466 throws URIException {
467
468 return encode(unescaped, URI.allowed_query, charset);
469 }
470
471
472 /***
473 * Escape and encode a given string with allowed characters not to be
474 * escaped and the default protocol charset.
475 *
476 * @param unescaped a string
477 * @param allowed allowed characters not to be escaped
478 * @return the escaped string
479 *
480 * @throws URIException if the default protocol charset is not supported
481 *
482 * @see URI#getDefaultProtocolCharset
483 * @see Coder#encode
484 */
485 public static String encode(String unescaped, BitSet allowed)
486 throws URIException {
487
488 return encode(unescaped, allowed, URI.getDefaultProtocolCharset());
489 }
490
491
492 /***
493 * Escape and encode a given string with allowed characters not to be
494 * escaped and a given charset.
495 *
496 * @param unescaped a string
497 * @param allowed allowed characters not to be escaped
498 * @param charset the charset
499 * @return the escaped string
500 *
501 * @throws URIException if the charset is not supported
502 *
503 * @see Coder#encode
504 */
505 public static String encode(String unescaped, BitSet allowed,
506 String charset) throws URIException {
507
508 return new String(Coder.encode(unescaped, allowed, charset));
509 }
510
511
512 /***
513 * Unescape and decode a given string regarded as an escaped string with the
514 * default protocol charset.
515 *
516 * @param escaped a string
517 * @return the unescaped string
518 *
519 * @throws URIException if the default protocol charset is not supported
520 *
521 * @see URI#getDefaultProtocolCharset
522 * @see Coder#decode
523 */
524 public static String decode(String escaped) throws URIException {
525 return Coder.decode(escaped.toCharArray(), URI.getDefaultProtocolCharset());
526 }
527
528
529 /***
530 * Unescape and decode a given string regarded as an escaped string.
531 *
532 * @param escaped a string
533 * @param charset the charset
534 * @return the unescaped string
535 *
536 * @throws URIException if the charset is not supported
537 *
538 * @see Coder#decode
539 */
540 public static String decode(String escaped, String charset)
541 throws URIException {
542
543 return Coder.decode(escaped.toCharArray(), charset);
544 }
545
546
547
548 /***
549 * Convert a target string to the specified character encoded string with
550 * the default protocol charset.
551 *
552 * @param target a target string
553 * @return the protocol character encoded string
554 *
555 * @throws URIException if the default protocol charset is not supported
556 *
557 * @see URI#getDefaultProtocolCharset
558 *
559 * @deprecated Do not use. To be removed
560 */
561 public static String toProtocolCharset(String target) throws URIException {
562 return toUsingCharset(
563 target,
564 URI.getDefaultDocumentCharset(),
565 URI.getDefaultProtocolCharset());
566 }
567
568
569 /***
570 * Convert a target string to the specified character encoded string with
571 * a given protocol charset.
572 *
573 * @param target a target string
574 * @param charset the transformed protocol charset
575 * @return the protocol character encoded string
576 *
577 * @throws URIException if the charset is not supported
578 *
579 * @deprecated Do not use. To be removed
580 */
581 public static String toProtocolCharset(String target, String charset)
582 throws URIException {
583
584 return toUsingCharset(target, URI.getDefaultDocumentCharset(), charset);
585 }
586
587
588 /***
589 * Convert a target string to the specified character encoded string with
590 * the default document charset.
591 *
592 * @param target a target string
593 * @return the document character encoded string
594 *
595 * @throws URIException if the default protocol charset is not supported
596 *
597 * @see URI#getDefaultDocumentCharset
598 *
599 * @deprecated Do not use. To be removed
600 */
601 public static String toDocumentCharset(String target) throws URIException {
602 return toUsingCharset(target, URI.getDefaultProtocolCharset(),
603 URI.getDefaultDocumentCharset());
604 }
605
606
607 /***
608 * Convert a target string to the specified character encoded string with
609 * a given document charset.
610 *
611 * @param target a target string
612 * @param charset the transformed document charset
613 * @return the document character encoded string
614 *
615 * @throws URIException if the charset is not supported
616 *
617 * @deprecated Do not use. To be removed
618 */
619 public static String toDocumentCharset(String target, String charset)
620 throws URIException {
621
622 return toUsingCharset(target, URI.getDefaultProtocolCharset(), charset);
623 }
624
625
626 /***
627 * Convert a target string from the <code>fromCharset</code> charset to
628 * the <code>toCharset</code> charset.
629 * <p>
630 * What if the document charset is ISO-8859-1 and the protocol charset is
631 * UTF-8, when it's read from the document part and is used in the protocol
632 * part, the use of the method will be <code>toUsingCharset(the string,
633 * "ISO-8859-1", "UTF-8")</code>.
634 *
635 * @param target a target string
636 * @param fromCharset the previous charset
637 * @param toCharset the changing charset
638 * @return the document character encoded string
639 *
640 * @throws URIException if either of the charsets are not supported
641 *
642 * @deprecated Do not use. To be removed
643 */
644
645 public static String toUsingCharset(String target, String fromCharset,
646 String toCharset) throws URIException {
647
648 try {
649 return new String(target.getBytes(fromCharset), toCharset);
650 } catch (UnsupportedEncodingException error) {
651 throw new URIException(URIException.UNSUPPORTED_ENCODING,
652 error.getMessage());
653 }
654 }
655
656
657
658 /***
659 * The basic and internal utility for URI escape and character encoding and
660 * decoding.
661 */
662 protected static class Coder extends URI {
663
664 /***
665 * Escape and encode a given string with allowed characters not to be
666 * escaped.
667 *
668 * @param unescapedComponent an unescaped component
669 * @param allowed allowed characters not to be escaped
670 * @param charset the charset to encode
671 * @return the escaped and encoded string
672 *
673 * @throws URIException if the charset is not supported
674 */
675 public static char[] encode(String unescapedComponent, BitSet allowed, String charset)
676 throws URIException {
677
678 return URI.encode(unescapedComponent, allowed, charset);
679 }
680
681
682 /***
683 * Unescape and decode a given string.
684 *
685 * @param escapedComponent an being-unescaped component
686 * @param charset the charset to decode
687 * @return the escaped and encoded string
688 *
689 * @throws URIException if the charset is not supported
690 */
691 public static String decode(char[] escapedComponent, String charset)
692 throws URIException {
693
694 return URI.decode(escapedComponent, charset);
695 }
696
697
698 /***
699 * Verify whether a given string is escaped or not
700 *
701 * @param original given characters
702 * @return true if the given character array is 7 bit ASCII-compatible.
703 */
704 public static boolean verifyEscaped(char[] original) {
705 for (int i = 0; i < original.length; i++) {
706 int c = original[i];
707 if (c > 128) {
708 return false;
709 } else if (c == '%') {
710 if (Character.digit(original[++i], 16) == -1
711 || Character.digit(original[++i], 16) == -1) {
712 return false;
713 }
714 }
715 }
716 return true;
717 }
718
719
720 /***
721 * Replace from a given character to given character in an array order
722 * for a given string.
723 *
724 * @param original a given string
725 * @param from a replacing character array
726 * @param to a replaced character array
727 * @return the replaced string
728 */
729 public static String replace(String original, char[] from, char[] to) {
730 for (int i = from.length; i > 0; --i) {
731 original = replace(original, from[i], to[i]);
732 }
733 return original.toString();
734 }
735
736
737 /***
738 * Replace from a given character to given character for a given string.
739 *
740 * @param original a given string
741 * @param from a replacing character array
742 * @param to a replaced character array
743 * @return the replaced string
744 */
745 public static String replace(String original, char from, char to) {
746 StringBuffer result = new StringBuffer(original.length());
747 int at, saved = 0;
748 do {
749 at = original.indexOf(from);
750 if (at >= 0) {
751 result.append(original.substring(0, at));
752 result.append(to);
753 } else {
754 result.append(original.substring(saved));
755 }
756 saved = at;
757 } while (at >= 0);
758 return result.toString();
759 }
760 }
761
762 }
763