1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 package org.apache.commons.httpclient;
33
34 import java.io.IOException;
35 import java.io.ObjectInputStream;
36 import java.io.ObjectOutputStream;
37 import java.io.Serializable;
38 import java.io.UnsupportedEncodingException;
39 import java.util.Locale;
40 import java.util.BitSet;
41 import java.util.Hashtable;
42 import java.net.URL;
43
44 /***
45 * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
46 * This class supports parsing a URI reference so that it can be extended
47 * for any specific protocol, taking into account the character encoding of
48 * the protocol to be transported and the charset of the document.
49 * <p>
50 * A URI is always in an "escaped" form, since escaping or unescaping a
51 * completed URI might change its semantics.
52 * <p>
53 * Implementers should be careful not to escape or unescape the same string
54 * more than once, since unescaping an already unescaped string might lead to
55 * misinterpreting a percent data character as another escaped character,
56 * or vice versa in the case of escaping an already escaped string.
57 * <p>
58 * In order to avoid these problems, data types are used as follows:
59 * <p><blockquote><pre>
60 * URI character sequence: char
61 * octet sequence: byte
62 * original character sequence: String
63 * </pre></blockquote><p>
64 *
65 * So, a URI is a sequence of characters as an array of a char type, which
66 * is not always represented as a sequence of octets as an array of byte.
67 * <p>
68 *
69 * URI Syntactic Components
70 * <p><blockquote><pre>
71 * - In general, written as follows:
72 * Absolute URI = <scheme>:<scheme-specific-part>
73 * Generic URI = <scheme>://<authority><path>?<query>
74 *
75 * - Syntax
76 * absoluteURI = scheme ":" ( hier_part | opaque_part )
77 * hier_part = ( net_path | abs_path ) [ "?" query ]
78 * net_path = "//" authority [ abs_path ]
79 * abs_path = "/" path_segments
80 * </pre></blockquote><p>
81 *
82 * The following examples illustrate URI that are in common use.
83 * <pre>
84 * ftp://ftp.is.co.za/rfc/rfc1808.txt
85 * -- ftp scheme for File Transfer Protocol services
86 * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
87 * -- gopher scheme for Gopher and Gopher+ Protocol services
88 * http://www.math.uio.no/faq/compression-faq/part1.html
89 * -- http scheme for Hypertext Transfer Protocol services
90 * mailto:mduerst@ifi.unizh.ch
91 * -- mailto scheme for electronic mail addresses
92 * news:comp.infosystems.www.servers.unix
93 * -- news scheme for USENET news groups and articles
94 * telnet://melvyl.ucop.edu/
95 * -- telnet scheme for interactive services via the TELNET Protocol
96 * </pre>
97 * Please, notice that there are many modifications from URL(RFC 1738) and
98 * relative URL(RFC 1808).
99 * <p>
100 * <b>The expressions for a URI</b>
101 * <p><pre>
102 * For escaped URI forms
103 * - URI(char[]) // constructor
104 * - char[] getRawXxx() // method
105 * - String getEscapedXxx() // method
106 * - String toString() // method
107 * <p>
108 * For unescaped URI forms
109 * - URI(String) // constructor
110 * - String getXXX() // method
111 * </pre><p>
112 *
113 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
114 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
115 * @version $Revision: 1.36.2.5 $ $Date: 2002/03/14 15:14:01
116 */
117 public class URI implements Cloneable, Comparable, Serializable {
118
119
120
121
/**
 * Creates an instance without parsing anything; meant for internal use
 * by this class and its subclasses.
 */
protected URI() {
    // intentionally empty: the fields keep their declared initial values
}
125
126
127 /***
128 * Construct a URI as an escaped form of a character array with the given
129 * charset.
130 *
131 * @param escaped the URI character sequence
132 * @param charset the charset string to do escape encoding
133 * @throws URIException If the URI cannot be created.
134 * @throws NullPointerException if <code>escaped</code> is <code>null</code>
135 * @see #getProtocolCharset
136 */
137 public URI(char[] escaped, String charset)
138 throws URIException, NullPointerException {
139 protocolCharset = charset;
140 parseUriReference(new String(escaped), true);
141 }
142
143
144 /***
145 * Construct a URI as an escaped form of a character array.
146 * An URI can be placed within double-quotes or angle brackets like
147 * "http://test.com/" and <http://test.com/>
148 *
149 * @param escaped the URI character sequence
150 * @throws URIException If the URI cannot be created.
151 * @throws NullPointerException if <code>escaped</code> is <code>null</code>
152 * @see #getDefaultProtocolCharset
153 */
154 public URI(char[] escaped)
155 throws URIException, NullPointerException {
156 parseUriReference(new String(escaped), true);
157 }
158
159
160 /***
161 * Construct a URI from the given string with the given charset.
162 *
163 * @param original the string to be represented to URI character sequence
164 * It is one of absoluteURI and relativeURI.
165 * @param charset the charset string to do escape encoding
166 * @throws URIException If the URI cannot be created.
167 * @see #getProtocolCharset
168 */
169 public URI(String original, String charset) throws URIException {
170 protocolCharset = charset;
171 parseUriReference(original, false);
172 }
173
174
175 /***
176 * Construct a URI from the given string.
177 * <p><blockquote><pre>
178 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
179 * </pre></blockquote><p>
180 * An URI can be placed within double-quotes or angle brackets like
181 * "http://test.com/" and <http://test.com/>
182 *
183 * @param original the string to be represented to URI character sequence
184 * It is one of absoluteURI and relativeURI.
185 * @throws URIException If the URI cannot be created.
186 * @see #getDefaultProtocolCharset
187 */
188 public URI(String original) throws URIException {
189 parseUriReference(original, false);
190 }
191
192
/**
 * Builds a URI from the external string form of a {@link java.net.URL}.
 *
 * @param url a valid URL
 * @throws URIException if the URI cannot be created
 * @since 2.0
 * @deprecated currently somewhat wrong and different from java.net.URL usage
 */
public URI(URL url) throws URIException {
    // URL.toString() is specified to return toExternalForm()
    this(url.toExternalForm());
}
204
205
/**
 * Construct an opaque absolute URI from the given components.
 * <p><blockquote><pre>
 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 *   opaque_part   = uric_no_slash *uric
 * </pre></blockquote><p>
 * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
 * &lt;fragment&gt;.
 *
 * @param scheme the scheme string; must not be <code>null</code>
 * @param schemeSpecificPart scheme_specific_part
 * @param fragment the fragment string, or <code>null</code> when the URI
 * has no fragment
 * @throws URIException if the scheme is missing or invalid, or the URI
 * cannot be created
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String schemeSpecificPart, String fragment)
    throws URIException {

    if (scheme == null) {
        throw new URIException(URIException.PARSING, "scheme required");
    }
    // Scheme names are protocol tokens: lower-case them independently of
    // the default locale (e.g. a Turkish locale maps 'I' to a dotless i).
    char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
    if (!validate(s, URI.scheme)) {
        throw new URIException(URIException.PARSING, "incorrect scheme");
    }
    _scheme = s;
    _opaque = encode(schemeSpecificPart, allowed_opaque_part,
            getProtocolCharset());

    _is_opaque_part = true;
    // The fragment is optional; a null fragment used to raise a
    // NullPointerException here.
    _fragment = (fragment == null) ? null : fragment.toCharArray();

    setURI();
}
243
244
245 /***
246 * Construct a general URI from the given components.
247 * <p><blockquote><pre>
248 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
249 * absoluteURI = scheme ":" ( hier_part | opaque_part )
250 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
251 * hier_part = ( net_path | abs_path ) [ "?" query ]
252 * </pre></blockquote><p>
253 * It's for absolute URI = <scheme>:<path>?<query>#<
254 * fragment> and relative URI = <path>?<query>#<fragment
255 * >.
256 *
257 * @param scheme the scheme string
258 * @param authority the authority string
259 * @param path the path string
260 * @param query the query string
261 * @param fragment the fragment string
262 * @throws URIException If the new URI cannot be created.
263 * @see #getDefaultProtocolCharset
264 */
265 public URI(String scheme, String authority, String path, String query,
266 String fragment) throws URIException {
267
268
269 StringBuffer buff = new StringBuffer();
270 if (scheme != null) {
271 buff.append(scheme);
272 buff.append(':');
273 }
274 if (authority != null) {
275 buff.append("//");
276 buff.append(authority);
277 }
278 if (path != null) {
279 if ((scheme != null || authority != null)
280 && !path.startsWith("/")) {
281 throw new URIException(URIException.PARSING,
282 "abs_path requested");
283 }
284 buff.append(path);
285 }
286 if (query != null) {
287 buff.append('?');
288 buff.append(query);
289 }
290 if (fragment != null) {
291 buff.append('#');
292 buff.append(fragment);
293 }
294 parseUriReference(buff.toString(), false);
295 }
296
297
/**
 * Builds a general URI from scheme, userinfo, host and port; path, query
 * and fragment are passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @throws URIException if the new URI cannot be created
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port)
    throws URIException {
    this(scheme, userinfo, host, port,
         null,   // path
         null,   // query
         null);  // fragment
}
313
314
/**
 * Builds a general URI from scheme, userinfo, host, port and path; query
 * and fragment are passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @throws URIException if the new URI cannot be created
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port,
    String path) throws URIException {
    this(scheme, userinfo, host, port, path,
         null,   // query
         null);  // fragment
}
331
332
/**
 * Builds a general URI from scheme, userinfo, host, port, path and query;
 * the fragment is passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @param query the query string
 * @throws URIException if the new URI cannot be created
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port,
    String path, String query) throws URIException {
    this(scheme, userinfo, host, port, path, query,
         null);  // fragment
}
350
351
/**
 * Builds a general URI from the given components.
 * <p>
 * The authority handed on is <code>[userinfo "@"] host [":" port]</code>.
 * A <code>null</code> host yields a <code>null</code> authority, a
 * <code>null</code> userinfo is left out, and a port of <code>-1</code>
 * means "no port".
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @param query the query string
 * @param fragment the fragment string
 * @throws URIException if the new URI cannot be created
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port,
    String path, String query, String fragment) throws URIException {
    this(scheme,
         (host != null)
             ? ((userinfo == null) ? "" : userinfo + '@')
                 + host
                 + ((port == -1) ? "" : ":" + port)
             : null,
         path, query, fragment);
}
372
373
/**
 * Builds a general URI from scheme, host, path and fragment; the host is
 * passed on as the authority and the query as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param host the host string
 * @param path the path string
 * @param fragment the fragment string
 * @throws URIException if the new URI cannot be created
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String host, String path, String fragment)
    throws URIException {
    this(scheme, host, path,
         null,   // query
         fragment);
}
389
390
/**
 * Resolves a relative URI string against a base URI. The string is first
 * turned into a URI with {@link #URI(String)} and then resolved with
 * {@link #URI(URI, URI)}.
 *
 * @param base the base URI
 * @param relative the relative URI string
 * @throws URIException if the new URI cannot be created
 */
public URI(URI base, String relative) throws URIException {
    this(base, new URI(relative));
}
401
402
/**
 * Construct a general URI by resolving the given relative URI against a
 * base URI.
 * <p><blockquote><pre>
 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 * </pre></blockquote><p>
 * Resolving Relative References to Absolute Form.
 *
 * <strong>Examples of Resolving Relative URI References</strong>
 *
 * Within an object with a well-defined base URI of
 * <p><blockquote><pre>
 *   http://a/b/c/d;p?q
 * </pre></blockquote><p>
 * the relative URI would be resolved as follows:
 *
 * Normal Examples
 *
 * <p><blockquote><pre>
 *   g:h           =  g:h
 *   g             =  http://a/b/c/g
 *   ./g           =  http://a/b/c/g
 *   g/            =  http://a/b/c/g/
 *   /g            =  http://a/g
 *   //g           =  http://g
 *   ?y            =  http://a/b/c/?y
 *   g?y           =  http://a/b/c/g?y
 *   #s            =  (current document)#s
 *   g#s           =  http://a/b/c/g#s
 *   g?y#s         =  http://a/b/c/g?y#s
 *   ;x            =  http://a/b/c/;x
 *   g;x           =  http://a/b/c/g;x
 *   g;x?y#s       =  http://a/b/c/g;x?y#s
 *   .             =  http://a/b/c/
 *   ./            =  http://a/b/c/
 *   ..            =  http://a/b/
 *   ../           =  http://a/b/
 *   ../g          =  http://a/b/g
 *   ../..         =  http://a/
 *   ../../        =  http://a/
 *   ../../g       =  http://a/g
 * </pre></blockquote><p>
 *
 * Some URI schemes do not allow a hierarchical syntax matching the
 * &lt;hier_part&gt; syntax, and thus cannot use relative references.
 *
 * @param base the base URI; it must have a scheme
 * @param relative the relative URI
 * @throws URIException if the base has no scheme or the new URI cannot be
 * created
 */
public URI(URI base, URI relative) throws URIException {

    if (base._scheme == null) {
        throw new URIException(URIException.PARSING, "base URI required");
    }
    // The base has a scheme from here on: start from its scheme and
    // authority and override them below where the reference says so.
    this._scheme = base._scheme;
    this._authority = base._authority;

    if (base._is_opaque_part || relative._is_opaque_part) {
        // A reference that carries its own scheme is absolute and keeps
        // that scheme ("g:h = g:h"); previously the base scheme was
        // forced onto it.
        if (relative._scheme != null) {
            this._scheme = relative._scheme;
        }
        // at least one side is opaque inside this branch
        this._is_opaque_part = true;
        this._opaque = relative._opaque;
        this._fragment = relative._fragment;
        this.setURI();
        return;
    }

    if (relative._scheme != null) {
        // absolute reference: scheme, authority and path all come from it
        this._scheme = relative._scheme;
        copyAuthorityFrom(relative);
        this._is_abs_path = relative._is_abs_path;
        this._is_rel_path = relative._is_rel_path;
        this._path = relative._path;
    } else if (base._authority != null) {
        // no scheme on the reference: inherit the base's authority
        copyAuthorityFrom(base);
    }

    if (relative._authority != null) {
        // network-path reference ("//g"): authority and path come from it
        copyAuthorityFrom(relative);
        this._is_abs_path = relative._is_abs_path;
        this._is_rel_path = relative._is_rel_path;
        this._path = relative._path;
    }

    if (relative._scheme == null && relative._authority == null) {
        if ((relative._path == null || relative._path.length == 0)
            && relative._query == null) {
            // empty path and no query: keep the base's path and query
            // (the "#s" row in the table above)
            this._path = base._path;
            this._query = base._query;
        } else {
            this._path = resolvePath(base._path, relative._path);
        }
    }

    if (relative._query != null) {
        this._query = relative._query;
    }

    if (relative._fragment != null) {
        this._fragment = relative._fragment;
    }
    this.setURI();

    // Parse the assembled URI text again as an escaped reference.
    // NOTE(review): presumably this derives the remaining component flags
    // from the final string - confirm against parseUriReference.
    parseUriReference(new String(_uri), true);
}


/**
 * Copies the authority-related state of another URI into this instance:
 * the net_path flag, the authority itself and, depending on whether the
 * source authority is a server or a registry name, the matching flag and
 * server components.
 *
 * @param source the URI to copy the authority state from
 */
private void copyAuthorityFrom(URI source) {
    this._is_net_path = source._is_net_path;
    this._authority = source._authority;
    if (source._is_server) {
        this._is_server = source._is_server;
        this._userinfo = source._userinfo;
        this._host = source._host;
        this._port = source._port;
    } else if (source._is_reg_name) {
        this._is_reg_name = source._is_reg_name;
    }
}
538
539
540
541 /*** Version ID for serialization */
542 static final long serialVersionUID = 604752400577948726L;
543
544
545 /***
546 * Cache the hash code for this URI.
547 */
548 protected int hash = 0;
549
550
551 /***
552 * This Uniform Resource Identifier (URI).
553 * The URI is always in an "escaped" form, since escaping or unescaping
554 * a completed URI might change its semantics.
555 */
556 protected char[] _uri = null;
557
558
559 /***
560 * The charset of the protocol used by this URI instance.
561 */
562 protected String protocolCharset = null;
563
564
565 /***
566 * The default charset of the protocol. RFC 2277, 2396
567 */
568 protected static String defaultProtocolCharset = "UTF-8";
569
570
571 /***
572 * The default charset of the document. RFC 2277, 2396
573 * The platform's charset is used for the document by default.
574 */
575 protected static String defaultDocumentCharset = null;
576 protected static String defaultDocumentCharsetByLocale = null;
577 protected static String defaultDocumentCharsetByPlatform = null;
578
579 static {
580 Locale locale = Locale.getDefault();
581
582 if (locale != null) {
583 defaultDocumentCharsetByLocale =
584 LocaleToCharsetMap.getCharset(locale);
585
586 defaultDocumentCharset = defaultDocumentCharsetByLocale;
587 }
588
589 try {
590 defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
591 } catch(SecurityException ignore) {
592 }
593 if (defaultDocumentCharset == null) {
594
595 defaultDocumentCharset = defaultDocumentCharsetByPlatform;
596 }
597 }
598
599
600 /***
601 * The scheme.
602 */
603 protected char[] _scheme = null;
604
605
606 /***
607 * The opaque.
608 */
609 protected char[] _opaque = null;
610
611
612 /***
613 * The authority.
614 */
615 protected char[] _authority = null;
616
617
618 /***
619 * The userinfo.
620 */
621 protected char[] _userinfo = null;
622
623
624 /***
625 * The host.
626 */
627 protected char[] _host = null;
628
629
630 /***
631 * The port.
632 */
633 protected int _port = -1;
634
635
636 /***
637 * The path.
638 */
639 protected char[] _path = null;
640
641
642 /***
643 * The query.
644 */
645 protected char[] _query = null;
646
647
648 /***
649 * The fragment.
650 */
651 protected char[] _fragment = null;
652
653
654 /***
655 * The root path.
656 */
657 protected static char[] rootPath = { '/' };
658
659
660
/**
 * BitSet containing only the percent sign. "%" always has the reserved
 * purpose of being the escape indicator, so it must itself be escaped as
 * "%25" in order to be used as data within a URI.
 */
protected static final BitSet percent = new BitSet(256);

static {
    final char escapeIndicator = '%';
    percent.set(escapeIndicator);
}
671
672
/**
 * BitSet of the decimal digits.
 * <p><blockquote><pre>
 * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 *            "8" | "9"
 * </pre></blockquote><p>
 */
protected static final BitSet digit = new BitSet(256);

static {
    for (char c = '0'; c <= '9'; c++) {
        digit.set(c);
    }
}
687
688
/**
 * BitSet of the ASCII letters, lower and upper case.
 * <p><blockquote><pre>
 *  alpha         = lowalpha | upalpha
 * </pre></blockquote><p>
 */
protected static final BitSet alpha = new BitSet(256);

static {
    // walk both cases of the 26 letters in one pass
    for (int offset = 0; offset < 26; offset++) {
        alpha.set('a' + offset);
        alpha.set('A' + offset);
    }
}
705
706
/**
 * BitSet of letters and digits (the union of alpha and digit).
 * <p><blockquote><pre>
 *  alphanum      = alpha | digit
 * </pre></blockquote><p>
 */
protected static final BitSet alphanum = new BitSet(256);

static {
    alphanum.or(digit);
    alphanum.or(alpha);
}
719
720
/**
 * BitSet of the hexadecimal digits in either case.
 * <p><blockquote><pre>
 * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *                         "a" | "b" | "c" | "d" | "e" | "f"
 * </pre></blockquote><p>
 */
protected static final BitSet hex = new BitSet(256);

static {
    final String letters = "abcdefABCDEF";
    for (int i = 0; i < letters.length(); i++) {
        hex.set(letters.charAt(i));
    }
    hex.or(digit);
}
739
740
/**
 * BitSet of the characters that can occur in an escape sequence: the
 * percent sign and the hex digits. It is a character class only and does
 * not check the "%" hex hex structure.
 * <p><blockquote><pre>
 * escaped       = "%" hex hex
 * </pre></blockquote><p>
 */
protected static final BitSet escaped = new BitSet(256);

static {
    escaped.or(hex);
    escaped.or(percent);
}
753
754
/**
 * BitSet of the mark characters.
 * <p><blockquote><pre>
 * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
 *                 "(" | ")"
 * </pre></blockquote><p>
 */
protected static final BitSet mark = new BitSet(256);

static {
    final String marks = "-_.!~*'()";
    for (int i = 0; i < marks.length(); i++) {
        mark.set(marks.charAt(i));
    }
}
775
776
/**
 * BitSet of the unreserved characters: data characters that are allowed
 * in a URI but do not have a reserved purpose.
 * <p><blockquote><pre>
 * unreserved    = alphanum | mark
 * </pre></blockquote><p>
 */
protected static final BitSet unreserved = new BitSet(256);

static {
    final BitSet[] parts = { alphanum, mark };
    for (int i = 0; i < parts.length; i++) {
        unreserved.or(parts[i]);
    }
}
790
791
/**
 * BitSet of the reserved characters.
 * <p><blockquote><pre>
 * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
 *                 "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet reserved = new BitSet(256);

static {
    final String delimiters = ";/?:@&=+$,";
    for (int i = 0; i < delimiters.length(); i++) {
        reserved.set(delimiters.charAt(i));
    }
}
813
814
/**
 * BitSet of every character that may appear in a URI: reserved,
 * unreserved and the characters of escape sequences.
 * <p><blockquote><pre>
 * uric          = reserved | unreserved | escaped
 * </pre></blockquote><p>
 */
protected static final BitSet uric = new BitSet(256);

static {
    final BitSet[] parts = { reserved, unreserved, escaped };
    for (int i = 0; i < parts.length; i++) {
        uric.or(parts[i]);
    }
}


/**
 * BitSet for fragment; the very same object as uric.
 * <p><blockquote><pre>
 * fragment      = *uric
 * </pre></blockquote><p>
 */
protected static final BitSet fragment = uric;


/**
 * BitSet for query; the very same object as uric.
 * <p><blockquote><pre>
 * query         = *uric
 * </pre></blockquote><p>
 */
protected static final BitSet query = uric;
846
847
/**
 * BitSet of the characters allowed in a path segment.
 * <p><blockquote><pre>
 * pchar         = unreserved | escaped |
 *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet pchar = new BitSet(256);

static {
    final String extras = ":@&=+$,";
    for (int i = 0; i < extras.length(); i++) {
        pchar.set(extras.charAt(i));
    }
    pchar.or(unreserved);
    pchar.or(escaped);
}


/**
 * BitSet for param; the very same object as pchar.
 * <p><blockquote><pre>
 * param         = *pchar
 * </pre></blockquote><p>
 */
protected static final BitSet param = pchar;
877
878
/**
 * BitSet of the characters allowed in a segment: the pchar and param
 * characters plus the ";" that separates them.
 * <p><blockquote><pre>
 * segment       = *pchar *( ";" param )
 * </pre></blockquote><p>
 */
protected static final BitSet segment = new BitSet(256);

static {
    segment.set(';');
    segment.or(pchar);
    segment.or(param);
}
892
893
/**
 * BitSet of the characters allowed in path segments: the segment
 * characters plus the "/" separator.
 * <p><blockquote><pre>
 * path_segments = segment *( "/" segment )
 * </pre></blockquote><p>
 */
protected static final BitSet path_segments = new BitSet(256);

static {
    path_segments.or(segment);
    path_segments.set('/');
}
906
907
/**
 * BitSet of the characters allowed in an absolute path.
 * <p><blockquote><pre>
 * abs_path      = "/"  path_segments
 * </pre></blockquote><p>
 */
protected static final BitSet abs_path = new BitSet(256);

static {
    abs_path.or(path_segments);
    abs_path.set('/');
}
920
921
/**
 * BitSet of every URI character except the slash.
 * <p><blockquote><pre>
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&amp;" | "=" | "+" | "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet uric_no_slash = new BitSet(256);

static {
    uric_no_slash.or(unreserved);
    uric_no_slash.or(escaped);
    uric_no_slash.set(';');
    uric_no_slash.set('?');
    // ':' belongs to the grammar above; ';' used to be set a second time
    // here instead, which left ':' out of the set
    uric_no_slash.set(':');
    uric_no_slash.set('@');
    uric_no_slash.set('&');
    uric_no_slash.set('=');
    uric_no_slash.set('+');
    uric_no_slash.set('$');
    uric_no_slash.set(',');
}
944
945
/**
 * BitSet of the characters allowed in an opaque part: the union of
 * uric_no_slash and uric.
 * <p><blockquote><pre>
 * opaque_part   = uric_no_slash *uric
 * </pre></blockquote><p>
 */
protected static final BitSet opaque_part = new BitSet(256);

static {
    opaque_part.or(uric);
    opaque_part.or(uric_no_slash);
}
959
960
/**
 * BitSet of the characters allowed in a path: the union of the absolute
 * path and opaque part character sets.
 * <p><blockquote><pre>
 * path          = [ abs_path | opaque_part ]
 * </pre></blockquote><p>
 */
protected static final BitSet path = new BitSet(256);

static {
    path.or(opaque_part);
    path.or(abs_path);
}
973
974
975 /***
976 * Port, a logical alias for digit.
977 */
978 protected static final BitSet port = digit;
979
980
/**
 * BitSet of the characters of an IPv4 address: the digits and the dot.
 * <p><blockquote><pre>
 * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
 * </pre></blockquote><p>
 */
protected static final BitSet IPv4address = new BitSet(256);

static {
    IPv4address.set('.');
    IPv4address.or(digit);
}
993
994
/**
 * BitSet of the characters of an IPv6 address: hex digits, the colon and
 * the IPv4 address characters. RFC 2373.
 * <p><blockquote><pre>
 * IPv6address = hexpart [ ":" IPv4address ]
 * </pre></blockquote><p>
 */
protected static final BitSet IPv6address = new BitSet(256);

static {
    IPv6address.or(IPv4address);
    IPv6address.or(hex);
    IPv6address.set(':');
}
1008
1009
/**
 * BitSet of the characters of a bracketed IPv6 reference. RFC 2732, 2373.
 * <p><blockquote><pre>
 * IPv6reference = "[" IPv6address "]"
 * </pre></blockquote><p>
 */
protected static final BitSet IPv6reference = new BitSet(256);

static {
    final String brackets = "[]";
    for (int i = 0; i < brackets.length(); i++) {
        IPv6reference.set(brackets.charAt(i));
    }
    IPv6reference.or(IPv6address);
}
1023
1024
/**
 * BitSet of the characters of a top label: letters, digits and the
 * hyphen.
 * <p><blockquote><pre>
 * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
 * </pre></blockquote><p>
 */
protected static final BitSet toplabel = new BitSet(256);

static {
    toplabel.set('-');
    toplabel.or(alphanum);
}


/**
 * BitSet for domainlabel; the very same object as toplabel.
 * <p><blockquote><pre>
 * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
 * </pre></blockquote><p>
 */
protected static final BitSet domainlabel = toplabel;
1046
1047
/**
 * BitSet of the characters of a host name: the toplabel characters
 * (domainlabel is the same set) plus the dot.
 * <p><blockquote><pre>
 * hostname      = *( domainlabel "." ) toplabel [ "." ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostname = new BitSet(256);

static {
    hostname.set('.');
    hostname.or(toplabel);
}
1061
1062
/**
 * BitSet of the characters of a host.
 * <p><blockquote><pre>
 * host          = hostname | IPv4address | IPv6reference
 * </pre></blockquote><p>
 * IPv4address is not merged explicitly: its characters (digits and ".")
 * are already part of hostname.
 */
protected static final BitSet host = new BitSet(256);

static {
    host.or(IPv6reference);
    host.or(hostname);
}
1076
1077
/**
 * BitSet of the characters of a host with an optional port.
 * <p><blockquote><pre>
 * hostport      = host [ ":" port ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostport = new BitSet(256);

static {
    hostport.or(port);
    hostport.or(host);
    hostport.set(':');
}
1091
1092
/**
 * BitSet of the characters allowed in userinfo.
 * <p><blockquote><pre>
 * userinfo      = *( unreserved | escaped |
 *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet userinfo = new BitSet(256);

static {
    final String extras = ";:&=+$,";
    for (int i = 0; i < extras.length(); i++) {
        userinfo.set(extras.charAt(i));
    }
    userinfo.or(unreserved);
    userinfo.or(escaped);
}
1113
1114
/**
 * BitSet of the characters allowed within the parts of the userinfo
 * component, like user and password: the userinfo set with the
 * delimiters ";", ":", "@", "?" and "/" removed.
 */
public static final BitSet within_userinfo = new BitSet(256);

static {
    within_userinfo.or(userinfo);
    // the removals must come after the union above
    final String excluded = ";:@?/";
    for (int i = 0; i < excluded.length(); i++) {
        within_userinfo.clear(excluded.charAt(i));
    }
}
1128
1129
/**
 * BitSet of the characters of a server authority.
 * <p><blockquote><pre>
 * server        = [ [ userinfo "@" ] hostport ]
 * </pre></blockquote><p>
 */
protected static final BitSet server = new BitSet(256);

static {
    server.or(hostport);
    server.or(userinfo);
    server.set('@');
}
1143
1144
/**
 * BitSet of the characters of a registry-based naming authority.
 * <p><blockquote><pre>
 * reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                     ";" | ":" | "@" | "&amp;" | "=" | "+" )
 * </pre></blockquote><p>
 */
protected static final BitSet reg_name = new BitSet(256);

static {
    final String extras = "$,;:@&=+";
    for (int i = 0; i < extras.length(); i++) {
        reg_name.set(extras.charAt(i));
    }
    reg_name.or(unreserved);
    reg_name.or(escaped);
}
1166
1167
/**
 * BitSet of the characters of an authority, whether it is a server or a
 * registry name.
 * <p><blockquote><pre>
 * authority     = server | reg_name
 * </pre></blockquote><p>
 */
protected static final BitSet authority = new BitSet(256);

static {
    authority.or(reg_name);
    authority.or(server);
}
1180
1181
/**
 * BitSet of the characters of a scheme name. It is a character class
 * only; that a scheme starts with a letter is not expressed here.
 * <p><blockquote><pre>
 * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
 * </pre></blockquote><p>
 */
protected static final BitSet scheme = new BitSet(256);

static {
    final String extras = "+-.";
    for (int i = 0; i < extras.length(); i++) {
        scheme.set(extras.charAt(i));
    }
    scheme.or(alpha);
    scheme.or(digit);
}
1197
1198
/**
 * BitSet of the characters of the first segment of a relative path.
 * <p><blockquote><pre>
 * rel_segment   = 1*( unreserved | escaped |
 *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet rel_segment = new BitSet(256);

static {
    final String extras = ";@&=+$,";
    for (int i = 0; i < extras.length(); i++) {
        rel_segment.set(extras.charAt(i));
    }
    rel_segment.or(unreserved);
    rel_segment.or(escaped);
}
1219
1220
/**
 * BitSet of the characters of a relative path.
 * <p><blockquote><pre>
 * rel_path      = rel_segment [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet rel_path = new BitSet(256);

static {
    rel_path.or(abs_path);
    rel_path.or(rel_segment);
}
1233
1234
/**
 * BitSet of the characters of a network path.
 * <p><blockquote><pre>
 * net_path      = "//" authority [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet net_path = new BitSet(256);

static {
    net_path.or(abs_path);
    net_path.or(authority);
    net_path.set('/');
}
1248
1249
/**
 * BitSet of the characters of a hierarchical part.
 * <p><blockquote><pre>
 * hier_part     = ( net_path | abs_path ) [ "?" query ]
 * </pre></blockquote><p>
 * "?" is not set separately: query is the uric set, which already
 * includes it through the reserved characters.
 */
protected static final BitSet hier_part = new BitSet(256);

static {
    final BitSet[] parts = { net_path, abs_path, query };
    for (int i = 0; i < parts.length; i++) {
        hier_part.or(parts[i]);
    }
}
1264
1265
/**
 * Characters that may occur in a relative URI.
 * <p><blockquote><pre>
 * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 * </pre></blockquote><p>
 */
protected static final BitSet relativeURI = new BitSet(256);

static {
    // Only the character classes are merged; no '?' is set explicitly here.
    relativeURI.or(query);
    relativeURI.or(rel_path);
    relativeURI.or(abs_path);
    relativeURI.or(net_path);
}
1281
1282
/**
 * Characters that may occur in an absolute URI.
 * <p><blockquote><pre>
 * absoluteURI   = scheme ":" ( hier_part | opaque_part )
 * </pre></blockquote><p>
 */
protected static final BitSet absoluteURI = new BitSet(256);

static {
    // the separator between the scheme and the rest
    absoluteURI.set(':');
    absoluteURI.or(opaque_part);
    absoluteURI.or(hier_part);
    absoluteURI.or(scheme);
}
1297
1298
/**
 * Characters that may occur in a URI reference.
 * <p><blockquote><pre>
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 * </pre></blockquote><p>
 */
protected static final BitSet URI_reference = new BitSet(256);

static {
    // the fragment introducer and the fragment characters themselves
    URI_reference.set('#');
    URI_reference.or(fragment);
    // either form of URI may precede the fragment
    URI_reference.or(relativeURI);
    URI_reference.or(absoluteURI);
}
1313
1314
1315
1316
/**
 * The control characters: code points 0x00 through 0x1F, plus 0x7F.
 */
public static final BitSet control = new BitSet(256);

static {
    control.set(0x7F);
    for (int code = 0x1F; code >= 0; code--) {
        control.set(code);
    }
}
1328
/**
 * The space character (0x20) as a one-element set.
 */
public static final BitSet space = new BitSet(256);

static {
    final int spaceCode = 0x20;
    space.set(spaceCode);
}
1337
1338
/**
 * The delimiter characters: angle brackets, number sign, percent sign
 * and double quote.
 */
public static final BitSet delims = new BitSet(256);

static {
    final char[] members = {'"', '%', '#', '>', '<'};
    for (int i = 0; i < members.length; i++) {
        delims.set(members[i]);
    }
}
1351
1352
/**
 * The "unwise" characters of RFC 2396, section 2.4.3.
 * <p><blockquote><pre>
 * unwise        = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 * </pre></blockquote><p>
 */
public static final BitSet unwise = new BitSet(256);

static {
    unwise.set('{');
    unwise.set('}');
    unwise.set('|');
    // backslash: must be written as an escaped char literal
    unwise.set('\\');
    unwise.set('^');
    unwise.set('[');
    unwise.set(']');
    unwise.set('`');
}
1368
1369
/**
 * Characters rejected in a rel_path before escaping: every member of
 * <code>uric</code> that is not also a member of <code>rel_path</code>.
 */
public static final BitSet disallowed_rel_path = new BitSet(256);

static {
    // start from uric, then remove what rel_path accepts
    disallowed_rel_path.or(uric);
    disallowed_rel_path.andNot(rel_path);
}
1379
1380
/**
 * Characters rejected in an opaque_part before escaping: every member of
 * <code>uric</code> that is not also a member of <code>opaque_part</code>.
 */
public static final BitSet disallowed_opaque_part = new BitSet(256);

static {
    // start from uric, then remove what opaque_part accepts
    disallowed_opaque_part.or(uric);
    disallowed_opaque_part.andNot(opaque_part);
}
1390
1391
1392
/**
 * Characters that may stay unescaped in the authority component:
 * the <code>authority</code> set without the percent sign.
 */
public static final BitSet allowed_authority = new BitSet(256);

static {
    allowed_authority.or(authority);
    // '%' is removed so that encoding escapes it
    allowed_authority.clear('%');
}
1402
1403
/**
 * Characters that may stay unescaped in the opaque_part:
 * the <code>opaque_part</code> set without the percent sign.
 */
public static final BitSet allowed_opaque_part = new BitSet(256);

static {
    allowed_opaque_part.or(opaque_part);
    // '%' is removed so that encoding escapes it
    allowed_opaque_part.clear('%');
}
1413
1414
/**
 * Characters that may stay unescaped in a reg_name:
 * the <code>reg_name</code> set without the percent sign.
 */
public static final BitSet allowed_reg_name = new BitSet(256);

static {
    allowed_reg_name.or(reg_name);
    // '%' is removed so that encoding escapes it
    allowed_reg_name.clear('%');
}
1425
1426
/**
 * Characters that may stay unescaped in the userinfo component:
 * the <code>userinfo</code> set without the percent sign.
 */
public static final BitSet allowed_userinfo = new BitSet(256);

static {
    allowed_userinfo.or(userinfo);
    // '%' is removed so that encoding escapes it
    allowed_userinfo.clear('%');
}
1437
1438
/**
 * Characters that may stay unescaped within the userinfo component:
 * the <code>within_userinfo</code> set without the percent sign.
 */
public static final BitSet allowed_within_userinfo = new BitSet(256);

static {
    allowed_within_userinfo.or(within_userinfo);
    // '%' is removed so that encoding escapes it
    allowed_within_userinfo.clear('%');
}
1448
1449
/**
 * Characters that may stay unescaped in an IPv6reference: the
 * <code>IPv6reference</code> set without the enclosing brackets
 * '[' and ']'.
 */
public static final BitSet allowed_IPv6reference = new BitSet(256);

static {
    allowed_IPv6reference.or(IPv6reference);
    // drop the two bracket characters
    allowed_IPv6reference.clear(']');
    allowed_IPv6reference.clear('[');
}
1462
1463
/**
 * Characters that may stay unescaped in the host component: the union of
 * <code>hostname</code> and <code>allowed_IPv6reference</code> (the
 * latter already excludes '[' and ']').
 */
public static final BitSet allowed_host = new BitSet(256);

static {
    allowed_host.or(allowed_IPv6reference);
    allowed_host.or(hostname);
}
1474
1475
/**
 * Characters that may stay unescaped within the authority component:
 * the union of <code>server</code> and <code>reg_name</code> minus the
 * characters ';', ':', '@', '?' and '/'.
 */
public static final BitSet allowed_within_authority = new BitSet(256);

static {
    allowed_within_authority.or(reg_name);
    allowed_within_authority.or(server);
    // remove the characters listed above
    final char[] removed = {';', ':', '@', '?', '/'};
    for (int i = 0; i < removed.length; i++) {
        allowed_within_authority.clear(removed[i]);
    }
}
1490
1491
/**
 * Characters that may stay unescaped in an abs_path: the
 * <code>abs_path</code> set minus every member of <code>percent</code>.
 */
public static final BitSet allowed_abs_path = new BitSet(256);

static {
    allowed_abs_path.or(abs_path);
    // subtract the percent set rather than clearing a single bit
    allowed_abs_path.andNot(percent);
}
1502
1503
/**
 * Characters that may stay unescaped in a rel_path:
 * the <code>rel_path</code> set without the percent sign.
 */
public static final BitSet allowed_rel_path = new BitSet(256);

static {
    allowed_rel_path.or(rel_path);
    // '%' is removed so that encoding escapes it
    allowed_rel_path.clear('%');
}
1513
1514
/**
 * Characters that may stay unescaped within a path: the
 * <code>abs_path</code> set minus the characters '/', ';', '=' and '?'.
 */
public static final BitSet allowed_within_path = new BitSet(256);

static {
    allowed_within_path.or(abs_path);
    // remove the characters listed above
    final char[] removed = {'/', ';', '=', '?'};
    for (int i = 0; i < removed.length; i++) {
        allowed_within_path.clear(removed[i]);
    }
}
1527
1528
/**
 * Characters that may stay unescaped in the query component:
 * the <code>uric</code> set without the percent sign.
 */
public static final BitSet allowed_query = new BitSet(256);

static {
    allowed_query.or(uric);
    // '%' is removed so that encoding escapes it
    allowed_query.clear('%');
}
1538
1539
/**
 * Characters that may stay unescaped within the query component:
 * <code>allowed_query</code> minus every <code>reserved</code> character.
 */
public static final BitSet allowed_within_query = new BitSet(256);

static {
    // start from allowed_query, then subtract the reserved set
    allowed_within_query.or(allowed_query);
    allowed_within_query.andNot(reserved);
}
1549
1550
/**
 * Characters that may stay unescaped in the fragment component:
 * the <code>uric</code> set without the percent sign.
 */
public static final BitSet allowed_fragment = new BitSet(256);

static {
    allowed_fragment.or(uric);
    // '%' is removed so that encoding escapes it
    allowed_fragment.clear('%');
}
1560
1561
1562
1563
1564
1565
1566
1567 protected boolean _is_hier_part;
1568 protected boolean _is_opaque_part;
1569
1570
1571 protected boolean _is_net_path;
1572 protected boolean _is_abs_path;
1573 protected boolean _is_rel_path;
1574
1575
1576 protected boolean _is_reg_name;
1577 protected boolean _is_server;
1578
1579
1580 protected boolean _is_hostname;
1581 protected boolean _is_IPv4address;
1582 protected boolean _is_IPv6reference;
1583
1584
1585
1586 /***
1587 * Encodes URI string.
1588 *
1589 * This is a two mapping, one from original characters to octets, and
1590 * subsequently a second from octets to URI characters:
1591 * <p><blockquote><pre>
1592 * original character sequence->octet sequence->URI character sequence
1593 * </pre></blockquote><p>
1594 *
1595 * An escaped octet is encoded as a character triplet, consisting of the
1596 * percent character "%" followed by the two hexadecimal digits
1597 * representing the octet code. For example, "%20" is the escaped
1598 * encoding for the US-ASCII space character.
1599 * <p>
1600 * Conversion from the local filesystem character set to UTF-8 will
1601 * normally involve a two step process. First convert the local character
1602 * set to the UCS; then convert the UCS to UTF-8.
1603 * The first step in the process can be performed by maintaining a mapping
1604 * table that includes the local character set code and the corresponding
1605 * UCS code.
1606 * The next step is to convert the UCS character code to the UTF-8 encoding.
1607 * <p>
1608 * Mapping between vendor codepages can be done in a very similar manner
1609 * as described above.
1610 * <p>
1611 * The only time escape encodings can allowedly be made is when a URI is
1612 * being created from its component parts. The escape and validate methods
1613 * are internally performed within this method.
1614 *
1615 * @param original the original character sequence
1616 * @param allowed those characters that are allowed within a component
1617 * @param charset the protocol charset
1618 * @return URI character sequence
1619 * @throws URIException null component or unsupported character encoding
1620 */
1621 protected static char[] encode(String original, BitSet allowed,
1622 String charset) throws URIException {
1623
1624
1625 if (original == null) {
1626 throw new URIException(URIException.PARSING, "null");
1627 }
1628
1629 if (allowed == null) {
1630 throw new URIException(URIException.PARSING,
1631 "null allowed characters");
1632 }
1633 byte[] octets;
1634 try {
1635 octets = original.getBytes(charset);
1636 } catch (UnsupportedEncodingException error) {
1637 throw new URIException(URIException.UNSUPPORTED_ENCODING, charset);
1638 }
1639 StringBuffer buf = new StringBuffer(octets.length);
1640 for (int i = 0; i < octets.length; i++) {
1641 char c = (char) octets[i];
1642 if (allowed.get(c)) {
1643 buf.append(c);
1644 } else {
1645 buf.append('%');
1646 byte b = octets[i];
1647 char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
1648 buf.append(Character.toUpperCase(hexadecimal));
1649 hexadecimal = Character.forDigit(b & 0xF, 16);
1650 buf.append(Character.toUpperCase(hexadecimal));
1651 }
1652 }
1653
1654 return buf.toString().toCharArray();
1655 }
1656
1657
1658 /***
1659 * Decodes URI encoded string.
1660 *
1661 * This is a two mapping, one from URI characters to octets, and
1662 * subsequently a second from octets to original characters:
1663 * <p><blockquote><pre>
1664 * URI character sequence->octet sequence->original character sequence
1665 * </pre></blockquote><p>
1666 *
1667 * A URI must be separated into its components before the escaped
1668 * characters within those components can be allowedly decoded.
1669 * <p>
1670 * Notice that there is a chance that URI characters that are non UTF-8
1671 * may be parsed as valid UTF-8. A recent non-scientific analysis found
1672 * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1673 * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1674 * false reading.
1675 * <p>
1676 * The percent "%" character always has the reserved purpose of being
1677 * the escape indicator, it must be escaped as "%25" in order to be used
1678 * as data within a URI.
1679 * <p>
1680 * The unescape method is internally performed within this method.
1681 *
1682 * @param component the URI character sequence
1683 * @param charset the protocol charset
1684 * @return original character sequence
1685 * @throws URIException incomplete trailing escape pattern or unsupported
1686 * character encoding
1687 */
1688 protected static String decode(char[] component, String charset)
1689 throws URIException {
1690
1691
1692 if (component == null) {
1693 return null;
1694 }
1695
1696 byte[] octets;
1697 try {
1698 octets = new String(component).getBytes(charset);
1699 } catch (UnsupportedEncodingException error) {
1700 throw new URIException(URIException.UNSUPPORTED_ENCODING,
1701 "not supported " + charset + " encoding");
1702 }
1703 int length = octets.length;
1704 int oi = 0;
1705 for (int ii = 0; ii < length; oi++) {
1706 byte aByte = (byte) octets[ii++];
1707 if (aByte == '%' && ii + 2 <= length) {
1708 byte high = (byte) Character.digit((char) octets[ii++], 16);
1709 byte low = (byte) Character.digit((char) octets[ii++], 16);
1710 if (high == -1 || low == -1) {
1711 throw new URIException(URIException.ESCAPING,
1712 "incomplete trailing escape pattern");
1713
1714 }
1715 aByte = (byte) ((high << 4) + low);
1716 }
1717 octets[oi] = (byte) aByte;
1718 }
1719
1720 String result;
1721 try {
1722 result = new String(octets, 0, oi, charset);
1723 } catch (UnsupportedEncodingException error) {
1724 throw new URIException(URIException.UNSUPPORTED_ENCODING,
1725 "not supported " + charset + " encoding");
1726 }
1727
1728 return result;
1729 }
1730
1731
1732 /***
1733 * Pre-validate the unescaped URI string within a specific component.
1734 *
1735 * @param component the component string within the component
1736 * @param disallowed those characters disallowed within the component
1737 * @return if true, it doesn't have the disallowed characters
1738 * if false, the component is undefined or an incorrect one
1739 */
1740 protected boolean prevalidate(String component, BitSet disallowed) {
1741
1742 if (component == null) {
1743 return false;
1744 }
1745 char[] target = component.toCharArray();
1746 for (int i = 0; i < target.length; i++) {
1747 if (disallowed.get(target[i])) {
1748 return false;
1749 }
1750 }
1751 return true;
1752 }
1753
1754
1755 /***
1756 * Validate the URI characters within a specific component.
1757 * The component must be performed after escape encoding. Or it doesn't
1758 * include escaped characters.
1759 *
1760 * @param component the characters sequence within the component
1761 * @param generous those characters that are allowed within a component
1762 * @return if true, it's the correct URI character sequence
1763 */
1764 protected boolean validate(char[] component, BitSet generous) {
1765
1766 return validate(component, 0, -1, generous);
1767 }
1768
1769
1770 /***
1771 * Validate the URI characters within a specific component.
1772 * The component must be performed after escape encoding. Or it doesn't
1773 * include escaped characters.
1774 * <p>
1775 * It's not that much strict, generous. The strict validation might be
1776 * performed before being called this method.
1777 *
1778 * @param component the characters sequence within the component
1779 * @param soffset the starting offset of the given component
1780 * @param eoffset the ending offset of the given component
1781 * if -1, it means the length of the component
1782 * @param generous those characters that are allowed within a component
1783 * @return if true, it's the correct URI character sequence
1784 */
1785 protected boolean validate(char[] component, int soffset, int eoffset,
1786 BitSet generous) {
1787
1788 if (eoffset == -1) {
1789 eoffset = component.length - 1;
1790 }
1791 for (int i = soffset; i <= eoffset; i++) {
1792 if (!generous.get(component[i])) {
1793 return false;
1794 }
1795 }
1796 return true;
1797 }
1798
1799
1800 /***
1801 * In order to avoid any possilbity of conflict with non-ASCII characters,
1802 * Parse a URI reference as a <code>String</code> with the character
1803 * encoding of the local system or the document.
1804 * <p>
1805 * The following line is the regular expression for breaking-down a URI
1806 * reference into its components.
1807 * <p><blockquote><pre>
1808 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1809 * 12 3 4 5 6 7 8 9
1810 * </pre></blockquote><p>
1811 * For example, matching the above expression to
1812 * http://jakarta.apache.org/ietf/uri/#Related
1813 * results in the following subexpression matches:
1814 * <p><blockquote><pre>
1815 * $1 = http:
1816 * scheme = $2 = http
1817 * $3 = //jakarta.apache.org
1818 * authority = $4 = jakarta.apache.org
1819 * path = $5 = /ietf/uri/
1820 * $6 = <undefined>
1821 * query = $7 = <undefined>
1822 * $8 = #Related
1823 * fragment = $9 = Related
1824 * </pre></blockquote><p>
1825 *
1826 * @param original the original character sequence
1827 * @param escaped <code>true</code> if <code>original</code> is escaped
1828 * @throws URIException If an error occurs.
1829 */
1830 protected void parseUriReference(String original, boolean escaped)
1831 throws URIException {
1832
1833
1834 if (original == null) {
1835 throw new URIException("URI-Reference required");
1836 }
1837
1838
1839
1840
1841 String tmp = original.trim();
1842
1843
1844
1845
1846
1847 int length = tmp.length();
1848
1849
1850
1851
1852 if (length > 0) {
1853 char[] firstDelimiter = { tmp.charAt(0) };
1854 if (validate(firstDelimiter, delims)) {
1855 if (length >= 2) {
1856 char[] lastDelimiter = { tmp.charAt(length - 1) };
1857 if (validate(lastDelimiter, delims)) {
1858 tmp = tmp.substring(1, length - 1);
1859 length = length - 2;
1860 }
1861 }
1862 }
1863 }
1864
1865
1866
1867
1868 int from = 0;
1869
1870
1871
1872
1873 boolean isStartedFromPath = false;
1874 int atColon = tmp.indexOf(':');
1875 int atSlash = tmp.indexOf('/');
1876 if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) {
1877 isStartedFromPath = true;
1878 }
1879
1880
1881
1882
1883
1884
1885
1886 int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1887 if (at == -1) {
1888 at = 0;
1889 }
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899 if (at < length && tmp.charAt(at) == ':') {
1900 char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1901 if (validate(target, scheme)) {
1902 _scheme = target;
1903 } else {
1904 throw new URIException("incorrect scheme");
1905 }
1906 from = ++at;
1907 }
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918 _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1919 if (0 <= at && at < length && tmp.charAt(at) == '/') {
1920
1921 _is_hier_part = true;
1922 if (at + 2 < length && tmp.charAt(at + 1) == '/') {
1923
1924 int next = indexFirstOf(tmp, "/?#", at + 2);
1925 if (next == -1) {
1926 next = (tmp.substring(at + 2).length() == 0) ? at + 2
1927 : tmp.length();
1928 }
1929 parseAuthority(tmp.substring(at + 2, next), escaped);
1930 from = at = next;
1931
1932 _is_net_path = true;
1933 }
1934 if (from == at) {
1935
1936 _is_abs_path = true;
1937 }
1938 }
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948 if (from < length) {
1949
1950 int next = indexFirstOf(tmp, "?#", from);
1951 if (next == -1) {
1952 next = tmp.length();
1953 }
1954 if (!_is_abs_path) {
1955 if (!escaped
1956 && prevalidate(tmp.substring(from, next), disallowed_rel_path)
1957 || escaped
1958 && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
1959
1960 _is_rel_path = true;
1961 } else if (!escaped
1962 && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
1963 || escaped
1964 && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
1965
1966 _is_opaque_part = true;
1967 } else {
1968
1969 _path = null;
1970 }
1971 }
1972 if (escaped) {
1973 setRawPath(tmp.substring(from, next).toCharArray());
1974 } else {
1975 setPath(tmp.substring(from, next));
1976 }
1977 at = next;
1978 }
1979
1980
1981 String charset = getProtocolCharset();
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991 if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
1992 int next = tmp.indexOf('#', at + 1);
1993 if (next == -1) {
1994 next = tmp.length();
1995 }
1996 _query = (escaped) ? tmp.substring(at + 1, next).toCharArray()
1997 : encode(tmp.substring(at + 1, next), allowed_query, charset);
1998 at = next;
1999 }
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009 if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2010 if (at + 1 == length) {
2011 _fragment = "".toCharArray();
2012 } else {
2013 _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
2014 : encode(tmp.substring(at + 1), allowed_fragment, charset);
2015 }
2016 }
2017
2018
2019 setURI();
2020 }
2021
2022
2023 /***
2024 * Get the earlier index that to be searched for the first occurrance in
2025 * one of any of the given string.
2026 *
2027 * @param s the string to be indexed
2028 * @param delims the delimiters used to index
2029 * @return the earlier index if there are delimiters
2030 */
2031 protected int indexFirstOf(String s, String delims) {
2032 return indexFirstOf(s, delims, -1);
2033 }
2034
2035
2036 /***
2037 * Get the earlier index that to be searched for the first occurrance in
2038 * one of any of the given string.
2039 *
2040 * @param s the string to be indexed
2041 * @param delims the delimiters used to index
2042 * @param offset the from index
2043 * @return the earlier index if there are delimiters
2044 */
2045 protected int indexFirstOf(String s, String delims, int offset) {
2046 if (s == null || s.length() == 0) {
2047 return -1;
2048 }
2049 if (delims == null || delims.length() == 0) {
2050 return -1;
2051 }
2052
2053 if (offset < 0) {
2054 offset = 0;
2055 } else if (offset > s.length()) {
2056 return -1;
2057 }
2058
2059 int min = s.length();
2060 char[] delim = delims.toCharArray();
2061 for (int i = 0; i < delim.length; i++) {
2062 int at = s.indexOf(delim[i], offset);
2063 if (at >= 0 && at < min) {
2064 min = at;
2065 }
2066 }
2067 return (min == s.length()) ? -1 : min;
2068 }
2069
2070
2071 /***
2072 * Get the earlier index that to be searched for the first occurrance in
2073 * one of any of the given array.
2074 *
2075 * @param s the character array to be indexed
2076 * @param delim the delimiter used to index
2077 * @return the ealier index if there are a delimiter
2078 */
2079 protected int indexFirstOf(char[] s, char delim) {
2080 return indexFirstOf(s, delim, 0);
2081 }
2082
2083
2084 /***
2085 * Get the earlier index that to be searched for the first occurrance in
2086 * one of any of the given array.
2087 *
2088 * @param s the character array to be indexed
2089 * @param delim the delimiter used to index
2090 * @param offset The offset.
2091 * @return the ealier index if there is a delimiter
2092 */
2093 protected int indexFirstOf(char[] s, char delim, int offset) {
2094 if (s == null || s.length == 0) {
2095 return -1;
2096 }
2097
2098 if (offset < 0) {
2099 offset = 0;
2100 } else if (offset > s.length) {
2101 return -1;
2102 }
2103 for (int i = offset; i < s.length; i++) {
2104 if (s[i] == delim) {
2105 return i;
2106 }
2107 }
2108 return -1;
2109 }
2110
2111
2112 /***
2113 * Parse the authority component.
2114 *
2115 * @param original the original character sequence of authority component
2116 * @param escaped <code>true</code> if <code>original</code> is escaped
2117 * @throws URIException If an error occurs.
2118 */
2119 protected void parseAuthority(String original, boolean escaped)
2120 throws URIException {
2121
2122
2123 _is_reg_name = _is_server =
2124 _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2125
2126
2127 String charset = getProtocolCharset();
2128
2129 boolean hasPort = true;
2130 int from = 0;
2131 int next = original.indexOf('@');
2132 if (next != -1) {
2133
2134 _userinfo = (escaped) ? original.substring(0, next).toCharArray()
2135 : encode(original.substring(0, next), allowed_userinfo,
2136 charset);
2137 from = next + 1;
2138 }
2139 next = original.indexOf('[', from);
2140 if (next >= from) {
2141 next = original.indexOf(']', from);
2142 if (next == -1) {
2143 throw new URIException(URIException.PARSING, "IPv6reference");
2144 } else {
2145 next++;
2146 }
2147
2148 _host = (escaped) ? original.substring(from, next).toCharArray()
2149 : encode(original.substring(from, next), allowed_IPv6reference,
2150 charset);
2151
2152 _is_IPv6reference = true;
2153 } else {
2154 next = original.indexOf(':', from);
2155 if (next == -1) {
2156 next = original.length();
2157 hasPort = false;
2158 }
2159
2160 _host = original.substring(from, next).toCharArray();
2161 if (validate(_host, IPv4address)) {
2162
2163 _is_IPv4address = true;
2164 } else if (validate(_host, hostname)) {
2165
2166 _is_hostname = true;
2167 } else {
2168
2169 _is_reg_name = true;
2170 }
2171 }
2172 if (_is_reg_name) {
2173
2174 _is_server = _is_hostname = _is_IPv4address =
2175 _is_IPv6reference = false;
2176
2177 _authority = (escaped) ? original.toString().toCharArray()
2178 : encode(original.toString(), allowed_reg_name, charset);
2179 } else {
2180 if (original.length() - 1 > next && hasPort
2181 && original.charAt(next) == ':') {
2182 from = next + 1;
2183 try {
2184 _port = Integer.parseInt(original.substring(from));
2185 } catch (NumberFormatException error) {
2186 throw new URIException(URIException.PARSING,
2187 "invalid port number");
2188 }
2189 }
2190
2191 StringBuffer buf = new StringBuffer();
2192 if (_userinfo != null) {
2193 buf.append(_userinfo);
2194 buf.append('@');
2195 }
2196 if (_host != null) {
2197 buf.append(_host);
2198 if (_port != -1) {
2199 buf.append(':');
2200 buf.append(_port);
2201 }
2202 }
2203 _authority = buf.toString().toCharArray();
2204
2205 _is_server = true;
2206 }
2207 }
2208
2209
2210 /***
2211 * Once it's parsed successfully, set this URI.
2212 *
2213 * @see #getRawURI
2214 */
2215 protected void setURI() {
2216
2217 StringBuffer buf = new StringBuffer();
2218
2219 if (_scheme != null) {
2220 buf.append(_scheme);
2221 buf.append(':');
2222 }
2223 if (_is_net_path) {
2224 buf.append("//");
2225 if (_authority != null) {
2226 if (_userinfo != null) {
2227 if (_host != null) {
2228 buf.append(_host);
2229 if (_port != -1) {
2230 buf.append(':');
2231 buf.append(_port);
2232 }
2233 }
2234 } else {
2235 buf.append(_authority);
2236 }
2237 }
2238 }
2239 if (_opaque != null && _is_opaque_part) {
2240 buf.append(_opaque);
2241 } else if (_path != null) {
2242
2243 if (_path.length != 0) {
2244 buf.append(_path);
2245 }
2246 }
2247 if (_query != null) {
2248 buf.append('?');
2249 buf.append(_query);
2250 }
2251
2252 _uri = buf.toString().toCharArray();
2253 hash = 0;
2254 }
2255
2256
2257
2258
2259 /***
2260 * Tell whether or not this URI is absolute.
2261 *
2262 * @return true iif this URI is absoluteURI
2263 */
2264 public boolean isAbsoluteURI() {
2265 return (_scheme != null);
2266 }
2267
2268
2269 /***
2270 * Tell whether or not this URI is relative.
2271 *
2272 * @return true iif this URI is relativeURI
2273 */
2274 public boolean isRelativeURI() {
2275 return (_scheme == null);
2276 }
2277
2278
2279 /***
2280 * Tell whether or not the absoluteURI of this URI is hier_part.
2281 *
2282 * @return true iif the absoluteURI is hier_part
2283 */
2284 public boolean isHierPart() {
2285 return _is_hier_part;
2286 }
2287
2288
2289 /***
2290 * Tell whether or not the absoluteURI of this URI is opaque_part.
2291 *
2292 * @return true iif the absoluteURI is opaque_part
2293 */
2294 public boolean isOpaquePart() {
2295 return _is_opaque_part;
2296 }
2297
2298
2299 /***
2300 * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2301 * It's the same function as the has_authority() method.
2302 *
2303 * @return true iif the relativeURI or heir_part is net_path
2304 * @see #hasAuthority
2305 */
2306 public boolean isNetPath() {
2307 return _is_net_path || (_authority != null);
2308 }
2309
2310
2311 /***
2312 * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2313 *
2314 * @return true iif the relativeURI or hier_part is abs_path
2315 */
2316 public boolean isAbsPath() {
2317 return _is_abs_path;
2318 }
2319
2320
2321 /***
2322 * Tell whether or not the relativeURI of this URI is rel_path.
2323 *
2324 * @return true iif the relativeURI is rel_path
2325 */
2326 public boolean isRelPath() {
2327 return _is_rel_path;
2328 }
2329
2330
2331 /***
2332 * Tell whether or not this URI has authority.
2333 * It's the same function as the is_net_path() method.
2334 *
2335 * @return true iif this URI has authority
2336 * @see #isNetPath
2337 */
2338 public boolean hasAuthority() {
2339 return (_authority != null) || _is_net_path;
2340 }
2341
2342 /***
2343 * Tell whether or not the authority component of this URI is reg_name.
2344 *
2345 * @return true iif the authority component is reg_name
2346 */
2347 public boolean isRegName() {
2348 return _is_reg_name;
2349 }
2350
2351
2352 /***
2353 * Tell whether or not the authority component of this URI is server.
2354 *
2355 * @return true iif the authority component is server
2356 */
2357 public boolean isServer() {
2358 return _is_server;
2359 }
2360
2361
2362 /***
2363 * Tell whether or not this URI has userinfo.
2364 *
2365 * @return true iif this URI has userinfo
2366 */
2367 public boolean hasUserinfo() {
2368 return (_userinfo != null);
2369 }
2370
2371
2372 /***
2373 * Tell whether or not the host part of this URI is hostname.
2374 *
2375 * @return true iif the host part is hostname
2376 */
2377 public boolean isHostname() {
2378 return _is_hostname;
2379 }
2380
2381
2382 /***
2383 * Tell whether or not the host part of this URI is IPv4address.
2384 *
2385 * @return true iif the host part is IPv4address
2386 */
2387 public boolean isIPv4address() {
2388 return _is_IPv4address;
2389 }
2390
2391
2392 /***
2393 * Tell whether or not the host part of this URI is IPv6reference.
2394 *
2395 * @return true iif the host part is IPv6reference
2396 */
2397 public boolean isIPv6reference() {
2398 return _is_IPv6reference;
2399 }
2400
2401
2402 /***
2403 * Tell whether or not this URI has query.
2404 *
2405 * @return true iif this URI has query
2406 */
2407 public boolean hasQuery() {
2408 return (_query != null);
2409 }
2410
2411
2412 /***
2413 * Tell whether or not this URI has fragment.
2414 *
2415 * @return true iif this URI has fragment
2416 */
2417 public boolean hasFragment() {
2418 return (_fragment != null);
2419 }
2420
2421
2422
2423
2424
2425 /***
2426 * Set the default charset of the protocol.
2427 * <p>
2428 * The character set used to store files SHALL remain a local decision and
2429 * MAY depend on the capability of local operating systems. Prior to the
2430 * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2431 * and UTF-8 encoded. This approach, while allowing international exchange
2432 * of URIs, will still allow backward compatibility with older systems
2433 * because the code set positions for ASCII characters are identical to the
2434 * one byte sequence in UTF-8.
2435 * <p>
2436 * An individual URI scheme may require a single charset, define a default
2437 * charset, or provide a way to indicate the charset used.
2438 *
2439 * <p>
2440 * Always all the time, the setter method is always succeeded and throws
2441 * <code>DefaultCharsetChanged</code> exception.
2442 *
2443 * So API programmer must follow the following way:
2444 * <code><pre>
2445 * import org.apache.util.URI$DefaultCharsetChanged;
2446 * .
2447 * .
2448 * .
2449 * try {
2450 * URI.setDefaultProtocolCharset("UTF-8");
2451 * } catch (DefaultCharsetChanged cc) {
2452 * // CASE 1: the exception could be ignored, when it is set by user
2453 * if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
2454 * // CASE 2: let user know the default protocol charset changed
2455 * } else {
2456 * // CASE 2: let user know the default document charset changed
2457 * }
2458 * }
2459 * </pre></code>
2460 *
2461 * The API programmer is responsible to set the correct charset.
2462 * And each application should remember its own charset to support.
2463 *
2464 * @param charset the default charset for each protocol
2465 * @throws DefaultCharsetChanged default charset changed
2466 */
2467 public static void setDefaultProtocolCharset(String charset)
2468 throws DefaultCharsetChanged {
2469
2470 defaultProtocolCharset = charset;
2471 throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
2472 "the default protocol charset changed");
2473 }
2474
2475
2476 /***
2477 * Get the default charset of the protocol.
2478 * <p>
2479 * An individual URI scheme may require a single charset, define a default
2480 * charset, or provide a way to indicate the charset used.
2481 * <p>
2482 * To work globally either requires support of a number of character sets
2483 * and to be able to convert between them, or the use of a single preferred
2484 * character set.
2485 * For support of global compatibility it is STRONGLY RECOMMENDED that
2486 * clients and servers use UTF-8 encoding when exchanging URIs.
2487 *
2488 * @return the default charset string
2489 */
2490 public static String getDefaultProtocolCharset() {
2491 return defaultProtocolCharset;
2492 }
2493
2494
2495 /***
2496 * Get the protocol charset used by this current URI instance.
2497 * It was set by the constructor for this instance. If it was not set by
2498 * contructor, it will return the default protocol charset.
2499 *
2500 * @return the protocol charset string
2501 * @see #getDefaultProtocolCharset
2502 */
2503 public String getProtocolCharset() {
2504 return (protocolCharset != null)
2505 ? protocolCharset
2506 : defaultProtocolCharset;
2507 }
2508
2509
2510 /***
2511 * Set the default charset of the document.
2512 * <p>
2513 * Notice that it will be possible to contain mixed characters (e.g.
2514 * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2515 * display of these character sets, the protocol charset could be simply
2516 * used again. Because it's not yet implemented that the insertion of BIDI
2517 * control characters at different points during composition is extracted.
2518 * <p>
2519 *
2520 * Always all the time, the setter method is always succeeded and throws
2521 * <code>DefaultCharsetChanged</code> exception.
2522 *
2523 * So API programmer must follow the following way:
2524 * <code><pre>
2525 * import org.apache.util.URI$DefaultCharsetChanged;
2526 * .
2527 * .
2528 * .
2529 * try {
2530 * URI.setDefaultDocumentCharset("EUC-KR");
2531 * } catch (DefaultCharsetChanged cc) {
2532 * // CASE 1: the exception could be ignored, when it is set by user
2533 * if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2534 * // CASE 2: let user know the default document charset changed
2535 * } else {
2536 * // CASE 2: let user know the default protocol charset changed
2537 * }
2538 * }
2539 * </pre></code>
2540 *
2541 * The API programmer is responsible to set the correct charset.
2542 * And each application should remember its own charset to support.
2543 *
2544 * @param charset the default charset for the document
2545 * @throws DefaultCharsetChanged default charset changed
2546 */
2547 public static void setDefaultDocumentCharset(String charset)
2548 throws DefaultCharsetChanged {
2549
2550 defaultDocumentCharset = charset;
2551 throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2552 "the default document charset changed");
2553 }
2554
2555
2556 /***
2557 * Get the recommended default charset of the document.
2558 *
2559 * @return the default charset string
2560 */
2561 public static String getDefaultDocumentCharset() {
2562 return defaultDocumentCharset;
2563 }
2564
2565
2566 /***
2567 * Get the default charset of the document by locale.
2568 *
2569 * @return the default charset string by locale
2570 */
2571 public static String getDefaultDocumentCharsetByLocale() {
2572 return defaultDocumentCharsetByLocale;
2573 }
2574
2575
2576 /***
2577 * Get the default charset of the document by platform.
2578 *
2579 * @return the default charset string by platform
2580 */
2581 public static String getDefaultDocumentCharsetByPlatform() {
2582 return defaultDocumentCharsetByPlatform;
2583 }
2584
2585
2586
2587 /***
2588 * Get the scheme.
2589 *
2590 * @return the scheme
2591 */
2592 public char[] getRawScheme() {
2593 return _scheme;
2594 }
2595
2596
2597 /***
2598 * Get the scheme.
2599 *
2600 * @return the scheme
2601 * null if undefined scheme
2602 */
2603 public String getScheme() {
2604 return (_scheme == null) ? null : new String(_scheme);
2605 }
2606
2607
2608
2609 /***
2610 * Set the authority. It can be one type of server, hostport, hostname,
2611 * IPv4address, IPv6reference and reg_name.
2612 * <p><blockquote><pre>
2613 * authority = server | reg_name
2614 * </pre></blockquote><p>
2615 *
2616 * @param escapedAuthority the raw escaped authority
2617 * @throws URIException If {@link
2618 * #parseAuthority(java.lang.String,boolean)} fails
2619 * @throws NullPointerException null authority
2620 */
2621 public void setRawAuthority(char[] escapedAuthority)
2622 throws URIException, NullPointerException {
2623
2624 parseAuthority(new String(escapedAuthority), true);
2625 setURI();
2626 }
2627
2628
2629 /***
2630 * Set the authority. It can be one type of server, hostport, hostname,
2631 * IPv4address, IPv6reference and reg_name.
2632 * Note that there is no setAuthority method by the escape encoding reason.
2633 *
2634 * @param escapedAuthority the escaped authority string
2635 * @throws URIException If {@link
2636 * #parseAuthority(java.lang.String,boolean)} fails
2637 */
2638 public void setEscapedAuthority(String escapedAuthority)
2639 throws URIException {
2640
2641 parseAuthority(escapedAuthority, true);
2642 setURI();
2643 }
2644
2645
2646 /***
2647 * Get the raw-escaped authority.
2648 *
2649 * @return the raw-escaped authority
2650 */
2651 public char[] getRawAuthority() {
2652 return _authority;
2653 }
2654
2655
2656 /***
2657 * Get the escaped authority.
2658 *
2659 * @return the escaped authority
2660 */
2661 public String getEscapedAuthority() {
2662 return (_authority == null) ? null : new String(_authority);
2663 }
2664
2665
2666 /***
2667 * Get the authority.
2668 *
2669 * @return the authority
2670 * @throws URIException If {@link #decode} fails
2671 */
2672 public String getAuthority() throws URIException {
2673 return (_authority == null) ? null : decode(_authority,
2674 getProtocolCharset());
2675 }
2676
2677
2678
2679 /***
2680 * Get the raw-escaped userinfo.
2681 *
2682 * @return the raw-escaped userinfo
2683 * @see #getAuthority
2684 */
2685 public char[] getRawUserinfo() {
2686 return _userinfo;
2687 }
2688
2689
2690 /***
2691 * Get the escaped userinfo.
2692 *
2693 * @return the escaped userinfo
2694 * @see #getAuthority
2695 */
2696 public String getEscapedUserinfo() {
2697 return (_userinfo == null) ? null : new String(_userinfo);
2698 }
2699
2700
2701 /***
2702 * Get the userinfo.
2703 *
2704 * @return the userinfo
2705 * @throws URIException If {@link #decode} fails
2706 * @see #getAuthority
2707 */
2708 public String getUserinfo() throws URIException {
2709 return (_userinfo == null) ? null : decode(_userinfo,
2710 getProtocolCharset());
2711 }
2712
2713
2714
2715 /***
2716 * Get the host.
2717 * <p><blockquote><pre>
2718 * host = hostname | IPv4address | IPv6reference
2719 * </pre></blockquote><p>
2720 *
2721 * @return the host
2722 * @see #getAuthority
2723 */
2724 public char[] getRawHost() {
2725 return _host;
2726 }
2727
2728
2729 /***
2730 * Get the host.
2731 * <p><blockquote><pre>
2732 * host = hostname | IPv4address | IPv6reference
2733 * </pre></blockquote><p>
2734 *
2735 * @return the host
2736 * @throws URIException If {@link #decode} fails
2737 * @see #getAuthority
2738 */
2739 public String getHost() throws URIException {
2740 return decode(_host, getProtocolCharset());
2741 }
2742
2743
2744
2745 /***
2746 * Get the port. In order to get the specfic default port, the specific
2747 * protocol-supported class extended from the URI class should be used.
2748 * It has the server-based naming authority.
2749 *
2750 * @return the port
2751 * if -1, it has the default port for the scheme or the server-based
2752 * naming authority is not supported in the specific URI.
2753 */
2754 public int getPort() {
2755 return _port;
2756 }
2757
2758
2759
2760 /***
2761 * Set the raw-escaped path.
2762 *
2763 * @param escapedPath the path character sequence
2764 * @throws URIException encoding error or not proper for initial instance
2765 * @see #encode
2766 */
2767 public void setRawPath(char[] escapedPath) throws URIException {
2768 if (escapedPath == null || escapedPath.length == 0) {
2769 _path = _opaque = escapedPath;
2770 setURI();
2771 return;
2772 }
2773
2774 escapedPath = removeFragmentIdentifier(escapedPath);
2775 if (_is_net_path || _is_abs_path) {
2776 if (escapedPath[0] != '/') {
2777 throw new URIException(URIException.PARSING,
2778 "not absolute path");
2779 }
2780 if (!validate(escapedPath, abs_path)) {
2781 throw new URIException(URIException.ESCAPING,
2782 "escaped absolute path not valid");
2783 }
2784 _path = escapedPath;
2785 } else if (_is_rel_path) {
2786 int at = indexFirstOf(escapedPath, '/');
2787 if (at == 0) {
2788 throw new URIException(URIException.PARSING, "incorrect path");
2789 }
2790 if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment)
2791 && !validate(escapedPath, at, -1, abs_path)
2792 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2793
2794 throw new URIException(URIException.ESCAPING,
2795 "escaped relative path not valid");
2796 }
2797 _path = escapedPath;
2798 } else if (_is_opaque_part) {
2799 if (!uric_no_slash.get(escapedPath[0])
2800 && !validate(escapedPath, 1, -1, uric)) {
2801 throw new URIException(URIException.ESCAPING,
2802 "escaped opaque part not valid");
2803 }
2804 _opaque = escapedPath;
2805 } else {
2806 throw new URIException(URIException.PARSING, "incorrect path");
2807 }
2808 setURI();
2809 }
2810
2811
2812 /***
2813 * Set the escaped path.
2814 *
2815 * @param escapedPath the escaped path string
2816 * @throws URIException encoding error or not proper for initial instance
2817 * @see #encode
2818 */
2819 public void setEscapedPath(String escapedPath) throws URIException {
2820 if (escapedPath == null) {
2821 _path = _opaque = null;
2822 setURI();
2823 return;
2824 }
2825 setRawPath(escapedPath.toCharArray());
2826 }
2827
2828
2829 /***
2830 * Set the path.
2831 *
2832 * @param path the path string
2833 * @throws URIException set incorrectly or fragment only
2834 * @see #encode
2835 */
2836 public void setPath(String path) throws URIException {
2837
2838 if (path == null || path.length() == 0) {
2839 _path = _opaque = (path == null) ? null : path.toCharArray();
2840 setURI();
2841 return;
2842 }
2843
2844 String charset = getProtocolCharset();
2845
2846 if (_is_net_path || _is_abs_path) {
2847 _path = encode(path, allowed_abs_path, charset);
2848 } else if (_is_rel_path) {
2849 StringBuffer buff = new StringBuffer(path.length());
2850 int at = path.indexOf('/');
2851 if (at == 0) {
2852 throw new URIException(URIException.PARSING,
2853 "incorrect relative path");
2854 }
2855 if (at > 0) {
2856 buff.append(encode(path.substring(0, at), allowed_rel_path,
2857 charset));
2858 buff.append(encode(path.substring(at), allowed_abs_path,
2859 charset));
2860 } else {
2861 buff.append(encode(path, allowed_rel_path, charset));
2862 }
2863 _path = buff.toString().toCharArray();
2864 } else if (_is_opaque_part) {
2865 StringBuffer buf = new StringBuffer();
2866 buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2867 buf.insert(1, encode(path.substring(1), uric, charset));
2868 _opaque = buf.toString().toCharArray();
2869 } else {
2870 throw new URIException(URIException.PARSING, "incorrect path");
2871 }
2872 setURI();
2873 }
2874
2875
2876 /***
2877 * Resolve the base and relative path.
2878 *
2879 * @param basePath a character array of the basePath
2880 * @param relPath a character array of the relPath
2881 * @return the resolved path
2882 * @throws URIException no more higher path level to be resolved
2883 */
2884 protected char[] resolvePath(char[] basePath, char[] relPath)
2885 throws URIException {
2886
2887
2888 String base = (basePath == null) ? "" : new String(basePath);
2889 int at = base.lastIndexOf('/');
2890 if (at != -1) {
2891 basePath = base.substring(0, at + 1).toCharArray();
2892 }
2893
2894 if (relPath == null || relPath.length == 0) {
2895 return normalize(basePath);
2896 } else if (relPath[0] == '/') {
2897 return normalize(relPath);
2898 } else {
2899 StringBuffer buff = new StringBuffer(base.length()
2900 + relPath.length);
2901 buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2902 buff.append(relPath);
2903 return normalize(buff.toString().toCharArray());
2904 }
2905 }
2906
2907
2908 /***
2909 * Get the raw-escaped current hierarchy level in the given path.
2910 * If the last namespace is a collection, the slash mark ('/') should be
2911 * ended with at the last character of the path string.
2912 *
2913 * @param path the path
2914 * @return the current hierarchy level
2915 * @throws URIException no hierarchy level
2916 */
2917 protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2918
2919 if (_is_opaque_part) {
2920 throw new URIException(URIException.PARSING, "no hierarchy level");
2921 }
2922 if (path == null) {
2923 throw new URIException(URIException.PARSING, "empty path");
2924 }
2925 String buff = new String(path);
2926 int first = buff.indexOf('/');
2927 int last = buff.lastIndexOf('/');
2928 if (last == 0) {
2929 return rootPath;
2930 } else if (first != last && last != -1) {
2931 return buff.substring(0, last).toCharArray();
2932 }
2933
2934 return path;
2935 }
2936
2937
2938 /***
2939 * Get the raw-escaped current hierarchy level.
2940 *
2941 * @return the raw-escaped current hierarchy level
2942 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2943 */
2944 public char[] getRawCurrentHierPath() throws URIException {
2945 return (_path == null) ? null : getRawCurrentHierPath(_path);
2946 }
2947
2948
2949 /***
2950 * Get the escaped current hierarchy level.
2951 *
2952 * @return the escaped current hierarchy level
2953 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2954 */
2955 public String getEscapedCurrentHierPath() throws URIException {
2956 char[] path = getRawCurrentHierPath();
2957 return (path == null) ? null : new String(path);
2958 }
2959
2960
2961 /***
2962 * Get the current hierarchy level.
2963 *
2964 * @return the current hierarchy level
2965 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2966 * @see #decode
2967 */
2968 public String getCurrentHierPath() throws URIException {
2969 char[] path = getRawCurrentHierPath();
2970 return (path == null) ? null : decode(path, getProtocolCharset());
2971 }
2972
2973
2974 /***
2975 * Get the level above the this hierarchy level.
2976 *
2977 * @return the raw above hierarchy level
2978 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2979 */
2980 public char[] getRawAboveHierPath() throws URIException {
2981 char[] path = getRawCurrentHierPath();
2982 return (path == null) ? null : getRawCurrentHierPath(path);
2983 }
2984
2985
2986 /***
2987 * Get the level above the this hierarchy level.
2988 *
2989 * @return the raw above hierarchy level
2990 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2991 */
2992 public String getEscapedAboveHierPath() throws URIException {
2993 char[] path = getRawAboveHierPath();
2994 return (path == null) ? null : new String(path);
2995 }
2996
2997
2998 /***
2999 * Get the level above the this hierarchy level.
3000 *
3001 * @return the above hierarchy level
3002 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3003 * @see #decode
3004 */
3005 public String getAboveHierPath() throws URIException {
3006 char[] path = getRawAboveHierPath();
3007 return (path == null) ? null : decode(path, getProtocolCharset());
3008 }
3009
3010
3011 /***
3012 * Get the raw-escaped path.
3013 * <p><blockquote><pre>
3014 * path = [ abs_path | opaque_part ]
3015 * </pre></blockquote><p>
3016 *
3017 * @return the raw-escaped path
3018 */
3019 public char[] getRawPath() {
3020 return _is_opaque_part ? _opaque : _path;
3021 }
3022
3023
3024 /***
3025 * Get the escaped path.
3026 * <p><blockquote><pre>
3027 * path = [ abs_path | opaque_part ]
3028 * abs_path = "/" path_segments
3029 * opaque_part = uric_no_slash *uric
3030 * </pre></blockquote><p>
3031 *
3032 * @return the escaped path string
3033 */
3034 public String getEscapedPath() {
3035 char[] path = getRawPath();
3036 return (path == null) ? null : new String(path);
3037 }
3038
3039
3040 /***
3041 * Get the path.
3042 * <p><blockquote><pre>
3043 * path = [ abs_path | opaque_part ]
3044 * </pre></blockquote><p>
3045 * @return the path string
3046 * @throws URIException If {@link #decode} fails.
3047 * @see #decode
3048 */
3049 public String getPath() throws URIException {
3050 char[] path = getRawPath();
3051 return (path == null) ? null : decode(path, getProtocolCharset());
3052 }
3053
3054
3055 /***
3056 * Get the raw-escaped basename of the path.
3057 *
3058 * @return the raw-escaped basename
3059 */
3060 public char[] getRawName() {
3061 if (_path == null) {
3062 return null;
3063 }
3064
3065 int at = 0;
3066 for (int i = _path.length - 1; i >= 0; i--) {
3067 if (_path[i] == '/') {
3068 at = i + 1;
3069 break;
3070 }
3071 }
3072 int len = _path.length - at;
3073 char[] basename = new char[len];
3074 System.arraycopy(_path, at, basename, 0, len);
3075 return basename;
3076 }
3077
3078
3079 /***
3080 * Get the escaped basename of the path.
3081 *
3082 * @return the escaped basename string
3083 */
3084 public String getEscapedName() {
3085 char[] basename = getRawName();
3086 return (basename == null) ? null : new String(basename);
3087 }
3088
3089
3090 /***
3091 * Get the basename of the path.
3092 *
3093 * @return the basename string
3094 * @throws URIException incomplete trailing escape pattern or unsupported
3095 * character encoding
3096 * @see #decode
3097 */
3098 public String getName() throws URIException {
3099 char[] basename = getRawName();
3100 return (basename == null) ? null : decode(getRawName(),
3101 getProtocolCharset());
3102 }
3103
3104
3105
3106 /***
3107 * Get the raw-escaped path and query.
3108 *
3109 * @return the raw-escaped path and query
3110 */
3111 public char[] getRawPathQuery() {
3112
3113 if (_path == null && _query == null) {
3114 return null;
3115 }
3116 StringBuffer buff = new StringBuffer();
3117 if (_path != null) {
3118 buff.append(_path);
3119 }
3120 if (_query != null) {
3121 buff.append('?');
3122 buff.append(_query);
3123 }
3124 return buff.toString().toCharArray();
3125 }
3126
3127
3128 /***
3129 * Get the escaped query.
3130 *
3131 * @return the escaped path and query string
3132 */
3133 public String getEscapedPathQuery() {
3134 char[] rawPathQuery = getRawPathQuery();
3135 return (rawPathQuery == null) ? null : new String(rawPathQuery);
3136 }
3137
3138
3139 /***
3140 * Get the path and query.
3141 *
3142 * @return the path and query string.
3143 * @throws URIException incomplete trailing escape pattern or unsupported
3144 * character encoding
3145 * @see #decode
3146 */
3147 public String getPathQuery() throws URIException {
3148 char[] rawPathQuery = getRawPathQuery();
3149 return (rawPathQuery == null) ? null : decode(rawPathQuery,
3150 getProtocolCharset());
3151 }
3152
3153
3154
3155 /***
3156 * Set the raw-escaped query.
3157 *
3158 * @param escapedQuery the raw-escaped query
3159 * @throws URIException escaped query not valid
3160 */
3161 public void setRawQuery(char[] escapedQuery) throws URIException {
3162 if (escapedQuery == null || escapedQuery.length == 0) {
3163 _query = escapedQuery;
3164 setURI();
3165 return;
3166 }
3167
3168 escapedQuery = removeFragmentIdentifier(escapedQuery);
3169 if (!validate(escapedQuery, query)) {
3170 throw new URIException(URIException.ESCAPING,
3171 "escaped query not valid");
3172 }
3173 _query = escapedQuery;
3174 setURI();
3175 }
3176
3177
3178 /***
3179 * Set the escaped query string.
3180 *
3181 * @param escapedQuery the escaped query string
3182 * @throws URIException escaped query not valid
3183 */
3184 public void setEscapedQuery(String escapedQuery) throws URIException {
3185 if (escapedQuery == null) {
3186 _query = null;
3187 setURI();
3188 return;
3189 }
3190 setRawQuery(escapedQuery.toCharArray());
3191 }
3192
3193
3194 /***
3195 * Set the query.
3196 * <p>
3197 * When a query string is not misunderstood the reserved special characters
3198 * ("&", "=", "+", ",", and "$") within a query component, it is
3199 * recommended to use in encoding the whole query with this method.
3200 * <p>
3201 * The additional APIs for the special purpose using by the reserved
3202 * special characters used in each protocol are implemented in each protocol
3203 * classes inherited from <code>URI</code>. So refer to the same-named APIs
3204 * implemented in each specific protocol instance.
3205 *
3206 * @param query the query string.
3207 * @throws URIException incomplete trailing escape pattern or unsupported
3208 * character encoding
3209 * @see #encode
3210 */
3211 public void setQuery(String query) throws URIException {
3212 if (query == null || query.length() == 0) {
3213 _query = (query == null) ? null : query.toCharArray();
3214 setURI();
3215 return;
3216 }
3217 setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3218 }
3219
3220
3221 /***
3222 * Get the raw-escaped query.
3223 *
3224 * @return the raw-escaped query
3225 */
3226 public char[] getRawQuery() {
3227 return _query;
3228 }
3229
3230
3231 /***
3232 * Get the escaped query.
3233 *
3234 * @return the escaped query string
3235 */
3236 public String getEscapedQuery() {
3237 return (_query == null) ? null : new String(_query);
3238 }
3239
3240
3241 /***
3242 * Get the query.
3243 *
3244 * @return the query string.
3245 * @throws URIException incomplete trailing escape pattern or unsupported
3246 * character encoding
3247 * @see #decode
3248 */
3249 public String getQuery() throws URIException {
3250 return (_query == null) ? null : decode(_query, getProtocolCharset());
3251 }
3252
3253
3254
3255 /***
3256 * Set the raw-escaped fragment.
3257 *
3258 * @param escapedFragment the raw-escaped fragment
3259 * @throws URIException escaped fragment not valid
3260 */
3261 public void setRawFragment(char[] escapedFragment) throws URIException {
3262 if (escapedFragment == null || escapedFragment.length == 0) {
3263 _fragment = escapedFragment;
3264 hash = 0;
3265 return;
3266 }
3267 if (!validate(escapedFragment, fragment)) {
3268 throw new URIException(URIException.ESCAPING,
3269 "escaped fragment not valid");
3270 }
3271 _fragment = escapedFragment;
3272 hash = 0;
3273 }
3274
3275
3276 /***
3277 * Set the escaped fragment string.
3278 *
3279 * @param escapedFragment the escaped fragment string
3280 * @throws URIException escaped fragment not valid
3281 */
3282 public void setEscapedFragment(String escapedFragment) throws URIException {
3283 if (escapedFragment == null) {
3284 _fragment = null;
3285 hash = 0;
3286 return;
3287 }
3288 setRawFragment(escapedFragment.toCharArray());
3289 }
3290
3291
3292 /***
3293 * Set the fragment.
3294 *
3295 * @param fragment the fragment string.
3296 * @throws URIException If an error occurs.
3297 */
3298 public void setFragment(String fragment) throws URIException {
3299 if (fragment == null || fragment.length() == 0) {
3300 _fragment = (fragment == null) ? null : fragment.toCharArray();
3301 hash = 0;
3302 return;
3303 }
3304 _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3305 hash = 0;
3306 }
3307
3308
3309 /***
3310 * Get the raw-escaped fragment.
3311 * <p>
3312 * The optional fragment identifier is not part of a URI, but is often used
3313 * in conjunction with a URI.
3314 * <p>
3315 * The format and interpretation of fragment identifiers is dependent on
3316 * the media type [RFC2046] of the retrieval result.
3317 * <p>
3318 * A fragment identifier is only meaningful when a URI reference is
3319 * intended for retrieval and the result of that retrieval is a document
3320 * for which the identified fragment is consistently defined.
3321 *
3322 * @return the raw-escaped fragment
3323 */
3324 public char[] getRawFragment() {
3325 return _fragment;
3326 }
3327
3328
3329 /***
3330 * Get the escaped fragment.
3331 *
3332 * @return the escaped fragment string
3333 */
3334 public String getEscapedFragment() {
3335 return (_fragment == null) ? null : new String(_fragment);
3336 }
3337
3338
3339 /***
3340 * Get the fragment.
3341 *
3342 * @return the fragment string
3343 * @throws URIException incomplete trailing escape pattern or unsupported
3344 * character encoding
3345 * @see #decode
3346 */
3347 public String getFragment() throws URIException {
3348 return (_fragment == null) ? null : decode(_fragment,
3349 getProtocolCharset());
3350 }
3351
3352
3353
3354 /***
3355 * Remove the fragment identifier of the given component.
3356 *
3357 * @param component the component that a fragment may be included
3358 * @return the component that the fragment identifier is removed
3359 */
3360 protected char[] removeFragmentIdentifier(char[] component) {
3361 if (component == null) {
3362 return null;
3363 }
3364 int lastIndex = new String(component).indexOf('#');
3365 if (lastIndex != -1) {
3366 component = new String(component).substring(0,
3367 lastIndex).toCharArray();
3368 }
3369 return component;
3370 }
3371
3372
/**
 * Normalize the given hier path part by removing "." and ".." segments.
 *
 * <p>Algorithm taken from URI reference parser at
 * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
 *
 * <p>The steps run in a fixed order on a string copy of the input: a
 * leading dot segment is stripped, "/./" and a trailing "/." are
 * collapsed, each "/../" is removed together with the segment before it,
 * and finally any ".." that has no segment left to cancel is discarded.
 *
 * @param path the path to normalize
 * @return the normalized path, or <code>null</code> for a
 * <code>null</code> path
 * @throws URIException no more higher path level to be normalized
 * (declared for callers; no statement in this body throws it)
 */
protected char[] normalize(char[] path) throws URIException {

    if (path == null) {
        return null;
    }

    String normalized = new String(path);

    // Strip a leading "." or ".." segment; the slash that follows it, if
    // any, is kept so the result starts with "/".
    if (normalized.startsWith("./")) {
        normalized = normalized.substring(1);
    } else if (normalized.startsWith("../")) {
        normalized = normalized.substring(2);
    } else if (normalized.startsWith("..")) {
        normalized = normalized.substring(2);
    }

    // Collapse every "/./" into a single "/".
    int index = -1;
    while ((index = normalized.indexOf("/./")) != -1) {
        normalized = normalized.substring(0, index) + normalized.substring(index + 2);
    }

    // Drop the "." of a trailing "/.", keeping the slash.
    if (normalized.endsWith("/.")) {
        normalized = normalized.substring(0, normalized.length() - 1);
    }

    int startIndex = 0;

    // Remove each "/../" together with the segment in front of it. The
    // segment starts at the last slash before the "/../". When there is no
    // such slash, the occurrence is left in place and the search resumes
    // after it.
    while ((index = normalized.indexOf("/../", startIndex)) != -1) {
        int slashIndex = normalized.lastIndexOf('/', index - 1);
        if (slashIndex >= 0) {
            normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
        } else {
            startIndex = index + 3;
        }
    }
    // Same for a trailing "/..": cut back to (and keep) the slash that
    // precedes the last real segment.
    if (normalized.endsWith("/..")) {
        int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
        if (slashIndex >= 0) {
            normalized = normalized.substring(0, slashIndex + 1);
        }
    }

    // Whatever "/../" is still first in the string and has no slash before
    // it cannot be resolved: discard everything up to and including its
    // "..", keeping the slash that follows.
    while ((index = normalized.indexOf("/../")) != -1) {
        int slashIndex = normalized.lastIndexOf('/', index - 1);
        if (slashIndex >= 0) {
            break;
        } else {
            normalized = normalized.substring(index + 3);
        }
    }
    // A trailing "/.." with no earlier slash reduces the path to "/".
    if (normalized.endsWith("/..")) {
        int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
        if (slashIndex < 0) {
            normalized = "/";
        }
    }

    return normalized.toCharArray();
}
3455
3456
3457 /***
3458 * Normalizes the path part of this URI. Normalization is only meant to be performed on
3459 * URIs with an absolute path. Calling this method on a relative path URI will have no
3460 * effect.
3461 *
3462 * @throws URIException no more higher path level to be normalized
3463 *
3464 * @see #isAbsPath()
3465 */
3466 public void normalize() throws URIException {
3467 if (isAbsPath()) {
3468 _path = normalize(_path);
3469 setURI();
3470 }
3471 }
3472
3473
3474 /***
3475 * Test if the first array is equal to the second array.
3476 *
3477 * @param first the first character array
3478 * @param second the second character array
3479 * @return true if they're equal
3480 */
3481 protected boolean equals(char[] first, char[] second) {
3482
3483 if (first == null && second == null) {
3484 return true;
3485 }
3486 if (first == null || second == null) {
3487 return false;
3488 }
3489 if (first.length != second.length) {
3490 return false;
3491 }
3492 for (int i = 0; i < first.length; i++) {
3493 if (first[i] != second[i]) {
3494 return false;
3495 }
3496 }
3497 return true;
3498 }
3499
3500
3501 /***
3502 * Test an object if this URI is equal to another.
3503 *
3504 * @param obj an object to compare
3505 * @return true if two URI objects are equal
3506 */
3507 public boolean equals(Object obj) {
3508
3509
3510 if (obj == this) {
3511 return true;
3512 }
3513 if (!(obj instanceof URI)) {
3514 return false;
3515 }
3516 URI another = (URI) obj;
3517
3518 if (!equals(_scheme, another._scheme)) {
3519 return false;
3520 }
3521
3522 if (!equals(_opaque, another._opaque)) {
3523 return false;
3524 }
3525
3526
3527 if (!equals(_authority, another._authority)) {
3528 return false;
3529 }
3530
3531 if (!equals(_path, another._path)) {
3532 return false;
3533 }
3534
3535 if (!equals(_query, another._query)) {
3536 return false;
3537 }
3538
3539 if (!equals(_fragment, another._fragment)) {
3540 return false;
3541 }
3542 return true;
3543 }
3544
3545
3546
3547 /***
3548 * Write the content of this URI.
3549 *
3550 * @param oos the object-output stream
3551 * @throws IOException If an IO problem occurs.
3552 */
3553 protected void writeObject(ObjectOutputStream oos)
3554 throws IOException {
3555
3556 oos.defaultWriteObject();
3557 }
3558
3559
3560 /***
3561 * Read a URI.
3562 *
3563 * @param ois the object-input stream
3564 * @throws ClassNotFoundException If one of the classes specified in the
3565 * input stream cannot be found.
3566 * @throws IOException If an IO problem occurs.
3567 */
3568 protected void readObject(ObjectInputStream ois)
3569 throws ClassNotFoundException, IOException {
3570
3571 ois.defaultReadObject();
3572 }
3573
3574
3575
3576 /***
3577 * Return a hash code for this URI.
3578 *
3579 * @return a has code value for this URI
3580 */
3581 public int hashCode() {
3582 if (hash == 0) {
3583 char[] c = _uri;
3584 if (c != null) {
3585 for (int i = 0, len = c.length; i < len; i++) {
3586 hash = 31 * hash + c[i];
3587 }
3588 }
3589 c = _fragment;
3590 if (c != null) {
3591 for (int i = 0, len = c.length; i < len; i++) {
3592 hash = 31 * hash + c[i];
3593 }
3594 }
3595 }
3596 return hash;
3597 }
3598
3599
3600
3601 /***
3602 * Compare this URI to another object.
3603 *
3604 * @param obj the object to be compared.
3605 * @return 0, if it's same,
3606 * -1, if failed, first being compared with in the authority component
3607 * @throws ClassCastException not URI argument
3608 */
3609 public int compareTo(Object obj) throws ClassCastException {
3610
3611 URI another = (URI) obj;
3612 if (!equals(_authority, another.getRawAuthority())) {
3613 return -1;
3614 }
3615 return toString().compareTo(another.toString());
3616 }
3617
3618
3619
3620 /***
3621 * Create and return a copy of this object, the URI-reference containing
3622 * the userinfo component. Notice that the whole URI-reference including
3623 * the userinfo component counld not be gotten as a <code>String</code>.
3624 * <p>
3625 * To copy the identical <code>URI</code> object including the userinfo
3626 * component, it should be used.
3627 *
3628 * @return a clone of this instance
3629 */
3630 public synchronized Object clone() {
3631
3632 URI instance = new URI();
3633
3634 instance._uri = _uri;
3635 instance._scheme = _scheme;
3636 instance._opaque = _opaque;
3637 instance._authority = _authority;
3638 instance._userinfo = _userinfo;
3639 instance._host = _host;
3640 instance._port = _port;
3641 instance._path = _path;
3642 instance._query = _query;
3643 instance._fragment = _fragment;
3644
3645 instance.protocolCharset = protocolCharset;
3646
3647 instance._is_hier_part = _is_hier_part;
3648 instance._is_opaque_part = _is_opaque_part;
3649 instance._is_net_path = _is_net_path;
3650 instance._is_abs_path = _is_abs_path;
3651 instance._is_rel_path = _is_rel_path;
3652 instance._is_reg_name = _is_reg_name;
3653 instance._is_server = _is_server;
3654 instance._is_hostname = _is_hostname;
3655 instance._is_IPv4address = _is_IPv4address;
3656 instance._is_IPv6reference = _is_IPv6reference;
3657
3658 return instance;
3659 }
3660
3661
3662
3663 /***
3664 * It can be gotten the URI character sequence. It's raw-escaped.
3665 * For the purpose of the protocol to be transported, it will be useful.
3666 * <p>
3667 * It is clearly unwise to use a URL that contains a password which is
3668 * intended to be secret. In particular, the use of a password within
3669 * the 'userinfo' component of a URL is strongly disrecommended except
3670 * in those rare cases where the 'password' parameter is intended to be
3671 * public.
3672 * <p>
3673 * When you want to get each part of the userinfo, you need to use the
3674 * specific methods in the specific URL. It depends on the specific URL.
3675 *
3676 * @return the URI character sequence
3677 */
3678 public char[] getRawURI() {
3679 return _uri;
3680 }
3681
3682
3683 /***
3684 * It can be gotten the URI character sequence. It's escaped.
3685 * For the purpose of the protocol to be transported, it will be useful.
3686 *
3687 * @return the escaped URI string
3688 */
3689 public String getEscapedURI() {
3690 return (_uri == null) ? null : new String(_uri);
3691 }
3692
3693
3694 /***
3695 * It can be gotten the URI character sequence.
3696 *
3697 * @return the original URI string
3698 * @throws URIException incomplete trailing escape pattern or unsupported
3699 * character encoding
3700 * @see #decode
3701 */
3702 public String getURI() throws URIException {
3703 return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3704 }
3705
3706
3707 /***
3708 * Get the URI reference character sequence.
3709 *
3710 * @return the URI reference character sequence
3711 */
3712 public char[] getRawURIReference() {
3713 if (_fragment == null) {
3714 return _uri;
3715 }
3716 if (_uri == null) {
3717 return _fragment;
3718 }
3719
3720 String uriReference = new String(_uri) + "#" + new String(_fragment);
3721 return uriReference.toCharArray();
3722 }
3723
3724
3725 /***
3726 * Get the escaped URI reference string.
3727 *
3728 * @return the escaped URI reference string
3729 */
3730 public String getEscapedURIReference() {
3731 char[] uriReference = getRawURIReference();
3732 return (uriReference == null) ? null : new String(uriReference);
3733 }
3734
3735
3736 /***
3737 * Get the original URI reference string.
3738 *
3739 * @return the original URI reference string
3740 * @throws URIException If {@link #decode} fails.
3741 */
3742 public String getURIReference() throws URIException {
3743 char[] uriReference = getRawURIReference();
3744 return (uriReference == null) ? null : decode(uriReference,
3745 getProtocolCharset());
3746 }
3747
3748
3749 /***
3750 * Get the escaped URI string.
3751 * <p>
3752 * On the document, the URI-reference form is only used without the userinfo
3753 * component like http://jakarta.apache.org/ by the security reason.
3754 * But the URI-reference form with the userinfo component could be parsed.
3755 * <p>
3756 * In other words, this URI and any its subclasses must not expose the
3757 * URI-reference expression with the userinfo component like
3758 * http://user:password@hostport/restricted_zone.<br>
3759 * It means that the API client programmer should extract each user and
3760 * password to access manually. Probably it will be supported in the each
3761 * subclass, however, not a whole URI-reference expression.
3762 *
3763 * @return the escaped URI string
3764 * @see #clone()
3765 */
3766 public String toString() {
3767 return getEscapedURI();
3768 }
3769
3770
3771
3772
3773 /***
3774 * The charset-changed normal operation to represent to be required to
3775 * alert to user the fact the default charset is changed.
3776 */
3777 public static class DefaultCharsetChanged extends RuntimeException {
3778
3779
3780
3781 /***
3782 * The constructor with a reason string and its code arguments.
3783 *
3784 * @param reasonCode the reason code
3785 * @param reason the reason
3786 */
3787 public DefaultCharsetChanged(int reasonCode, String reason) {
3788 super(reason);
3789 this.reason = reason;
3790 this.reasonCode = reasonCode;
3791 }
3792
3793
3794
3795 /*** No specified reason code. */
3796 public static final int UNKNOWN = 0;
3797
3798 /*** Protocol charset changed. */
3799 public static final int PROTOCOL_CHARSET = 1;
3800
3801 /*** Document charset changed. */
3802 public static final int DOCUMENT_CHARSET = 2;
3803
3804
3805
3806 /*** The reason code. */
3807 private int reasonCode;
3808
3809 /*** The reason message. */
3810 private String reason;
3811
3812
3813
3814 /***
3815 * Get the reason code.
3816 *
3817 * @return the reason code
3818 */
3819 public int getReasonCode() {
3820 return reasonCode;
3821 }
3822
3823 /***
3824 * Get the reason message.
3825 *
3826 * @return the reason message
3827 */
3828 public String getReason() {
3829 return reason;
3830 }
3831
3832 }
3833
3834
3835 /***
3836 * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3837 * given locale. Supports all locales recognized in JDK 1.1.
3838 * <p>
3839 * The distribution of this class is Servlets.com. It was originally
3840 * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3841 */
3842 public static class LocaleToCharsetMap {
3843
3844 /*** A mapping of language code to charset */
3845 private static final Hashtable LOCALE_TO_CHARSET_MAP;
3846 static {
3847 LOCALE_TO_CHARSET_MAP = new Hashtable();
3848 LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3849 LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3850 LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3851 LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3852 LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3853 LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3854 LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3855 LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3856 LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3857 LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3858 LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3859 LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3860 LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3861 LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3862 LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3863 LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3864 LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3865 LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3866 LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3867 LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3868 LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3869 LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3870 LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3871 LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3872 LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3873 LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3874 LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3875 LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3876 LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3877 LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3878 LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3879 LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3880 LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3881 LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3882 LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3883 LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3884 LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3885 LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3886 LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3887 }
3888
3889 /***
3890 * Get the preferred charset for the given locale.
3891 *
3892 * @param locale the locale
3893 * @return the preferred charset or null if the locale is not
3894 * recognized.
3895 */
3896 public static String getCharset(Locale locale) {
3897
3898 String charset =
3899 (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3900 if (charset != null) {
3901 return charset;
3902 }
3903
3904
3905 charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3906 return charset;
3907 }
3908
3909 }
3910
3911 }
3912