View Javadoc
1   /*
2    * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.36.2.4 2003/10/11 19:44:27 olegk Exp $
3    * $Revision: 1.36.2.4 $
4    * $Date: 2003/10/11 19:44:27 $
5    *
6    * ====================================================================
7    *
8    * The Apache Software License, Version 1.1
9    *
10   * Copyright (c) 2002-2003 The Apache Software Foundation.  All rights
11   * reserved.
12   *
13   * Redistribution and use in source and binary forms, with or without
14   * modification, are permitted provided that the following conditions
15   * are met:
16   *
17   * 1. Redistributions of source code must retain the above copyright
18   *    notice, this list of conditions and the following disclaimer.
19   *
20   * 2. Redistributions in binary form must reproduce the above copyright
21   *    notice, this list of conditions and the following disclaimer in
22   *    the documentation and/or other materials provided with the
23   *    distribution.
24   *
25   * 3. The end-user documentation included with the redistribution, if
26   *    any, must include the following acknowlegement:
27   *       "This product includes software developed by the
28   *        Apache Software Foundation (http://www.apache.org/)."
29   *    Alternately, this acknowlegement may appear in the software itself,
30   *    if and wherever such third-party acknowlegements normally appear.
31   *
32   * 4. The names "The Jakarta Project", "Commons", and "Apache Software
33   *    Foundation" must not be used to endorse or promote products derived
34   *    from this software without prior written permission. For written
35   *    permission, please contact apache@apache.org.
36   *
37   * 5. Products derived from this software may not be called "Apache"
38   *    nor may "Apache" appear in their names without prior written
39   *    permission of the Apache Group.
40   *
41   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
42   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
44   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
45   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
48   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
49   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
50   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
51   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52   * SUCH DAMAGE.
53   * ====================================================================
54   *
55   * This software consists of voluntary contributions made by many
56   * individuals on behalf of the Apache Software Foundation.  For more
57   * information on the Apache Software Foundation, please see
58   * <http://www.apache.org/>.
59   *
60   * [Additional notices, if required by prior licensing conditions]
61   *
62   */
63  
64  package org.apache.commons.httpclient;
65  
66  import java.io.IOException;
67  import java.io.ObjectInputStream;
68  import java.io.ObjectOutputStream;
69  import java.io.Serializable;
70  import java.io.UnsupportedEncodingException;
71  import java.util.Locale;
72  import java.util.BitSet;
73  import java.util.Hashtable;
74  import java.net.URL;
75  
76  /***
77   * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
78   * The purpose of this class is to support parsing a URI reference so that it
79   * can be extended for specific protocols, the character encoding of the
80   * protocol to be transported, and the charset of the document.
81   * <p>
82   * A URI is always in an "escaped" form, since escaping or unescaping a
83   * completed URI might change its semantics.  
84   * <p>
85   * Implementers should be careful not to escape or unescape the same string
86   * more than once, since unescaping an already unescaped string might lead to
87   * misinterpreting a percent data character as another escaped character,
88   * or vice versa in the case of escaping an already escaped string.
89   * <p>
90   * In order to avoid these problems, data types are used as follows:
91   * <p><blockquote><pre>
92   *   URI character sequence: char
93   *   octet sequence: byte
94   *   original character sequence: String
95   * </pre></blockquote><p>
96   *
97   * So, a URI is a sequence of characters as an array of a char type, which
98   * is not always represented as a sequence of octets as an array of byte.
99   * <p>
100  * 
101  * URI Syntactic Components
102  * <p><blockquote><pre>
103  * - In general, written as follows:
104  *   Absolute URI = <scheme>:<scheme-specific-part>
105  *   Generic URI = <scheme>://<authority><path>?<query>
106  *
107  * - Syntax
108  *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
109  *   hier_part     = ( net_path | abs_path ) [ "?" query ]
110  *   net_path      = "//" authority [ abs_path ]
111  *   abs_path      = "/"  path_segments
112  * </pre></blockquote><p>
113  *
114  * The following examples illustrate URI that are in common use.
115  * <pre>
116  * ftp://ftp.is.co.za/rfc/rfc1808.txt
117  *    -- ftp scheme for File Transfer Protocol services
118  * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
119  *    -- gopher scheme for Gopher and Gopher+ Protocol services
120  * http://www.math.uio.no/faq/compression-faq/part1.html
121  *    -- http scheme for Hypertext Transfer Protocol services
122  * mailto:mduerst@ifi.unizh.ch
123  *    -- mailto scheme for electronic mail addresses
124  * news:comp.infosystems.www.servers.unix
125  *    -- news scheme for USENET news groups and articles
126  * telnet://melvyl.ucop.edu/
127  *    -- telnet scheme for interactive services via the TELNET Protocol
128  * </pre>
129  * Please, notice that there are many modifications from URL(RFC 1738) and
130  * relative URL(RFC 1808).
131  * <p>
132  * <b>The expressions for a URI</b>
133  * <p><pre>
134  * For escaped URI forms
135  *  - URI(char[]) // constructor
136  *  - char[] getRawXxx() // method
137  *  - String getEscapedXxx() // method
138  *  - String toString() // method
139  * <p>
140  * For unescaped URI forms
141  *  - URI(String) // constructor
142  *  - String getXXX() // method
143  * </pre><p>
144  *
145  * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
146  * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
147  * @version $Revision: 1.36.2.4 $ $Date: 2002/03/14 15:14:01 
148  */
149 public class URI implements Cloneable, Comparable, Serializable {
150 
151 
152     // ----------------------------------------------------------- Constructors
153 
154     /*** Create an instance for internal use. */
155     protected URI() { // intentionally empty: nothing is parsed and every field keeps its declared default
156     }
157 
158 
159     /***
160      * Construct a URI as an escaped form of a character array with the given
161      * charset.
162      *
163      * @param escaped the URI character sequence
164      * @param charset the charset string to do escape encoding
165      * @throws URIException If the URI cannot be created.
166      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
167      * @see #getProtocolCharset
168      */
169     public URI(char[] escaped, String charset) 
170         throws URIException, NullPointerException {
171         protocolCharset = charset; // assigned before parsing; presumably consulted while parsing - confirm in parseUriReference
172         parseUriReference(new String(escaped), true); // new String(null) is what raises the documented NullPointerException; 'true' presumably flags already-escaped input - confirm
173     }
174 
175 
176     /***
177      * Construct a URI as an escaped form of a character array.
178      * An URI can be placed within double-quotes or angle brackets like 
179      * "http://test.com/" and <http://test.com/>
180      * 
181      * @param escaped the URI character sequence
182      * @throws URIException If the URI cannot be created.
183      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
184      * @see #getDefaultProtocolCharset
185      */
186     public URI(char[] escaped) 
187         throws URIException, NullPointerException {
188         parseUriReference(new String(escaped), true); // protocolCharset is not assigned here and keeps its declared default (null)
189     }
190 
191 
192     /***
193      * Construct a URI from the given string with the given charset.
194      *
195      * @param original the string to be represented to URI character sequence
196      * It is one of absoluteURI and relativeURI.
197      * @param charset the charset string to do escape encoding
198      * @throws URIException If the URI cannot be created.
199      * @see #getProtocolCharset
200      */
201     public URI(String original, String charset) throws URIException {
202         protocolCharset = charset; // assigned before parsing; presumably consulted while parsing - confirm in parseUriReference
203         parseUriReference(original, false); // 'false' presumably flags input that is not yet escaped - confirm
204     }
205 
206 
207     /***
208      * Construct a URI from the given string.
209      * <p><blockquote><pre>
210      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
211      * </pre></blockquote><p>
212      * An URI can be placed within double-quotes or angle brackets like 
213      * "http://test.com/" and <http://test.com/>
214      *
215      * @param original the string to be represented to URI character sequence
216      * It is one of absoluteURI and relativeURI.
217      * @throws URIException If the URI cannot be created.
218      * @see #getDefaultProtocolCharset
219      */
220     public URI(String original) throws URIException {
221         parseUriReference(original, false); // protocolCharset is not assigned here and keeps its declared default (null)
222     }
223 
224 
225     /***
226      * Construct a URI from a URL.
227      *
228      * @param url a valid URL.
229      * @throws URIException If the URI cannot be created.
230      * @since 2.0 
231      * @deprecated currently somewhat wrong and different from java.net.URL usage
232      */
233     public URI(URL url) throws URIException {
234         this(url.toString()); // delegates to URI(String); a null url fails here with NullPointerException
235     }
236 
237 
238     /***
239      * Construct a general URI from the given components.
240      * <p><blockquote><pre>
241      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
242      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
243      *   opaque_part   = uric_no_slash *uric
244      * </pre></blockquote><p>
245      * It's for absolute URI = <scheme>:<scheme-specific-part>#
246      * <fragment>.
247      *
248      * @param scheme the scheme string
249      * @param schemeSpecificPart scheme_specific_part
250      * @param fragment the fragment string
251      * @throws URIException If the URI cannot be created.
252      * @see #getDefaultProtocolCharset
253      */
254     public URI(String scheme, String schemeSpecificPart, String fragment)
255         throws URIException {
256         // Builds an opaque absolute URI from a scheme, a scheme-specific part and an optional (nullable) fragment.
257         // validate and construct the URI character sequence
258         if (scheme == null) {
259            throw new URIException(URIException.PARSING, "scheme required");
260         }
261         char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray(); // fixed locale: default-locale casing (e.g. Turkish dotless i) must not corrupt the scheme
262         if (validate(s, URI.scheme)) {
263             _scheme = s; // is_absoluteURI
264         } else {
265             throw new URIException(URIException.PARSING, "incorrect scheme");
266         }
267         _opaque = encode(schemeSpecificPart, allowed_opaque_part,
268                 getProtocolCharset());
269         // Set flag
270         _is_opaque_part = true;
271         _fragment = (fragment == null) ? null : fragment.toCharArray(); // an absent fragment stays null instead of raising NullPointerException
272         // the fragment is stored as given; unlike the scheme-specific part it is not passed through encode()
273         setURI();
274     }
275 
276 
277     /***
278      * Construct a general URI from the given components.
279      * <p><blockquote><pre>
280      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
281      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
282      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
283      *   hier_part     = ( net_path | abs_path ) [ "?" query ]
284      * </pre></blockquote><p>
285      * It's for absolute URI = <scheme>:<path>?<query>#<
286      * fragment> and relative URI = <path>?<query>#<fragment
287      * >.
288      *
289      * @param scheme the scheme string
290      * @param authority the authority string
291      * @param path the path string
292      * @param query the query string
293      * @param fragment the fragment string
294      * @throws URIException If the new URI cannot be created.
295      * @see #getDefaultProtocolCharset
296      */
297     public URI(String scheme, String authority, String path, String query,
298                String fragment) throws URIException {
299         // Concatenate whichever components were supplied into one reference
300         // string and hand that string to parseUriReference.
301         StringBuffer reference = new StringBuffer();
302         if (scheme != null) {
303             // the scheme is followed by its ':' delimiter
304             reference.append(scheme).append(':');
305         }
306         if (authority != null) {
307             // the authority is introduced by "//"
308             reference.append("//").append(authority);
309         }
310         if (path != null) {
311             boolean prefixed = (scheme != null) || (authority != null);
312             if (prefixed && !path.startsWith("/")) { // after a scheme or authority the path has to start with '/'
313                 throw new URIException(URIException.PARSING,
314                         "abs_path requested");
315             }
316             reference.append(path);
317         }
318         if (query != null) {
319             // the query is introduced by '?'
320             reference.append('?').append(query);
321         }
322         if (fragment != null) {
323             // the fragment is introduced by '#'
324             reference.append('#').append(fragment);
325         }
326         parseUriReference(reference.toString(), false);
327     }
328 
329 
330     /***
331      * Construct a general URI from the given components.
332      *
333      * @param scheme the scheme string
334      * @param userinfo the userinfo string
335      * @param host the host string
336      * @param port the port number
337      * @throws URIException If the new URI cannot be created.
338      * @see #getDefaultProtocolCharset
339      */
340     public URI(String scheme, String userinfo, String host, int port)
341         throws URIException {
342         // delegates to the seven-argument constructor with null path, query and fragment
343         this(scheme, userinfo, host, port, null, null, null);
344     }
345 
346 
347     /***
348      * Construct a general URI from the given components.
349      *
350      * @param scheme the scheme string
351      * @param userinfo the userinfo string
352      * @param host the host string
353      * @param port the port number
354      * @param path the path string
355      * @throws URIException If the new URI cannot be created.
356      * @see #getDefaultProtocolCharset
357      */
358     public URI(String scheme, String userinfo, String host, int port,
359             String path) throws URIException {
360         // delegates to the seven-argument constructor with null query and fragment
361         this(scheme, userinfo, host, port, path, null, null);
362     }
363 
364 
365     /***
366      * Construct a general URI from the given components.
367      *
368      * @param scheme the scheme string
369      * @param userinfo the userinfo string
370      * @param host the host string
371      * @param port the port number
372      * @param path the path string
373      * @param query the query string
374      * @throws URIException If the new URI cannot be created.
375      * @see #getDefaultProtocolCharset
376      */
377     public URI(String scheme, String userinfo, String host, int port,
378             String path, String query) throws URIException {
379         // delegates to the seven-argument constructor with a null fragment
380         this(scheme, userinfo, host, port, path, query, null);
381     }
382 
383 
384     /***
385      * Construct a general URI from the given components.
386      *
387      * @param scheme the scheme string
388      * @param userinfo the userinfo string
389      * @param host the host string
390      * @param port the port number
391      * @param path the path string
392      * @param query the query string
393      * @param fragment the fragment string
394      * @throws URIException If the new URI cannot be created.
395      * @see #getDefaultProtocolCharset
396      */
397     public URI(String scheme, String userinfo, String host, int port,
398             String path, String query, String fragment) throws URIException {
399         // the authority is assembled as [userinfo@]host[:port]; a null host gives a null authority and port -1 omits ":port"
400         this(scheme, (host == null) ? null 
401             : ((userinfo != null) ? userinfo + '@' : "") + host 
402                 + ((port != -1) ? ":" + port : ""), path, query, fragment);
403     }
404 
405 
406     /***
407      * Construct a general URI from the given components.
408      *
409      * @param scheme the scheme string
410      * @param host the host string
411      * @param path the path string
412      * @param fragment the fragment string
413      * @throws URIException If the new URI cannot be created.
414      * @see #getDefaultProtocolCharset
415      */
416     public URI(String scheme, String host, String path, String fragment)
417         throws URIException {
418         // host is passed in the authority position of the five-String constructor; the query is null
419         this(scheme, host, path, null, fragment);
420     }
421 
422 
423     /***
424      * Construct a general URI with the given relative URI string.
425      *
426      * @param base the base URI
427      * @param relative the relative URI string
428      * @throws URIException If the new URI cannot be created.
429      */
430     public URI(URI base, String relative) throws URIException {
431         this(base, new URI(relative)); // relative is first parsed by URI(String), then resolved against base
432     }
433 
434 
435     /***
436      * Construct a general URI with the given relative URI.
437      * <p><blockquote><pre>
438      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
439      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
440      * </pre></blockquote><p>
441      * Resolving Relative References to Absolute Form.
442      *
443      * <strong>Examples of Resolving Relative URI References</strong>
444      *
445      * Within an object with a well-defined base URI of
446      * <p><blockquote><pre>
447      *   http://a/b/c/d;p?q
448      * </pre></blockquote><p>
449      * the relative URI would be resolved as follows:
450      *
451      * Normal Examples
452      *
453      * <p><blockquote><pre>
454      *   g:h           =  g:h
455      *   g             =  http://a/b/c/g
456      *   ./g           =  http://a/b/c/g
457      *   g/            =  http://a/b/c/g/
458      *   /g            =  http://a/g
459      *   //g           =  http://g
460      *   ?y            =  http://a/b/c/?y
461      *   g?y           =  http://a/b/c/g?y
462      *   #s            =  (current document)#s
463      *   g#s           =  http://a/b/c/g#s
464      *   g?y#s         =  http://a/b/c/g?y#s
465      *   ;x            =  http://a/b/c/;x
466      *   g;x           =  http://a/b/c/g;x
467      *   g;x?y#s       =  http://a/b/c/g;x?y#s
468      *   .             =  http://a/b/c/
469      *   ./            =  http://a/b/c/
470      *   ..            =  http://a/b/
471      *   ../           =  http://a/b/
472      *   ../g          =  http://a/b/g
473      *   ../..         =  http://a/
474      *   ../../        =  http://a/ 
475      *   ../../g       =  http://a/g
476      * </pre></blockquote><p>
477      *
478      * Some URI schemes do not allow a hierarchical syntax matching the
479      * <hier_part> syntax, and thus cannot use relative references.
480      *
481      * @param base the base URI
482      * @param relative the relative URI
483      * @throws URIException If the new URI cannot be created.
484      */
485     public URI(URI base, URI relative) throws URIException {
486         // the base must carry a scheme, otherwise there is nothing to resolve against
487         if (base._scheme == null) {
488             throw new URIException(URIException.PARSING, "base URI required");
489         }
490         if (base._scheme != null) { // always true here: the null case threw just above
491             this._scheme = base._scheme;
492             this._authority = base._authority;
493         }
494         if (base._is_opaque_part || relative._is_opaque_part) { // opaque on either side: copy the opaque part and return without path resolution
495             this._scheme = base._scheme; // NOTE(review): relative._scheme is not consulted on this path - confirm for an opaque relative such as "g:h"
496             this._is_opaque_part = base._is_opaque_part 
497                 || relative._is_opaque_part;
498             this._opaque = relative._opaque;
499             this._fragment = relative._fragment;
500             this.setURI();
501             return;
502         }
503         if (relative._scheme != null) { // relative brings its own scheme: scheme, authority details and path all come from it
504             this._scheme = relative._scheme;
505             this._is_net_path = relative._is_net_path;
506             this._authority = relative._authority;
507             if (relative._is_server) {
508                 this._is_server = relative._is_server;
509                 this._userinfo = relative._userinfo;
510                 this._host = relative._host;
511                 this._port = relative._port;
512             } else if (relative._is_reg_name) {
513                 this._is_reg_name = relative._is_reg_name;
514             }
515             this._is_abs_path = relative._is_abs_path;
516             this._is_rel_path = relative._is_rel_path;
517             this._path = relative._path;
518         } else if (base._authority != null && relative._scheme == null) { // second test is already implied by the else; inherit the base authority details
519             this._is_net_path = base._is_net_path;
520             this._authority = base._authority;
521             if (base._is_server) {
522                 this._is_server = base._is_server;
523                 this._userinfo = base._userinfo;
524                 this._host = base._host;
525                 this._port = base._port;
526             } else if (base._is_reg_name) {
527                 this._is_reg_name = base._is_reg_name;
528             }
529         }
530         if (relative._authority != null) { // a relative authority overrides whatever was chosen above, together with its path
531             this._is_net_path = relative._is_net_path;
532             this._authority = relative._authority;
533             if (relative._is_server) {
534                 this._is_server = relative._is_server;
535                 this._userinfo = relative._userinfo;
536                 this._host = relative._host;
537                 this._port = relative._port;
538             } else if (relative._is_reg_name) {
539                 this._is_reg_name = relative._is_reg_name;
540             }
541             this._is_abs_path = relative._is_abs_path;
542             this._is_rel_path = relative._is_rel_path;
543             this._path = relative._path;
544         }
545         // resolve the path and query if necessary
546         if (relative._scheme == null && relative._authority == null) {
547             if ((relative._path == null || relative._path.length == 0)
548                 && relative._query == null) {
549                 // handle a reference to the current document, see RFC 2396 
550                 // section 5.2 step 2
551                 this._path = base._path;
552                 this._query = base._query;
553             } else {
554                 this._path = resolvePath(base._path, relative._path);
555             }
556         }
557         // apart from the current-document case above the base query is not inherited; a relative query, when present, wins
558         if (relative._query != null) {
559             this._query = relative._query;
560         }
561         // the base fragment is never inherited; only the fragment of the relative URI is used
562         if (relative._fragment != null) {
563             this._fragment = relative._fragment;
564         }
565         this.setURI();
566         // reparse the newly built URI, this will ensure that all flags are set correctly.
567         // TODO there must be a better way to do this
568         parseUriReference(new String(_uri), true);
569     }
570 
571     // --------------------------------------------------- Instance Variables
572 
573     /*** Version ID for serialization */
574     static final long serialVersionUID = 604752400577948726L;
575 
576 
577     /***
578      * Cache the hash code for this URI.
579      */
580     protected int hash = 0; // NOTE(review): 0 presumably means "not computed yet" - confirm in hashCode()
581 
582 
583     /***
584      * This Uniform Resource Identifier (URI).
585      * The URI is always in an "escaped" form, since escaping or unescaping
586      * a completed URI might change its semantics.  
587      */
588     protected char[] _uri = null; // re-read by the (URI, URI) constructor right after setURI(), so setURI() presumably fills it - confirm
589 
590 
591     /***
592      * The charset of the protocol used by this URI instance.
593      */
594     protected String protocolCharset = null; // assigned only by the constructors that take a charset argument; otherwise left null
595 
596 
597     /***
598      * The default charset of the protocol.  RFC 2277, 2396
599      */
600     protected static String defaultProtocolCharset = "UTF-8"; // static and not final, so a reassignment is shared by every URI instance
601 
602 
603     /***
604      * The default charset of the document.  RFC 2277, 2396
605      * The platform's charset is used for the document by default.
606      */
607     protected static String defaultDocumentCharset = null; // effective default: the locale-derived charset when available, otherwise the platform one
608     protected static String defaultDocumentCharsetByLocale = null; // charset mapped from the default locale
609     protected static String defaultDocumentCharsetByPlatform = null; // value of the "file.encoding" system property, null if it could not be read
610     // Static initializer for defaultDocumentCharset
611     static {
612         Locale locale = Locale.getDefault();
613         // in order to support backward compatibility
614         if (locale != null) {
615             defaultDocumentCharsetByLocale =
616                 LocaleToCharsetMap.getCharset(locale);
617             // set the default document charset
618             defaultDocumentCharset = defaultDocumentCharsetByLocale;
619         }
620         // in order to support platform encoding
621         try {
622             defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
623         } catch(SecurityException ignore) { // deliberate best effort: without permission the platform charset simply stays null
624         }
625         if (defaultDocumentCharset == null) {
626             // fall back to the platform charset when the locale lookup produced nothing
627             defaultDocumentCharset = defaultDocumentCharsetByPlatform;
628         }
629     }
630 
631 
632     /***
633      * The scheme.
634      */
635     protected char[] _scheme = null; // lower-cased by the (scheme, schemeSpecificPart, fragment) constructor before it is stored
636 
637 
638     /***
639      * The opaque.
640      */
641     protected char[] _opaque = null; // encoded scheme-specific part, stored together with _is_opaque_part = true
642 
643 
644     /***
645      * The authority.
646      */
647     protected char[] _authority = null;
648 
649 
650     /***
651      * The userinfo.
652      */
653     protected char[] _userinfo = null;
654 
655 
656     /***
657      * The host.
658      */
659     protected char[] _host = null;
660 
661 
662     /***
663      * The port.
664      */
665     protected int _port = -1; // -1 stands for "no port"; the seven-argument constructor omits ":port" for this value
666 
667 
668     /***
669      * The path.
670      */
671     protected char[] _path = null;
672 
673 
674     /***
675      * The query.
676      */
677     protected char[] _query = null;
678 
679 
680     /***
681      * The fragment.
682      */
683     protected char[] _fragment = null;
684 
685 
686     /***
687      * The root path.
688      */
689     protected static char[] rootPath = { '/' }; // shared, non-final array: its content is mutable by any code that can reach it
690 
691     // ---------------------- Generous characters for each component validation
692 
693     /***
694      * The percent "%" character always has the reserved purpose of being the
695      * escape indicator, it must be escaped as "%25" in order to be used as
696      * data within a URI.
697      */
698     protected static final BitSet percent = new BitSet(256); // 256 is only the initial size in bits
699     // Static initializer for percent
700     static {
701         percent.set('%'); // the single member of this set
702     }
703 
704 
705     /***
706      * BitSet for digit.
707      * <p><blockquote><pre>
708      * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
709      *            "8" | "9"
710      * </pre></blockquote><p>
711      */
712     protected static final BitSet digit = new BitSet(256);
713     // mark the characters '0' through '9'
714     static {
715         for (char c = '0'; c <= '9'; c++) {
716             digit.set(c);
717         }
718     }
719 
720 
721     /***
722      * BitSet for alpha.
723      * <p><blockquote><pre>
724      * alpha         = lowalpha | upalpha
725      * </pre></blockquote><p>
726      */
727     protected static final BitSet alpha = new BitSet(256);
728     // mark the upper-case and lower-case ASCII letters
729     static {
730         for (char upper = 'A'; upper <= 'Z'; upper++) {
731             alpha.set(upper);
732         }
733         for (char lower = 'a'; lower <= 'z'; lower++) {
734             alpha.set(lower);
735         }
736     }
737 
738 
739     /***
740      * BitSet for alphanum (join of alpha & digit).
741      * <p><blockquote><pre>
742      *  alphanum      = alpha | digit
743      * </pre></blockquote><p>
744      */
745     protected static final BitSet alphanum = new BitSet(256);
746     // union of the digit and alpha sets
747     static {
748         alphanum.or(digit);
749         alphanum.or(alpha);
750     }
751 
752 
753     /***
754      * BitSet for hex.
755      * <p><blockquote><pre>
756      * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
757      *                         "a" | "b" | "c" | "d" | "e" | "f"
758      * </pre></blockquote><p>
759      */
760     protected static final BitSet hex = new BitSet(256);
761     // the letters A-F in either case plus the decimal digits
762     static {
763         for (char upper = 'A'; upper <= 'F'; upper++) {
764             hex.set(upper);
765         }
766         for (char lower = 'a'; lower <= 'f'; lower++) {
767             hex.set(lower);
768         }
769         hex.or(digit);
770     }
771 
772 
773     /***
774      * BitSet for escaped.
775      * <p><blockquote><pre>
776      * escaped       = "%" hex hex
777      * </pre></blockquote><p>
778      */
779     protected static final BitSet escaped = new BitSet(256);
780     // characters that can occur in a "%" hex hex triplet: hex digits and '%'
781     static {
782         escaped.or(hex);
783         escaped.or(percent);
784     }
785 
786 
787     /***
788      * BitSet for mark.
789      * <p><blockquote><pre>
790      * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
791      *                 "(" | ")"
792      * </pre></blockquote><p>
793      */
794     protected static final BitSet mark = new BitSet(256);
795     // the nine punctuation marks listed in the grammar rule for mark
796     static {
797         mark.set('(');
798         mark.set(')');
799         mark.set('\'');
800         mark.set('*');
801         mark.set('~');
802         mark.set('!');
803         mark.set('.');
804         mark.set('_');
805         mark.set('-');
806     }
807 
808 
809     /***
810      * Data characters that are allowed in a URI but do not have a reserved
811      * purpose are called unreserved.
812      * <p><blockquote><pre>
813      * unreserved    = alphanum | mark
814      * </pre></blockquote><p>
815      */
816     protected static final BitSet unreserved = new BitSet(256);
817     // union of the mark and alphanum sets
818     static {
819         unreserved.or(mark);
820         unreserved.or(alphanum);
821     }
822 
823 
824     /***
825      * BitSet for reserved.
826      * <p><blockquote><pre>
827      * reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
828      *                 "$" | ","
829      * </pre></blockquote><p>
830      */
831     protected static final BitSet reserved = new BitSet(256);
832     // the ten delimiter characters listed in the grammar rule for reserved
833     static {
834         reserved.set(',');
835         reserved.set('$');
836         reserved.set('+');
837         reserved.set('=');
838         reserved.set('&');
839         reserved.set('@');
840         reserved.set(':');
841         reserved.set('?');
842         reserved.set('/');
843         reserved.set(';');
844     }
845 
846 
847     /***
848      * BitSet for uric.
849      * <p><blockquote><pre>
850      * uric          = reserved | unreserved | escaped
851      * </pre></blockquote><p>
852      */
853     protected static final BitSet uric = new BitSet(256);
854     // union of the escaped, unreserved and reserved sets
855     static {
856         uric.or(escaped);
857         uric.or(unreserved);
858         uric.or(reserved);
859     }
860 
861 
862     /***
863      * BitSet for fragment (alias for uric).
864      * <p><blockquote><pre>
865      * fragment      = *uric
866      * </pre></blockquote><p>
867      */
868     protected static final BitSet fragment = uric; // same BitSet instance as uric, not a copy: a change to either is visible through both
869 
870 
871     /***
872      * BitSet for query (alias for uric).
873      * <p><blockquote><pre>
874      * query         = *uric
875      * </pre></blockquote><p>
876      */
877     protected static final BitSet query = uric; // same BitSet instance as uric, not a copy: a change to either is visible through both
878 
879 
/**
 * BitSet holding the characters allowed in a pchar.
 * <p><blockquote><pre>
 * pchar         = unreserved | escaped |
 *                 ":" | "@" | "&" | "=" | "+" | "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet pchar = new BitSet(256);
// pchar = unreserved + escaped + the literal punctuation from the grammar
static {
    pchar.or(unreserved);
    pchar.or(escaped);
    final String extras = ":@&=+$,";
    for (int i = 0; i < extras.length(); i++) {
        pchar.set(extras.charAt(i));
    }
}
900 
901 
902     /***
903      * BitSet for param (alias for pchar).
904      * <p><blockquote><pre>
905      * param         = *pchar
906      * </pre></blockquote><p>
907      */
908     protected static final BitSet param = pchar;
909 
910 
/**
 * BitSet holding the characters allowed in a path segment.
 * <p><blockquote><pre>
 * segment       = *pchar *( ";" param )
 * </pre></blockquote><p>
 */
protected static final BitSet segment = new BitSet(256);
// segment = pchar + param + the ";" separator between them
static {
    segment.set(';');
    segment.or(pchar);
    segment.or(param);
}
924 
925 
/**
 * BitSet holding the characters allowed in path segments.
 * <p><blockquote><pre>
 * path_segments = segment *( "/" segment )
 * </pre></blockquote><p>
 */
protected static final BitSet path_segments = new BitSet(256);
// path_segments = segment + the "/" separator
static {
    path_segments.or(segment);
    path_segments.set('/');
}
938 
939 
/**
 * BitSet holding the characters allowed in a URI absolute path.
 * <p><blockquote><pre>
 * abs_path      = "/"  path_segments
 * </pre></blockquote><p>
 */
protected static final BitSet abs_path = new BitSet(256);
// abs_path = path_segments + the leading "/"
static {
    abs_path.or(path_segments);
    abs_path.set('/');
}
952 
953 
/**
 * URI bitset for encoding typical non-slash characters.
 * <p><blockquote><pre>
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet uric_no_slash = new BitSet(256);
// Static initializer for uric_no_slash
static {
    uric_no_slash.or(unreserved);
    uric_no_slash.or(escaped);
    uric_no_slash.set(';');
    uric_no_slash.set('?');
    // The grammar lists ":" here; earlier code set ';' a second time
    // instead, which left ':' out of the set.
    uric_no_slash.set(':');
    uric_no_slash.set('@');
    uric_no_slash.set('&');
    uric_no_slash.set('=');
    uric_no_slash.set('+');
    uric_no_slash.set('$');
    uric_no_slash.set(',');
}
976     
977 
/**
 * URI bitset that is the union of uric_no_slash and uric.
 * <p><blockquote><pre>
 * opaque_part   = uric_no_slash *uric
 * </pre></blockquote><p>
 */
protected static final BitSet opaque_part = new BitSet(256);
// Static initializer for opaque_part
static {
    // Deliberately generous: the grammar forbids a slash only as the
    // first character, but a per-character set cannot express position.
    opaque_part.or(uric);
    opaque_part.or(uric_no_slash);
}
991     
992 
/**
 * URI bitset that is the union of the absolute path and opaque part sets.
 * <p><blockquote><pre>
 * path          = [ abs_path | opaque_part ]
 * </pre></blockquote><p>
 */
protected static final BitSet path = new BitSet(256);
// path accepts anything either alternative accepts
static {
    path.or(opaque_part);
    path.or(abs_path);
}
1005 
1006 
1007     /***
1008      * Port, a logical alias for digit.
1009      */
1010     protected static final BitSet port = digit;
1011 
1012 
/**
 * Bitset that combines digit and dot for IPv4address.
 * <p><blockquote><pre>
 * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
 * </pre></blockquote><p>
 */
protected static final BitSet IPv4address = new BitSet(256);
// IPv4address = digits + the "." separator
static {
    IPv4address.set('.');
    IPv4address.or(digit);
}
1025 
1026 
/**
 * Character set for an IPv6 address (RFC 2373).
 * <p><blockquote><pre>
 * IPv6address = hexpart [ ":" IPv4address ]
 * </pre></blockquote><p>
 */
protected static final BitSet IPv6address = new BitSet(256);
// IPv6address = hex digits (hexpart) + ":" + the IPv4address characters
static {
    IPv6address.set(':');
    IPv6address.or(hex);
    IPv6address.or(IPv4address);
}
1040 
1041 
/**
 * Character set for a bracketed IPv6 reference (RFC 2732, 2373).
 * <p><blockquote><pre>
 * IPv6reference   = "[" IPv6address "]"
 * </pre></blockquote><p>
 */
protected static final BitSet IPv6reference = new BitSet(256);
// IPv6reference = IPv6address + the enclosing square brackets
static {
    IPv6reference.or(IPv6address);
    IPv6reference.set('[');
    IPv6reference.set(']');
}
1055 
1056 
/**
 * BitSet holding the characters allowed in a toplabel.
 * <p><blockquote><pre>
 * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
 * </pre></blockquote><p>
 */
protected static final BitSet toplabel = new BitSet(256);
// toplabel = alphanum + "-"
static {
    toplabel.set('-');
    toplabel.or(alphanum);
}
1069 
1070 
1071     /***
1072      * BitSet for domainlabel.
1073      * <p><blockquote><pre>
1074      * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
1075      * </pre></blockquote><p>
1076      */
1077     protected static final BitSet domainlabel = toplabel;
1078 
1079 
/**
 * BitSet holding the characters allowed in a hostname.
 * <p><blockquote><pre>
 * hostname      = *( domainlabel "." ) toplabel [ "." ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostname = new BitSet(256);
// hostname = toplabel + "."
static {
    // domainlabel is the same BitSet object as toplabel, so it is not
    // or-ed in separately.
    hostname.set('.');
    hostname.or(toplabel);
}
1093 
1094 
/**
 * BitSet holding the characters allowed in a host.
 * <p><blockquote><pre>
 * host          = hostname | IPv4address | IPv6reference
 * </pre></blockquote><p>
 */
protected static final BitSet host = new BitSet(256);
// host = hostname + IPv6reference
static {
    // IPv4address is not or-ed in on its own: IPv6reference already
    // contains it by way of IPv6address.
    host.or(IPv6reference);
    host.or(hostname);
}
1108 
1109 
/**
 * BitSet holding the characters allowed in a hostport.
 * <p><blockquote><pre>
 * hostport      = host [ ":" port ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostport = new BitSet(256);
// hostport = host + ":" + port
static {
    hostport.set(':');
    hostport.or(port);
    hostport.or(host);
}
1123 
1124 
/**
 * Bitset holding the characters allowed in userinfo.
 * <p><blockquote><pre>
 * userinfo      = *( unreserved | escaped |
 *                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet userinfo = new BitSet(256);
// userinfo = unreserved + escaped + the literal punctuation from the grammar
static {
    userinfo.or(unreserved);
    userinfo.or(escaped);
    final String extras = ";:&=+$,";
    for (int i = 0; i < extras.length(); i++) {
        userinfo.set(extras.charAt(i));
    }
}
1145 
1146 
/**
 * BitSet for the parts inside the userinfo component, such as the user
 * and the password.
 */
public static final BitSet within_userinfo = new BitSet(256);
// Start from userinfo, then drop the characters reserved within authority
static {
    within_userinfo.or(userinfo);
    final String removed = ";:@?/";
    for (int i = 0; i < removed.length(); i++) {
        within_userinfo.clear(removed.charAt(i));
    }
}
1160 
1161 
/**
 * Bitset holding the characters allowed in a server.
 * <p><blockquote><pre>
 * server        = [ [ userinfo "@" ] hostport ]
 * </pre></blockquote><p>
 */
protected static final BitSet server = new BitSet(256);
// server = userinfo + "@" + hostport
static {
    server.set('@');
    server.or(hostport);
    server.or(userinfo);
}
1175 
1176 
/**
 * BitSet holding the characters allowed in a reg_name.
 * <p><blockquote><pre>
 * reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                     ";" | ":" | "@" | "&" | "=" | "+" )
 * </pre></blockquote><p>
 */
protected static final BitSet reg_name = new BitSet(256);
// reg_name = unreserved + escaped + the literal punctuation from the grammar
static {
    reg_name.or(unreserved);
    reg_name.or(escaped);
    final String extras = "$,;:@&=+";
    for (int i = 0; i < extras.length(); i++) {
        reg_name.set(extras.charAt(i));
    }
}
1198 
1199 
/**
 * BitSet holding the characters allowed in an authority.
 * <p><blockquote><pre>
 * authority     = server | reg_name
 * </pre></blockquote><p>
 */
protected static final BitSet authority = new BitSet(256);
// authority accepts anything either alternative accepts
static {
    authority.or(reg_name);
    authority.or(server);
}
1212 
1213 
/**
 * BitSet holding the characters allowed in a scheme.
 * <p><blockquote><pre>
 * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
 * </pre></blockquote><p>
 */
protected static final BitSet scheme = new BitSet(256);
// scheme = alpha + digit + "+", "-" and "."
static {
    scheme.or(alpha);
    scheme.or(digit);
    final String extras = "+-.";
    for (int i = 0; i < extras.length(); i++) {
        scheme.set(extras.charAt(i));
    }
}
1229 
1230 
/**
 * BitSet holding the characters allowed in a rel_segment.
 * <p><blockquote><pre>
 * rel_segment   = 1*( unreserved | escaped |
 *                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet rel_segment = new BitSet(256);
// rel_segment = unreserved + escaped + the literal punctuation from the grammar
static {
    rel_segment.or(unreserved);
    rel_segment.or(escaped);
    final String extras = ";@&=+$,";
    for (int i = 0; i < extras.length(); i++) {
        rel_segment.set(extras.charAt(i));
    }
}
1251 
1252 
/**
 * BitSet holding the characters allowed in a rel_path.
 * <p><blockquote><pre>
 * rel_path      = rel_segment [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet rel_path = new BitSet(256);
// rel_path = rel_segment + abs_path
static {
    rel_path.or(abs_path);
    rel_path.or(rel_segment);
}
1265 
1266 
/**
 * BitSet holding the characters allowed in a net_path.
 * <p><blockquote><pre>
 * net_path      = "//" authority [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet net_path = new BitSet(256);
// net_path = "/" + authority + abs_path
static {
    net_path.or(authority);
    net_path.or(abs_path);
    net_path.set('/');
}
1280     
1281 
/**
 * BitSet holding the characters allowed in a hier_part.
 * <p><blockquote><pre>
 * hier_part     = ( net_path | abs_path ) [ "?" query ]
 * </pre></blockquote><p>
 */
protected static final BitSet hier_part = new BitSet(256);
// hier_part = net_path + abs_path + query
static {
    // No explicit set('?') is needed: query (uric) already carries it
    // through the reserved set.
    hier_part.or(query);
    hier_part.or(abs_path);
    hier_part.or(net_path);
}
1296 
1297 
/**
 * BitSet holding the characters allowed in a relativeURI.
 * <p><blockquote><pre>
 * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 * </pre></blockquote><p>
 */
protected static final BitSet relativeURI = new BitSet(256);
// relativeURI = net_path + abs_path + rel_path + query
static {
    // No explicit set('?') is needed: query (uric) already carries it
    // through the reserved set.
    relativeURI.or(query);
    relativeURI.or(rel_path);
    relativeURI.or(abs_path);
    relativeURI.or(net_path);
}
1313 
1314 
/**
 * BitSet holding the characters allowed in an absoluteURI.
 * <p><blockquote><pre>
 * absoluteURI   = scheme ":" ( hier_part | opaque_part )
 * </pre></blockquote><p>
 */
protected static final BitSet absoluteURI = new BitSet(256);
// absoluteURI = scheme + ":" + hier_part + opaque_part
static {
    absoluteURI.set(':');
    absoluteURI.or(scheme);
    absoluteURI.or(opaque_part);
    absoluteURI.or(hier_part);
}
1329 
1330 
/**
 * BitSet holding the characters allowed in a URI-reference.
 * <p><blockquote><pre>
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 * </pre></blockquote><p>
 */
protected static final BitSet URI_reference = new BitSet(256);
// URI_reference = absoluteURI + relativeURI + "#" + fragment
static {
    URI_reference.set('#');
    URI_reference.or(fragment);
    URI_reference.or(relativeURI);
    URI_reference.or(absoluteURI);
}
1345 
1346     // ---------------------------- Characters disallowed within the URI syntax
1347     // Excluded US-ASCII Characters are like control, space, delims and unwise
1348 
/**
 * BitSet for control: the codes 0x00 through 0x1F, plus 0x7F.
 */
public static final BitSet control = new BitSet(256);
// Mark the low range one code at a time, then add 0x7F
static {
    int code = 0;
    while (code <= 0x1F) {
        control.set(code);
        code++;
    }
    control.set(0x7F);
}
1360 
/**
 * BitSet for space: contains only the code 0x20.
 */
public static final BitSet space = new BitSet(256);
// The single member is the US-ASCII space character
static {
    space.set(0x20);
}
1369 
1370 
/**
 * BitSet for delims: the characters &lt; &gt; # % and the double quote.
 */
public static final BitSet delims = new BitSet(256);
// Populate delims from a literal list of its members
static {
    final String members = "<>#%\"";
    for (int i = 0; i < members.length(); i++) {
        delims.set(members.charAt(i));
    }
}
1383 
1384 
/**
 * BitSet for unwise: the characters { } | \ ^ [ ] and the backquote.
 */
public static final BitSet unwise = new BitSet(256);
// Static initializer for unwise
static {
    unwise.set('{');
    unwise.set('}');
    unwise.set('|');
    // The backslash must be written as an escaped char literal; the
    // previous text here ('//') was not a valid char literal.
    unwise.set('\\');
    unwise.set('^');
    unwise.set('[');
    unwise.set(']');
    unwise.set('`');
}
1400 
1401 
/**
 * Characters disallowed in a rel_path before escaping: every member of
 * uric that is not also a member of rel_path.
 */
public static final BitSet disallowed_rel_path = new BitSet(256);
// Computed as the set difference uric minus rel_path
static {
    disallowed_rel_path.or(uric);
    disallowed_rel_path.andNot(rel_path);
}
1411 
1412 
/**
 * Characters disallowed in an opaque_part before escaping: every member
 * of uric that is not also a member of opaque_part.
 */
public static final BitSet disallowed_opaque_part = new BitSet(256);
// Computed as the set difference uric minus opaque_part
static {
    disallowed_opaque_part.or(uric);
    disallowed_opaque_part.andNot(opaque_part);
}
1422 
1423     // ----------------------- Characters allowed within and for each component
1424 
/**
 * Characters allowed for the authority component: the authority set
 * with the percent sign removed.
 */
public static final BitSet allowed_authority = new BitSet(256);
// Copy authority, then drop '%'
static {
    allowed_authority.or(authority);
    allowed_authority.clear('%');
}
1434 
1435 
/**
 * Characters allowed for the opaque_part: the opaque_part set with the
 * percent sign removed.
 */
public static final BitSet allowed_opaque_part = new BitSet(256);
// Copy opaque_part, then drop '%'
static {
    allowed_opaque_part.or(opaque_part);
    allowed_opaque_part.clear('%');
}
1445 
1446 
/**
 * Characters allowed for the reg_name: the reg_name set with the
 * percent sign removed.
 */
public static final BitSet allowed_reg_name = new BitSet(256);
// Copy reg_name, then drop '%'
static {
    allowed_reg_name.or(reg_name);
    allowed_reg_name.clear('%');
}
1457 
1458 
/**
 * Characters allowed for the userinfo component: the userinfo set with
 * the percent sign removed.
 */
public static final BitSet allowed_userinfo = new BitSet(256);
// Copy userinfo, then drop '%'
static {
    allowed_userinfo.or(userinfo);
    allowed_userinfo.clear('%');
}
1469 
1470 
/**
 * Characters allowed within the userinfo component: the within_userinfo
 * set with the percent sign removed.
 */
public static final BitSet allowed_within_userinfo = new BitSet(256);
// Copy within_userinfo, then drop '%'
static {
    allowed_within_userinfo.or(within_userinfo);
    allowed_within_userinfo.clear('%');
}
1480 
1481 
/**
 * Characters allowed for the IPv6reference component: the IPv6reference
 * set with the enclosing '[' and ']' removed.
 */
public static final BitSet allowed_IPv6reference = new BitSet(256);
// Copy IPv6reference, then drop the two bracket characters
static {
    allowed_IPv6reference.or(IPv6reference);
    final String brackets = "[]";
    for (int i = 0; i < brackets.length(); i++) {
        allowed_IPv6reference.clear(brackets.charAt(i));
    }
}
1494 
1495 
/**
 * Characters allowed for the host component: hostname together with
 * allowed_IPv6reference, which already has '[' and ']' removed.
 */
public static final BitSet allowed_host = new BitSet(256);
// Union of the two contributing sets
static {
    allowed_host.or(allowed_IPv6reference);
    allowed_host.or(hostname);
}
1506 
1507 
/**
 * Characters allowed within the authority component: server together
 * with reg_name, minus the characters ';', ':', '@', '?' and '/'.
 */
public static final BitSet allowed_within_authority = new BitSet(256);
// Build the union first, then remove the excluded characters
static {
    allowed_within_authority.or(server);
    allowed_within_authority.or(reg_name);
    final String removed = ";:@?/";
    for (int i = 0; i < removed.length(); i++) {
        allowed_within_authority.clear(removed.charAt(i));
    }
}
1522 
1523 
/**
 * Characters allowed for the abs_path: the abs_path set with the members
 * of <code>percent</code> removed.
 */
public static final BitSet allowed_abs_path = new BitSet(256);
// Copy abs_path (which already contains '/'), then subtract percent
static {
    allowed_abs_path.or(abs_path);
    allowed_abs_path.andNot(percent);
}
1534 
1535 
/**
 * Characters allowed for the rel_path: the rel_path set with the percent
 * sign removed.
 */
public static final BitSet allowed_rel_path = new BitSet(256);
// Copy rel_path, then drop '%'
static {
    allowed_rel_path.or(rel_path);
    allowed_rel_path.clear('%');
}
1545 
1546 
/**
 * Characters allowed within the path: the abs_path set minus the
 * characters '/', ';', '=' and '?'.
 */
public static final BitSet allowed_within_path = new BitSet(256);
// Copy abs_path, then remove the excluded characters
static {
    allowed_within_path.or(abs_path);
    final String removed = "/;=?";
    for (int i = 0; i < removed.length(); i++) {
        allowed_within_path.clear(removed.charAt(i));
    }
}
1559 
1560 
/**
 * Characters allowed for the query component: the uric set with the
 * percent sign removed.
 */
public static final BitSet allowed_query = new BitSet(256);
// Copy uric, then drop '%'
static {
    allowed_query.or(uric);
    allowed_query.clear('%');
}
1570 
1571 
/**
 * Characters allowed within the query component: allowed_query with
 * every member of reserved removed.
 */
public static final BitSet allowed_within_query = new BitSet(256);
// Computed as the set difference allowed_query minus reserved
static {
    allowed_within_query.or(allowed_query);
    allowed_within_query.andNot(reserved);
}
1581 
1582 
/**
 * Characters allowed for the fragment component: the uric set with the
 * percent sign removed.
 */
public static final BitSet allowed_fragment = new BitSet(256);
// Copy uric, then drop '%'
static {
    allowed_fragment.or(uric);
    allowed_fragment.clear('%');
}
1592 
1593     // ------------------------------------------- Flags for this URI-reference
1594 
1595     // TODO: Figure out what all these variables are for and provide javadoc
1596 
1597     // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1598     // absoluteURI   = scheme ":" ( hier_part | opaque_part )
1599     protected boolean _is_hier_part;
1600     protected boolean _is_opaque_part;
1601     // relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ] 
1602     // hier_part     = ( net_path | abs_path ) [ "?" query ]
1603     protected boolean _is_net_path;
1604     protected boolean _is_abs_path;
1605     protected boolean _is_rel_path;
1606     // net_path      = "//" authority [ abs_path ] 
1607     // authority     = server | reg_name
1608     protected boolean _is_reg_name;
1609     protected boolean _is_server;  // = _has_server
1610     // server        = [ [ userinfo "@" ] hostport ]
1611     // host          = hostname | IPv4address | IPv6reference
1612     protected boolean _is_hostname;
1613     protected boolean _is_IPv4address;
1614     protected boolean _is_IPv6reference;
1615 
1616     // ------------------------------------------ Character and escape encoding
1617     
1618     /***
1619      * Encodes URI string.
1620      *
1621      * This is a two mapping, one from original characters to octets, and
1622      * subsequently a second from octets to URI characters:
1623      * <p><blockquote><pre>
1624      *   original character sequence->octet sequence->URI character sequence
1625      * </pre></blockquote><p>
1626      *
1627      * An escaped octet is encoded as a character triplet, consisting of the
1628      * percent character "%" followed by the two hexadecimal digits
1629      * representing the octet code. For example, "%20" is the escaped
1630      * encoding for the US-ASCII space character.
1631      * <p>
1632      * Conversion from the local filesystem character set to UTF-8 will
1633      * normally involve a two step process. First convert the local character
1634      * set to the UCS; then convert the UCS to UTF-8.
1635      * The first step in the process can be performed by maintaining a mapping
1636      * table that includes the local character set code and the corresponding
1637      * UCS code.
1638      * The next step is to convert the UCS character code to the UTF-8 encoding.
1639      * <p>
1640      * Mapping between vendor codepages can be done in a very similar manner
1641      * as described above.
1642      * <p>
1643      * The only time escape encodings can allowedly be made is when a URI is
1644      * being created from its component parts.  The escape and validate methods
1645      * are internally performed within this method.
1646      *
1647      * @param original the original character sequence
1648      * @param allowed those characters that are allowed within a component
1649      * @param charset the protocol charset
1650      * @return URI character sequence
1651      * @throws URIException null component or unsupported character encoding
1652      */
1653     protected static char[] encode(String original, BitSet allowed,
1654             String charset) throws URIException {
1655 
1656         // encode original to uri characters.
1657         if (original == null) {
1658             throw new URIException(URIException.PARSING, "null");
1659         }
1660         // escape octet to uri characters.
1661         if (allowed == null) {
1662             throw new URIException(URIException.PARSING,
1663                     "null allowed characters");
1664         }
1665         byte[] octets;
1666         try {
1667             octets = original.getBytes(charset);
1668         } catch (UnsupportedEncodingException error) {
1669             throw new URIException(URIException.UNSUPPORTED_ENCODING, charset);
1670         }
1671         StringBuffer buf = new StringBuffer(octets.length);
1672         for (int i = 0; i < octets.length; i++) {
1673             char c = (char) octets[i];
1674             if (allowed.get(c)) {
1675                 buf.append(c);
1676             } else {
1677                 buf.append('%');
1678                 byte b = octets[i]; // use the original byte value
1679                 char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
1680                 buf.append(Character.toUpperCase(hexadecimal)); // high
1681                 hexadecimal = Character.forDigit(b & 0xF, 16);
1682                 buf.append(Character.toUpperCase(hexadecimal)); // low
1683             }
1684         }
1685 
1686         return buf.toString().toCharArray();
1687     }
1688 
1689 
1690     /***
1691      * Decodes URI encoded string.
1692      *
1693      * This is a two mapping, one from URI characters to octets, and
1694      * subsequently a second from octets to original characters:
1695      * <p><blockquote><pre>
1696      *   URI character sequence->octet sequence->original character sequence
1697      * </pre></blockquote><p>
1698      *
1699      * A URI must be separated into its components before the escaped
1700      * characters within those components can be allowedly decoded.
1701      * <p>
1702      * Notice that there is a chance that URI characters that are non UTF-8
1703      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1704      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1705      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1706      * false reading.
1707      * <p>
1708      * The percent "%" character always has the reserved purpose of being
1709      * the escape indicator, it must be escaped as "%25" in order to be used
1710      * as data within a URI.
1711      * <p>
1712      * The unescape method is internally performed within this method.
1713      *
1714      * @param component the URI character sequence
1715      * @param charset the protocol charset
1716      * @return original character sequence
1717      * @throws URIException incomplete trailing escape pattern or unsupported
1718      * character encoding
1719      */
1720     protected static String decode(char[] component, String charset)
1721         throws URIException {
1722 
1723         // unescape uri characters to octets
1724         if (component == null) {  
1725             return null;
1726         }
1727 
1728         byte[] octets;
1729         try {
1730             octets = new String(component).getBytes(charset);
1731         } catch (UnsupportedEncodingException error) {
1732             throw new URIException(URIException.UNSUPPORTED_ENCODING,
1733                     "not supported " + charset + " encoding");
1734         }
1735         int length = octets.length;
1736         int oi = 0; // output index
1737         for (int ii = 0; ii < length; oi++) {
1738             byte aByte = (byte) octets[ii++];
1739             if (aByte == '%' && ii + 2 <= length)  {
1740                 byte high = (byte) Character.digit((char) octets[ii++], 16);
1741                 byte low = (byte) Character.digit((char) octets[ii++], 16);
1742                 if (high == -1 || low == -1) {
1743                     throw new URIException(URIException.ESCAPING,
1744                             "incomplete trailing escape pattern");
1745                             
1746                 }
1747                 aByte = (byte) ((high << 4) + low);
1748             }
1749             octets[oi] = (byte) aByte;
1750         }
1751 
1752         String result;
1753         try {
1754             result = new String(octets, 0, oi, charset);
1755         } catch (UnsupportedEncodingException error) {
1756             throw new URIException(URIException.UNSUPPORTED_ENCODING,
1757                     "not supported " + charset + " encoding");
1758         }
1759 
1760         return result;
1761     }
1762 
1763 
1764     /***
1765      * Pre-validate the unescaped URI string within a specific component.
1766      *
1767      * @param component the component string within the component
1768      * @param disallowed those characters disallowed within the component
1769      * @return if true, it doesn't have the disallowed characters
1770      * if false, the component is undefined or an incorrect one
1771      */
1772     protected boolean prevalidate(String component, BitSet disallowed) {
1773         // prevalidate the given component by disallowed characters
1774         if (component == null) {
1775             return false; // undefined
1776         }
1777         char[] target = component.toCharArray();
1778         for (int i = 0; i < target.length; i++) {
1779             if (disallowed.get(target[i])) {
1780                 return false;
1781             }
1782         }
1783         return true;
1784     }
1785 
1786 
1787     /***
1788      * Validate the URI characters within a specific component.
1789      * The component must be performed after escape encoding. Or it doesn't
1790      * include escaped characters.
1791      *
1792      * @param component the characters sequence within the component
1793      * @param generous those characters that are allowed within a component
1794      * @return if true, it's the correct URI character sequence
1795      */
1796     protected boolean validate(char[] component, BitSet generous) {
1797         // validate each component by generous characters
1798         return validate(component, 0, -1, generous);
1799     }
1800 
1801 
1802     /***
1803      * Validate the URI characters within a specific component.
1804      * The component must be performed after escape encoding. Or it doesn't
1805      * include escaped characters.
1806      * <p>
1807      * It's not that much strict, generous.  The strict validation might be 
1808      * performed before being called this method.
1809      *
1810      * @param component the characters sequence within the component
1811      * @param soffset the starting offset of the given component
1812      * @param eoffset the ending offset of the given component
1813      * if -1, it means the length of the component
1814      * @param generous those characters that are allowed within a component
1815      * @return if true, it's the correct URI character sequence
1816      */
1817     protected boolean validate(char[] component, int soffset, int eoffset,
1818             BitSet generous) {
1819         // validate each component by generous characters
1820         if (eoffset == -1) {
1821             eoffset = component.length - 1;
1822         }
1823         for (int i = soffset; i <= eoffset; i++) {
1824             if (!generous.get(component[i])) { 
1825                 return false;
1826             }
1827         }
1828         return true;
1829     }
1830 
1831 
1832     /***
1833      * In order to avoid any possibility of conflict with non-ASCII characters,
1834      * Parse a URI reference as a <code>String</code> with the character
1835      * encoding of the local system or the document.
1836      * <p>
1837      * The following line is the regular expression for breaking-down a URI
1838      * reference into its components.
1839      * <p><blockquote><pre>
1840      *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1841      *    12            3  4          5       6  7        8 9
1842      * </pre></blockquote><p>
1843      * For example, matching the above expression to
1844      *   http://jakarta.apache.org/ietf/uri/#Related
1845      * results in the following subexpression matches:
1846      * <p><blockquote><pre>
1847      *               $1 = http:
1848      *  scheme    =  $2 = http
1849      *               $3 = //jakarta.apache.org
1850      *  authority =  $4 = jakarta.apache.org
1851      *  path      =  $5 = /ietf/uri/
1852      *               $6 = <undefined>
1853      *  query     =  $7 = <undefined>
1854      *               $8 = #Related
1855      *  fragment  =  $9 = Related
1856      * </pre></blockquote><p>
          * The expression is not executed; the components are located by hand
          * with index searches, in the order scheme, authority, path, query and
          * fragment.  The input is trimmed first and one enclosing pair of
          * delimiter characters is removed.  The parsed components and the
          * classification flags are stored in the fields of this instance, and
          * <code>setURI()</code> is called at the end.
1857      *
1858      * @param original the original character sequence
1859      * @param escaped <code>true</code> if <code>original</code> is escaped
1860      * @throws URIException If an error occurs.
1861      */
1862     protected void parseUriReference(String original, boolean escaped)
1863         throws URIException {
1864 
1865         // validate and construct the URI character sequence
1866         if (original == null) {
1867             throw new URIException("URI-Reference required");
1868         }
1869 
1870         /* @
1871          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1872          */
             // leading and trailing whitespace is dropped before parsing
1873         String tmp = original.trim();
1874         
1875         /*
1876          * The length of the string sequence of characters.
1877          * It may not be equal to the length of the byte array.
1878          */
1879         int length = tmp.length();
1880 
1881         /*
1882          * Remove the delimiters like angle brackets around an URI.
1883          */
             // The first and the last character are each tested against the
             // delims set on their own, so they need not form a matching pair.
             // Only one enclosing layer is removed, and length is kept in sync.
1884         if (length > 0) {
1885             char[] firstDelimiter = { tmp.charAt(0) };
1886             if (validate(firstDelimiter, delims)) {
1887                 if (length >= 2) {
1888                     char[] lastDelimiter = { tmp.charAt(length - 1) };
1889                     if (validate(lastDelimiter, delims)) {
1890                         tmp = tmp.substring(1, length - 1);
1891                         length = length - 2;
1892                     }
1893                 }
1894             }
1895         }
1896 
1897         /*
1898          * The starting index
1899          */
1900         int from = 0;
1901 
1902         /*
1903          * The test flag whether the URI is started from the path component.
1904          */
             // True when there is no ':' at all, or when a '/' occurs before the
             // first ':'.  In that case ':' is not searched for as a scheme
             // delimiter below.
1905         boolean isStartedFromPath = false;
1906         int atColon = tmp.indexOf(':');
1907         int atSlash = tmp.indexOf('/');
1908         if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) {
1909             isStartedFromPath = true;
1910         }
1911 
1912         /*
1913          * <p><blockquote><pre>
1914          *     @@@@@@@@
1915          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1916          * </pre></blockquote><p>
1917          */
             // at is the index of the first delimiter.  When none is found it
             // falls back to 0 so that the index tests below stay non-negative.
1918         int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1919         if (at == -1) { 
1920             at = 0;
1921         }
1922 
1923         /*
1924          * Parse the scheme.
1925          * <p><blockquote><pre>
1926          *  scheme    =  $2 = http
1927          *              @
1928          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1929          * </pre></blockquote><p>
1930          */
             // The scheme is stored in lower case.  Afterwards both from and at
             // point at the character that follows the ':'.
1931         if (at < length && tmp.charAt(at) == ':') {
1932             char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1933             if (validate(target, scheme)) {
1934                 _scheme = target;
1935             } else {
1936                 throw new URIException("incorrect scheme");
1937             }
1938             from = ++at;
1939         }
1940 
1941         /*
1942          * Parse the authority component.
1943          * <p><blockquote><pre>
1944          *  authority =  $4 = jakarta.apache.org
1945          *                  @@
1946          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1947          * </pre></blockquote><p>
1948          */
1949         // Reset flags
             // NOTE(review): _is_opaque_part is set further down but is not part
             // of this reset.
1950         _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1951         if (0 <= at && at < length && tmp.charAt(at) == '/') {
1952             // Set flag
1953             _is_hier_part = true;
                 // "//" introduces an authority only when at least one more
                 // character follows it (at + 2 < length).
1954             if (at + 2 < length && tmp.charAt(at + 1) == '/') {
1955                 // the temporary index to start the search from
1956                 int next = indexFirstOf(tmp, "/?#", at + 2);
                     // NOTE(review): at + 2 < length holds here, so the
                     // substring below is never empty and tmp.length() is
                     // always the value chosen.
1957                 if (next == -1) {
1958                     next = (tmp.substring(at + 2).length() == 0) ? at + 2 
1959                         : tmp.length();
1960                 }
1961                 parseAuthority(tmp.substring(at + 2, next), escaped);
1962                 from = at = next;
1963                 // Set flag
1964                 _is_net_path = true;
1965             }
                 // from equals at when the '/' is where the unparsed remainder
                 // begins, and also right after an authority was consumed
                 // (both were just set to next).
1966             if (from == at) {
1967                 // Set flag
1968                 _is_abs_path = true;
1969             }
1970         }
1971 
1972         /*
1973          * Parse the path component.
1974          * <p><blockquote><pre>
1975          *  path      =  $5 = /ietf/uri/
1976          *                                @@@@@@
1977          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1978          * </pre></blockquote><p>
1979          */
             // The path runs from from up to the first '?' or '#', or to the end
             // of the input.
1980         if (from < length) {
1981             // rel_path = rel_segment [ abs_path ]
1982             int next = indexFirstOf(tmp, "?#", from);
1983             if (next == -1) {
1984                 next = tmp.length();
1985             }
                 // In the conditions below && binds tighter than ||: unescaped
                 // input is checked with prevalidate() against a disallowed set,
                 // escaped input with validate() against an allowed set.
1986             if (!_is_abs_path) {
1987                 if (!escaped 
1988                     && prevalidate(tmp.substring(from, next), disallowed_rel_path) 
1989                     || escaped 
1990                     && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
1991                     // Set flag
1992                     _is_rel_path = true;
1993                 } else if (!escaped 
1994                     && prevalidate(tmp.substring(from, next), disallowed_opaque_part) 
1995                     || escaped 
1996                     && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
1997                     // Set flag
1998                     _is_opaque_part = true;
1999                 } else {
2000                     // the path component may be empty
                         // NOTE(review): setRawPath()/setPath() is still called
                         // right after this block; presumably it assigns the
                         // path again -- confirm against those methods.
2001                     _path = null;
2002                 }
2003             }
2004             if (escaped) {
2005                 setRawPath(tmp.substring(from, next).toCharArray());
2006             } else {
2007                 setPath(tmp.substring(from, next));
2008             }
2009             at = next;
2010         }
2011 
2012         // set the charset to do escape encoding
2013         String charset = getProtocolCharset();
2014 
2015         /*
2016          * Parse the query component.
2017          * <p><blockquote><pre>
2018          *  query     =  $7 = <undefined>
2019          *                                        @@@@@@@@@
2020          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2021          * </pre></blockquote><p>
2022          */
             // at + 1 < length: a query is recorded only when at least one
             // character follows the '?'; a '?' at the very end leaves _query
             // untouched.
2023         if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
2024             int next = tmp.indexOf('#', at + 1);
2025             if (next == -1) {
2026                 next = tmp.length();
2027             }
2028             _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() 
2029                 : encode(tmp.substring(at + 1, next), allowed_query, charset);
2030             at = next;
2031         }
2032 
2033         /*
2034          * Parse the fragment component.
2035          * <p><blockquote><pre>
2036          *  fragment  =  $9 = Related
2037          *                                                   @@@@@@@@
2038          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2039          * </pre></blockquote><p>
2040          */
             // Unlike the query, a '#' at the very end is accepted and yields an
             // empty (non-null) fragment.  The fragment is everything after '#'.
2041         if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2042             if (at + 1 == length) { // empty fragment
2043                 _fragment = "".toCharArray();
2044             } else {
2045                 _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() 
2046                     : encode(tmp.substring(at + 1), allowed_fragment, charset);
2047             }
2048         }
2049 
2050         // set this URI.
2051         setURI();
2052     }
2053 
2054 
2055     /***
2056      * Get the earlier index that to be searched for the first occurrance in
2057      * one of any of the given string.
2058      *
2059      * @param s the string to be indexed
2060      * @param delims the delimiters used to index
2061      * @return the earlier index if there are delimiters
2062      */
2063     protected int indexFirstOf(String s, String delims) {
2064         return indexFirstOf(s, delims, -1);
2065     }
2066 
2067 
2068     /***
2069      * Get the earlier index that to be searched for the first occurrance in
2070      * one of any of the given string.
2071      *
2072      * @param s the string to be indexed
2073      * @param delims the delimiters used to index
2074      * @param offset the from index
2075      * @return the earlier index if there are delimiters
2076      */
2077     protected int indexFirstOf(String s, String delims, int offset) {
2078         if (s == null || s.length() == 0) {
2079             return -1;
2080         }
2081         if (delims == null || delims.length() == 0) {
2082             return -1;
2083         }
2084         // check boundaries
2085         if (offset < 0) {
2086             offset = 0;
2087         } else if (offset > s.length()) {
2088             return -1;
2089         }
2090         // s is never null
2091         int min = s.length();
2092         char[] delim = delims.toCharArray();
2093         for (int i = 0; i < delim.length; i++) {
2094             int at = s.indexOf(delim[i], offset);
2095             if (at >= 0 && at < min) {
2096                 min = at;
2097             }
2098         }
2099         return (min == s.length()) ? -1 : min;
2100     }
2101 
2102 
2103     /***
2104      * Get the earlier index that to be searched for the first occurrance in
2105      * one of any of the given array.
2106      *
2107      * @param s the character array to be indexed
2108      * @param delim the delimiter used to index
2109      * @return the ealier index if there are a delimiter
2110      */
2111     protected int indexFirstOf(char[] s, char delim) {
2112         return indexFirstOf(s, delim, 0);
2113     }
2114 
2115 
2116     /***
2117      * Get the earlier index that to be searched for the first occurrance in
2118      * one of any of the given array.
2119      *
2120      * @param s the character array to be indexed
2121      * @param delim the delimiter used to index
2122      * @param offset The offset.
2123      * @return the ealier index if there is a delimiter
2124      */
2125     protected int indexFirstOf(char[] s, char delim, int offset) {
2126         if (s == null || s.length == 0) {
2127             return -1;
2128         }
2129         // check boundaries
2130         if (offset < 0) {
2131             offset = 0;
2132         } else if (offset > s.length) {
2133             return -1;
2134         }
2135         for (int i = offset; i < s.length; i++) {
2136             if (s[i] == delim) {
2137                 return i;
2138             }
2139         }
2140         return -1;
2141     }
2142 
2143 
2144     /***
2145      * Parse the authority component.
2146      *
2147      * @param original the original character sequence of authority component
2148      * @param escaped <code>true</code> if <code>original</code> is escaped
2149      * @throws URIException If an error occurs.
2150      */
2151     protected void parseAuthority(String original, boolean escaped)
2152         throws URIException {
2153 
2154         // Reset flags
2155         _is_reg_name = _is_server =
2156         _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2157 
2158         // set the charset to do escape encoding
2159         String charset = getProtocolCharset();
2160 
2161         boolean hasPort = true;
2162         int from = 0;
2163         int next = original.indexOf('@');
2164         if (next != -1) { // neither -1 and 0
2165             // each protocol extented from URI supports the specific userinfo
2166             _userinfo = (escaped) ? original.substring(0, next).toCharArray() 
2167                 : encode(original.substring(0, next), allowed_userinfo,
2168                         charset);
2169             from = next + 1;
2170         }
2171         next = original.indexOf('[', from);
2172         if (next >= from) {
2173             next = original.indexOf(']', from);
2174             if (next == -1) {
2175                 throw new URIException(URIException.PARSING, "IPv6reference");
2176             } else {
2177                 next++;
2178             }
2179             // In IPv6reference, '[', ']' should be excluded
2180             _host = (escaped) ? original.substring(from, next).toCharArray() 
2181                 : encode(original.substring(from, next), allowed_IPv6reference,
2182                         charset);
2183             // Set flag
2184             _is_IPv6reference = true;
2185         } else { // only for !_is_IPv6reference
2186             next = original.indexOf(':', from);
2187             if (next == -1) {
2188                 next = original.length();
2189                 hasPort = false;
2190             }
2191             // REMINDME: it doesn't need the pre-validation
2192             _host = original.substring(from, next).toCharArray();
2193             if (validate(_host, IPv4address)) {
2194                 // Set flag
2195                 _is_IPv4address = true;
2196             } else if (validate(_host, hostname)) {
2197                 // Set flag
2198                 _is_hostname = true;
2199             } else {
2200                 // Set flag
2201                 _is_reg_name = true;
2202             }
2203         }
2204         if (_is_reg_name) {
2205             // Reset flags for a server-based naming authority
2206             _is_server = _is_hostname = _is_IPv4address =
2207             _is_IPv6reference = false;
2208             // set a registry-based naming authority
2209             _authority = (escaped) ? original.toString().toCharArray() 
2210                 : encode(original.toString(), allowed_reg_name, charset);
2211         } else {
2212             if (original.length() - 1 > next && hasPort 
2213                 && original.charAt(next) == ':') { // not empty
2214                 from = next + 1;
2215                 try {
2216                     _port = Integer.parseInt(original.substring(from));
2217                 } catch (NumberFormatException error) {
2218                     throw new URIException(URIException.PARSING,
2219                             "invalid port number");
2220                 }
2221             }
2222             // set a server-based naming authority
2223             StringBuffer buf = new StringBuffer();
2224             if (_userinfo != null) { // has_userinfo
2225                 buf.append(_userinfo);
2226                 buf.append('@');
2227             }
2228             if (_host != null) {
2229                 buf.append(_host);
2230                 if (_port != -1) {
2231                     buf.append(':');
2232                     buf.append(_port);
2233                 }
2234             }
2235             _authority = buf.toString().toCharArray();
2236             // Set flag
2237             _is_server = true;
2238         }
2239     }
2240 
2241 
2242     /***
2243      * Once it's parsed successfully, set this URI.
2244      *
2245      * @see #getRawURI
2246      */
2247     protected void setURI() {
2248         // set _uri
2249         StringBuffer buf = new StringBuffer();
2250         // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2251         if (_scheme != null) {
2252             buf.append(_scheme);
2253             buf.append(':');
2254         }
2255         if (_is_net_path) {
2256             buf.append("//");
2257             if (_authority != null) { // has_authority
2258                 if (_userinfo != null) { // by default, remove userinfo part
2259                     if (_host != null) {
2260                         buf.append(_host);
2261                         if (_port != -1) {
2262                             buf.append(':');
2263                             buf.append(_port);
2264                         }
2265                     }
2266                 } else {
2267                     buf.append(_authority);
2268                 }
2269             }
2270         }
2271         if (_opaque != null && _is_opaque_part) {
2272             buf.append(_opaque);
2273         } else if (_path != null) {
2274             // _is_hier_part or _is_relativeURI
2275             if (_path.length != 0) {
2276                 buf.append(_path);
2277             }
2278         }
2279         if (_query != null) { // has_query
2280             buf.append('?');
2281             buf.append(_query);
2282         }
2283         // ignore the fragment identifier
2284         _uri = buf.toString().toCharArray();
2285         hash = 0;
2286     }
2287 
2288     // ----------------------------------------------------------- Test methods
2289   
2290 
2291     /***
2292      * Tell whether or not this URI is absolute.
2293      *
2294      * @return true iif this URI is absoluteURI
2295      */
2296     public boolean isAbsoluteURI() {
2297         return (_scheme != null);
2298     }
2299   
2300 
2301     /***
2302      * Tell whether or not this URI is relative.
2303      *
2304      * @return true iif this URI is relativeURI
2305      */
2306     public boolean isRelativeURI() {
2307         return (_scheme == null);
2308     }
2309 
2310 
2311     /***
2312      * Tell whether or not the absoluteURI of this URI is hier_part.
2313      *
2314      * @return true iif the absoluteURI is hier_part
2315      */
2316     public boolean isHierPart() {
2317         return _is_hier_part;
2318     }
2319 
2320 
2321     /***
2322      * Tell whether or not the absoluteURI of this URI is opaque_part.
2323      *
2324      * @return true iif the absoluteURI is opaque_part
2325      */
2326     public boolean isOpaquePart() {
2327         return _is_opaque_part;
2328     }
2329 
2330 
2331     /***
2332      * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2333      * It's the same function as the has_authority() method.
2334      *
2335      * @return true iif the relativeURI or heir_part is net_path
2336      * @see #hasAuthority
2337      */
2338     public boolean isNetPath() {
2339         return _is_net_path || (_authority != null);
2340     }
2341 
2342 
2343     /***
2344      * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2345      *
2346      * @return true iif the relativeURI or hier_part is abs_path
2347      */
2348     public boolean isAbsPath() {
2349         return _is_abs_path;
2350     }
2351 
2352 
2353     /***
2354      * Tell whether or not the relativeURI of this URI is rel_path.
2355      *
2356      * @return true iif the relativeURI is rel_path
2357      */
2358     public boolean isRelPath() {
2359         return _is_rel_path;
2360     }
2361 
2362 
2363     /***
2364      * Tell whether or not this URI has authority.
2365      * It's the same function as the is_net_path() method.
2366      *
2367      * @return true iif this URI has authority
2368      * @see #isNetPath
2369      */
2370     public boolean hasAuthority() {
2371         return (_authority != null) || _is_net_path;
2372     }
2373 
2374     /***
2375      * Tell whether or not the authority component of this URI is reg_name.
2376      *
2377      * @return true iif the authority component is reg_name
2378      */
2379     public boolean isRegName() {
2380         return _is_reg_name;
2381     }
2382   
2383 
2384     /***
2385      * Tell whether or not the authority component of this URI is server.
2386      *
2387      * @return true iif the authority component is server
2388      */
2389     public boolean isServer() {
2390         return _is_server;
2391     }
2392   
2393 
2394     /***
2395      * Tell whether or not this URI has userinfo.
2396      *
2397      * @return true iif this URI has userinfo
2398      */
2399     public boolean hasUserinfo() {
2400         return (_userinfo != null);
2401     }
2402   
2403 
2404     /***
2405      * Tell whether or not the host part of this URI is hostname.
2406      *
2407      * @return true iif the host part is hostname
2408      */
2409     public boolean isHostname() {
2410         return _is_hostname;
2411     }
2412 
2413 
2414     /***
2415      * Tell whether or not the host part of this URI is IPv4address.
2416      *
2417      * @return true iif the host part is IPv4address
2418      */
2419     public boolean isIPv4address() {
2420         return _is_IPv4address;
2421     }
2422 
2423 
2424     /***
2425      * Tell whether or not the host part of this URI is IPv6reference.
2426      *
2427      * @return true iif the host part is IPv6reference
2428      */
2429     public boolean isIPv6reference() {
2430         return _is_IPv6reference;
2431     }
2432 
2433 
2434     /***
2435      * Tell whether or not this URI has query.
2436      *
2437      * @return true iif this URI has query
2438      */
2439     public boolean hasQuery() {
2440         return (_query != null);
2441     }
2442    
2443 
2444     /***
2445      * Tell whether or not this URI has fragment.
2446      *
2447      * @return true iif this URI has fragment
2448      */
2449     public boolean hasFragment() {
2450         return (_fragment != null);
2451     }
2452    
2453    
2454     // ---------------------------------------------------------------- Charset
2455 
2456 
2457     /***
2458      * Set the default charset of the protocol.
2459      * <p>
2460      * The character set used to store files SHALL remain a local decision and
2461      * MAY depend on the capability of local operating systems. Prior to the
2462      * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2463      * and UTF-8 encoded. This approach, while allowing international exchange
2464      * of URIs, will still allow backward compatibility with older systems
2465      * because the code set positions for ASCII characters are identical to the
2466      * one byte sequence in UTF-8.
2467      * <p>
2468      * An individual URI scheme may require a single charset, define a default
2469      * charset, or provide a way to indicate the charset used.
2470      *
2471      * <p>
2472      * Always all the time, the setter method is always succeeded and throws
2473      * <code>DefaultCharsetChanged</code> exception.
2474      *
2475      * So API programmer must follow the following way:
2476      * <code><pre>
2477      *  import org.apache.util.URI$DefaultCharsetChanged;
2478      *      .
2479      *      .
2480      *      .
2481      *  try {
2482      *      URI.setDefaultProtocolCharset("UTF-8");
2483      *  } catch (DefaultCharsetChanged cc) {
2484      *      // CASE 1: the exception could be ignored, when it is set by user
2485      *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
2486      *      // CASE 2: let user know the default protocol charset changed
2487      *      } else {
2488      *      // CASE 2: let user know the default document charset changed
2489      *      }
2490      *  }
2491      *  </pre></code>
2492      *
2493      * The API programmer is responsible to set the correct charset.
2494      * And each application should remember its own charset to support.
2495      *
2496      * @param charset the default charset for each protocol
2497      * @throws DefaultCharsetChanged default charset changed
2498      */
2499     public static void setDefaultProtocolCharset(String charset) 
2500         throws DefaultCharsetChanged {
2501             
2502         defaultProtocolCharset = charset;
2503         throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
2504                 "the default protocol charset changed");
2505     }
2506 
2507 
2508     /***
2509      * Get the default charset of the protocol.
2510      * <p>
2511      * An individual URI scheme may require a single charset, define a default
2512      * charset, or provide a way to indicate the charset used.
2513      * <p>
2514      * To work globally either requires support of a number of character sets
2515      * and to be able to convert between them, or the use of a single preferred
2516      * character set.
2517      * For support of global compatibility it is STRONGLY RECOMMENDED that
2518      * clients and servers use UTF-8 encoding when exchanging URIs.
2519      *
2520      * @return the default charset string
2521      */
2522     public static String getDefaultProtocolCharset() {
2523         return defaultProtocolCharset;
2524     }
2525 
2526 
2527     /***
2528      * Get the protocol charset used by this current URI instance.
2529      * It was set by the constructor for this instance. If it was not set by
2530      * contructor, it will return the default protocol charset.
2531      *
2532      * @return the protocol charset string
2533      * @see #getDefaultProtocolCharset
2534      */
2535     public String getProtocolCharset() {
2536         return (protocolCharset != null) 
2537             ? protocolCharset 
2538             : defaultProtocolCharset;
2539     }
2540 
2541 
2542     /***
2543      * Set the default charset of the document.
2544      * <p>
2545      * Notice that it will be possible to contain mixed characters (e.g.
2546      * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2547      * display of these character sets, the protocol charset could be simply
2548      * used again. Because it's not yet implemented that the insertion of BIDI
2549      * control characters at different points during composition is extracted.
2550      * <p>
2551      *
2552      * Always all the time, the setter method is always succeeded and throws
2553      * <code>DefaultCharsetChanged</code> exception.
2554      *
2555      * So API programmer must follow the following way:
2556      * <code><pre>
2557      *  import org.apache.util.URI$DefaultCharsetChanged;
2558      *      .
2559      *      .
2560      *      .
2561      *  try {
2562      *      URI.setDefaultDocumentCharset("EUC-KR");
2563      *  } catch (DefaultCharsetChanged cc) {
2564      *      // CASE 1: the exception could be ignored, when it is set by user
2565      *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2566      *      // CASE 2: let user know the default document charset changed
2567      *      } else {
2568      *      // CASE 2: let user know the default protocol charset changed
2569      *      }
2570      *  }
2571      *  </pre></code>
2572      *
2573      * The API programmer is responsible to set the correct charset.
2574      * And each application should remember its own charset to support.
2575      *
2576      * @param charset the default charset for the document
2577      * @throws DefaultCharsetChanged default charset changed
2578      */
2579     public static void setDefaultDocumentCharset(String charset) 
2580         throws DefaultCharsetChanged {
2581             
2582         defaultDocumentCharset = charset;
2583         throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2584                 "the default document charset changed");
2585     }
2586 
2587 
2588     /***
2589      * Get the recommended default charset of the document.
2590      *
2591      * @return the default charset string
2592      */
2593     public static String getDefaultDocumentCharset() {
2594         return defaultDocumentCharset;
2595     }
2596 
2597 
2598     /***
2599      * Get the default charset of the document by locale.
2600      *
2601      * @return the default charset string by locale
2602      */
2603     public static String getDefaultDocumentCharsetByLocale() {
2604         return defaultDocumentCharsetByLocale;
2605     }
2606 
2607 
2608     /***
2609      * Get the default charset of the document by platform.
2610      *
2611      * @return the default charset string by platform
2612      */
2613     public static String getDefaultDocumentCharsetByPlatform() {
2614         return defaultDocumentCharsetByPlatform;
2615     }
2616 
2617     // ------------------------------------------------------------- The scheme
2618 
2619     /***
2620      * Get the scheme.
2621      *
2622      * @return the scheme
2623      */
2624     public char[] getRawScheme() {
2625         return _scheme;
2626     }
2627 
2628 
2629     /***
2630      * Get the scheme.
2631      *
2632      * @return the scheme
2633      * null if undefined scheme
2634      */
2635     public String getScheme() {
2636         return (_scheme == null) ? null : new String(_scheme);
2637     }
2638 
2639     // ---------------------------------------------------------- The authority
2640 
2641     /***
2642      * Set the authority.  It can be one type of server, hostport, hostname,
2643      * IPv4address, IPv6reference and reg_name.
2644      * <p><blockquote><pre>
2645      *   authority     = server | reg_name
2646      * </pre></blockquote><p>
2647      *
2648      * @param escapedAuthority the raw escaped authority
2649      * @throws URIException If {@link 
2650      * #parseAuthority(java.lang.String,boolean)} fails
2651      * @throws NullPointerException null authority
2652      */
2653     public void setRawAuthority(char[] escapedAuthority) 
2654         throws URIException, NullPointerException {
2655             
2656         parseAuthority(new String(escapedAuthority), true);
2657         setURI();
2658     }
2659 
2660 
2661     /***
2662      * Set the authority.  It can be one type of server, hostport, hostname,
2663      * IPv4address, IPv6reference and reg_name.
2664      * Note that there is no setAuthority method by the escape encoding reason.
2665      *
2666      * @param escapedAuthority the escaped authority string
2667      * @throws URIException If {@link 
2668      * #parseAuthority(java.lang.String,boolean)} fails
2669      */
2670     public void setEscapedAuthority(String escapedAuthority)
2671         throws URIException {
2672 
2673         parseAuthority(escapedAuthority, true);
2674         setURI();
2675     }
2676 
2677 
2678     /***
2679      * Get the raw-escaped authority.
2680      *
2681      * @return the raw-escaped authority
2682      */
2683     public char[] getRawAuthority() {
2684         return _authority;
2685     }
2686 
2687 
2688     /***
2689      * Get the escaped authority.
2690      *
2691      * @return the escaped authority
2692      */
2693     public String getEscapedAuthority() {
2694         return (_authority == null) ? null : new String(_authority);
2695     }
2696 
2697 
2698     /***
2699      * Get the authority.
2700      *
2701      * @return the authority
2702      * @throws URIException If {@link #decode} fails
2703      */
2704     public String getAuthority() throws URIException {
2705         return (_authority == null) ? null : decode(_authority,
2706                 getProtocolCharset());
2707     }
2708 
2709     // ----------------------------------------------------------- The userinfo
2710 
2711     /***
2712      * Get the raw-escaped userinfo.
2713      *
2714      * @return the raw-escaped userinfo
2715      * @see #getAuthority
2716      */
2717     public char[] getRawUserinfo() {
2718         return _userinfo;
2719     }
2720 
2721 
2722     /***
2723      * Get the escaped userinfo.
2724      *
2725      * @return the escaped userinfo
2726      * @see #getAuthority
2727      */
2728     public String getEscapedUserinfo() {
2729         return (_userinfo == null) ? null : new String(_userinfo);
2730     }
2731 
2732 
2733     /***
2734      * Get the userinfo.
2735      *
2736      * @return the userinfo
2737      * @throws URIException If {@link #decode} fails
2738      * @see #getAuthority
2739      */
2740     public String getUserinfo() throws URIException {
2741         return (_userinfo == null) ? null : decode(_userinfo,
2742                 getProtocolCharset());
2743     }
2744 
2745     // --------------------------------------------------------------- The host
2746 
2747     /***
2748      * Get the host.
2749      * <p><blockquote><pre>
2750      *   host          = hostname | IPv4address | IPv6reference
2751      * </pre></blockquote><p>
2752      *
2753      * @return the host
2754      * @see #getAuthority
2755      */
2756     public char[] getRawHost() {
2757         return _host;
2758     }
2759 
2760 
2761     /***
2762      * Get the host.
2763      * <p><blockquote><pre>
2764      *   host          = hostname | IPv4address | IPv6reference
2765      * </pre></blockquote><p>
2766      *
2767      * @return the host
2768      * @throws URIException If {@link #decode} fails
2769      * @see #getAuthority
2770      */
2771     public String getHost() throws URIException {
2772         return decode(_host, getProtocolCharset());
2773     }
2774 
2775     // --------------------------------------------------------------- The port
2776 
2777     /***
2778      * Get the port.  In order to get the specfic default port, the specific
2779      * protocol-supported class extended from the URI class should be used.
2780      * It has the server-based naming authority.
2781      *
2782      * @return the port
2783      * if -1, it has the default port for the scheme or the server-based
2784      * naming authority is not supported in the specific URI.
2785      */
2786     public int getPort() {
2787         return _port;
2788     }
2789 
2790     // --------------------------------------------------------------- The path
2791 
2792     /***
2793      * Set the raw-escaped path.
2794      *
2795      * @param escapedPath the path character sequence
2796      * @throws URIException encoding error or not proper for initial instance
2797      * @see #encode
2798      */
2799     public void setRawPath(char[] escapedPath) throws URIException {
2800         if (escapedPath == null || escapedPath.length == 0) {
2801             _path = _opaque = escapedPath;
2802             setURI();
2803             return;
2804         }
2805         // remove the fragment identifier
2806         escapedPath = removeFragmentIdentifier(escapedPath);
2807         if (_is_net_path || _is_abs_path) {
2808             if (escapedPath[0] != '/') {
2809                 throw new URIException(URIException.PARSING,
2810                         "not absolute path");
2811             }
2812             if (!validate(escapedPath, abs_path)) {
2813                 throw new URIException(URIException.ESCAPING,
2814                         "escaped absolute path not valid");
2815             }
2816             _path = escapedPath;
2817         } else if (_is_rel_path) {
2818             int at = indexFirstOf(escapedPath, '/');
2819             if (at == 0) {
2820                 throw new URIException(URIException.PARSING, "incorrect path");
2821             }
2822             if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) 
2823                 && !validate(escapedPath, at, -1, abs_path) 
2824                 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2825             
2826                 throw new URIException(URIException.ESCAPING,
2827                         "escaped relative path not valid");
2828             }
2829             _path = escapedPath;
2830         } else if (_is_opaque_part) {
2831             if (!uric_no_slash.get(escapedPath[0]) 
2832                 && !validate(escapedPath, 1, -1, uric)) {
2833                 throw new URIException(URIException.ESCAPING,
2834                     "escaped opaque part not valid");
2835             }
2836             _opaque = escapedPath;
2837         } else {
2838             throw new URIException(URIException.PARSING, "incorrect path");
2839         }
2840         setURI();
2841     }
2842 
2843 
2844     /***
2845      * Set the escaped path.
2846      *
2847      * @param escapedPath the escaped path string
2848      * @throws URIException encoding error or not proper for initial instance
2849      * @see #encode
2850      */
2851     public void setEscapedPath(String escapedPath) throws URIException {
2852         if (escapedPath == null) {
2853             _path = _opaque = null;
2854             setURI();
2855             return;
2856         }
2857         setRawPath(escapedPath.toCharArray());
2858     }
2859 
2860 
2861     /***
2862      * Set the path.
2863      *
2864      * @param path the path string
2865      * @throws URIException set incorrectly or fragment only
2866      * @see #encode
2867      */
2868     public void setPath(String path) throws URIException {
2869 
2870         if (path == null || path.length() == 0) {
2871             _path = _opaque = (path == null) ? null : path.toCharArray();
2872             setURI();
2873             return;
2874         }
2875         // set the charset to do escape encoding
2876         String charset = getProtocolCharset();
2877 
2878         if (_is_net_path || _is_abs_path) {
2879             _path = encode(path, allowed_abs_path, charset);
2880         } else if (_is_rel_path) {
2881             StringBuffer buff = new StringBuffer(path.length());
2882             int at = path.indexOf('/');
2883             if (at == 0) { // never 0
2884                 throw new URIException(URIException.PARSING,
2885                         "incorrect relative path");
2886             }
2887             if (at > 0) {
2888                 buff.append(encode(path.substring(0, at), allowed_rel_path,
2889                             charset));
2890                 buff.append(encode(path.substring(at), allowed_abs_path,
2891                             charset));
2892             } else {
2893                 buff.append(encode(path, allowed_rel_path, charset));
2894             }
2895             _path = buff.toString().toCharArray();
2896         } else if (_is_opaque_part) {
2897             StringBuffer buf = new StringBuffer();
2898             buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2899             buf.insert(1, encode(path.substring(1), uric, charset));
2900             _opaque = buf.toString().toCharArray();
2901         } else {
2902             throw new URIException(URIException.PARSING, "incorrect path");
2903         }
2904         setURI();
2905     }
2906 
2907 
2908     /***
2909      * Resolve the base and relative path.
2910      *
2911      * @param basePath a character array of the basePath
2912      * @param relPath a character array of the relPath
2913      * @return the resolved path
2914      * @throws URIException no more higher path level to be resolved
2915      */
2916     protected char[] resolvePath(char[] basePath, char[] relPath)
2917         throws URIException {
2918 
2919         // REMINDME: paths are never null
2920         String base = (basePath == null) ? "" : new String(basePath);
2921         int at = base.lastIndexOf('/');
2922         if (at != -1) {
2923             basePath = base.substring(0, at + 1).toCharArray();
2924         }
2925         // _path could be empty
2926         if (relPath == null || relPath.length == 0) {
2927             return normalize(basePath);
2928         } else if (relPath[0] == '/') {
2929             return normalize(relPath);
2930         } else {
2931             StringBuffer buff = new StringBuffer(base.length() 
2932                 + relPath.length);
2933             buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2934             buff.append(relPath);
2935             return normalize(buff.toString().toCharArray());
2936         }
2937     }
2938 
2939 
2940     /***
2941      * Get the raw-escaped current hierarchy level in the given path.
2942      * If the last namespace is a collection, the slash mark ('/') should be
2943      * ended with at the last character of the path string.
2944      *
2945      * @param path the path
2946      * @return the current hierarchy level
2947      * @throws URIException no hierarchy level
2948      */
2949     protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2950 
2951         if (_is_opaque_part) {
2952             throw new URIException(URIException.PARSING, "no hierarchy level");
2953         }
2954         if (path == null) {
2955             throw new URIException(URIException.PARSING, "empty path");
2956         }
2957         String buff = new String(path);
2958         int first = buff.indexOf('/');
2959         int last = buff.lastIndexOf('/');
2960         if (last == 0) {
2961             return rootPath;
2962         } else if (first != last && last != -1) {
2963             return buff.substring(0, last).toCharArray();
2964         }
2965         // FIXME: it could be a document on the server side
2966         return path;
2967     }
2968 
2969 
2970     /***
2971      * Get the raw-escaped current hierarchy level.
2972      *
2973      * @return the raw-escaped current hierarchy level
2974      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2975      */
2976     public char[] getRawCurrentHierPath() throws URIException {
2977         return (_path == null) ? null : getRawCurrentHierPath(_path);
2978     }
2979  
2980 
2981     /***
2982      * Get the escaped current hierarchy level.
2983      *
2984      * @return the escaped current hierarchy level
2985      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2986      */
2987     public String getEscapedCurrentHierPath() throws URIException {
2988         char[] path = getRawCurrentHierPath();
2989         return (path == null) ? null : new String(path);
2990     }
2991  
2992 
2993     /***
2994      * Get the current hierarchy level.
2995      *
2996      * @return the current hierarchy level
2997      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2998      * @see #decode
2999      */
3000     public String getCurrentHierPath() throws URIException {
3001         char[] path = getRawCurrentHierPath();
3002         return (path == null) ? null : decode(path, getProtocolCharset());
3003     }
3004 
3005 
3006     /***
3007      * Get the level above the this hierarchy level.
3008      *
3009      * @return the raw above hierarchy level
3010      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3011      */
3012     public char[] getRawAboveHierPath() throws URIException {
3013         char[] path = getRawCurrentHierPath();
3014         return (path == null) ? null : getRawCurrentHierPath(path);
3015     }
3016 
3017 
3018     /***
3019      * Get the level above the this hierarchy level.
3020      *
3021      * @return the raw above hierarchy level
3022      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3023      */
3024     public String getEscapedAboveHierPath() throws URIException {
3025         char[] path = getRawAboveHierPath();
3026         return (path == null) ? null : new String(path);
3027     }
3028 
3029 
3030     /***
3031      * Get the level above the this hierarchy level.
3032      *
3033      * @return the above hierarchy level
3034      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3035      * @see #decode
3036      */
3037     public String getAboveHierPath() throws URIException {
3038         char[] path = getRawAboveHierPath();
3039         return (path == null) ? null : decode(path, getProtocolCharset());
3040     }
3041 
3042 
3043     /***
3044      * Get the raw-escaped path.
3045      * <p><blockquote><pre>
3046      *   path          = [ abs_path | opaque_part ]
3047      * </pre></blockquote><p>
3048      *
3049      * @return the raw-escaped path
3050      */
3051     public char[] getRawPath() {
3052         return _is_opaque_part ? _opaque : _path;
3053     }
3054 
3055 
3056     /***
3057      * Get the escaped path.
3058      * <p><blockquote><pre>
3059      *   path          = [ abs_path | opaque_part ]
3060      *   abs_path      = "/"  path_segments 
3061      *   opaque_part   = uric_no_slash *uric
3062      * </pre></blockquote><p>
3063      *
3064      * @return the escaped path string
3065      */
3066     public String getEscapedPath() {
3067         char[] path = getRawPath();
3068         return (path == null) ? null : new String(path);
3069     }
3070 
3071 
3072     /***
3073      * Get the path.
3074      * <p><blockquote><pre>
3075      *   path          = [ abs_path | opaque_part ]
3076      * </pre></blockquote><p>
3077      * @return the path string
3078      * @throws URIException If {@link #decode} fails.
3079      * @see #decode
3080      */
3081     public String getPath() throws URIException { 
3082         char[] path =  getRawPath();
3083         return (path == null) ? null : decode(path, getProtocolCharset());
3084     }
3085 
3086 
3087     /***
3088      * Get the raw-escaped basename of the path.
3089      *
3090      * @return the raw-escaped basename
3091      */
3092     public char[] getRawName() {
3093         if (_path == null) { 
3094             return null;
3095         }
3096 
3097         int at = 0;
3098         for (int i = _path.length - 1; i >= 0; i--) {
3099             if (_path[i] == '/') {
3100                 at = i + 1;
3101                 break;
3102             }
3103         }
3104         int len = _path.length - at;
3105         char[] basename =  new char[len];
3106         System.arraycopy(_path, at, basename, 0, len);
3107         return basename;
3108     }
3109 
3110 
3111     /***
3112      * Get the escaped basename of the path.
3113      *
3114      * @return the escaped basename string
3115      */
3116     public String getEscapedName() {
3117         char[] basename = getRawName();
3118         return (basename == null) ? null : new String(basename);
3119     }
3120 
3121 
3122     /***
3123      * Get the basename of the path.
3124      *
3125      * @return the basename string
3126      * @throws URIException incomplete trailing escape pattern or unsupported
3127      * character encoding
3128      * @see #decode
3129      */
3130     public String getName() throws URIException {
3131         char[] basename = getRawName();
3132         return (basename == null) ? null : decode(getRawName(),
3133                 getProtocolCharset());
3134     }
3135 
3136     // ----------------------------------------------------- The path and query 
3137 
3138     /***
3139      * Get the raw-escaped path and query.
3140      *
3141      * @return the raw-escaped path and query
3142      */
3143     public char[] getRawPathQuery() {
3144 
3145         if (_path == null && _query == null) {
3146             return null;
3147         }
3148         StringBuffer buff = new StringBuffer();
3149         if (_path != null) {
3150             buff.append(_path);
3151         }
3152         if (_query != null) {
3153             buff.append('?');
3154             buff.append(_query);
3155         }
3156         return buff.toString().toCharArray();
3157     }
3158 
3159 
3160     /***
3161      * Get the escaped query.
3162      *
3163      * @return the escaped path and query string
3164      */
3165     public String getEscapedPathQuery() {
3166         char[] rawPathQuery = getRawPathQuery();
3167         return (rawPathQuery == null) ? null : new String(rawPathQuery);
3168     }
3169 
3170 
3171     /***
3172      * Get the path and query.
3173      *
3174      * @return the path and query string.
3175      * @throws URIException incomplete trailing escape pattern or unsupported
3176      * character encoding
3177      * @see #decode
3178      */
3179     public String getPathQuery() throws URIException {
3180         char[] rawPathQuery = getRawPathQuery();
3181         return (rawPathQuery == null) ? null : decode(rawPathQuery,
3182                 getProtocolCharset());
3183     }
3184 
3185     // -------------------------------------------------------------- The query 
3186 
3187     /***
3188      * Set the raw-escaped query.
3189      *
3190      * @param escapedQuery the raw-escaped query
3191      * @throws URIException escaped query not valid
3192      */
3193     public void setRawQuery(char[] escapedQuery) throws URIException {
3194         if (escapedQuery == null || escapedQuery.length == 0) {
3195             _query = escapedQuery;
3196             setURI();
3197             return;
3198         }
3199         // remove the fragment identifier
3200         escapedQuery = removeFragmentIdentifier(escapedQuery);
3201         if (!validate(escapedQuery, query)) {
3202             throw new URIException(URIException.ESCAPING,
3203                     "escaped query not valid");
3204         }
3205         _query = escapedQuery;
3206         setURI();
3207     }
3208 
3209 
3210     /***
3211      * Set the escaped query string.
3212      *
3213      * @param escapedQuery the escaped query string
3214      * @throws URIException escaped query not valid
3215      */
3216     public void setEscapedQuery(String escapedQuery) throws URIException {
3217         if (escapedQuery == null) {
3218             _query = null;
3219             setURI();
3220             return;
3221         }
3222         setRawQuery(escapedQuery.toCharArray());
3223     }
3224 
3225 
3226     /***
3227      * Set the query.
3228      * <p>
3229      * When a query string is not misunderstood the reserved special characters
3230      * ("&", "=", "+", ",", and "$") within a query component, it is
3231      * recommended to use in encoding the whole query with this method.
3232      * <p>
3233      * The additional APIs for the special purpose using by the reserved
3234      * special characters used in each protocol are implemented in each protocol
3235      * classes inherited from <code>URI</code>.  So refer to the same-named APIs
3236      * implemented in each specific protocol instance.
3237      *
3238      * @param query the query string.
3239      * @throws URIException incomplete trailing escape pattern or unsupported
3240      * character encoding
3241      * @see #encode
3242      */
3243     public void setQuery(String query) throws URIException {
3244         if (query == null || query.length() == 0) {
3245             _query = (query == null) ? null : query.toCharArray();
3246             setURI();
3247             return;
3248         }
3249         setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3250     }
3251 
3252 
3253     /***
3254      * Get the raw-escaped query.
3255      *
3256      * @return the raw-escaped query
3257      */
3258     public char[] getRawQuery() {
3259         return _query;
3260     }
3261 
3262 
3263     /***
3264      * Get the escaped query.
3265      *
3266      * @return the escaped query string
3267      */
3268     public String getEscapedQuery() {
3269         return (_query == null) ? null : new String(_query);
3270     }
3271 
3272 
3273     /***
3274      * Get the query.
3275      *
3276      * @return the query string.
3277      * @throws URIException incomplete trailing escape pattern or unsupported
3278      * character encoding
3279      * @see #decode
3280      */
3281     public String getQuery() throws URIException {
3282         return (_query == null) ? null : decode(_query, getProtocolCharset());
3283     }
3284 
3285     // ----------------------------------------------------------- The fragment 
3286 
3287     /***
3288      * Set the raw-escaped fragment.
3289      *
3290      * @param escapedFragment the raw-escaped fragment
3291      * @throws URIException escaped fragment not valid
3292      */
3293     public void setRawFragment(char[] escapedFragment) throws URIException {
3294         if (escapedFragment == null || escapedFragment.length == 0) {
3295             _fragment = escapedFragment;
3296             hash = 0;
3297             return;
3298         }
3299         if (!validate(escapedFragment, fragment)) {
3300             throw new URIException(URIException.ESCAPING,
3301                     "escaped fragment not valid");
3302         }
3303         _fragment = escapedFragment;
3304         hash = 0;
3305     }
3306 
3307 
3308     /***
3309      * Set the escaped fragment string.
3310      *
3311      * @param escapedFragment the escaped fragment string
3312      * @throws URIException escaped fragment not valid
3313      */
3314     public void setEscapedFragment(String escapedFragment) throws URIException {
3315         if (escapedFragment == null) {
3316             _fragment = null;
3317             hash = 0;
3318             return;
3319         }
3320         setRawFragment(escapedFragment.toCharArray());
3321     }
3322 
3323 
3324     /***
3325      * Set the fragment.
3326      *
3327      * @param fragment the fragment string.
3328      * @throws URIException If an error occurs.
3329      */
3330     public void setFragment(String fragment) throws URIException {
3331         if (fragment == null || fragment.length() == 0) {
3332             _fragment = (fragment == null) ? null : fragment.toCharArray();
3333             hash = 0;
3334             return;
3335         }
3336         _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3337         hash = 0;
3338     }
3339 
3340 
3341     /***
3342      * Get the raw-escaped fragment.
3343      * <p>
3344      * The optional fragment identifier is not part of a URI, but is often used
3345      * in conjunction with a URI.
3346      * <p>
3347      * The format and interpretation of fragment identifiers is dependent on
3348      * the media type [RFC2046] of the retrieval result.
3349      * <p>
3350      * A fragment identifier is only meaningful when a URI reference is
3351      * intended for retrieval and the result of that retrieval is a document
3352      * for which the identified fragment is consistently defined.
3353      *
3354      * @return the raw-escaped fragment
3355      */
3356     public char[] getRawFragment() {
3357         return _fragment;
3358     }
3359 
3360 
3361     /***
3362      * Get the escaped fragment.
3363      *
3364      * @return the escaped fragment string
3365      */
3366     public String getEscapedFragment() {
3367         return (_fragment == null) ? null : new String(_fragment);
3368     }
3369 
3370 
3371     /***
3372      * Get the fragment.
3373      *
3374      * @return the fragment string
3375      * @throws URIException incomplete trailing escape pattern or unsupported
3376      * character encoding
3377      * @see #decode
3378      */
3379     public String getFragment() throws URIException {
3380         return (_fragment == null) ? null : decode(_fragment,
3381                 getProtocolCharset());
3382     }
3383 
3384     // ------------------------------------------------------------- Utilities 
3385 
3386     /***
3387      * Remove the fragment identifier of the given component.
3388      *
3389      * @param component the component that a fragment may be included
3390      * @return the component that the fragment identifier is removed
3391      */
3392     protected char[] removeFragmentIdentifier(char[] component) {
3393         if (component == null) { 
3394             return null;
3395         }
3396         int lastIndex = new String(component).indexOf('#');
3397         if (lastIndex != -1) {
3398             component = new String(component).substring(0,
3399                     lastIndex).toCharArray();
3400         }
3401         return component;
3402     }
3403 
3404 
3405     /***
3406      * Normalize the given hier path part.
3407      * 
3408      * <p>Algorithm taken from URI reference parser at 
3409      * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
3410      *
3411      * @param path the path to normalize
3412      * @return the normalized path
3413      * @throws URIException no more higher path level to be normalized
3414      */
3415     protected char[] normalize(char[] path) throws URIException {
3416 
3417         if (path == null) { 
3418             return null;
3419         }
3420 
3421         String normalized = new String(path);
3422 
3423         // If the buffer begins with "./" or "../", the "." or ".." is removed.
3424         if (normalized.startsWith("./")) {
3425             normalized = normalized.substring(1);
3426         } else if (normalized.startsWith("../")) {
3427             normalized = normalized.substring(2);
3428         } else if (normalized.startsWith("..")) {
3429             normalized = normalized.substring(2);
3430         }
3431 
3432         // All occurrences of "/./" in the buffer are replaced with "/"
3433         int index = -1;
3434         while ((index = normalized.indexOf("/./")) != -1) {
3435             normalized = normalized.substring(0, index) + normalized.substring(index + 2);
3436         }
3437 
3438         // If the buffer ends with "/.", the "." is removed.
3439         if (normalized.endsWith("/.")) {
3440             normalized = normalized.substring(0, normalized.length() - 1);
3441         }
3442 
3443         int startIndex = 0;
3444 
3445         // All occurrences of "/<segment>/../" in the buffer, where ".."
3446         // and <segment> are complete path segments, are iteratively replaced
3447         // with "/" in order from left to right until no matching pattern remains.
3448         // If the buffer ends with "/<segment>/..", that is also replaced
3449         // with "/".  Note that <segment> may be empty.
3450         while ((index = normalized.indexOf("/../", startIndex)) != -1) {
3451             int slashIndex = normalized.lastIndexOf('/', index - 1);
3452             if (slashIndex >= 0) {
3453                 normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
3454             } else {
3455                 startIndex = index + 3;   
3456             }
3457         }
3458         if (normalized.endsWith("/..")) {
3459             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3460             if (slashIndex >= 0) {
3461                 normalized = normalized.substring(0, slashIndex + 1);
3462             }
3463         }
3464 
3465         // All prefixes of "<segment>/../" in the buffer, where ".."
3466         // and <segment> are complete path segments, are iteratively replaced
3467         // with "/" in order from left to right until no matching pattern remains.
3468         // If the buffer ends with "<segment>/..", that is also replaced
3469         // with "/".  Note that <segment> may be empty.
3470         while ((index = normalized.indexOf("/../")) != -1) {
3471             int slashIndex = normalized.lastIndexOf('/', index - 1);
3472             if (slashIndex >= 0) {
3473                 break;
3474             } else {
3475                 normalized = normalized.substring(index + 3);
3476             }
3477         }
3478         if (normalized.endsWith("/..")) {
3479             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3480             if (slashIndex < 0) {
3481                 normalized = "/";
3482             }
3483         }
3484 
3485         return normalized.toCharArray();
3486     }
3487 
3488 
3489     /***
3490      * Normalizes the path part of this URI.  Normalization is only meant to be performed on 
3491      * URIs with an absolute path.  Calling this method on a relative path URI will have no
3492      * effect.
3493      *
3494      * @throws URIException no more higher path level to be normalized
3495      * 
3496      * @see #isAbsPath()
3497      */
3498     public void normalize() throws URIException {
3499         if (isAbsPath()) {
3500             _path = normalize(_path);
3501             setURI();
3502         }
3503     }
3504 
3505 
3506     /***
3507      * Test if the first array is equal to the second array.
3508      *
3509      * @param first the first character array
3510      * @param second the second character array
3511      * @return true if they're equal
3512      */
3513     protected boolean equals(char[] first, char[] second) {
3514 
3515         if (first == null && second == null) {
3516             return true;
3517         }
3518         if (first == null || second == null) {
3519             return false;
3520         }
3521         if (first.length != second.length) {
3522             return false;
3523         }
3524         for (int i = 0; i < first.length; i++) {
3525             if (first[i] != second[i]) {
3526                 return false;
3527             }
3528         }
3529         return true;
3530     }
3531 
3532 
3533     /***
3534      * Test an object if this URI is equal to another.
3535      *
3536      * @param obj an object to compare
3537      * @return true if two URI objects are equal
3538      */
3539     public boolean equals(Object obj) {
3540 
3541         // normalize and test each components
3542         if (obj == this) {
3543             return true;
3544         }
3545         if (!(obj instanceof URI)) {
3546             return false;
3547         }
3548         URI another = (URI) obj;
3549         // scheme
3550         if (!equals(_scheme, another._scheme)) {
3551             return false;
3552         }
3553         // is_opaque_part or is_hier_part?  and opaque
3554         if (!equals(_opaque, another._opaque)) {
3555             return false;
3556         }
3557         // is_hier_part
3558         // has_authority
3559         if (!equals(_authority, another._authority)) {
3560             return false;
3561         }
3562         // path
3563         if (!equals(_path, another._path)) {
3564             return false;
3565         }
3566         // has_query
3567         if (!equals(_query, another._query)) {
3568             return false;
3569         }
3570         // has_fragment?  should be careful of the only fragment case.
3571         if (!equals(_fragment, another._fragment)) {
3572             return false;
3573         }
3574         return true;
3575     }
3576 
3577     // ---------------------------------------------------------- Serialization
3578 
3579     /***
3580      * Write the content of this URI.
3581      *
3582      * @param oos the object-output stream
3583      * @throws IOException If an IO problem occurs.
3584      */
3585     protected void writeObject(ObjectOutputStream oos)
3586         throws IOException {
3587 
3588         oos.defaultWriteObject();
3589     }
3590 
3591 
3592     /***
3593      * Read a URI.
3594      *
3595      * @param ois the object-input stream
3596      * @throws ClassNotFoundException If one of the classes specified in the
3597      * input stream cannot be found.
3598      * @throws IOException If an IO problem occurs.
3599      */
3600     protected void readObject(ObjectInputStream ois)
3601         throws ClassNotFoundException, IOException {
3602 
3603         ois.defaultReadObject();
3604     }
3605 
3606     // -------------------------------------------------------------- Hash code
3607 
3608     /***
3609      * Return a hash code for this URI.
3610      *
3611      * @return a has code value for this URI
3612      */
3613     public int hashCode() {
3614         if (hash == 0) {
3615             char[] c = _uri;
3616             if (c != null) {
3617                 for (int i = 0, len = c.length; i < len; i++) {
3618                     hash = 31 * hash + c[i];
3619                 }
3620             }
3621             c = _fragment;
3622             if (c != null) {
3623                 for (int i = 0, len = c.length; i < len; i++) {
3624                     hash = 31 * hash + c[i];
3625                 }
3626             }
3627         }
3628         return hash;
3629     }
3630 
3631     // ------------------------------------------------------------- Comparison 
3632 
3633     /***
3634      * Compare this URI to another object. 
3635      *
3636      * @param obj the object to be compared.
3637      * @return 0, if it's same,
3638      * -1, if failed, first being compared with in the authority component
3639      * @throws ClassCastException not URI argument
3640      */
3641     public int compareTo(Object obj) throws ClassCastException {
3642 
3643         URI another = (URI) obj;
3644         if (!equals(_authority, another.getRawAuthority())) { 
3645             return -1;
3646         }
3647         return toString().compareTo(another.toString());
3648     }
3649 
3650     // ------------------------------------------------------------------ Clone
3651 
3652     /***
3653      * Create and return a copy of this object, the URI-reference containing
3654      * the userinfo component.  Notice that the whole URI-reference including
3655      * the userinfo component counld not be gotten as a <code>String</code>.
3656      * <p>
3657      * To copy the identical <code>URI</code> object including the userinfo
3658      * component, it should be used.
3659      *
3660      * @return a clone of this instance
3661      */
3662     public synchronized Object clone() {
3663 
3664         URI instance = new URI();
3665 
3666         instance._uri = _uri;
3667         instance._scheme = _scheme;
3668         instance._opaque = _opaque;
3669         instance._authority = _authority;
3670         instance._userinfo = _userinfo;
3671         instance._host = _host;
3672         instance._port = _port;
3673         instance._path = _path;
3674         instance._query = _query;
3675         instance._fragment = _fragment;
3676         // the charset to do escape encoding for this instance
3677         instance.protocolCharset = protocolCharset;
3678         // flags
3679         instance._is_hier_part = _is_hier_part;
3680         instance._is_opaque_part = _is_opaque_part;
3681         instance._is_net_path = _is_net_path;
3682         instance._is_abs_path = _is_abs_path;
3683         instance._is_rel_path = _is_rel_path;
3684         instance._is_reg_name = _is_reg_name;
3685         instance._is_server = _is_server;
3686         instance._is_hostname = _is_hostname;
3687         instance._is_IPv4address = _is_IPv4address;
3688         instance._is_IPv6reference = _is_IPv6reference;
3689 
3690         return instance;
3691     }
3692 
3693     // ------------------------------------------------------------ Get the URI
3694 
3695     /***
3696      * It can be gotten the URI character sequence. It's raw-escaped.
3697      * For the purpose of the protocol to be transported, it will be useful.
3698      * <p>
3699      * It is clearly unwise to use a URL that contains a password which is
3700      * intended to be secret. In particular, the use of a password within
3701      * the 'userinfo' component of a URL is strongly disrecommended except
3702      * in those rare cases where the 'password' parameter is intended to be
3703      * public.
3704      * <p>
3705      * When you want to get each part of the userinfo, you need to use the
3706      * specific methods in the specific URL. It depends on the specific URL.
3707      *
3708      * @return the URI character sequence
3709      */
3710     public char[] getRawURI() {
3711         return _uri;
3712     }
3713 
3714 
3715     /***
3716      * It can be gotten the URI character sequence. It's escaped.
3717      * For the purpose of the protocol to be transported, it will be useful.
3718      *
3719      * @return the escaped URI string
3720      */
3721     public String getEscapedURI() {
3722         return (_uri == null) ? null : new String(_uri);
3723     }
3724     
3725 
3726     /***
3727      * It can be gotten the URI character sequence.
3728      *
3729      * @return the original URI string
3730      * @throws URIException incomplete trailing escape pattern or unsupported
3731      * character encoding
3732      * @see #decode
3733      */
3734     public String getURI() throws URIException {
3735         return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3736     }
3737 
3738 
3739     /***
3740      * Get the URI reference character sequence.
3741      *
3742      * @return the URI reference character sequence
3743      */
3744     public char[] getRawURIReference() {
3745         if (_fragment == null) { 
3746             return _uri;
3747         }
3748         if (_uri == null) { 
3749             return _fragment;
3750         }
3751         // if _uri != null &&  _fragment != null
3752         String uriReference = new String(_uri) + "#" + new String(_fragment);
3753         return uriReference.toCharArray();
3754     }
3755 
3756 
3757     /***
3758      * Get the escaped URI reference string.
3759      *
3760      * @return the escaped URI reference string
3761      */
3762     public String getEscapedURIReference() {
3763         char[] uriReference = getRawURIReference();
3764         return (uriReference == null) ? null : new String(uriReference);
3765     }
3766 
3767 
3768     /***
3769      * Get the original URI reference string.
3770      *
3771      * @return the original URI reference string
3772      * @throws URIException If {@link #decode} fails.
3773      */
3774     public String getURIReference() throws URIException {
3775         char[] uriReference = getRawURIReference();
3776         return (uriReference == null) ? null : decode(uriReference,
3777                 getProtocolCharset());
3778     }
3779 
3780 
3781     /***
3782      * Get the escaped URI string.
3783      * <p>
3784      * On the document, the URI-reference form is only used without the userinfo
3785      * component like http://jakarta.apache.org/ by the security reason.
3786      * But the URI-reference form with the userinfo component could be parsed.
3787      * <p>
3788      * In other words, this URI and any its subclasses must not expose the
3789      * URI-reference expression with the userinfo component like
3790      * http://user:password@hostport/restricted_zone.<br>
3791      * It means that the API client programmer should extract each user and
3792      * password to access manually.  Probably it will be supported in the each
3793      * subclass, however, not a whole URI-reference expression.
3794      *
3795      * @return the escaped URI string
3796      * @see #clone()
3797      */
3798     public String toString() {
3799         return getEscapedURI();
3800     }
3801 
3802 
3803     // ------------------------------------------------------------ Inner class
3804 
3805     /*** 
3806      * The charset-changed normal operation to represent to be required to
3807      * alert to user the fact the default charset is changed.
3808      */
3809     public static class DefaultCharsetChanged extends RuntimeException {
3810 
3811         // ------------------------------------------------------- constructors
3812 
3813         /***
3814          * The constructor with a reason string and its code arguments.
3815          *
3816          * @param reasonCode the reason code
3817          * @param reason the reason
3818          */
3819         public DefaultCharsetChanged(int reasonCode, String reason) {
3820             super(reason);
3821             this.reason = reason;
3822             this.reasonCode = reasonCode;
3823         }
3824 
3825         // ---------------------------------------------------------- constants
3826 
3827         /*** No specified reason code. */
3828         public static final int UNKNOWN = 0;
3829 
3830         /*** Protocol charset changed. */
3831         public static final int PROTOCOL_CHARSET = 1;
3832 
3833         /*** Document charset changed. */
3834         public static final int DOCUMENT_CHARSET = 2;
3835 
3836         // ------------------------------------------------- instance variables
3837 
3838         /*** The reason code. */
3839         private int reasonCode;
3840 
3841         /*** The reason message. */
3842         private String reason;
3843 
3844         // ------------------------------------------------------------ methods
3845 
3846         /***
3847          * Get the reason code.
3848          *
3849          * @return the reason code
3850          */
3851         public int getReasonCode() {
3852             return reasonCode;
3853         }
3854 
3855         /***
3856          * Get the reason message.
3857          *
3858          * @return the reason message
3859          */
3860         public String getReason() {
3861             return reason;
3862         }
3863 
3864     }
3865 
3866 
3867     /*** 
3868      * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3869      * given locale.  Supports all locales recognized in JDK 1.1.
3870      * <p>
3871      * The distribution of this class is Servlets.com.    It was originally
3872      * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3873      */
3874     public static class LocaleToCharsetMap {
3875 
3876         /*** A mapping of language code to charset */
3877         private static final Hashtable LOCALE_TO_CHARSET_MAP;
3878         static {
3879             LOCALE_TO_CHARSET_MAP = new Hashtable();
3880             LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3881             LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3882             LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3883             LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3884             LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3885             LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3886             LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3887             LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3888             LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3889             LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3890             LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3891             LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3892             LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3893             LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3894             LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3895             LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3896             LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3897             LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3898             LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3899             LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3900             LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3901             LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3902             LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3903             LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3904             LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3905             LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3906             LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3907             LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3908             LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3909             LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3910             LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3911             LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3912             LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3913             LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3914             LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3915             LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3916             LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3917             LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3918             LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3919         }
3920        
3921         /***
3922          * Get the preferred charset for the given locale.
3923          *
3924          * @param locale the locale
3925          * @return the preferred charset or null if the locale is not
3926          * recognized.
3927          */
3928         public static String getCharset(Locale locale) {
3929             // try for an full name match (may include country)
3930             String charset =
3931                 (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3932             if (charset != null) { 
3933                 return charset;
3934             }
3935            
3936             // if a full name didn't match, try just the language
3937             charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3938             return charset;  // may be null
3939         }
3940 
3941     }
3942 
3943 }
3944
This page was automatically generated by Maven