1 /*
2 * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.36.2.4 2003/10/11 19:44:27 olegk Exp $
3 * $Revision: 1.36.2.4 $
4 * $Date: 2003/10/11 19:44:27 $
5 *
6 * ====================================================================
7 *
8 * The Apache Software License, Version 1.1
9 *
10 * Copyright (c) 2002-2003 The Apache Software Foundation. All rights
11 * reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 *
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 *
25 * 3. The end-user documentation included with the redistribution, if
26 * any, must include the following acknowlegement:
27 * "This product includes software developed by the
28 * Apache Software Foundation (http://www.apache.org/)."
29 * Alternately, this acknowlegement may appear in the software itself,
30 * if and wherever such third-party acknowlegements normally appear.
31 *
32 * 4. The names "The Jakarta Project", "Commons", and "Apache Software
33 * Foundation" must not be used to endorse or promote products derived
34 * from this software without prior written permission. For written
35 * permission, please contact apache@apache.org.
36 *
37 * 5. Products derived from this software may not be called "Apache"
38 * nor may "Apache" appear in their names without prior written
39 * permission of the Apache Group.
40 *
41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
42 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
44 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
45 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
48 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
49 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
50 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
51 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 * ====================================================================
54 *
55 * This software consists of voluntary contributions made by many
56 * individuals on behalf of the Apache Software Foundation. For more
57 * information on the Apache Software Foundation, please see
58 * <http://www.apache.org/>.
59 *
60 * [Additional notices, if required by prior licensing conditions]
61 *
62 */
63
64 package org.apache.commons.httpclient;
65
66 import java.io.IOException;
67 import java.io.ObjectInputStream;
68 import java.io.ObjectOutputStream;
69 import java.io.Serializable;
70 import java.io.UnsupportedEncodingException;
71 import java.util.Locale;
72 import java.util.BitSet;
73 import java.util.Hashtable;
74 import java.net.URL;
75
76 /***
 * An implementation of URI (Uniform Resource Identifiers) as defined by RFC 2396.
 * This class supports parsing a URI reference and is meant to be extended
 * for specific protocols, taking into account the character encoding of the
 * protocol to be transported and the charset of the document.
81 * <p>
82 * A URI is always in an "escaped" form, since escaping or unescaping a
83 * completed URI might change its semantics.
84 * <p>
85 * Implementers should be careful not to escape or unescape the same string
86 * more than once, since unescaping an already unescaped string might lead to
87 * misinterpreting a percent data character as another escaped character,
88 * or vice versa in the case of escaping an already escaped string.
89 * <p>
 * In order to avoid these problems, data types are used as follows:
91 * <p><blockquote><pre>
92 * URI character sequence: char
93 * octet sequence: byte
94 * original character sequence: String
95 * </pre></blockquote><p>
96 *
97 * So, a URI is a sequence of characters as an array of a char type, which
98 * is not always represented as a sequence of octets as an array of byte.
99 * <p>
100 *
101 * URI Syntactic Components
102 * <p><blockquote><pre>
103 * - In general, written as follows:
104 * Absolute URI = <scheme>:<scheme-specific-part>
105 * Generic URI = <scheme>://<authority><path>?<query>
106 *
107 * - Syntax
108 * absoluteURI = scheme ":" ( hier_part | opaque_part )
109 * hier_part = ( net_path | abs_path ) [ "?" query ]
110 * net_path = "//" authority [ abs_path ]
111 * abs_path = "/" path_segments
112 * </pre></blockquote><p>
113 *
114 * The following examples illustrate URI that are in common use.
115 * <pre>
116 * ftp://ftp.is.co.za/rfc/rfc1808.txt
117 * -- ftp scheme for File Transfer Protocol services
118 * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
119 * -- gopher scheme for Gopher and Gopher+ Protocol services
120 * http://www.math.uio.no/faq/compression-faq/part1.html
121 * -- http scheme for Hypertext Transfer Protocol services
122 * mailto:mduerst@ifi.unizh.ch
123 * -- mailto scheme for electronic mail addresses
124 * news:comp.infosystems.www.servers.unix
125 * -- news scheme for USENET news groups and articles
126 * telnet://melvyl.ucop.edu/
127 * -- telnet scheme for interactive services via the TELNET Protocol
128 * </pre>
129 * Please, notice that there are many modifications from URL(RFC 1738) and
130 * relative URL(RFC 1808).
131 * <p>
132 * <b>The expressions for a URI</b>
133 * <p><pre>
134 * For escaped URI forms
135 * - URI(char[]) // constructor
136 * - char[] getRawXxx() // method
137 * - String getEscapedXxx() // method
138 * - String toString() // method
139 * <p>
140 * For unescaped URI forms
141 * - URI(String) // constructor
142 * - String getXXX() // method
143 * </pre><p>
144 *
145 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
146 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
147 * @version $Revision: 1.36.2.4 $ $Date: 2002/03/14 15:14:01
148 */
149 public class URI implements Cloneable, Comparable, Serializable {
150
151
152 // ----------------------------------------------------------- Constructors
153
/**
 * Creates an instance without parsing anything; every component keeps
 * its declared default value. Protected, i.e. meant for internal use
 * and for subclasses.
 */
protected URI() {
}
157
158
159 /***
160 * Construct a URI as an escaped form of a character array with the given
161 * charset.
162 *
163 * @param escaped the URI character sequence
164 * @param charset the charset string to do escape encoding
165 * @throws URIException If the URI cannot be created.
166 * @throws NullPointerException if <code>escaped</code> is <code>null</code>
167 * @see #getProtocolCharset
168 */
169 public URI(char[] escaped, String charset)
170 throws URIException, NullPointerException {
171 protocolCharset = charset;
172 parseUriReference(new String(escaped), true);
173 }
174
175
176 /***
177 * Construct a URI as an escaped form of a character array.
178 * An URI can be placed within double-quotes or angle brackets like
179 * "http://test.com/" and <http://test.com/>
180 *
181 * @param escaped the URI character sequence
182 * @throws URIException If the URI cannot be created.
183 * @throws NullPointerException if <code>escaped</code> is <code>null</code>
184 * @see #getDefaultProtocolCharset
185 */
186 public URI(char[] escaped)
187 throws URIException, NullPointerException {
188 parseUriReference(new String(escaped), true);
189 }
190
191
/**
 * Builds a URI from an original (not yet escaped) string and records the
 * given charset as the protocol charset of this instance.
 *
 * @param original the string to be represented as a URI character
 * sequence; either an absoluteURI or a relativeURI
 * @param charset the charset string to do escape encoding
 * @throws URIException If the URI cannot be created.
 * @see #getProtocolCharset
 */
public URI(String original, String charset) throws URIException {
    // the charset has to be in place before the reference is parsed
    this.protocolCharset = charset;
    // 'false': the reference is handed over in its original, unescaped form
    parseUriReference(original, false);
}
205
206
/**
 * Builds a URI from an original (not yet escaped) string.
 * <p><blockquote><pre>
 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 * </pre></blockquote><p>
 * An URI can be placed within double-quotes or angle brackets like
 * "http://test.com/" and &lt;http://test.com/&gt;
 *
 * @param original the string to be represented as a URI character
 * sequence; either an absoluteURI or a relativeURI
 * @throws URIException If the URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String original) throws URIException {
    // 'false': the reference is handed over in its original, unescaped form
    parseUriReference(original, false);
}
223
224
/**
 * Builds a URI from the external string form of a URL; the string is
 * handled exactly like the argument of {@link #URI(String)}.
 *
 * @param url a valid URL.
 * @throws URIException If the URI cannot be created.
 * @since 2.0
 * @deprecated currently somewhat wrong and different from java.net.URL usage
 */
public URI(URL url) throws URIException {
    this(url.toExternalForm());
}
236
237
/**
 * Construct an opaque, absolute URI from the given components.
 * <p><blockquote><pre>
 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 *   opaque_part   = uric_no_slash *uric
 * </pre></blockquote><p>
 * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
 * &lt;fragment&gt;.
 * <p>
 * The scheme is lower-cased and validated, the scheme specific part is
 * encoded with the protocol charset and stored as the opaque part, and
 * the fragment is stored as given.
 *
 * @param scheme the scheme string, must not be <code>null</code>
 * @param schemeSpecificPart scheme_specific_part
 * @param fragment the fragment string, or <code>null</code> if the URI
 * has no fragment
 * @throws URIException If the scheme is missing or incorrect, or the URI
 * cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String schemeSpecificPart, String fragment)
    throws URIException {

    // validate and construct the URI character sequence
    if (scheme == null) {
        throw new URIException(URIException.PARSING, "scheme required");
    }
    // Scheme characters are ASCII; lower-case them with a fixed locale so
    // that the result does not depend on the default locale (with a Turkish
    // default locale 'I' would otherwise become a dotless i and fail the
    // validation below).
    char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
    if (validate(s, URI.scheme)) {
        _scheme = s; // is_absoluteURI
    } else {
        throw new URIException(URIException.PARSING, "incorrect scheme");
    }
    _opaque = encode(schemeSpecificPart, allowed_opaque_part,
        getProtocolCharset());
    // Set flag
    _is_opaque_part = true;
    // a missing fragment is legal, keep the field at null in that case
    _fragment = (fragment == null) ? null : fragment.toCharArray();

    setURI();
}
275
276
277 /***
278 * Construct a general URI from the given components.
279 * <p><blockquote><pre>
280 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
281 * absoluteURI = scheme ":" ( hier_part | opaque_part )
282 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
283 * hier_part = ( net_path | abs_path ) [ "?" query ]
284 * </pre></blockquote><p>
285 * It's for absolute URI = <scheme>:<path>?<query>#<
286 * fragment> and relative URI = <path>?<query>#<fragment
287 * >.
288 *
289 * @param scheme the scheme string
290 * @param authority the authority string
291 * @param path the path string
292 * @param query the query string
293 * @param fragment the fragment string
294 * @throws URIException If the new URI cannot be created.
295 * @see #getDefaultProtocolCharset
296 */
297 public URI(String scheme, String authority, String path, String query,
298 String fragment) throws URIException {
299
300 // validate and contruct the URI character sequence
301 StringBuffer buff = new StringBuffer();
302 if (scheme != null) {
303 buff.append(scheme);
304 buff.append(':');
305 }
306 if (authority != null) {
307 buff.append("//");
308 buff.append(authority);
309 }
310 if (path != null) { // accept empty path
311 if ((scheme != null || authority != null)
312 && !path.startsWith("/")) {
313 throw new URIException(URIException.PARSING,
314 "abs_path requested");
315 }
316 buff.append(path);
317 }
318 if (query != null) {
319 buff.append('?');
320 buff.append(query);
321 }
322 if (fragment != null) {
323 buff.append('#');
324 buff.append(fragment);
325 }
326 parseUriReference(buff.toString(), false);
327 }
328
329
/**
 * Construct a general URI from scheme, userinfo, host and port only.
 * Delegates to the seven-argument constructor with <code>null</code>
 * for path, query and fragment.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port)
    throws URIException {

    this(scheme, userinfo, host, port,
        null,  // path
        null,  // query
        null); // fragment
}
345
346
/**
 * Construct a general URI from scheme, userinfo, host, port and path.
 * Delegates to the seven-argument constructor with <code>null</code>
 * for query and fragment.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port,
    String path) throws URIException {

    this(scheme, userinfo, host, port, path,
        null,  // query
        null); // fragment
}
363
364
/**
 * Construct a general URI from scheme, userinfo, host, port, path and
 * query. Delegates to the seven-argument constructor with
 * <code>null</code> for the fragment.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @param query the query string
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port,
    String path, String query) throws URIException {

    this(scheme, userinfo, host, port, path, query,
        null); // fragment
}
382
383
/**
 * Construct a general URI from the given components.
 * <p>
 * Userinfo, host and port are combined into an authority of the form
 * <code>[userinfo@]host[:port]</code>, which is then passed on together
 * with the remaining components to the
 * (scheme, authority, path, query, fragment) constructor.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string, left out when <code>null</code>
 * @param host the host string; when <code>null</code> no authority is
 * used at all, whatever userinfo and port are
 * @param port the port number, left out when <code>-1</code>
 * @param path the path string
 * @param query the query string
 * @param fragment the fragment string
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String userinfo, String host, int port,
    String path, String query, String fragment) throws URIException {

    this(scheme, composeAuthority(userinfo, host, port), path, query,
        fragment);
}

/**
 * Joins userinfo, host and port into an authority string.
 *
 * @param userinfo the userinfo string, may be <code>null</code>
 * @param host the host string, may be <code>null</code>
 * @param port the port number, <code>-1</code> for none
 * @return <code>[userinfo@]host[:port]</code>, or <code>null</code> when
 * there is no host
 */
private static String composeAuthority(String userinfo, String host,
    int port) {

    if (host == null) {
        return null;
    }
    StringBuffer authority = new StringBuffer();
    if (userinfo != null) {
        authority.append(userinfo).append('@');
    }
    authority.append(host);
    if (port != -1) {
        authority.append(':').append(port);
    }
    return authority.toString();
}
404
405
/**
 * Construct a general URI from scheme, host, path and fragment.
 * Delegates to the (scheme, authority, path, query, fragment)
 * constructor, using the host as the authority and <code>null</code> as
 * the query.
 *
 * @param scheme the scheme string
 * @param host the host string
 * @param path the path string
 * @param fragment the fragment string
 * @throws URIException If the new URI cannot be created.
 * @see #getDefaultProtocolCharset
 */
public URI(String scheme, String host, String path, String fragment)
    throws URIException {

    this(scheme, host, path,
        null, // query
        fragment);
}
421
422
/**
 * Construct a general URI by resolving a relative URI string against a
 * base URI. The string is first turned into a URI of its own with
 * {@link #URI(String)} and then resolved with {@link #URI(URI, URI)}.
 *
 * @param base the base URI
 * @param relative the relative URI string
 * @throws URIException If the new URI cannot be created.
 */
public URI(URI base, String relative) throws URIException {
    this(base, new URI(relative));
}
433
434
435 /***
436 * Construct a general URI with the given relative URI.
437 * <p><blockquote><pre>
438 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
439 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
440 * </pre></blockquote><p>
441 * Resolving Relative References to Absolute Form.
442 *
443 * <strong>Examples of Resolving Relative URI References</strong>
444 *
445 * Within an object with a well-defined base URI of
446 * <p><blockquote><pre>
447 * http://a/b/c/d;p?q
448 * </pre></blockquote><p>
449 * the relative URI would be resolved as follows:
450 *
451 * Normal Examples
452 *
453 * <p><blockquote><pre>
454 * g:h = g:h
455 * g = http://a/b/c/g
456 * ./g = http://a/b/c/g
457 * g/ = http://a/b/c/g/
458 * /g = http://a/g
459 * //g = http://g
460 * ?y = http://a/b/c/?y
461 * g?y = http://a/b/c/g?y
462 * #s = (current document)#s
463 * g#s = http://a/b/c/g#s
464 * g?y#s = http://a/b/c/g?y#s
465 * ;x = http://a/b/c/;x
466 * g;x = http://a/b/c/g;x
467 * g;x?y#s = http://a/b/c/g;x?y#s
468 * . = http://a/b/c/
469 * ./ = http://a/b/c/
470 * .. = http://a/b/
471 * ../ = http://a/b/
472 * ../g = http://a/b/g
473 * ../.. = http://a/
474 * ../../ = http://a/
475 * ../../g = http://a/g
476 * </pre></blockquote><p>
477 *
478 * Some URI schemes do not allow a hierarchical syntax matching the
479 * <hier_part> syntax, and thus cannot use relative references.
480 *
481 * @param base the base URI
482 * @param relative the relative URI
483 * @throws URIException If the new URI cannot be created.
484 */
485 public URI(URI base, URI relative) throws URIException {
486
487 if (base._scheme == null) {
488 throw new URIException(URIException.PARSING, "base URI required");
489 }
490 if (base._scheme != null) {
491 this._scheme = base._scheme;
492 this._authority = base._authority;
493 }
494 if (base._is_opaque_part || relative._is_opaque_part) {
495 this._scheme = base._scheme;
496 this._is_opaque_part = base._is_opaque_part
497 || relative._is_opaque_part;
498 this._opaque = relative._opaque;
499 this._fragment = relative._fragment;
500 this.setURI();
501 return;
502 }
503 if (relative._scheme != null) {
504 this._scheme = relative._scheme;
505 this._is_net_path = relative._is_net_path;
506 this._authority = relative._authority;
507 if (relative._is_server) {
508 this._is_server = relative._is_server;
509 this._userinfo = relative._userinfo;
510 this._host = relative._host;
511 this._port = relative._port;
512 } else if (relative._is_reg_name) {
513 this._is_reg_name = relative._is_reg_name;
514 }
515 this._is_abs_path = relative._is_abs_path;
516 this._is_rel_path = relative._is_rel_path;
517 this._path = relative._path;
518 } else if (base._authority != null && relative._scheme == null) {
519 this._is_net_path = base._is_net_path;
520 this._authority = base._authority;
521 if (base._is_server) {
522 this._is_server = base._is_server;
523 this._userinfo = base._userinfo;
524 this._host = base._host;
525 this._port = base._port;
526 } else if (base._is_reg_name) {
527 this._is_reg_name = base._is_reg_name;
528 }
529 }
530 if (relative._authority != null) {
531 this._is_net_path = relative._is_net_path;
532 this._authority = relative._authority;
533 if (relative._is_server) {
534 this._is_server = relative._is_server;
535 this._userinfo = relative._userinfo;
536 this._host = relative._host;
537 this._port = relative._port;
538 } else if (relative._is_reg_name) {
539 this._is_reg_name = relative._is_reg_name;
540 }
541 this._is_abs_path = relative._is_abs_path;
542 this._is_rel_path = relative._is_rel_path;
543 this._path = relative._path;
544 }
545 // resolve the path and query if necessary
546 if (relative._scheme == null && relative._authority == null) {
547 if ((relative._path == null || relative._path.length == 0)
548 && relative._query == null) {
549 // handle a reference to the current document, see RFC 2396
550 // section 5.2 step 2
551 this._path = base._path;
552 this._query = base._query;
553 } else {
554 this._path = resolvePath(base._path, relative._path);
555 }
556 }
557 // base._query removed
558 if (relative._query != null) {
559 this._query = relative._query;
560 }
561 // base._fragment removed
562 if (relative._fragment != null) {
563 this._fragment = relative._fragment;
564 }
565 this.setURI();
566 // reparse the newly built URI, this will ensure that all flags are set correctly.
567 // TODO there must be a better way to do this
568 parseUriReference(new String(_uri), true);
569 }
570
571 // --------------------------------------------------- Instance Variables
572
573 /*** Version ID for serialization */
574 static final long serialVersionUID = 604752400577948726L;
575
576
577 /***
578 * Cache the hash code for this URI.
579 */
580 protected int hash = 0;
581
582
583 /***
584 * This Uniform Resource Identifier (URI).
585 * The URI is always in an "escaped" form, since escaping or unescaping
586 * a completed URI might change its semantics.
587 */
588 protected char[] _uri = null;
589
590
591 /***
592 * The charset of the protocol used by this URI instance.
593 */
594 protected String protocolCharset = null;
595
596
597 /***
598 * The default charset of the protocol. RFC 2277, 2396
599 */
600 protected static String defaultProtocolCharset = "UTF-8";
601
602
603 /***
604 * The default charset of the document. RFC 2277, 2396
605 * The platform's charset is used for the document by default.
606 */
607 protected static String defaultDocumentCharset = null;
608 protected static String defaultDocumentCharsetByLocale = null;
609 protected static String defaultDocumentCharsetByPlatform = null;
610 // Static initializer for defaultDocumentCharset
611 static {
612 Locale locale = Locale.getDefault();
613 // in order to support backward compatiblity
614 if (locale != null) {
615 defaultDocumentCharsetByLocale =
616 LocaleToCharsetMap.getCharset(locale);
617 // set the default document charset
618 defaultDocumentCharset = defaultDocumentCharsetByLocale;
619 }
620 // in order to support platform encoding
621 try {
622 defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
623 } catch(SecurityException ignore) {
624 }
625 if (defaultDocumentCharset == null) {
626 // set the default document charset
627 defaultDocumentCharset = defaultDocumentCharsetByPlatform;
628 }
629 }
630
631
632 /***
633 * The scheme.
634 */
635 protected char[] _scheme = null;
636
637
638 /***
639 * The opaque.
640 */
641 protected char[] _opaque = null;
642
643
644 /***
645 * The authority.
646 */
647 protected char[] _authority = null;
648
649
650 /***
651 * The userinfo.
652 */
653 protected char[] _userinfo = null;
654
655
656 /***
657 * The host.
658 */
659 protected char[] _host = null;
660
661
662 /***
663 * The port.
664 */
665 protected int _port = -1;
666
667
668 /***
669 * The path.
670 */
671 protected char[] _path = null;
672
673
674 /***
675 * The query.
676 */
677 protected char[] _query = null;
678
679
680 /***
681 * The fragment.
682 */
683 protected char[] _fragment = null;
684
685
686 /***
687 * The root path.
688 */
689 protected static char[] rootPath = { '/' };
690
691 // ---------------------- Generous characters for each component validation
692
/**
 * The percent "%" character always has the reserved purpose of being the
 * escape indicator, it must be escaped as "%25" in order to be used as
 * data within a URI.
 */
protected static final BitSet percent = new BitSet(256);
// Static initializer for percent
static {
    percent.set('%');
}


/**
 * BitSet for digit.
 * <p><blockquote><pre>
 * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 *            "8" | "9"
 * </pre></blockquote><p>
 */
protected static final BitSet digit = new BitSet(256);
// Static initializer for digit
static {
    for (char c = '0'; c <= '9'; c++) {
        digit.set(c);
    }
}


/**
 * BitSet for alpha.
 * <p><blockquote><pre>
 * alpha         = lowalpha | upalpha
 * </pre></blockquote><p>
 */
protected static final BitSet alpha = new BitSet(256);
// Static initializer for alpha
static {
    // walk both cases of the 26 letters in one pass
    for (int offset = 0; offset < 26; offset++) {
        alpha.set('a' + offset);
        alpha.set('A' + offset);
    }
}


/**
 * BitSet for alphanum (join of alpha &amp; digit).
 * <p><blockquote><pre>
 *  alphanum      = alpha | digit
 * </pre></blockquote><p>
 */
protected static final BitSet alphanum = new BitSet(256);
// Static initializer for alphanum
static {
    alphanum.or(digit);
    alphanum.or(alpha);
}


/**
 * BitSet for hex.
 * <p><blockquote><pre>
 * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *                         "a" | "b" | "c" | "d" | "e" | "f"
 * </pre></blockquote><p>
 */
protected static final BitSet hex = new BitSet(256);
// Static initializer for hex
static {
    hex.or(digit);
    final String hexLetters = "abcdefABCDEF";
    for (int i = 0; i < hexLetters.length(); i++) {
        hex.set(hexLetters.charAt(i));
    }
}


/**
 * BitSet for escaped.
 * <p><blockquote><pre>
 * escaped       = "%" hex hex
 * </pre></blockquote><p>
 * As a set of characters this is simply "%" plus the hex digits.
 */
protected static final BitSet escaped = new BitSet(256);
// Static initializer for escaped
static {
    escaped.or(hex);
    escaped.or(percent);
}
785
786
/**
 * BitSet for mark.
 * <p><blockquote><pre>
 * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
 *                 "(" | ")"
 * </pre></blockquote><p>
 */
protected static final BitSet mark = new BitSet(256);
// Static initializer for mark
static {
    final String markChars = "-_.!~*'()";
    for (int i = 0; i < markChars.length(); i++) {
        mark.set(markChars.charAt(i));
    }
}


/**
 * Data characters that are allowed in a URI but do not have a reserved
 * purpose are called unreserved.
 * <p><blockquote><pre>
 * unreserved    = alphanum | mark
 * </pre></blockquote><p>
 */
protected static final BitSet unreserved = new BitSet(256);
// Static initializer for unreserved
static {
    unreserved.or(mark);
    unreserved.or(alphanum);
}


/**
 * BitSet for reserved.
 * <p><blockquote><pre>
 * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
 *                 "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet reserved = new BitSet(256);
// Static initializer for reserved
static {
    final String reservedChars = ";/?:@&=+$,";
    for (int i = 0; i < reservedChars.length(); i++) {
        reserved.set(reservedChars.charAt(i));
    }
}


/**
 * BitSet for uric.
 * <p><blockquote><pre>
 * uric          = reserved | unreserved | escaped
 * </pre></blockquote><p>
 */
protected static final BitSet uric = new BitSet(256);
// Static initializer for uric
static {
    uric.or(escaped);
    uric.or(unreserved);
    uric.or(reserved);
}


/**
 * BitSet for fragment (alias for uric).
 * <p><blockquote><pre>
 * fragment      = *uric
 * </pre></blockquote><p>
 * This is the very same object as <code>uric</code>, not a copy.
 */
protected static final BitSet fragment = uric;


/**
 * BitSet for query (alias for uric).
 * <p><blockquote><pre>
 * query         = *uric
 * </pre></blockquote><p>
 * This is the very same object as <code>uric</code>, not a copy.
 */
protected static final BitSet query = uric;
878
879
/**
 * BitSet for pchar.
 * <p><blockquote><pre>
 * pchar         = unreserved | escaped |
 *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet pchar = new BitSet(256);
// Static initializer for pchar
static {
    pchar.or(unreserved);
    pchar.or(escaped);
    final String extraChars = ":@&=+$,";
    for (int i = 0; i < extraChars.length(); i++) {
        pchar.set(extraChars.charAt(i));
    }
}


/**
 * BitSet for param (alias for pchar).
 * <p><blockquote><pre>
 * param         = *pchar
 * </pre></blockquote><p>
 * This is the very same object as <code>pchar</code>, not a copy.
 */
protected static final BitSet param = pchar;


/**
 * BitSet for segment.
 * <p><blockquote><pre>
 * segment       = *pchar *( ";" param )
 * </pre></blockquote><p>
 * As a set of characters: everything in pchar (and param) plus ";".
 */
protected static final BitSet segment = new BitSet(256);
// Static initializer for segment
static {
    segment.set(';');
    segment.or(pchar);
    segment.or(param);
}


/**
 * BitSet for path segments.
 * <p><blockquote><pre>
 * path_segments = segment *( "/" segment )
 * </pre></blockquote><p>
 */
protected static final BitSet path_segments = new BitSet(256);
// Static initializer for path_segments
static {
    path_segments.or(segment);
    path_segments.set('/');
}


/**
 * URI absolute path.
 * <p><blockquote><pre>
 * abs_path      = "/"  path_segments
 * </pre></blockquote><p>
 */
protected static final BitSet abs_path = new BitSet(256);
// Static initializer for abs_path
static {
    abs_path.or(path_segments);
    abs_path.set('/');
}
952
953
/**
 * URI bitset for encoding typical non-slash characters.
 * <p><blockquote><pre>
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&amp;" | "=" | "+" | "$" | ","
 * </pre></blockquote><p>
 */
protected static final BitSet uric_no_slash = new BitSet(256);
// Static initializer for uric_no_slash
static {
    uric_no_slash.or(unreserved);
    uric_no_slash.or(escaped);
    uric_no_slash.set(';');
    uric_no_slash.set('?');
    // ":" belongs to uric_no_slash per the grammar above; this line used
    // to set ";" a second time, leaving ":" out of the set
    uric_no_slash.set(':');
    uric_no_slash.set('@');
    uric_no_slash.set('&');
    uric_no_slash.set('=');
    uric_no_slash.set('+');
    uric_no_slash.set('$');
    uric_no_slash.set(',');
}
976
977
/**
 * URI bitset that combines uric_no_slash and uric.
 * <p><blockquote><pre>
 * opaque_part   = uric_no_slash *uric
 * </pre></blockquote><p>
 * The set is generous: it is the plain union of both character classes
 * and therefore cannot express that the first character must not be a
 * slash.
 */
protected static final BitSet opaque_part = new BitSet(256);
// Static initializer for opaque_part
static {
    opaque_part.or(uric);
    opaque_part.or(uric_no_slash);
}


/**
 * URI bitset that combines absolute path and opaque part.
 * <p><blockquote><pre>
 * path          = [ abs_path | opaque_part ]
 * </pre></blockquote><p>
 */
protected static final BitSet path = new BitSet(256);
// Static initializer for path
static {
    path.or(opaque_part);
    path.or(abs_path);
}


/**
 * Port, a logical alias for digit.
 * This is the very same object as <code>digit</code>, not a copy.
 */
protected static final BitSet port = digit;
1011
1012
/**
 * Bitset that combines digit and dot for IPv4address.
 * <p><blockquote><pre>
 * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
 * </pre></blockquote><p>
 */
protected static final BitSet IPv4address = new BitSet(256);
// Static initializer for IPv4address
static {
    IPv4address.set('.');
    IPv4address.or(digit);
}


/**
 * RFC 2373.
 * <p><blockquote><pre>
 * IPv6address = hexpart [ ":" IPv4address ]
 * </pre></blockquote><p>
 */
protected static final BitSet IPv6address = new BitSet(256);
// Static initializer for IPv6address reference
static {
    IPv6address.or(IPv4address);
    IPv6address.set(':');
    IPv6address.or(hex); // hexpart
}


/**
 * RFC 2732, 2373.
 * <p><blockquote><pre>
 * IPv6reference   = "[" IPv6address "]"
 * </pre></blockquote><p>
 */
protected static final BitSet IPv6reference = new BitSet(256);
// Static initializer for IPv6reference
static {
    IPv6reference.or(IPv6address);
    IPv6reference.set('[');
    IPv6reference.set(']');
}


/**
 * BitSet for toplabel.
 * <p><blockquote><pre>
 * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
 * </pre></blockquote><p>
 */
protected static final BitSet toplabel = new BitSet(256);
// Static initializer for toplabel
static {
    toplabel.set('-');
    toplabel.or(alphanum);
}


/**
 * BitSet for domainlabel.
 * <p><blockquote><pre>
 * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
 * </pre></blockquote><p>
 * This is the very same object as <code>toplabel</code>, not a copy.
 */
protected static final BitSet domainlabel = toplabel;


/**
 * BitSet for hostname.
 * <p><blockquote><pre>
 * hostname      = *( domainlabel "." ) toplabel [ "." ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostname = new BitSet(256);
// Static initializer for hostname
static {
    // domainlabel is the same BitSet as toplabel, so adding toplabel
    // covers both
    hostname.set('.');
    hostname.or(toplabel);
}


/**
 * BitSet for host.
 * <p><blockquote><pre>
 * host          = hostname | IPv4address | IPv6reference
 * </pre></blockquote><p>
 */
protected static final BitSet host = new BitSet(256);
// Static initializer for host
static {
    // the characters of IPv4address are already part of IPv6reference,
    // so they need not be added separately
    host.or(IPv6reference);
    host.or(hostname);
}


/**
 * BitSet for hostport.
 * <p><blockquote><pre>
 * hostport      = host [ ":" port ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostport = new BitSet(256);
// Static initializer for hostport
static {
    hostport.or(port);
    hostport.set(':');
    hostport.or(host);
}
1123
1124
/**
 * BitSet for userinfo.
 * <p><blockquote><pre>
 * userinfo = *( unreserved | escaped |
 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet userinfo = new BitSet(256);
// Populate userinfo
static {
    userinfo.or(unreserved);
    userinfo.or(escaped);
    char[] marks = {';', ':', '&', '=', '+', '$', ','};
    for (int i = 0; i < marks.length; i++) {
        userinfo.set(marks[i]);
    }
}


/**
 * BitSet for the characters usable within the parts of the userinfo
 * component, like user and password.
 */
public static final BitSet within_userinfo = new BitSet(256);
// Populate within_userinfo: start from userinfo, then drop the separators
static {
    within_userinfo.or(userinfo);
    // reserved within authority
    char[] separators = {';', ':', '@', '?', '/'};
    for (int i = 0; i < separators.length; i++) {
        within_userinfo.clear(separators[i]);
    }
}


/**
 * BitSet for server.
 * <p><blockquote><pre>
 * server = [ [ userinfo "@" ] hostport ]
 * </pre></blockquote><p>
 */
protected static final BitSet server = new BitSet(256);
// Populate server
static {
    server.set('@');
    server.or(hostport);
    server.or(userinfo);
}


/**
 * BitSet for reg_name.
 * <p><blockquote><pre>
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 * ";" | ":" | "@" | "&" | "=" | "+" )
 * </pre></blockquote><p>
 */
protected static final BitSet reg_name = new BitSet(256);
// Populate reg_name
static {
    reg_name.or(unreserved);
    reg_name.or(escaped);
    char[] marks = {'$', ',', ';', ':', '@', '&', '=', '+'};
    for (int i = 0; i < marks.length; i++) {
        reg_name.set(marks[i]);
    }
}


/**
 * BitSet for authority.
 * <p><blockquote><pre>
 * authority = server | reg_name
 * </pre></blockquote><p>
 */
protected static final BitSet authority = new BitSet(256);
// Populate authority
static {
    authority.or(reg_name);
    authority.or(server);
}
1212
1213
/**
 * BitSet for scheme.
 * <p><blockquote><pre>
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 * </pre></blockquote><p>
 */
protected static final BitSet scheme = new BitSet(256);
// Populate scheme
static {
    char[] marks = {'+', '-', '.'};
    for (int i = 0; i < marks.length; i++) {
        scheme.set(marks[i]);
    }
    scheme.or(digit);
    scheme.or(alpha);
}


/**
 * BitSet for rel_segment.
 * <p><blockquote><pre>
 * rel_segment = 1*( unreserved | escaped |
 * ";" | "@" | "&" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet rel_segment = new BitSet(256);
// Populate rel_segment
static {
    rel_segment.or(unreserved);
    rel_segment.or(escaped);
    char[] marks = {';', '@', '&', '=', '+', '$', ','};
    for (int i = 0; i < marks.length; i++) {
        rel_segment.set(marks[i]);
    }
}


/**
 * BitSet for rel_path.
 * <p><blockquote><pre>
 * rel_path = rel_segment [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet rel_path = new BitSet(256);
// Populate rel_path
static {
    rel_path.or(abs_path);
    rel_path.or(rel_segment);
}
1265
1266
/**
 * BitSet for net_path.
 * <p><blockquote><pre>
 * net_path = "//" authority [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet net_path = new BitSet(256);
// Populate net_path
static {
    net_path.or(abs_path);
    net_path.or(authority);
    net_path.set('/');
}


/**
 * BitSet for hier_part.
 * <p><blockquote><pre>
 * hier_part = ( net_path | abs_path ) [ "?" query ]
 * </pre></blockquote><p>
 */
protected static final BitSet hier_part = new BitSet(256);
// Populate hier_part
static {
    // '?' is not set explicitly: already included
    hier_part.or(query);
    hier_part.or(abs_path);
    hier_part.or(net_path);
}


/**
 * BitSet for relativeURI.
 * <p><blockquote><pre>
 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
 * </pre></blockquote><p>
 */
protected static final BitSet relativeURI = new BitSet(256);
// Populate relativeURI
static {
    // '?' is not set explicitly: already included
    relativeURI.or(query);
    relativeURI.or(rel_path);
    relativeURI.or(abs_path);
    relativeURI.or(net_path);
}


/**
 * BitSet for absoluteURI.
 * <p><blockquote><pre>
 * absoluteURI = scheme ":" ( hier_part | opaque_part )
 * </pre></blockquote><p>
 */
protected static final BitSet absoluteURI = new BitSet(256);
// Populate absoluteURI
static {
    absoluteURI.set(':');
    absoluteURI.or(opaque_part);
    absoluteURI.or(hier_part);
    absoluteURI.or(scheme);
}


/**
 * BitSet for URI-reference.
 * <p><blockquote><pre>
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 * </pre></blockquote><p>
 */
protected static final BitSet URI_reference = new BitSet(256);
// Populate URI_reference
static {
    URI_reference.set('#');
    URI_reference.or(fragment);
    URI_reference.or(relativeURI);
    URI_reference.or(absoluteURI);
}
1345
1346 // ---------------------------- Characters disallowed within the URI syntax
1347 // Excluded US-ASCII Characters are like control, space, delims and unwise
1348
/**
 * BitSet for control: the US-ASCII codes 0x00 through 0x1F plus 0x7F.
 */
public static final BitSet control = new BitSet(256);
// Populate control
static {
    control.set(0x7F);
    int code = 0x1F;
    while (code >= 0) {
        control.set(code);
        code--;
    }
}
1360
/**
 * BitSet for space: holds only the US-ASCII space character (0x20).
 */
public static final BitSet space = new BitSet(256);
// Populate space
static {
    final int asciiSpace = 0x20;
    space.set(asciiSpace);
}
1369
1370
/**
 * BitSet for delims: the characters '&lt;', '&gt;', '#', '%' and the
 * double quote.
 */
public static final BitSet delims = new BitSet(256);
// Populate delims
static {
    char[] marks = {'<', '>', '#', '%', '"'};
    for (int i = 0; i < marks.length; i++) {
        delims.set(marks[i]);
    }
}
1383
1384
/**
 * BitSet for unwise.
 * <p><blockquote><pre>
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 * </pre></blockquote><p>
 */
public static final BitSet unwise = new BitSet(256);
// Static initializer for unwise
static {
    unwise.set('{');
    unwise.set('}');
    unwise.set('|');
    // the backslash; it must be written as an escaped char literal
    unwise.set('\\');
    unwise.set('^');
    unwise.set('[');
    unwise.set(']');
    unwise.set('`');
}
1400
1401
/**
 * Disallowed rel_path before escaping: every character of
 * <code>uric</code> that is not also in <code>rel_path</code>.
 */
public static final BitSet disallowed_rel_path = new BitSet(256);
// Populate disallowed_rel_path as the set difference uric - rel_path
static {
    for (int c = 0; c < uric.length(); c++) {
        if (uric.get(c) && !rel_path.get(c)) {
            disallowed_rel_path.set(c);
        }
    }
}


/**
 * Disallowed opaque_part before escaping: every character of
 * <code>uric</code> that is not also in <code>opaque_part</code>.
 */
public static final BitSet disallowed_opaque_part = new BitSet(256);
// Populate disallowed_opaque_part as the set difference uric - opaque_part
static {
    for (int c = 0; c < uric.length(); c++) {
        if (uric.get(c) && !opaque_part.get(c)) {
            disallowed_opaque_part.set(c);
        }
    }
}
1422
1423 // ----------------------- Characters allowed within and for each component
1424
/**
 * Those characters that are allowed for the authority component:
 * the <code>authority</code> set without '%'.
 */
public static final BitSet allowed_authority = new BitSet(256);
// Populate allowed_authority
static {
    final char percentSign = '%';
    allowed_authority.or(authority);
    allowed_authority.clear(percentSign);
}


/**
 * Those characters that are allowed for the opaque_part:
 * the <code>opaque_part</code> set without '%'.
 */
public static final BitSet allowed_opaque_part = new BitSet(256);
// Populate allowed_opaque_part
static {
    final char percentSign = '%';
    allowed_opaque_part.or(opaque_part);
    allowed_opaque_part.clear(percentSign);
}


/**
 * Those characters that are allowed for the reg_name:
 * the <code>reg_name</code> set without '%'.
 */
public static final BitSet allowed_reg_name = new BitSet(256);
// Populate allowed_reg_name
static {
    final char percentSign = '%';
    allowed_reg_name.or(reg_name);
    allowed_reg_name.clear(percentSign);
}


/**
 * Those characters that are allowed for the userinfo component:
 * the <code>userinfo</code> set without '%'.
 */
public static final BitSet allowed_userinfo = new BitSet(256);
// Populate allowed_userinfo
static {
    final char percentSign = '%';
    allowed_userinfo.or(userinfo);
    allowed_userinfo.clear(percentSign);
}


/**
 * Those characters that are allowed for within the userinfo component:
 * the <code>within_userinfo</code> set without '%'.
 */
public static final BitSet allowed_within_userinfo = new BitSet(256);
// Populate allowed_within_userinfo
static {
    final char percentSign = '%';
    allowed_within_userinfo.or(within_userinfo);
    allowed_within_userinfo.clear(percentSign);
}
1480
1481
/**
 * Those characters that are allowed for the IPv6reference component.
 * The characters '[', ']' in IPv6reference should be excluded.
 */
public static final BitSet allowed_IPv6reference = new BitSet(256);
// Populate allowed_IPv6reference: IPv6reference minus its brackets
static {
    allowed_IPv6reference.or(IPv6reference);
    char[] brackets = {'[', ']'};
    for (int i = 0; i < brackets.length; i++) {
        allowed_IPv6reference.clear(brackets[i]);
    }
}


/**
 * Those characters that are allowed for the host component.
 * The characters '[', ']' in IPv6reference should be excluded.
 */
public static final BitSet allowed_host = new BitSet(256);
// Populate allowed_host
static {
    allowed_host.or(allowed_IPv6reference);
    allowed_host.or(hostname);
}


/**
 * Those characters that are allowed within the authority component:
 * the union of <code>server</code> and <code>reg_name</code> without
 * the characters ';', ':', '@', '?' and '/'.
 */
public static final BitSet allowed_within_authority = new BitSet(256);
// Populate allowed_within_authority
static {
    allowed_within_authority.or(reg_name);
    allowed_within_authority.or(server);
    char[] separators = {';', ':', '@', '?', '/'};
    for (int i = 0; i < separators.length; i++) {
        allowed_within_authority.clear(separators[i]);
    }
}
1522
1523
/**
 * Those characters that are allowed for the abs_path:
 * the <code>abs_path</code> set without the members of
 * <code>percent</code>.
 */
public static final BitSet allowed_abs_path = new BitSet(256);
// Populate allowed_abs_path
static {
    // '/' is not set explicitly: already included
    allowed_abs_path.or(abs_path);
    allowed_abs_path.andNot(percent);
}


/**
 * Those characters that are allowed for the rel_path:
 * the <code>rel_path</code> set without '%'.
 */
public static final BitSet allowed_rel_path = new BitSet(256);
// Populate allowed_rel_path
static {
    final char percentSign = '%';
    allowed_rel_path.or(rel_path);
    allowed_rel_path.clear(percentSign);
}


/**
 * Those characters that are allowed within the path:
 * the <code>abs_path</code> set without '/', ';', '=' and '?'.
 */
public static final BitSet allowed_within_path = new BitSet(256);
// Populate allowed_within_path
static {
    allowed_within_path.or(abs_path);
    char[] separators = {'/', ';', '=', '?'};
    for (int i = 0; i < separators.length; i++) {
        allowed_within_path.clear(separators[i]);
    }
}


/**
 * Those characters that are allowed for the query component:
 * the <code>uric</code> set without '%'.
 */
public static final BitSet allowed_query = new BitSet(256);
// Populate allowed_query
static {
    final char percentSign = '%';
    allowed_query.or(uric);
    allowed_query.clear(percentSign);
}


/**
 * Those characters that are allowed within the query component:
 * <code>allowed_query</code> without the members of
 * <code>reserved</code>.
 */
public static final BitSet allowed_within_query = new BitSet(256);
// Populate allowed_within_query
static {
    allowed_within_query.or(allowed_query);
    allowed_within_query.andNot(reserved); // excluded 'reserved'
}


/**
 * Those characters that are allowed for the fragment component:
 * the <code>uric</code> set without '%'.
 */
public static final BitSet allowed_fragment = new BitSet(256);
// Populate allowed_fragment
static {
    final char percentSign = '%';
    allowed_fragment.or(uric);
    allowed_fragment.clear(percentSign);
}
1592
1593 // ------------------------------------------- Flags for this URI-reference
1594
1595 // TODO: Figure out what all these variables are for and provide javadoc
1596
1597 // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1598 // absoluteURI = scheme ":" ( hier_part | opaque_part )
1599 protected boolean _is_hier_part;
1600 protected boolean _is_opaque_part;
1601 // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1602 // hier_part = ( net_path | abs_path ) [ "?" query ]
1603 protected boolean _is_net_path;
1604 protected boolean _is_abs_path;
1605 protected boolean _is_rel_path;
1606 // net_path = "//" authority [ abs_path ]
1607 // authority = server | reg_name
1608 protected boolean _is_reg_name;
1609 protected boolean _is_server; // = _has_server
1610 // server = [ [ userinfo "@" ] hostport ]
1611 // host = hostname | IPv4address | IPv6reference
1612 protected boolean _is_hostname;
1613 protected boolean _is_IPv4address;
1614 protected boolean _is_IPv6reference;
1615
1616 // ------------------------------------------ Character and escape encoding
1617
1618 /***
1619 * Encodes URI string.
1620 *
1621 * This is a two mapping, one from original characters to octets, and
1622 * subsequently a second from octets to URI characters:
1623 * <p><blockquote><pre>
1624 * original character sequence->octet sequence->URI character sequence
1625 * </pre></blockquote><p>
1626 *
1627 * An escaped octet is encoded as a character triplet, consisting of the
1628 * percent character "%" followed by the two hexadecimal digits
1629 * representing the octet code. For example, "%20" is the escaped
1630 * encoding for the US-ASCII space character.
1631 * <p>
1632 * Conversion from the local filesystem character set to UTF-8 will
1633 * normally involve a two step process. First convert the local character
1634 * set to the UCS; then convert the UCS to UTF-8.
1635 * The first step in the process can be performed by maintaining a mapping
1636 * table that includes the local character set code and the corresponding
1637 * UCS code.
1638 * The next step is to convert the UCS character code to the UTF-8 encoding.
1639 * <p>
1640 * Mapping between vendor codepages can be done in a very similar manner
1641 * as described above.
1642 * <p>
1643 * The only time escape encodings can allowedly be made is when a URI is
1644 * being created from its component parts. The escape and validate methods
1645 * are internally performed within this method.
1646 *
1647 * @param original the original character sequence
1648 * @param allowed those characters that are allowed within a component
1649 * @param charset the protocol charset
1650 * @return URI character sequence
1651 * @throws URIException null component or unsupported character encoding
1652 */
1653 protected static char[] encode(String original, BitSet allowed,
1654 String charset) throws URIException {
1655
1656 // encode original to uri characters.
1657 if (original == null) {
1658 throw new URIException(URIException.PARSING, "null");
1659 }
1660 // escape octet to uri characters.
1661 if (allowed == null) {
1662 throw new URIException(URIException.PARSING,
1663 "null allowed characters");
1664 }
1665 byte[] octets;
1666 try {
1667 octets = original.getBytes(charset);
1668 } catch (UnsupportedEncodingException error) {
1669 throw new URIException(URIException.UNSUPPORTED_ENCODING, charset);
1670 }
1671 StringBuffer buf = new StringBuffer(octets.length);
1672 for (int i = 0; i < octets.length; i++) {
1673 char c = (char) octets[i];
1674 if (allowed.get(c)) {
1675 buf.append(c);
1676 } else {
1677 buf.append('%');
1678 byte b = octets[i]; // use the original byte value
1679 char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
1680 buf.append(Character.toUpperCase(hexadecimal)); // high
1681 hexadecimal = Character.forDigit(b & 0xF, 16);
1682 buf.append(Character.toUpperCase(hexadecimal)); // low
1683 }
1684 }
1685
1686 return buf.toString().toCharArray();
1687 }
1688
1689
1690 /***
1691 * Decodes URI encoded string.
1692 *
1693 * This is a two mapping, one from URI characters to octets, and
1694 * subsequently a second from octets to original characters:
1695 * <p><blockquote><pre>
1696 * URI character sequence->octet sequence->original character sequence
1697 * </pre></blockquote><p>
1698 *
1699 * A URI must be separated into its components before the escaped
1700 * characters within those components can be allowedly decoded.
1701 * <p>
1702 * Notice that there is a chance that URI characters that are non UTF-8
1703 * may be parsed as valid UTF-8. A recent non-scientific analysis found
1704 * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1705 * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1706 * false reading.
1707 * <p>
1708 * The percent "%" character always has the reserved purpose of being
1709 * the escape indicator, it must be escaped as "%25" in order to be used
1710 * as data within a URI.
1711 * <p>
1712 * The unescape method is internally performed within this method.
1713 *
1714 * @param component the URI character sequence
1715 * @param charset the protocol charset
1716 * @return original character sequence
1717 * @throws URIException incomplete trailing escape pattern or unsupported
1718 * character encoding
1719 */
1720 protected static String decode(char[] component, String charset)
1721 throws URIException {
1722
1723 // unescape uri characters to octets
1724 if (component == null) {
1725 return null;
1726 }
1727
1728 byte[] octets;
1729 try {
1730 octets = new String(component).getBytes(charset);
1731 } catch (UnsupportedEncodingException error) {
1732 throw new URIException(URIException.UNSUPPORTED_ENCODING,
1733 "not supported " + charset + " encoding");
1734 }
1735 int length = octets.length;
1736 int oi = 0; // output index
1737 for (int ii = 0; ii < length; oi++) {
1738 byte aByte = (byte) octets[ii++];
1739 if (aByte == '%' && ii + 2 <= length) {
1740 byte high = (byte) Character.digit((char) octets[ii++], 16);
1741 byte low = (byte) Character.digit((char) octets[ii++], 16);
1742 if (high == -1 || low == -1) {
1743 throw new URIException(URIException.ESCAPING,
1744 "incomplete trailing escape pattern");
1745
1746 }
1747 aByte = (byte) ((high << 4) + low);
1748 }
1749 octets[oi] = (byte) aByte;
1750 }
1751
1752 String result;
1753 try {
1754 result = new String(octets, 0, oi, charset);
1755 } catch (UnsupportedEncodingException error) {
1756 throw new URIException(URIException.UNSUPPORTED_ENCODING,
1757 "not supported " + charset + " encoding");
1758 }
1759
1760 return result;
1761 }
1762
1763
1764 /***
1765 * Pre-validate the unescaped URI string within a specific component.
1766 *
1767 * @param component the component string within the component
1768 * @param disallowed those characters disallowed within the component
1769 * @return if true, it doesn't have the disallowed characters
1770 * if false, the component is undefined or an incorrect one
1771 */
1772 protected boolean prevalidate(String component, BitSet disallowed) {
1773 // prevalidate the given component by disallowed characters
1774 if (component == null) {
1775 return false; // undefined
1776 }
1777 char[] target = component.toCharArray();
1778 for (int i = 0; i < target.length; i++) {
1779 if (disallowed.get(target[i])) {
1780 return false;
1781 }
1782 }
1783 return true;
1784 }
1785
1786
1787 /***
1788 * Validate the URI characters within a specific component.
1789 * The component must be performed after escape encoding. Or it doesn't
1790 * include escaped characters.
1791 *
1792 * @param component the characters sequence within the component
1793 * @param generous those characters that are allowed within a component
1794 * @return if true, it's the correct URI character sequence
1795 */
1796 protected boolean validate(char[] component, BitSet generous) {
1797 // validate each component by generous characters
1798 return validate(component, 0, -1, generous);
1799 }
1800
1801
1802 /***
1803 * Validate the URI characters within a specific component.
1804 * The component must be performed after escape encoding. Or it doesn't
1805 * include escaped characters.
1806 * <p>
1807 * It's not that much strict, generous. The strict validation might be
1808 * performed before being called this method.
1809 *
1810 * @param component the characters sequence within the component
1811 * @param soffset the starting offset of the given component
1812 * @param eoffset the ending offset of the given component
1813 * if -1, it means the length of the component
1814 * @param generous those characters that are allowed within a component
1815 * @return if true, it's the correct URI character sequence
1816 */
1817 protected boolean validate(char[] component, int soffset, int eoffset,
1818 BitSet generous) {
1819 // validate each component by generous characters
1820 if (eoffset == -1) {
1821 eoffset = component.length - 1;
1822 }
1823 for (int i = soffset; i <= eoffset; i++) {
1824 if (!generous.get(component[i])) {
1825 return false;
1826 }
1827 }
1828 return true;
1829 }
1830
1831
1832 /***
1833 * In order to avoid any possilbity of conflict with non-ASCII characters,
1834 * Parse a URI reference as a <code>String</code> with the character
1835 * encoding of the local system or the document.
1836 * <p>
1837 * The following line is the regular expression for breaking-down a URI
1838 * reference into its components.
1839 * <p><blockquote><pre>
1840 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1841 * 12 3 4 5 6 7 8 9
1842 * </pre></blockquote><p>
1843 * For example, matching the above expression to
1844 * http://jakarta.apache.org/ietf/uri/#Related
1845 * results in the following subexpression matches:
1846 * <p><blockquote><pre>
1847 * $1 = http:
1848 * scheme = $2 = http
1849 * $3 = //jakarta.apache.org
1850 * authority = $4 = jakarta.apache.org
1851 * path = $5 = /ietf/uri/
1852 * $6 = <undefined>
1853 * query = $7 = <undefined>
1854 * $8 = #Related
1855 * fragment = $9 = Related
1856 * </pre></blockquote><p>
1857 *
1858 * @param original the original character sequence
1859 * @param escaped <code>true</code> if <code>original</code> is escaped
1860 * @throws URIException If an error occurs.
1861 */
1862 protected void parseUriReference(String original, boolean escaped)
1863 throws URIException {
1864
1865 // validate and contruct the URI character sequence
1866 if (original == null) {
1867 throw new URIException("URI-Reference required");
1868 }
1869
1870 /* @
1871 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1872 */
1873 String tmp = original.trim();
1874
1875 /*
1876 * The length of the string sequence of characters.
1877 * It may not be equal to the length of the byte array.
1878 */
1879 int length = tmp.length();
1880
1881 /*
1882 * Remove the delimiters like angle brackets around an URI.
1883 */
1884 if (length > 0) {
1885 char[] firstDelimiter = { tmp.charAt(0) };
1886 if (validate(firstDelimiter, delims)) {
1887 if (length >= 2) {
1888 char[] lastDelimiter = { tmp.charAt(length - 1) };
1889 if (validate(lastDelimiter, delims)) {
1890 tmp = tmp.substring(1, length - 1);
1891 length = length - 2;
1892 }
1893 }
1894 }
1895 }
1896
1897 /*
1898 * The starting index
1899 */
1900 int from = 0;
1901
1902 /*
1903 * The test flag whether the URI is started from the path component.
1904 */
1905 boolean isStartedFromPath = false;
1906 int atColon = tmp.indexOf(':');
1907 int atSlash = tmp.indexOf('/');
1908 if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) {
1909 isStartedFromPath = true;
1910 }
1911
1912 /*
1913 * <p><blockquote><pre>
1914 * @@@@@@@@
1915 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1916 * </pre></blockquote><p>
1917 */
1918 int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1919 if (at == -1) {
1920 at = 0;
1921 }
1922
1923 /*
1924 * Parse the scheme.
1925 * <p><blockquote><pre>
1926 * scheme = $2 = http
1927 * @
1928 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1929 * </pre></blockquote><p>
1930 */
1931 if (at < length && tmp.charAt(at) == ':') {
1932 char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1933 if (validate(target, scheme)) {
1934 _scheme = target;
1935 } else {
1936 throw new URIException("incorrect scheme");
1937 }
1938 from = ++at;
1939 }
1940
1941 /*
1942 * Parse the authority component.
1943 * <p><blockquote><pre>
1944 * authority = $4 = jakarta.apache.org
1945 * @@
1946 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1947 * </pre></blockquote><p>
1948 */
1949 // Reset flags
1950 _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1951 if (0 <= at && at < length && tmp.charAt(at) == '/') {
1952 // Set flag
1953 _is_hier_part = true;
1954 if (at + 2 < length && tmp.charAt(at + 1) == '/') {
1955 // the temporary index to start the search from
1956 int next = indexFirstOf(tmp, "/?#", at + 2);
1957 if (next == -1) {
1958 next = (tmp.substring(at + 2).length() == 0) ? at + 2
1959 : tmp.length();
1960 }
1961 parseAuthority(tmp.substring(at + 2, next), escaped);
1962 from = at = next;
1963 // Set flag
1964 _is_net_path = true;
1965 }
1966 if (from == at) {
1967 // Set flag
1968 _is_abs_path = true;
1969 }
1970 }
1971
1972 /*
1973 * Parse the path component.
1974 * <p><blockquote><pre>
1975 * path = $5 = /ietf/uri/
1976 * @@@@@@
1977 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1978 * </pre></blockquote><p>
1979 */
1980 if (from < length) {
1981 // rel_path = rel_segment [ abs_path ]
1982 int next = indexFirstOf(tmp, "?#", from);
1983 if (next == -1) {
1984 next = tmp.length();
1985 }
1986 if (!_is_abs_path) {
1987 if (!escaped
1988 && prevalidate(tmp.substring(from, next), disallowed_rel_path)
1989 || escaped
1990 && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
1991 // Set flag
1992 _is_rel_path = true;
1993 } else if (!escaped
1994 && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
1995 || escaped
1996 && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
1997 // Set flag
1998 _is_opaque_part = true;
1999 } else {
2000 // the path component may be empty
2001 _path = null;
2002 }
2003 }
2004 if (escaped) {
2005 setRawPath(tmp.substring(from, next).toCharArray());
2006 } else {
2007 setPath(tmp.substring(from, next));
2008 }
2009 at = next;
2010 }
2011
2012 // set the charset to do escape encoding
2013 String charset = getProtocolCharset();
2014
2015 /*
2016 * Parse the query component.
2017 * <p><blockquote><pre>
2018 * query = $7 = <undefined>
2019 * @@@@@@@@@
2020 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2021 * </pre></blockquote><p>
2022 */
2023 if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
2024 int next = tmp.indexOf('#', at + 1);
2025 if (next == -1) {
2026 next = tmp.length();
2027 }
2028 _query = (escaped) ? tmp.substring(at + 1, next).toCharArray()
2029 : encode(tmp.substring(at + 1, next), allowed_query, charset);
2030 at = next;
2031 }
2032
2033 /*
2034 * Parse the fragment component.
2035 * <p><blockquote><pre>
2036 * fragment = $9 = Related
2037 * @@@@@@@@
2038 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2039 * </pre></blockquote><p>
2040 */
2041 if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2042 if (at + 1 == length) { // empty fragment
2043 _fragment = "".toCharArray();
2044 } else {
2045 _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
2046 : encode(tmp.substring(at + 1), allowed_fragment, charset);
2047 }
2048 }
2049
2050 // set this URI.
2051 setURI();
2052 }
2053
2054
2055 /***
2056 * Get the earlier index that to be searched for the first occurrance in
2057 * one of any of the given string.
2058 *
2059 * @param s the string to be indexed
2060 * @param delims the delimiters used to index
2061 * @return the earlier index if there are delimiters
2062 */
2063 protected int indexFirstOf(String s, String delims) {
2064 return indexFirstOf(s, delims, -1);
2065 }
2066
2067
2068 /***
2069 * Get the earlier index that to be searched for the first occurrance in
2070 * one of any of the given string.
2071 *
2072 * @param s the string to be indexed
2073 * @param delims the delimiters used to index
2074 * @param offset the from index
2075 * @return the earlier index if there are delimiters
2076 */
2077 protected int indexFirstOf(String s, String delims, int offset) {
2078 if (s == null || s.length() == 0) {
2079 return -1;
2080 }
2081 if (delims == null || delims.length() == 0) {
2082 return -1;
2083 }
2084 // check boundaries
2085 if (offset < 0) {
2086 offset = 0;
2087 } else if (offset > s.length()) {
2088 return -1;
2089 }
2090 // s is never null
2091 int min = s.length();
2092 char[] delim = delims.toCharArray();
2093 for (int i = 0; i < delim.length; i++) {
2094 int at = s.indexOf(delim[i], offset);
2095 if (at >= 0 && at < min) {
2096 min = at;
2097 }
2098 }
2099 return (min == s.length()) ? -1 : min;
2100 }
2101
2102
2103 /***
2104 * Get the earlier index that to be searched for the first occurrance in
2105 * one of any of the given array.
2106 *
2107 * @param s the character array to be indexed
2108 * @param delim the delimiter used to index
2109 * @return the ealier index if there are a delimiter
2110 */
2111 protected int indexFirstOf(char[] s, char delim) {
2112 return indexFirstOf(s, delim, 0);
2113 }
2114
2115
2116 /***
2117 * Get the earlier index that to be searched for the first occurrance in
2118 * one of any of the given array.
2119 *
2120 * @param s the character array to be indexed
2121 * @param delim the delimiter used to index
2122 * @param offset The offset.
2123 * @return the ealier index if there is a delimiter
2124 */
2125 protected int indexFirstOf(char[] s, char delim, int offset) {
2126 if (s == null || s.length == 0) {
2127 return -1;
2128 }
2129 // check boundaries
2130 if (offset < 0) {
2131 offset = 0;
2132 } else if (offset > s.length) {
2133 return -1;
2134 }
2135 for (int i = offset; i < s.length; i++) {
2136 if (s[i] == delim) {
2137 return i;
2138 }
2139 }
2140 return -1;
2141 }
2142
2143
2144 /***
2145 * Parse the authority component.
2146 *
2147 * @param original the original character sequence of authority component
2148 * @param escaped <code>true</code> if <code>original</code> is escaped
2149 * @throws URIException If an error occurs.
2150 */
2151 protected void parseAuthority(String original, boolean escaped)
2152 throws URIException {
2153
2154 // Reset flags
2155 _is_reg_name = _is_server =
2156 _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2157
2158 // set the charset to do escape encoding
2159 String charset = getProtocolCharset();
2160
2161 boolean hasPort = true;
2162 int from = 0;
2163 int next = original.indexOf('@');
2164 if (next != -1) { // neither -1 and 0
2165 // each protocol extented from URI supports the specific userinfo
2166 _userinfo = (escaped) ? original.substring(0, next).toCharArray()
2167 : encode(original.substring(0, next), allowed_userinfo,
2168 charset);
2169 from = next + 1;
2170 }
2171 next = original.indexOf('[', from);
2172 if (next >= from) {
2173 next = original.indexOf(']', from);
2174 if (next == -1) {
2175 throw new URIException(URIException.PARSING, "IPv6reference");
2176 } else {
2177 next++;
2178 }
2179 // In IPv6reference, '[', ']' should be excluded
2180 _host = (escaped) ? original.substring(from, next).toCharArray()
2181 : encode(original.substring(from, next), allowed_IPv6reference,
2182 charset);
2183 // Set flag
2184 _is_IPv6reference = true;
2185 } else { // only for !_is_IPv6reference
2186 next = original.indexOf(':', from);
2187 if (next == -1) {
2188 next = original.length();
2189 hasPort = false;
2190 }
2191 // REMINDME: it doesn't need the pre-validation
2192 _host = original.substring(from, next).toCharArray();
2193 if (validate(_host, IPv4address)) {
2194 // Set flag
2195 _is_IPv4address = true;
2196 } else if (validate(_host, hostname)) {
2197 // Set flag
2198 _is_hostname = true;
2199 } else {
2200 // Set flag
2201 _is_reg_name = true;
2202 }
2203 }
2204 if (_is_reg_name) {
2205 // Reset flags for a server-based naming authority
2206 _is_server = _is_hostname = _is_IPv4address =
2207 _is_IPv6reference = false;
2208 // set a registry-based naming authority
2209 _authority = (escaped) ? original.toString().toCharArray()
2210 : encode(original.toString(), allowed_reg_name, charset);
2211 } else {
2212 if (original.length() - 1 > next && hasPort
2213 && original.charAt(next) == ':') { // not empty
2214 from = next + 1;
2215 try {
2216 _port = Integer.parseInt(original.substring(from));
2217 } catch (NumberFormatException error) {
2218 throw new URIException(URIException.PARSING,
2219 "invalid port number");
2220 }
2221 }
2222 // set a server-based naming authority
2223 StringBuffer buf = new StringBuffer();
2224 if (_userinfo != null) { // has_userinfo
2225 buf.append(_userinfo);
2226 buf.append('@');
2227 }
2228 if (_host != null) {
2229 buf.append(_host);
2230 if (_port != -1) {
2231 buf.append(':');
2232 buf.append(_port);
2233 }
2234 }
2235 _authority = buf.toString().toCharArray();
2236 // Set flag
2237 _is_server = true;
2238 }
2239 }
2240
2241
2242 /***
2243 * Once it's parsed successfully, set this URI.
2244 *
2245 * @see #getRawURI
2246 */
2247 protected void setURI() {
2248 // set _uri
2249 StringBuffer buf = new StringBuffer();
2250 // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2251 if (_scheme != null) {
2252 buf.append(_scheme);
2253 buf.append(':');
2254 }
2255 if (_is_net_path) {
2256 buf.append("//");
2257 if (_authority != null) { // has_authority
2258 if (_userinfo != null) { // by default, remove userinfo part
2259 if (_host != null) {
2260 buf.append(_host);
2261 if (_port != -1) {
2262 buf.append(':');
2263 buf.append(_port);
2264 }
2265 }
2266 } else {
2267 buf.append(_authority);
2268 }
2269 }
2270 }
2271 if (_opaque != null && _is_opaque_part) {
2272 buf.append(_opaque);
2273 } else if (_path != null) {
2274 // _is_hier_part or _is_relativeURI
2275 if (_path.length != 0) {
2276 buf.append(_path);
2277 }
2278 }
2279 if (_query != null) { // has_query
2280 buf.append('?');
2281 buf.append(_query);
2282 }
2283 // ignore the fragment identifier
2284 _uri = buf.toString().toCharArray();
2285 hash = 0;
2286 }
2287
2288 // ----------------------------------------------------------- Test methods
2289
2290
2291 /***
2292 * Tell whether or not this URI is absolute.
2293 *
2294 * @return true iif this URI is absoluteURI
2295 */
2296 public boolean isAbsoluteURI() {
2297 return (_scheme != null);
2298 }
2299
2300
2301 /***
2302 * Tell whether or not this URI is relative.
2303 *
2304 * @return true iif this URI is relativeURI
2305 */
2306 public boolean isRelativeURI() {
2307 return (_scheme == null);
2308 }
2309
2310
2311 /***
2312 * Tell whether or not the absoluteURI of this URI is hier_part.
2313 *
2314 * @return true iif the absoluteURI is hier_part
2315 */
2316 public boolean isHierPart() {
2317 return _is_hier_part;
2318 }
2319
2320
2321 /***
2322 * Tell whether or not the absoluteURI of this URI is opaque_part.
2323 *
2324 * @return true iif the absoluteURI is opaque_part
2325 */
2326 public boolean isOpaquePart() {
2327 return _is_opaque_part;
2328 }
2329
2330
2331 /***
 * Tell whether or not the relativeURI or hier_part of this URI is net_path.
 * It's the same function as the hasAuthority() method.
 *
 * @return true iff the relativeURI or hier_part is net_path
2336 * @see #hasAuthority
2337 */
2338 public boolean isNetPath() {
2339 return _is_net_path || (_authority != null);
2340 }
2341
2342
2343 /***
2344 * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2345 *
2346 * @return true iif the relativeURI or hier_part is abs_path
2347 */
2348 public boolean isAbsPath() {
2349 return _is_abs_path;
2350 }
2351
2352
2353 /***
2354 * Tell whether or not the relativeURI of this URI is rel_path.
2355 *
2356 * @return true iif the relativeURI is rel_path
2357 */
2358 public boolean isRelPath() {
2359 return _is_rel_path;
2360 }
2361
2362
2363 /***
2364 * Tell whether or not this URI has authority.
2365 * It's the same function as the is_net_path() method.
2366 *
2367 * @return true iif this URI has authority
2368 * @see #isNetPath
2369 */
2370 public boolean hasAuthority() {
2371 return (_authority != null) || _is_net_path;
2372 }
2373
2374 /***
2375 * Tell whether or not the authority component of this URI is reg_name.
2376 *
2377 * @return true iif the authority component is reg_name
2378 */
2379 public boolean isRegName() {
2380 return _is_reg_name;
2381 }
2382
2383
2384 /***
2385 * Tell whether or not the authority component of this URI is server.
2386 *
2387 * @return true iif the authority component is server
2388 */
2389 public boolean isServer() {
2390 return _is_server;
2391 }
2392
2393
2394 /***
2395 * Tell whether or not this URI has userinfo.
2396 *
2397 * @return true iif this URI has userinfo
2398 */
2399 public boolean hasUserinfo() {
2400 return (_userinfo != null);
2401 }
2402
2403
2404 /***
2405 * Tell whether or not the host part of this URI is hostname.
2406 *
2407 * @return true iif the host part is hostname
2408 */
2409 public boolean isHostname() {
2410 return _is_hostname;
2411 }
2412
2413
2414 /***
2415 * Tell whether or not the host part of this URI is IPv4address.
2416 *
2417 * @return true iif the host part is IPv4address
2418 */
2419 public boolean isIPv4address() {
2420 return _is_IPv4address;
2421 }
2422
2423
2424 /***
2425 * Tell whether or not the host part of this URI is IPv6reference.
2426 *
2427 * @return true iif the host part is IPv6reference
2428 */
2429 public boolean isIPv6reference() {
2430 return _is_IPv6reference;
2431 }
2432
2433
2434 /***
2435 * Tell whether or not this URI has query.
2436 *
2437 * @return true iif this URI has query
2438 */
2439 public boolean hasQuery() {
2440 return (_query != null);
2441 }
2442
2443
2444 /***
2445 * Tell whether or not this URI has fragment.
2446 *
2447 * @return true iif this URI has fragment
2448 */
2449 public boolean hasFragment() {
2450 return (_fragment != null);
2451 }
2452
2453
2454 // ---------------------------------------------------------------- Charset
2455
2456
2457 /***
2458 * Set the default charset of the protocol.
2459 * <p>
2460 * The character set used to store files SHALL remain a local decision and
2461 * MAY depend on the capability of local operating systems. Prior to the
2462 * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2463 * and UTF-8 encoded. This approach, while allowing international exchange
2464 * of URIs, will still allow backward compatibility with older systems
2465 * because the code set positions for ASCII characters are identical to the
2466 * one byte sequence in UTF-8.
2467 * <p>
2468 * An individual URI scheme may require a single charset, define a default
2469 * charset, or provide a way to indicate the charset used.
2470 *
2471 * <p>
 * The setter always succeeds, and afterwards it always throws a
 * <code>DefaultCharsetChanged</code> exception to signal the change.
2474 *
2475 * So API programmer must follow the following way:
2476 * <code><pre>
 * import org.apache.commons.httpclient.URI.DefaultCharsetChanged;
2478 * .
2479 * .
2480 * .
2481 * try {
2482 * URI.setDefaultProtocolCharset("UTF-8");
2483 * } catch (DefaultCharsetChanged cc) {
2484 * // CASE 1: the exception could be ignored, when it is set by user
2485 * if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
2486 * // CASE 2: let user know the default protocol charset changed
2487 * } else {
 * // CASE 3: let user know the default document charset changed
2489 * }
2490 * }
2491 * </pre></code>
2492 *
2493 * The API programmer is responsible to set the correct charset.
2494 * And each application should remember its own charset to support.
2495 *
2496 * @param charset the default charset for each protocol
2497 * @throws DefaultCharsetChanged default charset changed
2498 */
2499 public static void setDefaultProtocolCharset(String charset)
2500 throws DefaultCharsetChanged {
2501
2502 defaultProtocolCharset = charset;
2503 throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
2504 "the default protocol charset changed");
2505 }
2506
2507
2508 /***
2509 * Get the default charset of the protocol.
2510 * <p>
2511 * An individual URI scheme may require a single charset, define a default
2512 * charset, or provide a way to indicate the charset used.
2513 * <p>
2514 * To work globally either requires support of a number of character sets
2515 * and to be able to convert between them, or the use of a single preferred
2516 * character set.
2517 * For support of global compatibility it is STRONGLY RECOMMENDED that
2518 * clients and servers use UTF-8 encoding when exchanging URIs.
2519 *
2520 * @return the default charset string
2521 */
2522 public static String getDefaultProtocolCharset() {
2523 return defaultProtocolCharset;
2524 }
2525
2526
2527 /***
2528 * Get the protocol charset used by this current URI instance.
 * It was set by the constructor for this instance. If it was not set by
 * the constructor, the default protocol charset is returned.
2531 *
2532 * @return the protocol charset string
2533 * @see #getDefaultProtocolCharset
2534 */
2535 public String getProtocolCharset() {
2536 return (protocolCharset != null)
2537 ? protocolCharset
2538 : defaultProtocolCharset;
2539 }
2540
2541
2542 /***
2543 * Set the default charset of the document.
2544 * <p>
2545 * Notice that it will be possible to contain mixed characters (e.g.
2546 * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2547 * display of these character sets, the protocol charset could be simply
2548 * used again. Because it's not yet implemented that the insertion of BIDI
2549 * control characters at different points during composition is extracted.
2550 * <p>
2551 *
 * The setter always succeeds, and afterwards it always throws a
 * <code>DefaultCharsetChanged</code> exception to signal the change.
2554 *
2555 * So API programmer must follow the following way:
2556 * <code><pre>
 * import org.apache.commons.httpclient.URI.DefaultCharsetChanged;
2558 * .
2559 * .
2560 * .
2561 * try {
2562 * URI.setDefaultDocumentCharset("EUC-KR");
2563 * } catch (DefaultCharsetChanged cc) {
2564 * // CASE 1: the exception could be ignored, when it is set by user
2565 * if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2566 * // CASE 2: let user know the default document charset changed
2567 * } else {
 * // CASE 3: let user know the default protocol charset changed
2569 * }
2570 * }
2571 * </pre></code>
2572 *
2573 * The API programmer is responsible to set the correct charset.
2574 * And each application should remember its own charset to support.
2575 *
2576 * @param charset the default charset for the document
2577 * @throws DefaultCharsetChanged default charset changed
2578 */
2579 public static void setDefaultDocumentCharset(String charset)
2580 throws DefaultCharsetChanged {
2581
2582 defaultDocumentCharset = charset;
2583 throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2584 "the default document charset changed");
2585 }
2586
2587
2588 /***
2589 * Get the recommended default charset of the document.
2590 *
2591 * @return the default charset string
2592 */
2593 public static String getDefaultDocumentCharset() {
2594 return defaultDocumentCharset;
2595 }
2596
2597
2598 /***
2599 * Get the default charset of the document by locale.
2600 *
2601 * @return the default charset string by locale
2602 */
2603 public static String getDefaultDocumentCharsetByLocale() {
2604 return defaultDocumentCharsetByLocale;
2605 }
2606
2607
2608 /***
2609 * Get the default charset of the document by platform.
2610 *
2611 * @return the default charset string by platform
2612 */
2613 public static String getDefaultDocumentCharsetByPlatform() {
2614 return defaultDocumentCharsetByPlatform;
2615 }
2616
2617 // ------------------------------------------------------------- The scheme
2618
2619 /***
2620 * Get the scheme.
2621 *
2622 * @return the scheme
2623 */
2624 public char[] getRawScheme() {
2625 return _scheme;
2626 }
2627
2628
2629 /***
2630 * Get the scheme.
2631 *
2632 * @return the scheme
2633 * null if undefined scheme
2634 */
2635 public String getScheme() {
2636 return (_scheme == null) ? null : new String(_scheme);
2637 }
2638
2639 // ---------------------------------------------------------- The authority
2640
2641 /***
2642 * Set the authority. It can be one type of server, hostport, hostname,
2643 * IPv4address, IPv6reference and reg_name.
2644 * <p><blockquote><pre>
2645 * authority = server | reg_name
2646 * </pre></blockquote><p>
2647 *
2648 * @param escapedAuthority the raw escaped authority
2649 * @throws URIException If {@link
2650 * #parseAuthority(java.lang.String,boolean)} fails
2651 * @throws NullPointerException null authority
2652 */
2653 public void setRawAuthority(char[] escapedAuthority)
2654 throws URIException, NullPointerException {
2655
2656 parseAuthority(new String(escapedAuthority), true);
2657 setURI();
2658 }
2659
2660
2661 /***
2662 * Set the authority. It can be one type of server, hostport, hostname,
2663 * IPv4address, IPv6reference and reg_name.
2664 * Note that there is no setAuthority method by the escape encoding reason.
2665 *
2666 * @param escapedAuthority the escaped authority string
2667 * @throws URIException If {@link
2668 * #parseAuthority(java.lang.String,boolean)} fails
2669 */
2670 public void setEscapedAuthority(String escapedAuthority)
2671 throws URIException {
2672
2673 parseAuthority(escapedAuthority, true);
2674 setURI();
2675 }
2676
2677
2678 /***
2679 * Get the raw-escaped authority.
2680 *
2681 * @return the raw-escaped authority
2682 */
2683 public char[] getRawAuthority() {
2684 return _authority;
2685 }
2686
2687
2688 /***
2689 * Get the escaped authority.
2690 *
2691 * @return the escaped authority
2692 */
2693 public String getEscapedAuthority() {
2694 return (_authority == null) ? null : new String(_authority);
2695 }
2696
2697
2698 /***
2699 * Get the authority.
2700 *
2701 * @return the authority
2702 * @throws URIException If {@link #decode} fails
2703 */
2704 public String getAuthority() throws URIException {
2705 return (_authority == null) ? null : decode(_authority,
2706 getProtocolCharset());
2707 }
2708
2709 // ----------------------------------------------------------- The userinfo
2710
2711 /***
2712 * Get the raw-escaped userinfo.
2713 *
2714 * @return the raw-escaped userinfo
2715 * @see #getAuthority
2716 */
2717 public char[] getRawUserinfo() {
2718 return _userinfo;
2719 }
2720
2721
2722 /***
2723 * Get the escaped userinfo.
2724 *
2725 * @return the escaped userinfo
2726 * @see #getAuthority
2727 */
2728 public String getEscapedUserinfo() {
2729 return (_userinfo == null) ? null : new String(_userinfo);
2730 }
2731
2732
2733 /***
2734 * Get the userinfo.
2735 *
2736 * @return the userinfo
2737 * @throws URIException If {@link #decode} fails
2738 * @see #getAuthority
2739 */
2740 public String getUserinfo() throws URIException {
2741 return (_userinfo == null) ? null : decode(_userinfo,
2742 getProtocolCharset());
2743 }
2744
2745 // --------------------------------------------------------------- The host
2746
2747 /***
2748 * Get the host.
2749 * <p><blockquote><pre>
2750 * host = hostname | IPv4address | IPv6reference
2751 * </pre></blockquote><p>
2752 *
2753 * @return the host
2754 * @see #getAuthority
2755 */
2756 public char[] getRawHost() {
2757 return _host;
2758 }
2759
2760
2761 /***
2762 * Get the host.
2763 * <p><blockquote><pre>
2764 * host = hostname | IPv4address | IPv6reference
2765 * </pre></blockquote><p>
2766 *
2767 * @return the host
2768 * @throws URIException If {@link #decode} fails
2769 * @see #getAuthority
2770 */
2771 public String getHost() throws URIException {
2772 return decode(_host, getProtocolCharset());
2773 }
2774
2775 // --------------------------------------------------------------- The port
2776
2777 /***
 * Get the port. In order to get the specific default port, the specific
2779 * protocol-supported class extended from the URI class should be used.
2780 * It has the server-based naming authority.
2781 *
2782 * @return the port
2783 * if -1, it has the default port for the scheme or the server-based
2784 * naming authority is not supported in the specific URI.
2785 */
2786 public int getPort() {
2787 return _port;
2788 }
2789
2790 // --------------------------------------------------------------- The path
2791
2792 /***
2793 * Set the raw-escaped path.
2794 *
2795 * @param escapedPath the path character sequence
2796 * @throws URIException encoding error or not proper for initial instance
2797 * @see #encode
2798 */
2799 public void setRawPath(char[] escapedPath) throws URIException {
2800 if (escapedPath == null || escapedPath.length == 0) {
2801 _path = _opaque = escapedPath;
2802 setURI();
2803 return;
2804 }
2805 // remove the fragment identifier
2806 escapedPath = removeFragmentIdentifier(escapedPath);
2807 if (_is_net_path || _is_abs_path) {
2808 if (escapedPath[0] != '/') {
2809 throw new URIException(URIException.PARSING,
2810 "not absolute path");
2811 }
2812 if (!validate(escapedPath, abs_path)) {
2813 throw new URIException(URIException.ESCAPING,
2814 "escaped absolute path not valid");
2815 }
2816 _path = escapedPath;
2817 } else if (_is_rel_path) {
2818 int at = indexFirstOf(escapedPath, '/');
2819 if (at == 0) {
2820 throw new URIException(URIException.PARSING, "incorrect path");
2821 }
2822 if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment)
2823 && !validate(escapedPath, at, -1, abs_path)
2824 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2825
2826 throw new URIException(URIException.ESCAPING,
2827 "escaped relative path not valid");
2828 }
2829 _path = escapedPath;
2830 } else if (_is_opaque_part) {
2831 if (!uric_no_slash.get(escapedPath[0])
2832 && !validate(escapedPath, 1, -1, uric)) {
2833 throw new URIException(URIException.ESCAPING,
2834 "escaped opaque part not valid");
2835 }
2836 _opaque = escapedPath;
2837 } else {
2838 throw new URIException(URIException.PARSING, "incorrect path");
2839 }
2840 setURI();
2841 }
2842
2843
2844 /***
2845 * Set the escaped path.
2846 *
2847 * @param escapedPath the escaped path string
2848 * @throws URIException encoding error or not proper for initial instance
2849 * @see #encode
2850 */
2851 public void setEscapedPath(String escapedPath) throws URIException {
2852 if (escapedPath == null) {
2853 _path = _opaque = null;
2854 setURI();
2855 return;
2856 }
2857 setRawPath(escapedPath.toCharArray());
2858 }
2859
2860
2861 /***
2862 * Set the path.
2863 *
2864 * @param path the path string
2865 * @throws URIException set incorrectly or fragment only
2866 * @see #encode
2867 */
2868 public void setPath(String path) throws URIException {
2869
2870 if (path == null || path.length() == 0) {
2871 _path = _opaque = (path == null) ? null : path.toCharArray();
2872 setURI();
2873 return;
2874 }
2875 // set the charset to do escape encoding
2876 String charset = getProtocolCharset();
2877
2878 if (_is_net_path || _is_abs_path) {
2879 _path = encode(path, allowed_abs_path, charset);
2880 } else if (_is_rel_path) {
2881 StringBuffer buff = new StringBuffer(path.length());
2882 int at = path.indexOf('/');
2883 if (at == 0) { // never 0
2884 throw new URIException(URIException.PARSING,
2885 "incorrect relative path");
2886 }
2887 if (at > 0) {
2888 buff.append(encode(path.substring(0, at), allowed_rel_path,
2889 charset));
2890 buff.append(encode(path.substring(at), allowed_abs_path,
2891 charset));
2892 } else {
2893 buff.append(encode(path, allowed_rel_path, charset));
2894 }
2895 _path = buff.toString().toCharArray();
2896 } else if (_is_opaque_part) {
2897 StringBuffer buf = new StringBuffer();
2898 buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2899 buf.insert(1, encode(path.substring(1), uric, charset));
2900 _opaque = buf.toString().toCharArray();
2901 } else {
2902 throw new URIException(URIException.PARSING, "incorrect path");
2903 }
2904 setURI();
2905 }
2906
2907
2908 /***
2909 * Resolve the base and relative path.
2910 *
2911 * @param basePath a character array of the basePath
2912 * @param relPath a character array of the relPath
2913 * @return the resolved path
2914 * @throws URIException no more higher path level to be resolved
2915 */
2916 protected char[] resolvePath(char[] basePath, char[] relPath)
2917 throws URIException {
2918
2919 // REMINDME: paths are never null
2920 String base = (basePath == null) ? "" : new String(basePath);
2921 int at = base.lastIndexOf('/');
2922 if (at != -1) {
2923 basePath = base.substring(0, at + 1).toCharArray();
2924 }
2925 // _path could be empty
2926 if (relPath == null || relPath.length == 0) {
2927 return normalize(basePath);
2928 } else if (relPath[0] == '/') {
2929 return normalize(relPath);
2930 } else {
2931 StringBuffer buff = new StringBuffer(base.length()
2932 + relPath.length);
2933 buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2934 buff.append(relPath);
2935 return normalize(buff.toString().toCharArray());
2936 }
2937 }
2938
2939
2940 /***
2941 * Get the raw-escaped current hierarchy level in the given path.
2942 * If the last namespace is a collection, the slash mark ('/') should be
2943 * ended with at the last character of the path string.
2944 *
2945 * @param path the path
2946 * @return the current hierarchy level
2947 * @throws URIException no hierarchy level
2948 */
2949 protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2950
2951 if (_is_opaque_part) {
2952 throw new URIException(URIException.PARSING, "no hierarchy level");
2953 }
2954 if (path == null) {
2955 throw new URIException(URIException.PARSING, "empty path");
2956 }
2957 String buff = new String(path);
2958 int first = buff.indexOf('/');
2959 int last = buff.lastIndexOf('/');
2960 if (last == 0) {
2961 return rootPath;
2962 } else if (first != last && last != -1) {
2963 return buff.substring(0, last).toCharArray();
2964 }
2965 // FIXME: it could be a document on the server side
2966 return path;
2967 }
2968
2969
2970 /***
2971 * Get the raw-escaped current hierarchy level.
2972 *
2973 * @return the raw-escaped current hierarchy level
2974 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2975 */
2976 public char[] getRawCurrentHierPath() throws URIException {
2977 return (_path == null) ? null : getRawCurrentHierPath(_path);
2978 }
2979
2980
2981 /***
2982 * Get the escaped current hierarchy level.
2983 *
2984 * @return the escaped current hierarchy level
2985 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2986 */
2987 public String getEscapedCurrentHierPath() throws URIException {
2988 char[] path = getRawCurrentHierPath();
2989 return (path == null) ? null : new String(path);
2990 }
2991
2992
2993 /***
2994 * Get the current hierarchy level.
2995 *
2996 * @return the current hierarchy level
2997 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2998 * @see #decode
2999 */
3000 public String getCurrentHierPath() throws URIException {
3001 char[] path = getRawCurrentHierPath();
3002 return (path == null) ? null : decode(path, getProtocolCharset());
3003 }
3004
3005
3006 /***
3007 * Get the level above the this hierarchy level.
3008 *
3009 * @return the raw above hierarchy level
3010 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3011 */
3012 public char[] getRawAboveHierPath() throws URIException {
3013 char[] path = getRawCurrentHierPath();
3014 return (path == null) ? null : getRawCurrentHierPath(path);
3015 }
3016
3017
3018 /***
3019 * Get the level above the this hierarchy level.
3020 *
 * @return the escaped above hierarchy level
3022 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3023 */
3024 public String getEscapedAboveHierPath() throws URIException {
3025 char[] path = getRawAboveHierPath();
3026 return (path == null) ? null : new String(path);
3027 }
3028
3029
3030 /***
3031 * Get the level above the this hierarchy level.
3032 *
3033 * @return the above hierarchy level
3034 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3035 * @see #decode
3036 */
3037 public String getAboveHierPath() throws URIException {
3038 char[] path = getRawAboveHierPath();
3039 return (path == null) ? null : decode(path, getProtocolCharset());
3040 }
3041
3042
3043 /***
3044 * Get the raw-escaped path.
3045 * <p><blockquote><pre>
3046 * path = [ abs_path | opaque_part ]
3047 * </pre></blockquote><p>
3048 *
3049 * @return the raw-escaped path
3050 */
3051 public char[] getRawPath() {
3052 return _is_opaque_part ? _opaque : _path;
3053 }
3054
3055
3056 /***
3057 * Get the escaped path.
3058 * <p><blockquote><pre>
3059 * path = [ abs_path | opaque_part ]
3060 * abs_path = "/" path_segments
3061 * opaque_part = uric_no_slash *uric
3062 * </pre></blockquote><p>
3063 *
3064 * @return the escaped path string
3065 */
3066 public String getEscapedPath() {
3067 char[] path = getRawPath();
3068 return (path == null) ? null : new String(path);
3069 }
3070
3071
3072 /***
3073 * Get the path.
3074 * <p><blockquote><pre>
3075 * path = [ abs_path | opaque_part ]
3076 * </pre></blockquote><p>
3077 * @return the path string
3078 * @throws URIException If {@link #decode} fails.
3079 * @see #decode
3080 */
3081 public String getPath() throws URIException {
3082 char[] path = getRawPath();
3083 return (path == null) ? null : decode(path, getProtocolCharset());
3084 }
3085
3086
3087 /***
3088 * Get the raw-escaped basename of the path.
3089 *
3090 * @return the raw-escaped basename
3091 */
3092 public char[] getRawName() {
3093 if (_path == null) {
3094 return null;
3095 }
3096
3097 int at = 0;
3098 for (int i = _path.length - 1; i >= 0; i--) {
3099 if (_path[i] == '/') {
3100 at = i + 1;
3101 break;
3102 }
3103 }
3104 int len = _path.length - at;
3105 char[] basename = new char[len];
3106 System.arraycopy(_path, at, basename, 0, len);
3107 return basename;
3108 }
3109
3110
3111 /***
3112 * Get the escaped basename of the path.
3113 *
3114 * @return the escaped basename string
3115 */
3116 public String getEscapedName() {
3117 char[] basename = getRawName();
3118 return (basename == null) ? null : new String(basename);
3119 }
3120
3121
3122 /***
3123 * Get the basename of the path.
3124 *
3125 * @return the basename string
3126 * @throws URIException incomplete trailing escape pattern or unsupported
3127 * character encoding
3128 * @see #decode
3129 */
3130 public String getName() throws URIException {
3131 char[] basename = getRawName();
3132 return (basename == null) ? null : decode(getRawName(),
3133 getProtocolCharset());
3134 }
3135
3136 // ----------------------------------------------------- The path and query
3137
3138 /***
3139 * Get the raw-escaped path and query.
3140 *
3141 * @return the raw-escaped path and query
3142 */
3143 public char[] getRawPathQuery() {
3144
3145 if (_path == null && _query == null) {
3146 return null;
3147 }
3148 StringBuffer buff = new StringBuffer();
3149 if (_path != null) {
3150 buff.append(_path);
3151 }
3152 if (_query != null) {
3153 buff.append('?');
3154 buff.append(_query);
3155 }
3156 return buff.toString().toCharArray();
3157 }
3158
3159
3160 /***
 * Get the escaped path and query.
3162 *
3163 * @return the escaped path and query string
3164 */
3165 public String getEscapedPathQuery() {
3166 char[] rawPathQuery = getRawPathQuery();
3167 return (rawPathQuery == null) ? null : new String(rawPathQuery);
3168 }
3169
3170
3171 /***
3172 * Get the path and query.
3173 *
3174 * @return the path and query string.
3175 * @throws URIException incomplete trailing escape pattern or unsupported
3176 * character encoding
3177 * @see #decode
3178 */
3179 public String getPathQuery() throws URIException {
3180 char[] rawPathQuery = getRawPathQuery();
3181 return (rawPathQuery == null) ? null : decode(rawPathQuery,
3182 getProtocolCharset());
3183 }
3184
3185 // -------------------------------------------------------------- The query
3186
3187 /***
3188 * Set the raw-escaped query.
3189 *
3190 * @param escapedQuery the raw-escaped query
3191 * @throws URIException escaped query not valid
3192 */
3193 public void setRawQuery(char[] escapedQuery) throws URIException {
3194 if (escapedQuery == null || escapedQuery.length == 0) {
3195 _query = escapedQuery;
3196 setURI();
3197 return;
3198 }
3199 // remove the fragment identifier
3200 escapedQuery = removeFragmentIdentifier(escapedQuery);
3201 if (!validate(escapedQuery, query)) {
3202 throw new URIException(URIException.ESCAPING,
3203 "escaped query not valid");
3204 }
3205 _query = escapedQuery;
3206 setURI();
3207 }
3208
3209
3210 /***
3211 * Set the escaped query string.
3212 *
3213 * @param escapedQuery the escaped query string
3214 * @throws URIException escaped query not valid
3215 */
3216 public void setEscapedQuery(String escapedQuery) throws URIException {
3217 if (escapedQuery == null) {
3218 _query = null;
3219 setURI();
3220 return;
3221 }
3222 setRawQuery(escapedQuery.toCharArray());
3223 }
3224
3225
3226 /***
3227 * Set the query.
3228 * <p>
3229 * When a query string is not misunderstood the reserved special characters
3230 * ("&", "=", "+", ",", and "$") within a query component, it is
3231 * recommended to use in encoding the whole query with this method.
3232 * <p>
3233 * The additional APIs for the special purpose using by the reserved
3234 * special characters used in each protocol are implemented in each protocol
3235 * classes inherited from <code>URI</code>. So refer to the same-named APIs
3236 * implemented in each specific protocol instance.
3237 *
3238 * @param query the query string.
3239 * @throws URIException incomplete trailing escape pattern or unsupported
3240 * character encoding
3241 * @see #encode
3242 */
3243 public void setQuery(String query) throws URIException {
3244 if (query == null || query.length() == 0) {
3245 _query = (query == null) ? null : query.toCharArray();
3246 setURI();
3247 return;
3248 }
3249 setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3250 }
3251
3252
3253 /***
3254 * Get the raw-escaped query.
3255 *
3256 * @return the raw-escaped query
3257 */
3258 public char[] getRawQuery() {
3259 return _query;
3260 }
3261
3262
3263 /***
3264 * Get the escaped query.
3265 *
3266 * @return the escaped query string
3267 */
3268 public String getEscapedQuery() {
3269 return (_query == null) ? null : new String(_query);
3270 }
3271
3272
3273 /***
3274 * Get the query.
3275 *
3276 * @return the query string.
3277 * @throws URIException incomplete trailing escape pattern or unsupported
3278 * character encoding
3279 * @see #decode
3280 */
3281 public String getQuery() throws URIException {
3282 return (_query == null) ? null : decode(_query, getProtocolCharset());
3283 }
3284
3285 // ----------------------------------------------------------- The fragment
3286
3287 /***
3288 * Set the raw-escaped fragment.
3289 *
3290 * @param escapedFragment the raw-escaped fragment
3291 * @throws URIException escaped fragment not valid
3292 */
3293 public void setRawFragment(char[] escapedFragment) throws URIException {
3294 if (escapedFragment == null || escapedFragment.length == 0) {
3295 _fragment = escapedFragment;
3296 hash = 0;
3297 return;
3298 }
3299 if (!validate(escapedFragment, fragment)) {
3300 throw new URIException(URIException.ESCAPING,
3301 "escaped fragment not valid");
3302 }
3303 _fragment = escapedFragment;
3304 hash = 0;
3305 }
3306
3307
3308 /***
3309 * Set the escaped fragment string.
3310 *
3311 * @param escapedFragment the escaped fragment string
3312 * @throws URIException escaped fragment not valid
3313 */
3314 public void setEscapedFragment(String escapedFragment) throws URIException {
3315 if (escapedFragment == null) {
3316 _fragment = null;
3317 hash = 0;
3318 return;
3319 }
3320 setRawFragment(escapedFragment.toCharArray());
3321 }
3322
3323
3324 /***
3325 * Set the fragment.
3326 *
3327 * @param fragment the fragment string.
3328 * @throws URIException If an error occurs.
3329 */
3330 public void setFragment(String fragment) throws URIException {
3331 if (fragment == null || fragment.length() == 0) {
3332 _fragment = (fragment == null) ? null : fragment.toCharArray();
3333 hash = 0;
3334 return;
3335 }
3336 _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3337 hash = 0;
3338 }
3339
3340
3341 /***
3342 * Get the raw-escaped fragment.
3343 * <p>
3344 * The optional fragment identifier is not part of a URI, but is often used
3345 * in conjunction with a URI.
3346 * <p>
3347 * The format and interpretation of fragment identifiers is dependent on
3348 * the media type [RFC2046] of the retrieval result.
3349 * <p>
3350 * A fragment identifier is only meaningful when a URI reference is
3351 * intended for retrieval and the result of that retrieval is a document
3352 * for which the identified fragment is consistently defined.
3353 *
3354 * @return the raw-escaped fragment
3355 */
3356 public char[] getRawFragment() {
3357 return _fragment;
3358 }
3359
3360
3361 /***
3362 * Get the escaped fragment.
3363 *
3364 * @return the escaped fragment string
3365 */
3366 public String getEscapedFragment() {
3367 return (_fragment == null) ? null : new String(_fragment);
3368 }
3369
3370
3371 /***
3372 * Get the fragment.
3373 *
3374 * @return the fragment string
3375 * @throws URIException incomplete trailing escape pattern or unsupported
3376 * character encoding
3377 * @see #decode
3378 */
3379 public String getFragment() throws URIException {
3380 return (_fragment == null) ? null : decode(_fragment,
3381 getProtocolCharset());
3382 }
3383
3384 // ------------------------------------------------------------- Utilities
3385
3386 /***
3387 * Remove the fragment identifier of the given component.
3388 *
3389 * @param component the component that a fragment may be included
3390 * @return the component that the fragment identifier is removed
3391 */
3392 protected char[] removeFragmentIdentifier(char[] component) {
3393 if (component == null) {
3394 return null;
3395 }
3396 int lastIndex = new String(component).indexOf('#');
3397 if (lastIndex != -1) {
3398 component = new String(component).substring(0,
3399 lastIndex).toCharArray();
3400 }
3401 return component;
3402 }
3403
3404
3405 /***
3406 * Normalize the given hier path part.
3407 *
3408 * <p>Algorithm taken from URI reference parser at
3409 * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
3410 *
3411 * @param path the path to normalize
3412 * @return the normalized path
3413 * @throws URIException no more higher path level to be normalized
3414 */
3415 protected char[] normalize(char[] path) throws URIException {
3416
3417 if (path == null) {
3418 return null;
3419 }
3420
3421 String normalized = new String(path);
3422
3423 // If the buffer begins with "./" or "../", the "." or ".." is removed.
3424 if (normalized.startsWith("./")) {
3425 normalized = normalized.substring(1);
3426 } else if (normalized.startsWith("../")) {
3427 normalized = normalized.substring(2);
3428 } else if (normalized.startsWith("..")) {
3429 normalized = normalized.substring(2);
3430 }
3431
3432 // All occurrences of "/./" in the buffer are replaced with "/"
3433 int index = -1;
3434 while ((index = normalized.indexOf("/./")) != -1) {
3435 normalized = normalized.substring(0, index) + normalized.substring(index + 2);
3436 }
3437
3438 // If the buffer ends with "/.", the "." is removed.
3439 if (normalized.endsWith("/.")) {
3440 normalized = normalized.substring(0, normalized.length() - 1);
3441 }
3442
3443 int startIndex = 0;
3444
3445 // All occurrences of "/<segment>/../" in the buffer, where ".."
3446 // and <segment> are complete path segments, are iteratively replaced
3447 // with "/" in order from left to right until no matching pattern remains.
3448 // If the buffer ends with "/<segment>/..", that is also replaced
3449 // with "/". Note that <segment> may be empty.
3450 while ((index = normalized.indexOf("/../", startIndex)) != -1) {
3451 int slashIndex = normalized.lastIndexOf('/', index - 1);
3452 if (slashIndex >= 0) {
3453 normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
3454 } else {
3455 startIndex = index + 3;
3456 }
3457 }
3458 if (normalized.endsWith("/..")) {
3459 int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3460 if (slashIndex >= 0) {
3461 normalized = normalized.substring(0, slashIndex + 1);
3462 }
3463 }
3464
3465 // All prefixes of "<segment>/../" in the buffer, where ".."
3466 // and <segment> are complete path segments, are iteratively replaced
3467 // with "/" in order from left to right until no matching pattern remains.
3468 // If the buffer ends with "<segment>/..", that is also replaced
3469 // with "/". Note that <segment> may be empty.
3470 while ((index = normalized.indexOf("/../")) != -1) {
3471 int slashIndex = normalized.lastIndexOf('/', index - 1);
3472 if (slashIndex >= 0) {
3473 break;
3474 } else {
3475 normalized = normalized.substring(index + 3);
3476 }
3477 }
3478 if (normalized.endsWith("/..")) {
3479 int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3480 if (slashIndex < 0) {
3481 normalized = "/";
3482 }
3483 }
3484
3485 return normalized.toCharArray();
3486 }
3487
3488
3489 /***
3490 * Normalizes the path part of this URI. Normalization is only meant to be performed on
3491 * URIs with an absolute path. Calling this method on a relative path URI will have no
3492 * effect.
3493 *
3494 * @throws URIException no more higher path level to be normalized
3495 *
3496 * @see #isAbsPath()
3497 */
3498 public void normalize() throws URIException {
3499 if (isAbsPath()) {
3500 _path = normalize(_path);
3501 setURI();
3502 }
3503 }
3504
3505
3506 /***
3507 * Test if the first array is equal to the second array.
3508 *
3509 * @param first the first character array
3510 * @param second the second character array
3511 * @return true if they're equal
3512 */
3513 protected boolean equals(char[] first, char[] second) {
3514
3515 if (first == null && second == null) {
3516 return true;
3517 }
3518 if (first == null || second == null) {
3519 return false;
3520 }
3521 if (first.length != second.length) {
3522 return false;
3523 }
3524 for (int i = 0; i < first.length; i++) {
3525 if (first[i] != second[i]) {
3526 return false;
3527 }
3528 }
3529 return true;
3530 }
3531
3532
3533 /***
3534 * Test an object if this URI is equal to another.
3535 *
3536 * @param obj an object to compare
3537 * @return true if two URI objects are equal
3538 */
3539 public boolean equals(Object obj) {
3540
3541 // normalize and test each components
3542 if (obj == this) {
3543 return true;
3544 }
3545 if (!(obj instanceof URI)) {
3546 return false;
3547 }
3548 URI another = (URI) obj;
3549 // scheme
3550 if (!equals(_scheme, another._scheme)) {
3551 return false;
3552 }
3553 // is_opaque_part or is_hier_part? and opaque
3554 if (!equals(_opaque, another._opaque)) {
3555 return false;
3556 }
3557 // is_hier_part
3558 // has_authority
3559 if (!equals(_authority, another._authority)) {
3560 return false;
3561 }
3562 // path
3563 if (!equals(_path, another._path)) {
3564 return false;
3565 }
3566 // has_query
3567 if (!equals(_query, another._query)) {
3568 return false;
3569 }
3570 // has_fragment? should be careful of the only fragment case.
3571 if (!equals(_fragment, another._fragment)) {
3572 return false;
3573 }
3574 return true;
3575 }
3576
3577 // ---------------------------------------------------------- Serialization
3578
3579 /***
3580 * Write the content of this URI.
3581 *
3582 * @param oos the object-output stream
3583 * @throws IOException If an IO problem occurs.
3584 */
3585 protected void writeObject(ObjectOutputStream oos)
3586 throws IOException {
3587
3588 oos.defaultWriteObject();
3589 }
3590
3591
3592 /***
3593 * Read a URI.
3594 *
3595 * @param ois the object-input stream
3596 * @throws ClassNotFoundException If one of the classes specified in the
3597 * input stream cannot be found.
3598 * @throws IOException If an IO problem occurs.
3599 */
3600 protected void readObject(ObjectInputStream ois)
3601 throws ClassNotFoundException, IOException {
3602
3603 ois.defaultReadObject();
3604 }
3605
3606 // -------------------------------------------------------------- Hash code
3607
3608 /***
3609 * Return a hash code for this URI.
3610 *
3611 * @return a has code value for this URI
3612 */
3613 public int hashCode() {
3614 if (hash == 0) {
3615 char[] c = _uri;
3616 if (c != null) {
3617 for (int i = 0, len = c.length; i < len; i++) {
3618 hash = 31 * hash + c[i];
3619 }
3620 }
3621 c = _fragment;
3622 if (c != null) {
3623 for (int i = 0, len = c.length; i < len; i++) {
3624 hash = 31 * hash + c[i];
3625 }
3626 }
3627 }
3628 return hash;
3629 }
3630
3631 // ------------------------------------------------------------- Comparison
3632
3633 /***
3634 * Compare this URI to another object.
3635 *
3636 * @param obj the object to be compared.
3637 * @return 0, if it's same,
3638 * -1, if failed, first being compared with in the authority component
3639 * @throws ClassCastException not URI argument
3640 */
3641 public int compareTo(Object obj) throws ClassCastException {
3642
3643 URI another = (URI) obj;
3644 if (!equals(_authority, another.getRawAuthority())) {
3645 return -1;
3646 }
3647 return toString().compareTo(another.toString());
3648 }
3649
3650 // ------------------------------------------------------------------ Clone
3651
3652 /***
3653 * Create and return a copy of this object, the URI-reference containing
3654 * the userinfo component. Notice that the whole URI-reference including
3655 * the userinfo component counld not be gotten as a <code>String</code>.
3656 * <p>
3657 * To copy the identical <code>URI</code> object including the userinfo
3658 * component, it should be used.
3659 *
3660 * @return a clone of this instance
3661 */
3662 public synchronized Object clone() {
3663
3664 URI instance = new URI();
3665
3666 instance._uri = _uri;
3667 instance._scheme = _scheme;
3668 instance._opaque = _opaque;
3669 instance._authority = _authority;
3670 instance._userinfo = _userinfo;
3671 instance._host = _host;
3672 instance._port = _port;
3673 instance._path = _path;
3674 instance._query = _query;
3675 instance._fragment = _fragment;
3676 // the charset to do escape encoding for this instance
3677 instance.protocolCharset = protocolCharset;
3678 // flags
3679 instance._is_hier_part = _is_hier_part;
3680 instance._is_opaque_part = _is_opaque_part;
3681 instance._is_net_path = _is_net_path;
3682 instance._is_abs_path = _is_abs_path;
3683 instance._is_rel_path = _is_rel_path;
3684 instance._is_reg_name = _is_reg_name;
3685 instance._is_server = _is_server;
3686 instance._is_hostname = _is_hostname;
3687 instance._is_IPv4address = _is_IPv4address;
3688 instance._is_IPv6reference = _is_IPv6reference;
3689
3690 return instance;
3691 }
3692
3693 // ------------------------------------------------------------ Get the URI
3694
3695 /***
3696 * It can be gotten the URI character sequence. It's raw-escaped.
3697 * For the purpose of the protocol to be transported, it will be useful.
3698 * <p>
3699 * It is clearly unwise to use a URL that contains a password which is
3700 * intended to be secret. In particular, the use of a password within
3701 * the 'userinfo' component of a URL is strongly disrecommended except
3702 * in those rare cases where the 'password' parameter is intended to be
3703 * public.
3704 * <p>
3705 * When you want to get each part of the userinfo, you need to use the
3706 * specific methods in the specific URL. It depends on the specific URL.
3707 *
3708 * @return the URI character sequence
3709 */
3710 public char[] getRawURI() {
3711 return _uri;
3712 }
3713
3714
3715 /***
3716 * It can be gotten the URI character sequence. It's escaped.
3717 * For the purpose of the protocol to be transported, it will be useful.
3718 *
3719 * @return the escaped URI string
3720 */
3721 public String getEscapedURI() {
3722 return (_uri == null) ? null : new String(_uri);
3723 }
3724
3725
3726 /***
3727 * It can be gotten the URI character sequence.
3728 *
3729 * @return the original URI string
3730 * @throws URIException incomplete trailing escape pattern or unsupported
3731 * character encoding
3732 * @see #decode
3733 */
3734 public String getURI() throws URIException {
3735 return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3736 }
3737
3738
3739 /***
3740 * Get the URI reference character sequence.
3741 *
3742 * @return the URI reference character sequence
3743 */
3744 public char[] getRawURIReference() {
3745 if (_fragment == null) {
3746 return _uri;
3747 }
3748 if (_uri == null) {
3749 return _fragment;
3750 }
3751 // if _uri != null && _fragment != null
3752 String uriReference = new String(_uri) + "#" + new String(_fragment);
3753 return uriReference.toCharArray();
3754 }
3755
3756
3757 /***
3758 * Get the escaped URI reference string.
3759 *
3760 * @return the escaped URI reference string
3761 */
3762 public String getEscapedURIReference() {
3763 char[] uriReference = getRawURIReference();
3764 return (uriReference == null) ? null : new String(uriReference);
3765 }
3766
3767
3768 /***
3769 * Get the original URI reference string.
3770 *
3771 * @return the original URI reference string
3772 * @throws URIException If {@link #decode} fails.
3773 */
3774 public String getURIReference() throws URIException {
3775 char[] uriReference = getRawURIReference();
3776 return (uriReference == null) ? null : decode(uriReference,
3777 getProtocolCharset());
3778 }
3779
3780
3781 /***
3782 * Get the escaped URI string.
3783 * <p>
3784 * On the document, the URI-reference form is only used without the userinfo
3785 * component like http://jakarta.apache.org/ by the security reason.
3786 * But the URI-reference form with the userinfo component could be parsed.
3787 * <p>
3788 * In other words, this URI and any its subclasses must not expose the
3789 * URI-reference expression with the userinfo component like
3790 * http://user:password@hostport/restricted_zone.<br>
3791 * It means that the API client programmer should extract each user and
3792 * password to access manually. Probably it will be supported in the each
3793 * subclass, however, not a whole URI-reference expression.
3794 *
3795 * @return the escaped URI string
3796 * @see #clone()
3797 */
3798 public String toString() {
3799 return getEscapedURI();
3800 }
3801
3802
3803 // ------------------------------------------------------------ Inner class
3804
3805 /***
3806 * The charset-changed normal operation to represent to be required to
3807 * alert to user the fact the default charset is changed.
3808 */
3809 public static class DefaultCharsetChanged extends RuntimeException {
3810
3811 // ------------------------------------------------------- constructors
3812
3813 /***
3814 * The constructor with a reason string and its code arguments.
3815 *
3816 * @param reasonCode the reason code
3817 * @param reason the reason
3818 */
3819 public DefaultCharsetChanged(int reasonCode, String reason) {
3820 super(reason);
3821 this.reason = reason;
3822 this.reasonCode = reasonCode;
3823 }
3824
3825 // ---------------------------------------------------------- constants
3826
3827 /*** No specified reason code. */
3828 public static final int UNKNOWN = 0;
3829
3830 /*** Protocol charset changed. */
3831 public static final int PROTOCOL_CHARSET = 1;
3832
3833 /*** Document charset changed. */
3834 public static final int DOCUMENT_CHARSET = 2;
3835
3836 // ------------------------------------------------- instance variables
3837
3838 /*** The reason code. */
3839 private int reasonCode;
3840
3841 /*** The reason message. */
3842 private String reason;
3843
3844 // ------------------------------------------------------------ methods
3845
3846 /***
3847 * Get the reason code.
3848 *
3849 * @return the reason code
3850 */
3851 public int getReasonCode() {
3852 return reasonCode;
3853 }
3854
3855 /***
3856 * Get the reason message.
3857 *
3858 * @return the reason message
3859 */
3860 public String getReason() {
3861 return reason;
3862 }
3863
3864 }
3865
3866
3867 /***
3868 * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3869 * given locale. Supports all locales recognized in JDK 1.1.
3870 * <p>
3871 * The distribution of this class is Servlets.com. It was originally
3872 * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3873 */
3874 public static class LocaleToCharsetMap {
3875
3876 /*** A mapping of language code to charset */
3877 private static final Hashtable LOCALE_TO_CHARSET_MAP;
3878 static {
3879 LOCALE_TO_CHARSET_MAP = new Hashtable();
3880 LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3881 LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3882 LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3883 LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3884 LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3885 LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3886 LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3887 LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3888 LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3889 LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3890 LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3891 LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3892 LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3893 LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3894 LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3895 LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3896 LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3897 LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3898 LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3899 LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3900 LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3901 LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3902 LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3903 LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3904 LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3905 LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3906 LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3907 LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3908 LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3909 LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3910 LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3911 LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3912 LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3913 LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3914 LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3915 LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3916 LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3917 LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3918 LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3919 }
3920
3921 /***
3922 * Get the preferred charset for the given locale.
3923 *
3924 * @param locale the locale
3925 * @return the preferred charset or null if the locale is not
3926 * recognized.
3927 */
3928 public static String getCharset(Locale locale) {
3929 // try for an full name match (may include country)
3930 String charset =
3931 (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3932 if (charset != null) {
3933 return charset;
3934 }
3935
3936 // if a full name didn't match, try just the language
3937 charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3938 return charset; // may be null
3939 }
3940
3941 }
3942
3943 }
3944
This page was automatically generated by Maven