View Javadoc
1 /* 2 * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.36.2.4 2003/10/11 19:44:27 olegk Exp $ 3 * $Revision: 1.36.2.4 $ 4 * $Date: 2003/10/11 19:44:27 $ 5 * 6 * ==================================================================== 7 * 8 * The Apache Software License, Version 1.1 9 * 10 * Copyright (c) 2002-2003 The Apache Software Foundation. All rights 11 * reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in 22 * the documentation and/or other materials provided with the 23 * distribution. 24 * 25 * 3. The end-user documentation included with the redistribution, if 26 * any, must include the following acknowlegement: 27 * "This product includes software developed by the 28 * Apache Software Foundation (http://www.apache.org/)." 29 * Alternately, this acknowlegement may appear in the software itself, 30 * if and wherever such third-party acknowlegements normally appear. 31 * 32 * 4. The names "The Jakarta Project", "Commons", and "Apache Software 33 * Foundation" must not be used to endorse or promote products derived 34 * from this software without prior written permission. For written 35 * permission, please contact apache@apache.org. 36 * 37 * 5. Products derived from this software may not be called "Apache" 38 * nor may "Apache" appear in their names without prior written 39 * permission of the Apache Group. 
40 * 41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 42 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 44 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 45 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 48 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 49 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 50 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 51 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 53 * ==================================================================== 54 * 55 * This software consists of voluntary contributions made by many 56 * individuals on behalf of the Apache Software Foundation. For more 57 * information on the Apache Software Foundation, please see 58 * <http://www.apache.org/>. 59 * 60 * [Additional notices, if required by prior licensing conditions] 61 * 62 */ 63 64 package org.apache.commons.httpclient; 65 66 import java.io.IOException; 67 import java.io.ObjectInputStream; 68 import java.io.ObjectOutputStream; 69 import java.io.Serializable; 70 import java.io.UnsupportedEncodingException; 71 import java.util.Locale; 72 import java.util.BitSet; 73 import java.util.Hashtable; 74 import java.net.URL; 75 76 /*** 77 * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396. 78 * This class has the purpose of supportting of parsing a URI reference to 79 * extend any specific protocols, the character encoding of the protocol to 80 * be transported and the charset of the document. 81 * <p> 82 * A URI is always in an "escaped" form, since escaping or unescaping a 83 * completed URI might change its semantics. 
84 * <p> 85 * Implementers should be careful not to escape or unescape the same string 86 * more than once, since unescaping an already unescaped string might lead to 87 * misinterpreting a percent data character as another escaped character, 88 * or vice versa in the case of escaping an already escaped string. 89 * <p> 90 * In order to avoid these problems, data types used as follows: 91 * <p><blockquote><pre> 92 * URI character sequence: char 93 * octet sequence: byte 94 * original character sequence: String 95 * </pre></blockquote><p> 96 * 97 * So, a URI is a sequence of characters as an array of a char type, which 98 * is not always represented as a sequence of octets as an array of byte. 99 * <p> 100 * 101 * URI Syntactic Components 102 * <p><blockquote><pre> 103 * - In general, written as follows: 104 * Absolute URI = <scheme>:<scheme-specific-part> 105 * Generic URI = <scheme>://<authority><path>?<query> 106 * 107 * - Syntax 108 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 109 * hier_part = ( net_path | abs_path ) [ "?" query ] 110 * net_path = "//" authority [ abs_path ] 111 * abs_path = "/" path_segments 112 * </pre></blockquote><p> 113 * 114 * The following examples illustrate URI that are in common use. 
115 * <pre> 116 * ftp://ftp.is.co.za/rfc/rfc1808.txt 117 * -- ftp scheme for File Transfer Protocol services 118 * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles 119 * -- gopher scheme for Gopher and Gopher+ Protocol services 120 * http://www.math.uio.no/faq/compression-faq/part1.html 121 * -- http scheme for Hypertext Transfer Protocol services 122 * mailto:mduerst@ifi.unizh.ch 123 * -- mailto scheme for electronic mail addresses 124 * news:comp.infosystems.www.servers.unix 125 * -- news scheme for USENET news groups and articles 126 * telnet://melvyl.ucop.edu/ 127 * -- telnet scheme for interactive services via the TELNET Protocol 128 * </pre> 129 * Please, notice that there are many modifications from URL(RFC 1738) and 130 * relative URL(RFC 1808). 131 * <p> 132 * <b>The expressions for a URI</b> 133 * <p><pre> 134 * For escaped URI forms 135 * - URI(char[]) // constructor 136 * - char[] getRawXxx() // method 137 * - String getEscapedXxx() // method 138 * - String toString() // method 139 * <p> 140 * For unescaped URI forms 141 * - URI(String) // constructor 142 * - String getXXX() // method 143 * </pre><p> 144 * 145 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a> 146 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a> 147 * @version $Revision: 1.36.2.4 $ $Date: 2002/03/14 15:14:01 148 */ 149 public class URI implements Cloneable, Comparable, Serializable { 150 151 152 // ----------------------------------------------------------- Constructors 153 154 /*** Create an instance as an internal use */ 155 protected URI() { 156 } 157 158 159 /*** 160 * Construct a URI as an escaped form of a character array with the given 161 * charset. 162 * 163 * @param escaped the URI character sequence 164 * @param charset the charset string to do escape encoding 165 * @throws URIException If the URI cannot be created. 
166 * @throws NullPointerException if <code>escaped</code> is <code>null</code> 167 * @see #getProtocolCharset 168 */ 169 public URI(char[] escaped, String charset) 170 throws URIException, NullPointerException { 171 protocolCharset = charset; 172 parseUriReference(new String(escaped), true); 173 } 174 175 176 /*** 177 * Construct a URI as an escaped form of a character array. 178 * An URI can be placed within double-quotes or angle brackets like 179 * "http://test.com/" and <http://test.com/> 180 * 181 * @param escaped the URI character sequence 182 * @throws URIException If the URI cannot be created. 183 * @throws NullPointerException if <code>escaped</code> is <code>null</code> 184 * @see #getDefaultProtocolCharset 185 */ 186 public URI(char[] escaped) 187 throws URIException, NullPointerException { 188 parseUriReference(new String(escaped), true); 189 } 190 191 192 /*** 193 * Construct a URI from the given string with the given charset. 194 * 195 * @param original the string to be represented to URI character sequence 196 * It is one of absoluteURI and relativeURI. 197 * @param charset the charset string to do escape encoding 198 * @throws URIException If the URI cannot be created. 199 * @see #getProtocolCharset 200 */ 201 public URI(String original, String charset) throws URIException { 202 protocolCharset = charset; 203 parseUriReference(original, false); 204 } 205 206 207 /*** 208 * Construct a URI from the given string. 209 * <p><blockquote><pre> 210 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 211 * </pre></blockquote><p> 212 * An URI can be placed within double-quotes or angle brackets like 213 * "http://test.com/" and <http://test.com/> 214 * 215 * @param original the string to be represented to URI character sequence 216 * It is one of absoluteURI and relativeURI. 217 * @throws URIException If the URI cannot be created. 
218 * @see #getDefaultProtocolCharset 219 */ 220 public URI(String original) throws URIException { 221 parseUriReference(original, false); 222 } 223 224 225 /*** 226 * Construct a URI from a URL. 227 * 228 * @param url a valid URL. 229 * @throws URIException If the URI cannot be created. 230 * @since 2.0 231 * @deprecated currently somewhat wrong and diffrent with java.net.URL usage 232 */ 233 public URI(URL url) throws URIException { 234 this(url.toString()); 235 } 236 237 238 /*** 239 * Construct a general URI from the given components. 240 * <p><blockquote><pre> 241 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 242 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 243 * opaque_part = uric_no_slash *uric 244 * </pre></blockquote><p> 245 * It's for absolute URI = <scheme>:<scheme-specific-part># 246 * <fragment>. 247 * 248 * @param scheme the scheme string 249 * @param schemeSpecificPart scheme_specific_part 250 * @param fragment the fragment string 251 * @throws URIException If the URI cannot be created. 252 * @see #getDefaultProtocolCharset 253 */ 254 public URI(String scheme, String schemeSpecificPart, String fragment) 255 throws URIException { 256 257 // validate and contruct the URI character sequence 258 if (scheme == null) { 259 throw new URIException(URIException.PARSING, "scheme required"); 260 } 261 char[] s = scheme.toLowerCase().toCharArray(); 262 if (validate(s, URI.scheme)) { 263 _scheme = s; // is_absoluteURI 264 } else { 265 throw new URIException(URIException.PARSING, "incorrect scheme"); 266 } 267 _opaque = encode(schemeSpecificPart, allowed_opaque_part, 268 getProtocolCharset()); 269 // Set flag 270 _is_opaque_part = true; 271 _fragment = fragment.toCharArray(); 272 273 setURI(); 274 } 275 276 277 /*** 278 * Construct a general URI from the given components. 
279 * <p><blockquote><pre> 280 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 281 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 282 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] 283 * hier_part = ( net_path | abs_path ) [ "?" query ] 284 * </pre></blockquote><p> 285 * It's for absolute URI = <scheme>:<path>?<query>#< 286 * fragment> and relative URI = <path>?<query>#<fragment 287 * >. 288 * 289 * @param scheme the scheme string 290 * @param authority the authority string 291 * @param path the path string 292 * @param query the query string 293 * @param fragment the fragment string 294 * @throws URIException If the new URI cannot be created. 295 * @see #getDefaultProtocolCharset 296 */ 297 public URI(String scheme, String authority, String path, String query, 298 String fragment) throws URIException { 299 300 // validate and contruct the URI character sequence 301 StringBuffer buff = new StringBuffer(); 302 if (scheme != null) { 303 buff.append(scheme); 304 buff.append(':'); 305 } 306 if (authority != null) { 307 buff.append("//"); 308 buff.append(authority); 309 } 310 if (path != null) { // accept empty path 311 if ((scheme != null || authority != null) 312 && !path.startsWith("/")) { 313 throw new URIException(URIException.PARSING, 314 "abs_path requested"); 315 } 316 buff.append(path); 317 } 318 if (query != null) { 319 buff.append('?'); 320 buff.append(query); 321 } 322 if (fragment != null) { 323 buff.append('#'); 324 buff.append(fragment); 325 } 326 parseUriReference(buff.toString(), false); 327 } 328 329 330 /*** 331 * Construct a general URI from the given components. 332 * 333 * @param scheme the scheme string 334 * @param userinfo the userinfo string 335 * @param host the host string 336 * @param port the port number 337 * @throws URIException If the new URI cannot be created. 
338 * @see #getDefaultProtocolCharset 339 */ 340 public URI(String scheme, String userinfo, String host, int port) 341 throws URIException { 342 343 this(scheme, userinfo, host, port, null, null, null); 344 } 345 346 347 /*** 348 * Construct a general URI from the given components. 349 * 350 * @param scheme the scheme string 351 * @param userinfo the userinfo string 352 * @param host the host string 353 * @param port the port number 354 * @param path the path string 355 * @throws URIException If the new URI cannot be created. 356 * @see #getDefaultProtocolCharset 357 */ 358 public URI(String scheme, String userinfo, String host, int port, 359 String path) throws URIException { 360 361 this(scheme, userinfo, host, port, path, null, null); 362 } 363 364 365 /*** 366 * Construct a general URI from the given components. 367 * 368 * @param scheme the scheme string 369 * @param userinfo the userinfo string 370 * @param host the host string 371 * @param port the port number 372 * @param path the path string 373 * @param query the query string 374 * @throws URIException If the new URI cannot be created. 375 * @see #getDefaultProtocolCharset 376 */ 377 public URI(String scheme, String userinfo, String host, int port, 378 String path, String query) throws URIException { 379 380 this(scheme, userinfo, host, port, path, query, null); 381 } 382 383 384 /*** 385 * Construct a general URI from the given components. 386 * 387 * @param scheme the scheme string 388 * @param userinfo the userinfo string 389 * @param host the host string 390 * @param port the port number 391 * @param path the path string 392 * @param query the query string 393 * @param fragment the fragment string 394 * @throws URIException If the new URI cannot be created. 395 * @see #getDefaultProtocolCharset 396 */ 397 public URI(String scheme, String userinfo, String host, int port, 398 String path, String query, String fragment) throws URIException { 399 400 this(scheme, (host == null) ? 
null 401 : ((userinfo != null) ? userinfo + '@' : "") + host 402 + ((port != -1) ? ":" + port : ""), path, query, fragment); 403 } 404 405 406 /*** 407 * Construct a general URI from the given components. 408 * 409 * @param scheme the scheme string 410 * @param host the host string 411 * @param path the path string 412 * @param fragment the fragment string 413 * @throws URIException If the new URI cannot be created. 414 * @see #getDefaultProtocolCharset 415 */ 416 public URI(String scheme, String host, String path, String fragment) 417 throws URIException { 418 419 this(scheme, host, path, null, fragment); 420 } 421 422 423 /*** 424 * Construct a general URI with the given relative URI string. 425 * 426 * @param base the base URI 427 * @param relative the relative URI string 428 * @throws URIException If the new URI cannot be created. 429 */ 430 public URI(URI base, String relative) throws URIException { 431 this(base, new URI(relative)); 432 } 433 434 435 /*** 436 * Construct a general URI with the given relative URI. 437 * <p><blockquote><pre> 438 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 439 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] 440 * </pre></blockquote><p> 441 * Resolving Relative References to Absolute Form. 
442 * 443 * <strong>Examples of Resolving Relative URI References</strong> 444 * 445 * Within an object with a well-defined base URI of 446 * <p><blockquote><pre> 447 * http://a/b/c/d;p?q 448 * </pre></blockquote><p> 449 * the relative URI would be resolved as follows: 450 * 451 * Normal Examples 452 * 453 * <p><blockquote><pre> 454 * g:h = g:h 455 * g = http://a/b/c/g 456 * ./g = http://a/b/c/g 457 * g/ = http://a/b/c/g/ 458 * /g = http://a/g 459 * //g = http://g 460 * ?y = http://a/b/c/?y 461 * g?y = http://a/b/c/g?y 462 * #s = (current document)#s 463 * g#s = http://a/b/c/g#s 464 * g?y#s = http://a/b/c/g?y#s 465 * ;x = http://a/b/c/;x 466 * g;x = http://a/b/c/g;x 467 * g;x?y#s = http://a/b/c/g;x?y#s 468 * . = http://a/b/c/ 469 * ./ = http://a/b/c/ 470 * .. = http://a/b/ 471 * ../ = http://a/b/ 472 * ../g = http://a/b/g 473 * ../.. = http://a/ 474 * ../../ = http://a/ 475 * ../../g = http://a/g 476 * </pre></blockquote><p> 477 * 478 * Some URI schemes do not allow a hierarchical syntax matching the 479 * <hier_part> syntax, and thus cannot use relative references. 480 * 481 * @param base the base URI 482 * @param relative the relative URI 483 * @throws URIException If the new URI cannot be created. 
484 */ 485 public URI(URI base, URI relative) throws URIException { 486 487 if (base._scheme == null) { 488 throw new URIException(URIException.PARSING, "base URI required"); 489 } 490 if (base._scheme != null) { 491 this._scheme = base._scheme; 492 this._authority = base._authority; 493 } 494 if (base._is_opaque_part || relative._is_opaque_part) { 495 this._scheme = base._scheme; 496 this._is_opaque_part = base._is_opaque_part 497 || relative._is_opaque_part; 498 this._opaque = relative._opaque; 499 this._fragment = relative._fragment; 500 this.setURI(); 501 return; 502 } 503 if (relative._scheme != null) { 504 this._scheme = relative._scheme; 505 this._is_net_path = relative._is_net_path; 506 this._authority = relative._authority; 507 if (relative._is_server) { 508 this._is_server = relative._is_server; 509 this._userinfo = relative._userinfo; 510 this._host = relative._host; 511 this._port = relative._port; 512 } else if (relative._is_reg_name) { 513 this._is_reg_name = relative._is_reg_name; 514 } 515 this._is_abs_path = relative._is_abs_path; 516 this._is_rel_path = relative._is_rel_path; 517 this._path = relative._path; 518 } else if (base._authority != null && relative._scheme == null) { 519 this._is_net_path = base._is_net_path; 520 this._authority = base._authority; 521 if (base._is_server) { 522 this._is_server = base._is_server; 523 this._userinfo = base._userinfo; 524 this._host = base._host; 525 this._port = base._port; 526 } else if (base._is_reg_name) { 527 this._is_reg_name = base._is_reg_name; 528 } 529 } 530 if (relative._authority != null) { 531 this._is_net_path = relative._is_net_path; 532 this._authority = relative._authority; 533 if (relative._is_server) { 534 this._is_server = relative._is_server; 535 this._userinfo = relative._userinfo; 536 this._host = relative._host; 537 this._port = relative._port; 538 } else if (relative._is_reg_name) { 539 this._is_reg_name = relative._is_reg_name; 540 } 541 this._is_abs_path = relative._is_abs_path; 
542 this._is_rel_path = relative._is_rel_path; 543 this._path = relative._path; 544 } 545 // resolve the path and query if necessary 546 if (relative._scheme == null && relative._authority == null) { 547 if ((relative._path == null || relative._path.length == 0) 548 && relative._query == null) { 549 // handle a reference to the current document, see RFC 2396 550 // section 5.2 step 2 551 this._path = base._path; 552 this._query = base._query; 553 } else { 554 this._path = resolvePath(base._path, relative._path); 555 } 556 } 557 // base._query removed 558 if (relative._query != null) { 559 this._query = relative._query; 560 } 561 // base._fragment removed 562 if (relative._fragment != null) { 563 this._fragment = relative._fragment; 564 } 565 this.setURI(); 566 // reparse the newly built URI, this will ensure that all flags are set correctly. 567 // TODO there must be a better way to do this 568 parseUriReference(new String(_uri), true); 569 } 570 571 // --------------------------------------------------- Instance Variables 572 573 /*** Version ID for serialization */ 574 static final long serialVersionUID = 604752400577948726L; 575 576 577 /*** 578 * Cache the hash code for this URI. 579 */ 580 protected int hash = 0; 581 582 583 /*** 584 * This Uniform Resource Identifier (URI). 585 * The URI is always in an "escaped" form, since escaping or unescaping 586 * a completed URI might change its semantics. 587 */ 588 protected char[] _uri = null; 589 590 591 /*** 592 * The charset of the protocol used by this URI instance. 593 */ 594 protected String protocolCharset = null; 595 596 597 /*** 598 * The default charset of the protocol. RFC 2277, 2396 599 */ 600 protected static String defaultProtocolCharset = "UTF-8"; 601 602 603 /*** 604 * The default charset of the document. RFC 2277, 2396 605 * The platform's charset is used for the document by default. 
606 */ 607 protected static String defaultDocumentCharset = null; 608 protected static String defaultDocumentCharsetByLocale = null; 609 protected static String defaultDocumentCharsetByPlatform = null; 610 // Static initializer for defaultDocumentCharset 611 static { 612 Locale locale = Locale.getDefault(); 613 // in order to support backward compatiblity 614 if (locale != null) { 615 defaultDocumentCharsetByLocale = 616 LocaleToCharsetMap.getCharset(locale); 617 // set the default document charset 618 defaultDocumentCharset = defaultDocumentCharsetByLocale; 619 } 620 // in order to support platform encoding 621 try { 622 defaultDocumentCharsetByPlatform = System.getProperty("file.encoding"); 623 } catch(SecurityException ignore) { 624 } 625 if (defaultDocumentCharset == null) { 626 // set the default document charset 627 defaultDocumentCharset = defaultDocumentCharsetByPlatform; 628 } 629 } 630 631 632 /*** 633 * The scheme. 634 */ 635 protected char[] _scheme = null; 636 637 638 /*** 639 * The opaque. 640 */ 641 protected char[] _opaque = null; 642 643 644 /*** 645 * The authority. 646 */ 647 protected char[] _authority = null; 648 649 650 /*** 651 * The userinfo. 652 */ 653 protected char[] _userinfo = null; 654 655 656 /*** 657 * The host. 658 */ 659 protected char[] _host = null; 660 661 662 /*** 663 * The port. 664 */ 665 protected int _port = -1; 666 667 668 /*** 669 * The path. 670 */ 671 protected char[] _path = null; 672 673 674 /*** 675 * The query. 676 */ 677 protected char[] _query = null; 678 679 680 /*** 681 * The fragment. 682 */ 683 protected char[] _fragment = null; 684 685 686 /*** 687 * The root path. 688 */ 689 protected static char[] rootPath = { '/' }; 690 691 // ---------------------- Generous characters for each component validation 692 693 /*** 694 * The percent "%" character always has the reserved purpose of being the 695 * escape indicator, it must be escaped as "%25" in order to be used as 696 * data within a URI. 
697 */ 698 protected static final BitSet percent = new BitSet(256); 699 // Static initializer for percent 700 static { 701 percent.set('%'); 702 } 703 704 705 /*** 706 * BitSet for digit. 707 * <p><blockquote><pre> 708 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | 709 * "8" | "9" 710 * </pre></blockquote><p> 711 */ 712 protected static final BitSet digit = new BitSet(256); 713 // Static initializer for digit 714 static { 715 for (int i = '0'; i <= '9'; i++) { 716 digit.set(i); 717 } 718 } 719 720 721 /*** 722 * BitSet for alpha. 723 * <p><blockquote><pre> 724 * alpha = lowalpha | upalpha 725 * </pre></blockquote><p> 726 */ 727 protected static final BitSet alpha = new BitSet(256); 728 // Static initializer for alpha 729 static { 730 for (int i = 'a'; i <= 'z'; i++) { 731 alpha.set(i); 732 } 733 for (int i = 'A'; i <= 'Z'; i++) { 734 alpha.set(i); 735 } 736 } 737 738 739 /*** 740 * BitSet for alphanum (join of alpha & digit). 741 * <p><blockquote><pre> 742 * alphanum = alpha | digit 743 * </pre></blockquote><p> 744 */ 745 protected static final BitSet alphanum = new BitSet(256); 746 // Static initializer for alphanum 747 static { 748 alphanum.or(alpha); 749 alphanum.or(digit); 750 } 751 752 753 /*** 754 * BitSet for hex. 755 * <p><blockquote><pre> 756 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | 757 * "a" | "b" | "c" | "d" | "e" | "f" 758 * </pre></blockquote><p> 759 */ 760 protected static final BitSet hex = new BitSet(256); 761 // Static initializer for hex 762 static { 763 hex.or(digit); 764 for (int i = 'a'; i <= 'f'; i++) { 765 hex.set(i); 766 } 767 for (int i = 'A'; i <= 'F'; i++) { 768 hex.set(i); 769 } 770 } 771 772 773 /*** 774 * BitSet for escaped. 775 * <p><blockquote><pre> 776 * escaped = "%" hex hex 777 * </pre></blockquote><p> 778 */ 779 protected static final BitSet escaped = new BitSet(256); 780 // Static initializer for escaped 781 static { 782 escaped.or(percent); 783 escaped.or(hex); 784 } 785 786 787 /*** 788 * BitSet for mark. 
789 * <p><blockquote><pre> 790 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | 791 * "(" | ")" 792 * </pre></blockquote><p> 793 */ 794 protected static final BitSet mark = new BitSet(256); 795 // Static initializer for mark 796 static { 797 mark.set('-'); 798 mark.set('_'); 799 mark.set('.'); 800 mark.set('!'); 801 mark.set('~'); 802 mark.set('*'); 803 mark.set('\''); 804 mark.set('('); 805 mark.set(')'); 806 } 807 808 809 /*** 810 * Data characters that are allowed in a URI but do not have a reserved 811 * purpose are called unreserved. 812 * <p><blockquote><pre> 813 * unreserved = alphanum | mark 814 * </pre></blockquote><p> 815 */ 816 protected static final BitSet unreserved = new BitSet(256); 817 // Static initializer for unreserved 818 static { 819 unreserved.or(alphanum); 820 unreserved.or(mark); 821 } 822 823 824 /*** 825 * BitSet for reserved. 826 * <p><blockquote><pre> 827 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | 828 * "$" | "," 829 * </pre></blockquote><p> 830 */ 831 protected static final BitSet reserved = new BitSet(256); 832 // Static initializer for reserved 833 static { 834 reserved.set(';'); 835 reserved.set('/'); 836 reserved.set('?'); 837 reserved.set(':'); 838 reserved.set('@'); 839 reserved.set('&'); 840 reserved.set('='); 841 reserved.set('+'); 842 reserved.set('$'); 843 reserved.set(','); 844 } 845 846 847 /*** 848 * BitSet for uric. 849 * <p><blockquote><pre> 850 * uric = reserved | unreserved | escaped 851 * </pre></blockquote><p> 852 */ 853 protected static final BitSet uric = new BitSet(256); 854 // Static initializer for uric 855 static { 856 uric.or(reserved); 857 uric.or(unreserved); 858 uric.or(escaped); 859 } 860 861 862 /*** 863 * BitSet for fragment (alias for uric). 864 * <p><blockquote><pre> 865 * fragment = *uric 866 * </pre></blockquote><p> 867 */ 868 protected static final BitSet fragment = uric; 869 870 871 /*** 872 * BitSet for query (alias for uric). 
873 * <p><blockquote><pre> 874 * query = *uric 875 * </pre></blockquote><p> 876 */ 877 protected static final BitSet query = uric; 878 879 880 /*** 881 * BitSet for pchar. 882 * <p><blockquote><pre> 883 * pchar = unreserved | escaped | 884 * ":" | "@" | "&" | "=" | "+" | "$" | "," 885 * </pre></blockquote><p> 886 */ 887 protected static final BitSet pchar = new BitSet(256); 888 // Static initializer for pchar 889 static { 890 pchar.or(unreserved); 891 pchar.or(escaped); 892 pchar.set(':'); 893 pchar.set('@'); 894 pchar.set('&'); 895 pchar.set('='); 896 pchar.set('+'); 897 pchar.set('$'); 898 pchar.set(','); 899 } 900 901 902 /*** 903 * BitSet for param (alias for pchar). 904 * <p><blockquote><pre> 905 * param = *pchar 906 * </pre></blockquote><p> 907 */ 908 protected static final BitSet param = pchar; 909 910 911 /*** 912 * BitSet for segment. 913 * <p><blockquote><pre> 914 * segment = *pchar *( ";" param ) 915 * </pre></blockquote><p> 916 */ 917 protected static final BitSet segment = new BitSet(256); 918 // Static initializer for segment 919 static { 920 segment.or(pchar); 921 segment.set(';'); 922 segment.or(param); 923 } 924 925 926 /*** 927 * BitSet for path segments. 928 * <p><blockquote><pre> 929 * path_segments = segment *( "/" segment ) 930 * </pre></blockquote><p> 931 */ 932 protected static final BitSet path_segments = new BitSet(256); 933 // Static initializer for path_segments 934 static { 935 path_segments.set('/'); 936 path_segments.or(segment); 937 } 938 939 940 /*** 941 * URI absolute path. 942 * <p><blockquote><pre> 943 * abs_path = "/" path_segments 944 * </pre></blockquote><p> 945 */ 946 protected static final BitSet abs_path = new BitSet(256); 947 // Static initializer for abs_path 948 static { 949 abs_path.set('/'); 950 abs_path.or(path_segments); 951 } 952 953 954 /*** 955 * URI bitset for encoding typical non-slash characters. 956 * <p><blockquote><pre> 957 * uric_no_slash = unreserved | escaped | ";" | "?" 
| ":" | "@" | 958 * "&" | "=" | "+" | "$" | "," 959 * </pre></blockquote><p> 960 */ 961 protected static final BitSet uric_no_slash = new BitSet(256); 962 // Static initializer for uric_no_slash 963 static { 964 uric_no_slash.or(unreserved); 965 uric_no_slash.or(escaped); 966 uric_no_slash.set(';'); 967 uric_no_slash.set('?'); 968 uric_no_slash.set(';'); 969 uric_no_slash.set('@'); 970 uric_no_slash.set('&'); 971 uric_no_slash.set('='); 972 uric_no_slash.set('+'); 973 uric_no_slash.set('$'); 974 uric_no_slash.set(','); 975 } 976 977 978 /*** 979 * URI bitset that combines uric_no_slash and uric. 980 * <p><blockquote><pre> 981 * opaque_part = uric_no_slash *uric 982 * </pre></blockquote><p> 983 */ 984 protected static final BitSet opaque_part = new BitSet(256); 985 // Static initializer for opaque_part 986 static { 987 // it's generous. because first character must not include a slash 988 opaque_part.or(uric_no_slash); 989 opaque_part.or(uric); 990 } 991 992 993 /*** 994 * URI bitset that combines absolute path and opaque part. 995 * <p><blockquote><pre> 996 * path = [ abs_path | opaque_part ] 997 * </pre></blockquote><p> 998 */ 999 protected static final BitSet path = new BitSet(256); 1000 // Static initializer for path 1001 static { 1002 path.or(abs_path); 1003 path.or(opaque_part); 1004 } 1005 1006 1007 /*** 1008 * Port, a logical alias for digit. 1009 */ 1010 protected static final BitSet port = digit; 1011 1012 1013 /*** 1014 * Bitset that combines digit and dot fo IPv$address. 1015 * <p><blockquote><pre> 1016 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit 1017 * </pre></blockquote><p> 1018 */ 1019 protected static final BitSet IPv4address = new BitSet(256); 1020 // Static initializer for IPv4address 1021 static { 1022 IPv4address.or(digit); 1023 IPv4address.set('.'); 1024 } 1025 1026 1027 /*** 1028 * RFC 2373. 
1029 * <p><blockquote><pre> 1030 * IPv6address = hexpart [ ":" IPv4address ] 1031 * </pre></blockquote><p> 1032 */ 1033 protected static final BitSet IPv6address = new BitSet(256); 1034 // Static initializer for IPv6address reference 1035 static { 1036 IPv6address.or(hex); // hexpart 1037 IPv6address.set(':'); 1038 IPv6address.or(IPv4address); 1039 } 1040 1041 1042 /*** 1043 * RFC 2732, 2373. 1044 * <p><blockquote><pre> 1045 * IPv6reference = "[" IPv6address "]" 1046 * </pre></blockquote><p> 1047 */ 1048 protected static final BitSet IPv6reference = new BitSet(256); 1049 // Static initializer for IPv6reference 1050 static { 1051 IPv6reference.set('['); 1052 IPv6reference.or(IPv6address); 1053 IPv6reference.set(']'); 1054 } 1055 1056 1057 /*** 1058 * BitSet for toplabel. 1059 * <p><blockquote><pre> 1060 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum 1061 * </pre></blockquote><p> 1062 */ 1063 protected static final BitSet toplabel = new BitSet(256); 1064 // Static initializer for toplabel 1065 static { 1066 toplabel.or(alphanum); 1067 toplabel.set('-'); 1068 } 1069 1070 1071 /*** 1072 * BitSet for domainlabel. 1073 * <p><blockquote><pre> 1074 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 1075 * </pre></blockquote><p> 1076 */ 1077 protected static final BitSet domainlabel = toplabel; 1078 1079 1080 /*** 1081 * BitSet for hostname. 1082 * <p><blockquote><pre> 1083 * hostname = *( domainlabel "." ) toplabel [ "." ] 1084 * </pre></blockquote><p> 1085 */ 1086 protected static final BitSet hostname = new BitSet(256); 1087 // Static initializer for hostname 1088 static { 1089 hostname.or(toplabel); 1090 // hostname.or(domainlabel); 1091 hostname.set('.'); 1092 } 1093 1094 1095 /*** 1096 * BitSet for host. 
1097 * <p><blockquote><pre> 1098 * host = hostname | IPv4address | IPv6reference 1099 * </pre></blockquote><p> 1100 */ 1101 protected static final BitSet host = new BitSet(256); 1102 // Static initializer for host 1103 static { 1104 host.or(hostname); 1105 // host.or(IPv4address); 1106 host.or(IPv6reference); // IPv4address 1107 } 1108 1109 1110 /*** 1111 * BitSet for hostport. 1112 * <p><blockquote><pre> 1113 * hostport = host [ ":" port ] 1114 * </pre></blockquote><p> 1115 */ 1116 protected static final BitSet hostport = new BitSet(256); 1117 // Static initializer for hostport 1118 static { 1119 hostport.or(host); 1120 hostport.set(':'); 1121 hostport.or(port); 1122 } 1123 1124 1125 /*** 1126 * Bitset for userinfo. 1127 * <p><blockquote><pre> 1128 * userinfo = *( unreserved | escaped | 1129 * ";" | ":" | "&" | "=" | "+" | "$" | "," ) 1130 * </pre></blockquote><p> 1131 */ 1132 protected static final BitSet userinfo = new BitSet(256); 1133 // Static initializer for userinfo 1134 static { 1135 userinfo.or(unreserved); 1136 userinfo.or(escaped); 1137 userinfo.set(';'); 1138 userinfo.set(':'); 1139 userinfo.set('&'); 1140 userinfo.set('='); 1141 userinfo.set('+'); 1142 userinfo.set('$'); 1143 userinfo.set(','); 1144 } 1145 1146 1147 /*** 1148 * BitSet for within the userinfo component like user and password. 1149 */ 1150 public static final BitSet within_userinfo = new BitSet(256); 1151 // Static initializer for within_userinfo 1152 static { 1153 within_userinfo.or(userinfo); 1154 within_userinfo.clear(';'); // reserved within authority 1155 within_userinfo.clear(':'); 1156 within_userinfo.clear('@'); 1157 within_userinfo.clear('?'); 1158 within_userinfo.clear('/'); 1159 } 1160 1161 1162 /*** 1163 * Bitset for server. 
1164 * <p><blockquote><pre> 1165 * server = [ [ userinfo "@" ] hostport ] 1166 * </pre></blockquote><p> 1167 */ 1168 protected static final BitSet server = new BitSet(256); 1169 // Static initializer for server 1170 static { 1171 server.or(userinfo); 1172 server.set('@'); 1173 server.or(hostport); 1174 } 1175 1176 1177 /*** 1178 * BitSet for reg_name. 1179 * <p><blockquote><pre> 1180 * reg_name = 1*( unreserved | escaped | "$" | "," | 1181 * ";" | ":" | "@" | "&" | "=" | "+" ) 1182 * </pre></blockquote><p> 1183 */ 1184 protected static final BitSet reg_name = new BitSet(256); 1185 // Static initializer for reg_name 1186 static { 1187 reg_name.or(unreserved); 1188 reg_name.or(escaped); 1189 reg_name.set('$'); 1190 reg_name.set(','); 1191 reg_name.set(';'); 1192 reg_name.set(':'); 1193 reg_name.set('@'); 1194 reg_name.set('&'); 1195 reg_name.set('='); 1196 reg_name.set('+'); 1197 } 1198 1199 1200 /*** 1201 * BitSet for authority. 1202 * <p><blockquote><pre> 1203 * authority = server | reg_name 1204 * </pre></blockquote><p> 1205 */ 1206 protected static final BitSet authority = new BitSet(256); 1207 // Static initializer for authority 1208 static { 1209 authority.or(server); 1210 authority.or(reg_name); 1211 } 1212 1213 1214 /*** 1215 * BitSet for scheme. 1216 * <p><blockquote><pre> 1217 * scheme = alpha *( alpha | digit | "+" | "-" | "." ) 1218 * </pre></blockquote><p> 1219 */ 1220 protected static final BitSet scheme = new BitSet(256); 1221 // Static initializer for scheme 1222 static { 1223 scheme.or(alpha); 1224 scheme.or(digit); 1225 scheme.set('+'); 1226 scheme.set('-'); 1227 scheme.set('.'); 1228 } 1229 1230 1231 /*** 1232 * BitSet for rel_segment. 
1233 * <p><blockquote><pre> 1234 * rel_segment = 1*( unreserved | escaped | 1235 * ";" | "@" | "&" | "=" | "+" | "$" | "," ) 1236 * </pre></blockquote><p> 1237 */ 1238 protected static final BitSet rel_segment = new BitSet(256); 1239 // Static initializer for rel_segment 1240 static { 1241 rel_segment.or(unreserved); 1242 rel_segment.or(escaped); 1243 rel_segment.set(';'); 1244 rel_segment.set('@'); 1245 rel_segment.set('&'); 1246 rel_segment.set('='); 1247 rel_segment.set('+'); 1248 rel_segment.set('$'); 1249 rel_segment.set(','); 1250 } 1251 1252 1253 /*** 1254 * BitSet for rel_path. 1255 * <p><blockquote><pre> 1256 * rel_path = rel_segment [ abs_path ] 1257 * </pre></blockquote><p> 1258 */ 1259 protected static final BitSet rel_path = new BitSet(256); 1260 // Static initializer for rel_path 1261 static { 1262 rel_path.or(rel_segment); 1263 rel_path.or(abs_path); 1264 } 1265 1266 1267 /*** 1268 * BitSet for net_path. 1269 * <p><blockquote><pre> 1270 * net_path = "//" authority [ abs_path ] 1271 * </pre></blockquote><p> 1272 */ 1273 protected static final BitSet net_path = new BitSet(256); 1274 // Static initializer for net_path 1275 static { 1276 net_path.set('/'); 1277 net_path.or(authority); 1278 net_path.or(abs_path); 1279 } 1280 1281 1282 /*** 1283 * BitSet for hier_part. 1284 * <p><blockquote><pre> 1285 * hier_part = ( net_path | abs_path ) [ "?" query ] 1286 * </pre></blockquote><p> 1287 */ 1288 protected static final BitSet hier_part = new BitSet(256); 1289 // Static initializer for hier_part 1290 static { 1291 hier_part.or(net_path); 1292 hier_part.or(abs_path); 1293 // hier_part.set('?'); aleady included 1294 hier_part.or(query); 1295 } 1296 1297 1298 /*** 1299 * BitSet for relativeURI. 1300 * <p><blockquote><pre> 1301 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" 
query ] 1302 * </pre></blockquote><p> 1303 */ 1304 protected static final BitSet relativeURI = new BitSet(256); 1305 // Static initializer for relativeURI 1306 static { 1307 relativeURI.or(net_path); 1308 relativeURI.or(abs_path); 1309 relativeURI.or(rel_path); 1310 // relativeURI.set('?'); aleady included 1311 relativeURI.or(query); 1312 } 1313 1314 1315 /*** 1316 * BitSet for absoluteURI. 1317 * <p><blockquote><pre> 1318 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 1319 * </pre></blockquote><p> 1320 */ 1321 protected static final BitSet absoluteURI = new BitSet(256); 1322 // Static initializer for absoluteURI 1323 static { 1324 absoluteURI.or(scheme); 1325 absoluteURI.set(':'); 1326 absoluteURI.or(hier_part); 1327 absoluteURI.or(opaque_part); 1328 } 1329 1330 1331 /*** 1332 * BitSet for URI-reference. 1333 * <p><blockquote><pre> 1334 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 1335 * </pre></blockquote><p> 1336 */ 1337 protected static final BitSet URI_reference = new BitSet(256); 1338 // Static initializer for URI_reference 1339 static { 1340 URI_reference.or(absoluteURI); 1341 URI_reference.or(relativeURI); 1342 URI_reference.set('#'); 1343 URI_reference.or(fragment); 1344 } 1345 1346 // ---------------------------- Characters disallowed within the URI syntax 1347 // Excluded US-ASCII Characters are like control, space, delims and unwise 1348 1349 /*** 1350 * BitSet for control. 1351 */ 1352 public static final BitSet control = new BitSet(256); 1353 // Static initializer for control 1354 static { 1355 for (int i = 0; i <= 0x1F; i++) { 1356 control.set(i); 1357 } 1358 control.set(0x7F); 1359 } 1360 1361 /*** 1362 * BitSet for space. 1363 */ 1364 public static final BitSet space = new BitSet(256); 1365 // Static initializer for space 1366 static { 1367 space.set(0x20); 1368 } 1369 1370 1371 /*** 1372 * BitSet for delims. 
1373 */ 1374 public static final BitSet delims = new BitSet(256); 1375 // Static initializer for delims 1376 static { 1377 delims.set('<'); 1378 delims.set('>'); 1379 delims.set('#'); 1380 delims.set('%'); 1381 delims.set('"'); 1382 } 1383 1384 1385 /*** 1386 * BitSet for unwise. 1387 */ 1388 public static final BitSet unwise = new BitSet(256); 1389 // Static initializer for unwise 1390 static { 1391 unwise.set('{'); 1392 unwise.set('}'); 1393 unwise.set('|'); 1394 unwise.set('//'); 1395 unwise.set('^'); 1396 unwise.set('['); 1397 unwise.set(']'); 1398 unwise.set('`'); 1399 } 1400 1401 1402 /*** 1403 * Disallowed rel_path before escaping. 1404 */ 1405 public static final BitSet disallowed_rel_path = new BitSet(256); 1406 // Static initializer for disallowed_rel_path 1407 static { 1408 disallowed_rel_path.or(uric); 1409 disallowed_rel_path.andNot(rel_path); 1410 } 1411 1412 1413 /*** 1414 * Disallowed opaque_part before escaping. 1415 */ 1416 public static final BitSet disallowed_opaque_part = new BitSet(256); 1417 // Static initializer for disallowed_opaque_part 1418 static { 1419 disallowed_opaque_part.or(uric); 1420 disallowed_opaque_part.andNot(opaque_part); 1421 } 1422 1423 // ----------------------- Characters allowed within and for each component 1424 1425 /*** 1426 * Those characters that are allowed for the authority component. 1427 */ 1428 public static final BitSet allowed_authority = new BitSet(256); 1429 // Static initializer for allowed_authority 1430 static { 1431 allowed_authority.or(authority); 1432 allowed_authority.clear('%'); 1433 } 1434 1435 1436 /*** 1437 * Those characters that are allowed for the opaque_part. 1438 */ 1439 public static final BitSet allowed_opaque_part = new BitSet(256); 1440 // Static initializer for allowed_opaque_part 1441 static { 1442 allowed_opaque_part.or(opaque_part); 1443 allowed_opaque_part.clear('%'); 1444 } 1445 1446 1447 /*** 1448 * Those characters that are allowed for the reg_name. 
1449 */ 1450 public static final BitSet allowed_reg_name = new BitSet(256); 1451 // Static initializer for allowed_reg_name 1452 static { 1453 allowed_reg_name.or(reg_name); 1454 // allowed_reg_name.andNot(percent); 1455 allowed_reg_name.clear('%'); 1456 } 1457 1458 1459 /*** 1460 * Those characters that are allowed for the userinfo component. 1461 */ 1462 public static final BitSet allowed_userinfo = new BitSet(256); 1463 // Static initializer for allowed_userinfo 1464 static { 1465 allowed_userinfo.or(userinfo); 1466 // allowed_userinfo.andNot(percent); 1467 allowed_userinfo.clear('%'); 1468 } 1469 1470 1471 /*** 1472 * Those characters that are allowed for within the userinfo component. 1473 */ 1474 public static final BitSet allowed_within_userinfo = new BitSet(256); 1475 // Static initializer for allowed_within_userinfo 1476 static { 1477 allowed_within_userinfo.or(within_userinfo); 1478 allowed_within_userinfo.clear('%'); 1479 } 1480 1481 1482 /*** 1483 * Those characters that are allowed for the IPv6reference component. 1484 * The characters '[', ']' in IPv6reference should be excluded. 1485 */ 1486 public static final BitSet allowed_IPv6reference = new BitSet(256); 1487 // Static initializer for allowed_IPv6reference 1488 static { 1489 allowed_IPv6reference.or(IPv6reference); 1490 // allowed_IPv6reference.andNot(unwise); 1491 allowed_IPv6reference.clear('['); 1492 allowed_IPv6reference.clear(']'); 1493 } 1494 1495 1496 /*** 1497 * Those characters that are allowed for the host component. 1498 * The characters '[', ']' in IPv6reference should be excluded. 1499 */ 1500 public static final BitSet allowed_host = new BitSet(256); 1501 // Static initializer for allowed_host 1502 static { 1503 allowed_host.or(hostname); 1504 allowed_host.or(allowed_IPv6reference); 1505 } 1506 1507 1508 /*** 1509 * Those characters that are allowed for the authority component. 
1510 */ 1511 public static final BitSet allowed_within_authority = new BitSet(256); 1512 // Static initializer for allowed_within_authority 1513 static { 1514 allowed_within_authority.or(server); 1515 allowed_within_authority.or(reg_name); 1516 allowed_within_authority.clear(';'); 1517 allowed_within_authority.clear(':'); 1518 allowed_within_authority.clear('@'); 1519 allowed_within_authority.clear('?'); 1520 allowed_within_authority.clear('/'); 1521 } 1522 1523 1524 /*** 1525 * Those characters that are allowed for the abs_path. 1526 */ 1527 public static final BitSet allowed_abs_path = new BitSet(256); 1528 // Static initializer for allowed_abs_path 1529 static { 1530 allowed_abs_path.or(abs_path); 1531 // allowed_abs_path.set('/'); // aleady included 1532 allowed_abs_path.andNot(percent); 1533 } 1534 1535 1536 /*** 1537 * Those characters that are allowed for the rel_path. 1538 */ 1539 public static final BitSet allowed_rel_path = new BitSet(256); 1540 // Static initializer for allowed_rel_path 1541 static { 1542 allowed_rel_path.or(rel_path); 1543 allowed_rel_path.clear('%'); 1544 } 1545 1546 1547 /*** 1548 * Those characters that are allowed within the path. 1549 */ 1550 public static final BitSet allowed_within_path = new BitSet(256); 1551 // Static initializer for allowed_within_path 1552 static { 1553 allowed_within_path.or(abs_path); 1554 allowed_within_path.clear('/'); 1555 allowed_within_path.clear(';'); 1556 allowed_within_path.clear('='); 1557 allowed_within_path.clear('?'); 1558 } 1559 1560 1561 /*** 1562 * Those characters that are allowed for the query component. 1563 */ 1564 public static final BitSet allowed_query = new BitSet(256); 1565 // Static initializer for allowed_query 1566 static { 1567 allowed_query.or(uric); 1568 allowed_query.clear('%'); 1569 } 1570 1571 1572 /*** 1573 * Those characters that are allowed within the query component. 
1574 */ 1575 public static final BitSet allowed_within_query = new BitSet(256); 1576 // Static initializer for allowed_within_query 1577 static { 1578 allowed_within_query.or(allowed_query); 1579 allowed_within_query.andNot(reserved); // excluded 'reserved' 1580 } 1581 1582 1583 /*** 1584 * Those characters that are allowed for the fragment component. 1585 */ 1586 public static final BitSet allowed_fragment = new BitSet(256); 1587 // Static initializer for allowed_fragment 1588 static { 1589 allowed_fragment.or(uric); 1590 allowed_fragment.clear('%'); 1591 } 1592 1593 // ------------------------------------------- Flags for this URI-reference 1594 1595 // TODO: Figure out what all these variables are for and provide javadoc 1596 1597 // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 1598 // absoluteURI = scheme ":" ( hier_part | opaque_part ) 1599 protected boolean _is_hier_part; 1600 protected boolean _is_opaque_part; 1601 // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] 1602 // hier_part = ( net_path | abs_path ) [ "?" query ] 1603 protected boolean _is_net_path; 1604 protected boolean _is_abs_path; 1605 protected boolean _is_rel_path; 1606 // net_path = "//" authority [ abs_path ] 1607 // authority = server | reg_name 1608 protected boolean _is_reg_name; 1609 protected boolean _is_server; // = _has_server 1610 // server = [ [ userinfo "@" ] hostport ] 1611 // host = hostname | IPv4address | IPv6reference 1612 protected boolean _is_hostname; 1613 protected boolean _is_IPv4address; 1614 protected boolean _is_IPv6reference; 1615 1616 // ------------------------------------------ Character and escape encoding 1617 1618 /*** 1619 * Encodes URI string. 
1620 * 1621 * This is a two mapping, one from original characters to octets, and 1622 * subsequently a second from octets to URI characters: 1623 * <p><blockquote><pre> 1624 * original character sequence->octet sequence->URI character sequence 1625 * </pre></blockquote><p> 1626 * 1627 * An escaped octet is encoded as a character triplet, consisting of the 1628 * percent character "%" followed by the two hexadecimal digits 1629 * representing the octet code. For example, "%20" is the escaped 1630 * encoding for the US-ASCII space character. 1631 * <p> 1632 * Conversion from the local filesystem character set to UTF-8 will 1633 * normally involve a two step process. First convert the local character 1634 * set to the UCS; then convert the UCS to UTF-8. 1635 * The first step in the process can be performed by maintaining a mapping 1636 * table that includes the local character set code and the corresponding 1637 * UCS code. 1638 * The next step is to convert the UCS character code to the UTF-8 encoding. 1639 * <p> 1640 * Mapping between vendor codepages can be done in a very similar manner 1641 * as described above. 1642 * <p> 1643 * The only time escape encodings can allowedly be made is when a URI is 1644 * being created from its component parts. The escape and validate methods 1645 * are internally performed within this method. 1646 * 1647 * @param original the original character sequence 1648 * @param allowed those characters that are allowed within a component 1649 * @param charset the protocol charset 1650 * @return URI character sequence 1651 * @throws URIException null component or unsupported character encoding 1652 */ 1653 protected static char[] encode(String original, BitSet allowed, 1654 String charset) throws URIException { 1655 1656 // encode original to uri characters. 1657 if (original == null) { 1658 throw new URIException(URIException.PARSING, "null"); 1659 } 1660 // escape octet to uri characters. 
1661 if (allowed == null) { 1662 throw new URIException(URIException.PARSING, 1663 "null allowed characters"); 1664 } 1665 byte[] octets; 1666 try { 1667 octets = original.getBytes(charset); 1668 } catch (UnsupportedEncodingException error) { 1669 throw new URIException(URIException.UNSUPPORTED_ENCODING, charset); 1670 } 1671 StringBuffer buf = new StringBuffer(octets.length); 1672 for (int i = 0; i < octets.length; i++) { 1673 char c = (char) octets[i]; 1674 if (allowed.get(c)) { 1675 buf.append(c); 1676 } else { 1677 buf.append('%'); 1678 byte b = octets[i]; // use the original byte value 1679 char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16); 1680 buf.append(Character.toUpperCase(hexadecimal)); // high 1681 hexadecimal = Character.forDigit(b & 0xF, 16); 1682 buf.append(Character.toUpperCase(hexadecimal)); // low 1683 } 1684 } 1685 1686 return buf.toString().toCharArray(); 1687 } 1688 1689 1690 /*** 1691 * Decodes URI encoded string. 1692 * 1693 * This is a two mapping, one from URI characters to octets, and 1694 * subsequently a second from octets to original characters: 1695 * <p><blockquote><pre> 1696 * URI character sequence->octet sequence->original character sequence 1697 * </pre></blockquote><p> 1698 * 1699 * A URI must be separated into its components before the escaped 1700 * characters within those components can be allowedly decoded. 1701 * <p> 1702 * Notice that there is a chance that URI characters that are non UTF-8 1703 * may be parsed as valid UTF-8. A recent non-scientific analysis found 1704 * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a 1705 * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0% 1706 * false reading. 1707 * <p> 1708 * The percent "%" character always has the reserved purpose of being 1709 * the escape indicator, it must be escaped as "%25" in order to be used 1710 * as data within a URI. 1711 * <p> 1712 * The unescape method is internally performed within this method. 
1713 * 1714 * @param component the URI character sequence 1715 * @param charset the protocol charset 1716 * @return original character sequence 1717 * @throws URIException incomplete trailing escape pattern or unsupported 1718 * character encoding 1719 */ 1720 protected static String decode(char[] component, String charset) 1721 throws URIException { 1722 1723 // unescape uri characters to octets 1724 if (component == null) { 1725 return null; 1726 } 1727 1728 byte[] octets; 1729 try { 1730 octets = new String(component).getBytes(charset); 1731 } catch (UnsupportedEncodingException error) { 1732 throw new URIException(URIException.UNSUPPORTED_ENCODING, 1733 "not supported " + charset + " encoding"); 1734 } 1735 int length = octets.length; 1736 int oi = 0; // output index 1737 for (int ii = 0; ii < length; oi++) { 1738 byte aByte = (byte) octets[ii++]; 1739 if (aByte == '%' && ii + 2 <= length) { 1740 byte high = (byte) Character.digit((char) octets[ii++], 16); 1741 byte low = (byte) Character.digit((char) octets[ii++], 16); 1742 if (high == -1 || low == -1) { 1743 throw new URIException(URIException.ESCAPING, 1744 "incomplete trailing escape pattern"); 1745 1746 } 1747 aByte = (byte) ((high << 4) + low); 1748 } 1749 octets[oi] = (byte) aByte; 1750 } 1751 1752 String result; 1753 try { 1754 result = new String(octets, 0, oi, charset); 1755 } catch (UnsupportedEncodingException error) { 1756 throw new URIException(URIException.UNSUPPORTED_ENCODING, 1757 "not supported " + charset + " encoding"); 1758 } 1759 1760 return result; 1761 } 1762 1763 1764 /*** 1765 * Pre-validate the unescaped URI string within a specific component. 
1766 * 1767 * @param component the component string within the component 1768 * @param disallowed those characters disallowed within the component 1769 * @return if true, it doesn't have the disallowed characters 1770 * if false, the component is undefined or an incorrect one 1771 */ 1772 protected boolean prevalidate(String component, BitSet disallowed) { 1773 // prevalidate the given component by disallowed characters 1774 if (component == null) { 1775 return false; // undefined 1776 } 1777 char[] target = component.toCharArray(); 1778 for (int i = 0; i < target.length; i++) { 1779 if (disallowed.get(target[i])) { 1780 return false; 1781 } 1782 } 1783 return true; 1784 } 1785 1786 1787 /*** 1788 * Validate the URI characters within a specific component. 1789 * The component must be performed after escape encoding. Or it doesn't 1790 * include escaped characters. 1791 * 1792 * @param component the characters sequence within the component 1793 * @param generous those characters that are allowed within a component 1794 * @return if true, it's the correct URI character sequence 1795 */ 1796 protected boolean validate(char[] component, BitSet generous) { 1797 // validate each component by generous characters 1798 return validate(component, 0, -1, generous); 1799 } 1800 1801 1802 /*** 1803 * Validate the URI characters within a specific component. 1804 * The component must be performed after escape encoding. Or it doesn't 1805 * include escaped characters. 1806 * <p> 1807 * It's not that much strict, generous. The strict validation might be 1808 * performed before being called this method. 
1809 * 1810 * @param component the characters sequence within the component 1811 * @param soffset the starting offset of the given component 1812 * @param eoffset the ending offset of the given component 1813 * if -1, it means the length of the component 1814 * @param generous those characters that are allowed within a component 1815 * @return if true, it's the correct URI character sequence 1816 */ 1817 protected boolean validate(char[] component, int soffset, int eoffset, 1818 BitSet generous) { 1819 // validate each component by generous characters 1820 if (eoffset == -1) { 1821 eoffset = component.length - 1; 1822 } 1823 for (int i = soffset; i <= eoffset; i++) { 1824 if (!generous.get(component[i])) { 1825 return false; 1826 } 1827 } 1828 return true; 1829 } 1830 1831 1832 /*** 1833 * In order to avoid any possilbity of conflict with non-ASCII characters, 1834 * Parse a URI reference as a <code>String</code> with the character 1835 * encoding of the local system or the document. 1836 * <p> 1837 * The following line is the regular expression for breaking-down a URI 1838 * reference into its components. 1839 * <p><blockquote><pre> 1840 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 1841 * 12 3 4 5 6 7 8 9 1842 * </pre></blockquote><p> 1843 * For example, matching the above expression to 1844 * http://jakarta.apache.org/ietf/uri/#Related 1845 * results in the following subexpression matches: 1846 * <p><blockquote><pre> 1847 * $1 = http: 1848 * scheme = $2 = http 1849 * $3 = //jakarta.apache.org 1850 * authority = $4 = jakarta.apache.org 1851 * path = $5 = /ietf/uri/ 1852 * $6 = <undefined> 1853 * query = $7 = <undefined> 1854 * $8 = #Related 1855 * fragment = $9 = Related 1856 * </pre></blockquote><p> 1857 * 1858 * @param original the original character sequence 1859 * @param escaped <code>true</code> if <code>original</code> is escaped 1860 * @throws URIException If an error occurs. 
1861 */ 1862 protected void parseUriReference(String original, boolean escaped) 1863 throws URIException { 1864 1865 // validate and contruct the URI character sequence 1866 if (original == null) { 1867 throw new URIException("URI-Reference required"); 1868 } 1869 1870 /* @ 1871 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 1872 */ 1873 String tmp = original.trim(); 1874 1875 /* 1876 * The length of the string sequence of characters. 1877 * It may not be equal to the length of the byte array. 1878 */ 1879 int length = tmp.length(); 1880 1881 /* 1882 * Remove the delimiters like angle brackets around an URI. 1883 */ 1884 if (length > 0) { 1885 char[] firstDelimiter = { tmp.charAt(0) }; 1886 if (validate(firstDelimiter, delims)) { 1887 if (length >= 2) { 1888 char[] lastDelimiter = { tmp.charAt(length - 1) }; 1889 if (validate(lastDelimiter, delims)) { 1890 tmp = tmp.substring(1, length - 1); 1891 length = length - 2; 1892 } 1893 } 1894 } 1895 } 1896 1897 /* 1898 * The starting index 1899 */ 1900 int from = 0; 1901 1902 /* 1903 * The test flag whether the URI is started from the path component. 1904 */ 1905 boolean isStartedFromPath = false; 1906 int atColon = tmp.indexOf(':'); 1907 int atSlash = tmp.indexOf('/'); 1908 if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) { 1909 isStartedFromPath = true; 1910 } 1911 1912 /* 1913 * <p><blockquote><pre> 1914 * @@@@@@@@ 1915 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 1916 * </pre></blockquote><p> 1917 */ 1918 int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); 1919 if (at == -1) { 1920 at = 0; 1921 } 1922 1923 /* 1924 * Parse the scheme. 1925 * <p><blockquote><pre> 1926 * scheme = $2 = http 1927 * @ 1928 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 
1929 * </pre></blockquote><p> 1930 */ 1931 if (at < length && tmp.charAt(at) == ':') { 1932 char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); 1933 if (validate(target, scheme)) { 1934 _scheme = target; 1935 } else { 1936 throw new URIException("incorrect scheme"); 1937 } 1938 from = ++at; 1939 } 1940 1941 /* 1942 * Parse the authority component. 1943 * <p><blockquote><pre> 1944 * authority = $4 = jakarta.apache.org 1945 * @@ 1946 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 1947 * </pre></blockquote><p> 1948 */ 1949 // Reset flags 1950 _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false; 1951 if (0 <= at && at < length && tmp.charAt(at) == '/') { 1952 // Set flag 1953 _is_hier_part = true; 1954 if (at + 2 < length && tmp.charAt(at + 1) == '/') { 1955 // the temporary index to start the search from 1956 int next = indexFirstOf(tmp, "/?#", at + 2); 1957 if (next == -1) { 1958 next = (tmp.substring(at + 2).length() == 0) ? at + 2 1959 : tmp.length(); 1960 } 1961 parseAuthority(tmp.substring(at + 2, next), escaped); 1962 from = at = next; 1963 // Set flag 1964 _is_net_path = true; 1965 } 1966 if (from == at) { 1967 // Set flag 1968 _is_abs_path = true; 1969 } 1970 } 1971 1972 /* 1973 * Parse the path component. 1974 * <p><blockquote><pre> 1975 * path = $5 = /ietf/uri/ 1976 * @@@@@@ 1977 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 
1978 * </pre></blockquote><p> 1979 */ 1980 if (from < length) { 1981 // rel_path = rel_segment [ abs_path ] 1982 int next = indexFirstOf(tmp, "?#", from); 1983 if (next == -1) { 1984 next = tmp.length(); 1985 } 1986 if (!_is_abs_path) { 1987 if (!escaped 1988 && prevalidate(tmp.substring(from, next), disallowed_rel_path) 1989 || escaped 1990 && validate(tmp.substring(from, next).toCharArray(), rel_path)) { 1991 // Set flag 1992 _is_rel_path = true; 1993 } else if (!escaped 1994 && prevalidate(tmp.substring(from, next), disallowed_opaque_part) 1995 || escaped 1996 && validate(tmp.substring(from, next).toCharArray(), opaque_part)) { 1997 // Set flag 1998 _is_opaque_part = true; 1999 } else { 2000 // the path component may be empty 2001 _path = null; 2002 } 2003 } 2004 if (escaped) { 2005 setRawPath(tmp.substring(from, next).toCharArray()); 2006 } else { 2007 setPath(tmp.substring(from, next)); 2008 } 2009 at = next; 2010 } 2011 2012 // set the charset to do escape encoding 2013 String charset = getProtocolCharset(); 2014 2015 /* 2016 * Parse the query component. 2017 * <p><blockquote><pre> 2018 * query = $7 = <undefined> 2019 * @@@@@@@@@ 2020 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 2021 * </pre></blockquote><p> 2022 */ 2023 if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') { 2024 int next = tmp.indexOf('#', at + 1); 2025 if (next == -1) { 2026 next = tmp.length(); 2027 } 2028 _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() 2029 : encode(tmp.substring(at + 1, next), allowed_query, charset); 2030 at = next; 2031 } 2032 2033 /* 2034 * Parse the fragment component. 2035 * <p><blockquote><pre> 2036 * fragment = $9 = Related 2037 * @@@@@@@@ 2038 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 2039 * </pre></blockquote><p> 2040 */ 2041 if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') { 2042 if (at + 1 == length) { // empty fragment 2043 _fragment = "".toCharArray(); 2044 } else { 2045 _fragment = (escaped) ? 
tmp.substring(at + 1).toCharArray() 2046 : encode(tmp.substring(at + 1), allowed_fragment, charset); 2047 } 2048 } 2049 2050 // set this URI. 2051 setURI(); 2052 } 2053 2054 2055 /*** 2056 * Get the earlier index that to be searched for the first occurrance in 2057 * one of any of the given string. 2058 * 2059 * @param s the string to be indexed 2060 * @param delims the delimiters used to index 2061 * @return the earlier index if there are delimiters 2062 */ 2063 protected int indexFirstOf(String s, String delims) { 2064 return indexFirstOf(s, delims, -1); 2065 } 2066 2067 2068 /*** 2069 * Get the earlier index that to be searched for the first occurrance in 2070 * one of any of the given string. 2071 * 2072 * @param s the string to be indexed 2073 * @param delims the delimiters used to index 2074 * @param offset the from index 2075 * @return the earlier index if there are delimiters 2076 */ 2077 protected int indexFirstOf(String s, String delims, int offset) { 2078 if (s == null || s.length() == 0) { 2079 return -1; 2080 } 2081 if (delims == null || delims.length() == 0) { 2082 return -1; 2083 } 2084 // check boundaries 2085 if (offset < 0) { 2086 offset = 0; 2087 } else if (offset > s.length()) { 2088 return -1; 2089 } 2090 // s is never null 2091 int min = s.length(); 2092 char[] delim = delims.toCharArray(); 2093 for (int i = 0; i < delim.length; i++) { 2094 int at = s.indexOf(delim[i], offset); 2095 if (at >= 0 && at < min) { 2096 min = at; 2097 } 2098 } 2099 return (min == s.length()) ? -1 : min; 2100 } 2101 2102 2103 /*** 2104 * Get the earlier index that to be searched for the first occurrance in 2105 * one of any of the given array. 
2106 * 2107 * @param s the character array to be indexed 2108 * @param delim the delimiter used to index 2109 * @return the ealier index if there are a delimiter 2110 */ 2111 protected int indexFirstOf(char[] s, char delim) { 2112 return indexFirstOf(s, delim, 0); 2113 } 2114 2115 2116 /*** 2117 * Get the earlier index that to be searched for the first occurrance in 2118 * one of any of the given array. 2119 * 2120 * @param s the character array to be indexed 2121 * @param delim the delimiter used to index 2122 * @param offset The offset. 2123 * @return the ealier index if there is a delimiter 2124 */ 2125 protected int indexFirstOf(char[] s, char delim, int offset) { 2126 if (s == null || s.length == 0) { 2127 return -1; 2128 } 2129 // check boundaries 2130 if (offset < 0) { 2131 offset = 0; 2132 } else if (offset > s.length) { 2133 return -1; 2134 } 2135 for (int i = offset; i < s.length; i++) { 2136 if (s[i] == delim) { 2137 return i; 2138 } 2139 } 2140 return -1; 2141 } 2142 2143 2144 /*** 2145 * Parse the authority component. 2146 * 2147 * @param original the original character sequence of authority component 2148 * @param escaped <code>true</code> if <code>original</code> is escaped 2149 * @throws URIException If an error occurs. 2150 */ 2151 protected void parseAuthority(String original, boolean escaped) 2152 throws URIException { 2153 2154 // Reset flags 2155 _is_reg_name = _is_server = 2156 _is_hostname = _is_IPv4address = _is_IPv6reference = false; 2157 2158 // set the charset to do escape encoding 2159 String charset = getProtocolCharset(); 2160 2161 boolean hasPort = true; 2162 int from = 0; 2163 int next = original.indexOf('@'); 2164 if (next != -1) { // neither -1 and 0 2165 // each protocol extented from URI supports the specific userinfo 2166 _userinfo = (escaped) ? 
original.substring(0, next).toCharArray() 2167 : encode(original.substring(0, next), allowed_userinfo, 2168 charset); 2169 from = next + 1; 2170 } 2171 next = original.indexOf('[', from); 2172 if (next >= from) { 2173 next = original.indexOf(']', from); 2174 if (next == -1) { 2175 throw new URIException(URIException.PARSING, "IPv6reference"); 2176 } else { 2177 next++; 2178 } 2179 // In IPv6reference, '[', ']' should be excluded 2180 _host = (escaped) ? original.substring(from, next).toCharArray() 2181 : encode(original.substring(from, next), allowed_IPv6reference, 2182 charset); 2183 // Set flag 2184 _is_IPv6reference = true; 2185 } else { // only for !_is_IPv6reference 2186 next = original.indexOf(':', from); 2187 if (next == -1) { 2188 next = original.length(); 2189 hasPort = false; 2190 } 2191 // REMINDME: it doesn't need the pre-validation 2192 _host = original.substring(from, next).toCharArray(); 2193 if (validate(_host, IPv4address)) { 2194 // Set flag 2195 _is_IPv4address = true; 2196 } else if (validate(_host, hostname)) { 2197 // Set flag 2198 _is_hostname = true; 2199 } else { 2200 // Set flag 2201 _is_reg_name = true; 2202 } 2203 } 2204 if (_is_reg_name) { 2205 // Reset flags for a server-based naming authority 2206 _is_server = _is_hostname = _is_IPv4address = 2207 _is_IPv6reference = false; 2208 // set a registry-based naming authority 2209 _authority = (escaped) ? 
original.toString().toCharArray() 2210 : encode(original.toString(), allowed_reg_name, charset); 2211 } else { 2212 if (original.length() - 1 > next && hasPort 2213 && original.charAt(next) == ':') { // not empty 2214 from = next + 1; 2215 try { 2216 _port = Integer.parseInt(original.substring(from)); 2217 } catch (NumberFormatException error) { 2218 throw new URIException(URIException.PARSING, 2219 "invalid port number"); 2220 } 2221 } 2222 // set a server-based naming authority 2223 StringBuffer buf = new StringBuffer(); 2224 if (_userinfo != null) { // has_userinfo 2225 buf.append(_userinfo); 2226 buf.append('@'); 2227 } 2228 if (_host != null) { 2229 buf.append(_host); 2230 if (_port != -1) { 2231 buf.append(':'); 2232 buf.append(_port); 2233 } 2234 } 2235 _authority = buf.toString().toCharArray(); 2236 // Set flag 2237 _is_server = true; 2238 } 2239 } 2240 2241 2242 /*** 2243 * Once it's parsed successfully, set this URI. 2244 * 2245 * @see #getRawURI 2246 */ 2247 protected void setURI() { 2248 // set _uri 2249 StringBuffer buf = new StringBuffer(); 2250 // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 
2251 if (_scheme != null) { 2252 buf.append(_scheme); 2253 buf.append(':'); 2254 } 2255 if (_is_net_path) { 2256 buf.append("//"); 2257 if (_authority != null) { // has_authority 2258 if (_userinfo != null) { // by default, remove userinfo part 2259 if (_host != null) { 2260 buf.append(_host); 2261 if (_port != -1) { 2262 buf.append(':'); 2263 buf.append(_port); 2264 } 2265 } 2266 } else { 2267 buf.append(_authority); 2268 } 2269 } 2270 } 2271 if (_opaque != null && _is_opaque_part) { 2272 buf.append(_opaque); 2273 } else if (_path != null) { 2274 // _is_hier_part or _is_relativeURI 2275 if (_path.length != 0) { 2276 buf.append(_path); 2277 } 2278 } 2279 if (_query != null) { // has_query 2280 buf.append('?'); 2281 buf.append(_query); 2282 } 2283 // ignore the fragment identifier 2284 _uri = buf.toString().toCharArray(); 2285 hash = 0; 2286 } 2287 2288 // ----------------------------------------------------------- Test methods 2289 2290 2291 /*** 2292 * Tell whether or not this URI is absolute. 2293 * 2294 * @return true iif this URI is absoluteURI 2295 */ 2296 public boolean isAbsoluteURI() { 2297 return (_scheme != null); 2298 } 2299 2300 2301 /*** 2302 * Tell whether or not this URI is relative. 2303 * 2304 * @return true iif this URI is relativeURI 2305 */ 2306 public boolean isRelativeURI() { 2307 return (_scheme == null); 2308 } 2309 2310 2311 /*** 2312 * Tell whether or not the absoluteURI of this URI is hier_part. 2313 * 2314 * @return true iif the absoluteURI is hier_part 2315 */ 2316 public boolean isHierPart() { 2317 return _is_hier_part; 2318 } 2319 2320 2321 /*** 2322 * Tell whether or not the absoluteURI of this URI is opaque_part. 2323 * 2324 * @return true iif the absoluteURI is opaque_part 2325 */ 2326 public boolean isOpaquePart() { 2327 return _is_opaque_part; 2328 } 2329 2330 2331 /*** 2332 * Tell whether or not the relativeURI or heir_part of this URI is net_path. 2333 * It's the same function as the has_authority() method. 
2334 * 2335 * @return true iif the relativeURI or heir_part is net_path 2336 * @see #hasAuthority 2337 */ 2338 public boolean isNetPath() { 2339 return _is_net_path || (_authority != null); 2340 } 2341 2342 2343 /*** 2344 * Tell whether or not the relativeURI or hier_part of this URI is abs_path. 2345 * 2346 * @return true iif the relativeURI or hier_part is abs_path 2347 */ 2348 public boolean isAbsPath() { 2349 return _is_abs_path; 2350 } 2351 2352 2353 /*** 2354 * Tell whether or not the relativeURI of this URI is rel_path. 2355 * 2356 * @return true iif the relativeURI is rel_path 2357 */ 2358 public boolean isRelPath() { 2359 return _is_rel_path; 2360 } 2361 2362 2363 /*** 2364 * Tell whether or not this URI has authority. 2365 * It's the same function as the is_net_path() method. 2366 * 2367 * @return true iif this URI has authority 2368 * @see #isNetPath 2369 */ 2370 public boolean hasAuthority() { 2371 return (_authority != null) || _is_net_path; 2372 } 2373 2374 /*** 2375 * Tell whether or not the authority component of this URI is reg_name. 2376 * 2377 * @return true iif the authority component is reg_name 2378 */ 2379 public boolean isRegName() { 2380 return _is_reg_name; 2381 } 2382 2383 2384 /*** 2385 * Tell whether or not the authority component of this URI is server. 2386 * 2387 * @return true iif the authority component is server 2388 */ 2389 public boolean isServer() { 2390 return _is_server; 2391 } 2392 2393 2394 /*** 2395 * Tell whether or not this URI has userinfo. 2396 * 2397 * @return true iif this URI has userinfo 2398 */ 2399 public boolean hasUserinfo() { 2400 return (_userinfo != null); 2401 } 2402 2403 2404 /*** 2405 * Tell whether or not the host part of this URI is hostname. 2406 * 2407 * @return true iif the host part is hostname 2408 */ 2409 public boolean isHostname() { 2410 return _is_hostname; 2411 } 2412 2413 2414 /*** 2415 * Tell whether or not the host part of this URI is IPv4address. 
2416 * 2417 * @return true iif the host part is IPv4address 2418 */ 2419 public boolean isIPv4address() { 2420 return _is_IPv4address; 2421 } 2422 2423 2424 /*** 2425 * Tell whether or not the host part of this URI is IPv6reference. 2426 * 2427 * @return true iif the host part is IPv6reference 2428 */ 2429 public boolean isIPv6reference() { 2430 return _is_IPv6reference; 2431 } 2432 2433 2434 /*** 2435 * Tell whether or not this URI has query. 2436 * 2437 * @return true iif this URI has query 2438 */ 2439 public boolean hasQuery() { 2440 return (_query != null); 2441 } 2442 2443 2444 /*** 2445 * Tell whether or not this URI has fragment. 2446 * 2447 * @return true iif this URI has fragment 2448 */ 2449 public boolean hasFragment() { 2450 return (_fragment != null); 2451 } 2452 2453 2454 // ---------------------------------------------------------------- Charset 2455 2456 2457 /*** 2458 * Set the default charset of the protocol. 2459 * <p> 2460 * The character set used to store files SHALL remain a local decision and 2461 * MAY depend on the capability of local operating systems. Prior to the 2462 * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format 2463 * and UTF-8 encoded. This approach, while allowing international exchange 2464 * of URIs, will still allow backward compatibility with older systems 2465 * because the code set positions for ASCII characters are identical to the 2466 * one byte sequence in UTF-8. 2467 * <p> 2468 * An individual URI scheme may require a single charset, define a default 2469 * charset, or provide a way to indicate the charset used. 2470 * 2471 * <p> 2472 * Always all the time, the setter method is always succeeded and throws 2473 * <code>DefaultCharsetChanged</code> exception. 2474 * 2475 * So API programmer must follow the following way: 2476 * <code><pre> 2477 * import org.apache.util.URI$DefaultCharsetChanged; 2478 * . 2479 * . 2480 * . 
2481 * try { 2482 * URI.setDefaultProtocolCharset("UTF-8"); 2483 * } catch (DefaultCharsetChanged cc) { 2484 * // CASE 1: the exception could be ignored, when it is set by user 2485 * if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) { 2486 * // CASE 2: let user know the default protocol charset changed 2487 * } else { 2488 * // CASE 2: let user know the default document charset changed 2489 * } 2490 * } 2491 * </pre></code> 2492 * 2493 * The API programmer is responsible to set the correct charset. 2494 * And each application should remember its own charset to support. 2495 * 2496 * @param charset the default charset for each protocol 2497 * @throws DefaultCharsetChanged default charset changed 2498 */ 2499 public static void setDefaultProtocolCharset(String charset) 2500 throws DefaultCharsetChanged { 2501 2502 defaultProtocolCharset = charset; 2503 throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET, 2504 "the default protocol charset changed"); 2505 } 2506 2507 2508 /*** 2509 * Get the default charset of the protocol. 2510 * <p> 2511 * An individual URI scheme may require a single charset, define a default 2512 * charset, or provide a way to indicate the charset used. 2513 * <p> 2514 * To work globally either requires support of a number of character sets 2515 * and to be able to convert between them, or the use of a single preferred 2516 * character set. 2517 * For support of global compatibility it is STRONGLY RECOMMENDED that 2518 * clients and servers use UTF-8 encoding when exchanging URIs. 2519 * 2520 * @return the default charset string 2521 */ 2522 public static String getDefaultProtocolCharset() { 2523 return defaultProtocolCharset; 2524 } 2525 2526 2527 /*** 2528 * Get the protocol charset used by this current URI instance. 2529 * It was set by the constructor for this instance. If it was not set by 2530 * contructor, it will return the default protocol charset. 
2531 * 2532 * @return the protocol charset string 2533 * @see #getDefaultProtocolCharset 2534 */ 2535 public String getProtocolCharset() { 2536 return (protocolCharset != null) 2537 ? protocolCharset 2538 : defaultProtocolCharset; 2539 } 2540 2541 2542 /*** 2543 * Set the default charset of the document. 2544 * <p> 2545 * Notice that it will be possible to contain mixed characters (e.g. 2546 * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional 2547 * display of these character sets, the protocol charset could be simply 2548 * used again. Because it's not yet implemented that the insertion of BIDI 2549 * control characters at different points during composition is extracted. 2550 * <p> 2551 * 2552 * Always all the time, the setter method is always succeeded and throws 2553 * <code>DefaultCharsetChanged</code> exception. 2554 * 2555 * So API programmer must follow the following way: 2556 * <code><pre> 2557 * import org.apache.util.URI$DefaultCharsetChanged; 2558 * . 2559 * . 2560 * . 2561 * try { 2562 * URI.setDefaultDocumentCharset("EUC-KR"); 2563 * } catch (DefaultCharsetChanged cc) { 2564 * // CASE 1: the exception could be ignored, when it is set by user 2565 * if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) { 2566 * // CASE 2: let user know the default document charset changed 2567 * } else { 2568 * // CASE 2: let user know the default protocol charset changed 2569 * } 2570 * } 2571 * </pre></code> 2572 * 2573 * The API programmer is responsible to set the correct charset. 2574 * And each application should remember its own charset to support. 
2575 * 2576 * @param charset the default charset for the document 2577 * @throws DefaultCharsetChanged default charset changed 2578 */ 2579 public static void setDefaultDocumentCharset(String charset) 2580 throws DefaultCharsetChanged { 2581 2582 defaultDocumentCharset = charset; 2583 throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET, 2584 "the default document charset changed"); 2585 } 2586 2587 2588 /*** 2589 * Get the recommended default charset of the document. 2590 * 2591 * @return the default charset string 2592 */ 2593 public static String getDefaultDocumentCharset() { 2594 return defaultDocumentCharset; 2595 } 2596 2597 2598 /*** 2599 * Get the default charset of the document by locale. 2600 * 2601 * @return the default charset string by locale 2602 */ 2603 public static String getDefaultDocumentCharsetByLocale() { 2604 return defaultDocumentCharsetByLocale; 2605 } 2606 2607 2608 /*** 2609 * Get the default charset of the document by platform. 2610 * 2611 * @return the default charset string by platform 2612 */ 2613 public static String getDefaultDocumentCharsetByPlatform() { 2614 return defaultDocumentCharsetByPlatform; 2615 } 2616 2617 // ------------------------------------------------------------- The scheme 2618 2619 /*** 2620 * Get the scheme. 2621 * 2622 * @return the scheme 2623 */ 2624 public char[] getRawScheme() { 2625 return _scheme; 2626 } 2627 2628 2629 /*** 2630 * Get the scheme. 2631 * 2632 * @return the scheme 2633 * null if undefined scheme 2634 */ 2635 public String getScheme() { 2636 return (_scheme == null) ? null : new String(_scheme); 2637 } 2638 2639 // ---------------------------------------------------------- The authority 2640 2641 /*** 2642 * Set the authority. It can be one type of server, hostport, hostname, 2643 * IPv4address, IPv6reference and reg_name. 
2644 * <p><blockquote><pre> 2645 * authority = server | reg_name 2646 * </pre></blockquote><p> 2647 * 2648 * @param escapedAuthority the raw escaped authority 2649 * @throws URIException If {@link 2650 * #parseAuthority(java.lang.String,boolean)} fails 2651 * @throws NullPointerException null authority 2652 */ 2653 public void setRawAuthority(char[] escapedAuthority) 2654 throws URIException, NullPointerException { 2655 2656 parseAuthority(new String(escapedAuthority), true); 2657 setURI(); 2658 } 2659 2660 2661 /*** 2662 * Set the authority. It can be one type of server, hostport, hostname, 2663 * IPv4address, IPv6reference and reg_name. 2664 * Note that there is no setAuthority method by the escape encoding reason. 2665 * 2666 * @param escapedAuthority the escaped authority string 2667 * @throws URIException If {@link 2668 * #parseAuthority(java.lang.String,boolean)} fails 2669 */ 2670 public void setEscapedAuthority(String escapedAuthority) 2671 throws URIException { 2672 2673 parseAuthority(escapedAuthority, true); 2674 setURI(); 2675 } 2676 2677 2678 /*** 2679 * Get the raw-escaped authority. 2680 * 2681 * @return the raw-escaped authority 2682 */ 2683 public char[] getRawAuthority() { 2684 return _authority; 2685 } 2686 2687 2688 /*** 2689 * Get the escaped authority. 2690 * 2691 * @return the escaped authority 2692 */ 2693 public String getEscapedAuthority() { 2694 return (_authority == null) ? null : new String(_authority); 2695 } 2696 2697 2698 /*** 2699 * Get the authority. 2700 * 2701 * @return the authority 2702 * @throws URIException If {@link #decode} fails 2703 */ 2704 public String getAuthority() throws URIException { 2705 return (_authority == null) ? null : decode(_authority, 2706 getProtocolCharset()); 2707 } 2708 2709 // ----------------------------------------------------------- The userinfo 2710 2711 /*** 2712 * Get the raw-escaped userinfo. 
2713 * 2714 * @return the raw-escaped userinfo 2715 * @see #getAuthority 2716 */ 2717 public char[] getRawUserinfo() { 2718 return _userinfo; 2719 } 2720 2721 2722 /*** 2723 * Get the escaped userinfo. 2724 * 2725 * @return the escaped userinfo 2726 * @see #getAuthority 2727 */ 2728 public String getEscapedUserinfo() { 2729 return (_userinfo == null) ? null : new String(_userinfo); 2730 } 2731 2732 2733 /*** 2734 * Get the userinfo. 2735 * 2736 * @return the userinfo 2737 * @throws URIException If {@link #decode} fails 2738 * @see #getAuthority 2739 */ 2740 public String getUserinfo() throws URIException { 2741 return (_userinfo == null) ? null : decode(_userinfo, 2742 getProtocolCharset()); 2743 } 2744 2745 // --------------------------------------------------------------- The host 2746 2747 /*** 2748 * Get the host. 2749 * <p><blockquote><pre> 2750 * host = hostname | IPv4address | IPv6reference 2751 * </pre></blockquote><p> 2752 * 2753 * @return the host 2754 * @see #getAuthority 2755 */ 2756 public char[] getRawHost() { 2757 return _host; 2758 } 2759 2760 2761 /*** 2762 * Get the host. 2763 * <p><blockquote><pre> 2764 * host = hostname | IPv4address | IPv6reference 2765 * </pre></blockquote><p> 2766 * 2767 * @return the host 2768 * @throws URIException If {@link #decode} fails 2769 * @see #getAuthority 2770 */ 2771 public String getHost() throws URIException { 2772 return decode(_host, getProtocolCharset()); 2773 } 2774 2775 // --------------------------------------------------------------- The port 2776 2777 /*** 2778 * Get the port. In order to get the specfic default port, the specific 2779 * protocol-supported class extended from the URI class should be used. 2780 * It has the server-based naming authority. 2781 * 2782 * @return the port 2783 * if -1, it has the default port for the scheme or the server-based 2784 * naming authority is not supported in the specific URI. 
2785 */ 2786 public int getPort() { 2787 return _port; 2788 } 2789 2790 // --------------------------------------------------------------- The path 2791 2792 /*** 2793 * Set the raw-escaped path. 2794 * 2795 * @param escapedPath the path character sequence 2796 * @throws URIException encoding error or not proper for initial instance 2797 * @see #encode 2798 */ 2799 public void setRawPath(char[] escapedPath) throws URIException { 2800 if (escapedPath == null || escapedPath.length == 0) { 2801 _path = _opaque = escapedPath; 2802 setURI(); 2803 return; 2804 } 2805 // remove the fragment identifier 2806 escapedPath = removeFragmentIdentifier(escapedPath); 2807 if (_is_net_path || _is_abs_path) { 2808 if (escapedPath[0] != '/') { 2809 throw new URIException(URIException.PARSING, 2810 "not absolute path"); 2811 } 2812 if (!validate(escapedPath, abs_path)) { 2813 throw new URIException(URIException.ESCAPING, 2814 "escaped absolute path not valid"); 2815 } 2816 _path = escapedPath; 2817 } else if (_is_rel_path) { 2818 int at = indexFirstOf(escapedPath, '/'); 2819 if (at == 0) { 2820 throw new URIException(URIException.PARSING, "incorrect path"); 2821 } 2822 if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) 2823 && !validate(escapedPath, at, -1, abs_path) 2824 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) { 2825 2826 throw new URIException(URIException.ESCAPING, 2827 "escaped relative path not valid"); 2828 } 2829 _path = escapedPath; 2830 } else if (_is_opaque_part) { 2831 if (!uric_no_slash.get(escapedPath[0]) 2832 && !validate(escapedPath, 1, -1, uric)) { 2833 throw new URIException(URIException.ESCAPING, 2834 "escaped opaque part not valid"); 2835 } 2836 _opaque = escapedPath; 2837 } else { 2838 throw new URIException(URIException.PARSING, "incorrect path"); 2839 } 2840 setURI(); 2841 } 2842 2843 2844 /*** 2845 * Set the escaped path. 
2846 * 2847 * @param escapedPath the escaped path string 2848 * @throws URIException encoding error or not proper for initial instance 2849 * @see #encode 2850 */ 2851 public void setEscapedPath(String escapedPath) throws URIException { 2852 if (escapedPath == null) { 2853 _path = _opaque = null; 2854 setURI(); 2855 return; 2856 } 2857 setRawPath(escapedPath.toCharArray()); 2858 } 2859 2860 2861 /*** 2862 * Set the path. 2863 * 2864 * @param path the path string 2865 * @throws URIException set incorrectly or fragment only 2866 * @see #encode 2867 */ 2868 public void setPath(String path) throws URIException { 2869 2870 if (path == null || path.length() == 0) { 2871 _path = _opaque = (path == null) ? null : path.toCharArray(); 2872 setURI(); 2873 return; 2874 } 2875 // set the charset to do escape encoding 2876 String charset = getProtocolCharset(); 2877 2878 if (_is_net_path || _is_abs_path) { 2879 _path = encode(path, allowed_abs_path, charset); 2880 } else if (_is_rel_path) { 2881 StringBuffer buff = new StringBuffer(path.length()); 2882 int at = path.indexOf('/'); 2883 if (at == 0) { // never 0 2884 throw new URIException(URIException.PARSING, 2885 "incorrect relative path"); 2886 } 2887 if (at > 0) { 2888 buff.append(encode(path.substring(0, at), allowed_rel_path, 2889 charset)); 2890 buff.append(encode(path.substring(at), allowed_abs_path, 2891 charset)); 2892 } else { 2893 buff.append(encode(path, allowed_rel_path, charset)); 2894 } 2895 _path = buff.toString().toCharArray(); 2896 } else if (_is_opaque_part) { 2897 StringBuffer buf = new StringBuffer(); 2898 buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset)); 2899 buf.insert(1, encode(path.substring(1), uric, charset)); 2900 _opaque = buf.toString().toCharArray(); 2901 } else { 2902 throw new URIException(URIException.PARSING, "incorrect path"); 2903 } 2904 setURI(); 2905 } 2906 2907 2908 /*** 2909 * Resolve the base and relative path. 
2910 * 2911 * @param basePath a character array of the basePath 2912 * @param relPath a character array of the relPath 2913 * @return the resolved path 2914 * @throws URIException no more higher path level to be resolved 2915 */ 2916 protected char[] resolvePath(char[] basePath, char[] relPath) 2917 throws URIException { 2918 2919 // REMINDME: paths are never null 2920 String base = (basePath == null) ? "" : new String(basePath); 2921 int at = base.lastIndexOf('/'); 2922 if (at != -1) { 2923 basePath = base.substring(0, at + 1).toCharArray(); 2924 } 2925 // _path could be empty 2926 if (relPath == null || relPath.length == 0) { 2927 return normalize(basePath); 2928 } else if (relPath[0] == '/') { 2929 return normalize(relPath); 2930 } else { 2931 StringBuffer buff = new StringBuffer(base.length() 2932 + relPath.length); 2933 buff.append((at != -1) ? base.substring(0, at + 1) : "/"); 2934 buff.append(relPath); 2935 return normalize(buff.toString().toCharArray()); 2936 } 2937 } 2938 2939 2940 /*** 2941 * Get the raw-escaped current hierarchy level in the given path. 2942 * If the last namespace is a collection, the slash mark ('/') should be 2943 * ended with at the last character of the path string. 
2944 * 2945 * @param path the path 2946 * @return the current hierarchy level 2947 * @throws URIException no hierarchy level 2948 */ 2949 protected char[] getRawCurrentHierPath(char[] path) throws URIException { 2950 2951 if (_is_opaque_part) { 2952 throw new URIException(URIException.PARSING, "no hierarchy level"); 2953 } 2954 if (path == null) { 2955 throw new URIException(URIException.PARSING, "empty path"); 2956 } 2957 String buff = new String(path); 2958 int first = buff.indexOf('/'); 2959 int last = buff.lastIndexOf('/'); 2960 if (last == 0) { 2961 return rootPath; 2962 } else if (first != last && last != -1) { 2963 return buff.substring(0, last).toCharArray(); 2964 } 2965 // FIXME: it could be a document on the server side 2966 return path; 2967 } 2968 2969 2970 /*** 2971 * Get the raw-escaped current hierarchy level. 2972 * 2973 * @return the raw-escaped current hierarchy level 2974 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 2975 */ 2976 public char[] getRawCurrentHierPath() throws URIException { 2977 return (_path == null) ? null : getRawCurrentHierPath(_path); 2978 } 2979 2980 2981 /*** 2982 * Get the escaped current hierarchy level. 2983 * 2984 * @return the escaped current hierarchy level 2985 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 2986 */ 2987 public String getEscapedCurrentHierPath() throws URIException { 2988 char[] path = getRawCurrentHierPath(); 2989 return (path == null) ? null : new String(path); 2990 } 2991 2992 2993 /*** 2994 * Get the current hierarchy level. 2995 * 2996 * @return the current hierarchy level 2997 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 2998 * @see #decode 2999 */ 3000 public String getCurrentHierPath() throws URIException { 3001 char[] path = getRawCurrentHierPath(); 3002 return (path == null) ? null : decode(path, getProtocolCharset()); 3003 } 3004 3005 3006 /*** 3007 * Get the level above the this hierarchy level. 
3008 * 3009 * @return the raw above hierarchy level 3010 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 3011 */ 3012 public char[] getRawAboveHierPath() throws URIException { 3013 char[] path = getRawCurrentHierPath(); 3014 return (path == null) ? null : getRawCurrentHierPath(path); 3015 } 3016 3017 3018 /*** 3019 * Get the level above the this hierarchy level. 3020 * 3021 * @return the raw above hierarchy level 3022 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 3023 */ 3024 public String getEscapedAboveHierPath() throws URIException { 3025 char[] path = getRawAboveHierPath(); 3026 return (path == null) ? null : new String(path); 3027 } 3028 3029 3030 /*** 3031 * Get the level above the this hierarchy level. 3032 * 3033 * @return the above hierarchy level 3034 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 3035 * @see #decode 3036 */ 3037 public String getAboveHierPath() throws URIException { 3038 char[] path = getRawAboveHierPath(); 3039 return (path == null) ? null : decode(path, getProtocolCharset()); 3040 } 3041 3042 3043 /*** 3044 * Get the raw-escaped path. 3045 * <p><blockquote><pre> 3046 * path = [ abs_path | opaque_part ] 3047 * </pre></blockquote><p> 3048 * 3049 * @return the raw-escaped path 3050 */ 3051 public char[] getRawPath() { 3052 return _is_opaque_part ? _opaque : _path; 3053 } 3054 3055 3056 /*** 3057 * Get the escaped path. 3058 * <p><blockquote><pre> 3059 * path = [ abs_path | opaque_part ] 3060 * abs_path = "/" path_segments 3061 * opaque_part = uric_no_slash *uric 3062 * </pre></blockquote><p> 3063 * 3064 * @return the escaped path string 3065 */ 3066 public String getEscapedPath() { 3067 char[] path = getRawPath(); 3068 return (path == null) ? null : new String(path); 3069 } 3070 3071 3072 /*** 3073 * Get the path. 
3074 * <p><blockquote><pre> 3075 * path = [ abs_path | opaque_part ] 3076 * </pre></blockquote><p> 3077 * @return the path string 3078 * @throws URIException If {@link #decode} fails. 3079 * @see #decode 3080 */ 3081 public String getPath() throws URIException { 3082 char[] path = getRawPath(); 3083 return (path == null) ? null : decode(path, getProtocolCharset()); 3084 } 3085 3086 3087 /*** 3088 * Get the raw-escaped basename of the path. 3089 * 3090 * @return the raw-escaped basename 3091 */ 3092 public char[] getRawName() { 3093 if (_path == null) { 3094 return null; 3095 } 3096 3097 int at = 0; 3098 for (int i = _path.length - 1; i >= 0; i--) { 3099 if (_path[i] == '/') { 3100 at = i + 1; 3101 break; 3102 } 3103 } 3104 int len = _path.length - at; 3105 char[] basename = new char[len]; 3106 System.arraycopy(_path, at, basename, 0, len); 3107 return basename; 3108 } 3109 3110 3111 /*** 3112 * Get the escaped basename of the path. 3113 * 3114 * @return the escaped basename string 3115 */ 3116 public String getEscapedName() { 3117 char[] basename = getRawName(); 3118 return (basename == null) ? null : new String(basename); 3119 } 3120 3121 3122 /*** 3123 * Get the basename of the path. 3124 * 3125 * @return the basename string 3126 * @throws URIException incomplete trailing escape pattern or unsupported 3127 * character encoding 3128 * @see #decode 3129 */ 3130 public String getName() throws URIException { 3131 char[] basename = getRawName(); 3132 return (basename == null) ? null : decode(getRawName(), 3133 getProtocolCharset()); 3134 } 3135 3136 // ----------------------------------------------------- The path and query 3137 3138 /*** 3139 * Get the raw-escaped path and query. 
3140 * 3141 * @return the raw-escaped path and query 3142 */ 3143 public char[] getRawPathQuery() { 3144 3145 if (_path == null && _query == null) { 3146 return null; 3147 } 3148 StringBuffer buff = new StringBuffer(); 3149 if (_path != null) { 3150 buff.append(_path); 3151 } 3152 if (_query != null) { 3153 buff.append('?'); 3154 buff.append(_query); 3155 } 3156 return buff.toString().toCharArray(); 3157 } 3158 3159 3160 /*** 3161 * Get the escaped query. 3162 * 3163 * @return the escaped path and query string 3164 */ 3165 public String getEscapedPathQuery() { 3166 char[] rawPathQuery = getRawPathQuery(); 3167 return (rawPathQuery == null) ? null : new String(rawPathQuery); 3168 } 3169 3170 3171 /*** 3172 * Get the path and query. 3173 * 3174 * @return the path and query string. 3175 * @throws URIException incomplete trailing escape pattern or unsupported 3176 * character encoding 3177 * @see #decode 3178 */ 3179 public String getPathQuery() throws URIException { 3180 char[] rawPathQuery = getRawPathQuery(); 3181 return (rawPathQuery == null) ? null : decode(rawPathQuery, 3182 getProtocolCharset()); 3183 } 3184 3185 // -------------------------------------------------------------- The query 3186 3187 /*** 3188 * Set the raw-escaped query. 3189 * 3190 * @param escapedQuery the raw-escaped query 3191 * @throws URIException escaped query not valid 3192 */ 3193 public void setRawQuery(char[] escapedQuery) throws URIException { 3194 if (escapedQuery == null || escapedQuery.length == 0) { 3195 _query = escapedQuery; 3196 setURI(); 3197 return; 3198 } 3199 // remove the fragment identifier 3200 escapedQuery = removeFragmentIdentifier(escapedQuery); 3201 if (!validate(escapedQuery, query)) { 3202 throw new URIException(URIException.ESCAPING, 3203 "escaped query not valid"); 3204 } 3205 _query = escapedQuery; 3206 setURI(); 3207 } 3208 3209 3210 /*** 3211 * Set the escaped query string. 
3212 * 3213 * @param escapedQuery the escaped query string 3214 * @throws URIException escaped query not valid 3215 */ 3216 public void setEscapedQuery(String escapedQuery) throws URIException { 3217 if (escapedQuery == null) { 3218 _query = null; 3219 setURI(); 3220 return; 3221 } 3222 setRawQuery(escapedQuery.toCharArray()); 3223 } 3224 3225 3226 /*** 3227 * Set the query. 3228 * <p> 3229 * When a query string is not misunderstood the reserved special characters 3230 * ("&", "=", "+", ",", and "$") within a query component, it is 3231 * recommended to use in encoding the whole query with this method. 3232 * <p> 3233 * The additional APIs for the special purpose using by the reserved 3234 * special characters used in each protocol are implemented in each protocol 3235 * classes inherited from <code>URI</code>. So refer to the same-named APIs 3236 * implemented in each specific protocol instance. 3237 * 3238 * @param query the query string. 3239 * @throws URIException incomplete trailing escape pattern or unsupported 3240 * character encoding 3241 * @see #encode 3242 */ 3243 public void setQuery(String query) throws URIException { 3244 if (query == null || query.length() == 0) { 3245 _query = (query == null) ? null : query.toCharArray(); 3246 setURI(); 3247 return; 3248 } 3249 setRawQuery(encode(query, allowed_query, getProtocolCharset())); 3250 } 3251 3252 3253 /*** 3254 * Get the raw-escaped query. 3255 * 3256 * @return the raw-escaped query 3257 */ 3258 public char[] getRawQuery() { 3259 return _query; 3260 } 3261 3262 3263 /*** 3264 * Get the escaped query. 3265 * 3266 * @return the escaped query string 3267 */ 3268 public String getEscapedQuery() { 3269 return (_query == null) ? null : new String(_query); 3270 } 3271 3272 3273 /*** 3274 * Get the query. 3275 * 3276 * @return the query string. 
3277 * @throws URIException incomplete trailing escape pattern or unsupported 3278 * character encoding 3279 * @see #decode 3280 */ 3281 public String getQuery() throws URIException { 3282 return (_query == null) ? null : decode(_query, getProtocolCharset()); 3283 } 3284 3285 // ----------------------------------------------------------- The fragment 3286 3287 /*** 3288 * Set the raw-escaped fragment. 3289 * 3290 * @param escapedFragment the raw-escaped fragment 3291 * @throws URIException escaped fragment not valid 3292 */ 3293 public void setRawFragment(char[] escapedFragment) throws URIException { 3294 if (escapedFragment == null || escapedFragment.length == 0) { 3295 _fragment = escapedFragment; 3296 hash = 0; 3297 return; 3298 } 3299 if (!validate(escapedFragment, fragment)) { 3300 throw new URIException(URIException.ESCAPING, 3301 "escaped fragment not valid"); 3302 } 3303 _fragment = escapedFragment; 3304 hash = 0; 3305 } 3306 3307 3308 /*** 3309 * Set the escaped fragment string. 3310 * 3311 * @param escapedFragment the escaped fragment string 3312 * @throws URIException escaped fragment not valid 3313 */ 3314 public void setEscapedFragment(String escapedFragment) throws URIException { 3315 if (escapedFragment == null) { 3316 _fragment = null; 3317 hash = 0; 3318 return; 3319 } 3320 setRawFragment(escapedFragment.toCharArray()); 3321 } 3322 3323 3324 /*** 3325 * Set the fragment. 3326 * 3327 * @param fragment the fragment string. 3328 * @throws URIException If an error occurs. 3329 */ 3330 public void setFragment(String fragment) throws URIException { 3331 if (fragment == null || fragment.length() == 0) { 3332 _fragment = (fragment == null) ? null : fragment.toCharArray(); 3333 hash = 0; 3334 return; 3335 } 3336 _fragment = encode(fragment, allowed_fragment, getProtocolCharset()); 3337 hash = 0; 3338 } 3339 3340 3341 /*** 3342 * Get the raw-escaped fragment. 
3343 * <p> 3344 * The optional fragment identifier is not part of a URI, but is often used 3345 * in conjunction with a URI. 3346 * <p> 3347 * The format and interpretation of fragment identifiers is dependent on 3348 * the media type [RFC2046] of the retrieval result. 3349 * <p> 3350 * A fragment identifier is only meaningful when a URI reference is 3351 * intended for retrieval and the result of that retrieval is a document 3352 * for which the identified fragment is consistently defined. 3353 * 3354 * @return the raw-escaped fragment 3355 */ 3356 public char[] getRawFragment() { 3357 return _fragment; 3358 } 3359 3360 3361 /*** 3362 * Get the escaped fragment. 3363 * 3364 * @return the escaped fragment string 3365 */ 3366 public String getEscapedFragment() { 3367 return (_fragment == null) ? null : new String(_fragment); 3368 } 3369 3370 3371 /*** 3372 * Get the fragment. 3373 * 3374 * @return the fragment string 3375 * @throws URIException incomplete trailing escape pattern or unsupported 3376 * character encoding 3377 * @see #decode 3378 */ 3379 public String getFragment() throws URIException { 3380 return (_fragment == null) ? null : decode(_fragment, 3381 getProtocolCharset()); 3382 } 3383 3384 // ------------------------------------------------------------- Utilities 3385 3386 /*** 3387 * Remove the fragment identifier of the given component. 3388 * 3389 * @param component the component that a fragment may be included 3390 * @return the component that the fragment identifier is removed 3391 */ 3392 protected char[] removeFragmentIdentifier(char[] component) { 3393 if (component == null) { 3394 return null; 3395 } 3396 int lastIndex = new String(component).indexOf('#'); 3397 if (lastIndex != -1) { 3398 component = new String(component).substring(0, 3399 lastIndex).toCharArray(); 3400 } 3401 return component; 3402 } 3403 3404 3405 /*** 3406 * Normalize the given hier path part. 
3407 * 3408 * <p>Algorithm taken from URI reference parser at 3409 * http://www.apache.org/~fielding/uri/rev-2002/issues.html. 3410 * 3411 * @param path the path to normalize 3412 * @return the normalized path 3413 * @throws URIException no more higher path level to be normalized 3414 */ 3415 protected char[] normalize(char[] path) throws URIException { 3416 3417 if (path == null) { 3418 return null; 3419 } 3420 3421 String normalized = new String(path); 3422 3423 // If the buffer begins with "./" or "../", the "." or ".." is removed. 3424 if (normalized.startsWith("./")) { 3425 normalized = normalized.substring(1); 3426 } else if (normalized.startsWith("../")) { 3427 normalized = normalized.substring(2); 3428 } else if (normalized.startsWith("..")) { 3429 normalized = normalized.substring(2); 3430 } 3431 3432 // All occurrences of "/./" in the buffer are replaced with "/" 3433 int index = -1; 3434 while ((index = normalized.indexOf("/./")) != -1) { 3435 normalized = normalized.substring(0, index) + normalized.substring(index + 2); 3436 } 3437 3438 // If the buffer ends with "/.", the "." is removed. 3439 if (normalized.endsWith("/.")) { 3440 normalized = normalized.substring(0, normalized.length() - 1); 3441 } 3442 3443 int startIndex = 0; 3444 3445 // All occurrences of "/<segment>/../" in the buffer, where ".." 3446 // and <segment> are complete path segments, are iteratively replaced 3447 // with "/" in order from left to right until no matching pattern remains. 3448 // If the buffer ends with "/<segment>/..", that is also replaced 3449 // with "/". Note that <segment> may be empty. 
3450 while ((index = normalized.indexOf("/../", startIndex)) != -1) { 3451 int slashIndex = normalized.lastIndexOf('/', index - 1); 3452 if (slashIndex >= 0) { 3453 normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3); 3454 } else { 3455 startIndex = index + 3; 3456 } 3457 } 3458 if (normalized.endsWith("/..")) { 3459 int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4); 3460 if (slashIndex >= 0) { 3461 normalized = normalized.substring(0, slashIndex + 1); 3462 } 3463 } 3464 3465 // All prefixes of "<segment>/../" in the buffer, where ".." 3466 // and <segment> are complete path segments, are iteratively replaced 3467 // with "/" in order from left to right until no matching pattern remains. 3468 // If the buffer ends with "<segment>/..", that is also replaced 3469 // with "/". Note that <segment> may be empty. 3470 while ((index = normalized.indexOf("/../")) != -1) { 3471 int slashIndex = normalized.lastIndexOf('/', index - 1); 3472 if (slashIndex >= 0) { 3473 break; 3474 } else { 3475 normalized = normalized.substring(index + 3); 3476 } 3477 } 3478 if (normalized.endsWith("/..")) { 3479 int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4); 3480 if (slashIndex < 0) { 3481 normalized = "/"; 3482 } 3483 } 3484 3485 return normalized.toCharArray(); 3486 } 3487 3488 3489 /*** 3490 * Normalizes the path part of this URI. Normalization is only meant to be performed on 3491 * URIs with an absolute path. Calling this method on a relative path URI will have no 3492 * effect. 3493 * 3494 * @throws URIException no more higher path level to be normalized 3495 * 3496 * @see #isAbsPath() 3497 */ 3498 public void normalize() throws URIException { 3499 if (isAbsPath()) { 3500 _path = normalize(_path); 3501 setURI(); 3502 } 3503 } 3504 3505 3506 /*** 3507 * Test if the first array is equal to the second array. 
3508 * 3509 * @param first the first character array 3510 * @param second the second character array 3511 * @return true if they're equal 3512 */ 3513 protected boolean equals(char[] first, char[] second) { 3514 3515 if (first == null && second == null) { 3516 return true; 3517 } 3518 if (first == null || second == null) { 3519 return false; 3520 } 3521 if (first.length != second.length) { 3522 return false; 3523 } 3524 for (int i = 0; i < first.length; i++) { 3525 if (first[i] != second[i]) { 3526 return false; 3527 } 3528 } 3529 return true; 3530 } 3531 3532 3533 /*** 3534 * Test an object if this URI is equal to another. 3535 * 3536 * @param obj an object to compare 3537 * @return true if two URI objects are equal 3538 */ 3539 public boolean equals(Object obj) { 3540 3541 // normalize and test each components 3542 if (obj == this) { 3543 return true; 3544 } 3545 if (!(obj instanceof URI)) { 3546 return false; 3547 } 3548 URI another = (URI) obj; 3549 // scheme 3550 if (!equals(_scheme, another._scheme)) { 3551 return false; 3552 } 3553 // is_opaque_part or is_hier_part? and opaque 3554 if (!equals(_opaque, another._opaque)) { 3555 return false; 3556 } 3557 // is_hier_part 3558 // has_authority 3559 if (!equals(_authority, another._authority)) { 3560 return false; 3561 } 3562 // path 3563 if (!equals(_path, another._path)) { 3564 return false; 3565 } 3566 // has_query 3567 if (!equals(_query, another._query)) { 3568 return false; 3569 } 3570 // has_fragment? should be careful of the only fragment case. 3571 if (!equals(_fragment, another._fragment)) { 3572 return false; 3573 } 3574 return true; 3575 } 3576 3577 // ---------------------------------------------------------- Serialization 3578 3579 /*** 3580 * Write the content of this URI. 3581 * 3582 * @param oos the object-output stream 3583 * @throws IOException If an IO problem occurs. 
3584 */ 3585 protected void writeObject(ObjectOutputStream oos) 3586 throws IOException { 3587 3588 oos.defaultWriteObject(); 3589 } 3590 3591 3592 /*** 3593 * Read a URI. 3594 * 3595 * @param ois the object-input stream 3596 * @throws ClassNotFoundException If one of the classes specified in the 3597 * input stream cannot be found. 3598 * @throws IOException If an IO problem occurs. 3599 */ 3600 protected void readObject(ObjectInputStream ois) 3601 throws ClassNotFoundException, IOException { 3602 3603 ois.defaultReadObject(); 3604 } 3605 3606 // -------------------------------------------------------------- Hash code 3607 3608 /*** 3609 * Return a hash code for this URI. 3610 * 3611 * @return a has code value for this URI 3612 */ 3613 public int hashCode() { 3614 if (hash == 0) { 3615 char[] c = _uri; 3616 if (c != null) { 3617 for (int i = 0, len = c.length; i < len; i++) { 3618 hash = 31 * hash + c[i]; 3619 } 3620 } 3621 c = _fragment; 3622 if (c != null) { 3623 for (int i = 0, len = c.length; i < len; i++) { 3624 hash = 31 * hash + c[i]; 3625 } 3626 } 3627 } 3628 return hash; 3629 } 3630 3631 // ------------------------------------------------------------- Comparison 3632 3633 /*** 3634 * Compare this URI to another object. 3635 * 3636 * @param obj the object to be compared. 3637 * @return 0, if it's same, 3638 * -1, if failed, first being compared with in the authority component 3639 * @throws ClassCastException not URI argument 3640 */ 3641 public int compareTo(Object obj) throws ClassCastException { 3642 3643 URI another = (URI) obj; 3644 if (!equals(_authority, another.getRawAuthority())) { 3645 return -1; 3646 } 3647 return toString().compareTo(another.toString()); 3648 } 3649 3650 // ------------------------------------------------------------------ Clone 3651 3652 /*** 3653 * Create and return a copy of this object, the URI-reference containing 3654 * the userinfo component. 
Notice that the whole URI-reference including 3655 * the userinfo component counld not be gotten as a <code>String</code>. 3656 * <p> 3657 * To copy the identical <code>URI</code> object including the userinfo 3658 * component, it should be used. 3659 * 3660 * @return a clone of this instance 3661 */ 3662 public synchronized Object clone() { 3663 3664 URI instance = new URI(); 3665 3666 instance._uri = _uri; 3667 instance._scheme = _scheme; 3668 instance._opaque = _opaque; 3669 instance._authority = _authority; 3670 instance._userinfo = _userinfo; 3671 instance._host = _host; 3672 instance._port = _port; 3673 instance._path = _path; 3674 instance._query = _query; 3675 instance._fragment = _fragment; 3676 // the charset to do escape encoding for this instance 3677 instance.protocolCharset = protocolCharset; 3678 // flags 3679 instance._is_hier_part = _is_hier_part; 3680 instance._is_opaque_part = _is_opaque_part; 3681 instance._is_net_path = _is_net_path; 3682 instance._is_abs_path = _is_abs_path; 3683 instance._is_rel_path = _is_rel_path; 3684 instance._is_reg_name = _is_reg_name; 3685 instance._is_server = _is_server; 3686 instance._is_hostname = _is_hostname; 3687 instance._is_IPv4address = _is_IPv4address; 3688 instance._is_IPv6reference = _is_IPv6reference; 3689 3690 return instance; 3691 } 3692 3693 // ------------------------------------------------------------ Get the URI 3694 3695 /*** 3696 * It can be gotten the URI character sequence. It's raw-escaped. 3697 * For the purpose of the protocol to be transported, it will be useful. 3698 * <p> 3699 * It is clearly unwise to use a URL that contains a password which is 3700 * intended to be secret. In particular, the use of a password within 3701 * the 'userinfo' component of a URL is strongly disrecommended except 3702 * in those rare cases where the 'password' parameter is intended to be 3703 * public. 
3704 * <p> 3705 * When you want to get each part of the userinfo, you need to use the 3706 * specific methods in the specific URL. It depends on the specific URL. 3707 * 3708 * @return the URI character sequence 3709 */ 3710 public char[] getRawURI() { 3711 return _uri; 3712 } 3713 3714 3715 /*** 3716 * It can be gotten the URI character sequence. It's escaped. 3717 * For the purpose of the protocol to be transported, it will be useful. 3718 * 3719 * @return the escaped URI string 3720 */ 3721 public String getEscapedURI() { 3722 return (_uri == null) ? null : new String(_uri); 3723 } 3724 3725 3726 /*** 3727 * It can be gotten the URI character sequence. 3728 * 3729 * @return the original URI string 3730 * @throws URIException incomplete trailing escape pattern or unsupported 3731 * character encoding 3732 * @see #decode 3733 */ 3734 public String getURI() throws URIException { 3735 return (_uri == null) ? null : decode(_uri, getProtocolCharset()); 3736 } 3737 3738 3739 /*** 3740 * Get the URI reference character sequence. 3741 * 3742 * @return the URI reference character sequence 3743 */ 3744 public char[] getRawURIReference() { 3745 if (_fragment == null) { 3746 return _uri; 3747 } 3748 if (_uri == null) { 3749 return _fragment; 3750 } 3751 // if _uri != null && _fragment != null 3752 String uriReference = new String(_uri) + "#" + new String(_fragment); 3753 return uriReference.toCharArray(); 3754 } 3755 3756 3757 /*** 3758 * Get the escaped URI reference string. 3759 * 3760 * @return the escaped URI reference string 3761 */ 3762 public String getEscapedURIReference() { 3763 char[] uriReference = getRawURIReference(); 3764 return (uriReference == null) ? null : new String(uriReference); 3765 } 3766 3767 3768 /*** 3769 * Get the original URI reference string. 3770 * 3771 * @return the original URI reference string 3772 * @throws URIException If {@link #decode} fails. 
3773 */ 3774 public String getURIReference() throws URIException { 3775 char[] uriReference = getRawURIReference(); 3776 return (uriReference == null) ? null : decode(uriReference, 3777 getProtocolCharset()); 3778 } 3779 3780 3781 /*** 3782 * Get the escaped URI string. 3783 * <p> 3784 * On the document, the URI-reference form is only used without the userinfo 3785 * component like http://jakarta.apache.org/ by the security reason. 3786 * But the URI-reference form with the userinfo component could be parsed. 3787 * <p> 3788 * In other words, this URI and any its subclasses must not expose the 3789 * URI-reference expression with the userinfo component like 3790 * http://user:password@hostport/restricted_zone.<br> 3791 * It means that the API client programmer should extract each user and 3792 * password to access manually. Probably it will be supported in the each 3793 * subclass, however, not a whole URI-reference expression. 3794 * 3795 * @return the escaped URI string 3796 * @see #clone() 3797 */ 3798 public String toString() { 3799 return getEscapedURI(); 3800 } 3801 3802 3803 // ------------------------------------------------------------ Inner class 3804 3805 /*** 3806 * The charset-changed normal operation to represent to be required to 3807 * alert to user the fact the default charset is changed. 3808 */ 3809 public static class DefaultCharsetChanged extends RuntimeException { 3810 3811 // ------------------------------------------------------- constructors 3812 3813 /*** 3814 * The constructor with a reason string and its code arguments. 3815 * 3816 * @param reasonCode the reason code 3817 * @param reason the reason 3818 */ 3819 public DefaultCharsetChanged(int reasonCode, String reason) { 3820 super(reason); 3821 this.reason = reason; 3822 this.reasonCode = reasonCode; 3823 } 3824 3825 // ---------------------------------------------------------- constants 3826 3827 /*** No specified reason code. 
*/ 3828 public static final int UNKNOWN = 0; 3829 3830 /*** Protocol charset changed. */ 3831 public static final int PROTOCOL_CHARSET = 1; 3832 3833 /*** Document charset changed. */ 3834 public static final int DOCUMENT_CHARSET = 2; 3835 3836 // ------------------------------------------------- instance variables 3837 3838 /*** The reason code. */ 3839 private int reasonCode; 3840 3841 /*** The reason message. */ 3842 private String reason; 3843 3844 // ------------------------------------------------------------ methods 3845 3846 /*** 3847 * Get the reason code. 3848 * 3849 * @return the reason code 3850 */ 3851 public int getReasonCode() { 3852 return reasonCode; 3853 } 3854 3855 /*** 3856 * Get the reason message. 3857 * 3858 * @return the reason message 3859 */ 3860 public String getReason() { 3861 return reason; 3862 } 3863 3864 } 3865 3866 3867 /*** 3868 * A mapping to determine the (somewhat arbitrarily) preferred charset for a 3869 * given locale. Supports all locales recognized in JDK 1.1. 3870 * <p> 3871 * The distribution of this class is Servlets.com. It was originally 3872 * written by Jason Hunter [jhunter at acm.org] and used by with permission. 
*/
public static class LocaleToCharsetMap {

    /** A mapping of language code (optionally language_COUNTRY) to charset */
    private static final Hashtable LOCALE_TO_CHARSET_MAP;
    static {
        LOCALE_TO_CHARSET_MAP = new Hashtable();
        LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
        LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
        LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
        LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
        LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
        // Hebrew: "he" is the current ISO 639 code, "iw" the legacy one
        // that older runtimes report from Locale.getLanguage().
        LOCALE_TO_CHARSET_MAP.put("he", "ISO-8859-8");
        LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
        LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
        LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
        LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
        LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
        LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
        LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
        LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
        LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
        LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
        LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
        LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
        LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
    }

    /**
     * Get the preferred charset for the given locale.
     * <p>
     * Lookup order: the full locale name as given by
     * <code>locale.toString()</code>, then <code>language_COUNTRY</code>,
     * then the bare language code. The middle step lets a locale that
     * carries a variant or other suffix (for example
     * <code>zh_TW_POSIX</code>) still reach its country-specific entry
     * instead of falling back to the language-only one.
     *
     * @param locale the locale (must not be <code>null</code>)
     * @return the preferred charset or null if the locale is not
     * recognized.
     */
    public static String getCharset(Locale locale) {
        // try for a full name match (may include country and variant)
        String charset =
            (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
        if (charset != null) {
            return charset;
        }

        final String language = locale.getLanguage();

        // a suffix after the country defeats the full-name match above,
        // so retry with just language_COUNTRY
        final String country = locale.getCountry();
        if (country.length() > 0) {
            charset = (String) LOCALE_TO_CHARSET_MAP.get(
                language + "_" + country);
            if (charset != null) {
                return charset;
            }
        }

        // finally, try just the language
        charset = (String) LOCALE_TO_CHARSET_MAP.get(language);
        return charset;  // may be null
    }

}

}

This page was automatically generated by Maven