View Javadoc
1 /* 2 * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util/URIUtil.java,v 1.21 2003/06/29 21:34:06 olegk Exp $ 3 * $Revision: 1.21 $ 4 * $Date: 2003/06/29 21:34:06 $ 5 * 6 * ==================================================================== 7 * 8 * The Apache Software License, Version 1.1 9 * 10 * Copyright (c) 2002-2003 The Apache Software Foundation. All rights 11 * reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in 22 * the documentation and/or other materials provided with the 23 * distribution. 24 * 25 * 3. The end-user documentation included with the redistribution, if 26 * any, must include the following acknowlegement: 27 * "This product includes software developed by the 28 * Apache Software Foundation (http://www.apache.org/)." 29 * Alternately, this acknowlegement may appear in the software itself, 30 * if and wherever such third-party acknowlegements normally appear. 31 * 32 * 4. The names "The Jakarta Project", "Commons", and "Apache Software 33 * Foundation" must not be used to endorse or promote products derived 34 * from this software without prior written permission. For written 35 * permission, please contact apache@apache.org. 36 * 37 * 5. Products derived from this software may not be called "Apache" 38 * nor may "Apache" appear in their names without prior written 39 * permission of the Apache Group. 40 * 41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 42 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 44 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 45 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 48 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 49 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 50 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 51 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 53 * ==================================================================== 54 * 55 * This software consists of voluntary contributions made by many 56 * individuals on behalf of the Apache Software Foundation. For more 57 * information on the Apache Software Foundation, please see 58 * <http://www.apache.org/>. 59 * 60 * [Additional notices, if required by prior licensing conditions] 61 * 62 */ 63 64 package org.apache.commons.httpclient.util; 65 66 import java.io.UnsupportedEncodingException; 67 import java.util.BitSet; 68 import org.apache.commons.httpclient.URI; 69 import org.apache.commons.httpclient.URIException; 70 71 /*** 72 * The URI escape and character encoding and decoding utility. 73 * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather 74 * than {@link org.apache.commons.httpclient.URI}. 75 * 76 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a> 77 * @version $Revision: 1.21 $ $Date: 2002/03/14 15:14:01 78 */ 79 80 public class URIUtil { 81 82 // ----------------------------------------------------- Instance variables 83 84 protected static final BitSet empty = new BitSet(1); 85 86 // ---------------------------------------------------------- URI utilities 87 88 /*** 89 * Get the basename of an URI. It's possibly an empty string. 90 * 91 * @param uri a string regarded an URI 92 * @return the basename string; an empty string if the path ends with slash 93 */ 94 public static String getName(String uri) { 95 if (uri == null || uri.length() == 0) { return uri; } 96 String path = URIUtil.getPath(uri); 97 int at = path.lastIndexOf("/"); 98 int to = path.length(); 99 return (at >= 0) ? path.substring(at + 1, to) : path; 100 } 101 102 103 /*** 104 * Get the query of an URI. 105 * 106 * @param uri a string regarded an URI 107 * @return the query string; <code>null</code> if empty or undefined 108 */ 109 public static String getQuery(String uri) { 110 if (uri == null || uri.length() == 0) { return null; } 111 // consider of net_path 112 int at = uri.indexOf("//"); 113 int from = uri.indexOf( 114 "/", 115 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 116 ); 117 // the authority part of URI ignored 118 int to = uri.length(); 119 // reuse the at and from variables to consider the query 120 at = uri.indexOf("?", from); 121 if (at >= 0) { 122 from = at + 1; 123 } else { 124 return null; 125 } 126 // check the fragment 127 if (uri.lastIndexOf("#") > from) { 128 to = uri.lastIndexOf("#"); 129 } 130 // get the path and query. 131 return (from < 0 || from == to) ? null : uri.substring(from, to); 132 } 133 134 135 /*** 136 * Get the path of an URI. 137 * 138 * @param uri a string regarded an URI 139 * @return the path string 140 */ 141 public static String getPath(String uri) { 142 if (uri == null) { 143 return null; 144 } 145 // consider of net_path 146 int at = uri.indexOf("//"); 147 int from = uri.indexOf( 148 "/", 149 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 150 ); 151 // the authority part of URI ignored 152 int to = uri.length(); 153 // check the query 154 if (uri.indexOf('?', from) != -1) { 155 to = uri.indexOf('?', from); 156 } 157 // check the fragment 158 if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) { 159 to = uri.lastIndexOf("#"); 160 } 161 // get only the path. 162 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to); 163 } 164 165 166 /*** 167 * Get the path and query of an URI. 168 * 169 * @param uri a string regarded an URI 170 * @return the path and query string 171 */ 172 public static String getPathQuery(String uri) { 173 if (uri == null) { 174 return null; 175 } 176 // consider of net_path 177 int at = uri.indexOf("//"); 178 int from = uri.indexOf( 179 "/", 180 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 181 ); 182 // the authority part of URI ignored 183 int to = uri.length(); 184 // Ignore the '?' mark so to ignore the query. 185 // check the fragment 186 if (uri.lastIndexOf("#") > from) { 187 to = uri.lastIndexOf("#"); 188 } 189 // get the path and query. 190 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to); 191 } 192 193 194 /*** 195 * Get the path of an URI and its rest part. 196 * 197 * @param uri a string regarded an URI 198 * @return the string from the path part 199 */ 200 public static String getFromPath(String uri) { 201 if (uri == null) { 202 return null; 203 } 204 // consider of net_path 205 int at = uri.indexOf("//"); 206 int from = uri.indexOf( 207 "/", 208 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 209 ); 210 // get the path and its rest. 211 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from); 212 } 213 214 // ----------------------------------------------------- Encoding utilities 215 216 /*** 217 * Get the all escaped and encoded string with the default protocl charset. 218 * It's the same function to use <code>encode(String unescaped, Bitset 219 * empty, URI.getDefaultProtocolCharset())</code>. 220 * 221 * @param unescaped an unescaped string 222 * @return the escaped string 223 * 224 * @throws URIException if the default protocol charset is not supported 225 * 226 * @see URI#getDefaultProtocolCharset 227 * @see #encode 228 */ 229 public static String encodeAll(String unescaped) throws URIException { 230 return encodeAll(unescaped, URI.getDefaultProtocolCharset()); 231 } 232 233 234 /*** 235 * Get the all escaped and encoded string with a given charset. 236 * It's the same function to use <code>encode(String unescaped, Bitset 237 * empty, String charset)</code>. 238 * 239 * @param unescaped an unescaped string 240 * @param charset the charset 241 * @return the escaped string 242 * 243 * @throws URIException if the charset is not supported 244 * 245 * @see #encode 246 */ 247 public static String encodeAll(String unescaped, String charset) 248 throws URIException { 249 250 return encode(unescaped, empty, charset); 251 } 252 253 254 /*** 255 * Escape and encode a string regarded as within the authority component of 256 * an URI with the default protocol charset. 257 * Within the authority component, the characters ";", ":", "@", "?", and 258 * "/" are reserved. 259 * 260 * @param unescaped an unescaped string 261 * @return the escaped string 262 * 263 * @throws URIException if the default protocol charset is not supported 264 * 265 * @see URI#getDefaultProtocolCharset 266 * @see #encode 267 */ 268 public static String encodeWithinAuthority(String unescaped) 269 throws URIException { 270 271 return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset()); 272 } 273 274 275 /*** 276 * Escape and encode a string regarded as within the authority component of 277 * an URI with a given charset. 278 * Within the authority component, the characters ";", ":", "@", "?", and 279 * "/" are reserved. 280 * 281 * @param unescaped an unescaped string 282 * @param charset the charset 283 * @return the escaped string 284 * 285 * @throws URIException if the charset is not supported 286 * 287 * @see #encode 288 */ 289 public static String encodeWithinAuthority(String unescaped, String charset) 290 throws URIException { 291 292 return encode(unescaped, URI.allowed_within_authority, charset); 293 } 294 295 296 /*** 297 * Escape and encode a string regarded as the path and query components of 298 * an URI with the default protocol charset. 299 * 300 * @param unescaped an unescaped string 301 * @return the escaped string 302 * 303 * @throws URIException if the default protocol charset is not supported 304 * 305 * @see URI#getDefaultProtocolCharset 306 * @see #encode 307 */ 308 public static String encodePathQuery(String unescaped) throws URIException { 309 return encodePathQuery(unescaped, URI.getDefaultProtocolCharset()); 310 } 311 312 313 /*** 314 * Escape and encode a string regarded as the path and query components of 315 * an URI with a given charset. 316 * 317 * @param unescaped an unescaped string 318 * @param charset the charset 319 * @return the escaped string 320 * 321 * @throws URIException if the charset is not supported 322 * 323 * @see #encode 324 */ 325 public static String encodePathQuery(String unescaped, String charset) 326 throws URIException { 327 328 int at = unescaped.indexOf('?'); 329 if (at < 0) { 330 return encode(unescaped, URI.allowed_abs_path, charset); 331 } 332 // else 333 return encode(unescaped.substring(0, at), URI.allowed_abs_path, charset) 334 + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset); 335 } 336 337 338 /*** 339 * Escape and encode a string regarded as within the path component of an 340 * URI with the default protocol charset. 341 * The path may consist of a sequence of path segments separated by a 342 * single slash "/" character. Within a path segment, the characters 343 * "/", ";", "=", and "?" are reserved. 344 * 345 * @param unescaped an unescaped string 346 * @return the escaped string 347 * 348 * @throws URIException if the default protocol charset is not supported 349 * 350 * @see URI#getDefaultProtocolCharset 351 * @see #encode 352 */ 353 public static String encodeWithinPath(String unescaped) 354 throws URIException { 355 356 return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset()); 357 } 358 359 360 /*** 361 * Escape and encode a string regarded as within the path component of an 362 * URI with a given charset. 363 * The path may consist of a sequence of path segments separated by a 364 * single slash "/" character. Within a path segment, the characters 365 * "/", ";", "=", and "?" are reserved. 366 * 367 * @param unescaped an unescaped string 368 * @param charset the charset 369 * @return the escaped string 370 * 371 * @throws URIException if the charset is not supported 372 * 373 * @see #encode 374 */ 375 public static String encodeWithinPath(String unescaped, String charset) 376 throws URIException { 377 378 return encode(unescaped, URI.allowed_within_path, charset); 379 } 380 381 382 /*** 383 * Escape and encode a string regarded as the path component of an URI with 384 * the default protocol charset. 385 * 386 * @param unescaped an unescaped string 387 * @return the escaped string 388 * 389 * @throws URIException if the default protocol charset is not supported 390 * 391 * @see URI#getDefaultProtocolCharset 392 * @see #encode 393 */ 394 public static String encodePath(String unescaped) throws URIException { 395 return encodePath(unescaped, URI.getDefaultProtocolCharset()); 396 } 397 398 399 /*** 400 * Escape and encode a string regarded as the path component of an URI with 401 * a given charset. 402 * 403 * @param unescaped an unescaped string 404 * @param charset the charset 405 * @return the escaped string 406 * 407 * @throws URIException if the charset is not supported 408 * 409 * @see #encode 410 */ 411 public static String encodePath(String unescaped, String charset) 412 throws URIException { 413 414 return encode(unescaped, URI.allowed_abs_path, charset); 415 } 416 417 418 /*** 419 * Escape and encode a string regarded as within the query component of an 420 * URI with the default protocol charset. 421 * When a query comprise the name and value pairs, it is used in order 422 * to encode each name and value string. The reserved special characters 423 * within a query component are being included in encoding the query. 424 * 425 * @param unescaped an unescaped string 426 * @return the escaped string 427 * 428 * @throws URIException if the default protocol charset is not supported 429 * 430 * @see URI#getDefaultProtocolCharset 431 * @see #encode 432 */ 433 public static String encodeWithinQuery(String unescaped) 434 throws URIException { 435 436 return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset()); 437 } 438 439 440 /*** 441 * Escape and encode a string regarded as within the query component of an 442 * URI with a given charset. 443 * When a query comprise the name and value pairs, it is used in order 444 * to encode each name and value string. The reserved special characters 445 * within a query component are being included in encoding the query. 446 * 447 * @param unescaped an unescaped string 448 * @param charset the charset 449 * @return the escaped string 450 * 451 * @throws URIException if the charset is not supported 452 * 453 * @see #encode 454 */ 455 public static String encodeWithinQuery(String unescaped, String charset) 456 throws URIException { 457 458 return encode(unescaped, URI.allowed_within_query, charset); 459 } 460 461 462 /*** 463 * Escape and encode a string regarded as the query component of an URI with 464 * the default protocol charset. 465 * When a query string is not misunderstood the reserved special characters 466 * ("&", "=", "+", ",", and "$") within a query component, this method 467 * is recommended to use in encoding the whole query. 468 * 469 * @param unescaped an unescaped string 470 * @return the escaped string 471 * 472 * @throws URIException if the default protocol charset is not supported 473 * 474 * @see URI#getDefaultProtocolCharset 475 * @see #encode 476 */ 477 public static String encodeQuery(String unescaped) throws URIException { 478 return encodeQuery(unescaped, URI.getDefaultProtocolCharset()); 479 } 480 481 482 /*** 483 * Escape and encode a string regarded as the query component of an URI with 484 * a given charset. 485 * When a query string is not misunderstood the reserved special characters 486 * ("&", "=", "+", ",", and "$") within a query component, this method 487 * is recommended to use in encoding the whole query. 488 * 489 * @param unescaped an unescaped string 490 * @param charset the charset 491 * @return the escaped string 492 * 493 * @throws URIException if the charset is not supported 494 * 495 * @see #encode 496 */ 497 public static String encodeQuery(String unescaped, String charset) 498 throws URIException { 499 500 return encode(unescaped, URI.allowed_query, charset); 501 } 502 503 504 /*** 505 * Escape and encode a given string with allowed characters not to be 506 * escaped and the default protocol charset. 507 * 508 * @param unescaped a string 509 * @param allowed allowed characters not to be escaped 510 * @return the escaped string 511 * 512 * @throws URIException if the default protocol charset is not supported 513 * 514 * @see URI#getDefaultProtocolCharset 515 * @see Coder#encode 516 */ 517 public static String encode(String unescaped, BitSet allowed) 518 throws URIException { 519 520 return encode(unescaped, allowed, URI.getDefaultProtocolCharset()); 521 } 522 523 524 /*** 525 * Escape and encode a given string with allowed characters not to be 526 * escaped and a given charset. 527 * 528 * @param unescaped a string 529 * @param allowed allowed characters not to be escaped 530 * @param charset the charset 531 * @return the escaped string 532 * 533 * @throws URIException if the charset is not supported 534 * 535 * @see Coder#encode 536 */ 537 public static String encode(String unescaped, BitSet allowed, 538 String charset) throws URIException { 539 540 return new String(Coder.encode(unescaped, allowed, charset)); 541 } 542 543 544 /*** 545 * Unescape and decode a given string regarded as an escaped string with the 546 * default protocol charset. 547 * 548 * @param escaped a string 549 * @return the unescaped string 550 * 551 * @throws URIException if the default protocol charset is not supported 552 * 553 * @see URI#getDefaultProtocolCharset 554 * @see Coder#decode 555 */ 556 public static String decode(String escaped) throws URIException { 557 return Coder.decode(escaped.toCharArray(), URI.getDefaultProtocolCharset()); 558 } 559 560 561 /*** 562 * Unescape and decode a given string regarded as an escaped string. 563 * 564 * @param escaped a string 565 * @param charset the charset 566 * @return the unescaped string 567 * 568 * @throws URIException if the charset is not supported 569 * 570 * @see Coder#decode 571 */ 572 public static String decode(String escaped, String charset) 573 throws URIException { 574 575 return Coder.decode(escaped.toCharArray(), charset); 576 } 577 578 // --------------------------------- transforming a string between charsets 579 580 /*** 581 * Convert a target string to the specified character encoded string with 582 * the default protocol charset. 583 * 584 * @param target a target string 585 * @return the protocol character encoded string 586 * 587 * @throws URIException if the default protocol charset is not supported 588 * 589 * @see URI#getDefaultProtocolCharset 590 * 591 * @deprecated Do not use. To be removed 592 */ 593 public static String toProtocolCharset(String target) throws URIException { 594 return toUsingCharset( 595 target, 596 URI.getDefaultDocumentCharset(), 597 URI.getDefaultProtocolCharset()); 598 } 599 600 601 /*** 602 * Convert a target string to the specified character encoded string with 603 * a given protocol charset. 604 * 605 * @param target a target string 606 * @param charset the transformed protocol charset 607 * @return the protocol character encoded string 608 * 609 * @throws URIException if the charset is not supported 610 * 611 * @deprecated Do not use. To be removed 612 */ 613 public static String toProtocolCharset(String target, String charset) 614 throws URIException { 615 616 return toUsingCharset(target, URI.getDefaultDocumentCharset(), charset); 617 } 618 619 620 /*** 621 * Convert a target string to the specified character encoded string with 622 * the default document charset. 623 * 624 * @param target a target string 625 * @return the document character encoded string 626 * 627 * @throws URIException if the default protocol charset is not supported 628 * 629 * @see URI#getDefaultDocumentCharset 630 * 631 * @deprecated Do not use. To be removed 632 */ 633 public static String toDocumentCharset(String target) throws URIException { 634 return toUsingCharset(target, URI.getDefaultProtocolCharset(), 635 URI.getDefaultDocumentCharset()); 636 } 637 638 639 /*** 640 * Convert a target string to the specified character encoded string with 641 * a given document charset. 642 * 643 * @param target a target string 644 * @param charset the transformed document charset 645 * @return the document character encoded string 646 * 647 * @throws URIException if the charset is not supported 648 * 649 * @deprecated Do not use. To be removed 650 */ 651 public static String toDocumentCharset(String target, String charset) 652 throws URIException { 653 654 return toUsingCharset(target, URI.getDefaultProtocolCharset(), charset); 655 } 656 657 658 /*** 659 * Convert a target string from the <code>fromCharset</code> charset to 660 * the <code>toCharset</code> charset. 661 * <p> 662 * What if the document charset is ISO-8859-1 and the protocol charset is 663 * UTF-8, when it's read from the document part and is used in the protocol 664 * part, the use of the method will be <code>toUsingCharset(the string, 665 * "ISO-8859-1", "UTF-8")</code>. 666 * 667 * @param target a target string 668 * @param fromCharset the previous charset 669 * @param toCharset the changing charset 670 * @return the document character encoded string 671 * 672 * @throws URIException if either of the charsets are not supported 673 * 674 * @deprecated Do not use. To be removed 675 */ 676 677 public static String toUsingCharset(String target, String fromCharset, 678 String toCharset) throws URIException { 679 680 try { 681 return new String(target.getBytes(fromCharset), toCharset); 682 } catch (UnsupportedEncodingException error) { 683 throw new URIException(URIException.UNSUPPORTED_ENCODING, 684 error.getMessage()); 685 } 686 } 687 688 // ---------------------------------------------------------- Inner classes 689 690 /*** 691 * The basic and internal utility for URI escape and character encoding and 692 * decoding. 693 */ 694 protected static class Coder extends URI { 695 696 /*** 697 * Escape and encode a given string with allowed characters not to be 698 * escaped. 699 * 700 * @param unescapedComponent an unescaped component 701 * @param allowed allowed characters not to be escaped 702 * @param charset the charset to encode 703 * @return the escaped and encoded string 704 * 705 * @throws URIException if the charset is not supported 706 */ 707 public static char[] encode(String unescapedComponent, BitSet allowed, String charset) 708 throws URIException { 709 710 return URI.encode(unescapedComponent, allowed, charset); 711 } 712 713 714 /*** 715 * Unescape and decode a given string. 716 * 717 * @param escapedComponent an being-unescaped component 718 * @param charset the charset to decode 719 * @return the escaped and encoded string 720 * 721 * @throws URIException if the charset is not supported 722 */ 723 public static String decode(char[] escapedComponent, String charset) 724 throws URIException { 725 726 return URI.decode(escapedComponent, charset); 727 } 728 729 730 /*** 731 * Verify whether a given string is escaped or not 732 * 733 * @param original given characters 734 * @return true if the given character array is 7 bit ASCII-compatible. 735 */ 736 public static boolean verifyEscaped(char[] original) { 737 for (int i = 0; i < original.length; i++) { 738 int c = original[i]; 739 if (c > 128) { 740 return false; 741 } else if (c == '%') { 742 if (Character.digit(original[++i], 16) == -1 743 || Character.digit(original[++i], 16) == -1) { 744 return false; 745 } 746 } 747 } 748 return true; 749 } 750 751 752 /*** 753 * Replace from a given character to given character in an array order 754 * for a given string. 755 * 756 * @param original a given string 757 * @param from a replacing character array 758 * @param to a replaced character array 759 * @return the replaced string 760 */ 761 public static String replace(String original, char[] from, char[] to) { 762 for (int i = from.length; i > 0; --i) { 763 original = replace(original, from[i], to[i]); 764 } 765 return original.toString(); 766 } 767 768 769 /*** 770 * Replace from a given character to given character for a given string. 771 * 772 * @param original a given string 773 * @param from a replacing character array 774 * @param to a replaced character array 775 * @return the replaced string 776 */ 777 public static String replace(String original, char from, char to) { 778 StringBuffer result = new StringBuffer(original.length()); 779 int at, saved = 0; 780 do { 781 at = original.indexOf(from); 782 if (at >= 0) { 783 result.append(original.substring(0, at)); 784 result.append(to); 785 } else { 786 result.append(original.substring(saved)); 787 } 788 saved = at; 789 } while (at >= 0); 790 return result.toString(); 791 } 792 } 793 794 } 795

This page was automatically generated by Maven