Coverage Report - org.jaxen.function.SubstringFunction
 
Classes in this File Line Coverage Branch Coverage Complexity
SubstringFunction
98%
44/45
100%
17/17
9.333
 
 1  
 /*
 2  
  * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/SubstringFunction.java,v 1.16 2006/02/05 21:47:41 elharo Exp $
 3  
  * $Revision: 1.16 $
 4  
  * $Date: 2006/02/05 21:47:41 $
 5  
  *
 6  
  * ====================================================================
 7  
  *
 8  
  * Copyright 2000-2002 bob mcwhirter & James Strachan.
 9  
  * All rights reserved.
 10  
  *
 11  
  *
 12  
  * Redistribution and use in source and binary forms, with or without
 13  
  * modification, are permitted provided that the following conditions are
 14  
  * met:
 15  
  * 
 16  
  *   * Redistributions of source code must retain the above copyright
 17  
  *     notice, this list of conditions and the following disclaimer.
 18  
  * 
 19  
  *   * Redistributions in binary form must reproduce the above copyright
 20  
  *     notice, this list of conditions and the following disclaimer in the
 21  
  *     documentation and/or other materials provided with the distribution.
 22  
  * 
 23  
  *   * Neither the name of the Jaxen Project nor the names of its
 24  
  *     contributors may be used to endorse or promote products derived 
 25  
  *     from this software without specific prior written permission.
 26  
  * 
 27  
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 28  
  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 29  
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 30  
  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 31  
  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 32  
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 33  
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 34  
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 35  
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 36  
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 37  
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 38  
  *
 39  
  * ====================================================================
 40  
  * This software consists of voluntary contributions made by many
 41  
  * individuals on behalf of the Jaxen Project and was originally
 42  
  * created by bob mcwhirter <bob@werken.com> and
 43  
  * James Strachan <jstrachan@apache.org>.  For more information on the
 44  
  * Jaxen Project, please see <http://www.jaxen.org/>.
 45  
  *
 46  
  */
 47  
 package org.jaxen.function;
 48  
 
 49  
 import java.util.List;
 50  
 
 51  
 import org.jaxen.Context;
 52  
 import org.jaxen.Function;
 53  
 import org.jaxen.FunctionCallException;
 54  
 import org.jaxen.Navigator;
 55  
 /**
 56  
  * <p>
 57  
  * <b>4.2</b>
 58  
  * <code><i>string</i> substring(<i>string</i>,<i>number</i>,<i>number?</i>)</code>
 59  
  * </p>
 60  
  * 
 61  
  * <blockquote src="http://www.w3.org/TR/xpath"> 
 62  
  * <p>The <b>substring</b> function returns the
 63  
  * substring of the first argument starting at the position specified in
 64  
  * the second argument with length specified in the third argument. For
 65  
  * example,
 66  
  * 
 67  
  * <code>substring("12345",2,3)</code> returns <code>"234"</code>.
 68  
  * If the third argument is not specified, it returns the substring
 69  
  * starting at the position specified in the second argument and
 70  
  * continuing to the end of the string. For example,
 71  
  * <code>substring("12345",2)</code> returns <code>"2345"</code>.
 72  
  * </p>
 73  
  * 
 74  
  * <p>
 75  
  * More precisely, each character in the string (see <a
 76  
  * href="http://www.w3.org/TR/xpath#strings">[<b>3.6 Strings</b>]</a>) is considered to have a
 77  
  * numeric position: the position of the first character is 1, the
 78  
  * position of the second character is 2 and so on.
 79  
  * </p>
 80  
  * 
 81  
  * <blockquote> <b>NOTE: </b>This differs from Java and ECMAScript, in
 82  
  * which the <code>String.substring</code> method treats the position
 83  
  * of the first character as 0.</blockquote>
 84  
  * 
 85  
  * <p>
 86  
  * The returned substring contains those characters for which the
 87  
  * position of the character is greater than or equal to the rounded
 88  
  * value of the second argument and, if the third argument is specified,
 89  
  * less than the sum of the rounded value of the second argument and the
 90  
  * rounded value of the third argument; the comparisons and addition
 91  
  * used for the above follow the standard IEEE 754 rules; rounding is
 92  
  * done as if by a call to the <b><a href="#function-round">round</a></b>
 93  
  * function. The following examples illustrate various unusual cases:
 94  
  * </p>
 95  
  * 
 96  
  * <ul>
 97  
  * 
 98  
  * <li>
 99  
  * <p>
 100  
  * <code>substring("12345", 1.5, 2.6)</code> returns
 101  
  * <code>"234"</code>
 102  
  * </p>
 103  
  * </li>
 104  
  * 
 105  
  * <li>
 106  
  * <p>
 107  
  * <code>substring("12345", 0, 3)</code> returns <code>"12"</code>
 108  
  * 
 109  
  * </p>
 110  
  * </li>
 111  
  * 
 112  
  * <li>
 113  
  * <p>
 114  
  * <code>substring("12345", 0 div 0, 3)</code> returns <code>""</code>
 115  
  * </p>
 116  
  * </li>
 117  
  * 
 118  
  * <li>
 119  
  * <p>
 120  
  * <code>substring("12345", 1, 0 div 0)</code> returns
 121  
  * 
 122  
  * <code>""</code>
 123  
  * </p>
 124  
  * </li>
 125  
  * 
 126  
  * <li>
 127  
  * <p>
 128  
  * <code>substring("12345", -42, 1 div 0)</code> returns
 129  
  * <code>"12345"</code>
 130  
  * </p>
 131  
  * </li>
 132  
  * 
 133  
  * <li>
 134  
  * <p>
 135  
  * 
 136  
  * <code>substring("12345", -1 div 0, 1 div 0)</code> returns
 137  
  * <code>""</code></p></li></ul></blockquote>
 138  
  * 
 139  
  * @author bob mcwhirter (bob @ werken.com)
 140  
  * 
 141  
  * @see <a href="http://www.w3.org/TR/xpath#function-substring"
 142  
  *      target="_top">Section 4.2 of the XPath Specification</a>
 143  
  */
 144  
 public class SubstringFunction implements Function
 145  
 {
 146  
 
 147  
     /**
 148  
      * Create a new <code>SubstringFunction</code> object.
 149  
      */
 150  106
     public SubstringFunction() {}
 151  
 
 152  
     
 153  
     /** Returns a substring of an XPath string-value by character index.
 154  
      *
 155  
      * @param context the context at the point in the
 156  
      *         expression when the function is called
 157  
      * @param args a list that contains two or three items
 158  
      * 
 159  
      * @return a <code>String</code> containing the specifed character subsequence of 
 160  
      *     the original string or the string-value of the context node
 161  
      * 
 162  
      * @throws FunctionCallException if <code>args</code> has more than three
 163  
      *     or less than two items
 164  
      */
 165  
     public Object call(Context context,
 166  
                        List args) throws FunctionCallException
 167  
     {
 168  104
         final int argc = args.size();
 169  104
         if (argc < 2 || argc > 3){
 170  4
             throw new FunctionCallException( "substring() requires two or three arguments." );
 171  
         }
 172  
 
 173  100
         final Navigator nav = context.getNavigator();
 174  
 
 175  100
         final String str = StringFunction.evaluate(args.get(0), nav );
 176  
         // The spec doesn't really address this case
 177  100
         if (str == null) {
 178  0
             return "";
 179  
         }
 180  
 
 181  100
         final int stringLength = (StringLengthFunction.evaluate(args.get(0), nav )).intValue();
 182  
 
 183  100
         if (stringLength == 0) {
 184  2
             return "";
 185  
         }
 186  
 
 187  98
         Double d1 = NumberFunction.evaluate(args.get(1), nav);
 188  
 
 189  98
         if (d1.isNaN()){
 190  10
             return "";
 191  
         }
 192  
         // Round the value and subtract 1 as Java strings are zero based
 193  88
         int start = RoundFunction.evaluate(d1, nav).intValue() - 1;
 194  
 
 195  88
         int substringLength = stringLength;
 196  88
         if (argc == 3){
 197  74
             Double d2 = NumberFunction.evaluate(args.get(2), nav);
 198  
 
 199  74
             if (!d2.isNaN()){
 200  64
                 substringLength = RoundFunction.evaluate(d2, nav ).intValue();
 201  64
             }
 202  
             else {
 203  10
                 substringLength = 0;
 204  
             }
 205  
         }
 206  
         
 207  88
         if (substringLength < 0) return "";
 208  
 
 209  84
         int end = start + substringLength;
 210  84
         if (argc == 2) end = stringLength;
 211  
             
 212  
         // negative start is treated as 0
 213  84
         if ( start < 0){
 214  24
             start = 0;
 215  24
         }
 216  60
         else if (start > stringLength){
 217  10
             return "";
 218  
         }
 219  
 
 220  74
         if (end > stringLength){
 221  22
             end = stringLength;
 222  22
         }
 223  52
         else if (end < start) return "";
 224  
         
 225  72
         if (stringLength == str.length()) {
 226  
             // easy case; no surrogate pairs
 227  66
             return str.substring(start, end);
 228  
         }
 229  
         else {
 230  6
             return unicodeSubstring(str, start, end);
 231  
         }
 232  
         
 233  
     }
 234  
 
 235  
     private static String unicodeSubstring(String s, int start, int end) {
 236  
 
 237  6
         StringBuffer result = new StringBuffer(s.length());
 238  22
         for (int jChar = 0, uChar=0; uChar < end; jChar++, uChar++) {
 239  16
             char c = s.charAt(jChar);
 240  16
             if (uChar >= start) result.append(c);
 241  16
             if (c >= 0xD800) { // get the low surrogate
 242  
                 // ???? we could check here that this is indeed a low surroagte
 243  
                 // we could also catch StringIndexOutOfBoundsException
 244  6
                 jChar++;
 245  6
                 if (uChar >= start) result.append(s.charAt(jChar));
 246  
             }
 247  
         }
 248  6
         return result.toString();
 249  
     }
 250  
 }