View Javadoc

1   /*
2    * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/StringLengthFunction.java,v 1.12 2006/02/05 21:47:41 elharo Exp $
3    * $Revision: 1.12 $
4    * $Date: 2006/02/05 21:47:41 $
5    *
6    * ====================================================================
7    *
8    * Copyright 2000-2002 bob mcwhirter & James Strachan.
9    * All rights reserved.
10   *
11   * Redistribution and use in source and binary forms, with or without
12   * modification, are permitted provided that the following conditions are
13   * met:
14   * 
15   *   * Redistributions of source code must retain the above copyright
16   *     notice, this list of conditions and the following disclaimer.
17   * 
18   *   * Redistributions in binary form must reproduce the above copyright
19   *     notice, this list of conditions and the following disclaimer in the
20   *     documentation and/or other materials provided with the distribution.
21   * 
22   *   * Neither the name of the Jaxen Project nor the names of its
23   *     contributors may be used to endorse or promote products derived 
24   *     from this software without specific prior written permission.
25   * 
26   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
27   * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28   * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
29   * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
30   * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32   * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33   * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34   * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35   * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37   *
38   * ====================================================================
39   * This software consists of voluntary contributions made by many 
40   * individuals on behalf of the Jaxen Project and was originally 
41   * created by bob mcwhirter <bob@werken.com> and 
42   * James Strachan <jstrachan@apache.org>.  For more information on the 
43   * Jaxen Project, please see <http://www.jaxen.org/>.
44   * 
45   * $Id: StringLengthFunction.java,v 1.12 2006/02/05 21:47:41 elharo Exp $
46   */
47  
48  
49  package org.jaxen.function;
50  
51  import java.util.List;
52  
53  import org.jaxen.Context;
54  import org.jaxen.Function;
55  import org.jaxen.FunctionCallException;
56  import org.jaxen.Navigator;
57  
58  /***
59   * <p><b>4.2</b> <code><i>number</i> string-length(<i>string</i>)</code></p> 
60   * 
61   * <p>
62   * The <b>string-length</b> function returns the number of <strong>Unicode characters</strong>
63   * in its argument. This is <strong>not</strong> necessarily 
64   * the same as the number of <strong>Java chars</strong>
65   * in the corresponding Java string. In particular, if the Java <code>String</code>
66   * contains surrogate pairs each such pair will be counted as only one character
67   * by this function. If the argument is omitted, 
68   * it returns the length of the string-value of the context node.
69   * </p>
70   * 
71   * @author bob mcwhirter (bob @ werken.com)
72   * @see <a href="http://www.w3.org/TR/xpath#function-string-length" target="_top">Section 
73   *      4.2 of the XPath Specification</a>
74   */
75  public class StringLengthFunction implements Function
76  {
77  
78      
79      /***
80       * Create a new <code>StringLengthFunction</code> object.
81       */
82      public StringLengthFunction() {}
83      
84      
85      /***
86       * <p>
87       * Returns the number of Unicode characters in the string-value of the argument.
88       * </p>
89       * 
90       * @param context the context at the point in the
91       *         expression when the function is called
92       * @param args a list containing the item whose string-value is to be counted.
93       *     If empty, the length of the context node's string-value is returned.
94       * 
95       * @return a <code>Double</code> giving the number of Unicode characters
96       * 
97       * @throws FunctionCallException if args has more than one item
98       */
99      public Object call(Context context,
100                        List args) throws FunctionCallException
101     {
102         if (args.size() == 0)
103         {
104             return evaluate( context.getNodeSet(),
105                              context.getNavigator() );
106         } 
107         else if (args.size() == 1)
108         {
109             return evaluate( args.get(0),
110                              context.getNavigator() );
111         }
112 
113         throw new FunctionCallException( "string-length() requires one argument." );
114     }
115 
116     /***
117      * <p>
118      * Returns the number of Unicode characters in the string-value of 
119      * an object.
120      * </p>
121      * 
122      * @param obj the object whose string-value is counted
123      * @param nav used to calculate the string-values of the first two arguments
124      * 
125      * @return a <code>Double</code> giving the number of Unicode characters
126      * 
127      * @throws FunctionCallException if the string contains mismatched surrogates
128      */
129     public static Double evaluate(Object obj, Navigator nav) throws FunctionCallException
130     {
131         String str = StringFunction.evaluate( obj, nav );
132         // String.length() counts UTF-16 code points; not Unicode characters
133         char[] data = str.toCharArray();
134         int length = 0;
135         for (int i = 0; i < data.length; i++) {
136             char c = data[i];
137             length++;
138             // if this is a high surrogate; assume the next character is
139             // is a low surrogate and skip it
140             if (c >= 0xD800) {
141                 try {
142                     char low = data[i+1];
143                     if (low < 0xDC00 || low > 0xDFFF) {
144                         throw new FunctionCallException("Bad surrogate pair in string " + str);
145                     }
146                     i++; // increment past low surrogate
147                 }
148                 catch (ArrayIndexOutOfBoundsException ex) {
149                     throw new FunctionCallException("Bad surrogate pair in string " + str);
150                 }
151             }
152         }
153         return new Double(length);
154     }
155     
156 }