1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47 package org.jaxen.function;
48
49 import java.util.List;
50
51 import org.jaxen.Context;
52 import org.jaxen.Function;
53 import org.jaxen.FunctionCallException;
54 import org.jaxen.Navigator;
55 /***
56 * <p>
57 * <b>4.2</b>
58 * <code><i>string</i> substring(<i>string</i>,<i>number</i>,<i>number?</i>)</code>
59 * </p>
60 *
61 * <blockquote src="http://www.w3.org/TR/xpath">
62 * <p>The <b>substring</b> function returns the
63 * substring of the first argument starting at the position specified in
64 * the second argument with length specified in the third argument. For
65 * example,
66 *
67 * <code>substring("12345",2,3)</code> returns <code>"234"</code>.
68 * If the third argument is not specified, it returns the substring
69 * starting at the position specified in the second argument and
70 * continuing to the end of the string. For example,
71 * <code>substring("12345",2)</code> returns <code>"2345"</code>.
72 * </p>
73 *
74 * <p>
75 * More precisely, each character in the string (see <a
76 * href="http://www.w3.org/TR/xpath#strings">[<b>3.6 Strings</b>]</a>) is considered to have a
77 * numeric position: the position of the first character is 1, the
78 * position of the second character is 2 and so on.
79 * </p>
80 *
81 * <blockquote> <b>NOTE: </b>This differs from Java and ECMAScript, in
82 * which the <code>String.substring</code> method treats the position
83 * of the first character as 0.</blockquote>
84 *
85 * <p>
86 * The returned substring contains those characters for which the
87 * position of the character is greater than or equal to the rounded
88 * value of the second argument and, if the third argument is specified,
89 * less than the sum of the rounded value of the second argument and the
90 * rounded value of the third argument; the comparisons and addition
91 * used for the above follow the standard IEEE 754 rules; rounding is
92 * done as if by a call to the <b><a href="#function-round">round</a></b>
93 * function. The following examples illustrate various unusual cases:
94 * </p>
95 *
96 * <ul>
97 *
98 * <li>
99 * <p>
100 * <code>substring("12345", 1.5, 2.6)</code> returns
101 * <code>"234"</code>
102 * </p>
103 * </li>
104 *
105 * <li>
106 * <p>
107 * <code>substring("12345", 0, 3)</code> returns <code>"12"</code>
108 *
109 * </p>
110 * </li>
111 *
112 * <li>
113 * <p>
114 * <code>substring("12345", 0 div 0, 3)</code> returns <code>""</code>
115 * </p>
116 * </li>
117 *
118 * <li>
* <p>
120 * <code>substring("12345", 1, 0 div 0)</code> returns
121 *
122 * <code>""</code>
123 * </p>
124 * </li>
125 *
126 * <li>
127 * <p>
128 * <code>substring("12345", -42, 1 div 0)</code> returns
129 * <code>"12345"</code>
130 * </p>
131 * </li>
132 *
133 * <li>
134 * <p>
135 *
136 * <code>substring("12345", -1 div 0, 1 div 0)</code> returns
* <code>""</code>
* </p>
* </li>
*
* </ul>
* </blockquote>
138 *
139 * @author bob mcwhirter (bob @ werken.com)
140 *
141 * @see <a href="http://www.w3.org/TR/xpath#function-substring"
142 * target="_top">Section 4.2 of the XPath Specification</a>
143 */
144 public class SubstringFunction implements Function
145 {
146
147 /***
148 * Create a new <code>SubstringFunction</code> object.
149 */
150 public SubstringFunction() {}
151
152
153 /*** Returns a substring of an XPath string-value by character index.
154 *
155 * @param context the context at the point in the
156 * expression when the function is called
157 * @param args a list that contains two or three items
158 *
159 * @return a <code>String</code> containing the specifed character subsequence of
160 * the original string or the string-value of the context node
161 *
162 * @throws FunctionCallException if <code>args</code> has more than three
163 * or less than two items
164 */
165 public Object call(Context context,
166 List args) throws FunctionCallException
167 {
168 final int argc = args.size();
169 if (argc < 2 || argc > 3){
170 throw new FunctionCallException( "substring() requires two or three arguments." );
171 }
172
173 final Navigator nav = context.getNavigator();
174
175 final String str = StringFunction.evaluate(args.get(0), nav );
176
177 if (str == null) {
178 return "";
179 }
180
181 final int stringLength = (StringLengthFunction.evaluate(args.get(0), nav )).intValue();
182
183 if (stringLength == 0) {
184 return "";
185 }
186
187 Double d1 = NumberFunction.evaluate(args.get(1), nav);
188
189 if (d1.isNaN()){
190 return "";
191 }
192
193 int start = RoundFunction.evaluate(d1, nav).intValue() - 1;
194
195 int substringLength = stringLength;
196 if (argc == 3){
197 Double d2 = NumberFunction.evaluate(args.get(2), nav);
198
199 if (!d2.isNaN()){
200 substringLength = RoundFunction.evaluate(d2, nav ).intValue();
201 }
202 else {
203 substringLength = 0;
204 }
205 }
206
207 if (substringLength < 0) return "";
208
209 int end = start + substringLength;
210 if (argc == 2) end = stringLength;
211
212
213 if ( start < 0){
214 start = 0;
215 }
216 else if (start > stringLength){
217 return "";
218 }
219
220 if (end > stringLength){
221 end = stringLength;
222 }
223 else if (end < start) return "";
224
225 if (stringLength == str.length()) {
226
227 return str.substring(start, end);
228 }
229 else {
230 return unicodeSubstring(str, start, end);
231 }
232
233 }
234
235 private static String unicodeSubstring(String s, int start, int end) {
236
237 StringBuffer result = new StringBuffer(s.length());
238 for (int jChar = 0, uChar=0; uChar < end; jChar++, uChar++) {
239 char c = s.charAt(jChar);
240 if (uChar >= start) result.append(c);
241 if (c >= 0xD800) {
242
243
244 jChar++;
245 if (uChar >= start) result.append(s.charAt(jChar));
246 }
247 }
248 return result.toString();
249 }
250 }