1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49 package org.jaxen.function;
50
51 import java.util.HashMap;
52 import java.util.List;
53 import java.util.Map;
54
55 import org.jaxen.Context;
56 import org.jaxen.Function;
57 import org.jaxen.FunctionCallException;
58 import org.jaxen.Navigator;
59
60 /***
61 * <p>
62 * <b>4.2</b>
63 * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code>
64 * </p>
65 *
66 * <blockquote src="http://www.w3.org/TR/xpath#function-translate">
67 * <p>
68 * The <b><a href="http://www.w3.org/TR/xpath#function-translate">translate</a></b> function
69 * returns the first argument string with occurrences of characters in
70 * the second argument string replaced by the character at the
71 * corresponding position in the third argument string. For example,
72 * <code>translate("bar","abc","ABC")</code> returns the string
73 * <code>BAr</code>. If there is a character in the second argument
74 * string with no character at a corresponding position in the third
75 * argument string (because the second argument string is longer than
76 * the third argument string), then occurrences of that character in the
77 * first argument string are removed. For example,
78 * <code>translate("--aaa--","abc-","ABC")</code> returns
79 * <code>"AAA"</code>. If a character occurs more than once in the
80 * second argument string, then the first occurrence determines the
81 * replacement character. If the third argument string is longer than
82 * the second argument string, then excess characters are ignored.
83 * </p>
84 *
85 * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a
86 * sufficient solution for case conversion in all languages. A future
87 * version of XPath may provide additional functions for case
88 * conversion.</blockquote>
89 *
90 * </blockquote>
91 *
92 * @author Jan Dvorak ( jan.dvorak @ mathan.cz )
93 *
94 * @see <a href="http://www.w3.org/TR/xpath#function-translate"
95 * target="_top">Section 4.2 of the XPath Specification</a>
96 */
97 public class TranslateFunction implements Function
98 {
99
100
101
102
103
104 /***
105 * Create a new <code>TranslateFunction</code> object.
106 */
107 public TranslateFunction() {}
108
109
110 /*** Returns a copy of the first argument in which
111 * characters found in the second argument are replaced by
112 * corresponding characters from the third argument.
113 *
114 * @param context the context at the point in the
115 * expression when the function is called
116 * @param args a list that contains exactly three items
117 *
118 * @return a <code>String</code> built from <code>args.get(0)</code>
119 * in which occurrences of characters in <code>args.get(1)</code>
120 * are replaced by the corresponding characters in <code>args.get(2)</code>
121 *
122 * @throws FunctionCallException if <code>args</code> does not have exactly three items
123 */
124 public Object call(Context context,
125 List args) throws FunctionCallException
126 {
127 if (args.size() == 3) {
128 return evaluate( args.get(0),
129 args.get(1),
130 args.get(2),
131 context.getNavigator() );
132 }
133
134 throw new FunctionCallException( "translate() requires three arguments." );
135 }
136
137 /***
138 * Returns a copy of <code>strArg</code> in which
139 * characters found in <code>fromArg</code> are replaced by
140 * corresponding characters from <code>toArg</code>.
141 * If necessary each argument is first converted to it string-value
142 * as if by the XPath <code>string()</code> function.
143 *
144 * @param strArg the base string
145 * @param fromArg the characters to be replaced
146 * @param toArg the characters they will be replaced by
147 * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments.
148 *
149 * @return a copy of <code>strArg</code> in which
150 * characters found in <code>fromArg</code> are replaced by
151 * corresponding characters from <code>toArg</code>
152 *
153 * @throws FunctionCallException if one of the arguments is a malformed Unicode string;
154 * that is, if surrogate characters don't line up properly
155 *
156 */
157 public static String evaluate(Object strArg,
158 Object fromArg,
159 Object toArg,
160 Navigator nav) throws FunctionCallException
161 {
162 String inStr = StringFunction.evaluate( strArg, nav );
163 String fromStr = StringFunction.evaluate( fromArg, nav );
164 String toStr = StringFunction.evaluate( toArg, nav );
165
166
167 Map characterMap = new HashMap();
168 String[] fromCharacters = toUnicodeCharacters(fromStr);
169 String[] toCharacters = toUnicodeCharacters(toStr);
170 int fromLen = fromCharacters.length;
171 int toLen = toCharacters.length;
172 for ( int i = 0; i < fromLen; i++ ) {
173 String cFrom = fromCharacters[i];
174 if ( characterMap.containsKey( cFrom ) ) {
175
176 continue;
177 }
178
179 if ( i < toLen ) {
180
181 characterMap.put( cFrom, toCharacters[i] );
182 }
183 else {
184
185 characterMap.put( cFrom, null );
186 }
187 }
188
189
190 StringBuffer outStr = new StringBuffer( inStr.length() );
191 String[] inCharacters = toUnicodeCharacters(inStr);
192 int inLen = inCharacters.length;
193 for ( int i = 0; i < inLen; i++ ) {
194 String cIn = inCharacters[i];
195 if ( characterMap.containsKey( cIn ) ) {
196 String cTo = (String) characterMap.get( cIn );
197 if ( cTo != null ) {
198 outStr.append( cTo );
199 }
200 }
201 else {
202 outStr.append( cIn );
203 }
204 }
205
206 return outStr.toString();
207 }
208
209 private static String[] toUnicodeCharacters(String s) throws FunctionCallException {
210
211 String[] result = new String[s.length()];
212 int stringLength = 0;
213 for (int i = 0; i < s.length(); i++) {
214 char c1 = s.charAt(i);
215 if (isHighSurrogate(c1)) {
216 try {
217 char c2 = s.charAt(i+1);
218 if (isLowSurrogate(c2)) {
219 result[stringLength] = (c1 + "" + c2).intern();
220 i++;
221 }
222 else {
223 throw new FunctionCallException("Mismatched surrogate pair in translate function");
224 }
225 }
226 catch (StringIndexOutOfBoundsException ex) {
227 throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function");
228 }
229 }
230 else {
231 result[stringLength]=String.valueOf(c1).intern();
232 }
233 stringLength++;
234 }
235
236 if (stringLength == result.length) return result;
237
238
239 String[] trimmed = new String[stringLength];
240 System.arraycopy(result, 0, trimmed, 0, stringLength);
241 return trimmed;
242
243 }
244
245 private static boolean isHighSurrogate(char c) {
246 return c >= 0xD800 && c <= 0xDBFF;
247 }
248
249 private static boolean isLowSurrogate(char c) {
250 return c >= 0xDC00 && c <= 0xDFFF;
251 }
252
253 }