View Javadoc

1   /*
2    * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/TranslateFunction.java,v 1.10 2006/02/05 21:47:41 elharo Exp $
3    * $Revision: 1.10 $
4    * $Date: 2006/02/05 21:47:41 $
5    *
6    * ====================================================================
7    *
8    * Copyright 2000-2002 bob mcwhirter & James Strachan.
9    * All rights reserved.
10   *
11   * Redistribution and use in source and binary forms, with or without
12   * modification, are permitted provided that the following conditions are
13   * met:
14   * 
15   *   * Redistributions of source code must retain the above copyright
16   *     notice, this list of conditions and the following disclaimer.
17   * 
18   *   * Redistributions in binary form must reproduce the above copyright
19   *     notice, this list of conditions and the following disclaimer in the
20   *     documentation and/or other materials provided with the distribution.
21   * 
22   *   * Neither the name of the Jaxen Project nor the names of its
23   *     contributors may be used to endorse or promote products derived 
24   *     from this software without specific prior written permission.
25   * 
26   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
27   * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28   * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
29   * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
30   * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32   * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33   * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34   * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35   * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37   *
38   * ====================================================================
39   * This software consists of voluntary contributions made by many 
40   * individuals on behalf of the Jaxen Project and was originally 
41   * created by bob mcwhirter <bob@werken.com> and 
42   * James Strachan <jstrachan@apache.org>.  For more information on the 
43   * Jaxen Project, please see <http://www.jaxen.org/>.
44   * 
45   * $Id: TranslateFunction.java,v 1.10 2006/02/05 21:47:41 elharo Exp $
46   */
47  
48  
49  package org.jaxen.function;
50  
51  import java.util.HashMap;
52  import java.util.List;
53  import java.util.Map;
54  
55  import org.jaxen.Context;
56  import org.jaxen.Function;
57  import org.jaxen.FunctionCallException;
58  import org.jaxen.Navigator;
59  
60  /***
61   * <p>
62   * <b>4.2</b>
63   * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code>
64   * </p>
65   * 
66   * <blockquote src="http://www.w3.org/TR/xpath#function-translate">
67   * <p>
68   * The <b><a href="http://www.w3.org/TR/xpath#function-translate">translate</a></b> function
69   * returns the first argument string with occurrences of characters in
70   * the second argument string replaced by the character at the
71   * corresponding position in the third argument string. For example,
72   * <code>translate("bar","abc","ABC")</code> returns the string
73   * <code>BAr</code>. If there is a character in the second argument
74   * string with no character at a corresponding position in the third
75   * argument string (because the second argument string is longer than
76   * the third argument string), then occurrences of that character in the
77   * first argument string are removed. For example,
78   * <code>translate("--aaa--","abc-","ABC")</code> returns
79   * <code>"AAA"</code>. If a character occurs more than once in the
80   * second argument string, then the first occurrence determines the
81   * replacement character. If the third argument string is longer than
82   * the second argument string, then excess characters are ignored.
83   * </p>
84   * 
85   * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a
86   * sufficient solution for case conversion in all languages. A future
87   * version of XPath may provide additional functions for case
88   * conversion.</blockquote>
89   * 
90   * </blockquote>
91   * 
92   * @author Jan Dvorak ( jan.dvorak @ mathan.cz )
93   * 
94   * @see <a href="http://www.w3.org/TR/xpath#function-translate"
95   *      target="_top">Section 4.2 of the XPath Specification</a>
96   */
97  public class TranslateFunction implements Function
98  {
99  
100      /* The translation is done thru a HashMap. Performance tip (for anyone
101       * who needs to improve the performance of this particular function):
102       * Cache the HashMaps, once they are constructed. */
103     
104     /***
105      * Create a new <code>TranslateFunction</code> object.
106      */
107     public TranslateFunction() {}
108     
109     
110     /*** Returns a copy of the first argument in which
111      * characters found in the second argument are replaced by
112      * corresponding characters from the third argument.
113      *
114      * @param context the context at the point in the
115      *         expression when the function is called
116      * @param args a list that contains exactly three items
117      * 
118      * @return a <code>String</code> built from <code>args.get(0)</code> 
119      *     in which occurrences of characters in <code>args.get(1)</code> 
120      *     are replaced by the corresponding characters in <code>args.get(2)</code> 
121      * 
122      * @throws FunctionCallException if <code>args</code> does not have exactly three items
123      */
124     public Object call(Context context,
125                        List args) throws FunctionCallException
126     {
127         if (args.size() == 3) {
128             return evaluate( args.get(0),
129                              args.get(1),
130                              args.get(2),
131                              context.getNavigator() );
132         }
133 
134         throw new FunctionCallException( "translate() requires three arguments." );
135     }
136 
137     /*** 
138      * Returns a copy of <code>strArg</code> in which
139      * characters found in <code>fromArg</code> are replaced by
140      * corresponding characters from <code>toArg</code>.
141      * If necessary each argument is first converted to it string-value
142      * as if by the XPath <code>string()</code> function.
143      * 
144      * @param strArg the base string
145      * @param fromArg the characters to be replaced
146      * @param toArg the characters they will be replaced by
147      * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments.
148      * 
149      * @return a copy of <code>strArg</code> in which
150      *  characters found in <code>fromArg</code> are replaced by
151      *  corresponding characters from <code>toArg</code>
152      *  
153      * @throws FunctionCallException if one of the arguments is a malformed Unicode string;
154      *     that is, if surrogate characters don't line up properly
155      * 
156      */
157     public static String evaluate(Object strArg,
158                                   Object fromArg,
159                                   Object toArg,
160                                   Navigator nav) throws FunctionCallException
161     {
162         String inStr = StringFunction.evaluate( strArg, nav );
163         String fromStr = StringFunction.evaluate( fromArg, nav );
164         String toStr = StringFunction.evaluate( toArg, nav );
165     
166         // Initialize the mapping in a HashMap
167         Map characterMap = new HashMap();
168         String[] fromCharacters = toUnicodeCharacters(fromStr);
169         String[] toCharacters = toUnicodeCharacters(toStr);
170         int fromLen = fromCharacters.length;
171         int toLen = toCharacters.length;
172         for ( int i = 0; i < fromLen; i++ ) {
173             String cFrom = fromCharacters[i];
174             if ( characterMap.containsKey( cFrom ) ) {
175                 // We've seen the character before, ignore
176                 continue;
177             }
178             
179             if ( i < toLen ) {
180                 // Will change
181                 characterMap.put( cFrom, toCharacters[i] );
182             } 
183             else {
184                 // Will delete
185                 characterMap.put( cFrom, null );
186             }
187         }
188 
189         // Process the input string thru the map
190         StringBuffer outStr = new StringBuffer( inStr.length() );
191         String[] inCharacters = toUnicodeCharacters(inStr);
192         int inLen = inCharacters.length;
193         for ( int i = 0; i < inLen; i++ ) {
194             String cIn = inCharacters[i];
195             if ( characterMap.containsKey( cIn ) ) {
196                 String cTo = (String) characterMap.get( cIn );
197                 if ( cTo != null ) {
198                     outStr.append( cTo );
199                 }
200             } 
201             else {
202                 outStr.append( cIn );
203             }
204         }
205     
206         return outStr.toString();
207     }
208 
209     private static String[] toUnicodeCharacters(String s) throws FunctionCallException {
210 
211         String[] result = new String[s.length()];
212         int stringLength = 0;
213         for (int i = 0; i < s.length(); i++) {
214             char c1 = s.charAt(i);
215             if (isHighSurrogate(c1)) {
216                 try {
217                     char c2 = s.charAt(i+1);
218                     if (isLowSurrogate(c2)) {
219                         result[stringLength] = (c1 + "" + c2).intern();
220                         i++;
221                     }
222                     else {
223                         throw new FunctionCallException("Mismatched surrogate pair in translate function");
224                     }
225                 }
226                 catch (StringIndexOutOfBoundsException ex) {
227                     throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function");
228                 }
229             }
230             else {
231                 result[stringLength]=String.valueOf(c1).intern();
232             }
233             stringLength++;
234         }
235         
236         if (stringLength == result.length) return result;
237         
238         // trim array
239         String[] trimmed = new String[stringLength];
240         System.arraycopy(result, 0, trimmed, 0, stringLength);
241         return trimmed;
242         
243     }
244 
245     private static boolean isHighSurrogate(char c) {
246         return c >= 0xD800 && c <= 0xDBFF;
247     }
248      
249     private static boolean isLowSurrogate(char c) {
250         return c >= 0xDC00 && c <= 0xDFFF;
251     }
252      
253 }