Coverage Report - org.jaxen.function.TranslateFunction
 
Classes in this File Line Coverage Branch Coverage Complexity
TranslateFunction
96%
52/54
100%
13/13
3.833
 
 1  
 /*
 2  
  * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/TranslateFunction.java,v 1.10 2006/02/05 21:47:41 elharo Exp $
 3  
  * $Revision: 1.10 $
 4  
  * $Date: 2006/02/05 21:47:41 $
 5  
  *
 6  
  * ====================================================================
 7  
  *
 8  
  * Copyright 2000-2002 bob mcwhirter & James Strachan.
 9  
  * All rights reserved.
 10  
  *
 11  
  * Redistribution and use in source and binary forms, with or without
 12  
  * modification, are permitted provided that the following conditions are
 13  
  * met:
 14  
  * 
 15  
  *   * Redistributions of source code must retain the above copyright
 16  
  *     notice, this list of conditions and the following disclaimer.
 17  
  * 
 18  
  *   * Redistributions in binary form must reproduce the above copyright
 19  
  *     notice, this list of conditions and the following disclaimer in the
 20  
  *     documentation and/or other materials provided with the distribution.
 21  
  * 
 22  
  *   * Neither the name of the Jaxen Project nor the names of its
 23  
  *     contributors may be used to endorse or promote products derived 
 24  
  *     from this software without specific prior written permission.
 25  
  * 
 26  
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 27  
  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 28  
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 29  
  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 30  
  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 31  
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 32  
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 33  
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 34  
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 35  
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 36  
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 37  
  *
 38  
  * ====================================================================
 39  
  * This software consists of voluntary contributions made by many 
 40  
  * individuals on behalf of the Jaxen Project and was originally 
 41  
  * created by bob mcwhirter <bob@werken.com> and 
 42  
  * James Strachan <jstrachan@apache.org>.  For more information on the 
 43  
  * Jaxen Project, please see <http://www.jaxen.org/>.
 44  
  * 
 45  
  * $Id: TranslateFunction.java,v 1.10 2006/02/05 21:47:41 elharo Exp $
 46  
  */
 47  
 
 48  
 
 49  
 package org.jaxen.function;
 50  
 
 51  
 import java.util.HashMap;
 52  
 import java.util.List;
 53  
 import java.util.Map;
 54  
 
 55  
 import org.jaxen.Context;
 56  
 import org.jaxen.Function;
 57  
 import org.jaxen.FunctionCallException;
 58  
 import org.jaxen.Navigator;
 59  
 
 60  
 /**
 61  
  * <p>
 62  
  * <b>4.2</b>
 63  
  * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code>
 64  
  * </p>
 65  
  * 
 66  
  * <blockquote src="http://www.w3.org/TR/xpath#function-translate">
 67  
  * <p>
 68  
  * The <b><a href="http://www.w3.org/TR/xpath#function-translate">translate</a></b> function
 69  
  * returns the first argument string with occurrences of characters in
 70  
  * the second argument string replaced by the character at the
 71  
  * corresponding position in the third argument string. For example,
 72  
  * <code>translate("bar","abc","ABC")</code> returns the string
 73  
  * <code>BAr</code>. If there is a character in the second argument
 74  
  * string with no character at a corresponding position in the third
 75  
  * argument string (because the second argument string is longer than
 76  
  * the third argument string), then occurrences of that character in the
 77  
  * first argument string are removed. For example,
 78  
  * <code>translate("--aaa--","abc-","ABC")</code> returns
 79  
  * <code>"AAA"</code>. If a character occurs more than once in the
 80  
  * second argument string, then the first occurrence determines the
 81  
  * replacement character. If the third argument string is longer than
 82  
  * the second argument string, then excess characters are ignored.
 83  
  * </p>
 84  
  * 
 85  
  * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a
 86  
  * sufficient solution for case conversion in all languages. A future
 87  
  * version of XPath may provide additional functions for case
 88  
  * conversion.</blockquote>
 89  
  * 
 90  
  * </blockquote>
 91  
  * 
 92  
  * @author Jan Dvorak ( jan.dvorak @ mathan.cz )
 93  
  * 
 94  
  * @see <a href="http://www.w3.org/TR/xpath#function-translate"
 95  
  *      target="_top">Section 4.2 of the XPath Specification</a>
 96  
  */
 97  
 public class TranslateFunction implements Function
 98  
 {
 99  
 
 100  
      /* The translation is done thru a HashMap. Performance tip (for anyone
 101  
       * who needs to improve the performance of this particular function):
 102  
       * Cache the HashMaps, once they are constructed. */
 103  
     
 104  
     /**
 105  
      * Create a new <code>TranslateFunction</code> object.
 106  
      */
 107  106
     public TranslateFunction() {}
 108  
     
 109  
     
 110  
     /** Returns a copy of the first argument in which
 111  
      * characters found in the second argument are replaced by
 112  
      * corresponding characters from the third argument.
 113  
      *
 114  
      * @param context the context at the point in the
 115  
      *         expression when the function is called
 116  
      * @param args a list that contains exactly three items
 117  
      * 
 118  
      * @return a <code>String</code> built from <code>args.get(0)</code> 
 119  
      *     in which occurrences of characters in <code>args.get(1)</code> 
 120  
      *     are replaced by the corresponding characters in <code>args.get(2)</code> 
 121  
      * 
 122  
      * @throws FunctionCallException if <code>args</code> does not have exactly three items
 123  
      */
 124  
     public Object call(Context context,
 125  
                        List args) throws FunctionCallException
 126  
     {
 127  102
         if (args.size() == 3) {
 128  100
             return evaluate( args.get(0),
 129  
                              args.get(1),
 130  
                              args.get(2),
 131  
                              context.getNavigator() );
 132  
         }
 133  
 
 134  2
         throw new FunctionCallException( "translate() requires three arguments." );
 135  
     }
 136  
 
 137  
     /** 
 138  
      * Returns a copy of <code>strArg</code> in which
 139  
      * characters found in <code>fromArg</code> are replaced by
 140  
      * corresponding characters from <code>toArg</code>.
 141  
      * If necessary each argument is first converted to it string-value
 142  
      * as if by the XPath <code>string()</code> function.
 143  
      * 
 144  
      * @param strArg the base string
 145  
      * @param fromArg the characters to be replaced
 146  
      * @param toArg the characters they will be replaced by
 147  
      * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments.
 148  
      * 
 149  
      * @return a copy of <code>strArg</code> in which
 150  
      *  characters found in <code>fromArg</code> are replaced by
 151  
      *  corresponding characters from <code>toArg</code>
 152  
      *  
 153  
      * @throws FunctionCallException if one of the arguments is a malformed Unicode string;
 154  
      *     that is, if surrogate characters don't line up properly
 155  
      * 
 156  
      */
 157  
     public static String evaluate(Object strArg,
 158  
                                   Object fromArg,
 159  
                                   Object toArg,
 160  
                                   Navigator nav) throws FunctionCallException
 161  
     {
 162  100
         String inStr = StringFunction.evaluate( strArg, nav );
 163  100
         String fromStr = StringFunction.evaluate( fromArg, nav );
 164  100
         String toStr = StringFunction.evaluate( toArg, nav );
 165  
     
 166  
         // Initialize the mapping in a HashMap
 167  100
         Map characterMap = new HashMap();
 168  100
         String[] fromCharacters = toUnicodeCharacters(fromStr);
 169  100
         String[] toCharacters = toUnicodeCharacters(toStr);
 170  96
         int fromLen = fromCharacters.length;
 171  96
         int toLen = toCharacters.length;
 172  376
         for ( int i = 0; i < fromLen; i++ ) {
 173  280
             String cFrom = fromCharacters[i];
 174  280
             if ( characterMap.containsKey( cFrom ) ) {
 175  
                 // We've seen the character before, ignore
 176  8
                 continue;
 177  
             }
 178  
             
 179  272
             if ( i < toLen ) {
 180  
                 // Will change
 181  224
                 characterMap.put( cFrom, toCharacters[i] );
 182  224
             } 
 183  
             else {
 184  
                 // Will delete
 185  48
                 characterMap.put( cFrom, null );
 186  
             }
 187  
         }
 188  
 
 189  
         // Process the input string thru the map
 190  96
         StringBuffer outStr = new StringBuffer( inStr.length() );
 191  96
         String[] inCharacters = toUnicodeCharacters(inStr);
 192  96
         int inLen = inCharacters.length;
 193  438
         for ( int i = 0; i < inLen; i++ ) {
 194  342
             String cIn = inCharacters[i];
 195  342
             if ( characterMap.containsKey( cIn ) ) {
 196  274
                 String cTo = (String) characterMap.get( cIn );
 197  274
                 if ( cTo != null ) {
 198  226
                     outStr.append( cTo );
 199  
                 }
 200  274
             } 
 201  
             else {
 202  68
                 outStr.append( cIn );
 203  
             }
 204  
         }
 205  
     
 206  96
         return outStr.toString();
 207  
     }
 208  
 
 209  
     private static String[] toUnicodeCharacters(String s) throws FunctionCallException {
 210  
 
 211  296
         String[] result = new String[s.length()];
 212  296
         int stringLength = 0;
 213  1180
         for (int i = 0; i < s.length(); i++) {
 214  888
             char c1 = s.charAt(i);
 215  888
             if (isHighSurrogate(c1)) {
 216  
                 try {
 217  18
                     char c2 = s.charAt(i+1);
 218  18
                     if (isLowSurrogate(c2)) {
 219  14
                         result[stringLength] = (c1 + "" + c2).intern();
 220  14
                         i++;
 221  14
                     }
 222  
                     else {
 223  4
                         throw new FunctionCallException("Mismatched surrogate pair in translate function");
 224  
                     }
 225  
                 }
 226  0
                 catch (StringIndexOutOfBoundsException ex) {
 227  0
                     throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function");
 228  14
                 }
 229  
             }
 230  
             else {
 231  870
                 result[stringLength]=String.valueOf(c1).intern();
 232  
             }
 233  884
             stringLength++;
 234  
         }
 235  
         
 236  292
         if (stringLength == result.length) return result;
 237  
         
 238  
         // trim array
 239  14
         String[] trimmed = new String[stringLength];
 240  14
         System.arraycopy(result, 0, trimmed, 0, stringLength);
 241  14
         return trimmed;
 242  
         
 243  
     }
 244  
 
 245  
     private static boolean isHighSurrogate(char c) {
 246  888
         return c >= 0xD800 && c <= 0xDBFF;
 247  
     }
 248  
      
 249  
     private static boolean isLowSurrogate(char c) {
 250  18
         return c >= 0xDC00 && c <= 0xDFFF;
 251  
     }
 252  
      
 253  
 }