Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » xpath » regex » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.xerces.impl.xpath.regex;
   19   
   20   import java.text.CharacterIterator;
   21   
   22   /**
   23    * @xerces.internal
   24    * 
   25    * @version $Id: REUtil.java 446721 2006-09-15 20:35:34Z mrglavas $
   26    */
   27   public final class REUtil {
   28       private REUtil() {
   29       }
   30   
   31       static final int composeFromSurrogates(int high, int low) {
   32           return 0x10000 + ((high-0xd800)<<10) + low-0xdc00;
   33       }
   34   
   35       static final boolean isLowSurrogate(int ch) {
   36           return (ch & 0xfc00) == 0xdc00;
   37       }
   38   
   39       static final boolean isHighSurrogate(int ch) {
   40           return (ch & 0xfc00) == 0xd800;
   41       }
   42   
   43       static final String decomposeToSurrogates(int ch) {
   44           char[] chs = new char[2];
   45           ch -= 0x10000;
   46           chs[0] = (char)((ch>>10)+0xd800);
   47           chs[1] = (char)((ch&0x3ff)+0xdc00);
   48           return new String(chs);
   49       }
   50   
   51       static final String substring(CharacterIterator iterator, int begin, int end) {
   52           char[] src = new char[end-begin];
   53           for (int i = 0;  i < src.length;  i ++)
   54               src[i] = iterator.setIndex(i+begin);
   55           return new String(src);
   56       }
   57   
   58       // ================================================================
   59   
   60       static final int getOptionValue(int ch) {
   61           int ret = 0;
   62           switch (ch) {
   63             case 'i':
   64               ret = RegularExpression.IGNORE_CASE;
   65               break;
   66             case 'm':
   67               ret = RegularExpression.MULTIPLE_LINES;
   68               break;
   69             case 's':
   70               ret = RegularExpression.SINGLE_LINE;
   71               break;
   72             case 'x':
   73               ret = RegularExpression.EXTENDED_COMMENT;
   74               break;
   75             case 'u':
   76               ret = RegularExpression.USE_UNICODE_CATEGORY;
   77               break;
   78             case 'w':
   79               ret = RegularExpression.UNICODE_WORD_BOUNDARY;
   80               break;
   81             case 'F':
   82               ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
   83               break;
   84             case 'H':
   85               ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
   86               break;
   87             case 'X':
   88               ret = RegularExpression.XMLSCHEMA_MODE;
   89               break;
   90             case ',':
   91               ret = RegularExpression.SPECIAL_COMMA;
   92               break;
   93             default:
   94           }
   95           return ret;
   96       }
   97   
   98       static final int parseOptions(String opts) throws ParseException {
   99           if (opts == null)  return 0;
  100           int options = 0;
  101           for (int i = 0;  i < opts.length();  i ++) {
  102               int v = getOptionValue(opts.charAt(i));
  103               if (v == 0)
  104                   throw new ParseException("Unknown Option: "+opts.substring(i), -1);
  105               options |= v;
  106           }
  107           return options;
  108       }
  109   
  110       static final String createOptionString(int options) {
  111           StringBuffer sb = new StringBuffer(9);
  112           if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
  113               sb.append((char)'F');
  114           if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
  115               sb.append((char)'H');
  116           if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
  117               sb.append((char)'X');
  118           if ((options & RegularExpression.IGNORE_CASE) != 0)
  119               sb.append((char)'i');
  120           if ((options & RegularExpression.MULTIPLE_LINES) != 0)
  121               sb.append((char)'m');
  122           if ((options & RegularExpression.SINGLE_LINE) != 0)
  123               sb.append((char)'s');
  124           if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
  125               sb.append((char)'u');
  126           if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
  127               sb.append((char)'w');
  128           if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
  129               sb.append((char)'x');
  130           if ((options & RegularExpression.SPECIAL_COMMA) != 0)
  131               sb.append((char)',');
  132           return sb.toString().intern();
  133       }
  134   
  135       // ================================================================
  136   
  137       static String stripExtendedComment(String regex) {
  138           int len = regex.length();
  139           StringBuffer buffer = new StringBuffer(len);
  140           int offset = 0;
  141           while (offset < len) {
  142               int ch = regex.charAt(offset++);
  143                                                   // Skips a white space.
  144               if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ')
  145                   continue;
  146   
  147               if (ch == '#') {                    // Skips chracters between '#' and a line end.
  148                   while (offset < len) {
  149                       ch = regex.charAt(offset++);
  150                       if (ch == '\r' || ch == '\n')
  151                           break;
  152                   }
  153                   continue;
  154               }
  155   
  156               int next;                           // Strips an escaped white space.
  157               if (ch == '\\' && offset < len) {
  158                   if ((next = regex.charAt(offset)) == '#'
  159                       || next == '\t' || next == '\n' || next == '\f'
  160                       || next == '\r' || next == ' ') {
  161                       buffer.append((char)next);
  162                       offset ++;
  163                   } else {                        // Other escaped character.
  164                       buffer.append((char)'\\');
  165                       buffer.append((char)next);
  166                       offset ++;
  167                   }
  168               } else                              // As is.
  169                   buffer.append((char)ch);
  170           }
  171           return buffer.toString();
  172       }
  173   
  174       // ================================================================
  175   
  176       /**
  177        * Sample entry.
  178        * <div>Usage: <KBD>org.apache.xerces.utils.regex.REUtil &lt;regex&gt; &lt;string&gt;</KBD></div>
  179        */
  180       public static void main(String[] argv) {
  181           String pattern = null;
  182           try {
  183               String options = "";
  184               String target = null;
  185               if( argv.length == 0 ) {
  186                   System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" );
  187                   System.exit( 0 );
  188               }
  189               for (int i = 0;  i < argv.length;  i ++) {
  190                   if (argv[i].length() == 0 || argv[i].charAt(0) != '-') {
  191                       if (pattern == null)
  192                           pattern = argv[i];
  193                       else if (target == null)
  194                           target = argv[i];
  195                       else
  196                           System.err.println("Unnecessary: "+argv[i]);
  197                   } else if (argv[i].equals("-i")) {
  198                       options += "i";
  199                   } else if (argv[i].equals("-m")) {
  200                       options += "m";
  201                   } else if (argv[i].equals("-s")) {
  202                       options += "s";
  203                   } else if (argv[i].equals("-u")) {
  204                       options += "u";
  205                   } else if (argv[i].equals("-w")) {
  206                       options += "w";
  207                   } else if (argv[i].equals("-X")) {
  208                       options += "X";
  209                   } else {
  210                       System.err.println("Unknown option: "+argv[i]);
  211                   }
  212               }
  213               RegularExpression reg = new RegularExpression(pattern, options);
  214               System.out.println("RegularExpression: "+reg);
  215               Match match = new Match();
  216               reg.matches(target, match);
  217               for (int i = 0;  i < match.getNumberOfGroups();  i ++) {
  218                   if (i == 0 )  System.out.print("Matched range for the whole pattern: ");
  219                   else System.out.print("["+i+"]: ");
  220                   if (match.getBeginning(i) < 0)
  221                       System.out.println("-1");
  222                   else {
  223                       System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", ");
  224                       System.out.println("\""+match.getCapturedText(i)+"\"");
  225                   }
  226               }
  227           } catch (ParseException pe) {
  228               if (pattern == null) {
  229                   pe.printStackTrace();
  230               } else {
  231                   System.err.println("org.apache.xerces.utils.regex.ParseException: "+pe.getMessage());
  232                   String indent = "        ";
  233                   System.err.println(indent+pattern);
  234                   int loc = pe.getLocation();
  235                   if (loc >= 0) {
  236                       System.err.print(indent);
  237                       for (int i = 0;  i < loc;  i ++)  System.err.print("-");
  238                       System.err.println("^");
  239                   }
  240               }
  241           } catch (Exception e) {
  242               e.printStackTrace();
  243           }
  244       }
  245   
  246       static final int CACHESIZE = 20;
  247       static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE];
  248       /**
  249        * Creates a RegularExpression instance.
  250        * This method caches created instances.
  251        *
  252        * @see RegularExpression#RegularExpression(java.lang.String, java.lang.String)
  253        */
  254       public static RegularExpression createRegex(String pattern, String options)
  255           throws ParseException {
  256           RegularExpression re = null;
  257           int intOptions = REUtil.parseOptions(options);
  258           synchronized (REUtil.regexCache) {
  259               int i;
  260               for (i = 0;  i < REUtil.CACHESIZE;  i ++) {
  261                   RegularExpression cached = REUtil.regexCache[i];
  262                   if (cached == null) {
  263                       i = -1;
  264                       break;
  265                   }
  266                   if (cached.equals(pattern, intOptions)) {
  267                       re = cached;
  268                       break;
  269                   }
  270               }
  271               if (re != null) {
  272                   if (i != 0) {
  273                       System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i);
  274                       REUtil.regexCache[0] = re;
  275                   }
  276               } else {
  277                   re = new RegularExpression(pattern, options);
  278                   System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE-1);
  279                   REUtil.regexCache[0] = re;
  280               }
  281           }
  282           return re;
  283       }
  284   
  285       /**
  286        *
  287        * @see RegularExpression#matches(java.lang.String)
  288        */
  289       public static boolean matches(String regex, String target) throws ParseException {
  290           return REUtil.createRegex(regex, null).matches(target);
  291       }
  292   
  293       /**
  294        *
  295        * @see RegularExpression#matches(java.lang.String)
  296        */
  297       public static boolean matches(String regex, String options, String target) throws ParseException {
  298           return REUtil.createRegex(regex, options).matches(target);
  299       }
  300   
  301       // ================================================================
  302   
  303       /**
  304        *
  305        */
  306       public static String quoteMeta(String literal) {
  307           int len = literal.length();
  308           StringBuffer buffer = null;
  309           for (int i = 0;  i < len;  i ++) {
  310               int ch = literal.charAt(i);
  311               if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
  312                   if (buffer == null) {
  313                       buffer = new StringBuffer(i+(len-i)*2);
  314                       if (i > 0)  buffer.append(literal.substring(0, i));
  315                   }
  316                   buffer.append((char)'\\');
  317                   buffer.append((char)ch);
  318               } else if (buffer != null)
  319                   buffer.append((char)ch);
  320           }
  321           return buffer != null ? buffer.toString() : literal;
  322       }
  323   
  324       // ================================================================
  325   
  326       static void dumpString(String v) {
  327           for (int i = 0;  i < v.length();  i ++) {
  328               System.out.print(Integer.toHexString(v.charAt(i)));
  329               System.out.print(" ");
  330           }
  331           System.out.println();
  332       }
  333   }

Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » xpath » regex » [javadoc | source]