Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » xpath » regex » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.xerces.impl.xpath.regex;
   19   
   20   import java.text.CharacterIterator;
   21   
   22   /**
   23    * Boyer-Moore searcher.
   24    * 
   25    * @xerces.internal
   26    *
   27    * @version $Id: BMPattern.java 572108 2007-09-02 18:48:31Z mrglavas $
   28    */
   29   public class BMPattern {
   30       final char[] pattern;
   31       final int[] shiftTable;
   32       final boolean ignoreCase;
   33   
   34       public BMPattern(String pat, boolean ignoreCase) {
   35           this(pat, 256, ignoreCase);
   36       }
   37   
   38       public BMPattern(String pat, int tableSize, boolean ignoreCase) {
   39           this.pattern = pat.toCharArray();
   40           this.shiftTable = new int[tableSize];
   41           this.ignoreCase = ignoreCase;
   42   
   43           int length = pattern.length;
   44           for (int i = 0;  i < this.shiftTable.length;  i ++)
   45               this.shiftTable[i] = length;
   46   
   47           for (int i = 0;  i < length;  i ++) {
   48               char ch = this.pattern[i];
   49               int diff = length-i-1;
   50               int index = ch % this.shiftTable.length;
   51               if (diff < this.shiftTable[index])
   52                   this.shiftTable[index] = diff;
   53               if (this.ignoreCase) {
   54                   ch = Character.toUpperCase(ch);
   55                   index = ch % this.shiftTable.length;
   56                   if (diff < this.shiftTable[index])
   57                       this.shiftTable[index] = diff;
   58                   ch = Character.toLowerCase(ch);
   59                   index = ch % this.shiftTable.length;
   60                   if (diff < this.shiftTable[index])
   61                       this.shiftTable[index] = diff;
   62               }
   63           }
   64       }
   65   
   66       /**
   67        *
   68        * @return -1 if <var>iterator</var> does not contain this pattern.
   69        */
   70       public int matches(CharacterIterator iterator, int start, int limit) {
   71           if (this.ignoreCase)  return this.matchesIgnoreCase(iterator, start, limit);
   72           int plength = this.pattern.length;
   73           if (plength == 0)  return start;
   74           int index = start+plength;
   75           while (index <= limit) {
   76               int pindex = plength;
   77               int nindex = index+1;
   78               char ch;
   79               do {
   80                   if ((ch = iterator.setIndex(--index)) != this.pattern[--pindex])
   81                       break;
   82                   if (pindex == 0)
   83                       return index;
   84               } while (pindex > 0);
   85               index += this.shiftTable[ch % this.shiftTable.length]+1;
   86               if (index < nindex)  index = nindex;
   87           }
   88           return -1;
   89       }
   90   
   91       /**
   92        *
   93        * @return -1 if <var>str</var> does not contain this pattern.
   94        */
   95       public int matches(String str, int start, int limit) {
   96           if (this.ignoreCase)  return this.matchesIgnoreCase(str, start, limit);
   97           int plength = this.pattern.length;
   98           if (plength == 0)  return start;
   99           int index = start+plength;
  100           while (index <= limit) {
  101               //System.err.println("Starts at "+index);
  102               int pindex = plength;
  103               int nindex = index+1;
  104               char ch;
  105               do {
  106                   if ((ch = str.charAt(--index)) != this.pattern[--pindex])
  107                       break;
  108                   if (pindex == 0)
  109                       return index;
  110               } while (pindex > 0);
  111               index += this.shiftTable[ch % this.shiftTable.length]+1;
  112               if (index < nindex)  index = nindex;
  113           }
  114           return -1;
  115       }
  116       /**
  117        *
  118        * @return -1 if <var>chars</char> does not contain this pattern.
  119        */
  120       public int matches(char[] chars, int start, int limit) {
  121           if (this.ignoreCase)  return this.matchesIgnoreCase(chars, start, limit);
  122           int plength = this.pattern.length;
  123           if (plength == 0)  return start;
  124           int index = start+plength;
  125           while (index <= limit) {
  126               //System.err.println("Starts at "+index);
  127               int pindex = plength;
  128               int nindex = index+1;
  129               char ch;
  130               do {
  131                   if ((ch = chars[--index]) != this.pattern[--pindex])
  132                       break;
  133                   if (pindex == 0)
  134                       return index;
  135               } while (pindex > 0);
  136               index += this.shiftTable[ch % this.shiftTable.length]+1;
  137               if (index < nindex)  index = nindex;
  138           }
  139           return -1;
  140       }
  141   
  142       int matchesIgnoreCase(CharacterIterator iterator, int start, int limit) {
  143           int plength = this.pattern.length;
  144           if (plength == 0)  return start;
  145           int index = start+plength;
  146           while (index <= limit) {
  147               int pindex = plength;
  148               int nindex = index+1;
  149               char ch;
  150               do {
  151                   char ch1 = ch = iterator.setIndex(--index);
  152                   char ch2 = this.pattern[--pindex];
  153                   if (ch1 != ch2) {
  154                       ch1 = Character.toUpperCase(ch1);
  155                       ch2 = Character.toUpperCase(ch2);
  156                       if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
  157                           break;
  158                   }
  159                   if (pindex == 0)
  160                       return index;
  161               } while (pindex > 0);
  162               index += this.shiftTable[ch % this.shiftTable.length]+1;
  163               if (index < nindex)  index = nindex;
  164           }
  165           return -1;
  166       }
  167       
  168       int matchesIgnoreCase(String text, int start, int limit) {
  169           int plength = this.pattern.length;
  170           if (plength == 0)  return start;
  171           int index = start+plength;
  172           while (index <= limit) {
  173               int pindex = plength;
  174               int nindex = index+1;
  175               char ch;
  176               do {
  177                   char ch1 = ch = text.charAt(--index);
  178                   char ch2 = this.pattern[--pindex];
  179                   if (ch1 != ch2) {
  180                       ch1 = Character.toUpperCase(ch1);
  181                       ch2 = Character.toUpperCase(ch2);
  182                       if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
  183                           break;
  184                   }
  185                   if (pindex == 0)
  186                       return index;
  187               } while (pindex > 0);
  188               index += this.shiftTable[ch % this.shiftTable.length]+1;
  189               if (index < nindex)  index = nindex;
  190           }
  191           return -1;
  192       }
  193       int matchesIgnoreCase(char[] chars, int start, int limit) {
  194           int plength = this.pattern.length;
  195           if (plength == 0)  return start;
  196           int index = start+plength;
  197           while (index <= limit) {
  198               int pindex = plength;
  199               int nindex = index+1;
  200               char ch;
  201               do {
  202                   char ch1 = ch = chars[--index];
  203                   char ch2 = this.pattern[--pindex];
  204                   if (ch1 != ch2) {
  205                       ch1 = Character.toUpperCase(ch1);
  206                       ch2 = Character.toUpperCase(ch2);
  207                       if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
  208                           break;
  209                   }
  210                   if (pindex == 0)
  211                       return index;
  212               } while (pindex > 0);
  213               index += this.shiftTable[ch % this.shiftTable.length]+1;
  214               if (index < nindex)  index = nindex;
  215           }
  216           return -1;
  217       }
  218   
  219       /*
  220       public static void main(String[] argv) {
  221           try {
  222               int[] shiftTable = new int[256];
  223               initializeBoyerMoore(argv[0], shiftTable, true);
  224               int o = -1;
  225               CharacterIterator ite = new java.text.StringCharacterIterator(argv[1]);
  226               long start = System.currentTimeMillis();
  227               //for (int i = 0;  i < 10000;  i ++)
  228                   o = searchIgnoreCasesWithBoyerMoore(ite, 0, argv[0], shiftTable);
  229               start = System.currentTimeMillis()-start;
  230               System.out.println("Result: "+o+", Elapsed: "+start);
  231           } catch (Exception ex) {
  232               ex.printStackTrace();
  233           }
  234       }*/
  235   }
  236   

Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » xpath » regex » [javadoc | source]