Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » ru » [javadoc | source]
    1   package org.apache.lucene.analysis.ru;
    2   
    3   /**
    4    * Copyright 2004 The Apache Software Foundation
    5    *
    6    * Licensed under the Apache License, Version 2.0 (the "License");
    7    * you may not use this file except in compliance with the License.
    8    * You may obtain a copy of the License at
    9    *
   10    *     http://www.apache.org/licenses/LICENSE-2.0
   11    *
   12    * Unless required by applicable law or agreed to in writing, software
   13    * distributed under the License is distributed on an "AS IS" BASIS,
   14    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   15    * See the License for the specific language governing permissions and
   16    * limitations under the License.
   17    */
   18   
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;

import junit.framework.TestCase;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
   25   
   26   /**
   27    * Test case for RussianAnalyzer.
   28    *
   29    * @author    Boris Okner
   30    * @version   $Id: TestRussianAnalyzer.java,v 1.6 2004/03/29 22:48:06 cutting Exp $
   31    */
   32   
   33   public class TestRussianAnalyzer extends TestCase
   34   {
   35       private InputStreamReader inWords;
   36   
   37       private InputStreamReader sampleUnicode;
   38   
   39       private Reader inWordsKOI8;
   40   
   41       private Reader sampleKOI8;
   42   
   43       private Reader inWords1251;
   44   
   45       private Reader sample1251;
   46   
   47       private File dataDir;
   48   
   49       protected void setUp() throws Exception
   50       {
   51         dataDir = new File(System.getProperty("dataDir"));
   52       }
   53   
   54       public void testUnicode() throws IOException
   55       {
   56           RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.UnicodeRussian);
   57           inWords =
   58               new InputStreamReader(
   59                   new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUnicode.txt")),
   60                   "Unicode");
   61   
   62           sampleUnicode =
   63               new InputStreamReader(
   64                   new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resUnicode.htm")),
   65                   "Unicode");
   66   
   67           TokenStream in = ra.tokenStream("all", inWords);
   68   
   69           RussianLetterTokenizer sample =
   70               new RussianLetterTokenizer(
   71                   sampleUnicode,
   72                   RussianCharsets.UnicodeRussian);
   73   
   74           for (;;)
   75           {
   76               Token token = in.next();
   77   
   78               if (token == null)
   79               {
   80                   break;
   81               }
   82   
   83               Token sampleToken = sample.next();
   84               assertEquals(
   85                   "Unicode",
   86                   token.termText(),
   87                   sampleToken == null
   88                   ? null
   89                   : sampleToken.termText());
   90           }
   91   
   92           inWords.close();
   93           sampleUnicode.close();
   94       }
   95   
   96       public void testKOI8() throws IOException
   97       {
   98           //System.out.println(new java.util.Date());
   99           RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.KOI8);
  100           // KOI8
  101           inWordsKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testKOI8.txt")), "iso-8859-1");
  102   
  103           sampleKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resKOI8.htm")), "iso-8859-1");
  104   
  105           TokenStream in = ra.tokenStream("all", inWordsKOI8);
  106           RussianLetterTokenizer sample =
  107               new RussianLetterTokenizer(
  108                   sampleKOI8,
  109                   RussianCharsets.KOI8);
  110   
  111           for (;;)
  112           {
  113               Token token = in.next();
  114   
  115               if (token == null)
  116               {
  117                   break;
  118               }
  119   
  120               Token sampleToken = sample.next();
  121               assertEquals(
  122                   "KOI8",
  123                   token.termText(),
  124                   sampleToken == null
  125                   ? null
  126                   : sampleToken.termText());
  127   
  128           }
  129   
  130           inWordsKOI8.close();
  131           sampleKOI8.close();
  132       }
  133   
  134       public void test1251() throws IOException
  135       {
  136           // 1251
  137           inWords1251 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/test1251.txt")), "iso-8859-1");
  138   
  139           sample1251 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/res1251.htm")), "iso-8859-1");
  140   
  141           RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.CP1251);
  142           TokenStream in = ra.tokenStream("", inWords1251);
  143           RussianLetterTokenizer sample =
  144               new RussianLetterTokenizer(
  145                   sample1251,
  146                   RussianCharsets.CP1251);
  147   
  148           for (;;)
  149           {
  150               Token token = in.next();
  151   
  152               if (token == null)
  153               {
  154                   break;
  155               }
  156   
  157               Token sampleToken = sample.next();
  158               assertEquals(
  159                   "1251",
  160                   token.termText(),
  161                   sampleToken == null
  162                   ? null
  163                   : sampleToken.termText());
  164   
  165           }
  166   
  167           inWords1251.close();
  168           sample1251.close();
  169       }
  170   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » ru » [javadoc | source]