Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » io » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.xerces.impl.io;
   19   
   20   import java.io.InputStream;
   21   import java.io.IOException;
   22   import java.io.Reader;
   23   
   24   /** 
   25    * Reader for UCS-2 and UCS-4 encodings.
   26    * (i.e., encodings from ISO-10646-UCS-(2|4)).
   27    * 
   28    * @xerces.internal
   29    *
   30    * @author Neil Graham, IBM
   31    *
   32    * @version $Id: UCSReader.java 449317 2006-09-23 22:12:30Z mrglavas $
   33    */
   34   public class UCSReader extends Reader {
   35   
   36       //
   37       // Constants
   38       //
   39   
   40       /** 
   41        * Default byte buffer size (8192, larger than that of ASCIIReader
   42        * since it's reasonable to surmise that the average UCS-4-encoded
   43        * file should be 4 times as large as the average ASCII-encoded file). 
   44        */
   45       public static final int DEFAULT_BUFFER_SIZE = 8192;
   46   
   47       public static final short UCS2LE = 1;
   48       public static final short UCS2BE = 2;
   49       public static final short UCS4LE = 4;
   50       public static final short UCS4BE = 8;
   51   
   52       //
   53       // Data
   54       //
   55   
   56       /** Input stream. */
   57       protected final InputStream fInputStream;
   58   
   59       /** Byte buffer. */
   60       protected final byte[] fBuffer;
   61   
   62       // what kind of data we're dealing with
   63       protected final short fEncoding;
   64   
   65       //
   66       // Constructors
   67       //
   68   
   69       /** 
   70        * Constructs a UCS reader from the specified input stream 
   71        * using the default buffer size.  The Endian-ness and whether this is
   72        * UCS-2 or UCS-4 needs also to be known in advance.
   73        *
   74        * @param inputStream The input stream.
   75        * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
   76        */
   77       public UCSReader(InputStream inputStream, short encoding) {
   78           this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
   79       } // <init>(InputStream, short)
   80   
   81       /** 
   82        * Constructs a UCS reader from the specified input stream 
   83        * and buffer size.  The Endian-ness and whether this is
   84        * UCS-2 or UCS-4 needs also to be known in advance.
   85        *
   86        * @param inputStream The input stream.
   87        * @param size        The initial buffer size.
   88        * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
   89        */
   90       public UCSReader(InputStream inputStream, int size, short encoding) {
   91           this(inputStream, new byte[size], encoding);
   92       } // <init>(InputStream,int,short)
   93       
   94       /** 
   95        * Constructs a UCS reader from the specified input stream 
   96        * and buffer.  The Endian-ness and whether this is
   97        * UCS-2 or UCS-4 needs also to be known in advance.
   98        *
   99        * @param inputStream The input stream.
  100        * @param buffer      The byte buffer.
  101        * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
  102        */
  103       public UCSReader(InputStream inputStream, byte [] buffer, short encoding) {
  104           fInputStream = inputStream;
  105           fBuffer = buffer;
  106           fEncoding = encoding;
  107       } // <init>(InputStream,int,short)
  108   
  109       //
  110       // Reader methods
  111       //
  112   
  113       /**
  114        * Read a single character.  This method will block until a character is
  115        * available, an I/O error occurs, or the end of the stream is reached.
  116        *
  117        * <p> Subclasses that intend to support efficient single-character input
  118        * should override this method.
  119        *
  120        * @return     The character read, as an integer in the range 0 to 127
  121        *             (<tt>0x00-0x7f</tt>), or -1 if the end of the stream has
  122        *             been reached
  123        *
  124        * @exception  IOException  If an I/O error occurs
  125        */
  126       public int read() throws IOException { 
  127           int b0 = fInputStream.read() & 0xff;
  128           if (b0 == 0xff) {
  129               return -1;
  130           }
  131           int b1 = fInputStream.read() & 0xff;
  132           if (b1 == 0xff) {
  133               return -1;
  134           }
  135           // UCS-4
  136           if (fEncoding >= 4) {
  137               int b2 = fInputStream.read() & 0xff;
  138               if (b2 == 0xff) {
  139                   return -1;
  140               }
  141               int b3 = fInputStream.read() & 0xff;
  142               if (b3 == 0xff) {
  143                   return -1;
  144               }
  145               if (fEncoding == UCS4BE) {
  146                   return (b0<<24)+(b1<<16)+(b2<<8)+b3;
  147               }
  148               return (b3<<24)+(b2<<16)+(b1<<8)+b0;
  149           }
  150           // UCS-2
  151           if (fEncoding == UCS2BE) {
  152               return (b0<<8)+b1;
  153           }
  154           return (b1<<8)+b0;
  155       } // read():int
  156   
  157       /**
  158        * Read characters into a portion of an array.  This method will block
  159        * until some input is available, an I/O error occurs, or the end of the
  160        * stream is reached.
  161        *
  162        * @param      ch     Destination buffer
  163        * @param      offset Offset at which to start storing characters
  164        * @param      length Maximum number of characters to read
  165        *
  166        * @return     The number of characters read, or -1 if the end of the
  167        *             stream has been reached
  168        *
  169        * @exception  IOException  If an I/O error occurs
  170        */
  171       public int read(char ch[], int offset, int length) throws IOException {
  172           int byteLength = length << ((fEncoding >= 4)?2:1);
  173           if (byteLength > fBuffer.length) {
  174               byteLength = fBuffer.length;
  175           }
  176           int count = fInputStream.read(fBuffer, 0, byteLength);
  177           if (count == -1) return -1;
  178           // try and make count be a multiple of the number of bytes we're looking for
  179           if (fEncoding >= 4) { // BigEndian
  180               // this looks ugly, but it avoids an if at any rate...
  181               int numToRead = (4 - (count & 3) & 3);
  182               for (int i = 0; i < numToRead; i++) {
  183                   int charRead = fInputStream.read();
  184                   if (charRead == -1) { // end of input; something likely went wrong!A  Pad buffer with nulls.
  185                       for (int j = i; j < numToRead; j++) {
  186                           fBuffer[count+j] = 0;
  187                       }
  188                       break;
  189                   }
  190                   fBuffer[count+i] = (byte)charRead;
  191               }
  192               count += numToRead;
  193           } 
  194           else {
  195               int numToRead = count & 1;
  196               if (numToRead != 0) {
  197                   count++;
  198                   int charRead = fInputStream.read();
  199                   if (charRead == -1) { // end of input; something likely went wrong!A  Pad buffer with nulls.
  200                       fBuffer[count] = 0;
  201                   } 
  202                   else {
  203                       fBuffer[count] = (byte)charRead;
  204                   }
  205               }
  206           }
  207   
  208           // now count is a multiple of the right number of bytes
  209           int numChars = count >> ((fEncoding >= 4)?2:1);
  210           int curPos = 0;
  211           for (int i = 0; i < numChars; i++) {
  212               int b0 = fBuffer[curPos++] & 0xff;
  213               int b1 = fBuffer[curPos++] & 0xff;
  214               // UCS-4
  215               if (fEncoding >= 4) {
  216                   int b2 = fBuffer[curPos++] & 0xff;
  217                   int b3 = fBuffer[curPos++] & 0xff;
  218                   if (fEncoding == UCS4BE) {
  219                       ch[offset+i] = (char)((b0<<24)+(b1<<16)+(b2<<8)+b3);
  220                   }
  221                   else {
  222                       ch[offset+i] = (char)((b3<<24)+(b2<<16)+(b1<<8)+b0);
  223                   }
  224               } 
  225               else { // UCS-2
  226                   if (fEncoding == UCS2BE) {
  227                       ch[offset+i] = (char)((b0<<8)+b1);
  228                   }
  229                   else {
  230                       ch[offset+i] = (char)((b1<<8)+b0);
  231                   }
  232               }
  233           }
  234           return numChars;
  235       } // read(char[],int,int)
  236   
  237       /**
  238        * Skip characters.  This method will block until some characters are
  239        * available, an I/O error occurs, or the end of the stream is reached.
  240        *
  241        * @param  n  The number of characters to skip
  242        *
  243        * @return    The number of characters actually skipped
  244        *
  245        * @exception  IOException  If an I/O error occurs
  246        */
  247       public long skip(long n) throws IOException {
  248           // charWidth will represent the number of bits to move
  249           // n leftward to get num of bytes to skip, and then move the result rightward
  250           // to get num of chars effectively skipped.
  251           // The trick with &'ing, as with elsewhere in this dcode, is
  252           // intended to avoid an expensive use of / that might not be optimized
  253           // away.
  254           int charWidth = (fEncoding >=4)?2:1;
  255           long bytesSkipped = fInputStream.skip(n<<charWidth);
  256           if((bytesSkipped & (charWidth | 1)) == 0) return bytesSkipped >> charWidth;
  257           return (bytesSkipped >> charWidth) + 1;
  258       } // skip(long):long
  259   
  260       /**
  261        * Tell whether this stream is ready to be read.
  262        *
  263        * @return True if the next read() is guaranteed not to block for input,
  264        * false otherwise.  Note that returning false does not guarantee that the
  265        * next read will block.
  266        *
  267        * @exception  IOException  If an I/O error occurs
  268        */
  269       public boolean ready() throws IOException {
  270   	    return false;
  271       } // ready()
  272   
  273       /**
  274        * Tell whether this stream supports the mark() operation.
  275        */
  276       public boolean markSupported() {
  277   	    return fInputStream.markSupported();
  278       } // markSupported()
  279   
  280       /**
  281        * Mark the present position in the stream.  Subsequent calls to reset()
  282        * will attempt to reposition the stream to this point.  Not all
  283        * character-input streams support the mark() operation.
  284        *
  285        * @param  readAheadLimit  Limit on the number of characters that may be
  286        *                         read while still preserving the mark.  After
  287        *                         reading this many characters, attempting to
  288        *                         reset the stream may fail.
  289        *
  290        * @exception  IOException  If the stream does not support mark(),
  291        *                          or if some other I/O error occurs
  292        */
  293       public void mark(int readAheadLimit) throws IOException {
  294   	    fInputStream.mark(readAheadLimit);
  295       } // mark(int)
  296   
  297       /**
  298        * Reset the stream.  If the stream has been marked, then attempt to
  299        * reposition it at the mark.  If the stream has not been marked, then
  300        * attempt to reset it in some way appropriate to the particular stream,
  301        * for example by repositioning it to its starting point.  Not all
  302        * character-input streams support the reset() operation, and some support
  303        * reset() without supporting mark().
  304        *
  305        * @exception  IOException  If the stream has not been marked,
  306        *                          or if the mark has been invalidated,
  307        *                          or if the stream does not support reset(),
  308        *                          or if some other I/O error occurs
  309        */
  310       public void reset() throws IOException {
  311           fInputStream.reset();
  312       } // reset()
  313   
  314       /**
  315        * Close the stream.  Once a stream has been closed, further read(),
  316        * ready(), mark(), or reset() invocations will throw an IOException.
  317        * Closing a previously-closed stream, however, has no effect.
  318        *
  319        * @exception  IOException  If an I/O error occurs
  320        */
  321        public void close() throws IOException {
  322            fInputStream.close();
  323        } // close()
  324   
  325   } // class UCSReader

Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » io » [javadoc | source]