Home » pdfbox-1.1.0-src » org.apache.pdfbox.cos » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   package org.apache.pdfbox.cos;
   18   
   19   import java.io.BufferedInputStream;
   20   import java.io.BufferedOutputStream;
   21   import java.io.ByteArrayInputStream;
   22   import java.io.InputStream;
   23   import java.io.IOException;
   24   import java.io.OutputStream;
   25   
   26   import java.util.List;
   27   
   28   import org.apache.pdfbox.filter.Filter;
   29   import org.apache.pdfbox.filter.FilterManager;
   30   
   31   import org.apache.pdfbox.pdfparser.PDFStreamParser;
   32   
   33   import org.apache.pdfbox.exceptions.COSVisitorException;
   34   
   35   import org.apache.pdfbox.io.RandomAccess;
   36   import org.apache.pdfbox.io.RandomAccessFileInputStream;
   37   import org.apache.pdfbox.io.RandomAccessFileOutputStream;
   38   
   39   /**
   40    * This class represents a stream object in a PDF document.
   41    *
   42    * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
   43    * @version $Revision: 1.41 $
   44    */
   45   public class COSStream extends COSDictionary
   46   {
   47       private static final int BUFFER_SIZE=16384;
   48   
   49       private RandomAccess file;
   50       /**
   51        * The stream with all of the filters applied.
   52        */
   53       private RandomAccessFileOutputStream filteredStream;
   54   
   55       /**
   56        * The stream with no filters, this contains the useful data.
   57        */
   58       private RandomAccessFileOutputStream unFilteredStream;
   59   
   60       /**
   61        * Constructor.  Creates a new stream with an empty dictionary.
   62        *
   63        * @param storage The intermediate storage for the stream.
   64        */
   65       public COSStream( RandomAccess storage )
   66       {
   67           super();
   68           file = storage;
   69       }
   70   
   71       /**
   72        * Constructor.
   73        *
   74        * @param dictionary The dictionary that is associated with this stream.
   75        * @param storage The intermediate storage for the stream.
   76        */
   77       public COSStream( COSDictionary dictionary, RandomAccess storage )
   78       {
   79           super( dictionary );
   80           file = storage;
   81       }
   82   
   83       /**
   84        * This will replace this object with the data from the new object.  This
   85        * is used to easily maintain referential integrity when changing references
   86        * to new objects.
   87        *
   88        * @param stream The stream that have the new values in it.
   89        */
   90       public void replaceWithStream( COSStream stream )
   91       {
   92           this.clear();
   93           this.addAll( stream );
   94           file = stream.file;
   95           filteredStream = stream.filteredStream;
   96           unFilteredStream = stream.unFilteredStream;
   97       }
   98   
   99       /**
  100        * This will get the scratch file associated with this stream.
  101        *
  102        * @return The scratch file where this stream is being stored.
  103        */
  104       public RandomAccess getScratchFile()
  105       {
  106           return file;
  107       }
  108   
  109       /**
  110        * This will get all the tokens in the stream.
  111        *
  112        * @return All of the tokens in the stream.
  113        *
  114        * @throws IOException If there is an error parsing the stream.
  115        */
  116       public List<Object> getStreamTokens() throws IOException
  117       {
  118           PDFStreamParser parser = new PDFStreamParser( this );
  119           parser.parse();
  120           return parser.getTokens();
  121       }
  122   
  123       /**
  124        * This will get the stream with all of the filters applied.
  125        *
  126        * @return the bytes of the physical (endoced) stream
  127        *
  128        * @throws IOException when encoding/decoding causes an exception
  129        */
  130       public InputStream getFilteredStream() throws IOException
  131       {
  132           if( filteredStream == null )
  133           {
  134               doEncode();
  135           }
  136           long position = filteredStream.getPosition();
  137           long length = filteredStream.getLength();
  138   
  139           RandomAccessFileInputStream input =
  140               new RandomAccessFileInputStream( file, position, length );
  141           return new BufferedInputStream( input, BUFFER_SIZE );
  142       }
  143   
  144       /**
  145        * This will get the logical content stream with none of the filters.
  146        *
  147        * @return the bytes of the logical (decoded) stream
  148        *
  149        * @throws IOException when encoding/decoding causes an exception
  150        */
  151       public InputStream getUnfilteredStream() throws IOException
  152       {
  153           InputStream retval = null;
  154           if( unFilteredStream == null )
  155           {
  156               doDecode();
  157           }
  158   
  159           //if unFilteredStream is still null then this stream has not been
  160           //created yet, so we should return null.
  161           if( unFilteredStream != null )
  162           {
  163               long position = unFilteredStream.getPosition();
  164               long length = unFilteredStream.getLength();
  165               RandomAccessFileInputStream input =
  166                   new RandomAccessFileInputStream( file, position, length );
  167               retval = new BufferedInputStream( input, BUFFER_SIZE );
  168           }
  169           else
  170           {
  171               // We should check if the COSStream contains data, maybe it
  172               // has been created with a RandomAccessFile - which is not
  173               // necessary empty.
  174               // In this case, the creation was been done as an input, this should
  175               // be the unfiltered file, since no filter has been applied yet.
  176   //            if ( (file != null) &&
  177   //                    (file.length() > 0) )
  178   //            {
  179   //                retval = new RandomAccessFileInputStream( file,
  180   //                                                          0,
  181   //                                                          file.length() );
  182   //            }
  183   //            else
  184   //            {
  185                   //if there is no stream data then simply return an empty stream.
  186                   retval = new ByteArrayInputStream( new byte[0] );
  187   //            }
  188           }
  189           return retval;
  190       }
  191   
  192       /**
  193        * visitor pattern double dispatch method.
  194        *
  195        * @param visitor The object to notify when visiting this object.
  196        * @return any object, depending on the visitor implementation, or null
  197        * @throws COSVisitorException If an error occurs while visiting this object.
  198        */
  199       public Object accept(ICOSVisitor visitor) throws COSVisitorException
  200       {
  201           return visitor.visitFromStream(this);
  202       }
  203   
  204       /**
  205        * This will decode the physical byte stream applying all of the filters to the stream.
  206        *
  207        * @throws IOException If there is an error applying a filter to the stream.
  208        */
  209       private void doDecode() throws IOException
  210       {
  211   // FIXME: We shouldn't keep the same reference?
  212           unFilteredStream = filteredStream;
  213   
  214           COSBase filters = getFilters();
  215           if( filters == null )
  216           {
  217               //then do nothing
  218           }
  219           else if( filters instanceof COSName )
  220           {
  221               doDecode( (COSName)filters, 0 );
  222           }
  223           else if( filters instanceof COSArray )
  224           {
  225               COSArray filterArray = (COSArray)filters;
  226               for( int i=0; i<filterArray.size(); i++ )
  227               {
  228                   COSName filterName = (COSName)filterArray.get( i );
  229                   doDecode( filterName, i );
  230               }
  231           }
  232           else
  233           {
  234               throw new IOException( "Error: Unknown filter type:" + filters );
  235           }
  236       }
  237   
  238       /**
  239        * This will decode applying a single filter on the stream.
  240        *
  241        * @param filterName The name of the filter.
  242        * @param filterIndex The index of the current filter.
  243        *
  244        * @throws IOException If there is an error parsing the stream.
  245        */
  246       private void doDecode( COSName filterName, int filterIndex ) throws IOException
  247       {
  248           FilterManager manager = getFilterManager();
  249           Filter filter = manager.getFilter( filterName );
  250           InputStream input;
  251   
  252           boolean done = false;
  253           IOException exception = null;
  254           long position = unFilteredStream.getPosition();
  255           long length = unFilteredStream.getLength();
  256           // in case we need it later
  257           long writtenLength = unFilteredStream.getLengthWritten();  
  258   
  259           if( length == 0 )
  260           {
  261               //if the length is zero then don't bother trying to decode
  262               //some filters don't work when attempting to decode
  263               //with a zero length stream.  See zlib_error_01.pdf
  264               unFilteredStream = new RandomAccessFileOutputStream( file );
  265               done = true;
  266           }
  267           else
  268           {
  269               //ok this is a simple hack, sometimes we read a couple extra
  270               //bytes that shouldn't be there, so we encounter an error we will just
  271               //try again with one less byte.
  272               for( int tryCount=0; !done && tryCount<5; tryCount++ )
  273               {
  274                   try
  275                   {
  276                       input = new BufferedInputStream(
  277                           new RandomAccessFileInputStream( file, position, length ), BUFFER_SIZE );
  278                       unFilteredStream = new RandomAccessFileOutputStream( file );
  279                       filter.decode( input, unFilteredStream, this, filterIndex );
  280                       done = true;
  281                   }
  282                   catch( IOException io )
  283                   {
  284                       length--;
  285                       exception = io;
  286                   }
  287               }
  288               if( !done )
  289               {
  290                   //if no good stream was found then lets try again but with the
  291                   //length of data that was actually read and not length
  292                   //defined in the dictionary
  293                   length = writtenLength;
  294                   for( int tryCount=0; !done && tryCount<5; tryCount++ )
  295                   {
  296                       try
  297                       {
  298                           input = new BufferedInputStream(
  299                               new RandomAccessFileInputStream( file, position, length ), BUFFER_SIZE );
  300                           unFilteredStream = new RandomAccessFileOutputStream( file );
  301                           filter.decode( input, unFilteredStream, this, filterIndex );
  302                           done = true;
  303                       }
  304                       catch( IOException io )
  305                       {
  306                           length--;
  307                           exception = io;
  308                       }
  309                   }
  310               }
  311           }
  312           if( !done )
  313           {
  314               throw exception;
  315           }
  316       }
  317   
  318       /**
  319        * This will encode the logical byte stream applying all of the filters to the stream.
  320        *
  321        * @throws IOException If there is an error applying a filter to the stream.
  322        */
  323       private void doEncode() throws IOException
  324       {
  325           filteredStream = unFilteredStream;
  326   
  327           COSBase filters = getFilters();
  328           if( filters == null )
  329           {
  330               //there is no filter to apply
  331           }
  332           else if( filters instanceof COSName )
  333           {
  334               doEncode( (COSName)filters, 0 );
  335           }
  336           else if( filters instanceof COSArray )
  337           {
  338               // apply filters in reverse order
  339               COSArray filterArray = (COSArray)filters;
  340               for( int i=filterArray.size()-1; i>=0; i-- )
  341               {
  342                   COSName filterName = (COSName)filterArray.get( i );
  343                   doEncode( filterName, i );
  344               }
  345           }
  346       }
  347   
  348       /**
  349        * This will encode applying a single filter on the stream.
  350        *
  351        * @param filterName The name of the filter.
  352        * @param filterIndex The index to the filter.
  353        *
  354        * @throws IOException If there is an error parsing the stream.
  355        */
  356       private void doEncode( COSName filterName, int filterIndex ) throws IOException
  357       {
  358           FilterManager manager = getFilterManager();
  359           Filter filter = manager.getFilter( filterName );
  360           InputStream input;
  361   
  362           input = new BufferedInputStream(
  363               new RandomAccessFileInputStream( file, filteredStream.getPosition(),
  364                                                      filteredStream.getLength() ), BUFFER_SIZE );
  365           filteredStream = new RandomAccessFileOutputStream( file );
  366           filter.encode( input, filteredStream, this, filterIndex );
  367       }
  368   
  369       /**
  370        * This will return the filters to apply to the byte stream.
  371        * The method will return
  372        * - null if no filters are to be applied
  373        * - a COSName if one filter is to be applied
  374        * - a COSArray containing COSNames if multiple filters are to be applied
  375        *
  376        * @return the COSBase object representing the filters
  377        */
  378       public COSBase getFilters()
  379       {
  380           return getDictionaryObject(COSName.FILTER);
  381       }
  382   
  383       /**
  384        * This will create a new stream for which filtered byte should be
  385        * written to.  You probably don't want this but want to use the
  386        * createUnfilteredStream, which is used to write raw bytes to.
  387        *
  388        * @return A stream that can be written to.
  389        *
  390        * @throws IOException If there is an error creating the stream.
  391        */
  392       public OutputStream createFilteredStream() throws IOException
  393       {
  394           filteredStream = new RandomAccessFileOutputStream( file );
  395           unFilteredStream = null;
  396           return new BufferedOutputStream( filteredStream, BUFFER_SIZE );
  397       }
  398   
  399       /**
  400        * This will create a new stream for which filtered byte should be
  401        * written to.  You probably don't want this but want to use the
  402        * createUnfilteredStream, which is used to write raw bytes to.
  403        *
  404        * @param expectedLength An entry where a length is expected.
  405        *
  406        * @return A stream that can be written to.
  407        *
  408        * @throws IOException If there is an error creating the stream.
  409        */
  410       public OutputStream createFilteredStream( COSBase expectedLength ) throws IOException
  411       {
  412           filteredStream = new RandomAccessFileOutputStream( file );
  413           filteredStream.setExpectedLength( expectedLength );
  414           unFilteredStream = null;
  415           return new BufferedOutputStream( filteredStream, BUFFER_SIZE );
  416       }
  417   
  418       /**
  419        * set the filters to be applied to the stream.
  420        *
  421        * @param filters The filters to set on this stream.
  422        *
  423        * @throws IOException If there is an error clearing the old filters.
  424        */
  425       public void setFilters(COSBase filters) throws IOException
  426       {
  427           setItem(COSName.FILTER, filters);
  428           // kill cached filtered streams
  429           filteredStream = null;
  430       }
  431   
  432       /**
  433        * This will create an output stream that can be written to.
  434        *
  435        * @return An output stream which raw data bytes should be written to.
  436        *
  437        * @throws IOException If there is an error creating the stream.
  438        */
  439       public OutputStream createUnfilteredStream() throws IOException
  440       {
  441           unFilteredStream = new RandomAccessFileOutputStream( file );
  442           filteredStream = null;
  443           return new BufferedOutputStream( unFilteredStream, BUFFER_SIZE );
  444       }
  445   }

Home » pdfbox-1.1.0-src » org.apache.pdfbox.cos » [javadoc | source]