Home » pdfbox-1.1.0-src » org.apache.pdfbox.cos » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   package org.apache.pdfbox.cos;
   18   
   19   import java.io.File;
   20   import java.io.IOException;
   21   
   22   import java.util.ArrayList;
   23   import java.util.HashMap;
   24   import java.util.List;
   25   import java.util.Map;
   26   
   27   import org.apache.commons.logging.Log;
   28   import org.apache.commons.logging.LogFactory;
   29   import org.apache.pdfbox.exceptions.COSVisitorException;
   30   import org.apache.pdfbox.io.RandomAccess;
   31   import org.apache.pdfbox.io.RandomAccessFile;
   32   
   33   import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
   34   import org.apache.pdfbox.pdfparser.PDFXrefStreamParser;
   35   import org.apache.pdfbox.persistence.util.COSObjectKey;
   36   
   37   /**
   38    * This is the in-memory representation of the PDF document.  You need to call
   39    * close() on this object when you are done using it!!
   40    *
   41    * @author <a href="ben@benlitchfield.com">Ben Litchfield</a>
   42    * @version $Revision: 1.28 $
   43    */
   44   public class COSDocument extends COSBase
   45   {
   46   
   47       /**
   48        * Log instance.
   49        */
   50       private static final Log log = LogFactory.getLog(COSDocument.class);
   51   
   52       private float version;
   53   
   54       /**
   55        * Maps ObjectKeys to a COSObject. Note that references to these objects
   56        * are also stored in COSDictionary objects that map a name to a specific object.
   57        */
   58       private final Map<COSObjectKey, COSObject> objectPool =
   59           new HashMap<COSObjectKey, COSObject>();
   60   
   61       /**
   62        * Maps object and generation ids to object byte offsets.
   63        */
   64       private final Map<COSObjectKey, Integer> xrefTable =
   65           new HashMap<COSObjectKey, Integer>();
   66   
   67       /**
   68        * Document trailer dictionary.
   69        */
   70       private COSDictionary trailer;
   71   
   72       /**
   73        * This file will store the streams in order to conserve memory.
   74        */
   75       private RandomAccess scratchFile = null;
   76   
   77       private File tmpFile = null;
   78   
   79       private String headerString = "%PDF-1.4";
   80   
   81       private boolean warnMissingClose = true;
   82   
   83       /**
   84        * Constructor.  Uses the java.io.tmpdir value to create a file
   85        * to store the streams.
   86        *
   87        *  @throws IOException If there is an error creating the tmp file.
   88        */
   89       public COSDocument() throws IOException
   90       {
   91           this( new File( System.getProperty( "java.io.tmpdir" ) ) );
   92       }
   93   
   94       /**
   95        * Constructor that will create a create a scratch file in the
   96        * following directory.
   97        *
   98        * @param scratchDir The directory to store a scratch file.
   99        *
  100        *  @throws IOException If there is an error creating the tmp file.
  101        */
  102       public COSDocument( File scratchDir ) throws IOException
  103       {
  104           tmpFile = File.createTempFile( "pdfbox", "tmp", scratchDir );
  105           scratchFile = new RandomAccessFile( tmpFile, "rw" );
  106       }
  107   
  108       /**
  109        * Constructor that will use the following random access file for storage
  110        * of the PDF streams.  The client of this method is responsible for deleting
  111        * the storage if necessary that this file will write to.  The close method
  112        * will close the file though.
  113        *
  114        * @param file The random access file to use for storage.
  115        */
  116       public COSDocument( RandomAccess file )
  117       {
  118           scratchFile = file;
  119       }
  120   
  121       /**
  122        * This will get the scratch file for this document.
  123        *
  124        * @return The scratch file.
  125        */
  126       public RandomAccess getScratchFile()
  127       {
  128           return scratchFile;
  129       }
  130   
  131       /**
  132        * This will get the first dictionary object by type.
  133        *
  134        * @param type The type of the object.
  135        *
  136        * @return This will return an object with the specified type.
  137        * @throws IOException If there is an error getting the object
  138        */
  139       public COSObject getObjectByType( String type ) throws IOException
  140       {
  141           return getObjectByType( COSName.getPDFName( type ) );
  142       }
  143   
  144       /**
  145        * This will get the first dictionary object by type.
  146        *
  147        * @param type The type of the object.
  148        *
  149        * @return This will return an object with the specified type.
  150        * @throws IOException If there is an error getting the object
  151        */
  152       public COSObject getObjectByType( COSName type ) throws IOException
  153       {
  154           for( COSObject object : objectPool.values() )
  155           {
  156   
  157               COSBase realObject = object.getObject();
  158               if( realObject instanceof COSDictionary )
  159               {
  160                   try
  161                   {
  162                       COSDictionary dic = (COSDictionary)realObject;
  163                       COSName objectType = (COSName)dic.getItem( COSName.TYPE );
  164                       if( objectType != null && objectType.equals( type ) )
  165                       {
  166                           return object;
  167                       }
  168                   }
  169                   catch (ClassCastException e)
  170                   {
  171                       log.warn(e, e);
  172                   }
  173               }
  174           }
  175           return null;
  176       }
  177   
  178       /**
  179        * This will get all dictionary objects by type.
  180        *
  181        * @param type The type of the object.
  182        *
  183        * @return This will return an object with the specified type.
  184        * @throws IOException If there is an error getting the object
  185        */
  186       public List<COSObject> getObjectsByType( String type ) throws IOException
  187       {
  188           return getObjectsByType( COSName.getPDFName( type ) );
  189       }
  190   
  191       /**
  192        * This will get a dictionary object by type.
  193        *
  194        * @param type The type of the object.
  195        *
  196        * @return This will return an object with the specified type.
  197        * @throws IOException If there is an error getting the object
  198        */
  199       public List<COSObject> getObjectsByType( COSName type ) throws IOException
  200       {
  201           List<COSObject> retval = new ArrayList<COSObject>();
  202           for( COSObject object : objectPool.values() )
  203           {
  204               COSBase realObject = object.getObject();
  205               if( realObject instanceof COSDictionary )
  206               {
  207                   try
  208                   {
  209                       COSDictionary dic = (COSDictionary)realObject;
  210                       COSName objectType = (COSName)dic.getItem( COSName.TYPE );
  211                       if( objectType != null && objectType.equals( type ) )
  212                       {
  213                           retval.add( object );
  214                       }
  215                   }
  216                   catch (ClassCastException e)
  217                   {
  218                       log.warn(e, e);
  219                   }
  220               }
  221           }
  222           return retval;
  223       }
  224   
  225       /**
  226        * This will print contents to stdout.
  227        */
  228       public void print()
  229       {
  230           for( COSObject object : objectPool.values() )
  231           {
  232               System.out.println( object);
  233           }
  234       }
  235   
  236       /**
  237        * This will set the version of this PDF document.
  238        *
  239        * @param versionValue The version of the PDF document.
  240        */
  241       public void setVersion( float versionValue )
  242       {
  243           version = versionValue;
  244       }
  245   
  246       /**
  247        * This will get the version of this PDF document.
  248        *
  249        * @return This documents version.
  250        */
  251       public float getVersion()
  252       {
  253           return version;
  254       }
  255   
  256       /**
  257        * This will tell if this is an encrypted document.
  258        *
  259        * @return true If this document is encrypted.
  260        */
  261       public boolean isEncrypted()
  262       {
  263           boolean encrypted = false;
  264           if( trailer != null )
  265           {
  266               encrypted = trailer.getDictionaryObject( COSName.ENCRYPT ) != null;
  267           }
  268           return encrypted;
  269       }
  270   
  271       /**
  272        * This will get the encryption dictionary if the document is encrypted or null
  273        * if the document is not encrypted.
  274        *
  275        * @return The encryption dictionary.
  276        */
  277       public COSDictionary getEncryptionDictionary()
  278       {
  279           return (COSDictionary)trailer.getDictionaryObject( COSName.ENCRYPT );
  280       }
  281   
  282       /**
  283        * This will set the encryption dictionary, this should only be called when
  284        * encypting the document.
  285        *
  286        * @param encDictionary The encryption dictionary.
  287        */
  288       public void setEncryptionDictionary( COSDictionary encDictionary )
  289       {
  290           trailer.setItem( COSName.ENCRYPT, encDictionary );
  291       }
  292   
  293       /**
  294        * This will get the document ID.
  295        *
  296        * @return The document id.
  297        */
  298       public COSArray getDocumentID()
  299       {
  300           return (COSArray) getTrailer().getItem(COSName.ID);
  301       }
  302   
  303       /**
  304        * This will set the document ID.
  305        *
  306        * @param id The document id.
  307        */
  308       public void setDocumentID( COSArray id )
  309       {
  310           getTrailer().setItem(COSName.ID, id);
  311       }
  312   
  313       /**
  314        * This will get the document catalog.
  315        *
  316        * Maybe this should move to an object at PDFEdit level
  317        *
  318        * @return catalog is the root of all document activities
  319        *
  320        * @throws IOException If no catalog can be found.
  321        */
  322       public COSObject getCatalog() throws IOException
  323       {
  324           COSObject catalog = getObjectByType( COSName.CATALOG );
  325           if( catalog == null )
  326           {
  327               throw new IOException( "Catalog cannot be found" );
  328           }
  329           return catalog;
  330       }
  331   
  332       /**
  333        * This will get a list of all available objects.
  334        *
  335        * @return A list of all objects.
  336        */
  337       public List<COSObject> getObjects()
  338       {
  339           return new ArrayList<COSObject>(objectPool.values());
  340       }
  341   
  342       /**
  343        * This will get the document trailer.
  344        *
  345        * @return the document trailer dict
  346        */
  347       public COSDictionary getTrailer()
  348       {
  349           return trailer;
  350       }
  351   
  352       /**
  353        * // MIT added, maybe this should not be supported as trailer is a persistence construct.
  354        * This will set the document trailer.
  355        *
  356        * @param newTrailer the document trailer dictionary
  357        */
  358       public void setTrailer(COSDictionary newTrailer)
  359       {
  360           trailer = newTrailer;
  361       }
  362   
  363       /**
  364        * visitor pattern double dispatch method.
  365        *
  366        * @param visitor The object to notify when visiting this object.
  367        * @return any object, depending on the visitor implementation, or null
  368        * @throws COSVisitorException If an error occurs while visiting this object.
  369        */
  370       public Object accept(ICOSVisitor visitor) throws COSVisitorException
  371       {
  372           return visitor.visitFromDocument( this );
  373       }
  374   
  375       /**
  376        * This will close all storage and delete the tmp files.
  377        *
  378        *  @throws IOException If there is an error close resources.
  379        */
  380       public void close() throws IOException
  381       {
  382           if( scratchFile != null )
  383           {
  384               scratchFile.close();
  385               scratchFile = null;
  386           }
  387           if( tmpFile != null )
  388           {
  389               tmpFile.delete();
  390               tmpFile = null;
  391           }
  392       }
  393   
  394       /**
  395        * Warn the user in the finalizer if he didn't close the PDF document. The method also
  396        * closes the document just in case, to avoid abandoned temporary files. It's still a good
  397        * idea for the user to close the PDF document at the earliest possible to conserve resources.
  398        * @throws IOException if an error occurs while closing the temporary files
  399        */
  400       protected void finalize() throws IOException
  401       {
  402           if( this.warnMissingClose && ( tmpFile != null || scratchFile != null ) )
  403           {
  404               Throwable t = new Throwable( "Warning: You did not close the PDF Document" );
  405               t.printStackTrace();
  406           }
  407           close();
  408       }
  409   
  410       /**
  411        * Controls whether this instance shall issue a warning if the PDF document wasn't closed
  412        * properly through a call to the {@link #close()} method. If the PDF document is held in
  413        * a cache governed by soft references it is impossible to reliably close the document
  414        * before the warning is raised. By default, the warning is enabled.
  415        * @param warn true enables the warning, false disables it.
  416        */
  417       public void setWarnMissingClose(boolean warn)
  418       {
  419           this.warnMissingClose = warn;
  420       }
  421   
  422       /**
  423        * @return Returns the headerString.
  424        */
  425       public String getHeaderString()
  426       {
  427           return headerString;
  428       }
  429       /**
  430        * @param header The headerString to set.
  431        */
  432       public void setHeaderString(String header)
  433       {
  434           headerString = header;
  435       }
  436   
  437       /**
  438        * This method will search the list of objects for types of ObjStm.  If it finds
  439        * them then it will parse out all of the objects from the stream that is contains.
  440        *
  441        * @throws IOException If there is an error parsing the stream.
  442        */
  443       public void dereferenceObjectStreams() throws IOException
  444       {
  445           for( COSObject objStream : getObjectsByType( "ObjStm" ) )
  446           {
  447               COSStream stream = (COSStream)objStream.getObject();
  448               PDFObjectStreamParser parser = new PDFObjectStreamParser( stream, this );
  449               parser.parse();
  450               for( COSObject next : parser.getObjects() )
  451               {
  452                   COSObjectKey key = new COSObjectKey( next );
  453                   COSObject obj = getObjectFromPool( key );
  454                   obj.setObject( next.getObject() );
  455               }
  456           }
  457       }
  458   
  459       /**
  460        * This will get an object from the pool.
  461        *
  462        * @param key The object key.
  463        *
  464        * @return The object in the pool or a new one if it has not been parsed yet.
  465        *
  466        * @throws IOException If there is an error getting the proxy object.
  467        */
  468       public COSObject getObjectFromPool(COSObjectKey key) throws IOException
  469       {
  470           COSObject obj = null;
  471           if( key != null )
  472           {
  473               obj = (COSObject) objectPool.get(key);
  474           }
  475           if (obj == null)
  476           {
  477               // this was a forward reference, make "proxy" object
  478               obj = new COSObject(null);
  479               if( key != null )
  480               {
  481                   obj.setObjectNumber( COSInteger.get( key.getNumber() ) );
  482                   obj.setGenerationNumber( COSInteger.get( key.getGeneration() ) );
  483                   objectPool.put(key, obj);
  484               }
  485           }
  486           return obj;
  487       }
  488   
  489       /**
  490        * Used to populate the XRef HashMap. Will add an Xreftable entry
  491        * that maps ObjectKeys to byte offsets in the file.
  492        * @param objKey The objkey, with id and gen numbers
  493        * @param offset The byte offset in this file
  494        */
  495       public void setXRef(COSObjectKey objKey, int offset)
  496       {
  497           xrefTable.put(objKey, offset);
  498       }
  499   
  500       /**
  501        * Returns the xrefTable which is a mapping of ObjectKeys
  502        * to byte offsets in the file.
  503        * @return mapping of ObjectsKeys to byte offsets
  504        */
  505       public Map<COSObjectKey, Integer> getXrefTable()
  506       {
  507           return xrefTable;
  508       }
  509   
  510       /**
  511        * This method will search the list of objects for types of XRef and
  512        * uses the parsed data to populate the trailer information as well as
  513        * the xref Map.
  514        *
  515        * @throws IOException if there is an error parsing the stream
  516        */
  517       public void parseXrefStreams() throws IOException
  518       {
  519           COSDictionary trailerDict = new COSDictionary();
  520           for( COSObject xrefStream : getObjectsByType( "XRef" ) )
  521           {
  522               COSStream stream = (COSStream)xrefStream.getObject();
  523               trailerDict.addAll(stream);
  524               PDFXrefStreamParser parser = new PDFXrefStreamParser(stream, this);
  525               parser.parse();
  526           }
  527           setTrailer( trailerDict );
  528       }
  529   
  530   }

Home » pdfbox-1.1.0-src » org.apache.pdfbox.cos » [javadoc | source]