1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.pdfbox.cos; 18 19 import java.io.File; 20 import java.io.IOException; 21 22 import java.util.ArrayList; 23 import java.util.HashMap; 24 import java.util.List; 25 import java.util.Map; 26 27 import org.apache.commons.logging.Log; 28 import org.apache.commons.logging.LogFactory; 29 import org.apache.pdfbox.exceptions.COSVisitorException; 30 import org.apache.pdfbox.io.RandomAccess; 31 import org.apache.pdfbox.io.RandomAccessFile; 32 33 import org.apache.pdfbox.pdfparser.PDFObjectStreamParser; 34 import org.apache.pdfbox.pdfparser.PDFXrefStreamParser; 35 import org.apache.pdfbox.persistence.util.COSObjectKey; 36 37 /** 38 * This is the in-memory representation of the PDF document. You need to call 39 * close() on this object when you are done using it!! 40 * 41 * @author <a href="ben@benlitchfield.com">Ben Litchfield</a> 42 * @version $Revision: 1.28 $ 43 */ 44 public class COSDocument extends COSBase 45 { 46 47 /** 48 * Log instance. 49 */ 50 private static final Log log = LogFactory.getLog(COSDocument.class); 51 52 private float version; 53 54 /** 55 * Maps ObjectKeys to a COSObject. Note that references to these objects 56 * are also stored in COSDictionary objects that map a name to a specific object. 57 */ 58 private final Map<COSObjectKey, COSObject> objectPool = 59 new HashMap<COSObjectKey, COSObject>(); 60 61 /** 62 * Maps object and generation ids to object byte offsets. 63 */ 64 private final Map<COSObjectKey, Integer> xrefTable = 65 new HashMap<COSObjectKey, Integer>(); 66 67 /** 68 * Document trailer dictionary. 69 */ 70 private COSDictionary trailer; 71 72 /** 73 * This file will store the streams in order to conserve memory. 74 */ 75 private RandomAccess scratchFile = null; 76 77 private File tmpFile = null; 78 79 private String headerString = "%PDF-1.4"; 80 81 private boolean warnMissingClose = true; 82 83 /** 84 * Constructor. Uses the java.io.tmpdir value to create a file 85 * to store the streams. 86 * 87 * @throws IOException If there is an error creating the tmp file. 88 */ 89 public COSDocument() throws IOException 90 { 91 this( new File( System.getProperty( "java.io.tmpdir" ) ) ); 92 } 93 94 /** 95 * Constructor that will create a create a scratch file in the 96 * following directory. 97 * 98 * @param scratchDir The directory to store a scratch file. 99 * 100 * @throws IOException If there is an error creating the tmp file. 101 */ 102 public COSDocument( File scratchDir ) throws IOException 103 { 104 tmpFile = File.createTempFile( "pdfbox", "tmp", scratchDir ); 105 scratchFile = new RandomAccessFile( tmpFile, "rw" ); 106 } 107 108 /** 109 * Constructor that will use the following random access file for storage 110 * of the PDF streams. The client of this method is responsible for deleting 111 * the storage if necessary that this file will write to. The close method 112 * will close the file though. 113 * 114 * @param file The random access file to use for storage. 115 */ 116 public COSDocument( RandomAccess file ) 117 { 118 scratchFile = file; 119 } 120 121 /** 122 * This will get the scratch file for this document. 123 * 124 * @return The scratch file. 125 */ 126 public RandomAccess getScratchFile() 127 { 128 return scratchFile; 129 } 130 131 /** 132 * This will get the first dictionary object by type. 133 * 134 * @param type The type of the object. 135 * 136 * @return This will return an object with the specified type. 137 * @throws IOException If there is an error getting the object 138 */ 139 public COSObject getObjectByType( String type ) throws IOException 140 { 141 return getObjectByType( COSName.getPDFName( type ) ); 142 } 143 144 /** 145 * This will get the first dictionary object by type. 146 * 147 * @param type The type of the object. 148 * 149 * @return This will return an object with the specified type. 150 * @throws IOException If there is an error getting the object 151 */ 152 public COSObject getObjectByType( COSName type ) throws IOException 153 { 154 for( COSObject object : objectPool.values() ) 155 { 156 157 COSBase realObject = object.getObject(); 158 if( realObject instanceof COSDictionary ) 159 { 160 try 161 { 162 COSDictionary dic = (COSDictionary)realObject; 163 COSName objectType = (COSName)dic.getItem( COSName.TYPE ); 164 if( objectType != null && objectType.equals( type ) ) 165 { 166 return object; 167 } 168 } 169 catch (ClassCastException e) 170 { 171 log.warn(e, e); 172 } 173 } 174 } 175 return null; 176 } 177 178 /** 179 * This will get all dictionary objects by type. 180 * 181 * @param type The type of the object. 182 * 183 * @return This will return an object with the specified type. 184 * @throws IOException If there is an error getting the object 185 */ 186 public List<COSObject> getObjectsByType( String type ) throws IOException 187 { 188 return getObjectsByType( COSName.getPDFName( type ) ); 189 } 190 191 /** 192 * This will get a dictionary object by type. 193 * 194 * @param type The type of the object. 195 * 196 * @return This will return an object with the specified type. 197 * @throws IOException If there is an error getting the object 198 */ 199 public List<COSObject> getObjectsByType( COSName type ) throws IOException 200 { 201 List<COSObject> retval = new ArrayList<COSObject>(); 202 for( COSObject object : objectPool.values() ) 203 { 204 COSBase realObject = object.getObject(); 205 if( realObject instanceof COSDictionary ) 206 { 207 try 208 { 209 COSDictionary dic = (COSDictionary)realObject; 210 COSName objectType = (COSName)dic.getItem( COSName.TYPE ); 211 if( objectType != null && objectType.equals( type ) ) 212 { 213 retval.add( object ); 214 } 215 } 216 catch (ClassCastException e) 217 { 218 log.warn(e, e); 219 } 220 } 221 } 222 return retval; 223 } 224 225 /** 226 * This will print contents to stdout. 227 */ 228 public void print() 229 { 230 for( COSObject object : objectPool.values() ) 231 { 232 System.out.println( object); 233 } 234 } 235 236 /** 237 * This will set the version of this PDF document. 238 * 239 * @param versionValue The version of the PDF document. 240 */ 241 public void setVersion( float versionValue ) 242 { 243 version = versionValue; 244 } 245 246 /** 247 * This will get the version of this PDF document. 248 * 249 * @return This documents version. 250 */ 251 public float getVersion() 252 { 253 return version; 254 } 255 256 /** 257 * This will tell if this is an encrypted document. 258 * 259 * @return true If this document is encrypted. 260 */ 261 public boolean isEncrypted() 262 { 263 boolean encrypted = false; 264 if( trailer != null ) 265 { 266 encrypted = trailer.getDictionaryObject( COSName.ENCRYPT ) != null; 267 } 268 return encrypted; 269 } 270 271 /** 272 * This will get the encryption dictionary if the document is encrypted or null 273 * if the document is not encrypted. 274 * 275 * @return The encryption dictionary. 276 */ 277 public COSDictionary getEncryptionDictionary() 278 { 279 return (COSDictionary)trailer.getDictionaryObject( COSName.ENCRYPT ); 280 } 281 282 /** 283 * This will set the encryption dictionary, this should only be called when 284 * encypting the document. 285 * 286 * @param encDictionary The encryption dictionary. 287 */ 288 public void setEncryptionDictionary( COSDictionary encDictionary ) 289 { 290 trailer.setItem( COSName.ENCRYPT, encDictionary ); 291 } 292 293 /** 294 * This will get the document ID. 295 * 296 * @return The document id. 297 */ 298 public COSArray getDocumentID() 299 { 300 return (COSArray) getTrailer().getItem(COSName.ID); 301 } 302 303 /** 304 * This will set the document ID. 305 * 306 * @param id The document id. 307 */ 308 public void setDocumentID( COSArray id ) 309 { 310 getTrailer().setItem(COSName.ID, id); 311 } 312 313 /** 314 * This will get the document catalog. 315 * 316 * Maybe this should move to an object at PDFEdit level 317 * 318 * @return catalog is the root of all document activities 319 * 320 * @throws IOException If no catalog can be found. 321 */ 322 public COSObject getCatalog() throws IOException 323 { 324 COSObject catalog = getObjectByType( COSName.CATALOG ); 325 if( catalog == null ) 326 { 327 throw new IOException( "Catalog cannot be found" ); 328 } 329 return catalog; 330 } 331 332 /** 333 * This will get a list of all available objects. 334 * 335 * @return A list of all objects. 336 */ 337 public List<COSObject> getObjects() 338 { 339 return new ArrayList<COSObject>(objectPool.values()); 340 } 341 342 /** 343 * This will get the document trailer. 344 * 345 * @return the document trailer dict 346 */ 347 public COSDictionary getTrailer() 348 { 349 return trailer; 350 } 351 352 /** 353 * // MIT added, maybe this should not be supported as trailer is a persistence construct. 354 * This will set the document trailer. 355 * 356 * @param newTrailer the document trailer dictionary 357 */ 358 public void setTrailer(COSDictionary newTrailer) 359 { 360 trailer = newTrailer; 361 } 362 363 /** 364 * visitor pattern double dispatch method. 365 * 366 * @param visitor The object to notify when visiting this object. 367 * @return any object, depending on the visitor implementation, or null 368 * @throws COSVisitorException If an error occurs while visiting this object. 369 */ 370 public Object accept(ICOSVisitor visitor) throws COSVisitorException 371 { 372 return visitor.visitFromDocument( this ); 373 } 374 375 /** 376 * This will close all storage and delete the tmp files. 377 * 378 * @throws IOException If there is an error close resources. 379 */ 380 public void close() throws IOException 381 { 382 if( scratchFile != null ) 383 { 384 scratchFile.close(); 385 scratchFile = null; 386 } 387 if( tmpFile != null ) 388 { 389 tmpFile.delete(); 390 tmpFile = null; 391 } 392 } 393 394 /** 395 * Warn the user in the finalizer if he didn't close the PDF document. The method also 396 * closes the document just in case, to avoid abandoned temporary files. It's still a good 397 * idea for the user to close the PDF document at the earliest possible to conserve resources. 398 * @throws IOException if an error occurs while closing the temporary files 399 */ 400 protected void finalize() throws IOException 401 { 402 if( this.warnMissingClose && ( tmpFile != null || scratchFile != null ) ) 403 { 404 Throwable t = new Throwable( "Warning: You did not close the PDF Document" ); 405 t.printStackTrace(); 406 } 407 close(); 408 } 409 410 /** 411 * Controls whether this instance shall issue a warning if the PDF document wasn't closed 412 * properly through a call to the {@link #close()} method. If the PDF document is held in 413 * a cache governed by soft references it is impossible to reliably close the document 414 * before the warning is raised. By default, the warning is enabled. 415 * @param warn true enables the warning, false disables it. 416 */ 417 public void setWarnMissingClose(boolean warn) 418 { 419 this.warnMissingClose = warn; 420 } 421 422 /** 423 * @return Returns the headerString. 424 */ 425 public String getHeaderString() 426 { 427 return headerString; 428 } 429 /** 430 * @param header The headerString to set. 431 */ 432 public void setHeaderString(String header) 433 { 434 headerString = header; 435 } 436 437 /** 438 * This method will search the list of objects for types of ObjStm. If it finds 439 * them then it will parse out all of the objects from the stream that is contains. 440 * 441 * @throws IOException If there is an error parsing the stream. 442 */ 443 public void dereferenceObjectStreams() throws IOException 444 { 445 for( COSObject objStream : getObjectsByType( "ObjStm" ) ) 446 { 447 COSStream stream = (COSStream)objStream.getObject(); 448 PDFObjectStreamParser parser = new PDFObjectStreamParser( stream, this ); 449 parser.parse(); 450 for( COSObject next : parser.getObjects() ) 451 { 452 COSObjectKey key = new COSObjectKey( next ); 453 COSObject obj = getObjectFromPool( key ); 454 obj.setObject( next.getObject() ); 455 } 456 } 457 } 458 459 /** 460 * This will get an object from the pool. 461 * 462 * @param key The object key. 463 * 464 * @return The object in the pool or a new one if it has not been parsed yet. 465 * 466 * @throws IOException If there is an error getting the proxy object. 467 */ 468 public COSObject getObjectFromPool(COSObjectKey key) throws IOException 469 { 470 COSObject obj = null; 471 if( key != null ) 472 { 473 obj = (COSObject) objectPool.get(key); 474 } 475 if (obj == null) 476 { 477 // this was a forward reference, make "proxy" object 478 obj = new COSObject(null); 479 if( key != null ) 480 { 481 obj.setObjectNumber( COSInteger.get( key.getNumber() ) ); 482 obj.setGenerationNumber( COSInteger.get( key.getGeneration() ) ); 483 objectPool.put(key, obj); 484 } 485 } 486 return obj; 487 } 488 489 /** 490 * Used to populate the XRef HashMap. Will add an Xreftable entry 491 * that maps ObjectKeys to byte offsets in the file. 492 * @param objKey The objkey, with id and gen numbers 493 * @param offset The byte offset in this file 494 */ 495 public void setXRef(COSObjectKey objKey, int offset) 496 { 497 xrefTable.put(objKey, offset); 498 } 499 500 /** 501 * Returns the xrefTable which is a mapping of ObjectKeys 502 * to byte offsets in the file. 503 * @return mapping of ObjectsKeys to byte offsets 504 */ 505 public Map<COSObjectKey, Integer> getXrefTable() 506 { 507 return xrefTable; 508 } 509 510 /** 511 * This method will search the list of objects for types of XRef and 512 * uses the parsed data to populate the trailer information as well as 513 * the xref Map. 514 * 515 * @throws IOException if there is an error parsing the stream 516 */ 517 public void parseXrefStreams() throws IOException 518 { 519 COSDictionary trailerDict = new COSDictionary(); 520 for( COSObject xrefStream : getObjectsByType( "XRef" ) ) 521 { 522 COSStream stream = (COSStream)xrefStream.getObject(); 523 trailerDict.addAll(stream); 524 PDFXrefStreamParser parser = new PDFXrefStreamParser(stream, this); 525 parser.parse(); 526 } 527 setTrailer( trailerDict ); 528 } 529 530 }