/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel;

import java.awt.Dimension;
import java.awt.print.PageFormat;
import java.awt.print.Pageable;
import java.awt.print.Paper;
import java.awt.print.Printable;
import java.awt.print.PrinterException;
import java.awt.print.PrinterJob;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.util.List;
import java.util.HashMap;
import java.util.Map;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdfwriter.COSWriter;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
import org.apache.pdfbox.pdmodel.encryption.PDEncryptionDictionary;
import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandlersManager;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy;

/**
 * This is the in-memory representation of the PDF document.  You need to call
 * close() on this object when you are done using it!!
 *
 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
 * @version $Revision: 1.47 $
 */
public class PDDocument implements Pageable
{
    private COSDocument document;

    // NOTE BGUILLON: the former "encryptOnSave" flag was removed because it is
    // not the responsibility of this class to know whether to encrypt on save.

    // NOTE BGUILLON: the former encryptUserPassword / encryptOwnerPassword and
    // decryptedWithOwnerPassword properties are not used anymore.
    // See getCurrentAccessPermission() instead.

    //cached values
    private PDDocumentInformation documentInformation;
    private PDDocumentCatalog documentCatalog;

    //The encParameters will be cached here.  When the document is decrypted then
    //the COSDocument will not have an "Encrypt" dictionary anymore and this object
    //must be used.
    private PDEncryptionDictionary encParameters = null;

    /**
     * The security handler used to decrypt / encrypt the document.
     */
    private SecurityHandler securityHandler = null;

    /**
     * This associates object ids with a page number.  It's used to determine
     * the page number for bookmarks (or page numbers for anything else for
     * which you have an object id for that matter).  Keys have the form
     * "objectNumber,generationNumber"; values are 1-based Integer page numbers.
     * Built lazily by {@link #getPageMap()}.
     */
    private Map pageMap = null;

    /**
     * This will hold a flag which tells us if we should remove all security
     * from this document.
     */
    private boolean allSecurityToBeRemoved = false;

    /**
     * Constructor, creates a new PDF Document with no pages.  You need to add
     * at least one page for the document to be valid.
     *
     * @throws IOException If there is an error creating this document.
     */
    public PDDocument() throws IOException
    {
        document = new COSDocument();

        //First we need a trailer
        COSDictionary trailer = new COSDictionary();
        document.setTrailer( trailer );

        //Next we need the root dictionary.
        COSDictionary rootDictionary = new COSDictionary();
        trailer.setItem( COSName.ROOT, rootDictionary );
        rootDictionary.setItem( COSName.TYPE, COSName.CATALOG );
        rootDictionary.setItem( COSName.VERSION, COSName.getPDFName( "1.4" ) );

        //next we need the pages tree structure
        COSDictionary pages = new COSDictionary();
        rootDictionary.setItem( COSName.PAGES, pages );
        pages.setItem( COSName.TYPE, COSName.PAGES );
        COSArray kidsArray = new COSArray();
        pages.setItem( COSName.KIDS, kidsArray );
        pages.setItem( COSName.COUNT, COSInteger.ZERO );
    }

    /**
     * Builds {@link #pageMap} from scratch by walking the kids of the root
     * page node.
     */
    private void generatePageMap()
    {
        pageMap = new HashMap();
        // these page nodes could be references to pages,
        // or references to arrays which have references to pages
        // or references to arrays which have references to arrays which have references to pages
        // or ... (I think you get the idea...)
        COSArray pageNodes = ((COSArrayList)(getDocumentCatalog().getPages().getKids())).toList();

        for( int arrayCounter = 0; arrayCounter < pageNodes.size(); ++arrayCounter )
        {
            parseCatalogObject( (COSObject)pageNodes.get( arrayCounter ) );
        }
    }

    /**
     * This will either add the page passed in, or, if it's a pointer to an array
     * of pages, it'll recursively call itself and process everything in the list.
     *
     * @param thePageOrArrayObject A reference to a page or to a page tree node.
     */
    private void parseCatalogObject( COSObject thePageOrArrayObject )
    {
        // -1 is the "not present / wrong type" marker for both values.
        COSBase arrayCountBase = thePageOrArrayObject.getItem( COSName.COUNT );
        int arrayCount = -1;
        if( arrayCountBase instanceof COSInteger )
        {
            arrayCount = ((COSInteger)arrayCountBase).intValue();
        }

        COSBase kidsBase = thePageOrArrayObject.getItem( COSName.KIDS );
        int kidsCount = -1;
        if( kidsBase instanceof COSArray )
        {
            kidsCount = ((COSArray)kidsBase).size();
        }

        if( arrayCount == -1 || kidsCount == -1 )
        {
            // these cases occur when we have a page, not an array of pages
            registerPage( thePageOrArrayObject );
            return;
        }

        // kidsCount != -1 guarantees that kidsBase is a COSArray here.
        COSArray kidsArray = (COSArray)kidsBase;
        // When the declared count equals the number of kids, every kid is
        // treated as a page; otherwise each kid is processed recursively.
        boolean kidsAreAllPages = ( arrayCount == kidsCount );
        for( int i = 0; i < kidsArray.size(); ++i )
        {
            COSObject kid = (COSObject)kidsArray.get( i );
            if( kidsAreAllPages )
            {
                registerPage( kid );
            }
            else
            {
                parseCatalogObject( kid );
            }
        }
    }

    /**
     * Records the given page reference in {@link #pageMap} under the key
     * "objectNumber,generationNumber", assigning it the next 1-based page number.
     * Must only be called while the page map is being generated.
     *
     * @param pageObject The reference to the page.
     */
    private void registerPage( COSObject pageObject )
    {
        String objStr = String.valueOf( pageObject.getObjectNumber().intValue() );
        String genStr = String.valueOf( pageObject.getGenerationNumber().intValue() );
        pageMap.put( objStr + "," + genStr, new Integer( pageMap.size() + 1 ) );
    }

    /**
     * This will return the Map containing the mapping from object-ids to page numbers.
     * The map is generated on first access and cached afterwards.
     *
     * @return the pageMap
     */
    public final Map getPageMap()
    {
        if( pageMap == null )
        {
            generatePageMap();
        }
        return pageMap;
    }

    /**
     * This will add a page to the document.  This is a convenience method, that
     * will add the page to the root of the hierarchy and set the parent of the
     * page to the root.
     *
     * @param page The page to add to the document.
     */
    public void addPage( PDPage page )
    {
        PDPageNode rootPages = getDocumentCatalog().getPages();
        rootPages.getKids().add( page );
        page.setParent( rootPages );
        rootPages.updateCount();
    }

    /**
     * Remove the page from the document.
     *
     * @param page The page to remove from the document.
     *
     * @return true if the page was found false otherwise.
     */
    public boolean removePage( PDPage page )
    {
        PDPageNode parent = page.getParent();
        if( parent == null )
        {
            // a page without a parent node is not attached to any page tree
            return false;
        }
        boolean retval = parent.getKids().remove( page );
        if( retval )
        {
            //do a recursive updateCount starting at the root
            //of the document
            getDocumentCatalog().getPages().updateCount();
        }
        return retval;
    }

    /**
     * Remove the page from the document.
     *
     * @param pageNumber 0 based index to page number.
     * @return true if the page was found false otherwise (including when the
     * index is negative or past the last page).
     */
    public boolean removePage( int pageNumber )
    {
        boolean removed = false;
        List allPages = getDocumentCatalog().getAllPages();
        if( pageNumber >= 0 && pageNumber < allPages.size() )
        {
            PDPage page = (PDPage)allPages.get( pageNumber );
            removed = removePage( page );
        }
        return removed;
    }

    /**
     * This will import and copy the contents from another location.  Currently
     * the content stream is stored in a scratch file.  The scratch file is
     * associated with the document.  If you are adding a page to this document
     * from another document and want to copy the contents to this document's
     * scratch file then use this method otherwise just use the addPage method.
     *
     * @param page The page to import.
     * @return The page that was imported.
     *
     * @throws IOException If there is an error copying the page.
     */
    public PDPage importPage( PDPage page ) throws IOException
    {
        PDPage importedPage = new PDPage( new COSDictionary( page.getCOSDictionary() ) );
        InputStream is = null;
        OutputStream os = null;
        try
        {
            PDStream src = page.getContents();
            // A page without a content stream has nothing to copy; it is
            // still imported (as an empty page) instead of failing with an NPE.
            if( src != null )
            {
                PDStream dest = new PDStream( new COSStream( src.getStream(), document.getScratchFile() ) );
                importedPage.setContents( dest );
                os = dest.createOutputStream();

                byte[] buf = new byte[10240];
                int amountRead = 0;
                is = src.createInputStream();
                while( (amountRead = is.read( buf, 0, buf.length )) > -1 )
                {
                    os.write( buf, 0, amountRead );
                }
            }
            addPage( importedPage );
        }
        finally
        {
            // Nested so that the output stream is closed even if closing the
            // input stream throws.
            try
            {
                if( is != null )
                {
                    is.close();
                }
            }
            finally
            {
                if( os != null )
                {
                    os.close();
                }
            }
        }
        return importedPage;
    }

    /**
     * Constructor that uses an existing document.  The COSDocument that
     * is passed in must be valid.
     *
     * @param doc The COSDocument that this document wraps.
     */
    public PDDocument( COSDocument doc )
    {
        document = doc;
    }

    /**
     * This will get the low level document.
     *
     * @return The document that this layer sits on top of.
     */
    public COSDocument getDocument()
    {
        return document;
    }

    /**
     * This will get the document info dictionary.  This is guaranteed to not return null.
     * If the trailer has no /Info entry an empty one is created and stored in the trailer.
     *
     * @return The documents /Info dictionary
     */
    public PDDocumentInformation getDocumentInformation()
    {
        if( documentInformation == null )
        {
            COSDictionary trailer = document.getTrailer();
            COSDictionary infoDic = (COSDictionary)trailer.getDictionaryObject( COSName.INFO );
            if( infoDic == null )
            {
                infoDic = new COSDictionary();
                trailer.setItem( COSName.INFO, infoDic );
            }
            documentInformation = new PDDocumentInformation( infoDic );
        }
        return documentInformation;
    }

    /**
     * This will set the document information for this document.
     *
     * @param info The updated document information.
     */
    public void setDocumentInformation( PDDocumentInformation info )
    {
        documentInformation = info;
        document.getTrailer().setItem( COSName.INFO, info.getDictionary() );
    }

    /**
     * This will get the document CATALOG.  This is guaranteed to not return null.
     * The catalog object is created on first access and cached afterwards.
     *
     * @return The documents /Root dictionary
     */
    public PDDocumentCatalog getDocumentCatalog()
    {
        if( documentCatalog == null )
        {
            COSDictionary trailer = document.getTrailer();
            COSDictionary rootDictionary = (COSDictionary)trailer.getDictionaryObject( COSName.ROOT );
            if( rootDictionary == null )
            {
                documentCatalog = new PDDocumentCatalog( this );
            }
            else
            {
                documentCatalog = new PDDocumentCatalog( this, rootDictionary );
            }
        }
        return documentCatalog;
    }

    /**
     * This will tell if this document is encrypted or not.
     *
     * @return true If this document is encrypted.
     */
    public boolean isEncrypted()
    {
        return document.isEncrypted();
    }

    /**
     * This will get the encryption dictionary for this document.  This will still
     * return the parameters if the document was decrypted.  If the document was
     * never encrypted then this will return null.  As the encryption architecture
     * in PDF documents is pluggable this returns an abstract class, but the only
     * supported subclass at this time is a PDStandardEncryption object.
     *
     * @return The encryption dictionary(most likely a PDStandardEncryption object)
     *
     * @throws IOException If there is an error determining which security handler to use.
     */
    public PDEncryptionDictionary getEncryptionDictionary() throws IOException
    {
        if( encParameters == null && isEncrypted() )
        {
            encParameters = new PDEncryptionDictionary( document.getEncryptionDictionary() );
        }
        return encParameters;
    }

    /**
     * This will set the encryption dictionary for this document.
     *
     * @param encDictionary The encryption dictionary(most likely a PDStandardEncryption object)
     *
     * @throws IOException If there is an error determining which security handler to use.
     */
    public void setEncryptionDictionary( PDEncryptionDictionary encDictionary ) throws IOException
    {
        encParameters = encDictionary;
    }

    /**
     * This used to determine if the given password is the user password.  The
     * check is no longer implemented here: this method always returns false.
     *
     * @param password The plain text user password.
     *
     * @return always false.
     *
     * @throws IOException Never thrown by the current implementation; kept for compatibility.
     * @throws CryptographyException Never thrown by the current implementation; kept for compatibility.
     *
     * @deprecated
     */
    public boolean isUserPassword( String password ) throws IOException, CryptographyException
    {
        return false;
    }

    /**
     * This used to determine if the given password is the owner password.  The
     * check is no longer implemented here: this method always returns false.
     *
     * @param password The plain text owner password.
     *
     * @return always false.
     *
     * @throws IOException Never thrown by the current implementation; kept for compatibility.
     * @throws CryptographyException Never thrown by the current implementation; kept for compatibility.
     *
     * @deprecated
     */
    public boolean isOwnerPassword( String password ) throws IOException, CryptographyException
    {
        return false;
    }

    /**
     * This will decrypt a document. This method is provided for compatibility reasons only. User should use
     * the new security layer instead and the openProtection method especially.
     *
     * @param password Either the user or owner password.
     *
     * @throws CryptographyException If there is an error decrypting the document.
     * @throws IOException If there is an error getting the stream data.
     * @throws InvalidPasswordException If the password is not a user or owner password.
     *
     */
    public void decrypt( String password ) throws CryptographyException, IOException, InvalidPasswordException
    {
        try
        {
            StandardDecryptionMaterial m = new StandardDecryptionMaterial( password );
            this.openProtection( m );
            document.dereferenceObjectStreams();
        }
        catch( BadSecurityHandlerException e )
        {
            throw new CryptographyException( e );
        }
    }

    /**
     * This used to tell if the document was decrypted with the master password.
     * The information is no longer tracked here: this method always returns false.
     *
     * @return always false.
     *
     * @deprecated use <code>getCurrentAccessPermission</code> instead
     */
    public boolean wasDecryptedWithOwnerPassword()
    {
        return false;
    }

    /**
     * This will <b>mark</b> a document to be encrypted.  The actual encryption
     * will occur when the document is saved.
     * This method is provided for compatibility reasons only. User should use
     * the new security layer instead and the protect method especially.
     *
     * @param ownerPassword The owner password to encrypt the document.
     * @param userPassword The user password to encrypt the document.
     *
     * @throws CryptographyException If an error occurs during encryption.
     * @throws IOException If there is an error accessing the data.
     *
     */
    public void encrypt( String ownerPassword, String userPassword )
        throws CryptographyException, IOException
    {
        try
        {
            StandardProtectionPolicy policy =
                new StandardProtectionPolicy( ownerPassword, userPassword, new AccessPermission() );
            this.protect( policy );
        }
        catch( BadSecurityHandlerException e )
        {
            throw new CryptographyException( e );
        }
    }

    /**
     * The owner password that was passed into the encrypt method. You should
     * never use this method.  The password is no longer retained: this method
     * always returns null.
     *
     * @return always null.
     *
     * @deprecated Do not rely on this method anymore.
     */
    public String getOwnerPasswordForEncryption()
    {
        return null;
    }

    /**
     * The user password that was passed into the encrypt method.  You should
     * never use this method.  The password is no longer retained: this method
     * always returns null.
     *
     * @return always null.
     *
     * @deprecated Do not rely on this method anymore.
     */
    public String getUserPasswordForEncryption()
    {
        return null;
    }

    /**
     * Internal method to determine if the document will be encrypted when it is saved.
     * The state is no longer held here: this method always returns false.
     *
     * @return always false.
     *
     * @deprecated Do not rely on this method anymore. It is the responsibility of
     * COSWriter to hold this state
     */
    public boolean willEncryptWhenSaving()
    {
        return false;
    }

    /**
     * This should only be called by the COSWriter after encryption has completed.
     * It does nothing.
     *
     * @deprecated Do not rely on this method anymore. It is the responsibility of
     * COSWriter to hold this state.
     */
    public void clearWillEncryptWhenSaving()
    {
        //method is deprecated.
    }

    /**
     * This will load a document from a url.
     *
     * @param url The url to load the PDF from.
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( URL url ) throws IOException
    {
        return load( url.openStream() );
    }

    /**
     * This will load a document from a url. Used for skipping corrupt
     * pdf objects
     *
     * @param url The url to load the PDF from.
     * @param force When true, the parser will skip corrupt pdf objects and
     * will continue parsing at the next object in the file
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( URL url, boolean force ) throws IOException
    {
        return load( url.openStream(), force );
    }

    /**
     * This will load a document from a url.
     *
     * @param url The url to load the PDF from.
     * @param scratchFile A location to store temp PDFBox data for this document.
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( URL url, RandomAccess scratchFile ) throws IOException
    {
        return load( url.openStream(), scratchFile );
    }

    /**
     * This will load a document from a file.
     *
     * @param filename The name of the file to load.
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( String filename ) throws IOException
    {
        return load( new FileInputStream( filename ) );
    }

    /**
     * This will load a document from a file. Allows for skipping corrupt pdf
     * objects
     *
     * @param filename The name of the file to load.
     * @param force When true, the parser will skip corrupt pdf objects and
     * will continue parsing at the next object in the file
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( String filename, boolean force ) throws IOException
    {
        return load( new FileInputStream( filename ), force );
    }

    /**
     * This will load a document from a file.
     *
     * @param filename The name of the file to load.
     * @param scratchFile A location to store temp PDFBox data for this document.
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( String filename, RandomAccess scratchFile ) throws IOException
    {
        return load( new FileInputStream( filename ), scratchFile );
    }

    /**
     * This will load a document from a file.
     *
     * @param file The file to load.
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( File file ) throws IOException
    {
        return load( new FileInputStream( file ) );
    }

    /**
     * This will load a document from a file.
     *
     * @param file The file to load.
     * @param scratchFile A location to store temp PDFBox data for this document.
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( File file, RandomAccess scratchFile ) throws IOException
    {
        // The scratch file must be forwarded; it used to be silently dropped here.
        return load( new FileInputStream( file ), scratchFile );
    }

    /**
     * This will load a document from an input stream.
     *
     * @param input The stream that contains the document.
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( InputStream input ) throws IOException
    {
        return load( input, null );
    }

    /**
     * This will load a document from an input stream.
     * Allows for skipping corrupt pdf objects
     *
     * @param input The stream that contains the document.
     * @param force When true, the parser will skip corrupt pdf objects and
     * will continue parsing at the next object in the file
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( InputStream input, boolean force ) throws IOException
    {
        return load( input, null, force );
    }

    /**
     * This will load a document from an input stream.
     *
     * @param input The stream that contains the document.
     * @param scratchFile A location to store temp PDFBox data for this document.
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( InputStream input, RandomAccess scratchFile ) throws IOException
    {
        PDFParser parser = new PDFParser( new BufferedInputStream( input ), scratchFile );
        parser.parse();
        return parser.getPDDocument();
    }

    /**
     * This will load a document from an input stream. Allows for skipping corrupt pdf objects
     *
     * @param input The stream that contains the document.
     * @param scratchFile A location to store temp PDFBox data for this document.
     * @param force When true, the parser will skip corrupt pdf objects and
     * will continue parsing at the next object in the file
     *
     * @return The document that was loaded.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( InputStream input, RandomAccess scratchFile, boolean force ) throws IOException
    {
        PDFParser parser = new PDFParser( new BufferedInputStream( input ), scratchFile, force );
        parser.parse();
        return parser.getPDDocument();
    }

    /**
     * This will save this document to the filesystem.
     *
     * @param fileName The file to save as.
     *
     * @throws IOException If there is an error saving the document.
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void save( String fileName ) throws IOException, COSVisitorException
    {
        OutputStream output = new FileOutputStream( fileName );
        try
        {
            save( output );
        }
        finally
        {
            // save(OutputStream) can fail before a writer is created for the
            // stream, so release the file handle here as well.  Closing a
            // FileOutputStream that is already closed has no effect.
            output.close();
        }
    }

    /**
     * This will save the document to an output stream.
     *
     * @param output The stream to write to.
     *
     * @throws IOException If there is an error writing the document.
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void save( OutputStream output ) throws IOException, COSVisitorException
    {
        //update the count in case any pages have been added behind the scenes.
        getDocumentCatalog().getPages().updateCount();
        COSWriter writer = null;
        try
        {
            writer = new COSWriter( output );
            writer.write( this );
        }
        finally
        {
            // single close point for both the success and the failure path
            if( writer != null )
            {
                writer.close();
            }
        }
    }

    /**
     * This will return the total page count of the PDF document.  Note: This method
     * is deprecated in favor of the getNumberOfPages method.  The getNumberOfPages is
     * a required interface method of the Pageable interface.  This method will
     * be removed in a future version of PDFBox!!
     *
     * @return The total number of pages in the PDF document.
     * @deprecated Use the getNumberOfPages method instead!
     */
    public int getPageCount()
    {
        return getNumberOfPages();
    }

    /**
     * {@inheritDoc}
     */
    public int getNumberOfPages()
    {
        PDDocumentCatalog cat = getDocumentCatalog();
        return (int)cat.getPages().getCount();
    }

    /**
     * {@inheritDoc}
     */
    public PageFormat getPageFormat( int pageIndex )
    {
        PDPage page = (PDPage)getDocumentCatalog().getAllPages().get( pageIndex );
        Dimension mediaBox = page.findMediaBox().createDimension();
        Dimension cropBox = page.findCropBox().createDimension();
        double diffWidth = 0;
        double diffHeight = 0;
        double mediaWidth = mediaBox.getWidth();
        double mediaHeight = mediaBox.getHeight();
        double cropWidth = cropBox.getWidth();
        double cropHeight = cropBox.getHeight();
        // we have to center the ImageableArea if the cropBox is smaller than the mediaBox
        if( !mediaBox.equals( cropBox ) )
        {
            diffWidth = (mediaWidth - cropWidth) / 2;
            diffHeight = (mediaHeight - cropHeight) / 2;
        }
        Paper paper = new Paper();
        paper.setImageableArea( diffWidth, diffHeight, cropWidth, cropHeight );
        paper.setSize( mediaWidth, mediaHeight );
        PageFormat format = new PageFormat();
        format.setPaper( paper );
        return format;
    }

    /**
     * {@inheritDoc}
     */
    public Printable getPrintable( int pageIndex )
    {
        return (Printable)getDocumentCatalog().getAllPages().get( pageIndex );
    }

    /**
     * Validates the print job and the current access permissions, then
     * registers this document as the job's Pageable.
     *
     * @param printJob The printer job to attach this document to.
     *
     * @throws PrinterException If the job is null or printing is not permitted.
     */
    private void attachToPrintJob( PrinterJob printJob ) throws PrinterException
    {
        if( printJob == null )
        {
            throw new PrinterException( "The delivered printJob is null." );
        }
        AccessPermission currentPermissions = this.getCurrentAccessPermission();
        if( !currentPermissions.canPrint() )
        {
            throw new PrinterException( "You do not have permission to print this document." );
        }
        printJob.setPageable( this );
    }

    /**
     * This will send the PDF document to a printer using the given printer job.
     * The print dialog is shown first and nothing is printed if the user cancels it.
     *
     * @see PDDocument#print()
     *
     * @param printJob The printer job.
     *
     * @throws PrinterException If there is an error while sending the PDF to
     * the printer, or you do not have permissions to print this document.
     */
    public void print( PrinterJob printJob ) throws PrinterException
    {
        attachToPrintJob( printJob );
        if( printJob.printDialog() )
        {
            printJob.print();
        }
    }

    /**
     * This will send the PDF document to a printer.  The printing functionality
     * depends on the org.apache.pdfbox.pdfviewer.PageDrawer functionality.  The PageDrawer
     * is a work in progress and some PDFs will print correctly and some will
     * not.  This is a convenience method to create the java.awt.print.PrinterJob.
     * The PDDocument implements the java.awt.print.Pageable interface and
     * PDPage implements the java.awt.print.Printable interface, so advanced printing
     * capabilities can be done by using those interfaces instead of this method.
     *
     * @throws PrinterException If there is an error while sending the PDF to
     * the printer, or you do not have permissions to print this document.
     */
    public void print() throws PrinterException
    {
        print( PrinterJob.getPrinterJob() );
    }

    /**
     * This will send the PDF to the default printer without prompting the user
     * for any printer settings.
     *
     * @see PDDocument#print()
     *
     * @throws PrinterException If there is an error while printing.
     */
    public void silentPrint() throws PrinterException
    {
        silentPrint( PrinterJob.getPrinterJob() );
    }

    /**
     * This will send the PDF to the printer of the given job without prompting
     * the user for any printer settings.
     *
     * @param printJob A printer job definition.
     * @see PDDocument#print()
     *
     * @throws PrinterException If there is an error while printing, or you do
     * not have permissions to print this document.
     */
    public void silentPrint( PrinterJob printJob ) throws PrinterException
    {
        attachToPrintJob( printJob );
        printJob.print();
    }

    /**
     * This will close the underlying COSDocument object.
     *
     * @throws IOException If there is an error releasing resources.
     */
    public void close() throws IOException
    {
        document.close();
    }

    /**
     * Protects the document with the protection policy pp. The document content will be really encrypted
     * when it will be saved. This method only marks the document for encryption.
     *
     * @see org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy
     * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy
     *
     * @param pp The protection policy.
     *
     * @throws BadSecurityHandlerException If there is an error during protection.
     */
    public void protect( ProtectionPolicy pp ) throws BadSecurityHandlerException
    {
        securityHandler = SecurityHandlersManager.getInstance().getSecurityHandler( pp );
    }

    /**
     * Tries to decrypt the document in memory using the provided decryption material.
     *
     * @see org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial
     * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial
     *
     * @param pm The decryption material (password or certificate).
     *
     * @throws BadSecurityHandlerException If there is an error during decryption.
     * @throws IOException If there is an error reading cryptographic information.
     * @throws CryptographyException If there is an error during decryption.
     * @throws RuntimeException If the document has no encryption dictionary or
     * the dictionary names no filter, i.e. there is nothing to decrypt.
     */
    public void openProtection( DecryptionMaterial pm )
        throws BadSecurityHandlerException, IOException, CryptographyException
    {
        PDEncryptionDictionary dict = this.getEncryptionDictionary();
        // getEncryptionDictionary() returns null for a document that was never
        // encrypted; report that the same way as a missing filter instead of
        // failing with a NullPointerException.
        if( dict == null || dict.getFilter() == null )
        {
            throw new RuntimeException( "This document does not need to be decrypted" );
        }
        securityHandler = SecurityHandlersManager.getInstance().getSecurityHandler( dict.getFilter() );
        securityHandler.decryptDocument( this, pm );
        document.dereferenceObjectStreams();
        document.setEncryptionDictionary( null );
    }

    /**
     * Returns the access permissions granted when the document was decrypted.
     * If the document was not decrypted this method returns the access permission
     * for a document owner (ie can do everything).
     * The returned object is in read only mode so that permissions cannot be changed.
     * Methods providing access to content should rely on this object to verify if the current
     * user is allowed to proceed.
     *
     * @return the access permissions for the current user on the document.
     */
    public AccessPermission getCurrentAccessPermission()
    {
        if( this.securityHandler == null )
        {
            return AccessPermission.getOwnerAccessPermission();
        }
        return securityHandler.getCurrentAccessPermission();
    }

    /**
     * Get the security handler that is used for document encryption.
     *
     * @return The handler used to encrypt/decrypt the document, or null if
     * neither protect nor openProtection has installed one.
     */
    public SecurityHandler getSecurityHandler()
    {
        return securityHandler;
    }

    /**
     * Tells whether all security is flagged to be removed from this document.
     *
     * @return The current value of the flag.
     */
    public boolean isAllSecurityToBeRemoved()
    {
        return allSecurityToBeRemoved;
    }

    /**
     * Sets the flag that tells whether all security should be removed from
     * this document.
     *
     * @param allSecurityToBeRemoved The new value of the flag.
     */
    public void setAllSecurityToBeRemoved( boolean allSecurityToBeRemoved )
    {
        this.allSecurityToBeRemoved = allSecurityToBeRemoved;
    }

}