/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSStream;

import org.apache.pdfbox.pdfviewer.PageDrawer;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.interactive.action.PDPageAdditionalActions;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead;

import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.awt.image.ImagingOpException;
import java.awt.print.PageFormat;
import java.awt.print.Printable;
import java.awt.print.PrinterException;
import java.awt.print.PrinterIOException;
import java.io.IOException;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.List;

/**
 * This represents a single page in a PDF document.
 *
 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
 * @version $Revision: 1.29 $
 */
public class PDPage implements COSObjectable, Printable
{

    /**
     * Log instance.
     */
    private static final Log log = LogFactory.getLog(PDPage.class);

    private static final int DEFAULT_USER_SPACE_UNIT_DPI = 72;

    /**
     * Image type used by {@link #convertToImage()}. This is the same value (8)
     * that was historically passed as a bare literal.
     */
    private static final int DEFAULT_IMAGE_TYPE = BufferedImage.TYPE_USHORT_565_RGB;

    private COSDictionary page;

    /**
     * Lazily created wrapper around the parent entry of the page dictionary;
     * see {@link #getParent()}.
     */
    private PDPageNode parent = null;

    /**
     * Lazily created wrapper around the MediaBox entry of the page dictionary;
     * see {@link #getMediaBox()}.
     */
    private PDRectangle mediaBox = null;

    /**
     * A page size of LETTER or 8.5x11.
     */
    public static final PDRectangle PAGE_SIZE_LETTER = new PDRectangle( 612, 792 );
    /**
     * A page size of A0 Paper.
     */
    public static final PDRectangle PAGE_SIZE_A0 = new PDRectangle( 2383, 3370 );
    /**
     * A page size of A1 Paper.
     */
    public static final PDRectangle PAGE_SIZE_A1 = new PDRectangle( 1685, 2383 );
    /**
     * A page size of A2 Paper.
     */
    public static final PDRectangle PAGE_SIZE_A2 = new PDRectangle( 1192, 1685 );
    /**
     * A page size of A3 Paper.
     */
    public static final PDRectangle PAGE_SIZE_A3 = new PDRectangle( 843, 1192 );
    /**
     * A page size of A4 Paper.
     */
    public static final PDRectangle PAGE_SIZE_A4 = new PDRectangle( 596, 843 );
    /**
     * A page size of A5 Paper.
     */
    public static final PDRectangle PAGE_SIZE_A5 = new PDRectangle( 421, 596 );
    /**
     * A page size of A6 Paper.
     */
    public static final PDRectangle PAGE_SIZE_A6 = new PDRectangle( 298, 421 );


    /**
     * Creates a new instance of PDPage with a size of 8.5x11.
     */
    public PDPage()
    {
        page = new COSDictionary();
        page.setItem( COSName.TYPE, COSName.PAGE );
        setMediaBox( PAGE_SIZE_LETTER );
    }

    /**
     * Creates a new instance of PDPage.
     *
     * @param pageDic The existing page dictionary.
     */
    public PDPage( COSDictionary pageDic )
    {
        page = pageDic;
    }

    /**
     * Convert this standard java object to a COS object.
     *
     * @return The cos object that matches this Java object.
     */
    public COSBase getCOSObject()
    {
        return page;
    }

    /**
     * This will get the underlying dictionary that this class acts on.
     *
     * @return The underlying dictionary for this class.
     */
    public COSDictionary getCOSDictionary()
    {
        return page;
    }


    /**
     * This is the parent page node.  The parent is a required element of the
     * page.  This will be null until this page is added to the document.
     * The wrapper object is created on first access and then reused.
     *
     * @return The parent to this page.
     */
    public PDPageNode getParent()
    {
        if( parent == null )
        {
            COSDictionary parentDic = (COSDictionary)page.getDictionaryObject( "Parent", "P" );
            if( parentDic != null )
            {
                parent = new PDPageNode( parentDic );
            }
        }
        return parent;
    }

    /**
     * This will set the parent of this page.
     *
     * @param parent The parent to this page node.
     */
    public void setParent( PDPageNode parent )
    {
        this.parent = parent;
        page.setItem( COSName.PARENT, parent.getDictionary() );
    }

    /**
     * This will update the last modified time for the page object.
     */
    public void updateLastModified()
    {
        page.setDate( "LastModified", new GregorianCalendar() );
    }

    /**
     * This will get the date that the content stream was last modified.  This
     * may return null.
     *
     * @return The date the content stream was last modified.
     *
     * @throws IOException If there is an error accessing the date information.
     */
    public Calendar getLastModified() throws IOException
    {
        return page.getDate( "LastModified" );
    }

    /**
     * This will get the resources at this page and not look up the hierarchy.
     * This attribute is inheritable, and findResources() should probably be used.
     * This will return null if no resources are available at this level.
     *
     * @return The resources at this level in the hierarchy.
     */
    public PDResources getResources()
    {
        PDResources retval = null;
        COSDictionary resources = (COSDictionary)page.getDictionaryObject( COSName.RESOURCES );
        if( resources != null )
        {
            retval = new PDResources( resources );
        }
        return retval;
    }

    /**
     * This will find the resources for this page by looking up the hierarchy until
     * it finds them.
     *
     * @return The resources at this level in the hierarchy.
     */
    public PDResources findResources()
    {
        PDResources retval = getResources();
        PDPageNode parentNode = getParent();
        if( retval == null && parentNode != null )
        {
            retval = parentNode.findResources();
        }
        return retval;
    }

    /**
     * This will set the resources for this page.
     *
     * @param resources The new resources for this page.
     */
    public void setResources( PDResources resources )
    {
        page.setItem( COSName.RESOURCES, resources );
    }

    /**
     * A rectangle, expressed
     * in default user space units, defining the boundaries of the physical
     * medium on which the page is intended to be displayed or printed.
     *
     * This will get the MediaBox at this page and not look up the hierarchy.
     * This attribute is inheritable, and findMediaBox() should probably be used.
     * This will return null if no MediaBox is available at this level.
     * The wrapper object is created on first access and then reused.
     *
     * @return The MediaBox at this level in the hierarchy.
     */
    public PDRectangle getMediaBox()
    {
        if( mediaBox == null )
        {
            COSArray array = (COSArray)page.getDictionaryObject( COSName.MEDIA_BOX );
            if( array != null )
            {
                mediaBox = new PDRectangle( array );
            }
        }
        return mediaBox;
    }

    /**
     * This will find the MediaBox for this page by looking up the hierarchy until
     * it finds them.
     *
     * @return The MediaBox at this level in the hierarchy.
     */
    public PDRectangle findMediaBox()
    {
        PDRectangle retval = getMediaBox();
        if( retval == null && getParent() != null )
        {
            retval = getParent().findMediaBox();
        }
        return retval;
    }

    /**
     * This will set the mediaBox for this page.
     *
     * @param mediaBox The new mediaBox for this page, or null to remove the entry.
     */
    public void setMediaBox( PDRectangle mediaBox )
    {
        this.mediaBox = mediaBox;
        if( mediaBox == null )
        {
            page.removeItem( COSName.MEDIA_BOX );
        }
        else
        {
            page.setItem( COSName.MEDIA_BOX, mediaBox.getCOSArray() );
        }
    }

    /**
     * A rectangle, expressed in default user space units,
     * defining the visible region of default user space. When the page is displayed
     * or printed, its contents are to be clipped (cropped) to this rectangle
     * and then imposed on the output medium in some implementation-defined
     * manner.
     *
     * This will get the CropBox at this page and not look up the hierarchy.
     * This attribute is inheritable, and findCropBox() should probably be used.
     * This will return null if no CropBox is available at this level.
     *
     * @return The CropBox at this level in the hierarchy.
     */
    public PDRectangle getCropBox()
    {
        PDRectangle retval = null;
        COSArray array = (COSArray)page.getDictionaryObject( COSName.CROP_BOX );
        if( array != null )
        {
            retval = new PDRectangle( array );
        }
        return retval;
    }

    /**
     * This will find the CropBox for this page by looking up the hierarchy until
     * it finds them.  If no CropBox is found anywhere, the result of
     * {@link #findMediaBox()} is returned instead.
     *
     * @return The CropBox at this level in the hierarchy.
     */
    public PDRectangle findCropBox()
    {
        PDRectangle retval = getCropBox();
        PDPageNode parentNode = getParent();
        if( retval == null && parentNode != null )
        {
            retval = findParentCropBox( parentNode );
        }

        //default value for cropbox is the media box
        if( retval == null )
        {
            retval = findMediaBox();
        }
        return retval;
    }

    /**
     * This will search for a crop box in the parent and return null if it is not
     * found.  It will NOT default to the media box if it cannot be found.
     *
     * @param node The node to start the search at.
     *
     * @return The first crop box found at the node or one of its ancestors, or null.
     */
    private PDRectangle findParentCropBox( PDPageNode node )
    {
        PDRectangle rect = node.getCropBox();
        PDPageNode parentNode = node.getParent();
        if( rect == null && parentNode != null )
        {
            rect = findParentCropBox( parentNode );
        }
        return rect;
    }

    /**
     * This will set the CropBox for this page.
     *
     * @param cropBox The new CropBox for this page, or null to remove the entry.
     */
    public void setCropBox( PDRectangle cropBox )
    {
        if( cropBox == null )
        {
            page.removeItem( COSName.CROP_BOX );
        }
        else
        {
            page.setItem( COSName.CROP_BOX, cropBox.getCOSArray() );
        }
    }

    /**
     * A rectangle, expressed in default user space units, defining
     * the region to which the contents of the page should be clipped
     * when output in a production environment.  The default is the CropBox.
     *
     * @return The BleedBox attribute.
     */
    public PDRectangle getBleedBox()
    {
        PDRectangle retval = null;
        COSArray array = (COSArray)page.getDictionaryObject( COSName.BLEED_BOX );
        if( array != null )
        {
            retval = new PDRectangle( array );
        }
        else
        {
            retval = findCropBox();
        }
        return retval;
    }

    /**
     * This will set the BleedBox for this page.
     *
     * @param bleedBox The new BleedBox for this page, or null to remove the entry.
     */
    public void setBleedBox( PDRectangle bleedBox )
    {
        if( bleedBox == null )
        {
            page.removeItem( COSName.BLEED_BOX );
        }
        else
        {
            page.setItem( COSName.BLEED_BOX, bleedBox.getCOSArray() );
        }
    }

    /**
     * A rectangle, expressed in default user space units, defining
     * the intended dimensions of the finished page after trimming.
     * The default is the CropBox.
     *
     * @return The TrimBox attribute.
     */
    public PDRectangle getTrimBox()
    {
        PDRectangle retval = null;
        COSArray array = (COSArray)page.getDictionaryObject( COSName.TRIM_BOX );
        if( array != null )
        {
            retval = new PDRectangle( array );
        }
        else
        {
            retval = findCropBox();
        }
        return retval;
    }

    /**
     * This will set the TrimBox for this page.
     *
     * @param trimBox The new TrimBox for this page, or null to remove the entry.
     */
    public void setTrimBox( PDRectangle trimBox )
    {
        if( trimBox == null )
        {
            page.removeItem( COSName.TRIM_BOX );
        }
        else
        {
            page.setItem( COSName.TRIM_BOX, trimBox.getCOSArray() );
        }
    }

    /**
     * A rectangle, expressed in default user space units, defining
     * the extent of the page's meaningful content (including potential
     * white space) as intended by the page's creator.  The default is the CropBox.
     *
     * @return The ArtBox attribute.
     */
    public PDRectangle getArtBox()
    {
        PDRectangle retval = null;
        COSArray array = (COSArray)page.getDictionaryObject( COSName.ART_BOX );
        if( array != null )
        {
            retval = new PDRectangle( array );
        }
        else
        {
            retval = findCropBox();
        }
        return retval;
    }

    /**
     * This will set the ArtBox for this page.
     *
     * @param artBox The new ArtBox for this page, or null to remove the entry.
     */
    public void setArtBox( PDRectangle artBox )
    {
        if( artBox == null )
        {
            page.removeItem( COSName.ART_BOX );
        }
        else
        {
            page.setItem( COSName.ART_BOX, artBox.getCOSArray() );
        }
    }


    //todo BoxColorInfo
    //todo Contents

    /**
     * A value representing the rotation.  This will be null if not set at this level.
     * The number of degrees by which the page should
     * be rotated clockwise when displayed or printed. The value must be a multiple
     * of 90.
     *
     * This will get the rotation at this page and not look up the hierarchy.
     * This attribute is inheritable, and findRotation() should probably be used.
     * This will return null if no rotation is available at this level.
     *
     * @return The rotation at this level in the hierarchy.
     */
    public Integer getRotation()
    {
        Integer retval = null;
        COSNumber value = (COSNumber)page.getDictionaryObject( COSName.ROTATE );
        if( value != null )
        {
            retval = Integer.valueOf( value.intValue() );
        }
        return retval;
    }

    /**
     * This will find the rotation for this page by looking up the hierarchy until
     * it finds them.
     *
     * @return The rotation at this level in the hierarchy, or 0 if this page
     * has neither a rotation of its own nor a parent.
     */
    public int findRotation()
    {
        int retval = 0;
        Integer rotation = getRotation();
        if( rotation != null )
        {
            retval = rotation.intValue();
        }
        else
        {
            PDPageNode parentNode = getParent();
            if( parentNode != null )
            {
                retval = parentNode.findRotation();
            }
        }

        return retval;
    }

    /**
     * This will set the rotation for this page.
     *
     * @param rotation The new rotation for this page.
     */
    public void setRotation( int rotation )
    {
        page.setInt( COSName.ROTATE, rotation );
    }

    /**
     * This will get the contents of the PDF Page, in the case that the contents
     * of the page is an array then the entire array of streams will
     * be wrapped and appear as a single stream.
     *
     * @return The page content stream.
     *
     * @throws IOException If there is an error obtaining the stream.
     */
    public PDStream getContents() throws IOException
    {
        return PDStream.createFromCOS( page.getDictionaryObject( COSName.CONTENTS ) );
    }

    /**
     * This will set the contents of this page.
     *
     * @param contents The new contents of the page.
     */
    public void setContents( PDStream contents )
    {
        page.setItem( COSName.CONTENTS, contents );
    }

    /**
     * This will get a list of PDThreadBead objects, which are article threads in the
     * document.  This will return an empty list if there are no thread beads.
     *
     * @return A list of article threads on this page.
     */
    public List getThreadBeads()
    {
        COSArray beads = (COSArray)page.getDictionaryObject( COSName.B );
        if( beads == null )
        {
            beads = new COSArray();
        }
        List pdObjects = new ArrayList();
        for( int i=0; i<beads.size(); i++)
        {
            COSDictionary beadDic = (COSDictionary)beads.getObject( i );
            PDThreadBead bead = null;
            //in some cases the bead is null
            if( beadDic != null )
            {
                bead = new PDThreadBead( beadDic );
            }
            pdObjects.add( bead );
        }
        return new COSArrayList(pdObjects, beads);

    }

    /**
     * This will set the list of thread beads.
     *
     * @param beads A list of PDThreadBead objects or null.
     */
    public void setThreadBeads( List beads )
    {
        page.setItem( COSName.B, COSArrayList.converterToCOSArray( beads ) );
    }

    /**
     * Get the metadata that is part of this page.  This will
     * return null if there is no meta data for this object.
     *
     * @return The metadata for this object.
     */
    public PDMetadata getMetadata()
    {
        PDMetadata retval = null;
        COSStream stream = (COSStream)page.getDictionaryObject( COSName.METADATA );
        if( stream != null )
        {
            retval = new PDMetadata( stream );
        }
        return retval;
    }

    /**
     * Set the metadata for this object.  This can be null.
     *
     * @param meta The meta data for this object.
     */
    public void setMetadata( PDMetadata meta )
    {
        page.setItem( COSName.METADATA, meta );
    }

    /**
     * Convert this page to an output image of type
     * {@link BufferedImage#TYPE_USHORT_565_RGB} at twice the default
     * user space resolution (2 x 72 dpi).
     *
     * @return A graphical representation of this page.
     *
     * @throws IOException If there is an error drawing to the image.
     */
    public BufferedImage convertToImage() throws IOException
    {
        //note we are doing twice as many pixels because
        //the default size is not really good resolution,
        //so create an image that is twice the size
        //and let the client scale it down.
        return convertToImage( DEFAULT_IMAGE_TYPE, 2 * DEFAULT_USER_SPACE_UNIT_DPI );
    }

    /**
     * Convert this page to an output image.  If the page rotation found by
     * {@link #findRotation()} is 90 or 270 degrees, the returned image is
     * rotated accordingly and its width and height are swapped.
     *
     * @param imageType the image type (see {@link BufferedImage}.TYPE_*)
     * @param resolution the resolution in dpi (dots per inch)
     * @return A graphical representation of this page.
     *
     * @throws IOException If there is an error drawing to the image, or if no
     * MediaBox can be found for this page.
     */
    public BufferedImage convertToImage(int imageType, int resolution) throws IOException
    {
        PDRectangle mBox = findMediaBox();
        if( mBox == null )
        {
            throw new IOException( "Unable to convert page to image: no MediaBox found" );
        }
        float widthPt = mBox.getWidth();
        float heightPt = mBox.getHeight();
        float scaling = resolution / (float)DEFAULT_USER_SPACE_UNIT_DPI;
        int widthPx = Math.round(widthPt * scaling);
        int heightPx = Math.round(heightPt * scaling);
        //TODO The following reduces accuracy. It should really be a Dimension2D.Float.
        Dimension pageDimension = new Dimension( (int)widthPt, (int)heightPt );

        BufferedImage retval = new BufferedImage( widthPx, heightPx, imageType );
        Graphics2D graphics = (Graphics2D)retval.getGraphics();
        try
        {
            graphics.setBackground( Color.WHITE );
            graphics.clearRect( 0, 0, retval.getWidth(), retval.getHeight() );
            graphics.scale( scaling, scaling );
            PageDrawer drawer = new PageDrawer();
            drawer.drawPage( graphics, this, pageDimension );
        }
        finally
        {
            //release the native resources held by the graphics context
            graphics.dispose();
        }

        //TODO This could be done directly by manipulating the transformation matrix before painting.
        //That could result in a better image quality.
        try
        {
            int rotation = findRotation();
            if (rotation == 90 || rotation == 270)
            {
                retval = rotateQuarterTurn( retval, rotation );
            }
        }
        catch (ImagingOpException e)
        {
            //best effort: fall back to the unrotated image
            log.warn("Unable to rotate page image", e);
        }

        return retval;
    }

    /**
     * Creates a copy of the given image rotated clockwise by 90 or 270 degrees.
     * The result has the width and height of the source swapped so that no
     * part of the source is clipped.
     *
     * @param source The image to rotate.
     * @param rotation The clockwise rotation in degrees; must be 90 or 270.
     *
     * @return A new image containing the rotated source.
     */
    private static BufferedImage rotateQuarterTurn( BufferedImage source, int rotation )
    {
        int w = source.getWidth();
        int h = source.getHeight();
        BufferedImage rotated = new BufferedImage( h, w, source.getType() );
        Graphics2D g = rotated.createGraphics();
        try
        {
            //A rotation about the origin moves the image out of the visible
            //quadrant, so shift it back into the target raster first
            //(the transform specified last is applied to the pixels first).
            if( rotation == 90 )
            {
                g.translate( h, 0 );
            }
            else
            {
                g.translate( 0, w );
            }
            g.rotate( Math.toRadians( rotation ) );
            g.drawImage( source, null, 0, 0 );
        }
        finally
        {
            g.dispose();
        }
        return rotated;
    }

    /**
     * Get the page actions.  If the page has no additional-actions dictionary
     * yet, an empty one is created and stored in the page dictionary.
     *
     * @return The Actions for this Page
     */
    public PDPageAdditionalActions getActions()
    {
        COSDictionary addAct = (COSDictionary) page.getDictionaryObject(COSName.AA);
        if (addAct == null)
        {
            addAct = new COSDictionary();
            page.setItem(COSName.AA, addAct);
        }
        return new PDPageAdditionalActions(addAct);
    }

    /**
     * Set the page actions.
     *
     * @param actions The actions for the page.
     */
    public void setActions( PDPageAdditionalActions actions )
    {
        page.setItem( COSName.AA, actions );
    }

    /**
     * This will return a list of the Annotations for this page.  If the page
     * has no annotations array yet, an empty one is created and stored in the
     * page dictionary.
     *
     * @return List of the PDAnnotation objects.
     *
     * @throws IOException If there is an error while creating the annotations.
     */
    public List getAnnotations() throws IOException
    {
        COSArrayList retval = null;
        COSArray annots = (COSArray)page.getDictionaryObject(COSName.ANNOTS);
        if (annots == null)
        {
            annots = new COSArray();
            page.setItem(COSName.ANNOTS, annots);
            retval = new COSArrayList(new ArrayList(), annots);
        }
        else
        {
            List actuals = new ArrayList();

            for (int i=0; i < annots.size(); i++)
            {
                COSBase item = annots.getObject(i);
                actuals.add( PDAnnotation.createAnnotation( item ) );
            }
            retval = new COSArrayList(actuals, annots);
        }
        return retval;
    }

    /**
     * This will set the list of annotations.
     *
     * @param annots The new list of annotations.
     */
    public void setAnnotations( List annots )
    {
        page.setItem( COSName.ANNOTS, COSArrayList.converterToCOSArray( annots ) );
    }

    /**
     * {@inheritDoc}
     *
     * This implementation draws this page, sized to its crop box, and always
     * reports {@link Printable#PAGE_EXISTS}; the pageFormat and pageIndex
     * arguments are not consulted.
     */
    public int print(Graphics graphics, PageFormat pageFormat, int pageIndex)
        throws PrinterException
    {
        int retval = Printable.PAGE_EXISTS;
        try
        {
            PageDrawer drawer = new PageDrawer();
            PDRectangle cropBox = findCropBox();
            if( cropBox == null )
            {
                throw new PrinterException( "Unable to print page: no CropBox or MediaBox found" );
            }
            drawer.drawPage( graphics, this, cropBox.createDimension() );
        }
        catch( IOException io )
        {
            throw new PrinterIOException( io );
        }
        return retval;
    }

    /**
     * {@inheritDoc}
     *
     * Two pages are equal when they wrap the very same COS object instance.
     */
    public boolean equals( Object other )
    {
        return other instanceof PDPage && ((PDPage)other).getCOSObject() == this.getCOSObject();
    }

    /**
     * {@inheritDoc}
     */
    public int hashCode()
    {
        return this.getCOSDictionary().hashCode();
    }
}