Home » pdfbox-1.1.0-src » org.apache.pdfbox.pdmodel.graphics.xobject » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   package org.apache.pdfbox.pdmodel.graphics.xobject;
   18   
   19   import java.awt.image.BufferedImage;
   20   import java.io.InputStream;
   21   import java.io.IOException;
   22   import java.io.OutputStream;
   23   import java.io.File;
   24   import java.io.FileInputStream;
   25   import java.io.ByteArrayInputStream;
   26   import java.io.FileOutputStream;
   27   
   28   import java.util.ArrayList;
   29   import java.util.List;
   30   
   31   import javax.imageio.ImageIO;
   32   import javax.imageio.IIOException;
   33   
   34   import org.apache.pdfbox.cos.COSDictionary;
   35   import org.apache.pdfbox.cos.COSName;
   36   
   37   import org.apache.pdfbox.pdmodel.PDDocument;
   38   import org.apache.pdfbox.pdmodel.common.PDStream;
   39   import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
   40   
   41   /**
   42    * An image class for JPegs.
   43    *
   44    * @author mathiak
   45    * @version $Revision: 1.5 $
   46    */
   47   public class PDJpeg extends PDXObjectImage
   48   {
   49   
   50       private static final List DCT_FILTERS = new ArrayList();
   51   
   52       static
   53       {
   54           DCT_FILTERS.add( COSName.DCT_DECODE.getName() );
   55           DCT_FILTERS.add( COSName.DCT_DECODE_ABBREVIATION.getName() );
   56       }
   57   
   58       /**
   59        * Standard constructor.
   60        *
   61        * @param jpeg The COSStream from which to extract the JPeg
   62        */
   63       public PDJpeg(PDStream jpeg)
   64       {
   65           super(jpeg, "jpg");
   66       }
   67   
   68       /**
   69        * Construct from a stream.
   70        *
   71        * @param doc The document to create the image as part of.
   72        * @param is The stream that contains the jpeg data.
   73        * @throws IOException If there is an error reading the jpeg data.
   74        */
   75       public PDJpeg( PDDocument doc, InputStream is ) throws IOException
   76       {
   77           super( new PDStream( doc, is, true ), "jpg" );
   78           COSDictionary dic = getCOSStream();
   79           dic.setItem( COSName.FILTER, COSName.DCT_DECODE );
   80           dic.setItem( COSName.SUBTYPE, COSName.IMAGE);
   81           dic.setItem( COSName.TYPE, COSName.getPDFName( "XObject" ) );
   82   
   83           BufferedImage image = getRGBImage();
   84           if (image != null) 
   85           {
   86               setBitsPerComponent( 8 );
   87               setColorSpace( PDDeviceRGB.INSTANCE );
   88               setHeight( image.getHeight() );
   89               setWidth( image.getWidth() );
   90           }
   91   
   92       }
   93   
   94       /**
   95        * Construct from a buffered image.
   96        *
   97        * @param doc The document to create the image as part of.
   98        * @param bi The image to convert to a jpeg
   99        * @throws IOException If there is an error processing the jpeg data.
  100        */
  101       public PDJpeg( PDDocument doc, BufferedImage bi ) throws IOException
  102       {
  103           super( new PDStream( doc ), "jpg" );
  104   
  105           java.io.OutputStream os = getCOSStream().createFilteredStream();
  106           try
  107           {
  108   
  109               ImageIO.write(bi,"jpeg",os);
  110   
  111               COSDictionary dic = getCOSStream();
  112               dic.setItem( COSName.FILTER, COSName.DCT_DECODE );
  113               dic.setItem( COSName.SUBTYPE, COSName.IMAGE);
  114               dic.setItem( COSName.TYPE, COSName.getPDFName( "XObject" ) );
  115   
  116               setBitsPerComponent( 8 );
  117               setColorSpace( PDDeviceRGB.INSTANCE );
  118               setHeight( bi.getHeight() );
  119               setWidth( bi.getWidth() );
  120           }
  121           finally
  122           {
  123               os.close();
  124           }
  125       }
  126   
  127       /**
  128        * Returns an image of the JPeg, or null if JPegs are not supported. (They should be. )
  129        * {@inheritDoc}
  130        */
  131       public BufferedImage getRGBImage() throws IOException
  132       {   //TODO PKOCH
  133           File imgFile = null;
  134           BufferedImage bi = null;
  135           boolean readError = false;
  136           try 
  137           {
  138               imgFile = File.createTempFile("pdjpeg", ".jpeg");
  139               write2file(imgFile);
  140   
  141               // 1. try to read jpeg image
  142               try 
  143               {
  144                   bi = ImageIO.read(imgFile);
  145               } 
  146               catch (IIOException iioe) 
  147               {
  148                   // cannot read jpeg
  149                   readError = true;
  150               } 
  151               catch (Exception ignore) 
  152               {}
  153   
  154               // 2. try to read jpeg again. some jpegs have some strange header containing
  155               //    "Adobe " at some place. so just replace the header with a valid jpeg header.
  156               // TODO : not sure if it works for all cases
  157               if (bi == null && readError) 
  158               {
  159                   byte[] newImage = replaceHeader(imgFile);
  160   
  161                   ByteArrayInputStream bai = new ByteArrayInputStream(newImage);
  162   
  163                   // persist file temporarely, because i was not able to manage
  164                   // to call the ImageIO.read(InputStream) successfully.
  165                   FileOutputStream o = new FileOutputStream(imgFile);
  166                   byte[] buffer = new byte[512];
  167                   int read;
  168                   while ((read=bai.read(buffer)) >0) 
  169                   {
  170                      o.write(buffer, 0, read);
  171                   }
  172   
  173                   bai.close();
  174                   o.close();
  175   
  176                   bi = ImageIO.read(imgFile);
  177               }
  178           } 
  179           finally 
  180           {
  181               if (imgFile != null) 
  182               {
  183                   imgFile.delete();
  184               }
  185           }
  186           return bi;
  187       }
  188   
  189       /**
  190        * This writes the JPeg to out.
  191        * {@inheritDoc}
  192        */
  193       public void write2OutputStream(OutputStream out) throws IOException
  194       {
  195           InputStream data = getPDStream().getPartiallyFilteredStream( DCT_FILTERS );
  196           byte[] buf = new byte[1024];
  197           int amountRead = -1;
  198           while( (amountRead = data.read( buf )) != -1 )
  199           {
  200               out.write( buf, 0, amountRead );
  201           }
  202       }
  203   
  204       /**
  205        * Returns the given file as byte array.
  206        * @param file File to be read
  207        * @return given file as byte array
  208        * @throws IOException if somethin went wrong during reading the file
  209        */
  210       public static byte[] getBytesFromFile(File file) throws IOException 
  211       {
  212           InputStream is = new FileInputStream(file);
  213           long length = file.length();
  214   
  215           if (length > Integer.MAX_VALUE) 
  216           {
  217               // File is too large
  218               throw new IOException("File is tooo large");
  219           }
  220   
  221           // Create the byte array to hold the data
  222           byte[] bytes = new byte[(int)length];
  223   
  224           // Read in the bytes
  225           int offset = 0;
  226           int numRead = 0;
  227   
  228           while (offset < bytes.length
  229                   && (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) 
  230           {
  231               offset += numRead;
  232           }
  233   
  234           // Ensure all the bytes have been read in
  235           if (offset < bytes.length) 
  236           {
  237               throw new IOException("Could not completely read file "+file.getName());
  238           }
  239           is.close();
  240           return bytes;
  241       }
  242   
  243       private int getHeaderEndPos(byte[] image) 
  244       {
  245           for (int i = 0; i < image.length; i++) 
  246           {
  247               byte b = image[i];
  248               if (b == (byte) 0xDB) 
  249               {        
  250                   // TODO : check for ff db
  251                   return i -2;
  252               }
  253           }
  254           return 0;
  255       }
  256   
  257       private byte[] replaceHeader(File jpegFile) throws IOException 
  258       {
  259           // read image into memory
  260           byte[] image = getBytesFromFile(jpegFile);
  261   
  262           // get end position of wrong header respectively startposition of "real jpeg data"
  263           int pos = getHeaderEndPos(image);
  264   
  265           // simple correct header
  266           byte[] header = new byte[]{(byte) 0xFF, (byte) 0xD8, (byte) 0xFF, (byte) 0xE0, (byte) 0x00,
  267                   (byte) 0x10, (byte) 0x4A, (byte) 0x46, (byte) 0x49, (byte) 0x46, (byte) 0x00, (byte) 0x01,
  268                   (byte) 0x01, (byte) 0x01, (byte) 0x00, (byte) 0x60, (byte) 0x00, (byte) 0x60, (byte) 0x00, (byte) 0x00};
  269   
  270           // concat
  271           byte[] newImage = new byte[image.length - pos + header.length - 1];
  272           System.arraycopy(header, 0, newImage, 0, header.length);
  273           System.arraycopy(image, pos + 1, newImage, header.length, image.length - pos - 1);
  274   
  275           return newImage;
  276       }
  277   }

Home » pdfbox-1.1.0-src » org.apache.pdfbox.pdmodel.graphics.xobject » [javadoc | source]