Home » pdfbox-1.1.0-src » org.apache.pdfbox.examples.util » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   package org.apache.pdfbox.examples.util;
   18   
   19   import org.apache.pdfbox.exceptions.InvalidPasswordException;
   20   
   21   
   22   import org.apache.pdfbox.pdmodel.PDDocument;
   23   import org.apache.pdfbox.pdmodel.PDPage;
   24   import org.apache.pdfbox.pdmodel.common.PDStream;
   25   import org.apache.pdfbox.util.PDFTextStripper;
   26   import org.apache.pdfbox.util.TextPosition;
   27   
   28   import java.io.IOException;
   29   
   30   import java.util.List;
   31   
   32   /**
   33    * This is an example on how to get some x/y coordinates of text.
   34    *
   35    * Usage: java org.apache.pdfbox.examples.util.PrintTextLocations <input-pdf>
   36    *
   37    * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
   38    * @version $Revision: 1.7 $
   39    */
   40   public class PrintTextLocations extends PDFTextStripper
   41   {
   42       /**
   43        * Default constructor.
   44        *
   45        * @throws IOException If there is an error loading text stripper properties.
   46        */
   47       public PrintTextLocations() throws IOException
   48       {
   49           super.setSortByPosition( true );
   50       }
   51   
   52       /**
   53        * This will print the documents data.
   54        *
   55        * @param args The command line arguments.
   56        *
   57        * @throws Exception If there is an error parsing the document.
   58        */
   59       public static void main( String[] args ) throws Exception
   60       {
   61           if( args.length != 1 )
   62           {
   63               usage();
   64           }
   65           else
   66           {
   67               PDDocument document = null;
   68               try
   69               {
   70                   document = PDDocument.load( args[0] );
   71                   if( document.isEncrypted() )
   72                   {
   73                       try
   74                       {
   75                           document.decrypt( "" );
   76                       }
   77                       catch( InvalidPasswordException e )
   78                       {
   79                           System.err.println( "Error: Document is encrypted with a password." );
   80                           System.exit( 1 );
   81                       }
   82                   }
   83                   PrintTextLocations printer = new PrintTextLocations();
   84                   List allPages = document.getDocumentCatalog().getAllPages();
   85                   for( int i=0; i<allPages.size(); i++ )
   86                   {
   87                       PDPage page = (PDPage)allPages.get( i );
   88                       System.out.println( "Processing page: " + i );
   89                       PDStream contents = page.getContents();
   90                       if( contents != null )
   91                       {
   92                           printer.processStream( page, page.findResources(), page.getContents().getStream() );
   93                       }
   94                   }
   95               }
   96               finally
   97               {
   98                   if( document != null )
   99                   {
  100                       document.close();
  101                   }
  102               }
  103           }
  104       }
  105   
  106       /**
  107        * A method provided as an event interface to allow a subclass to perform
  108        * some specific functionality when text needs to be processed.
  109        *
  110        * @param text The text to be processed
  111        */
  112       protected void processTextPosition( TextPosition text )
  113       {
  114           System.out.println( "String[" + text.getXDirAdj() + "," +
  115                   text.getYDirAdj() + " fs=" + text.getFontSize() + " xscale=" +
  116                   text.getXScale() + " height=" + text.getHeightDir() + " space=" +
  117                   text.getWidthOfSpace() + " width=" +
  118                   text.getWidthDirAdj() + "]" + text.getCharacter() );
  119       }
  120   
  121       /**
  122        * This will print the usage for this document.
  123        */
  124       private static void usage()
  125       {
  126           System.err.println( "Usage: java org.apache.pdfbox.examples.pdmodel.PrintTextLocations <input-pdf>" );
  127       }
  128   
  129   }

Home » pdfbox-1.1.0-src » org.apache.pdfbox.examples.util » [javadoc | source]