1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.pdfbox.examples.util; 18 19 import org.apache.pdfbox.exceptions.InvalidPasswordException; 20 21 22 import org.apache.pdfbox.pdmodel.PDDocument; 23 import org.apache.pdfbox.pdmodel.PDPage; 24 import org.apache.pdfbox.pdmodel.common.PDStream; 25 import org.apache.pdfbox.util.PDFTextStripper; 26 import org.apache.pdfbox.util.TextPosition; 27 28 import java.io.IOException; 29 30 import java.util.List; 31 32 /** 33 * This is an example on how to get some x/y coordinates of text. 34 * 35 * Usage: java org.apache.pdfbox.examples.util.PrintTextLocations <input-pdf> 36 * 37 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a> 38 * @version $Revision: 1.7 $ 39 */ 40 public class PrintTextLocations extends PDFTextStripper 41 { 42 /** 43 * Default constructor. 44 * 45 * @throws IOException If there is an error loading text stripper properties. 46 */ 47 public PrintTextLocations() throws IOException 48 { 49 super.setSortByPosition( true ); 50 } 51 52 /** 53 * This will print the documents data. 54 * 55 * @param args The command line arguments. 56 * 57 * @throws Exception If there is an error parsing the document. 58 */ 59 public static void main( String[] args ) throws Exception 60 { 61 if( args.length != 1 ) 62 { 63 usage(); 64 } 65 else 66 { 67 PDDocument document = null; 68 try 69 { 70 document = PDDocument.load( args[0] ); 71 if( document.isEncrypted() ) 72 { 73 try 74 { 75 document.decrypt( "" ); 76 } 77 catch( InvalidPasswordException e ) 78 { 79 System.err.println( "Error: Document is encrypted with a password." ); 80 System.exit( 1 ); 81 } 82 } 83 PrintTextLocations printer = new PrintTextLocations(); 84 List allPages = document.getDocumentCatalog().getAllPages(); 85 for( int i=0; i<allPages.size(); i++ ) 86 { 87 PDPage page = (PDPage)allPages.get( i ); 88 System.out.println( "Processing page: " + i ); 89 PDStream contents = page.getContents(); 90 if( contents != null ) 91 { 92 printer.processStream( page, page.findResources(), page.getContents().getStream() ); 93 } 94 } 95 } 96 finally 97 { 98 if( document != null ) 99 { 100 document.close(); 101 } 102 } 103 } 104 } 105 106 /** 107 * A method provided as an event interface to allow a subclass to perform 108 * some specific functionality when text needs to be processed. 109 * 110 * @param text The text to be processed 111 */ 112 protected void processTextPosition( TextPosition text ) 113 { 114 System.out.println( "String[" + text.getXDirAdj() + "," + 115 text.getYDirAdj() + " fs=" + text.getFontSize() + " xscale=" + 116 text.getXScale() + " height=" + text.getHeightDir() + " space=" + 117 text.getWidthOfSpace() + " width=" + 118 text.getWidthDirAdj() + "]" + text.getCharacter() ); 119 } 120 121 /** 122 * This will print the usage for this document. 123 */ 124 private static void usage() 125 { 126 System.err.println( "Usage: java org.apache.pdfbox.examples.pdmodel.PrintTextLocations <input-pdf>" ); 127 } 128 129 }