/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.ant;

import java.io.File;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.tools.ant.DirectoryScanner;
import org.apache.tools.ant.Task;

import org.apache.tools.ant.types.FileSet;

/**
 * This is an Ant task that allows PDF documents to be converted to text
 * files from within an Ant build.
 *
 * Every file selected by the nested filesets whose name ends in ".pdf"
 * (case-insensitive) is handed to {@link org.apache.pdfbox.ExtractText};
 * the output file has the same path with the extension replaced by "txt".
 *
 * @author <a href="ben@benlitchfield.com">Ben Litchfield</a>
 * @version $Revision: 1.8 $
 */
public class PDFToTextTask extends Task
{
    /** The nested filesets, stored in the order they were added. Holds {@link FileSet} instances. */
    private final List fileSets = new ArrayList();

    /**
     * Adds a set of files (nested fileset attribute).
     *
     * @param set Another fileset to add.
     */
    public void addFileset( FileSet set )
    {
        fileSets.add( set );
    }

    /**
     * This will perform the execution.
     *
     * Each fileset is scanned relative to the current project and every
     * included file is logged. Files with a ".pdf" extension are converted;
     * all other files are skipped. A failure while converting one file is
     * logged and does not stop the remaining files from being processed.
     */
    public void execute()
    {
        log( "PDFToTextTask executing" );
        Iterator fileSetIter = fileSets.iterator();
        while( fileSetIter.hasNext() )
        {
            FileSet next = (FileSet)fileSetIter.next();
            DirectoryScanner dirScanner = next.getDirectoryScanner( getProject() );
            dirScanner.scan();
            String[] files = dirScanner.getIncludedFiles();
            for( int i=0; i<files.length; i++ )
            {
                // Included file names are resolved against the scanner's base directory.
                File f = new File( dirScanner.getBasedir(), files[i] );
                String pdfFile = f.getAbsolutePath();
                // Logged for every included file, including those skipped below.
                log( "processing: " + pdfFile );
                if( pdfFile.toUpperCase().endsWith( ".PDF" ) )
                {
                    String textFile = toTextFileName( pdfFile );
                    try
                    {
                        org.apache.pdfbox.ExtractText.main( new String[] { pdfFile, textFile } );
                    }
                    catch( Exception e )
                    {
                        // Best effort: report the failure and carry on with the next file.
                        // Some exceptions carry no message, so fall back to toString().
                        String reason = e.getMessage();
                        if( reason == null )
                        {
                            reason = e.toString();
                        }
                        log( "Error processing " + pdfFile + ": " + reason );
                    }
                }
            }
        }
    }

    /**
     * Derives the output text file name from a PDF file name by replacing
     * the last three characters (the "pdf" extension) with "txt".
     *
     * @param pdfFile Path of the PDF file; must end in a three character extension.
     *
     * @return The same path with the extension replaced by "txt".
     */
    private static String toTextFileName( String pdfFile )
    {
        return pdfFile.substring( 0, pdfFile.length() - 3 ) + "txt";
    }
}