Home » pdfbox-1.1.0-src » org.apache.pdfbox.examples.util » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   package org.apache.pdfbox.examples.util;
   18   
   19   import org.apache.pdfbox.pdfparser.PDFStreamParser;
   20   import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
   21   
   22   import org.apache.pdfbox.pdmodel.PDDocument;
   23   import org.apache.pdfbox.pdmodel.PDPage;
   24   import org.apache.pdfbox.pdmodel.common.PDStream;
   25   import org.apache.pdfbox.util.PDFOperator;
   26   
   27   import java.util.ArrayList;
   28   import java.util.List;
   29   
   30   /**
   31    * This is an example on how to remove all text from PDF document.
   32    *
   33    * Usage: java org.apache.pdfbox.examples.util.RemoveAllText <input-pdf> <output-pdf>
   34    *
   35    * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
   36    * @version $Revision: 1.2 $
   37    */
   38   public class RemoveAllText
   39   {
   40       /**
   41        * Default constructor.
   42        */
   43       private RemoveAllText()
   44       {
   45           //example class should not be instantiated
   46       }
   47   
   48       /**
   49        * This will remove all text from a PDF document.
   50        *
   51        * @param args The command line arguments.
   52        *
   53        * @throws Exception If there is an error parsing the document.
   54        */
   55       public static void main( String[] args ) throws Exception
   56       {
   57           if( args.length != 2 )
   58           {
   59               usage();
   60           }
   61           else
   62           {
   63               PDDocument document = null;
   64               try
   65               {
   66                   document = PDDocument.load( args[0] );
   67                   if( document.isEncrypted() )
   68                   {
   69                       System.err.println( "Error: Encrypted documents are not supported for this example." );
   70                       System.exit( 1 );
   71                   }
   72                   List allPages = document.getDocumentCatalog().getAllPages();
   73                   for( int i=0; i<allPages.size(); i++ )
   74                   {
   75                       PDPage page = (PDPage)allPages.get( i );
   76                       PDFStreamParser parser = new PDFStreamParser(page.getContents());
   77                       parser.parse();
   78                       List tokens = parser.getTokens();
   79                       List newTokens = new ArrayList();
   80                       for( int j=0; j<tokens.size(); j++)
   81                       {
   82                           Object token = tokens.get( j );
   83                           if( token instanceof PDFOperator )
   84                           {
   85                               PDFOperator op = (PDFOperator)token;
   86                               if( op.getOperation().equals( "TJ") || op.getOperation().equals( "Tj" ))
   87                               {
   88                                   //remove the one argument to this operator
   89                                   newTokens.remove( newTokens.size() -1 );
   90                                   continue;
   91                               }
   92                           }
   93                           newTokens.add( token );
   94   
   95                       }
   96                       PDStream newContents = new PDStream( document );
   97                       ContentStreamWriter writer = new ContentStreamWriter( newContents.createOutputStream() );
   98                       writer.writeTokens( newTokens );
   99                       newContents.addCompression();
  100                       page.setContents( newContents );
  101                   }
  102                   document.save( args[1] );
  103               }
  104               finally
  105               {
  106                   if( document != null )
  107                   {
  108                       document.close();
  109                   }
  110               }
  111           }
  112       }
  113   
  114       /**
  115        * This will print the usage for this document.
  116        */
  117       private static void usage()
  118       {
  119           System.err.println( "Usage: java org.apache.pdfbox.examples.pdmodel.RemoveAllText <input-pdf> <output-pdf>" );
  120       }
  121   
  122   }

Home » pdfbox-1.1.0-src » org.apache.pdfbox.examples.util » [javadoc | source]