/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.cos;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.OutputStream;

import java.util.List;

import org.apache.pdfbox.filter.Filter;
import org.apache.pdfbox.filter.FilterManager;

import org.apache.pdfbox.pdfparser.PDFStreamParser;

import org.apache.pdfbox.exceptions.COSVisitorException;

import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.io.RandomAccessFileInputStream;
import org.apache.pdfbox.io.RandomAccessFileOutputStream;

/**
 * This class represents a stream object in a PDF document.
 *
 * The stream keeps up to two views of its data in the scratch file: the
 * filtered (encoded) bytes and the unfiltered (decoded) bytes. Whichever view
 * is missing is produced on demand from the other one by applying the filters
 * named in the dictionary.
 *
 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
 * @version $Revision: 1.41 $
 */
public class COSStream extends COSDictionary
{
    private static final int BUFFER_SIZE=16384;

    /**
     * How many times a decode is retried, each time with one byte less input.
     */
    private static final int MAX_DECODE_TRIES=5;

    /**
     * The scratch storage that both the filtered and unfiltered data live in.
     */
    private RandomAccess file;

    /**
     * The stream with all of the filters applied.
     */
    private RandomAccessFileOutputStream filteredStream;

    /**
     * The stream with no filters, this contains the useful data.
     */
    private RandomAccessFileOutputStream unFilteredStream;

    /**
     * Constructor.  Creates a new stream with an empty dictionary.
     *
     * @param storage The intermediate storage for the stream.
     */
    public COSStream( RandomAccess storage )
    {
        super();
        file = storage;
    }

    /**
     * Constructor.
     *
     * @param dictionary The dictionary that is associated with this stream.
     * @param storage The intermediate storage for the stream.
     */
    public COSStream( COSDictionary dictionary, RandomAccess storage )
    {
        super( dictionary );
        file = storage;
    }

    /**
     * This will replace this object with the data from the new object.  This
     * is used to easily maintain referential integrity when changing references
     * to new objects.  Replacing a stream with itself is a no-op.
     *
     * @param stream The stream that has the new values in it.
     */
    public void replaceWithStream( COSStream stream )
    {
        if( stream == this )
        {
            // clearing first would wipe the very entries we are about to copy
            return;
        }
        this.clear();
        this.addAll( stream );
        file = stream.file;
        filteredStream = stream.filteredStream;
        unFilteredStream = stream.unFilteredStream;
    }

    /**
     * This will get the scratch file associated with this stream.
     *
     * @return The scratch file where this stream is being stored.
     */
    public RandomAccess getScratchFile()
    {
        return file;
    }

    /**
     * This will get all the tokens in the stream.
     *
     * @return All of the tokens in the stream.
     *
     * @throws IOException If there is an error parsing the stream.
     */
    public List<Object> getStreamTokens() throws IOException
    {
        PDFStreamParser parser = new PDFStreamParser( this );
        parser.parse();
        return parser.getTokens();
    }

    /**
     * This will get the stream with all of the filters applied.  If only the
     * unfiltered data is available it is encoded first.
     *
     * @return the bytes of the physical (encoded) stream, or an empty stream
     *         if no data has been written to this stream yet
     *
     * @throws IOException when encoding/decoding causes an exception
     */
    public InputStream getFilteredStream() throws IOException
    {
        if( filteredStream == null )
        {
            doEncode();
        }
        if( filteredStream == null )
        {
            // still nothing: no data was ever written, so mirror
            // getUnfilteredStream() and hand back an empty stream
            return new ByteArrayInputStream( new byte[0] );
        }
        return openRegion( filteredStream.getPosition(), filteredStream.getLength() );
    }

    /**
     * This will get the logical content stream with none of the filters.  If
     * only the filtered data is available it is decoded first.
     *
     * @return the bytes of the logical (decoded) stream, or an empty stream
     *         if no data has been written to this stream yet
     *
     * @throws IOException when encoding/decoding causes an exception
     */
    public InputStream getUnfilteredStream() throws IOException
    {
        if( unFilteredStream == null )
        {
            doDecode();
        }
        if( unFilteredStream == null )
        {
            // if unFilteredStream is still null then this stream has not been
            // created yet, so there is no stream data: return an empty stream
            return new ByteArrayInputStream( new byte[0] );
        }
        return openRegion( unFilteredStream.getPosition(), unFilteredStream.getLength() );
    }

    /**
     * visitor pattern double dispatch method.
     *
     * @param visitor The object to notify when visiting this object.
     * @return any object, depending on the visitor implementation, or null
     * @throws COSVisitorException If an error occurs while visiting this object.
     */
    public Object accept(ICOSVisitor visitor) throws COSVisitorException
    {
        return visitor.visitFromStream(this);
    }

    /**
     * Opens a buffered input stream over a region of the scratch file.
     *
     * @param position The position passed on to the scratch file reader.
     * @param length The length passed on to the scratch file reader.
     *
     * @return A buffered stream over the requested region.
     *
     * @throws IOException If the region cannot be opened.
     */
    private InputStream openRegion( long position, long length ) throws IOException
    {
        RandomAccessFileInputStream region =
            new RandomAccessFileInputStream( file, position, length );
        return new BufferedInputStream( region, BUFFER_SIZE );
    }

    /**
     * This will decode the physical byte stream applying all of the filters to the stream.
     * When there is no filtered data, nothing is done and the unfiltered stream
     * stays unset.  When decoding fails, the unfiltered stream is reset so that
     * a partially decoded result is never handed out later.
     *
     * @throws IOException If there is an error applying a filter to the stream.
     */
    private void doDecode() throws IOException
    {
        if( filteredStream == null )
        {
            // nothing has been written yet, so there is nothing to decode
            return;
        }

        // FIXME: We shouldn't keep the same reference?
        unFilteredStream = filteredStream;

        boolean decoded = false;
        try
        {
            COSBase filters = getFilters();
            if( filters == null )
            {
                //then do nothing
            }
            else if( filters instanceof COSName )
            {
                doDecode( (COSName)filters, 0 );
            }
            else if( filters instanceof COSArray )
            {
                COSArray filterArray = (COSArray)filters;
                for( int i=0; i<filterArray.size(); i++ )
                {
                    COSName filterName = (COSName)filterArray.get( i );
                    doDecode( filterName, i );
                }
            }
            else
            {
                throw new IOException( "Error: Unknown filter type:" + filters );
            }
            decoded = true;
        }
        finally
        {
            if( !decoded )
            {
                // do not cache the output of a failed decode
                unFilteredStream = null;
            }
        }
    }

    /**
     * This will decode applying a single filter on the stream.
     *
     * @param filterName The name of the filter.
     * @param filterIndex The index of the current filter.
     *
     * @throws IOException If there is an error parsing the stream.
     */
    private void doDecode( COSName filterName, int filterIndex ) throws IOException
    {
        FilterManager manager = getFilterManager();
        Filter filter = manager.getFilter( filterName );

        long position = unFilteredStream.getPosition();
        long length = unFilteredStream.getLength();
        // in case we need it later
        long writtenLength = unFilteredStream.getLengthWritten();

        if( length == 0 )
        {
            //if the length is zero then don't bother trying to decode
            //some filters don't work when attempting to decode
            //with a zero length stream.  See zlib_error_01.pdf
            unFilteredStream = new RandomAccessFileOutputStream( file );
            return;
        }

        IOException failure = decodeWithRetries( filter, filterIndex, position, length );
        if( failure != null )
        {
            //if no good stream was found then lets try again but with the
            //length of data that was actually read and not length
            //defined in the dictionary
            failure = decodeWithRetries( filter, filterIndex, position, writtenLength );
        }
        if( failure != null )
        {
            throw failure;
        }
    }

    /**
     * Tries to decode a region of the scratch file into a fresh unfiltered stream.
     * This is a simple hack: sometimes we read a couple of extra bytes that
     * shouldn't be there, so when an error is encountered the decode is tried
     * again with one byte less, up to MAX_DECODE_TRIES attempts in total.
     *
     * @param filter The filter to decode with.
     * @param filterIndex The index of the current filter.
     * @param position The start of the encoded data in the scratch file.
     * @param length The length of the encoded data for the first attempt.
     *
     * @return null if an attempt succeeded, otherwise the exception raised by
     *         the last attempt.
     */
    private IOException decodeWithRetries( Filter filter, int filterIndex, long position, long length )
    {
        IOException lastFailure = null;
        for( int tryCount=0; tryCount<MAX_DECODE_TRIES; tryCount++ )
        {
            try
            {
                InputStream input = openRegion( position, length );
                unFilteredStream = new RandomAccessFileOutputStream( file );
                filter.decode( input, unFilteredStream, this, filterIndex );
                return null;
            }
            catch( IOException io )
            {
                length--;
                lastFailure = io;
            }
        }
        return lastFailure;
    }

    /**
     * This will encode the logical byte stream applying all of the filters to the stream.
     * When there is no unfiltered data, nothing is done and the filtered stream
     * stays unset.  When encoding fails, the filtered stream is reset so that a
     * partially encoded result is never handed out later.
     *
     * @throws IOException If there is an error applying a filter to the stream.
     */
    private void doEncode() throws IOException
    {
        if( unFilteredStream == null )
        {
            // nothing has been written yet, so there is nothing to encode
            return;
        }

        filteredStream = unFilteredStream;

        boolean encoded = false;
        try
        {
            COSBase filters = getFilters();
            if( filters == null )
            {
                //there is no filter to apply
            }
            else if( filters instanceof COSName )
            {
                doEncode( (COSName)filters, 0 );
            }
            else if( filters instanceof COSArray )
            {
                // apply filters in reverse order
                COSArray filterArray = (COSArray)filters;
                for( int i=filterArray.size()-1; i>=0; i-- )
                {
                    COSName filterName = (COSName)filterArray.get( i );
                    doEncode( filterName, i );
                }
            }
            // any other filter type leaves the data unencoded; unlike doDecode()
            // this is not reported as an error
            encoded = true;
        }
        finally
        {
            if( !encoded )
            {
                // do not cache the output of a failed encode
                filteredStream = null;
            }
        }
    }

    /**
     * This will encode applying a single filter on the stream.
     *
     * @param filterName The name of the filter.
     * @param filterIndex The index to the filter.
     *
     * @throws IOException If there is an error parsing the stream.
     */
    private void doEncode( COSName filterName, int filterIndex ) throws IOException
    {
        FilterManager manager = getFilterManager();
        Filter filter = manager.getFilter( filterName );

        // read the current data first, then redirect the output to a new stream
        InputStream input = openRegion( filteredStream.getPosition(), filteredStream.getLength() );
        filteredStream = new RandomAccessFileOutputStream( file );
        filter.encode( input, filteredStream, this, filterIndex );
    }

    /**
     * This will return the filters to apply to the byte stream.
     * The method will return
     * - null if no filters are to be applied
     * - a COSName if one filter is to be applied
     * - a COSArray containing COSNames if multiple filters are to be applied
     *
     * @return the COSBase object representing the filters
     */
    public COSBase getFilters()
    {
        return getDictionaryObject(COSName.FILTER);
    }

    /**
     * This will create a new stream to which already filtered (encoded) bytes
     * should be written.  You probably don't want this but want to use
     * createUnfilteredStream, which is used to write raw bytes to.
     * Any previously held unfiltered data is discarded.
     *
     * @return A stream that can be written to.
     *
     * @throws IOException If there is an error creating the stream.
     */
    public OutputStream createFilteredStream() throws IOException
    {
        filteredStream = new RandomAccessFileOutputStream( file );
        unFilteredStream = null;
        return new BufferedOutputStream( filteredStream, BUFFER_SIZE );
    }

    /**
     * This will create a new stream to which already filtered (encoded) bytes
     * should be written.  You probably don't want this but want to use
     * createUnfilteredStream, which is used to write raw bytes to.
     * Any previously held unfiltered data is discarded.
     *
     * @param expectedLength An entry where a length is expected.
     *
     * @return A stream that can be written to.
     *
     * @throws IOException If there is an error creating the stream.
     */
    public OutputStream createFilteredStream( COSBase expectedLength ) throws IOException
    {
        filteredStream = new RandomAccessFileOutputStream( file );
        filteredStream.setExpectedLength( expectedLength );
        unFilteredStream = null;
        return new BufferedOutputStream( filteredStream, BUFFER_SIZE );
    }

    /**
     * set the filters to be applied to the stream.  If the data is currently
     * only held in filtered form it is decoded with the old filters first, so
     * that the content is not lost when the cached filtered stream is dropped.
     *
     * @param filters The filters to set on this stream.
     *
     * @throws IOException If there is an error clearing the old filters.
     */
    public void setFilters(COSBase filters) throws IOException
    {
        if( unFilteredStream == null )
        {
            // must happen before the FILTER entry changes: the existing bytes
            // were encoded with the filters that are currently in the dictionary
            doDecode();
        }
        setItem(COSName.FILTER, filters);
        // kill cached filtered streams
        filteredStream = null;
    }

    /**
     * This will create an output stream that can be written to.
     * Any previously held filtered data is discarded.
     *
     * @return An output stream which raw data bytes should be written to.
     *
     * @throws IOException If there is an error creating the stream.
     */
    public OutputStream createUnfilteredStream() throws IOException
    {
        unFilteredStream = new RandomAccessFileOutputStream( file );
        filteredStream = null;
        return new BufferedOutputStream( unFilteredStream, BUFFER_SIZE );
    }
}