/**
 * This will parse the stream and populate the COSDocument object. The
 * source stream is closed when parsing is done, whether parsing succeeded
 * or failed.
 *
 * If parsing fails, the partially built document is closed before the
 * failure is propagated to the caller.
 *
 * @throws IOException If there is an error while parsing. A failure that is
 * not itself an IOException is rethrown wrapped in a WrappedIOException.
 */
public void parse() throws IOException
{
    try
    {
        if( raf != null )
        {
            document = new COSDocument( raf );
        }
        else if( tempDirectory != null )
        {
            document = new COSDocument( tempDirectory );
        }
        else
        {
            document = new COSDocument();
        }
        setDocument( document );
        parseHeader();

        //Some PDF files have garbage between the header and the
        //first object
        skipToNextObj();

        boolean wasLastParsedObjectEOF = false;
        try
        {
            while( !pdfSource.isEOF() )
            {
                try
                {
                    wasLastParsedObjectEOF = parseObject();
                }
                catch( IOException e )
                {
                    if( !forceParsing )
                    {
                        throw e;
                    }
                    /*
                     * Warning is sent to the PDFBox.log and to the Console that
                     * we skipped over an object
                     */
                    log.warn("Parsing Error, Skipping Object", e);
                    skipToNextObj();
                }
                skipSpaces();
            }

            //Test if we saw a trailer section. If not, look for an XRef Stream (Cross-Reference Stream)
            //to populate the trailer and xref information. For PDF 1.5 and above
            if( document.getTrailer() == null )
            {
                document.parseXrefStreams();
            }
            if( !document.isEncrypted() )
            {
                document.dereferenceObjectStreams();
            }
            ConflictObj.resolveConflicts(document, conflictList);
        }
        catch( IOException e )
        {
            /*
             * PDF files may have random data after the EOF marker. Ignore errors if
             * last object processed is EOF.
             */
            if( !wasLastParsedObjectEOF )
            {
                throw e;
            }
        }
    }
    catch( Throwable t )
    {
        //so if the PDF is corrupt then close the document and clear
        //all resources to it
        if( document != null )
        {
            try
            {
                document.close();
            }
            catch( Exception closeFailure )
            {
                //A failure during cleanup must not replace the original
                //parsing failure, which is the error the caller needs to see.
                log.warn("Error closing document after a parsing failure", closeFailure);
            }
        }
        if( t instanceof IOException )
        {
            throw (IOException)t;
        }
        else
        {
            throw new WrappedIOException( t );
        }
    }
    finally
    {
        pdfSource.close();
    }
}
This will parse the stream and populate the COSDocument object. This will close
the stream when it is done parsing.