/**
 * Builds a Lucene document describing the given file.
 *
 * The document receives these fields, in this order:
 * "path" (the file path as returned by File.getPath()),
 * "url" (the same path with FILE_SEPARATOR replaced by '/'),
 * "modified" (the last-modified time converted by timeToString), and
 * "uid" (the path with FILE_SEPARATOR replaced by NUL, a NUL, then the
 * converted last-modified time). Finally the file is opened and its
 * stream is handed to addContent together with the path.
 *
 * NOTE(review): the stored/indexed/tokenized behaviour of each field is
 * decided by the add*Field helpers, which are not visible here; the inline
 * comments below describe their intent as suggested by their names.
 *
 * @param file The file to convert; must not be null.
 *
 * @return The newly created document.
 *
 * @throws IOException If the file cannot be opened or closed, or if
 * addContent fails with an I/O error.
 */
public Document convertDocument(File file) throws IOException {
    Document document = new Document();

    // Read the path and the timestamp exactly once. Reading lastModified()
    // twice could yield two different values if the file is touched in
    // between, leaving the "modified" field and the "uid" out of sync.
    String path = file.getPath();
    String modified = timeToString( file.lastModified() );

    // Add the path and its url form (separators normalized to '/') as
    // fields named "path" and "url". Use UnIndexed fields, so that they are
    // just stored with the document, but are not searchable.
    addUnindexedField( document, "path", path );
    addUnindexedField( document, "url", path.replace(FILE_SEPARATOR, '/') );

    // Add the last modified date of the file a field named "modified". Use a
    // Keyword field, so that it's searchable, but so that no attempt is made
    // to tokenize the field into words.
    addKeywordField( document, "modified", modified );

    // The uid combines path and timestamp, using NUL as a separator.
    String uid = path.replace(FILE_SEPARATOR,'\u0000')
        + "\u0000"
        + modified;

    // Add the uid as a field, so that index can be incrementally maintained.
    // This field is not stored with document, it is indexed, but it is not
    // tokenized prior to indexing.
    addUnstoredKeywordField( document, "uid", uid );

    FileInputStream input = null;
    try
    {
        input = new FileInputStream( file );
        addContent( document, input, path );
    }
    finally
    {
        // Always release the file handle, even if addContent fails.
        if( input != null )
        {
            input.close();
        }
    }

    // return the document
    return document;
}
This will take a reference to a PDF document and create a Lucene document. |