Save This Page
Home » iText-2.1.7 » com.lowagie » text » pdf » codec » [javadoc | source]
    1   /*
    2    * $Id: JBIG2SegmentReader.java 3714 2009-02-20 21:04:16Z xlv $
    3    *
    4    * Copyright 2009 by Nigel Kerr.
    5    *
    6    * The contents of this file are subject to the Mozilla Public License Version 1.1
    7    * (the "License"); you may not use this file except in compliance with the License.
    8    * You may obtain a copy of the License at http://www.mozilla.org/MPL/
    9    *
   10    * Software distributed under the License is distributed on an "AS IS" basis,
   11    * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
   12    * for the specific language governing rights and limitations under the License.
   13    *
   14    * The Original Code is 'iText, a free JAVA-PDF library'.
   15    *
   16    * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
   17    * the Initial Developer are Copyright (C) 1999-2009 by Bruno Lowagie.
   18    * All Rights Reserved.
   19    * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
   20    * are Copyright (C) 2000-2009 by Paulo Soares. All Rights Reserved.
   21    *
   22    * Contributor(s): all the names of the contributors are added in the source code
   23    * where applicable.
   24    *
   25    * Alternatively, the contents of this file may be used under the terms of the
   26    * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
   27    * provisions of LGPL are applicable instead of those above.  If you wish to
   28    * allow use of your version of this file only under the terms of the LGPL
   29    * License and not to allow others to use your version of this file under
   30    * the MPL, indicate your decision by deleting the provisions above and
   31    * replace them with the notice and other provisions required by the LGPL.
   32    * If you do not delete the provisions above, a recipient may use your version
   33    * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
   34    *
   35    * This library is free software; you can redistribute it and/or modify it
   36    * under the terms of the MPL as stated above or under the terms of the GNU
   37    * Library General Public License as published by the Free Software Foundation;
   38    * either version 2 of the License, or any later version.
   39    *
   40    * This library is distributed in the hope that it will be useful, but WITHOUT
   41    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   42    * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
   43    * details.
   44    *
   45    * If you didn't download this code from the following link, you should check if
   46    * you aren't using an obsolete version:
   47    * http://www.lowagie.com/iText/
   48    */
   49   
   50   package com.lowagie.text.pdf.codec;
   51   
   52   import java.io.ByteArrayOutputStream;
   53   import java.io.IOException;
   54   import java.util.Iterator;
   55   import java.util.SortedMap;
   56   import java.util.SortedSet;
   57   import java.util.TreeMap;
   58   import java.util.TreeSet;
   59   
   60   import com.lowagie.text.pdf.RandomAccessFileOrArray;
   61   
   62   /**
   63    * Class to read a JBIG2 file at a basic level: understand all the segments, 
   64    * understand what segments belong to which pages, how many pages there are,
   65    * what the width and height of each page is, and global segments if there
   66    * are any.  Or: the minimum required to be able to take a normal sequential
   67    * or random-access organized file, and be able to embed JBIG2 pages as images 
   68    * in a PDF.
   69    * 
   70    * TODO: the indeterminate-segment-size value of dataLength, else?
   71    * 
   72    * @since 2.1.5
   73    */
   74   
   75   public class JBIG2SegmentReader {
   76   	
	// Segment type codes. readHeader() stores the low six bits of the segment
	// header flags byte in JBIG2Segment.type, and those values are compared
	// against these constants. The "see 7.4.x" notes are section numbers,
	// presumably of the JBIG2 specification (ITU-T T.88) -- TODO confirm.
	public static final int SYMBOL_DICTIONARY = 0; //see 7.4.2.

	public static final int INTERMEDIATE_TEXT_REGION = 4; //see 7.4.3.
	public static final int IMMEDIATE_TEXT_REGION = 6; //see 7.4.3.
	public static final int IMMEDIATE_LOSSLESS_TEXT_REGION = 7; //see 7.4.3.
	public static final int PATTERN_DICTIONARY = 16; //see 7.4.4.
	public static final int INTERMEDIATE_HALFTONE_REGION = 20; //see 7.4.5.
	public static final int IMMEDIATE_HALFTONE_REGION = 22; //see 7.4.5.
	public static final int IMMEDIATE_LOSSLESS_HALFTONE_REGION = 23; //see 7.4.5.
	public static final int INTERMEDIATE_GENERIC_REGION = 36; //see 7.4.6.
	public static final int IMMEDIATE_GENERIC_REGION = 38; //see 7.4.6.
	public static final int IMMEDIATE_LOSSLESS_GENERIC_REGION = 39; //see 7.4.6.
	public static final int INTERMEDIATE_GENERIC_REFINEMENT_REGION = 40; //see 7.4.7.
	public static final int IMMEDIATE_GENERIC_REFINEMENT_REGION = 42; //see 7.4.7.
	public static final int IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION = 43; //see 7.4.7.

	public static final int PAGE_INFORMATION = 48; //see 7.4.8.
	public static final int END_OF_PAGE = 49; //see 7.4.9.
	public static final int END_OF_STRIPE = 50; //see 7.4.10.
	public static final int END_OF_FILE = 51; //see 7.4.11.
	public static final int PROFILES = 52; //see 7.4.12.
	public static final int TABLES = 53; //see 7.4.13.
	public static final int EXTENSION = 62; //see 7.4.14.
	
	// Every segment header read so far: Integer segment number -> JBIG2Segment.
	private final SortedMap segments = new TreeMap();
	// Integer page number -> JBIG2Page; entries are created by readHeader() the
	// first time a segment with a page association greater than 0 is seen.
	private final SortedMap pages = new TreeMap();
	// Segments whose page association is 0, ordered by JBIG2Segment.compareTo.
	private final SortedSet globals = new TreeSet();
	// Input the file is parsed from; supplied to the constructor.
	private RandomAccessFileOrArray ra;
	// Bit 0 of the file header flags; selects which branch read() takes.
	private boolean sequential;
	// True when bit 1 of the file header flags is clear.
	private boolean number_of_pages_known;
	// Page count stored in the file header, or -1 when it was not read.
	// Note that numberOfPages() reports pages.size(), not this field.
	private int number_of_pages = -1;
	// Set on the first call to read(); a second call is rejected.
	private boolean read = false;
	
  109   	
  110   	/**
  111   	 * Inner class that holds information about a JBIG2 segment.
  112   	 * @since	2.1.5
  113   	 */
  114   	public static class JBIG2Segment implements Comparable {
  115   
  116   		public final int segmentNumber;
  117   		public long dataLength = -1;
  118   		public int page = -1;
  119   		public int[] referredToSegmentNumbers = null;
  120   		public boolean[] segmentRetentionFlags = null;
  121   		public int type = -1;
  122   		public boolean deferredNonRetain = false;
  123   		public int countOfReferredToSegments = -1;
  124   		public byte[] data = null;
  125   		public byte[] headerData = null;
  126   		public boolean page_association_size = false;
  127   		public int page_association_offset = -1;
  128   
  129   		public JBIG2Segment(int segment_number) {
  130   			this.segmentNumber = segment_number;
  131   		}
  132   
  133   		// for the globals treeset
  134   		public int compareTo(Object o) {
  135   			return this.compareTo((JBIG2Segment)o);
  136   		}
  137   		public int compareTo(JBIG2Segment s) {
  138   			return this.segmentNumber - s.segmentNumber;
  139   		}
  140   
  141   		
  142   	}
  143   	/**
  144   	 * Inner class that holds information about a JBIG2 page.
  145   	 * @since	2.1.5
  146   	 */
  147   	public static class JBIG2Page {
  148   		public final int page;
  149   		private final JBIG2SegmentReader sr;
  150   		private final SortedMap segs = new TreeMap();
  151   		public int pageBitmapWidth = -1;
  152   		public int pageBitmapHeight = -1;
  153   		public JBIG2Page(int page, JBIG2SegmentReader sr) {
  154   			this.page = page;
  155   			this.sr = sr;
  156   		}
  157   		/**
  158   		 * return as a single byte array the header-data for each segment in segment number
  159   		 * order, EMBEDDED organization, but i am putting the needed segments in SEQUENTIAL organization.
  160   		 * if for_embedding, skip the segment types that are known to be not for acrobat. 
  161   		 * @param for_embedding
  162   		 * @return	a byte array
  163   		 * @throws IOException
  164   		 */
  165   		public byte[] getData(boolean for_embedding) throws IOException {
  166   			ByteArrayOutputStream os = new ByteArrayOutputStream();
  167   			for (Iterator i = segs.keySet().iterator(); i.hasNext();  ) {
  168   				Integer sn = (Integer) i.next();
  169   				JBIG2Segment s = (JBIG2Segment) segs.get(sn);
  170   
  171   				// pdf reference 1.4, section 3.3.6 JBIG2Decode Filter
  172   				// D.3 Embedded organisation
  173   				if ( for_embedding && 
  174   						( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
  175   					continue;
  176   				}
  177   
  178   				if ( for_embedding ) {
  179   					// change the page association to page 1
  180   					byte[] headerData_emb = copyByteArray(s.headerData);
  181   					if ( s.page_association_size ) {
  182   						headerData_emb[s.page_association_offset] = 0x0;
  183   						headerData_emb[s.page_association_offset+1] = 0x0;
  184   						headerData_emb[s.page_association_offset+2] = 0x0;
  185   						headerData_emb[s.page_association_offset+3] = 0x1;
  186   					} else {
  187   						headerData_emb[s.page_association_offset] = 0x1;
  188   					}
  189   					os.write(headerData_emb);
  190   				} else {
  191   					os.write(s.headerData);
  192   				}
  193   				os.write(s.data);
  194   			}
  195   			os.close();
  196   			return os.toByteArray();
  197   		}
  198   		public void addSegment(JBIG2Segment s) {
  199   			segs.put(new Integer(s.segmentNumber), s);
  200   		}
  201   		
  202   	}
  203   	
	/**
	 * Creates a reader over the given input. Nothing is read here; call
	 * {@link #read()} to parse the file.
	 * @param ra	the input to parse
	 * @throws IOException	declared by the signature; this body does no I/O
	 */
	public JBIG2SegmentReader(RandomAccessFileOrArray ra ) throws IOException {
		this.ra = ra;
	}
  207   
  208   	public static byte[] copyByteArray(byte[] b) {
  209   		byte[] bc = new byte[b.length];
  210   		System.arraycopy(b, 0, bc, 0, b.length);
  211   		return bc;
  212   	}
  213   
  214   	public void read() throws IOException {
  215   		if ( this.read ) {
  216   			throw new IllegalStateException("already attempted a read() on this Jbig2 File");
  217   		}
  218   		this.read = true;
  219   		
  220   		readFileHeader();
  221   		// Annex D
  222   		if ( this.sequential ) {
  223   			// D.1
  224   			do {
  225   				JBIG2Segment tmp = readHeader();
  226   				readSegment(tmp);
  227   				segments.put(new Integer(tmp.segmentNumber), tmp);
  228   			} while ( this.ra.getFilePointer() < this.ra.length() );
  229   		} else {
  230   			// D.2
  231   			JBIG2Segment tmp;
  232   			do {
  233   				tmp = readHeader();
  234   				segments.put(new Integer(tmp.segmentNumber), tmp);
  235   			} while ( tmp.type != END_OF_FILE );
  236   			Iterator segs = segments.keySet().iterator();
  237   			while ( segs.hasNext() ) {
  238   				readSegment((JBIG2Segment)segments.get(segs.next()));
  239   			}
  240   		}
  241   	}
  242   
  243   	void readSegment(JBIG2Segment s) throws IOException {
  244   		int ptr = ra.getFilePointer();
  245   		
  246   		if ( s.dataLength == 0xffffffffl ) {
  247   			// TODO figure this bit out, 7.2.7
  248   			return;
  249   		}
  250   		
  251   		byte[] data = new byte[(int)s.dataLength];
  252   		ra.read(data);
  253   		s.data = data;
  254   		
  255   		if ( s.type == PAGE_INFORMATION ) {
  256   			int last = ra.getFilePointer();
  257   			ra.seek(ptr);
  258   			int page_bitmap_width = ra.readInt();
  259   			int page_bitmap_height = ra.readInt();
  260   			ra.seek(last);
  261   			JBIG2Page p = (JBIG2Page)pages.get(new Integer(s.page));
  262   			if ( p == null ) {
  263   				throw new IllegalStateException("referring to widht/height of page we havent seen yet? " + s.page);
  264   			}
  265   			
  266   			p.pageBitmapWidth = page_bitmap_width;
  267   			p.pageBitmapHeight = page_bitmap_height;
  268   		}
  269   	}
  270   
  271   	JBIG2Segment readHeader() throws IOException {
  272   		int ptr = ra.getFilePointer();
  273   		// 7.2.1
  274   		int segment_number = ra.readInt();
  275   		JBIG2Segment s = new JBIG2Segment(segment_number);
  276   
  277   		// 7.2.3
  278   		int segment_header_flags = ra.read();
  279   		boolean deferred_non_retain = (( segment_header_flags & 0x80 ) == 0x80);
  280   		s.deferredNonRetain = deferred_non_retain;
  281   		boolean page_association_size = (( segment_header_flags & 0x40 ) == 0x40);
  282   		int segment_type = ( segment_header_flags & 0x3f );
  283   		s.type = segment_type;
  284   		
  285   		//7.2.4
  286   		int referred_to_byte0 = ra.read();
  287   		int count_of_referred_to_segments = (referred_to_byte0 & 0xE0) >> 5;
  288   		int[] referred_to_segment_numbers = null;
  289   		boolean[] segment_retention_flags = null;
  290   		
  291   		if ( count_of_referred_to_segments == 7 ) {
  292   			// at least five bytes
  293   			ra.seek(ra.getFilePointer() - 1);
  294   			count_of_referred_to_segments = ( ra.readInt() & 0x1fffffff );
  295   			segment_retention_flags = new boolean[count_of_referred_to_segments+1];
  296   			int i = 0;
  297   			int referred_to_current_byte = 0;
  298   			do {
  299   				int j = i % 8;
  300   				if ( j == 0) {
  301   					referred_to_current_byte = ra.read();
  302   				}
  303   				segment_retention_flags[i] = (((( 0x1 << j ) & referred_to_current_byte) >> j) == 0x1);
  304   				i++;
  305   			} while ( i <= count_of_referred_to_segments );
  306   			
  307   		} else if ( count_of_referred_to_segments <= 4 ) {
  308   			// only one byte
  309   			segment_retention_flags = new boolean[count_of_referred_to_segments+1];
  310   			referred_to_byte0 &= 0x1f;
  311   			for ( int i = 0; i <= count_of_referred_to_segments; i++ ) {
  312   				segment_retention_flags[i] = (((( 0x1 << i ) & referred_to_byte0) >> i) == 0x1); 
  313   			}
  314   			
  315   		} else if ( count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6 ) {
  316   			throw new IllegalStateException("count of referred-to segments had bad value in header for segment " + segment_number + " starting at " + ptr);
  317   		}
  318   		s.segmentRetentionFlags = segment_retention_flags;
  319   		s.countOfReferredToSegments = count_of_referred_to_segments;
  320   
  321   		// 7.2.5
  322   		referred_to_segment_numbers = new int[count_of_referred_to_segments+1];
  323   		for ( int i = 1; i <= count_of_referred_to_segments; i++ ) {
  324   			if ( segment_number <= 256 ) {
  325   				referred_to_segment_numbers[i] = ra.read();
  326   			} else if ( segment_number <= 65536 ) {
  327   				referred_to_segment_numbers[i] = ra.readUnsignedShort();
  328   			} else {
  329   				referred_to_segment_numbers[i] = (int)ra.readUnsignedInt(); // TODO wtf ack
  330   			}
  331   		}
  332   		s.referredToSegmentNumbers = referred_to_segment_numbers;
  333   		
  334   		// 7.2.6
  335   		int segment_page_association;
  336   		int page_association_offset = ra.getFilePointer() - ptr;
  337   		if ( page_association_size ) {
  338   			segment_page_association = ra.readInt();
  339   		} else {
  340   			segment_page_association = ra.read();
  341   		}
  342   		if ( segment_page_association < 0 ) {
  343   			throw new IllegalStateException("page " + segment_page_association + " invalid for segment " + segment_number + " starting at " + ptr);
  344   		}
  345   		s.page = segment_page_association;
  346   		// so we can change the page association at embedding time.
  347   		s.page_association_size = page_association_size;
  348   		s.page_association_offset = page_association_offset;
  349   		
  350   		if ( segment_page_association > 0 && ! pages.containsKey(new Integer(segment_page_association)) ) {
  351   			pages.put(new Integer(segment_page_association), new JBIG2Page(segment_page_association, this));
  352   		}
  353   		if ( segment_page_association > 0 ) {
  354   			((JBIG2Page)pages.get(new Integer(segment_page_association))).addSegment(s);
  355   		} else {
  356   			globals.add(s);
  357   		}
  358   		
  359   		// 7.2.7
  360   		long segment_data_length = ra.readUnsignedInt();
  361   		// TODO the 0xffffffff value that might be here, and how to understand those afflicted segments
  362   		s.dataLength = segment_data_length;
  363   		
  364   		int end_ptr = ra.getFilePointer();
  365   		ra.seek(ptr);
  366   		byte[] header_data = new byte[end_ptr - ptr];
  367   		ra.read(header_data);
  368   		s.headerData  = header_data;
  369   		
  370   		return s;
  371   	}
  372   
  373   	void readFileHeader() throws IOException {
  374   		ra.seek(0);
  375   		byte[] idstring = new byte[8];
  376   		ra.read(idstring);
  377   		
  378   		byte[] refidstring = {(byte)0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A};
  379   		
  380   		for ( int i = 0; i < idstring.length; i++ ) {
  381   			if ( idstring[i] != refidstring[i] ) {
  382   				throw new IllegalStateException("file header idstring not good at byte " + i);
  383   			}
  384   		}
  385   		
  386   		int fileheaderflags = ra.read();
  387   
  388   		this.sequential = (( fileheaderflags & 0x1 ) == 0x1);
  389   		this.number_of_pages_known = (( fileheaderflags & 0x2) == 0x0);
  390   		
  391   		if ( (fileheaderflags & 0xfc) != 0x0 ) {
  392   			throw new IllegalStateException("file header flags bits 2-7 not 0");
  393   		}
  394   		
  395   		if ( this.number_of_pages_known ) {
  396   			this.number_of_pages = ra.readInt();
  397   		}
  398   	}
  399   
  400   	public int numberOfPages() {
  401   		return pages.size();
  402   	}
  403   
  404   	public int getPageHeight(int i) {
  405   		return ((JBIG2Page)pages.get(new Integer(i))).pageBitmapHeight;
  406   	}
  407   
  408   	public int getPageWidth(int i) {
  409   		return ((JBIG2Page)pages.get(new Integer(i))).pageBitmapWidth;
  410   	}
  411   
  412   	public JBIG2Page getPage(int page) {
  413   		return (JBIG2Page)pages.get(new Integer(page));
  414   	}
  415   
  416   	public byte[] getGlobal(boolean for_embedding) {
  417   		ByteArrayOutputStream os = new ByteArrayOutputStream();
  418   		try {
  419   			for (Iterator gitr = globals.iterator(); gitr.hasNext();) {
  420   				JBIG2Segment s = (JBIG2Segment)gitr.next();
  421   				if ( for_embedding && 
  422   						( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
  423   					continue;
  424   				}
  425   				os.write(s.headerData);
  426   				os.write(s.data);
  427   			}
  428   			os.close();
  429   		} catch (IOException e) {
  430   			e.printStackTrace();
  431   		}
  432   		if ( os.size() <= 0 ) {
  433   			return null;
  434   		}
  435   		return os.toByteArray();
  436   	}
  437   	
  438   	public String toString() {
  439   		if ( this.read ) {
  440   			return "Jbig2SegmentReader: number of pages: " + this.numberOfPages();
  441   		} else {
  442   			return "Jbig2SegmentReader in indeterminate state.";
  443   		}
  444   	}
  445   }

Save This Page
Home » iText-2.1.7 » com.lowagie » text » pdf » codec » [javadoc | source]