1 /*
2 * $Id: JBIG2SegmentReader.java 3714 2009-02-20 21:04:16Z xlv $
3 *
4 * Copyright 2009 by Nigel Kerr.
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * (the "License"); you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
9 *
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the License.
13 *
14 * The Original Code is 'iText, a free JAVA-PDF library'.
15 *
16 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
17 * the Initial Developer are Copyright (C) 1999-2009 by Bruno Lowagie.
18 * All Rights Reserved.
19 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
20 * are Copyright (C) 2000-2009 by Paulo Soares. All Rights Reserved.
21 *
22 * Contributor(s): all the names of the contributors are added in the source code
23 * where applicable.
24 *
25 * Alternatively, the contents of this file may be used under the terms of the
26 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
27 * provisions of LGPL are applicable instead of those above. If you wish to
28 * allow use of your version of this file only under the terms of the LGPL
29 * License and not to allow others to use your version of this file under
30 * the MPL, indicate your decision by deleting the provisions above and
31 * replace them with the notice and other provisions required by the LGPL.
32 * If you do not delete the provisions above, a recipient may use your version
33 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
34 *
35 * This library is free software; you can redistribute it and/or modify it
36 * under the terms of the MPL as stated above or under the terms of the GNU
37 * Library General Public License as published by the Free Software Foundation;
38 * either version 2 of the License, or any later version.
39 *
40 * This library is distributed in the hope that it will be useful, but WITHOUT
41 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
42 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
43 * details.
44 *
45 * If you didn't download this code from the following link, you should check if
46 * you aren't using an obsolete version:
47 * http://www.lowagie.com/iText/
48 */
49
50 package com.lowagie.text.pdf.codec;
51
52 import java.io.ByteArrayOutputStream;
53 import java.io.IOException;
54 import java.util.Iterator;
55 import java.util.SortedMap;
56 import java.util.SortedSet;
57 import java.util.TreeMap;
58 import java.util.TreeSet;
59
60 import com.lowagie.text.pdf.RandomAccessFileOrArray;
61
62 /**
63 * Class to read a JBIG2 file at a basic level: understand all the segments,
64 * understand what segments belong to which pages, how many pages there are,
65 * what the width and height of each page is, and global segments if there
66 * are any. Or: the minimum required to be able to take a normal sequential
67 * or random-access organized file, and be able to embed JBIG2 pages as images
68 * in a PDF.
69 *
70 * TODO: the indeterminate-segment-size value of dataLength, else?
71 *
72 * @since 2.1.5
73 */
74
75 public class JBIG2SegmentReader {
76
77 public static final int SYMBOL_DICTIONARY = 0; //see 7.4.2.
78
79 public static final int INTERMEDIATE_TEXT_REGION = 4; //see 7.4.3.
80 public static final int IMMEDIATE_TEXT_REGION = 6; //see 7.4.3.
81 public static final int IMMEDIATE_LOSSLESS_TEXT_REGION = 7; //see 7.4.3.
82 public static final int PATTERN_DICTIONARY = 16; //see 7.4.4.
83 public static final int INTERMEDIATE_HALFTONE_REGION = 20; //see 7.4.5.
84 public static final int IMMEDIATE_HALFTONE_REGION = 22; //see 7.4.5.
85 public static final int IMMEDIATE_LOSSLESS_HALFTONE_REGION = 23; //see 7.4.5.
86 public static final int INTERMEDIATE_GENERIC_REGION = 36; //see 7.4.6.
87 public static final int IMMEDIATE_GENERIC_REGION = 38; //see 7.4.6.
88 public static final int IMMEDIATE_LOSSLESS_GENERIC_REGION = 39; //see 7.4.6.
89 public static final int INTERMEDIATE_GENERIC_REFINEMENT_REGION = 40; //see 7.4.7.
90 public static final int IMMEDIATE_GENERIC_REFINEMENT_REGION = 42; //see 7.4.7.
91 public static final int IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION = 43; //see 7.4.7.
92
93 public static final int PAGE_INFORMATION = 48; //see 7.4.8.
94 public static final int END_OF_PAGE = 49; //see 7.4.9.
95 public static final int END_OF_STRIPE = 50; //see 7.4.10.
96 public static final int END_OF_FILE = 51; //see 7.4.11.
97 public static final int PROFILES = 52; //see 7.4.12.
98 public static final int TABLES = 53; //see 7.4.13.
99 public static final int EXTENSION = 62; //see 7.4.14.
100
101 private final SortedMap segments = new TreeMap();
102 private final SortedMap pages = new TreeMap();
103 private final SortedSet globals = new TreeSet();
104 private RandomAccessFileOrArray ra;
105 private boolean sequential;
106 private boolean number_of_pages_known;
107 private int number_of_pages = -1;
108 private boolean read = false;
109
110 /**
111 * Inner class that holds information about a JBIG2 segment.
112 * @since 2.1.5
113 */
114 public static class JBIG2Segment implements Comparable {
115
116 public final int segmentNumber;
117 public long dataLength = -1;
118 public int page = -1;
119 public int[] referredToSegmentNumbers = null;
120 public boolean[] segmentRetentionFlags = null;
121 public int type = -1;
122 public boolean deferredNonRetain = false;
123 public int countOfReferredToSegments = -1;
124 public byte[] data = null;
125 public byte[] headerData = null;
126 public boolean page_association_size = false;
127 public int page_association_offset = -1;
128
129 public JBIG2Segment(int segment_number) {
130 this.segmentNumber = segment_number;
131 }
132
133 // for the globals treeset
134 public int compareTo(Object o) {
135 return this.compareTo((JBIG2Segment)o);
136 }
137 public int compareTo(JBIG2Segment s) {
138 return this.segmentNumber - s.segmentNumber;
139 }
140
141
142 }
143 /**
144 * Inner class that holds information about a JBIG2 page.
145 * @since 2.1.5
146 */
147 public static class JBIG2Page {
148 public final int page;
149 private final JBIG2SegmentReader sr;
150 private final SortedMap segs = new TreeMap();
151 public int pageBitmapWidth = -1;
152 public int pageBitmapHeight = -1;
153 public JBIG2Page(int page, JBIG2SegmentReader sr) {
154 this.page = page;
155 this.sr = sr;
156 }
157 /**
158 * return as a single byte array the header-data for each segment in segment number
159 * order, EMBEDDED organization, but i am putting the needed segments in SEQUENTIAL organization.
160 * if for_embedding, skip the segment types that are known to be not for acrobat.
161 * @param for_embedding
162 * @return a byte array
163 * @throws IOException
164 */
165 public byte[] getData(boolean for_embedding) throws IOException {
166 ByteArrayOutputStream os = new ByteArrayOutputStream();
167 for (Iterator i = segs.keySet().iterator(); i.hasNext(); ) {
168 Integer sn = (Integer) i.next();
169 JBIG2Segment s = (JBIG2Segment) segs.get(sn);
170
171 // pdf reference 1.4, section 3.3.6 JBIG2Decode Filter
172 // D.3 Embedded organisation
173 if ( for_embedding &&
174 ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
175 continue;
176 }
177
178 if ( for_embedding ) {
179 // change the page association to page 1
180 byte[] headerData_emb = copyByteArray(s.headerData);
181 if ( s.page_association_size ) {
182 headerData_emb[s.page_association_offset] = 0x0;
183 headerData_emb[s.page_association_offset+1] = 0x0;
184 headerData_emb[s.page_association_offset+2] = 0x0;
185 headerData_emb[s.page_association_offset+3] = 0x1;
186 } else {
187 headerData_emb[s.page_association_offset] = 0x1;
188 }
189 os.write(headerData_emb);
190 } else {
191 os.write(s.headerData);
192 }
193 os.write(s.data);
194 }
195 os.close();
196 return os.toByteArray();
197 }
198 public void addSegment(JBIG2Segment s) {
199 segs.put(new Integer(s.segmentNumber), s);
200 }
201
202 }
203
204 public JBIG2SegmentReader(RandomAccessFileOrArray ra ) throws IOException {
205 this.ra = ra;
206 }
207
208 public static byte[] copyByteArray(byte[] b) {
209 byte[] bc = new byte[b.length];
210 System.arraycopy(b, 0, bc, 0, b.length);
211 return bc;
212 }
213
214 public void read() throws IOException {
215 if ( this.read ) {
216 throw new IllegalStateException("already attempted a read() on this Jbig2 File");
217 }
218 this.read = true;
219
220 readFileHeader();
221 // Annex D
222 if ( this.sequential ) {
223 // D.1
224 do {
225 JBIG2Segment tmp = readHeader();
226 readSegment(tmp);
227 segments.put(new Integer(tmp.segmentNumber), tmp);
228 } while ( this.ra.getFilePointer() < this.ra.length() );
229 } else {
230 // D.2
231 JBIG2Segment tmp;
232 do {
233 tmp = readHeader();
234 segments.put(new Integer(tmp.segmentNumber), tmp);
235 } while ( tmp.type != END_OF_FILE );
236 Iterator segs = segments.keySet().iterator();
237 while ( segs.hasNext() ) {
238 readSegment((JBIG2Segment)segments.get(segs.next()));
239 }
240 }
241 }
242
243 void readSegment(JBIG2Segment s) throws IOException {
244 int ptr = ra.getFilePointer();
245
246 if ( s.dataLength == 0xffffffffl ) {
247 // TODO figure this bit out, 7.2.7
248 return;
249 }
250
251 byte[] data = new byte[(int)s.dataLength];
252 ra.read(data);
253 s.data = data;
254
255 if ( s.type == PAGE_INFORMATION ) {
256 int last = ra.getFilePointer();
257 ra.seek(ptr);
258 int page_bitmap_width = ra.readInt();
259 int page_bitmap_height = ra.readInt();
260 ra.seek(last);
261 JBIG2Page p = (JBIG2Page)pages.get(new Integer(s.page));
262 if ( p == null ) {
263 throw new IllegalStateException("referring to widht/height of page we havent seen yet? " + s.page);
264 }
265
266 p.pageBitmapWidth = page_bitmap_width;
267 p.pageBitmapHeight = page_bitmap_height;
268 }
269 }
270
271 JBIG2Segment readHeader() throws IOException {
272 int ptr = ra.getFilePointer();
273 // 7.2.1
274 int segment_number = ra.readInt();
275 JBIG2Segment s = new JBIG2Segment(segment_number);
276
277 // 7.2.3
278 int segment_header_flags = ra.read();
279 boolean deferred_non_retain = (( segment_header_flags & 0x80 ) == 0x80);
280 s.deferredNonRetain = deferred_non_retain;
281 boolean page_association_size = (( segment_header_flags & 0x40 ) == 0x40);
282 int segment_type = ( segment_header_flags & 0x3f );
283 s.type = segment_type;
284
285 //7.2.4
286 int referred_to_byte0 = ra.read();
287 int count_of_referred_to_segments = (referred_to_byte0 & 0xE0) >> 5;
288 int[] referred_to_segment_numbers = null;
289 boolean[] segment_retention_flags = null;
290
291 if ( count_of_referred_to_segments == 7 ) {
292 // at least five bytes
293 ra.seek(ra.getFilePointer() - 1);
294 count_of_referred_to_segments = ( ra.readInt() & 0x1fffffff );
295 segment_retention_flags = new boolean[count_of_referred_to_segments+1];
296 int i = 0;
297 int referred_to_current_byte = 0;
298 do {
299 int j = i % 8;
300 if ( j == 0) {
301 referred_to_current_byte = ra.read();
302 }
303 segment_retention_flags[i] = (((( 0x1 << j ) & referred_to_current_byte) >> j) == 0x1);
304 i++;
305 } while ( i <= count_of_referred_to_segments );
306
307 } else if ( count_of_referred_to_segments <= 4 ) {
308 // only one byte
309 segment_retention_flags = new boolean[count_of_referred_to_segments+1];
310 referred_to_byte0 &= 0x1f;
311 for ( int i = 0; i <= count_of_referred_to_segments; i++ ) {
312 segment_retention_flags[i] = (((( 0x1 << i ) & referred_to_byte0) >> i) == 0x1);
313 }
314
315 } else if ( count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6 ) {
316 throw new IllegalStateException("count of referred-to segments had bad value in header for segment " + segment_number + " starting at " + ptr);
317 }
318 s.segmentRetentionFlags = segment_retention_flags;
319 s.countOfReferredToSegments = count_of_referred_to_segments;
320
321 // 7.2.5
322 referred_to_segment_numbers = new int[count_of_referred_to_segments+1];
323 for ( int i = 1; i <= count_of_referred_to_segments; i++ ) {
324 if ( segment_number <= 256 ) {
325 referred_to_segment_numbers[i] = ra.read();
326 } else if ( segment_number <= 65536 ) {
327 referred_to_segment_numbers[i] = ra.readUnsignedShort();
328 } else {
329 referred_to_segment_numbers[i] = (int)ra.readUnsignedInt(); // TODO wtf ack
330 }
331 }
332 s.referredToSegmentNumbers = referred_to_segment_numbers;
333
334 // 7.2.6
335 int segment_page_association;
336 int page_association_offset = ra.getFilePointer() - ptr;
337 if ( page_association_size ) {
338 segment_page_association = ra.readInt();
339 } else {
340 segment_page_association = ra.read();
341 }
342 if ( segment_page_association < 0 ) {
343 throw new IllegalStateException("page " + segment_page_association + " invalid for segment " + segment_number + " starting at " + ptr);
344 }
345 s.page = segment_page_association;
346 // so we can change the page association at embedding time.
347 s.page_association_size = page_association_size;
348 s.page_association_offset = page_association_offset;
349
350 if ( segment_page_association > 0 && ! pages.containsKey(new Integer(segment_page_association)) ) {
351 pages.put(new Integer(segment_page_association), new JBIG2Page(segment_page_association, this));
352 }
353 if ( segment_page_association > 0 ) {
354 ((JBIG2Page)pages.get(new Integer(segment_page_association))).addSegment(s);
355 } else {
356 globals.add(s);
357 }
358
359 // 7.2.7
360 long segment_data_length = ra.readUnsignedInt();
361 // TODO the 0xffffffff value that might be here, and how to understand those afflicted segments
362 s.dataLength = segment_data_length;
363
364 int end_ptr = ra.getFilePointer();
365 ra.seek(ptr);
366 byte[] header_data = new byte[end_ptr - ptr];
367 ra.read(header_data);
368 s.headerData = header_data;
369
370 return s;
371 }
372
373 void readFileHeader() throws IOException {
374 ra.seek(0);
375 byte[] idstring = new byte[8];
376 ra.read(idstring);
377
378 byte[] refidstring = {(byte)0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A};
379
380 for ( int i = 0; i < idstring.length; i++ ) {
381 if ( idstring[i] != refidstring[i] ) {
382 throw new IllegalStateException("file header idstring not good at byte " + i);
383 }
384 }
385
386 int fileheaderflags = ra.read();
387
388 this.sequential = (( fileheaderflags & 0x1 ) == 0x1);
389 this.number_of_pages_known = (( fileheaderflags & 0x2) == 0x0);
390
391 if ( (fileheaderflags & 0xfc) != 0x0 ) {
392 throw new IllegalStateException("file header flags bits 2-7 not 0");
393 }
394
395 if ( this.number_of_pages_known ) {
396 this.number_of_pages = ra.readInt();
397 }
398 }
399
400 public int numberOfPages() {
401 return pages.size();
402 }
403
404 public int getPageHeight(int i) {
405 return ((JBIG2Page)pages.get(new Integer(i))).pageBitmapHeight;
406 }
407
408 public int getPageWidth(int i) {
409 return ((JBIG2Page)pages.get(new Integer(i))).pageBitmapWidth;
410 }
411
412 public JBIG2Page getPage(int page) {
413 return (JBIG2Page)pages.get(new Integer(page));
414 }
415
416 public byte[] getGlobal(boolean for_embedding) {
417 ByteArrayOutputStream os = new ByteArrayOutputStream();
418 try {
419 for (Iterator gitr = globals.iterator(); gitr.hasNext();) {
420 JBIG2Segment s = (JBIG2Segment)gitr.next();
421 if ( for_embedding &&
422 ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
423 continue;
424 }
425 os.write(s.headerData);
426 os.write(s.data);
427 }
428 os.close();
429 } catch (IOException e) {
430 e.printStackTrace();
431 }
432 if ( os.size() <= 0 ) {
433 return null;
434 }
435 return os.toByteArray();
436 }
437
438 public String toString() {
439 if ( this.read ) {
440 return "Jbig2SegmentReader: number of pages: " + this.numberOfPages();
441 } else {
442 return "Jbig2SegmentReader in indeterminate state.";
443 }
444 }
445 }