1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.pdfbox.pdmodel.common; 18 19 import java.io.IOException; 20 import java.util.HashMap; 21 import java.util.Iterator; 22 import java.util.Map; 23 import java.util.NoSuchElementException; 24 import java.util.SortedMap; 25 import java.util.TreeMap; 26 import java.util.Map.Entry; 27 28 import org.apache.pdfbox.cos.COSArray; 29 import org.apache.pdfbox.cos.COSBase; 30 import org.apache.pdfbox.cos.COSDictionary; 31 import org.apache.pdfbox.cos.COSInteger; 32 import org.apache.pdfbox.pdmodel.PDDocument; 33 34 /** 35 * Represents the page label dictionary of a document. 36 * 37 * @author <a href="mailto:igor.podolskiy@ievvwi.uni-stuttgart.de">Igor 38 * Podolskiy</a> 39 * @version $Revision$ 40 */ 41 public class PDPageLabels implements COSObjectable 42 { 43 44 private SortedMap<Integer, PDPageLabelRange> labels; 45 46 private PDDocument doc; 47 48 /** 49 * Creates an empty page label dictionary for the given document. 50 * 51 * <p> 52 * Note that the page label dictionary won't be automatically added to the 53 * document; you will still need to do it manually (see 54 * {@link PDDocumentCatalog#setPageLabels(PDPageLabels)}. 55 * </p> 56 * 57 * @param document 58 * The document the page label dictionary is created for. 59 * @see PDDocumentCatalog#setPageLabels(PDPageLabels) 60 */ 61 public PDPageLabels(PDDocument document) 62 { 63 labels = new TreeMap<Integer, PDPageLabelRange>(); 64 this.doc = document; 65 PDPageLabelRange defaultRange = new PDPageLabelRange(); 66 defaultRange.setStyle(PDPageLabelRange.STYLE_DECIMAL); 67 labels.put(0, defaultRange); 68 } 69 70 /** 71 * Creates an page label dictionary for a document using the information in 72 * the given COS dictionary. 73 * 74 * <p> 75 * Note that the page label dictionary won't be automatically added to the 76 * document; you will still need to do it manually (see 77 * {@link PDDocumentCatalog#setPageLabels(PDPageLabels)}. 78 * </p> 79 * 80 * @param document 81 * The document the page label dictionary is created for. 82 * @param dict 83 * an existing page label dictionary 84 * @see PDDocumentCatalog#setPageLabels(PDPageLabels) 85 * @throws IOException 86 * If something goes wrong during the number tree conversion. 87 */ 88 public PDPageLabels(PDDocument document, COSDictionary dict) throws IOException 89 { 90 this(document); 91 if (dict == null) 92 { 93 return; 94 } 95 PDNumberTreeNode root = new PDNumberTreeNode(dict, COSDictionary.class); 96 Map<Integer, COSDictionary> numbers = root.getNumbers(); 97 for (Entry<Integer, COSDictionary> i : numbers.entrySet()) 98 { 99 labels.put(i.getKey(), new PDPageLabelRange(i.getValue())); 100 } 101 } 102 103 /** 104 * Returns the number of page label ranges. 105 * 106 * <p> 107 * This will be always >= 1, as the required default entry for the page 108 * range starting at the first page is added automatically by this 109 * implementation (see PDF32000-1:2008, p. 375). 110 * </p> 111 * 112 * @return the number of page label ranges. 113 */ 114 public int getPageRangeCount() 115 { 116 return labels.size(); 117 } 118 119 /** 120 * Returns the page label range starting at the given page, or {@code null} 121 * if no such range is defined. 122 * 123 * @param startPage 124 * the 0-based page index representing the start page of the page 125 * range the item is defined for. 126 * @return the page label range or {@code null} if no label range is defined 127 * for the given start page. 128 */ 129 public PDPageLabelRange getPageLabelRange(int startPage) 130 { 131 return labels.get(startPage); 132 } 133 134 /** 135 * Sets the page label range beginning at the specified start page. 136 * 137 * @param startPage 138 * the 0-based index of the page representing the start of the 139 * page label range. 140 * @param item 141 * the page label item to set. 142 */ 143 public void setLabelItem(int startPage, PDPageLabelRange item) 144 { 145 labels.put(startPage, item); 146 } 147 148 public COSBase getCOSObject() 149 { 150 COSDictionary dict = new COSDictionary(); 151 COSArray arr = new COSArray(); 152 for (Entry<Integer, PDPageLabelRange> i : labels.entrySet()) 153 { 154 arr.add(COSInteger.get(i.getKey())); 155 arr.add(i.getValue()); 156 } 157 dict.setItem("Nums", arr); 158 return dict; 159 } 160 161 /** 162 * Returns a mapping with computed page labels as keys and corresponding 163 * 0-based page indices as values. The returned map will contain at most as 164 * much entries as the document has pages. 165 * 166 * <p> 167 * <strong>NOTE:</strong> If the document contains duplicate page labels, 168 * the returned map will contain <em>less</em> entries than the document has 169 * pages. The page index returned in this case is the <em>highest</em> index 170 * among all pages sharing the same label. 171 * </p> 172 * 173 * @return a mapping from labels to 0-based page indices. 174 */ 175 public Map<String, Integer> getPageIndicesByLabels() 176 { 177 final Map<String, Integer> labelMap = 178 new HashMap<String, Integer>(doc.getNumberOfPages()); 179 computeLabels(new LabelHandler() 180 { 181 public void newLabel(int pageIndex, String label) 182 { 183 labelMap.put(label, pageIndex); 184 } 185 }); 186 return labelMap; 187 } 188 189 /** 190 * Returns a mapping with 0-based page indices as keys and corresponding 191 * page labels as values as an array. The array will have exactly as much 192 * entries as the document has pages. 193 * 194 * @return an array mapping from 0-based page indices to labels. 195 */ 196 public String[] getLabelsByPageIndices() 197 { 198 final String[] map = new String[doc.getNumberOfPages()]; 199 computeLabels(new LabelHandler() 200 { 201 public void newLabel(int pageIndex, String label) 202 { 203 map[pageIndex] = label; 204 } 205 }); 206 return map; 207 } 208 209 /** 210 * Internal interface for the control flow support. 211 * 212 * @author Igor Podolskiy 213 */ 214 private static interface LabelHandler 215 { 216 public void newLabel(int pageIndex, String label); 217 } 218 219 private void computeLabels(LabelHandler handler) 220 { 221 Iterator<Entry<Integer, PDPageLabelRange>> iterator = 222 labels.entrySet().iterator(); 223 if (!iterator.hasNext()) 224 { 225 return; 226 } 227 int pageIndex = 0; 228 Entry<Integer, PDPageLabelRange> lastEntry = iterator.next(); 229 while (iterator.hasNext()) 230 { 231 Entry<Integer, PDPageLabelRange> entry = iterator.next(); 232 int numPages = entry.getKey() - lastEntry.getKey(); 233 LabelGenerator gen = new LabelGenerator(lastEntry.getValue(), 234 numPages); 235 while (gen.hasNext()) 236 { 237 handler.newLabel(pageIndex, gen.next()); 238 pageIndex++; 239 } 240 lastEntry = entry; 241 } 242 LabelGenerator gen = new LabelGenerator(lastEntry.getValue(), 243 doc.getNumberOfPages() - lastEntry.getKey()); 244 while (gen.hasNext()) 245 { 246 handler.newLabel(pageIndex, gen.next()); 247 pageIndex++; 248 } 249 } 250 251 /** 252 * Generates the labels in a page range. 253 * 254 * @author Igor Podolskiy 255 * 256 */ 257 private static class LabelGenerator implements Iterator<String> 258 { 259 private PDPageLabelRange labelInfo; 260 private int numPages; 261 private int currentPage; 262 263 public LabelGenerator(PDPageLabelRange label, int pages) 264 { 265 this.labelInfo = label; 266 this.numPages = pages; 267 this.currentPage = 0; 268 } 269 270 public boolean hasNext() 271 { 272 return currentPage < numPages; 273 } 274 275 public String next() 276 { 277 if (!hasNext()) 278 { 279 throw new NoSuchElementException(); 280 } 281 StringBuilder buf = new StringBuilder(); 282 if (labelInfo.getPrefix() != null) 283 { 284 buf.append(labelInfo.getPrefix()); 285 } 286 if (labelInfo.getStyle() != null) 287 { 288 buf.append(getNumber(labelInfo.getStart() + currentPage, 289 labelInfo.getStyle())); 290 } 291 currentPage++; 292 return buf.toString(); 293 } 294 295 private String getNumber(int pageIndex, String style) 296 { 297 if (PDPageLabelRange.STYLE_DECIMAL.equals(style)) 298 { 299 return Integer.toString(pageIndex); 300 } 301 else if (PDPageLabelRange.STYLE_LETTERS_LOWER.equals(style)) 302 { 303 return makeLetterLabel(pageIndex); 304 } 305 else if (PDPageLabelRange.STYLE_LETTERS_UPPER.equals(style)) 306 { 307 return makeLetterLabel(pageIndex).toUpperCase(); 308 } 309 else if (PDPageLabelRange.STYLE_ROMAN_LOWER.equals(style)) 310 { 311 return makeRomanLabel(pageIndex); 312 } 313 else if (PDPageLabelRange.STYLE_ROMAN_UPPER.equals(style)) 314 { 315 return makeRomanLabel(pageIndex).toUpperCase(); 316 } 317 else 318 { 319 // Fall back to decimals. 320 return Integer.toString(pageIndex); 321 } 322 } 323 324 private static final String[][] ROMANS = new String[][] 325 { 326 { "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" }, 327 { "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" }, 328 { "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" }, }; 329 330 private static String makeRomanLabel(int pageIndex) 331 { 332 StringBuilder buf = new StringBuilder(); 333 int power = 0; 334 while (power < 3 && pageIndex > 0) 335 { 336 buf.insert(0, ROMANS[power][pageIndex % 10]); 337 pageIndex = pageIndex / 10; 338 power++; 339 } 340 // Prepend as many m as there are thousands (which is 341 // incorrect by the roman numeral rules for numbers > 3999, 342 // but is unbounded and Adobe Acrobat does it this way). 343 // This code is somewhat inefficient for really big numbers, 344 // but those don't occur too often (and the numbers in those cases 345 // would be incomprehensible even if we and Adobe 346 // used strict Roman rules). 347 for (int i = 0; i < pageIndex; i++) 348 { 349 buf.insert(0, 'm'); 350 } 351 return buf.toString(); 352 } 353 354 /** 355 * A..Z, AA..ZZ, AAA..ZZZ ... labeling as described in PDF32000-1:2008, 356 * Table 159, Page 375. 357 */ 358 private static String makeLetterLabel(int num) 359 { 360 StringBuilder buf = new StringBuilder(); 361 int numLetters = num / 26 + Integer.signum(num % 26); 362 int letter = num % 26 + 26 * (1 - Integer.signum(num % 26)) + 64; 363 for (int i = 0; i < numLetters; i++) 364 { 365 buf.appendCodePoint(letter); 366 } 367 return buf.toString(); 368 } 369 370 public void remove() 371 { 372 // This is a generator, no removing allowed. 373 throw new UnsupportedOperationException(); 374 } 375 } 376 }