1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.jempbox.xmp; 18 19 import java.io.IOException; 20 import java.io.InputStream; 21 import java.io.OutputStream; 22 import java.lang.reflect.Constructor; 23 import java.util.ArrayList; 24 import java.util.HashMap; 25 import java.util.Iterator; 26 import java.util.LinkedList; 27 import java.util.List; 28 import java.util.Map; 29 30 import javax.xml.transform.TransformerException; 31 32 import org.apache.jempbox.impl.XMLUtil; 33 import org.w3c.dom.Document; 34 import org.w3c.dom.Element; 35 import org.w3c.dom.NamedNodeMap; 36 import org.w3c.dom.Node; 37 import org.w3c.dom.NodeList; 38 import org.w3c.dom.ProcessingInstruction; 39 import org.xml.sax.InputSource; 40 41 /** 42 * This class represents the top level XMP data structure and gives access to 43 * the various schemas that are available as part of the XMP specification. 44 * 45 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a> 46 * @version $Revision: 1.10 $ 47 */ 48 public class XMPMetadata 49 { 50 /** 51 * Supported encoding for persisted XML. 52 */ 53 public static final String ENCODING_UTF8 = "UTF-8"; 54 55 /** 56 * Supported encoding for persisted XML. 57 */ 58 public static final String ENCODING_UTF16BE = "UTF-16BE"; 59 60 /** 61 * Supported encoding for persisted XML. 62 */ 63 public static final String ENCODING_UTF16LE = "UTF-16LE"; 64 65 /** 66 * The DOM representation of the metadata. 67 */ 68 protected Document xmpDocument; 69 70 /** 71 * The encoding of the XMP document. Default is UTF8. 72 */ 73 protected String encoding = ENCODING_UTF8; 74 75 /** 76 * A mapping of namespaces. 77 */ 78 protected Map<String,Class<?>> nsMappings = new HashMap<String,Class<?>>(); 79 80 /** 81 * Default constructor, creates blank XMP doc. 82 * 83 * @throws IOException 84 * If there is an error creating the initial document. 85 */ 86 public XMPMetadata() throws IOException 87 { 88 xmpDocument = XMLUtil.newDocument(); 89 ProcessingInstruction beginXPacket = xmpDocument 90 .createProcessingInstruction("xpacket", 91 "begin=\"\uFEFF\" id=\"W5M0MpCehiHzreSzNTczkc9d\""); 92 93 xmpDocument.appendChild(beginXPacket); 94 Element xmpMeta = xmpDocument.createElementNS("adobe:ns:meta/", 95 "x:xmpmeta"); 96 xmpMeta.setAttributeNS(XMPSchema.NS_NAMESPACE, "xmlns:x", 97 "adobe:ns:meta/"); 98 99 xmpDocument.appendChild(xmpMeta); 100 101 Element rdf = xmpDocument.createElement("rdf:RDF"); 102 rdf.setAttributeNS(XMPSchema.NS_NAMESPACE, "xmlns:rdf", 103 "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); 104 105 xmpMeta.appendChild(rdf); 106 107 ProcessingInstruction endXPacket = xmpDocument 108 .createProcessingInstruction("xpacket", "end=\"w\""); 109 xmpDocument.appendChild(endXPacket); 110 init(); 111 } 112 113 /** 114 * Constructor from an existing XML document. 115 * 116 * @param doc 117 * The root XMP document. 118 */ 119 public XMPMetadata(Document doc) 120 { 121 xmpDocument = doc; 122 init(); 123 } 124 125 private void init() 126 { 127 nsMappings.put(XMPSchemaPDF.NAMESPACE, XMPSchemaPDF.class); 128 nsMappings.put(XMPSchemaBasic.NAMESPACE, XMPSchemaBasic.class); 129 nsMappings 130 .put(XMPSchemaDublinCore.NAMESPACE, XMPSchemaDublinCore.class); 131 nsMappings.put(XMPSchemaMediaManagement.NAMESPACE, 132 XMPSchemaMediaManagement.class); 133 nsMappings.put(XMPSchemaRightsManagement.NAMESPACE, 134 XMPSchemaRightsManagement.class); 135 nsMappings.put(XMPSchemaBasicJobTicket.NAMESPACE, 136 XMPSchemaBasicJobTicket.class); 137 nsMappings.put(XMPSchemaDynamicMedia.NAMESPACE, 138 XMPSchemaDynamicMedia.class); 139 nsMappings.put(XMPSchemaPagedText.NAMESPACE, XMPSchemaPagedText.class); 140 nsMappings.put(XMPSchemaIptc4xmpCore.NAMESPACE, 141 XMPSchemaIptc4xmpCore.class); 142 nsMappings.put(XMPSchemaPhotoshop.NAMESPACE, XMPSchemaPhotoshop.class); 143 } 144 145 /** 146 * Will add a XMPSchema to the set of identified schemas. 147 * 148 * The class needs to have a constructor with parameter Element 149 * 150 * @param namespace 151 * The namespace URI of the schmema for instance 152 * http://purl.org/dc/elements/1.1/. 153 * @param xmpSchema 154 * The schema to associated this identifier with. 155 */ 156 public void addXMLNSMapping(String namespace, Class<?> xmpSchema) 157 { 158 159 if (!(XMPSchema.class.isAssignableFrom(xmpSchema))) 160 { 161 throw new IllegalArgumentException( 162 "Only XMPSchemas can be mapped to."); 163 } 164 165 nsMappings.put(namespace, xmpSchema); 166 } 167 168 /** 169 * Get the PDF Schema. 170 * 171 * @return The first PDF schema in the list. 172 * 173 * @throws IOException 174 * If there is an error accessing the schema. 175 */ 176 public XMPSchemaPDF getPDFSchema() throws IOException 177 { 178 return (XMPSchemaPDF) getSchemaByClass(XMPSchemaPDF.class); 179 } 180 181 /** 182 * Get the Basic Schema. 183 * 184 * @return The first Basic schema in the list. 185 * 186 * @throws IOException 187 * If there is an error accessing the schema. 188 */ 189 public XMPSchemaBasic getBasicSchema() throws IOException 190 { 191 return (XMPSchemaBasic) getSchemaByClass(XMPSchemaBasic.class); 192 } 193 194 /** 195 * Get the Dublin Core Schema. 196 * 197 * @return The first Dublin schema in the list. 198 * 199 * @throws IOException 200 * If there is an error accessing the schema. 201 */ 202 public XMPSchemaDublinCore getDublinCoreSchema() throws IOException 203 { 204 return (XMPSchemaDublinCore) getSchemaByClass(XMPSchemaDublinCore.class); 205 } 206 207 /** 208 * Get the Media Management Schema. 209 * 210 * @return The first Media Management schema in the list. 211 * 212 * @throws IOException 213 * If there is an error accessing the schema. 214 */ 215 public XMPSchemaMediaManagement getMediaManagementSchema() 216 throws IOException 217 { 218 return (XMPSchemaMediaManagement) getSchemaByClass(XMPSchemaMediaManagement.class); 219 } 220 221 /** 222 * Get the Schema Rights Schema. 223 * 224 * @return The first Schema Rights schema in the list. 225 * 226 * @throws IOException 227 * If there is an error accessing the schema. 228 */ 229 public XMPSchemaRightsManagement getRightsManagementSchema() 230 throws IOException 231 { 232 return (XMPSchemaRightsManagement) getSchemaByClass(XMPSchemaRightsManagement.class); 233 } 234 235 /** 236 * Get the Job Ticket Schema. 237 * 238 * @return The first Job Ticket schema in the list. 239 * 240 * @throws IOException 241 * If there is an error accessing the schema. 242 */ 243 public XMPSchemaBasicJobTicket getBasicJobTicketSchema() throws IOException 244 { 245 return (XMPSchemaBasicJobTicket) getSchemaByClass(XMPSchemaBasicJobTicket.class); 246 } 247 248 /** 249 * Get the Dynamic Media Schema. 250 * 251 * @return The first Dynamic Media schema in the list. 252 * 253 * @throws IOException 254 * If there is an error accessing the schema. 255 */ 256 public XMPSchemaDynamicMedia getDynamicMediaSchema() throws IOException 257 { 258 return (XMPSchemaDynamicMedia) getSchemaByClass(XMPSchemaDynamicMedia.class); 259 } 260 261 /** 262 * Get the Paged Text Schema. 263 * 264 * @return The first Paged Text schema in the list. 265 * 266 * @throws IOException 267 * If there is an error accessing the schema. 268 */ 269 public XMPSchemaPagedText getPagedTextSchema() throws IOException 270 { 271 return (XMPSchemaPagedText) getSchemaByClass(XMPSchemaPagedText.class); 272 } 273 274 /** 275 * Add a new Media Management schema. 276 * 277 * @return The newly added schema. 278 */ 279 public XMPSchemaMediaManagement addMediaManagementSchema() 280 { 281 XMPSchemaMediaManagement schema = new XMPSchemaMediaManagement(this); 282 return (XMPSchemaMediaManagement) basicAddSchema(schema); 283 } 284 285 /** 286 * Add a new Rights Managment schema. 287 * 288 * @return The newly added schema. 289 */ 290 public XMPSchemaRightsManagement addRightsManagementSchema() 291 { 292 XMPSchemaRightsManagement schema = new XMPSchemaRightsManagement(this); 293 return (XMPSchemaRightsManagement) basicAddSchema(schema); 294 } 295 296 /** 297 * Add a new Job Ticket schema. 298 * 299 * @return The newly added schema. 300 */ 301 public XMPSchemaBasicJobTicket addBasicJobTicketSchema() 302 { 303 XMPSchemaBasicJobTicket schema = new XMPSchemaBasicJobTicket(this); 304 return (XMPSchemaBasicJobTicket) basicAddSchema(schema); 305 } 306 307 /** 308 * Add a new Dynamic Media schema. 309 * 310 * @return The newly added schema. 311 */ 312 public XMPSchemaDynamicMedia addDynamicMediaSchema() 313 { 314 XMPSchemaDynamicMedia schema = new XMPSchemaDynamicMedia(this); 315 return (XMPSchemaDynamicMedia) basicAddSchema(schema); 316 } 317 318 /** 319 * Add a new Paged Text schema. 320 * 321 * @return The newly added schema. 322 */ 323 public XMPSchemaPagedText addPagedTextSchema() 324 { 325 XMPSchemaPagedText schema = new XMPSchemaPagedText(this); 326 return (XMPSchemaPagedText) basicAddSchema(schema); 327 } 328 329 /** 330 * Add a custom schema to the root rdf. The schema has to have been created 331 * as a child of this XMPMetadata. 332 * 333 * @param schema 334 * The schema to add. 335 */ 336 public void addSchema(XMPSchema schema) 337 { 338 Element rdf = getRDFElement(); 339 rdf.appendChild(schema.getElement()); 340 } 341 342 /** 343 * Save the XMP document to a file. 344 * 345 * @param file 346 * The file to save the XMP document to. 347 * 348 * @throws Exception 349 * If there is an error while writing to the stream. 350 */ 351 public void save(String file) throws Exception 352 { 353 XMLUtil.save(xmpDocument, file, encoding); 354 } 355 356 /** 357 * Save the XMP document to a stream. 358 * 359 * @param outStream 360 * The stream to save the XMP document to. 361 * 362 * @throws TransformerException 363 * If there is an error while writing to the stream. 364 */ 365 public void save(OutputStream outStream) throws TransformerException 366 { 367 XMLUtil.save(xmpDocument, outStream, encoding); 368 } 369 370 /** 371 * Get the XML document as a byte array. 372 * 373 * @return The metadata as an XML byte stream. 374 * @throws Exception 375 * If there is an error creating the stream. 376 */ 377 public byte[] asByteArray() throws Exception 378 { 379 return XMLUtil.asByteArray(xmpDocument, encoding); 380 } 381 382 /** 383 * Get the XML document from this object. 384 * 385 * @return This object as an XML document. 386 */ 387 public Document getXMPDocument() 388 { 389 return xmpDocument; 390 } 391 392 /** 393 * Generic add schema method. 394 * 395 * @param schema 396 * The schema to add. 397 * 398 * @return The newly added schema. 399 */ 400 protected XMPSchema basicAddSchema(XMPSchema schema) 401 { 402 Element rdf = getRDFElement(); 403 rdf.appendChild(schema.getElement()); 404 return schema; 405 } 406 407 /** 408 * Create and add a new PDF Schema to this metadata. Typically a XMP 409 * document will only have one PDF schema (but multiple are supported) so it 410 * is recommended that you first check the existence of a PDF scheme by 411 * using getPDFSchema() 412 * 413 * @return A new blank PDF schema that is now part of the metadata. 414 */ 415 public XMPSchemaPDF addPDFSchema() 416 { 417 XMPSchemaPDF schema = new XMPSchemaPDF(this); 418 return (XMPSchemaPDF) basicAddSchema(schema); 419 } 420 421 /** 422 * Create and add a new Dublin Core Schema to this metadata. Typically a XMP 423 * document will only have one schema for each type (but multiple are 424 * supported) so it is recommended that you first check the existence of a 425 * this scheme by using getDublinCoreSchema() 426 * 427 * @return A new blank PDF schema that is now part of the metadata. 428 */ 429 public XMPSchemaDublinCore addDublinCoreSchema() 430 { 431 XMPSchemaDublinCore schema = new XMPSchemaDublinCore(this); 432 return (XMPSchemaDublinCore) basicAddSchema(schema); 433 } 434 435 /** 436 * Create and add a new Basic Schema to this metadata. Typically a XMP 437 * document will only have one schema for each type (but multiple are 438 * supported) so it is recommended that you first check the existence of a 439 * this scheme by using getDublinCoreSchema() 440 * 441 * @return A new blank PDF schema that is now part of the metadata. 442 */ 443 public XMPSchemaBasic addBasicSchema() 444 { 445 XMPSchemaBasic schema = new XMPSchemaBasic(this); 446 return (XMPSchemaBasic) basicAddSchema(schema); 447 } 448 449 /** 450 * Create and add a new IPTC schema to this metadata. 451 * 452 * @return A new blank IPTC schema that is now part of the metadata. 453 */ 454 public XMPSchemaIptc4xmpCore addIptc4xmpCoreSchema() 455 { 456 XMPSchemaIptc4xmpCore schema = new XMPSchemaIptc4xmpCore(this); 457 return (XMPSchemaIptc4xmpCore) basicAddSchema(schema); 458 } 459 460 /** 461 * Create and add a new Photoshop schema to this metadata. 462 * 463 * @return A new blank Photoshop schema that is now part of the metadata. 464 */ 465 public XMPSchemaPhotoshop addPhotoshopSchema() 466 { 467 XMPSchemaPhotoshop schema = new XMPSchemaPhotoshop(this); 468 return (XMPSchemaPhotoshop) basicAddSchema(schema); 469 } 470 471 /** 472 * The encoding used to write the XML. Default value:UTF-8<br/> See the 473 * ENCODING_XXX constants 474 * 475 * @param xmlEncoding 476 * The encoding to write the XML as. 477 */ 478 public void setEncoding(String xmlEncoding) 479 { 480 encoding = xmlEncoding; 481 } 482 483 /** 484 * Get the current encoding that will be used to write the XML. 485 * 486 * @return The current encoding to write the XML to. 487 */ 488 public String getEncoding() 489 { 490 return encoding; 491 } 492 493 /** 494 * Get the root RDF element. 495 * 496 * @return The root RDF element. 497 */ 498 private Element getRDFElement() 499 { 500 Element rdf = null; 501 NodeList nodes = xmpDocument.getElementsByTagName("rdf:RDF"); 502 if (nodes.getLength() > 0) 503 { 504 rdf = (Element) nodes.item(0); 505 } 506 return rdf; 507 } 508 509 /** 510 * Load metadata from the filesystem. 511 * 512 * @param file 513 * The file to load the metadata from. 514 * 515 * @return The loaded XMP document. 516 * 517 * @throws IOException 518 * If there is an error reading the data. 519 */ 520 public static XMPMetadata load(String file) throws IOException 521 { 522 return new XMPMetadata(XMLUtil.parse(file)); 523 } 524 525 /** 526 * Load a schema from an input source. 527 * 528 * @param is 529 * The input source to load the schema from. 530 * 531 * @return The loaded/parsed schema. 532 * 533 * @throws IOException 534 * If there was an error while loading the schema. 535 */ 536 public static XMPMetadata load(InputSource is) throws IOException 537 { 538 return new XMPMetadata(XMLUtil.parse(is)); 539 } 540 541 /** 542 * Load metadata from the filesystem. 543 * 544 * @param is 545 * The stream to load the data from. 546 * 547 * @return The loaded XMP document. 548 * 549 * @throws IOException 550 * If there is an error reading the data. 551 */ 552 public static XMPMetadata load(InputStream is) throws IOException 553 { 554 return new XMPMetadata(XMLUtil.parse(is)); 555 } 556 557 /** 558 * Test main program. 559 * 560 * @param args 561 * The command line arguments. 562 * @throws Exception 563 * If there is an error. 564 */ 565 public static void main(String[] args) throws Exception 566 { 567 XMPMetadata metadata = new XMPMetadata(); 568 XMPSchemaPDF pdf = metadata.addPDFSchema(); 569 pdf.setAbout("uuid:b8659d3a-369e-11d9-b951-000393c97fd8"); 570 pdf.setKeywords("ben,bob,pdf"); 571 pdf.setPDFVersion("1.3"); 572 pdf.setProducer("Acrobat Distiller 6.0.1 for Macintosh"); 573 574 XMPSchemaDublinCore dc = metadata.addDublinCoreSchema(); 575 dc.addContributor("Ben Litchfield"); 576 dc.addContributor("Solar Eclipse"); 577 dc.addContributor("Some Other Guy"); 578 579 XMPSchemaBasic basic = metadata.addBasicSchema(); 580 Thumbnail t = new Thumbnail(metadata); 581 t.setFormat(Thumbnail.FORMAT_JPEG); 582 t.setImage("IMAGE_DATA"); 583 t.setHeight(new Integer(100)); 584 t.setWidth(new Integer(200)); 585 basic.setThumbnail(t); 586 basic.setBaseURL("http://www.pdfbox.org/"); 587 588 List<XMPSchema> schemas = metadata.getSchemas(); 589 System.out.println("schemas=" + schemas); 590 591 metadata.save("test.xmp"); 592 } 593 594 /** 595 * This will get a list of XMPSchema(or subclass) objects. 596 * 597 * @return A non null read-only list of schemas that are part of this 598 * metadata. 599 * 600 * @throws IOException 601 * If there is an error creating a specific schema. 602 */ 603 public List<XMPSchema> getSchemas() throws IOException 604 { 605 NodeList schemaList = xmpDocument 606 .getElementsByTagName("rdf:Description"); 607 List<XMPSchema> retval = new ArrayList<XMPSchema>(schemaList.getLength()); 608 for (int i = 0; i < schemaList.getLength(); i++) 609 { 610 Element schema = (Element) schemaList.item(i); 611 boolean found = false; 612 NamedNodeMap attributes = schema.getAttributes(); 613 for (int j = 0; j < attributes.getLength() && !found; j++) 614 { 615 Node attribute = attributes.item(j); 616 String name = attribute.getNodeName(); 617 String value = attribute.getNodeValue(); 618 if (name.startsWith("xmlns:") && nsMappings.containsKey(value)) 619 { 620 Class<?> schemaClass = nsMappings.get(value); 621 try 622 { 623 Constructor<?> ctor = schemaClass 624 .getConstructor(new Class[] { Element.class, 625 String.class }); 626 retval.add((XMPSchema)ctor.newInstance(new Object[] { schema, 627 name.substring(6) })); 628 found = true; 629 } 630 catch(NoSuchMethodException e) 631 { 632 throw new IOException( 633 "Error: Class " 634 + schemaClass.getName() 635 + " must have a constructor with the signature of " 636 + schemaClass.getName() 637 + "( org.w3c.dom.Element, java.lang.String )"); 638 } 639 catch(Exception e) 640 { 641 e.printStackTrace(); 642 throw new IOException(e.getMessage()); 643 } 644 } 645 } 646 if (!found) 647 { 648 retval.add(new XMPSchema(schema, null)); 649 } 650 } 651 return retval; 652 } 653 654 /** 655 * Will return all schemas that fit the given namespaceURI. Which is only 656 * done by using the namespace mapping (nsMapping) and not by actually 657 * checking the xmlns property. 658 * 659 * @param namespaceURI 660 * The namespaceURI to filter for. 661 * @return A list containing the found schemas or an empty list if non are 662 * found or the namespaceURI could not be found in the namespace 663 * mapping. 664 * @throws IOException 665 * If an operation on the document fails. 666 */ 667 public List<XMPSchema> getSchemasByNamespaceURI(String namespaceURI) 668 throws IOException 669 { 670 671 List<XMPSchema> l = getSchemas(); 672 List<XMPSchema> result = new LinkedList<XMPSchema>(); 673 674 Class<?> schemaClass = nsMappings.get(namespaceURI); 675 if (schemaClass == null) 676 { 677 return result; 678 } 679 680 Iterator<XMPSchema> i = l.iterator(); 681 while (i.hasNext()) 682 { 683 XMPSchema schema = i.next(); 684 685 if (schemaClass.isAssignableFrom(schema.getClass())) 686 { 687 result.add(schema); 688 } 689 } 690 return result; 691 } 692 693 /** 694 * This will return true if the XMP contains an unknown schema. 695 * 696 * @return True if an unknown schema is found, false otherwise 697 * 698 * @throws IOException 699 * If there is an error 700 */ 701 public boolean hasUnknownSchema() throws IOException 702 { 703 NodeList schemaList = xmpDocument 704 .getElementsByTagName("rdf:Description"); 705 for (int i = 0; i < schemaList.getLength(); i++) 706 { 707 Element schema = (Element) schemaList.item(i); 708 NamedNodeMap attributes = schema.getAttributes(); 709 for (int j = 0; j < attributes.getLength(); j++) 710 { 711 Node attribute = attributes.item(j); 712 String name = attribute.getNodeName(); 713 String value = attribute.getNodeValue(); 714 if (name.startsWith("xmlns:") && !nsMappings.containsKey(value) 715 && !value.equals(ResourceEvent.NAMESPACE)) 716 { 717 return true; 718 } 719 } 720 } 721 return false; 722 } 723 724 /** 725 * Tries to retrieve a schema from this by classname. 726 * 727 * @param targetSchema 728 * Class for targetSchema. 729 * 730 * @return XMPSchema or null if no target is found. 731 * 732 * @throws IOException 733 * if there was an error creating the schemas of this. 734 */ 735 public XMPSchema getSchemaByClass(Class<?> targetSchema) throws IOException 736 { 737 Iterator<XMPSchema> iter = getSchemas().iterator(); 738 while (iter.hasNext()) 739 { 740 XMPSchema element = (XMPSchema) iter.next(); 741 if (element.getClass().getName().equals(targetSchema.getName())) 742 { 743 return element; 744 } 745 } 746 // not found 747 return null; 748 } 749 750 /** 751 * Merge this metadata with the given metadata object. 752 * 753 * @param metadata The metadata to merge with this document. 754 * 755 * @throws IOException If there is an error merging the data. 756 */ 757 public void merge(XMPMetadata metadata) throws IOException 758 { 759 List<XMPSchema> schemas2 = metadata.getSchemas(); 760 for (Iterator<XMPSchema> iterator = schemas2.iterator(); iterator.hasNext();) 761 { 762 XMPSchema schema2 = iterator.next(); 763 XMPSchema schema1 = getSchemaByClass(schema2.getClass()); 764 if (schema1 == null) 765 { 766 Element rdf = getRDFElement(); 767 rdf.appendChild(xmpDocument.importNode(schema2.getElement(), 768 true)); 769 } 770 else 771 { 772 schema1.merge(schema2); 773 } 774 } 775 } 776 }