1 /* 2 * The Apache Software License, Version 1.1 3 * 4 * 5 * Copyright (c) 1999, 2000 The Apache Software Foundation. All rights 6 * reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 20 * 3. The end-user documentation included with the redistribution, 21 * if any, must include the following acknowledgment: 22 * "This product includes software developed by the 23 * Apache Software Foundation (http://www.apache.org/)." 24 * Alternately, this acknowledgment may appear in the software itself, 25 * if and wherever such third-party acknowledgments normally appear. 26 * 27 * 4. The names "Xerces" and "Apache Software Foundation" must 28 * not be used to endorse or promote products derived from this 29 * software without prior written permission. For written 30 * permission, please contact apache@apache.org. 31 * 32 * 5. Products derived from this software may not be called "Apache", 33 * nor may "Apache" appear in their name, without prior written 34 * permission of the Apache Software Foundation. 35 * 36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 47 * SUCH DAMAGE. 48 * ==================================================================== 49 * 50 * This software consists of voluntary contributions made by many 51 * individuals on behalf of the Apache Software Foundation and was 52 * originally based on software copyright (c) 1999, International 53 * Business Machines, Inc., http://www.apache.org. For more 54 * information on the Apache Software Foundation, please see 55 * <http://www.apache.org/>. 56 */ 57 58 package dom; 59 60 import util.Arguments; 61 import java.io.OutputStreamWriter; 62 import java.io.PrintWriter; 63 import java.io.UnsupportedEncodingException; 64 65 import org.apache.xerces.dom.TextImpl; 66 67 import org.w3c.dom.Attr; 68 import org.w3c.dom.Document; 69 import org.w3c.dom.NamedNodeMap; 70 import org.w3c.dom.Node; 71 import org.w3c.dom.NodeList; 72 73 /** 74 * A sample DOM counter. This sample program illustrates how to 75 * traverse a DOM tree in order to information about the document. 76 * 77 * @version $id$ 78 */ 79 public class DOMCount { 80 81 // 82 // Constants 83 // 84 85 /** Default parser name. */ 86 private static final String 87 DEFAULT_PARSER_NAME = "dom.wrappers.DOMParser"; 88 89 private static boolean setValidation = false; //defaults 90 private static boolean setNameSpaces = true; 91 private static boolean setSchemaSupport = true; 92 private static boolean setDeferredDOM = true; 93 94 95 96 // 97 // Data 98 // 99 100 /** Elements. */ 101 private long elements; 102 103 /** Attributes. */ 104 private long attributes; 105 106 /** Characters. */ 107 private long characters; 108 109 /** Ignorable whitespace. */ 110 private long ignorableWhitespace; 111 112 113 // 114 // Public static methods 115 // 116 117 /** Counts the resulting document tree. */ 118 public static void count(String parserWrapperName, String uri) { 119 120 try { 121 DOMParserWrapper parser = 122 (DOMParserWrapper)Class.forName(parserWrapperName).newInstance(); 123 DOMCount counter = new DOMCount(); 124 long before = System.currentTimeMillis(); 125 parser.setFeature( "http://apache.org/xml/features/dom/defer-node-expansion", 126 127 setDeferredDOM ); 128 parser.setFeature( "http://xml.org/sax/features/validation", 129 setValidation ); 130 parser.setFeature( "http://xml.org/sax/features/namespaces", 131 setNameSpaces ); 132 parser.setFeature( "http://apache.org/xml/features/validation/schema", 133 setSchemaSupport ); 134 135 Document document = parser.parse(uri); 136 counter.traverse(document); 137 long after = System.currentTimeMillis(); 138 counter.printResults(uri, after - before); 139 } catch (org.xml.sax.SAXParseException spe) { 140 } catch (org.xml.sax.SAXNotRecognizedException ex ){ 141 } catch (org.xml.sax.SAXNotSupportedException ex ){ 142 } catch (org.xml.sax.SAXException se) { 143 if (se.getException() != null) 144 se.getException().printStackTrace(System.err); 145 else 146 se.printStackTrace(System.err); 147 } catch (Exception e) { 148 e.printStackTrace(System.err); 149 } 150 151 } // print(String,String,boolean) 152 153 // 154 // Public methods 155 // 156 157 /** Traverses the specified node, recursively. */ 158 public void traverse(Node node) { 159 160 // is there anything to do? 161 if (node == null) { 162 return; 163 } 164 165 int type = node.getNodeType(); 166 switch (type) { 167 // print document 168 case Node.DOCUMENT_NODE: { 169 elements = 0; 170 attributes = 0; 171 characters = 0; 172 ignorableWhitespace = 0; 173 traverse(((Document)node).getDocumentElement()); 174 break; 175 } 176 177 // print element with attributes 178 case Node.ELEMENT_NODE: { 179 elements++; 180 NamedNodeMap attrs = node.getAttributes(); 181 if (attrs != null) { 182 attributes += attrs.getLength(); 183 } 184 NodeList children = node.getChildNodes(); 185 if (children != null) { 186 int len = children.getLength(); 187 for (int i = 0; i < len; i++) { 188 traverse(children.item(i)); 189 } 190 } 191 break; 192 } 193 194 // handle entity reference nodes 195 case Node.ENTITY_REFERENCE_NODE: { 196 NodeList children = node.getChildNodes(); 197 if (children != null) { 198 int len = children.getLength(); 199 for (int i = 0; i < len; i++) { 200 traverse(children.item(i)); 201 } 202 } 203 break; 204 } 205 206 // print text 207 case Node.CDATA_SECTION_NODE: { 208 characters += node.getNodeValue().length(); 209 break; 210 } 211 case Node.TEXT_NODE: { 212 if (node instanceof TextImpl) { 213 if (((TextImpl)node).isIgnorableWhitespace()) 214 ignorableWhitespace += node.getNodeValue().length(); 215 else 216 characters += node.getNodeValue().length(); 217 } else 218 characters += node.getNodeValue().length(); 219 break; 220 } 221 } 222 223 } // traverse(Node) 224 225 /** Prints the results. */ 226 public void printResults(String uri, long time) { 227 228 // filename.xml: 631 ms (4 elems, 0 attrs, 78 spaces, 0 chars) 229 System.out.print(uri); 230 System.out.print(": "); 231 System.out.print(time); 232 System.out.print(" ms ("); 233 System.out.print(elements); 234 System.out.print(" elems, "); 235 System.out.print(attributes); 236 System.out.print(" attrs, "); 237 System.out.print(ignorableWhitespace); 238 System.out.print(" spaces, "); 239 System.out.print(characters); 240 System.out.print(" chars)"); 241 System.out.println(); 242 243 } // printResults(String,long) 244 245 // 246 // Main 247 // 248 249 /** Main program entry point. */ 250 public static void main(String argv[]) { 251 252 Arguments argopt = new Arguments(); 253 argopt.setUsage( new String[] { 254 "usage: java dom.DOMCount (options) uri ...", 255 "", 256 "options:", 257 " -p name Specify DOM parser wrapper by name.", 258 " -n | -N Turn on/off namespace [default=on]", 259 " -v | -V Turn on/off validation [default=on]", 260 " -s | -S Turn on/off Schema support [default=on]", 261 " -d | -D Turn on/off deferred DOM [default=on]", 262 " -h This help screen."} ); 263 264 265 // is there anything to do? 266 if (argv.length == 0) { 267 argopt.printUsage(); 268 System.exit(1); 269 } 270 271 // vars 272 String parserName = DEFAULT_PARSER_NAME; 273 274 argopt.parseArgumentTokens(argv , new char[] { 'p'} ); 275 276 int c; 277 String arg = null; 278 while ( ( arg = argopt.getlistFiles() ) != null ) { 279 outer: 280 while ( (c = argopt.getArguments()) != -1 ){ 281 switch (c) { 282 case 'v': 283 setValidation = true; 284 //System.out.println( "v" ); 285 break; 286 case 'V': 287 setValidation = false; 288 //System.out.println( "V" ); 289 break; 290 case 'N': 291 setNameSpaces = false; 292 break; 293 case 'n': 294 setNameSpaces = true; 295 break; 296 case 'p': 297 //System.out.println('p'); 298 parserName = argopt.getStringParameter(); 299 //System.out.println( "parserName = " + parserName ); 300 break; 301 case 'd': 302 setDeferredDOM = true; 303 break; 304 case 'D': 305 setDeferredDOM = false; 306 break; 307 case 's': 308 //System.out.println("s" ); 309 setSchemaSupport = true; 310 break; 311 case 'S': 312 //System.out.println("S" ); 313 setSchemaSupport = false; 314 break; 315 case '?': 316 case 'h': 317 case '-': 318 argopt.printUsage(); 319 System.exit(1); 320 break; 321 case -1: 322 //System.out.println( "-1" ); 323 break outer; 324 default: 325 326 break; 327 } 328 } 329 330 count(parserName, arg ); //count uri 331 } 332 333 } // main(String[]) 334 335 } // class DOMCount