Home » Xerces-J-src.2.9.1 » xni » parser » [javadoc | source]

    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package xni.parser;
   19   
   20   import java.io.BufferedReader;
   21   import java.io.IOException;
   22   import java.io.InputStream;
   23   import java.io.InputStreamReader;
   24   import java.io.Reader;
   25   import java.util.StringTokenizer;
   26   
   27   import org.apache.xerces.util.NamespaceSupport;
   28   import org.apache.xerces.util.XMLAttributesImpl;
   29   import org.apache.xerces.util.XMLStringBuffer;
   30   import org.apache.xerces.xni.QName;
   31   import org.apache.xerces.xni.XMLAttributes;
   32   import org.apache.xerces.xni.XMLDTDContentModelHandler;
   33   import org.apache.xerces.xni.XMLString;
   34   import org.apache.xerces.xni.XNIException;
   35   import org.apache.xerces.xni.parser.XMLInputSource;
   36   
   37   /**
   38    * This example is a very simple parser configuration that can 
   39    * parse files with comma-separated values (CSV) to generate XML
   40    * events. For example, the following CSV document:
   41    * <pre>
   42    * Andy Clark,16 Jan 1973,Cincinnati
   43    * </pre>
   44    * produces the following XML "document" as represented by the 
   45    * XNI streaming document information: 
   46    * <pre>
   47    * &lt;?xml version='1.0' encoding='UTF-8' standalone='true'?&gt;
   48    * &lt;!DOCTYPE csv [
   49    * &lt;!ELEMENT csv (row)*&gt;
   50    * &lt;!ELEMENT row (col)*&gt;
   51    * &lt;!ELEMENT col (#PCDATA)&gt;
   52    * ]&gt;
   53    * &lt;csv&gt;
   54    *  &lt;row&gt;
   55    *   &lt;col&gt;Andy Clark&lt;/col&gt;
   56    *   &lt;col&gt;16 Jan 1973&lt;/col&gt;
   57    *   &lt;col&gt;Cincinnati&lt;/col&gt;
   58    *  &lt;/row&gt;
   59    * &lt;/csv&gt;
   60    * </pre>
   61    * 
   62    * @author Andy Clark, IBM
   63    *
   64    * @version $Id: CSVConfiguration.java 447690 2006-09-19 02:41:53Z mrglavas $
   65    */
   66   public class CSVConfiguration
   67       extends AbstractConfiguration {
   68   
   69       //
   70       // Constants
   71       //
   72   
   73       /** A QName for the &lt;csv&gt; element name. */
   74       protected static final QName CSV = new QName(null, null, "csv", null);
   75   
   76       /** A QName for the &lt;row&gt; element name. */
   77       protected static final QName ROW = new QName(null, null, "row", null);
   78   
   79       /** A QName for the &lt;col&gt; element name. */
   80       protected static final QName COL = new QName(null, null, "col", null);
   81       
   82       /** An empty list of attributes. */
   83       protected static final XMLAttributes EMPTY_ATTRS = new XMLAttributesImpl();
   84   
   85       /** A newline XMLString. */
   86       private final XMLString NEWLINE = new XMLStringBuffer("\n");
   87   
   88       /** A newline + one space XMLString. */
   89       private final XMLString NEWLINE_ONE_SPACE = new XMLStringBuffer("\n ");
   90   
   91       /** A newline + two spaces XMLString. */
   92       private final XMLString NEWLINE_TWO_SPACES = new XMLStringBuffer("\n  ");
   93   
   94       //
   95       // Data
   96       //
   97   
   98       /** 
   99        * A string buffer for use in copying string into an XMLString
  100        * object for passing to the characters method.
  101        */
  102       private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
  103   
  104       //
  105       // XMLParserConfiguration methods
  106       //
  107   
  108       /**
  109        * Parse an XML document.
  110        * <p>
  111        * The parser can use this method to instruct this configuration
  112        * to begin parsing an XML document from any valid input source
  113        * (a character stream, a byte stream, or a URI).
  114        * <p>
  115        * Parsers may not invoke this method while a parse is in progress.
  116        * Once a parse is complete, the parser may then parse another XML
  117        * document.
  118        * <p>
  119        * This method is synchronous: it will not return until parsing
  120        * has ended.  If a client application wants to terminate 
  121        * parsing early, it should throw an exception.
  122        *
  123        * @param source The input source for the top-level of the
  124        *               XML document.
  125        *
  126        * @exception XNIException Any XNI exception, possibly wrapping 
  127        *                         another exception.
  128        * @exception IOException  An IO exception from the parser, possibly
  129        *                         from a byte stream or character stream
  130        *                         supplied by the parser.
  131        */
  132       public void parse(XMLInputSource source) 
  133           throws IOException, XNIException {
  134   
  135           // get reader
  136           openInputSourceStream(source);
  137           Reader reader = source.getCharacterStream();
  138           if (reader == null) {
  139               InputStream stream = source.getByteStream();
  140               reader = new InputStreamReader(stream);
  141           }
  142           BufferedReader bufferedReader = new BufferedReader(reader);
  143   
  144           // start document
  145           if (fDocumentHandler != null) {
  146               fDocumentHandler.startDocument(null, "UTF-8", new NamespaceSupport(), null);
  147               fDocumentHandler.xmlDecl("1.0", "UTF-8", "true", null);
  148               fDocumentHandler.doctypeDecl("csv", null, null, null);
  149           }
  150           if (fDTDHandler != null) {
  151               fDTDHandler.startDTD(null, null);
  152               fDTDHandler.elementDecl("csv", "(row)*", null);
  153               fDTDHandler.elementDecl("row", "(col)*", null);
  154               fDTDHandler.elementDecl("col", "(#PCDATA)", null);
  155           }
  156           if (fDTDContentModelHandler != null) {
  157               fDTDContentModelHandler.startContentModel("csv", null);
  158               fDTDContentModelHandler.startGroup(null);
  159               fDTDContentModelHandler.element("row", null);
  160               fDTDContentModelHandler.endGroup(null);
  161               short csvOccurs = XMLDTDContentModelHandler.OCCURS_ZERO_OR_MORE;
  162               fDTDContentModelHandler.occurrence(csvOccurs, null);
  163               fDTDContentModelHandler.endContentModel(null);
  164               
  165               fDTDContentModelHandler.startContentModel("row", null);
  166               fDTDContentModelHandler.startGroup(null);
  167               fDTDContentModelHandler.element("col", null);
  168               fDTDContentModelHandler.endGroup(null);
  169               short rowOccurs = XMLDTDContentModelHandler.OCCURS_ZERO_OR_MORE;
  170               fDTDContentModelHandler.occurrence(rowOccurs, null);
  171               fDTDContentModelHandler.endContentModel(null);
  172           
  173               fDTDContentModelHandler.startContentModel("col", null);
  174               fDTDContentModelHandler.startGroup(null);
  175               fDTDContentModelHandler.pcdata(null);
  176               fDTDContentModelHandler.endGroup(null);
  177               fDTDContentModelHandler.endContentModel(null);
  178           }
  179           if (fDTDHandler != null) {
  180               fDTDHandler.endDTD(null);
  181           }
  182           if (fDocumentHandler != null) {
  183               fDocumentHandler.startElement(CSV, EMPTY_ATTRS, null);
  184           }
  185   
  186           // read lines
  187           String line;
  188           while ((line = bufferedReader.readLine()) != null) {
  189               if (fDocumentHandler != null) {
  190                   fDocumentHandler.ignorableWhitespace(NEWLINE_ONE_SPACE, null);
  191                   fDocumentHandler.startElement(ROW, EMPTY_ATTRS, null);
  192                   StringTokenizer tokenizer = new StringTokenizer(line, ",");
  193                   while (tokenizer.hasMoreTokens()) {
  194                       fDocumentHandler.ignorableWhitespace(NEWLINE_TWO_SPACES, null);
  195                       fDocumentHandler.startElement(COL, EMPTY_ATTRS, null);
  196                       String token = tokenizer.nextToken();
  197                       fStringBuffer.clear();
  198                       fStringBuffer.append(token);
  199                       fDocumentHandler.characters(fStringBuffer, null);
  200                       fDocumentHandler.endElement(COL, null);
  201                   }
  202                   fDocumentHandler.ignorableWhitespace(NEWLINE_ONE_SPACE, null);
  203                   fDocumentHandler.endElement(ROW, null);
  204               }
  205           }
  206           bufferedReader.close();
  207   
  208           // end document
  209           if (fDocumentHandler != null) {
  210               fDocumentHandler.ignorableWhitespace(NEWLINE, null);
  211               fDocumentHandler.endElement(CSV, null);
  212               fDocumentHandler.endDocument(null);
  213           }
  214   
  215       } // parse(XMLInputSource)
  216       
  217       // NOTE: The following methods are overloaded to ignore setting
  218       //       of parser state so that this configuration does not
  219       //       throw configuration exceptions for features and properties
  220       //       that it doesn't care about.
  221   
  222       public void setFeature(String featureId, boolean state) {}
  223       public boolean getFeature(String featureId) { return false; }
  224       public void setProperty(String propertyId, Object value) {}
  225       public Object getProperty(String propertyId) { return null; }
  226   
  227   } // class CSVConfiguration

Home » Xerces-J-src.2.9.1 » xni » parser » [javadoc | source]