/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package xni.parser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.StringTokenizer;

import org.apache.xerces.util.NamespaceSupport;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDTDContentModelHandler;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLInputSource;

/**
 * This example is a very simple parser configuration that can
 * parse files with comma-separated values (CSV) to generate XML
 * events. For example, the following CSV document:
 * <pre>
 * Andy Clark,16 Jan 1973,Cincinnati
 * </pre>
 * produces the following XML "document" as represented by the
 * XNI streaming document information:
 * <pre>
 * &lt;?xml version='1.0' encoding='UTF-8' standalone='true'?&gt;
 * &lt;!DOCTYPE csv [
 * &lt;!ELEMENT csv (row)*&gt;
 * &lt;!ELEMENT row (col)*&gt;
 * &lt;!ELEMENT col (#PCDATA)&gt;
 * ]&gt;
 * &lt;csv&gt;
 *  &lt;row&gt;
 *   &lt;col&gt;Andy Clark&lt;/col&gt;
 *   &lt;col&gt;16 Jan 1973&lt;/col&gt;
 *   &lt;col&gt;Cincinnati&lt;/col&gt;
 *  &lt;/row&gt;
 * &lt;/csv&gt;
 * </pre>
 * <p>
 * Fields are separated by a plain comma; there is no support for
 * quoting or escaping. Empty fields are preserved as empty
 * <code>col</code> elements so that column positions stay aligned
 * (for example <code>a,,c</code> yields three columns). A completely
 * empty line yields a <code>row</code> element without columns.
 *
 * @author Andy Clark, IBM
 *
 * @version $Id: CSVConfiguration.java 447690 2006-09-19 02:41:53Z mrglavas $
 */
public class CSVConfiguration
    extends AbstractConfiguration {

    //
    // Constants
    //

    /** A QName for the &lt;csv&gt; element name. */
    protected static final QName CSV = new QName(null, null, "csv", null);

    /** A QName for the &lt;row&gt; element name. */
    protected static final QName ROW = new QName(null, null, "row", null);

    /** A QName for the &lt;col&gt; element name. */
    protected static final QName COL = new QName(null, null, "col", null);

    /** An empty list of attributes. */
    protected static final XMLAttributes EMPTY_ATTRS = new XMLAttributesImpl();

    /**
     * The encoding reported in the document events, and the encoding used
     * to decode a byte stream when the input source does not name one.
     */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** The character that separates two fields on a line. */
    private static final char FIELD_SEPARATOR = ',';

    // NOTE: The whitespace strings below are deliberately kept as
    //       instance fields: XMLStringBuffer is mutable (see the
    //       clear/append calls on fStringBuffer), so the instances
    //       handed to handlers are not shared between configurations.

    /** A newline XMLString. */
    private final XMLString NEWLINE = new XMLStringBuffer("\n");

    /** A newline + one space XMLString. */
    private final XMLString NEWLINE_ONE_SPACE = new XMLStringBuffer("\n ");

    /** A newline + two spaces XMLString. */
    private final XMLString NEWLINE_TWO_SPACES = new XMLStringBuffer("\n  ");

    //
    // Data
    //

    /**
     * A string buffer for use in copying string into an XMLString
     * object for passing to the characters method.
     */
    private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    //
    // XMLParserConfiguration methods
    //

    /**
     * Parses a CSV document and reports it as XML document events.
     * <p>
     * The parser can use this method to instruct this configuration
     * to begin parsing a document from any valid input source
     * (a character stream, a byte stream, or a URI).
     * <p>
     * Parsers may not invoke this method while a parse is in progress.
     * Once a parse is complete, the parser may then parse another
     * document.
     * <p>
     * This method is synchronous: it will not return until parsing
     * has ended. If a client application wants to terminate
     * parsing early, it should throw an exception. The underlying
     * reader is closed whether parsing completes normally or not.
     * <p>
     * When the source only supplies a byte stream, it is decoded with
     * the encoding named by the source, or with UTF-8 if the source
     * does not name one.
     *
     * @param source The input source for the top-level of the
     *               document.
     *
     * @exception XNIException Any XNI exception, possibly wrapping
     *                         another exception.
     * @exception IOException  An IO exception from the parser, possibly
     *                         from a byte stream or character stream
     *                         supplied by the parser.
     */
    public void parse(XMLInputSource source)
        throws IOException, XNIException {

        BufferedReader bufferedReader = openCsvReader(source);
        try {
            sendDocumentProlog();

            // read lines; the input is consumed even when nobody listens
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                if (fDocumentHandler != null) {
                    sendRow(line);
                }
            }
        }
        finally {
            // close in all cases so that a handler that aborts the
            // parse by throwing does not leak the stream
            bufferedReader.close();
        }

        // end document
        if (fDocumentHandler != null) {
            fDocumentHandler.ignorableWhitespace(NEWLINE, null);
            fDocumentHandler.endElement(CSV, null);
            fDocumentHandler.endDocument(null);
        }

    } // parse(XMLInputSource)

    //
    // Private methods
    //

    /**
     * Opens the input source and wraps its content in a buffered reader.
     * A character stream supplied by the source is used as is; otherwise
     * the byte stream is decoded explicitly instead of relying on the
     * platform default charset.
     */
    private BufferedReader openCsvReader(XMLInputSource source)
        throws IOException, XNIException {

        openInputSourceStream(source);
        Reader reader = source.getCharacterStream();
        if (reader == null) {
            String encoding = source.getEncoding();
            if (encoding == null) {
                encoding = DEFAULT_ENCODING;
            }
            InputStream stream = source.getByteStream();
            reader = new InputStreamReader(stream, encoding);
        }
        return new BufferedReader(reader);

    } // openCsvReader(XMLInputSource):BufferedReader

    /**
     * Sends everything that precedes the first row: the document start,
     * the XML and doctype declarations, the fixed internal DTD and the
     * start of the root element. Each handler is only called if it is set.
     */
    private void sendDocumentProlog() throws XNIException {

        // start document
        if (fDocumentHandler != null) {
            fDocumentHandler.startDocument(null, DEFAULT_ENCODING, new NamespaceSupport(), null);
            fDocumentHandler.xmlDecl("1.0", DEFAULT_ENCODING, "true", null);
            fDocumentHandler.doctypeDecl("csv", null, null, null);
        }

        // DTD
        if (fDTDHandler != null) {
            fDTDHandler.startDTD(null, null);
            fDTDHandler.elementDecl("csv", "(row)*", null);
            fDTDHandler.elementDecl("row", "(col)*", null);
            fDTDHandler.elementDecl("col", "(#PCDATA)", null);
        }
        if (fDTDContentModelHandler != null) {
            sendRepeatedChildModel("csv", "row");
            sendRepeatedChildModel("row", "col");

            fDTDContentModelHandler.startContentModel("col", null);
            fDTDContentModelHandler.startGroup(null);
            fDTDContentModelHandler.pcdata(null);
            fDTDContentModelHandler.endGroup(null);
            fDTDContentModelHandler.endContentModel(null);
        }
        if (fDTDHandler != null) {
            fDTDHandler.endDTD(null);
        }

        // root element
        if (fDocumentHandler != null) {
            fDocumentHandler.startElement(CSV, EMPTY_ATTRS, null);
        }

    } // sendDocumentProlog()

    /**
     * Sends the content model "(child)*" for the given element.
     * Callers must have checked that the content model handler is set.
     */
    private void sendRepeatedChildModel(String element, String child)
        throws XNIException {

        fDTDContentModelHandler.startContentModel(element, null);
        fDTDContentModelHandler.startGroup(null);
        fDTDContentModelHandler.element(child, null);
        fDTDContentModelHandler.endGroup(null);
        short occurs = XMLDTDContentModelHandler.OCCURS_ZERO_OR_MORE;
        fDTDContentModelHandler.occurrence(occurs, null);
        fDTDContentModelHandler.endContentModel(null);

    } // sendRepeatedChildModel(String,String)

    /**
     * Sends one row element containing one column element per field
     * of the line. Callers must have checked that the document handler
     * is set.
     */
    private void sendRow(String line) throws XNIException {

        fDocumentHandler.ignorableWhitespace(NEWLINE_ONE_SPACE, null);
        fDocumentHandler.startElement(ROW, EMPTY_ATTRS, null);

        // An empty line has no fields at all. Otherwise split on every
        // separator by hand: StringTokenizer would silently drop empty
        // fields and thereby shift the remaining columns.
        if (line.length() > 0) {
            int start = 0;
            int separator = line.indexOf(FIELD_SEPARATOR, start);
            while (separator != -1) {
                sendColumn(line.substring(start, separator));
                start = separator + 1;
                separator = line.indexOf(FIELD_SEPARATOR, start);
            }
            sendColumn(line.substring(start));
        }

        fDocumentHandler.ignorableWhitespace(NEWLINE_ONE_SPACE, null);
        fDocumentHandler.endElement(ROW, null);

    } // sendRow(String)

    /**
     * Sends one column element holding the given field. An empty field
     * produces an empty element without a characters event.
     */
    private void sendColumn(String field) throws XNIException {

        fDocumentHandler.ignorableWhitespace(NEWLINE_TWO_SPACES, null);
        fDocumentHandler.startElement(COL, EMPTY_ATTRS, null);
        if (field.length() > 0) {
            fStringBuffer.clear();
            fStringBuffer.append(field);
            fDocumentHandler.characters(fStringBuffer, null);
        }
        fDocumentHandler.endElement(COL, null);

    } // sendColumn(String)

    // NOTE: The following methods are overridden to ignore setting
    //       of parser state so that this configuration does not
    //       throw configuration exceptions for features and properties
    //       that it doesn't care about.

    /** Ignores the request; no feature state is stored. */
    public void setFeature(String featureId, boolean state) {}

    /** Always reports <code>false</code>, whatever the feature. */
    public boolean getFeature(String featureId) { return false; }

    /** Ignores the request; no property value is stored. */
    public void setProperty(String propertyId, Object value) {}

    /** Always returns <code>null</code>, whatever the property. */
    public Object getProperty(String propertyId) { return null; }

} // class CSVConfiguration