View Javadoc

1   /*-------------------------------------------------------------------------
2    Copyright 2006 Olivier Berlanger
3   
4    Licensed under the Apache License, Version 2.0 (the "License");
5    you may not use this file except in compliance with the License.
6    You may obtain a copy of the License at
7   
8    http://www.apache.org/licenses/LICENSE-2.0
9   
10   Unless required by applicable law or agreed to in writing, software
11   distributed under the License is distributed on an "AS IS" BASIS,
12   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   See the License for the specific language governing permissions and
14   limitations under the License.
15   -------------------------------------------------------------------------*/
16  package net.sf.xolite.sax;
17  
18  
19  import java.io.File;
20  import java.io.InputStream;
21  import java.net.URL;
22  import java.util.ArrayList;
23  import java.util.Iterator;
24  import java.util.List;
25  
26  import javax.xml.parsers.SAXParser;
27  import javax.xml.parsers.SAXParserFactory;
28  
29  import net.sf.xolite.XMLParseException;
30  import net.sf.xolite.XMLSerializable;
31  import net.sf.xolite.impl.BaseXMLEventParser;
32  import net.sf.xolite.utils.RootHolder;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.xml.sax.Attributes;
37  import org.xml.sax.ContentHandler;
38  import org.xml.sax.InputSource;
39  import org.xml.sax.Locator;
40  import org.xml.sax.SAXException;
41  import org.xml.sax.SAXParseException;
42  import org.xml.sax.XMLReader;
43  import org.xml.sax.helpers.DefaultHandler;
44  
45  
46  /**
47   * A <code>XMLEventParser</code> implementation based on a SAX parser.
48   * 
49   * @author Olivier Berlanger
50   */
51  public class SaxXMLEventParser extends BaseXMLEventParser {
52  
53      /** Apache Xerces property used to turn schema validation on. */
54      private static final String SCHEMA_VALIDATION_FEATURE_ID = "http://apache.org/xml/features/validation/schema";
55      /** Apache Xerces property used to define schema location. */
56      private static final String SCHEMA_LOCATION_PROPERTY = "http://apache.org/xml/properties/schema/external-schemaLocation";
57  
58      private static Log log = LogFactory.getLog(SaxXMLEventParser.class);
59  
60      /** SAX reader used for parsing. */
61      private XMLReader reader;
62      /** SAX locator used when parsing. */
63      private Locator saxLocator;
64      /** StringBuffer used to accumulate the characters. */
65      private StringBuffer charBuffer;
66      /** The attributes of the currently started element, non-null only when startElement is called. */
67      private Attributes currentAttributes;
68  
69  
70      /**
71       * Build a <code>StreamXMLEventParser</code> using the system-default <code>SAXParser</code> configured to be namespace aware
72       * and perform no validation.
73       * 
74       * @see javax.xml.parsers.SAXParserFactory
75       * @see javax.xml.parsers.SAXParser
76       */
77      public SaxXMLEventParser() {
78          try {
79              SAXParserFactory parserFactory = SAXParserFactory.newInstance();
80              log.info("Using SAX parser factory: " + parserFactory);
81              parserFactory.setNamespaceAware(true);
82              parserFactory.setValidating(false);
83              SAXParser parser = parserFactory.newSAXParser();
84              reader = parser.getXMLReader();
85          } catch (Exception e) {
86              throw new RuntimeException("Cannot configure parser", e);
87          }
88          init();
89      }
90  
91  
92      /**
93       * Build a <code>StreamXMLEventParser</code> using the system-default <code>SAXParser</code> configured to be namespace aware
94       * and perform validation using the provided schema locations. The 'schemaLocations' string must be a suite of pair 'namespace
95       * URI' + 'schema location URL' separated by spaces (just as the value of the standard 'xsi:schemaLocation' xml attribute).
96       * <p>
97       * <b>Caution: </b> This setup works with Sun JRE default parser (Xerces) but is not guaranteed to work if you (or a library
98       * you're using) have changed the default XML parser implementation. <br>
99       * In this case, you have to setup a validating parser yourself (following your parser documentation) and pass it to the
100      * <code>SaxXMLEventParser(XMLReader)</code> constructor.
101      * </p>
102      * 
103      * @param schemaLocations
104      *            the schema location expressed as in the standard 'xsi:schemaLocation' xml attributes. You can use the X-O lite
105      *            SchemaLocation helper to build this string if your schema is in your application jar.
106      * @see javax.xml.parsers.SAXParserFactory
107      * @see javax.xml.parsers.SAXParser
108      * @see net.sf.xolite.utils.SchemaLocation
109      */
110     public SaxXMLEventParser(String schemaLocations) {
111         try {
112             SAXParserFactory parserFactory = SAXParserFactory.newInstance();
113             log.info("Using SAX parser factory: " + parserFactory);
114             parserFactory.setNamespaceAware(true);
115             parserFactory.setValidating(true);
116             SAXParser parser = parserFactory.newSAXParser();
117             reader = parser.getXMLReader();
118             reader.setFeature(SCHEMA_VALIDATION_FEATURE_ID, true);
119             reader.setProperty(SCHEMA_LOCATION_PROPERTY, schemaLocations);
120         } catch (Exception e) {
121             throw new RuntimeException("Cannot configure parser", e);
122         }
123         init();
124     }
125 
126 
127     /**
128      * Build a <code>SimpleSaxParser</code> using the given <code>XMLReader</code>. The <code>XMLReader</code> must be correctly
129      * configured, this class will not change it's configuration (namespace aware, validating, ...).
130      */
131     public SaxXMLEventParser(XMLReader rdr) {
132         reader = rdr;
133         if (reader != null) init();
134     }
135 
136 
137     public ContentHandler initForExternalRead(XMLSerializable rootHandler) {
138         SAXHandlerImpl handler = new SAXHandlerImpl();
139         charBuffer = new StringBuffer();
140         setup(rootHandler);
141         return handler;
142     }
143 
144 
145     private void init() {
146         SAXHandlerImpl handler = new SAXHandlerImpl();
147         reader.setContentHandler(handler);
148         reader.setErrorHandler(handler);
149         charBuffer = new StringBuffer();
150     }
151 
152 
153     /**
154      * Parse the given file to java objects. <br>
155      * This method will succeed only if a factory knowing the class of the root element is defined.
156      * 
157      * @param f
158      *            file containing the XML to parse.
159      * @throws XMLParseException
160      *             if the source cannot be read or contains an invalid XML document.
161      */
162     public XMLSerializable parse(File f) throws XMLParseException {
163         return parse(getInputSource(f));
164     }
165 
166 
167     /**
168      * Parse a file to java objects using the given <code>XMLSerializable</code> as root handler (as object representing the root
169      * element).
170      * 
171      * @param f
172      *            file containing the XML to parse.
173      * @param rootHandler
174      *            the root handler.
175      * @throws XMLParseException
176      *             if the source cannot be read or contains an invalid XML document.
177      */
178     public void parse(File f, XMLSerializable rootHandler) throws XMLParseException {
179         parse(getInputSource(f), rootHandler);
180     }
181 
182 
183     /**
184      * Parse the given input stream to java objects. <br>
185      * This method will succeed only if a factory knowing the class of the root element is defined.
186      * 
187      * @param is
188      *            input stream containing the XML to parse.
189      * @throws XMLParseException
190      *             if the source cannot be read or contains an invalid XML document.
191      */
192     public XMLSerializable parse(InputStream is) throws XMLParseException {
193         return parse(new InputSource(is));
194     }
195 
196 
197     /**
198      * Parse an input stream to java objects using the given <code>XMLSerializable</code> as root handler (as object representing
199      * the root element).
200      * 
201      * @param is
202      *            input stream containing the XML to parse.
203      * @param rootHandler
204      *            the root handler.
205      * @throws XMLParseException
206      *             if the source cannot be read or contains an invalid XML document.
207      */
208     public void parse(InputStream is, XMLSerializable rootHandler) throws XMLParseException {
209         parse(new InputSource(is), rootHandler);
210     }
211 
212 
213     /**
214      * Parse the given URL content to java objects. <br>
215      * This method will succeed only if a factory knowing the class of the root element is defined.
216      * 
217      * @param url
218      *            URL pointing to file containing the XML to parse.
219      * @throws XMLParseException
220      *             if the source cannot be read or contains an invalid XML document.
221      */
222     public XMLSerializable parse(URL url) throws XMLParseException {
223         return parse(getInputSource(url));
224     }
225 
226 
227     /**
228      * Parse an URL content to java objects using the given <code>XMLSerializable</code> as root handler (as object representing the
229      * root element).
230      * 
231      * @param url
232      *            URL pointing to file containing the XML to parse.
233      * @param rootHandler
234      *            the root handler.
235      * @throws XMLParseException
236      *             if the source cannot be read or contains an invalid XML document.
237      */
238     public void parse(URL url, XMLSerializable rootHandler) throws XMLParseException {
239         parse(getInputSource(url), rootHandler);
240     }
241 
242 
243     /**
244      * Parse the given input source to java objects. <br>
245      * This method will succeed only if a factory knowing the class of the root element is defined.
246      * 
247      * @param src
248      *            input source for the SAX parsing.
249      * @throws XMLParseException
250      *             if the source cannot be read or contains an invalid XML document.
251      */
252     public XMLSerializable parse(InputSource src) throws XMLParseException {
253         RootHolder holder = new RootHolder();
254         parse(src, holder);
255         return holder.getRoot();
256     }
257 
258 
259     /**
260      * Parse an input source to java objects using the given <code>XMLSerializable</code> as root handler (as object representing
261      * the root element).
262      * 
263      * @param src
264      *            input source for the SAX parsing.
265      * @param rootHandler
266      *            the root handler.
267      * @throws XMLParseException
268      *             if the source cannot be read or contains an invalid XML document.
269      */
270     public void parse(InputSource src, XMLSerializable rootHandler) throws XMLParseException {
271         try {
272             if (src == null) throw new IllegalArgumentException("InputSource cannot be null");
273             if (rootHandler == null) throw new IllegalArgumentException("Root SaxHandler cannot be null");
274             setup(rootHandler);
275             reader.parse(src);
276             tearDown();
277         } catch (Exception e) {
278             transformAndThrowException(e);
279         }
280     }
281 
282 
283     private InputSource getInputSource(File f) {
284         InputSource is = new InputSource(f.toURI().toASCIIString());
285         is.setPublicId(f.getAbsolutePath());
286         return is;
287     }
288 
289 
290     private InputSource getInputSource(URL url) {
291         InputSource is = new InputSource(url.toExternalForm());
292         is.setPublicId(url.toString());
293         return is;
294     }
295 
296 
297     /**
298      * Get the current SAX locator (for reporting SAXParseException). This Locator is not null only when parsing.
299      */
300     public Locator getLocator() {
301         return saxLocator;
302     }
303 
304 
305     /**
306      * Get the current SAX locator (for reporting SAXParseException). This Locator is not null only when parsing.
307      */
308     void setLocator(Locator newLocator) {
309         saxLocator = newLocator;
310     }
311 
312 
313     // ------------------------ XMLEventParser interface implementation -----------------------------------------------
314 
315     public String getElementText() {
316         return charBuffer.toString();
317     }
318 
319 
320     // --------------------------------- Exception management -----------------------------------
321 
322     @Override
323     protected void addLocationInfo(XMLParseException xpe) {
324         if (saxLocator != null) {
325             String publicId = saxLocator.getPublicId();
326             if ((publicId != null) && !publicId.trim().equals("")) {
327                 xpe.setSource(publicId);
328             }
329             xpe.setLocation(saxLocator.getLineNumber(), saxLocator.getColumnNumber());
330         }
331     }
332 
333 
334     @Override
335     protected void transformAndThrowException(Exception source) throws XMLParseException {
336         if (source instanceof InnerTransportException) {
337             source = ((InnerTransportException) source).getTransportedException();
338         }
339         super.transformAndThrowException(source);
340     }
341 
342     /**
343      * Exception used internally to temporary wrap an inner exception into a SAXException (required by SAX handler interface).
344      */
345     static class InnerTransportException extends SAXException {
346 
347         private static final long serialVersionUID = 3386326111058301247L;
348 
349         private Exception transported;
350 
351 
352         InnerTransportException(Exception transportedEx) {
353             transported = transportedEx;
354         }
355 
356 
357         Exception getTransportedException() {
358             return transported;
359         }
360 
361     }
362 
363 
364     // --------------------------------- Attributes management -----------------------------------
365 
366     public String getAttributeValue(String attrName) throws XMLParseException {
367         return getAttributeValueNS(null, attrName);
368     }
369 
370 
371     public String getAttributeValueNS(String attrNamespaceURI, String attrName) throws XMLParseException {
372         if (currentAttributes == null) throw new XMLParseException(
373                 "getAttributeValue(..) can only be called from startElement(..) method");
374         String attrVal = (attrNamespaceURI == null) ? currentAttributes.getValue(attrName) : currentAttributes.getValue(
375                 attrNamespaceURI, attrName);
376         return attrVal;
377     }
378 
379 
380     public Iterator<String> getAttributeNamespaceIterator() throws XMLParseException {
381         if (currentAttributes == null) throw new XMLParseException(
382                 "getAttributeNameIterator(..) can only be called from startElement(..) method");
383         List<String> uris = new ArrayList<String>();
384         int len = currentAttributes.getLength();
385         for (int i = 0; i < len; i++) {
386             String uri = currentAttributes.getURI(i);
387             if ("".equals(uri)) uri = null;
388             if (!uris.contains(uri)) {
389                 if (log.isDebugEnabled()) {
390                     log.debug("Found attribute namespace " + uri + " for attribute: " + currentAttributes.getLocalName(i));
391                 }
392                 uris.add(uri);
393             }
394         }
395         return uris.iterator();
396     }
397 
398 
399     public Iterator<String> getAttributeNameIterator(String attrNamespaceURI) throws XMLParseException {
400         if (currentAttributes == null) throw new XMLParseException(
401                 "getAttributeNameIterator(..) can only be called from startElement(..) method");
402         return new AttributeNameIterator(attrNamespaceURI, currentAttributes);
403     }
404 
405 
406     void startElementImpl(String uri, String localName, Attributes attributes) throws Exception {
407         currentAttributes = attributes;
408         startElementImpl(uri, localName);
409         currentAttributes = null;
410         charBuffer.setLength(0);
411     }
412 
413 
414     @Override
415     protected void endElementImpl(String uri, String localName) throws Exception {
416         super.endElementImpl(uri, localName);
417         charBuffer.setLength(0);
418     }
419 
420 
421     /**
422      * Rem: overridden just to increase visibility.
423      */
424     @Override
425     protected void pushPrefixMappingInNextLevel(String newPrefix, String newNamespaceURI) {
426         super.pushPrefixMappingInNextLevel(newPrefix, newNamespaceURI);
427     }
428 
429 
430     void appendCharacters(char[] ch, int start, int length) {
431         if (charBuffer != null) charBuffer.append(ch, start, length);
432     }
433 
434     // ----------------------- SAX handler implementation ----------------------------------------------
435 
436     /**
437      * The SAX handler used internally by this event parser.
438      */
439     class SAXHandlerImpl extends DefaultHandler {
440 
441         @Override
442         public void setDocumentLocator(Locator locator) {
443             setLocator(locator);
444         }
445 
446 
447         @Override
448         public void endDocument() throws SAXException {
449             setLocator(null);
450         }
451 
452 
453         @Override
454         public void startPrefixMapping(String prefix, String uri) throws SAXException {
455             pushPrefixMappingInNextLevel(prefix, uri);
456         }
457 
458 
459         @Override
460         public void endPrefixMapping(String prefix) throws SAXException {
461             // the mapping will disappear with the level
462         }
463 
464 
465         @Override
466         public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
467             try {
468                 startElementImpl(uri, localName, attributes);
469             } catch (Exception ex) {
470                 throw new InnerTransportException(ex);
471             }
472         }
473 
474 
475         @Override
476         public void endElement(String uri, String localName, String qName) throws SAXException {
477             try {
478                 endElementImpl(uri, localName);
479             } catch (Exception ex) {
480                 throw new InnerTransportException(ex);
481             }
482         }
483 
484 
485         @Override
486         public void error(SAXParseException e) throws SAXException {
487             throw e;
488         }
489 
490 
491         @Override
492         public void characters(char[] ch, int start, int length) throws SAXException {
493             appendCharacters(ch, start, length);
494         }
495 
496 
497         @Override
498         public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
499             appendCharacters(ch, start, length);
500         }
501 
502     }
503 
504     // --------------------- Inner Attribute Name Iterator implementation -------------------------------
505 
506     /**
507      * Iterator on attribute mames of a SAX element.
508      */
509     static class AttributeNameIterator implements Iterator<String> {
510 
511         private String namespaceURI;
512         private Attributes attrList;
513         private int nbrAttr;
514         private int index;
515 
516 
517         AttributeNameIterator(String uri, Attributes attrs) {
518             namespaceURI = uri;
519             attrList = attrs;
520             index = 0;
521             nbrAttr = (attrList == null) ? 0 : attrList.getLength();
522             checkNextAttibuteNamespace();
523         }
524 
525 
526         private void checkNextAttibuteNamespace() {
527             boolean namespaceOK;
528             String attributeURI;
529             while (index < nbrAttr) {
530                 attributeURI = attrList.getURI(index);
531                 if (namespaceURI == null) namespaceOK = (attributeURI == null) || attributeURI.equals("");
532                 else namespaceOK = namespaceURI.equals(attributeURI);
533                 if (namespaceOK) break;
534                 else index++;
535             }
536         }
537 
538 
539         public boolean hasNext() {
540             return index < nbrAttr;
541         }
542 
543 
544         public String next() {
545             String name = attrList.getLocalName(index);
546             index++;
547             checkNextAttibuteNamespace();
548             return name;
549         }
550 
551 
552         public void remove() {
553             throw new UnsupportedOperationException("Cannot remove attribute");
554         }
555 
556     }
557 
558 }