001 /*
002 * Copyright 2005 John G. Wilson
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 */
017
018 package groovy.util;
019
020 import groovy.util.slurpersupport.GPathResult;
021 import groovy.util.slurpersupport.Node;
022 import groovy.util.slurpersupport.NodeChild;
023
024 import java.io.File;
025 import java.io.FileInputStream;
026 import java.io.IOException;
027 import java.io.InputStream;
028 import java.io.Reader;
029 import java.io.StringReader;
030 import java.net.URL;
031 import java.security.AccessController;
032 import java.security.PrivilegedActionException;
033 import java.security.PrivilegedExceptionAction;
034 import java.util.HashMap;
035 import java.util.Hashtable;
036 import java.util.Map;
037 import java.util.Stack;
038
039 import javax.xml.parsers.ParserConfigurationException;
040 import javax.xml.parsers.SAXParser;
041 import javax.xml.parsers.SAXParserFactory;
042
043 import org.xml.sax.Attributes;
044 import org.xml.sax.DTDHandler;
045 import org.xml.sax.EntityResolver;
046 import org.xml.sax.ErrorHandler;
047 import org.xml.sax.InputSource;
048 import org.xml.sax.SAXException;
049 import org.xml.sax.SAXNotRecognizedException;
050 import org.xml.sax.SAXNotSupportedException;
051 import org.xml.sax.XMLReader;
052 import org.xml.sax.helpers.DefaultHandler;
053
054 /**
055 * @author John Wilson
056 *
057 */
058
059 public class XmlSlurper extends DefaultHandler {
060 private final XMLReader reader;
061 private Node currentNode = null;
062 private final Stack stack = new Stack();
063 private final StringBuffer charBuffer = new StringBuffer();
064 private final Map namespaceTagHints = new Hashtable();
065
066 public XmlSlurper() throws ParserConfigurationException, SAXException {
067 this(false, true);
068 }
069
070 public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
071 SAXParserFactory factory = null;
072
073 try {
074 factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
075 public Object run() throws ParserConfigurationException {
076 return SAXParserFactory.newInstance();
077 }
078 });
079 } catch (final PrivilegedActionException pae) {
080 final Exception e = pae.getException();
081
082 if (e instanceof ParserConfigurationException) {
083 throw (ParserConfigurationException) e;
084 } else {
085 throw new RuntimeException(e);
086 }
087 }
088 factory.setNamespaceAware(namespaceAware);
089 factory.setValidating(validating);
090
091 final SAXParser parser = factory.newSAXParser();
092 this.reader = parser.getXMLReader();
093 }
094
095 public XmlSlurper(final XMLReader reader) {
096 this.reader = reader;
097 }
098
099 public XmlSlurper(final SAXParser parser) throws SAXException {
100 this(parser.getXMLReader());
101 }
102
103 /**
104 * @return The GPathResult instance created by consuming a stream of SAX events
105 * Note if one of the parse methods has been called then this returns null
106 * Note if this is called more than once all calls after the first will return null
107 *
108 */
109 public GPathResult getDocument() {
110 try {
111 return new NodeChild(this.currentNode, null, this.namespaceTagHints);
112 } finally {
113 this.currentNode = null;
114 }
115 }
116
117 /**
118 * Parse the content of the specified input source into a GPathResult object
119 *
120 * @param input
121 * @return An object which supports GPath expressions
122 * @throws IOException
123 * @throws SAXException
124 */
125 public GPathResult parse(final InputSource input) throws IOException, SAXException {
126 this.reader.setContentHandler(this);
127 this.reader.parse(input);
128
129 return getDocument();
130
131 }
132
133 /**
134 * Parses the content of the given file as XML turning it into a GPathResult object
135 *
136 * @param file
137 * @return An object which supports GPath expressions
138 * @throws IOException
139 * @throws SAXException
140 */
141 public GPathResult parse(final File file) throws IOException, SAXException {
142 final InputSource input = new InputSource(new FileInputStream(file));
143
144 input.setSystemId("file://" + file.getAbsolutePath());
145
146 return parse(input);
147
148 }
149
150 /**
151 * Parse the content of the specified input stream into an GPathResult Object.
152 * Note that using this method will not provide the parser with any URI
153 * for which to find DTDs etc
154 *
155 * @param input
156 * @return An object which supports GPath expressions
157 * @throws IOException
158 * @throws SAXException
159 */
160 public GPathResult parse(final InputStream input) throws IOException, SAXException {
161 return parse(new InputSource(input));
162 }
163
164 /**
165 * Parse the content of the specified reader into a GPathResult Object.
166 * Note that using this method will not provide the parser with any URI
167 * for which to find DTDs etc
168 *
169 * @param in
170 * @return An object which supports GPath expressions
171 * @throws IOException
172 * @throws SAXException
173 */
174 public GPathResult parse(final Reader in) throws IOException, SAXException {
175 return parse(new InputSource(in));
176 }
177
178 /**
179 * Parse the content of the specified URI into a GPathResult Object
180 *
181 * @param uri
182 * @return An object which supports GPath expressions
183 * @throws IOException
184 * @throws SAXException
185 */
186 public GPathResult parse(final String uri) throws IOException, SAXException {
187 return parse(new InputSource(uri));
188 }
189
190 /**
191 * A helper method to parse the given text as XML
192 *
193 * @param text
194 * @return An object which supports GPath expressions
195 */
196 public GPathResult parseText(final String text) throws IOException, SAXException {
197 return parse(new StringReader(text));
198 }
199
200 // Delegated XMLReader methods
201 //------------------------------------------------------------------------
202
203 /* (non-Javadoc)
204 * @see org.xml.sax.XMLReader#getDTDHandler()
205 */
206 public DTDHandler getDTDHandler() {
207 return this.reader.getDTDHandler();
208 }
209
210 /* (non-Javadoc)
211 * @see org.xml.sax.XMLReader#getEntityResolver()
212 */
213 public EntityResolver getEntityResolver() {
214 return this.reader.getEntityResolver();
215 }
216
217 /* (non-Javadoc)
218 * @see org.xml.sax.XMLReader#getErrorHandler()
219 */
220 public ErrorHandler getErrorHandler() {
221 return this.reader.getErrorHandler();
222 }
223
224 /* (non-Javadoc)
225 * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
226 */
227 public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
228 return this.reader.getFeature(uri);
229 }
230
231 /* (non-Javadoc)
232 * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
233 */
234 public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
235 return this.reader.getProperty(uri);
236 }
237
238 /* (non-Javadoc)
239 * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
240 */
241 public void setDTDHandler(final DTDHandler dtdHandler) {
242 this.reader.setDTDHandler(dtdHandler);
243 }
244
245 /* (non-Javadoc)
246 * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
247 */
248 public void setEntityResolver(final EntityResolver entityResolver) {
249 this.reader.setEntityResolver(entityResolver);
250 }
251
252 /**
253 * Resolves entities against using the suppied URL as the base for relative URLs
254 *
255 * @param base
256 * The URL used to resolve relative URLs
257 */
258 public void setEntityBaseUrl(final URL base) {
259 this.reader.setEntityResolver(new EntityResolver() {
260 public InputSource resolveEntity(final String publicId, final String systemId) throws IOException {
261 return new InputSource(new URL(base, systemId).openStream());
262 }
263 });
264 }
265
266 /* (non-Javadoc)
267 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
268 */
269 public void setErrorHandler(final ErrorHandler errorHandler) {
270 this.reader.setErrorHandler(errorHandler);
271 }
272
273 /* (non-Javadoc)
274 * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
275 */
276 public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
277 this.reader.setFeature(uri, value);
278 }
279
280 /* (non-Javadoc)
281 * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
282 */
283 public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
284 this.reader.setProperty(uri, value);
285 }
286
287
288 // ContentHandler interface
289 //-------------------------------------------------------------------------
290
291 /* (non-Javadoc)
292 * @see org.xml.sax.ContentHandler#startDocument()
293 */
294 public void startDocument() throws SAXException {
295 this.currentNode = null;
296 this.charBuffer.setLength(0);
297 }
298
299 /* (non-Javadoc)
300 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
301 */
302 public void startPrefixMapping(final String tag, final String uri) throws SAXException {
303 this.namespaceTagHints.put(tag, uri);
304 }
305
306 /* (non-Javadoc)
307 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
308 */
309 public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
310 addNonWhitespaceCdata();
311
312 final Map attributes = new HashMap();
313 final Map attributeNamespaces = new HashMap();
314
315 for (int i = atts.getLength() - 1; i != -1; i--) {
316 if (atts.getURI(i).length() == 0) {
317 attributes.put(atts.getQName(i), atts.getValue(i));
318 } else {
319 attributes.put(atts.getLocalName(i), atts.getValue(i));
320 attributeNamespaces.put(atts.getLocalName(i), atts.getURI(i));
321 }
322
323 }
324
325 final Node newElement;
326
327 if (namespaceURI.length() == 0){
328 newElement = new Node(this.currentNode, qName, attributes, attributeNamespaces, namespaceURI);
329 } else {
330 newElement = new Node(this.currentNode, localName, attributes, attributeNamespaces, namespaceURI);
331 }
332
333 if (this.currentNode != null) {
334 this.currentNode.addChild(newElement);
335 }
336
337 this.stack.push(this.currentNode);
338 this.currentNode = newElement;
339 }
340
341 /* (non-Javadoc)
342 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
343 */
344 public void characters(final char[] ch, final int start, final int length) throws SAXException {
345 this.charBuffer.append(ch, start, length);
346 }
347
348 /* (non-Javadoc)
349 * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
350 */
351 public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
352 addNonWhitespaceCdata();
353
354 final Object oldCurrentNode = this.stack.pop();
355
356 if (oldCurrentNode != null) {
357 this.currentNode = (Node)oldCurrentNode;
358 }
359 }
360
361 /* (non-Javadoc)
362 * @see org.xml.sax.ContentHandler#endDocument()
363 */
364 public void endDocument() throws SAXException {
365 }
366
367 // Implementation methods
368 //-------------------------------------------------------------------------
369
370 /**
371 *
372 */
373 private void addNonWhitespaceCdata() {
374 if (this.charBuffer.length() != 0) {
375 //
376 // This element is preceeded by CDATA if it's not whitespace add it to the body
377 // Note that, according to the XML spec, we should preserve the CDATA if it's all whitespace
378 // but for the sort of work I'm doing ignoring the whitespace is preferable
379 //
380 final String cdata = this.charBuffer.toString();
381
382 this.charBuffer.setLength(0);
383 if (cdata.trim().length() != 0) {
384 this.currentNode.addChild(cdata);
385 }
386 }
387 }
388 }