View Javadoc

1   /*
2    * Copyright (C) 2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.xml.dom;
23  
24  import java.io.IOException;
25  import java.io.Writer;
26  
27  import org.dom4j.Document;
28  
29  import de.fu_berlin.ties.ContextMap;
30  import de.fu_berlin.ties.DocumentReader;
31  import de.fu_berlin.ties.TiesConfiguration;
32  
33  /***
34   * An XML stripper converts a XML document to plain text, removing all markup.
35   * <p>This class is thread-safe and can be used to convert several documents
36   * in parallel.
37   *
38   * @author Christian Siefkes
39   * @version $Revision: 1.4 $, $Date: 2004/08/23 17:11:21 $, $Author: siefkes $
40   */
41  public class XMLStripper extends DocumentReader {
42  
43      /***
44       * Creates a new instance, using a default extension and the
45       * {@linkplain TiesConfiguration#CONF standard configuration}.
46       */
47      public XMLStripper() {
48          this("txt");
49      }
50  
51      /***
52       * Creates a new instance, using the
53       * {@linkplain TiesConfiguration#CONF standard configuration}.
54       *
55       * @param outExt the extension to use for output files
56       */
57      public XMLStripper(final String outExt) {
58          this(outExt, TiesConfiguration.CONF);
59      }
60  
61      /***
62       * Creates a new instance.
63       *
64       * @param outExt the extension to use for output files
65       * @param config used to configure superclasses
66       */
67      public XMLStripper(final String outExt, final TiesConfiguration config) {
68          super(outExt, config);
69      }
70  
71      /***
72       * Strips all markup from an XML document and stores the resulting plain
73       * text. This method just delegates to
74       * {@link DOMUtils#collectText(org.dom4j.Branch, StringBuffer)}.
75       *
76       * @param document the document to read
77       * @param writer the writer to write the resulting plain text to; flushed
78       * but not closed by this method
79       * @param context a map of objects that are made available for processing
80       * @throws IOException if an I/O error occurs
81       */
82      public void process(final Document document, final Writer writer,
83              final ContextMap context) throws IOException {
84          DOMUtils.collectText(document, writer);
85      }
86  
87  }