1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.text;
23
24 import java.io.IOException;
25 import java.io.Reader;
26 import java.io.Writer;
27 import java.util.regex.Pattern;
28
29 import de.fu_berlin.ties.ContextMap;
30 import de.fu_berlin.ties.ProcessingException;
31 import de.fu_berlin.ties.TextProcessor;
32 import de.fu_berlin.ties.TiesConfiguration;
33 import de.fu_berlin.ties.io.IOUtils;
34
35 /***
36 * Simplifies different kinds of quotes that can occur in text files, replacing
37 * all kinds of quotes by a " character.
38 *
39 * <p>The {@link #simplifyQuotes(String)} method can be used statically,
40 * instance creation is only necessary if you want to .
41 *
42 * @author Christian Siefkes
43 * @version $Revision: 1.5 $, $Date: 2006/10/21 16:04:25 $, $Author: siefkes $
44 */
45 public class SimplifyQuotes extends TextProcessor {
46
47 /***
48 * Pattern matching the different kinds of quotes that are simplified by
49 * this instance.
50 */
51 private static final Pattern QUOTE_PATTERN = Pattern.compile("(?:''|``|"
52 + "\u0171|\u0187|\u8216|\u0145|\u8217|\u0146|\u8218|\u0130|\u8220|"
53 + "\u0147|\u8221|\u0148|\u8222|\u0132|\u8249|\u0139|\u8250|\u0155)");
54
55
56 /***
57 * Simplifies different kinds of quotes that can occur in text files,
58 * replacing all kinds of quotes by a " character.
59 *
60 * @param input the input text to simplify
61 * @return a string created by simplifying all quotes in the input
62 */
63 public static String simplifyQuotes(final String input) {
64 return TextUtils.replaceAll(input, QUOTE_PATTERN, "\"");
65 }
66
67
68 /***
69 * Creates a new instance.
70 *
71 * @param outExt the extension to use for output files
72 */
73 public SimplifyQuotes(final String outExt) {
74 super(outExt);
75 }
76
77 /***
78 * Creates a new instance.
79 *
80 * @param outExt the extension to use for output files
81 * @param conf used to configure superclasses
82 */
83 public SimplifyQuotes(final String outExt, final TiesConfiguration conf) {
84 super(outExt, conf);
85 }
86
87 /***
88 * {@inheritDoc}
89 */
90 protected void doProcess(final Reader reader, final Writer writer,
91 final ContextMap context) throws IOException, ProcessingException {
92
93 writer.write(simplifyQuotes(IOUtils.readToString(reader)));
94 }
95
96 }