1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.eval;
23
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.lang.builder.ToStringBuilder;

import de.fu_berlin.ties.ContextMap;
import de.fu_berlin.ties.TextProcessor;
import de.fu_berlin.ties.TiesConfiguration;
import de.fu_berlin.ties.io.IOUtils;
import de.fu_berlin.ties.text.TextUtils;
38
39 /***
40 * Randomly reshuffles the lines in a file (except for the first <em>n</em>
41 * lines, if configured).
42 *
43 * @author Christian Siefkes
44 * @version $Revision: 1.4 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
45 */
46 public class LineShuffleGenerator extends TextProcessor {
47
48 /***
49 * Randomly reshuffles the lines in a file (except for the first
50 * <code>ignoreFirst</code> lines).
51 *
52 * @param reader reader containing the text to process; not closed
53 * by this method
54 * @param writer the writer to write the processed text to; flushed
55 * but not closed by this method
56 * @param ignoreFirst the number of lines at the start of a file that
57 * should be ignored, if any; must be 0 or positive
58 * @throws IllegalArgumentException if <code>ignoreFirst</code> is negative
59 * @throws IOException if an I/O error occurs while reading or writing
60 */
61 protected static void shuffleLines(final Reader reader, final Writer writer,
62 final int ignoreFirst)
63 throws IllegalArgumentException, IOException {
64
65 if (ignoreFirst < 0) {
66 throw new IllegalArgumentException(
67 "LineShuffleGenerator: Number of lines to ignore is negative: "
68 + ignoreFirst);
69 }
70
71
72 final String input = IOUtils.readToString(reader);
73 final String[] lines = TextUtils.splitLinesExact(input);
74
75 if (ignoreFirst > lines.length) {
76
77
78 writer.write(input);
79 } else {
80 int i = 0;
81
82
83 for (; i < ignoreFirst; i++) {
84 IOUtils.writeLine(lines[i], writer);
85 }
86
87
88 final List<String> linesToShuffle = new LinkedList<String>();
89 for (; i < lines.length; i++) {
90 linesToShuffle.add(lines[i]);
91 }
92
93
94 final List<String> shuffledLines =
95 ShuffleGenerator.shuffle(linesToShuffle);
96 final Iterator<String> iter = shuffledLines.iterator();
97
98 while (iter.hasNext()) {
99 IOUtils.writeLine(iter.next(), writer);
100 }
101 }
102
103 writer.flush();
104 }
105
106
107 /***
108 * The number of lines at the start of a file that are ignored (not
109 * reshuffled).
110 */
111 private final int firstIgnored;
112
113 /***
114 * Creates a new instance from the
115 * {@linkplain TiesConfiguration#CONF standard configuration}.
116 *
117 * @param outExt the extension to use for output files
118 */
119 public LineShuffleGenerator(final String outExt) {
120 this(outExt, TiesConfiguration.CONF);
121 }
122
123 /***
124 * Creates a new instance from the provided configuration.
125 *
126 * @param outExt the extension to use for output files
127 * @param conf used to configure this instance; must not be
128 * <code>null</code>
129 */
130 public LineShuffleGenerator(final String outExt,
131 final TiesConfiguration conf) {
132 this(outExt, conf.getInt("shuffle.lines.ignore-first"), conf);
133 }
134
135 /***
136 * Creates a new instance.
137 *
138 * @param outExt the extension to use for output files
139 * @param ignoredLines the number of lines at the start of a file that
140 * should be ignored, if any; must be 0 or positive
141 * @param conf passed to the superclass; if <code>null</code>,
142 * the {@linkplain TiesConfiguration#CONF standard configuration} is used
143 */
144 public LineShuffleGenerator(final String outExt, final int ignoredLines,
145 final TiesConfiguration conf) {
146 super(outExt, conf);
147 firstIgnored = ignoredLines;
148 }
149
150
151 /***
152 * {@inheritDoc} This implementation delegates to the static
153 * {@link #shuffleLines(Reader, Writer, int)} method, using the configured
154 * number of lines to ignore.
155 */
156 protected void doProcess(final Reader reader, final Writer writer,
157 final ContextMap context) throws IOException {
158 shuffleLines(reader, writer, firstIgnored);
159 }
160
161 /***
162 * Returns a string representation of this object.
163 *
164 * @return a textual representation
165 */
166 public String toString() {
167 return new ToStringBuilder(this)
168 .append("first ignored", firstIgnored)
169 .toString();
170 }
171
172 }