View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.eval;
23  
24  import java.io.IOException;
25  import java.io.Reader;
26  import java.io.Writer;
27  import java.util.Iterator;
28  import java.util.LinkedList;
29  import java.util.List;
30  
31  import org.apache.commons.lang.builder.ToStringBuilder;
32  
33  import de.fu_berlin.ties.ContextMap;
34  import de.fu_berlin.ties.TextProcessor;
35  import de.fu_berlin.ties.TiesConfiguration;
36  import de.fu_berlin.ties.io.IOUtils;
37  import de.fu_berlin.ties.text.TextUtils;
38  
39  /***
40   * Randomly reshuffles the lines in a file (except for the first <em>n</em>
41   * lines, if configured).
42   *
43   * @author Christian Siefkes
44   * @version $Revision: 1.4 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
45   */
46  public class LineShuffleGenerator extends TextProcessor {
47  
48      /***
49       * Randomly reshuffles the lines in a file (except for the first
50       * <code>ignoreFirst</code> lines).
51       *
52       * @param reader reader containing the text to process; not closed
53       * by this method
54       * @param writer the writer to write the processed text to; flushed
55       * but not closed by this method
56       * @param ignoreFirst the number of lines at the start of a file that
57       * should be ignored, if any; must be 0 or positive
58       * @throws IllegalArgumentException if <code>ignoreFirst</code> is negative
59       * @throws IOException if an I/O error occurs while reading or writing
60       */
61      protected static void shuffleLines(final Reader reader, final Writer writer,
62              final int ignoreFirst)
63      throws IllegalArgumentException, IOException {
64          // check argument
65          if (ignoreFirst < 0) {
66              throw new IllegalArgumentException(
67                  "LineShuffleGenerator: Number of lines to ignore is negative: "
68                  + ignoreFirst);
69          }
70  
71          // split input
72          final String input = IOUtils.readToString(reader);
73          final String[] lines = TextUtils.splitLinesExact(input);
74  
75          if (ignoreFirst > lines.length) {
76              // all lines should be excepted from shuffling
77              // -- copy full text to output
78              writer.write(input);
79          } else {
80              int i = 0;
81  
82              // write first n lines unchanged:
83              for (; i < ignoreFirst; i++) {
84                  IOUtils.writeLine(lines[i], writer);
85              }
86  
87              // collect remaining lines for shuffling
88              final List<String> linesToShuffle = new LinkedList<String>();
89              for (; i < lines.length; i++) {
90                  linesToShuffle.add(lines[i]);
91              }
92  
93              // shuffle + write to output
94              final List<String> shuffledLines =
95                  ShuffleGenerator.shuffle(linesToShuffle);
96              final Iterator<String> iter = shuffledLines.iterator();
97  
98              while (iter.hasNext()) {
99                  IOUtils.writeLine(iter.next(), writer);
100             }
101         }
102 
103         writer.flush();
104     }
105 
106 
107     /***
108      * The number of lines at the start of a file that are ignored (not
109      * reshuffled).
110      */
111     private final int firstIgnored;
112 
113     /***
114      * Creates a new instance from the
115      * {@linkplain TiesConfiguration#CONF standard configuration}.
116      *
117      * @param outExt the extension to use for output files
118      */
119     public LineShuffleGenerator(final String outExt) {
120         this(outExt, TiesConfiguration.CONF);
121     }
122 
123     /***
124      * Creates a new instance from the provided configuration.
125      *
126      * @param outExt the extension to use for output files
127      * @param conf used to configure this instance; must not be
128      * <code>null</code>
129      */
130     public LineShuffleGenerator(final String outExt,
131             final TiesConfiguration conf) {
132         this(outExt, conf.getInt("shuffle.lines.ignore-first"), conf);
133     }
134 
135     /***
136      * Creates a new instance.
137      *
138      * @param outExt the extension to use for output files
139      * @param ignoredLines the number of lines at the start of a file that
140      * should be ignored, if any; must be 0 or positive
141      * @param conf passed to the superclass; if <code>null</code>,
142      * the {@linkplain TiesConfiguration#CONF standard configuration} is used
143      */
144     public LineShuffleGenerator(final String outExt, final int ignoredLines,
145             final TiesConfiguration conf) {
146         super(outExt, conf);
147         firstIgnored = ignoredLines;
148     }
149 
150 
151     /***
152      * {@inheritDoc} This implementation delegates to the static
153      * {@link #shuffleLines(Reader, Writer, int)} method, using the configured
154      * number of lines to ignore.
155      */
156     protected void doProcess(final Reader reader, final Writer writer,
157             final ContextMap context) throws IOException {
158         shuffleLines(reader, writer, firstIgnored);
159     }
160 
161     /***
162      * Returns a string representation of this object.
163      *
164      * @return a textual representation
165      */
166     public String toString() {
167         return new ToStringBuilder(this)
168             .append("first ignored", firstIgnored)
169             .toString();
170     }
171 
172 }