1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.eval;
23
24 import java.io.File;
25 import java.io.IOException;
26 import java.io.Writer;
27 import java.text.NumberFormat;
28 import java.util.ArrayList;
29 import java.util.Iterator;
30 import java.util.LinkedList;
31 import java.util.List;
32 import java.util.Random;
33
34 import org.apache.commons.lang.builder.ToStringBuilder;
35
36 import de.fu_berlin.ties.CollectingProcessor;
37 import de.fu_berlin.ties.ContextMap;
38 import de.fu_berlin.ties.TiesConfiguration;
39 import de.fu_berlin.ties.io.IOUtils;
40 import de.fu_berlin.ties.text.TextUtils;
41
42 /***
43 * Arranges all input arguments (for example, files or URLs) in random
44 * "shuffles", so they can subsequently processed in random (but fixed) order.
45 * This helps generating input data for experiments. Each random shuffle will
46 * be stored in a file; each line of the file will contain one of the input
47 * arguments; the order of input arguments will be random. If the input
48 * arguments are URIs/URLs, this format correspondings to the
49 * <code>text/uri-list</code> media type defined in
50 * <a href="http://www.faqs.org/rfcs/rfc2483.html">RFC 2483</a>; it can be
51 * read using the {@link de.fu_berlin.ties.io.IOUtils#readURIList(CharSequence)}
52 * methods.
53 *
54 * <p>Also contains the static {@link #shuffle(List)} method so it can
55 * be used as a static utility class.
56 *
57 * @author Christian Siefkes
58 * @version $Revision: 1.5 $, $Date: 2004/11/16 10:41:19 $, $Author: siefkes $
59 */
60 public class ShuffleGenerator extends CollectingProcessor {
61
62 /***
63 * Static utility method that "shuffles" a list by re-arranging its elements
64 * in random order. The original list will be destroyed by this method
65 * (i.e. it might be empty after this method returns)! It is recommended
66 * to pass a {@link LinkedList} (and not a {@link ArrayList}) as input
67 * argument because iteratively deleting elements from {@link ArrayList}s
68 * is very inefficient.
69 *
70 * @param input the list of elements to shuffle; should be a
71 * {@link LinkedList} for efficiency; will be destroyed by this method
72 * (i.e. it might be empty after this method returns)
73 * @return a new list containing the elements of the <code>input</code>
74 * in random order
75 */
76 public static <T> List<T> shuffle(final List<T> input) {
77 final Random random = new Random();
78 final List<T> result = new ArrayList<T>(input.size());
79
80 while (!input.isEmpty()) {
81
82 result.add(input.remove(random.nextInt(input.size())));
83 }
84
85 return result;
86 }
87
88 /***
89 * The number used in the name of the first generated shuffle file.
90 */
91 private final int first;
92
93 /***
94 * The number of random shuffles to generate.
95 */
96 private final int number;
97
98 /***
99 * Prefix used for the generated random shuffle files.
100 */
101 private final String prefix;
102
103 /***
104 * Extension used for the generated random shuffle files.
105 */
106 private final String extension;
107
108 /***
109 * Creates a new instance from the
110 * {@linkplain TiesConfiguration#CONF standard configuration}.
111 */
112 public ShuffleGenerator() {
113 this(TiesConfiguration.CONF);
114 }
115
116 /***
117 * Creates a new instance from the provided configuration.
118 *
119 * @param conf used to configure this instance; must not be
120 * <code>null</code>
121 */
122 public ShuffleGenerator(final TiesConfiguration conf) {
123 this(conf.getInt("shuffle.first"), conf.getInt("shuffle.num"),
124 conf.getString("shuffle.prefix"), conf.getString("shuffle.ext"),
125 conf);
126 }
127
128 /***
129 * Creates a new instance from the provided configuration. Generated
130 * shuffle files will be named from <code>filePrefixNUMBER.fileExt</code>,
131 * where <code>NUMBER</code> is a number from <em>firstNum</em>
132 * to <em>firstNum + num - 1</em>; if necessary, numbers will be padded
133 * with leading zeros so all numbers are of the same length
134 * (e.g. <em>01</em> to <em>10</em>).
135 *
136 * @param firstNum The number used in the name of the first generated
137 * shuffle file (typically 1)
138 * @param num The number of random shuffles to generate
139 * @param filePrefix Prefix used for the generated random shuffle files
140 * @param fileExt Extension used for the generated random shuffle files
141 * @param conf passed to the superclass; if <code>null</code>,
142 * the {@linkplain TiesConfiguration#CONF standard configuration} is used
143 */
144 public ShuffleGenerator(final int firstNum, final int num,
145 final String filePrefix, final String fileExt,
146 final TiesConfiguration conf) {
147 super(conf);
148 first = firstNum;
149 number = num;
150 prefix = filePrefix;
151 extension = fileExt;
152 }
153
154 /***
155 * {@inheritDoc}
156 */
157 public void process(final List<String> collected, final ContextMap context)
158 throws IOException {
159 LinkedList<String> inputCopy;
160 List resultList;
161 Iterator resultIter;
162 String item;
163 final int last = first + number - 1;
164 final File outDirectory = IOUtils.determineOutputDirectory(getConfig());
165 File outFile;
166 Writer writer;
167
168
169 final NumberFormat formatter = NumberFormat.getIntegerInstance();
170 formatter.setMinimumIntegerDigits(Integer.toString(last).length());
171
172
173 for (int i = first; i <= last; i++) {
174
175 inputCopy = new LinkedList<String>(collected);
176 resultList = shuffle(inputCopy);
177
178
179 outFile = IOUtils.createOutFile(outDirectory,
180 prefix + formatter.format(i), extension);
181 writer = IOUtils.openWriter(outFile, getConfig());
182
183 try {
184 resultIter = resultList.iterator();
185
186
187 while (resultIter.hasNext()) {
188 item = (String) resultIter.next();
189 writer.write(item);
190 writer.write(TextUtils.LINE_SEPARATOR);
191 }
192 } finally {
193
194 IOUtils.tryToClose(writer);
195 }
196 }
197 }
198
199 /***
200 * Returns a string representation of this object.
201 *
202 * @return a textual representation
203 */
204 public String toString() {
205 return new ToStringBuilder(this)
206 .append("first", first)
207 .append("number", number)
208 .append("prefix", prefix)
209 .append("extension", extension)
210 .toString();
211 }
212
213 }