1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.eval;
23
24 import java.io.File;
25 import java.io.IOException;
26 import java.io.Writer;
27 import java.text.NumberFormat;
28 import java.util.ArrayList;
29 import java.util.Iterator;
30 import java.util.LinkedList;
31 import java.util.List;
32 import java.util.Random;
33
34 import org.apache.commons.lang.builder.ToStringBuilder;
35
36 import de.fu_berlin.ties.CollectingProcessor;
37 import de.fu_berlin.ties.ContextMap;
38 import de.fu_berlin.ties.TiesConfiguration;
39 import de.fu_berlin.ties.io.IOUtils;
40 import de.fu_berlin.ties.text.TextUtils;
41
42 /***
43 * Arranges all input arguments (for example, files or URLs) in random
44 * "shuffles", so they can subsequently processed in random (but fixed) order.
45 * This helps generating input data for experiments. Each random shuffle will
46 * be stored in a file; each line of the file will contain one of the input
47 * arguments; the order of input arguments will be random. If the input
48 * arguments are URIs/URLs, this format correspondings to the
49 * <code>text/uri-list</code> media type defined in
50 * <a href="http://www.faqs.org/rfcs/rfc2483.html">RFC 2483</a>; it can be
51 * read using the {@link de.fu_berlin.ties.io.IOUtils#readURIList(CharSequence)}
52 * methods.
53 *
54 * <p>Also contains the static {@link #shuffle(List)} method so it can
55 * be used as a static utility class.
56 *
57 * @author Christian Siefkes
58 * @version $Revision: 1.9 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
59 */
60 public class ShuffleGenerator extends CollectingProcessor {
61
62 /***
63 * Static utility method that "shuffles" a list by re-arranging its elements
64 * in random order. The original list will be destroyed by this method
65 * (i.e. it might be empty after this method returns)! It is recommended
66 * to pass a {@link LinkedList} (and not a {@link ArrayList}) as input
67 * argument because iteratively deleting elements from {@link ArrayList}s
68 * is very inefficient.
69 *
70 * @param <T> the type of the list
71 * @param input the list of elements to shuffle; should be a
72 * {@link LinkedList} for efficiency; will be destroyed by this method
73 * (i.e. it might be empty after this method returns)
74 * @return a new list containing the elements of the <code>input</code>
75 * in random order
76 */
77 public static <T> List<T> shuffle(final List<T> input) {
78 final Random random = new Random();
79 final List<T> result = new ArrayList<T>(input.size());
80
81 while (!input.isEmpty()) {
82
83 result.add(input.remove(random.nextInt(input.size())));
84 }
85
86 return result;
87 }
88
89 /***
90 * The number used in the name of the first generated shuffle file.
91 */
92 private final int first;
93
94 /***
95 * The number of random shuffles to generate.
96 */
97 private final int number;
98
99 /***
100 * Prefix used for the generated random shuffle files.
101 */
102 private final String prefix;
103
104 /***
105 * Extension used for the generated random shuffle files.
106 */
107 private final String extension;
108
109 /***
110 * Creates a new instance from the
111 * {@linkplain TiesConfiguration#CONF standard configuration}.
112 */
113 public ShuffleGenerator() {
114 this(TiesConfiguration.CONF);
115 }
116
117 /***
118 * Creates a new instance from the provided configuration.
119 *
120 * @param conf used to configure this instance; must not be
121 * <code>null</code>
122 */
123 public ShuffleGenerator(final TiesConfiguration conf) {
124 this(conf.getInt("shuffle.first"), conf.getInt("shuffle.num"),
125 conf.getString("shuffle.prefix"), conf.getString("shuffle.ext"),
126 conf);
127 }
128
129 /***
130 * Creates a new instance from the provided configuration. Generated
131 * shuffle files will be named from <code>filePrefixNUMBER.fileExt</code>,
132 * where <code>NUMBER</code> is a number from <em>firstNum</em>
133 * to <em>firstNum + num - 1</em>; if necessary, numbers will be padded
134 * with leading zeros so all numbers are of the same length
135 * (e.g. <em>01</em> to <em>10</em>).
136 *
137 * @param firstNum The number used in the name of the first generated
138 * shuffle file (typically 1)
139 * @param num The number of random shuffles to generate
140 * @param filePrefix Prefix used for the generated random shuffle files
141 * @param fileExt Extension used for the generated random shuffle files
142 * @param conf passed to the superclass; if <code>null</code>,
143 * the {@linkplain TiesConfiguration#CONF standard configuration} is used
144 */
145 public ShuffleGenerator(final int firstNum, final int num,
146 final String filePrefix, final String fileExt,
147 final TiesConfiguration conf) {
148 super(conf);
149 first = firstNum;
150 number = num;
151 prefix = filePrefix;
152 extension = fileExt;
153 }
154
155 /***
156 * {@inheritDoc}
157 */
158 public void process(final List<String> collected, final ContextMap context)
159 throws IOException {
160 LinkedList<String> inputCopy;
161 List resultList;
162 Iterator resultIter;
163 String item;
164 final int last = first + number - 1;
165 final File outDirectory = IOUtils.determineOutputDirectory(getConfig());
166 File outFile;
167 Writer writer;
168
169
170 final NumberFormat formatter = NumberFormat.getIntegerInstance();
171 formatter.setMinimumIntegerDigits(Integer.toString(last).length());
172
173
174 for (int i = first; i <= last; i++) {
175
176 inputCopy = new LinkedList<String>(collected);
177 resultList = shuffle(inputCopy);
178
179
180 outFile = IOUtils.createOutFile(outDirectory,
181 prefix + formatter.format(i), extension);
182 writer = IOUtils.openWriter(outFile, getConfig());
183
184 try {
185 resultIter = resultList.iterator();
186
187
188 while (resultIter.hasNext()) {
189 item = (String) resultIter.next();
190 writer.write(item);
191 writer.write(TextUtils.LINE_SEPARATOR);
192 }
193 } finally {
194
195 IOUtils.tryToClose(writer);
196 }
197 }
198 }
199
200 /***
201 * Returns a string representation of this object.
202 *
203 * @return a textual representation
204 */
205 public String toString() {
206 return new ToStringBuilder(this)
207 .append("first", first)
208 .append("number", number)
209 .append("prefix", prefix)
210 .append("extension", extension)
211 .toString();
212 }
213
214 }