View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.eval;
23  
24  import java.io.File;
25  import java.io.IOException;
26  import java.io.Writer;
27  import java.text.NumberFormat;
28  import java.util.ArrayList;
29  import java.util.Iterator;
30  import java.util.LinkedList;
31  import java.util.List;
32  import java.util.Random;
33  
34  import org.apache.commons.lang.builder.ToStringBuilder;
35  
36  import de.fu_berlin.ties.CollectingProcessor;
37  import de.fu_berlin.ties.ContextMap;
38  import de.fu_berlin.ties.TiesConfiguration;
39  import de.fu_berlin.ties.io.IOUtils;
40  import de.fu_berlin.ties.text.TextUtils;
41  
42  /***
43   * Arranges all input arguments (for example, files or URLs) in random
44   * "shuffles", so they can subsequently processed in random (but fixed) order.
45   * This helps generating input data for experiments. Each random shuffle will
46   * be stored in a file; each line of the file will contain one of the input
47   * arguments; the order of input arguments will be random. If the input
48   * arguments are URIs/URLs, this format correspondings to the
49   * <code>text/uri-list</code> media type defined in
50   * <a href="http://www.faqs.org/rfcs/rfc2483.html">RFC 2483</a>; it can be
51   * read using the {@link de.fu_berlin.ties.io.IOUtils#readURIList(CharSequence)}
52   * methods.
53   *
54   * <p>Also contains the static {@link #shuffle(List)} method so it can
55   * be used as a static utility class.
56   *
57   * @author Christian Siefkes
58   * @version $Revision: 1.9 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
59   */
60  public class ShuffleGenerator extends CollectingProcessor {
61  
62      /***
63       * Static utility method that "shuffles" a list by re-arranging its elements
64       * in random order. The original list will be destroyed by this method
65       * (i.e. it might be empty after this method returns)! It is recommended
66       * to pass a {@link LinkedList} (and not a {@link ArrayList}) as input
67       * argument because iteratively deleting elements from {@link ArrayList}s
68       * is very inefficient.
69       *
70       * @param <T> the type of the list
71       * @param input the list of elements to shuffle; should be a
72       * {@link LinkedList} for efficiency; will be destroyed by this method
73       * (i.e. it might be empty after this method returns)
74       * @return a new list containing the elements of the <code>input</code>
75       * in random order
76       */
77      public static <T> List<T> shuffle(final List<T> input) {
78          final Random random = new Random();
79          final List<T> result = new ArrayList<T>(input.size());
80  
81          while (!input.isEmpty()) {
82              // randomly remove a list element and append it to the result list
83              result.add(input.remove(random.nextInt(input.size())));
84          }
85  
86          return result;
87      }
88  
89      /***
90       * The number used in the name of the first generated shuffle file.
91       */
92      private final int first;
93  
94      /***
95       * The number of random shuffles to generate.
96       */
97      private final int number;
98  
99      /***
100      * Prefix used for the generated random shuffle files.
101      */
102     private final String prefix;
103 
104     /***
105      * Extension used for the generated random shuffle files.
106      */
107     private final String extension;
108 
109     /***
110      * Creates a new instance from the
111      * {@linkplain TiesConfiguration#CONF standard configuration}.
112      */
113     public ShuffleGenerator() {
114         this(TiesConfiguration.CONF);
115     }
116 
117     /***
118      * Creates a new instance from the provided configuration.
119      *
120      * @param conf used to configure this instance; must not be
121      * <code>null</code>
122      */
123     public ShuffleGenerator(final TiesConfiguration conf) {
124         this(conf.getInt("shuffle.first"), conf.getInt("shuffle.num"),
125             conf.getString("shuffle.prefix"), conf.getString("shuffle.ext"),
126             conf);
127     }
128 
129     /***
130      * Creates a new instance from the provided configuration. Generated
131      * shuffle files will be named from <code>filePrefixNUMBER.fileExt</code>,
132      * where <code>NUMBER</code> is a number from <em>firstNum</em>
133      * to <em>firstNum + num - 1</em>; if necessary, numbers will be padded
134      * with leading zeros so all numbers are of the same length
135      * (e.g. <em>01</em> to <em>10</em>).
136      *
137      * @param firstNum The number used in the name of the first generated
138      * shuffle file (typically 1)
139      * @param num The number of random shuffles to generate
140      * @param filePrefix Prefix used for the generated random shuffle files
141      * @param fileExt Extension used for the generated random shuffle files
142      * @param conf passed to the superclass; if <code>null</code>,
143      * the {@linkplain TiesConfiguration#CONF standard configuration} is used
144      */
145     public ShuffleGenerator(final int firstNum, final int num,
146             final String filePrefix, final String fileExt,
147             final TiesConfiguration conf) {
148         super(conf);
149         first = firstNum;
150         number = num;
151         prefix = filePrefix;
152         extension = fileExt;
153     }
154 
155     /***
156      * {@inheritDoc}
157      */
158     public void process(final List<String> collected, final ContextMap context)
159             throws IOException {
160         LinkedList<String> inputCopy;
161         List resultList;
162         Iterator resultIter;
163         String item;
164         final int last = first + number - 1;
165         final File outDirectory = IOUtils.determineOutputDirectory(getConfig());
166         File outFile;
167         Writer writer;
168 
169         // left-pad numbers with 0s if required
170         final NumberFormat formatter = NumberFormat.getIntegerInstance();
171         formatter.setMinimumIntegerDigits(Integer.toString(last).length());
172 
173         // generate + store shuffles
174         for (int i = first; i <= last; i++) {
175             // create a new copy of the list because it will be destroyed
176             inputCopy = new LinkedList<String>(collected);
177             resultList = shuffle(inputCopy);
178 
179             // write to shuffle file in the output directory
180             outFile = IOUtils.createOutFile(outDirectory,
181                 prefix + formatter.format(i), extension);
182             writer = IOUtils.openWriter(outFile, getConfig());
183 
184             try {
185                 resultIter = resultList.iterator();
186 
187                 // write each item in a single line
188                 while (resultIter.hasNext()) {
189                     item = (String) resultIter.next();
190                     writer.write(item);
191                     writer.write(TextUtils.LINE_SEPARATOR);
192                 }
193             } finally {
194                 // close the writer
195                 IOUtils.tryToClose(writer);
196             }
197         }
198     }
199 
200     /***
201      * Returns a string representation of this object.
202      *
203      * @return a textual representation
204      */
205     public String toString() {
206         return new ToStringBuilder(this)
207             .append("first", first)
208             .append("number", number)
209             .append("prefix", prefix)
210             .append("extension", extension)
211             .toString();
212     }
213 
214 }