View Javadoc

1   /*
2    * Copyright (C) 2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.eval;
23  
24  import java.io.File;
25  import java.io.IOException;
26  import java.io.Writer;
27  import java.text.NumberFormat;
28  import java.util.ArrayList;
29  import java.util.Iterator;
30  import java.util.LinkedList;
31  import java.util.List;
32  import java.util.Random;
33  
34  import org.apache.commons.lang.builder.ToStringBuilder;
35  
36  import de.fu_berlin.ties.CollectingProcessor;
37  import de.fu_berlin.ties.ContextMap;
38  import de.fu_berlin.ties.TiesConfiguration;
39  import de.fu_berlin.ties.io.IOUtils;
40  import de.fu_berlin.ties.text.TextUtils;
41  
42  /***
43   * Arranges all input arguments (for example, files or URLs) in random
44   * "shuffles", so they can subsequently processed in random (but fixed) order.
45   * This helps generating input data for experiments. Each random shuffle will
46   * be stored in a file; each line of the file will contain one of the input
47   * arguments; the order of input arguments will be random. If the input
48   * arguments are URIs/URLs, this format correspondings to the
49   * <code>text/uri-list</code> media type defined in
50   * <a href="http://www.faqs.org/rfcs/rfc2483.html">RFC 2483</a>; it can be
51   * read using the {@link de.fu_berlin.ties.io.IOUtils#readURIList(CharSequence)}
52   * methods.
53   *
54   * <p>Also contains the static {@link #shuffle(List)} method so it can
55   * be used as a static utility class.
56   *
57   * @author Christian Siefkes
58   * @version $Revision: 1.5 $, $Date: 2004/11/16 10:41:19 $, $Author: siefkes $
59   */
60  public class ShuffleGenerator extends CollectingProcessor {
61  
62      /***
63       * Static utility method that "shuffles" a list by re-arranging its elements
64       * in random order. The original list will be destroyed by this method
65       * (i.e. it might be empty after this method returns)! It is recommended
66       * to pass a {@link LinkedList} (and not a {@link ArrayList}) as input
67       * argument because iteratively deleting elements from {@link ArrayList}s
68       * is very inefficient.
69       *
70       * @param input the list of elements to shuffle; should be a
71       * {@link LinkedList} for efficiency; will be destroyed by this method
72       * (i.e. it might be empty after this method returns)
73       * @return a new list containing the elements of the <code>input</code>
74       * in random order
75       */
76      public static <T> List<T> shuffle(final List<T> input) {
77          final Random random = new Random();
78          final List<T> result = new ArrayList<T>(input.size());
79  
80          while (!input.isEmpty()) {
81              // randomly remove a list element and append it to the result list
82              result.add(input.remove(random.nextInt(input.size())));
83          }
84  
85          return result;
86      }
87  
88      /***
89       * The number used in the name of the first generated shuffle file.
90       */
91      private final int first;
92  
93      /***
94       * The number of random shuffles to generate.
95       */
96      private final int number;
97  
98      /***
99       * Prefix used for the generated random shuffle files.
100      */
101     private final String prefix;
102 
103     /***
104      * Extension used for the generated random shuffle files.
105      */
106     private final String extension;
107 
108     /***
109      * Creates a new instance from the
110      * {@linkplain TiesConfiguration#CONF standard configuration}.
111      */
112     public ShuffleGenerator() {
113         this(TiesConfiguration.CONF);
114     }
115 
116     /***
117      * Creates a new instance from the provided configuration.
118      *
119      * @param conf used to configure this instance; must not be
120      * <code>null</code>
121      */
122     public ShuffleGenerator(final TiesConfiguration conf) {
123         this(conf.getInt("shuffle.first"), conf.getInt("shuffle.num"),
124             conf.getString("shuffle.prefix"), conf.getString("shuffle.ext"),
125             conf);
126     }
127 
128     /***
129      * Creates a new instance from the provided configuration. Generated
130      * shuffle files will be named from <code>filePrefixNUMBER.fileExt</code>,
131      * where <code>NUMBER</code> is a number from <em>firstNum</em>
132      * to <em>firstNum + num - 1</em>; if necessary, numbers will be padded
133      * with leading zeros so all numbers are of the same length
134      * (e.g. <em>01</em> to <em>10</em>).
135      *
136      * @param firstNum The number used in the name of the first generated
137      * shuffle file (typically 1)
138      * @param num The number of random shuffles to generate
139      * @param filePrefix Prefix used for the generated random shuffle files
140      * @param fileExt Extension used for the generated random shuffle files
141      * @param conf passed to the superclass; if <code>null</code>,
142      * the {@linkplain TiesConfiguration#CONF standard configuration} is used
143      */
144     public ShuffleGenerator(final int firstNum, final int num,
145             final String filePrefix, final String fileExt,
146             final TiesConfiguration conf) {
147         super(conf);
148         first = firstNum;
149         number = num;
150         prefix = filePrefix;
151         extension = fileExt;
152     }
153 
154     /***
155      * {@inheritDoc}
156      */
157     public void process(final List<String> collected, final ContextMap context)
158             throws IOException {
159         LinkedList<String> inputCopy;
160         List resultList;
161         Iterator resultIter;
162         String item;
163         final int last = first + number - 1;
164         final File outDirectory = IOUtils.determineOutputDirectory(getConfig());
165         File outFile;
166         Writer writer;
167 
168         // left-pad numbers with 0s if required
169         final NumberFormat formatter = NumberFormat.getIntegerInstance();
170         formatter.setMinimumIntegerDigits(Integer.toString(last).length());
171 
172         // generate + store shuffles
173         for (int i = first; i <= last; i++) {
174             // create a new copy of the list because it will be destroyed
175             inputCopy = new LinkedList<String>(collected);
176             resultList = shuffle(inputCopy);
177 
178             // write to shuffle file in the output directory
179             outFile = IOUtils.createOutFile(outDirectory,
180                 prefix + formatter.format(i), extension);
181             writer = IOUtils.openWriter(outFile, getConfig());
182 
183             try {
184                 resultIter = resultList.iterator();
185 
186                 // write each item in a single line
187                 while (resultIter.hasNext()) {
188                     item = (String) resultIter.next();
189                     writer.write(item);
190                     writer.write(TextUtils.LINE_SEPARATOR);
191                 }
192             } finally {
193                 // close the writer
194                 IOUtils.tryToClose(writer);
195             }
196         }
197     }
198 
199     /***
200      * Returns a string representation of this object.
201      *
202      * @return a textual representation
203      */
204     public String toString() {
205         return new ToStringBuilder(this)
206             .append("first", first)
207             .append("number", number)
208             .append("prefix", prefix)
209             .append("extension", extension)
210             .toString();
211     }
212 
213 }