1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.eval;
23
24 import java.util.ArrayList;
25 import java.util.HashSet;
26 import java.util.Iterator;
27 import java.util.List;
28 import java.util.Set;
29 import java.util.SortedMap;
30 import java.util.TreeMap;
31
32 import org.apache.commons.collections.Bag;
33 import org.apache.commons.lang.builder.ToStringBuilder;
34 import org.apache.commons.math.stat.descriptive.StatisticalSummary;
35 import org.apache.commons.math.stat.descriptive.SummaryStatistics;
36
37 import de.fu_berlin.ties.io.FieldContainer;
38 import de.fu_berlin.ties.io.FieldMap;
39 import de.fu_berlin.ties.io.StorableContainer;
40
41 /***
42 * Creates {@link org.apache.commons.math.stat.descriptive.StatisticalSummary}
43 * for any number of items ("keys") that occur zero or more times in any
44 * number of runs ("identifiers"). See the method descriptions for details.
45 *
46 * <p>Instances of this class are not thread-safe and must be synchronized
47 * externally, if required.
48 *
49 * @author Christian Siefkes
50 * @version $Revision: 1.4 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
51 */
52 public class ValueSummary implements StorableContainer {
53
54 /***
55 * Stores the individual bags (sets of counts).
56 */
57 private final List<Bag> bags = new ArrayList<Bag>();
58
59 /***
60 * Each element of this list stores the identifier for the corresponding
61 * {@link #bags} element.
62 */
63 private final List<String> identifiers = new ArrayList<String>();
64
65 /***
66 * Will be used to cache the result calculated by
67 * {@link #calculateSummaries()}.
68 */
69 private SortedMap<Object, StatisticalSummary> summaries = null;
70
71 /***
72 * Creates a new instance.
73 */
74 public ValueSummary() {
75 super();
76 }
77
78
79 /***
80 * Adds a set of counts (a bag) to the statistics.
81 *
82 * @param identifier how to refer to this bag
83 * @param bag the set of counts to add
84 * @throws IllegalStateException if {@link #calculateSummaries()} (or
85 * {@link #storeEntries(FieldContainer)} has already been invoked on this
86 * instance
87 */
88 public void add(final String identifier, final Bag bag)
89 throws IllegalStateException {
90 if (summaries == null) {
91 identifiers.add(identifier);
92 bags.add(bag);
93 } else {
94 throw new IllegalStateException("No add after calculateSummaries!");
95 }
96 }
97
98 /***
99 * Will calculate a {@link StatisticalSummary statistical summary} for each
100 * object contained in at least one of the bags
101 * {@link #add(String, Bag) added} to this instance. After invoking this
102 * method, this object is immutable -- any further calls to
103 * {@link #add(String, Bag)} will yield an {@link IllegalStateException}.
104 *
105 * @return the calculated summaries
106 */
107 @SuppressWarnings("unchecked")
108 public SortedMap<Object, StatisticalSummary> calculateSummaries() {
109
110 if (summaries == null) {
111 final Set<Object> keySet = new HashSet<Object>();
112
113
114 for (int i = 0; i < bags.size(); i++) {
115 keySet.addAll(bags.get(i).uniqueSet());
116 }
117
118 final Iterator<Object> keyIter = keySet.iterator();
119 Object key;
120 SummaryStatistics statsCalc;
121 summaries = new TreeMap<Object, StatisticalSummary>();
122
123
124 while (keyIter.hasNext()) {
125 key = keyIter.next();
126 statsCalc = SummaryStatistics.newInstance();
127
128
129 for (int i = 0; i < bags.size(); i++) {
130 statsCalc.addValue(bags.get(i).getCount(key));
131 }
132
133
134 summaries.put(key, statsCalc.getSummary());
135 }
136 }
137
138 return summaries;
139 }
140
141 /***
142 * Writes all relevant information to a field container for serialization.
143 * This implementation invokes {@link #calculateSummaries()} (if necessary)
144 * and stores the {@link StatisticalSummary#getMean() mean},
145 * {@link StatisticalSummary#getSum() sum}, and
146 * {@link StatisticalSummary#getStandardDeviation() standard deviation}
147 * as well as all individual counts for each {@link #add(String, Bag) added}
148 * key. Keys are serialized in alphabetic order.
149 *
150 * @param fContainer fContainer the field container to fill
151 */
152 public void storeEntries(final FieldContainer fContainer) {
153
154 calculateSummaries();
155 final Iterator<Object> keyIter = summaries.keySet().iterator();
156 Object key;
157 StatisticalSummary summary;
158 FieldMap fields;
159 int i;
160
161
162 while (keyIter.hasNext()) {
163 key = keyIter.next();
164 summary = summaries.get(key);
165 fields = new FieldMap();
166 fields.put("Name", key);
167 fields.put("Mean", summary.getMean());
168 fields.put("Sum", summary.getSum());
169 fields.put("Standard Deviation", summary.getStandardDeviation());
170
171
172
173 for (i = 0; i < bags.size(); i++) {
174 fields.put("<" + identifiers.get(i) + ">",
175 bags.get(i).getCount(key));
176 }
177 fContainer.add(fields);
178 }
179 }
180
181 /***
182 * Returns a compact string representation of this object.
183 *
184 * @return a string representation of this object
185 */
186 public String toString() {
187 return new ToStringBuilder(this)
188 .append("identifiers", identifiers)
189 .toString();
190 }
191
192 }