View Javadoc

1   /*
2    * Copyright (C) 2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.eval;
23  
24  import java.util.ArrayList;
25  import java.util.HashSet;
26  import java.util.Iterator;
27  import java.util.List;
28  import java.util.Set;
29  import java.util.SortedMap;
30  import java.util.TreeMap;
31  
32  import org.apache.commons.collections.Bag;
33  import org.apache.commons.lang.builder.ToStringBuilder;
34  import org.apache.commons.math.stat.descriptive.StatisticalSummary;
35  import org.apache.commons.math.stat.descriptive.SummaryStatistics;
36  
37  import de.fu_berlin.ties.io.FieldContainer;
38  import de.fu_berlin.ties.io.FieldMap;
39  import de.fu_berlin.ties.io.StorableContainer;
40  
41  /***
42   * Creates {@link org.apache.commons.math.stat.descriptive.StatisticalSummary}
43   * for any number of items ("keys") that occur zero or more times in any
44   * number of runs ("identifiers"). See the method descriptions for details.
45   *
46   * <p>Instances of this class are not thread-safe and must be synchronized
47   * externally, if required.
48   * 
49   * @author Christian Siefkes
50   * @version $Revision: 1.4 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
51   */
52  public class ValueSummary implements StorableContainer {
53  
54      /***
55       * Stores the individual bags (sets of counts).
56       */
57      private final List<Bag> bags = new ArrayList<Bag>();
58  
59      /***
60       * Each element of this list stores the identifier for the corresponding
61       * {@link #bags} element.
62       */
63      private final List<String> identifiers = new ArrayList<String>();
64  
65      /***
66       * Will be used to cache the result calculated by
67       * {@link #calculateSummaries()}.
68       */
69      private SortedMap<Object, StatisticalSummary> summaries = null;
70  
71      /***
72       * Creates a new instance.
73       */
74      public ValueSummary() {
75          super();
76      }
77  
78  
79      /***
80       * Adds a set of counts (a bag) to the statistics.
81       *
82       * @param identifier how to refer to this bag
83       * @param bag the set of counts to add
84       * @throws IllegalStateException if {@link #calculateSummaries()} (or
85       * {@link #storeEntries(FieldContainer)} has already been invoked on this
86       * instance
87       */
88      public void add(final String identifier, final Bag bag)
89      throws IllegalStateException {
90          if (summaries == null) {
91              identifiers.add(identifier);
92              bags.add(bag);
93          } else {
94              throw new IllegalStateException("No add after calculateSummaries!");
95          }
96      }
97  
98      /***
99       * Will calculate a {@link StatisticalSummary statistical summary} for each
100      * object contained in at least one of the bags
101      * {@link #add(String, Bag) added} to this instance. After invoking this
102      * method, this object is immutable -- any further calls to
103      * {@link #add(String, Bag)} will yield an {@link IllegalStateException}.
104      *
105      * @return the calculated summaries
106      */
107     @SuppressWarnings("unchecked")
108     public SortedMap<Object, StatisticalSummary> calculateSummaries() {
109         // calculate and cache summaries, if not yet done
110         if (summaries == null) {
111             final Set<Object> keySet = new HashSet<Object>();
112 
113             // collect all keys
114             for (int i = 0; i < bags.size(); i++) {
115                 keySet.addAll(bags.get(i).uniqueSet());
116             }
117 
118             final Iterator<Object> keyIter = keySet.iterator();
119             Object key;
120             SummaryStatistics statsCalc;
121             summaries = new TreeMap<Object, StatisticalSummary>();
122 
123             // calculate and store stats for each key
124             while (keyIter.hasNext()) {
125                 key = keyIter.next();
126                 statsCalc = SummaryStatistics.newInstance();
127 
128                 // for each key, add all counts (including 0 counts)
129                 for (int i = 0; i < bags.size(); i++) {
130                     statsCalc.addValue(bags.get(i).getCount(key));
131                 }
132 
133                 // convert into immutable StatisticalSummaries
134                 summaries.put(key, statsCalc.getSummary());
135             }
136         }
137 
138         return summaries;
139     }
140 
141     /***
142      * Writes all relevant information to a field container for serialization.
143      * This implementation invokes {@link #calculateSummaries()} (if necessary)
144      * and stores the {@link StatisticalSummary#getMean() mean},
145      * {@link StatisticalSummary#getSum() sum}, and
146      * {@link StatisticalSummary#getStandardDeviation() standard deviation}
147      * as well as all individual counts for each {@link #add(String, Bag) added}
148      * key. Keys are serialized in alphabetic order.
149      *
150      * @param fContainer fContainer the field container to fill
151      */
152     public void storeEntries(final FieldContainer fContainer) {
153         // ensure that summaries exist
154         calculateSummaries();
155         final Iterator<Object> keyIter = summaries.keySet().iterator();
156         Object key;
157         StatisticalSummary summary;
158         FieldMap fields;
159         int i;
160 
161         // serialize values for each key
162         while (keyIter.hasNext()) {
163             key = keyIter.next();
164             summary = summaries.get(key);
165             fields = new FieldMap();
166             fields.put("Name", key);
167             fields.put("Mean", summary.getMean());
168             fields.put("Sum", summary.getSum());
169             fields.put("Standard Deviation", summary.getStandardDeviation());
170 
171             // add individual counts, enclosing identifiers in <...> to prevent
172             // confusion with standard fields
173             for (i = 0; i < bags.size(); i++) {
174                 fields.put("<" + identifiers.get(i) + ">",
175                         bags.get(i).getCount(key));
176             }
177             fContainer.add(fields);
178         }
179     }
180 
181     /***
182      * Returns a compact string representation of this object.
183      *
184      * @return a string representation of this object
185      */
186     public String toString() {
187         return new ToStringBuilder(this)
188             .append("identifiers", identifiers)
189             .toString();
190     }
191 
192 }