View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.eval;
23  
24  import java.util.Collections;
25  import java.util.Iterator;
26  import java.util.Set;
27  import java.util.SortedMap;
28  import java.util.TreeMap;
29  
30  import org.apache.commons.lang.StringUtils;
31  import org.apache.commons.lang.builder.ToStringBuilder;
32  
33  import de.fu_berlin.ties.io.FieldContainer;
34  import de.fu_berlin.ties.io.FieldMap;
35  import de.fu_berlin.ties.io.RestorableContainer;
36  import de.fu_berlin.ties.util.Util;
37  
38  /***
39   * Instances of this class manage multiple
40   * {@link de.fu_berlin.ties.eval.FMetrics} for different types. They also
41   * calculate average statistics over all types.
42   *
43   * <p>Instances of this class are not thread-safe and must be synchronized
44   * externally, if required.
45   *
46   * @author Christian Siefkes
47   * @version $Revision: 1.12 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
48   */
49  public class MultiFMetrics implements MultiFMetricsView, RestorableContainer {
50  
51      /***
52       * Reserved type used for the calculated sums and averages over all types:
53       * <code>&lt;All&gt;</code>. This string is reserved and cannot be used to
54       * mark regular types.
55       */
56      public static final String ALL = "<All>";
57  
58      /***
59       * Serialization key for the type.
60       */
61      public static final String KEY_TYPE = "Type";
62  
63      /***
64       * The sums and averages over all types.
65       */
66      private final FMetrics all;
67  
68      /***
69       * A sorted map from type Strings to the corresponding {@link FMetrics}.
70       */
71      private final SortedMap<String, FMetrics> typedMetrics =
72          new TreeMap<String, FMetrics>();
73  
74      /***
75       * Whether {@linkplain FMetricsSummary summaries} are calculated by this
76       * type.
77       */
78      private final boolean calculatingSummaries;
79  
80      /***
81       * Extension recommended for files storing metrics.
82       */
83      public static final String EXT_METRICS = "metrics";
84  
85      /***
86       * Creates a new instance, without calculating
87       * {@linkplain FMetricsSummary summaries}.
88       */
89      public MultiFMetrics() {
90          this(false);
91      }
92  
93      /***
94       * Creates a new instance.
95       *
96       * @param calcSummaries whether this type should calculate
97       * {@linkplain FMetricsSummary summaries}
98       */
99      public MultiFMetrics(final boolean calcSummaries) {
100         super();
101         calculatingSummaries = calcSummaries;
102         all = createMetrics();
103     }
104 
105     /***
106      * Helper factory method that creates a new {@link FMetrics} instance of the
107      * required type.
108      *
109      * @return a {@link SummaryFMetrics} instance if
110      * {@link #isCalculatingSummaries()}; a simple {@link FMetrics} instance
111      * otherwise
112      */
113     protected FMetrics createMetrics() {
114         if (calculatingSummaries) {
115             return new SummaryFMetrics();
116         } else {
117             return new FMetrics();
118         }
119     }
120 
121     /***
122      * Helper factory method that creates a new {@link FMetrics} instance of the
123      * required type from a field map.
124      *
125      * @param fieldMap field map used to initialize the new instance
126      * @return a {@link SummaryFMetrics} instance if
127      * {@link #isCalculatingSummaries()}; a simple {@link FMetrics} instance
128      * otherwise
129      * @throws IllegalArgumentException if at least one of the parameters is
130      * negative or missing from the field map
131      */
132     protected FMetrics createMetrics(final FieldMap fieldMap)
133             throws IllegalArgumentException {
134         if (calculatingSummaries) {
135             return new SummaryFMetrics(fieldMap);
136         } else {
137             return new FMetrics(fieldMap);
138         }
139     }
140 
141     /***
142      * Increases the number of false negatives for the given type by 1.
143      *
144      * @param type the type to update
145      * @throws IllegalArgumentException if <code>type</code> is
146      * <code>null</code> or empty or equal to the reserved {@link #ALL} type
147      */
148     public void incFalseNeg(final String type)
149             throws IllegalArgumentException {
150         // update metrics for this type + for all types
151         final FMetrics metrics = lookupType(type);
152         metrics.incFalseNeg();
153         all.incFalseNeg();
154     }
155 
156     /***
157      * Increases the number of false positives for the given type by 1.
158      *
159      * @param type the type to update
160      * @throws IllegalArgumentException if <code>type</code> is
161      * <code>null</code> or empty or equal to the reserved {@link #ALL} type
162      */
163     public void incFalsePos(final String type)
164             throws IllegalArgumentException {
165         // update metrics for this type + for all types
166         final FMetrics metrics = lookupType(type);
167         metrics.incFalsePos();
168         all.incFalsePos();
169     }
170 
171     /***
172      * Increases the number of true positives for the given type by 1.
173      *
174      * @param type the type to update
175      * @throws IllegalArgumentException if <code>type</code> is
176      * <code>null</code> or empty or equal to the reserved {@link #ALL} type
177      */
178     public void incTruePos(final String type)
179             throws IllegalArgumentException {
180         // update metrics for this type + for all types
181         final FMetrics metrics = lookupType(type);
182         metrics.incTruePos();
183         all.incTruePos();
184     }
185 
186     /***
187      * Whether {@linkplain FMetricsSummary summaries} are calculated by this
188      * type.
189      *
190      * @return the value of the attribute
191      */
192     public boolean isCalculatingSummaries() {
193         return calculatingSummaries;
194     }
195 
196     /***
197      * Looks up and if necessary initializes the metrics for a given type.
198      * Also checks that the given type is valid, i.e. neither <code>null</code>
199      * nor empty nor equal to the reserved {@link #ALL} type.
200      *
201      * @param type the type to check
202      * @return the metrics for this type
203      * @throws IllegalArgumentException if <code>type</code> is
204      * <code>null</code> or empty or equal to the reserved {@link #ALL} type
205      */
206     protected FMetrics lookupType(final String type)
207             throws IllegalArgumentException {
208         // check argument
209         if (StringUtils.isEmpty(type)) {
210             throw new IllegalArgumentException("Type cannot be null or empty");
211         }
212         if (ALL.equals(type)) {
213             throw new IllegalArgumentException("Cannot use the reserved "
214                 + "identifier " + ALL + " as regular type");
215         }
216 
217         FMetrics result = typedMetrics.get(type);
218         if (result == null) {
219             // create and store metrics for this type
220             result = createMetrics();
221             typedMetrics.put(type, result);
222         }
223         return result;
224     }
225 
226     /***
227      * Restores items stored in a field container and adds them to this
228      * instance. The provided field container must have been filled by calling
229      * {@link #storeEntries(FieldContainer)} on a object of this type.
230      * Any values already contained in this instance are added to, they are not
231      * replaced. The {@link #viewAll() sums and averages} are recalculated from
232      * the provided values, it is not directly read from the container.
233      * <em>Summary information cannot be restored.</em>
234      *
235      * @param fContainer the field container to read
236      * @throws IllegalArgumentException if <code>fContainer</code> contains
237      * a wrong kind of {@link FieldMap}s
238      */
239     public void restoreEntries(final FieldContainer fContainer)
240             throws IllegalArgumentException {
241         String currentType;
242         FMetrics currentMetrics;
243         FieldMap currentFM;
244         final Iterator entryIter = fContainer.entryIterator();
245 
246         while (entryIter.hasNext()) {
247             currentFM = (FieldMap) entryIter.next();
248             currentType = Util.asString(currentFM.get(KEY_TYPE));
249 
250             if (currentType == null) {
251                 throw new IllegalArgumentException("Wrong type of field map: "
252                     + KEY_TYPE + " field missing");
253             }
254 
255             // ignore reserved type (sums + averages will be recalculated)
256             if (!ALL.equals(currentType)) {
257                 currentMetrics = createMetrics(currentFM);
258 
259                 // feed as input to update to avoid overwriting exisiting values
260                 update(currentType, currentMetrics);
261             }
262         }
263     }
264 
265     /***
266      * {@inheritDoc}
267      */
268     public void storeEntries(final FieldContainer fContainer) {
269         storeEntries(fContainer, KEY_TYPE);
270     }
271 
272     /***
273      * Serialization variant that allows specifying a specific name for the
274      * type field. If you invoke the standard form
275      * {@link #storeEntries(FieldContainer)}, {@link #KEY_TYPE} will be used.
276      *
277      * @param fContainer the container to serialize to
278      * @param typeName key to use for the type field
279      */
280     public void storeEntries(final FieldContainer fContainer,
281             final String typeName) {
282         String currentType;
283         FMetrics currentMetrics;
284         FieldMap currentFM;
285         final Iterator<String> typeIter = typedMetrics.keySet().iterator();
286 
287         // store types in alphabetic order
288         while (typeIter.hasNext()) {
289             currentType = typeIter.next();
290             currentMetrics = typedMetrics.get(currentType);
291             currentFM = currentMetrics.storeFields();
292 
293             // add type field + store in container
294             currentFM.put(typeName, currentType);
295             fContainer.add(currentFM);
296         }
297 
298         // store averages
299         currentFM = all.storeFields();
300         currentFM.put(typeName, ALL);
301         fContainer.add(currentFM);
302     }
303 
304     /***
305      * Returns a string representation of this object.
306      * @return a textual representation
307      */
308     public String toString() {
309         return new ToStringBuilder(this)
310             .append("sums/averages", all)
311             .append("typed metrics", typedMetrics)
312             .append("calculating summaries", calculatingSummaries)
313             .toString();
314     }
315 
316     /***
317      * Returns the set of all types (Strings) currently stored in this
318      * instance. The set is immutable and cannot modified.
319      *
320      * @return the set of type names
321      */
322     public Set types() {
323         return Collections.unmodifiableSet(typedMetrics.keySet());
324     }
325 
326     /***
327      * Updates the statistics by adding the contents of the given multi-metrics.
328      *
329      * @param multiMetrics the metrics to add
330      */
331     public void update(final MultiFMetricsView multiMetrics) {
332         String currentType;
333         FMetricsView currentMetrics;
334         final Iterator typeIter = multiMetrics.types().iterator();
335         final FMetrics newSum = new FMetrics();
336 
337         // iterate types and update each one
338         while (typeIter.hasNext()) {
339             currentType = (String) typeIter.next();
340             currentMetrics = multiMetrics.view(currentType);
341             updateType(currentType, currentMetrics.getTruePos(),
342                 currentMetrics.getFalseNeg(), currentMetrics.getFalsePos());
343             newSum.update(currentMetrics);
344         }
345 
346         // update 'all' in a single step because otherwise the summaries
347         // (standard deviations etc.) would get mixed up
348         all.update(newSum);
349     }
350 
351     /***
352      * Updates the statistics for a specified type, increasing the stored values
353      * as specified.
354      *
355      * @param type the type to update
356      * @param input contains the number of true and false positives and
357      * false negatives to add
358      * @throws IllegalArgumentException if <code>type</code> is
359      * <code>null</code> or empty or equal to the reserved {@link #ALL} type or
360      * if at least one of the input values is negative
361      */
362     public void update(final String type, final EvalInput input) throws
363             IllegalArgumentException {
364         update(type, input.getTruePos(), input.getFalseNeg(),
365             input.getFalsePos());
366     }
367 
368     /***
369      * Updates the statistics for a specified type, increasing the stored values
370      * as specified.
371      *
372      * @param type the type to update
373      * @param addTruePos the number of new true positives to add
374      * @param addFalseNeg the number of new false negatives to add
375      * @param addFalsePos the number of new false positives to add
376      * @throws IllegalArgumentException f <code>type</code> is
377      * <code>null</code> or empty or equal to the reserved {@link #ALL} type or
378      * if at least one of the parameters is negative
379      */
380     public void update(final String type, final long addTruePos,
381             final long addFalseNeg, final long addFalsePos)
382             throws IllegalArgumentException {
383         updateType(type, addTruePos, addFalseNeg, addFalsePos);
384         all.update(addTruePos, addFalseNeg, addFalsePos);
385     }
386 
387     /***
388      * Helper method that updates only the metrics of a specified type,
389      * but not the {@linkplain #all sums and averages}.
390      *
391      * @param type the type to update
392      * @param addTruePos the number of new true positives to add
393      * @param addFalseNeg the number of new false negatives to add
394      * @param addFalsePos the number of new false positives to add
395      * @throws IllegalArgumentException f <code>type</code> is
396      * <code>null</code> or empty or equal to the reserved {@link #ALL} type or
397      * if at least one of the parameters is negative
398      */
399     private void updateType(final String type, final long addTruePos,
400             final long addFalseNeg, final long addFalsePos)
401             throws IllegalArgumentException {
402         // update metrics for this type + for all types
403         final FMetrics metrics = lookupType(type);
404         metrics.update(addTruePos, addFalseNeg, addFalsePos);
405     }
406 
407     /***
408      * Returns a read-only view of the {@link FMetrics} of the specified type.
409      * This is not a snapshot but will change whenever the underlying counts
410      * are changed.
411      *
412      * @param type the type to check
413      * @return a view of the counts and evaluation metrics for the given type;
414      * or <code>null</code> if no metrics exist for the given type
415      */
416     public FMetricsView view(final String type) {
417         return typedMetrics.get(type);
418     }
419 
420     /***
421      * Returns a read-only view of the {@link FMetrics} containing the sums and
422      * averages over all types. This is not a snapshot but will change whenever
423      * the underlying counts are changed.
424      *
425      * @return a view of the sums and averages over all types
426      */
427     public FMetricsView viewAll() {
428         return all;
429     }
430 
431     /***
432      * Optional operation that shows {@link FMetricsSummary statistical
433      * summaries of precision, recall, and F1 metrics} over all types,
434      * if {@linkplain #isCalculatingSummaries() calculated}. This is not a
435      * snapshot but will change whenever the underlying values are changed.
436      *
437      * @return a statistical summery over the metrics of all types, if
438      * calculated
439      * @throws UnsupportedOperationException if
440      * {@link #isCalculatingSummaries()} is <code>false</code>
441      */
442     public FMetricsSummary viewAllSummary()
443             throws UnsupportedOperationException {
444         if (calculatingSummaries) {
445             // thanks to the factory method we know that this interface is
446             // implemented
447             return (FMetricsSummary) all;
448         } else {
449             throw new UnsupportedOperationException("No summaries calculated");
450         }
451     }
452 
453     /***
454      * Optional operation that shows {@link FMetricsSummary statistical
455      * summaries of precision, recall, and F1 metrics} of the specified type,
456      * if {@linkplain #isCalculatingSummaries() calculated}. This is not a
457      * snapshot but will change whenever the underlying values are changed.
458      *
459      * @param type the type to check
460      * @return a statistical summery over the metrics of the given type, if
461      * calculated
462      * @throws UnsupportedOperationException if
463      * {@link #isCalculatingSummaries()} is <code>false</code>
464      */
465     public FMetricsSummary viewSummary(final String type)
466             throws UnsupportedOperationException {
467         if (calculatingSummaries) {
468             // thanks to the factory method we know that this interface is
469             // implemented
470             return (FMetricsSummary) typedMetrics.get(type);
471         } else {
472             throw new UnsupportedOperationException("No summaries calculated");
473         }
474     }
475 
476 }