View Javadoc

1   /*
2    * Copyright (C) 2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.eval;
23  
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
29  
30  import org.apache.commons.lang.StringUtils;
31  import org.apache.commons.lang.builder.ToStringBuilder;
32  
33  import de.fu_berlin.ties.io.FieldContainer;
34  import de.fu_berlin.ties.io.FieldMap;
35  import de.fu_berlin.ties.io.RestorableContainer;
36  import de.fu_berlin.ties.util.Util;
37  
38  /***
39   * Instances of this class manage multiple
40   * {@link de.fu_berlin.ties.eval.FMetrics} for different types. They also
41   * calculate average statistics over all types.
42   *
43   * <p>Instances of this class are not thread-safe and must be synchronized
44   * externally, if required.
45   *
46   * @author Christian Siefkes
47   * @version $Revision: 1.8 $, $Date: 2004/09/06 17:23:00 $, $Author: siefkes $
48   */
49  public class MultiFMetrics implements MultiFMetricsView, RestorableContainer {
50  
51      /***
52       * Reserved type used for the calculated sums and averages over all types:
53       * <code>&lt;All&gt;</code>. This string is reserved and cannot be used to
54       * mark regular types.
55       */
56      public static final String ALL = "<All>";
57  
58      /***
59       * Serialization key for the type.
60       */
61      public static final String KEY_TYPE = "Type";
62  
63      /***
64       * The sums and averages over all types.
65       */
66      private final FMetrics all;
67  
68      /***
69       * A sorted map from type Strings to the corresponding {@link FMetrics}.
70       */
71      private final SortedMap<String, FMetrics> typedMetrics =
72          new TreeMap<String, FMetrics>();
73  
74      /***
75       * Whether {@linkplain FMetricsSummary summaries} are calculated by this
76       * type.
77       */
78      private final boolean calculatingSummaries;
79  
80      /***
81       * Extension recommended for files storing metrics.
82       */
83      public static final String EXT_METRICS = "metrics";
84  
85      /***
86       * Creates a new instance, without calculating
87       * {@linkplain FMetricsSummary summaries}.
88       */
89      public MultiFMetrics() {
90          this(false);
91      }
92  
93      /***
94       * Creates a new instance.
95       *
96       * @param calcSummaries whether this type should calculate
97       * {@linkplain FMetricsSummary summaries}
98       */
99      public MultiFMetrics(final boolean calcSummaries) {
100         super();
101         calculatingSummaries = calcSummaries;
102         all = createMetrics();
103     }
104 
105     /***
106      * Helper factory method that creates a new {@link FMetrics} instance of the
107      * required type.
108      *
109      * @return a {@link SummaryFMetrics} instance if
110      * {@link #isCalculatingSummaries()}; a simple {@link FMetrics} instance
111      * otherwise
112      */
113     protected FMetrics createMetrics() {
114         if (calculatingSummaries) {
115             return new SummaryFMetrics();
116         } else {
117             return new FMetrics();
118         }
119     }
120 
121     /***
122      * Helper factory method that creates a new {@link FMetrics} instance of the
123      * required type from a field map.
124      *
125      * @param fieldMap field map used to initialize the new instance
126      * @return a {@link SummaryFMetrics} instance if
127      * {@link #isCalculatingSummaries()}; a simple {@link FMetrics} instance
128      * otherwise
129      * @throws IllegalArgumentException if at least one of the parameters is
130      * negative or missing from the field map
131      */
132     protected FMetrics createMetrics(final FieldMap fieldMap)
133             throws IllegalArgumentException {
134         if (calculatingSummaries) {
135             return new SummaryFMetrics(fieldMap);
136         } else {
137             return new FMetrics(fieldMap);
138         }
139     }
140 
141     /***
142      * Increases the number of false negatives for the given type by 1.
143      *
144      * @param type the type to update
145      * @throws IllegalArgumentException if <code>type</code> is
146      * <code>null</code> or empty or equal to the reserved {@link #ALL} type
147      */
148     public void incFalseNeg(final String type)
149             throws IllegalArgumentException {
150         // update metrics for this type + for all types
151         final FMetrics metrics = lookupType(type);
152         metrics.incFalseNeg();
153         all.incFalseNeg();
154     }
155 
156     /***
157      * Increases the number of false positives for the given type by 1.
158      *
159      * @param type the type to update
160      * @throws IllegalArgumentException if <code>type</code> is
161      * <code>null</code> or empty or equal to the reserved {@link #ALL} type
162      */
163     public void incFalsePos(final String type)
164             throws IllegalArgumentException {
165         // update metrics for this type + for all types
166         final FMetrics metrics = lookupType(type);
167         metrics.incFalsePos();
168         all.incFalsePos();
169     }
170 
171     /***
172      * Increases the number of true positives for the given type by 1.
173      *
174      * @param type the type to update
175      * @throws IllegalArgumentException if <code>type</code> is
176      * <code>null</code> or empty or equal to the reserved {@link #ALL} type
177      */
178     public void incTruePos(final String type)
179             throws IllegalArgumentException {
180         // update metrics for this type + for all types
181         final FMetrics metrics = lookupType(type);
182         metrics.incTruePos();
183         all.incTruePos();
184     }
185 
186     /***
187      * Whether {@linkplain FMetricsSummary summaries} are calculated by this
188      * type.
189      *
190      * @return the value of the attribute
191      */
192     public boolean isCalculatingSummaries() {
193         return calculatingSummaries;
194     }
195 
196     /***
197      * Looks up and if necessary initializes the metrics for a given type.
198      * Also checks that the given type is valid, i.e. neither <code>null</code>
199      * nor empty nor equal to the reserved {@link #ALL} type.
200      *
201      * @param type the type to check
202      * @return the metrics for this type
203      * @throws IllegalArgumentException if <code>type</code> is
204      * <code>null</code> or empty or equal to the reserved {@link #ALL} type
205      */
206     protected FMetrics lookupType(final String type)
207             throws IllegalArgumentException {
208         // check argument
209         if (StringUtils.isEmpty(type)) {
210             throw new IllegalArgumentException("Type cannot be null or empty");
211         }
212         if (ALL.equals(type)) {
213             throw new IllegalArgumentException("Cannot use the reserved "
214                 + "identifier " + ALL + " as regular type");
215         }
216 
217         FMetrics result = typedMetrics.get(type);
218         if (result == null) {
219             // create and store metrics for this type
220             result = createMetrics();
221             typedMetrics.put(type, result);
222         }
223         return result;
224     }
225 
226     /***
227      * Restores items stored in a field container and adds them to this
228      * instance. The provided field container must have been filled by calling
229      * {@link #storeEntries(FieldContainer)} on a object of this type.
230      * Any values already contained in this instance are added to, they are not
231      * replaced. The {@link #viewAll() sums and averages} are recalculated from
232      * the provided values, it is not directly read from the container.
233      * <em>Summary information cannot be restored.</em>
234      *
235      * @param fContainer the field container to read
236      * @throws IllegalArgumentException if <code>fContainer</code> contains
237      * a wrong kind of {@link FieldMap}s
238      */
239     public void restoreEntries(final FieldContainer fContainer)
240             throws IllegalArgumentException {
241         String currentType;
242         FMetrics currentMetrics;
243         FieldMap currentFM;
244         final Iterator entryIter = fContainer.entryIterator();
245 
246         while (entryIter.hasNext()) {
247             currentFM = (FieldMap) entryIter.next();
248             currentType = Util.asString(currentFM.get(KEY_TYPE));
249 
250             if (currentType == null) {
251                 throw new IllegalArgumentException("Wrong type of field map: "
252                     + KEY_TYPE + " field missing");
253             }
254 
255             // ignore reserved type (sums + averages will be recalculated)
256             if (!ALL.equals(currentType)) {
257                 currentMetrics = createMetrics(currentFM);
258 
259                 // feed as input to update to avoid overwriting exisiting values
260                 update(currentType, currentMetrics);
261             }
262         }
263     }
264 
265     /***
266      * {@inheritDoc}
267      */
268     public void storeEntries(final FieldContainer fContainer) {
269         String currentType;
270         FMetrics currentMetrics;
271         FieldMap currentFM;
272         final Iterator<String> typeIter = typedMetrics.keySet().iterator();
273 
274         // store types in alphabetic order
275         while (typeIter.hasNext()) {
276             currentType = typeIter.next();
277             currentMetrics = typedMetrics.get(currentType);
278             currentFM = currentMetrics.storeFields();
279 
280             // add type field + store in container
281             currentFM.put(KEY_TYPE, currentType);
282             fContainer.add(currentFM);
283         }
284 
285         // store averages
286         currentFM = all.storeFields();
287         currentFM.put(KEY_TYPE, ALL);
288         fContainer.add(currentFM);
289     }
290 
291     /***
292      * Returns a string representation of this object.
293      * @return a textual representation
294      */
295     public String toString() {
296         return new ToStringBuilder(this)
297             .append("sums/averages", all)
298             .append("typed metrics", typedMetrics)
299             .append("calculating summaries", calculatingSummaries)
300             .toString();
301     }
302 
303     /***
304      * Returns the set of all types (Strings) currently stored in this
305      * instance. The set is immutable and cannot modified.
306      *
307      * @return the set of type names
308      */
309     public Set types() {
310         return Collections.unmodifiableSet(typedMetrics.keySet());
311     }
312 
313     /***
314      * Updates the statistics by adding the contents of the given multi-metrics.
315      *
316      * @param multiMetrics the metrics to add
317      */
318     public void update(final MultiFMetricsView multiMetrics) {
319         String currentType;
320         FMetricsView currentMetrics;
321         final Iterator typeIter = multiMetrics.types().iterator();
322         final FMetrics newSum = new FMetrics();
323 
324         // iterate types and update each one
325         while (typeIter.hasNext()) {
326             currentType = (String) typeIter.next();
327             currentMetrics = multiMetrics.view(currentType);
328             updateType(currentType, currentMetrics.getTruePos(),
329                 currentMetrics.getFalseNeg(), currentMetrics.getFalsePos());
330             newSum.update(currentMetrics);
331         }
332 
333         // update 'all' in a single step because otherwise the summaries
334         // (standard deviations etc.) would get mixed up
335         all.update(newSum);
336     }
337 
338     /***
339      * Updates the statistics for a specified type, increasing the stored values
340      * as specified.
341      *
342      * @param type the type to update
343      * @param input contains the number of true and false positives and
344      * false negatives to add
345      * @throws IllegalArgumentException if <code>type</code> is
346      * <code>null</code> or empty or equal to the reserved {@link #ALL} type or
347      * if at least one of the input values is negative
348      */
349     public void update(final String type, final EvalInput input) throws
350             IllegalArgumentException {
351         update(type, input.getTruePos(), input.getFalseNeg(),
352             input.getFalsePos());
353     }
354 
355     /***
356      * Updates the statistics for a specified type, increasing the stored values
357      * as specified.
358      *
359      * @param type the type to update
360      * @param addTruePos the number of new true positives to add
361      * @param addFalseNeg the number of new false negatives to add
362      * @param addFalsePos the number of new false positives to add
363      * @throws IllegalArgumentException f <code>type</code> is
364      * <code>null</code> or empty or equal to the reserved {@link #ALL} type or
365      * if at least one of the parameters is negative
366      */
367     public void update(final String type, final long addTruePos,
368             final long addFalseNeg, final long addFalsePos)
369             throws IllegalArgumentException {
370         updateType(type, addTruePos, addFalseNeg, addFalsePos);
371         all.update(addTruePos, addFalseNeg, addFalsePos);
372     }
373 
374     /***
375      * Helper method that updates only the metrics of a specified type,
376      * but not the {@linkplain #all sums and averages}.
377      *
378      * @param type the type to update
379      * @param addTruePos the number of new true positives to add
380      * @param addFalseNeg the number of new false negatives to add
381      * @param addFalsePos the number of new false positives to add
382      * @throws IllegalArgumentException f <code>type</code> is
383      * <code>null</code> or empty or equal to the reserved {@link #ALL} type or
384      * if at least one of the parameters is negative
385      */
386     private void updateType(final String type, final long addTruePos,
387             final long addFalseNeg, final long addFalsePos)
388             throws IllegalArgumentException {
389         // update metrics for this type + for all types
390         final FMetrics metrics = lookupType(type);
391         metrics.update(addTruePos, addFalseNeg, addFalsePos);
392     }
393 
394     /***
395      * Returns a read-only view of the {@link FMetrics} of the specified type.
396      * This is not a snapshot but will change whenever the underlying counts
397      * are changed.
398      *
399      * @param type the type to check
400      * @return a view of the counts and evaluation metrics for the given type;
401      * or <code>null</code> if no metrics exist for the given type
402      */
403     public FMetricsView view(final String type) {
404         return typedMetrics.get(type);
405     }
406 
407     /***
408      * Returns a read-only view of the {@link FMetrics} containing the sums and
409      * averages over all types. This is not a snapshot but will change whenever
410      * the underlying counts are changed.
411      *
412      * @return a view of the sums and averages over all types
413      */
414     public FMetricsView viewAll() {
415         return all;
416     }
417 
418     /***
419      * Optional operation that shows {@link FMetricsSummary statistical
420      * summaries of precision, recall, and F1 metrics} over all types,
421      * if {@linkplain #isCalculatingSummaries() calculated}. This is not a
422      * snapshot but will change whenever the underlying values are changed.
423      *
424      * @return a statistical summery over the metrics of all types, if
425      * calculated
426      * @throws UnsupportedOperationException if
427      * {@link #isCalculatingSummaries()} is <code>false</code>
428      */
429     public FMetricsSummary viewAllSummary()
430             throws UnsupportedOperationException {
431         if (calculatingSummaries) {
432             // thanks to the factory method we know that this interface is
433             // implemented
434             return (FMetricsSummary) all;
435         } else {
436             throw new UnsupportedOperationException("No summaries calculated");
437         }
438     }
439 
440     /***
441      * Optional operation that shows {@link FMetricsSummary statistical
442      * summaries of precision, recall, and F1 metrics} of the specified type,
443      * if {@linkplain #isCalculatingSummaries() calculated}. This is not a
444      * snapshot but will change whenever the underlying values are changed.
445      *
446      * @param type the type to check
447      * @return a statistical summery over the metrics of the given type, if
448      * calculated
449      * @throws UnsupportedOperationException if
450      * {@link #isCalculatingSummaries()} is <code>false</code>
451      */
452     public FMetricsSummary viewSummary(final String type)
453             throws UnsupportedOperationException {
454         if (calculatingSummaries) {
455             // thanks to the factory method we know that this interface is
456             // implemented
457             return (FMetricsSummary) typedMetrics.get(type);
458         } else {
459             throw new UnsupportedOperationException("No summaries calculated");
460         }
461     }
462 
463 }