View Javadoc

1   /*
2    * Copyright (C) 2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.eval;
23  
24  import java.util.Iterator;
25  import java.util.SortedSet;
26  
27  import de.fu_berlin.ties.extract.Extraction;
28  import de.fu_berlin.ties.text.TextUtils;
29  
30  /***
31   * Each instance of this class describe a mistake. They are used by the
32   * {@link de.fu_berlin.ties.eval.MistakeAnalyzer}.
33   *
34   * @author Christian Siefkes
35   * @version $Revision: 1.8 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
36   */
37  public class Mistake {
38  
39      /***
40       * The types of mistakes that can occur.
41       */
42      public enum MistakeTypes {
43          /*** Answer key is completely missing. */
44          CompletelyMissing,
45          /*** Prediction is completely spurious. */
46          CompletelySpurious,
47          /*** Prediction ended earlier than the answer key. */
48          EarlyEnd,
49          /*** Prediction started earlier than the answer key. */
50          EarlyStart,
51          /***
52           * Prediction was ignored (there was another more likely prediction).
53           */
54          Ignored,
55          /*** Prediction ended after the answer key. */
56          LateEnd,
57          /*** Prediction started after the answer key. */
58          LateStart,
59          /*** Prediction and answer key are of different types. */
60          WrongType
61      }
62  
63      /***
64       * Used in the confusion matrix to separated the (expected) answer key type
65       * from the actually encountered prediction type: {@value} (starts and ends
66       * with a space).
67       */
68      public static final String CONFUSION_SEPARATOR = " -> ";
69  
70      /***
71       * The answer key involved in the mistake (might be <code>null</code>).
72       */
73      private final Extraction answerKey;
74  
75      /***
76       * The prediction involved in the mistake (might be <code>null</code>).
77       */
78      private final Extraction prediction;
79  
80      /***
81       * A set of the mistake types that occurred.
82       */
83      private final SortedSet<MistakeTypes> mistakes;
84  
85      /***
86       * The {@linkplain de.fu_berlin.ties.classify.Prediction#getSource() source}
87       * document where this mistake occurred.
88       */
89      private final String source;
90  
91  
92      /***
93       * Creates a new instance. Either <code>myAnswerKey</code>
94       * or <code>myPrediction</code> might be <code>null</code>, but not both --
95       * if <code>myMistakes</code> contains a "WrongType" mistake, neither
96       * may be null.
97       *
98       * @param myAnswerKey the answer key involved in the mistake
99       * (might be <code>null</code>)
100      * @param myPrediction the prediction involved in the mistake
101      * (might be <code>null</code>
102      * @param myMistakes a set of the mistake types that occurred
103      * @param mySource the
104      * {@linkplain de.fu_berlin.ties.classify.Prediction#getSource() source}
105      * document where this mistake occurred
106      */
107     public Mistake(final Extraction myAnswerKey, final Extraction myPrediction,
108             final SortedSet<MistakeTypes> myMistakes, final String mySource) {
109         answerKey = myAnswerKey;
110         prediction = myPrediction;
111         mistakes = myMistakes;
112         source = mySource;
113     }
114 
115 
116     /***
117      * Flattens a set of mistake types into a string.
118      *
119      * @param mistakeSet the set of mistake types to flatten
120      * @return a flattened representation of the set
121      */
122     public static String flatten(final SortedSet<MistakeTypes> mistakeSet) {
123         final StringBuilder builder = new StringBuilder("[");
124         final Iterator<MistakeTypes> mistakeIter = mistakeSet.iterator();
125 
126         while (mistakeIter.hasNext()) {
127             builder.append(mistakeIter.next().toString());
128 
129             if (mistakeIter.hasNext()) {
130                 builder.append("+");
131             }
132         }
133 
134         builder.append("]");
135         return builder.toString();
136     }
137 
138     /***
139      * Returns a string representing the the confusion between the types of
140      * an answer key. The string will contain the type of the answer key
141      * and the type of the prection, separated by {@link #CONFUSION_SEPARATOR}.
142      *
143      * @param ansKey the answer key
144      * @param pred the prediction
145      * @return a string representation as described above
146      */
147     public static String confusionType(final Extraction ansKey,
148             final Extraction pred) {
149         return ansKey.getType() + CONFUSION_SEPARATOR + pred.getType();
150     }
151 
152     /***
153      * Returns the answer key involved in the mistake.
154      * @return the value of the attribute (might be <code>null</code>)
155      */
156     public Extraction getAnswerKey() {
157         return answerKey;
158     }
159 
160     /***
161      * Helper method that returns the number of tokens in a extraction in a
162      * formatted string.
163      *
164      * @param ext the extraction to use
165      * @return a formatted string such as "5 tokens" or "1 token"
166      */
167     private String formatLength(final Extraction ext) {
168         final int length = ext.getLastIndex() - ext.getIndex() + 1;
169         if (length > 1) {
170             return length + " tokens";
171         } else {
172             return length + " token";
173         }
174     }
175 
176     /***
177      * Returns the set of the mistake types that occurred.
178      * @return the value of the attribute
179      */
180     public SortedSet<MistakeTypes> getMistakes() {
181         return mistakes;
182     }
183 
184     /***
185      * Returns prediction involved in the mistake.
186      * @return the value of the attribute (might be <code>null</code>)
187      */
188     public Extraction getPrediction() {
189         return prediction;
190     }
191 
192     /***
193      * Returns the {@linkplain de.fu_berlin.ties.classify.Prediction#getSource()
194      * source} document where this mistake occurred.
195      *
196      * @return the value of the attribute
197      */
198     public String getSource() {
199         return source;
200     }
201 
202     /***
203      * Returns a string representation of this object comprising 2 or 3
204      * lines. The first line contains the {@link #flatten(SortedSet) flattened}
205      * set of mistakes, followed by a space and the
206      * {@link #confusionType(Extraction, Extraction)} if a
207      * {@link MistakeTypes#WrongType} mistake occurred. The following line(s)
208      * contain the {@link #getAnswerKey() answer key} (prefixed by "Answer: ")
209      * and the {@link #getPrediction() prediction} (prefixed by "Prediction: ")
210      * if they are present (either of them must always be present, but not
211      * necessarily both). Answer keys and predictions are followed by their
212      * number of tokens in paranthesis (e.g. "(5 tokens)" or "(1 token)").
213      *
214      * @return a string representation of this object as described above
215      */
216     public String toString() {
217         final StringBuilder builder = new StringBuilder(flatten(mistakes));
218 
219         if (mistakes.contains(MistakeTypes.WrongType)) {
220             // type confusion: append confusion type
221             builder.append(' ');
222             builder.append(confusionType(answerKey, prediction));
223         }
224 
225         // append answer key if present
226         if (answerKey != null) {
227             builder.append(TextUtils.LINE_SEPARATOR);
228             builder.append("Answer: ");
229             builder.append(answerKey.toString());
230             builder.append(" (").append(formatLength(answerKey)).append(")");
231         }
232 
233         // append prediction if present
234         if (prediction != null) {
235             builder.append(TextUtils.LINE_SEPARATOR);
236             builder.append("Prediction: ");
237             builder.append(prediction.toString());
238             builder.append(" (").append(formatLength(prediction)).append(")");
239         }
240 
241         return builder.toString();
242     }
243 
244 }