1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.eval;
23
24 import java.util.Iterator;
25 import java.util.SortedSet;
26
27 import de.fu_berlin.ties.extract.Extraction;
28 import de.fu_berlin.ties.text.TextUtils;
29
30 /***
31 * Each instance of this class describe a mistake. They are used by the
32 * {@link de.fu_berlin.ties.eval.MistakeAnalyzer}.
33 *
34 * @author Christian Siefkes
35 * @version $Revision: 1.8 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
36 */
37 public class Mistake {
38
39 /***
40 * The types of mistakes that can occur.
41 */
42 public enum MistakeTypes {
43 /*** Answer key is completely missing. */
44 CompletelyMissing,
45 /*** Prediction is completely spurious. */
46 CompletelySpurious,
47 /*** Prediction ended earlier than the answer key. */
48 EarlyEnd,
49 /*** Prediction started earlier than the answer key. */
50 EarlyStart,
51 /***
52 * Prediction was ignored (there was another more likely prediction).
53 */
54 Ignored,
55 /*** Prediction ended after the answer key. */
56 LateEnd,
57 /*** Prediction started after the answer key. */
58 LateStart,
59 /*** Prediction and answer key are of different types. */
60 WrongType
61 }
62
63 /***
64 * Used in the confusion matrix to separated the (expected) answer key type
65 * from the actually encountered prediction type: {@value} (starts and ends
66 * with a space).
67 */
68 public static final String CONFUSION_SEPARATOR = " -> ";
69
70 /***
71 * The answer key involved in the mistake (might be <code>null</code>).
72 */
73 private final Extraction answerKey;
74
75 /***
76 * The prediction involved in the mistake (might be <code>null</code>).
77 */
78 private final Extraction prediction;
79
80 /***
81 * A set of the mistake types that occurred.
82 */
83 private final SortedSet<MistakeTypes> mistakes;
84
85 /***
86 * The {@linkplain de.fu_berlin.ties.classify.Prediction#getSource() source}
87 * document where this mistake occurred.
88 */
89 private final String source;
90
91
92 /***
93 * Creates a new instance. Either <code>myAnswerKey</code>
94 * or <code>myPrediction</code> might be <code>null</code>, but not both --
95 * if <code>myMistakes</code> contains a "WrongType" mistake, neither
96 * may be null.
97 *
98 * @param myAnswerKey the answer key involved in the mistake
99 * (might be <code>null</code>)
100 * @param myPrediction the prediction involved in the mistake
101 * (might be <code>null</code>
102 * @param myMistakes a set of the mistake types that occurred
103 * @param mySource the
104 * {@linkplain de.fu_berlin.ties.classify.Prediction#getSource() source}
105 * document where this mistake occurred
106 */
107 public Mistake(final Extraction myAnswerKey, final Extraction myPrediction,
108 final SortedSet<MistakeTypes> myMistakes, final String mySource) {
109 answerKey = myAnswerKey;
110 prediction = myPrediction;
111 mistakes = myMistakes;
112 source = mySource;
113 }
114
115
116 /***
117 * Flattens a set of mistake types into a string.
118 *
119 * @param mistakeSet the set of mistake types to flatten
120 * @return a flattened representation of the set
121 */
122 public static String flatten(final SortedSet<MistakeTypes> mistakeSet) {
123 final StringBuilder builder = new StringBuilder("[");
124 final Iterator<MistakeTypes> mistakeIter = mistakeSet.iterator();
125
126 while (mistakeIter.hasNext()) {
127 builder.append(mistakeIter.next().toString());
128
129 if (mistakeIter.hasNext()) {
130 builder.append("+");
131 }
132 }
133
134 builder.append("]");
135 return builder.toString();
136 }
137
138 /***
139 * Returns a string representing the the confusion between the types of
140 * an answer key. The string will contain the type of the answer key
141 * and the type of the prection, separated by {@link #CONFUSION_SEPARATOR}.
142 *
143 * @param ansKey the answer key
144 * @param pred the prediction
145 * @return a string representation as described above
146 */
147 public static String confusionType(final Extraction ansKey,
148 final Extraction pred) {
149 return ansKey.getType() + CONFUSION_SEPARATOR + pred.getType();
150 }
151
152 /***
153 * Returns the answer key involved in the mistake.
154 * @return the value of the attribute (might be <code>null</code>)
155 */
156 public Extraction getAnswerKey() {
157 return answerKey;
158 }
159
160 /***
161 * Helper method that returns the number of tokens in a extraction in a
162 * formatted string.
163 *
164 * @param ext the extraction to use
165 * @return a formatted string such as "5 tokens" or "1 token"
166 */
167 private String formatLength(final Extraction ext) {
168 final int length = ext.getLastIndex() - ext.getIndex() + 1;
169 if (length > 1) {
170 return length + " tokens";
171 } else {
172 return length + " token";
173 }
174 }
175
176 /***
177 * Returns the set of the mistake types that occurred.
178 * @return the value of the attribute
179 */
180 public SortedSet<MistakeTypes> getMistakes() {
181 return mistakes;
182 }
183
184 /***
185 * Returns prediction involved in the mistake.
186 * @return the value of the attribute (might be <code>null</code>)
187 */
188 public Extraction getPrediction() {
189 return prediction;
190 }
191
192 /***
193 * Returns the {@linkplain de.fu_berlin.ties.classify.Prediction#getSource()
194 * source} document where this mistake occurred.
195 *
196 * @return the value of the attribute
197 */
198 public String getSource() {
199 return source;
200 }
201
202 /***
203 * Returns a string representation of this object comprising 2 or 3
204 * lines. The first line contains the {@link #flatten(SortedSet) flattened}
205 * set of mistakes, followed by a space and the
206 * {@link #confusionType(Extraction, Extraction)} if a
207 * {@link MistakeTypes#WrongType} mistake occurred. The following line(s)
208 * contain the {@link #getAnswerKey() answer key} (prefixed by "Answer: ")
209 * and the {@link #getPrediction() prediction} (prefixed by "Prediction: ")
210 * if they are present (either of them must always be present, but not
211 * necessarily both). Answer keys and predictions are followed by their
212 * number of tokens in paranthesis (e.g. "(5 tokens)" or "(1 token)").
213 *
214 * @return a string representation of this object as described above
215 */
216 public String toString() {
217 final StringBuilder builder = new StringBuilder(flatten(mistakes));
218
219 if (mistakes.contains(MistakeTypes.WrongType)) {
220
221 builder.append(' ');
222 builder.append(confusionType(answerKey, prediction));
223 }
224
225
226 if (answerKey != null) {
227 builder.append(TextUtils.LINE_SEPARATOR);
228 builder.append("Answer: ");
229 builder.append(answerKey.toString());
230 builder.append(" (").append(formatLength(answerKey)).append(")");
231 }
232
233
234 if (prediction != null) {
235 builder.append(TextUtils.LINE_SEPARATOR);
236 builder.append("Prediction: ");
237 builder.append(prediction.toString());
238 builder.append(" (").append(formatLength(prediction)).append(")");
239 }
240
241 return builder.toString();
242 }
243
244 }