1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.eval;
23
24 import java.io.IOException;
25 import java.io.Writer;
26 import java.util.ArrayList;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.SortedSet;
30
31 import org.apache.commons.collections.SortedBag;
32 import org.apache.commons.collections.bag.TreeBag;
33 import org.apache.commons.collections.bag.UnmodifiableSortedBag;
34 import org.apache.commons.lang.builder.ToStringBuilder;
35
36 import de.fu_berlin.ties.eval.Mistake.MistakeTypes;
37 import de.fu_berlin.ties.text.TextUtils;
38
39 /***
40 * Stores the results of a mistake analysis performed by
41 * {@link de.fu_berlin.ties.eval.MistakeAnalyzer}.
42 *
43 * @author Christian Siefkes
44 * @version $Revision: 1.10 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
45 */
46 public class MistakeMatrix {
47
48 /***
49 * The list of mistakes.
50 */
51 private final List<Mistake> mistakes = new ArrayList<Mistake>();
52
53 /***
54 * A count of the different types of {@link Mistake.MistakeTypes} that
55 * occurred.
56 */
57 private final SortedBag mistakeCount = new TreeBag();
58
59 /***
60 * A count of the different mistakes combinations that occurred, using the
61 * {@link Mistake#flatten(SortedSet) flattened} string representation.
62 */
63 private final SortedBag mistakeCombinationCount = new TreeBag();
64
65 /***
66 * A confusion matrix that counts how often each type was confused with
67 * any other type. Stores strings containing the expected answer key
68 * and the encountered prediction type as returned by
69 * {@link Mistake#confusionType(de.fu_berlin.ties.extract.Extraction,
70 * de.fu_berlin.ties.extract.Extraction)}. This instance
71 * counts any confusion instances, no matter whether other mistakes
72 * (e.g. early or late start) occurred as well.
73 */
74 private final SortedBag roughConfusionMatrix = new TreeBag();
75
76 /***
77 * A confusion matrix that counts how often each type was confused with
78 * any other type. Stores the strings as used by
79 * {@link #roughConfusionMatrix}. This instance only counts exact confusion
80 * instances, where the type confusion was the only mistake that occurred.
81 */
82 private final SortedBag exactConfusionMatrix = new TreeBag();
83
84 /***
85 * Creates a new instance.
86 */
87 public MistakeMatrix() {
88 super();
89 }
90
91 /***
92 * Adds a new mistake to this matrix.
93 *
94 * @param mistake the mistake
95 */
96 public void add(final Mistake mistake) {
97 mistakes.add(mistake);
98
99
100 final SortedSet<MistakeTypes> mTypeSet = mistake.getMistakes();
101 final Iterator<MistakeTypes> mTypeIter = mTypeSet.iterator();
102
103 while (mTypeIter.hasNext()) {
104 mistakeCount.add(mTypeIter.next());
105 }
106 mistakeCombinationCount.add(Mistake.flatten(mTypeSet));
107
108
109 if (mTypeSet.contains(MistakeTypes.WrongType)) {
110 final String confusionType = Mistake.confusionType(
111 mistake.getAnswerKey(), mistake.getPrediction());
112 roughConfusionMatrix.add(confusionType);
113
114 if (mTypeSet.size() == 1) {
115
116 exactConfusionMatrix.add(confusionType);
117 }
118 }
119 }
120
121 /***
122 * Prints the list of mistakes encountered by this instance.
123 * Mistakes are printed in original order in the format returned by
124 * {@link Mistake#toString()}. They are grouped by source files -- each
125 * new source file is introduced by a line containing the file name
126 * preceded and followed by "---".
127 *
128 * @param writer the writer to write to
129 * @throws IOException if an I/O error occurred
130 */
131 public void printMistakes(final Writer writer) throws IOException {
132 final Iterator<Mistake> mistakeIter = mistakes.iterator();
133 Mistake mistake;
134 String source;
135 String lastSource = null;
136
137
138 while (mistakeIter.hasNext()) {
139 mistake = mistakeIter.next();
140 source = mistake.getSource();
141
142
143 if (!source.equals(lastSource)) {
144
145 TextUtils.writeln(writer, "--- " + source + " ---");
146 lastSource = source;
147 }
148
149
150 TextUtils.writeln(writer, mistake.toString());
151 }
152 }
153
154 /***
155 * Returns a compact string representation of this object.
156 *
157 * @return a string representation of this object
158 */
159 public String toString() {
160 return new ToStringBuilder(this)
161 .append("number of mistakes", mistakes.size())
162 .append("size of confusion matrix",
163 roughConfusionMatrix.uniqueSet().size())
164 .toString();
165 }
166
167 /***
168 * Returns a confusion matrix that counts how often each type was confused
169 * with any other type. Stores the strings as used by
170 * {@link #roughConfusionMatrix}. This instance only counts exact confusion
171 * instances, where the type confusion was the only mistake that occurred.
172 *
173 * @return a read-only view of the attribute
174 */
175 public UnmodifiableSortedBag viewExactConfusionMatrix() {
176 return (UnmodifiableSortedBag)
177 UnmodifiableSortedBag.decorate(exactConfusionMatrix);
178 }
179
180 /***
181 * Returns a count of the different mistakes combinations that occurred,
182 * using the {@link Mistake#flatten(SortedSet) flattened} string
183 * representation.
184 *
185 * @return a read-only view of the attribute
186 */
187 public UnmodifiableSortedBag viewMistakeCombinationCount() {
188 return (UnmodifiableSortedBag)
189 UnmodifiableSortedBag.decorate(mistakeCombinationCount);
190 }
191
192 /***
193 * Returns a count of the different types of {@link Mistake.MistakeTypes}
194 * that occurred.
195 *
196 * @return a read-only view of the attribute
197 */
198 public UnmodifiableSortedBag viewMistakeCount() {
199 return (UnmodifiableSortedBag)
200 UnmodifiableSortedBag.decorate(mistakeCount);
201 }
202
203 /***
204 * Returns A confusion matrix that counts how often each type was confused
205 * with any other type. Stores strings containing the expected answer key
206 * and the encountered prediction type as returned by
207 * {@link Mistake#confusionType(de.fu_berlin.ties.extract.Extraction,
208 * de.fu_berlin.ties.extract.Extraction)}. This instance
209 * counts any confusion instances, no matter whether other mistakes
210 * (e.g. early or late start) occurred as well.
211 *
212 * @return a read-only view of the attribute
213 */
214 public UnmodifiableSortedBag viewRoughConfusionMatrix() {
215 return (UnmodifiableSortedBag)
216 UnmodifiableSortedBag.decorate(roughConfusionMatrix);
217 }
218
219 }