View Javadoc

1   /*
2    * Copyright (C) 2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.eval;
23  
24  import java.io.IOException;
25  import java.io.Writer;
26  import java.util.ArrayList;
27  import java.util.Iterator;
28  import java.util.List;
29  import java.util.SortedSet;
30  
31  import org.apache.commons.collections.SortedBag;
32  import org.apache.commons.collections.bag.TreeBag;
33  import org.apache.commons.collections.bag.UnmodifiableSortedBag;
34  import org.apache.commons.lang.builder.ToStringBuilder;
35  
36  import de.fu_berlin.ties.eval.Mistake.MistakeTypes;
37  import de.fu_berlin.ties.text.TextUtils;
38  
39  /***
40   * Stores the results of a mistake analysis performed by
41   * {@link de.fu_berlin.ties.eval.MistakeAnalyzer}.
42   *
43   * @author Christian Siefkes
44   * @version $Revision: 1.10 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
45   */
46  public class MistakeMatrix {
47  
48      /***
49       * The list of mistakes.
50       */
51      private final List<Mistake> mistakes = new ArrayList<Mistake>();
52  
53      /***
54       * A count of the different types of {@link Mistake.MistakeTypes} that
55       * occurred.
56       */
57      private final SortedBag mistakeCount = new TreeBag();
58  
59      /***
60       * A count of the different mistakes combinations that occurred, using the
61       * {@link Mistake#flatten(SortedSet) flattened} string representation.
62       */
63      private final SortedBag mistakeCombinationCount = new TreeBag();
64  
65      /***
66       * A confusion matrix that counts how often each type was confused with
67       * any other type. Stores strings containing the expected answer key
68       * and the encountered prediction type as returned by
69       * {@link Mistake#confusionType(de.fu_berlin.ties.extract.Extraction,
70       * de.fu_berlin.ties.extract.Extraction)}. This instance
71       * counts any confusion instances, no matter whether other mistakes
72       * (e.g. early or late start) occurred as well.
73       */
74      private final SortedBag roughConfusionMatrix = new TreeBag();
75  
76      /***
77       * A confusion matrix that counts how often each type was confused with
78       * any other type. Stores the strings as used by
79       * {@link #roughConfusionMatrix}. This instance only counts exact confusion
80       * instances, where the type confusion was the only mistake that occurred.
81       */
82      private final SortedBag exactConfusionMatrix = new TreeBag();
83  
84      /***
85       * Creates a new instance.
86       */
87      public MistakeMatrix() {
88          super();
89      }
90  
91      /***
92       * Adds a new mistake to this matrix.
93       *
94       * @param mistake the mistake
95       */
96      public void add(final Mistake mistake) {
97          mistakes.add(mistake);
98  
99          // update mistake counts
100         final SortedSet<MistakeTypes> mTypeSet = mistake.getMistakes();
101         final Iterator<MistakeTypes> mTypeIter = mTypeSet.iterator();
102 
103         while (mTypeIter.hasNext()) {
104             mistakeCount.add(mTypeIter.next());
105         }
106         mistakeCombinationCount.add(Mistake.flatten(mTypeSet));
107 
108         // update confusion matrices if appropriate
109         if (mTypeSet.contains(MistakeTypes.WrongType)) {
110             final String confusionType = Mistake.confusionType(
111                     mistake.getAnswerKey(), mistake.getPrediction());
112             roughConfusionMatrix.add(confusionType);
113 
114             if (mTypeSet.size() == 1) {
115                 // the type confusion was the only mistake that occurred
116                 exactConfusionMatrix.add(confusionType);
117             }
118         }
119     }
120 
121     /***
122      * Prints the list of mistakes encountered by this instance.
123      * Mistakes are printed in original order in the format returned by
124      * {@link Mistake#toString()}. They are grouped by source files -- each
125      * new source file is introduced by a line containing the file name
126      * preceded and followed by "---".
127      *
128      * @param writer the writer to write to
129      * @throws IOException if an I/O error occurred
130      */
131     public void printMistakes(final Writer writer) throws IOException {
132         final Iterator<Mistake> mistakeIter = mistakes.iterator();
133         Mistake mistake;
134         String source;
135         String lastSource = null;
136 
137         // iterate and print all mistakes
138         while (mistakeIter.hasNext()) {
139             mistake = mistakeIter.next();
140             source = mistake.getSource();
141 
142             // print line identifying new source file if necessary
143             if (!source.equals(lastSource)) {
144                 // mistake belongs to a different source file than the last one
145                 TextUtils.writeln(writer, "--- " + source + " ---");
146                 lastSource = source;
147             }
148 
149             // print the mistake itself
150             TextUtils.writeln(writer, mistake.toString());
151         }
152     }
153 
154     /***
155      * Returns a compact string representation of this object.
156      *
157      * @return a string representation of this object
158      */
159     public String toString() {
160         return new ToStringBuilder(this)
161             .append("number of mistakes", mistakes.size())
162             .append("size of confusion matrix",
163                     roughConfusionMatrix.uniqueSet().size())
164             .toString();
165     }
166 
167     /***
168      * Returns a confusion matrix that counts how often each type was confused
169      * with any other type. Stores the strings as used by
170      * {@link #roughConfusionMatrix}. This instance only counts exact confusion
171      * instances, where the type confusion was the only mistake that occurred.
172      *
173      * @return a read-only view of the attribute
174      */
175     public UnmodifiableSortedBag viewExactConfusionMatrix() {
176         return (UnmodifiableSortedBag)
177                 UnmodifiableSortedBag.decorate(exactConfusionMatrix);
178     }
179 
180     /***
181      * Returns a count of the different mistakes combinations that occurred,
182      * using the {@link Mistake#flatten(SortedSet) flattened} string
183      * representation.
184      *
185      * @return a read-only view of the attribute
186      */
187     public UnmodifiableSortedBag viewMistakeCombinationCount() {
188         return (UnmodifiableSortedBag)
189                 UnmodifiableSortedBag.decorate(mistakeCombinationCount);
190     }
191 
192     /***
193      * Returns a count of the different types of {@link Mistake.MistakeTypes}
194      * that occurred.
195      *
196      * @return a read-only view of the attribute
197      */
198     public UnmodifiableSortedBag viewMistakeCount() {
199         return (UnmodifiableSortedBag)
200                 UnmodifiableSortedBag.decorate(mistakeCount);
201     }
202 
203     /***
204      * Returns A confusion matrix that counts how often each type was confused
205      * with any other type. Stores strings containing the expected answer key
206      * and the encountered prediction type as returned by
207      * {@link Mistake#confusionType(de.fu_berlin.ties.extract.Extraction,
208      * de.fu_berlin.ties.extract.Extraction)}. This instance
209      * counts any confusion instances, no matter whether other mistakes
210      * (e.g. early or late start) occurred as well.
211      *
212      * @return a read-only view of the attribute
213      */
214     public UnmodifiableSortedBag viewRoughConfusionMatrix() {
215         return (UnmodifiableSortedBag)
216                 UnmodifiableSortedBag.decorate(roughConfusionMatrix);
217     }
218 
219 }