View Javadoc

1   /*
2    * Copyright (C) 2003-2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.classify;
23  
24  import java.util.Iterator;
25  import java.util.LinkedList;
26  
27  import org.apache.commons.lang.builder.EqualsBuilder;
28  import org.apache.commons.lang.builder.HashCodeBuilder;
29  
30  import de.fu_berlin.ties.eval.EvalStatus;
31  import de.fu_berlin.ties.io.BaseStorable;
32  import de.fu_berlin.ties.io.FieldMap;
33  import de.fu_berlin.ties.util.Util;
34  
35  /***
36   * A prediction, wrapping the predicted class and the probability of the
37   * prediction.
38   *
39   * <p>Instances of this class are not thread-safe and must be synchronized
40   * externally, if required.
41   *
42   * @author Christian Siefkes
43   * @version $Revision: 1.22 $, $Date: 2004/11/25 13:35:55 $, $Author: siefkes $
44   */
45  public class Prediction extends BaseStorable {
46  
47      /***
48       * Serialization key for the predicted class.
49       */
50      public static final String KEY_TYPE = "Type";
51  
52      /***
53       * Serialization key for the source identifier.
54       */
55      public static final String KEY_SOURCE = "Source";
56  
57      /***
58       * Serialization key for the probability.
59       */
60      public static final String KEY_PROB = "Probability";
61  
62      /***
63       * Serialization key for the pR.
64       */
65      public static final String KEY_PR = "pR";
66  
67      /***
68       * The predicted type.
69       */
70      private final String type;
71  
72      /***
73       * The probabilities combinied of the prediction.
74       */
75      private LinkedList<Probability> probs = new LinkedList<Probability>();
76  
77      /***
78       * The {@linkplain EvalStatus evaluation status} of this instance.
79       */
80      private EvalStatus evalStatus;
81  
82      /***
83       * An optional identifier of the source of this prediction (e.g., the file
84       * name).
85       */
86      private String source;
87  
88      /***
89       * Creates a new instance from a field map, fulfilling the
90       * {@link de.fu_berlin.ties.io.Storable} contract.
91       *
92       * @param fieldMap map containing the serialized fields
93       */
94      public Prediction(final FieldMap fieldMap) {
95          this(Util.asString(fieldMap.get(KEY_TYPE)),
96              Util.asString(fieldMap.get(KEY_SOURCE)),
97              new Probability(
98                      // this will set -1 (confirmed) if the value is missing
99                      Util.asDouble(fieldMap.get(KEY_PROB)),
100                     // pR default to not-a-number
101                     fieldMap.containsKey(KEY_PR)
102                         ? Util.asDouble(fieldMap.get(KEY_PR)) : Double.NaN),
103             // this will use UNKNOWN if the field is missing or invalid
104             EvalStatus.parse(Util.asString(fieldMap.get(
105                     EvalStatus.KEY_EVAL_STATUS)), true));
106     }
107 
108     /***
109      * Creates a new instance, setting the evaluation status to
110      * {@link EvalStatus#UNKNOWN}.
111      *
112      * @param predicted the predicted type
113      * @param prob the probability of the prediction
114      */
115     public Prediction(final String predicted, final Probability prob) {
116         this(predicted, prob, EvalStatus.UNKNOWN);
117     }
118 
119     /***
120      * Creates a new instance.
121      *
122      * @param predicted the predicted type
123      * @param prob the probability of the prediction
124      * @param status the {@linkplain EvalStatus evaluation status} of this
125      * instance
126      */
127     public Prediction(final String predicted, final Probability prob,
128             final EvalStatus status) {
129         this(predicted, null, prob, status);
130     }
131 
132     /***
133      * Creates a new instance.
134      *
135      * @param predicted the predicted type
136      * @param sourceID an identifier of the source of this prediction (e.g., the
137      * file name), might be <code>null</code>
138      * @param prob the probability of the prediction
139      * @param status the {@linkplain EvalStatus evaluation status} of this
140      * instance
141      */
142     public Prediction(final String predicted, final String sourceID,
143             final Probability prob, final EvalStatus status) {
144         super();
145         type = predicted;
146         source = sourceID;
147         probs.add(prob);
148         evalStatus = status;
149     }
150 
151     /***
152      * Adds a new probability for the prediction. The actual probabilities
153      * are averaged over the combined probabilities.
154      *
155      * @param prob the new probability; might be <code>null</code>
156      * if the overall probability of the extraction should not be changed
157      * @param atEnd whether to add the new probability at the end or at the
158      * start
159      */
160     protected void addProb(final Probability prob, final boolean atEnd) {
161         if (atEnd) {
162             probs.addLast(prob);
163         } else {
164             probs.addFirst(prob);
165         }
166     }
167 
168     /***
169      * Indicates whether some other object is "equal to" this one, fulfulling
170      * the {@link Object#equals(java.lang.Object)} contract.
171      * {@linkplain #getEvalStatus() Evaluation status} and
172      * {@linkplain #getProbability() probability}
173      * are ignored when checking equality, only {@linkplain #getType() type}
174      * and {@linkplain #getSource() source} are compared.
175      *
176      * @param obj the reference object with which to compare
177      * @return <code>true</code> iff the specified object is a
178      * {@link Prediction} equal to this instance
179      */
180     public boolean equals(final Object obj) {
181         if (obj == this) {
182             return true;
183         } else if ((obj != null) && (getClass().equals(obj.getClass()))) {
184             // used getClass instead of instanceof because otherwise subclasses
185             // with additional fields would break the contract
186             final Prediction other = (Prediction) obj;
187             return new EqualsBuilder()
188                 .append(getType(), other.getType())
189                 .append(getSource(), other.getSource())
190                 .isEquals();
191         } else {
192             return false;
193         }
194     }
195 
196     /***
197      * Returns the {@linkplain EvalStatus evaluation status} of this instance.
198      * @return the value of the attribute
199      */
200     public EvalStatus getEvalStatus() {
201         return evalStatus;
202     }
203 
204     /***
205      * Calculates and returns the average probability.
206      *
207      * @return the average probability
208      */
209     public Probability getProbability() {
210         if (probs.size() == 1) {
211             return probs.getFirst();
212         } else {
213             // calculate average
214             double summedProb = 0.0;
215             double summedPR = 0.0;
216             int nonNullProbs = 0;
217             Probability currentProb;
218             final Iterator<Probability> probIter = probs.iterator();
219 
220             while (probIter.hasNext()) {
221                 currentProb = probIter.next();
222 
223                 // jump over probabilities that are null
224                 if (currentProb != null) {
225                     nonNullProbs++;
226 
227                     // using logarithm to multiply prob.s instead of adding
228                     summedProb += Math.log(currentProb.getProb());
229 
230                      if (!Double.isNaN(summedPR)) {
231                         if (Double.isNaN(currentProb.getPR())) {
232                             // set PR NaN if there are one of more NaN values
233                             summedPR = Double.NaN;
234                         } else {
235                             summedPR += currentProb.getPR();
236                         }
237                     }
238                 }
239             } // while
240 
241             // using exp. to multiply prob.s instead of adding (undo log)
242             final double averageProb = Math.exp(summedProb / nonNullProbs);
243 
244             if (Double.isNaN(summedPR)) {
245                 return new Probability(averageProb);
246             } else {
247                 return new Probability(averageProb, summedPR / nonNullProbs);
248             }
249         }
250     }
251 
252     /***
253      * Returns the identifier of the source of this prediction (e.g., the file
254      * name).
255      *
256      * @return the value of the attribute; or <code>null</code> if none
257      * specified
258      */
259     public String getSource() {
260         return source;
261     }
262 
263     /***
264      * Returns the predicted type (class).
265      * @return the predicted type (class)
266      */
267     public String getType() {
268         return type;
269     }
270 
271     /***
272      * Returns a hash code value for this object, fulfulling the
273      * {@link Object#hashCode()} contract.
274      * @return a hash code value for this object
275      */
276     public int hashCode() {
277         // you pick two hard-coded, randomly chosen, non-zero, odd numbers
278         // (preferably primes); ideally different for each class
279         return new HashCodeBuilder(3, 5)
280             .append(getType())
281             .append(getSource())
282             .toHashCode();
283     }
284 
285     /***
286      * Returns the number of probabilities combined in this prediction.
287      *
288      * @return the number of probabilities
289      */
290     protected int probCount() {
291         return probs.size();
292     }
293 
294     /***
295      * Deletes one of the probabilities used for the prediction. At least one
296      * probability must always remain, i.e. {@link #probCount()} must be 2 or
297      * more prior to calling this method.
298      *
299      * @param atEnd whether to delete the first or the last probability
300      * @return the removed probability
301      * @throws IllegalStateException if there is only one probability left
302      */
303     protected Probability removeProb(final boolean atEnd)
304     throws IllegalStateException {
305         // check that at least one prob will remain
306         if (probs.size() < 2) {
307             throw new IllegalStateException(
308                     "Cannot delete the last remaining probabity");
309         }
310 
311         if (atEnd) {
312             return probs.removeLast();
313         } else {
314             return probs.removeFirst();
315         }
316     }
317 
318     /***
319      * Modifies the {@linkplain EvalStatus evaluation status} of this instance.
320      *
321      * @param newEvalStatus the new value of the attribute
322      */
323     public void setEvalStatus(final EvalStatus newEvalStatus) {
324         evalStatus = newEvalStatus;
325     }
326 
327     /***
328      * Sets the identifier of the source of this prediction (e.g., the file
329      * name).
330      *
331      * @param newSource the new value of the attribute; or <code>null</code> to
332      * delete the current value
333      */
334     public void setSource(final String newSource) {
335         source = newSource;
336     }
337 
338     /***
339      * Stores all relevant fields of this object in a field map for
340      * serialization. An equivalent object can be created by calling
341      * {@link de.fu_berlin.ties.io.FieldMap#createObject(Class)} on the created
342      * field map.
343      *
344      * @return the created field map
345      */
346     public FieldMap storeFields() {
347         final FieldMap result = new FieldMap();
348         result.put(KEY_TYPE, getType());
349 
350         // omit source if null
351         if (source != null) {
352             result.put(KEY_SOURCE, getSource());
353         }
354 
355         // omit probability if negative, i.e. for confirmed values/answer keys
356         final Probability prob = getProbability();
357         if (prob.getProb() >= 0) {
358             result.put(KEY_PROB, new Double(prob.getProb()));
359         }
360 
361         // omit pR if not-a-number, i.e. unknown
362         final double pR = prob.getPR();
363         if (!Double.isNaN(pR)) {
364             result.put(KEY_PR, new Double(pR));
365         }
366 
367         // omit evaluation status if unknown
368         if (evalStatus != EvalStatus.UNKNOWN) {
369             result.put(EvalStatus.KEY_EVAL_STATUS, getEvalStatus());
370         }
371         return result;
372     }
373 
374 }