View Javadoc

1   /*
2    * Copyright (C) 2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.classify;
23  
24  import java.util.HashMap;
25  import java.util.Iterator;
26  import java.util.Map;
27  
28  import org.apache.commons.configuration.Configuration;
29  import org.apache.commons.lang.builder.ToStringBuilder;
30  
31  import de.fu_berlin.ties.TiesConfiguration;
32  import de.fu_berlin.ties.util.Util;
33  
34  /***
35   * Reranks the predictions in a distribution by multiplying the {@linkplain
36   * de.fu_berlin.ties.classify.Prediction#getProbability() probabilities} of each
37   * of them with a bias, if specified for the type of the prediction.
38   * 
39   * @author Christian Siefkes
40   * @version $Revision: 1.5 $, $Date: 2004/11/19 14:04:19 $, $Author: siefkes $
41   */
42  public class Reranker {
43  
44      /***
45       * The map of biases used to rerank predictions. Keys are
46       * {@linkplain Prediction#getType() prediction types} (Strings); values are
47       * biases (Doubles). Predictions of types not contained in this map are left
48       * unchanged.
49       */
50      private final Map<String, Double> biasMap;
51  
52      /***
53       * Creates a new instance.
54       * 
55       * @param config the biases are read from keys starting with "bias" in
56       * this configuration (<code>rerank.classname = bias</code>, e.g.
57       * <code>rerank.spam = 0.9</code>)
58       */
59      public Reranker(final Configuration config) {
60          super();
61          biasMap = new HashMap<String, Double>();
62          final Configuration biasSubset = config.subset("bias");
63          final Iterator biasKeys = biasSubset.getKeys();
64          String type;
65          Object rawValue;
66          double value;
67  
68          // populate map from subset of configuration
69          while (biasKeys.hasNext()) {
70              type = (String) biasKeys.next();
71              rawValue = biasSubset.getProperty(type);
72  
73              // ignore empty/null values
74              if (!TiesConfiguration.propertyIsEmpty(rawValue)) {
75                  value = Util.asDouble(rawValue);
76                  biasMap.put(type, new Double(value));
77                  Util.LOG.debug("Reranker: Bias of " + type + " set to "
78                          + value);
79              }
80          }
81      }
82  
83      /***
84       * Creates a new instance.
85       * 
86       * @param biases a map from {@linkplain Prediction#getType() prediction
87       * types} (Strings) to biases (Doubles); predictions of types not contained
88       * in this map are left unchanged
89       */
90      public Reranker(final Map<String, Double> biases) {
91          super();
92          biasMap = biases;
93      }
94  
95      /***
96       * Reranks the predictions in a distribution.
97       *
98       * <p><em>The reranked probability estimates are not renormalized, so the
99       * result will <strong>not</strong> be a real probability distribution
100      * because sum of all probabilities will typically be different from 1.
101      * If you want to work on a real probability distribution you have to
102      * normalize it yourself.</em>
103      * 
104      * @param origDist the prediction distribution to rerank
105      * @return a new prediction distribution containing the reranked
106      * predictions; if no biases are defined at all (bias map is empty), the
107      * original distribution is returned
108      */
109     public PredictionDistribution rerank(
110             final PredictionDistribution origDist) {
111         if (biasMap.isEmpty()) {
112             return origDist;
113         } else {
114             final PredictionDistribution result = new PredictionDistribution();
115             final Iterator origIter = origDist.iterator();
116             Prediction origPred, newPred;
117             String type;
118             Probability origProbability;
119             double origProb, origPR;
120             double newProb, newPR;
121             double bias;
122 
123             while (origIter.hasNext()) {
124                 origPred = (Prediction) origIter.next();
125                 type = origPred.getType();
126 
127                 if (biasMap.containsKey(type)) {
128                     // multiply pR and prob. with bias
129                     bias = biasMap.get(type).doubleValue();
130                     origProbability = origPred.getProbability();
131                     origProb = origProbability.getProb();
132                     origPR = origProbability.getPR();
133 
134                     // change probability only if given (non-negative)
135                     if (origProb >= 0) {
136                         newProb = origProb * bias;
137                     } else {
138                         newProb = origProb;
139                     }
140 
141                     // change PR only if given, not if it's NaN
142                     if (!Double.isNaN(origPR)) {
143                         newPR = origPR * bias;
144                     } else {
145                         newPR = origPR;
146                     }
147 
148                     newPred = new Prediction(type, origPred.getSource(),
149                             new Probability(newProb, newPR),
150                             origPred.getEvalStatus());
151                 } else {
152                     // keep prediction unchanged
153                     newPred = origPred;
154                 }
155 
156                 // predictions are automatically ordered by the distribution
157                 result.add(newPred);
158             }
159 
160             return result;
161         }
162     }
163 
164     /***
165      * Returns a string representation of this object.
166      * 
167      * @return a textual representation
168      */
169     public String toString() {
170         return new ToStringBuilder(this).append(biasMap).toString();
171     }
172 
173 }