View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.classify;
23  
24  import java.util.HashMap;
25  import java.util.Iterator;
26  import java.util.Map;
27  
28  import org.apache.commons.configuration.Configuration;
29  import org.apache.commons.lang.builder.ToStringBuilder;
30  
31  import de.fu_berlin.ties.TiesConfiguration;
32  import de.fu_berlin.ties.util.Util;
33  
34  /***
35   * Reranks the predictions in a distribution by multiplying the {@linkplain
36   * de.fu_berlin.ties.classify.Prediction#getProbability() probabilities} of each
37   * of them with a bias, if specified for the type of the prediction.
38   * 
39   * @author Christian Siefkes
40   * @version $Revision: 1.11 $, $Date: 2006/10/21 16:03:55 $, $Author: siefkes $
41   */
42  public class Reranker {
43  
44      /***
45       * The map of biases used to rerank predictions. Keys are
46       * {@linkplain Prediction#getType() prediction types} (Strings); values are
47       * biases (Doubles). Predictions of types not contained in this map are left
48       * unchanged.
49       */
50      private final Map<String, Double> biasMap;
51  
52      /***
53       * Creates a new "dummy" instance instance that always returns the original
54       * prediction distribution.
55       */
56      public Reranker() {
57          // pass an empty map
58          this(new HashMap<String, Double>());
59      }
60  
61      /***
62       * Creates a new instance.
63       * 
64       * @param config the biases are read from keys starting with "bias" in
65       * this configuration (<code>bias.classname = bias</code>, e.g.
66       * <code>bias.spam = 0.9</code>)
67       */
68      public Reranker(final Configuration config) {
69          super();
70          biasMap = new HashMap<String, Double>();
71          final Configuration biasSubset = config.subset("bias");
72          final Iterator biasKeys = biasSubset.getKeys();
73          String type;
74          Object rawValue;
75          double value;
76  
77          // populate map from subset of configuration
78          while (biasKeys.hasNext()) {
79              type = (String) biasKeys.next();
80              rawValue = biasSubset.getProperty(type);
81  
82              // ignore empty/null values
83              if (!TiesConfiguration.propertyIsEmpty(rawValue)) {
84                  value = Util.asDouble(rawValue);
85                  biasMap.put(type, new Double(value));
86                  Util.LOG.debug("Reranker: Bias of " + type + " set to "
87                          + value);
88              }
89          }
90      }
91  
92      /***
93       * Creates a new instance.
94       * 
95       * @param biases a map from {@linkplain Prediction#getType() prediction
96       * types} (Strings) to biases (Doubles); predictions of types not contained
97       * in this map are left unchanged
98       */
99      public Reranker(final Map<String, Double> biases) {
100         super();
101         biasMap = biases;
102     }
103 
104     /***
105      * Reranks the predictions in a distribution.
106      *
107      * <p><em>The reranked probability estimates are not renormalized, so the
108      * result will <strong>not</strong> be a real probability distribution
109      * because sum of all probabilities will typically be different from 1.
110      * If you want to work on a real probability distribution you have to
111      * normalize it yourself.</em>
112      * 
113      * @param origDist the prediction distribution to rerank
114      * @return a new prediction distribution containing the reranked
115      * predictions; if no biases are defined at all (bias map is empty), the
116      * original distribution is returned
117      */
118     public PredictionDistribution rerank(
119             final PredictionDistribution origDist) {
120         if (biasMap.isEmpty()) {
121             return origDist;
122         } else {
123             final PredictionDistribution result = new PredictionDistribution();
124             final Iterator origIter = origDist.iterator();
125             Prediction origPred, newPred;
126             String type;
127             Probability origProbability;
128             double origProb, origPR;
129             double newProb, newPR;
130             double bias;
131 
132             while (origIter.hasNext()) {
133                 origPred = (Prediction) origIter.next();
134                 type = origPred.getType();
135 
136                 if (biasMap.containsKey(type)) {
137                     // multiply pR and prob. with bias
138                     bias = biasMap.get(type).doubleValue();
139                     origProbability = origPred.getProbability();
140                     origProb = origProbability.getProb();
141                     origPR = origProbability.getPR();
142 
143                     // change probability only if given (non-negative)
144                     if (origProb >= 0) {
145                         newProb = origProb * bias;
146                     } else {
147                         newProb = origProb;
148                     }
149 
150                     // change PR only if given, not if it's NaN
151                     if (!Double.isNaN(origPR)) {
152                         newPR = origPR * bias;
153                     } else {
154                         newPR = origPR;
155                     }
156 
157                     newPred = new Prediction(type, origPred.getSource(),
158                             new Probability(newProb, newPR),
159                             origPred.getEvalStatus());
160                 } else {
161                     // keep prediction unchanged
162                     newPred = origPred;
163                 }
164 
165                 // predictions are automatically ordered by the distribution
166                 result.add(newPred);
167             }
168 
169             return result;
170         }
171     }
172 
173     /***
174      * Returns a string representation of this object.
175      * 
176      * @return a textual representation
177      */
178     public String toString() {
179         return new ToStringBuilder(this).append(biasMap).toString();
180     }
181 
182 }