View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.extract.reestimate;
23  
24  import org.apache.commons.lang.StringUtils;
25  import org.apache.commons.lang.builder.ToStringBuilder;
26  
27  import de.fu_berlin.ties.ProcessingException;
28  import de.fu_berlin.ties.TiesConfiguration;
29  import de.fu_berlin.ties.context.ContextDetails;
30  import de.fu_berlin.ties.extract.Extraction;
31  import de.fu_berlin.ties.util.Util;
32  
33  /***
34   * A re-estimator recalculates the probabilites of extractions in a suitable
35   * way. Re-estimators can be <em>chained</em> -- in this case each estimator
36   * will work on the extractions returned by the previous one.
37   *
38   * @author Christian Siefkes
39   * @version $Revision: 1.8 $, $Date: 2006/10/21 16:04:17 $, $Author: siefkes $
40   */
41  public abstract class Reestimator {
42  
43      /***
44       * Configuration key specifying the list of re-estimators to chain.
45       */
46      public static final String CONFIG_REESTIMATORS = "reestimator.chain";
47  
48      /***
49       * Factory method that delegates to {@link #createReestimators()} using
50       * the {@linkplain TiesConfiguration#CONF standard configuration}.
51       *
52       * @return the last estimator in the created estimator chain; or
53       * <code>null</code> is no estimators are specified (parameter value is
54       * <code>null</code> or empty)
55       * @throws IllegalArgumentException if the value of the
56       * {@link #CONFIG_REESTIMATORS} key is invalid
57       * @throws ProcessingException if an error occurred while creating the
58       * re-estimator
59       */
60      public static Reestimator createReestimators()
61      throws IllegalArgumentException, ProcessingException {
62          return createReestimators(TiesConfiguration.CONF);
63      }
64  
65      /***
66       * Factory method that creates a re-estimator chain based on the
67       * {@link #CONFIG_REESTIMATORS} key in the provided configuration.
68       * This parameter must contain a list fully specified names of subclasses
69       * of this class) used in a chain.
70       *
71       * <p>Eeach specified re-estimator must provide a constructor accepting a
72       * preceding {@link Reestimator} as first argument and a
73       * {@link TiesConfiguration} as second argument.
74       *
75       * @param config the configuration to use
76       * @return the last re-estimator in the created re-estimator chain; or
77       * <code>null</code> is no re-estimator are specified (parameter value is
78       * <code>null</code> or empty)
79       * @throws IllegalArgumentException if the value of the
80       * {@link #CONFIG_REESTIMATORS} key is invalid
81       * @throws ProcessingException if an error occurred while creating the
82       * re-estimator
83       */
84      public static Reestimator createReestimators(
85              final TiesConfiguration config)
86      throws IllegalArgumentException, ProcessingException {
87          final String[] paramValue = config.getStringArray(CONFIG_REESTIMATORS);
88          Reestimator estimator = null;
89          int i = 0;
90  
91          try {
92              for (; i < paramValue.length; i++) {
93                  // initialize each of the specified re-estimators, handing over
94                  // the previous re-estimator as first single argument and the
95                  // TiesConfiguration as second argument
96                  if (StringUtils.isNotEmpty(paramValue[i])) {
97                      estimator = (Reestimator) Util.createObject(
98                          Class.forName(paramValue[i]),
99                          new Object[] {estimator, config},
100                         new Class[] {Reestimator.class,
101                             TiesConfiguration.class});
102                 }
103             }
104         } catch (ClassNotFoundException cnfe) {
105              // convert and rethrow exception
106              throw new ProcessingException(
107                  "Cannot create re-estimator chain from key "
108                  + CONFIG_REESTIMATORS + " because the class " + paramValue[i]
109                  + " is missing: " + cnfe.toString());
110          } catch (InstantiationException ie) {
111              // convert and rethrow exception
112              throw new ProcessingException(
113                  "Cannot create re-estimator chain from key "
114                  + CONFIG_REESTIMATORS + " because instantation of the class "
115                  + paramValue[i] + " failed: ", ie);
116         }
117 
118         // return the last re-estimator in the chain (if any)
119         return estimator;
120     }
121 
122     /***
123      * The preceding re-estimator used if this re-estimator is part of a
124      * <em>chain</em>; <code>null</code> otherwise.
125      */
126     private final Reestimator precedingReestimator;
127 
128     /***
129      * Creates a new instance.
130      *
131      * @param precReestimator the preceding re-estimator to use if this
132      * re-estimator is part of a <em>chain</em>; <code>null</code> otherwise
133      * @param config the configuration to use
134      */
135     public Reestimator(final Reestimator precReestimator,
136             final TiesConfiguration config) {
137         super();
138         precedingReestimator = precReestimator;
139     }
140 
141     /***
142      * Re-estimates the probability of an extraction.
143      *
144      * @param extraction the extraction to re-estimate
145      * @return the re-estimated extraction; or <code>null</code> if the
146      * extraction should be deleted
147      */
148     protected abstract Extraction doReestimate(final Extraction extraction);
149 
150     /***
151      * Trains this re-estimator on an extraction.
152      *
153      * @param extraction the extraction to train
154      */
155     protected abstract void doTrain(final Extraction extraction);
156 
157     /***
158      * Returns the preceding re-estimator used if this re-estimator is part of a
159      * <em>chain</em>.
160      *
161      * @return the preceding re-estimator, if any; or <code>null</code> if this
162      * re-estimator is not part of a chain resp. is the first re-estimator in a
163      * chain
164      */
165     public Reestimator getPrecedingReestimator() {
166         return precedingReestimator;
167     }
168 
169     /***
170      * Returns a string representation of this object.
171      *
172      * @return a textual representation
173      */
174     public String toString() {
175         final ToStringBuilder builder =  new ToStringBuilder(this);
176 
177         if (precedingReestimator != null) {
178             builder.append("preceding re-estimator", precedingReestimator);
179         }
180         return builder.toString();
181     }
182 
183     /***
184      * Re-estimates the probability of an extraction. This method calls itself
185      * on the {@linkplain #getPrecedingReestimator() preceding re-estimator}
186      * (if any) prior to delegating to the abstract
187      * {@link #doReestimate} method.
188      *
189      * @param extraction the extraction to re-estimate
190      * @return the re-estimated extraction; or <code>null</code> if the
191      * extraction should be deleted
192      */
193     public Extraction reestimate(final Extraction extraction) {
194         final Extraction actualExt;
195 
196         // call preceding re-estimator, if present
197         if (precedingReestimator != null) {
198             actualExt = precedingReestimator.reestimate(extraction);
199         } else {
200             actualExt = extraction;
201         }
202 
203         // delegate to abstract method, unless extraction has been deleted
204         if (actualExt == null) {
205             return null;
206         } else {
207             return doReestimate(actualExt);
208         }
209     }
210 
211     /***
212      * Trains this re-estimator on an extraction. This method calls itself on
213      * the {@linkplain #getPrecedingReestimator() preceding re-estimator}
214      * (if any) prior to delegating to the abstract
215      * {@link #doTrain} method.
216      *
217      * @param extraction the extraction to train
218      */
219     public void train(final Extraction extraction) {
220         // call preceding re-estimator, if present
221         if (precedingReestimator != null) {
222             precedingReestimator.train(extraction);
223         }
224 
225         // delegate to abstract method
226         doTrain(extraction);
227     }
228 
229     /***
230      * Trains this re-estimator on a token that is <em>not</em>part of any
231      * extraction. The default implementation does nothing, so all
232      * non-extraction tokens are ignored. Subclasses that need a different
233      * behavior can override this method.
234      *
235      * @param tokenDetails a {@link ContextDetails} containing all
236      * relevant information on the token and its context
237      */
238     public void trainOtherToken(final ContextDetails tokenDetails) {
239         // can be overwritten by subclasses
240     }
241 
242 }