1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.extract.reestimate;
23
24 import org.apache.commons.lang.StringUtils;
25 import org.apache.commons.lang.builder.ToStringBuilder;
26
27 import de.fu_berlin.ties.ProcessingException;
28 import de.fu_berlin.ties.TiesConfiguration;
29 import de.fu_berlin.ties.context.ContextDetails;
30 import de.fu_berlin.ties.extract.Extraction;
31 import de.fu_berlin.ties.util.Util;
32
33 /***
34 * A re-estimator recalculates the probabilites of extractions in a suitable
35 * way. Re-estimators can be <em>chained</em> -- in this case each estimator
36 * will work on the extractions returned by the previous one.
37 *
38 * @author Christian Siefkes
39 * @version $Revision: 1.8 $, $Date: 2006/10/21 16:04:17 $, $Author: siefkes $
40 */
41 public abstract class Reestimator {
42
43 /***
44 * Configuration key specifying the list of re-estimators to chain.
45 */
46 public static final String CONFIG_REESTIMATORS = "reestimator.chain";
47
48 /***
49 * Factory method that delegates to {@link #createReestimators()} using
50 * the {@linkplain TiesConfiguration#CONF standard configuration}.
51 *
52 * @return the last estimator in the created estimator chain; or
53 * <code>null</code> is no estimators are specified (parameter value is
54 * <code>null</code> or empty)
55 * @throws IllegalArgumentException if the value of the
56 * {@link #CONFIG_REESTIMATORS} key is invalid
57 * @throws ProcessingException if an error occurred while creating the
58 * re-estimator
59 */
60 public static Reestimator createReestimators()
61 throws IllegalArgumentException, ProcessingException {
62 return createReestimators(TiesConfiguration.CONF);
63 }
64
65 /***
66 * Factory method that creates a re-estimator chain based on the
67 * {@link #CONFIG_REESTIMATORS} key in the provided configuration.
68 * This parameter must contain a list fully specified names of subclasses
69 * of this class) used in a chain.
70 *
71 * <p>Eeach specified re-estimator must provide a constructor accepting a
72 * preceding {@link Reestimator} as first argument and a
73 * {@link TiesConfiguration} as second argument.
74 *
75 * @param config the configuration to use
76 * @return the last re-estimator in the created re-estimator chain; or
77 * <code>null</code> is no re-estimator are specified (parameter value is
78 * <code>null</code> or empty)
79 * @throws IllegalArgumentException if the value of the
80 * {@link #CONFIG_REESTIMATORS} key is invalid
81 * @throws ProcessingException if an error occurred while creating the
82 * re-estimator
83 */
84 public static Reestimator createReestimators(
85 final TiesConfiguration config)
86 throws IllegalArgumentException, ProcessingException {
87 final String[] paramValue = config.getStringArray(CONFIG_REESTIMATORS);
88 Reestimator estimator = null;
89 int i = 0;
90
91 try {
92 for (; i < paramValue.length; i++) {
93
94
95
96 if (StringUtils.isNotEmpty(paramValue[i])) {
97 estimator = (Reestimator) Util.createObject(
98 Class.forName(paramValue[i]),
99 new Object[] {estimator, config},
100 new Class[] {Reestimator.class,
101 TiesConfiguration.class});
102 }
103 }
104 } catch (ClassNotFoundException cnfe) {
105
106 throw new ProcessingException(
107 "Cannot create re-estimator chain from key "
108 + CONFIG_REESTIMATORS + " because the class " + paramValue[i]
109 + " is missing: " + cnfe.toString());
110 } catch (InstantiationException ie) {
111
112 throw new ProcessingException(
113 "Cannot create re-estimator chain from key "
114 + CONFIG_REESTIMATORS + " because instantation of the class "
115 + paramValue[i] + " failed: ", ie);
116 }
117
118
119 return estimator;
120 }
121
122 /***
123 * The preceding re-estimator used if this re-estimator is part of a
124 * <em>chain</em>; <code>null</code> otherwise.
125 */
126 private final Reestimator precedingReestimator;
127
128 /***
129 * Creates a new instance.
130 *
131 * @param precReestimator the preceding re-estimator to use if this
132 * re-estimator is part of a <em>chain</em>; <code>null</code> otherwise
133 * @param config the configuration to use
134 */
135 public Reestimator(final Reestimator precReestimator,
136 final TiesConfiguration config) {
137 super();
138 precedingReestimator = precReestimator;
139 }
140
141 /***
142 * Re-estimates the probability of an extraction.
143 *
144 * @param extraction the extraction to re-estimate
145 * @return the re-estimated extraction; or <code>null</code> if the
146 * extraction should be deleted
147 */
148 protected abstract Extraction doReestimate(final Extraction extraction);
149
150 /***
151 * Trains this re-estimator on an extraction.
152 *
153 * @param extraction the extraction to train
154 */
155 protected abstract void doTrain(final Extraction extraction);
156
157 /***
158 * Returns the preceding re-estimator used if this re-estimator is part of a
159 * <em>chain</em>.
160 *
161 * @return the preceding re-estimator, if any; or <code>null</code> if this
162 * re-estimator is not part of a chain resp. is the first re-estimator in a
163 * chain
164 */
165 public Reestimator getPrecedingReestimator() {
166 return precedingReestimator;
167 }
168
169 /***
170 * Returns a string representation of this object.
171 *
172 * @return a textual representation
173 */
174 public String toString() {
175 final ToStringBuilder builder = new ToStringBuilder(this);
176
177 if (precedingReestimator != null) {
178 builder.append("preceding re-estimator", precedingReestimator);
179 }
180 return builder.toString();
181 }
182
183 /***
184 * Re-estimates the probability of an extraction. This method calls itself
185 * on the {@linkplain #getPrecedingReestimator() preceding re-estimator}
186 * (if any) prior to delegating to the abstract
187 * {@link #doReestimate} method.
188 *
189 * @param extraction the extraction to re-estimate
190 * @return the re-estimated extraction; or <code>null</code> if the
191 * extraction should be deleted
192 */
193 public Extraction reestimate(final Extraction extraction) {
194 final Extraction actualExt;
195
196
197 if (precedingReestimator != null) {
198 actualExt = precedingReestimator.reestimate(extraction);
199 } else {
200 actualExt = extraction;
201 }
202
203
204 if (actualExt == null) {
205 return null;
206 } else {
207 return doReestimate(actualExt);
208 }
209 }
210
211 /***
212 * Trains this re-estimator on an extraction. This method calls itself on
213 * the {@linkplain #getPrecedingReestimator() preceding re-estimator}
214 * (if any) prior to delegating to the abstract
215 * {@link #doTrain} method.
216 *
217 * @param extraction the extraction to train
218 */
219 public void train(final Extraction extraction) {
220
221 if (precedingReestimator != null) {
222 precedingReestimator.train(extraction);
223 }
224
225
226 doTrain(extraction);
227 }
228
229 /***
230 * Trains this re-estimator on a token that is <em>not</em>part of any
231 * extraction. The default implementation does nothing, so all
232 * non-extraction tokens are ignored. Subclasses that need a different
233 * behavior can override this method.
234 *
235 * @param tokenDetails a {@link ContextDetails} containing all
236 * relevant information on the token and its context
237 */
238 public void trainOtherToken(final ContextDetails tokenDetails) {
239
240 }
241
242 }