1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.combi;
23
24 import java.util.Set;
25
26 import org.apache.commons.lang.StringUtils;
27 import org.apache.commons.lang.builder.ToStringBuilder;
28
29 import de.fu_berlin.ties.ContextMap;
30 import de.fu_berlin.ties.ProcessingException;
31 import de.fu_berlin.ties.TiesConfiguration;
32 import de.fu_berlin.ties.classify.PredictionDistribution;
33 import de.fu_berlin.ties.extract.amend.FinalReextractor;
34 import de.fu_berlin.ties.extract.reestimate.Reestimator;
35 import de.fu_berlin.ties.text.TokenDetails;
36 import de.fu_berlin.ties.util.Util;
37
38 /***
39 * A combination strategy combines sequential classifications. Instances of
40 * this class are not thread-safe. Subclasses must also provide a constructor
41 * that accepts a {@link java.util.Set} of valid class names as single argument;
42 * otherwise they cannot be loaded via reflection.
43 *
44 * @author Christian Siefkes
45 * @version $Revision: 1.26 $, $Date: 2006/10/21 16:04:01 $, $Author: siefkes $
46 */
47 public abstract class CombinationStrategy {
48
49 /***
50 * Configuration key specifying the combination strategy to use.
51 */
52 public static final String CONFIG_COMBINATION = "combination.strategy";
53
54 /***
55 * Factory method that delegates to
56 * {@link #createStrategy(Set, TiesConfiguration)} using the
57 * {@linkplain TiesConfiguration#CONF standard configuration}.
58 *
59 * @param classNames a set of valid class names (String)
60 * @return the created strategy
61 * @throws IllegalArgumentException if the value of the
62 * {@link #CONFIG_COMBINATION} key is missing or invalid
63 * @throws ProcessingException if an error occurred while creating the
64 * strategy
65 */
66 public static CombinationStrategy createStrategy(
67 final Set<String> classNames)
68 throws IllegalArgumentException, ProcessingException {
69 return createStrategy(classNames, TiesConfiguration.CONF);
70 }
71
72 /***
73 * Factory method that delegates to
74 * {@link #createStrategy(Set, String, TiesConfiguration)} using
75 * the {@link #CONFIG_COMBINATION} key in the provided configuration.
76 *
77 * @param classNames a set of valid class names (String)
78 * @param config the configuration to use
79 * @return the created strategy
80 * @throws IllegalArgumentException if the value of the
81 * {@link #CONFIG_COMBINATION} key is missing or invalid
82 * @throws ProcessingException if an error occurred while creating the
83 * strategy
84 */
85 public static CombinationStrategy createStrategy(
86 final Set<String> classNames, final TiesConfiguration config)
87 throws IllegalArgumentException, ProcessingException {
88 return createStrategy(classNames,
89 config.getString(CONFIG_COMBINATION, null), config);
90 }
91
92 /***
93 * Factory method that creates a combination strategy based on the
94 * provided <code>combinationName</code>.
95 *
96 * <p>Currently supported named:
97 * "BE" for {@link BeginEndStrategy},
98 * "BIA" for {@link BeginAfterStrategy},
99 * "BIE1" for {@link OpenCloseStrategy} with
100 * {@link OpenCloseStrategy#isUsingBE()} set to <code>false</code>,
101 * "BIE2" for {@link OpenCloseStrategy} with
102 * {@link OpenCloseStrategy#isUsingBE()} set to <code>true</code>,
103 * "IOB1" for {@link InsideOutsideStrategy} with
104 * {@link InsideOutsideStrategy#isBStartingAll()} set to <code>false</code>,
105 * "IOB2" for {@link InsideOutsideStrategy} with
106 * {@link InsideOutsideStrategy#isBStartingAll()} set to <code>true</code>;
107 * "Triv" for {@link TrivialStrategy}.
108 *
109 * <p>Otherwise the value must be the qualified name of a
110 * CombinationStrategy subclass accepting a {@link Set} (of valid class
111 * names) as first argument and a {@link TiesConfiguration} as second
112 * argument.
113 *
114 * @param classNames a set of valid class names (String)
115 * @param combinationName the name of the configuration to use, as
116 * explained above
117 * @param config used to configure the combination strategy
118 * @return the created strategy
119 * @throws IllegalArgumentException if the <code>combinationName</code>
120 * is <code>null</code> or invalid
121 * @throws ProcessingException if an error occurred while creating the
122 * strategy
123 */
124 public static CombinationStrategy createStrategy(
125 final Set<String> classNames, final String combinationName,
126 final TiesConfiguration config)
127 throws IllegalArgumentException, ProcessingException {
128 if (StringUtils.isEmpty(combinationName)) {
129 throw new IllegalArgumentException(
130 "Cannot create combination strategy -- no name given");
131 }
132
133 final String upperValue = combinationName.toUpperCase();
134 final CombinationStrategy result;
135
136 if ("BE".equals(upperValue)) {
137
138 result = new BeginEndStrategy(classNames, config);
139 } else if ("BIA".equals(upperValue)) {
140
141 result = new BeginAfterStrategy(classNames);
142 } else if ("BIE1".equals(upperValue)) {
143
144 result = new OpenCloseStrategy(classNames, false);
145 } else if ("BIE".equals(upperValue) || "BIE2".equals(upperValue)) {
146
147 result = new OpenCloseStrategy(classNames, true);
148 } else if ("IOB1".equals(upperValue)) {
149
150 result = new InsideOutsideStrategy(classNames, false);
151 } else if ("IOB2".equals(upperValue)) {
152
153 result = new InsideOutsideStrategy(classNames, true);
154 } else if ("TRIV".equals(upperValue)) {
155
156 result = new TrivialStrategy(classNames);
157 } else {
158
159
160
161 try {
162 result = (CombinationStrategy) Util.createObject(
163 Class.forName(combinationName),
164 new Object[] {classNames, config},
165 new Class[] {Set.class, TiesConfiguration.class});
166 } catch (ClassNotFoundException cnfe) {
167
168 throw new ProcessingException(
169 "Cannot create combination strategy from key "
170 + CONFIG_COMBINATION + "=" + combinationName,
171 cnfe);
172 } catch (InstantiationException ie) {
173
174 throw new ProcessingException(
175 "Cannot create combination strategy from key "
176 + CONFIG_COMBINATION + "=" + combinationName,
177 ie);
178 }
179 }
180 return result;
181 }
182
183 /***
184 * The current state (state of the last item); initially set via
185 * {@link #reset()}.
186 */
187 private CombinationState state = CombinationState.OUTSIDE;
188
189 /***
190 * The array of valid class names (Strings).
191 */
192 private final Set<String> validClasses;
193
194 /***
195 * Creates a new instance.
196 * @param theClasses a set of valid class names (String)
197 */
198 public CombinationStrategy(final Set<String> theClasses) {
199 super();
200 validClasses = theClasses;
201 state = CombinationState.OUTSIDE;
202 }
203
204 /***
205 * Builds a set array of class names (Strings) to pass to each classifier
206 * to consider for the next decision. Must be a subset of
207 * {@link #allClasses()}.
208 *
209 * @return a set of class names for each classifier
210 */
211 public abstract Set[] activeClasses();
212
213 /***
214 * Returns an array of immutable sets of all classes (Strings) that can
215 * possible occur during classification for each classifier.
216 * The "background" class (outside any extractions) should be the first
217 * member of each set.
218 *
219 * @return an array of immutable sets of class names
220 */
221 public abstract Set[] allClasses();
222
223 /***
224 * Thie method can be overwritten to pass objects to a
225 * {@link FinalReextractor}. The default implementation simply returns
226 * <code>null</code>. If you overwrite this method, you should typically
227 * overwrite the {@link #initReextractor(Reestimator)} method as well.
228 *
229 * @return a map of context objects for the re-extractor,
230 * or <code>null</code>
231 */
232 public ContextMap contextForReextractor() {
233 return null;
234 }
235
236 /***
237 * Returns the set of valid class names.
238 * @return a set of Strings containing the valid class names
239 */
240 protected Set<String> getValidClasses() {
241 return validClasses;
242 }
243
244 /***
245 * This method can be overwritten to initialize a {@link FinalReextractor}.
246 * The default implementation simply returns <code>null</code>.
247 * If you overwrite this method, you should typically overwrite the
248 * {@link #contextForReextractor()} method as well.
249 *
250 * @param reestimatorChain the chain of re-estimators used -- some
251 * implementations might need a specific re-estimator to work
252 * @return the re-extractor to use; or <code>null</code>
253 * @throws ProcessingException if an error occurs during initialization
254 */
255 public FinalReextractor initReextractor(final Reestimator reestimatorChain)
256 throws ProcessingException {
257 return null;
258 }
259
260 /***
261 * Resets the {@link #state()} of this instance to the initial value
262 * {@link CombinationState#OUTSIDE}. Also checks whether the last extraction
263 * should be discarded, analogously to
264 * {@link CombinationState#isDiscardPreceding()}. The default behavior
265 * is to always return <code>false</code>, but subclasses can modify
266 * this by overwriting the {@link #resetHook()} method.
267 *
268 * @return <code>true</code> iff the last extraction should be discarded
269 */
270 public final boolean reset() {
271 final boolean result = resetHook();
272 state = CombinationState.OUTSIDE;
273 return result;
274 }
275
276 /***
277 * Hook called by the {@link #reset()} method to query whether the last
278 * extraction should be discarded, analogously to
279 * {@link CombinationState#isDiscardPreceding()}.
280 *
281 * @return the value of the attribute
282 */
283 protected boolean resetHook() {
284 return false;
285 }
286
287 /***
288 * The current state (state of the last item).
289 * @return the current state
290 */
291 public CombinationState state() {
292 return state;
293 }
294
295 /***
296 * Returns a string representation of this object.
297 *
298 * @return a textual representation
299 */
300 public String toString() {
301 return new ToStringBuilder(this)
302 .append("valid classes", validClasses)
303 .append("last state", state)
304 .toString();
305 }
306
307 /***
308 * Translates an expected class to a class name to feed to the classifiers
309 * in training mode. This method reverses the direction of
310 * {@link #translateResult(PredictionDistribution[], TokenDetails)}.
311 *
312 * <p>The return value can be feed to the classifiers for training it.
313 *
314 * @param currentState the current state
315 * @return the array of classes to feed to each classifier for training
316 * @throws IllegalArgumentException if the specified argument is invalid
317 * (non-empty string that is not in the set of valid classes, or empty
318 * string when <code>null</code> was given in the preceding call)
319 */
320 public abstract String[] translateCurrentState(
321 final CombinationState currentState)
322 throws IllegalArgumentException;
323
324 /***
325 * Translates the classes predicted by the classifiers in a decision for the
326 * caller. The {@linkplain CombinationState#isEnd() end} information might
327 * be unreliably/absent for same classes.
328 *
329 * @param predictions array containing the prediction distributions of
330 * all classifier
331 * @param details details representing the classified token
332 * @return the translated result
333 * @throws IllegalArgumentException if the specified argument was invalid
334 * (not in the set of currently {@link #activeClasses() active classes}
335 */
336 public abstract CombinationState translateResult(
337 final PredictionDistribution[] predictions,
338 final TokenDetails details)
339 throws IllegalArgumentException;
340
341 /***
342 * Updates the current state (the class of the last item). You must always
343 * call this method after classifying
344 * ({@link #translateResult(PredictionDistribution[], TokenDetails)} and/or
345 * training ({@link #translateCurrentState(CombinationState)}) an item.
346 *
347 * @param newState the state of the last item
348 * @param predictions array containing the prediction distributions of
349 * all classifier for the last item
350 * @param details details representing the last item
351 * @throws IllegalArgumentException if the
352 * {@linkplain CombinationState#getType() type} of the argument is invalid
353 * (not in the set of {@link #getValidClasses() valid classes or
354 * <code>null</code>}
355 */
356 public void updateState(final CombinationState newState,
357 final PredictionDistribution[] predictions,
358 final TokenDetails details) throws IllegalArgumentException {
359
360 final String type = newState.getType();
361 if ((type != null) && !validClasses.isEmpty()
362 && !validClasses.contains(type)) {
363 throw new IllegalArgumentException("Not a valid type: "
364 + newState);
365 }
366 state = newState;
367 }
368
369 }