View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.combi;
23  
24  import java.util.Set;
25  
26  import org.apache.commons.lang.StringUtils;
27  import org.apache.commons.lang.builder.ToStringBuilder;
28  
29  import de.fu_berlin.ties.ContextMap;
30  import de.fu_berlin.ties.ProcessingException;
31  import de.fu_berlin.ties.TiesConfiguration;
32  import de.fu_berlin.ties.classify.PredictionDistribution;
33  import de.fu_berlin.ties.extract.amend.FinalReextractor;
34  import de.fu_berlin.ties.extract.reestimate.Reestimator;
35  import de.fu_berlin.ties.text.TokenDetails;
36  import de.fu_berlin.ties.util.Util;
37  
38  /***
39   * A combination strategy combines sequential classifications. Instances of
40   * this class are not thread-safe. Subclasses must also provide a constructor
41   * that accepts a {@link java.util.Set} of valid class names as single argument;
42   * otherwise they cannot be loaded via reflection.
43   *
44   * @author Christian Siefkes
45   * @version $Revision: 1.26 $, $Date: 2006/10/21 16:04:01 $, $Author: siefkes $
46   */
47  public abstract class CombinationStrategy {
48  
49      /***
50       * Configuration key specifying the combination strategy to use.
51       */
52      public static final String CONFIG_COMBINATION = "combination.strategy";
53  
54      /***
55       * Factory method that delegates to
56       * {@link #createStrategy(Set, TiesConfiguration)} using the
57       * {@linkplain TiesConfiguration#CONF standard configuration}.
58       *
59       * @param classNames a set of valid class names (String)
60       * @return the created strategy
61       * @throws IllegalArgumentException if the value of the
62       * {@link #CONFIG_COMBINATION} key is missing or invalid
63       * @throws ProcessingException if an error occurred while creating the
64       * strategy
65       */
66      public static CombinationStrategy createStrategy(
67              final Set<String> classNames)
68      throws IllegalArgumentException, ProcessingException {
69          return createStrategy(classNames, TiesConfiguration.CONF);
70      }
71  
72      /***
73       * Factory method that delegates to
74       * {@link #createStrategy(Set, String, TiesConfiguration)} using
75       * the {@link #CONFIG_COMBINATION} key in the provided configuration.
76       *
77       * @param classNames a set of valid class names (String)
78       * @param config the configuration to use
79       * @return the created strategy
80       * @throws IllegalArgumentException if the value of the
81       * {@link #CONFIG_COMBINATION} key is missing or invalid
82       * @throws ProcessingException if an error occurred while creating the
83       * strategy
84       */
85      public static CombinationStrategy createStrategy(
86              final Set<String> classNames, final TiesConfiguration config)
87      throws IllegalArgumentException, ProcessingException {
88          return createStrategy(classNames,
89                  config.getString(CONFIG_COMBINATION, null), config);
90      }
91  
92      /***
93       * Factory method that creates a combination strategy based on the
94       * provided <code>combinationName</code>.
95       *
96       * <p>Currently supported named:
97       * "BE" for {@link BeginEndStrategy},
98       * "BIA" for {@link BeginAfterStrategy},
99       * "BIE1" for {@link OpenCloseStrategy} with
100      * {@link OpenCloseStrategy#isUsingBE()} set to <code>false</code>,
101      * "BIE2" for {@link OpenCloseStrategy} with
102      * {@link OpenCloseStrategy#isUsingBE()} set to <code>true</code>,
103      * "IOB1" for {@link InsideOutsideStrategy} with
104      * {@link InsideOutsideStrategy#isBStartingAll()} set to <code>false</code>,
105      * "IOB2" for {@link InsideOutsideStrategy} with
106      * {@link InsideOutsideStrategy#isBStartingAll()} set to <code>true</code>;
107      * "Triv" for {@link TrivialStrategy}.
108      *
109      * <p>Otherwise the value must be the qualified name of a
110      * CombinationStrategy subclass accepting a {@link Set} (of valid class
111      * names) as first argument and a {@link TiesConfiguration} as second
112      * argument.
113      *
114      * @param classNames a set of valid class names (String)
115      * @param combinationName the name of the configuration to use, as
116      * explained above
117      * @param config used to configure the combination strategy
118      * @return the created strategy
119      * @throws IllegalArgumentException if the <code>combinationName</code>
120      * is <code>null</code> or invalid
121      * @throws ProcessingException if an error occurred while creating the
122      * strategy
123      */
124     public static CombinationStrategy createStrategy(
125             final Set<String> classNames, final String combinationName,
126             final TiesConfiguration config)
127     throws IllegalArgumentException, ProcessingException {
128         if (StringUtils.isEmpty(combinationName)) {
129             throw new IllegalArgumentException(
130                     "Cannot create combination strategy -- no name given");
131         }
132 
133         final String upperValue = combinationName.toUpperCase();
134         final CombinationStrategy result;
135 
136         if ("BE".equals(upperValue)) {
137             // load Begin/End strategy
138             result = new BeginEndStrategy(classNames, config);
139         } else if ("BIA".equals(upperValue)) {
140             // load Begin/After strategy
141             result = new BeginAfterStrategy(classNames);
142         } else if ("BIE1".equals(upperValue)) {
143             // load Open/Close strategy with 3 prefix types
144             result = new OpenCloseStrategy(classNames, false);
145         } else if ("BIE".equals(upperValue) || "BIE2".equals(upperValue)) {
146             // load Open/Close strategy with 4 prefix types (BIE is legacy name)
147             result = new OpenCloseStrategy(classNames, true);
148         } else if ("IOB1".equals(upperValue)) {
149             // load Inside/Outside with B only where strictly required
150             result = new InsideOutsideStrategy(classNames, false);
151         } else if ("IOB2".equals(upperValue)) {
152             // load InsideOutside with B starting all instances
153             result = new InsideOutsideStrategy(classNames, true);
154         } else if ("TRIV".equals(upperValue)) {
155             // load trivial strategy
156             result = new TrivialStrategy(classNames);
157         } else {
158             // should be the qualified name of a CombinationStrategy subclass
159             // accepting a Set (of valid class names) as first argument and
160             // a TiesConfiguration as second argument
161             try {
162                 result = (CombinationStrategy) Util.createObject(
163                     Class.forName(combinationName),
164                     new Object[] {classNames, config},
165                     new Class[] {Set.class, TiesConfiguration.class});
166             } catch (ClassNotFoundException cnfe) {
167                 // convert and rethrow exception
168                 throw new ProcessingException(
169                     "Cannot create combination strategy from key "
170                     + CONFIG_COMBINATION + "=" + combinationName,
171                     cnfe);
172             } catch (InstantiationException ie) {
173                 // convert and rethrow exception
174                 throw new ProcessingException(
175                     "Cannot create combination strategy from key "
176                     + CONFIG_COMBINATION + "=" + combinationName,
177                     ie);
178             }
179         }
180         return result;
181     }
182 
183     /***
184      * The current state (state of the last item); initially set via
185      * {@link #reset()}.
186      */
187     private CombinationState state = CombinationState.OUTSIDE;
188 
189     /***
190      * The array of valid class names (Strings).
191      */
192     private final Set<String> validClasses;
193 
194     /***
195      * Creates a new instance.
196      * @param theClasses a set of valid class names (String)
197      */
198     public CombinationStrategy(final Set<String> theClasses) {
199         super();
200         validClasses = theClasses;
201         state = CombinationState.OUTSIDE;
202     }
203 
204     /***
205      * Builds a set array of class names (Strings) to pass to each classifier
206      * to consider for the next decision. Must be a subset of
207      * {@link #allClasses()}.
208      *
209      * @return a set of class names for each classifier
210      */
211     public abstract Set[] activeClasses();
212 
213     /***
214      * Returns an array of immutable sets of all classes (Strings) that can
215      * possible occur during classification for each classifier.
216      * The "background" class (outside any extractions) should be the first
217      * member of each set.
218      *
219      * @return an array of immutable sets of class names
220      */
221     public abstract Set[] allClasses();
222 
223     /***
224      * Thie method can be overwritten to pass objects to a
225      * {@link FinalReextractor}. The default implementation simply returns
226      * <code>null</code>. If you overwrite this method, you should typically
227      * overwrite the {@link #initReextractor(Reestimator)} method as well.
228      *
229      * @return a map of context objects for the re-extractor,
230      * or <code>null</code>
231      */
232     public ContextMap contextForReextractor() {
233         return null;
234     }
235 
236     /***
237      * Returns the set of valid class names.
238      * @return a set of Strings containing the valid class names
239      */
240     protected Set<String> getValidClasses() {
241         return validClasses;
242     }
243 
244     /***
245      * This method can be overwritten to initialize a {@link FinalReextractor}.
246      * The default implementation simply returns <code>null</code>.
247      * If you overwrite this method, you should typically overwrite the
248      * {@link #contextForReextractor()} method as well.
249      *
250      * @param reestimatorChain the chain of re-estimators used -- some
251      * implementations might need a specific re-estimator to work
252      * @return the re-extractor to use; or <code>null</code>
253      * @throws ProcessingException if an error occurs during initialization
254      */
255     public FinalReextractor initReextractor(final Reestimator reestimatorChain)
256     throws ProcessingException {
257         return null;
258     }
259 
260     /***
261      * Resets the {@link #state()} of this instance to the initial value
262      * {@link CombinationState#OUTSIDE}. Also checks whether the last extraction
263      * should be discarded, analogously to
264      * {@link CombinationState#isDiscardPreceding()}. The default behavior
265      * is to always return <code>false</code>, but subclasses can modify
266      * this by overwriting the {@link #resetHook()} method.
267      *
268      * @return <code>true</code> iff the last extraction should be discarded
269      */
270     public final boolean reset() {
271         final boolean result = resetHook();
272         state = CombinationState.OUTSIDE;
273         return result;
274     }
275 
276     /***
277      * Hook called by the {@link #reset()} method to query whether the last
278      * extraction should be discarded, analogously to
279      * {@link CombinationState#isDiscardPreceding()}.
280      *
281      * @return the value of the attribute
282      */
283     protected boolean resetHook() {
284         return false;
285     }
286 
287     /***
288      * The current state (state of the last item).
289      * @return the current state
290      */
291     public CombinationState state() {
292         return state;
293     }
294 
295     /***
296      * Returns a string representation of this object.
297      *
298      * @return a textual representation
299      */
300     public String toString() {
301         return new ToStringBuilder(this)
302             .append("valid classes", validClasses)
303             .append("last state", state)
304           .toString();
305     }
306 
307     /***
308      * Translates an expected class to a class name to feed to the classifiers
309      * in training mode. This method reverses the direction of
310      * {@link #translateResult(PredictionDistribution[], TokenDetails)}.
311      *
312      * <p>The return value can be feed to the classifiers for training it.
313      *
314      * @param currentState the current state
315      * @return the array of classes to feed to each classifier for training
316      * @throws IllegalArgumentException if the specified argument is invalid
317      * (non-empty string that is not in the set of valid classes, or empty
318      * string when <code>null</code> was given in the preceding call)
319      */
320     public abstract String[] translateCurrentState(
321             final CombinationState currentState)
322             throws IllegalArgumentException;
323 
324     /***
325      * Translates the classes predicted by the classifiers in a decision for the
326      * caller. The {@linkplain CombinationState#isEnd() end} information might
327      * be unreliably/absent for same classes.
328      *
329      * @param predictions array containing the prediction distributions of
330      * all classifier
331      * @param details details representing the classified token
332      * @return the translated result
333      * @throws IllegalArgumentException if the specified argument was invalid
334      * (not in the set of currently {@link #activeClasses() active classes}
335      */
336     public abstract CombinationState translateResult(
337             final PredictionDistribution[] predictions,
338             final TokenDetails details)
339     throws IllegalArgumentException;
340 
341     /***
342      * Updates the current state (the class of the last item). You must always
343      * call this method after classifying
344      * ({@link #translateResult(PredictionDistribution[], TokenDetails)} and/or
345      * training ({@link #translateCurrentState(CombinationState)}) an item.
346      *
347      * @param newState the state of the last item
348      * @param predictions array containing the prediction distributions of
349      * all classifier for the last item
350      * @param details details representing the last item
351      * @throws IllegalArgumentException if the
352      * {@linkplain CombinationState#getType() type} of the argument is invalid
353      * (not in the set of {@link #getValidClasses() valid classes or
354      * <code>null</code>}
355      */
356     public void updateState(final CombinationState newState,
357             final PredictionDistribution[] predictions,
358             final TokenDetails details) throws IllegalArgumentException {
359         // check that it's a valid state (if not null or no classes specified)
360         final String type = newState.getType();
361         if ((type != null) && !validClasses.isEmpty()
362                 && !validClasses.contains(type)) {
363             throw new IllegalArgumentException("Not a valid type: "
364                 + newState);
365         }
366         state = newState;
367     }
368 
369 }