View Javadoc

1   /*
2    * Copyright (C) 2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.combi;
23  
24  import java.util.Set;
25  
26  import org.apache.commons.lang.builder.ToStringBuilder;
27  
28  import de.fu_berlin.ties.ProcessingException;
29  import de.fu_berlin.ties.TiesConfiguration;
30  import de.fu_berlin.ties.classify.PredictionDistribution;
31  import de.fu_berlin.ties.util.Util;
32  
33  /***
34   * A combination strategy combines sequential classifications. Instances of
35   * this class are not thread-safe. Subclasses must also provide a constructor
36   * that accepts a {@link java.util.Set} of valid class names as single argument;
37   * otherwise they cannot be loaded via reflection.
38   *
39   * @author Christian Siefkes
40   * @version $Revision: 1.14 $, $Date: 2004/09/16 11:12:53 $, $Author: siefkes $
41   */
42  public abstract class CombinationStrategy {
43  
44      /***
45       * Configuration key specifying the combination strategy to use.
46       */
47      public static final String CONFIG_COMBINATION = "combination.strategy";
48  
49      /***
50       * Factory method that delegates to
51       * {@link #createStrategy(Set, TiesConfiguration)} using the
52       * {@linkplain TiesConfiguration#CONF standard configuration}.
53       *
54       * @param classNames a set of valid class names (String)
55       * @return the created strategy
56       * @throws IllegalArgumentException if the value of the
57       * {@link #CONFIG_COMBINATION} key is missing or invalid
58       * @throws ProcessingException if an error occurred while creating the
59       * strategy
60       */
61      public static CombinationStrategy createStrategy(
62              final Set<String> classNames)
63      throws IllegalArgumentException, ProcessingException {
64          return createStrategy(classNames, TiesConfiguration.CONF);
65      }
66  
67      /***
68       * Factory method that creates a combination strategy based on the
69       * {@link #CONFIG_COMBINATION} key in the provided configuration.
70       *
71       * <p>Currently supported values:
72       * "BE" for {@link BeginEndStrategy},
73       * "BIA" for {@link BeginAfterStrategy},
74       * "BIE" for {@link OpenCloseStrategy},
75       * "IOB1" for {@link InsideOutsideStrategy} with
76       * {@link InsideOutsideStrategy#isBStartingAll()} set to <code>false</code>,
77       * "IOB2" for {@link InsideOutsideStrategy} with
78       * {@link InsideOutsideStrategy#isBStartingAll()} set to <code>true</code>;
79       * "Triv" for {@link TrivialStrategy}.
80       *
81       * <p>Otherwise the value must be the qualified name of a
82       * CombinationStrategy subclass accepting a {@link Set} (of valid class
83       * names) as single argument.
84       *
85       * @param classNames a set of valid class names (String)
86       * @param config the configuration to use
87       * @return the created strategy
88       * @throws IllegalArgumentException if the value of the
89       * {@link #CONFIG_COMBINATION} key is missing or invalid
90       * @throws ProcessingException if an error occurred while creating the
91       * strategy
92       */
93      public static CombinationStrategy createStrategy(
94              final Set<String> classNames, final TiesConfiguration config)
95      throws IllegalArgumentException, ProcessingException {
96          if (!config.containsKey(CONFIG_COMBINATION)) {
97              throw new IllegalArgumentException(
98                  "Cannot create combination strategy -- missing key "
99                  + CONFIG_COMBINATION);
100         }
101 
102         final String combinationValue =
103             config.getString(CONFIG_COMBINATION);
104         final String upperValue = combinationValue.toUpperCase();
105         final CombinationStrategy result;
106 
107         if ("BE".equals(upperValue)) {
108             // load Begin/End strategy
109             result = new BeginEndStrategy(classNames);
110         } else if ("BIA".equals(upperValue)) {
111             // load Begin/After strategy
112             result = new BeginAfterStrategy(classNames);
113         } else if ("BIE".equals(upperValue)) {
114             // load Open/Close strategy
115             result = new OpenCloseStrategy(classNames);
116         } else if ("IOB1".equals(upperValue)) {
117             // load Inside/Outside with B only where strictly required
118             result = new InsideOutsideStrategy(classNames, false);
119         } else if ("IOB2".equals(upperValue)) {
120             // load InsideOutside with B starting all instances
121             result = new InsideOutsideStrategy(classNames, true);
122         } else if ("TRIV".equals(upperValue)) {
123             // load trivial strategy
124             result = new TrivialStrategy(classNames);
125         } else {
126             // should be the qualified name of a CombinationStrategy subclass
127             // accepting a Set (of valid class names) as single argument
128             try {
129                 result = (CombinationStrategy) Util.createObject(
130                     Class.forName(combinationValue),
131                     new Object[] {classNames}, Set.class);
132             } catch (ClassNotFoundException cnfe) {
133                 // convert and rethrow exception
134                 throw new ProcessingException(
135                     "Cannot create combination strategy from key "
136                     + CONFIG_COMBINATION + "=" + combinationValue,
137                     cnfe);
138             } catch (InstantiationException ie) {
139                 // convert and rethrow exception
140                 throw new ProcessingException(
141                     "Cannot create combination strategy from key "
142                     + CONFIG_COMBINATION + "=" + combinationValue,
143                     ie);
144             }
145         }
146         return result;
147     }
148 
149     /***
150      * The current state (state of the last item); initially set via
151      * {@link #reset()}.
152      */
153     private CombinationState state = CombinationState.OUTSIDE;
154 
155     /***
156      * The array of valid class names (Strings).
157      */
158     private final Set<String> validClasses;
159 
160     /***
161      * Creates a new instance.
162      * @param theClasses a set of valid class names (String)
163      */
164     public CombinationStrategy(final Set<String> theClasses) {
165         super();
166         validClasses = theClasses;
167         state = CombinationState.OUTSIDE;
168     }
169 
170     /***
171      * Builds a set array of class names (Strings) to pass to each classifier
172      * to consider for the next decision. Must be a subset of
173      * {@link #allClasses()}.
174      *
175      * @return a set of class names for each classifier
176      */
177     public abstract Set<String>[] activeClasses();
178 
179     /***
180      * Returns an array of immutable sets of all classes (Strings) that can
181      * possible occur during classification for each classifier.
182      * The "background" class (outside any extractions) should be the first
183      * member of each set.
184      *
185      * @return an array of immutable sets of class names
186      */
187     public abstract Set<String>[] allClasses();
188 
189     /***
190      * Translates an expected class to a class name to feed to the classifiers
191      * in training mode. This method reverses the direction of
192      * {@link #translateResult(PredictionDistribution[])}.
193      *
194      * <p>The return value can be feed to the classifiers for training it.
195      *
196      * @param currentState the current state
197      * @return the array of classes to feed to each classifier for training
198      * @throws IllegalArgumentException if the specified argument is invalid
199      * (non-empty string that is not in the set of valid classes, or empty
200      * string when <code>null</code> was given in the preceding call)
201      */
202     public abstract String[] translateCurrentState(
203             final CombinationState currentState)
204             throws IllegalArgumentException;
205 
206     /***
207      * Translates the classes predicted by the classifiers in a decision for the
208      * caller. The {@linkplain CombinationState#isEnd() end} information might
209      * be unreliably/absent for same classes.
210      *
211      * @param predictions array containing the prediction distributions of
212      * all classifier
213      * @return the translated result
214      * @throws IllegalArgumentException if the specified argument was invalid
215      * (not in the set of currently {@link #activeClasses() active classes}
216      */
217     public abstract CombinationState translateResult(
218             final PredictionDistribution[] predictions)
219     throws IllegalArgumentException;
220 
221     /***
222      * Returns the set of valid class names.
223      * @return a set of Strings containing the valid class names
224      */
225     protected Set getValidClasses() {
226         return validClasses;
227     }
228 
229     /***
230      * Resets the {@link #state()} of this instance to the initial value
231      * {@link CombinationState#OUTSIDE}. Also checks whether the last extraction
232      * should be discarded, analogously to
233      * {@link CombinationState#isDiscardPreceding()}. The default behavior
234      * is to always return <code>false</code>, but subclasses can modify
235      * this by overwriting the {@link #resetHook()} method.
236      *
237      * @return <code>true</code> iff the last extraction should be discarded
238      */
239     public final boolean reset() {
240         final boolean result = resetHook();
241         state = CombinationState.OUTSIDE;
242         return result;
243     }
244 
245     /***
246      * Hook called by the {@link #reset()} method to query whether the last
247      * extraction should be discarded, analogously to
248      * {@link CombinationState#isDiscardPreceding()}.
249      *
250      * @return the value of the attribute
251      */
252     protected boolean resetHook() {
253         return false;
254     }
255 
256     /***
257      * The current state (state of the last item).
258      * @return the current state
259      */
260     public CombinationState state() {
261         return state;
262     }
263 
264     /***
265      * Returns a string representation of this object.
266      *
267      * @return a textual representation
268      */
269     public String toString() {
270         return new ToStringBuilder(this)
271             .append("valid classes", validClasses)
272             .append("last state", state)
273           .toString();
274     }
275 
276     /***
277      * Updates the current state (the class of the last item). You must always
278      * call this method after classifying
279      * ({@link #translateResult(PredictionDistribution[])}) and/or training
280      * ({@link #translateCurrentState(CombinationState)}) an item.
281      *
282      * @param newState the state of the last item
283      * @throws IllegalArgumentException if the
284      * {@linkplain CombinationState#getType() type} of the argument is invalid
285      * (not in the set of {@link #getValidClasses() valid classes or
286      * <code>null</code>}
287      */
288     public void updateState(final CombinationState newState)
289             throws IllegalArgumentException {
290         // check that it's a valid state (if not null)
291         final String type = newState.getType();
292         if ((type != null) && (!validClasses.contains(type))) {
293             throw new IllegalArgumentException("Not a valid type: "
294                 + newState);
295         }
296         state = newState;
297     }
298 
299 }