View Javadoc

1   /*
2    * Copyright (C) 2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.combi;
23  
24  import java.util.Collections;
25  import java.util.Iterator;
26  import java.util.Set;
27  import java.util.SortedSet;
28  import java.util.TreeSet;
29  
30  import org.apache.commons.lang.builder.ToStringBuilder;
31  
32  import de.fu_berlin.ties.classify.PredictionDistribution;
33  
34  /***
35   * A combination strategy using inside/outside tagging. Classes are prefixed
36   * with "I-" (in this class) or "B-" (begin of a new instance of this class),
37   * "A" is used for anything else/outside of any class.
38   *
39   * <p>{@link de.fu_berlin.ties.combi.CombinationState#isEnd()} is not
40   * supported by this class and will always return <code>false</code>.
41   *
42   * @author Christian Siefkes
43   * @version $Revision: 1.9 $, $Date: 2004/09/15 16:08:45 $, $Author: siefkes $
44   */
45  public class InsideOutsideStrategy extends CombinationStrategy {
46  
47      /***
48       * Prefix used to classify the begin of a new instance.
49       */
50      private static final String BEGIN_PREFIX = "B-";
51  
52      /***
53       * Prefix used to classify items within instances.
54       */
55      private static final String IN_PREFIX = "I-";
56  
57      /***
58       * String used to classify outside/other. Actually called "A" so it will be
59       * picked in case of a tie.
60       */
61      private static final String OUTSIDE = "A";
62  
63      /***
64       * Whether the "B-" prefix is used for the first item of each instance
65       * ("O/B/I" mode) or only for the first item of instances immediately
66       * following an instance of the same class ("O/I/B" mode).
67       */
68      private final boolean bStartingAll;
69  
70      /***
71       * The set of all classes prefixed with {@link #BEGIN_PREFIX}, stored for
72       * efficiency.
73       */
74      private final TreeSet<String> bClasses;
75  
76      /***
77       * The set of all classes prefixed with {@link #IN_PREFIX}, stored for
78       * efficiency.
79       */
80      private final TreeSet<String> iClasses;
81  
82      /***
83       * An immutable set of all classes (Strings) that can possible occur
84       * during classification.
85       */
86      private final SortedSet[] allClasses;
87  
88      /***
89       * Creates a new instance, using "O/I/B" mode: the "B-" is only used
90       * where required for disambiguation, otherwise the "I-" prefix is used.
91       *
92       * @param theClasses a set of valid class names (String)
93       */
94      public InsideOutsideStrategy(final Set<String> theClasses) {
95          this(theClasses, false);
96      }
97  
98      /***
99       * Creates a new instance, using "O/I/B" mode (setting
100      * {@link #isBStartingAll()} to <code>false</code>). In this mode, the "B-"
101      * is only used where required for disambiguation, otherwise the "I-" prefix
102      * is used.
103      *
104      * @param theClasses a set of valid class names (String)
105      * @param bStartingAllClasses if <code>true</code>, the "B-" prefix is used
106      * for the first item of each instance ("O/B/I" mode); otherwise it is used
107      * only for first item of instances immediately following an instance of the
108      * same class ("O/I/B" mode)
109      */
110     public InsideOutsideStrategy(final Set<String> theClasses,
111                                  final boolean bStartingAllClasses) {
112         super(theClasses);
113         bStartingAll = bStartingAllClasses;
114         bClasses = new TreeSet<String>();
115         iClasses = new TreeSet<String>();
116         final Iterator iter = theClasses.iterator();
117         String currentClass;
118 
119         // prepend prefixes to all classes
120         while (iter.hasNext()) {
121             currentClass = (String) iter.next();
122             bClasses.add(BEGIN_PREFIX + currentClass);
123             iClasses.add(IN_PREFIX + currentClass);
124         }
125 
126         // initialize immutable set of all classes
127         final SortedSet<String> allValidClasses = new TreeSet<String>();
128         allValidClasses.addAll(bClasses);
129         allValidClasses.addAll(iClasses);
130         allValidClasses.add(OUTSIDE);
131         allClasses = new SortedSet[] {
132             Collections.unmodifiableSortedSet(allValidClasses)
133         };
134     }
135 
136     /***
137      * Builds a set of class names (Strings) to pass to the classifier
138      * to consider for the next decision. This implementation returns the
139      * classes in alphabetic order (using a {@link SortedSet}).
140      *
141      * @return a set of class names
142      */
143     public Set<String>[] activeClasses() {
144         final SortedSet<String> result;
145         if (bStartingAll) {
146             // all B classes allowed
147             result = new TreeSet<String>(bClasses);
148         } else {
149             // all I classes allowed
150             result = new TreeSet<String>(iClasses);
151         }
152 
153         if (state().getType() != null) {
154             if (bStartingAll) {
155                 // I is used to continue current instance
156                 result.add(IN_PREFIX + state().getType());
157             } else {
158                 // B is used to start new instance of same type
159                 result.add(BEGIN_PREFIX + state().getType());
160             }
161         }
162 
163         // O is always allows
164         result.add(OUTSIDE);
165         return new Set[] {result};
166     }
167 
168     /***
169      * {@inheritDoc}
170      */
171     public Set<String>[] allClasses() {
172         return allClasses;
173     }
174 
175     /***
176      * Whether the "B-" prefix is used for the first item of each instance
177      * ("O/B/I" mode) or only for the first item of instances immediately
178      * following an instance of the same class ("O/I/B" mode).
179      *
180      * @return the value of the attribute
181      */
182     public boolean isBStartingAll() {
183         return bStartingAll;
184     }
185 
186     /***
187      * {@inheritDoc}
188      */
189     public String[] translateCurrentState(final CombinationState currentState)
190             throws IllegalArgumentException {
191         final String result;
192         final String type = currentState.getType();
193 
194         if (type == null) {
195             // null: outside
196             result = OUTSIDE;
197         } else if (!currentState.isBegin()) {
198             // continuation of last instance (must have same type)
199             if (type.equals(state().getType())) {
200                 result = IN_PREFIX + type;
201             } else {
202                 throw new IllegalArgumentException(
203                     "Illegal continuation state: last state "
204                         + state().getType() + " differs from current state "
205                         + type);
206             }
207         } else if (type.equals(state().getType())) {
208             // new instance of same type
209             result = BEGIN_PREFIX + type;
210         } else {
211             // new instance, last class was different
212             if (bStartingAll) {
213                 result = BEGIN_PREFIX + type;
214             } else {
215                 result = IN_PREFIX + type;
216             }
217         }
218         return new String[] {result};
219     }
220 
221     /***
222      * {@inheritDoc}
223      */
224     public CombinationState translateResult(
225             final PredictionDistribution[] predictions)
226     throws IllegalArgumentException {
227         final CombinationState result;
228         final String predictedClass = predictions[0].best().getType();
229 
230         if (OUTSIDE.equals(predictedClass)) {
231             // outside any instance
232             result = CombinationState.OUTSIDE;
233         } else {
234             final String newClass;
235             final boolean isBegin;
236 
237             if (predictedClass.startsWith(BEGIN_PREFIX)) {
238                 // begin of a instance
239                 isBegin = true;
240                 newClass = predictedClass.substring(BEGIN_PREFIX.length());
241             } else if (predictedClass.startsWith(IN_PREFIX)) {
242                 // within a instance
243                 isBegin = false;
244                 newClass = predictedClass.substring(IN_PREFIX.length());
245             } else {
246                 throw new IllegalArgumentException(
247                     "Invalid predicted class: " + predictedClass);
248             }
249 
250             if (isBegin) {
251                 if (!bStartingAll) {
252                     // check that this is allowed (last item had same class)
253                     if (!newClass.equals(state().getType())) {
254                         throw new IllegalArgumentException("'O/I/B' mode: "
255                             + "B must occur after item of same class, but"
256                             + "old class (" + state().getType()
257                             + ") and new class (" + newClass + ") differ");
258                     }
259                 }
260 
261                 // start of new instance
262                 result = new CombinationState(newClass, true, false);
263             } else {
264                 if (bStartingAll) {
265                     // check that this is allowed (last item had same class)
266                     if (!newClass.equals(state().getType())) {
267                         throw new IllegalArgumentException("'O/B/I' mode: "
268                             + "I must occur after item of same class, but"
269                             + "old class (" + state().getType()
270                             + ") and new class (" + newClass + ") differ");
271                     }
272 
273                     // continue last instance
274                     result = new CombinationState(newClass, false, false);
275                 } else {
276                     // O/I/B mode:
277                     if (newClass.equals(state().getType())) {
278                         // continue last instance
279                         result = new CombinationState(newClass, false, false);
280                     } else {
281                         // start of new instance
282                         result = new CombinationState(newClass, true, false);
283                     }
284                 }
285             }
286         }
287         return result;
288     }
289 
290     /***
291      * Returns a string representation of this object.
292      *
293      * @return a textual representation
294      */
295     public String toString() {
296         return new ToStringBuilder(this)
297             .appendSuper(super.toString())
298             .append("B starts all", bStartingAll)
299             .toString();
300     }
301 
302 }