View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.combi;
23  
24  import java.util.Collections;
25  import java.util.Iterator;
26  import java.util.Set;
27  import java.util.SortedSet;
28  import java.util.TreeSet;
29  
30  import de.fu_berlin.ties.classify.Prediction;
31  import de.fu_berlin.ties.classify.PredictionDistribution;
32  import de.fu_berlin.ties.text.TokenDetails;
33  
34  /***
35   * A combination strategy using begin/after tagging (also called "BIA" tagging
36   * due to the prefixed used). Classes are prefixed with "B-" (first element of
37   * an instance), "I-" (inner element of an instance), "A-" (first element
38   * <em>after</em> an instance that is not part of an other instance).
39   * "A" is used for anything else/outside of any class (except where the "A-"
40   * prefix is used).
41   *
42   * <p>{@link de.fu_berlin.ties.combi.CombinationState#isEnd()} is not
43   * supported by this class and will always return <code>false</code>.
44   *
45   * @author Christian Siefkes
46   * @version $Revision: 1.15 $, $Date: 2006/10/21 16:04:01 $, $Author: siefkes $
47   */
48  public class BeginAfterStrategy extends CombinationStrategy {
49  
50      /***
51       * The character terminating each prefix.
52       */
53      private static final char PREFIX_TERMINATOR = '-';
54  
55      /***
56       * Prefix used to classify the first element of an instance.
57       */
58      private static final String BEGIN_PREFIX = "B" + PREFIX_TERMINATOR;
59  
60      /***
61       * Prefix used to classify elements within instances.
62       */
63      private static final String IN_PREFIX = "I" + PREFIX_TERMINATOR;
64  
65      /***
66       * Prefix used to classify the first element <em>after</em> an instance
67       * that is not part of an other instance.
68       */
69      private static final String AFTER_PREFIX = "A" + PREFIX_TERMINATOR;
70  
71      /***
72       * String used to classify outside/other (except where "A-" is used).
73       * Actually called "A" so it will be picked in case of a tie ("A" is
74       * sorted before "A-" etc.).
75       */
76      private static final String OUTSIDE = "A";
77  
78      /***
79       * The set of all classes prefixed with {@link #BEGIN_PREFIX}, stored for
80       * efficiency.
81       */
82      private final TreeSet<String> bClasses = new TreeSet<String>();
83  
84      /***
85       * An immutable set of all classes (Strings) that can possible occur
86       * during classification.
87       */
88      private final SortedSet[] allClasses;
89  
90      /***
91       * Creates a new instance.
92       *
93       * @param theClasses a set of valid class names (String)
94       */
95      public BeginAfterStrategy(final Set<String> theClasses) {
96          super(theClasses);
97          final SortedSet<String> allValidClasses = new TreeSet<String>();
98          final Iterator<String> iter = theClasses.iterator();
99          String currentClass;
100 
101         // initialize immutable set of all classes + set of B classes
102         while (iter.hasNext()) {
103             currentClass = iter.next();
104             bClasses.add(BEGIN_PREFIX + currentClass);
105             allValidClasses.add(IN_PREFIX + currentClass);
106             allValidClasses.add(AFTER_PREFIX + currentClass);
107         }
108 
109         allValidClasses.addAll(bClasses);
110         allValidClasses.add(OUTSIDE);
111         allClasses = new SortedSet[] {
112             Collections.unmodifiableSortedSet(allValidClasses)
113         };
114     }
115 
116     /***
117      * Builds a set of class names (Strings) to pass to the classifier
118      * to consider for the next decision. This implementation returns the
119      * classes in alphabetic order (using a {@link SortedSet}).
120      *
121      * @return a set of class names
122      */
123     public Set[] activeClasses() {
124         // all B classes are always allowed
125         final SortedSet<String> result = new TreeSet<String>(bClasses);
126         final String lastType = state().getType();
127 
128         if ((lastType == null)) {
129             // O is allowed as well
130             result.add(OUTSIDE);
131         } else {
132             // I + A for current class are allowed as well
133             result.add(IN_PREFIX + lastType);
134             result.add(AFTER_PREFIX + lastType);
135         }
136         return new Set[] {result};
137     }
138 
139     /***
140      * {@inheritDoc}
141      */
142     public Set[] allClasses() {
143         return allClasses;
144     }
145 
146     /***
147      * {@inheritDoc}
148      */
149     public String[] translateCurrentState(final CombinationState currentState)
150             throws IllegalArgumentException {
151         final String result;
152         final String type = currentState.getType();
153         final String lastType = state().getType();
154 
155         if (type == null) {
156             if (lastType != null) {
157                 // mark as after last type
158                 result = AFTER_PREFIX + lastType;
159             } else {
160                 // mark as completely outside
161                 result = OUTSIDE;
162             }
163         } else {
164             if (currentState.isBegin()) {
165                 result = BEGIN_PREFIX + type;
166             } else {
167                 result = IN_PREFIX + type;
168             }
169         }
170         return new String[] {result};
171     }
172 
173     /***
174      * {@inheritDoc}
175      */
176     public CombinationState translateResult(
177             final PredictionDistribution[] predictions,
178             final TokenDetails details)
179     throws IllegalArgumentException {
180         final CombinationState result;
181         final Prediction best = predictions[0].best();
182         final String predictedClass = best.getType();
183 
184         if (OUTSIDE.equals(predictedClass)) {
185             // outside any instance
186             result = CombinationState.OUTSIDE;
187         } else {
188             final int prefixEndPos =
189                 predictedClass.indexOf(PREFIX_TERMINATOR);
190 
191             if (prefixEndPos >= 0) {
192                 final String prefix;
193                 final String newClass;
194 
195                 prefix = predictedClass.substring(0, prefixEndPos + 1);
196                 newClass = predictedClass.substring(prefixEndPos + 1);
197 
198                 if (BEGIN_PREFIX.equals(prefix)) {
199                     result = new CombinationState(newClass, true, false,
200                             best.getProbability());
201                 } else if (IN_PREFIX.equals(prefix)) {
202                     result = new CombinationState(newClass, false, false,
203                             best.getProbability());
204                 } else if (AFTER_PREFIX.equals(prefix)) {
205                     // now outside
206                     result = CombinationState.OUTSIDE;
207                 } else {
208                     throw new IllegalArgumentException(
209                         "Prefix " + prefix + " of predicted class "
210                             + predictedClass + " is invalid");
211                 }
212             } else {
213                 throw new IllegalArgumentException(
214                     "Invalid predicted class (no prefix): " + predictedClass);
215             }
216         }
217         return result;
218     }
219 
220 }