1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.combi;
23
24 import java.util.Collections;
25 import java.util.Iterator;
26 import java.util.Set;
27 import java.util.SortedSet;
28 import java.util.TreeSet;
29
30 import org.apache.commons.lang.builder.ToStringBuilder;
31
32 import de.fu_berlin.ties.classify.Prediction;
33 import de.fu_berlin.ties.classify.PredictionDistribution;
34 import de.fu_berlin.ties.text.TokenDetails;
35
36 /***
37 * A combination strategy using inside/outside tagging. Classes are prefixed
38 * with "I-" (in this class) or "B-" (begin of a new instance of this class),
39 * "A" is used for anything else/outside of any class.
40 *
41 * <p>{@link de.fu_berlin.ties.combi.CombinationState#isEnd()} is not
42 * supported by this class and will always return <code>false</code>.
43 *
44 * @author Christian Siefkes
45 * @version $Revision: 1.16 $, $Date: 2006/10/21 16:04:01 $, $Author: siefkes $
46 */
47 public class InsideOutsideStrategy extends CombinationStrategy {
48
49 /***
50 * Prefix used to classify the begin of a new instance.
51 */
52 private static final String BEGIN_PREFIX = "B-";
53
54 /***
55 * Prefix used to classify items within instances.
56 */
57 private static final String IN_PREFIX = "I-";
58
59 /***
60 * String used to classify outside/other. Actually called "A" so it will be
61 * picked in case of a tie.
62 */
63 private static final String OUTSIDE = "A";
64
65 /***
66 * Whether the "B-" prefix is used for the first item of each instance
67 * ("O/B/I" mode) or only for the first item of instances immediately
68 * following an instance of the same class ("O/I/B" mode).
69 */
70 private final boolean bStartingAll;
71
72 /***
73 * The set of all classes prefixed with {@link #BEGIN_PREFIX}, stored for
74 * efficiency.
75 */
76 private final TreeSet<String> bClasses;
77
78 /***
79 * The set of all classes prefixed with {@link #IN_PREFIX}, stored for
80 * efficiency.
81 */
82 private final TreeSet<String> iClasses;
83
84 /***
85 * An immutable set of all classes (Strings) that can possible occur
86 * during classification.
87 */
88 private final SortedSet[] allClasses;
89
90 /***
91 * Creates a new instance, using "O/I/B" mode: the "B-" is only used
92 * where required for disambiguation, otherwise the "I-" prefix is used.
93 *
94 * @param theClasses a set of valid class names (String)
95 */
96 public InsideOutsideStrategy(final Set<String> theClasses) {
97 this(theClasses, false);
98 }
99
100 /***
101 * Creates a new instance, using "O/I/B" mode (setting
102 * {@link #isBStartingAll()} to <code>false</code>). In this mode, the "B-"
103 * is only used where required for disambiguation, otherwise the "I-" prefix
104 * is used.
105 *
106 * @param theClasses a set of valid class names (String)
107 * @param bStartingAllClasses if <code>true</code>, the "B-" prefix is used
108 * for the first item of each instance ("O/B/I" mode); otherwise it is used
109 * only for first item of instances immediately following an instance of the
110 * same class ("O/I/B" mode)
111 */
112 public InsideOutsideStrategy(final Set<String> theClasses,
113 final boolean bStartingAllClasses) {
114 super(theClasses);
115 bStartingAll = bStartingAllClasses;
116 bClasses = new TreeSet<String>();
117 iClasses = new TreeSet<String>();
118 final Iterator iter = theClasses.iterator();
119 String currentClass;
120
121
122 while (iter.hasNext()) {
123 currentClass = (String) iter.next();
124 bClasses.add(BEGIN_PREFIX + currentClass);
125 iClasses.add(IN_PREFIX + currentClass);
126 }
127
128
129 final SortedSet<String> allValidClasses = new TreeSet<String>();
130 allValidClasses.addAll(bClasses);
131 allValidClasses.addAll(iClasses);
132 allValidClasses.add(OUTSIDE);
133 allClasses = new SortedSet[] {
134 Collections.unmodifiableSortedSet(allValidClasses)
135 };
136 }
137
138 /***
139 * Builds a set of class names (Strings) to pass to the classifier
140 * to consider for the next decision. This implementation returns the
141 * classes in alphabetic order (using a {@link SortedSet}).
142 *
143 * @return a set of class names
144 */
145 public Set[] activeClasses() {
146 final SortedSet<String> result;
147 if (bStartingAll) {
148
149 result = new TreeSet<String>(bClasses);
150 } else {
151
152 result = new TreeSet<String>(iClasses);
153 }
154
155 if (state().getType() != null) {
156 if (bStartingAll) {
157
158 result.add(IN_PREFIX + state().getType());
159 } else {
160
161 result.add(BEGIN_PREFIX + state().getType());
162 }
163 }
164
165
166 result.add(OUTSIDE);
167 return new Set[] {result};
168 }
169
170 /***
171 * {@inheritDoc}
172 */
173 public Set[] allClasses() {
174 return allClasses;
175 }
176
177 /***
178 * Whether the "B-" prefix is used for the first item of each instance
179 * ("O/B/I" mode) or only for the first item of instances immediately
180 * following an instance of the same class ("O/I/B" mode).
181 *
182 * @return the value of the attribute
183 */
184 public boolean isBStartingAll() {
185 return bStartingAll;
186 }
187
188 /***
189 * {@inheritDoc}
190 */
191 public String[] translateCurrentState(final CombinationState currentState)
192 throws IllegalArgumentException {
193 final String result;
194 final String type = currentState.getType();
195
196 if (type == null) {
197
198 result = OUTSIDE;
199 } else if (!currentState.isBegin()) {
200
201 if (type.equals(state().getType())) {
202 result = IN_PREFIX + type;
203 } else {
204 throw new IllegalArgumentException(
205 "Illegal continuation state: last state "
206 + state().getType() + " differs from current state "
207 + type);
208 }
209 } else if (type.equals(state().getType())) {
210
211 result = BEGIN_PREFIX + type;
212 } else {
213
214 if (bStartingAll) {
215 result = BEGIN_PREFIX + type;
216 } else {
217 result = IN_PREFIX + type;
218 }
219 }
220 return new String[] {result};
221 }
222
223 /***
224 * {@inheritDoc}
225 */
226 public CombinationState translateResult(
227 final PredictionDistribution[] predictions,
228 final TokenDetails details)
229 throws IllegalArgumentException {
230 final CombinationState result;
231 final Prediction best = predictions[0].best();
232 final String predictedClass = best.getType();
233
234 if (OUTSIDE.equals(predictedClass)) {
235
236 result = CombinationState.OUTSIDE;
237 } else {
238 final String newClass;
239 final boolean isBegin;
240
241 if (predictedClass.startsWith(BEGIN_PREFIX)) {
242
243 isBegin = true;
244 newClass = predictedClass.substring(BEGIN_PREFIX.length());
245 } else if (predictedClass.startsWith(IN_PREFIX)) {
246
247 isBegin = false;
248 newClass = predictedClass.substring(IN_PREFIX.length());
249 } else {
250 throw new IllegalArgumentException(
251 "Invalid predicted class: " + predictedClass);
252 }
253
254 if (isBegin) {
255 if (!bStartingAll) {
256
257 if (!newClass.equals(state().getType())) {
258 throw new IllegalArgumentException("'O/I/B' mode: "
259 + "B must occur after item of same class, but "
260 + "old class (" + state().getType()
261 + ") and new class (" + newClass + ") differ");
262 }
263 }
264
265
266 result = new CombinationState(newClass, true, false,
267 best.getProbability());
268 } else {
269 if (bStartingAll) {
270
271 if (!newClass.equals(state().getType())) {
272 throw new IllegalArgumentException("'O/B/I' mode: "
273 + "I must occur after item of same class, but"
274 + "old class (" + state().getType()
275 + ") and new class (" + newClass + ") differ");
276 }
277
278
279 result = new CombinationState(newClass, false, false,
280 best.getProbability());
281 } else {
282
283 if (newClass.equals(state().getType())) {
284
285 result = new CombinationState(newClass, false, false,
286 best.getProbability());
287 } else {
288
289 result = new CombinationState(newClass, true, false,
290 best.getProbability());
291 }
292 }
293 }
294 }
295 return result;
296 }
297
298 /***
299 * Returns a string representation of this object.
300 *
301 * @return a textual representation
302 */
303 public String toString() {
304 return new ToStringBuilder(this)
305 .appendSuper(super.toString())
306 .append("B starts all", bStartingAll)
307 .toString();
308 }
309
310 }