1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.combi;
23
24 import java.util.Collections;
25 import java.util.Iterator;
26 import java.util.Set;
27 import java.util.SortedSet;
28 import java.util.TreeSet;
29
30 import org.apache.commons.lang.builder.ToStringBuilder;
31
32 import de.fu_berlin.ties.classify.PredictionDistribution;
33
34 /***
35 * A combination strategy using inside/outside tagging. Classes are prefixed
36 * with "I-" (in this class) or "B-" (begin of a new instance of this class),
37 * "A" is used for anything else/outside of any class.
38 *
39 * <p>{@link de.fu_berlin.ties.combi.CombinationState#isEnd()} is not
40 * supported by this class and will always return <code>false</code>.
41 *
42 * @author Christian Siefkes
43 * @version $Revision: 1.9 $, $Date: 2004/09/15 16:08:45 $, $Author: siefkes $
44 */
45 public class InsideOutsideStrategy extends CombinationStrategy {
46
47 /***
48 * Prefix used to classify the begin of a new instance.
49 */
50 private static final String BEGIN_PREFIX = "B-";
51
52 /***
53 * Prefix used to classify items within instances.
54 */
55 private static final String IN_PREFIX = "I-";
56
57 /***
58 * String used to classify outside/other. Actually called "A" so it will be
59 * picked in case of a tie.
60 */
61 private static final String OUTSIDE = "A";
62
63 /***
64 * Whether the "B-" prefix is used for the first item of each instance
65 * ("O/B/I" mode) or only for the first item of instances immediately
66 * following an instance of the same class ("O/I/B" mode).
67 */
68 private final boolean bStartingAll;
69
70 /***
71 * The set of all classes prefixed with {@link #BEGIN_PREFIX}, stored for
72 * efficiency.
73 */
74 private final TreeSet<String> bClasses;
75
76 /***
77 * The set of all classes prefixed with {@link #IN_PREFIX}, stored for
78 * efficiency.
79 */
80 private final TreeSet<String> iClasses;
81
82 /***
83 * An immutable set of all classes (Strings) that can possible occur
84 * during classification.
85 */
86 private final SortedSet[] allClasses;
87
88 /***
89 * Creates a new instance, using "O/I/B" mode: the "B-" is only used
90 * where required for disambiguation, otherwise the "I-" prefix is used.
91 *
92 * @param theClasses a set of valid class names (String)
93 */
94 public InsideOutsideStrategy(final Set<String> theClasses) {
95 this(theClasses, false);
96 }
97
98 /***
99 * Creates a new instance, using "O/I/B" mode (setting
100 * {@link #isBStartingAll()} to <code>false</code>). In this mode, the "B-"
101 * is only used where required for disambiguation, otherwise the "I-" prefix
102 * is used.
103 *
104 * @param theClasses a set of valid class names (String)
105 * @param bStartingAllClasses if <code>true</code>, the "B-" prefix is used
106 * for the first item of each instance ("O/B/I" mode); otherwise it is used
107 * only for first item of instances immediately following an instance of the
108 * same class ("O/I/B" mode)
109 */
110 public InsideOutsideStrategy(final Set<String> theClasses,
111 final boolean bStartingAllClasses) {
112 super(theClasses);
113 bStartingAll = bStartingAllClasses;
114 bClasses = new TreeSet<String>();
115 iClasses = new TreeSet<String>();
116 final Iterator iter = theClasses.iterator();
117 String currentClass;
118
119
120 while (iter.hasNext()) {
121 currentClass = (String) iter.next();
122 bClasses.add(BEGIN_PREFIX + currentClass);
123 iClasses.add(IN_PREFIX + currentClass);
124 }
125
126
127 final SortedSet<String> allValidClasses = new TreeSet<String>();
128 allValidClasses.addAll(bClasses);
129 allValidClasses.addAll(iClasses);
130 allValidClasses.add(OUTSIDE);
131 allClasses = new SortedSet[] {
132 Collections.unmodifiableSortedSet(allValidClasses)
133 };
134 }
135
136 /***
137 * Builds a set of class names (Strings) to pass to the classifier
138 * to consider for the next decision. This implementation returns the
139 * classes in alphabetic order (using a {@link SortedSet}).
140 *
141 * @return a set of class names
142 */
143 public Set<String>[] activeClasses() {
144 final SortedSet<String> result;
145 if (bStartingAll) {
146
147 result = new TreeSet<String>(bClasses);
148 } else {
149
150 result = new TreeSet<String>(iClasses);
151 }
152
153 if (state().getType() != null) {
154 if (bStartingAll) {
155
156 result.add(IN_PREFIX + state().getType());
157 } else {
158
159 result.add(BEGIN_PREFIX + state().getType());
160 }
161 }
162
163
164 result.add(OUTSIDE);
165 return new Set[] {result};
166 }
167
168 /***
169 * {@inheritDoc}
170 */
171 public Set<String>[] allClasses() {
172 return allClasses;
173 }
174
175 /***
176 * Whether the "B-" prefix is used for the first item of each instance
177 * ("O/B/I" mode) or only for the first item of instances immediately
178 * following an instance of the same class ("O/I/B" mode).
179 *
180 * @return the value of the attribute
181 */
182 public boolean isBStartingAll() {
183 return bStartingAll;
184 }
185
186 /***
187 * {@inheritDoc}
188 */
189 public String[] translateCurrentState(final CombinationState currentState)
190 throws IllegalArgumentException {
191 final String result;
192 final String type = currentState.getType();
193
194 if (type == null) {
195
196 result = OUTSIDE;
197 } else if (!currentState.isBegin()) {
198
199 if (type.equals(state().getType())) {
200 result = IN_PREFIX + type;
201 } else {
202 throw new IllegalArgumentException(
203 "Illegal continuation state: last state "
204 + state().getType() + " differs from current state "
205 + type);
206 }
207 } else if (type.equals(state().getType())) {
208
209 result = BEGIN_PREFIX + type;
210 } else {
211
212 if (bStartingAll) {
213 result = BEGIN_PREFIX + type;
214 } else {
215 result = IN_PREFIX + type;
216 }
217 }
218 return new String[] {result};
219 }
220
221 /***
222 * {@inheritDoc}
223 */
224 public CombinationState translateResult(
225 final PredictionDistribution[] predictions)
226 throws IllegalArgumentException {
227 final CombinationState result;
228 final String predictedClass = predictions[0].best().getType();
229
230 if (OUTSIDE.equals(predictedClass)) {
231
232 result = CombinationState.OUTSIDE;
233 } else {
234 final String newClass;
235 final boolean isBegin;
236
237 if (predictedClass.startsWith(BEGIN_PREFIX)) {
238
239 isBegin = true;
240 newClass = predictedClass.substring(BEGIN_PREFIX.length());
241 } else if (predictedClass.startsWith(IN_PREFIX)) {
242
243 isBegin = false;
244 newClass = predictedClass.substring(IN_PREFIX.length());
245 } else {
246 throw new IllegalArgumentException(
247 "Invalid predicted class: " + predictedClass);
248 }
249
250 if (isBegin) {
251 if (!bStartingAll) {
252
253 if (!newClass.equals(state().getType())) {
254 throw new IllegalArgumentException("'O/I/B' mode: "
255 + "B must occur after item of same class, but"
256 + "old class (" + state().getType()
257 + ") and new class (" + newClass + ") differ");
258 }
259 }
260
261
262 result = new CombinationState(newClass, true, false);
263 } else {
264 if (bStartingAll) {
265
266 if (!newClass.equals(state().getType())) {
267 throw new IllegalArgumentException("'O/B/I' mode: "
268 + "I must occur after item of same class, but"
269 + "old class (" + state().getType()
270 + ") and new class (" + newClass + ") differ");
271 }
272
273
274 result = new CombinationState(newClass, false, false);
275 } else {
276
277 if (newClass.equals(state().getType())) {
278
279 result = new CombinationState(newClass, false, false);
280 } else {
281
282 result = new CombinationState(newClass, true, false);
283 }
284 }
285 }
286 }
287 return result;
288 }
289
290 /***
291 * Returns a string representation of this object.
292 *
293 * @return a textual representation
294 */
295 public String toString() {
296 return new ToStringBuilder(this)
297 .appendSuper(super.toString())
298 .append("B starts all", bStartingAll)
299 .toString();
300 }
301
302 }