1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.combi;
23
24 import java.util.Collections;
25 import java.util.Iterator;
26 import java.util.Set;
27 import java.util.SortedSet;
28 import java.util.TreeSet;
29
30 import de.fu_berlin.ties.classify.PredictionDistribution;
31
32 /***
33 * A combination strategy using open/close tagging (also called "BIE" tagging
34 * due to the prefixed used). Classes are prefixed with "B-" (first element of
35 * an instance), "I-" (inner element of an instance), "E-" (last element of an
36 * instance), or "BE-" (only element of an instance -- begin and end).
37 * "A" is used for anything else/outside of any class.
38 *
39 * @author Christian Siefkes
40 * @version $Revision: 1.7 $, $Date: 2004/09/15 16:08:45 $, $Author: siefkes $
41 */
42 public class OpenCloseStrategy extends CombinationStrategy {
43
44 /***
45 * The character terminating each prefix.
46 */
47 private static final char PREFIX_TERMINATOR = '-';
48
49 /***
50 * Prefix used to classify the first element of an instance.
51 */
52 private static final String BEGIN_PREFIX = "B" + PREFIX_TERMINATOR;
53
54 /***
55 * Prefix used to classify inner elements of an instance.
56 */
57 private static final String INNER_PREFIX = "I" + PREFIX_TERMINATOR;
58
59 /***
60 * Prefix used to classify the last element of an instance.
61 */
62 private static final String END_PREFIX = "E" + PREFIX_TERMINATOR;
63
64 /***
65 * Prefix used to classify the only element of an instance (begin and end).
66 */
67 private static final String BEGIN_END_PREFIX = "BE" + PREFIX_TERMINATOR;
68
69 /***
70 * String used to classify outside/other. Actually called "A" so it will be
71 * picked in case of a tie.
72 */
73 private static final String OUTSIDE = "A";
74
75 /***
76 * The set of all classes prefixed with {@link #BEGIN_PREFIX}, stored for
77 * efficiency.
78 */
79 private final TreeSet<String> bClasses = new TreeSet<String>();
80
81 /***
82 * The set of all classes prefixed with {@link #BEGIN_END_PREFIX}, stored
83 * for efficiency.
84 */
85 private final TreeSet<String> beClasses = new TreeSet<String>();
86
87 /***
88 * An immutable set of all classes (Strings) that can possible occur
89 * during classification.
90 */
91 private final SortedSet[] allClasses;
92
93 /***
94 * Creates a new instance.
95 *
96 * @param theClasses a set of valid class names (String)
97 */
98 public OpenCloseStrategy(final Set<String> theClasses) {
99 super(theClasses);
100 final SortedSet<String> allValidClasses = new TreeSet<String>();
101 final Iterator<String> iter = theClasses.iterator();
102 String currentClass;
103
104
105 while (iter.hasNext()) {
106 currentClass = iter.next();
107 bClasses.add(BEGIN_PREFIX + currentClass);
108 beClasses.add(BEGIN_END_PREFIX + currentClass);
109 allValidClasses.add(INNER_PREFIX + currentClass);
110 allValidClasses.add(END_PREFIX + currentClass);
111 }
112
113 allValidClasses.addAll(bClasses);
114 allValidClasses.addAll(beClasses);
115 allValidClasses.add(OUTSIDE);
116 allClasses = new SortedSet[] {
117 Collections.unmodifiableSortedSet(allValidClasses)
118 };
119 }
120
121 /***
122 * Builds a set of class names (Strings) to pass to the classifier
123 * to consider for the next decision. This implementation returns the
124 * classes in alphabetic order (using a {@link SortedSet}).
125 *
126 * @return a set of class names
127 */
128 public Set<String>[] activeClasses() {
129 final SortedSet<String> result;
130 final String lastType = state().getType();
131
132 if ((lastType == null) || state().isEnd()) {
133
134 result = new TreeSet<String>(bClasses);
135 result.addAll(beClasses);
136 result.add(OUTSIDE);
137 } else {
138
139 result = new TreeSet<String>();
140 result.add(INNER_PREFIX + lastType);
141 result.add(END_PREFIX + lastType);
142 }
143 return new Set[] {result};
144 }
145
146 /***
147 * {@inheritDoc}
148 */
149 public Set<String>[] allClasses() {
150 return allClasses;
151 }
152
153 /***
154 * {@inheritDoc}
155 */
156 public String[] translateCurrentState(final CombinationState currentState)
157 throws IllegalArgumentException {
158 final String result;
159 final String type = currentState.getType();
160
161 if (type == null) {
162
163 result = OUTSIDE;
164 } else {
165 final String prefix;
166
167 if (currentState.isBegin()) {
168 if (currentState.isEnd()) {
169 prefix = BEGIN_END_PREFIX;
170 } else {
171 prefix = BEGIN_PREFIX;
172 }
173 } else {
174 if (currentState.isEnd()) {
175 prefix = END_PREFIX;
176 } else {
177 prefix = INNER_PREFIX;
178 }
179 }
180 result = prefix + type;
181 }
182 return new String[] {result};
183 }
184
185 /***
186 * {@inheritDoc}
187 */
188 public CombinationState translateResult(
189 final PredictionDistribution[] predictions)
190 throws IllegalArgumentException {
191 final CombinationState result;
192 final String predictedClass = predictions[0].best().getType();
193
194 if (OUTSIDE.equals(predictedClass)) {
195
196 result = CombinationState.OUTSIDE;
197 } else {
198 final int prefixEndPos =
199 predictedClass.indexOf(PREFIX_TERMINATOR);
200
201 if (prefixEndPos >= 0) {
202 final String prefix;
203 final String newClass;
204 final boolean isBegin;
205 final boolean isEnd;
206
207 prefix = predictedClass.substring(0, prefixEndPos + 1);
208 newClass = predictedClass.substring(prefixEndPos + 1);
209
210 if (BEGIN_PREFIX.equals(prefix)) {
211 isBegin = true;
212 isEnd = false;
213 } else if (BEGIN_END_PREFIX.equals(prefix)) {
214 isBegin = true;
215 isEnd = true;
216 } else if (INNER_PREFIX.equals(prefix)) {
217 isBegin = false;
218 isEnd = false;
219 } else if (END_PREFIX.equals(prefix)) {
220 isBegin = false;
221 isEnd = true;
222 } else {
223 throw new IllegalArgumentException(
224 "Prefix " + prefix + " of predicted class "
225 + predictedClass + " is invalid");
226 }
227
228 if (getValidClasses().contains(newClass)) {
229 result = new CombinationState(newClass, isBegin, isEnd);
230 } else {
231 throw new IllegalArgumentException(
232 "Type " + newClass + " of predicted class "
233 + predictedClass + " is invalid");
234 }
235 } else {
236 throw new IllegalArgumentException(
237 "Invalid predicted class (no prefix): " + predictedClass);
238 }
239 }
240 return result;
241 }
242
243 }