1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.combi;
23
24 import java.util.Collections;
25 import java.util.Iterator;
26 import java.util.Set;
27 import java.util.SortedSet;
28 import java.util.TreeSet;
29
30 import de.fu_berlin.ties.classify.Prediction;
31 import de.fu_berlin.ties.classify.PredictionComparator;
32 import de.fu_berlin.ties.classify.PredictionDistribution;
33 import de.fu_berlin.ties.util.Util;
34
35 /***
36 * A combination strategy that uses two classifiers, one to recognize the begin
37 * of extractions and one to recognize the end.
38 *
39 * @author Christian Siefkes
40 * @version $Revision: 1.7 $, $Date: 2004/09/15 16:08:45 $, $Author: siefkes $
41 */
42 public class BeginEndStrategy extends CombinationStrategy {
43
44 /***
45 * The character terminating each prefix.
46 */
47 private static final char PREFIX_TERMINATOR = '-';
48
49 /***
50 * Prefix used for the begin classifier.
51 */
52 private static final String BEGIN_PREFIX = "B" + PREFIX_TERMINATOR;
53
54 /***
55 * Prefix used for the end classifier.
56 */
57 private static final String END_PREFIX = "E" + PREFIX_TERMINATOR;
58
59 /***
60 * String used to classify other (not begin resp. end). Actually called
61 * "A" so it will be picked in case of a tie.
62 */
63 private static final String OTHER = "A";
64
65 /***
66 * The set of all classes for the begin classifier, stored for efficiency.
67 */
68 private final SortedSet<String> beginClasses;
69
70 /***
71 * The set of all classes for the end classifier, stored for efficiency.
72 */
73 private final SortedSet<String> endClasses;
74
75 /***
76 * An immutable set of all classes (Strings) that can possible occur
77 * during classification.
78 */
79 private final SortedSet[] allClasses;
80
81 /***
82 * Creates a new instance.
83 *
84 * @param theClasses a set of valid class names (String)
85 */
86 public BeginEndStrategy(final Set<String> theClasses) {
87 super(theClasses);
88
89 final TreeSet<String> bClasses = new TreeSet<String>();
90 final TreeSet<String> eClasses = new TreeSet<String>();
91 final Iterator iter = theClasses.iterator();
92 String currentClass;
93
94 bClasses.add(OTHER);
95 eClasses.add(OTHER);
96
97
98 while (iter.hasNext()) {
99 currentClass = (String) iter.next();
100 bClasses.add(BEGIN_PREFIX + currentClass);
101 eClasses.add(END_PREFIX + currentClass);
102 }
103
104
105 beginClasses = Collections.unmodifiableSortedSet(bClasses);
106 endClasses = Collections.unmodifiableSortedSet(eClasses);
107 allClasses = new SortedSet[] {beginClasses, endClasses};
108 }
109
110 /***
111 * {@inheritDoc}
112 */
113 public Set<String>[] activeClasses() {
114
115 return allClasses;
116 }
117
118 /***
119 * {@inheritDoc}
120 */
121 public Set<String>[] allClasses() {
122 return allClasses;
123 }
124
125 /***
126 * {@inheritDoc}
127 */
128 protected boolean resetHook() {
129 final boolean result;
130
131 if (!state().isEnd() && (state().getType() != null)) {
132
133 result = true;
134 } else {
135 return false;
136 }
137 return result;
138 }
139
140 /***
141 * {@inheritDoc}
142 */
143 public String[] translateCurrentState(final CombinationState currentState)
144 throws IllegalArgumentException {
145 final String beginResult;
146 final String endResult;
147
148 if (currentState.isBegin()) {
149
150 beginResult = BEGIN_PREFIX + currentState.getType();
151 } else {
152 beginResult = OTHER;
153 }
154
155 if (currentState.isEnd()) {
156
157 endResult = END_PREFIX + currentState.getType();
158 } else {
159 endResult = OTHER;
160 }
161
162 return new String[] {beginResult, endResult};
163 }
164
165 /***
166 * {@inheritDoc}
167 */
168 public CombinationState translateResult(
169 final PredictionDistribution[] predictions)
170 throws IllegalArgumentException {
171 if (predictions.length != 2) {
172 throw new IllegalArgumentException(
173 "Illegal number of classifiers: " + predictions.length
174 + " instead of 2");
175 }
176
177 final CombinationState result;
178 final Prediction beginBest = predictions[0].best();
179 final Prediction endBest = predictions[1].best();
180 final PredictionComparator predComp = new PredictionComparator();
181
182 boolean isBegin = !beginBest.getType().equals(OTHER);
183 boolean isEnd = !endBest.getType().equals(OTHER);
184 final String newClass;
185
186
187 final String lastType = state().getType();
188 final boolean lastEnd = state().isEnd();
189
190 if (isBegin) {
191
192 final String beginClass =
193 beginBest.getType().substring(BEGIN_PREFIX.length());
194
195 if (isEnd) {
196 final String endClass =
197 endBest.getType().substring(END_PREFIX.length());
198
199 if (beginClass.equals(endClass)) {
200
201 newClass = beginClass;
202 } else if (endClass.equals(lastType) && !lastEnd) {
203
204
205 if (predComp.compare(beginBest, endBest) <= 0) {
206
207 newClass = endClass;
208 isBegin = false;
209 Util.LOG.debug("BeginEnd: end classification "
210 + endBest + " wins over begin classification "
211 + beginBest + "due to higher/equal confidence");
212 } else {
213
214 newClass = beginClass;
215 isEnd = false;
216 Util.LOG.debug("BeginEnd: begin classification "
217 + beginBest + " wins over end classification "
218 + endBest + "due to higher confidence");
219 }
220 } else {
221
222 newClass = beginClass;
223
224
225 }
226 } else {
227
228 newClass = beginClass;
229 }
230 } else {
231 if (isEnd) {
232 final String endClass =
233 endBest.getType().substring(END_PREFIX.length());
234
235
236 if (endClass.equals(lastType) && !lastEnd) {
237 newClass = endClass;
238 } else {
239
240 isEnd = false;
241
242
243
244 if (lastEnd) {
245
246 newClass = null;
247 } else {
248
249 newClass = lastType;
250 }
251 }
252 } else {
253
254 if (lastEnd) {
255
256 newClass = null;
257 } else {
258
259 newClass = lastType;
260 }
261 }
262 }
263
264 final boolean discardPrevious;
265
266 if (isBegin && !lastEnd && (lastType != null)
267 && !lastType.equals(newClass)) {
268
269
270 discardPrevious = true;
271 } else {
272 discardPrevious = false;
273 }
274
275 if (newClass != null) {
276 if (getValidClasses().contains(newClass)) {
277 result = new CombinationState(newClass, isBegin, isEnd,
278 discardPrevious);
279 } else {
280 throw new IllegalArgumentException(
281 "Type " + newClass + " derived from predictions "
282 + beginBest + " and " + endBest + " is invalid");
283 }
284 } else {
285 result = CombinationState.OUTSIDE;
286 }
287
288 return result;
289 }
290
291 }