1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.combi;
23
24 import java.util.Collections;
25 import java.util.Iterator;
26 import java.util.LinkedHashMap;
27 import java.util.Set;
28 import java.util.SortedSet;
29 import java.util.TreeSet;
30
31 import de.fu_berlin.ties.ContextMap;
32 import de.fu_berlin.ties.ProcessingException;
33 import de.fu_berlin.ties.TiesConfiguration;
34 import de.fu_berlin.ties.classify.Prediction;
35 import de.fu_berlin.ties.classify.PredictionComparator;
36 import de.fu_berlin.ties.classify.PredictionDistribution;
37 import de.fu_berlin.ties.classify.Probability;
38 import de.fu_berlin.ties.extract.amend.BeginEndReextractor;
39 import de.fu_berlin.ties.extract.amend.FinalReextractor;
40 import de.fu_berlin.ties.extract.reestimate.LengthFilter;
41 import de.fu_berlin.ties.extract.reestimate.Reestimator;
42 import de.fu_berlin.ties.text.TokenDetails;
43 import de.fu_berlin.ties.util.Util;
44
45 /***
46 * A combination strategy that uses two classifiers, one to recognize the begin
47 * of extractions and one to recognize the end.
48 *
49 * @author Christian Siefkes
50 * @version $Revision: 1.25 $, $Date: 2006/10/21 16:04:01 $, $Author: siefkes $
51 */
52 public class BeginEndStrategy extends CombinationStrategy {
53
54 /***
55 * The character terminating each prefix.
56 */
57 private static final char PREFIX_TERMINATOR = '-';
58
59 /***
60 * Prefix used for the begin classifier.
61 */
62 private static final String BEGIN_PREFIX = "B" + PREFIX_TERMINATOR;
63
64 /***
65 * Prefix used for the end classifier.
66 */
67 private static final String END_PREFIX = "E" + PREFIX_TERMINATOR;
68
69 /***
70 * String used to classify other (not begin resp. end). Actually called
71 * "A" so it will be picked in case of a tie.
72 */
73 private static final String OTHER = "A";
74
75
76 /***
77 * The set of all classes for the begin classifier, stored for efficiency.
78 */
79 private final SortedSet<String> beginClasses;
80
81 /***
82 * The set of all classes for the end classifier, stored for efficiency.
83 */
84 private final SortedSet<String> endClasses;
85
86 /***
87 * An immutable set of all classes (Strings) that can possible occur
88 * during classification.
89 */
90 private final SortedSet[] allClasses;
91
92 /***
93 * Used to configure this instance.
94 */
95 private final TiesConfiguration config;
96
97 /***
98 * Whether to use a {@link BeginEndReextractor} as second level.
99 */
100 private final boolean reextract;
101
102 /***
103 * Used to notify the {@link BeginEndReextractor} (if used) of positive
104 * begin predictions for each token index.
105 */
106 private BeginEndReextractor.PositivePredictionsMap beginMap = null;
107
108 /***
109 * Used to notify the {@link BeginEndReextractor} (if used) of positive
110 * end predictions for each token index.
111 */
112 private BeginEndReextractor.PositivePredictionsMap endMap = null;
113
114
115 /***
116 * Creates a new instance.
117 *
118 * @param theClasses a set of valid class names (String)
119 * @param conf used to configure this instance
120 */
121 public BeginEndStrategy(final Set<String> theClasses,
122 final TiesConfiguration conf) {
123 super(theClasses);
124
125
126 config = conf;
127 reextract = conf.getBoolean("combination.begin-end.level2");
128
129 if (reextract) {
130
131 beginMap = new BeginEndReextractor.PositivePredictionsMap();
132 endMap = new BeginEndReextractor.PositivePredictionsMap();
133 }
134
135 final TreeSet<String> bClasses = new TreeSet<String>();
136 final TreeSet<String> eClasses = new TreeSet<String>();
137 final Iterator iter = theClasses.iterator();
138 String currentClass;
139
140 bClasses.add(OTHER);
141 eClasses.add(OTHER);
142
143
144 while (iter.hasNext()) {
145 currentClass = (String) iter.next();
146 bClasses.add(BEGIN_PREFIX + currentClass);
147 eClasses.add(END_PREFIX + currentClass);
148 }
149
150
151 beginClasses = Collections.unmodifiableSortedSet(bClasses);
152 endClasses = Collections.unmodifiableSortedSet(eClasses);
153 allClasses = new SortedSet[] {beginClasses, endClasses};
154 }
155
156
157 /***
158 * {@inheritDoc}
159 */
160 public Set[] activeClasses() {
161
162 return allClasses;
163 }
164
165 /***
166 * {@inheritDoc}
167 */
168 public Set[] allClasses() {
169 return allClasses;
170 }
171
172 /***
173 * {@inheritDoc}
174 */
175 public ContextMap contextForReextractor() {
176 final ContextMap context;
177
178 if (reextract) {
179
180 context = new ContextMap();
181 context.put(BeginEndReextractor.CONTEXT_BEGIN_MAP, beginMap);
182 context.put(BeginEndReextractor.CONTEXT_END_MAP, endMap);
183 beginMap = new BeginEndReextractor.PositivePredictionsMap();
184 endMap = new BeginEndReextractor.PositivePredictionsMap();
185 } else {
186 context = null;
187 }
188
189 return context;
190 }
191
192 /***
193 * {@inheritDoc} This implementation returns a {@link BeginEndReextractor},
194 * if configured. In this case the re-estimator chain <strong>must</strong>
195 * contain a {@link LengthFilter}, otherwise this method will throw an
196 * {@link IllegalArgumentException}.
197 */
198 public FinalReextractor initReextractor(final Reestimator reestimatorChain)
199 throws ProcessingException {
200
201 if (reextract) {
202
203 Reestimator estimator = reestimatorChain;
204 LengthFilter lengthFilter = null;
205
206 while ((estimator != null) && (lengthFilter == null)) {
207 if (estimator instanceof LengthFilter) {
208
209 lengthFilter = (LengthFilter) estimator;
210 } else {
211
212 estimator = estimator.getPrecedingReestimator();
213 }
214 }
215
216 if (lengthFilter != null) {
217 return new BeginEndReextractor(getValidClasses(), config,
218 lengthFilter);
219 } else {
220 throw new IllegalArgumentException("No LengthFilter found in "
221 + "re-estimator chain -- cannot init BeginEndReextractor");
222 }
223 } else {
224 return null;
225 }
226 }
227
228 /***
229 * {@inheritDoc}
230 */
231 protected boolean resetHook() {
232 final boolean result;
233
234 if (!state().isEnd() && (state().getType() != null)) {
235
236 result = true;
237 } else {
238 return false;
239 }
240 return result;
241 }
242
243 /***
244 * Helper method that stores the positive (more likely than {@link #OTHER})
245 * predictions of a classifier in a map for communication with the
246 * {@link BeginEndReextractor}. This method should only be called if there
247 * is at least one positive prediction
248 *
249 * @param predIter an iterator over the predictions to store
250 * @param map the map to add to
251 * @param prefix a prefix to strip from the type of predictions
252 * @param tokenIndex the index of the current token
253 * @throws IllegalStateException if there are no positive predictions, i.e.
254 * if {@link #OTHER} is the most likely prediction
255 */
256 private void storePositivePredictions(final Iterator<Prediction> predIter,
257 final BeginEndReextractor.PositivePredictionsMap map,
258 final String prefix, final int tokenIndex)
259 throws IllegalStateException {
260
261 final LinkedHashMap<String, Prediction> bestMap =
262 new LinkedHashMap<String, Prediction>();
263 boolean done = false;
264 Prediction currentPred;
265 String currentType;
266
267 currentPred = predIter.next();
268 currentType = currentPred.getType();
269
270 while (predIter.hasNext() && !done) {
271 currentPred = predIter.next();
272 currentType = currentPred.getType();
273
274 if (currentType.equals(OTHER)) {
275
276 done = true;
277 } else {
278
279 bestMap.put(currentType.substring(prefix.length()),
280 currentPred);
281 }
282 }
283
284 if (!bestMap.isEmpty()) {
285
286 map.put(tokenIndex, bestMap);
287 } else {
288
289 throw new IllegalStateException("No positive predictions");
290 }
291 }
292
293 /***
294 * {@inheritDoc}
295 */
296 public String[] translateCurrentState(final CombinationState currentState)
297 throws IllegalArgumentException {
298 final String beginResult;
299 final String endResult;
300
301 if (currentState.isBegin()) {
302
303 beginResult = BEGIN_PREFIX + currentState.getType();
304 } else {
305 beginResult = OTHER;
306 }
307
308 if (currentState.isEnd()) {
309
310 endResult = END_PREFIX + currentState.getType();
311 } else {
312 endResult = OTHER;
313 }
314
315 return new String[] {beginResult, endResult};
316 }
317
318 /***
319 * {@inheritDoc}
320 */
321 public CombinationState translateResult(
322 final PredictionDistribution[] predictions,
323 final TokenDetails details)
324 throws IllegalArgumentException {
325 if (predictions.length != 2) {
326 throw new IllegalArgumentException(
327 "Illegal number of classifiers: " + predictions.length
328 + " instead of 2");
329 }
330
331 final CombinationState result;
332 final Probability prob;
333 final Prediction beginBest = predictions[0].best();
334 final Prediction endBest = predictions[1].best();
335 final PredictionComparator predComp = new PredictionComparator();
336
337 boolean isBegin = !beginBest.getType().equals(OTHER);
338 boolean isEnd = !endBest.getType().equals(OTHER);
339
340
341 final String lastType = state().getType();
342 final boolean lastEnd = state().isEnd();
343 final String newClass;
344
345 if (isBegin) {
346
347 final String beginClass =
348 beginBest.getType().substring(BEGIN_PREFIX.length());
349
350 if (isEnd) {
351 final String endClass =
352 endBest.getType().substring(END_PREFIX.length());
353
354 if (beginClass.equals(endClass)) {
355
356 newClass = beginClass;
357
358
359 final Prediction pred = new Prediction(null,
360 beginBest.getProbability());
361 pred.addProb(endBest.getProbability(), true);
362 prob = pred.getProbability();
363 } else if (endClass.equals(lastType) && !lastEnd) {
364
365
366 if (predComp.compare(beginBest, endBest) <= 0) {
367
368 newClass = endClass;
369 isBegin = false;
370 Util.LOG.debug("BeginEnd: end classification "
371 + endBest + " wins over begin classification "
372 + beginBest + "due to higher/equal confidence");
373
374
375 prob = endBest.getProbability();
376 } else {
377
378 newClass = beginClass;
379 isEnd = false;
380 Util.LOG.debug("BeginEnd: begin classification "
381 + beginBest + " wins over end classification "
382 + endBest + "due to higher confidence");
383
384
385 prob = beginBest.getProbability();
386 }
387 } else {
388
389 newClass = beginClass;
390
391
392 prob = beginBest.getProbability();
393 }
394 } else {
395
396 newClass = beginClass;
397
398
399 prob = beginBest.getProbability();
400 }
401 } else {
402 if (isEnd) {
403 final String endClass =
404 endBest.getType().substring(END_PREFIX.length());
405
406
407 if (endClass.equals(lastType) && !lastEnd) {
408 newClass = endClass;
409
410
411 prob = endBest.getProbability();
412 } else {
413
414 isEnd = false;
415
416
417
418 if (lastEnd) {
419
420 newClass = null;
421 } else {
422
423 newClass = lastType;
424 }
425
426
427 prob = null;
428 }
429 } else {
430
431 if (lastEnd) {
432
433 newClass = null;
434 } else {
435
436 newClass = lastType;
437 }
438
439
440 prob = null;
441 }
442 }
443
444 final boolean discardPrevious;
445
446 if (isBegin && !lastEnd && (lastType != null)
447 && !lastType.equals(newClass)) {
448
449
450 discardPrevious = true;
451 } else {
452 discardPrevious = false;
453 }
454
455 if (newClass != null) {
456 if (getValidClasses().contains(newClass)) {
457 result = new CombinationState(newClass, isBegin, isEnd, prob,
458 discardPrevious);
459 } else {
460 throw new IllegalArgumentException(
461 "Type " + newClass + " derived from predictions "
462 + beginBest + " and " + endBest + " is invalid");
463 }
464 } else {
465 result = CombinationState.OUTSIDE;
466 }
467
468 return result;
469 }
470
471 /***
472 * {@inheritDoc}
473 */
474 public void updateState(final CombinationState newState,
475 final PredictionDistribution[] predictions,
476 final TokenDetails details) throws IllegalArgumentException {
477
478 super.updateState(newState, predictions, details);
479
480
481 if (reextract) {
482
483
484 if (predictions[0] != null) {
485 final Prediction beginBest = predictions[0].best();
486 final boolean isBegin = !beginBest.getType().equals(OTHER);
487
488 if (isBegin) {
489 storePositivePredictions(predictions[0].iterator(),
490 beginMap, BEGIN_PREFIX, details.getIndex());
491 }
492 }
493
494 if (predictions[1] != null) {
495 final Prediction endBest = predictions[1].best();
496 final boolean isEnd = !endBest.getType().equals(OTHER);
497
498 if (isEnd) {
499 storePositivePredictions(predictions[1].iterator(), endMap,
500 END_PREFIX, details.getIndex());
501 }
502 }
503
504 }
505 }
506
507 }