1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.combi;
23
24 import java.util.Collections;
25 import java.util.Iterator;
26 import java.util.Set;
27 import java.util.SortedSet;
28 import java.util.TreeSet;
29
30 import de.fu_berlin.ties.classify.Prediction;
31 import de.fu_berlin.ties.classify.PredictionDistribution;
32 import de.fu_berlin.ties.text.TokenDetails;
33
34 /***
35 * A combination strategy using begin/after tagging (also called "BIA" tagging
36 * due to the prefixed used). Classes are prefixed with "B-" (first element of
37 * an instance), "I-" (inner element of an instance), "A-" (first element
38 * <em>after</em> an instance that is not part of an other instance).
39 * "A" is used for anything else/outside of any class (except where the "A-"
40 * prefix is used).
41 *
42 * <p>{@link de.fu_berlin.ties.combi.CombinationState#isEnd()} is not
43 * supported by this class and will always return <code>false</code>.
44 *
45 * @author Christian Siefkes
46 * @version $Revision: 1.15 $, $Date: 2006/10/21 16:04:01 $, $Author: siefkes $
47 */
48 public class BeginAfterStrategy extends CombinationStrategy {
49
50 /***
51 * The character terminating each prefix.
52 */
53 private static final char PREFIX_TERMINATOR = '-';
54
55 /***
56 * Prefix used to classify the first element of an instance.
57 */
58 private static final String BEGIN_PREFIX = "B" + PREFIX_TERMINATOR;
59
60 /***
61 * Prefix used to classify elements within instances.
62 */
63 private static final String IN_PREFIX = "I" + PREFIX_TERMINATOR;
64
65 /***
66 * Prefix used to classify the first element <em>after</em> an instance
67 * that is not part of an other instance.
68 */
69 private static final String AFTER_PREFIX = "A" + PREFIX_TERMINATOR;
70
71 /***
72 * String used to classify outside/other (except where "A-" is used).
73 * Actually called "A" so it will be picked in case of a tie ("A" is
74 * sorted before "A-" etc.).
75 */
76 private static final String OUTSIDE = "A";
77
78 /***
79 * The set of all classes prefixed with {@link #BEGIN_PREFIX}, stored for
80 * efficiency.
81 */
82 private final TreeSet<String> bClasses = new TreeSet<String>();
83
84 /***
85 * An immutable set of all classes (Strings) that can possible occur
86 * during classification.
87 */
88 private final SortedSet[] allClasses;
89
90 /***
91 * Creates a new instance.
92 *
93 * @param theClasses a set of valid class names (String)
94 */
95 public BeginAfterStrategy(final Set<String> theClasses) {
96 super(theClasses);
97 final SortedSet<String> allValidClasses = new TreeSet<String>();
98 final Iterator<String> iter = theClasses.iterator();
99 String currentClass;
100
101
102 while (iter.hasNext()) {
103 currentClass = iter.next();
104 bClasses.add(BEGIN_PREFIX + currentClass);
105 allValidClasses.add(IN_PREFIX + currentClass);
106 allValidClasses.add(AFTER_PREFIX + currentClass);
107 }
108
109 allValidClasses.addAll(bClasses);
110 allValidClasses.add(OUTSIDE);
111 allClasses = new SortedSet[] {
112 Collections.unmodifiableSortedSet(allValidClasses)
113 };
114 }
115
116 /***
117 * Builds a set of class names (Strings) to pass to the classifier
118 * to consider for the next decision. This implementation returns the
119 * classes in alphabetic order (using a {@link SortedSet}).
120 *
121 * @return a set of class names
122 */
123 public Set[] activeClasses() {
124
125 final SortedSet<String> result = new TreeSet<String>(bClasses);
126 final String lastType = state().getType();
127
128 if ((lastType == null)) {
129
130 result.add(OUTSIDE);
131 } else {
132
133 result.add(IN_PREFIX + lastType);
134 result.add(AFTER_PREFIX + lastType);
135 }
136 return new Set[] {result};
137 }
138
139 /***
140 * {@inheritDoc}
141 */
142 public Set[] allClasses() {
143 return allClasses;
144 }
145
146 /***
147 * {@inheritDoc}
148 */
149 public String[] translateCurrentState(final CombinationState currentState)
150 throws IllegalArgumentException {
151 final String result;
152 final String type = currentState.getType();
153 final String lastType = state().getType();
154
155 if (type == null) {
156 if (lastType != null) {
157
158 result = AFTER_PREFIX + lastType;
159 } else {
160
161 result = OUTSIDE;
162 }
163 } else {
164 if (currentState.isBegin()) {
165 result = BEGIN_PREFIX + type;
166 } else {
167 result = IN_PREFIX + type;
168 }
169 }
170 return new String[] {result};
171 }
172
173 /***
174 * {@inheritDoc}
175 */
176 public CombinationState translateResult(
177 final PredictionDistribution[] predictions,
178 final TokenDetails details)
179 throws IllegalArgumentException {
180 final CombinationState result;
181 final Prediction best = predictions[0].best();
182 final String predictedClass = best.getType();
183
184 if (OUTSIDE.equals(predictedClass)) {
185
186 result = CombinationState.OUTSIDE;
187 } else {
188 final int prefixEndPos =
189 predictedClass.indexOf(PREFIX_TERMINATOR);
190
191 if (prefixEndPos >= 0) {
192 final String prefix;
193 final String newClass;
194
195 prefix = predictedClass.substring(0, prefixEndPos + 1);
196 newClass = predictedClass.substring(prefixEndPos + 1);
197
198 if (BEGIN_PREFIX.equals(prefix)) {
199 result = new CombinationState(newClass, true, false,
200 best.getProbability());
201 } else if (IN_PREFIX.equals(prefix)) {
202 result = new CombinationState(newClass, false, false,
203 best.getProbability());
204 } else if (AFTER_PREFIX.equals(prefix)) {
205
206 result = CombinationState.OUTSIDE;
207 } else {
208 throw new IllegalArgumentException(
209 "Prefix " + prefix + " of predicted class "
210 + predictedClass + " is invalid");
211 }
212 } else {
213 throw new IllegalArgumentException(
214 "Invalid predicted class (no prefix): " + predictedClass);
215 }
216 }
217 return result;
218 }
219
220 }