1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.combi;
23
24 import java.util.Collections;
25 import java.util.Iterator;
26 import java.util.Set;
27 import java.util.SortedSet;
28 import java.util.TreeSet;
29
30 import de.fu_berlin.ties.classify.PredictionDistribution;
31
32 /***
33 * A combination strategy using begin/after tagging (also called "BIA" tagging
34 * due to the prefixed used). Classes are prefixed with "B-" (first element of
35 * an instance), "I-" (inner element of an instance), "A-" (first element
36 * <em>after</em> an instance that is not part of an other instance).
37 * "A" is used for anything else/outside of any class (except where the "A-"
38 * prefix is used).
39 *
40 * <p>{@link de.fu_berlin.ties.combi.CombinationState#isEnd()} is not
41 * supported by this class and will always return <code>false</code>.
42 *
43 * @author Christian Siefkes
44 * @version $Revision: 1.9 $, $Date: 2004/09/16 11:12:33 $, $Author: siefkes $
45 */
46 public class BeginAfterStrategy extends CombinationStrategy {
47
48 /***
49 * The character terminating each prefix.
50 */
51 private static final char PREFIX_TERMINATOR = '-';
52
53 /***
54 * Prefix used to classify the first element of an instance.
55 */
56 private static final String BEGIN_PREFIX = "B" + PREFIX_TERMINATOR;
57
58 /***
59 * Prefix used to classify elements within instances.
60 */
61 private static final String IN_PREFIX = "I" + PREFIX_TERMINATOR;
62
63 /***
64 * Prefix used to classify the first element <em>after</em> an instance
65 * that is not part of an other instance.
66 */
67 private static final String AFTER_PREFIX = "A" + PREFIX_TERMINATOR;
68
69 /***
70 * String used to classify outside/other (except where "A-" is used).
71 * Actually called "A" so it will be picked in case of a tie ("A" is
72 * sorted before "A-" etc.).
73 */
74 private static final String OUTSIDE = "A";
75
76 /***
77 * The set of all classes prefixed with {@link #BEGIN_PREFIX}, stored for
78 * efficiency.
79 */
80 private final TreeSet<String> bClasses = new TreeSet<String>();
81
82 /***
83 * An immutable set of all classes (Strings) that can possible occur
84 * during classification.
85 */
86 private final SortedSet[] allClasses;
87
88 /***
89 * Creates a new instance.
90 *
91 * @param theClasses a set of valid class names (String)
92 */
93 public BeginAfterStrategy(final Set<String> theClasses) {
94 super(theClasses);
95 final SortedSet<String> allValidClasses = new TreeSet<String>();
96 final Iterator<String> iter = theClasses.iterator();
97 String currentClass;
98
99
100 while (iter.hasNext()) {
101 currentClass = iter.next();
102 bClasses.add(BEGIN_PREFIX + currentClass);
103 allValidClasses.add(IN_PREFIX + currentClass);
104 allValidClasses.add(AFTER_PREFIX + currentClass);
105 }
106
107 allValidClasses.addAll(bClasses);
108 allValidClasses.add(OUTSIDE);
109 allClasses = new SortedSet[] {
110 Collections.unmodifiableSortedSet(allValidClasses)
111 };
112 }
113
114 /***
115 * Builds a set of class names (Strings) to pass to the classifier
116 * to consider for the next decision. This implementation returns the
117 * classes in alphabetic order (using a {@link SortedSet}).
118 *
119 * @return a set of class names
120 */
121 public Set<String>[] activeClasses() {
122
123 final SortedSet<String> result = new TreeSet<String>(bClasses);
124 final String lastType = state().getType();
125
126 if ((lastType == null)) {
127
128 result.add(OUTSIDE);
129 } else {
130
131 result.add(IN_PREFIX + lastType);
132 result.add(AFTER_PREFIX + lastType);
133 }
134 return new Set[] {result};
135 }
136
137 /***
138 * {@inheritDoc}
139 */
140 public Set<String>[] allClasses() {
141 return allClasses;
142 }
143
144 /***
145 * {@inheritDoc}
146 */
147 public String[] translateCurrentState(final CombinationState currentState)
148 throws IllegalArgumentException {
149 final String result;
150 final String type = currentState.getType();
151 final String lastType = state().getType();
152
153 if (type == null) {
154 if (lastType != null) {
155
156 result = AFTER_PREFIX + lastType;
157 } else {
158
159 result = OUTSIDE;
160 }
161 } else {
162 if (currentState.isBegin()) {
163 result = BEGIN_PREFIX + type;
164 } else {
165 result = IN_PREFIX + type;
166 }
167 }
168 return new String[] {result};
169 }
170
171 /***
172 * {@inheritDoc}
173 */
174 public CombinationState translateResult(
175 final PredictionDistribution[] predictions)
176 throws IllegalArgumentException {
177 final CombinationState result;
178 final String predictedClass = predictions[0].best().getType();
179
180 if (OUTSIDE.equals(predictedClass)) {
181
182 result = CombinationState.OUTSIDE;
183 } else {
184 final int prefixEndPos =
185 predictedClass.indexOf(PREFIX_TERMINATOR);
186
187 if (prefixEndPos >= 0) {
188 final String prefix;
189 final String newClass;
190
191 prefix = predictedClass.substring(0, prefixEndPos + 1);
192 newClass = predictedClass.substring(prefixEndPos + 1);
193
194 if (BEGIN_PREFIX.equals(prefix)) {
195 result = new CombinationState(newClass, true, false);
196 } else if (IN_PREFIX.equals(prefix)) {
197 result = new CombinationState(newClass, false, false);
198 } else if (AFTER_PREFIX.equals(prefix)) {
199
200 result = CombinationState.OUTSIDE;
201 } else {
202 throw new IllegalArgumentException(
203 "Prefix " + prefix + " of predicted class "
204 + predictedClass + " is invalid");
205 }
206 } else {
207 throw new IllegalArgumentException(
208 "Invalid predicted class (no prefix): " + predictedClass);
209 }
210 }
211 return result;
212 }
213
214 }