1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.classify.feature;
23
24 import java.util.ArrayList;
25 import java.util.Collection;
26 import java.util.Iterator;
27 import java.util.List;
28
29 import org.apache.commons.lang.builder.ToStringBuilder;
30
31 import de.fu_berlin.ties.text.TextTokenizer;
32 import de.fu_berlin.ties.text.TextUtils;
33
34 /***
35 * A feature vector contains the features representing an instance. Subclasses
36 * must implement the {@link #store()} method to provide a collection for
37 * storing the features.
38 *
39 * <p>Instances of this class are thread-safe if and only if the provided
40 * collection is thread-safe (normally this won't be the case).
41 *
42 * @author Christian Siefkes
43 * @version $Revision: 1.15 $, $Date: 2006/10/21 16:03:57 $, $Author: siefkes $
44 */
45 public abstract class FeatureVector {
46
47 /***
48 * Recursively follows {@linkplain #getTransformed() transformations} of a
49 * feature vector, returning the last transformation.
50 *
51 * @param features the feature vector to use
52 * @return the last transformation of the given feature vector
53 */
54 public static FeatureVector lastTransformation(
55 final FeatureVector features) {
56
57 final List<FeatureVector> transformations =
58 new ArrayList<FeatureVector>();
59 FeatureVector myFeatures = features;
60 transformations.add(myFeatures);
61
62
63 while (myFeatures.getTransformed() != null) {
64 myFeatures = myFeatures.getTransformed();
65 transformations.add(myFeatures);
66 }
67
68
69 return transformations.get(transformations.size() - 1);
70 }
71
72 /***
73 * The sum of the {@linkplain #strength(Feature) strength values}
74 * of all features currently contained in this vector.
75 */
76
77
78 /***
79 * A references to a {@linkplain FeatureTransformer transformed}
80 * representation of this feature vector; <code>null</code> if this instance
81 * has never been transformed.
82 */
83 private FeatureVector transformed = null;
84
85
86 /***
87 * Creates a new instance.
88 */
89 public FeatureVector() {
90 super();
91 }
92
93
94 /***
95 * Adds a feature to this vector.
96 *
97 * @param feature the feature to add
98 */
99 public void add(final Feature feature) {
100
101 store().add(feature);
102
103
104
105 }
106
107 /***
108 * Adds all of the features in the specified Collection to this vector,
109 * in the order they are returned by the specified Collection's Iterator.
110 *
111 * @param coll a collection of features to add
112 * @throws ClassCastException if the collection contains elements that are
113 * not {@link Feature}s
114 */
115 public void addAll(final Collection coll) throws ClassCastException {
116 Feature feature;
117 final Iterator iter = coll.iterator();
118
119 while (iter.hasNext()) {
120 feature = (Feature) iter.next();
121 add(feature);
122 }
123 }
124
125 /***
126 * Adds all of the features in the specified feature vector to this vector,
127 * in the order they are returned by the specified feature vector.
128 *
129 * @param fv the vector of features to add
130 */
131 public void addAll(final FeatureVector fv) {
132 addAll(fv.store());
133 }
134
135 /***
136 * Tokenizes a text, creating and adding a feature for each token.
137 *
138 * @param text the text to tokenize
139 * @param tokenizer the tokenizer to use
140 */
141 public void addAllTokens(final CharSequence text,
142 final TextTokenizer tokenizer) {
143 tokenizer.reset(text);
144 String token;
145
146 while ((token = tokenizer.nextToken()) != null) {
147 add(new DefaultFeature(token));
148 }
149 }
150
151 /***
152 * Returns a sum of the {@linkplain #strength(Feature) strength values}
153 * of all features contained in this vector.
154 *
155 * @return the summed strength of all features
156 */
157
158
159
160
161 /***
162 * Returns a reference to a {@linkplain FeatureTransformer transformed}
163 * representation of this feature vector; <code>null</code> if this instance
164 * has never been transformed.
165 *
166 * @return the value of the attribute
167 */
168 public FeatureVector getTransformed() {
169 return transformed;
170 }
171
172 /***
173 * Flattens the contained features into a single character sequence, without
174 * including comments. Features are separated by newlines.
175 *
176 * @return the resulting character sequence, created by printing calling
177 * {@link Feature#getRepresentation()} on each feature in the vector; each
178 * feature representation is followed by a newline
179 */
180 public CharSequence flatten() {
181 return flatten(false);
182 }
183
184 /***
185 * Flattens a list of features into a single character sequence. Features
186 * are separated by newlines.
187 *
188 * @param inclComments whether or not to include comments
189 * @return the resulting character sequence, created by printing calling
190 * {@link Feature#getRepresentation()} on each feature in the vector; each
191 * feature representation is followed by a newline
192 */
193 public CharSequence flatten(final boolean inclComments) {
194 final StringBuilder result = new StringBuilder();
195 final Iterator iter = iterator();
196 Feature currentFeature;
197 String currentRep;
198
199 while (iter.hasNext()) {
200 currentFeature = (Feature) iter.next();
201 if (inclComments) {
202 currentRep = currentFeature.getFullRepresentation();
203 } else {
204 currentRep = currentFeature.getRepresentation();
205 }
206
207
208 if (currentRep != null) {
209 result.append(currentRep);
210 result.append(TextUtils.LINE_SEPARATOR);
211 }
212 }
213 return result;
214 }
215
216 /***
217 * Returns an iterator over the {@link Feature}s stored in this vector.
218 *
219 * @return an iterator over the stored features
220 */
221 public Iterator iterator() {
222 return store().iterator();
223 }
224
225 /***
226 * Invokes the static {@link #lastTransformation(FeatureVector)} method,
227 * passing this instance as argument.
228 *
229 * @return the last transformation of this instance
230 */
231 public FeatureVector lastTransformation() {
232 return FeatureVector.lastTransformation(this);
233 }
234
/**
 * Empty method that can be implemented by child classes, for example if
 * they calculate strength values in some special way. This method is
 * called at the start of each {@link #add(Feature)} operation.
 *
 * @param feature the feature to add
 */
242
243
244
245 /***
246 * Modifies the summed strength of all features contained in this instance.
247 * This method exists for the convenience of child classes that calculate
248 * strength values in some special way.
249 *
250 * @param newSum the new value of the attribute
251 */
252
253
254
255
256 /***
257 * Sets a new reference to a {@linkplain FeatureTransformer transformed}
258 * representation of this feature vector.
259 *
260 * @param newTransformed the new value of the attribute
261 */
262 public void setTransformed(final FeatureVector newTransformed) {
263 this.transformed = newTransformed;
264 }
265
266 /***
267 * Returns the number of features stored in this vector.
268 *
269 * @return the number of features
270 */
271 public int size() {
272 return store().size();
273 }
274
275 /***
276 * Returns a strength value for a feature contained in this vector.
277 * This implementation simply delegates to {@link Feature#getStrength()}.
278 *
279 * @param feature the feature to consider
280 * @return a strength value for the specified feature
281 */
282
283
284
285
/**
 * Returns the collection used for storing the features. The operations of
 * this class that read or modify the contained features (such as
 * {@link #add(Feature)}, {@link #iterator()} and {@link #size()}) call
 * this method and work directly on the returned collection, so the
 * properties of a feature vector depend on the kind of collection that is
 * used.
 *
 * <p>NOTE(review): callers appear to assume that every invocation yields
 * the same underlying collection -- confirm against the implementations.
 *
 * @return the collection used for storing the features; must not be
 * <code>null</code> since callers dereference it without a check
 */
protected abstract Collection<Feature> store();
293
294 /***
295 * Returns a string representation of this object.
296 *
297 * @return a textual representation
298 */
299 public String toString() {
300 return new ToStringBuilder(this)
301 .append("size", size())
302 .toString();
303 }
304
305 }