1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.classify.feature;
23
24 import java.util.Collection;
25 import java.util.Iterator;
26
27 import org.apache.commons.lang.builder.ToStringBuilder;
28
29 import de.fu_berlin.ties.text.TextTokenizer;
30 import de.fu_berlin.ties.text.TextUtils;
31
32 /***
33 * A feature vector contains the features representing an instance. Subclasses
34 * must implement the {@link #store()} method to provide a collection for
35 * storing the features.
36 *
37 * <p>Instances of this class are thread-safe if and only if the provided
38 * collection is thread-safe (normally this won't be the case).
39 *
40 * @author Christian Siefkes
41 * @version $Revision: 1.6 $, $Date: 2004/09/06 17:22:10 $, $Author: siefkes $
42 */
43 public abstract class FeatureVector {
44
45 /***
46 * The sum of the {@linkplain #strength(Feature) strength values}
47 * of all features currently contained in this vector.
48 */
49 private double summedStrength = 0.0;
50
51 /***
52 * Creates a new instance.
53 */
54 public FeatureVector() {
55 super();
56 }
57
58 /***
59 * Adds a feature to this vector.
60 *
61 * @param feature the feature to add
62 */
63 public void add(final Feature feature) {
64 preAddHook(feature);
65 store().add(feature);
66
67
68 summedStrength += strength(feature);
69 }
70
71 /***
72 * Adds all of the features in the specified Collection to this vector,
73 * in the order they are returned by the specified Collection's Iterator.
74 *
75 * @param coll a collection of features to add
76 * @throws ClassCastException if the collection contains elements that are
77 * not {@link Feature}s
78 */
79 public void addAll(final Collection coll) throws ClassCastException {
80 Feature feature;
81 final Iterator iter = coll.iterator();
82
83 while (iter.hasNext()) {
84 feature = (Feature) iter.next();
85 add(feature);
86 }
87 }
88
89 /***
90 * Adds all of the features in the specified feature vector to this vector,
91 * in the order they are returned by the specified feature vector.
92 *
93 * @param fv the vector of features to add
94 */
95 public void addAll(final FeatureVector fv) {
96 addAll(fv.store());
97 }
98
99 /***
100 * Tokenizes a text, creating and adding a feature for each token.
101 *
102 * @param text the text to tokenize
103 * @param tokenizer the tokenizer to use
104 */
105 public void addAllTokens(final CharSequence text,
106 final TextTokenizer tokenizer) {
107 tokenizer.reset(text);
108 String token;
109
110 while ((token = tokenizer.nextToken()) != null) {
111 add(new DefaultFeature(token));
112 }
113 }
114
115 /***
116 * Returns a sum of the {@linkplain #strength(Feature) strength values}
117 * of all features contained in this vector.
118 *
119 * @return the summed strength of all features
120 */
121 public double getSummedStrength() {
122 return summedStrength;
123 }
124
125 /***
126 * Flattens the contained features into a single character sequence, without
127 * including comments. Features are separated by newlines.
128 *
129 * @return the resulting character sequence, created by printing calling
130 * {@link Feature#getRepresentation()} on each feature in the vector; each
131 * feature representation is followed by a newline
132 */
133 public CharSequence flatten() {
134 return flatten(false);
135 }
136
137 /***
138 * Flattens a list of features into a single character sequence. Features
139 * are separated by newlines.
140 *
141 * @param inclComments whether or not to include comments
142 * @return the resulting character sequence, created by printing calling
143 * {@link Feature#getRepresentation()} on each feature in the vector; each
144 * feature representation is followed by a newline
145 */
146 public CharSequence flatten(final boolean inclComments) {
147 final StringBuffer result = new StringBuffer();
148 final Iterator iter = iterator();
149 Feature currentFeature;
150 String currentRep;
151
152 while (iter.hasNext()) {
153 currentFeature = (Feature) iter.next();
154 if (inclComments) {
155 currentRep = currentFeature.getFullRepresentation();
156 } else {
157 currentRep = currentFeature.getRepresentation();
158 }
159
160
161 if (currentRep != null) {
162 result.append(currentRep);
163 result.append(TextUtils.LINE_SEPARATOR);
164 }
165 }
166 return result;
167 }
168
169 /***
170 * Returns an iterator over the {@link Feature}s stored in this vector.
171 *
172 * @return an iterator over the stored features
173 */
174 public Iterator iterator() {
175 return store().iterator();
176 }
177
178 /***
179 * Empty method that can by implemented by child classes, for example if
180 * they calculate strength values in some special way. This method is
181 * called at the start of each {@link #add(Feature)} operation.
182 *
183 * @param feature the feature to add
184 */
185 protected void preAddHook(final Feature feature) {
186 }
187
188 /***
189 * Modifies the summed strength of all features contained in this instance.
190 * This method exists for the convenience of child classes that calculate
191 * strength values in some special way.
192 *
193 * @param newSum the new value of the attribute
194 */
195 protected void setSummedStrength(final double newSum) {
196 summedStrength = newSum;
197 }
198
199 /***
200 * Returns the number of features stored in this vector.
201 *
202 * @return the number of features
203 */
204 public int size() {
205 return store().size();
206 }
207
208 /***
209 * Returns a strength value for a feature contained in this vector.
210 * This implementation simply delegates to {@link Feature#getStrength()}.
211 *
212 * @param feature the feature to consider
213 * @return a strength value for the specified feature
214 */
215 public double strength(final Feature feature) {
216 return feature.getStrength();
217 }
218
219 /***
220 * Returns the collection used for storing the features. The properties
221 * of a feature vector depend on the kind of collection that is used.
222 *
223 * @return the collection used for storing the features.
224 */
225 protected abstract Collection<Feature> store();
226
227 /***
228 * Returns a string representation of this object.
229 *
230 * @return a textual representation
231 */
232 public String toString() {
233 return new ToStringBuilder(this)
234 .append("feature store", store())
235 .toString();
236 }
237
238 }