View Javadoc

/*
 * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
 * Development of this software is supported by the German Research Society,
 * Berlin-Brandenburg Graduate School in Distributed Information Systems
 * (DFG grant no. GRK 316).
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, visit
 * http://www.gnu.org/licenses/gpl.html or write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
22  package de.fu_berlin.ties.classify.feature;
23  
24  import java.util.ArrayList;
25  import java.util.Collection;
26  import java.util.Iterator;
27  import java.util.List;
28  
29  import org.apache.commons.lang.builder.ToStringBuilder;
30  
31  import de.fu_berlin.ties.text.TextTokenizer;
32  import de.fu_berlin.ties.text.TextUtils;
33  
34  /***
35   * A feature vector contains the features representing an instance. Subclasses
36   * must implement the {@link #store()} method to provide a collection for
37   * storing the features.
38   *
39   * <p>Instances of this class are thread-safe if and only if the provided
40   * collection is thread-safe (normally this won't be the case).
41   *
42   * @author Christian Siefkes
43   * @version $Revision: 1.15 $, $Date: 2006/10/21 16:03:57 $, $Author: siefkes $
44   */
45  public abstract class FeatureVector {
46  
47      /***
48       * Recursively follows {@linkplain #getTransformed() transformations} of a
49       * feature vector, returning the last transformation.
50       *
51       * @param features the feature vector to use
52       * @return the last transformation of the given feature vector
53       */
54      public static FeatureVector lastTransformation(
55              final FeatureVector features) {
56          // store all transformation in a list
57          final List<FeatureVector> transformations =
58              new ArrayList<FeatureVector>();
59          FeatureVector myFeatures = features;
60          transformations.add(myFeatures);
61  
62          // recursively follow transformations until reaching null
63          while (myFeatures.getTransformed() != null) {
64              myFeatures = myFeatures.getTransformed();
65              transformations.add(myFeatures);
66          }
67  
68          // return last transformation
69          return transformations.get(transformations.size() - 1);
70      }
71  
72      /***
73       * The sum of the {@linkplain #strength(Feature) strength values}
74       * of all features currently contained in this vector.
75       */
76  //    private double summedStrength = 0.0;
77  
78      /***
79       * A references to a {@linkplain FeatureTransformer transformed}
80       * representation of this feature vector; <code>null</code> if this instance
81       * has never been transformed.
82       */
83      private FeatureVector transformed = null;
84  
85  
86      /***
87       * Creates a new instance.
88       */
89      public FeatureVector() {
90          super();
91      }
92  
93  
94      /***
95       * Adds a feature to this vector.
96       *
97       * @param feature the feature to add
98       */
99      public void add(final Feature feature) {
100 //        preAddHook(feature);
101         store().add(feature);
102 
103         // add feature strength to summed strength
104 //        summedStrength += strength(feature);
105     }
106 
107     /***
108      * Adds all of the features in the specified Collection to this vector,
109      * in the order they are returned by the specified Collection's Iterator.
110      *
111      * @param coll a collection of features to add
112      * @throws ClassCastException if the collection contains elements that are
113      * not {@link Feature}s
114      */
115     public void addAll(final Collection coll) throws ClassCastException {
116         Feature feature;
117         final Iterator iter = coll.iterator();
118 
119         while (iter.hasNext()) {
120             feature = (Feature) iter.next();
121             add(feature);
122         }
123     }
124 
125     /***
126      * Adds all of the features in the specified feature vector to this vector,
127      * in the order they are returned by the specified feature vector.
128      *
129      * @param fv the vector of features to add
130      */
131     public void addAll(final FeatureVector fv) {
132         addAll(fv.store());
133     }
134 
135     /***
136      * Tokenizes a text, creating and adding a feature for each token.
137      *
138      * @param text the text to tokenize
139      * @param tokenizer the tokenizer to use
140      */
141     public void addAllTokens(final CharSequence text,
142                              final TextTokenizer tokenizer) {
143         tokenizer.reset(text);
144         String token;
145 
146         while ((token = tokenizer.nextToken()) != null) {
147             add(new DefaultFeature(token));
148         }
149     }
150 
151     /***
152      * Returns a sum of the {@linkplain #strength(Feature) strength values}
153      * of all features contained in this vector.
154      *
155      * @return the summed strength of all features
156      */
157 /*    public double getSummedStrength() {
158         return summedStrength;
159     } */
160 
161     /***
162      * Returns a reference to a {@linkplain FeatureTransformer transformed}
163      * representation of this feature vector; <code>null</code> if this instance
164      * has never been transformed.
165      *
166      * @return the value of the attribute
167      */
168     public FeatureVector getTransformed() {
169         return transformed;
170     }
171 
172     /***
173      * Flattens the contained features into a single character sequence, without
174      * including comments. Features are separated by newlines.
175      *
176      * @return the resulting character sequence, created by printing calling
177      * {@link Feature#getRepresentation()} on each feature in the vector; each
178      * feature representation is followed by a newline
179      */
180     public CharSequence flatten() {
181         return flatten(false);
182     }
183 
184     /***
185      * Flattens a list of features into a single character sequence. Features
186      * are separated by newlines.
187      *
188      * @param inclComments whether or not to include comments
189      * @return the resulting character sequence, created by printing calling
190      * {@link Feature#getRepresentation()} on each feature in the vector; each
191      * feature representation is followed by a newline
192      */
193     public CharSequence flatten(final boolean inclComments) {
194         final StringBuilder result = new StringBuilder();
195         final Iterator iter = iterator();
196         Feature currentFeature;
197         String currentRep;
198 
199         while (iter.hasNext()) {
200             currentFeature = (Feature) iter.next();
201             if (inclComments) {
202                 currentRep = currentFeature.getFullRepresentation();
203             } else {
204                 currentRep = currentFeature.getRepresentation();
205             }
206 
207             // representation w/o comment might be null
208             if (currentRep != null) {
209                 result.append(currentRep);
210                 result.append(TextUtils.LINE_SEPARATOR);
211             }
212         }
213         return result;
214     }
215 
216     /***
217      * Returns an iterator over the {@link Feature}s stored in this vector.
218      *
219      * @return an iterator over the stored features
220      */
221     public Iterator iterator() {
222         return store().iterator();
223     }
224 
225     /***
226      * Invokes the static {@link #lastTransformation(FeatureVector)} method,
227      * passing this instance as argument.
228      *
229      * @return the last transformation of this instance
230      */
231     public FeatureVector lastTransformation() {
232         return FeatureVector.lastTransformation(this);
233     }
234 
235     /***
236      * Empty method that can by implemented by child classes, for example if
237      * they calculate strength values in some special way. This method is
238      * called at the start of each {@link #add(Feature)} operation.
239      *
240      * @param feature the feature to add
241      */
242 /*    protected void preAddHook(final Feature feature) {
243     } */
244 
245     /***
246      * Modifies the summed strength of all features contained in this instance.
247      * This method exists for the convenience of child classes that calculate
248      * strength values in some special way.
249      *
250      * @param newSum the new value of the attribute
251      */
252 /*    protected void setSummedStrength(final double newSum) {
253         summedStrength = newSum;
254     } */
255 
256     /***
257      * Sets a new reference to a {@linkplain FeatureTransformer transformed}
258      * representation of this feature vector.
259      *
260      * @param newTransformed the new value of the attribute
261      */
262     public void setTransformed(final FeatureVector newTransformed) {
263         this.transformed = newTransformed;
264     }
265 
266     /***
267      * Returns the number of features stored in this vector.
268      *
269      * @return the number of features
270      */
271     public int size() {
272         return store().size();
273     }
274 
275     /***
276      * Returns a strength value for a feature contained in this vector.
277      * This implementation simply delegates to {@link Feature#getStrength()}.
278      *
279      * @param feature the feature to consider
280      * @return a strength value for the specified feature
281      */
282 /*    public double strength(final Feature feature) {
283         return feature.getStrength();
284     } */
285 
286     /***
287      * Returns the collection used for storing the features. The properties
288      * of a feature vector depend on the kind of collection that is used.
289      *
290      * @return the collection used for storing the features.
291      */
292     protected abstract Collection<Feature> store();
293 
294     /***
295      * Returns a string representation of this object.
296      *
297      * @return a textual representation
298      */
299     public String toString() {
300         return new ToStringBuilder(this)
301             .append("size", size())
302             .toString();
303     }
304 
305 }