View Javadoc

1   /*
2    * Copyright (C) 2003-2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.context;
23  
24  import java.util.LinkedList;
25  import java.util.List;
26  
27  import org.apache.commons.lang.builder.ToStringBuilder;
28  
29  import de.fu_berlin.ties.classify.feature.Feature;
30  import de.fu_berlin.ties.text.TextUtils;
31  
32  /***
33   * An immutable representation of the local part of a feature, useful to cache
34   * and re-use the parts of feature representation that do not depend on the
35   * relative position of the element to classify. Must be wrapped in a
36   * {@link de.fu_berlin.ties.context.GlobalFeature} for classification.
37   *
38   * @author Christian Siefkes
39   * @version $Revision: 1.10 $, $Date: 2004/09/06 17:22:41 $, $Author: siefkes $
40   */
41  public class LocalFeature extends Feature {
42  
43      /***
44       * Constant for a marker feature that marks the start of an axis.
45       */
46      protected static final LocalFeature MARKER_START = new
47          LocalFeature("", FeatureType.MARKER, "Start", null, "Start of axis");
48  
49      /***
50       * Character opening a quoted value.
51       */
52      protected static final char OPEN = '{';
53  
54      /***
55       * Character closing a quoted value.
56       */
57      protected static final char CLOSE = '}';
58  
59      /***
60       * Factory method creating a feature that wraps an attribute value.
61       * It the <code>featureValue</code> contains whitespace, it is
62       * split and up to <code>splitMaximum</code> different features are created
63       * for the subsequences (discarding initial surplus sequences, if any).
64       *
65       * @param element the name of the element this feature is attached to
66       * @param attributeName the name of the attribute; must not be empty
67       * @param featureValue the textual content to wrap
68       * @param splitMaximum the maximum number of features to generate
69       * @return a list of created feature
70       * @throws IllegalArgumentException if the specified attribute name is
71       * empty (and thus not a valid name)
72       */
73      public static List<LocalFeature> createAttributeFeatures(
74              final String element, final String attributeName,
75              final String featureValue, final int splitMaximum)
76              throws IllegalArgumentException {
77          if (attributeName.length() == 0) {
78              throw new IllegalArgumentException("Attribute name is empty!");
79          }
80          return createFeatures(element, attributeName, featureValue,
81              splitMaximum, true);
82      }
83  
84      /***
85       * Factory method creating a feature that wraps a calculated value.
86       *
87       * @param element the name of the element this feature is attached to
88       * @param calcName the name identifying the calculated value
89       * @param featureValue the calculated value to wrap; should not contain
90       * whitespace (otherwise the classifier will get confesed)
91       * @return the created feature
92       */
93      public static LocalFeature createCalculatedFeature(final String element,
94              final String calcName, final String featureValue) {
95          return new LocalFeature(element, FeatureType.CALCULATED, calcName,
96              featureValue, null);
97      }
98  
99      /***
100      * Factory method creating a feature that wraps a calculated value that
101      * might contain whitespace.
102      * It the <code>featureValue</code> contains whitespace, it is
103      * split and up to <code>splitMaximum</code> different features are created
104      * for the subsequences (discarding initial surplus sequences, if any).
105      *
106      * @param element the name of the element this feature is attached to
107      * @param calcName the name identifying the calculated value
108      * @param featureValue the textual content to wrap
109      * @param splitMaximum the maximum number of features to generate
110      * @return a list of created features
111      * @throws IllegalArgumentException if the specified <code>calcName</code>
112      * is empty (and thus not a valid name)
113      */
114     public static List<LocalFeature> createCalculatedFeatures(
115             final String element, final String calcName,
116             final String featureValue, final int splitMaximum)
117             throws IllegalArgumentException {
118         if (calcName.length() == 0) {
119             throw new IllegalArgumentException("Calculated name is empty!");
120         }
121         return createFeatures(element, calcName, featureValue,
122             splitMaximum, false);
123     }
124 
125     /***
126      * Factory method creating a feature that just represents an element itself.
127      *
128      * @param element the name of the element this feature is attached to
129      * @return the created feature
130      */
131     public static LocalFeature createElementFeature(final String element) {
132         return new LocalFeature(element, FeatureType.ELEMENT, "", null, null);
133     }
134 
135     /***
136      * Helper method creating feature(s) that wrap an attribute or calculated
137      * value or textual content. It the <code>featureValue</code> contains
138      * whitespace, it is split and up to <code>splitMaximum</code> different
139      * features are created for the subsequences (discarding any initial
140      * surplus sequences).
141      *
142      * @param element the name of the element this feature is attached to
143      * @param attributeName the name of the attribute; or the empty string when
144      * wrapping textual content
145      * @param featureValue the attribute value or textual content to wrap
146      * @param splitMaximum the maximum number of features to generate
147      * @return a list of created feature
148      * @param isAttribute whether this is a attribute or calculated feature
149      * (ignored for text features, i.e. when <code>attributeName</code> is
150      * empty)
151      */
152     private static List<LocalFeature> createFeatures(final String element,
153             final String attributeName, final String featureValue,
154             final int splitMaximum, final boolean isAttribute) {
155         final List<LocalFeature> result = new LinkedList<LocalFeature>();
156         final String[] splitValues = TextUtils.splitString(featureValue,
157             splitMaximum);
158 
159         // determine feature type
160         final FeatureType featureType;
161         if (attributeName.length() == 0) {
162             featureType = FeatureType.TEXT;
163         } else if (isAttribute) {
164             featureType = FeatureType.ATTRIBUTE;
165         } else {
166             featureType = FeatureType.CALCULATED;
167         }
168 
169         for (int i = 0; i < splitValues.length; i++) {
170             result.add(new LocalFeature(element, featureType, attributeName,
171                 splitValues[i], null));
172         }
173         return result;
174     }
175 
176     /***
177      * Factory method creating a feature that wraps textual content of an
178      * element. It the <code>featureValue</code> contains whitespace, it is
179      * split and up to <code>splitMaximum</code> different features are created
180      * for the subsequences (discarding initial surplus sequences, if any).
181      *
182      * @param element the name of the element this feature is attached to
183      * @param featureValue the textual content to wrap
184      * @param splitMaximum the maximum number of features to generate
185      * @return a list of created feature
186      */
187     public static List<LocalFeature> createTextFeatures(final String element,
188             final String featureValue, final int splitMaximum) {
189         return createFeatures(element, "", featureValue,
190             splitMaximum, false);
191     }
192 
193     /***
194      * Quotes a value text (by enclosing it between {@link #OPEN} and
195      * {@link #CLOSE} character).
196      *
197      * @param text the text to quote (might be <code>null</code>)
198      * @return the quoted value, or the empty string if <code>text</code> is
199      * <code>null</code>
200      */
201     public static String quote(final String text) {
202         final String result;
203         if (text != null) {
204             result = OPEN + text + CLOSE;
205         } else {
206             result = "";
207         }
208         return result;
209     }
210 
211     /***
212      * The name of the element this feature is attached to.
213      */
214     private final String elementName;
215 
216     /***
217      * The type of this feature.
218      */
219     private final FeatureType type;
220 
221     /***
222      * The name of this feature (empty for textual features).
223      */
224     private final String name;
225 
226     /***
227      * The value wrapped by this feature.
228      */
229     private final String value;
230 
231     /***
232      * The representation of this feature, calculated by combining the other
233      * fields. Stored for efficiency reasons.
234      */
235     private final String representation;
236 
237     /***
238      * Non-public constructor. Use the factory methods to create new
239      * instances; or use one of the predefined marker constants.
240      *
241      * @param element the name of the element this feature is attached to
242      * (should be empty for marker features)
243      * @param myType the type of this feature
244      * @param myName the name of this feature (should be empty for textual
245      * or element features)
246      * @param myValue the value wrapped by this feature (might be
247      * <code>null</code> for marker or element features)
248      * @param myComment a comment on this feature, ignored for classification;
249      * might be <code>null</code>
250      */
251     protected LocalFeature(final String element, final FeatureType myType,
252             final String myName, final String myValue, final String myComment) {
253         super(myComment);
254         elementName = element;
255         type = myType;
256         name = myName;
257         value = myValue;
258         representation = elementName + type.getMark() + name + quote(value);
259     }
260 
261     /***
262      * Returns the name of the element this feature is attached to.
263      *
264      * @return the name of the element
265      */
266     public String getElementName() {
267         return elementName;
268     }
269 
270     /***
271      * Returns the name of this feature.
272      *
273      * @return the name of this feature (the empty string if this is a textual
274      * feature)
275      */
276     public String getName() {
277         return name;
278     }
279 
280     /***
281      * Returns the representation of this feature, to be used for
282      * classification.
283      *
284      * @return the feature representation, or <code>null</code> if this feature
285      * contains only a comment
286      */
287     public String getRepresentation() {
288         return representation;
289     }
290 
291     /***
292      * Returns the type of this feature.
293      *
294      * @return the type
295      */
296     public FeatureType getType() {
297         return type;
298     }
299 
300     /***
301      * Returns the value wrapped by this feature.
302      *
303      * @return the value
304      */
305     public String getValue() {
306         return value;
307     }
308 
309     /***
310      * Returns a string representation of this object.
311      *
312      * @return a textual representation combining all relevant fields
313      */
314     public String toString() {
315         return new ToStringBuilder(this)
316             .appendSuper(super.toString())
317             .append("representation", representation)
318             .toString();
319     }
320 
321 }