View Javadoc

1   /*
2    * Copyright (C) 2003-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.classify.feature;
23  
24  import org.apache.commons.lang.builder.ToStringBuilder;
25  
26  import de.fu_berlin.ties.util.Util;
27  
28  /***
29   * An abstract base class for immutable feature representation used for
30   * classification.
31   *
32   * @author Christian Siefkes
33   * @version $Revision: 1.14 $, $Date: 2006/10/21 16:03:57 $, $Author: siefkes $
34   */
35  public abstract class Feature {
36  
37      /***
38       * An optional comment on this feature, ignored for classification.
39       */
40      private final String comment;
41  
42      /***
43       * Caches the compact rep. for this feature.
44       */
45      private Long compact = null; // lazy initialization
46  
47      /***
48       * An array of objects that can be used by a user to store additional
49       * information about this feature; not used by default.
50       */
51      private Object[] userData;
52  
53      /***
54       * Creates a new instance, without storing a comment.
55       */
56      public Feature() {
57          this(null);
58      }
59  
60      /***
61       * Creates a new instance.
62       *
63       * @param myComment a comment on this feature, ignored for classification;
64       * might be <code>null</code>
65       */
66      public Feature(final String myComment) {
67          super();
68          comment = myComment;
69      }
70  
71      /***
72       * Returns a compact representation for this object. This implementation
73       * delegates to {@link Util#longHash(String)}.
74       *
75       * @return a long hash code value representing this object
76       */
77      public Long compactRepresentation() {
78          Long cr = compact;
79  
80          if (cr == null) {
81              // calculate + cache value (long String hash)
82              String rep = getRepresentation();
83  
84              if (rep == null) {
85                  // also consider comments if actual representation is missing
86                  rep = getFullRepresentation();
87              }
88  
89              cr = Long.valueOf(Util.longHash(rep));
90              compact = cr;
91          }
92  
93          return cr;
94      }
95  
96      /***
97       * Indicates whether some other object is "equal to" this one, fulfulling
98       * the {@link Object#equals(java.lang.Object)} contract. To be considered
99       * equal, the object must be a {@link Feature}. If the
100      * {@linkplain #getRepresentation() representation} of this instance is not
101      * <code>null</code>, the representations of the two features are compared.
102      * Otherwise the {@linkplain #getFullRepresentation() full representations}
103      * are compared. Thus for normal features comments and other fields will
104      * be ignored. For comment-only features, comments will be considered,
105      * other fields will be ignored.
106      *
107      * @param obj the reference object with which to compare
108      * @return <code>true</code> iff the specified object is a
109      * {@link Feature} equal to this instance
110      */
111     public boolean equals(final Object obj) {
112         if (obj == this) {
113             return true;
114         } else if (obj instanceof Feature) {
115             final Feature other = (Feature) obj;
116             final String myRep = getRepresentation();
117             if (myRep != null) {
118                 // compare representations if present
119                 return myRep.equals(other.getRepresentation());
120             } else {
121                 // otherwise compare full representations (incl. comments)
122                 return getFullRepresentation().equals(
123                     other.getFullRepresentation());
124             }
125         } else {
126             return false;
127         }
128     }
129 
130     /***
131      * Returns the comment attached to this feature, if any.
132      *
133      * @return the comment, or <code>null</code> is no comment was stored
134      */
135     public String getComment() {
136         return comment;
137     }
138 
139     /***
140      * Returns an array of objects that can be used by a user to store
141      * additional information about this feature; not used by default.
142      *
143      * @return the value of the attribute, defaults to <code>null</code>
144      */
145     public Object[] getUserData() {
146         return userData;
147     }
148 
149     /***
150      * Prints a full representation of this feature that contains both
151      * representation (if any) and comment (if any). The comment is preceded
152      * by a "#" character.
153      *
154      * @return the full representation
155      */
156     public String getFullRepresentation() {
157         return getFullRepresentation("#");
158     }
159 
160     /***
161      * Prints a full representation of this feature that contains both
162      * representation (if any) and comment (if any). The comment is preceded by
163      * the specified <code>separator</code> (surrounded by spaces).
164      *
165      * @param separator used to introduce the comment
166      * @return the full representation
167      */
168     public String getFullRepresentation(final String separator) {
169         final StringBuilder result = new StringBuilder();
170         final String representation = getRepresentation();
171         if ((representation != null) && (representation.length() > 0)) {
172             result.append(representation);
173         }
174 
175         if ((comment != null) && (comment.length() > 0)) {
176             if (result.length() > 0) {
177                 result.append(' ');
178             }
179             result.append(separator).append(' ').append(comment);
180         }
181         return result.toString();
182     }
183 
184     /***
185      * Abstract method for returning the representation of this feature, to be
186      * used for classification.
187      *
188      * @return the feature representation, or <code>null</code> if this feature
189      * contains only a comment
190      */
191     public abstract String getRepresentation();
192 
193     /***
194      * Returns a strength value for this feature. This method can be
195      * overwritten to assign higher strenghts to more important or more frequent
196      * features. This implementation always returns 1.0.
197      *
198      * <p>Typically you should call {@link FeatureVector#strength(Feature)}
199      * instead of this method to allow feature vectors to modify the strenghts
200      * of the stored features.
201      *
202      * @return a strength value for this feature -- 1.0 in this implementation
203      */
204 /*    public float getStrength() {
205         return 1.0f;
206     } */
207 
208     /***
209      * Returns a hash code value for this object, fulfulling the
210      * {@link Object#hashCode()} contract.
211      *
212      * @return a hash code value for this object
213      * @deprecated use {@link #compactRepresentation()} instead
214      */
215     public int hashCode() {
216         final String myRep = getRepresentation();
217         if (myRep != null) {
218             // hash representation if present
219             return myRep.hashCode();
220         } else {
221             // otherwise hash full representation (incl. comments)
222             return getFullRepresentation().hashCode();
223         }
224     }
225 
226     /***
227      * Replaces the array of objects that can be used by a user to stor
228      * additional information about this feature.
229      *
230      * @param newUserData the new value of the attribute
231      */
232     public void setUserData(final Object[] newUserData) {
233         this.userData = newUserData;
234     }
235 
236     /***
237      * Returns a string representation of this object.
238      *
239      * @return a textual representation
240      */
241     public String toString() {
242         final ToStringBuilder builder = new ToStringBuilder(this);
243         if ((comment != null) && (comment.length() > 0)) {
244             builder.append("comment", comment);
245         }
246         return builder.toString();
247     }
248 
249 }