View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.classify.feature;
23  
24  import java.util.Collection;
25  import java.util.HashSet;
26  import java.util.Set;
27  
28  /***
29   * This feature vector implementation stores a set of features.
30   * The order of features is not preserved and duplicates are discarded.
31   *
32   * <p>Instances of this class are not thread-safe and must be synchronized
33   * externally, if required.
34   *
35   * @author Christian Siefkes
36   * @version $Revision: 1.17 $, $Date: 2006/10/21 16:03:57 $, $Author: siefkes $
37   */
38  public class FeatureSet extends FeatureVector {
39  
40      /***
41       * Constants specifying that feature frequencies are not considered
42       * when calculating {@linkplain FeatureVector#strength(Feature) strength}
43       * values.
44       */
45  //    public static final String STRENGTH_CONSTANT = "constant";
46  
47      /***
48       * Constants specifying that the logarithm of feature frequencies is
49       * considered when calculating {@linkplain FeatureVector#strength(Feature)
50       * strength} values (1.0 is added to log(<em>f</em>) to avoid the result
51       * becoming 0).
52       */
53  //    public static final String STRENGTH_LOG = "log";
54  
55      /***
56       * Constants specifying that the square root of feature frequencies is
57       * considered when calculating {@linkplain FeatureVector#strength(Feature)
58       * strength} values.
59       */
60  //    public static final String STRENGTH_SQUARE_ROOT = "sqrt";
61  
62      /***
63       * Constants specifying that feature frequencies are considered linear
64       * (as is) when calculating {@linkplain FeatureVector#strength(Feature)
65       * strength} values (a feature occurring twice as frequently will be
66       * twice as strong).
67       */
68  //    public static final String STRENGTH_LINEAR = "linear";
69  
70  
71      /***
72       * Store features in an set.
73       */
74      private final Set<Feature> store = new HashSet<Feature>();
75  //    private final Bag store = new HashBag();
76  
77      /***
78       * The type of method used to consider feature frequencies when determining
79       * {@linkplain #strength(Feature) strength} values.
80       */
81  //    private final String strengthType;
82  
83      /***
84       * Creates a new instance.
85       */
86      public FeatureSet() {
87          super();
88      }
89  
90  
91      /***
92       * Creates a new instance.
93       *
94       * @param strengthMethod The type of method used to consider feature
95       * frequencies when determining {@linkplain #strength(Feature) strength}
96       * values -- should be one of the <code>STRENGTH</code> constants
97       * defined in this class
98       */
99  /*    public FeatureSet(final String strengthMethod) {
100         super();
101 
102         // convert to lower case + internalize to allow efficient comparison
103         strengthType = strengthMethod.toLowerCase().intern();
104     } */
105 
106     /***
107      * {@inheritDoc}
108      */
109 /* no need to overwrite this since we switched from HashBag to HashSet
110     public Iterator iterator() {
111         // iterate each feature only once
112         return store.uniqueSet().iterator();
113     } */
114 
115     /***
116      * Returns the type of method used to consider feature frequencies when
117      * determining {@linkplain #strength(Feature) strength} values.
118      *
119      * @return the value of the attribute -- should be one of the
120      * <code>STRENGTH</code> constants defined in this class
121      */
122 /*    public String getStrengthType() {
123         return strengthType;
124     } */
125 
126     /***
127      * Implementation of the hook provided by the superclass to ensure that
128      * the {@linkplain FeatureVector#getSummedStrength() summed strength}
129      * is updated correctly.
130      *
131      * @param feature the feature to add
132      */
133 /*    protected void preAddHook(final Feature feature) {
134         if (store.contains(feature)) {
135             // subtract old feature strength from sum -- the superclass will
136             // re-add the new (overall) strength after adding the feature
137             setSummedStrength(getSummedStrength() - strength(feature));
138         }
139     } */
140 
141     /***
142      * {@inheritDoc}
143      */
144 /* no need to overwrite this since we switched from HashBag to HashSet
145     public int size() {
146         // count each feature only once
147         return store.uniqueSet().size();
148     } */
149 
150     /***
151      * {@inheritDoc}
152      */
153     protected Collection<Feature> store() {
154         return store;
155     }
156 
157     /***
158      * Returns a strength value for a feature contained in this vector.
159      *
160      * @param feature the feature to consider
161      * @return a strength value for the specified feature
162      */
163 /*    public double strength(final Feature feature) {
164         final double baseStrength = super.strength(feature);
165         final int frequency = store.getCount(feature);
166         final double result;
167 
168         // strengthType is internalized so we can do identity comparisons
169         if (strengthType == STRENGTH_CONSTANT) {
170             result = baseStrength;
171         } else if (strengthType == STRENGTH_LOG) {
172             // adding 1 so the result will never be 0.0 (but 1.0 for singletons)
173             result = baseStrength * (Math.log(frequency) + 1.0);
174         } else if (strengthType == STRENGTH_SQUARE_ROOT) {
175             result = baseStrength * Math.sqrt(frequency);
176         } else if (strengthType == STRENGTH_LINEAR) {
177             result = baseStrength * frequency;
178         } else {
179             throw new IllegalArgumentException("Unsupported strength type: "
180                     + strengthType);
181         }
182 
183         return result;
184     } */
185 
186 }