1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.classify.feature;
23
24 import java.util.Collection;
25 import java.util.Iterator;
26
27 import org.apache.commons.collections.Bag;
28 import org.apache.commons.collections.bag.HashBag;
29
30 /***
31 * This feature vector implementation stores a multi-set of features.
32 * The order of features is not preserved. Internally it uses a
33 * {@link org.apache.commons.collections.bag.HashBag} as storage.
34 *
35 * <p>Instances of this class are not thread-safe and must be synchronized
36 * externally, if required.
37 *
38 * @author Christian Siefkes
39 * @version $Revision: 1.11 $, $Date: 2004/09/06 17:22:10 $, $Author: siefkes $
40 */
41 public class FeatureSet extends FeatureVector {
42
43 /***
44 * Constants specifying that feature frequencies are not considered
45 * when calculating {@linkplain FeatureVector#strength(Feature) strength}
46 * values.
47 */
48 public static final String STRENGTH_CONSTANT = "constant";
49
50 /***
51 * Constants specifying that the logarithm of feature frequencies is
52 * considered when calculating {@linkplain FeatureVector#strength(Feature)
53 * strength} values (1.0 is added to log(<em>f</em>) to avoid the result
54 * becoming 0).
55 */
56 public static final String STRENGTH_LOG = "log";
57
58 /***
59 * Constants specifying that the square root of feature frequencies is
60 * considered when calculating {@linkplain FeatureVector#strength(Feature)
61 * strength} values.
62 */
63 public static final String STRENGTH_SQUARE_ROOT = "sqrt";
64
65 /***
66 * Constants specifying that feature frequencies are considered linear
67 * (as is) when calculating {@linkplain FeatureVector#strength(Feature)
68 * strength} values (a feature occurring twice as frequently will be
69 * twice as strong).
70 */
71 public static final String STRENGTH_LINEAR = "linear";
72
73
74 /***
75 * Store features in an multi-set (bag).
76 */
77 private final Bag store = new HashBag();
78
79 /***
80 * The type of method used to consider feature frequencies when determining
81 * {@linkplain #strength(Feature) strength} values.
82 */
83 private final String strengthType;
84
85 /***
86 * Creates a new instance.
87 *
88 * @param strengthMethod The type of method used to consider feature
89 * frequencies when determining {@linkplain #strength(Feature) strength}
90 * values -- should be one of the <code>STRENGTH</code> constants
91 * defined in this class
92 */
93 public FeatureSet(final String strengthMethod) {
94 super();
95
96
97 strengthType = strengthMethod.toLowerCase().intern();
98 }
99
100 /***
101 * {@inheritDoc}
102 */
103 public Iterator iterator() {
104
105 return store.uniqueSet().iterator();
106 }
107
108 /***
109 * Returns the type of method used to consider feature frequencies when
110 * determining {@linkplain #strength(Feature) strength} values.
111 *
112 * @return the value of the attribute -- should be one of the
113 * <code>STRENGTH</code> constants defined in this class
114 */
115 public String getStrengthType() {
116 return strengthType;
117 }
118
119 /***
120 * Implementation of the hook provided by the superclass to ensure that
121 * the {@linkplain FeatureVector#getSummedStrength() summed strength}
122 * is updated correctly.
123 *
124 * @param feature the feature to add
125 */
126 protected void preAddHook(final Feature feature) {
127 if (store.contains(feature)) {
128
129
130 setSummedStrength(getSummedStrength() - strength(feature));
131 }
132 }
133
134 /***
135 * {@inheritDoc}
136 */
137 public int size() {
138
139 return store.uniqueSet().size();
140 }
141
142 /***
143 * {@inheritDoc}
144 */
145 protected Collection<Feature> store() {
146 return store;
147 }
148
149 /***
150 * Returns a strength value for a feature contained in this vector.
151 * This implementation simply delegates to {@link Feature#getStrength()}.
152 *
153 * @param feature the feature to consider
154 * @return a strength value for the specified feature
155 */
156 public double strength(final Feature feature) {
157 final double baseStrength = super.strength(feature);
158 final int frequency = store.getCount(feature);
159 final double result;
160
161
162 if (strengthType == STRENGTH_CONSTANT) {
163 result = baseStrength;
164 } else if (strengthType == STRENGTH_LOG) {
165
166 result = baseStrength * (Math.log(frequency) + 1.0);
167 } else if (strengthType == STRENGTH_SQUARE_ROOT) {
168 result = baseStrength * Math.sqrt(frequency);
169 } else if (strengthType == STRENGTH_LINEAR) {
170 result = baseStrength * frequency;
171 } else {
172 throw new IllegalArgumentException("Unsupported strength type: "
173 + strengthType);
174 }
175
176
177
178
179
180
181
182 return result;
183 }
184
185 }