1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.classify.winnow;
23
24 import java.util.Collection;
25 import java.util.Iterator;
26 import java.util.Map;
27 import java.util.TreeMap;
28
29 import org.apache.commons.collections.MapIterator;
30 import org.apache.commons.lang.ObjectUtils;
31 import org.apache.commons.lang.builder.ToStringBuilder;
32 import org.dom4j.Element;
33
34 import de.fu_berlin.ties.TiesConfiguration;
35 import de.fu_berlin.ties.classify.feature.Feature;
36 import de.fu_berlin.ties.io.ObjectElement;
37 import de.fu_berlin.ties.util.AdaptableLRUMap;
38 import de.fu_berlin.ties.util.CollUtils;
39 import de.fu_berlin.ties.util.MultiValueMap;
40 import de.fu_berlin.ties.util.Util;
41
42 /***
43 * Default WinnowStore implementation.
44 *
45 * @author Christian Siefkes
46 * @version $Revision: 1.4 $, $Date: 2006/10/21 16:03:59 $, $Author: siefkes $
47 */
48 public class DefaultWinnowStore extends WinnowStore {
49
50 /***
51 * The initial weight of each feature. This implementation prunes
52 * the one of the candidate features whose weights deviate least from this
53 * initial weight.
54 */
55 private final float initWeight;
56
57 /***
58 * Map storing the feature weights. Uses the {@linkplain
59 * de.fu_berlin.ties.classify.feature.Feature compact representation} of
60 * features as keys and an array of floats storing the weight for each
61 * class as values.
62 */
63 private final AdaptableLRUMap store;
64
65
66 /***
67 * Creates a new instance.
68 *
69 * @param initialWeight The initial weight of each feature -- this
70 * implementation prunes features whose weights deviate least from this
71 * initial weight
72 * @param config Used to configure this instance
73 * @param configSuffix Optional suffix appended to the configuration keys
74 * when configuring this instance; might be <code>null</code>
75 */
76 public DefaultWinnowStore(final float initialWeight,
77 final TiesConfiguration config, final String configSuffix) {
78 super(config, configSuffix);
79 initWeight = initialWeight;
80 store = initStore(config, configSuffix);
81 }
82
83 /***
84 * Creates a new instance.
85 *
86 * @param initialWeight The initial weight of each feature -- this
87 * implementation prunes features whose weights deviate least from this
88 * initial weight
89 * @param featureNum The number of features to store
90 * @param ignoreIrrelevant whether features within a certain range around
91 * the default weight are ignored during classification
92 * @param candidates The number of candidates to consider for each pruning
93 * operation
94 * @param pruneNum The number of elements to remove by each pruning
95 * operation, must not be larger than <code>candidates</code>
96 */
97 public DefaultWinnowStore(final float initialWeight, final int featureNum,
98 final boolean ignoreIrrelevant, final int candidates,
99 final int pruneNum) {
100 super(ignoreIrrelevant);
101 initWeight = initialWeight;
102 store = initStore(featureNum, candidates, pruneNum);
103 }
104
105 /***
106 * Creates a new instance from an XML element, fulfilling the
107 * recommandation of the {@link de.fu_berlin.ties.io.XMLStorable} interface.
108 *
109 * @param element the XML element containing the serialized representation
110 */
111 public DefaultWinnowStore(final Element element) {
112 this(Util.asFloat(element.attributeValue(ATTRIB_INIT_WEIGHTS)),
113 Util.asInt(element.attributeValue(ATTRIB_MAX_SIZE)),
114
115 Util.asBoolean(ObjectUtils.defaultIfNull(element.attributeValue(
116 ATTRIB_IGNORE_IRRELEVANT), Boolean.FALSE)),
117 Util.asInt(element.attributeValue(ATTRIB_PRUNE_CANDIDATES)),
118 Util.asInt(element.attributeValue(ATTRIB_PRUNE_NUMBER)));
119
120 final Iterator featureIter =
121 element.elementIterator(ELEMENT_FEATURE);
122 Element featureElem;
123 Long featureHash;
124 float[] featureWeights;
125
126
127 while (featureIter.hasNext()) {
128 featureElem = (Element) featureIter.next();
129 featureHash = Long.valueOf(Util.asLong(
130 featureElem.attributeValue(ATTRIB_HASH)));
131 featureWeights = CollUtils.asFloatArray(
132 featureElem.attributeValue(ATTRIB_WEIGHTS));
133 putWeights(featureHash, featureWeights);
134 }
135 }
136
137
138 /***
139 * {@inheritDoc}
140 */
141 public float[] getWeights(final Feature feature) {
142 return (float[])
143 store.get(feature.compactRepresentation());
144 }
145
146 /***
147 * {@inheritDoc}
148 */
149 public void putWeights(final Feature feature, final float[] weights) {
150
151 putWeights(feature.compactRepresentation(), weights);
152 }
153
154 /***
155 * Stores new weights for a feature.
156 *
157 * @param featureHash the {@linkplain Feature#compactRepresentation()
158 * compact representation} of the feature to use
159 * @param weights The new weights of this feature
160 */
161 private void putWeights(final Long featureHash, final float[] weights) {
162 store.put(featureHash, weights);
163 }
164
165 /***
166 * {@inheritDoc}
167 */
168 public void removed(final Object key) {
169 removeFromRelevantKeys((Long) key);
170 }
171
172 /***
173 * {@inheritDoc}
174 * This implementation sorts the candidate by the deviation of their weights
175 * from the initial weights, so candidates with lower deviation will be
176 * pruned first. This are candidates that were involved in fewer unbalanced
177 * promotions and demotions than the other candidate.
178 */
179 public Map.Entry[] sortForPruning(final Map.Entry[] candidates) {
180
181 Map.Entry candidate;
182 float[] weights;
183 float deviation;
184 int j;
185 MultiValueMap<Float, Integer> sortedMap =
186 new MultiValueMap<Float, Integer>(
187 new TreeMap<Float, Collection<Integer>>());
188
189 for (int i = 0; i < candidates.length; i++) {
190 candidate = candidates[i];
191
192 weights = (float[]) candidate.getValue();
193
194 deviation = 0;
195
196 for (j = 0; j < weights.length; j++) {
197 deviation += Math.abs(weights[j] - initWeight);
198 }
199
200
201 sortedMap.put(new Float(deviation), Integer.valueOf(i));
202 }
203
204
205 final Collection<Integer> sortedValues = sortedMap.values();
206 Iterator<Integer> valueIter = sortedValues.iterator();
207 final Map.Entry[] result = new Map.Entry[candidates.length];
208 int i = 0;
209
210 while (valueIter.hasNext()) {
211 result[i] = candidates[valueIter.next().intValue()];
212 i++;
213 }
214
215
216 if (i != candidates.length) {
217 throw new RuntimeException("Implementation error: Retrieved "
218 + i + " pruning candidates " + " instead of "
219 + candidates.length + " ones");
220 }
221
222 return result;
223 }
224
225 /***
226 * {@inheritDoc}
227 */
228 protected AdaptableLRUMap store() {
229 return store;
230 }
231
232 /***
233 * {@inheritDoc}
234 */
235 public ObjectElement toElement() {
236
237 final ObjectElement result =
238 new ObjectElement(ELEMENT_MAIN, this.getClass());
239 result.addAttribute(ATTRIB_INIT_WEIGHTS, Float.toString(initWeight));
240 result.addAttribute(ATTRIB_MAX_SIZE, Integer.toString(maxSize()));
241 result.addAttribute(ATTRIB_IGNORE_IRRELEVANT,
242 Boolean.toString(isIgnoringIrrelevant()));
243 result.addAttribute(ATTRIB_PRUNE_CANDIDATES,
244 Integer.toString(store.getCandidateNumber()));
245 result.addAttribute(ATTRIB_PRUNE_NUMBER,
246 Integer.toString(store.getPruneNumber()));
247
248 final MapIterator mapIter = store.mapIterator();
249 Element featureElem;
250 Long featureHash;
251 float[] featureWeights;
252
253
254 while (mapIter.hasNext()) {
255 featureHash = (Long) mapIter.next();
256 featureWeights = (float[]) mapIter.getValue();
257 featureElem = result.addElement(ELEMENT_FEATURE);
258 featureElem.addAttribute(ATTRIB_HASH, featureHash.toString());
259 featureElem.addAttribute(ATTRIB_WEIGHTS,
260 CollUtils.flatten(featureWeights));
261 }
262
263 return result;
264 }
265
266 /***
267 * Returns a string representation of this object.
268 *
269 * @return a textual representation
270 */
271 public String toString() {
272 return new ToStringBuilder(this)
273 .append("current size", size())
274 .append("maximum size", maxSize())
275 .append("ignore irrelevant", isIgnoringIrrelevant())
276 .append("prune number", store.getPruneNumber())
277 .append("prune candidates", store.getCandidateNumber())
278 .toString();
279 }
280
281 }