1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.classify.winnow;
23
24 import java.util.HashSet;
25 import java.util.Set;
26
27 import org.dom4j.QName;
28
29 import de.fu_berlin.ties.TiesConfiguration;
30 import de.fu_berlin.ties.classify.feature.Feature;
31 import de.fu_berlin.ties.io.XMLStorable;
32 import de.fu_berlin.ties.util.AdaptableLRUMap;
33 import de.fu_berlin.ties.util.Pruner;
34 import de.fu_berlin.ties.util.RemoveCallback;
35 import de.fu_berlin.ties.util.Util;
36 import de.fu_berlin.ties.xml.dom.DOMUtils;
37
38 /***
39 * Feature store used by {@link Winnow}. Controls the number of stored
40 * features and implements pruning based on the LRU (least recently used)
41 * principle.
42 *
43 * <p>Instances of this class are synchronized externally by Winnow.
44 *
45 * @author Christian Siefkes
46 * @version $Revision: 1.33 $, $Date: 2006/10/21 16:03:59 $, $Author: siefkes $
47 */
48 public abstract class WinnowStore
49 implements Pruner, RemoveCallback, XMLStorable {
50
51 /***
52 * Configuration parameter: whether "irrelevant" keys should be
53 * {@linkplain #isIgnoringIrrelevant() ignored}.
54 */
55 protected static final String CONFIG_IGNORE_IRRELEVANT =
56 "classifier.winnow.ignore.irrelevant";
57
58 /***
59 * Configuration parameter: Whether a shared feature store is used for all
60 * Winnow instances.
61 */
62 public static final String CONFIG_SHARED_STORE =
63 "classifier.winnow.shared-store";
64
65 /***
66 * Name of the main element used for XML serialization.
67 */
68 public static final QName ELEMENT_MAIN = DOMUtils.defaultName("features");
69
70 /***
71 * Name of the element used to store features for XML serialization.
72 */
73 protected static final QName ELEMENT_FEATURE =
74 DOMUtils.defaultName("feature");
75
76 /***
77 * Name of the attribute used to store feature hashes for XML serialization.
78 */
79 protected static final QName ATTRIB_HASH = DOMUtils.defaultName("hash");
80
81 /***
82 * Name of the attribute used to store feature weight arrays for XML
83 * serialization.
84 */
85 protected static final QName ATTRIB_WEIGHTS =
86 DOMUtils.defaultName("weights");
87
88 /***
89 * Name of the attribute used to store the initial weight for XML
90 * serialization.
91 */
92 protected static final QName ATTRIB_INIT_WEIGHTS =
93 DOMUtils.defaultName("init-weight");
94
95 /***
96 * Attribute name used for XML serialization.
97 */
98 protected static final QName ATTRIB_IGNORE_IRRELEVANT =
99 DOMUtils.defaultName("ignore-irrelevant");
100
101 /***
102 * Name of the XML attribute used to store number of elements to remove by
103 * each pruning operation.
104 */
105 protected static final QName ATTRIB_PRUNE_NUMBER =
106 DOMUtils.defaultName("prune-number");
107
108 /***
109 * Name of the XML attribute used to store number of candidates to consider
110 * for pruning.
111 */
112 protected static final QName ATTRIB_PRUNE_CANDIDATES =
113 DOMUtils.defaultName("prune-candidates");
114
115 /***
116 * Name of the attribute used to store the maximum number of features for
117 * XML serialization.
118 */
119 protected static final QName ATTRIB_MAX_SIZE =
120 DOMUtils.defaultName("max-size");
121
122
123 /***
124 * Factory method that creates and configures a new instance.
125 *
126 * @param initialWeight The initial weight of each feature -- this
127 * implementation prunes features whose weights deviate least from this
128 * initial weight
129 * @param config Used to configure this instance
130 * @param configSuffix Optional suffix appended to the configuration keys
131 * when configuring this instance; might be <code>null</code>
132 * @return the created store
133 */
134 public static WinnowStore create(final float initialWeight,
135 final TiesConfiguration config, final String configSuffix) {
136 final WinnowStore result;
137 final boolean useSharedStore = config.getBoolean(config.adaptKey(
138 CONFIG_SHARED_STORE, configSuffix));
139
140 if (useSharedStore) {
141 result = new SharedWinnowStore(initialWeight, config, configSuffix);
142 } else {
143 result =
144 new DefaultWinnowStore(initialWeight, config, configSuffix);
145 Util.LOG.debug("Initialized default winnow store");
146 }
147
148 return result;
149 }
150
151 /***
152 * Initializes the set of keys (feature hashes) that are relevant for
153 * classification.
154 *
155 * @param ignoreIrrelevant whether irrelevant features should be ignored
156 * @return the set to use; or <code>null</code> if
157 * <code>ignoreIrrelevant</code> is <code>false</code>
158 */
159 private static Set<Long> initRelevantKeys(final boolean ignoreIrrelevant) {
160 if (ignoreIrrelevant) {
161 return new HashSet<Long>();
162 } else {
163
164 return null;
165 }
166 }
167
168
169 /***
170 * Whether features within a certain range around the default weight are
171 * ignored during classification.
172 */
173 private final boolean ignoringIrrelevant;
174
175 /***
176 * A set of keys (feature hashes) that are relevant for classification;
177 * only used if {@linkplain #isIgnoringIrrelevant() irrelevant features are
178 * ignored}.
179 */
180 private final Set<Long> relevantKeys;
181
182
183 /***
184 * Creates a new instance.
185 *
186 * @param config Used to configure this instance
187 * @param configSuffix Optional suffix appended to the configuration keys
188 * when configuring this instance; might be <code>null</code>
189 */
190 public WinnowStore(final TiesConfiguration config,
191 final String configSuffix) {
192 this(config.getBoolean(config.adaptKey(CONFIG_IGNORE_IRRELEVANT,
193 configSuffix)));
194 }
195
196 /***
197 * Creates a new instance.
198 *
199 * @param ignoreIrrelevant whether features within a certain range around
200 * the default weight are ignored during classification
201 */
202 public WinnowStore(final boolean ignoreIrrelevant) {
203 super();
204 ignoringIrrelevant = ignoreIrrelevant;
205 relevantKeys = initRelevantKeys(ignoreIrrelevant);
206 }
207
208 /***
209 * Allows raw access to the internal store. Used by the {@link #maxSize()},
210 * {@link #size()} and {@link #reset()} methods.
211 *
212 * @return the value of the attribute
213 */
214 protected abstract AdaptableLRUMap store();
215
216 /***
217 * Destroys the store if it will never be used again. The default
218 * implementation delegates to {@link #reset()}, but subclasses can
219 * overwrite this behaviour if appropriate.
220 */
221 public void destroy() {
222 reset();
223 }
224
225 /***
226 * Returns the weights of a feature.
227 *
228 * @param feature the feature to look up
229 * @return The weights of this feature
230 */
231 public abstract float[] getWeights(final Feature feature);
232
233 /***
234 * Helper method that initializes the internal store.
235 *
236 * @param featureNum The number of features to store
237 * @param candidates The number of candidates to consider for each pruning
238 * operation
239 * @param pruneNum The number of elements to remove by each pruning
240 * operation, must not be larger than <code>candidates</code>
241 * @return the store to use
242 */
243 protected AdaptableLRUMap initStore(final int featureNum,
244 final int candidates, final int pruneNum) {
245 return new AdaptableLRUMap(featureNum, this, this, candidates,
246 pruneNum);
247 }
248
249 /***
250 * Helper method that initializes the internal store.
251 *
252 * @param config Used to configure this instance
253 * @param configSuffix Optional suffix appended to the configuration keys
254 * when configuring this instance; might be <code>null</code>
255 * @return the store to use
256 */
257 protected AdaptableLRUMap initStore(final TiesConfiguration config,
258 final String configSuffix) {
259 return initStore(config.getInt(config.adaptKey(
260 "classifier.winnow.features", configSuffix)),
261 config.getInt(config.adaptKey(
262 "prune.candidates", configSuffix)),
263 config.getInt(config.adaptKey(
264 "prune.num", configSuffix)));
265 }
266
267 /***
268 * Whether features within a certain range around the default weight are
269 * ignored during classification.
270 *
271 * @return the value of the attribute
272 */
273 public boolean isIgnoringIrrelevant() {
274 return ignoringIrrelevant;
275 }
276
277 /***
278 * Whether a feature is considered relevant for classification.
279 *
280 * @param feature the feature to look up
281 * @return whether the specified feature is relevant for classification
282 */
283 public boolean isRelevant(final Feature feature) {
284 if (ignoringIrrelevant) {
285 return relevantKeys.contains(feature.compactRepresentation());
286 } else {
287
288 return true;
289 }
290 }
291
292 /***
293 * Returns the maximum number of features that can be stored by this
294 * instance.
295 *
296 * @return the value of the attribute
297 */
298 public int maxSize() {
299 return store().maxSize();
300 }
301
302 /***
303 * Stores new weights for a feature.
304 *
305 * @param feature the feature to use
306 * @param weights The new weights of this feature
307 */
308 public abstract void putWeights(final Feature feature,
309 final float[] weights);
310
311 /***
312 * Helper method that removes a key from the set of relevant keys.
313 * If no relevant keys are used, this method does nothing.
314 *
315 * @param key the key to remove
316 */
317 protected void removeFromRelevantKeys(final Long key) {
318 if (relevantKeys != null) {
319
320 relevantKeys.remove(key);
321 }
322 }
323
324 /***
325 * Resets the store, completely deleting the prediction model.
326 */
327 public void reset() {
328 store().clear();
329
330 if (relevantKeys != null) {
331 relevantKeys.clear();
332 }
333 }
334
335 /***
336 * Marks a feature as relevant or irrelevant for classification.
337 * Initially all features are considered irrelevant.
338 *
339 * @param feature the feature to mark
340 * @param relevant whether or not the feature is relevant
341 */
342 public void setRelevant(final Feature feature,
343 final boolean relevant) {
344 final Long featureHash = feature.compactRepresentation();
345
346 if (relevantKeys != null) {
347 if (relevant) {
348 relevantKeys.add(featureHash);
349 } else {
350 relevantKeys.remove(featureHash);
351 }
352 }
353 }
354
355 /***
356 * Returns the number of features currently stored by this instance.
357 *
358 * @return the value of the attribute
359 */
360 public int size() {
361 return store().size();
362 }
363
364 }