View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.classify.winnow;
23  
24  import java.util.HashSet;
25  import java.util.Set;
26  
27  import org.dom4j.QName;
28  
29  import de.fu_berlin.ties.TiesConfiguration;
30  import de.fu_berlin.ties.classify.feature.Feature;
31  import de.fu_berlin.ties.io.XMLStorable;
32  import de.fu_berlin.ties.util.AdaptableLRUMap;
33  import de.fu_berlin.ties.util.Pruner;
34  import de.fu_berlin.ties.util.RemoveCallback;
35  import de.fu_berlin.ties.util.Util;
36  import de.fu_berlin.ties.xml.dom.DOMUtils;
37  
38  /***
39   * Feature store used by {@link Winnow}. Controls the number of stored
40   * features and implements pruning based on the LRU (least recently used)
41   * principle.
42   *
43   * <p>Instances of this class are synchronized externally by Winnow.
44   *
45   * @author Christian Siefkes
46   * @version $Revision: 1.33 $, $Date: 2006/10/21 16:03:59 $, $Author: siefkes $
47   */
48  public abstract class WinnowStore
49  implements Pruner, RemoveCallback, XMLStorable {
50  
51      /***
52       * Configuration parameter: whether "irrelevant" keys should be
53       * {@linkplain #isIgnoringIrrelevant() ignored}.
54       */
55      protected static final String CONFIG_IGNORE_IRRELEVANT =
56          "classifier.winnow.ignore.irrelevant";
57  
58      /***
59       * Configuration parameter: Whether a shared feature store is used for all
60       * Winnow instances.
61       */
62      public static final String CONFIG_SHARED_STORE =
63          "classifier.winnow.shared-store";
64  
65      /***
66       * Name of the main element used for XML serialization.
67       */
68      public static final QName ELEMENT_MAIN = DOMUtils.defaultName("features");
69  
70      /***
71       * Name of the element used to store features for XML serialization.
72       */
73      protected static final QName ELEMENT_FEATURE =
74          DOMUtils.defaultName("feature");
75  
76      /***
77       * Name of the attribute used to store feature hashes for XML serialization.
78       */
79      protected static final QName ATTRIB_HASH = DOMUtils.defaultName("hash");
80  
81      /***
82       * Name of the attribute used to store feature weight arrays for XML
83       * serialization.
84       */
85      protected static final QName ATTRIB_WEIGHTS =
86          DOMUtils.defaultName("weights");
87  
88      /***
89       * Name of the attribute used to store the initial weight for XML
90       * serialization.
91       */
92      protected static final QName ATTRIB_INIT_WEIGHTS =
93          DOMUtils.defaultName("init-weight");
94  
95      /***
96       * Attribute name used for XML serialization.
97       */
98      protected static final QName ATTRIB_IGNORE_IRRELEVANT =
99          DOMUtils.defaultName("ignore-irrelevant");
100 
101     /***
102      * Name of the XML attribute used to store number of elements to remove by
103      * each pruning operation.
104      */
105     protected static final QName ATTRIB_PRUNE_NUMBER =
106         DOMUtils.defaultName("prune-number");
107 
108     /***
109      * Name of the XML attribute used to store number of candidates to consider
110      * for pruning.
111      */
112     protected static final QName ATTRIB_PRUNE_CANDIDATES =
113         DOMUtils.defaultName("prune-candidates");
114 
115     /***
116      * Name of the attribute used to store the maximum number of features for
117      * XML serialization.
118      */
119     protected static final QName ATTRIB_MAX_SIZE =
120         DOMUtils.defaultName("max-size");
121 
122 
123     /***
124      * Factory method that creates and configures a new instance.
125      *
126      * @param initialWeight The initial weight of each feature -- this
127      * implementation prunes features whose weights deviate least from this
128      * initial weight
129      * @param config Used to configure this instance
130      * @param configSuffix Optional suffix appended to the configuration keys
131      * when configuring this instance; might be <code>null</code>
132      * @return the created store
133      */
134     public static WinnowStore create(final float initialWeight,
135             final TiesConfiguration config, final String configSuffix) {
136         final WinnowStore result;
137         final boolean useSharedStore = config.getBoolean(config.adaptKey(
138                 CONFIG_SHARED_STORE, configSuffix));
139 
140         if (useSharedStore) {
141             result = new SharedWinnowStore(initialWeight, config, configSuffix);
142         } else {
143             result =
144                 new DefaultWinnowStore(initialWeight, config, configSuffix);
145             Util.LOG.debug("Initialized default winnow store");
146         }
147 
148         return result;
149     }
150 
151     /***
152      * Initializes the set of keys (feature hashes) that are relevant for
153      * classification.
154      *
155      * @param ignoreIrrelevant whether irrelevant features should be ignored
156      * @return the set to use; or <code>null</code> if
157      * <code>ignoreIrrelevant</code> is <code>false</code>
158      */
159     private static Set<Long> initRelevantKeys(final boolean ignoreIrrelevant) {
160         if (ignoreIrrelevant) {
161             return new HashSet<Long>();
162         } else {
163             // not used
164             return null;
165         }
166     }
167 
168 
169     /***
170      * Whether features within a certain range around the default weight are
171      * ignored during classification.
172      */
173     private final boolean ignoringIrrelevant;
174 
175     /***
176      * A set of keys (feature hashes) that are relevant for classification;
177      * only used if {@linkplain #isIgnoringIrrelevant() irrelevant features are
178      * ignored}.
179      */
180     private final Set<Long> relevantKeys;
181 
182 
183     /***
184      * Creates a new instance.
185      *
186      * @param config Used to configure this instance
187      * @param configSuffix Optional suffix appended to the configuration keys
188      * when configuring this instance; might be <code>null</code>
189      */
190     public WinnowStore(final TiesConfiguration config,
191                        final String configSuffix) {
192         this(config.getBoolean(config.adaptKey(CONFIG_IGNORE_IRRELEVANT,
193                 configSuffix)));
194     }
195 
196     /***
197      * Creates a new instance.
198      *
199      * @param ignoreIrrelevant whether features within a certain range around
200      * the default weight are ignored during classification
201      */
202     public WinnowStore(final boolean ignoreIrrelevant) {
203         super();
204         ignoringIrrelevant = ignoreIrrelevant;
205         relevantKeys = initRelevantKeys(ignoreIrrelevant);
206     }
207 
208     /***
209      * Allows raw access to the internal store. Used by the {@link #maxSize()},
210      * {@link #size()} and {@link #reset()} methods.
211      *
212      * @return the value of the attribute
213      */
214     protected abstract AdaptableLRUMap store();
215 
216     /***
217      * Destroys the store if it will never be used again. The default
218      * implementation delegates to {@link #reset()}, but subclasses can
219      * overwrite this behaviour if appropriate.
220      */
221     public void destroy() {
222         reset();
223     }
224 
225     /***
226      * Returns the weights of a feature.
227      *
228      * @param feature the feature to look up
229      * @return The weights of this feature
230      */
231     public abstract float[] getWeights(final Feature feature);
232 
233     /***
234      * Helper method that initializes the internal store.
235      *
236      * @param featureNum The number of features to store
237      * @param candidates The number of candidates to consider for each pruning
238      * operation
239      * @param pruneNum The number of elements to remove by each pruning
240      * operation, must not be larger than <code>candidates</code>
241      * @return the store to use
242      */
243     protected AdaptableLRUMap initStore(final int featureNum,
244             final int candidates, final int pruneNum) {
245         return new AdaptableLRUMap(featureNum, this, this, candidates,
246                 pruneNum);
247     }
248 
249     /***
250      * Helper method that initializes the internal store.
251      *
252      * @param config Used to configure this instance
253      * @param configSuffix Optional suffix appended to the configuration keys
254      * when configuring this instance; might be <code>null</code>
255      * @return the store to use
256      */
257     protected AdaptableLRUMap initStore(final TiesConfiguration config,
258             final String configSuffix) {
259         return initStore(config.getInt(config.adaptKey(
260                 "classifier.winnow.features", configSuffix)),
261                 config.getInt(config.adaptKey(
262                         "prune.candidates", configSuffix)),
263                 config.getInt(config.adaptKey(
264                         "prune.num", configSuffix)));
265     }
266 
267     /***
268      * Whether features within a certain range around the default weight are
269      * ignored during classification.
270      *
271      * @return the value of the attribute
272      */
273     public boolean isIgnoringIrrelevant() {
274         return ignoringIrrelevant;
275     }
276 
277     /***
278      * Whether a feature is considered relevant for classification.
279      *
280      * @param feature the feature to look up
281      * @return whether the specified feature is relevant for classification
282      */
283     public boolean isRelevant(final Feature feature) {
284         if (ignoringIrrelevant) {
285             return relevantKeys.contains(feature.compactRepresentation());
286         } else {
287             // all features are considered relevant
288             return true;
289         }
290     }
291 
292     /***
293      * Returns the maximum number of features that can be stored by this
294      * instance.
295      *
296      * @return the value of the attribute
297      */
298     public int maxSize() {
299         return store().maxSize();
300     }
301 
302     /***
303      * Stores new weights for a feature.
304      *
305      * @param feature the feature to use
306      * @param weights The new weights of this feature
307      */
308     public abstract void putWeights(final Feature feature,
309             final float[] weights);
310 
311     /***
312      * Helper method that removes a key from the set of relevant keys.
313      * If no relevant keys are used, this method does nothing.
314      *
315      * @param key the key to remove
316      */
317     protected void removeFromRelevantKeys(final Long key) {
318         if (relevantKeys != null) {
319             // remove from set of relevant keys
320             relevantKeys.remove(key);
321         }
322     }
323 
324     /***
325      * Resets the store, completely deleting the prediction model.
326      */
327     public void reset() {
328         store().clear();
329 
330         if (relevantKeys != null) {
331             relevantKeys.clear();
332         }
333     }
334 
335     /***
336      * Marks a feature as relevant or irrelevant for classification.
337      * Initially all features are considered irrelevant.
338      *
339      * @param feature the feature to mark
340      * @param relevant whether or not the feature is relevant
341      */
342     public void setRelevant(final Feature feature,
343             final boolean relevant) {
344         final Long featureHash = feature.compactRepresentation();
345 
346         if (relevantKeys != null) {
347             if (relevant) {
348                 relevantKeys.add(featureHash);
349             } else {
350                 relevantKeys.remove(featureHash);
351             }
352         }
353     }
354 
355     /***
356      * Returns the number of features currently stored by this instance.
357      *
358      * @return the value of the attribute
359      */
360     public int size() {
361         return store().size();
362     }
363 
364 }