/*
 * Copyright (C) 2005-2006 Christian Siefkes <christian@siefkes.net>.
 * Development of this software is supported by the German Research Society,
 * Berlin-Brandenburg Graduate School in Distributed Information Systems
 * (DFG grant no. GRK 316).
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, visit
 * http://www.gnu.org/licenses/gpl.html or write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
package de.fu_berlin.ties.classify.winnow;

import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.collections.MapIterator;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.dom4j.Element;

import de.fu_berlin.ties.TiesConfiguration;
import de.fu_berlin.ties.classify.feature.Feature;
import de.fu_berlin.ties.io.ObjectElement;
import de.fu_berlin.ties.util.AdaptableLRUMap;
import de.fu_berlin.ties.util.CollUtils;
import de.fu_berlin.ties.util.MultiValueMap;
import de.fu_berlin.ties.util.Util;

/***
 * Default WinnowStore implementation.
 *
 * @author Christian Siefkes
 * @version $Revision: 1.4 $, $Date: 2006/10/21 16:03:59 $, $Author: siefkes $
 */
public class DefaultWinnowStore extends WinnowStore {

    /***
     * The initial weight of each feature. This implementation prunes those
     * candidate features whose weights deviate least from this initial
     * weight.
     */
    private final float initWeight;

    /***
     * Map storing the feature weights. Uses the {@linkplain
     * de.fu_berlin.ties.classify.feature.Feature compact representation} of
     * features as keys and an array of floats storing the weight for each
     * class as values.
     */
    private final AdaptableLRUMap store;


    /***
     * Creates a new instance.
     *
     * @param initialWeight The initial weight of each feature -- this
     * implementation prunes features whose weights deviate least from this
     * initial weight
     * @param config Used to configure this instance
     * @param configSuffix Optional suffix appended to the configuration keys
     * when configuring this instance; might be <code>null</code>
     */
    public DefaultWinnowStore(final float initialWeight,
            final TiesConfiguration config, final String configSuffix) {
        super(config, configSuffix);
        initWeight = initialWeight;
        store = initStore(config, configSuffix);
    }

    /***
     * Creates a new instance.
     *
     * @param initialWeight The initial weight of each feature -- this
     * implementation prunes features whose weights deviate least from this
     * initial weight
     * @param featureNum The number of features to store
     * @param ignoreIrrelevant whether features within a certain range around
     * the default weight are ignored during classification
     * @param candidates The number of candidates to consider for each pruning
     * operation
     * @param pruneNum The number of elements to remove by each pruning
     * operation, must not be larger than <code>candidates</code>
     */
    public DefaultWinnowStore(final float initialWeight, final int featureNum,
            final boolean ignoreIrrelevant, final int candidates,
            final int pruneNum) {
        super(ignoreIrrelevant);
        initWeight = initialWeight;
        store = initStore(featureNum, candidates, pruneNum);
    }
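
    // Illustrative sketch (not part of the original source): direct
    // construction with explicit parameters. The numeric values below are
    // arbitrary assumptions chosen only to show the argument order; real
    // values normally come from the Winnow configuration.
    //
    //     final DefaultWinnowStore sketchStore = new DefaultWinnowStore(
    //             1.0f,    // initialWeight: pruning targets weights close to this value
    //             100000,  // featureNum: number of features the store may hold
    //             false,   // ignoreIrrelevant: skip near-default weights during classification
    //             20,      // candidates considered per pruning operation
    //             5);      // pruneNum: entries removed per pruning operation (<= candidates)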

    /***
     * Creates a new instance from an XML element, fulfilling the
     * recommendation of the {@link de.fu_berlin.ties.io.XMLStorable}
     * interface.
     *
     * @param element the XML element containing the serialized representation
     */
    public DefaultWinnowStore(final Element element) {
        this(Util.asFloat(element.attributeValue(ATTRIB_INIT_WEIGHTS)),
                Util.asInt(element.attributeValue(ATTRIB_MAX_SIZE)),
                // false is default value for legacy serializations
                Util.asBoolean(ObjectUtils.defaultIfNull(element.attributeValue(
                        ATTRIB_IGNORE_IRRELEVANT), Boolean.FALSE)),
                Util.asInt(element.attributeValue(ATTRIB_PRUNE_CANDIDATES)),
                Util.asInt(element.attributeValue(ATTRIB_PRUNE_NUMBER)));

        final Iterator featureIter =
            element.elementIterator(ELEMENT_FEATURE);
        Element featureElem;
        Long featureHash;
        float[] featureWeights;

        // restore feature weights
        while (featureIter.hasNext()) {
            featureElem = (Element) featureIter.next();
            featureHash = Long.valueOf(Util.asLong(
                    featureElem.attributeValue(ATTRIB_HASH)));
            featureWeights = CollUtils.asFloatArray(
                    featureElem.attributeValue(ATTRIB_WEIGHTS));
            putWeights(featureHash, featureWeights);
        }
    }
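
    // Illustrative sketch (not part of the original source): this Element
    // constructor is the counterpart of toElement() below, so a store can be
    // serialized and restored along these lines (variable names are assumed):
    //
    //     final ObjectElement serialized = originalStore.toElement();
    //     final DefaultWinnowStore restored = new DefaultWinnowStore(serialized);
    //     // restored now holds the same global attributes and feature weights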


    /***
     * {@inheritDoc}
     */
    public float[] getWeights(final Feature feature) {
        return (float[])
                store.get(feature.compactRepresentation());
    }

    /***
     * {@inheritDoc}
     */
    public void putWeights(final Feature feature, final float[] weights) {
        // delegate to private method
        putWeights(feature.compactRepresentation(), weights);
    }

    /***
     * Stores new weights for a feature.
     *
     * @param featureHash the {@linkplain Feature#compactRepresentation()
     * compact representation} of the feature to use
     * @param weights The new weights of this feature
     */
    private void putWeights(final Long featureHash, final float[] weights) {
        store.put(featureHash, weights);
    }

    /***
     * {@inheritDoc}
     */
    public void removed(final Object key) {
        removeFromRelevantKeys((Long) key);
    }

    /***
     * {@inheritDoc}
     * This implementation sorts the candidates by the deviation of their
     * weights from the initial weight, so candidates with lower deviation will
     * be pruned first. These are the candidates that were involved in fewer
     * unbalanced promotions and demotions than the other candidates.
     */
    public Map.Entry[] sortForPruning(final Map.Entry[] candidates) {
        // sort candidates by their weight deviation
        Map.Entry candidate;
        float[] weights;
        float deviation;
        int j;
        MultiValueMap<Float, Integer> sortedMap =
            new MultiValueMap<Float, Integer>(
                    new TreeMap<Float, Collection<Integer>>());

        for (int i = 0; i < candidates.length; i++) {
            candidate = candidates[i];
            //Util.LOG.debug("Retrieving weights for " + i + "-th candidate");
            weights = (float[]) candidate.getValue();
            //Util.LOG.debug("Retrieved weights for candidate");
            deviation = 0;

            for (j = 0; j < weights.length; j++) {
                deviation += Math.abs(weights[j] - initWeight);
            }

            // map from weight deviations to positions in the candidates array
            sortedMap.put(Float.valueOf(deviation), Integer.valueOf(i));
        }

        // convert collected values into array; TreeMap sorts them by deviation
        final Collection<Integer> sortedValues = sortedMap.values();
        Iterator<Integer> valueIter = sortedValues.iterator();
        final Map.Entry[] result = new Map.Entry[candidates.length];
        int i = 0;

        while (valueIter.hasNext()) {
            result[i] = candidates[valueIter.next().intValue()];
            i++;
        }

        // ensure that we retrieved all candidates
        if (i != candidates.length) {
            throw new RuntimeException("Implementation error: Retrieved "
                + i + " pruning candidates instead of "
                + candidates.length);
        }

        return result;
    }
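
    // Illustrative sketch (not part of the original source): assuming an
    // initial weight of 1.0, a candidate with weights {1.0, 1.0} has
    // deviation 0.0, one with {1.2, 0.9} has deviation 0.3, and one with
    // {2.0, 0.5} has deviation 1.5. sortForPruning() returns them in exactly
    // that order, so the nearly untouched {1.0, 1.0} candidate is pruned first.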

    /***
     * {@inheritDoc}
     */
    protected AdaptableLRUMap store() {
        return store;
    }

    /***
     * {@inheritDoc}
     */
    public ObjectElement toElement() {
        // create main element with global attributes
        final ObjectElement result =
            new ObjectElement(ELEMENT_MAIN, this.getClass());
        result.addAttribute(ATTRIB_INIT_WEIGHTS, Float.toString(initWeight));
        result.addAttribute(ATTRIB_MAX_SIZE, Integer.toString(maxSize()));
        result.addAttribute(ATTRIB_IGNORE_IRRELEVANT,
                Boolean.toString(isIgnoringIrrelevant()));
        result.addAttribute(ATTRIB_PRUNE_CANDIDATES,
                Integer.toString(store.getCandidateNumber()));
        result.addAttribute(ATTRIB_PRUNE_NUMBER,
                Integer.toString(store.getPruneNumber()));

        final MapIterator mapIter = store.mapIterator();
        Element featureElem;
        Long featureHash;
        float[] featureWeights;

        // store hash key + weight array of each feature
        while (mapIter.hasNext()) {
            featureHash = (Long) mapIter.next();
            featureWeights = (float[]) mapIter.getValue();
            featureElem = result.addElement(ELEMENT_FEATURE);
            featureElem.addAttribute(ATTRIB_HASH, featureHash.toString());
            featureElem.addAttribute(ATTRIB_WEIGHTS,
                    CollUtils.flatten(featureWeights));
        }

        return result;
    }
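
    // Illustrative sketch (not part of the original source): the element
    // produced by toElement() is a root element named by ELEMENT_MAIN that
    // carries the ATTRIB_INIT_WEIGHTS, ATTRIB_MAX_SIZE, ATTRIB_IGNORE_IRRELEVANT,
    // ATTRIB_PRUNE_CANDIDATES and ATTRIB_PRUNE_NUMBER attributes, plus one
    // ELEMENT_FEATURE child per stored feature holding its ATTRIB_HASH key and
    // flattened ATTRIB_WEIGHTS array; the literal XML names come from the
    // ATTRIB_*/ELEMENT_* constants inherited by this class and are not
    // repeated here.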

    /***
     * Returns a string representation of this object.
     *
     * @return a textual representation
     */
    public String toString() {
        return new ToStringBuilder(this)
            .append("current size", size())
            .append("maximum size", maxSize())
            .append("ignore irrelevant", isIgnoringIrrelevant())
            .append("prune number", store.getPruneNumber())
            .append("prune candidates", store.getCandidateNumber())
            .toString();
    }

}