View Javadoc

1   /*
2    * Copyright (C) 2005-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.classify.winnow;
23  
24  import java.util.ArrayList;
25  import java.util.Iterator;
26  import java.util.List;
27  import java.util.Map;
28  
29  import org.apache.commons.collections.MapIterator;
30  import org.apache.commons.lang.ObjectUtils;
31  import org.apache.commons.lang.StringUtils;
32  import org.apache.commons.lang.builder.ToStringBuilder;
33  import org.dom4j.Element;
34  import org.dom4j.QName;
35  
36  import de.fu_berlin.ties.TiesConfiguration;
37  import de.fu_berlin.ties.classify.feature.Feature;
38  import de.fu_berlin.ties.io.ObjectElement;
39  import de.fu_berlin.ties.util.AdaptableLRUMap;
40  import de.fu_berlin.ties.util.CollUtils;
41  import de.fu_berlin.ties.util.Util;
42  import de.fu_berlin.ties.xml.dom.DOMUtils;
43  
44  /***
45   * A WinnowStore implementation that is shared between multiple classifiers to
46   * allow more efficient access. <strong>Not that this class is not synchronized
47   * and requires class-wide (not only instance-wide synchronization) if
48   * instances of this type are to be used in different threads!</strong>
49   * 
50   * @author Christian Siefkes
51   * @version $Revision: 1.11 $, $Date: 2006/10/21 16:03:59 $, $Author: siefkes $
52   */
53  public class SharedWinnowStore extends WinnowStore {
54  
55      /***
56       * Attribute name used for XML serialization.
57       */
58      private static final QName ATTRIB_SLOT =
59          DOMUtils.defaultName("slot");
60  
61      /***
62       * A shared object that stores the feature weights for all instances
63       * of this type. Uses the
64       * {@linkplain de.fu_berlin.ties.classify.feature.Feature compact
65       * representation} of features as keys and an array of Objects storing
66       * the weights for all instances (each instance sets its Object to an
67       * array of floats).
68       */
69      private static AdaptableLRUMap store = null;
70  
71      /***
72       * A list of all instances of this type. Should be synchronized on itself
73       * when write access is necessary (in the constructor of new instances).
74       */
75      private static final List<SharedWinnowStore> ALL_INSTANCES =
76          new ArrayList<SharedWinnowStore>();
77  
78      /***
79       * Marker object used for weights for which no weights have been stored.
80       */
81      private static final Object[] UNKNOWN_MARKER = new Object[] {};
82  
83  
84      /***
85       * The number of the slot reserved by this instance in the shared
86       * {@link #store}.
87       */
88      private final int slotNo;
89  
90  
91      /***
92       * Creates a new instance.
93       *
94       * @param initialWeight the initial weight of each feature (currently not
95       * used)
96       * @param config used to configure this instance
97       * @param configSuffix optional suffix appended to the configuration keys
98       * when configuring this instance; might be <code>null</code>
99       */
100     public SharedWinnowStore(final float initialWeight,
101             final TiesConfiguration config, final String configSuffix) {
102         this(initialWeight, config.getBoolean(config.adaptKey(
103                 CONFIG_IGNORE_IRRELEVANT, configSuffix)), config, configSuffix);
104     }
105 
106     /***
107      * Creates a new instance.
108      *
109      * @param initialWeight The initial weight of each feature (currently not
110      * used)
111      * @param ignoreIrrelevant whether features within a certain range around
112      * the default weight are ignored during classification
113      * @param config used to configure this instance
114      * @param configSuffix optional suffix appended to the configuration keys
115      * when configuring this instance; might be <code>null</code>
116      */
117     public SharedWinnowStore(final float initialWeight,
118             final boolean ignoreIrrelevant, final TiesConfiguration config,
119             final String configSuffix) {
120         super(ignoreIrrelevant);
121 
122         // add to list of all instance, using next available slot number
123         slotNo = ALL_INSTANCES.size();
124         ALL_INSTANCES.add(this);
125 
126         if (slotNo == 0) {
127             // this is the first instance: init shared store
128             store = initStore(config, configSuffix);
129             Util.LOG.debug("Initialized shared winnow store");
130         }
131     }
132 
133     /***
134      * Creates a new instance from an XML element, fulfilling the
135      * recommandation of the {@link de.fu_berlin.ties.io.XMLStorable} interface.
136      *
137      * @param element the XML element containing the serialized representation
138      * @throws IllegalArgumentException if deserialization fails due to errors
139      * in the provided XML element
140      */
141     public SharedWinnowStore(final Element element)
142     throws IllegalArgumentException {
143         // false is default value for legacy serializations
144         super(Util.asBoolean(ObjectUtils.defaultIfNull(element.attributeValue(
145                 ATTRIB_IGNORE_IRRELEVANT), Boolean.FALSE)));
146 
147         // add to list of all instance, using next available slot number
148         slotNo = ALL_INSTANCES.size();
149         ALL_INSTANCES.add(this);
150         final int expectedSlotNo =
151             Util.asInt(element.attributeValue(ATTRIB_SLOT));
152 
153         if (slotNo != expectedSlotNo) {
154             // we expect all instances to be serialized and deserialized in
155             // the same order
156             throw new IllegalArgumentException(
157                     "SharedWinnowStore was deserialized to slot no. " + slotNo
158                     + " instead of the expected " + expectedSlotNo);
159         }
160 
161         if (slotNo == 0) {
162             // deserialize shared store
163             store = initStore(
164                     Util.asInt(element.attributeValue(ATTRIB_MAX_SIZE)),
165                     Util.asInt(element.attributeValue(
166                             ATTRIB_PRUNE_CANDIDATES)),
167                     Util.asInt(element.attributeValue(ATTRIB_PRUNE_NUMBER)));
168 
169             final Iterator featureIter =
170                 element.elementIterator(ELEMENT_FEATURE);
171             List weightsElemList;
172             Iterator weightsElemIter;
173             Element featureElem, weightsElem;
174             Long featureHash;
175             String trimmedText;
176             Object[] allWeights;
177             float[] nthWeights;
178             int i;
179 
180             // restore feature weights
181             while (featureIter.hasNext()) {
182                 featureElem = (Element) featureIter.next();
183                 featureHash = Long.valueOf(Util.asLong(
184                         featureElem.attributeValue(ATTRIB_HASH)));
185                 weightsElemList = featureElem.elements(ATTRIB_WEIGHTS);
186                 weightsElemIter = weightsElemList.iterator();
187                 allWeights = new Object[weightsElemList.size()];
188                 i = 0;
189 
190                 while (weightsElemIter.hasNext()) {
191                     weightsElem = (Element) weightsElemIter.next();
192                     trimmedText = weightsElem.getTextTrim();
193 
194                     if (StringUtils.isNotEmpty(trimmedText)) {
195                         nthWeights = CollUtils.asFloatArray(trimmedText);
196                         allWeights[i] = nthWeights;
197                     }
198 
199                     i++;
200                 }
201 
202                 store.put(featureHash, allWeights);
203             }
204 
205             Util.LOG.debug("Loaded shared winnow store");
206         }
207     }
208 
209     /***
210      * {@inheritDoc} <strong>This shared implementation also deletes the list
211      * of registered instances so <em>all</em> shared instances will become
212      * unusable!</strong>
213      */
214     public void destroy() {
215 /*        // temporarily determine average classifiers using each feature
216         if (!store.isEmpty()) {
217             int weightCount = 0;
218             final Iterator iter = store.entrySet().iterator();
219             Map.Entry currentEntry;
220             Object[] currentWeights;
221             int i;
222 
223             while (iter.hasNext()) {
224                 currentEntry = (Map.Entry) iter.next();
225                 currentWeights = (Object[]) currentEntry.getValue();
226 
227                 for (i = 0; i < currentWeights.length; i++) {
228                     if (currentWeights[i] != null) {
229                         weightCount++;
230                     }
231                 }
232             }
233 
234             final float average = (float) weightCount / store.size();
235             Util.LOG.warn(
236                     "SharedWinnowStore: average weight tuples per feature: "
237                     + average + " (features: "+ store.size()
238                     + ", weight tuples: " + weightCount + ")");
239         } */
240 
241         // delegate to superclass + reset list of all instances
242         super.destroy();
243 
244         if (!ALL_INSTANCES.isEmpty()) {
245             ALL_INSTANCES.clear();
246         }
247     }
248 
249     /***
250      * {@inheritDoc}
251      */
252     public float[] getWeights(final Feature feature) {
253         // check if weights are stored in the feature
254         Object[] allWeights = feature.getUserData();
255 
256         if (allWeights == null) {
257             // if not, we retrieve them from the store and place them there
258             final Long featureHash = feature.compactRepresentation();
259             allWeights = (Object[]) store.get(featureHash);
260 
261             if (allWeights != null) {
262                 feature.setUserData(allWeights);
263             } else {
264                 // mark feature as unknown so other stores don't need to look
265                 // it up again
266                 feature.setUserData(UNKNOWN_MARKER);
267             }
268         }
269 
270         if ((allWeights != null) && (allWeights != UNKNOWN_MARKER)) {
271             return (float[]) allWeights[slotNo];
272         } else {
273             return null;
274         }
275     }
276 
277     /***
278      * {@inheritDoc}
279      */
280     public void putWeights(final Feature feature, final float[] weights) {
281         // check if weights are stored in the feature
282         Object[] allWeights = feature.getUserData();
283 
284         if ((allWeights == null) || (allWeights == UNKNOWN_MARKER)) {
285             // if not, we check/update the store
286             final Long featureHash = feature.compactRepresentation();
287 
288             if (store.containsKey(featureHash)) {
289                 if (allWeights == UNKNOWN_MARKER) {
290                     // not supposed to happen
291                     Util.LOG.error("Feature " + featureHash
292                         + " was marked as unknown but IS contained in store!");
293                 }
294 
295                 allWeights = (Object[]) store.get(featureHash);
296             } else {
297                 allWeights = new Object[ALL_INSTANCES.size()];
298                 store.put(featureHash, allWeights);
299             }
300 
301             // store in feature
302             feature.setUserData(allWeights);
303         }
304 
305         // replace my value
306         allWeights[slotNo] = weights;
307     }
308 
309 
310     /***
311      * {@inheritDoc}
312      */
313     public void removed(final Object key) {
314         // remove from relevant keys of all instances, if they are used
315         if (isIgnoringIrrelevant()) {
316             final Long keyAsLong = (Long) key;
317             final Iterator<SharedWinnowStore> instanceIter =
318                 ALL_INSTANCES.iterator();
319 
320             while (instanceIter.hasNext()) {
321                 instanceIter.next().removeFromRelevantKeys(keyAsLong);
322             }
323         }
324     }
325 
326     /***
327      * {@inheritDoc}
328      * The current implementation does not support this functionality,
329      * always throwing an {@link UnsupportedOperationException} instead.
330      * Use {@link DefaultWinnowStore} instead if you want support for
331      * pruning of multiple candidates.
332      */
333     public Map.Entry[] sortForPruning(final Map.Entry[] candidates) {
334         throw new UnsupportedOperationException("Shared Winnow Store "
335                 + "does not support pruning of multiple candidates");
336     }
337 
338     /***
339      * {@inheritDoc}
340      */
341     protected AdaptableLRUMap store() {
342         return store;
343     }
344 
345     /***
346      * {@inheritDoc}
347      */
348     public ObjectElement toElement() {
349         // create main element with global attributes
350         final ObjectElement result =
351             new ObjectElement(ELEMENT_MAIN, this.getClass());
352         result.addAttribute(ATTRIB_SLOT, Integer.toString(slotNo));
353         result.addAttribute(ATTRIB_IGNORE_IRRELEVANT,
354                 Boolean.toString(isIgnoringIrrelevant()));
355 
356         if (slotNo == 0) {
357             // serialize shared store
358             result.addAttribute(ATTRIB_MAX_SIZE, Integer.toString(maxSize()));
359             result.addAttribute(ATTRIB_PRUNE_CANDIDATES,
360                     Integer.toString(store.getCandidateNumber()));
361             result.addAttribute(ATTRIB_PRUNE_NUMBER,
362                     Integer.toString(store.getPruneNumber()));
363 
364             final MapIterator mapIter = store.mapIterator();
365             Element featureElem, weightsElem;
366             Long featureHash;
367             Object[] allWeights;
368             float[] nthWeights;
369             int i;
370 
371             // store hash key in attribut and each weight arrays in subelement
372             while (mapIter.hasNext()) {
373                 featureHash = (Long) mapIter.next();
374                 featureElem = result.addElement(ELEMENT_FEATURE);
375                 featureElem.addAttribute(ATTRIB_HASH, featureHash.toString());
376 
377                 allWeights = (Object[]) mapIter.getValue();
378 
379                 for (i = 0; i < allWeights.length; i++) {
380                     nthWeights = (float[]) allWeights[i];
381                     weightsElem = featureElem.addElement(ATTRIB_WEIGHTS);
382 
383                     if (nthWeights != null) {
384                         weightsElem.addText(CollUtils.flatten(nthWeights));
385                     }
386                 }
387             }
388         }
389 
390         return result;
391     }
392 
393     /***
394      * Returns a string representation of this object.
395      *
396      * @return a textual representation
397      */
398     public String toString() {
399         final ToStringBuilder builder = new ToStringBuilder(this)
400             .append("slot no.", slotNo);
401 
402         if (slotNo == 0) {
403             // first slot: append details about shared store etc.
404             builder.append("current size", size())
405                 .append("maximum size", maxSize())
406                 .append("ignore irrelevant", isIgnoringIrrelevant());
407         }
408 
409         return builder.toString();
410     }
411 
412 }