1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.classify.winnow;
23
24 import java.util.ArrayList;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Map;
28
29 import org.apache.commons.collections.MapIterator;
30 import org.apache.commons.lang.ObjectUtils;
31 import org.apache.commons.lang.StringUtils;
32 import org.apache.commons.lang.builder.ToStringBuilder;
33 import org.dom4j.Element;
34 import org.dom4j.QName;
35
36 import de.fu_berlin.ties.TiesConfiguration;
37 import de.fu_berlin.ties.classify.feature.Feature;
38 import de.fu_berlin.ties.io.ObjectElement;
39 import de.fu_berlin.ties.util.AdaptableLRUMap;
40 import de.fu_berlin.ties.util.CollUtils;
41 import de.fu_berlin.ties.util.Util;
42 import de.fu_berlin.ties.xml.dom.DOMUtils;
43
/**
 * A WinnowStore implementation that is shared between multiple classifiers to
 * allow more efficient access. <strong>Note that this class is not
 * synchronized and requires class-wide (not only instance-wide)
 * synchronization if instances of this type are to be used in different
 * threads!</strong>
 *
 * @author Christian Siefkes
 * @version $Revision: 1.11 $, $Date: 2006/10/21 16:03:59 $, $Author: siefkes $
 */
53 public class SharedWinnowStore extends WinnowStore {
54
55 /***
56 * Attribute name used for XML serialization.
57 */
58 private static final QName ATTRIB_SLOT =
59 DOMUtils.defaultName("slot");
60
61 /***
62 * A shared object that stores the feature weights for all instances
63 * of this type. Uses the
64 * {@linkplain de.fu_berlin.ties.classify.feature.Feature compact
65 * representation} of features as keys and an array of Objects storing
66 * the weights for all instances (each instance sets its Object to an
67 * array of floats).
68 */
69 private static AdaptableLRUMap store = null;
70
71 /***
72 * A list of all instances of this type. Should be synchronized on itself
73 * when write access is necessary (in the constructor of new instances).
74 */
75 private static final List<SharedWinnowStore> ALL_INSTANCES =
76 new ArrayList<SharedWinnowStore>();
77
78 /***
79 * Marker object used for weights for which no weights have been stored.
80 */
81 private static final Object[] UNKNOWN_MARKER = new Object[] {};
82
83
84 /***
85 * The number of the slot reserved by this instance in the shared
86 * {@link #store}.
87 */
88 private final int slotNo;
89
90
/**
 * Creates a new instance, reading the "ignore irrelevant" setting from the
 * given configuration and delegating to the four-argument constructor.
 *
 * @param initialWeight the initial weight of each feature (currently not
 * used)
 * @param config used to configure this instance
 * @param configSuffix optional suffix appended to the configuration keys
 * when configuring this instance; might be <code>null</code>
 */
public SharedWinnowStore(final float initialWeight,
        final TiesConfiguration config, final String configSuffix) {
    this(
        initialWeight,
        config.getBoolean(
            config.adaptKey(CONFIG_IGNORE_IRRELEVANT, configSuffix)),
        config,
        configSuffix);
}
105
/**
 * Creates a new instance and registers it in the list of all instances.
 * The first registered instance (slot number 0) also initializes the
 * shared store from the given configuration.
 *
 * @param initialWeight the initial weight of each feature (currently not
 * used)
 * @param ignoreIrrelevant whether features within a certain range around
 * the default weight are ignored during classification
 * @param config used to configure this instance
 * @param configSuffix optional suffix appended to the configuration keys
 * when configuring this instance; might be <code>null</code>
 */
public SharedWinnowStore(final float initialWeight,
        final boolean ignoreIrrelevant, final TiesConfiguration config,
        final String configSuffix) {
    super(ignoreIrrelevant);

    // the next free slot equals the number of instances registered so far
    slotNo = ALL_INSTANCES.size();
    ALL_INSTANCES.add(this);

    if (slotNo != 0) {
        // the shared store has already been set up by the first instance
        return;
    }

    store = initStore(config, configSuffix);
    Util.LOG.debug("Initialized shared winnow store");
}
132
/**
 * Creates a new instance from an XML element, fulfilling the
 * recommendation of the {@link de.fu_berlin.ties.io.XMLStorable} interface.
 *
 * <p>The instance must be deserialized into the same slot it was
 * serialized from. The slot attribute is validated <em>before</em> the
 * instance is registered, and the registration is rolled back if loading
 * fails, so a failed deserialization does not leave a stale entry in the
 * list of instances (which would shift the slot numbers of all instances
 * created afterwards). The instance in slot 0 additionally restores the
 * shared store and all feature weights from the element.
 *
 * @param element the XML element containing the serialized representation
 * @throws IllegalArgumentException if deserialization fails due to errors
 * in the provided XML element
 */
public SharedWinnowStore(final Element element)
        throws IllegalArgumentException {
    // falls back to Boolean.FALSE if the attribute is missing
    super(Util.asBoolean(ObjectUtils.defaultIfNull(element.attributeValue(
            ATTRIB_IGNORE_IRRELEVANT), Boolean.FALSE)));

    // determine and check the slot before touching any shared state
    slotNo = ALL_INSTANCES.size();
    final int expectedSlotNo =
        Util.asInt(element.attributeValue(ATTRIB_SLOT));

    if (slotNo != expectedSlotNo) {
        throw new IllegalArgumentException(
            "SharedWinnowStore was deserialized to slot no. " + slotNo
            + " instead of the expected " + expectedSlotNo);
    }

    ALL_INSTANCES.add(this);
    boolean succeeded = false;

    try {
        if (slotNo == 0) {
            // the first instance restores the shared store for all others
            store = initStore(
                Util.asInt(element.attributeValue(ATTRIB_MAX_SIZE)),
                Util.asInt(element.attributeValue(
                        ATTRIB_PRUNE_CANDIDATES)),
                Util.asInt(element.attributeValue(ATTRIB_PRUNE_NUMBER)));

            final Iterator featureIter =
                element.elementIterator(ELEMENT_FEATURE);

            while (featureIter.hasNext()) {
                final Element featureElem = (Element) featureIter.next();
                final Long featureHash = Long.valueOf(Util.asLong(
                        featureElem.attributeValue(ATTRIB_HASH)));
                final List weightsElemList =
                    featureElem.elements(ATTRIB_WEIGHTS);
                final Iterator weightsElemIter = weightsElemList.iterator();

                // one array entry per serialized weights element, i.e. per slot
                final Object[] allWeights =
                    new Object[weightsElemList.size()];
                int i = 0;

                while (weightsElemIter.hasNext()) {
                    final Element weightsElem =
                        (Element) weightsElemIter.next();
                    final String trimmedText = weightsElem.getTextTrim();

                    // an empty element means: no weights stored in this slot
                    if (StringUtils.isNotEmpty(trimmedText)) {
                        allWeights[i] = CollUtils.asFloatArray(trimmedText);
                    }

                    i++;
                }

                store.put(featureHash, allWeights);
            }

            Util.LOG.debug("Loaded shared winnow store");
        }

        succeeded = true;
    } finally {
        if (!succeeded) {
            // undo the registration (removal by index: this instance was
            // appended at position slotNo) so slot numbering stays intact
            ALL_INSTANCES.remove(slotNo);
        }
    }
}
208
209 /***
210 * {@inheritDoc} <strong>This shared implementation also deletes the list
211 * of registered instances so <em>all</em> shared instances will become
212 * unusable!</strong>
213 */
214 public void destroy() {
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242 super.destroy();
243
244 if (!ALL_INSTANCES.isEmpty()) {
245 ALL_INSTANCES.clear();
246 }
247 }
248
249 /***
250 * {@inheritDoc}
251 */
252 public float[] getWeights(final Feature feature) {
253
254 Object[] allWeights = feature.getUserData();
255
256 if (allWeights == null) {
257
258 final Long featureHash = feature.compactRepresentation();
259 allWeights = (Object[]) store.get(featureHash);
260
261 if (allWeights != null) {
262 feature.setUserData(allWeights);
263 } else {
264
265
266 feature.setUserData(UNKNOWN_MARKER);
267 }
268 }
269
270 if ((allWeights != null) && (allWeights != UNKNOWN_MARKER)) {
271 return (float[]) allWeights[slotNo];
272 } else {
273 return null;
274 }
275 }
276
277 /***
278 * {@inheritDoc}
279 */
280 public void putWeights(final Feature feature, final float[] weights) {
281
282 Object[] allWeights = feature.getUserData();
283
284 if ((allWeights == null) || (allWeights == UNKNOWN_MARKER)) {
285
286 final Long featureHash = feature.compactRepresentation();
287
288 if (store.containsKey(featureHash)) {
289 if (allWeights == UNKNOWN_MARKER) {
290
291 Util.LOG.error("Feature " + featureHash
292 + " was marked as unknown but IS contained in store!");
293 }
294
295 allWeights = (Object[]) store.get(featureHash);
296 } else {
297 allWeights = new Object[ALL_INSTANCES.size()];
298 store.put(featureHash, allWeights);
299 }
300
301
302 feature.setUserData(allWeights);
303 }
304
305
306 allWeights[slotNo] = weights;
307 }
308
309
310 /***
311 * {@inheritDoc}
312 */
313 public void removed(final Object key) {
314
315 if (isIgnoringIrrelevant()) {
316 final Long keyAsLong = (Long) key;
317 final Iterator<SharedWinnowStore> instanceIter =
318 ALL_INSTANCES.iterator();
319
320 while (instanceIter.hasNext()) {
321 instanceIter.next().removeFromRelevantKeys(keyAsLong);
322 }
323 }
324 }
325
326 /***
327 * {@inheritDoc}
328 * The current implementation does not support this functionality,
329 * always throwing an {@link UnsupportedOperationException} instead.
330 * Use {@link DefaultWinnowStore} instead if you want support for
331 * pruning of multiple candidates.
332 */
333 public Map.Entry[] sortForPruning(final Map.Entry[] candidates) {
334 throw new UnsupportedOperationException("Shared Winnow Store "
335 + "does not support pruning of multiple candidates");
336 }
337
338 /***
339 * {@inheritDoc}
340 */
341 protected AdaptableLRUMap store() {
342 return store;
343 }
344
345 /***
346 * {@inheritDoc}
347 */
348 public ObjectElement toElement() {
349
350 final ObjectElement result =
351 new ObjectElement(ELEMENT_MAIN, this.getClass());
352 result.addAttribute(ATTRIB_SLOT, Integer.toString(slotNo));
353 result.addAttribute(ATTRIB_IGNORE_IRRELEVANT,
354 Boolean.toString(isIgnoringIrrelevant()));
355
356 if (slotNo == 0) {
357
358 result.addAttribute(ATTRIB_MAX_SIZE, Integer.toString(maxSize()));
359 result.addAttribute(ATTRIB_PRUNE_CANDIDATES,
360 Integer.toString(store.getCandidateNumber()));
361 result.addAttribute(ATTRIB_PRUNE_NUMBER,
362 Integer.toString(store.getPruneNumber()));
363
364 final MapIterator mapIter = store.mapIterator();
365 Element featureElem, weightsElem;
366 Long featureHash;
367 Object[] allWeights;
368 float[] nthWeights;
369 int i;
370
371
372 while (mapIter.hasNext()) {
373 featureHash = (Long) mapIter.next();
374 featureElem = result.addElement(ELEMENT_FEATURE);
375 featureElem.addAttribute(ATTRIB_HASH, featureHash.toString());
376
377 allWeights = (Object[]) mapIter.getValue();
378
379 for (i = 0; i < allWeights.length; i++) {
380 nthWeights = (float[]) allWeights[i];
381 weightsElem = featureElem.addElement(ATTRIB_WEIGHTS);
382
383 if (nthWeights != null) {
384 weightsElem.addText(CollUtils.flatten(nthWeights));
385 }
386 }
387 }
388 }
389
390 return result;
391 }
392
393 /***
394 * Returns a string representation of this object.
395 *
396 * @return a textual representation
397 */
398 public String toString() {
399 final ToStringBuilder builder = new ToStringBuilder(this)
400 .append("slot no.", slotNo);
401
402 if (slotNo == 0) {
403
404 builder.append("current size", size())
405 .append("maximum size", maxSize())
406 .append("ignore irrelevant", isIgnoringIrrelevant());
407 }
408
409 return builder.toString();
410 }
411
412 }