1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.filter;
23
24 import java.util.HashMap;
25 import java.util.List;
26 import java.util.Map;
27
28 import org.apache.commons.lang.builder.ToStringBuilder;
29 import org.dom4j.Document;
30 import org.dom4j.Element;
31 import org.dom4j.NodeFilter;
32
33 import de.fu_berlin.ties.ProcessingException;
34 import de.fu_berlin.ties.TiesConfiguration;
35 import de.fu_berlin.ties.classify.Reranker;
36 import de.fu_berlin.ties.classify.feature.FeatureVector;
37 import de.fu_berlin.ties.context.LocalFeature;
38 import de.fu_berlin.ties.context.PriorRecognitions;
39 import de.fu_berlin.ties.context.Representation;
40 import de.fu_berlin.ties.extract.Extraction;
41 import de.fu_berlin.ties.xml.dom.DOMUtils;
42
43 /***
44 * A trainable filter that uses a
45 * {@link de.fu_berlin.ties.context.Representation}to convert elements into
46 * feature vectors.
47 *
48 * @author Christian Siefkes
49 * @version $Revision: 1.9 $, $Date: 2004/11/19 14:05:00 $, $Author: siefkes $
50 */
51 public class RepresentationFilter extends TrainableFilter {
52
53 /***
54 * The representation used to convert elements into feature vectors.
55 */
56 private final Representation representation;
57
58 /***
59 * Feature cache used by the representation.
60 */
61 private final Map<Element, List<LocalFeature>> featureCache =
62 new HashMap<Element, List<LocalFeature>>();
63
64 /***
65 * The purpose of this filter, e.g. "Sentence" if it is used for sentence
66 * filtering.
67 */
68 private final String purpose;
69
70 /***
71 * List of prior recognitions used by the representation.
72 */
73 private PriorRecognitions priorRecogs;
74
75 /***
76 * Creates a new instance.
77 *
78 * @param conf used to initialize the trainable classifier by calling {@link
79 * de.fu_berlin.ties.classify.TrainableClassifier#createClassifier(
80 * java.util.Set, TiesConfiguration)}
81 * @param positiveFilter this filter is queried to decide whether this class
82 * {@link #prefers(Element) prefers}an element
83 * @param negativeFilter this filter is queried to decide whether this class
84 * {@link #avoids(Element) avoids}an element
85 * @param rerank an optional reranker that recalculates probabilities to
86 * introduce a bias (can be used to favor recall over precision, by setting
87 * a bias > 1 for the "true" class, etc.); might be <code>null</code>
88 * @param rep the representation used to convert elements into feature
89 * vectors
90 * @param filterPurpose the purpose of this filter, e.g. "Sentence" if it is
91 * used for sentence filtering
92 * @throws ProcessingException if the initialization of the trainable
93 * classifier fails
94 */
95 public RepresentationFilter(final TiesConfiguration conf,
96 final NodeFilter positiveFilter, final NodeFilter negativeFilter,
97 final Reranker rerank, final Representation rep,
98 final String filterPurpose) throws ProcessingException {
99 super(conf, positiveFilter, negativeFilter, rerank);
100 representation = rep;
101 purpose = filterPurpose;
102 }
103
104 /***
105 * {@inheritDoc}
106 */
107 public FeatureVector buildFeatures(final Element element) {
108
109 final StringBuffer mainText = new StringBuffer();
110 DOMUtils.collectText(element, mainText);
111
112
113 return representation.buildContext(element, "", mainText.toString(),
114 "", priorRecogs, featureCache, purpose);
115 }
116
117 /***
118 * {@inheritDoc}
119 */
120 public void init(final Document document) {
121
122 featureCache.clear();
123 priorRecogs = representation.createRecognitionBuffer();
124
125
126 super.init(document);
127 }
128
129 /***
130 * {@inheritDoc}
131 */
132 public boolean matches(final Element element) throws ProcessingException {
133
134 final boolean result = super.matches(element);
135
136
137 final Extraction newRecognition;
138
139 if (result) {
140
141 final StringBuffer collectedText = new StringBuffer();
142 DOMUtils.collectText(element, collectedText);
143 newRecognition = new Extraction(DOMUtils.name(element),
144 collectedText.toString());
145 } else {
146
147 newRecognition = new Extraction("<null>", "");
148 }
149
150
151 newRecognition.setSealed(true);
152 priorRecogs.add(newRecognition);
153
154 return result;
155 }
156
157 /***
158 * Returns a string representation of this object.
159 *
160 * @return a textual representation
161 */
162 public String toString() {
163 return new ToStringBuilder(this)
164 .appendSuper(super.toString())
165 .append("representation", representation)
166 .toString();
167 }
168
169 }