1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.eval;
23
24 import java.io.File;
25 import java.io.IOException;
26 import java.io.Reader;
27 import java.io.Writer;
28 import java.util.Collection;
29 import java.util.Iterator;
30
31 import org.apache.commons.collections.MultiHashMap;
32 import org.apache.commons.collections.MultiMap;
33
34 import de.fu_berlin.ties.ContextMap;
35 import de.fu_berlin.ties.ProcessingException;
36 import de.fu_berlin.ties.TextProcessor;
37 import de.fu_berlin.ties.TiesConfiguration;
38 import de.fu_berlin.ties.extract.EvaluatedExtractionContainer;
39 import de.fu_berlin.ties.extract.Extraction;
40 import de.fu_berlin.ties.extract.ExtractionContainer;
41 import de.fu_berlin.ties.extract.Extractor;
42 import de.fu_berlin.ties.extract.TargetStructure;
43 import de.fu_berlin.ties.io.FieldContainer;
44 import de.fu_berlin.ties.io.IOUtils;
45 import de.fu_berlin.ties.util.Util;
46
47 /***
48 * A processor that can be used to re-evaluate the contents of an
49 * {@link de.fu_berlin.ties.extract.EvaluatedExtractionContainer}. This can
50 * be used to switch the
51 * {@link de.fu_berlin.ties.extract.EvaluatedExtractionContainer#isMatchingAll()
52 * match mode}.
53 *
54 * @author Christian Siefkes
55 * @version $Revision: 1.10 $, $Date: 2004/06/17 14:01:41 $, $Author: siefkes $
56 */
57 public class ReEvaluator extends TextProcessor {
58
59 /***
60 * Creates a new instance, using a default extension and the
61 * {@linkplain TiesConfiguration#CONF standard configuration}.
62 */
63 public ReEvaluator() {
64 this(Extractor.EXT_EXTRACTIONS);
65 }
66
67 /***
68 * Creates a new instance, using the
69 * {@linkplain TiesConfiguration#CONF standard configuration}.
70 *
71 * @param outExt the extension to use for output files
72 */
73 public ReEvaluator(final String outExt) {
74 this(outExt, TiesConfiguration.CONF);
75 }
76
77 /***
78 * Creates a new instance.
79 *
80 * @param outExt the extension to use for output files
81 * @param conf the configuration to use
82 */
83 public ReEvaluator(final String outExt, final TiesConfiguration conf) {
84 super(outExt, conf);
85 }
86
87 /***
88 * Created an empty container for evaluating extractions, using the
89 * stored configuration to initialize
90 * {@linkplain ExtractionContainer#getTargetStructure() target structure}
91 * and {@linkplain EvaluatedExtractionContainer#isMatchingAll() match mode}.
92 * Subclasses can overwrite this method if the configured values are
93 * inadequate.
94 *
95 * @return the created empty container
96 */
97 protected EvaluatedExtractionContainer createdEvalContainer() {
98 return new EvaluatedExtractionContainer(
99 new TargetStructure(getConfig()), getConfig());
100 }
101
102 /***
103 * Re-evaluates the contents of an extraction container.
104 *
105 * @param orgExtractions the extractions to re-evaluate
106 * @param newEvaluated the re-evaluated extractions are added to this
107 * conainer
108 */
109 public void reEvalulate(final ExtractionContainer orgExtractions,
110 final EvaluatedExtractionContainer newEvaluated) {
111
112 final MultiMap batchMap = new MultiHashMap();
113 final Iterator extIter = orgExtractions.iterator();
114 Extraction currentExt;
115
116 while (extIter.hasNext()) {
117 currentExt = (Extraction) extIter.next();
118 batchMap.put(currentExt.getSource(), currentExt);
119 }
120
121 ExtractionContainer currentPredictions;
122 ExtractionContainer currentAnswers;
123 final Iterator batchIter = batchMap.keySet().iterator();
124 String currentSource;
125 Collection currentColl;
126 Iterator collIter;
127 EvalStatus currentStatus;
128
129
130 while (batchIter.hasNext()) {
131 currentSource = (String) batchIter.next();
132 currentColl = (Collection) batchMap.get(currentSource);
133 currentPredictions =
134 new ExtractionContainer(newEvaluated.getTargetStructure());
135 currentAnswers =
136 new ExtractionContainer(newEvaluated.getTargetStructure());
137 collIter = currentColl.iterator();
138
139 while (collIter.hasNext()) {
140 currentExt = (Extraction) collIter.next();
141 currentStatus = currentExt.getEvalStatus();
142
143
144 if (currentStatus.isPredictionState()) {
145 currentPredictions.add(currentExt);
146 }
147 if (currentStatus.isAnswerState()) {
148 currentAnswers.add(currentExt);
149 }
150 }
151
152
153 newEvaluated.evaluateBatch(currentPredictions, currentAnswers,
154 currentSource);
155 }
156 }
157
158 /***
159 * Re-evaluates the serialized contents of an extraction container,
160 * delegating to
161 * {@link #reEvalulate(ExtractionContainer, EvaluatedExtractionContainer)}.
162 *
163 * @param reader reader containg the extractions to re-evaluate in
164 * {@link de.fu_berlin.ties.io.DelimSepValues} format; not closed by this
165 * method
166 * @return the re-evaluated extractions
167 * @throws IOException if an I/O error occurs while reading the extractions
168 */
169 public EvaluatedExtractionContainer reEvalulate(final Reader reader)
170 throws IOException {
171 final EvaluatedExtractionContainer result = createdEvalContainer();
172
173
174 final FieldContainer fContainer =
175 FieldContainer.createFieldContainer(reader);
176 final ExtractionContainer orgExtraction =
177 new ExtractionContainer(result.getTargetStructure(), fContainer);
178
179
180 reEvalulate(orgExtraction, result);
181 return result;
182 }
183
184 /***
185 * {@inheritDoc}
186 */
187 protected void doProcess(final Reader reader, final Writer writer,
188 final ContextMap context) throws IOException, ProcessingException {
189 final EvaluatedExtractionContainer result = reEvalulate(reader);
190
191
192 final FieldContainer storage = FieldContainer.createFieldContainer();
193 result.storeEntries(storage);
194 storage.store(writer);
195
196
197 final File metricsFile = IOUtils.createOutFile((File)
198 context.get(KEY_DIRECTORY), (String) context.get(KEY_LOCAL_NAME),
199 MultiFMetrics.EXT_METRICS);
200 final Writer metricsWriter = IOUtils.openWriter(metricsFile,
201 getConfig());
202
203 try {
204 final FieldContainer metricsStorage =
205 FieldContainer.createFieldContainer();
206 result.viewMetrics().storeEntries(metricsStorage);
207 metricsStorage.store(metricsWriter);
208 metricsWriter.flush();
209 } finally {
210 IOUtils.tryToClose(metricsWriter);
211 }
212
213 Util.LOG.info("Stored corresponding metrics in " + metricsFile);
214 }
215
216 }