View Javadoc

1   /*
2    * Copyright (C) 2004-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.eval;
23  
24  import java.io.File;
25  import java.io.IOException;
26  import java.io.Reader;
27  import java.io.Writer;
28  import java.util.Collection;
29  import java.util.Iterator;
30  
31  import org.apache.commons.collections.MultiHashMap;
32  import org.apache.commons.collections.MultiMap;
33  
34  import de.fu_berlin.ties.ContextMap;
35  import de.fu_berlin.ties.ProcessingException;
36  import de.fu_berlin.ties.TextProcessor;
37  import de.fu_berlin.ties.TiesConfiguration;
38  import de.fu_berlin.ties.extract.EvaluatedExtractionContainer;
39  import de.fu_berlin.ties.extract.Extraction;
40  import de.fu_berlin.ties.extract.ExtractionContainer;
41  import de.fu_berlin.ties.extract.Extractor;
42  import de.fu_berlin.ties.extract.TargetStructure;
43  import de.fu_berlin.ties.io.FieldContainer;
44  import de.fu_berlin.ties.io.IOUtils;
45  import de.fu_berlin.ties.util.Util;
46  
47  /***
48   * A processor that can be used to re-evaluate the contents of an
49   * {@link de.fu_berlin.ties.extract.EvaluatedExtractionContainer}. This can
50   * be used to switch the
51   * {@link de.fu_berlin.ties.extract.EvaluatedExtractionContainer#isMatchingAll()
52   * match mode}.
53   *
54   * @author Christian Siefkes
55   * @version $Revision: 1.15 $, $Date: 2006/10/21 16:04:11 $, $Author: siefkes $
56   */
57  public class ReEvaluator extends TextProcessor {
58  
59      /***
60       * Creates a new instance, using a default extension and the
61       * {@linkplain TiesConfiguration#CONF standard configuration}.
62       */
63      public ReEvaluator() {
64          this(Extractor.EXT_EXTRACTIONS);
65      }
66  
67      /***
68       * Creates a new instance, using the
69       * {@linkplain TiesConfiguration#CONF standard configuration}.
70       *
71       * @param outExt the extension to use for output files
72       */
73      public ReEvaluator(final String outExt) {
74          this(outExt, TiesConfiguration.CONF);
75      }
76  
77      /***
78       * Creates a new instance.
79       *
80       * @param outExt the extension to use for output files
81       * @param conf the configuration to use
82       */
83      public ReEvaluator(final String outExt, final TiesConfiguration conf) {
84          super(outExt, conf);
85      }
86  
87      /***
88       * Created an empty container for evaluating extractions, using the
89       * stored configuration to initialize
90       * {@linkplain ExtractionContainer#getTargetStructure() target structure}
91       * and {@linkplain EvaluatedExtractionContainer#isMatchingAll() match mode}.
92       * Subclasses can overwrite this method if the configured values are
93       * inadequate.
94       *
95       * @return the created empty container
96       */
97      protected EvaluatedExtractionContainer createEvalContainer() {
98          return new EvaluatedExtractionContainer(
99              new TargetStructure(getConfig()), getConfig());
100     }
101 
102     /***
103      * Re-evaluates the contents of an extraction container.
104      *
105      * @param orgExtractions the extractions to re-evaluate
106      * @param newEvaluated the re-evaluated extractions are added to this
107      * conainer
108      */
109     public void reEvalulate(final ExtractionContainer orgExtractions,
110             final EvaluatedExtractionContainer newEvaluated) {
111         // sort into batches based on sources (might include "null" batch)
112         final MultiMap batchMap = new MultiHashMap();
113         final Iterator extIter = orgExtractions.iterator();
114         Extraction currentExt;
115 
116         while (extIter.hasNext()) {
117             currentExt = (Extraction) extIter.next();
118             batchMap.put(currentExt.getSource(), currentExt);
119         }
120 
121         ExtractionContainer currentPredictions;
122         ExtractionContainer currentAnswers;
123         final Iterator batchIter = batchMap.keySet().iterator();
124         String currentSource;
125         Collection currentColl;
126         Iterator collIter;
127         EvalStatus currentStatus;
128 
129         // separated and re-evaluate extractions for each batch
130         while (batchIter.hasNext()) {
131             currentSource = (String) batchIter.next();
132             currentColl = (Collection) batchMap.get(currentSource);
133             currentPredictions =
134                 new ExtractionContainer(newEvaluated.getTargetStructure());
135             currentAnswers =
136                 new ExtractionContainer(newEvaluated.getTargetStructure());
137             collIter = currentColl.iterator();
138 
139             while (collIter.hasNext()) {
140                 currentExt = (Extraction) collIter.next();
141                 currentStatus = currentExt.getEvalStatus();
142 
143                 // put each extraction into suitable container(s)
144                 if (currentStatus.isPredictionState()) {
145                     currentPredictions.add(currentExt);
146                 }
147                 if (currentStatus.isAnswerState()) {
148                     currentAnswers.add(currentExt);
149                 }
150             }
151 
152             // evaluate batch
153             newEvaluated.evaluateBatch(currentPredictions, currentAnswers,
154                 currentSource);
155         }
156     }
157 
158     /***
159      * Re-evaluates the serialized contents of an extraction container,
160      * delegating to
161      * {@link #reEvalulate(ExtractionContainer, EvaluatedExtractionContainer)}.
162      *
163      * @param reader reader containg the extractions to re-evaluate in
164      * {@link de.fu_berlin.ties.io.DelimSepValues} format; not closed by this
165      * method
166      * @return the re-evaluated extractions
167      * @throws IOException if an I/O error occurs while reading the extractions
168      */
169     public EvaluatedExtractionContainer reEvalulate(final Reader reader)
170             throws IOException {
171         final EvaluatedExtractionContainer result = createEvalContainer();
172 
173         // read original extraction
174         final FieldContainer fContainer =
175             FieldContainer.createFieldContainer(getConfig());
176         fContainer.read(reader);
177         final ExtractionContainer orgExtraction =
178             new ExtractionContainer(result.getTargetStructure(), fContainer);
179 
180         // delegate + return filled result
181         reEvalulate(orgExtraction, result);
182         return result;
183     }
184 
185     /***
186      * {@inheritDoc}
187      */
188     protected void doProcess(final Reader reader, final Writer writer,
189             final ContextMap context) throws IOException, ProcessingException {
190         final EvaluatedExtractionContainer result = reEvalulate(reader);
191 
192         // serialize results + metrics
193         final FieldContainer storage =
194             FieldContainer.createFieldContainer(getConfig());
195         result.storeEntries(storage);
196         storage.store(writer);
197 
198         // serialize metrics in same format
199         final File metricsFile = IOUtils.createOutFile((File)
200             context.get(KEY_DIRECTORY), (String) context.get(KEY_LOCAL_NAME),
201             MultiFMetrics.EXT_METRICS);
202         final Writer metricsWriter = IOUtils.openWriter(metricsFile,
203             getConfig());
204 
205         try {
206             final FieldContainer metricsStorage =
207                 FieldContainer.createFieldContainer(getConfig());
208             result.viewMetrics().storeEntries(metricsStorage);
209             metricsStorage.store(metricsWriter);
210             metricsWriter.flush();
211         } finally {
212             IOUtils.tryToClose(metricsWriter);
213         }
214 
215         Util.LOG.info("Stored corresponding metrics in " + metricsFile);
216     }
217 
218 }