View Javadoc

1   /*
2    * Copyright (C) 2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.eval;
23  
24  import java.io.File;
25  import java.io.IOException;
26  import java.io.Reader;
27  import java.io.Writer;
28  import java.util.Collection;
29  import java.util.Iterator;
30  
31  import org.apache.commons.collections.MultiHashMap;
32  import org.apache.commons.collections.MultiMap;
33  
34  import de.fu_berlin.ties.ContextMap;
35  import de.fu_berlin.ties.ProcessingException;
36  import de.fu_berlin.ties.TextProcessor;
37  import de.fu_berlin.ties.TiesConfiguration;
38  import de.fu_berlin.ties.extract.EvaluatedExtractionContainer;
39  import de.fu_berlin.ties.extract.Extraction;
40  import de.fu_berlin.ties.extract.ExtractionContainer;
41  import de.fu_berlin.ties.extract.Extractor;
42  import de.fu_berlin.ties.extract.TargetStructure;
43  import de.fu_berlin.ties.io.FieldContainer;
44  import de.fu_berlin.ties.io.IOUtils;
45  import de.fu_berlin.ties.util.Util;
46  
47  /***
48   * A processor that can be used to re-evaluate the contents of an
49   * {@link de.fu_berlin.ties.extract.EvaluatedExtractionContainer}. This can
50   * be used to switch the
51   * {@link de.fu_berlin.ties.extract.EvaluatedExtractionContainer#isMatchingAll()
52   * match mode}.
53   *
54   * @author Christian Siefkes
55   * @version $Revision: 1.10 $, $Date: 2004/06/17 14:01:41 $, $Author: siefkes $
56   */
57  public class ReEvaluator extends TextProcessor {
58  
59      /***
60       * Creates a new instance, using a default extension and the
61       * {@linkplain TiesConfiguration#CONF standard configuration}.
62       */
63      public ReEvaluator() {
64          this(Extractor.EXT_EXTRACTIONS);
65      }
66  
67      /***
68       * Creates a new instance, using the
69       * {@linkplain TiesConfiguration#CONF standard configuration}.
70       *
71       * @param outExt the extension to use for output files
72       */
73      public ReEvaluator(final String outExt) {
74          this(outExt, TiesConfiguration.CONF);
75      }
76  
77      /***
78       * Creates a new instance.
79       *
80       * @param outExt the extension to use for output files
81       * @param conf the configuration to use
82       */
83      public ReEvaluator(final String outExt, final TiesConfiguration conf) {
84          super(outExt, conf);
85      }
86  
87      /***
88       * Created an empty container for evaluating extractions, using the
89       * stored configuration to initialize
90       * {@linkplain ExtractionContainer#getTargetStructure() target structure}
91       * and {@linkplain EvaluatedExtractionContainer#isMatchingAll() match mode}.
92       * Subclasses can overwrite this method if the configured values are
93       * inadequate.
94       *
95       * @return the created empty container
96       */
97      protected EvaluatedExtractionContainer createdEvalContainer() {
98          return new EvaluatedExtractionContainer(
99              new TargetStructure(getConfig()), getConfig());
100     }
101 
102     /***
103      * Re-evaluates the contents of an extraction container.
104      *
105      * @param orgExtractions the extractions to re-evaluate
106      * @param newEvaluated the re-evaluated extractions are added to this
107      * conainer
108      */
109     public void reEvalulate(final ExtractionContainer orgExtractions,
110             final EvaluatedExtractionContainer newEvaluated) {
111         // sort into batches based on sources (might include "null" batch)
112         final MultiMap batchMap = new MultiHashMap();
113         final Iterator extIter = orgExtractions.iterator();
114         Extraction currentExt;
115 
116         while (extIter.hasNext()) {
117             currentExt = (Extraction) extIter.next();
118             batchMap.put(currentExt.getSource(), currentExt);
119         }
120 
121         ExtractionContainer currentPredictions;
122         ExtractionContainer currentAnswers;
123         final Iterator batchIter = batchMap.keySet().iterator();
124         String currentSource;
125         Collection currentColl;
126         Iterator collIter;
127         EvalStatus currentStatus;
128 
129         // separated and re-evaluate extractions for each batch
130         while (batchIter.hasNext()) {
131             currentSource = (String) batchIter.next();
132             currentColl = (Collection) batchMap.get(currentSource);
133             currentPredictions =
134                 new ExtractionContainer(newEvaluated.getTargetStructure());
135             currentAnswers =
136                 new ExtractionContainer(newEvaluated.getTargetStructure());
137             collIter = currentColl.iterator();
138 
139             while (collIter.hasNext()) {
140                 currentExt = (Extraction) collIter.next();
141                 currentStatus = currentExt.getEvalStatus();
142 
143                 // put each extraction into suitable container(s)
144                 if (currentStatus.isPredictionState()) {
145                     currentPredictions.add(currentExt);
146                 }
147                 if (currentStatus.isAnswerState()) {
148                     currentAnswers.add(currentExt);
149                 }
150             }
151 
152             // evaluate batch
153             newEvaluated.evaluateBatch(currentPredictions, currentAnswers,
154                 currentSource);
155         }
156     }
157 
158     /***
159      * Re-evaluates the serialized contents of an extraction container,
160      * delegating to
161      * {@link #reEvalulate(ExtractionContainer, EvaluatedExtractionContainer)}.
162      *
163      * @param reader reader containg the extractions to re-evaluate in
164      * {@link de.fu_berlin.ties.io.DelimSepValues} format; not closed by this
165      * method
166      * @return the re-evaluated extractions
167      * @throws IOException if an I/O error occurs while reading the extractions
168      */
169     public EvaluatedExtractionContainer reEvalulate(final Reader reader)
170             throws IOException {
171         final EvaluatedExtractionContainer result = createdEvalContainer();
172 
173         // read original extraction
174         final FieldContainer fContainer =
175             FieldContainer.createFieldContainer(reader);
176         final ExtractionContainer orgExtraction =
177             new ExtractionContainer(result.getTargetStructure(), fContainer);
178 
179         // delegate + return filled result
180         reEvalulate(orgExtraction, result);
181         return result;
182     }
183 
184     /***
185      * {@inheritDoc}
186      */
187     protected void doProcess(final Reader reader, final Writer writer,
188             final ContextMap context) throws IOException, ProcessingException {
189         final EvaluatedExtractionContainer result = reEvalulate(reader);
190 
191         // serialize results + metrics
192         final FieldContainer storage = FieldContainer.createFieldContainer();
193         result.storeEntries(storage);
194         storage.store(writer);
195 
196         // serialize metrics in same format
197         final File metricsFile = IOUtils.createOutFile((File)
198             context.get(KEY_DIRECTORY), (String) context.get(KEY_LOCAL_NAME),
199             MultiFMetrics.EXT_METRICS);
200         final Writer metricsWriter = IOUtils.openWriter(metricsFile,
201             getConfig());
202 
203         try {
204             final FieldContainer metricsStorage =
205                 FieldContainer.createFieldContainer();
206             result.viewMetrics().storeEntries(metricsStorage);
207             metricsStorage.store(metricsWriter);
208             metricsWriter.flush();
209         } finally {
210             IOUtils.tryToClose(metricsWriter);
211         }
212 
213         Util.LOG.info("Stored corresponding metrics in " + metricsFile);
214     }
215 
216 }