View Javadoc

1   /*
2    * Copyright (C) 2005-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.combi;
23  
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang.builder.ToStringBuilder;

import de.fu_berlin.ties.TiesConfiguration;
33  
34  /***
35   * A strategy adapter translates labels to the internal labels expected by a
36   * {@link de.fu_berlin.ties.combi.CombinationStrategy}, using a list of regular
37   * expressions and replacement texts (or the other way around). This can be
38   * used to interpret labeling systems that follow the model of a specific
39   * combination strategy but use a different naming convention. For example,
40   * the state "O" (outside) usually used by IOB1/IOB2 tagging can be translated
41   * to the state "A" internally used by
42   * {@link de.fu_berlin.ties.combi.InsideOutsideStrategy}. Note that patterns
43   * are {@link java.util.regex.Matcher#matches() matched} against complete label
44   * texts, matches that cover only parts of label texts are not considered.
45   *
46   * <p>Instances of this class are not thread-safe and must be synchronized
47   * externally, if required.
48   *
49   * @author Christian Siefkes
50   * @version $Revision: 1.5 $, $Date: 2006/10/21 16:04:01 $, $Author: siefkes $
51   */
52  public class StrategyAdapter {
53  
54      /***
55       * Factory method for creating a dummy adapter that simple returns the
56       * labels generated by the combination strategy without modifying them
57       * in any way.
58       *
59       * @return a dummy adapter that doesn't modify any labels
60       */
61      public static StrategyAdapter createDummyAdapter() {
62          return new StrategyAdapter(new LinkedHashMap<Pattern, String>());
63      }
64  
65      /***
66       * Creates an ordered map from regex patterns to replacement texts from
67       * a configuration (subset).
68       *
69       * @param conf the configuration (subset) to use
70       * @return the map from regular expressions to replacement texts
71       */
72      private static LinkedHashMap<Pattern, String> createReplacementMap(
73              final Configuration conf) {
74          final LinkedHashMap<Pattern, String> result =
75              new LinkedHashMap<Pattern, String>();
76          final Iterator keyIter = conf.getKeys();
77          String key, value;
78  
79          // convert each key/value pair into a pattern/replacement pair
80          while (keyIter.hasNext()) {
81              key = (String) keyIter.next();
82              value = conf.getString(key);
83              result.put(Pattern.compile(key), value);
84          }
85  
86          return result;
87      }
88  
89  
90      /***
91       * An ordered map from regex pattern matchers to replacement texts.
92       */
93      private final LinkedHashMap<Matcher, String> replacementMap;
94  
95  
96      /***
97       * Creates a new instance.
98       *
99       * @param conf used to configure this instance
100      */
101     public StrategyAdapter(final TiesConfiguration conf) {
102         this(createReplacementMap(conf.subset("combination.adapter")));
103     }
104 
105     /***
106      * Creates a new instance.
107      *
108      * @param replacements an ordered map from regex patterns to replacement
109      * texts
110      */
111     public StrategyAdapter(final LinkedHashMap<Pattern, String> replacements) {
112         super();
113         replacementMap =
114             new LinkedHashMap<Matcher, String>(replacements.size());
115         final Iterator<Pattern> patternIter = replacements.keySet().iterator();
116         Pattern pattern;
117 
118         // create a matcher for each pattern (for efficiency)
119         while (patternIter.hasNext()) {
120             pattern = patternIter.next();
121             replacementMap.put(pattern.matcher(""), replacements.get(pattern));
122         }
123     }
124 
125     /***
126      * Translates the array of labels generated by a combination strategy
127      * to the corresponding labels following some external convention (or
128      * the other way around).
129      *
130      * @param labels the array of labels to translate
131      * @return a same-size array of labels translated by applying all
132      * configured replacements in the specified order
133      */
134     public String[] translate(final String[] labels) {
135         if (replacementMap.isEmpty()) {
136             // we are a dummy adapter -- nothing to do
137             return labels;
138         } else {
139             final String[] result = new String[labels.length];
140             Iterator<Matcher> matcherIter;
141             Matcher matcher;
142             String replacement;
143             String text;
144             final StringBuffer buffer = new StringBuffer();
145 
146             for (int i = 0; i < labels.length; i++) {
147                 matcherIter = replacementMap.keySet().iterator();
148                 text = labels[i];
149 
150                 // apply all replacements in specified order
151                 while (matcherIter.hasNext()) {
152                     matcher = matcherIter.next();
153                     replacement = replacementMap.get(matcher);
154                     matcher.reset(text);
155 
156                     if (matcher.matches()) {
157                         // matcher matches the complete input text
158                         matcher.appendReplacement(buffer, replacement);
159                         text = buffer.toString();
160                         buffer.setLength(0);
161                     }
162                 }
163 
164                 result[i] = text;
165             }
166 
167             return result;
168         }
169     }
170 
171     /***
172      * Returns a string representation of this object.
173      *
174      * @return a textual representation
175      */
176     public String toString() {
177         return new ToStringBuilder(this)
178             .append("replacement map", replacementMap)
179             .toString();
180     }
181 
182 }