View Javadoc

1   /*
2    * Copyright (C) 2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.combi;
23  
24  import java.util.Collections;
25  import java.util.Iterator;
26  import java.util.Set;
27  import java.util.SortedSet;
28  import java.util.TreeSet;
29  
30  import de.fu_berlin.ties.classify.PredictionDistribution;
31  
32  /***
33   * A combination strategy using open/close tagging (also called "BIE" tagging
34   * due to the prefixed used). Classes are prefixed with "B-" (first element of
35   * an instance), "I-" (inner element of an instance), "E-" (last element of an
36   * instance), or "BE-" (only element of an instance -- begin and end).
37   * "A" is used for anything else/outside of any class.
38   *
39   * @author Christian Siefkes
40   * @version $Revision: 1.7 $, $Date: 2004/09/15 16:08:45 $, $Author: siefkes $
41   */
42  public class OpenCloseStrategy extends CombinationStrategy {
43  
44      /***
45       * The character terminating each prefix.
46       */
47      private static final char PREFIX_TERMINATOR = '-';
48  
49      /***
50       * Prefix used to classify the first element of an instance.
51       */
52      private static final String BEGIN_PREFIX = "B" + PREFIX_TERMINATOR;
53  
54      /***
55       * Prefix used to classify inner elements of an instance.
56       */
57      private static final String INNER_PREFIX = "I" + PREFIX_TERMINATOR;
58  
59      /***
60       * Prefix used to classify the last element of an instance.
61       */
62      private static final String END_PREFIX = "E" + PREFIX_TERMINATOR;
63  
64      /***
65       * Prefix used to classify the only element of an instance (begin and end).
66       */
67      private static final String BEGIN_END_PREFIX = "BE" + PREFIX_TERMINATOR;
68  
69      /***
70       * String used to classify outside/other. Actually called "A" so it will be
71       * picked in case of a tie.
72       */
73      private static final String OUTSIDE = "A";
74  
75      /***
76       * The set of all classes prefixed with {@link #BEGIN_PREFIX}, stored for
77       * efficiency.
78       */
79      private final TreeSet<String> bClasses = new TreeSet<String>();
80  
81      /***
82       * The set of all classes prefixed with {@link #BEGIN_END_PREFIX}, stored
83       * for efficiency.
84       */
85      private final TreeSet<String> beClasses = new TreeSet<String>();
86  
87      /***
88       * An immutable set of all classes (Strings) that can possible occur
89       * during classification.
90       */
91      private final SortedSet[] allClasses;
92  
93      /***
94       * Creates a new instance.
95       *
96       * @param theClasses a set of valid class names (String)
97       */
98      public OpenCloseStrategy(final Set<String> theClasses) {
99          super(theClasses);
100         final SortedSet<String> allValidClasses = new TreeSet<String>();
101         final Iterator<String> iter = theClasses.iterator();
102         String currentClass;
103 
104         // initialize immutable set of all classes + sets of B/BE classes
105         while (iter.hasNext()) {
106             currentClass = iter.next();
107             bClasses.add(BEGIN_PREFIX + currentClass);
108             beClasses.add(BEGIN_END_PREFIX + currentClass);
109             allValidClasses.add(INNER_PREFIX + currentClass);
110             allValidClasses.add(END_PREFIX + currentClass);
111         }
112 
113         allValidClasses.addAll(bClasses);
114         allValidClasses.addAll(beClasses);
115         allValidClasses.add(OUTSIDE);
116         allClasses = new SortedSet[] {
117             Collections.unmodifiableSortedSet(allValidClasses)
118         };
119     }
120 
121     /***
122      * Builds a set of class names (Strings) to pass to the classifier
123      * to consider for the next decision. This implementation returns the
124      * classes in alphabetic order (using a {@link SortedSet}).
125      *
126      * @return a set of class names
127      */
128     public Set<String>[] activeClasses() {
129         final SortedSet<String> result;
130         final String lastType = state().getType();
131 
132         if ((lastType == null) || state().isEnd()) {
133             // all B + BE classes allowed + O
134             result = new TreeSet<String>(bClasses);
135             result.addAll(beClasses);
136             result.add(OUTSIDE);
137         } else {
138             // only I + E for current class allowed
139             result = new TreeSet<String>();
140             result.add(INNER_PREFIX + lastType);
141             result.add(END_PREFIX + lastType);
142         }
143         return new Set[] {result};
144     }
145 
146     /***
147      * {@inheritDoc}
148      */
149     public Set<String>[] allClasses() {
150         return allClasses;
151     }
152 
153     /***
154      * {@inheritDoc}
155      */
156     public String[] translateCurrentState(final CombinationState currentState)
157             throws IllegalArgumentException {
158         final String result;
159         final String type = currentState.getType();
160 
161         if (type == null) {
162             // null: outside
163             result = OUTSIDE;
164         } else {
165             final String prefix;
166 
167             if (currentState.isBegin()) {
168                 if (currentState.isEnd()) {
169                     prefix = BEGIN_END_PREFIX;
170                 } else {
171                     prefix = BEGIN_PREFIX;
172                 }
173             } else {
174                 if (currentState.isEnd()) {
175                     prefix = END_PREFIX;
176                 } else {
177                     prefix = INNER_PREFIX;
178                 }
179             }
180             result = prefix + type;
181         }
182         return new String[] {result};
183     }
184 
185     /***
186      * {@inheritDoc}
187      */
188     public CombinationState translateResult(
189             final PredictionDistribution[] predictions)
190     throws IllegalArgumentException {
191         final CombinationState result;
192         final String predictedClass = predictions[0].best().getType();
193 
194         if (OUTSIDE.equals(predictedClass)) {
195             // outside any instance
196             result = CombinationState.OUTSIDE;
197         } else {
198             final int prefixEndPos =
199                 predictedClass.indexOf(PREFIX_TERMINATOR);
200 
201             if (prefixEndPos >= 0) {
202                 final String prefix;
203                 final String newClass;
204                 final boolean isBegin;
205                 final boolean isEnd;
206 
207                 prefix = predictedClass.substring(0, prefixEndPos + 1);
208                 newClass = predictedClass.substring(prefixEndPos + 1);
209 
210                 if (BEGIN_PREFIX.equals(prefix)) {
211                     isBegin = true;
212                     isEnd = false;
213                 } else if (BEGIN_END_PREFIX.equals(prefix)) {
214                     isBegin = true;
215                     isEnd = true;
216                 } else if (INNER_PREFIX.equals(prefix)) {
217                     isBegin = false;
218                     isEnd = false;
219                 } else if (END_PREFIX.equals(prefix)) {
220                     isBegin = false;
221                     isEnd = true;
222                 } else {
223                     throw new IllegalArgumentException(
224                         "Prefix " + prefix + " of predicted class "
225                             + predictedClass + " is invalid");
226                 }
227 
228                 if (getValidClasses().contains(newClass)) {
229                     result = new CombinationState(newClass, isBegin, isEnd);
230                 } else {
231                     throw new IllegalArgumentException(
232                         "Type " + newClass + " of predicted class "
233                             + predictedClass + " is invalid");
234                 }
235             } else {
236                 throw new IllegalArgumentException(
237                     "Invalid predicted class (no prefix): " + predictedClass);
238             }
239         }
240         return result;
241     }
242 
243 }