View Javadoc

1   /*
2    * Copyright (C) 2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.combi;
23  
24  import java.util.Collections;
25  import java.util.Iterator;
26  import java.util.Set;
27  import java.util.SortedSet;
28  import java.util.TreeSet;
29  
30  import de.fu_berlin.ties.classify.PredictionDistribution;
31  
32  /***
33   * A combination strategy using begin/after tagging (also called "BIA" tagging
34   * due to the prefixed used). Classes are prefixed with "B-" (first element of
35   * an instance), "I-" (inner element of an instance), "A-" (first element
36   * <em>after</em> an instance that is not part of an other instance).
37   * "A" is used for anything else/outside of any class (except where the "A-"
38   * prefix is used).
39   *
40   * <p>{@link de.fu_berlin.ties.combi.CombinationState#isEnd()} is not
41   * supported by this class and will always return <code>false</code>.
42   *
43   * @author Christian Siefkes
44   * @version $Revision: 1.9 $, $Date: 2004/09/16 11:12:33 $, $Author: siefkes $
45   */
46  public class BeginAfterStrategy extends CombinationStrategy {
47  
48      /***
49       * The character terminating each prefix.
50       */
51      private static final char PREFIX_TERMINATOR = '-';
52  
53      /***
54       * Prefix used to classify the first element of an instance.
55       */
56      private static final String BEGIN_PREFIX = "B" + PREFIX_TERMINATOR;
57  
58      /***
59       * Prefix used to classify elements within instances.
60       */
61      private static final String IN_PREFIX = "I" + PREFIX_TERMINATOR;
62  
63      /***
64       * Prefix used to classify the first element <em>after</em> an instance
65       * that is not part of an other instance.
66       */
67      private static final String AFTER_PREFIX = "A" + PREFIX_TERMINATOR;
68  
69      /***
70       * String used to classify outside/other (except where "A-" is used).
71       * Actually called "A" so it will be picked in case of a tie ("A" is
72       * sorted before "A-" etc.).
73       */
74      private static final String OUTSIDE = "A";
75  
76      /***
77       * The set of all classes prefixed with {@link #BEGIN_PREFIX}, stored for
78       * efficiency.
79       */
80      private final TreeSet<String> bClasses = new TreeSet<String>();
81  
82      /***
83       * An immutable set of all classes (Strings) that can possible occur
84       * during classification.
85       */
86      private final SortedSet[] allClasses;
87  
88      /***
89       * Creates a new instance.
90       *
91       * @param theClasses a set of valid class names (String)
92       */
93      public BeginAfterStrategy(final Set<String> theClasses) {
94          super(theClasses);
95          final SortedSet<String> allValidClasses = new TreeSet<String>();
96          final Iterator<String> iter = theClasses.iterator();
97          String currentClass;
98  
99          // initialize immutable set of all classes + set of B classes
100         while (iter.hasNext()) {
101             currentClass = iter.next();
102             bClasses.add(BEGIN_PREFIX + currentClass);
103             allValidClasses.add(IN_PREFIX + currentClass);
104             allValidClasses.add(AFTER_PREFIX + currentClass);
105         }
106 
107         allValidClasses.addAll(bClasses);
108         allValidClasses.add(OUTSIDE);
109         allClasses = new SortedSet[] {
110             Collections.unmodifiableSortedSet(allValidClasses)
111         };
112     }
113 
114     /***
115      * Builds a set of class names (Strings) to pass to the classifier
116      * to consider for the next decision. This implementation returns the
117      * classes in alphabetic order (using a {@link SortedSet}).
118      *
119      * @return a set of class names
120      */
121     public Set<String>[] activeClasses() {
122         // all B classes are always allowed
123         final SortedSet<String> result = new TreeSet<String>(bClasses);
124         final String lastType = state().getType();
125 
126         if ((lastType == null)) {
127             // O is allowed as well
128             result.add(OUTSIDE);
129         } else {
130             // I + A for current class are allowed as well
131             result.add(IN_PREFIX + lastType);
132             result.add(AFTER_PREFIX + lastType);
133         }
134         return new Set[] {result};
135     }
136 
137     /***
138      * {@inheritDoc}
139      */
140     public Set<String>[] allClasses() {
141         return allClasses;
142     }
143 
144     /***
145      * {@inheritDoc}
146      */
147     public String[] translateCurrentState(final CombinationState currentState)
148             throws IllegalArgumentException {
149         final String result;
150         final String type = currentState.getType();
151         final String lastType = state().getType();
152 
153         if (type == null) {
154             if (lastType != null) {
155                 // mark as after last type
156                 result = AFTER_PREFIX + lastType;
157             } else {
158                 // mark as completely outside
159                 result = OUTSIDE;
160             }
161         } else {
162             if (currentState.isBegin()) {
163                 result = BEGIN_PREFIX + type;
164             } else {
165                 result = IN_PREFIX + type;
166             }
167         }
168         return new String[] {result};
169     }
170 
171     /***
172      * {@inheritDoc}
173      */
174     public CombinationState translateResult(
175             final PredictionDistribution[] predictions)
176     throws IllegalArgumentException {
177         final CombinationState result;
178         final String predictedClass = predictions[0].best().getType();
179 
180         if (OUTSIDE.equals(predictedClass)) {
181             // outside any instance
182             result = CombinationState.OUTSIDE;
183         } else {
184             final int prefixEndPos =
185                 predictedClass.indexOf(PREFIX_TERMINATOR);
186 
187             if (prefixEndPos >= 0) {
188                 final String prefix;
189                 final String newClass;
190 
191                 prefix = predictedClass.substring(0, prefixEndPos + 1);
192                 newClass = predictedClass.substring(prefixEndPos + 1);
193 
194                 if (BEGIN_PREFIX.equals(prefix)) {
195                     result = new CombinationState(newClass, true, false);
196                 } else if (IN_PREFIX.equals(prefix)) {
197                     result = new CombinationState(newClass, false, false);
198                 } else if (AFTER_PREFIX.equals(prefix)) {
199                     // now outside
200                     result = CombinationState.OUTSIDE;
201                 } else {
202                     throw new IllegalArgumentException(
203                         "Prefix " + prefix + " of predicted class "
204                             + predictedClass + " is invalid");
205                 }
206             } else {
207                 throw new IllegalArgumentException(
208                     "Invalid predicted class (no prefix): " + predictedClass);
209             }
210         }
211         return result;
212     }
213 
214 }