1   package de.fu_berlin.ties.text;
2   
3   import junit.framework.TestCase;
4   // JUnitDoclet begin import
5   import de.fu_berlin.ties.TiesConfiguration;
6   // JUnitDoclet end import
7   
8   /***
9   * Generated by JUnitDoclet, a tool provided by
10  * ObjectFab GmbH under LGPL.
11  * Please see www.junitdoclet.org, www.gnu.org
* and www.objectfab.de for information about
13  * the tool, the licence and the authors.
14  */
15  
16  
17  public class TextTokenizerTest
18  // JUnitDoclet begin extends_implements
19  extends TestCase
20  
21  // JUnitDoclet end extends_implements
22  {
23    // JUnitDoclet begin class
24    private TextTokenizer thoroughTokenizer = null;
25    private TextTokenizer configTokenizer = null;
26  
27    private static final String[] inputs = new String[] {
28        "", // empty string
29        " \t\n ", // only whitespace
30        "tokens followed by whitespace\n",
31        "   whitespace followed by tokens",
32        " tokens surrounded by   whitespace\t ",
33        "different2345Kinds-of!!!?tokens+=*without$whitespace",
34        "223D A13C, BH-129 1st.Noon-1 12:00 123,456.00 $37.00 $$45.",
35        "whitespace\tcontaining tabs and control\000 character"
36    };
37  
38    private static final String[][] expectedThoroughOutputs = {
39        { },
40        { },
41        { "tokens", "followed", "by", "whitespace" },
42        { "whitespace", "followed", "by", "tokens" },
43        { "tokens", "surrounded", "by", "whitespace" },
44        { "different", "2345", "Kinds", "-", "of", "!!!", "?", "tokens",
45            "+=", "*", "without", "$", "whitespace" },
46        { "223", "D", "A", "13", "C", ",", "BH", "-", "129", "1", "st", ".",
47              "Noon", "-", "1", "12", ":", "00", "123", ",", "456", ".", "00",
48              "$", "37", ".", "00", "$$", "45", "." },
49        { "whitespace", "containing", "tabs", "and", "control", "character" },
50    };
51    
52    private static final String[][] expectedConfigOutputs = {
53        { },
54        { },
55        { "tokens", "followed", "by", "whitespace" },
56        { "whitespace", "followed", "by", "tokens" },
57        { "tokens", "surrounded", "by", "whitespace" },
58        { "different2345Kinds", "-", "of", "!!!", "?", "tokens",
59            "+=", "*", "without", "$", "whitespace" },
60        { "223D", "A13C", ",", "BH", "-", "129", "1st", ".", "Noon", "-", "1",
61              "12:00", "123,456.00", "$37.00", "$$45", "." },
62        { "whitespace", "containing", "tabs", "and", "control", "character" },
63    };
64    
65    private static final String[][] expectedThoroughWhitespace = {
66      { "" },
67      { " \t\n ", },
68      { "", " ", " ", " ", "\n" },
69      { "   ", " ", " ", " ", "" },
70      { " ", " ", " ", "   ", "\t " },
71      { "", "", "", "", "", "", "", "", "", "", "", "", "", "" },
72      { "", "", " ", "", "", "", " ", "", "", " ", "", "",
73          "", "", "", " ", "", "", " ", "", "", "", "",
74          " ", "", "", "", " ", "", "", "" },
75      { "", "\t", " ", " ", " ", "\000 ", "" },
76    };
77    // JUnitDoclet end class
78    
79    public TextTokenizerTest(String name) {
80      // JUnitDoclet begin method TextTokenizerTest
81      super(name);
82      // JUnitDoclet end method TextTokenizerTest
83    }
84    
85    public de.fu_berlin.ties.text.TextTokenizer createInstance() throws Exception {
86      // JUnitDoclet begin method testcase.createInstance
87      return TokenizerFactory.createThoroughTokenizer("");
88      // JUnitDoclet end method testcase.createInstance
89    }
90    
91    protected void setUp() throws Exception {
92      // JUnitDoclet begin method testcase.setUp
93      super.setUp();
94      thoroughTokenizer = createInstance();
95      configTokenizer =
96          new TokenizerFactory(TiesConfiguration.CONF).createTokenizer("");
97      // JUnitDoclet end method testcase.setUp
98    }
99    
100   protected void tearDown() throws Exception {
101     // JUnitDoclet begin method testcase.tearDown
102     thoroughTokenizer = null;
103     configTokenizer = null;
104     super.tearDown();
105     // JUnitDoclet end method testcase.tearDown
106   }
107   
108   public void testCapturedText() throws Exception {
109     // JUnitDoclet begin method capturedText
110     // JUnitDoclet end method capturedText
111   }
112   
113   public void testHasPrecedingWhitespace() throws Exception {
114     // JUnitDoclet begin method hasPrecedingWhitespace
115     boolean whitespace;
116     int j;
117 
118     for (int i = 0; i < inputs.length; i++) {
119         thoroughTokenizer.reset(inputs[i]);
120         j = 0;
121         String token;
122 
123         while ((token = thoroughTokenizer.nextToken()) != null) {
124             whitespace = thoroughTokenizer.hasPrecedingWhitespace();
125             if (whitespace != expectedThoroughWhitespace[i][j].length() > 0) {
126                 System.out.println("Whitespace failure preceding " + token
127                     + ": query returns " + whitespace + " but expected '"
128                     + expectedThoroughWhitespace[i][j] + "'");
129             }
130             assertEquals(whitespace,
131                 expectedThoroughWhitespace[i][j].length() > 0);
132             j++;
133         }
134 
135         // final whitespace
136         assertEquals(thoroughTokenizer.hasPrecedingWhitespace(),
137             expectedThoroughWhitespace[i][j].length() > 0);
138         assertEquals(expectedThoroughWhitespace[i].length, j+1);
139     }
140     // JUnitDoclet end method hasPrecedingWhitespace
141   }
142   
143   public void testInitialWhitespaceCount() throws Exception {
144     // JUnitDoclet begin method initialWhitespaceCount
145     // JUnitDoclet end method initialWhitespaceCount
146   }
147   
148   public void testIsValidWhitespace() throws Exception {
149     // JUnitDoclet begin method isValidWhitespace
150     // JUnitDoclet end method isValidWhitespace
151   }
152   
153   public void testLeftText() throws Exception {
154     // JUnitDoclet begin method leftText
155     // JUnitDoclet end method leftText
156   }
157   
158   public void testNextToken() throws Exception {
159     // JUnitDoclet begin method nextToken
160     String token;
161     int j;
162 
163     for (int i = 0; i < inputs.length; i++) {
164         thoroughTokenizer.reset(inputs[i]);
165         configTokenizer.reset(inputs[i]);
166         j = 0;
167 
168         while ((token = thoroughTokenizer.nextToken()) != null) {
169             if (!token.equals(expectedThoroughOutputs[i][j])) {
170                 System.out.println("thorough: '" + token + "' != '"
171                     + expectedThoroughOutputs[i][j] + "'");
172             }
173             assertEquals(token, expectedThoroughOutputs[i][j]);
174             j++;
175         }
176         assertEquals(expectedThoroughOutputs[i].length, j);
177 
178         j = 0;
179         while ((token = configTokenizer.nextToken()) != null) {
180             if (!token.equals(expectedConfigOutputs[i][j])) {
181                 System.out.println("config: '" + token + "' != '"
182                     + expectedConfigOutputs[i][j] + "'");
183             }
184             assertEquals(token, expectedConfigOutputs[i][j]);
185             j++;
186         }
187         assertEquals(expectedConfigOutputs[i].length, j);
188     }
189 
190     // various tokenization patterns
191     String[] patterns = {
192     "[^//p{Z}//p{C}]+",                                                 // P
193     "[^//p{Z}//p{C}][-.,://p{L}//p{M}//p{N}]*[^//p{Z}//p{C}]?",         // C
194     "[^//p{Z}//p{C}][-//p{L}//p{M}//p{N}]*[^//p{Z}//p{C}]?",            // S
195     "[^//p{Z}//p{C}][/!?#]?[-//p{L}//p{M}//p{N}]*(?:[\"\'=;]|/?>|:/*)?" // X
196     };
197     TextTokenizer tokenizer;
198     for (String pattern: patterns) {
199         tokenizer = new de.fu_berlin.ties.text.TextTokenizer(
200                 new String[] {pattern},
201                 TokenizerFactory.WHITESPACE_CONTROL_OTHER,
202         "<a href=\"mailto:siefkes@inf.fu-berlin.de\">Click here</a> to mail me."
203                 );
204         while ((token = tokenizer.nextToken()) != null) {
205             System.out.print(token + ' ');
206         }
207         System.out.println();
208     }
209 
210     // JUnitDoclet end method nextToken
211   }
212   
213   public void testPrecedingWhitespace() throws Exception {
214     // JUnitDoclet begin method precedingWhitespace
215     String whitespace;
216     int j;
217 
218     for (int i = 0; i < inputs.length; i++) {
219         thoroughTokenizer.reset(inputs[i]);
220         j = 0;
221 
222         while ((thoroughTokenizer.nextToken()) != null) {
223             whitespace = thoroughTokenizer.precedingWhitespace();
224             assertEquals(whitespace, expectedThoroughWhitespace[i][j]);
225             j++;
226         }
227 
228         // final whitespace
229         assertEquals(thoroughTokenizer.precedingWhitespace(),
230             expectedThoroughWhitespace[i][j]);
231         assertEquals(expectedThoroughWhitespace[i].length, j+1);
232     }
233     // JUnitDoclet end method precedingWhitespace
234   }
235   
236   public void testPrecedingWhitespaceIsValid() throws Exception {
237     // JUnitDoclet begin method precedingWhitespaceIsValid
238     // JUnitDoclet end method precedingWhitespaceIsValid
239   }
240   
241   public void testReset() throws Exception {
242     // JUnitDoclet begin method reset
243     // tested in the other test methods
244     // JUnitDoclet end method reset
245   }
246   
247   public void testRightText() throws Exception {
248     // JUnitDoclet begin method rightText
249     // JUnitDoclet end method rightText
250   }
251   
252   public void testSetGetNormalizedWhitespace() throws Exception {
253     // JUnitDoclet begin method setNormalizedWhitespace getNormalizedWhitespace
254     java.lang.String[] tests = {"", " ", "a", "A", "???", "???", "0123456789", "012345678901234567890", "\n", null};
255     
256     for (int i = 0; i < tests.length; i++) {
257       thoroughTokenizer.setNormalizedWhitespace(tests[i]);
258       assertEquals(tests[i], thoroughTokenizer.getNormalizedWhitespace());
259     }
260     // JUnitDoclet end method setNormalizedWhitespace getNormalizedWhitespace
261   }
262   
263   public void testSetIsNormalizedWhitespacePrepended() throws Exception {
264     // JUnitDoclet begin method setNormalizedWhitespacePrepended isNormalizedWhitespacePrepended
265     boolean[] tests = {true, false};
266     
267     for (int i = 0; i < tests.length; i++) {
268       thoroughTokenizer.setNormalizedWhitespacePrepended(tests[i]);
269       assertEquals(tests[i], thoroughTokenizer.isNormalizedWhitespacePrepended());
270     }
271 
272     String token, expected;
273     int j;
274     thoroughTokenizer.setNormalizedWhitespacePrepended(true);
275 
276     for (int i = 0; i < inputs.length; i++) {
277         thoroughTokenizer.reset(inputs[i]);
278         j = 0;
279 
280         while ((token = thoroughTokenizer.nextToken()) != null) {
281             expected = thoroughTokenizer.hasPrecedingWhitespace() ?
282                 " " + expectedThoroughOutputs[i][j]
283                 : expectedThoroughOutputs[i][j];
284             assertEquals(token, expected);
285             j++;
286         }
287         assertEquals(expectedThoroughOutputs[i].length, j);
288     }
289     // JUnitDoclet end method setNormalizedWhitespacePrepended isNormalizedWhitespacePrepended
290   }
291   
292   public void testSetIsWhitespacePatternEnsured() throws Exception {
293     // JUnitDoclet begin method setWhitespacePatternEnsured isWhitespacePatternEnsured
294     boolean[] tests = {true, false};
295     
296     for (int i = 0; i < tests.length; i++) {
297       thoroughTokenizer.setWhitespacePatternEnsured(tests[i]);
298       assertEquals(tests[i], thoroughTokenizer.isWhitespacePatternEnsured());
299     }
300     // JUnitDoclet end method setWhitespacePatternEnsured isWhitespacePatternEnsured
301   }
302   
303   public void testToString() throws Exception {
304     // JUnitDoclet begin method toString
305     // JUnitDoclet end method toString
306   }
307   
308   public void testTrailingWhitespaceCount() throws Exception {
309     // JUnitDoclet begin method trailingWhitespaceCount
310     // JUnitDoclet end method trailingWhitespaceCount
311   }
312   
313   
314   
315   /***
316   * JUnitDoclet moves marker to this method, if there is not match
317   * for them in the regenerated code and if the marker is not empty.
318   * This way, no test gets lost when regenerating after renaming.
319   * Method testVault is supposed to be empty.
320   */
321   public void testVault() throws Exception {
322     // JUnitDoclet begin method testcase.testVault
323     // JUnitDoclet end method testcase.testVault
324   }
325   
326   public static void main(String[] args) {
327     // JUnitDoclet begin method testcase.main
328     junit.textui.TestRunner.run(TextTokenizerTest.class);
329     // JUnitDoclet end method testcase.main
330   }
331 }