1 package de.fu_berlin.ties.text;
2
3 import junit.framework.TestCase;
4
5 import de.fu_berlin.ties.TiesConfiguration;
6
7
/***
 * Generated by JUnitDoclet, a tool provided by
 * ObjectFab GmbH under LGPL.
 * Please see www.junitdoclet.org, www.gnu.org
 * and www.objectfab.de for information about
 * the tool, the licence and the authors.
 */
15
16
17 public class TextTokenizerTest
18
19 extends TestCase
20
21
22 {
23
24 private TextTokenizer thoroughTokenizer = null;
25 private TextTokenizer configTokenizer = null;
26
27 private static final String[] inputs = new String[] {
28 "",
29 " \t\n ",
30 "tokens followed by whitespace\n",
31 " whitespace followed by tokens",
32 " tokens surrounded by whitespace\t ",
33 "different2345Kinds-of!!!?tokens+=*without$whitespace",
34 "223D A13C, BH-129 1st.Noon-1 12:00 123,456.00 $37.00 $$45.",
35 "whitespace\tcontaining tabs and control\000 character"
36 };
37
38 private static final String[][] expectedThoroughOutputs = {
39 { },
40 { },
41 { "tokens", "followed", "by", "whitespace" },
42 { "whitespace", "followed", "by", "tokens" },
43 { "tokens", "surrounded", "by", "whitespace" },
44 { "different", "2345", "Kinds", "-", "of", "!!!", "?", "tokens",
45 "+=", "*", "without", "$", "whitespace" },
46 { "223", "D", "A", "13", "C", ",", "BH", "-", "129", "1", "st", ".",
47 "Noon", "-", "1", "12", ":", "00", "123", ",", "456", ".", "00",
48 "$", "37", ".", "00", "$$", "45", "." },
49 { "whitespace", "containing", "tabs", "and", "control", "character" },
50 };
51
52 private static final String[][] expectedConfigOutputs = {
53 { },
54 { },
55 { "tokens", "followed", "by", "whitespace" },
56 { "whitespace", "followed", "by", "tokens" },
57 { "tokens", "surrounded", "by", "whitespace" },
58 { "different2345Kinds", "-", "of", "!!!", "?", "tokens",
59 "+=", "*", "without", "$", "whitespace" },
60 { "223D", "A13C", ",", "BH", "-", "129", "1st", ".", "Noon", "-", "1",
61 "12:00", "123,456.00", "$37.00", "$$45", "." },
62 { "whitespace", "containing", "tabs", "and", "control", "character" },
63 };
64
65 private static final String[][] expectedThoroughWhitespace = {
66 { "" },
67 { " \t\n ", },
68 { "", " ", " ", " ", "\n" },
69 { " ", " ", " ", " ", "" },
70 { " ", " ", " ", " ", "\t " },
71 { "", "", "", "", "", "", "", "", "", "", "", "", "", "" },
72 { "", "", " ", "", "", "", " ", "", "", " ", "", "",
73 "", "", "", " ", "", "", " ", "", "", "", "",
74 " ", "", "", "", " ", "", "", "" },
75 { "", "\t", " ", " ", " ", "\000 ", "" },
76 };
77
78
79 public TextTokenizerTest(String name) {
80
81 super(name);
82
83 }
84
85 public de.fu_berlin.ties.text.TextTokenizer createInstance() throws Exception {
86
87 return TokenizerFactory.createThoroughTokenizer("");
88
89 }
90
91 protected void setUp() throws Exception {
92
93 super.setUp();
94 thoroughTokenizer = createInstance();
95 configTokenizer =
96 new TokenizerFactory(TiesConfiguration.CONF).createTokenizer("");
97
98 }
99
100 protected void tearDown() throws Exception {
101
102 thoroughTokenizer = null;
103 configTokenizer = null;
104 super.tearDown();
105
106 }
107
108 public void testCapturedText() throws Exception {
109
110
111 }
112
113 public void testHasPrecedingWhitespace() throws Exception {
114
115 boolean whitespace;
116 int j;
117
118 for (int i = 0; i < inputs.length; i++) {
119 thoroughTokenizer.reset(inputs[i]);
120 j = 0;
121 String token;
122
123 while ((token = thoroughTokenizer.nextToken()) != null) {
124 whitespace = thoroughTokenizer.hasPrecedingWhitespace();
125 if (whitespace != expectedThoroughWhitespace[i][j].length() > 0) {
126 System.out.println("Whitespace failure preceding " + token
127 + ": query returns " + whitespace + " but expected '"
128 + expectedThoroughWhitespace[i][j] + "'");
129 }
130 assertEquals(whitespace,
131 expectedThoroughWhitespace[i][j].length() > 0);
132 j++;
133 }
134
135
136 assertEquals(thoroughTokenizer.hasPrecedingWhitespace(),
137 expectedThoroughWhitespace[i][j].length() > 0);
138 assertEquals(expectedThoroughWhitespace[i].length, j+1);
139 }
140
141 }
142
143 public void testInitialWhitespaceCount() throws Exception {
144
145
146 }
147
148 public void testIsValidWhitespace() throws Exception {
149
150
151 }
152
153 public void testLeftText() throws Exception {
154
155
156 }
157
158 public void testNextToken() throws Exception {
159
160 String token;
161 int j;
162
163 for (int i = 0; i < inputs.length; i++) {
164 thoroughTokenizer.reset(inputs[i]);
165 configTokenizer.reset(inputs[i]);
166 j = 0;
167
168 while ((token = thoroughTokenizer.nextToken()) != null) {
169 if (!token.equals(expectedThoroughOutputs[i][j])) {
170 System.out.println("thorough: '" + token + "' != '"
171 + expectedThoroughOutputs[i][j] + "'");
172 }
173 assertEquals(token, expectedThoroughOutputs[i][j]);
174 j++;
175 }
176 assertEquals(expectedThoroughOutputs[i].length, j);
177
178 j = 0;
179 while ((token = configTokenizer.nextToken()) != null) {
180 if (!token.equals(expectedConfigOutputs[i][j])) {
181 System.out.println("config: '" + token + "' != '"
182 + expectedConfigOutputs[i][j] + "'");
183 }
184 assertEquals(token, expectedConfigOutputs[i][j]);
185 j++;
186 }
187 assertEquals(expectedConfigOutputs[i].length, j);
188 }
189
190
191 String[] patterns = {
192 "[^//p{Z}//p{C}]+",
193 "[^//p{Z}//p{C}][-.,://p{L}//p{M}//p{N}]*[^//p{Z}//p{C}]?",
194 "[^//p{Z}//p{C}][-//p{L}//p{M}//p{N}]*[^//p{Z}//p{C}]?",
195 "[^//p{Z}//p{C}][/!?#]?[-//p{L}//p{M}//p{N}]*(?:[\"\'=;]|/?>|:/*)?"
196 };
197 TextTokenizer tokenizer;
198 for (String pattern: patterns) {
199 tokenizer = new de.fu_berlin.ties.text.TextTokenizer(
200 new String[] {pattern},
201 TokenizerFactory.WHITESPACE_CONTROL_OTHER,
202 "<a href=\"mailto:siefkes@inf.fu-berlin.de\">Click here</a> to mail me."
203 );
204 while ((token = tokenizer.nextToken()) != null) {
205 System.out.print(token + ' ');
206 }
207 System.out.println();
208 }
209
210
211 }
212
213 public void testPrecedingWhitespace() throws Exception {
214
215 String whitespace;
216 int j;
217
218 for (int i = 0; i < inputs.length; i++) {
219 thoroughTokenizer.reset(inputs[i]);
220 j = 0;
221
222 while ((thoroughTokenizer.nextToken()) != null) {
223 whitespace = thoroughTokenizer.precedingWhitespace();
224 assertEquals(whitespace, expectedThoroughWhitespace[i][j]);
225 j++;
226 }
227
228
229 assertEquals(thoroughTokenizer.precedingWhitespace(),
230 expectedThoroughWhitespace[i][j]);
231 assertEquals(expectedThoroughWhitespace[i].length, j+1);
232 }
233
234 }
235
236 public void testPrecedingWhitespaceIsValid() throws Exception {
237
238
239 }
240
241 public void testReset() throws Exception {
242
243
244
245 }
246
247 public void testRightText() throws Exception {
248
249
250 }
251
252 public void testSetGetNormalizedWhitespace() throws Exception {
253
254 java.lang.String[] tests = {"", " ", "a", "A", "???", "???", "0123456789", "012345678901234567890", "\n", null};
255
256 for (int i = 0; i < tests.length; i++) {
257 thoroughTokenizer.setNormalizedWhitespace(tests[i]);
258 assertEquals(tests[i], thoroughTokenizer.getNormalizedWhitespace());
259 }
260
261 }
262
263 public void testSetIsNormalizedWhitespacePrepended() throws Exception {
264
265 boolean[] tests = {true, false};
266
267 for (int i = 0; i < tests.length; i++) {
268 thoroughTokenizer.setNormalizedWhitespacePrepended(tests[i]);
269 assertEquals(tests[i], thoroughTokenizer.isNormalizedWhitespacePrepended());
270 }
271
272 String token, expected;
273 int j;
274 thoroughTokenizer.setNormalizedWhitespacePrepended(true);
275
276 for (int i = 0; i < inputs.length; i++) {
277 thoroughTokenizer.reset(inputs[i]);
278 j = 0;
279
280 while ((token = thoroughTokenizer.nextToken()) != null) {
281 expected = thoroughTokenizer.hasPrecedingWhitespace() ?
282 " " + expectedThoroughOutputs[i][j]
283 : expectedThoroughOutputs[i][j];
284 assertEquals(token, expected);
285 j++;
286 }
287 assertEquals(expectedThoroughOutputs[i].length, j);
288 }
289
290 }
291
292 public void testSetIsWhitespacePatternEnsured() throws Exception {
293
294 boolean[] tests = {true, false};
295
296 for (int i = 0; i < tests.length; i++) {
297 thoroughTokenizer.setWhitespacePatternEnsured(tests[i]);
298 assertEquals(tests[i], thoroughTokenizer.isWhitespacePatternEnsured());
299 }
300
301 }
302
303 public void testToString() throws Exception {
304
305
306 }
307
308 public void testTrailingWhitespaceCount() throws Exception {
309
310
311 }
312
313
314
315 /***
316 * JUnitDoclet moves marker to this method, if there is not match
317 * for them in the regenerated code and if the marker is not empty.
318 * This way, no test gets lost when regenerating after renaming.
319 * Method testVault is supposed to be empty.
320 */
321 public void testVault() throws Exception {
322
323
324 }
325
326 public static void main(String[] args) {
327
328 junit.textui.TestRunner.run(TextTokenizerTest.class);
329
330 }
331 }