1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.io;
23
24 import java.io.BufferedInputStream;
25 import java.io.File;
26 import java.io.FileInputStream;
27 import java.io.FileOutputStream;
28 import java.io.FileWriter;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.InputStreamReader;
32 import java.io.OutputStream;
33 import java.io.OutputStreamWriter;
34 import java.io.PushbackInputStream;
35 import java.io.Reader;
36 import java.io.UnsupportedEncodingException;
37 import java.io.Writer;
38 import java.net.URL;
39 import java.nio.charset.Charset;
40 import java.util.ArrayList;
41 import java.util.List;
42 import java.util.zip.GZIPInputStream;
43 import java.util.zip.GZIPOutputStream;
44
45 import org.apache.commons.configuration.Configuration;
46 import org.apache.commons.lang.StringUtils;
47
48 import de.fu_berlin.ties.TextProcessor;
49 import de.fu_berlin.ties.TiesConfiguration;
50 import de.fu_berlin.ties.text.TextUtils;
51 import de.fu_berlin.ties.util.MutableInt;
52 import de.fu_berlin.ties.util.Util;
53
54 /***
55 * A static class that provides utility constants and methods for I/O.
56 * No instances of this class can be created, only the static members
57 * should be used.
58 *
59 * @author Christian Siefkes
60 * @version $Revision: 1.35 $, $Date: 2006/11/30 12:38:52 $, $Author: siefkes $
61 */
62 public final class IOUtils {
63
/**
 * Configuration and context key: the character set to use when reading and
 * writing local files. If omitted, the default charset of the current
 * platform is used (the <code>openReader</code>/<code>openWriter</code>
 * methods look this key up with a <code>null</code> default).
 */
public static final String KEY_LOCAL_CHARSET = "charset";

/**
 * Configuration key: whether to compress your data in <code>gzip</code>
 * format. Used by
 * {@link #openCompressableOutStream(OutputStream, TiesConfiguration)}.
 */
public static final String CONFIG_COMPRESS_GZIP = "compress.gzip";

/**
 * The standard block size recommended for I/O: 64 * 1024 = 65536 units.
 * Used as the size of the transfer buffers in this class.
 */
public static final int STANDARD_BLOCK_SIZE = 64 * 1024;

/**
 * The standard character set defined by the HTTP standard: {@value}.
 */
public static final String STANDARD_HTTP_CHARSET = "ISO-8859-1";

/**
 * The standard Unicode-compatible character set: {@value}.
 */
public static final String STANDARD_UNICODE_CHARSET = "UTF-8";

/**
 * The character set used internally by Java to represent strings: {@value}.
 */
public static final String INTERNAL_JAVA_CHARSET = "UTF-16";

/**
 * The character separating the base name from the extension of a file
 * (a dot).
 */
public static final char EXT_SEPARATOR = '.';

/**
 * The maximum size of a byte order mark (BOM) in bytes; this many bytes
 * are read ahead (and pushed back if necessary) when sniffing a Unicode
 * stream.
 */
private static final int BOM_SIZE = 4;

/**
 * Default directory that can be given as "parent" when
 * {@linkplain File#File(java.io.File, java.lang.String) constructing} file
 * objects. Initially set to <code>null</code> so the "system-dependent
 * default directory" (typically the working directory) will be used.
 * All reads and writes are synchronized via the {@link #DD_GUARD}.
 */
private static File defaultDirectory = null;

/**
 * Lock object used to guard access to the {@link #defaultDirectory}.
 */
private static final Object DD_GUARD = new Object();
122
123 /***
124 * Creates a file for writing output to. Delegates to
125 * {@link #createOutFile(File, String, String)} without specifying an
126 * extension.
127 *
128 * @param directory the directory in which the file should be created;
129 * if <code>null</code> or not writable, the file is created in the
130 * current working directory instead
131 * @param localName the local name of the input file --- the extension of
132 * this file (if any) is replaced by the specified output extension
133 * @return the create file
134 */
135 public static File createOutFile(final File directory, final
136 String localName) {
137 return createOutFile(directory, localName, null);
138 }
139
140 /***
141 * Creates a file for writing output to. The file is created in the
142 * specified directory if given and writable; otherwise it is created
143 * in the current working directory. The resulting file can also be used
144 * as a directory (by calling its {@link File#mkdir()} method).
145 *
146 * <p>The name of the file is derivated from the given local name by
147 * replacing the extension of the input file (if any) by the specified
148 * <code>outExtension</code>. If a file with this name already exists,
149 * "2" or the next available number is inserted before the extension unless
150 * an unused file name is found.
151 *
152 * <p>E.g. for input file "test.txt" and output extension "out", the file
153 * "test.out" is created. If this file already exists, the file "test2.out"
154 * is created instead (or "test3.out" etc. unless an unsed name is found).
155 *
156 * @param directory the directory in which the file should be created;
157 * if <code>null</code> or not writable, the file is created in the
158 * current working directory instead
159 * @param localName the local name of the input file --- the extension of
160 * this file (if any) is replaced by the specified output extension
161 * @param outExtension the extension to append to the output file
162 * (without a starting dot); ignored if empty or <code>null</code>
163 * @return the create file
164 */
165 public static File createOutFile(final File directory, final
166 String localName, final String outExtension) {
167 return createOutFile(directory, localName, outExtension, null);
168 }
169
170 /***
171 * Creates a file for writing output to. The file is created in the
172 * specified directory if given and writable; otherwise it is created
173 * in the current working directory. The resulting file can also be used
174 * as a directory (by calling its {@link File#mkdir()} method).
175 *
176 * <p>The name of the file is derivated from the given local name by
177 * replacing the extension of the input file (if any) by the specified
178 * <code>outExtension</code>. If a file with this name already exists,
179 * "2" or the next available number is inserted before the extension unless
180 * an unused file name is found.
181 *
182 * <p>E.g. for input file "test.txt" and output extension "out", the file
183 * "test.out" is created. If this file already exists, the file "test2.out"
184 * is created instead (or "test3.out" etc. unless an unsed name is found).
185 *
186 * @param directory the directory in which the file should be created;
187 * if <code>null</code> or not writable, the file is created in the
188 * current working directory instead
189 * @param localName the local name of the input file --- the extension of
190 * this file (if any) is replaced by the specified output extension
191 * @param outExtension the extension to append to the output file
192 * (without a starting dot); ignored if empty or <code>null</code>
193 * @param last if not <code>null</code>, assumped to wrap the last numeric
194 * prefix already in use, i.e. the first tried prefix will the the value
195 * of this + 1; after determining a file name, the value will be set to the
196 * numeric prefix used this time
197 * @return the create file
198 */
199 public static File createOutFile(final File directory, final
200 String localName, final String outExtension,
201 final MutableInt last) {
202 final File myDirectory;
203 if (directory == null) {
204
205 myDirectory = null;
206 } else if (!directory.canWrite()) {
207
208 myDirectory = null;
209 } else {
210 myDirectory = directory;
211 }
212
213 final String inputExt = getExtension(localName);
214 final String baseName;
215 if ("".equals(inputExt)) {
216 baseName = localName;
217 } else {
218
219 baseName = localName.substring(0,
220 localName.length() - inputExt.length() - 1);
221 }
222
223 final String fullExt;
224 if (StringUtils.isEmpty(outExtension)) {
225
226 fullExt = "";
227 } else {
228
229 fullExt = EXT_SEPARATOR + outExtension;
230 }
231
232 final String separator;
233 if ((baseName.length() > 0) && (Character.isDigit(
234 baseName.charAt(baseName.length() - 1)))) {
235
236 separator = "-";
237 } else {
238 separator = "";
239 }
240
241 File result;
242 int counter;
243 if (last == null) {
244
245 counter = 1;
246 result = new File(myDirectory, baseName + fullExt);
247 } else {
248
249 counter = last.getValue() + 1;
250 result = new File(myDirectory, baseName + separator + counter
251 + fullExt);
252 }
253
254 while (result.exists()) {
255
256 counter++;
257 result = new File(myDirectory, baseName + separator + counter
258 + fullExt);
259 }
260
261 if (last != null) {
262
263 last.setValue(counter);
264 }
265
266 return result;
267 }
268
269 /***
270 * Returns the charset used by an InputStreamReader.
271 *
272 * @param reader the reader to check
273 * @return the charset used by the reader
274 */
275 public static Charset determineCharset(final InputStreamReader reader) {
276 return Charset.forName(reader.getEncoding());
277 }
278
279 /***
280 * Returns the canoncical name of the charset used by an InputStreamReader.
281 * This method always returns the <em>canonical</em> (standard) name.
282 * It should preferably be used instead of calling
283 * {@link InputStreamReader#getEncoding()} directly, because that method
284 * often returns a non-standard ("historical") name.
285 *
286 * @param reader the reader to check
287 * @return the canonical name of the charset used by the reader
288 */
289 public static String determineCharsetName(final InputStreamReader reader) {
290 return determineCharset(reader).name();
291 }
292
293 /***
294 * Returns the charset used by an OutputStreamWriter.
295 *
296 * @param writer the writer to check
297 * @return the charset used by the writer
298 */
299 public static Charset determineCharset(final OutputStreamWriter writer) {
300 return Charset.forName(writer.getEncoding());
301 }
302
303 /***
304 * Returns the canoncical name of the charset used by an OutputStreamWriter.
305 * This method always returns the <em>canonical</em> (standard) name.
306 * It should preferably be used instead of calling
307 * {@link OutputStreamWriter#getEncoding()} directly, because that method
308 * often returns a non-standard ("historical") name.
309 *
310 * @param writer the writer to check
311 * @return the canonical name of the charset used by the writer
312 */
313 public static String determineCharsetName(final OutputStreamWriter writer) {
314 return determineCharset(writer).name();
315 }
316
317 /***
318 * Determines the output to directory to use, reading it from the
319 * {@link TextProcessor#KEY_OUT_DIRECTORY} configuration key in a given
320 * configuration.
321 *
322 * @param config configuration used to determine the output directory
323 * (read from the {@link TextProcessor#KEY_OUT_DIRECTORY} key)
324 * @return the output directory to use; or <code>null</code> if none
325 * is configured
326 */
327 public static File determineOutputDirectory(
328 final TiesConfiguration config) {
329 if (config.containsKey(TextProcessor.KEY_OUT_DIRECTORY)) {
330 return new File(config.getString(TextProcessor.KEY_OUT_DIRECTORY));
331 } else {
332 return null;
333 }
334 }
335
336 /***
337 * Returns the base name of a file (the local name without
338 * {@link #getExtension(File)} and preceding {@link #EXT_SEPARATOR dot}).
339 *
340 * @param file the file to check
341 * @return the base name of the given file
342 */
343 public static String getBaseName(final File file) {
344
345
346
347 return getBaseName(getLocalName(file.getName(), false));
348 }
349
350 /***
351 * Helper method that returns the base name of a file name or URL.
352 *
353 * @param localName the {@linkplain #getLocalName(String, boolean) local
354 * name} of the file or URL path to check
355 * @return the base name of the given string
356 */
357 public static String getBaseName(final String localName) {
358 final int lastDot = localName.lastIndexOf(EXT_SEPARATOR);
359
360 if ((lastDot >= 0)) {
361 return localName.substring(0, lastDot);
362 } else {
363
364 return localName;
365 }
366 }
367
368 /***
369 * Returns the base name of an URL (the local name without
370 * {@link #getExtension(File)} and preceding {@link #EXT_SEPARATOR dot}).
371 *
372 * @param url the URL to check
373 * @return the base name of the given URL
374 */
375 public static String getBaseName(final URL url) {
376
377 final String localName = getLocalName(url, false);
378 return getBaseName(localName);
379 }
380
381 /***
382 * Returns an default directory that can be given as "parent" when
383 * {@linkplain File#File(java.io.File, java.lang.String) constructing} file
384 * objects. Initially set to <code>null</code> so the "system-dependent
385 * default directory" (typically the working directory) will be used.
386 *
387 * @return the default directory
388 */
389 public static File getDefaultDirectory() {
390 synchronized (DD_GUARD) {
391 return defaultDirectory;
392 }
393 }
394
395 /***
396 * Returns the extension of a file.
397 *
398 * @param file the file to check
399 * @return the extension of the given file; or an empty string if no
400 * extension exists
401 */
402 public static String getExtension(final File file) {
403
404
405
406 return getExtension(getLocalName(file.getName(), false));
407 }
408
409 /***
410 * Helper method that returns the file extension of a file name or URL.
411 *
412 * @param localName the {@linkplain #getLocalName(String, boolean) local
413 * name} of the file or URL path to check
414 * @return the extension of the given string; or an empty string if no
415 * extension exists
416 */
417 public static String getExtension(final String localName) {
418 final int lastDot = localName.lastIndexOf(EXT_SEPARATOR);
419
420 if ((lastDot >= 0)) {
421 return localName.substring(lastDot + 1);
422 } else {
423 return "";
424 }
425 }
426
427 /***
428 * Returns the file extension of an URL.
429 *
430 * @param url the URL to check
431 * @return the extension of the given URL; or an empty string if no
432 * extension exists
433 */
434 public static String getExtension(final URL url) {
435
436 final String localName = getLocalName(url, false);
437 return getExtension(localName);
438 }
439
440 /***
441 * Returns the local name of an URL. This is the last existing element of
442 * from the path component of the URL, typically the local file name
443 * (without directories).
444 *
445 * <p>For URLs that are recognizable as directories (i.e. end in a slash),
446 * the name of the final directory is returned if <code>acceptDir</code>
447 * is <code>true</code> -- otherwise the empty string is returned.
448 *
449 * @param url the URL to check
450 * @param acceptDir whether to return a final directory name
451 * (see description)
452 * @return the local name
453 */
454 public static String getLocalName(final URL url, final boolean acceptDir) {
455
456
457 final String localName = getLocalName(url.getPath(), acceptDir);
458
459
460 final int firstSemicolon = localName.indexOf(';');
461
462 if (firstSemicolon >= 0) {
463 return localName.substring(0, firstSemicolon);
464 } else {
465 return localName;
466 }
467 }
468
469 /***
470 * Helper method that returns the local name of a file or URL path.
471 *
472 * <p>For paths that are recognizable as directories (i.e. end in a slash),
473 * the name of the final directory is returned if <code>acceptDir</code>
474 * is <code>true</code> -- otherwise the empty string is returned.
475 *
476 * @param path the file or URL path to check
477 * @param acceptDir whether to return a final directory name
478 * (see description)
479 * @return the local name
480 */
481 private static String getLocalName(final String path,
482 final boolean acceptDir) {
483
484 final String usedPath;
485 if (path.endsWith("/") || path.endsWith("//")) {
486
487 if (acceptDir) {
488
489 usedPath = path.substring(0, path.length() - 1);
490 } else {
491
492 return "";
493 }
494 } else {
495
496 usedPath = path;
497 }
498
499 final int lastSlashOrBackslash = Math.max(path.lastIndexOf('/'),
500 path.lastIndexOf('//'));
501
502 final String result;
503 if (lastSlashOrBackslash >= 0) {
504 result = usedPath.substring(lastSlashOrBackslash + 1);
505 } else {
506
507 result = usedPath;
508 }
509 return result;
510 }
511
512 /***
513 * Opens an input stream that might have been compressed in
514 * <code>gzip</code> format. This method autodetects whether the stream
515 * has been compressed and returns a stream that allows accessing the
516 * uncompressed data.
517 *
518 * @param in the raw input stream (either uncompressed or in
519 * <code>gzip</code> format)
520 * @return a stream that allows accessing the uncompressed data
521 * @throws IOException if an I/O error has occurred
522 */
523 public static InputStream openCompressableInStream(final InputStream in)
524 throws IOException {
525
526 final InputStream markableStream;
527 InputStream result;
528
529 if (in.markSupported()) {
530 markableStream = in;
531 } else {
532
533 markableStream = new BufferedInputStream(in);
534 }
535
536
537 markableStream.mark(1024);
538
539
540 try {
541 result = new GZIPInputStream(markableStream);
542 } catch (IOException ioe) {
543
544
545 markableStream.reset();
546 result = markableStream;
547 }
548 return result;
549 }
550
551 /***
552 * Transparently opens an input stream that may use compression to store
553 * the data (in <code>gzip</code> format).
554 *
555 * @param out the original output stream
556 * @param doCompress whether or not to use compression
557 * @return an output stream to use for storing data; will be a
558 * {@link GZIPOutputStream} wrapping <code>out</code> if
559 * <code>doCompress</code> is <code>true</code>; otherwise it will be the
560 * raw <code>out</code> stream
561 * @throws IOException if an I/O error has occurred
562 */
563 public static OutputStream openCompressableOutStream(final OutputStream out,
564 final boolean doCompress) throws IOException {
565 if (doCompress) {
566
567 return new GZIPOutputStream(out);
568 } else {
569
570 return out;
571 }
572 }
573
574 /***
575 * Transparently opens an input stream that may use compression to store
576 * the data (in <code>gzip</code> format). Checks the
577 * {@link #CONFIG_COMPRESS_GZIP} parameter to decide whether or not to
578 * use compression.
579 *
580 * @param out the original output stream
581 * @param config the configuration to use
582 * @return an output stream to use for storing data;
583 * @throws IOException if an I/O error has occurred
584 */
585 public static OutputStream openCompressableOutStream(final OutputStream out,
586 final TiesConfiguration config) throws IOException {
587
588 return openCompressableOutStream(out, config, null);
589 }
590
591 /***
592 * Transparently opens an input stream that may use compression to store
593 * the data (in <code>gzip</code> format). Checks the
594 * {@link #CONFIG_COMPRESS_GZIP} parameter (with an optional suffix) to
595 * decide whether or not to use compression.
596 *
597 * @param out the original output stream
598 * @param config the configuration to use
599 * @param suffix an optional suffix that allows
600 * {@linkplain TiesConfiguration#adaptKey(String, String) overwriting} the
601 * general value of the configuration paramter with a more specified value
602 * @return an output stream to use for storing data
603 * @throws IOException if an I/O error has occurred
604 */
605 public static OutputStream openCompressableOutStream(final OutputStream out,
606 final TiesConfiguration config, final String suffix)
607 throws IOException {
608
609 return openCompressableOutStream(out, config.getBoolean(
610 config.adaptKey(CONFIG_COMPRESS_GZIP, suffix)));
611 }
612
613 /***
614 * Opens an reader on a local file. Uses the {@link #KEY_LOCAL_CHARSET
615 * configured character set} -- if not specified, the default charset of
616 * the current platform is used instead. Compressed files are automatically
617 * decompressed (using the
618 * {@link #openCompressableInStream(InputStream)} method). Don't forget to
619 * finally {@linkplain #tryToClose(Reader) close} any reader you open!
620 *
621 * @param file the file to read
622 * @param config the configuration to use
623 * @return a reader on the local file
624 * @throws IOException if an I/O error has occurred
625 */
626 public static InputStreamReader openReader(final File file,
627 final Configuration config) throws IOException {
628
629 return openReader(file, config.getString(KEY_LOCAL_CHARSET, null));
630 }
631
632 /***
633 * Opens an reader on a local file, using a given charset. Compressed files
634 * are automatically decompressed (using the
635 * {@link #openCompressableInStream(InputStream)} method). Don't forget to
636 * finally {@linkplain #tryToClose(Reader) close} any reader you open!
637 *
638 * @param file the file to read
639 * @param charset the character set to use for reading the file;
640 * if <code>null</code>, the default charset of the current platform is used
641 * @return a reader on the local file
642 * @throws IOException if an I/O error has occurred
643 */
644 public static InputStreamReader openReader(final File file,
645 final String charset) throws IOException {
646 final InputStream in =
647 openCompressableInStream(new FileInputStream(file));
648 final InputStreamReader reader;
649
650 if (charset == null) {
651
652 reader = new InputStreamReader(in);
653 } else {
654
655 reader = new InputStreamReader(in, charset);
656 }
657 return reader;
658 }
659
660 /***
661 * Opens a reader on an input stream that uses a Unicode character set
662 * (UTF-8, UTF-16, or UTF-32) and optionally a
663 * <a href="http://www.unicode.org/unicode/faq/utf_bom.html">BOM
664 * (byte order mark)</a> to identify the used charset. UTF-8 is used if BOM
665 * is not found.
666 *
667 * <p>Adapted from the
668 * <a href="http://koti.mbnet.fi/akini/java/unicodereader/"
669 * ><code>UnicodeReader</code></a> class created by Thomas Weidenfeller and
670 * Aki Nieminen.
671 *
672 * @param in the input stream to wrap
673 * @return a reader on the stream
674 * @throws IOException if an I/O error has occurred
675 */
676 public static InputStreamReader openUnicodeReader(final InputStream in)
677 throws IOException {
678 final PushbackInputStream pushbackIn =
679 new PushbackInputStream(in, BOM_SIZE);
680
681 final String encoding;
682 final byte[] bom = new byte[BOM_SIZE];
683 final int n, unread;
684 n = pushbackIn.read(bom, 0, bom.length);
685
686
687 if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB)
688 && (bom[2] == (byte) 0xBF)) {
689 encoding = STANDARD_UNICODE_CHARSET;
690 unread = n - 3;
691 } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
692 encoding = "UTF-16BE";
693 unread = n - 2;
694 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
695 encoding = "UTF-16LE";
696 unread = n - 2;
697 } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00)
698 && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
699 encoding = "UTF-32BE";
700 unread = n - 4;
701 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)
702 && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
703 encoding = "UTF-32LE";
704 unread = n - 4;
705 } else {
706
707 encoding = STANDARD_UNICODE_CHARSET;
708 unread = n;
709 }
710
711
712 if (unread > 0) {
713 pushbackIn.unread(bom, (n - unread), unread);
714 }
715
716
717 return new InputStreamReader(pushbackIn, encoding);
718 }
719
720 /***
721 * Opens a writer that uses the standard Unicode character set
722 * {@linkplain #STANDARD_UNICODE_CHARSET UTF-8}.
723 *
724 * @param out the output stream to wrap
725 * @return a writer on the stream
726 */
727 public static OutputStreamWriter openUnicodeWriter(final OutputStream out) {
728 try {
729 return new OutputStreamWriter(out, STANDARD_UNICODE_CHARSET);
730 } catch (UnsupportedEncodingException uee) {
731
732 throw new RuntimeException("Unexpected error: virtual machine does "
733 + "not support the standard charaset set"
734 + STANDARD_HTTP_CHARSET);
735 }
736 }
737
738 /***
739 * Opens an writer on a local file. Uses the {@link #KEY_LOCAL_CHARSET
740 * configured character set} -- if not specified, the default charset of
741 * the current platform is used instead. Don't forget to finally
742 * {@linkplain #tryToClose(Writer) close} any writer you open!
743 *
744 * @param file the file to write to
745 * @param config the configuration to use
746 * @return a writer on the local file
747 * @throws IOException if the file is a directory or for some
748 * other reason cannot be opened for writing
749 * @throws UnsupportedEncodingException if the named charset is not
750 * supported
751 */
752 public static Writer openWriter(final File file,
753 final Configuration config)
754 throws IOException, UnsupportedEncodingException {
755
756 return openWriter(file, config.getString(KEY_LOCAL_CHARSET, null));
757 }
758
759 /***
760 * Opens an writer on a local file, using a given charset. Don't forget to
761 * finally {@linkplain #tryToClose(Writer) close} any writer you open!
762 *
763 * @param file the file to write to
764 * @param charset the character set to use for writing the file;
765 * if <code>null</code>, the default charset of the current platform is used
766 * @return a writer on the local file
767 * @throws IOException if the file is a directory or for some
768 * other reason cannot be opened for writing
769 * @throws UnsupportedEncodingException if the named charset is not
770 * supported
771 */
772 public static Writer openWriter(final File file, final String charset)
773 throws IOException, UnsupportedEncodingException {
774 final Writer writer;
775
776 if (charset == null) {
777
778 writer = new FileWriter(file);
779 } else {
780
781 writer =
782 new OutputStreamWriter(new FileOutputStream(file), charset);
783 }
784 return writer;
785 }
786
787 /***
788 * Reads the contents of a reader into a string. The reader is <em>not</em>
789 * closed by this method.
790 *
791 * @param reader the reader to use
792 * @return the contents of the reader
793 * @throws IOException if an I/O error occurs
794 */
795 public static String readToString(final Reader reader) throws IOException {
796 final StringBuilder result = new StringBuilder();
797
798 final char[] buffer = new char[STANDARD_BLOCK_SIZE];
799 int charsRead;
800
801 while ((charsRead = reader.read(buffer)) > -1) {
802 result.append(buffer, 0, charsRead);
803 }
804
805 return result.toString();
806 }
807
808 /***
809 * Reads the contents of a reader into a writer. Neither reader nor writer
810 * are closed by this method.
811 *
812 * @param reader the reader to read from
813 * @param writer the writer to write to write to
814 * @return the number of characters read
815 * @throws IOException if an I/O error occurs
816 */
817 public static int readToWriter(final Reader reader, final Writer writer)
818 throws IOException {
819 final char[] buffer = new char[STANDARD_BLOCK_SIZE];
820 int charsRead;
821 int allCharsRead = 0;
822
823 while ((charsRead = reader.read(buffer)) > -1) {
824 writer.write(buffer, 0, charsRead);
825 allCharsRead += charsRead;
826 }
827
828 writer.flush();
829 return allCharsRead;
830 }
831
832 /***
833 * Reads available bytes from the input stream until the <code>bytes</code>
834 * array is full or end-of-input is reached or an end-of-line character is
835 * encountered. The last condition holds iff the last read byte is 10 or 13,
836 * i.e. '\n' or '\r' in ASCII- and UTF-8-compatible charsets).
837 *
838 * @param in the stream to read from
839 * @param bytes the array of bytes to fill
840 * @return the number of bytes read (will be > 0 and <= bytes.length).
841 * @throws IOException if an error occurs while reading from the stream
842 * or if reading times out
843 */
844 public static int readUntilLineEnd(final InputStream in, final byte[] bytes)
845 throws IOException {
846 int bytesStored = 0;
847 int newBytesRead;
848 boolean done = false;
849
850 do {
851
852 newBytesRead = in.read(bytes, bytesStored,
853 bytes.length - bytesStored);
854
855
856 if (newBytesRead < 0) {
857
858 done = true;
859 } else {
860 bytesStored += newBytesRead;
861
862 if (bytes[bytesStored-1] == 10 || bytes[bytesStored-1] == 13) {
863 done = true;
864 } else if (bytesStored == bytes.length) {
865 done = true;
866 }
867 }
868 } while (!done);
869
870 return bytesStored;
871 }
872
873 /***
874 * Converts an URI list as defined in RFC 2483 (MIME type
875 * <code>text/uri-list</code>) into an array of strings. Comment lines
876 * in the input are ignored.
877 *
878 * @param in the URI list to convert
879 * @return an array of strings containing the URIs/URLs listed in the input
880 */
881 public static String[] readURIList(final CharSequence in) {
882 final String[] lines = TextUtils.splitLines(in);
883 final List<String> uriList = new ArrayList<String>(lines.length);
884
885
886 for (int i = 0; i < lines.length; i++) {
887 if (!lines[i].startsWith("#")) {
888 uriList.add(lines[i]);
889 }
890 }
891
892
893 String[] result = new String[uriList.size()];
894 return uriList.toArray(result);
895 }
896
897 /***
898 * Converts an URI list as defined in RFC 2483 (MIME type
899 * <code>text/uri-list</code>) into an array of strings. Comment lines
900 * in the input are ignored.
901 *
902 * @param in a reader containing the URI list to convert
903 * @return an array of strings containing the URIs/URLs listed in the input
904 * @throws IOException if an I/O error occurs
905 */
906 public static String[] readURIList(final Reader in) throws IOException {
907 final String inSequence = readToString(in);
908 return readURIList(inSequence);
909 }
910
911 /***
912 * Resolves a filename. Relative filenames are resolved against a given
913 * parent directory, while absolute filenames are returned as is.
914 *
915 * @param directory the parent directory (only used for relative filenames)
916 * @param filename the name of the file
917 * @return an object representing the file
918 */
919 public static File resolveFilename(final File directory,
920 final String filename) {
921 final File file = new File(filename);
922 if (file.isAbsolute()) {
923
924 return file;
925 } else {
926
927 return new File(directory, filename);
928 }
929 }
930
931 /***
932 * Modifies the default directory that can be given as "parent" when
933 * {@linkplain File#File(java.io.File, java.lang.String) constructing} file
934 * objects. Can be set to <code>null</code> so the "system-dependent
935 * default directory" (typically the working directory) will be used.
936 *
937 * @param directory the new directory to use
938 * @throws IllegalArgumentException if the given directory is neither
939 * <code>null</code> nor an existing directory
940 */
941 public static void setDefaultDirectory(final File directory)
942 throws IllegalArgumentException {
943 if ((directory != null) && !directory.isDirectory()) {
944 throw new IllegalArgumentException("Default directory must be "
945 + "<null> or an existing directory, but " + directory
946 + " is not");
947 }
948
949 synchronized (DD_GUARD) {
950 defaultDirectory = directory;
951 }
952 }
953
954 /***
955 * Convenience method for closing an input stream. If the specified stream
956 * is <code>null</code>, this method does nothing. Any {@link
957 * java.io.IOException} thrown during closing is swallowed by this method.
958 *
959 * @param in the input stream to close (might be <code>null</code>)
960 * @return <code>true</code> if the stream was closed successfully,
961 * <code>false</code> otherwise (the stream was <code>null</code> or an
962 * exception was thrown during closing)
963 */
964 public static boolean tryToClose(final InputStream in) {
965 boolean result = false;
966
967 if (in != null) {
968 try {
969 in.close();
970 result = true;
971 } catch (IOException ioe) {
972 Util.LOG.warn("Exception while trying to close InputStream "
973 + in + ": " + ioe.toString());
974 }
975 }
976
977 return result;
978 }
979
980 /***
981 * Convenience method for closing an output stream. If the specified stream
982 * is <code>null</code>, this method does nothing. Any {@link
983 * java.io.IOException} thrown during closing is swallowed by this method.
984 *
985 * @param out the output stream to close (might be <code>null</code>)
986 * @return <code>true</code> if the stream was closed successfully,
987 * <code>false</code> otherwise (the stream was <code>null</code> or an
988 * exception was thrown during closing)
989 */
990 public static boolean tryToClose(final OutputStream out) {
991 boolean result = false;
992
993 if (out != null) {
994 try {
995 out.close();
996 result = true;
997 } catch (IOException ioe) {
998 Util.LOG.warn("Exception while trying to close OutputStream "
999 + out + ": " + ioe.toString());
1000 }
1001 }
1002
1003 return result;
1004 }
1005
1006 /***
1007 * Convenience method for closing a reader. If the specified reader is
1008 * <code>null</code>, this method does nothing. Any {@link
1009 * java.io.IOException} thrown during closing is swallowed by this method.
1010 *
1011 * @param reader the reader to close (might be <code>null</code>)
1012 * @return <code>true</code> if the reader was closed successfully,
1013 * <code>false</code> otherwise (reader was <code>null</code> or an
1014 * exception was thrown during closing)
1015 */
1016 public static boolean tryToClose(final Reader reader) {
1017 boolean result = false;
1018
1019 if (reader != null) {
1020 try {
1021 reader.close();
1022 result = true;
1023 } catch (IOException ioe) {
1024 Util.LOG.warn("Exception while trying to close Reader "
1025 + reader + ": " + ioe.toString());
1026 }
1027 }
1028
1029 return result;
1030 }
1031
1032 /***
1033 * Convenience method for closing a writer. If the specified writer is
1034 * <code>null</code>, this method does nothing. Any {@link
1035 * java.io.IOException} thrown during closing is swallowed by this method.
1036 *
1037 * @param writer the writer to close (might be <code>null</code>)
1038 * @return <code>true</code> if the writer was closed successfully,
1039 * <code>false</code> otherwise (writer was <code>null</code> or an
1040 * exception was thrown during closing)
1041 */
1042 public static boolean tryToClose(final Writer writer) {
1043 boolean result = false;
1044
1045 if (writer != null) {
1046 try {
1047 writer.close();
1048 result = true;
1049 } catch (IOException ioe) {
1050 Util.LOG.warn("Exception while trying to close Writer "
1051 + writer + ": " + ioe.toString());
1052 }
1053 }
1054
1055 return result;
1056 }
1057
1058 /***
1059 * Returns the user's current working directory. This is just a shortcut
1060 * for calling <code>System.getProperty("user.dir")</code>.
1061 *
1062 * @return the current working directory
1063 */
1064 public static String userDir() {
1065 return System.getProperty("user.dir");
1066 }
1067
1068 /***
1069 * Returns the user's home directory. This is just a shortcut for calling
1070 * <code>System.getProperty("user.home")</code>.
1071 *
1072 * @return the home directory
1073 */
1074 public static String userHome() {
1075 return System.getProperty("user.home");
1076 }
1077
1078 /***
1079 * Writes a line of text to a writer, followed by a
1080 * {@linkplain TextUtils#LINE_SEPARATOR line separator}.
1081 *
1082 * @param text the text to send to the writer
1083 * @param writer the writer to write to; will neither be flushed nor closed
1084 * by this method
1085 * @throws IOException if an I/O error occurs
1086 */
1087 public static void writeLine(final String text, final Writer writer)
1088 throws IOException {
1089 writer.write(text);
1090 writer.write(TextUtils.LINE_SEPARATOR);
1091 }
1092
1093 /***
1094 * Writes the contents of a character sequence to a writer. The writer
1095 * is flushed but not closed by this method.
1096 *
1097 * @param input the text to send to the writer
1098 * @param writer the writer to write to; flushed but not closed by this
1099 * method
1100 * @throws IOException if an I/O error occurs
1101 */
1102 public static void writeToWriter(final CharSequence input,
1103 final Writer writer) throws IOException {
1104 writer.write(input.toString());
1105 writer.flush();
1106 }
1107
1108 /***
1109 * Private constructor prevents creation of instances.
1110 */
1111 private IOUtils() {
1112 super();
1113 }
1114
1115 }