View Javadoc

1   /*
2    * Copyright (C) 2003-2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.io;
23  
24  import java.io.BufferedInputStream;
25  import java.io.File;
26  import java.io.FileInputStream;
27  import java.io.FileOutputStream;
28  import java.io.FileWriter;
29  import java.io.IOException;
30  import java.io.InputStream;
31  import java.io.InputStreamReader;
32  import java.io.OutputStream;
33  import java.io.OutputStreamWriter;
34  import java.io.PushbackInputStream;
35  import java.io.Reader;
36  import java.io.UnsupportedEncodingException;
37  import java.io.Writer;
38  import java.net.URL;
39  import java.nio.charset.Charset;
40  import java.util.ArrayList;
41  import java.util.List;
42  import java.util.zip.GZIPInputStream;
43  import java.util.zip.GZIPOutputStream;
44  
45  import org.apache.commons.configuration.Configuration;
46  import org.apache.commons.lang.StringUtils;
47  
48  import de.fu_berlin.ties.TextProcessor;
49  import de.fu_berlin.ties.TiesConfiguration;
50  import de.fu_berlin.ties.text.TextUtils;
51  import de.fu_berlin.ties.util.MutableInt;
52  import de.fu_berlin.ties.util.Util;
53  
54  /***
55   * A static class that provides utility constants and methods for I/O.
56   * No instances of this class can be created, only the static members
57   * should be used.
58   *
59   * @author Christian Siefkes
60   * @version $Revision: 1.21 $, $Date: 2004/12/09 18:10:32 $, $Author: siefkes $
61   */
62  public final class IOUtils {
63  
    /**
     * Configuration and context key: the character set to use when reading and
     * writing local files. If omitted, the default charset of the current
     * platform is used.
     */
    public static final String KEY_LOCAL_CHARSET = "charset";

    /**
     * Configuration key: whether to compress data in <code>gzip</code>
     * format. Used by
     * {@link #openCompressableOutStream(OutputStream, TiesConfiguration)}.
     */
    public static final String CONFIG_COMPRESS_GZIP = "compress.gzip";

    /**
     * The standard block size recommended for I/O (64 KiB). Used as buffer
     * size (in characters) by {@link #readToString(Reader)} and
     * {@link #readToWriter(Reader, Writer)}.
     */
    public static final int STANDARD_BLOCK_SIZE = 64 * 1024;

    /**
     * The standard character set defined by the HTTP standard.
     */
    public static final String STANDARD_HTTP_CHARSET = "ISO-8859-1";

    /**
     * The standard Unicode-compatible character set.
     */
    public static final String STANDARD_UNICODE_CHARSET = "UTF-8";

    /**
     * The character separating the base name from the extension of a file
     * (a dot).
     */
    public static final char EXT_SEPARATOR = '.';

    /**
     * The maximum size of a BOM (byte order mark) in bytes. Determines how
     * many leading bytes {@link #openUnicodeReader(InputStream)} inspects.
     */
    private static final int BOM_SIZE = 4;

    /**
     * Default directory that can be given as "parent" when
     * {@linkplain File#File(java.io.File, java.lang.String) constructing} file
     * objects. Initially set to <code>null</code> so the "system-dependent
     * default directory" (typically the working directory) will be used.
     * Synchronized via the {@link #DD_GUARD}.
     */
    private static File defaultDirectory = null;

    /**
     * Used to guard synchronization of the {@link #defaultDirectory}.
     */
    private static final Object DD_GUARD = new Object();
117 
118     /***
119      * Creates a file for writing output to. Delegates to
120      * {@link #createOutFile(File, String, String)} without specifying an
121      * extension.
122      *
123      * @param directory the directory in which the file should be created;
124      * if <code>null</code> or not writable, the file is created in the
125      * current working directory instead
126      * @param localName the local name of the input file --- the extension of
127      * this file (if any) is replaced by the specified output extension
128      * @return the create file
129      */
130     public static File createOutFile(final File directory, final
131             String localName) {
132         return createOutFile(directory, localName, null);
133     }
134 
135     /***
136      * Creates a file for writing output to. The file is created in the
137      * specified directory if given and writable; otherwise it is created
138      * in the current working directory.  The resulting file can also be used
139      * as a directory (by calling its {@link File#mkdir()} method).
140      *
141      * <p>The name of the file is derivated from the given local name by
142      * replacing the extension of the input file (if any) by the specified
143      * <code>outExtension</code>. If a file with this name already exists,
144      * "2" or the next available number is inserted before the extension unless
145      * an unused file name is found.
146      *
147      * <p>E.g. for input file "test.txt" and output extension "out", the file
148      * "test.out" is created. If this file already exists, the file "test2.out"
149      * is created instead (or "test3.out" etc. unless an unsed name is found).
150      *
151      * @param directory the directory in which the file should be created;
152      * if <code>null</code> or not writable, the file is created in the
153      * current working directory instead
154      * @param localName the local name of the input file --- the extension of
155      * this file (if any) is replaced by the specified output extension
156      * @param outExtension the extension to append to the output file
157      * (without a starting dot); ignored if empty or <code>null</code>
158      * @return the create file
159      */
160     public static File createOutFile(final File directory, final
161             String localName, final String outExtension) {
162         return createOutFile(directory, localName, outExtension, null);
163     }
164 
165     /***
166      * Creates a file for writing output to. The file is created in the
167      * specified directory if given and writable; otherwise it is created
168      * in the current working directory.  The resulting file can also be used
169      * as a directory (by calling its {@link File#mkdir()} method).
170      *
171      * <p>The name of the file is derivated from the given local name by
172      * replacing the extension of the input file (if any) by the specified
173      * <code>outExtension</code>. If a file with this name already exists,
174      * "2" or the next available number is inserted before the extension unless
175      * an unused file name is found.
176      *
177      * <p>E.g. for input file "test.txt" and output extension "out", the file
178      * "test.out" is created. If this file already exists, the file "test2.out"
179      * is created instead (or "test3.out" etc. unless an unsed name is found).
180      *
181      * @param directory the directory in which the file should be created;
182      * if <code>null</code> or not writable, the file is created in the
183      * current working directory instead
184      * @param localName the local name of the input file --- the extension of
185      * this file (if any) is replaced by the specified output extension
186      * @param outExtension the extension to append to the output file
187      * (without a starting dot); ignored if empty or <code>null</code>
188      * @param last if not <code>null</code>, assumped to wrap the last numeric
189      * prefix already in use, i.e. the first tried prefix will the the value
190      * of this + 1; after determining a file name, the value will be set to the
191      * numeric prefix used this time
192      * @return the create file
193      */
194     public static File createOutFile(final File directory, final
195             String localName, final String outExtension,
196             final MutableInt last) {
197         final File myDirectory;
198         if (directory == null) {
199             // no directory given: write to working dir
200             myDirectory = null;
201         } else if (!directory.canWrite()) {
202             // cannot write to specified directory: write to working dir
203             myDirectory = null;
204         } else {
205             myDirectory = directory;
206         }
207 
208         final String inputExt = getExtension(localName);
209         final String baseName;
210         if ("".equals(inputExt)) {
211             baseName = localName;
212         } else {
213             // remove input extension + preceding dot
214             baseName = localName.substring(0,
215                 localName.length() - inputExt.length() - 1);
216         }
217 
218         final String fullExt;
219         if (StringUtils.isEmpty(outExtension)) {
220             // ignore empty or null extension
221             fullExt = "";
222         } else {
223             // prepend dot
224             fullExt = EXT_SEPARATOR + outExtension;
225         }
226 
227         final String separator;
228         if ((baseName.length() > 0) && (Character.isDigit(
229                 baseName.charAt(baseName.length() - 1)))) {
230             // base name ends in a digit: separate with "-" from the counter
231             separator = "-";
232         } else {
233             separator = "";
234         }
235 
236         File result;
237         int counter;
238         if (last == null) {
239             // initially try without extension
240             counter = 1;
241             result = new File(myDirectory, baseName + fullExt);
242         } else {
243             // start with last value + 1
244             counter = last.getValue() + 1;
245             result = new File(myDirectory, baseName + separator + counter
246                 + fullExt);
247         }
248 
249         while (result.exists()) {
250             // increate count until a non-existing file has been found
251             counter++;
252             result = new File(myDirectory, baseName + separator + counter
253                 + fullExt);
254         }
255 
256         if (last != null) {
257             // store used counter
258             last.setValue(counter);
259         }
260 
261         return result;
262     }
263 
264     /***
265      * Returns the charset used by an InputStreamReader.
266      *
267      * @param reader the reader to check
268      * @return the charset used by the reader
269      */
270     public static Charset determineCharset(final InputStreamReader reader) {
271         return Charset.forName(reader.getEncoding());
272     }
273 
274     /***
275      * Returns the canoncical name of the charset used by an InputStreamReader.
276      * This method always returns the <em>canonical</em> (standard) name.
277      * It should preferably be used instead of calling
278      * {@link InputStreamReader#getEncoding()} directly, because that method
279      * often returns a non-standard ("historical") name.
280      *
281      * @param reader the reader to check
282      * @return the canonical name of the charset used by the reader
283      */
284     public static String determineCharsetName(final InputStreamReader reader) {
285         return determineCharset(reader).name();
286     }
287 
288     /***
289      * Returns the charset used by an OutputStreamWriter.
290      *
291      * @param writer the writer to check
292      * @return the charset used by the writer
293      */
294     public static Charset determineCharset(final OutputStreamWriter writer) {
295         return Charset.forName(writer.getEncoding());
296     }
297 
298     /***
299      * Returns the canoncical name of the charset used by an OutputStreamWriter.
300      * This method always returns the <em>canonical</em> (standard) name.
301      * It should preferably be used instead of calling
302      * {@link OutputStreamWriter#getEncoding()} directly, because that method
303      * often returns a non-standard ("historical") name.
304      *
305      * @param writer the writer to check
306      * @return the canonical name of the charset used by the writer
307      */
308     public static String determineCharsetName(final OutputStreamWriter writer) {
309         return determineCharset(writer).name();
310     }
311 
312     /***
313      * Determines the output to directory to use, reading it from the
314      * {@link TextProcessor#KEY_OUT_DIRECTORY} configuration key in a given
315      * configuration.
316      *
317      * @param config configuration used to determine the output directory
318      * (read from the {@link TextProcessor#KEY_OUT_DIRECTORY} key)
319      * @return the output directory to use; or <code>null</code> if none
320      * is configured
321      */
322     public static File determineOutputDirectory(
323             final TiesConfiguration config) {
324         if (config.containsKey(TextProcessor.KEY_OUT_DIRECTORY)) {
325             return new File(config.getString(TextProcessor.KEY_OUT_DIRECTORY));
326         } else {
327             return null;
328         }
329     }
330 
331     /***
332      * Returns the base name of a file (the local name without
333      * {@link #getExtension(File)} and preceding {@link #EXT_SEPARATOR dot}).
334      *
335      * @param file the file to check
336      * @return the base name of the given file
337     */
338     public static String getBaseName(final File file) {
339         // for file names valid in the current file system, the surrounding
340         // call to getLocalName should be unnecessary; but it's helpful to
341         // ensure correctness for file names from other OS
342         return getBaseName(getLocalName(file.getName(), false));
343     }
344 
345     /***
346      * Helper method that returns the base name of a file name or URL.
347      *
348      * @param name the local name of the file or URL path to check
349      * @return the base name of the given string
350      */
351     private static String getBaseName(final String name) {
352         final int lastDot = name.lastIndexOf(EXT_SEPARATOR);
353 
354         if ((lastDot >= 0)) {
355             return name.substring(0, lastDot);
356         } else {
357             // return full name
358             return name;
359         }
360     }
361 
362     /***
363      * Returns the base name of an URL (the local name without
364      * {@link #getExtension(File)} and preceding {@link #EXT_SEPARATOR dot}).
365      *
366      * @param url the URL to check
367      * @return the base name of the given URL
368     */
369     public static String getBaseName(final URL url) {
370         // use local name (files only, no directories)
371         final String localName = getLocalName(url, false);
372         return getBaseName(localName);
373     }
374 
375     /***
376      * Returns an default directory that can be given as "parent" when
377      * {@linkplain File#File(java.io.File, java.lang.String) constructing} file
378      * objects. Initially set to <code>null</code> so the "system-dependent
379      * default directory" (typically the working directory) will be used.
380      *
381      * @return the default directory
382      */
383     public static File getDefaultDirectory() {
384         synchronized (DD_GUARD) {
385             return defaultDirectory;
386         }
387     }
388 
389     /***
390      * Returns the extension of a file.
391      *
392      * @param file the file to check
393      * @return the extension of the given file; or an empty string if no
394      * extension exists
395     */
396     public static String getExtension(final File file) {
397         // for file names valid in the current file system, the surrounding
398         // call to getLocalName should be unnecessary; but it's helpful to
399         // ensure correctness for file names from other OS
400         return getExtension(getLocalName(file.getName(), false));
401     }
402 
403     /***
404      * Helper method that returns the file extension of a file name or URL.
405      *
406      * @param name the local name of the file or URL path to check
407      * @return the extension of the given string; or an empty string if no
408      * extension exists
409      */
410     private static String getExtension(final String name) {
411         final int lastDot = name.lastIndexOf(EXT_SEPARATOR);
412 
413         if ((lastDot >= 0)) {
414             return name.substring(lastDot + 1);
415         } else {
416             return "";
417         }
418     }
419 
420     /***
421      * Returns the file extension of an URL.
422      *
423      * @param url the URL to check
424      * @return the extension of the given URL; or an empty string if no
425      * extension exists
426     */
427     public static String getExtension(final URL url) {
428         // use local name (files only, no directories)
429         final String localName = getLocalName(url, false);
430         return getExtension(localName);
431     }
432 
433     /***
434      * Returns the local name of an URL. This is the last existing element of
435      * from the path component of the URL, typically the local file name
436      * (without directories).
437      *
438      * <p>For URLs that are recognizable as directories (i.e. end in a slash),
439      * the name of the final directory is returned if <code>acceptDir</code>
440      * is <code>true</code> -- otherwise the empty string is returned.
441      *
442      * @param url the URL to check
443      * @param acceptDir whether to return a final directory name
444      * (see description)
445      * @return the local name
446      */
447     public static String getLocalName(final URL url, final boolean acceptDir) {
448         // the path component of an URL is what we need to check
449         // (without a final ?-query, #-reference or ;-session info)
450         final String localName = getLocalName(url.getPath(), acceptDir);
451 
452         // remove parameters after ";" (e.g. session info) if any exist
453         final int firstSemicolon = localName.indexOf(';');
454 
455         if (firstSemicolon >= 0) {
456             return localName.substring(0, firstSemicolon);
457         } else {
458             return localName;
459         }
460     }
461 
462     /***
463      * Helper method that returns the local name of a file or URL path.
464      *
465      * <p>For paths that are recognizable as directories (i.e. end in a slash),
466      * the name of the final directory is returned if <code>acceptDir</code>
467      * is <code>true</code> -- otherwise the empty string is returned.
468      *
469      * @param path the file or URL path to check
470      * @param acceptDir whether to return a final directory name
471      * (see description)
472      * @return the local name
473      */
474     private static String getLocalName(final String path,
475             final boolean acceptDir) {
476         // check for both '/' (Unix, URLs) and '\' (Windows, just to make sure)
477         final String usedPath;
478         if (path.endsWith("/") || path.endsWith("//")) {
479             // this is a directory URL
480             if (acceptDir) {
481                 // remove trailing slash
482                 usedPath = path.substring(0, path.length() - 1);
483             } else {
484                 // no dir allowed -- return empty string
485                 return "";
486             }
487         } else {
488             // use complete path
489             usedPath = path;
490         }
491 
492         final int lastSlashOrBackslash = Math.max(path.lastIndexOf('/'),
493             path.lastIndexOf('//'));
494 
495         final String result;
496         if (lastSlashOrBackslash >= 0) {
497             result = usedPath.substring(lastSlashOrBackslash + 1);
498         } else {
499             // return complete used path
500             result = usedPath;
501         }
502         return result;
503     }
504 
505     /***
506      * Opens an input stream that might have been compressed in
507      * <code>gzip</code> format. This method autodetects whether the stream
508      * has been compressed and returns a stream that allows accessing the
509      * uncompressed data.
510      *
511      * @param in the raw input stream (either uncompressed or in
512      * <code>gzip</code> format
513      * @return a stream that allows accessing the uncompressed data
514      * @throws IOException if an I/O error has occurred
515      */
516     public static InputStream openCompressableInStream(InputStream in)
517     throws IOException {
518         // we need a stream that supports mark + reset
519         final InputStream markableStream;
520         InputStream result;
521 
522         if (in.markSupported()) {
523             markableStream = in;
524         } else {
525             // wrap in buffered stream to get support for mark + reset
526             markableStream = new BufferedInputStream(in);
527         }
528 
529         // mark initial position
530         markableStream.mark(1024);
531 
532         // try to wrap in GZIP stream
533         try {
534             result = new GZIPInputStream(markableStream);
535         } catch (IOException ioe) {
536             // IOexception is thrown if the stream isn't in gzip format:
537             // reset stream to marked position and return stream "as is"
538             markableStream.reset();
539             result = markableStream;
540         }
541         return result;
542     }
543 
544     /***
545      * Transparently opens an input stream that may use compression to store
546      * the data (in <code>gzip</code> format).
547      * 
548      * @param out the original output stream
549      * @param doCompress whether or not to use compression
550      * @return to output stream to use for storing data; will be a
551      * {@link GZIPOutputStream} wrapping <code>out</code> if
552      * <code>doCompress</code> is <code>true</code>; otherwise it will be the
553      * raw <code>out</code> stream
554      * @throws IOException if an I/O error has occurred
555      */
556     public static OutputStream openCompressableOutStream(OutputStream out,
557             boolean doCompress) throws IOException {
558         if (doCompress) {
559             // wrap in GZIP stream
560             return new GZIPOutputStream(out);
561         } else {
562             // return stream as is
563             return out;
564         }
565     }
566 
567     /***
568      * Transparently opens an input stream that may use compression to store
569      * the data (in <code>gzip</code> format). Checks the
570      * {@link #CONFIG_COMPRESS_GZIP} parameter to decide whether or not to
571      * use compression.
572      * 
573      * @param out the original output stream
574      * @param config the configuration to use
575      * @return to output stream to use for storing data;
576      * @throws IOException if an I/O error has occurred
577      */
578     public static OutputStream openCompressableOutStream(OutputStream out,
579             TiesConfiguration config) throws IOException {
580         return openCompressableOutStream(out,
581                 config.getBoolean(CONFIG_COMPRESS_GZIP));
582     }
583 
584     /***
585      * Opens an reader on a local file. Uses the {@link #KEY_LOCAL_CHARSET
586      * configured character set} -- if not specified, the default charset of
587      * the current platform is used instead. Compressed files are automatically
588      * decompressed (using the
589      * {@link #openCompressableInStream(InputStream)} method). Don't forget to
590      * finally {@linkplain #tryToClose(Reader) close} any reader you open!
591      *
592      * @param file the file to read
593      * @param config the configuration to use
594      * @return a reader on the local file
595      * @throws IOException if an I/O error has occurred
596      */
597     public static InputStreamReader openReader(final File file,
598             final Configuration config) throws IOException {
599         // delegate
600         return openReader(file, config.getString(KEY_LOCAL_CHARSET, null));
601     }
602 
603     /***
604      * Opens an reader on a local file, using a given charset. Compressed files
605      * are automatically decompressed (using the
606      * {@link #openCompressableInStream(InputStream)} method). Don't forget to
607      * finally {@linkplain #tryToClose(Reader) close} any reader you open!
608      *
609      * @param file the file to read
610      * @param charset the character set to use for reading the file;
611      * if <code>null</code>, the default charset of the current platform is used
612      * @return a reader on the local file
613      * @throws IOException if an I/O error has occurred
614      */
615     public static InputStreamReader openReader(final File file,
616             final String charset) throws IOException {
617         final InputStream in =
618             openCompressableInStream(new FileInputStream(file));
619         final InputStreamReader reader;
620 
621         if (charset == null) {
622             // use default charset
623             reader = new InputStreamReader(in);
624         } else {
625             // use given charset
626             reader = new InputStreamReader(in, charset);
627         }
628         return reader;
629     }
630 
631     /***
632      * Opens a reader on an input stream that uses a Unicode character set
633      * (UTF-8, UTF-16, or UTF-32) and optionally a
634      * <a href="http://www.unicode.org/unicode/faq/utf_bom.html">BOM
635      * (byte order mark)</a> to identify the used charset. UTF-8 is used if BOM
636      * is not found.
637      *
638      * <p>Adapted from the
639      * <a href="http://koti.mbnet.fi/akini/java/unicodereader/"
640      * ><code>UnicodeReader</code></a> class created by Thomas Weidenfeller and
641      * Aki Nieminen.
642      *
643      * @param in the input stream to wrap
644      * @return a reader on the stream
645      * @throws IOException if an I/O error has occurred
646      */
647     public static InputStreamReader openUnicodeReader(final InputStream in)
648     throws IOException {
649         final PushbackInputStream pushbackIn =
650             new PushbackInputStream(in, BOM_SIZE);
651 
652         final String encoding;
653         final byte bom[] = new byte[BOM_SIZE];
654         final int n, unread;
655         n = pushbackIn.read(bom, 0, bom.length);
656 
657         // check BOM to determine charset
658         if (  (bom[0] == (byte)0xEF) && (bom[1] == (byte)0xBB) &&
659               (bom[2] == (byte)0xBF) ) {
660             encoding = STANDARD_UNICODE_CHARSET; // UTF-8
661             unread = n - 3;
662         } else if ( (bom[0] == (byte)0xFE) && (bom[1] == (byte)0xFF) ) {
663             encoding = "UTF-16BE";
664             unread = n - 2;
665         } else if ( (bom[0] == (byte)0xFF) && (bom[1] == (byte)0xFE) ) {
666             encoding = "UTF-16LE";
667             unread = n - 2;
668         } else if ( (bom[0] == (byte)0x00) && (bom[1] == (byte)0x00) &&
669                     (bom[2] == (byte)0xFE) && (bom[3] == (byte)0xFF)) {
670             encoding = "UTF-32BE";
671             unread = n - 4;
672         } else if ( (bom[0] == (byte)0xFF) && (bom[1] == (byte)0xFE) &&
673                     (bom[2] == (byte)0x00) && (bom[3] == (byte)0x00)) {
674             encoding = "UTF-32LE";
675             unread = n - 4;
676         } else {
677             // Unicode BOM mark not found, unread all bytes and use UTF-8
678             encoding = STANDARD_UNICODE_CHARSET; // UTF-8
679             unread = n;
680         }
681 
682         // push back any read bytes that are not part of the BOM
683         if (unread > 0) {
684             pushbackIn.unread(bom, (n - unread), unread);
685         }
686 
687         // Use determined encoding
688         return new InputStreamReader(pushbackIn, encoding);
689     }
690 
691     /***
692      * Opens a writer that uses the standard Unicode character set
693      * {@linkplain #STANDARD_UNICODE_CHARSET UTF-8}.
694      *
695      * @param out the output stream to wrap
696      * @return a writer on the stream
697      */
698     public static OutputStreamWriter openUnicodeWriter(final OutputStream out) {
699         try {
700             return new OutputStreamWriter(out, STANDARD_UNICODE_CHARSET);            
701         } catch (UnsupportedEncodingException uee) {
702             // not supposed to happen as each virtual machine must support UTF-8
703             throw new RuntimeException("Unexpected error: virtual machine does "
704                     + "not support the standard charaset set"
705                     + STANDARD_HTTP_CHARSET);
706         }
707     }
708 
709     /***
710      * Opens an writer on a local file. Uses the {@link #KEY_LOCAL_CHARSET
711      * configured character set} -- if not specified, the default charset of
712      * the current platform is used instead. Don't forget to finally
713      * {@linkplain #tryToClose(Writer) close} any writer you open!
714      *
715      * @param file the file to write to
716      * @param config the configuration to use
717      * @return a writer on the local file
718      * @throws IOException if the file is a directory or for some
719      * other reason cannot be opened for writing
720      * @throws UnsupportedEncodingException if the named charset is not
721      * supported
722      */
723     public static Writer openWriter(final File file,
724             final Configuration config)
725             throws IOException, UnsupportedEncodingException {
726         // delegate
727         return openWriter(file, config.getString(KEY_LOCAL_CHARSET, null));
728     }
729 
730     /***
731      * Opens an writer on a local file, using a given charset. Don't forget to
732      * finally {@linkplain #tryToClose(Writer) close} any writer you open!
733      *
734      * @param file the file to write to
735      * @param charset the character set to use for writing the file;
736      * if <code>null</code>, the default charset of the current platform is used
737      * @return a writer on the local file
738      * @throws IOException if the file is a directory or for some
739      * other reason cannot be opened for writing
740      * @throws UnsupportedEncodingException if the named charset is not
741      * supported
742      */
743     public static Writer openWriter(final File file, final String charset)
744             throws IOException, UnsupportedEncodingException {
745         final Writer writer;
746 
747         if (charset == null) {
748             // use default charset
749             writer = new FileWriter(file);
750         } else {
751             // use given charset
752             writer =
753                 new OutputStreamWriter(new FileOutputStream(file), charset);
754         }
755         return writer;
756     }
757 
758     /***
759      * Reads the contents of a reader into a string. The reader is <em>not</em>
760      * closed by this method.
761      *
762      * @param reader the reader to use
763      * @return the contents of the reader
764      * @throws IOException if an I/O error occurs
765      */
766     public static String readToString(final Reader reader) throws IOException {
767         final StringBuffer result = new StringBuffer();
768 
769         final char[] buffer = new char[STANDARD_BLOCK_SIZE];
770         int charsRead;
771 
772         while ((charsRead = reader.read(buffer)) > -1) {
773             result.append(buffer, 0, charsRead);
774         }
775 
776         return result.toString();
777     }
778 
779     /***
780      * Reads the contents of a reader into a writer. Neither reader nor writer
781      * are closed by this method.
782      *
783      * @param reader the reader to read from
784      * @param writer the writer to write to write to
785      * @return the number of characters read
786      * @throws IOException if an I/O error occurs
787      */
788     public static int readToWriter(final Reader reader, final Writer writer)
789             throws IOException {
790         final char[] buffer = new char[STANDARD_BLOCK_SIZE];
791         int charsRead;
792         int allCharsRead = 0;
793 
794         while ((charsRead = reader.read(buffer)) > -1) {
795             writer.write(buffer, 0, charsRead);
796             allCharsRead += charsRead;
797         }
798 
799         writer.flush();
800         return allCharsRead;
801     }
802 
803     /***
804      * Converts an URI list as defined in RFC 2483 (MIME type
805      * <code>text/uri-list</code>) into an array of strings. Comment lines
806      * in the input are ignored.
807      *
808      * @param in the URI list to convert
809      * @return an array of strings containing the URIs/URLs listed in the input
810      */
811     public static String[] readURIList(final CharSequence in) {
812         final String[] lines = TextUtils.splitLines(in);
813         final List<String> uriList = new ArrayList<String>(lines.length);
814 
815         // discard comment URLs (starting with #)
816         for (int i = 0; i < lines.length; i++) {
817             if (!lines[i].startsWith("#")) {
818                 uriList.add(lines[i]);
819             }
820         }
821 
822         // convert to array
823         String[] result = new String[uriList.size()];
824         return uriList.toArray(result);
825     }
826 
827     /***
828      * Converts an URI list as defined in RFC 2483 (MIME type
829      * <code>text/uri-list</code>) into an array of strings. Comment lines
830      * in the input are ignored.
831      *
832      * @param in a reader containing the URI list to convert
833      * @return an array of strings containing the URIs/URLs listed in the input
834      * @throws IOException if an I/O error occurs
835      */
836     public static String[] readURIList(final Reader in) throws IOException {
837         final String inSequence = readToString(in);
838         return readURIList(inSequence);
839     }
840 
841     /***
842      * Modifies the default directory that can be given as "parent" when
843      * {@linkplain File#File(java.io.File, java.lang.String) constructing} file
844      * objects. Can be set to <code>null</code> so the "system-dependent
845      * default directory" (typically the working directory) will be used.
846      *
847      * @param directory the new directory to use
848      * @throws IllegalArgumentException if the given directory is neither
849      * <code>null</code> nor an existing directory
850      */
851     public static void setDefaultDirectory(final File directory)
852             throws IllegalArgumentException {
853         if ((directory != null) && !directory.isDirectory()) {
854             throw new IllegalArgumentException("Default directory must be "
855                 + "<null> or an existing directory, but " + directory
856                 + " is not");
857         }
858 
859         synchronized (DD_GUARD) {
860             defaultDirectory = directory;
861         }
862     }
863 
864     /***
865      * Convenience method for closing an input stream. If the specified stream
866      * is <code>null</code>, this method does nothing. Any {@link
867      * java.io.IOException} thrown during closing is swallowed by this method.
868      *
869      * @param in the input stream to close (might be <code>null</code>)
870      * @return <code>true</code> if the stream was closed successfully,
871      * <code>false</code> otherwise (the stream was <code>null</code> or an
872      * exception was thrown during closing)
873      */
874     public static boolean tryToClose(final InputStream in) {
875         boolean result = false;
876 
877         if (in != null) {
878             try {
879                 in.close();
880                 result = true;
881             } catch (IOException ioe) {
882                 Util.LOG.warn("Exception while trying to close InputStream "
883                     + in + ": " + ioe.toString());
884             }
885         }
886 
887         return result;
888     }
889 
890     /***
891      * Convenience method for closing an output stream. If the specified stream
892      * is <code>null</code>, this method does nothing. Any {@link
893      * java.io.IOException} thrown during closing is swallowed by this method.
894      *
895      * @param out the output stream to close (might be <code>null</code>)
896      * @return <code>true</code> if the stream was closed successfully,
897      * <code>false</code> otherwise (the stream was <code>null</code> or an
898      * exception was thrown during closing)
899      */
900     public static boolean tryToClose(final OutputStream out) {
901         boolean result = false;
902 
903         if (out != null) {
904             try {
905                 out.close();
906                 result = true;
907             } catch (IOException ioe) {
908                 Util.LOG.warn("Exception while trying to close OutputStream "
909                     + out + ": " + ioe.toString());
910             }
911         }
912 
913         return result;
914     }
915 
916     /***
917      * Convenience method for closing a reader. If the specified reader is
918      * <code>null</code>, this method does nothing. Any {@link
919      * java.io.IOException} thrown during closing is swallowed by this method.
920      *
921      * @param reader the reader to close (might be <code>null</code>)
922      * @return <code>true</code> if the reader was closed successfully,
923      * <code>false</code> otherwise (reader was <code>null</code> or an
924      * exception was thrown during closing)
925      */
926     public static boolean tryToClose(final Reader reader) {
927         boolean result = false;
928 
929         if (reader != null) {
930             try {
931                 reader.close();
932                 result = true;
933             } catch (IOException ioe) {
934                 Util.LOG.warn("Exception while trying to close Reader "
935                     + reader + ": " + ioe.toString());
936             }
937         }
938 
939         return result;
940     }
941 
942     /***
943      * Convenience method for closing a writer. If the specified writer is
944      * <code>null</code>, this method does nothing. Any {@link
945      * java.io.IOException} thrown during closing is swallowed by this method.
946      *
947      * @param writer the writer to close (might be <code>null</code>)
948      * @return <code>true</code> if the writer was closed successfully,
949      * <code>false</code> otherwise (writer was <code>null</code> or an
950      * exception was thrown during closing)
951      */
952     public static boolean tryToClose(final Writer writer) {
953         boolean result = false;
954 
955         if (writer != null) {
956             try {
957                 writer.close();
958                 result = true;
959             } catch (IOException ioe) {
960                 Util.LOG.warn("Exception while trying to close Writer "
961                     + writer + ": " + ioe.toString());
962             }
963         }
964 
965         return result;
966     }
967 
968     /***
969      * Writes the contents of a character sequence to a writer. The writer
970      * is flushed but not closed by this method.
971      *
972      * @param input the text to send to the wrier
973      * @param writer the writer to write to write to
974      * @throws IOException if an I/O error occurs
975      */
976     public static void writeToWriter(final CharSequence input,
977             final Writer writer) throws IOException {
978         writer.write(input.toString());
979         writer.flush();
980     }
981 
982     /***
983      * Private constructor prevents creation of instances.
984      */
985     private IOUtils() {
986         super();
987     }
988 
989 }