View Javadoc

1   /*
2    * Copyright (C) 2003-2006 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This program is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU General Public License as published by
9    * the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program; if not, visit
19   * http://www.gnu.org/licenses/gpl.html or write to the Free Software
20   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21   */
22  package de.fu_berlin.ties.io;
23  
24  import java.io.BufferedInputStream;
25  import java.io.File;
26  import java.io.FileInputStream;
27  import java.io.FileOutputStream;
28  import java.io.FileWriter;
29  import java.io.IOException;
30  import java.io.InputStream;
31  import java.io.InputStreamReader;
32  import java.io.OutputStream;
33  import java.io.OutputStreamWriter;
34  import java.io.PushbackInputStream;
35  import java.io.Reader;
36  import java.io.UnsupportedEncodingException;
37  import java.io.Writer;
38  import java.net.URL;
39  import java.nio.charset.Charset;
40  import java.util.ArrayList;
41  import java.util.List;
42  import java.util.zip.GZIPInputStream;
43  import java.util.zip.GZIPOutputStream;
44  
45  import org.apache.commons.configuration.Configuration;
46  import org.apache.commons.lang.StringUtils;
47  
48  import de.fu_berlin.ties.TextProcessor;
49  import de.fu_berlin.ties.TiesConfiguration;
50  import de.fu_berlin.ties.text.TextUtils;
51  import de.fu_berlin.ties.util.MutableInt;
52  import de.fu_berlin.ties.util.Util;
53  
54  /***
55   * A static class that provides utility constants and methods for I/O.
56   * No instances of this class can be created, only the static members
57   * should be used.
58   *
59   * @author Christian Siefkes
60   * @version $Revision: 1.35 $, $Date: 2006/11/30 12:38:52 $, $Author: siefkes $
61   */
62  public final class IOUtils {
63  
    /**
     * Configuration and context key: the character set to use when reading and
     * writing local files. If omitted, the default charset of the current
     * platform is used.
     */
    public static final String KEY_LOCAL_CHARSET = "charset";

    /**
     * Configuration key: whether to compress data in <code>gzip</code>
     * format. Used by
     * {@link #openCompressableOutStream(OutputStream, TiesConfiguration)}.
     */
    public static final String CONFIG_COMPRESS_GZIP = "compress.gzip";

    /**
     * The standard block size recommended for I/O (64 KiB).
     */
    public static final int STANDARD_BLOCK_SIZE = 64 * 1024;

    /**
     * The standard character set defined by the HTTP standard: {@value}.
     */
    public static final String STANDARD_HTTP_CHARSET = "ISO-8859-1";

    /**
     * The standard Unicode-compatible character set: {@value}.
     */
    public static final String STANDARD_UNICODE_CHARSET = "UTF-8";

    /**
     * The character set used internally by Java to represent strings: {@value}.
     */
    public static final String INTERNAL_JAVA_CHARSET = "UTF-16";

    /**
     * The character separating the base name from the extension of a file
     * (a dot).
     */
    public static final char EXT_SEPARATOR = '.';

    /**
     * The maximum size of a byte order mark (BOM) in bytes; used as the
     * look-ahead buffer size by {@link #openUnicodeReader(InputStream)}.
     */
    private static final int BOM_SIZE = 4;

    /**
     * Default directory that can be given as "parent" when
     * {@linkplain File#File(java.io.File, java.lang.String) constructing} file
     * objects. Initially set to <code>null</code> so the "system-dependent
     * default directory" (typically the working directory) will be used.
     * Access is synchronized via the {@link #DD_GUARD}.
     */
    private static File defaultDirectory = null;

    /**
     * Lock object guarding access to the {@link #defaultDirectory}.
     */
    private static final Object DD_GUARD = new Object();
122 
123     /***
124      * Creates a file for writing output to. Delegates to
125      * {@link #createOutFile(File, String, String)} without specifying an
126      * extension.
127      *
128      * @param directory the directory in which the file should be created;
129      * if <code>null</code> or not writable, the file is created in the
130      * current working directory instead
131      * @param localName the local name of the input file --- the extension of
132      * this file (if any) is replaced by the specified output extension
133      * @return the create file
134      */
135     public static File createOutFile(final File directory, final
136             String localName) {
137         return createOutFile(directory, localName, null);
138     }
139 
140     /***
141      * Creates a file for writing output to. The file is created in the
142      * specified directory if given and writable; otherwise it is created
143      * in the current working directory.  The resulting file can also be used
144      * as a directory (by calling its {@link File#mkdir()} method).
145      *
146      * <p>The name of the file is derivated from the given local name by
147      * replacing the extension of the input file (if any) by the specified
148      * <code>outExtension</code>. If a file with this name already exists,
149      * "2" or the next available number is inserted before the extension unless
150      * an unused file name is found.
151      *
152      * <p>E.g. for input file "test.txt" and output extension "out", the file
153      * "test.out" is created. If this file already exists, the file "test2.out"
154      * is created instead (or "test3.out" etc. unless an unsed name is found).
155      *
156      * @param directory the directory in which the file should be created;
157      * if <code>null</code> or not writable, the file is created in the
158      * current working directory instead
159      * @param localName the local name of the input file --- the extension of
160      * this file (if any) is replaced by the specified output extension
161      * @param outExtension the extension to append to the output file
162      * (without a starting dot); ignored if empty or <code>null</code>
163      * @return the create file
164      */
165     public static File createOutFile(final File directory, final
166             String localName, final String outExtension) {
167         return createOutFile(directory, localName, outExtension, null);
168     }
169 
170     /***
171      * Creates a file for writing output to. The file is created in the
172      * specified directory if given and writable; otherwise it is created
173      * in the current working directory.  The resulting file can also be used
174      * as a directory (by calling its {@link File#mkdir()} method).
175      *
176      * <p>The name of the file is derivated from the given local name by
177      * replacing the extension of the input file (if any) by the specified
178      * <code>outExtension</code>. If a file with this name already exists,
179      * "2" or the next available number is inserted before the extension unless
180      * an unused file name is found.
181      *
182      * <p>E.g. for input file "test.txt" and output extension "out", the file
183      * "test.out" is created. If this file already exists, the file "test2.out"
184      * is created instead (or "test3.out" etc. unless an unsed name is found).
185      *
186      * @param directory the directory in which the file should be created;
187      * if <code>null</code> or not writable, the file is created in the
188      * current working directory instead
189      * @param localName the local name of the input file --- the extension of
190      * this file (if any) is replaced by the specified output extension
191      * @param outExtension the extension to append to the output file
192      * (without a starting dot); ignored if empty or <code>null</code>
193      * @param last if not <code>null</code>, assumped to wrap the last numeric
194      * prefix already in use, i.e. the first tried prefix will the the value
195      * of this + 1; after determining a file name, the value will be set to the
196      * numeric prefix used this time
197      * @return the create file
198      */
199     public static File createOutFile(final File directory, final
200             String localName, final String outExtension,
201             final MutableInt last) {
202         final File myDirectory;
203         if (directory == null) {
204             // no directory given: write to working dir
205             myDirectory = null;
206         } else if (!directory.canWrite()) {
207             // cannot write to specified directory: write to working dir
208             myDirectory = null;
209         } else {
210             myDirectory = directory;
211         }
212 
213         final String inputExt = getExtension(localName);
214         final String baseName;
215         if ("".equals(inputExt)) {
216             baseName = localName;
217         } else {
218             // remove input extension + preceding dot
219             baseName = localName.substring(0,
220                 localName.length() - inputExt.length() - 1);
221         }
222 
223         final String fullExt;
224         if (StringUtils.isEmpty(outExtension)) {
225             // ignore empty or null extension
226             fullExt = "";
227         } else {
228             // prepend dot
229             fullExt = EXT_SEPARATOR + outExtension;
230         }
231 
232         final String separator;
233         if ((baseName.length() > 0) && (Character.isDigit(
234                 baseName.charAt(baseName.length() - 1)))) {
235             // base name ends in a digit: separate with "-" from the counter
236             separator = "-";
237         } else {
238             separator = "";
239         }
240 
241         File result;
242         int counter;
243         if (last == null) {
244             // initially try without extension
245             counter = 1;
246             result = new File(myDirectory, baseName + fullExt);
247         } else {
248             // start with last value + 1
249             counter = last.getValue() + 1;
250             result = new File(myDirectory, baseName + separator + counter
251                 + fullExt);
252         }
253 
254         while (result.exists()) {
255             // increate count until a non-existing file has been found
256             counter++;
257             result = new File(myDirectory, baseName + separator + counter
258                 + fullExt);
259         }
260 
261         if (last != null) {
262             // store used counter
263             last.setValue(counter);
264         }
265 
266         return result;
267     }
268 
269     /***
270      * Returns the charset used by an InputStreamReader.
271      *
272      * @param reader the reader to check
273      * @return the charset used by the reader
274      */
275     public static Charset determineCharset(final InputStreamReader reader) {
276         return Charset.forName(reader.getEncoding());
277     }
278 
279     /***
280      * Returns the canoncical name of the charset used by an InputStreamReader.
281      * This method always returns the <em>canonical</em> (standard) name.
282      * It should preferably be used instead of calling
283      * {@link InputStreamReader#getEncoding()} directly, because that method
284      * often returns a non-standard ("historical") name.
285      *
286      * @param reader the reader to check
287      * @return the canonical name of the charset used by the reader
288      */
289     public static String determineCharsetName(final InputStreamReader reader) {
290         return determineCharset(reader).name();
291     }
292 
293     /***
294      * Returns the charset used by an OutputStreamWriter.
295      *
296      * @param writer the writer to check
297      * @return the charset used by the writer
298      */
299     public static Charset determineCharset(final OutputStreamWriter writer) {
300         return Charset.forName(writer.getEncoding());
301     }
302 
303     /***
304      * Returns the canoncical name of the charset used by an OutputStreamWriter.
305      * This method always returns the <em>canonical</em> (standard) name.
306      * It should preferably be used instead of calling
307      * {@link OutputStreamWriter#getEncoding()} directly, because that method
308      * often returns a non-standard ("historical") name.
309      *
310      * @param writer the writer to check
311      * @return the canonical name of the charset used by the writer
312      */
313     public static String determineCharsetName(final OutputStreamWriter writer) {
314         return determineCharset(writer).name();
315     }
316 
317     /***
318      * Determines the output to directory to use, reading it from the
319      * {@link TextProcessor#KEY_OUT_DIRECTORY} configuration key in a given
320      * configuration.
321      *
322      * @param config configuration used to determine the output directory
323      * (read from the {@link TextProcessor#KEY_OUT_DIRECTORY} key)
324      * @return the output directory to use; or <code>null</code> if none
325      * is configured
326      */
327     public static File determineOutputDirectory(
328             final TiesConfiguration config) {
329         if (config.containsKey(TextProcessor.KEY_OUT_DIRECTORY)) {
330             return new File(config.getString(TextProcessor.KEY_OUT_DIRECTORY));
331         } else {
332             return null;
333         }
334     }
335 
336     /***
337      * Returns the base name of a file (the local name without
338      * {@link #getExtension(File)} and preceding {@link #EXT_SEPARATOR dot}).
339      *
340      * @param file the file to check
341      * @return the base name of the given file
342     */
343     public static String getBaseName(final File file) {
344         // for file names valid in the current file system, the surrounding
345         // call to getLocalName should be unnecessary; but it's helpful to
346         // ensure correctness for file names from other OS
347         return getBaseName(getLocalName(file.getName(), false));
348     }
349 
350     /***
351      * Helper method that returns the base name of a file name or URL.
352      *
353      * @param localName the {@linkplain #getLocalName(String, boolean) local
354      * name} of the file or URL path to check
355      * @return the base name of the given string
356      */
357     public static String getBaseName(final String localName) {
358         final int lastDot = localName.lastIndexOf(EXT_SEPARATOR);
359 
360         if ((lastDot >= 0)) {
361             return localName.substring(0, lastDot);
362         } else {
363             // return full name
364             return localName;
365         }
366     }
367 
368     /***
369      * Returns the base name of an URL (the local name without
370      * {@link #getExtension(File)} and preceding {@link #EXT_SEPARATOR dot}).
371      *
372      * @param url the URL to check
373      * @return the base name of the given URL
374     */
375     public static String getBaseName(final URL url) {
376         // use local name (files only, no directories)
377         final String localName = getLocalName(url, false);
378         return getBaseName(localName);
379     }
380 
381     /***
382      * Returns an default directory that can be given as "parent" when
383      * {@linkplain File#File(java.io.File, java.lang.String) constructing} file
384      * objects. Initially set to <code>null</code> so the "system-dependent
385      * default directory" (typically the working directory) will be used.
386      *
387      * @return the default directory
388      */
389     public static File getDefaultDirectory() {
390         synchronized (DD_GUARD) {
391             return defaultDirectory;
392         }
393     }
394 
395     /***
396      * Returns the extension of a file.
397      *
398      * @param file the file to check
399      * @return the extension of the given file; or an empty string if no
400      * extension exists
401     */
402     public static String getExtension(final File file) {
403         // for file names valid in the current file system, the surrounding
404         // call to getLocalName should be unnecessary; but it's helpful to
405         // ensure correctness for file names from other OS
406         return getExtension(getLocalName(file.getName(), false));
407     }
408 
409     /***
410      * Helper method that returns the file extension of a file name or URL.
411      *
412      * @param localName the {@linkplain #getLocalName(String, boolean) local
413      * name} of the file or URL path to check
414      * @return the extension of the given string; or an empty string if no
415      * extension exists
416      */
417     public static String getExtension(final String localName) {
418         final int lastDot = localName.lastIndexOf(EXT_SEPARATOR);
419 
420         if ((lastDot >= 0)) {
421             return localName.substring(lastDot + 1);
422         } else {
423             return "";
424         }
425     }
426 
427     /***
428      * Returns the file extension of an URL.
429      *
430      * @param url the URL to check
431      * @return the extension of the given URL; or an empty string if no
432      * extension exists
433     */
434     public static String getExtension(final URL url) {
435         // use local name (files only, no directories)
436         final String localName = getLocalName(url, false);
437         return getExtension(localName);
438     }
439 
440     /***
441      * Returns the local name of an URL. This is the last existing element of
442      * from the path component of the URL, typically the local file name
443      * (without directories).
444      *
445      * <p>For URLs that are recognizable as directories (i.e. end in a slash),
446      * the name of the final directory is returned if <code>acceptDir</code>
447      * is <code>true</code> -- otherwise the empty string is returned.
448      *
449      * @param url the URL to check
450      * @param acceptDir whether to return a final directory name
451      * (see description)
452      * @return the local name
453      */
454     public static String getLocalName(final URL url, final boolean acceptDir) {
455         // the path component of an URL is what we need to check
456         // (without a final ?-query, #-reference or ;-session info)
457         final String localName = getLocalName(url.getPath(), acceptDir);
458 
459         // remove parameters after ";" (e.g. session info) if any exist
460         final int firstSemicolon = localName.indexOf(';');
461 
462         if (firstSemicolon >= 0) {
463             return localName.substring(0, firstSemicolon);
464         } else {
465             return localName;
466         }
467     }
468 
469     /***
470      * Helper method that returns the local name of a file or URL path.
471      *
472      * <p>For paths that are recognizable as directories (i.e. end in a slash),
473      * the name of the final directory is returned if <code>acceptDir</code>
474      * is <code>true</code> -- otherwise the empty string is returned.
475      *
476      * @param path the file or URL path to check
477      * @param acceptDir whether to return a final directory name
478      * (see description)
479      * @return the local name
480      */
481     private static String getLocalName(final String path,
482             final boolean acceptDir) {
483         // check for both '/' (Unix, URLs) and '\' (Windows, just to make sure)
484         final String usedPath;
485         if (path.endsWith("/") || path.endsWith("//")) {
486             // this is a directory URL
487             if (acceptDir) {
488                 // remove trailing slash
489                 usedPath = path.substring(0, path.length() - 1);
490             } else {
491                 // no dir allowed -- return empty string
492                 return "";
493             }
494         } else {
495             // use complete path
496             usedPath = path;
497         }
498 
499         final int lastSlashOrBackslash = Math.max(path.lastIndexOf('/'),
500             path.lastIndexOf('//'));
501 
502         final String result;
503         if (lastSlashOrBackslash >= 0) {
504             result = usedPath.substring(lastSlashOrBackslash + 1);
505         } else {
506             // return complete used path
507             result = usedPath;
508         }
509         return result;
510     }
511 
512     /***
513      * Opens an input stream that might have been compressed in
514      * <code>gzip</code> format. This method autodetects whether the stream
515      * has been compressed and returns a stream that allows accessing the
516      * uncompressed data.
517      *
518      * @param in the raw input stream (either uncompressed or in
519      * <code>gzip</code> format)
520      * @return a stream that allows accessing the uncompressed data
521      * @throws IOException if an I/O error has occurred
522      */
523     public static InputStream openCompressableInStream(final InputStream in)
524     throws IOException {
525         // we need a stream that supports mark + reset
526         final InputStream markableStream;
527         InputStream result;
528 
529         if (in.markSupported()) {
530             markableStream = in;
531         } else {
532             // wrap in buffered stream to get support for mark + reset
533             markableStream = new BufferedInputStream(in);
534         }
535 
536         // mark initial position
537         markableStream.mark(1024);
538 
539         // try to wrap in GZIP stream
540         try {
541             result = new GZIPInputStream(markableStream);
542         } catch (IOException ioe) {
543             // IOexception is thrown if the stream isn't in gzip format:
544             // reset stream to marked position and return stream "as is"
545             markableStream.reset();
546             result = markableStream;
547         }
548         return result;
549     }
550 
551     /***
552      * Transparently opens an input stream that may use compression to store
553      * the data (in <code>gzip</code> format).
554      * 
555      * @param out the original output stream
556      * @param doCompress whether or not to use compression
557      * @return an output stream to use for storing data; will be a
558      * {@link GZIPOutputStream} wrapping <code>out</code> if
559      * <code>doCompress</code> is <code>true</code>; otherwise it will be the
560      * raw <code>out</code> stream
561      * @throws IOException if an I/O error has occurred
562      */
563     public static OutputStream openCompressableOutStream(final OutputStream out,
564             final boolean doCompress) throws IOException {
565         if (doCompress) {
566             // wrap in GZIP stream
567             return new GZIPOutputStream(out);
568         } else {
569             // return stream as is
570             return out;
571         }
572     }
573 
574     /***
575      * Transparently opens an input stream that may use compression to store
576      * the data (in <code>gzip</code> format). Checks the
577      * {@link #CONFIG_COMPRESS_GZIP} parameter to decide whether or not to
578      * use compression.
579      * 
580      * @param out the original output stream
581      * @param config the configuration to use
582      * @return an output stream to use for storing data;
583      * @throws IOException if an I/O error has occurred
584      */
585     public static OutputStream openCompressableOutStream(final OutputStream out,
586             final TiesConfiguration config) throws IOException {
587         // delegate without using a config suffix
588         return openCompressableOutStream(out, config, null);
589     }
590 
591     /***
592      * Transparently opens an input stream that may use compression to store
593      * the data (in <code>gzip</code> format). Checks the
594      * {@link #CONFIG_COMPRESS_GZIP} parameter (with an optional suffix) to
595      * decide whether or not to use compression.
596      * 
597      * @param out the original output stream
598      * @param config the configuration to use
599      * @param suffix an optional suffix that allows
600      * {@linkplain TiesConfiguration#adaptKey(String, String) overwriting} the
601      * general value of the configuration paramter with a more specified value
602      * @return an output stream to use for storing data
603      * @throws IOException if an I/O error has occurred
604      */
605     public static OutputStream openCompressableOutStream(final OutputStream out,
606             final TiesConfiguration config, final String suffix)
607     throws IOException {
608         // delegate reading boolean value from config
609         return openCompressableOutStream(out, config.getBoolean(
610                 config.adaptKey(CONFIG_COMPRESS_GZIP, suffix)));
611     }
612 
613     /***
614      * Opens an reader on a local file. Uses the {@link #KEY_LOCAL_CHARSET
615      * configured character set} -- if not specified, the default charset of
616      * the current platform is used instead. Compressed files are automatically
617      * decompressed (using the
618      * {@link #openCompressableInStream(InputStream)} method). Don't forget to
619      * finally {@linkplain #tryToClose(Reader) close} any reader you open!
620      *
621      * @param file the file to read
622      * @param config the configuration to use
623      * @return a reader on the local file
624      * @throws IOException if an I/O error has occurred
625      */
626     public static InputStreamReader openReader(final File file,
627             final Configuration config) throws IOException {
628         // delegate
629         return openReader(file, config.getString(KEY_LOCAL_CHARSET, null));
630     }
631 
632     /***
633      * Opens an reader on a local file, using a given charset. Compressed files
634      * are automatically decompressed (using the
635      * {@link #openCompressableInStream(InputStream)} method). Don't forget to
636      * finally {@linkplain #tryToClose(Reader) close} any reader you open!
637      *
638      * @param file the file to read
639      * @param charset the character set to use for reading the file;
640      * if <code>null</code>, the default charset of the current platform is used
641      * @return a reader on the local file
642      * @throws IOException if an I/O error has occurred
643      */
644     public static InputStreamReader openReader(final File file,
645             final String charset) throws IOException {
646         final InputStream in =
647             openCompressableInStream(new FileInputStream(file));
648         final InputStreamReader reader;
649 
650         if (charset == null) {
651             // use default charset
652             reader = new InputStreamReader(in);
653         } else {
654             // use given charset
655             reader = new InputStreamReader(in, charset);
656         }
657         return reader;
658     }
659 
660     /***
661      * Opens a reader on an input stream that uses a Unicode character set
662      * (UTF-8, UTF-16, or UTF-32) and optionally a
663      * <a href="http://www.unicode.org/unicode/faq/utf_bom.html">BOM
664      * (byte order mark)</a> to identify the used charset. UTF-8 is used if BOM
665      * is not found.
666      *
667      * <p>Adapted from the
668      * <a href="http://koti.mbnet.fi/akini/java/unicodereader/"
669      * ><code>UnicodeReader</code></a> class created by Thomas Weidenfeller and
670      * Aki Nieminen.
671      *
672      * @param in the input stream to wrap
673      * @return a reader on the stream
674      * @throws IOException if an I/O error has occurred
675      */
676     public static InputStreamReader openUnicodeReader(final InputStream in)
677     throws IOException {
678         final PushbackInputStream pushbackIn =
679             new PushbackInputStream(in, BOM_SIZE);
680 
681         final String encoding;
682         final byte[] bom = new byte[BOM_SIZE];
683         final int n, unread;
684         n = pushbackIn.read(bom, 0, bom.length);
685 
686         // check BOM to determine charset
687         if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB)
688                 && (bom[2] == (byte) 0xBF)) {
689             encoding = STANDARD_UNICODE_CHARSET; // UTF-8
690             unread = n - 3;
691         } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
692             encoding = "UTF-16BE";
693             unread = n - 2;
694         } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
695             encoding = "UTF-16LE";
696             unread = n - 2;
697         } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00)
698                 && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
699             encoding = "UTF-32BE";
700             unread = n - 4;
701         } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)
702                 && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
703             encoding = "UTF-32LE";
704             unread = n - 4;
705         } else {
706             // Unicode BOM mark not found, unread all bytes and use UTF-8
707             encoding = STANDARD_UNICODE_CHARSET; // UTF-8
708             unread = n;
709         }
710 
711         // push back any read bytes that are not part of the BOM
712         if (unread > 0) {
713             pushbackIn.unread(bom, (n - unread), unread);
714         }
715 
716         // Use determined encoding
717         return new InputStreamReader(pushbackIn, encoding);
718     }
719 
720     /***
721      * Opens a writer that uses the standard Unicode character set
722      * {@linkplain #STANDARD_UNICODE_CHARSET UTF-8}.
723      *
724      * @param out the output stream to wrap
725      * @return a writer on the stream
726      */
727     public static OutputStreamWriter openUnicodeWriter(final OutputStream out) {
728         try {
729             return new OutputStreamWriter(out, STANDARD_UNICODE_CHARSET);
730         } catch (UnsupportedEncodingException uee) {
731             // not supposed to happen as each virtual machine must support UTF-8
732             throw new RuntimeException("Unexpected error: virtual machine does "
733                     + "not support the standard charaset set"
734                     + STANDARD_HTTP_CHARSET);
735         }
736     }
737 
738     /***
739      * Opens an writer on a local file. Uses the {@link #KEY_LOCAL_CHARSET
740      * configured character set} -- if not specified, the default charset of
741      * the current platform is used instead. Don't forget to finally
742      * {@linkplain #tryToClose(Writer) close} any writer you open!
743      *
744      * @param file the file to write to
745      * @param config the configuration to use
746      * @return a writer on the local file
747      * @throws IOException if the file is a directory or for some
748      * other reason cannot be opened for writing
749      * @throws UnsupportedEncodingException if the named charset is not
750      * supported
751      */
752     public static Writer openWriter(final File file,
753             final Configuration config)
754             throws IOException, UnsupportedEncodingException {
755         // delegate
756         return openWriter(file, config.getString(KEY_LOCAL_CHARSET, null));
757     }
758 
759     /***
760      * Opens an writer on a local file, using a given charset. Don't forget to
761      * finally {@linkplain #tryToClose(Writer) close} any writer you open!
762      *
763      * @param file the file to write to
764      * @param charset the character set to use for writing the file;
765      * if <code>null</code>, the default charset of the current platform is used
766      * @return a writer on the local file
767      * @throws IOException if the file is a directory or for some
768      * other reason cannot be opened for writing
769      * @throws UnsupportedEncodingException if the named charset is not
770      * supported
771      */
772     public static Writer openWriter(final File file, final String charset)
773             throws IOException, UnsupportedEncodingException {
774         final Writer writer;
775 
776         if (charset == null) {
777             // use default charset
778             writer = new FileWriter(file);
779         } else {
780             // use given charset
781             writer =
782                 new OutputStreamWriter(new FileOutputStream(file), charset);
783         }
784         return writer;
785     }
786 
787     /***
788      * Reads the contents of a reader into a string. The reader is <em>not</em>
789      * closed by this method.
790      *
791      * @param reader the reader to use
792      * @return the contents of the reader
793      * @throws IOException if an I/O error occurs
794      */
795     public static String readToString(final Reader reader) throws IOException {
796         final StringBuilder result = new StringBuilder();
797 
798         final char[] buffer = new char[STANDARD_BLOCK_SIZE];
799         int charsRead;
800 
801         while ((charsRead = reader.read(buffer)) > -1) {
802             result.append(buffer, 0, charsRead);
803         }
804 
805         return result.toString();
806     }
807 
808     /***
809      * Reads the contents of a reader into a writer. Neither reader nor writer
810      * are closed by this method.
811      *
812      * @param reader the reader to read from
813      * @param writer the writer to write to write to
814      * @return the number of characters read
815      * @throws IOException if an I/O error occurs
816      */
817     public static int readToWriter(final Reader reader, final Writer writer)
818             throws IOException {
819         final char[] buffer = new char[STANDARD_BLOCK_SIZE];
820         int charsRead;
821         int allCharsRead = 0;
822 
823         while ((charsRead = reader.read(buffer)) > -1) {
824             writer.write(buffer, 0, charsRead);
825             allCharsRead += charsRead;
826         }
827 
828         writer.flush();
829         return allCharsRead;
830     }
831 
832     /***
833      * Reads available bytes from the input stream until the <code>bytes</code>
834      * array is full or end-of-input is reached or an end-of-line character is
835      * encountered. The last condition holds iff the last read byte is 10 or 13,
836      * i.e. '\n' or '\r' in ASCII- and UTF-8-compatible charsets).
837      *
838      * @param in the stream to read from
839      * @param bytes the array of bytes to fill
840      * @return the number of bytes read (will be > 0 and <= bytes.length).
841      * @throws IOException if an error occurs while reading from the stream
842      * or if reading times out 
843      */
844     public static int readUntilLineEnd(final InputStream in, final byte[] bytes)
845     throws IOException {
846         int bytesStored = 0;
847         int newBytesRead;
848         boolean done = false;
849 
850         do {
851             // read available bytes into array (but not more than fit)
852             newBytesRead = in.read(bytes, bytesStored,
853                     bytes.length - bytesStored);
854 
855             // check if we are done
856             if (newBytesRead < 0) {
857                 // reached end-of-input
858                 done = true;
859             } else {
860                 bytesStored += newBytesRead;
861                 // check last read byte
862                 if (bytes[bytesStored-1] == 10 || bytes[bytesStored-1] == 13) {
863                     done = true; // read '\n' or '\r'
864                 } else if (bytesStored == bytes.length) {
865                     done = true; // byte array is full
866                 }
867             }
868         } while (!done);
869 
870         return bytesStored;
871     }
872 
873     /***
874      * Converts an URI list as defined in RFC 2483 (MIME type
875      * <code>text/uri-list</code>) into an array of strings. Comment lines
876      * in the input are ignored.
877      *
878      * @param in the URI list to convert
879      * @return an array of strings containing the URIs/URLs listed in the input
880      */
881     public static String[] readURIList(final CharSequence in) {
882         final String[] lines = TextUtils.splitLines(in);
883         final List<String> uriList = new ArrayList<String>(lines.length);
884 
885         // discard comment URLs (starting with #)
886         for (int i = 0; i < lines.length; i++) {
887             if (!lines[i].startsWith("#")) {
888                 uriList.add(lines[i]);
889             }
890         }
891 
892         // convert to array
893         String[] result = new String[uriList.size()];
894         return uriList.toArray(result);
895     }
896 
897     /***
898      * Converts an URI list as defined in RFC 2483 (MIME type
899      * <code>text/uri-list</code>) into an array of strings. Comment lines
900      * in the input are ignored.
901      *
902      * @param in a reader containing the URI list to convert
903      * @return an array of strings containing the URIs/URLs listed in the input
904      * @throws IOException if an I/O error occurs
905      */
906     public static String[] readURIList(final Reader in) throws IOException {
907         final String inSequence = readToString(in);
908         return readURIList(inSequence);
909     }
910 
911     /***
912      * Resolves a filename. Relative filenames are resolved against a given
913      * parent directory, while absolute filenames are returned as is.
914      *
915      * @param directory the parent directory (only used for relative filenames)
916      * @param filename the name of the file
917      * @return an object representing the file
918      */
919     public static File resolveFilename(final File directory,
920             final String filename) {
921         final File file = new File(filename);
922         if (file.isAbsolute()) {
923             // absolute filename: return as is
924             return file;
925         } else {
926             // relative filename: resolve against parent directory
927             return new File(directory, filename);
928         }
929     }
930 
931     /***
932      * Modifies the default directory that can be given as "parent" when
933      * {@linkplain File#File(java.io.File, java.lang.String) constructing} file
934      * objects. Can be set to <code>null</code> so the "system-dependent
935      * default directory" (typically the working directory) will be used.
936      *
937      * @param directory the new directory to use
938      * @throws IllegalArgumentException if the given directory is neither
939      * <code>null</code> nor an existing directory
940      */
941     public static void setDefaultDirectory(final File directory)
942             throws IllegalArgumentException {
943         if ((directory != null) && !directory.isDirectory()) {
944             throw new IllegalArgumentException("Default directory must be "
945                 + "<null> or an existing directory, but " + directory
946                 + " is not");
947         }
948 
949         synchronized (DD_GUARD) {
950             defaultDirectory = directory;
951         }
952     }
953 
954     /***
955      * Convenience method for closing an input stream. If the specified stream
956      * is <code>null</code>, this method does nothing. Any {@link
957      * java.io.IOException} thrown during closing is swallowed by this method.
958      *
959      * @param in the input stream to close (might be <code>null</code>)
960      * @return <code>true</code> if the stream was closed successfully,
961      * <code>false</code> otherwise (the stream was <code>null</code> or an
962      * exception was thrown during closing)
963      */
964     public static boolean tryToClose(final InputStream in) {
965         boolean result = false;
966 
967         if (in != null) {
968             try {
969                 in.close();
970                 result = true;
971             } catch (IOException ioe) {
972                 Util.LOG.warn("Exception while trying to close InputStream "
973                     + in + ": " + ioe.toString());
974             }
975         }
976 
977         return result;
978     }
979 
980     /***
981      * Convenience method for closing an output stream. If the specified stream
982      * is <code>null</code>, this method does nothing. Any {@link
983      * java.io.IOException} thrown during closing is swallowed by this method.
984      *
985      * @param out the output stream to close (might be <code>null</code>)
986      * @return <code>true</code> if the stream was closed successfully,
987      * <code>false</code> otherwise (the stream was <code>null</code> or an
988      * exception was thrown during closing)
989      */
990     public static boolean tryToClose(final OutputStream out) {
991         boolean result = false;
992 
993         if (out != null) {
994             try {
995                 out.close();
996                 result = true;
997             } catch (IOException ioe) {
998                 Util.LOG.warn("Exception while trying to close OutputStream "
999                     + out + ": " + ioe.toString());
1000             }
1001         }
1002 
1003         return result;
1004     }
1005 
1006     /***
1007      * Convenience method for closing a reader. If the specified reader is
1008      * <code>null</code>, this method does nothing. Any {@link
1009      * java.io.IOException} thrown during closing is swallowed by this method.
1010      *
1011      * @param reader the reader to close (might be <code>null</code>)
1012      * @return <code>true</code> if the reader was closed successfully,
1013      * <code>false</code> otherwise (reader was <code>null</code> or an
1014      * exception was thrown during closing)
1015      */
1016     public static boolean tryToClose(final Reader reader) {
1017         boolean result = false;
1018 
1019         if (reader != null) {
1020             try {
1021                 reader.close();
1022                 result = true;
1023             } catch (IOException ioe) {
1024                 Util.LOG.warn("Exception while trying to close Reader "
1025                     + reader + ": " + ioe.toString());
1026             }
1027         }
1028 
1029         return result;
1030     }
1031 
1032     /***
1033      * Convenience method for closing a writer. If the specified writer is
1034      * <code>null</code>, this method does nothing. Any {@link
1035      * java.io.IOException} thrown during closing is swallowed by this method.
1036      *
1037      * @param writer the writer to close (might be <code>null</code>)
1038      * @return <code>true</code> if the writer was closed successfully,
1039      * <code>false</code> otherwise (writer was <code>null</code> or an
1040      * exception was thrown during closing)
1041      */
1042     public static boolean tryToClose(final Writer writer) {
1043         boolean result = false;
1044 
1045         if (writer != null) {
1046             try {
1047                 writer.close();
1048                 result = true;
1049             } catch (IOException ioe) {
1050                 Util.LOG.warn("Exception while trying to close Writer "
1051                     + writer + ": " + ioe.toString());
1052             }
1053         }
1054 
1055         return result;
1056     }
1057 
1058     /***
1059      * Returns the user's current working directory. This is just a shortcut
1060      * for calling <code>System.getProperty("user.dir")</code>.
1061      *
1062      * @return the current working directory
1063      */
1064     public static String userDir() {
1065         return System.getProperty("user.dir");
1066     }
1067 
1068     /***
1069      * Returns the user's home directory. This is just a shortcut for calling
1070      * <code>System.getProperty("user.home")</code>.
1071      *
1072      * @return the home directory
1073      */
1074     public static String userHome() {
1075         return System.getProperty("user.home");
1076     }
1077 
1078     /***
1079      * Writes a line of text to a writer, followed by a
1080      * {@linkplain TextUtils#LINE_SEPARATOR line separator}.
1081      *
1082      * @param text the text to send to the writer
1083      * @param writer the writer to write to; will neither be flushed nor closed
1084      * by this method
1085      * @throws IOException if an I/O error occurs
1086      */
1087     public static void writeLine(final String text, final Writer writer)
1088     throws IOException {
1089         writer.write(text);
1090         writer.write(TextUtils.LINE_SEPARATOR);
1091     }
1092 
1093     /***
1094      * Writes the contents of a character sequence to a writer. The writer
1095      * is flushed but not closed by this method.
1096      *
1097      * @param input the text to send to the writer
1098      * @param writer the writer to write to; flushed but not closed by this
1099      * method
1100      * @throws IOException if an I/O error occurs
1101      */
1102     public static void writeToWriter(final CharSequence input,
1103             final Writer writer) throws IOException {
1104         writer.write(input.toString());
1105         writer.flush();
1106     }
1107 
1108     /***
1109      * Private constructor prevents creation of instances.
1110      */
1111     private IOUtils() {
1112         super();
1113     }
1114 
1115 }