View Javadoc

1   /*
2    * Copyright (C) 2003-2004 Christian Siefkes <christian@siefkes.net>.
3    * Development of this software is supported by the German Research Society,
4    * Berlin-Brandenburg Graduate School in Distributed Information Systems
5    * (DFG grant no. GRK 316).
6    *
7    * This library is free software; you can redistribute it and/or
8    * modify it under the terms of the GNU Lesser General Public
9    * License as published by the Free Software Foundation; either
10   * version 2.1 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, visit
19   * http://www.gnu.org/licenses/lgpl.html or write to the Free Software
20   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
21   */
22  package de.fu_berlin.ties.io;
23  
24  import java.io.File;
25  import java.net.URL;
26  import java.util.regex.Matcher;
27  import java.util.regex.Pattern;
28  
29  import org.apache.commons.lang.StringUtils;
30  import org.apache.commons.lang.builder.ToStringBuilder;
31  
32  import de.fu_berlin.ties.TiesConfiguration;
33  import de.fu_berlin.ties.util.Util;
34  
35  /***
36   * A simple wrapper for the MIME type and the character set of a document.
37   *
38   * @author Christian Siefkes
39   * @version $Revision: 1.2 $, $Date: 2004/05/31 19:14:24 $, $Author: siefkes $
40   */
41  public final class ContentType {
42  
43      /***
44       * Context key recommended to store the MIME type string.
45       */
46      public static final String KEY_MIME_TYPE = "MIME-Type";
47  
48      /***
49       * MIME type for HTML.
50       */
51      public static final String MIME_HTML = "text/html";
52  
53      /***
54       * MIME type for plain text.
55       */
56      public static final String MIME_PLAIN = "text/plain";
57  
58      /***
59       * The regex pattern for parsing the "Content-Type" header field of an URL.
60       * The first capturing group captures the MIME type, the second one the
61       * character set, if specified.
62       */
63      private static final Pattern CONTENT_TYPE_PATTERN = Pattern.compile(
64          "([^//s;]+)(?:.*?;//s*charset//s*=//s*\"?([^//s;\"]*)\"?//s*;?)?",
65          Pattern.CASE_INSENSITIVE);
66  
67      /***
68       * Determines the content type based on a file name.
69       *
70       * @param file the file name
71       * @param config the configuration to use
72       * @return the determined content type
73       */
74      public static ContentType determineContentType(final File file,
75              final TiesConfiguration config) {
76          final String extension = (file == null)
77              ? "" : IOUtils.getExtension(file);
78          return determineContentType(extension, null, config);
79      }
80  
81      /***
82       * Helper method to determines the content type based an "Content-Type"
83       * header and/or file name extension. The "Content-Type" header is
84       * examined first, if it is specified (not null or empty). If this does
85       * not lead to a result, the extension is examined.
86       *
87       * @param extension the file name extension (might be <code>null</code>)
88       * @param contentTypeHeader the "Content-Type" header of an URL, as returned
89       * by {@link java.net.URLConnection#getContentType()} (might be
90       * <code>null</code>)
91       * @param config the configuration to use
92       * @return the determined content type
93       */
94      private static ContentType determineContentType(final String extension,
95              final String contentTypeHeader, final TiesConfiguration config) {
96          String type = null;
97          String charSet = null;
98  
99          // parse Content-Type header, if given
100         if (StringUtils.isNotEmpty(contentTypeHeader)) {
101             final Matcher contentTypeMatcher =
102                 CONTENT_TYPE_PATTERN.matcher(contentTypeHeader);
103             if (contentTypeMatcher.find()) {
104                 type = contentTypeMatcher.group(1);
105                 charSet = contentTypeMatcher.group(2);
106             } else {
107                 // otherwise it's not a valid MIME type
108                 Util.LOG.warn("Not a valid Content-Type header: '"
109                         + contentTypeHeader + "'");
110             }
111         }
112 
113         // check extension if no valid Content-Type header given
114         if (type == null) {
115             // convert extension to lower case
116             final String normalizedExtension = (extension == null)
117                 ? "" : extension.toLowerCase();
118 
119             if (!"".equals(normalizedExtension)) {
120                 // check config for extension, using null as default value
121                 type = config.getString(TiesConfiguration.joinKey("ext",
122                                 normalizedExtension), null);
123             }
124         }
125 
126         // check config to replace alternative MIME type by main one
127         if (type != null) {
128             final String mimeLookupKey =
129                 TiesConfiguration.joinKey("mime", type);
130             if (config.containsKey(mimeLookupKey)) {
131                 type = config.getString(mimeLookupKey);
132             }
133         }
134 
135         return new ContentType(type, charSet);
136     }
137 
138     /***
139      * Helper method to determines the content type based an "Content-Type"
140      * header and/or URL extension. The "Content-Type" header is
141      * examined first, if it is specified (not null or empty). If this does
142      * not lead to a result, the extension of the URL is examined.
143      *
144      * @param url the URL (might be <code>null</code>)
145      * @param contentTypeHeader the "Content-Type" header of an URL, as returned
146      * by {@link java.net.URLConnection#getContentType()} (might be
147      * <code>null</code>)
148      * @param config the configuration to use
149      * @return the determined content type
150      */
151     public static ContentType determineContentType(final URL url,
152             final String contentTypeHeader, final TiesConfiguration config) {
153         final String extension = (url == null)
154             ? "" : IOUtils.getExtension(url);
155         return determineContentType(extension, contentTypeHeader, config);
156     }
157 
158 
159     /***
160      * The character set.
161      */
162     private final String charset;
163 
164     /***
165      * The MIME type, <code>null</code> if unknown.
166      */
167     private final String mimeType;
168 
169     /***
170      * Non-public constructor for internal use. Use the factory methods instead.
171      *
172      * @param type the MIME type of this document, <code>null</code> if unknown;
173      * will be converted to lower-case
174      * @param characterSet the character set of this document,
175      * <code>null</code> if unknown
176      */
177     private ContentType(final String type, final String characterSet) {
178         super();
179         mimeType = StringUtils.lowerCase(type);
180         charset = characterSet;
181     }
182 
183     /***
184      * Returns the character set.
185      *
186      * @return the character set, or <code>null</code> if unknown
187      */
188     public String getCharset() {
189         return charset;
190     }
191 
192     /***
193      * Returns the MIME type.
194      *
195      * @return the MIME type, or <code>null</code> if unknown
196      */
197     public String getMimeType() {
198         return mimeType;
199     }
200 
201     /***
202      * Returns a string representation of this object.
203      *
204      * @return a textual representation
205      */
206     public String toString() {
207         return new ToStringBuilder(this)
208             .append("mime type", mimeType)
209             .append("charset", charset)
210             .toString();
211     }
212 
213 }