1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package de.fu_berlin.ties.io;
23
24 import java.io.File;
25 import java.net.URL;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28
29 import org.apache.commons.lang.StringUtils;
30 import org.apache.commons.lang.builder.ToStringBuilder;
31
32 import de.fu_berlin.ties.TiesConfiguration;
33 import de.fu_berlin.ties.util.Util;
34
35 /***
36 * A simple wrapper for the MIME type and the character set of a document.
37 *
38 * @author Christian Siefkes
39 * @version $Revision: 1.2 $, $Date: 2004/05/31 19:14:24 $, $Author: siefkes $
40 */
41 public final class ContentType {
42
43 /***
44 * Context key recommended to store the MIME type string.
45 */
46 public static final String KEY_MIME_TYPE = "MIME-Type";
47
48 /***
49 * MIME type for HTML.
50 */
51 public static final String MIME_HTML = "text/html";
52
53 /***
54 * MIME type for plain text.
55 */
56 public static final String MIME_PLAIN = "text/plain";
57
58 /***
59 * The regex pattern for parsing the "Content-Type" header field of an URL.
60 * The first capturing group captures the MIME type, the second one the
61 * character set, if specified.
62 */
63 private static final Pattern CONTENT_TYPE_PATTERN = Pattern.compile(
64 "([^//s;]+)(?:.*?;//s*charset//s*=//s*\"?([^//s;\"]*)\"?//s*;?)?",
65 Pattern.CASE_INSENSITIVE);
66
67 /***
68 * Determines the content type based on a file name.
69 *
70 * @param file the file name
71 * @param config the configuration to use
72 * @return the determined content type
73 */
74 public static ContentType determineContentType(final File file,
75 final TiesConfiguration config) {
76 final String extension = (file == null)
77 ? "" : IOUtils.getExtension(file);
78 return determineContentType(extension, null, config);
79 }
80
81 /***
82 * Helper method to determines the content type based an "Content-Type"
83 * header and/or file name extension. The "Content-Type" header is
84 * examined first, if it is specified (not null or empty). If this does
85 * not lead to a result, the extension is examined.
86 *
87 * @param extension the file name extension (might be <code>null</code>)
88 * @param contentTypeHeader the "Content-Type" header of an URL, as returned
89 * by {@link java.net.URLConnection#getContentType()} (might be
90 * <code>null</code>)
91 * @param config the configuration to use
92 * @return the determined content type
93 */
94 private static ContentType determineContentType(final String extension,
95 final String contentTypeHeader, final TiesConfiguration config) {
96 String type = null;
97 String charSet = null;
98
99
100 if (StringUtils.isNotEmpty(contentTypeHeader)) {
101 final Matcher contentTypeMatcher =
102 CONTENT_TYPE_PATTERN.matcher(contentTypeHeader);
103 if (contentTypeMatcher.find()) {
104 type = contentTypeMatcher.group(1);
105 charSet = contentTypeMatcher.group(2);
106 } else {
107
108 Util.LOG.warn("Not a valid Content-Type header: '"
109 + contentTypeHeader + "'");
110 }
111 }
112
113
114 if (type == null) {
115
116 final String normalizedExtension = (extension == null)
117 ? "" : extension.toLowerCase();
118
119 if (!"".equals(normalizedExtension)) {
120
121 type = config.getString(TiesConfiguration.joinKey("ext",
122 normalizedExtension), null);
123 }
124 }
125
126
127 if (type != null) {
128 final String mimeLookupKey =
129 TiesConfiguration.joinKey("mime", type);
130 if (config.containsKey(mimeLookupKey)) {
131 type = config.getString(mimeLookupKey);
132 }
133 }
134
135 return new ContentType(type, charSet);
136 }
137
138 /***
139 * Helper method to determines the content type based an "Content-Type"
140 * header and/or URL extension. The "Content-Type" header is
141 * examined first, if it is specified (not null or empty). If this does
142 * not lead to a result, the extension of the URL is examined.
143 *
144 * @param url the URL (might be <code>null</code>)
145 * @param contentTypeHeader the "Content-Type" header of an URL, as returned
146 * by {@link java.net.URLConnection#getContentType()} (might be
147 * <code>null</code>)
148 * @param config the configuration to use
149 * @return the determined content type
150 */
151 public static ContentType determineContentType(final URL url,
152 final String contentTypeHeader, final TiesConfiguration config) {
153 final String extension = (url == null)
154 ? "" : IOUtils.getExtension(url);
155 return determineContentType(extension, contentTypeHeader, config);
156 }
157
158
159 /***
160 * The character set.
161 */
162 private final String charset;
163
164 /***
165 * The MIME type, <code>null</code> if unknown.
166 */
167 private final String mimeType;
168
169 /***
170 * Non-public constructor for internal use. Use the factory methods instead.
171 *
172 * @param type the MIME type of this document, <code>null</code> if unknown;
173 * will be converted to lower-case
174 * @param characterSet the character set of this document,
175 * <code>null</code> if unknown
176 */
177 private ContentType(final String type, final String characterSet) {
178 super();
179 mimeType = StringUtils.lowerCase(type);
180 charset = characterSet;
181 }
182
183 /***
184 * Returns the character set.
185 *
186 * @return the character set, or <code>null</code> if unknown
187 */
188 public String getCharset() {
189 return charset;
190 }
191
192 /***
193 * Returns the MIME type.
194 *
195 * @return the MIME type, or <code>null</code> if unknown
196 */
197 public String getMimeType() {
198 return mimeType;
199 }
200
201 /***
202 * Returns a string representation of this object.
203 *
204 * @return a textual representation
205 */
206 public String toString() {
207 return new ToStringBuilder(this)
208 .append("mime type", mimeType)
209 .append("charset", charset)
210 .toString();
211 }
212
213 }