Skip to content

Commit ba6e8a9

Browse files
HTTPCLIENT-2159: Fix handling of charset in ContentType for specific media types (#483)
* Updated ContentType to ensure that no charset is included for media types such as application/octet-stream, multipart/form-data, and image/*, which do not require a charset per the RFC.
* Refactored the toString() method to correctly omit the charset for these media types.
* Adjusted the creation methods to better handle implicit charsets and added validation for reserved characters in MIME types.
1 parent 5e5eb0b commit ba6e8a9

File tree

2 files changed

+269
-40
lines changed

2 files changed

+269
-40
lines changed

httpcore5/src/main/java/org/apache/hc/core5/http/ContentType.java

Lines changed: 152 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141

4242
import org.apache.hc.core5.annotation.Contract;
4343
import org.apache.hc.core5.annotation.ThreadingBehavior;
44+
import org.apache.hc.core5.http.message.BasicHeaderValueFormatter;
4445
import org.apache.hc.core5.http.message.BasicNameValuePair;
4546
import org.apache.hc.core5.http.message.MessageSupport;
4647
import org.apache.hc.core5.http.message.ParserCursor;
@@ -67,105 +68,125 @@ public final class ContentType implements Serializable {
6768
*/
6869
private static final String CHARSET = "charset";
6970

71+
/**
72+
* Flag indicating whether the charset is implicit.
73+
* <p>
74+
* When {@code implicitCharset} is {@code true}, the charset will not be explicitly
75+
* included in the string representation of this {@link ContentType} (i.e., in the {@code toString} method),
76+
* unless it is required for the given MIME type.
77+
* If {@code implicitCharset} is {@code false}, the charset will always be included in the string representation,
78+
* unless the MIME type explicitly disallows charset parameters (e.g., certain binary or multipart types).
79+
* </p>
80+
* <p>
81+
* This flag is essential for proper handling of content types where the charset is either implied by the specification
82+
* (e.g., JSON is always UTF-8) or where including a charset is not meaningful (e.g., binary types like
83+
* {@code application/octet-stream}).
84+
* </p>
85+
*
86+
* @since 5.5
87+
*/
88+
private final boolean implicitCharset;
89+
90+
7091
// constants
7192
public static final ContentType APPLICATION_ATOM_XML = create(
72-
"application/atom+xml", StandardCharsets.UTF_8);
93+
"application/atom+xml", StandardCharsets.UTF_8, false);
7394
public static final ContentType APPLICATION_FORM_URLENCODED = create(
74-
"application/x-www-form-urlencoded", StandardCharsets.ISO_8859_1);
95+
"application/x-www-form-urlencoded", StandardCharsets.ISO_8859_1, true);
7596
public static final ContentType APPLICATION_JSON = create(
76-
"application/json", StandardCharsets.UTF_8);
97+
"application/json", StandardCharsets.UTF_8, true);
7798

7899
/**
79100
* Public constant media type for {@code application/x-ndjson}.
80101
* @since 5.1
81102
*/
82103
public static final ContentType APPLICATION_NDJSON = create(
83-
"application/x-ndjson", StandardCharsets.UTF_8);
104+
"application/x-ndjson", StandardCharsets.UTF_8, true);
84105

85106
public static final ContentType APPLICATION_OCTET_STREAM = create(
86-
"application/octet-stream", (Charset) null);
107+
"application/octet-stream", (Charset) null, true);
87108
/**
88109
* Public constant media type for {@code application/pdf}.
89110
* @since 5.1
90111
*/
91112
public static final ContentType APPLICATION_PDF = create(
92-
"application/pdf", StandardCharsets.UTF_8);
113+
"application/pdf", (Charset) null, true);
93114

94115
public static final ContentType APPLICATION_SOAP_XML = create(
95-
"application/soap+xml", StandardCharsets.UTF_8);
116+
"application/soap+xml", StandardCharsets.UTF_8, false);
96117
public static final ContentType APPLICATION_SVG_XML = create(
97-
"application/svg+xml", StandardCharsets.UTF_8);
118+
"application/svg+xml", StandardCharsets.UTF_8, false);
98119
public static final ContentType APPLICATION_XHTML_XML = create(
99-
"application/xhtml+xml", StandardCharsets.UTF_8);
120+
"application/xhtml+xml", StandardCharsets.UTF_8, false);
100121
public static final ContentType APPLICATION_XML = create(
101-
"application/xml", StandardCharsets.UTF_8);
122+
"application/xml", StandardCharsets.UTF_8, false);
102123
/**
103124
* Public constant media type for {@code application/problem+json}.
104125
* @see <a href="https://tools.ietf.org/html/rfc7807#section-6.1">Problem Details for HTTP APIs, 6.1. application/problem+json</a>
105126
* @since 5.1
106127
*/
107128
public static final ContentType APPLICATION_PROBLEM_JSON = create(
108-
"application/problem+json", StandardCharsets.UTF_8);
129+
"application/problem+json", StandardCharsets.UTF_8, true);
109130
/**
110131
* Public constant media type for {@code application/problem+xml}.
111132
* @see <a href="https://tools.ietf.org/html/rfc7807#section-6.2">Problem Details for HTTP APIs, 6.2. application/problem+xml</a>
112133
* @since 5.1
113134
*/
114135
public static final ContentType APPLICATION_PROBLEM_XML = create(
115-
"application/problem+xml", StandardCharsets.UTF_8);
136+
"application/problem+xml", StandardCharsets.UTF_8, false);
116137

117138
/**
118139
* Public constant media type for {@code application/rss+xml}.
119140
* @since 5.1
120141
*/
121142
public static final ContentType APPLICATION_RSS_XML = create(
122-
"application/rss+xml", StandardCharsets.UTF_8);
143+
"application/rss+xml", StandardCharsets.UTF_8, false);
123144

124145
public static final ContentType IMAGE_BMP = create(
125-
"image/bmp");
146+
"image/bmp", (Charset) null, true);
126147
public static final ContentType IMAGE_GIF = create(
127-
"image/gif");
148+
"image/gif", (Charset) null, true);
128149
public static final ContentType IMAGE_JPEG = create(
129-
"image/jpeg");
150+
"image/jpeg", (Charset) null, true);
130151
public static final ContentType IMAGE_PNG = create(
131-
"image/png");
152+
"image/png", (Charset) null, true);
132153
public static final ContentType IMAGE_SVG = create(
133-
"image/svg+xml");
154+
"image/svg+xml", (Charset) null, false);
134155
public static final ContentType IMAGE_TIFF = create(
135-
"image/tiff");
156+
"image/tiff", (Charset) null, true);
136157
public static final ContentType IMAGE_WEBP = create(
137-
"image/webp");
158+
"image/webp", (Charset) null, true);
138159
public static final ContentType MULTIPART_FORM_DATA = create(
139-
"multipart/form-data", StandardCharsets.ISO_8859_1);
160+
"multipart/form-data", StandardCharsets.ISO_8859_1, true);
140161

141162
/**
142163
* Public constant media type for {@code multipart/mixed}.
143164
* @since 5.1
144165
*/
145166
public static final ContentType MULTIPART_MIXED = create(
146-
"multipart/mixed", StandardCharsets.ISO_8859_1);
167+
"multipart/mixed", StandardCharsets.ISO_8859_1, true);
147168

148169
/**
149170
* Public constant media type for {@code multipart/related}.
150171
* @since 5.1
151172
*/
152173
public static final ContentType MULTIPART_RELATED = create(
153-
"multipart/related", StandardCharsets.ISO_8859_1);
174+
"multipart/related", StandardCharsets.ISO_8859_1, true);
154175

155176
public static final ContentType TEXT_HTML = create(
156-
"text/html", StandardCharsets.UTF_8);
177+
"text/html", StandardCharsets.UTF_8, true);
157178

158179
/**
159180
* Public constant media type for {@code text/markdown}.
160181
* @since 5.1
161182
*/
162183
public static final ContentType TEXT_MARKDOWN = create(
163-
"text/markdown", StandardCharsets.UTF_8);
184+
"text/markdown", StandardCharsets.UTF_8, false);
164185

165186
public static final ContentType TEXT_PLAIN = create(
166-
"text/plain", StandardCharsets.UTF_8);
187+
"text/plain", StandardCharsets.UTF_8, false);
167188
public static final ContentType TEXT_XML = create(
168-
"text/xml", StandardCharsets.UTF_8);
189+
"text/xml", StandardCharsets.UTF_8, false);
169190
/**
170191
* Public constant media type for {@code text/event-stream}.
171192
* @see <a href="https://www.w3.org/TR/eventsource/">Server-Sent Events W3C recommendation</a>
@@ -175,7 +196,7 @@ public final class ContentType implements Serializable {
175196
"text/event-stream", StandardCharsets.UTF_8);
176197

177198
public static final ContentType WILDCARD = create(
178-
"*/*", (Charset) null);
199+
"*/*", (Charset) null, true);
179200

180201
/**
181202
* An empty immutable {@code NameValuePair} array.
@@ -225,18 +246,42 @@ public final class ContentType implements Serializable {
225246
ContentType(
226247
final String mimeType,
227248
final Charset charset) {
228-
this.mimeType = mimeType;
229-
this.charset = charset;
230-
this.params = null;
249+
this (mimeType,charset,null, false);
231250
}
232251

233252
/**
 * Constructs an instance from a MIME type, charset and parameter array.
 * The charset is treated as explicit: this simply forwards to the
 * four-argument constructor with {@code implicitCharset} set to {@code false}.
 */
ContentType(final String mimeType, final Charset charset, final NameValuePair[] params) {
    this(mimeType, charset, params, false);
}
259+
260+
/**
261+
* Constructs a new instance of {@link ContentType} with the given MIME type, charset, parameters,
262+
* and an implicit charset flag.
263+
* <p>
264+
* If {@code implicitCharset} is set to {@code true}, the charset will not be explicitly
265+
* included in the string representation of this content type (i.e., the {@code toString} method)
266+
* unless it is required for the given MIME type.
267+
* If {@code implicitCharset} is {@code false}, the charset will always be included in the
268+
* string representation unless the MIME type is one of those that should not include a charset.
269+
* </p>
270+
*
271+
* @param mimeType the MIME type. It must not be {@code null} or empty and must not contain
272+
* reserved characters such as {@code <">, <;>, <,>}.
273+
* @param charset the character set for this content type. This can be {@code null}.
274+
* @param params optional parameters for this content type. If {@code null}, no additional
275+
* parameters will be included.
276+
* @param implicitCharset whether the charset is implicit. If {@code true}, the charset is not
277+
* included in the {@code toString} output unless required.
278+
* @since 5.5
279+
*/
280+
ContentType(final String mimeType, final Charset charset, final NameValuePair[] params, final boolean implicitCharset) {
237281
this.mimeType = mimeType;
238282
this.charset = charset;
239-
this.params = params;
283+
this.implicitCharset = implicitCharset;
284+
this.params = params != null ? params.clone() : null;
240285
}
241286

242287
public String getMimeType() {
@@ -288,8 +333,21 @@ public String toString() {
288333
buf.append(this.mimeType);
289334
if (this.params != null) {
290335
buf.append("; ");
291-
MessageSupport.formatParameters(buf, this.params);
292-
} else if (this.charset != null) {
336+
boolean first = true;
337+
for (int i = 0; i < params.length; i++) {
338+
final NameValuePair param = params[i];
339+
if (!first) {
340+
buf.append("; ");
341+
}
342+
if (param.getName().equalsIgnoreCase("charset") && implicitCharset) {
343+
continue;
344+
}
345+
BasicHeaderValueFormatter.INSTANCE.formatNameValuePair(buf, param, false);
346+
first = false;
347+
}
348+
349+
} else if (this.charset != null && !implicitCharset) {
350+
// Append charset only if it's not one of the types that shouldn't have charset
293351
buf.append("; charset=");
294352
buf.append(this.charset.name());
295353
}
@@ -306,6 +364,58 @@ private static boolean valid(final String s) {
306364
return true;
307365
}
308366

367+
368+
/**
369+
* Creates a new instance of {@link ContentType} with the given MIME type, charset,
370+
* and an implicit charset flag.
371+
* <p>
372+
* This method allows specifying whether the charset should be implicit or explicit.
373+
* If {@code implicitCharset} is set to {@code true}, the charset will not be explicitly
374+
* included in the string representation of this content type (i.e., the {@code toString} method),
375+
* unless it is required for the given MIME type. If {@code implicitCharset} is {@code false},
376+
* the charset will always be included unless the MIME type does not allow a charset.
377+
* </p>
378+
*
379+
* @param mimeType the MIME type. It must not be {@code null} or empty and must not contain
380+
* reserved characters such as {@code <">, <;>, <,>}.
381+
* @param charset the character set for this content type. This can be {@code null}.
382+
* @param implicitCharset whether the charset is implicit. If {@code true}, the charset is
383+
* not included in the {@code toString} output unless required.
384+
* @return a new instance of {@link ContentType}.
385+
* @throws IllegalArgumentException if the MIME type is invalid or contains reserved characters.
386+
* @since 5.5
387+
*/
388+
public static ContentType create(final String mimeType, final Charset charset, final boolean implicitCharset) {
389+
final String normalizedMimeType = TextUtils.toLowerCase(Args.notBlank(mimeType, "MIME type"));
390+
Args.check(valid(normalizedMimeType), "MIME type may not contain reserved characters");
391+
return new ContentType(normalizedMimeType, charset, null, implicitCharset);
392+
}
393+
394+
/**
395+
* Creates a new instance of {@link ContentType} with the given MIME type, parameters,
396+
* and an implicit charset flag.
397+
* <p>
398+
* This method allows specifying additional parameters for the content type and whether
399+
* the charset should be implicit or explicit. If {@code implicitCharset} is {@code true},
400+
* the charset will not be included in the string representation unless required.
401+
* </p>
402+
*
403+
* @param mimeType the MIME type. It must not be {@code null} or empty and must not contain
404+
* reserved characters such as {@code <">, <;>, <,>}.
405+
* @param implicitCharset whether the charset is implicit. If {@code true}, the charset is
406+
* not included in the {@code toString} output unless required.
407+
* @param params optional parameters for the content type. Can be {@code null}.
408+
* @return a new instance of {@link ContentType}.
409+
* @throws IllegalArgumentException if the MIME type is invalid or contains reserved characters.
410+
* @throws UnsupportedCharsetException if the charset provided in the parameters is not supported.
411+
* @since 5.5
412+
*/
413+
public static ContentType create(final String mimeType, final boolean implicitCharset, final NameValuePair... params) throws UnsupportedCharsetException {
414+
final String type = TextUtils.toLowerCase(Args.notBlank(mimeType, "MIME type"));
415+
Args.check(valid(type), "MIME type may not contain reserved characters");
416+
return create(mimeType, params != null ? params.clone() : null, implicitCharset);
417+
}
418+
309419
/**
310420
* Creates a new instance of {@link ContentType}.
311421
*
@@ -315,9 +425,7 @@ private static boolean valid(final String s) {
315425
* @return content type
316426
*/
317427
public static ContentType create(final String mimeType, final Charset charset) {
318-
final String normalizedMimeType = TextUtils.toLowerCase(Args.notBlank(mimeType, "MIME type"));
319-
Args.check(valid(normalizedMimeType), "MIME type may not contain reserved characters");
320-
return new ContentType(normalizedMimeType, charset);
428+
return create(mimeType, charset, false);
321429
}
322430

323431
/**
@@ -356,6 +464,10 @@ private static ContentType create(final HeaderElement helem, final boolean stric
356464
}
357465

358466
private static ContentType create(final String mimeType, final NameValuePair[] params, final boolean strict) {
467+
return create(mimeType, params != null ? params.clone() : null, strict, false);
468+
}
469+
470+
private static ContentType create(final String mimeType, final NameValuePair[] params, final boolean strict, final boolean implicitCharset) {
359471
Charset charset = null;
360472
if (params != null) {
361473
for (final NameValuePair param : params) {
@@ -374,7 +486,7 @@ private static ContentType create(final String mimeType, final NameValuePair[] p
374486
}
375487
}
376488
}
377-
return new ContentType(mimeType, charset, params != null && params.length > 0 ? params : null);
489+
return new ContentType(mimeType, charset, params != null && params.length > 0 ? params : null, implicitCharset);
378490
}
379491

380492
/**
@@ -517,11 +629,11 @@ public ContentType withParameters(
517629
for (final Map.Entry<String, String> entry: paramMap.entrySet()) {
518630
newParams.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
519631
}
520-
return create(this.getMimeType(), newParams.toArray(EMPTY_NAME_VALUE_PAIR_ARRAY), true);
632+
return create(this.getMimeType(), newParams.toArray(EMPTY_NAME_VALUE_PAIR_ARRAY), true, this.implicitCharset);
521633
}
522634

523635
public boolean isSameMimeType(final ContentType contentType) {
524636
return contentType != null && mimeType.equalsIgnoreCase(contentType.getMimeType());
525637
}
526638

527-
}
639+
}

0 commit comments

Comments
 (0)