Skip to content

Commit

Permalink
Merge pull request #463 from metafacture/extendHttpOpener
Browse files Browse the repository at this point in the history
Extend `HttpOpener`. (#460)
  • Loading branch information
blackwinter authored Sep 8, 2022
2 parents d057f20 + b672731 commit b142587
Show file tree
Hide file tree
Showing 3 changed files with 541 additions and 31 deletions.
2 changes: 2 additions & 0 deletions metafacture-io/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ dependencies {
implementation 'commons-io:commons-io:2.5'
implementation 'org.apache.commons:commons-compress:1.21'
runtimeOnly 'org.tukaani:xz:1.6'
testImplementation 'com.github.tomakehurst:wiremock-jre8:2.33.2'
testImplementation 'junit:junit:4.12'
testImplementation 'org.mockito:mockito-core:2.5.5'
testImplementation 'org.assertj:assertj-core:3.11.1'
testRuntimeOnly 'org.slf4j:slf4j-simple:1.7.21'
}
241 changes: 210 additions & 31 deletions metafacture-io/src/main/java/org/metafacture/io/HttpOpener.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
* Copyright 2013, 2022 Deutsche Nationalbibliothek et al
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
Expand All @@ -24,63 +24,150 @@
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultObjectPipe;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.io.SequenceInputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
* Opens a {@link URLConnection} and passes a reader to the receiver.
* Opens an {@link HttpURLConnection} and passes a reader to the receiver.
*
* @author Christoph Böhme
* @author Jan Schnasse
* @author Jens Wille
*/
@Description("Opens an HTTP resource. Supports the setting of `Accept` and `Accept-Charset` as HTTP header fields, as well as generic headers (separated by `\\n`).")
@Description("Opens an HTTP resource. Supports setting HTTP header fields `Accept`, `Accept-Charset` and `Content-Type`, as well as generic headers (separated by `\\n`). Defaults: request `method` = `GET`, request `url` = `@-` (input data), request `body` = `@-` (input data) if request method supports body and input data not already used, `Accept` header = `*/*`, `Accept-Charset` header (`encoding`) = `UTF-8`, `errorPrefix` = `ERROR: `.")
@In(String.class)
@Out(Reader.class)
@FluxCommand("open-http")
public final class HttpOpener extends DefaultObjectPipe<String, ObjectReceiver<Reader>> {

private static final Pattern HEADER_FIELD_SEPARATOR = Pattern.compile("\n");
private static final Pattern HEADER_VALUE_SEPARATOR = Pattern.compile(":");
public static final String ACCEPT_DEFAULT = "*/*";
public static final String ACCEPT_HEADER = "accept";
public static final String CONTENT_TYPE_HEADER = "content-type";
public static final String DEFAULT_PREFIX = "ERROR: ";
public static final String ENCODING_DEFAULT = "UTF-8";
public static final String ENCODING_HEADER = "accept-charset";
public static final String INPUT_DESIGNATOR = "@-";

private static final String ACCEPT_HEADER = "accept";
private static final String ENCODING_HEADER = "accept-charset";
public static final String DEFAULT_METHOD_NAME = "GET";
public static final Method DEFAULT_METHOD = Method.valueOf(DEFAULT_METHOD_NAME);

private static final String ACCEPT_DEFAULT = "*/*";
private static final String ENCODING_DEFAULT = "UTF-8";
public static final String HEADER_FIELD_SEPARATOR = "\n";
public static final String HEADER_VALUE_SEPARATOR = ":";

private static final Pattern HEADER_FIELD_SEPARATOR_PATTERN = Pattern.compile(HEADER_FIELD_SEPARATOR);
private static final Pattern HEADER_VALUE_SEPARATOR_PATTERN = Pattern.compile(HEADER_VALUE_SEPARATOR);

private final Map<String, String> headers = new HashMap<>();

private Method method;
private String body;
private String errorPrefix;
private String url;
private boolean inputUsed;

/**
 * The HTTP request methods that can be selected, each flagged with whether
 * a request body is accepted and whether a response body is returned.
 */
public enum Method {

    DELETE(false, true),
    GET(false, true),
    HEAD(false, false),
    OPTIONS(false, true),
    POST(true, true),
    PUT(true, true),
    TRACE(false, true);

    private final boolean acceptsRequestBody;
    private final boolean returnsResponseBody;

    Method(final boolean acceptsRequestBody, final boolean returnsResponseBody) {
        this.acceptsRequestBody = acceptsRequestBody;
        this.returnsResponseBody = returnsResponseBody;
    }

    /**
     * Tells whether a request using this method accepts a request body.
     *
     * @return true if the request method accepts a request body
     */
    public boolean getRequestHasBody() {
        return acceptsRequestBody;
    }

    /**
     * Tells whether a request using this method returns a response body.
     *
     * @return true if the request method returns a response body
     */
    public boolean getResponseHasBody() {
        return returnsResponseBody;
    }

}

/**
 * Creates an instance of {@link HttpOpener} with the default accept
 * header, encoding, error prefix, request method and request URL.
 */
public HttpOpener() {
    // Header defaults go through the setters so the header keys are
    // registered the same way as user-supplied headers.
    setAccept(ACCEPT_DEFAULT);
    setEncoding(ENCODING_DEFAULT);

    errorPrefix = DEFAULT_PREFIX;
    method = DEFAULT_METHOD;
    url = INPUT_DESIGNATOR;
}

/**
 * Sets the value of the HTTP {@value #ACCEPT_HEADER} header, i.e. a MIME
 * type such as {@code text/plain} or {@code application/json}. Unless
 * changed, the header is sent as {@value #ACCEPT_DEFAULT}, which means
 * any MIME type.
 *
 * @param accept MIME type to use for the HTTP accept header
 */
public void setAccept(final String accept) {
    this.setHeader(ACCEPT_HEADER, accept);
}

/**
 * Sets the HTTP request body. When no body has been set and the
 * {@link #setMethod(Method) request method} accepts a request body, the
 * body defaults to {@value #INPUT_DESIGNATOR}, which means the
 * {@link #process(String) input data} is sent as request body <i>unless
 * the input has already been used</i>; otherwise, no request body is sent
 * by default.
 *
 * <p>If a request body has been set, but the request method does not
 * accept a body, the method <i>may</i> be changed to {@code POST}.
 *
 * @param body the request body
 */
public void setBody(final String body) {
    this.body = body;
}

/**
 * Sets the value of the HTTP {@value #CONTENT_TYPE_HEADER} header, i.e. a
 * MIME type such as {@code text/plain} or {@code application/json}.
 *
 * @param contentType MIME type to use for the HTTP content-type header
 */
public void setContentType(final String contentType) {
    this.setHeader(CONTENT_TYPE_HEADER, contentType);
}

/**
* Sets the HTTP {@value ENCODING_HEADER} header value. This is the
* preferred encoding for the HTTP response. Additionally, the encoding
* is used for reading the HTTP response if it does not specify a content
* encoding. The default for the encoding is {@value ENCODING_DEFAULT}.
*
* @param encoding name of the encoding used for the accept-charset HTTP
* header
Expand All @@ -90,14 +177,28 @@ public void setEncoding(final String encoding) {
}

/**
 * Sets the prefix that is put in front of the body of an error response
 * before it is passed to the receiver. The default error prefix is
 * {@value #DEFAULT_PREFIX}; with {@code null}, the error body is passed
 * on without a prefix.
 *
 * @param errorPrefix the error prefix
 */
public void setErrorPrefix(final String errorPrefix) {
    this.errorPrefix = errorPrefix;
}

/**
* Sets a request property (header), or multiple request properties
* separated by {@value HEADER_FIELD_SEPARATOR}. Header name and value
* are separated by {@value HEADER_VALUE_SEPARATOR}. The header name is
* case-insensitive.
*
* @param header request property line
*
* @see #setHeader(String, String)
*/
public void setHeader(final String header) {
Arrays.stream(HEADER_FIELD_SEPARATOR.split(header)).forEach(h -> {
final String[] parts = HEADER_VALUE_SEPARATOR.split(h, 2);
Arrays.stream(HEADER_FIELD_SEPARATOR_PATTERN.split(header)).forEach(h -> {
final String[] parts = HEADER_VALUE_SEPARATOR_PATTERN.split(h, 2);
if (parts.length == 2) {
setHeader(parts[0], parts[1].trim());
}
Expand All @@ -108,7 +209,7 @@ public void setHeader(final String header) {
}

/**
* Sets a request property.
* Sets a request property (header). The header name is case-insensitive.
*
* @param key request property key
* @param value request property value
Expand All @@ -117,21 +218,99 @@ public void setHeader(final String key, final String value) {
headers.put(key.toLowerCase(), value);
}

/**
 * Selects the HTTP request method used for the request. Unless changed,
 * requests are sent with {@value #DEFAULT_METHOD_NAME}.
 *
 * @param method the request method
 */
public void setMethod(final Method method) {
    this.method = method;
}

/**
 * Sets the HTTP request URL. Unless changed, the request URL is
 * {@value #INPUT_DESIGNATOR}, which means the
 * {@link #process(String) input data} is used as request URL.
 *
 * @param url the request URL
 */
public void setUrl(final String url) {
    this.url = url;
}

@Override
public void process(final String urlStr) {
public void process(final String input) {
try {
final URL url = new URL(urlStr);
final URLConnection con = url.openConnection();
headers.forEach(con::addRequestProperty);
String enc = con.getContentEncoding();
if (enc == null) {
enc = headers.get(ENCODING_HEADER);
final String requestUrl = getInput(input, url);
final String requestBody = getInput(input,
body == null && method.getRequestHasBody() ? INPUT_DESIGNATOR : body);

final HttpURLConnection connection =
(HttpURLConnection) new URL(requestUrl).openConnection();

connection.setRequestMethod(method.name());
headers.forEach(connection::addRequestProperty);

if (requestBody != null) {
connection.setDoOutput(true);
connection.getOutputStream().write(requestBody.getBytes());
}
getReceiver().process(new InputStreamReader(con.getInputStream(), enc));

final InputStream inputStream = getInputStream(connection);
final String contentEncoding = getEncoding(connection.getContentEncoding());

getReceiver().process(new InputStreamReader(inputStream, contentEncoding));
}
catch (final IOException e) {
throw new MetafactureException(e);
}
}

/**
 * Resolves a configured value: any value other than
 * {@value #INPUT_DESIGNATOR} is returned unchanged; the designator is
 * replaced by the input data the first time and by {@code null} once the
 * input has been marked as used.
 *
 * @param input the input data
 * @param value the configured value, possibly the input designator
 * @return the resolved value, or {@code null} if the input was already used
 */
private String getInput(final String input, final String value) {
    if (!INPUT_DESIGNATOR.equals(value)) {
        return value;
    }

    if (inputUsed) {
        return null;
    }

    inputUsed = true;
    return input;
}

/**
 * Returns the response stream of the connection. If reading the response
 * fails and the connection provides an error stream, the (optionally
 * prefixed) error stream is returned instead.
 *
 * @param connection the connection to read from
 * @return the response stream or the error stream
 * @throws IOException if reading fails and no error stream is available
 */
private InputStream getInputStream(final HttpURLConnection connection) throws IOException {
    try {
        return connection.getInputStream();
    }
    catch (final IOException cause) {
        final InputStream errorBody = connection.getErrorStream();
        if (errorBody == null) {
            // Nothing to report to the receiver; propagate the failure.
            throw cause;
        }
        return getErrorStream(errorBody);
    }
}

/**
 * Puts the configured error prefix in front of the given error stream.
 * Without an error prefix, the error stream is returned unchanged.
 *
 * @param errorStream the error stream of the connection
 * @return the error stream, preceded by the error prefix if one is set
 */
private InputStream getErrorStream(final InputStream errorStream) {
    if (errorPrefix == null) {
        return errorStream;
    }

    // Encode explicitly instead of using the platform default charset, so
    // the prefix bytes do not vary between systems.
    final byte[] prefixBytes = errorPrefix.getBytes(StandardCharsets.UTF_8);
    return new SequenceInputStream(new ByteArrayInputStream(prefixBytes), errorStream);
}

/**
 * Chooses the encoding for reading the response: the given content
 * encoding if present, otherwise the value configured for the
 * {@value #ENCODING_HEADER} header.
 *
 * @param contentEncoding the content encoding of the response, may be null
 * @return the encoding to use for reading the response
 */
private String getEncoding(final String contentEncoding) {
    if (contentEncoding == null) {
        return headers.get(ENCODING_HEADER);
    }
    return contentEncoding;
}

}
Loading

0 comments on commit b142587

Please sign in to comment.