From 80ae128f358d9bb3d06fc0cdf9ceab907d29d49a Mon Sep 17 00:00:00 2001 From: Leandro Doctors Date: Thu, 30 Jan 2025 22:11:48 +0100 Subject: [PATCH] Implement Binary Upload (Fixes #2126) --- .../write-binary-content-via-raw-data.sh | 54 +++++++++ .github/workflows/build.yml | 3 + .../rest-api/src/blaze/rest_api/routes.clj | 12 +- .../src/blaze/middleware/fhir/resource.clj | 55 ++++++++- .../blaze/middleware/fhir/resource_test.clj | 111 ++++++++++++++---- 5 files changed, 205 insertions(+), 30 deletions(-) create mode 100755 .github/scripts/write-binary-content-via-raw-data.sh diff --git a/.github/scripts/write-binary-content-via-raw-data.sh b/.github/scripts/write-binary-content-via-raw-data.sh new file mode 100755 index 000000000..837bf63e6 --- /dev/null +++ b/.github/scripts/write-binary-content-via-raw-data.sh @@ -0,0 +1,54 @@ +#!/bin/bash -e + +# This script creates a large binary resource (8 MiB) and verifies that its binary content +# * can be read correctly via direct binary upload, and that +# * it's properly base64-encoded. + +BASE="http://localhost:8080/fhir" + +# Create temporary files for original and downloaded data +TEMP_ORIGINAL=$(mktemp) +TEMP_DOWNLOAD=$(mktemp) +TEMP_JSON=$(mktemp) + +# Ensure cleanup of temporary files +trap 'rm -f "$TEMP_ORIGINAL" "$TEMP_DOWNLOAD" "$TEMP_JSON"' EXIT + +echo "Testing direct binary upload and download..." + +# Generate 8 MiB of random binary data +dd if=/dev/urandom bs=8388608 count=1 2>/dev/null > "$TEMP_ORIGINAL" + +# Create Binary resource via direct binary upload +ID=$(curl -s \ + -H 'Content-Type: application/octet-stream' \ + --data-binary "@$TEMP_ORIGINAL" \ + "$BASE/Binary" | \ + jq -r '.id') + +echo "Created Binary resource via direct binary upload with ID: $ID" + +# Download as JSON format to verify base64 encoding +curl -s \ + -H 'Accept: application/fhir+json' \ + "$BASE/Binary/$ID" > "$TEMP_JSON" + +# Extract the base64 content and decode it +jq -r '.data' "$TEMP_JSON" | base64 -d > "$TEMP_DOWNLOAD" + +# Compare files directly +if [ -n "$ID" ] && cmp -s "$TEMP_ORIGINAL" "$TEMP_DOWNLOAD"; then + echo "✅ Direct Binary: Successfully verified 8 MiB binary content integrity and base64 encoding" +else + echo "🆘 Direct Binary: Content verification failed" + echo "Server response (JSON):" + cat "$TEMP_JSON" + echo "Original size: $(wc -c < "$TEMP_ORIGINAL") bytes" + echo "Downloaded size: $(wc -c < "$TEMP_DOWNLOAD") bytes" + # Show first few bytes of both files in hex for debugging + echo "First 32 bytes of original:" + hexdump -C "$TEMP_ORIGINAL" | head -n 2 + echo "First 32 bytes of downloaded:" + hexdump -C "$TEMP_DOWNLOAD" | head -n 2 + exit 1 +fi diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e8716345c..7b5bb4509 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1074,6 +1074,9 @@ jobs: - name: Binary Content Download - found (via XML) run: .github/scripts/read-binary-content-via-xml-found.sh + - name: Binary Content Upload (via raw data) + run: .github/scripts/write-binary-content-via-raw-data.sh + - name: Conditional Delete - Check Referential Integrity Violated run: .github/scripts/conditional-delete-type/check-referential-integrity-violated.sh diff --git a/modules/rest-api/src/blaze/rest_api/routes.clj b/modules/rest-api/src/blaze/rest_api/routes.clj index 61240f7b8..43de62e10 100644 --- a/modules/rest-api/src/blaze/rest_api/routes.clj +++ b/modules/rest-api/src/blaze/rest_api/routes.clj @@ -35,6 +35,10 @@ {:name :auth-guard :wrap auth-guard/wrap-auth-guard}) +(def ^:private wrap-binary-data + {:name :binary-data + :wrap resource/wrap-binary-data}) + (def ^:private wrap-resource {:name :resource :wrap resource/wrap-resource}) @@ -111,7 +115,9 @@ :blaze.rest-api.interaction/handler)}) (contains? interactions :create) (assoc :post {:interaction "create" - :middleware [wrap-resource] + :middleware (if (= name "Binary") + [wrap-binary-data] + [wrap-resource]) :handler (-> interactions :create :blaze.rest-api.interaction/handler)}) (contains? interactions :conditional-delete-type) @@ -179,7 +185,9 @@ :blaze.rest-api.interaction/handler)}) (contains? interactions :update) (assoc :put {:interaction "update" - :middleware [wrap-resource] + :middleware (if (= name "Binary") + [wrap-binary-data] + [wrap-resource]) :handler (-> interactions :update :blaze.rest-api.interaction/handler)}) (contains? interactions :delete) diff --git a/modules/rest-util/src/blaze/middleware/fhir/resource.clj b/modules/rest-util/src/blaze/middleware/fhir/resource.clj index 9e391d770..5d345725c 100644 --- a/modules/rest-util/src/blaze/middleware/fhir/resource.clj +++ b/modules/rest-util/src/blaze/middleware/fhir/resource.clj @@ -7,6 +7,7 @@ [blaze.anomaly :as ba :refer [if-ok when-ok]] [blaze.async.comp :as ac] [blaze.fhir.spec :as fhir-spec] + [blaze.fhir.spec.type :as type] [clojure.data.xml.jvm.parse :as xml-jvm] [clojure.data.xml.tree :as xml-tree] [clojure.java.io :as io] @@ -16,7 +17,10 @@ [ring.util.request :as request]) (:import [com.ctc.wstx.api WstxInputProperties] + [java.io InputStream] [java.io Reader] + [java.util Base64$Encoder] + [java.util Base64] [javax.xml.stream XMLInputFactory])) (set! *warn-on-reflection* true) @@ -95,6 +99,23 @@ (assoc request :body resource)) (ba/incorrect "Missing HTTP body."))) +(defn- get-binary-data [body] + (with-open [_ (prom/timer parse-duration-seconds "binary")] + (.encodeToString ^Base64$Encoder (Base64/getEncoder) (.readAllBytes ^InputStream body)))) + +(defn- resource-request-binary-data [{:keys [body headers] :as request}] + (if body + ;; `when-ok` is not needed here because: + ;; * Binary data is not parsed nor validated, and + ;; * Base64-encoding should not fail under normal circumstances. + (let [b64-encoded-data (get-binary-data body) + content-type (get headers "content-type")] + (assoc request :body + {:fhir/type :fhir/Binary + :contentType (type/code content-type) + :data (type/base64Binary b64-encoded-data)})) + (ba/incorrect "Missing HTTP body."))) + (defn- unsupported-media-type-msg [media-type] (format "Unsupported media type `%s` expect one of `application/fhir+json` or `application/fhir+xml`." media-type)) @@ -109,19 +130,43 @@ :http/status 415)) (if (str/blank? (slurp (:body request))) (assoc request :body nil) - (ba/incorrect "Content-Type header expected, but is missing.")))) + (ba/incorrect "Missing Content-Type header for FHIR resources.")))) + +(defn- binary-resource-request [request] + (if-let [content-type (request/content-type request)] + (cond + (str/starts-with? content-type "application/fhir+json") (resource-request-json request) + (str/starts-with? content-type "application/fhir+xml") (resource-request-xml request) + :else + (resource-request-binary-data request)) + (ba/incorrect "Missing Content-Type header for binary resources."))) (defn wrap-resource "Middleware to parse a resource from the body according the content-type header. - Updates the :body key in the request map on successful parsing and conforming - the resource to the internal format. + If the resource is successfully parsed and conformed to the internal format, + updates the :body key in the request map. - Returns an OperationOutcome in the internal format, skipping the handler, with - an appropriate error on parsing and conforming errors." + In case on errors, returns an OperationOutcome in the internal format with the + appropriate error and skips the handler." [handler] (fn [request] (if-ok [request (resource-request request)] (handler request) ac/completed-future))) + +(defn wrap-binary-data + "Middleware to parse binary data from the body according the content-type + header. + + If the resource is successfully parsed and conformed to the internal format, + updates the :body key in the request map. + + In case on errors, returns an OperationOutcome in the internal format with the + appropriate error and skips the handler." + [handler] + (fn [request] + (if-ok [request (binary-resource-request request)] + (handler request) + ac/completed-future))) diff --git a/modules/rest-util/test/blaze/middleware/fhir/resource_test.clj b/modules/rest-util/test/blaze/middleware/fhir/resource_test.clj index d17f6df1f..592ad954a 100644 --- a/modules/rest-util/test/blaze/middleware/fhir/resource_test.clj +++ b/modules/rest-util/test/blaze/middleware/fhir/resource_test.clj @@ -2,9 +2,10 @@ (:require [blaze.async.comp :as ac] [blaze.fhir.spec :as fhir-spec] + [blaze.fhir.spec.type :as type] [blaze.fhir.test-util] [blaze.handler.util :as handler-util] - [blaze.middleware.fhir.resource :refer [wrap-resource]] + [blaze.middleware.fhir.resource :refer [wrap-binary-data wrap-resource]] [blaze.test-util :as tu :refer [satisfies-prop]] [clojure.spec.test.alpha :as st] [clojure.string :as str] @@ -15,7 +16,9 @@ [taoensso.timbre :as log]) (:import [java.io ByteArrayInputStream] - [java.nio.charset StandardCharsets])) + [java.nio.charset StandardCharsets] + [java.util Base64$Encoder] + [java.util Base64])) (set! *warn-on-reflection* true) (st/instrument) @@ -23,18 +26,24 @@ (test/use-fixtures :each tu/fixture) -(defn wrap-error [handler] +(defn- wrap-error [handler] (fn [request] (-> (handler request) (ac/exceptionally handler-util/error-response)))) -(def resource-handler - "A handler which just returns the :body from the request." +(def ^:private resource-handler + "A handler which just returns the :body from a non-binary resource request." (-> (comp ac/completed-future :body) wrap-resource wrap-error)) -(defn input-stream +(def ^:private binary-resource-handler + "A handler which just returns the :body from a binary resource request." + (-> (comp ac/completed-future :body) + wrap-binary-data + wrap-error)) + +(defn- string-input-stream ([^String s] (ByteArrayInputStream. (.getBytes s StandardCharsets/UTF_8))) ([^String s closed?] @@ -42,20 +51,40 @@ (close [] (reset! closed? true))))) +(defn- binary-input-stream + ([^bytes data] + (ByteArrayInputStream. data)) + ([^bytes data closed?] + (proxy [ByteArrayInputStream] [data] + (close [] + (reset! closed? true))))) + +(defn- encode-binary-data [^bytes data] + (.encodeToString ^Base64$Encoder (Base64/getEncoder) data)) + +(defn- bytes-of-random-binary-data [lenght] + (byte-array (repeatedly lenght #(rand-int 256)))) + +(defn- resource-as-json [b64-encoded-binary-data content-type] + (str "{\"data\" : \"" b64-encoded-binary-data "\", \"resourceType\" : \"Binary\", \"contentType\" : \"" content-type "\"}")) + +(defn- resource-as-xml [b64-encoded-binary-data content-type] + (str "")) + (deftest json-test (testing "possible content types" (doseq [content-type ["application/fhir+json" "text/json" "application/json"]] (let [closed? (atom false)] (given @(resource-handler {:headers {"content-type" content-type} - :body (input-stream "{\"resourceType\": \"Patient\"}" closed?)}) + :body (string-input-stream "{\"resourceType\": \"Patient\"}" closed?)}) fhir-spec/fhir-type := :fhir/Patient) (is (true? @closed?))))) (testing "empty body" (given @(resource-handler {:headers {"content-type" "application/fhir+json"} - :body (input-stream "")}) + :body (string-input-stream "")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -65,7 +94,7 @@ (testing "body with invalid JSON" (given @(resource-handler {:headers {"content-type" "application/fhir+json"} - :body (input-stream "x")}) + :body (string-input-stream "x")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -76,7 +105,7 @@ ;; There is no XML analogy to this JSON test, since XML has no objects. (given @(resource-handler {:headers {"content-type" "application/fhir+json"} - :body (input-stream "1")}) + :body (string-input-stream "1")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -88,7 +117,7 @@ (testing "body with invalid resource" (given @(resource-handler {:headers {"content-type" "application/fhir+json"} - :body (input-stream "{\"resourceType\": \"Patient\", \"gender\": {}}")}) + :body (string-input-stream "{\"resourceType\": \"Patient\", \"gender\": {}}")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -99,7 +128,7 @@ (testing "body with bundle with null resource" (given @(resource-handler {:headers {"content-type" "application/fhir+json"} - :body (input-stream "{\"resourceType\": \"Bundle\", \"entry\": [{\"resource\": null}]}")}) + :body (string-input-stream "{\"resourceType\": \"Bundle\", \"entry\": [{\"resource\": null}]}")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -110,7 +139,7 @@ (testing "body with bundle with invalid resource" (given @(resource-handler {:headers {"content-type" "application/fhir+json"} - :body (input-stream "{\"resourceType\": \"Bundle\", \"entry\": [{\"resource\": {\"resourceType\": \"Patient\", \"gender\": {}}}]}")}) + :body (string-input-stream "{\"resourceType\": \"Bundle\", \"entry\": [{\"resource\": {\"resourceType\": \"Patient\", \"gender\": {}}}]}")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -121,7 +150,7 @@ (testing "long attribute values are allowed (JSON-wrapped Binary data)" (given @(resource-handler {:headers {"content-type" "application/fhir+json"} - :body (input-stream (str "{\"data\" : \"" (apply str (repeat (* 8 1024 1024) \a)) "\", \"resourceType\" : \"Binary\"}"))}) + :body (string-input-stream (str "{\"data\" : \"" (apply str (repeat (* 8 1024 1024) \a)) "\", \"resourceType\" : \"Binary\"}"))}) fhir-spec/fhir-type := :fhir/Binary))) (deftest xml-test @@ -130,14 +159,14 @@ (let [closed? (atom false)] (given @(resource-handler {:headers {"content-type" content-type} - :body (input-stream "" closed?)}) + :body (string-input-stream "" closed?)}) fhir-spec/fhir-type := :fhir/Patient) (is (true? @closed?))))) (testing "empty body" (given @(resource-handler {:headers {"content-type" "application/fhir+xml"} - :body (input-stream "")}) + :body (string-input-stream "")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -150,7 +179,7 @@ (given @(resource-handler {:request-method :post :headers {"content-type" "application/fhir+xml"} - :body (input-stream input-string)}) + :body (string-input-stream input-string)}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -160,7 +189,7 @@ (testing "body with invalid resource" (given @(resource-handler {:headers {"content-type" "application/fhir+xml"} - :body (input-stream "")}) + :body (string-input-stream "")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -170,7 +199,7 @@ (testing "body with bundle with empty resource" (given @(resource-handler {:headers {"content-type" "application/fhir+xml"} - :body (input-stream "")}) + :body (string-input-stream "")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -180,7 +209,7 @@ (testing "body with bundle with invalid resource" (given @(resource-handler {:headers {"content-type" "application/fhir+xml"} - :body (input-stream "")}) + :body (string-input-stream "")}) :status := 400 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error" @@ -190,9 +219,45 @@ (testing "long attribute values are allowed (XML-wrapped Binary data)" (given @(resource-handler {:headers {"content-type" "application/fhir+xml"} - :body (input-stream (str ""))}) + :body (string-input-stream (str ""))}) fhir-spec/fhir-type := :fhir/Binary))) +(deftest binary-test + (testing "when sending FHIR-wrapped Binary data" + (testing "both handlers should return the same for both wrappers (JSON and XML)" + (let [b64-encoded-binary-data "MTA1NjE0Cg==" + binary-resource-data (type/base64Binary b64-encoded-binary-data) + binary-resource-content-type "text/plain"] + (doseq [handler [resource-handler binary-resource-handler] + [fhir-content-type resource-string-representation] + [["application/fhir+json;charset=utf-8" (resource-as-json b64-encoded-binary-data binary-resource-content-type)] + ["application/fhir+xml;charset=utf-8" (resource-as-xml b64-encoded-binary-data binary-resource-content-type)]]] + (let [closed? (atom false)] + (given @(handler + {:headers {"content-type" fhir-content-type} + :body (string-input-stream resource-string-representation closed?)}) + {:fhir/type :fhir/Binary + :contentType (type/code binary-resource-content-type) + :data binary-resource-data}) + (is closed?)))))) + + (testing "when sending raw binary resource handling" + (let [small-data (bytes-of-random-binary-data 16) + large-data (bytes-of-random-binary-data (* 8 1024 1024))] + (doseq [[content-type raw-data] + [["text/plain" small-data] + ["application/octet-stream" large-data]]] + (let [closed? (atom false) + encoded-data (encode-binary-data raw-data)] + (given @(binary-resource-handler + {:headers {"content-type" content-type} + :body (binary-input-stream raw-data closed?)}) + {:fhir/type :fhir/Binary + :resourceType "Binary" + :contentType content-type + :data (type/base64Binary encoded-data)} + (is @closed?))))))) + (def ^:private whitespace (gen/fmap str/join (gen/vector (gen/elements [" " "\n" "\r" "\t"])))) @@ -200,11 +265,11 @@ (testing "blank body without content type header results in a nil body" (satisfies-prop 10 (prop/for-all [s whitespace] - (nil? @(resource-handler {:body (input-stream s)}))))) + (nil? @(resource-handler {:body (string-input-stream s)}))))) (testing "other content is invalid" (testing "with unknown content-type header" - (given @(resource-handler {:headers {"content-type" "text/plain"} :body (input-stream "foo")}) + (given @(resource-handler {:headers {"content-type" "text/plain"} :body (string-input-stream "foo")}) :status := 415 [:body fhir-spec/fhir-type] := :fhir/OperationOutcome [:body :issue 0 :severity] := #fhir/code"error"