diff --git a/etp-core/etp-backend/src/dev/clj/user.clj b/etp-core/etp-backend/src/dev/clj/user.clj index d828e567b..c1b89337a 100644 --- a/etp-core/etp-backend/src/dev/clj/user.clj +++ b/etp-core/etp-backend/src/dev/clj/user.clj @@ -85,6 +85,6 @@ (defn generate-aineisto! "Generates the aineisto with `aineisto-id` into aws-s3-client" [aineisto-id] - (require 'solita.etp.service.aineisto) - ((resolve 'solita.etp.service.aineisto/update-aineisto-in-s3!) + (require 'solita.etp.service.csv-to-s3) + ((resolve 'solita.etp.service.csv-to-s3/update-aineisto-in-s3!) (db 2) {:id -5 :rooli -1} (aws-s3-client) aineisto-id)) diff --git a/etp-core/etp-backend/src/main/clj/solita/etp/api/aineisto.clj b/etp-core/etp-backend/src/main/clj/solita/etp/api/aineisto.clj index 213c3dee7..481387957 100644 --- a/etp-core/etp-backend/src/main/clj/solita/etp/api/aineisto.clj +++ b/etp-core/etp-backend/src/main/clj/solita/etp/api/aineisto.clj @@ -10,6 +10,7 @@ [solita.etp.schema.common :as common-schema] [solita.etp.service.aineisto :as aineisto-service] [solita.etp.service.rooli :as rooli-service] + [solita.etp.service.csv-to-s3 :as csv-to-s3-service] [solita.etp.service.kayttaja :as kayttaja-service])) (defn first-address [x-forwarded-for] @@ -65,7 +66,7 @@ :handler (fn [{:keys [db whoami aws-s3-client]}] (r/response (concurrent/run-background - #(aineisto-service/update-aineistot-in-s3! + #(csv-to-s3-service/update-aineistot-in-s3! db whoami aws-s3-client) diff --git a/etp-core/etp-backend/src/main/clj/solita/etp/api/public_csv.clj b/etp-core/etp-backend/src/main/clj/solita/etp/api/public_csv.clj new file mode 100644 index 000000000..6f2e2bfdb --- /dev/null +++ b/etp-core/etp-backend/src/main/clj/solita/etp/api/public_csv.clj @@ -0,0 +1,22 @@ +(ns solita.etp.api.public-csv + (:require [ring.util.response :as r] + [solita.etp.security :as security] + [solita.etp.service.concurrent :as concurrent] + [solita.etp.service.csv-to-s3 :as csv-to-s3])) + + +(def internal-routes + [["/public-csv" + ["/update" + {:post {:summary "Päivitä julkiset CSV-tiedostot S3:ssa." + :middleware [[security/wrap-db-application-name]] + :responses {200 {:body nil}} + :handler (fn [{:keys [db whoami aws-s3-client]}] + (r/response + (concurrent/run-background + #(csv-to-s3/update-public-csv-in-s3! + db + whoami + aws-s3-client + {:where nil}) + "Public csv update failed")))}}]]]) \ No newline at end of file diff --git a/etp-core/etp-backend/src/main/clj/solita/etp/handler.clj b/etp-core/etp-backend/src/main/clj/solita/etp/handler.clj index 54ac03706..8ec22cc8e 100644 --- a/etp-core/etp-backend/src/main/clj/solita/etp/handler.clj +++ b/etp-core/etp-backend/src/main/clj/solita/etp/handler.clj @@ -17,6 +17,7 @@ [ring.middleware.cookies :as cookies] [schema.coerce] [schema.core] + [solita.etp.api.public-csv :as public-csv-api] [schema.core :as s] [solita.etp.api.aineisto :as aineisto-api] [solita.etp.api.energiatodistus :as energiatodistus-api] @@ -169,7 +170,8 @@ (concat (tag "Laskutus API" laskutus-api/routes) (tag "Laatija Internal API" laatija-api/internal-routes) (tag "Energiatodistus Internal API" energiatodistus-api/internal-routes) - (tag "Aineisto Internal API" aineisto-api/internal-routes))]] + (tag "Aineisto Internal API" aineisto-api/internal-routes) + (tag "Public csv Internal API" public-csv-api/internal-routes))]] (when config/allow-palveluvayla-api ["/palveluvayla" ["/openapi.json" {:get {:no-doc true :openapi {:info {:title "Energiatodistuspalvelu API" :description "Hae energiatodistuksia pdf tai json muodoissa"} :id "Palveluväylä"} diff --git a/etp-core/etp-backend/src/main/clj/solita/etp/service/aineisto.clj b/etp-core/etp-backend/src/main/clj/solita/etp/service/aineisto.clj index 81dee36b0..a197a0b4d 100644 --- a/etp-core/etp-backend/src/main/clj/solita/etp/service/aineisto.clj +++ b/etp-core/etp-backend/src/main/clj/solita/etp/service/aineisto.clj @@ -1,12 +1,12 @@ (ns solita.etp.service.aineisto (:require - [clojure.java.jdbc :as jdbc] - [clojure.network.ip :as ip] - [clojure.tools.logging :as log] - [solita.etp.service.file :as file] - [solita.etp.service.energiatodistus-csv :as energiatodistus-csv] - [solita.etp.db :as db] - [solita.etp.service.luokittelu :as luokittelu-service]) + [clojure.java.jdbc :as jdbc] + [clojure.network.ip :as ip] + [clojure.tools.logging :as log] + [solita.etp.service.file :as file] + [solita.etp.service.energiatodistus-csv :as energiatodistus-csv] + [solita.etp.db :as db] + [solita.etp.service.luokittelu :as luokittelu-service]) (:import (java.time Instant) (java.nio.charset StandardCharsets) (java.nio ByteBuffer))) @@ -89,31 +89,9 @@ (defn aineisto-reducible-query [db whoami aineisto-id] (as-> aineisto-id val - (aineisto-key val) - (aineisto-sources val) - (not-nil-aineisto-source val) - (val db whoami))) - -(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id] - (log/info (str "Starting updating of aineisto (id: " aineisto-id ").")) - (let [csv-reducible-query (aineisto-reducible-query db whoami aineisto-id) - key (str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv") - ;; This part is used to store rows until it reaches 5MB which - ;; is the minimum requirement by `upload-part-fn`. - current-part (ByteBuffer/allocate (* 8 1024 1024)) - upload-parts-fn (fn [upload-part-fn] - (csv-reducible-query (fn [^String row] - (let [row-bytes (.getBytes row StandardCharsets/UTF_8)] - (.put current-part row-bytes) - (when (< (* 5 1024 1024) (.position current-part)) - (upload-part-fn (extract-byte-array-and-reset! current-part)))))) - ;;The last part needs to be uploaded separately (unless the size was a multiple of 5MB) - (when (not= 0 (.position current-part)) - (upload-part-fn (extract-byte-array-and-reset! current-part))))] - (file/upsert-file-in-parts aws-s3-client key upload-parts-fn) - (log/info (str "Updating of aineisto (id: " aineisto-id ") finished.")))) - -(defn update-aineistot-in-s3! [db whoami aws-s3-client] - (update-aineisto-in-s3! db whoami aws-s3-client 1) - (update-aineisto-in-s3! db whoami aws-s3-client 2) - (update-aineisto-in-s3! db whoami aws-s3-client 3)) + (aineisto-key val) + (aineisto-sources val) + (not-nil-aineisto-source val) + (val db whoami))) + + diff --git a/etp-core/etp-backend/src/main/clj/solita/etp/service/csv_to_s3.clj b/etp-core/etp-backend/src/main/clj/solita/etp/service/csv_to_s3.clj new file mode 100644 index 000000000..bcd24e578 --- /dev/null +++ b/etp-core/etp-backend/src/main/clj/solita/etp/service/csv_to_s3.clj @@ -0,0 +1,58 @@ +(ns solita.etp.service.csv-to-s3 + (:require [clojure.tools.logging :as log] + [solita.etp.service.energiatodistus-csv :as energiatodistus-csv] + [solita.etp.service.file :as file] + [solita.etp.service.aineisto :as aineisto-service]) + (:import [java.nio ByteBuffer] + [java.nio.charset StandardCharsets])) + +(def ^:private buffer-size (* 8 1024 1024)) ; 8MB +(def ^:private upload-threshold (* 5 1024 1024)) ; 5MB + +(defn- create-buffer [] + (ByteBuffer/allocate buffer-size)) + +(def public-csv-key + "/api/public/energiatodistukset/csv/energiatodistukset.csv") + +(defn aineisto-key [aineisto-id] + (str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv")) + +(defn- create-upload-parts-fn [csv-reducible-query] + (let [current-part (create-buffer)] + (fn [upload-part-fn] + (csv-reducible-query + (fn [^String row] + (let [row-bytes (.getBytes row StandardCharsets/UTF_8)] + (.put current-part row-bytes) + (when (> (.position current-part) upload-threshold) + (upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part)))))) + ;; Upload any remaining data + (when (not= 0 (.position current-part)) + (upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part)))))) + +(defn- process-csv-to-s3! [aws-s3-client key csv-reducible-query log-start log-end] + (log/info log-start) + (let [upload-parts-fn (create-upload-parts-fn csv-reducible-query)] + (file/upsert-file-in-parts aws-s3-client key upload-parts-fn) + (log/info log-end))) + +(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id] + (let [csv-query (aineisto-service/aineisto-reducible-query db whoami aineisto-id) + key (aineisto-key aineisto-id) + start-msg (str "Starting updating of aineisto (id: " aineisto-id ").") + end-msg (str "Updating of aineisto (id: " aineisto-id ") finished.")] + (process-csv-to-s3! aws-s3-client key csv-query start-msg end-msg))) + +(defn update-public-csv-in-s3! [db whoami aws-s3-client query] + (let [csv-query (energiatodistus-csv/energiatodistukset-public-csv db whoami query)] + (process-csv-to-s3! + aws-s3-client + public-csv-key + csv-query + "Starting updating of public energiatodistus." + "Updating of public energiatodistus finished."))) + +(defn update-aineistot-in-s3! [db whoami aws-s3-client] + (doseq [id [1 2 3]] + (update-aineisto-in-s3! db whoami aws-s3-client id))) diff --git a/etp-core/etp-backend/src/test/clj/solita/etp/service/aineisto_test.clj b/etp-core/etp-backend/src/test/clj/solita/etp/service/aineisto_test.clj index 1d38b5e5d..1c3bc9b95 100644 --- a/etp-core/etp-backend/src/test/clj/solita/etp/service/aineisto_test.clj +++ b/etp-core/etp-backend/src/test/clj/solita/etp/service/aineisto_test.clj @@ -1,5 +1,6 @@ (ns solita.etp.service.aineisto-test (:require [solita.etp.service.aineisto :as aineisto] + [solita.etp.service.csv-to-s3 :as csv-to-s3] [solita.etp.service.file :as file] [solita.etp.service.kayttaja :as kayttaja-service] [solita.etp.test-data.generators :as generators] @@ -99,15 +100,15 @@ (t/deftest update-aineistot-test (t/testing "Aineistot don't exist before generating" - (t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/1/energiatodistukset.csv"))) - (t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/2/energiatodistukset.csv"))) - (t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/3/energiatodistukset.csv")))) + (t/is (false? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 1)))) + (t/is (false? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 2)))) + (t/is (false? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 3))))) (t/testing "Aineistot exist after generating" - (aineisto/update-aineistot-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client*) - (t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/1/energiatodistukset.csv"))) - (t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/2/energiatodistukset.csv"))) - (t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/3/energiatodistukset.csv")))) + (csv-to-s3/update-aineistot-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client*) + (t/is (true? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 1)))) + (t/is (true? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 2)))) + (t/is (true? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 3))))) (t/testing "New energiatodistus shows up correctly when updating aineistot" (let [;; Add laatija @@ -136,21 +137,21 @@ ;; Update aineistot. Todistus-1 should be included after the update, ;; but todistus-2 should be not as it's not signed yet. - (aineisto/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*) + (csv-to-s3/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*) ;; Aineisto 1 - Test that rakennustunnus-1 exists, but that there is only one row of energiatodistukset. - (let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/1/energiatodistukset.csv")] + (let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 1))] (t/is (true? (str/includes? first rakennustunnus-1))) (t/is (nil? second))) ;; Aineisto 2 - Test that rakennustunnus-1 exists, but that there is only one row of energiatodistukset. - (let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/2/energiatodistukset.csv")] + (let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 2))] (t/is (true? (str/includes? first rakennustunnus-1))) (t/is (nil? second))) ;; Aineisto 3 - Test that one row exists and that the rakennustunnus can't be found as this set should be ;; anonymized. - (let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/3/energiatodistukset.csv")] + (let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 3))] (t/is (false? (str/includes? first rakennustunnus-1))) (t/is (false? (nil? first))) (t/is (nil? second))) @@ -159,23 +160,23 @@ (test-data.energiatodistus/sign! todistus-2-id laatija-id true) ;; Update aineistot. Now todistus-1 and todistus-2 should be in the csv. - (aineisto/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*) + (csv-to-s3/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*) ;; Aineisto 1 - Test that both rakennustunnus exist. It does not matter which one is which ;; as the order of them is not guaranteed. - (let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/1/energiatodistukset.csv")] + (let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 1))] (t/is (true? (is-included-in-exactly-one? rakennustunnus-1 csv-et-lines))) (t/is (true? (is-included-in-exactly-one? rakennustunnus-2 csv-et-lines)))) ;; Aineisto 2 - Test that both rakennustunnus exist. It does not matter which one is which ;; as the order of them is not guaranteed. - (let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/2/energiatodistukset.csv")] + (let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 2))] (t/is (true? (is-included-in-exactly-one? rakennustunnus-1 csv-et-lines))) (t/is (true? (is-included-in-exactly-one? rakennustunnus-2 csv-et-lines)))) ;; Aineisto 3 - Test that two rows exists and that either of the rakennustunnukset can't be found ;; as this set is be anonymized. - (let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/3/energiatodistukset.csv")] + (let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 3))] ;; Rakennustunnus-1 can't be found (t/is (false? (str/includes? first rakennustunnus-1))) (t/is (false? (str/includes? second rakennustunnus-1))) diff --git a/etp-core/etp-backend/src/test/clj/solita/etp/service/csv_to_s3_test.clj b/etp-core/etp-backend/src/test/clj/solita/etp/service/csv_to_s3_test.clj new file mode 100644 index 000000000..7bed3ac1a --- /dev/null +++ b/etp-core/etp-backend/src/test/clj/solita/etp/service/csv_to_s3_test.clj @@ -0,0 +1,15 @@ +(ns solita.etp.service.csv-to-s3-test + (:require [solita.etp.service.csv-to-s3 :as csv-to-s3] + [solita.etp.service.file :as file] + [clojure.test :as t] + [solita.etp.test-system :as ts])) + +(t/use-fixtures :each ts/fixture) + +(t/deftest test-public-csv-to-s3 + (t/testing "Public csv doesn't exist before generating" + (t/is (false? (file/file-exists? ts/*aws-s3-client* csv-to-s3/public-csv-key)))) + + (t/testing "Public csv exists after generating" + (csv-to-s3/update-public-csv-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client* {:where nil}) + (t/is (true? (file/file-exists? ts/*aws-s3-client* csv-to-s3/public-csv-key)))))