Skip to content

Commit

Permalink
Create new namespace for csv-to-s3 functions and refactor the code
Browse files Browse the repository at this point in the history
  • Loading branch information
etahto committed Dec 17, 2024
1 parent 3477882 commit d92a8fd
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 49 deletions.
4 changes: 2 additions & 2 deletions etp-core/etp-backend/src/dev/clj/user.clj
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,6 @@
(defn generate-aineisto!
"Generates the aineisto with `aineisto-id` into aws-s3-client"
[aineisto-id]
(require 'solita.etp.service.aineisto)
((resolve 'solita.etp.service.aineisto/update-aineisto-in-s3!)
(require 'solita.etp.service.csv-to-s3)
((resolve 'solita.etp.service.csv-to-s3/update-aineisto-in-s3!)
(db 2) {:id -5 :rooli -1} (aws-s3-client) aineisto-id))
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
[solita.etp.schema.common :as common-schema]
[solita.etp.service.aineisto :as aineisto-service]
[solita.etp.service.rooli :as rooli-service]
[solita.etp.service.csv-to-s3 :as csv-to-s3-service]
[solita.etp.service.kayttaja :as kayttaja-service]))

(defn first-address [x-forwarded-for]
Expand Down Expand Up @@ -65,7 +66,7 @@
:handler (fn [{:keys [db whoami aws-s3-client]}]
(r/response
(concurrent/run-background
#(aineisto-service/update-aineistot-in-s3!
#(csv-to-s3-service/update-aineistot-in-s3!
db
whoami
aws-s3-client)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
(:require [ring.util.response :as r]
[solita.etp.security :as security]
[solita.etp.service.concurrent :as concurrent]
[solita.etp.service.aineisto :as aineisto-service]
[solita.etp.service.csv-to-s3 :as csv-to-s3]
[solita.etp.service.kayttaja :as kayttaja-service]))


Expand All @@ -17,7 +17,7 @@
:handler (fn [{:keys [db whoami aws-s3-client]}]
(r/response
(concurrent/run-background
#(aineisto-service/update-public-csv-in-s3!
#(csv-to-s3/update-public-csv-in-s3!
db
whoami
aws-s3-client
Expand Down
41 changes: 0 additions & 41 deletions etp-core/etp-backend/src/main/clj/solita/etp/service/aineisto.clj
Original file line number Diff line number Diff line change
Expand Up @@ -94,45 +94,4 @@
(not-nil-aineisto-source val)
(val db whoami)))

(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id]
(log/info (str "Starting updating of aineisto (id: " aineisto-id ")."))
(let [csv-reducible-query (aineisto-reducible-query db whoami aineisto-id)
key (str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv")
;; This part is used to store rows until it reaches 5MB which
;; is the minimum requirement by `upload-part-fn`.
current-part (ByteBuffer/allocate (* 8 1024 1024))
upload-parts-fn (fn [upload-part-fn]
(csv-reducible-query (fn [^String row]
(let [row-bytes (.getBytes row StandardCharsets/UTF_8)]
(.put current-part row-bytes)
(when (< (* 5 1024 1024) (.position current-part))
(upload-part-fn (extract-byte-array-and-reset! current-part))))))
;;The last part needs to be uploaded separately (unless the size was a multiple of 5MB)
(when (not= 0 (.position current-part))
(upload-part-fn (extract-byte-array-and-reset! current-part))))]
(file/upsert-file-in-parts aws-s3-client key upload-parts-fn)
(log/info (str "Updating of aineisto (id: " aineisto-id ") finished."))))

(defn update-public-csv-in-s3! [db whoami aws-s3-client query]
(log/info "Starting updating of public energiato.")
(let [csv-reducible-query (energiatodistus-csv/energiatodistukset-public-csv db whoami query)
key "/api/csv/public/energiatodistukset.csv"
;; Allocate an 8MB ByteBuffer to store CSV rows until it reaches 5MB
current-part (ByteBuffer/allocate (* 8 1024 1024))
upload-parts-fn (fn [upload-part-fn]
(csv-reducible-query (fn [^String row]
(let [row-bytes (.getBytes row StandardCharsets/UTF_8)]
(.put current-part row-bytes)
(when (> (.position current-part) (* 5 1024 1024))
(upload-part-fn (extract-byte-array-and-reset! current-part))))))
;; Upload the last part if it doesn't reach 5MB
(when (not= 0 (.position current-part))
(upload-part-fn (extract-byte-array-and-reset! current-part))))]
(file/upsert-file-in-parts aws-s3-client key upload-parts-fn)
(log/info "Updating of public energiato finished.")))

(defn update-aineistot-in-s3! [db whoami aws-s3-client]
(update-aineisto-in-s3! db whoami aws-s3-client 1)
(update-aineisto-in-s3! db whoami aws-s3-client 2)
(update-aineisto-in-s3! db whoami aws-s3-client 3))

53 changes: 53 additions & 0 deletions etp-core/etp-backend/src/main/clj/solita/etp/service/csv_to_s3.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
(ns solita.etp.service.csv-to-s3
(:require [clojure.tools.logging :as log]
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv]
[solita.etp.service.file :as file]
[solita.etp.service.aineisto :as aineisto-service])
(:import [java.nio ByteBuffer]
[java.nio.charset StandardCharsets]))

(def ^:private buffer-size (* 8 1024 1024)) ; 8MB
(def ^:private upload-threshold (* 5 1024 1024)) ; 5MB

(defn- create-buffer []
(ByteBuffer/allocate buffer-size))

(defn- create-upload-parts-fn [csv-reducible-query]
(let [current-part (create-buffer)]
(fn [upload-part-fn]
(csv-reducible-query
(fn [^String row]
(let [row-bytes (.getBytes row StandardCharsets/UTF_8)]
(.put current-part row-bytes)
(when (> (.position current-part) upload-threshold)
(upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part))))))
;; Upload any remaining data
(when (not= 0 (.position current-part))
(upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part))))))

(defn- process-csv-to-s3! [aws-s3-client key csv-reducible-query log-start log-end]
(log/info log-start)
(let [upload-parts-fn (create-upload-parts-fn csv-reducible-query)]
(file/upsert-file-in-parts aws-s3-client key upload-parts-fn)
(log/info log-end)))

(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id]
(let [csv-query (aineisto-service/aineisto-reducible-query db whoami aineisto-id)
key (str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv")
start-msg (str "Starting updating of aineisto (id: " aineisto-id ").")
end-msg (str "Updating of aineisto (id: " aineisto-id ") finished.")]
(process-csv-to-s3! aws-s3-client key csv-query start-msg end-msg)))

(defn update-public-csv-in-s3! [db whoami aws-s3-client query]
(let [csv-query (energiatodistus-csv/energiatodistukset-public-csv db whoami query)
key "/api/csv/public/energiatodistukset.csv"]
(process-csv-to-s3!
aws-s3-client
key
csv-query
"Starting updating of public energiatodistus."
"Updating of public energiatodistus finished.")))

(defn update-aineistot-in-s3! [db whoami aws-s3-client]
(doseq [id [1 2 3]]
(update-aineisto-in-s3! db whoami aws-s3-client id)))
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
(ns solita.etp.service.aineisto-test
(:require [solita.etp.service.aineisto :as aineisto]
[solita.etp.service.csv-to-s3 :as csv-to-s3]
[solita.etp.service.file :as file]
[solita.etp.service.kayttaja :as kayttaja-service]
[solita.etp.test-data.generators :as generators]
Expand Down Expand Up @@ -104,7 +105,7 @@
(t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/3/energiatodistukset.csv"))))

(t/testing "Aineistot exist after generating"
(aineisto/update-aineistot-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client*)
(csv-to-s3/update-aineistot-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client*)
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/1/energiatodistukset.csv")))
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/2/energiatodistukset.csv")))
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/3/energiatodistukset.csv"))))
Expand Down Expand Up @@ -136,7 +137,7 @@

;; Update aineistot. Todistus-1 should be included after the update,
;; but todistus-2 should be not as it's not signed yet.
(aineisto/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)
(csv-to-s3/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)

;; Aineisto 1 - Test that rakennustunnus-1 exists, but that there is only one row of energiatodistukset.
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/1/energiatodistukset.csv")]
Expand All @@ -159,7 +160,7 @@
(test-data.energiatodistus/sign! todistus-2-id laatija-id true)

;; Update aineistot. Now todistus-1 and todistus-2 should be in the csv.
(aineisto/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)
(csv-to-s3/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)

;; Aineisto 1 - Test that both rakennustunnus exist. It does not matter which one is which
;; as the order of them is not guaranteed.
Expand Down

0 comments on commit d92a8fd

Please sign in to comment.