Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new internal endpoint for updating public csv data into S3 #645

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions etp-core/etp-backend/src/dev/clj/user.clj
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,6 @@
(defn generate-aineisto!
"Generates the aineisto with `aineisto-id` into aws-s3-client"
[aineisto-id]
(require 'solita.etp.service.aineisto)
((resolve 'solita.etp.service.aineisto/update-aineisto-in-s3!)
(require 'solita.etp.service.csv-to-s3)
((resolve 'solita.etp.service.csv-to-s3/update-aineisto-in-s3!)
(db 2) {:id -5 :rooli -1} (aws-s3-client) aineisto-id))
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
[solita.etp.schema.common :as common-schema]
[solita.etp.service.aineisto :as aineisto-service]
[solita.etp.service.rooli :as rooli-service]
[solita.etp.service.csv-to-s3 :as csv-to-s3-service]
[solita.etp.service.kayttaja :as kayttaja-service]))

(defn first-address [x-forwarded-for]
Expand Down Expand Up @@ -65,7 +66,7 @@
:handler (fn [{:keys [db whoami aws-s3-client]}]
(r/response
(concurrent/run-background
#(aineisto-service/update-aineistot-in-s3!
#(csv-to-s3-service/update-aineistot-in-s3!
db
whoami
aws-s3-client)
Expand Down
23 changes: 23 additions & 0 deletions etp-core/etp-backend/src/main/clj/solita/etp/api/public_csv.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
(ns solita.etp.api.public-csv
(:require [ring.util.response :as r]
[solita.etp.security :as security]
[solita.etp.service.concurrent :as concurrent]
[solita.etp.service.csv-to-s3 :as csv-to-s3]
[solita.etp.service.kayttaja :as kayttaja-service]))


(def internal-routes
[["/public-csv"
["/update"
{:post {:summary "Päivitä julkiset CSV-tiedostot S3:ssa."
:middleware [[security/wrap-db-application-name]]
:responses {200 {:body nil}}
:handler (fn [{:keys [db whoami aws-s3-client]}]
(r/response
(concurrent/run-background
#(csv-to-s3/update-public-csv-in-s3!
db
whoami
aws-s3-client
{:where nil})
"Aineistot update failed")))}}]]])
4 changes: 3 additions & 1 deletion etp-core/etp-backend/src/main/clj/solita/etp/handler.clj
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
[ring.middleware.cookies :as cookies]
[schema.coerce]
[schema.core]
[solita.etp.api.public-csv :as public-csv-api]
[schema.core :as s]
[solita.etp.api.aineisto :as aineisto-api]
[solita.etp.api.energiatodistus :as energiatodistus-api]
Expand Down Expand Up @@ -169,7 +170,8 @@
(concat (tag "Laskutus API" laskutus-api/routes)
(tag "Laatija Internal API" laatija-api/internal-routes)
(tag "Energiatodistus Internal API" energiatodistus-api/internal-routes)
(tag "Aineisto Internal API" aineisto-api/internal-routes))]]
(tag "Aineisto Internal API" aineisto-api/internal-routes)
(tag "Public csv Internal API" public-csv-api/internal-routes))]]
(when config/allow-palveluvayla-api
["/palveluvayla" ["/openapi.json" {:get {:no-doc true :openapi {:info {:title "Energiatodistuspalvelu API" :description "Hae energiatodistuksia pdf tai json muodoissa"}
:id "Palveluväylä"}
Expand Down
48 changes: 13 additions & 35 deletions etp-core/etp-backend/src/main/clj/solita/etp/service/aineisto.clj
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
(ns solita.etp.service.aineisto
(:require
[clojure.java.jdbc :as jdbc]
[clojure.network.ip :as ip]
[clojure.tools.logging :as log]
[solita.etp.service.file :as file]
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv]
[solita.etp.db :as db]
[solita.etp.service.luokittelu :as luokittelu-service])
[clojure.java.jdbc :as jdbc]
[clojure.network.ip :as ip]
[clojure.tools.logging :as log]
[solita.etp.service.file :as file]
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv]
[solita.etp.db :as db]
[solita.etp.service.luokittelu :as luokittelu-service])
(:import (java.time Instant)
(java.nio.charset StandardCharsets)
(java.nio ByteBuffer)))
Expand Down Expand Up @@ -89,31 +89,9 @@

(defn aineisto-reducible-query [db whoami aineisto-id]
(as-> aineisto-id val
(aineisto-key val)
(aineisto-sources val)
(not-nil-aineisto-source val)
(val db whoami)))

(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id]
(log/info (str "Starting updating of aineisto (id: " aineisto-id ")."))
(let [csv-reducible-query (aineisto-reducible-query db whoami aineisto-id)
key (str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tämäkin olisi ollut hyvä silloin aikoinaan varmaan tehdä omaksi funktiokseen ja käyttää myös testeissä sitä kautta. Refaktoroi toki, jos huvittaa.

;; This part is used to store rows until it reaches 5MB which
;; is the minimum requirement by `upload-part-fn`.
current-part (ByteBuffer/allocate (* 8 1024 1024))
upload-parts-fn (fn [upload-part-fn]
(csv-reducible-query (fn [^String row]
(let [row-bytes (.getBytes row StandardCharsets/UTF_8)]
(.put current-part row-bytes)
(when (< (* 5 1024 1024) (.position current-part))
(upload-part-fn (extract-byte-array-and-reset! current-part))))))
;;The last part needs to be uploaded separately (unless the size was a multiple of 5MB)
(when (not= 0 (.position current-part))
(upload-part-fn (extract-byte-array-and-reset! current-part))))]
(file/upsert-file-in-parts aws-s3-client key upload-parts-fn)
(log/info (str "Updating of aineisto (id: " aineisto-id ") finished."))))

(defn update-aineistot-in-s3! [db whoami aws-s3-client]
(update-aineisto-in-s3! db whoami aws-s3-client 1)
(update-aineisto-in-s3! db whoami aws-s3-client 2)
(update-aineisto-in-s3! db whoami aws-s3-client 3))
(aineisto-key val)
(aineisto-sources val)
(not-nil-aineisto-source val)
(val db whoami)))


58 changes: 58 additions & 0 deletions etp-core/etp-backend/src/main/clj/solita/etp/service/csv_to_s3.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
(ns solita.etp.service.csv-to-s3
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nimeäminen on vaikeata

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Voisi olla myös joku s3-caching tjsp.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

En pidä tuosta ehdotuksestani enää yhtään. Joku s3-csv-caching olisi ollut parempi ehdotus, mutta tämäkin on kyllä mielestäni ihan kuvaava nimi.

(:require [clojure.tools.logging :as log]
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv]
[solita.etp.service.file :as file]
[solita.etp.service.aineisto :as aineisto-service])
(:import [java.nio ByteBuffer]
[java.nio.charset StandardCharsets]))

(def ^:private buffer-size (* 8 1024 1024)) ; 8MB
(def ^:private upload-threshold (* 5 1024 1024)) ; 5MB

(defn- create-buffer []
(ByteBuffer/allocate buffer-size))

(def public-csv-key
"/api/csv/public/energiatodistukset.csv")

(defn aineisto-key [aineisto-id]
(str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv"))

(defn- create-upload-parts-fn [csv-reducible-query]
(let [current-part (create-buffer)]
(fn [upload-part-fn]
(csv-reducible-query
(fn [^String row]
(let [row-bytes (.getBytes row StandardCharsets/UTF_8)]
(.put current-part row-bytes)
(when (> (.position current-part) upload-threshold)
(upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part))))))
;; Upload any remaining data
(when (not= 0 (.position current-part))
(upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part))))))

(defn- process-csv-to-s3! [aws-s3-client key csv-reducible-query log-start log-end]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ehkä tuo logitusten parametrisointi tässä kontekstissa oli vähän overkill mutta tulipahan tehtyä

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Varmaan ihan hyvä, että tulee ainakin muistettua logittaa, kun käyttää tätä funktiota.

(log/info log-start)
(let [upload-parts-fn (create-upload-parts-fn csv-reducible-query)]
(file/upsert-file-in-parts aws-s3-client key upload-parts-fn)
(log/info log-end)))

(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id]
(let [csv-query (aineisto-service/aineisto-reducible-query db whoami aineisto-id)
key (aineisto-key aineisto-id)
start-msg (str "Starting updating of aineisto (id: " aineisto-id ").")
end-msg (str "Updating of aineisto (id: " aineisto-id ") finished.")]
(process-csv-to-s3! aws-s3-client key csv-query start-msg end-msg)))

(defn update-public-csv-in-s3! [db whoami aws-s3-client query]
(let [csv-query (energiatodistus-csv/energiatodistukset-public-csv db whoami query)]
(process-csv-to-s3!
aws-s3-client
public-csv-key
csv-query
"Starting updating of public energiatodistus."
"Updating of public energiatodistus finished.")))

(defn update-aineistot-in-s3! [db whoami aws-s3-client]
(doseq [id [1 2 3]]
(update-aineisto-in-s3! db whoami aws-s3-client id)))
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
(ns solita.etp.service.aineisto-test
(:require [solita.etp.service.aineisto :as aineisto]
[solita.etp.service.csv-to-s3 :as csv-to-s3]
[solita.etp.service.file :as file]
[solita.etp.service.kayttaja :as kayttaja-service]
[solita.etp.test-data.generators :as generators]
Expand Down Expand Up @@ -99,15 +100,15 @@

(t/deftest update-aineistot-test
(t/testing "Aineistot don't exist before generating"
(t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/1/energiatodistukset.csv")))
(t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/2/energiatodistukset.csv")))
(t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/3/energiatodistukset.csv"))))
(t/is (false? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 1))))
(t/is (false? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 2))))
(t/is (false? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 3)))))

(t/testing "Aineistot exist after generating"
(aineisto/update-aineistot-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client*)
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/1/energiatodistukset.csv")))
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/2/energiatodistukset.csv")))
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/3/energiatodistukset.csv"))))
(csv-to-s3/update-aineistot-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client*)
(t/is (true? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 1))))
(t/is (true? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 2))))
(t/is (true? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 3)))))

(t/testing "New energiatodistus shows up correctly when updating aineistot"
(let [;; Add laatija
Expand Down Expand Up @@ -136,21 +137,21 @@

;; Update aineistot. Todistus-1 should be included after the update,
;; but todistus-2 should be not as it's not signed yet.
(aineisto/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)
(csv-to-s3/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)

;; Aineisto 1 - Test that rakennustunnus-1 exists, but that there is only one row of energiatodistukset.
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/1/energiatodistukset.csv")]
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 1))]
(t/is (true? (str/includes? first rakennustunnus-1)))
(t/is (nil? second)))

;; Aineisto 2 - Test that rakennustunnus-1 exists, but that there is only one row of energiatodistukset.
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/2/energiatodistukset.csv")]
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 2))]
(t/is (true? (str/includes? first rakennustunnus-1)))
(t/is (nil? second)))

;; Aineisto 3 - Test that one row exists and that the rakennustunnus can't be found as this set should be
;; anonymized.
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/3/energiatodistukset.csv")]
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 3))]
(t/is (false? (str/includes? first rakennustunnus-1)))
(t/is (false? (nil? first)))
(t/is (nil? second)))
Expand All @@ -159,23 +160,23 @@
(test-data.energiatodistus/sign! todistus-2-id laatija-id true)

;; Update aineistot. Now todistus-1 and todistus-2 should be in the csv.
(aineisto/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)
(csv-to-s3/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)

;; Aineisto 1 - Test that both rakennustunnus exist. It does not matter which one is which
;; as the order of them is not guaranteed.
(let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/1/energiatodistukset.csv")]
(let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 1))]
(t/is (true? (is-included-in-exactly-one? rakennustunnus-1 csv-et-lines)))
(t/is (true? (is-included-in-exactly-one? rakennustunnus-2 csv-et-lines))))

;; Aineisto 2 - Test that both rakennustunnus exist. It does not matter which one is which
;; as the order of them is not guaranteed.
(let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/2/energiatodistukset.csv")]
(let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 2))]
(t/is (true? (is-included-in-exactly-one? rakennustunnus-1 csv-et-lines)))
(t/is (true? (is-included-in-exactly-one? rakennustunnus-2 csv-et-lines))))

;; Aineisto 3 - Test that two rows exists and that either of the rakennustunnukset can't be found
;; as this set is be anonymized.
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/3/energiatodistukset.csv")]
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 3))]
;; Rakennustunnus-1 can't be found
(t/is (false? (str/includes? first rakennustunnus-1)))
(t/is (false? (str/includes? second rakennustunnus-1)))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
(ns solita.etp.service.csv-to-s3-test
(:require [solita.etp.service.csv-to-s3 :as csv-to-s3]
[solita.etp.service.file :as file]
[clojure.test :as t]
[solita.etp.test-system :as ts]))

(t/use-fixtures :each ts/fixture)

(t/deftest test-public-csv-to-s3
(t/testing "Public csv doesn't exist before generating"
(t/is (false? (file/file-exists? ts/*aws-s3-client* csv-to-s3/public-csv-key))))

(t/testing "Public csv exists after generating"
(csv-to-s3/update-public-csv-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client* {:where nil})
(t/is (true? (file/file-exists? ts/*aws-s3-client* csv-to-s3/public-csv-key)))))
Loading