-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add new internal endpoint for updating public csv data into S3 #645
base: main
Are you sure you want to change the base?
Changes from 4 commits
3477882
d92a8fd
0349692
f866526
6cbacd7
56c5602
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
(ns solita.etp.api.public-csv | ||
(:require [ring.util.response :as r] | ||
[solita.etp.security :as security] | ||
[solita.etp.service.concurrent :as concurrent] | ||
[solita.etp.service.csv-to-s3 :as csv-to-s3] | ||
[solita.etp.service.kayttaja :as kayttaja-service])) | ||
|
||
|
||
(def internal-routes | ||
[["/public-csv" | ||
["/update" | ||
{:post {:summary "Päivitä julkiset CSV-tiedostot S3:ssa." | ||
:middleware [[security/wrap-db-application-name | ||
(kayttaja-service/system-kayttaja :aineisto)] | ||
[security/wrap-whoami-for-internal-aineisto-api]] | ||
:responses {200 {:body nil}} | ||
:handler (fn [{:keys [db whoami aws-s3-client]}] | ||
(r/response | ||
(concurrent/run-background | ||
#(csv-to-s3/update-public-csv-in-s3! | ||
db | ||
whoami | ||
aws-s3-client | ||
{:where nil}) | ||
"Aineistot update failed")))}}]]]) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,12 @@ | ||
(ns solita.etp.service.aineisto | ||
(:require | ||
[clojure.java.jdbc :as jdbc] | ||
[clojure.network.ip :as ip] | ||
[clojure.tools.logging :as log] | ||
[solita.etp.service.file :as file] | ||
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv] | ||
[solita.etp.db :as db] | ||
[solita.etp.service.luokittelu :as luokittelu-service]) | ||
[clojure.java.jdbc :as jdbc] | ||
[clojure.network.ip :as ip] | ||
[clojure.tools.logging :as log] | ||
[solita.etp.service.file :as file] | ||
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv] | ||
[solita.etp.db :as db] | ||
[solita.etp.service.luokittelu :as luokittelu-service]) | ||
(:import (java.time Instant) | ||
(java.nio.charset StandardCharsets) | ||
(java.nio ByteBuffer))) | ||
|
@@ -89,31 +89,9 @@ | |
|
||
(defn aineisto-reducible-query [db whoami aineisto-id] | ||
(as-> aineisto-id val | ||
(aineisto-key val) | ||
(aineisto-sources val) | ||
(not-nil-aineisto-source val) | ||
(val db whoami))) | ||
|
||
(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id] | ||
(log/info (str "Starting updating of aineisto (id: " aineisto-id ").")) | ||
(let [csv-reducible-query (aineisto-reducible-query db whoami aineisto-id) | ||
key (str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tämäkin olisi ollut hyvä silloin aikoinaan varmaan tehdä omaksi funktiokseen ja käyttää myös testeissä sitä kautta. Refaktoroi toki, jos huvittaa. |
||
;; This part is used to store rows until it reaches 5MB which | ||
;; is the minimum requirement by `upload-part-fn`. | ||
current-part (ByteBuffer/allocate (* 8 1024 1024)) | ||
upload-parts-fn (fn [upload-part-fn] | ||
(csv-reducible-query (fn [^String row] | ||
(let [row-bytes (.getBytes row StandardCharsets/UTF_8)] | ||
(.put current-part row-bytes) | ||
(when (< (* 5 1024 1024) (.position current-part)) | ||
(upload-part-fn (extract-byte-array-and-reset! current-part)))))) | ||
;;The last part needs to be uploaded separately (unless the size was a multiple of 5MB) | ||
(when (not= 0 (.position current-part)) | ||
(upload-part-fn (extract-byte-array-and-reset! current-part))))] | ||
(file/upsert-file-in-parts aws-s3-client key upload-parts-fn) | ||
(log/info (str "Updating of aineisto (id: " aineisto-id ") finished.")))) | ||
|
||
(defn update-aineistot-in-s3! [db whoami aws-s3-client] | ||
(update-aineisto-in-s3! db whoami aws-s3-client 1) | ||
(update-aineisto-in-s3! db whoami aws-s3-client 2) | ||
(update-aineisto-in-s3! db whoami aws-s3-client 3)) | ||
(aineisto-key val) | ||
(aineisto-sources val) | ||
(not-nil-aineisto-source val) | ||
(val db whoami))) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
(ns solita.etp.service.csv-to-s3 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nimeäminen on vaikeata There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Voisi olla myös joku There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. En pidä tuosta ehdotuksestani enää yhtään. Joku |
||
(:require [clojure.tools.logging :as log] | ||
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv] | ||
[solita.etp.service.file :as file] | ||
[solita.etp.service.aineisto :as aineisto-service]) | ||
(:import [java.nio ByteBuffer] | ||
[java.nio.charset StandardCharsets])) | ||
|
||
(def ^:private buffer-size (* 8 1024 1024)) ; 8MB | ||
(def ^:private upload-threshold (* 5 1024 1024)) ; 5MB | ||
|
||
(defn- create-buffer [] | ||
(ByteBuffer/allocate buffer-size)) | ||
|
||
(defn- create-upload-parts-fn [csv-reducible-query] | ||
(let [current-part (create-buffer)] | ||
(fn [upload-part-fn] | ||
(csv-reducible-query | ||
(fn [^String row] | ||
(let [row-bytes (.getBytes row StandardCharsets/UTF_8)] | ||
(.put current-part row-bytes) | ||
(when (> (.position current-part) upload-threshold) | ||
(upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part)))))) | ||
;; Upload any remaining data | ||
(when (not= 0 (.position current-part)) | ||
(upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part)))))) | ||
|
||
(defn- process-csv-to-s3! [aws-s3-client key csv-reducible-query log-start log-end] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ehkä tuo logitusten parametrisointi tässä kontekstissa oli vähän overkill mutta tulipahan tehtyä There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Varmaan ihan hyvä, että tulee ainakin muistettua logittaa, kun käyttää tätä funktiota. |
||
(log/info log-start) | ||
(let [upload-parts-fn (create-upload-parts-fn csv-reducible-query)] | ||
(file/upsert-file-in-parts aws-s3-client key upload-parts-fn) | ||
(log/info log-end))) | ||
|
||
(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id] | ||
(let [csv-query (aineisto-service/aineisto-reducible-query db whoami aineisto-id) | ||
key (str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nämäkin keyt voisi muuten siirtää serviceen. Vrt. |
||
start-msg (str "Starting updating of aineisto (id: " aineisto-id ").") | ||
end-msg (str "Updating of aineisto (id: " aineisto-id ") finished.")] | ||
(process-csv-to-s3! aws-s3-client key csv-query start-msg end-msg))) | ||
|
||
(defn update-public-csv-in-s3! [db whoami aws-s3-client query] | ||
(let [csv-query (energiatodistus-csv/energiatodistukset-public-csv db whoami query) | ||
key "/api/csv/public/energiatodistukset.csv"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tälle voisi tehdä jonkin |
||
(process-csv-to-s3! | ||
aws-s3-client | ||
key | ||
csv-query | ||
"Starting updating of public energiatodistus." | ||
"Updating of public energiatodistus finished."))) | ||
|
||
(defn update-aineistot-in-s3! [db whoami aws-s3-client] | ||
(doseq [id [1 2 3]] | ||
(update-aineisto-in-s3! db whoami aws-s3-client id))) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
(ns solita.etp.service.csv-to-s3-test | ||
(:require [solita.etp.service.csv-to-s3 :as csv-to-s3] | ||
[solita.etp.service.file :as file] | ||
[clojure.test :as t] | ||
[solita.etp.test-system :as ts])) | ||
|
||
(t/use-fixtures :each ts/fixture) | ||
|
||
(t/deftest test-public-csv-to-s3 | ||
(t/testing "Public csv doesn't exist before generating" | ||
(t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/csv/public/energiatodistukset.csv")))) | ||
|
||
(t/testing "Public csv exists after generating" | ||
(csv-to-s3/update-public-csv-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client* {:where nil}) | ||
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/csv/public/energiatodistukset.csv"))))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Täälläkin voisi sitten hakea file-keyt servicen kautta. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nämä middlewaret voisivat varmaan olla jotain muuta.
Vaikuttavat ainakin niihin, että saako tehdä tietokantakyselyt, ja jos jotain muutettaisiin, niin audit tauluihin tulisi merkintä käyttäjästä.
Koska csv:tä pystyy hakemaan public puolelta, niin voisi varmaan hyppiä siitä routesta taaksepäin ja päätellä, mitä kannattaisi olla, tai tarvitaanko näitä edes🤔
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Jos nyt tajusin oikein tämän niin routessa takaisinpäin hyppimällä näyttäisi löytyvän publicin alta seuraava middleware, tämä varmaan riittäisi julkisen csvn latauksen tapauksessa?
["/public" {:middleware [[security/wrap-db-application-name]]}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Joo luulisin niin. Lisäksi tuli vähän mietittyä, että se saattaisi vaikutta jopa siihen mitä csv:hen tulee. En nyt ole ihan varma vaikuttaisiko, mutta kai se kulkeutuu tuonne
solita.etp.service.energiatodistus-search/whoami-sql
asti 🤔Mutta nyt ainakin pitäisi olla sama. Jos keksii testin, jolla testaa vähän tuota sisältö, niin voisi olla hyvä.