diff --git a/Cargo.lock b/Cargo.lock index 92642af6..7fd451fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -384,18 +384,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] name = "async-trait" -version = "0.1.78" +version = "0.1.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "461abc97219de0eaaf81fe3ef974a540158f3d079c2ab200f891f1a2ef201e85" +checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -409,9 +409,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "backtrace" @@ -465,7 +465,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.53", + "syn 2.0.58", "which", ] @@ -522,9 +522,9 @@ checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" [[package]] name = "byteorder" @@ -577,9 +577,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.35" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf5903dcbc0a39312feb77df2ff4c76387d591b9fc7b04a238dcf8bb62639a" +checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e" dependencies = [ "android-tzdata", "iana-time-zone", @@ -629,9 +629,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.3" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "949626d00e063efc93b6dca932419ceb5432f99769911c0b995f7e884c778813" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" dependencies = [ "clap_builder", "clap_derive", @@ -661,14 +661,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.3" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90239a040c80f5e14809ca132ddc4176ab33d5e17e49691793296e3fcb34d72f" +checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -876,9 +876,9 @@ dependencies = [ [[package]] name = "der" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c" +checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0" dependencies = [ "const-oid", "pem-rfc7468", @@ -1011,9 +1011,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fastrand" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" [[package]] name = "filetime" @@ -1190,7 +1190,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -1454,9 +1454,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fbd2820c5e49886948654ab546d0688ff24530286bdcf8fca3cefb16d4618eb" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" dependencies = [ "bytes", "fnv", @@ -1760,9 +1760,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" @@ -1885,13 +1885,12 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libredox" -version = "0.0.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.5.0", "libc", - "redox_syscall", ] [[package]] @@ -1929,9 +1928,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lz4_flex" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "912b45c753ff5f7f5208307e8ace7d2a2e30d024e26d3509f3dce546c044ce15" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ "twox-hash", ] @@ -1948,9 +1947,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "mime" @@ -2133,7 +2132,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -2201,7 +2200,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -2212,9 +2211,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.101" +version = "0.9.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dda2b0f344e78efc2facf7d195d098df0dd72151b26ab98da807afc26c198dff" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" dependencies = [ "cc", "libc", @@ -2345,7 +2344,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -2359,9 +2358,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -2443,12 +2442,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" dependencies = [ "proc-macro2", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -2536,9 +2535,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -2574,9 +2573,9 @@ dependencies = [ [[package]] name = "redox_users" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" +checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ "getrandom", "libredox", @@ -2608,9 +2607,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "reqwest" @@ -2816,9 +2815,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.9.2" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6" dependencies = [ "bitflags 1.3.2", "core-foundation", @@ -2829,9 +2828,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.9.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +checksum = "41f3cc463c0ef97e11c3461a9d3787412d30e8e7eb907c79180c4a57bf7c04ef" dependencies = [ "core-foundation-sys", "libc", @@ -2866,14 +2865,14 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ "itoa", "ryu", @@ -3263,9 +3262,9 @@ dependencies = [ [[package]] name = "strsim" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "subtle" @@ -3286,9 +3285,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.53" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", @@ -3371,7 +3370,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -3421,9 +3420,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.36.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -3444,7 +3443,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -3525,7 +3524,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -3694,7 +3693,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", "wasm-bindgen-shared", ] @@ -3728,7 +3727,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4008,7 +4007,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 043eaf3d..a82ac893 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -90,7 +90,6 @@ sqlx = { version = "0.7", optional = true, default-features = false, features = thiserror = "1" tokio = { version = "1", default-features = false, optional = true } - [dev-dependencies] approx = "0.5.1" bytes = "1.5.0" diff --git a/js/Cargo.lock b/js/Cargo.lock index 273e2932..a0021dfe 100644 --- a/js/Cargo.lock +++ b/js/Cargo.lock @@ -391,18 +391,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] name = "async-trait" -version = "0.1.78" +version = "0.1.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "461abc97219de0eaaf81fe3ef974a540158f3d079c2ab200f891f1a2ef201e85" +checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -416,15 +416,15 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" dependencies = [ "addr2line", "cc", @@ -500,9 +500,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cc" @@ -522,9 +522,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.35" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf5903dcbc0a39312feb77df2ff4c76387d591b9fc7b04a238dcf8bb62639a" +checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e" dependencies = [ "android-tzdata", "iana-time-zone", @@ -536,9 +536,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.3" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "949626d00e063efc93b6dca932419ceb5432f99769911c0b995f7e884c778813" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" dependencies = [ "clap_builder", "clap_derive", @@ -568,14 +568,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.3" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90239a040c80f5e14809ca132ddc4176ab33d5e17e49691793296e3fcb34d72f" +checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -760,9 +760,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fastrand" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" [[package]] name = "flatbuffers" @@ -896,7 +896,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -1031,6 +1031,7 @@ dependencies = [ "wasm-streams", "web-sys", "zstd", + "zstd-sys", ] [[package]] @@ -1108,9 +1109,9 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "h2" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51ee2dd2e4f378392eeff5d51618cd9a63166a2513846bbc55f21cfacd9199d4" +checksum = "816ec7294445779408f36fe57bc5b7fc1cf59664059096c65f905c1c61f58069" dependencies = [ "bytes", "fnv", @@ -1320,9 +1321,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.5" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown", @@ -1371,9 +1372,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" @@ -1477,13 +1478,12 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libredox" -version = "0.0.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.5.0", "libc", - "redox_syscall", ] [[package]] @@ -1510,18 +1510,18 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lz4_flex" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "912b45c753ff5f7f5208307e8ace7d2a2e30d024e26d3509f3dce546c044ce15" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ "twox-hash", ] [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "mime" @@ -1661,7 +1661,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -1723,7 +1723,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -1734,9 +1734,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.101" +version = "0.9.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dda2b0f344e78efc2facf7d195d098df0dd72151b26ab98da807afc26c198dff" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" dependencies = [ "cc", "libc", @@ -1852,7 +1852,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -1881,14 +1881,14 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -1964,9 +1964,9 @@ dependencies = [ [[package]] name = "redox_users" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" +checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ "getrandom", "libredox", @@ -1975,9 +1975,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.3" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", @@ -1998,15 +1998,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "reqwest" -version = "0.12.0" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58b48d98d932f4ee75e541614d32a7f44c889b72bd9c2e04d95edd135989df88" +checksum = "2d66674f2b6fb864665eea7a3c1ac4e3dfacd2fda83cf6f935a612e01b0e3338" dependencies = [ "base64 0.21.7", "bytes", @@ -2142,9 +2142,9 @@ checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" [[package]] name = "security-framework" -version = "2.9.2" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6" dependencies = [ "bitflags 1.3.2", "core-foundation", @@ -2155,9 +2155,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.9.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +checksum = "41f3cc463c0ef97e11c3461a9d3787412d30e8e7eb907c79180c4a57bf7c04ef" dependencies = [ "core-foundation-sys", "libc", @@ -2203,14 +2203,14 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ "itoa", "ryu", @@ -2246,9 +2246,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "snafu" @@ -2314,9 +2314,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strsim" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" @@ -2331,9 +2331,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.53" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", @@ -2405,7 +2405,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -2445,9 +2445,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.36.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -2467,7 +2467,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -2559,7 +2559,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -2695,7 +2695,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", "wasm-bindgen-shared", ] @@ -2729,7 +2729,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2762,7 +2762,7 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] @@ -3008,7 +3008,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.58", ] [[package]] diff --git a/js/Cargo.toml b/js/Cargo.toml index ebed2792..f34c0567 100644 --- a/js/Cargo.toml +++ b/js/Cargo.toml @@ -58,7 +58,7 @@ io_parquet_compressions = [ io_parquet_brotli = ["io_parquet", "parquet/brotli"] io_parquet_gzip = ["io_parquet", "parquet/flate2"] io_parquet_snappy = ["io_parquet", "parquet/snap"] -io_parquet_zstd = ["io_parquet", "parquet/zstd", "dep:zstd"] +io_parquet_zstd = ["io_parquet", "parquet/zstd", "dep:zstd", "dep:zstd-sys"] io_parquet_lz4 = ["io_parquet", "parquet/lz4"] # Include scalar classes @@ -104,6 +104,11 @@ zstd = { version = "*", features = [ "thin", ], default-features = false, optional = true } +# TODO: temporary to fix parquet wasm build +# upstream issue: https://github.com/gyscos/zstd-rs/issues/269 +zstd-sys = { version = "=2.0.9", optional = true, default-features = false } + + [dependencies.web-sys] version = "0.3.4" features = ['console'] diff --git a/js/src/io/flatgeobuf.rs b/js/src/io/flatgeobuf.rs index 1a5e7548..1ae43167 100644 --- a/js/src/io/flatgeobuf.rs +++ b/js/src/io/flatgeobuf.rs @@ -1,11 +1,11 @@ use std::io::Cursor; +use arrow_wasm::Table; use geoarrow::io::flatgeobuf::{read_flatgeobuf as _read_flatgeobuf, FlatGeobufReaderOptions}; // use parquet_wasm::utils::assert_parquet_file_not_empty; use wasm_bindgen::prelude::*; use crate::error::WasmResult; -use crate::table::GeoTable; /// Read a FlatGeobuf file into GeoArrow memory /// @@ -25,7 +25,7 @@ use crate::table::GeoTable; /// @param file Uint8Array containing FlatGeobuf data /// @returns Uint8Array containing Arrow data in [IPC Stream format](https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format). To parse this into an Arrow table, pass to `tableFromIPC` in the Arrow JS bindings. #[wasm_bindgen(js_name = readFlatGeobuf)] -pub fn read_flatgeobuf(file: &[u8], batch_size: Option) -> WasmResult { +pub fn read_flatgeobuf(file: &[u8], batch_size: Option) -> WasmResult { // assert_parquet_file_not_empty(parquet_file)?; let mut cursor = Cursor::new(file); let options = FlatGeobufReaderOptions { @@ -33,5 +33,6 @@ pub fn read_flatgeobuf(file: &[u8], batch_size: Option) -> WasmResult) -> WasmResult { +pub fn read_geojson(file: &[u8], batch_size: Option) -> WasmResult
{ // assert_parquet_file_not_empty(parquet_file)?; let mut cursor = Cursor::new(file); let geo_table = _read_geojson(&mut cursor, batch_size)?; - Ok(GeoTable(geo_table)) + let (schema, batches) = geo_table.into_inner(); + Ok(Table::new(schema, batches)) } diff --git a/js/src/io/parquet/async.rs b/js/src/io/parquet/async.rs index d927e9af..101199a0 100644 --- a/js/src/io/parquet/async.rs +++ b/js/src/io/parquet/async.rs @@ -1,3 +1,4 @@ +use arrow_wasm::Table; use geoarrow::array::CoordType; use geoarrow::io::parquet::ParquetDataset as _ParquetDataset; use geoarrow::io::parquet::ParquetFile as _ParquetFile; @@ -5,7 +6,6 @@ use wasm_bindgen::prelude::*; use crate::error::WasmResult; use crate::io::parquet::async_file_reader::HTTPFileReader; -use crate::table::GeoTable; #[wasm_bindgen] pub struct ParquetFile { @@ -46,18 +46,20 @@ impl ParquetFile { Ok(bbox.map(|b| b.to_vec())) } - pub async fn read(&self) -> WasmResult { + pub async fn read(&self) -> WasmResult
{ let table = self.file.read(None, None, &Default::default()).await?; - Ok(table.into()) + let (schema, batches) = table.into_inner(); + Ok(Table::new(schema, batches)) } #[wasm_bindgen(js_name = readRowGroups)] - pub async fn read_row_groups(&self, row_groups: Vec) -> WasmResult { + pub async fn read_row_groups(&self, row_groups: Vec) -> WasmResult
{ let table = self .file .read_row_groups(row_groups, &CoordType::Interleaved) .await?; - Ok(table.into()) + let (schema, batches) = table.into_inner(); + Ok(Table::new(schema, batches)) } } diff --git a/js/src/io/parquet/sync.rs b/js/src/io/parquet/sync.rs index 5e515448..3907cf31 100644 --- a/js/src/io/parquet/sync.rs +++ b/js/src/io/parquet/sync.rs @@ -1,10 +1,10 @@ +use arrow_wasm::Table; // use parquet_wasm::utils::assert_parquet_file_not_empty; use bytes::Bytes; use geoarrow::io::parquet::{read_geoparquet as _read_geoparquet, GeoParquetReaderOptions}; use wasm_bindgen::prelude::*; use crate::error::WasmResult; -use crate::table::GeoTable; /// Read a GeoParquet file into GeoArrow memory /// @@ -24,12 +24,13 @@ use crate::table::GeoTable; /// @param file Uint8Array containing GeoParquet data /// @returns Uint8Array containing Arrow data in [IPC Stream format](https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format). To parse this into an Arrow table, pass to `tableFromIPC` in the Arrow JS bindings. #[wasm_bindgen(js_name = readGeoParquet)] -pub fn read_geoparquet(file: Vec) -> WasmResult { +pub fn read_geoparquet(file: Vec) -> WasmResult
{ // assert_parquet_file_not_empty(parquet_file)?; let options = GeoParquetReaderOptions { batch_size: 65536, ..Default::default() }; let geo_table = _read_geoparquet(Bytes::from(file), options)?; - Ok(GeoTable(geo_table)) + let (schema, batches) = geo_table.into_inner(); + Ok(Table::new(schema, batches)) } diff --git a/js/src/lib.rs b/js/src/lib.rs index d5f4e572..ff447051 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -11,8 +11,6 @@ pub mod io; pub mod reproject; #[cfg(feature = "scalar")] pub mod scalar; -#[cfg(feature = "table")] -pub mod table; #[cfg(feature = "vector")] pub mod vector; // pub mod transform_origin; diff --git a/js/src/table.rs b/js/src/table.rs deleted file mode 100644 index 5b28baac..00000000 --- a/js/src/table.rs +++ /dev/null @@ -1,27 +0,0 @@ -use arrow_wasm::Table; -use wasm_bindgen::prelude::*; - -#[wasm_bindgen] -pub struct GeoTable(pub(crate) geoarrow::table::GeoTable); - -#[wasm_bindgen] -impl GeoTable { - /// Consume this GeoTable and convert into a non-spatial Arrow Table - #[wasm_bindgen(js_name = intoTable)] - pub fn into_table(self) -> Table { - let (schema, batches, _) = self.0.into_inner(); - Table::new(schema, batches) - } -} - -impl From for GeoTable { - fn from(value: geoarrow::table::GeoTable) -> Self { - Self(value) - } -} - -impl From for geoarrow::table::GeoTable { - fn from(value: GeoTable) -> Self { - value.0 - } -} diff --git a/js/tests/js/flatgeobuf.test.ts b/js/tests/js/flatgeobuf.test.ts index 8f8a2205..3c45566c 100644 --- a/js/tests/js/flatgeobuf.test.ts +++ b/js/tests/js/flatgeobuf.test.ts @@ -8,8 +8,7 @@ geoarrow.set_panic_hook(); it("read FlatGeobuf", () => { const path = "../fixtures/flatgeobuf/nz-building-outlines-small.fgb"; const buffer = new Uint8Array(readFileSync(path)); - const geoWasmTable = geoarrow.readFlatGeobuf(buffer); - const wasmTable = geoWasmTable.intoTable(); + const wasmTable = geoarrow.readFlatGeobuf(buffer); const arrowIPCBuffer = wasmTable.intoIPCStream(); const arrowJsTable = tableFromIPC(arrowIPCBuffer); const geometryIdx = arrowJsTable.schema.fields.findIndex( diff --git a/js/tests/js/geoparquet.test.ts b/js/tests/js/geoparquet.test.ts index 0df94baa..ed99c1f0 100644 --- a/js/tests/js/geoparquet.test.ts +++ b/js/tests/js/geoparquet.test.ts @@ -8,8 +8,7 @@ geoarrow.set_panic_hook(); it("read GeoParquet", () => { const path = "../fixtures/geoparquet/nybb.parquet"; const buffer = new Uint8Array(readFileSync(path)); - const geoWasmTable = geoarrow.readGeoParquet(buffer); - const wasmTable = geoWasmTable.intoTable(); + const wasmTable = geoarrow.readGeoParquet(buffer); const arrowIPCBuffer = wasmTable.intoIPCStream(); const arrowJsTable = tableFromIPC(arrowIPCBuffer); const geometryIdx = arrowJsTable.schema.fields.findIndex( diff --git a/python/core/Cargo.lock b/python/core/Cargo.lock index 611dc584..07ed18d4 100644 --- a/python/core/Cargo.lock +++ b/python/core/Cargo.lock @@ -330,7 +330,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -341,7 +341,7 @@ checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -640,9 +640,9 @@ dependencies = [ [[package]] name = "der" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c" +checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0" dependencies = [ "const-oid", "pem-rfc7468", @@ -903,7 +903,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -1110,9 +1110,9 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "h2" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fbd2820c5e49886948654ab546d0688ff24530286bdcf8fca3cefb16d4618eb" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" dependencies = [ "bytes", "fnv", @@ -1129,9 +1129,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51ee2dd2e4f378392eeff5d51618cd9a63166a2513846bbc55f21cfacd9199d4" +checksum = "816ec7294445779408f36fe57bc5b7fc1cf59664059096c65f905c1c61f58069" dependencies = [ "bytes", "fnv", @@ -1341,7 +1341,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.3.25", + "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", "httparse", @@ -1364,7 +1364,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.3", + "h2 0.4.4", "http 1.1.0", "http-body 1.0.0", "httparse", @@ -1684,9 +1684,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lz4_flex" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "912b45c753ff5f7f5208307e8ace7d2a2e30d024e26d3509f3dce546c044ce15" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ "twox-hash", ] @@ -1713,9 +1713,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "memoffset" @@ -1920,7 +1920,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -2007,7 +2007,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -2027,9 +2027,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.101" +version = "0.9.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dda2b0f344e78efc2facf7d195d098df0dd72151b26ab98da807afc26c198dff" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" dependencies = [ "cc", "libc", @@ -2156,7 +2156,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -2185,14 +2185,14 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -2332,7 +2332,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -2345,7 +2345,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -2481,7 +2481,7 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2 0.3.25", + "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", "hyper 0.14.28", @@ -2527,7 +2527,7 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2 0.4.3", + "h2 0.4.4", "http 1.1.0", "http-body 1.0.0", "http-body-util", @@ -2794,9 +2794,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.9.2" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6" dependencies = [ "bitflags 1.3.2", "core-foundation", @@ -2807,9 +2807,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.9.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +checksum = "41f3cc463c0ef97e11c3461a9d3787412d30e8e7eb907c79180c4a57bf7c04ef" dependencies = [ "core-foundation-sys", "libc", @@ -2844,7 +2844,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -3252,9 +3252,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.55" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", @@ -3323,7 +3323,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -3363,9 +3363,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.36.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -3386,7 +3386,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -3510,7 +3510,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -3670,7 +3670,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", "wasm-bindgen-shared", ] @@ -3704,7 +3704,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3974,7 +3974,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.55", + "syn 2.0.58", ] [[package]] @@ -3985,27 +3985,27 @@ checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" [[package]] name = "zstd" -version = "0.13.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.1.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" +version = "2.0.9+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" dependencies = [ "cc", "pkg-config", diff --git a/python/core/src/algorithm/native/explode.rs b/python/core/src/algorithm/native/explode.rs index f8a2a27e..3cce67d2 100644 --- a/python/core/src/algorithm/native/explode.rs +++ b/python/core/src/algorithm/native/explode.rs @@ -1,6 +1,6 @@ use crate::error::PyGeoArrowResult; use crate::table::GeoTable; -use geoarrow::algorithm::native::Explode; +use geoarrow::algorithm::native::ExplodeTable; use pyo3::prelude::*; /// Explode a table. @@ -28,6 +28,6 @@ impl GeoTable { /// Returns: /// A new table with multi-part geometries exploded to separate rows. pub fn explode(&self) -> PyGeoArrowResult { - Ok(self.0.explode()?.into()) + Ok(self.0.explode(None)?.into()) } } diff --git a/python/core/src/ffi/from_python/table.rs b/python/core/src/ffi/from_python/table.rs index bd856c24..d118e4e0 100644 --- a/python/core/src/ffi/from_python/table.rs +++ b/python/core/src/ffi/from_python/table.rs @@ -2,7 +2,7 @@ use crate::ffi::from_python::utils::import_arrow_c_stream; use crate::table::GeoTable; use arrow::ffi_stream::ArrowArrayStreamReader as ArrowRecordBatchStreamReader; use arrow_array::RecordBatchReader; -use geoarrow::datatypes::GeoDataType; +use geoarrow::algorithm::native::DowncastTable; use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3::{PyAny, PyResult}; @@ -20,24 +20,29 @@ impl<'a> FromPyObject<'a> for GeoTable { batches.push(batch); } - let table = geoarrow::table::GeoTable::from_arrow(batches, schema, None, None) + let table = geoarrow::table::Table::try_new(schema, batches) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + let table = table + .downcast(true) .map_err(|e| PyValueError::new_err(e.to_string()))?; - if let Ok(data_type) = table.geometry_data_type() { - match data_type { - GeoDataType::LargeLineString(_) - | GeoDataType::LargePolygon(_) - | GeoDataType::LargeMultiPoint(_) - | GeoDataType::LargeMultiLineString(_) - | GeoDataType::LargeMultiPolygon(_) - | GeoDataType::LargeMixed(_) - | GeoDataType::LargeWKB - | GeoDataType::LargeGeometryCollection(_) => return Err(PyValueError::new_err( - "Unable to downcast from large to small offsets. Are your offsets 2^31 long?", - )), - _ => (), - } - } + // TODO: restore validation that all arrays have i32 offsets + + // if let Ok(data_type) = table.geometry_data_type() { + // match data_type { + // GeoDataType::LargeLineString(_) + // | GeoDataType::LargePolygon(_) + // | GeoDataType::LargeMultiPoint(_) + // | GeoDataType::LargeMultiLineString(_) + // | GeoDataType::LargeMultiPolygon(_) + // | GeoDataType::LargeMixed(_) + // | GeoDataType::LargeWKB + // | GeoDataType::LargeGeometryCollection(_) => return Err(PyValueError::new_err( + // "Unable to downcast from large to small offsets. Are your offsets 2^31 long?", + // )), + // _ => (), + // } + // } Ok(table.into()) } diff --git a/python/core/src/ffi/to_python/table.rs b/python/core/src/ffi/to_python/table.rs index 92bea3f0..50335c49 100644 --- a/python/core/src/ffi/to_python/table.rs +++ b/python/core/src/ffi/to_python/table.rs @@ -20,7 +20,7 @@ impl GeoTable { &self, _requested_schema: Option, ) -> PyGeoArrowResult { - let (schema, batches, _) = self.0.clone().into_inner(); + let (schema, batches) = self.0.clone().into_inner(); let record_batch_reader = Box::new(RecordBatchIterator::new( batches.into_iter().map(Ok), schema, diff --git a/python/core/src/interop/geopandas/from_geopandas.rs b/python/core/src/interop/geopandas/from_geopandas.rs index f3755653..3de680cf 100644 --- a/python/core/src/interop/geopandas/from_geopandas.rs +++ b/python/core/src/interop/geopandas/from_geopandas.rs @@ -161,7 +161,7 @@ impl GeoTable { }; Ok( - geoarrow::table::GeoTable::from_arrow_and_geometry(batches, schema, chunked_geometry)? + geoarrow::table::Table::from_arrow_and_geometry(batches, schema, chunked_geometry)? .into(), ) } diff --git a/python/core/src/interop/geopandas/to_geopandas.rs b/python/core/src/interop/geopandas/to_geopandas.rs index aa505af0..1ac5cf6f 100644 --- a/python/core/src/interop/geopandas/to_geopandas.rs +++ b/python/core/src/interop/geopandas/to_geopandas.rs @@ -47,7 +47,7 @@ impl GeoTable { let cloned_table = GeoTable(self.0.clone()); let pyarrow_table = pyarrow_mod.call_method1(intern!(py, "table"), (cloned_table,))?; - let geometry_column_index = self.0.geometry_column_index(); + let geometry_column_index = self.0.default_geometry_column_idx()?; let pyarrow_table = pyarrow_table.call_method1(intern!(py, "remove_column"), (geometry_column_index,))?; @@ -58,7 +58,7 @@ impl GeoTable { )?; let pandas_df = pyarrow_table.call_method(intern!(py, "to_pandas"), (), Some(kwargs))?; - let geometry = self.0.geometry()?; + let geometry = self.0.geometry_column(Some(geometry_column_index))?; let shapely_geometry = match geometry.data_type() { GeoDataType::Point(_) => ChunkedPointArray(geometry.as_ref().as_point().clone()) .to_shapely(py)? diff --git a/python/core/src/table/mod.rs b/python/core/src/table/mod.rs index a7f09ef8..13c167d0 100644 --- a/python/core/src/table/mod.rs +++ b/python/core/src/table/mod.rs @@ -10,7 +10,7 @@ use pyo3::prelude::*; /// /// This is similar to a GeoPandas [`GeoDataFrame`][geopandas.GeoDataFrame]. #[pyclass(module = "geoarrow.rust.core._rust")] -pub struct GeoTable(pub(crate) geoarrow::table::GeoTable); +pub struct GeoTable(pub(crate) geoarrow::table::Table); #[pymethods] impl GeoTable { @@ -20,7 +20,7 @@ impl GeoTable { /// A chunked geometry array #[getter] pub fn geometry(&self) -> PyGeoArrowResult { - let chunked_geom_arr = self.0.geometry()?; + let chunked_geom_arr = self.0.geometry_column(None)?; Python::with_gil(|py| chunked_geometry_array_to_pyobject(py, chunked_geom_arr)) } @@ -35,13 +35,13 @@ impl GeoTable { } } -impl From for GeoTable { - fn from(value: geoarrow::table::GeoTable) -> Self { +impl From for GeoTable { + fn from(value: geoarrow::table::Table) -> Self { Self(value) } } -impl From for geoarrow::table::GeoTable { +impl From for geoarrow::table::Table { fn from(value: GeoTable) -> Self { value.0 } diff --git a/src/algorithm/native/downcast.rs b/src/algorithm/native/downcast.rs index ce361227..ec07cc8b 100644 --- a/src/algorithm/native/downcast.rs +++ b/src/algorithm/native/downcast.rs @@ -1,11 +1,10 @@ -#![allow(unused_variables, dead_code)] +#![allow(unused_variables)] use std::collections::HashSet; use std::sync::Arc; -use arrow_array::{OffsetSizeTrait, RecordBatch}; +use arrow_array::OffsetSizeTrait; use arrow_buffer::OffsetBuffer; -use arrow_schema::Schema; use crate::algorithm::native::cast::Cast; use crate::array::offset_builder::OffsetsBuilder; @@ -14,7 +13,7 @@ use crate::array::*; use crate::chunked_array::*; use crate::datatypes::GeoDataType; use crate::error::Result; -use crate::table::GeoTable; +use crate::table::Table; use crate::GeometryArrayTrait; pub trait Downcast { @@ -64,6 +63,7 @@ fn can_downcast_offsets_i32(buffer: &OffsetBuffer) -> boo /// Downcast an i64 offset buffer to i32 /// /// This copies the buffer into an i32 +#[allow(dead_code)] fn downcast_offsets(buffer: &OffsetBuffer) -> OffsetBuffer { if O::IS_LARGE { let mut builder = OffsetsBuilder::with_capacity(buffer.len_proxy()); @@ -693,51 +693,44 @@ impl Downcast for &dyn ChunkedGeometryArrayTrait { } } -impl Downcast for GeoTable { - type Output = Result; - - fn downcasted_data_type(&self, small_offsets: bool) -> GeoDataType { - self.geometry_data_type().unwrap() - } - fn downcast(&self, small_offsets: bool) -> Self::Output { - let downcasted_chunked_geometry = self.geometry()?.as_ref().downcast(small_offsets); - - let (schema, batches, geometry_column_index) = self.clone().into_inner(); +pub trait DowncastTable { + /// If possible, convert this array to a simpler and/or smaller data type + /// + /// Conversions include: + /// + /// - MultiPoint -> Point + /// - MultiLineString -> LineString + /// - MultiPolygon -> Polygon + /// - MixedGeometry -> any of the 6 concrete types + /// - GeometryCollection -> MixedGeometry or any of the 6 concrete types + /// + /// If small_offsets is `true`, it will additionally try to convert `i64` offset buffers to + /// `i32` if the offsets would not overflow. + fn downcast(&self, small_offsets: bool) -> Result
; +} - // Keep all fields except the existing geometry field - let mut new_fields = schema - .fields() +impl DowncastTable for Table { + fn downcast(&self, small_offsets: bool) -> Result
{ + let downcasted_columns = self + .geometry_column_indices() .iter() - .enumerate() - .filter_map(|(i, field)| { - if i == geometry_column_index { - None - } else { - Some(field.clone()) - } - }) - .collect::>(); - - // Add the new geometry column at the end of the new fields - new_fields.push(downcasted_chunked_geometry.extension_field()); - let new_geometry_column_index = new_fields.len() - 1; - - // Construct a new schema with the new fields - let new_schema = Arc::new(Schema::new(new_fields).with_metadata(schema.metadata.clone())); - - assert_eq!(batches.len(), downcasted_chunked_geometry.num_chunks()); - let new_batches = batches - .into_iter() - .zip(downcasted_chunked_geometry.geometry_chunks()) - .map(|(mut batch, geom_chunk)| { - batch.remove_column(geometry_column_index); - let mut columns = batch.columns().to_vec(); - columns.push(geom_chunk.to_array_ref()); - RecordBatch::try_new(new_schema.clone(), columns).unwrap() + .map(|idx| { + let geometry = self.geometry_column(Some(*idx))?; + Ok((*idx, geometry.as_ref().downcast(small_offsets))) }) - .collect(); + .collect::>>()?; + + let mut new_table = self.clone(); + + for (column_idx, column) in downcasted_columns.iter() { + let prev_field = self.schema().field(*column_idx); + let new_field = column + .data_type() + .to_field(prev_field.name(), prev_field.is_nullable()); + new_table.set_column(*column_idx, new_field.into(), column.array_refs())?; + } - GeoTable::try_new(new_schema.clone(), new_batches, new_geometry_column_index) + Ok(new_table) } } diff --git a/src/algorithm/native/explode.rs b/src/algorithm/native/explode.rs index 5af25b35..2e26ab03 100644 --- a/src/algorithm/native/explode.rs +++ b/src/algorithm/native/explode.rs @@ -11,7 +11,7 @@ use crate::chunked_array::{ }; use crate::datatypes::GeoDataType; use crate::error::{GeoArrowError, Result}; -use crate::table::GeoTable; +use crate::table::Table; use crate::GeometryArrayTrait; pub trait Explode { @@ -218,18 +218,28 @@ impl Explode for &dyn ChunkedGeometryArrayTrait { } } -impl Explode for GeoTable { - type Output = Result; +pub trait ExplodeTable { + /// Returns the exploded geometries and, if an explode needs to happen, the indices that should + /// be passed into a [`take`][arrow::compute::take] operation. + fn explode(&self, index: Option) -> Result
; +} - fn explode(&self) -> Self::Output { - let geometry_column = self.geometry()?; +impl ExplodeTable for Table { + fn explode(&self, index: Option) -> Result
{ + let index = if let Some(index) = index { + index + } else { + self.default_geometry_column_idx()? + }; + + let geometry_column = self.geometry_column(Some(index))?; let (exploded_geometry, take_indices) = geometry_column.as_ref().explode()?; // TODO: optionally use rayon? if let Some(take_indices) = take_indices { // Remove existing geometry column let mut new_table = self.clone(); - new_table.remove_column(new_table.geometry_column_index()); + new_table.remove_column(index); let field = exploded_geometry.extension_field(); @@ -263,9 +273,8 @@ impl Explode for GeoTable { let mut schema_builder = SchemaBuilder::from(new_table.schema().as_ref().clone()); schema_builder.push(field.clone()); let schema = schema_builder.finish(); - let geometry_column_index = schema.fields().len() - 1; - GeoTable::try_new(schema.into(), new_batches, geometry_column_index) + Table::try_new(schema.into(), new_batches) } else { // No take is necessary; nothing happens Ok(self.clone()) diff --git a/src/algorithm/native/mod.rs b/src/algorithm/native/mod.rs index 5632b9cc..e23ae32c 100644 --- a/src/algorithm/native/mod.rs +++ b/src/algorithm/native/mod.rs @@ -21,8 +21,8 @@ mod unary; pub use binary::Binary; pub use cast::Cast; pub use concatenate::Concatenate; -pub use downcast::Downcast; -pub use explode::Explode; +pub use downcast::{Downcast, DowncastTable}; +pub use explode::{Explode, ExplodeTable}; pub use map_chunks::MapChunks; pub use map_coords::MapCoords; pub use rechunk::Rechunk; diff --git a/src/chunked_array/chunked_array.rs b/src/chunked_array/chunked_array.rs index df26ea8e..f23c85f1 100644 --- a/src/chunked_array/chunked_array.rs +++ b/src/chunked_array/chunked_array.rs @@ -280,6 +280,8 @@ pub trait ChunkedGeometryArrayTrait: std::fmt::Debug + Send + Sync { fn num_chunks(&self) -> usize; fn as_ref(&self) -> &dyn ChunkedGeometryArrayTrait; + + fn array_refs(&self) -> Vec>; } impl ChunkedGeometryArrayTrait for ChunkedPointArray { @@ -308,6 +310,13 @@ impl ChunkedGeometryArrayTrait for ChunkedPointArray { fn as_ref(&self) -> &dyn ChunkedGeometryArrayTrait { self } + + fn array_refs(&self) -> Vec> { + self.chunks + .iter() + .map(|chunk| chunk.to_array_ref()) + .collect() + } } macro_rules! impl_trait { @@ -338,6 +347,13 @@ macro_rules! impl_trait { fn as_ref(&self) -> &dyn ChunkedGeometryArrayTrait { self } + + fn array_refs(&self) -> Vec> { + self.chunks + .iter() + .map(|chunk| chunk.to_array_ref()) + .collect() + } } }; } @@ -377,6 +393,13 @@ impl ChunkedGeometryArrayTrait for ChunkedRectArray { fn as_ref(&self) -> &dyn ChunkedGeometryArrayTrait { self } + + fn array_refs(&self) -> Vec> { + self.chunks + .iter() + .map(|chunk| chunk.to_array_ref()) + .collect() + } } /// Construct diff --git a/src/io/csv/reader.rs b/src/io/csv/reader.rs index c3ceea1d..6368e2af 100644 --- a/src/io/csv/reader.rs +++ b/src/io/csv/reader.rs @@ -6,7 +6,7 @@ use crate::array::CoordType; use crate::error::Result; use crate::io::geozero::array::MixedGeometryStreamBuilder; use crate::io::geozero::table::{GeoTableBuilder, GeoTableBuilderOptions}; -use crate::table::GeoTable; +use crate::table::Table; /// Options for the CSV reader. pub struct CSVReaderOptions { @@ -32,12 +32,12 @@ impl Default for CSVReaderOptions { } } -/// Read a CSV file to a GeoTable +/// Read a CSV file to a Table pub fn read_csv( reader: R, geometry_column_name: &str, options: CSVReaderOptions, -) -> Result { +) -> Result
{ let mut csv = CsvReader::new(geometry_column_name, reader); let table_builder_options = GeoTableBuilderOptions::new( options.coord_type, diff --git a/src/io/csv/writer.rs b/src/io/csv/writer.rs index 1b190087..403f3eca 100644 --- a/src/io/csv/writer.rs +++ b/src/io/csv/writer.rs @@ -1,11 +1,11 @@ use crate::error::Result; -use crate::table::GeoTable; +use crate::table::Table; use geozero::csv::CsvWriter; use geozero::GeozeroDatasource; use std::io::Write; -/// Write a GeoTable to CSV -pub fn write_csv(table: &mut GeoTable, writer: W) -> Result<()> { +/// Write a Table to CSV +pub fn write_csv(table: &mut Table, writer: W) -> Result<()> { let mut csv_writer = CsvWriter::new(writer); table.process(&mut csv_writer)?; Ok(()) diff --git a/src/io/display/table.rs b/src/io/display/table.rs index d0d6bf38..1976325a 100644 --- a/src/io/display/table.rs +++ b/src/io/display/table.rs @@ -1,10 +1,10 @@ use std::fmt; -use crate::table::GeoTable; +use crate::table::Table; -impl fmt::Display for GeoTable { +impl fmt::Display for Table { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "GeoTable")?; + writeln!(f, "Table")?; for field in self.schema().fields() { writeln!(f, "{}: {}", field.name(), field.data_type())?; } diff --git a/src/io/flatgeobuf/reader/async.rs b/src/io/flatgeobuf/reader/async.rs index 6af638bc..ef49aa73 100644 --- a/src/io/flatgeobuf/reader/async.rs +++ b/src/io/flatgeobuf/reader/async.rs @@ -5,20 +5,20 @@ use http_range_client::AsyncBufferedHttpRangeClient; use object_store::path::Path; use object_store::ObjectStore; -use crate::algorithm::native::Downcast; +use crate::algorithm::native::DowncastTable; use crate::array::*; use crate::error::{GeoArrowError, Result}; use crate::io::flatgeobuf::reader::common::{infer_schema, FlatGeobufReaderOptions}; use crate::io::flatgeobuf::reader::object_store_reader::ObjectStoreWrapper; use crate::io::geozero::array::MixedGeometryStreamBuilder; use crate::io::geozero::table::{GeoTableBuilder, GeoTableBuilderOptions}; -use crate::table::GeoTable; +use crate::table::Table; pub async fn read_flatgeobuf_async( reader: T, location: Path, options: FlatGeobufReaderOptions, -) -> Result { +) -> Result
{ let head = reader.head(&location).await?; let object_store_wrapper = ObjectStoreWrapper { diff --git a/src/io/flatgeobuf/reader/sync.rs b/src/io/flatgeobuf/reader/sync.rs index bb3bb2a0..df6e8dde 100644 --- a/src/io/flatgeobuf/reader/sync.rs +++ b/src/io/flatgeobuf/reader/sync.rs @@ -19,22 +19,22 @@ //! the GeomProcessor conversion from geozero, after initializing buffers with a better estimate of //! the total length. -use crate::algorithm::native::Downcast; +use crate::algorithm::native::DowncastTable; use crate::array::*; use crate::error::{GeoArrowError, Result}; use crate::io::flatgeobuf::reader::common::{infer_schema, FlatGeobufReaderOptions}; use crate::io::geozero::array::MixedGeometryStreamBuilder; use crate::io::geozero::table::{GeoTableBuilder, GeoTableBuilderOptions}; -use crate::table::GeoTable; +use crate::table::Table; use flatgeobuf::{FgbReader, GeometryType}; use std::io::{Read, Seek}; use std::sync::Arc; -/// Read a FlatGeobuf file to a GeoTable +/// Read a FlatGeobuf file to a Table pub fn read_flatgeobuf( file: &mut R, options: FlatGeobufReaderOptions, -) -> Result { +) -> Result
{ let reader = FgbReader::open(file)?; let header = reader.header(); diff --git a/src/io/flatgeobuf/writer.rs b/src/io/flatgeobuf/writer.rs index d0881a4e..36c10ed6 100644 --- a/src/io/flatgeobuf/writer.rs +++ b/src/io/flatgeobuf/writer.rs @@ -4,23 +4,23 @@ use flatgeobuf::{FgbWriter, FgbWriterOptions}; use geozero::GeozeroDatasource; use crate::error::GeoArrowError; -use crate::table::GeoTable; +use crate::table::Table; -// TODO: always write CRS saved in GeoTable metadata (you can do this by adding an option) -/// Write a GeoTable to a FlatGeobuf file. +// TODO: always write CRS saved in Table metadata (you can do this by adding an option) +/// Write a Table to a FlatGeobuf file. pub fn write_flatgeobuf( - table: &mut GeoTable, + table: &mut Table, writer: W, name: &str, ) -> Result<(), GeoArrowError> { write_flatgeobuf_with_options(table, writer, name, Default::default()) } -/// Write a GeoTable to a FlatGeobuf file with specific writer options. +/// Write a Table to a FlatGeobuf file with specific writer options. /// /// Note: this `name` argument is what OGR observes as the layer name of the file. pub fn write_flatgeobuf_with_options( - table: &mut GeoTable, + table: &mut Table, writer: W, name: &str, options: FgbWriterOptions, @@ -32,9 +32,13 @@ pub fn write_flatgeobuf_with_options( Ok(()) } -fn infer_flatgeobuf_geometry_type(table: &GeoTable) -> flatgeobuf::GeometryType { +fn infer_flatgeobuf_geometry_type(table: &Table) -> flatgeobuf::GeometryType { let fields = &table.schema().fields; - let geometry_field = &fields[table.geometry_column_index()]; + if table.geometry_column_indices().len() != 1 { + panic!("Only one geometry column currently supported in FlatGeobuf writer"); + } + + let geometry_field = &fields[table.geometry_column_indices()[0]]; if let Some(extension_name) = geometry_field.metadata().get("ARROW:extension:name") { let geometry_type = match extension_name.as_str() { "geoarrow.point" => flatgeobuf::GeometryType::Point, diff --git a/src/io/gdal/reader.rs b/src/io/gdal/reader.rs index 1f2f360f..14b1257a 100644 --- a/src/io/gdal/reader.rs +++ b/src/io/gdal/reader.rs @@ -7,12 +7,12 @@ use gdal::vector::Layer; use gdal::vector::LayerAccess; use crate::error::Result; -use crate::table::GeoTable; +use crate::table::Table; -/// Read a GDAL layer to a GeoTable +/// Read a GDAL layer to a Table /// /// Note that this expects GDAL 3.8 or later to propagate the CRS information correctly. -pub fn read_gdal(layer: &mut Layer, batch_size: Option) -> Result { +pub fn read_gdal(layer: &mut Layer, batch_size: Option) -> Result
{ // Instantiate an `ArrowArrayStream` for OGR to write into let mut output_stream = FFI_ArrowArrayStream::empty(); @@ -40,7 +40,7 @@ pub fn read_gdal(layer: &mut Layer, batch_size: Option) -> Result, ArrowError>>()?; - GeoTable::from_arrow(batches, schema, None, None) + Table::try_new(schema, batches) } #[cfg(test)] @@ -55,7 +55,7 @@ mod test { let dataset = Dataset::open(Path::new("fixtures/flatgeobuf/countries.fgb"))?; let mut layer = dataset.layer(0)?; let table = read_gdal(&mut layer, None)?; - dbg!(table.geometry_data_type()?); + dbg!(table.geometry_column_indices()); Ok(()) } diff --git a/src/io/geojson/reader.rs b/src/io/geojson/reader.rs index 5404ef8f..46d072fc 100644 --- a/src/io/geojson/reader.rs +++ b/src/io/geojson/reader.rs @@ -6,10 +6,10 @@ use crate::array::CoordType; use crate::error::Result; use crate::io::geozero::array::MixedGeometryStreamBuilder; use crate::io::geozero::table::{GeoTableBuilder, GeoTableBuilderOptions}; -use crate::table::GeoTable; +use crate::table::Table; -/// Read a GeoJSON file to a GeoTable. -pub fn read_geojson(reader: R, batch_size: Option) -> Result { +/// Read a GeoJSON file to a Table. +pub fn read_geojson(reader: R, batch_size: Option) -> Result
{ let mut geojson = GeoJsonReader(reader); // TODO: set CRS to epsg:4326? let options = GeoTableBuilderOptions::new( diff --git a/src/io/geojson/writer.rs b/src/io/geojson/writer.rs index d39d2d74..b497993f 100644 --- a/src/io/geojson/writer.rs +++ b/src/io/geojson/writer.rs @@ -1,13 +1,13 @@ use crate::error::Result; -use crate::table::GeoTable; +use crate::table::Table; use geozero::geojson::GeoJsonWriter; use geozero::GeozeroDatasource; use std::io::Write; -/// Write a GeoTable to GeoJSON +/// Write a Table to GeoJSON /// /// Note: Does not reproject to WGS84 for you -pub fn write_geojson(table: &mut GeoTable, writer: W) -> Result<()> { +pub fn write_geojson(table: &mut Table, writer: W) -> Result<()> { let mut geojson = GeoJsonWriter::new(writer); table.process(&mut geojson)?; Ok(()) diff --git a/src/io/geojson_lines/reader.rs b/src/io/geojson_lines/reader.rs index 36d391d9..fab2ee9d 100644 --- a/src/io/geojson_lines/reader.rs +++ b/src/io/geojson_lines/reader.rs @@ -6,13 +6,13 @@ use crate::array::CoordType; use crate::error::Result; use crate::io::geozero::array::MixedGeometryStreamBuilder; use crate::io::geozero::table::{GeoTableBuilder, GeoTableBuilderOptions}; -use crate::table::GeoTable; +use crate::table::Table; /// Read a GeoJSON Lines file /// /// This expects a GeoJSON Feature on each line of a text file, with a newline character separating /// each Feature. -pub fn read_geojson_lines(reader: R, batch_size: Option) -> Result { +pub fn read_geojson_lines(reader: R, batch_size: Option) -> Result
{ let mut geojson_line_reader = GeoJsonLineReader::new(reader); // TODO: set crs to epsg:4326? diff --git a/src/io/geojson_lines/writer.rs b/src/io/geojson_lines/writer.rs index ac606753..ffbba786 100644 --- a/src/io/geojson_lines/writer.rs +++ b/src/io/geojson_lines/writer.rs @@ -3,10 +3,10 @@ use geozero::GeozeroDatasource; use std::io::Write; use crate::error::Result; -use crate::table::GeoTable; +use crate::table::Table; /// Write a table to newline-delimited GeoJSON -pub fn write_geojson_lines(table: &mut GeoTable, writer: W) -> Result<()> { +pub fn write_geojson_lines(table: &mut Table, writer: W) -> Result<()> { let mut geojson_writer = GeoJsonLineWriter::new(writer); table.process(&mut geojson_writer)?; Ok(()) diff --git a/src/io/geozero/table/builder/table.rs b/src/io/geozero/table/builder/table.rs index 606dad0f..ce64e699 100644 --- a/src/io/geozero/table/builder/table.rs +++ b/src/io/geozero/table/builder/table.rs @@ -2,15 +2,16 @@ use std::mem::replace; use std::sync::Arc; use arrow_array::RecordBatch; -use arrow_schema::{Schema, SchemaBuilder}; +use arrow_schema::Schema; use geozero::{FeatureProcessor, GeomProcessor, PropertyProcessor}; -use crate::algorithm::native::Downcast; +use crate::algorithm::native::DowncastTable; use crate::array::metadata::ArrayMetadata; use crate::array::CoordType; +use crate::chunked_array::from_geoarrow_chunks; use crate::error::{GeoArrowError, Result}; use crate::io::geozero::table::builder::properties::PropertiesBatchBuilder; -use crate::table::GeoTable; +use crate::table::Table; use crate::trait_::{GeometryArrayBuilder, GeometryArrayTrait}; /// Options for creating a GeoTableBuilder. @@ -70,7 +71,7 @@ impl Default for GeoTableBuilderOptions { // TODO: // - This is schemaless, you need to validate that the schema doesn't change (maybe allow the user to pass in a schema?) and/or upcast data -/// A builder for creating a GeoTable from a row-based source. +/// A builder for creating a Table from a row-based source. pub struct GeoTableBuilder { /// The max number of rows in each batch /// @@ -189,7 +190,7 @@ impl GeoTableBuilder { Ok(()) } - pub fn finish(mut self) -> Result { + pub fn finish(mut self) -> Result
{ // If there are rows that haven't flushed yet, flush them to batches if self.geom_builder.len() > 0 { self.flush_batch()?; @@ -201,35 +202,19 @@ impl GeoTableBuilder { // TODO: validate schema compatibility of batches and geometry arrays - let batch = self.batches.first().unwrap(); - let schema = batch.schema(); + let batches = self.batches; + let schema = batches[0].schema(); + let mut table = Table::try_new(schema, batches)?; - // Set geometry column after property columns - let geometry_column_index = schema.fields().len(); - - let first_geom_arr = self.geom_arrays.first().unwrap(); - - let mut new_schema = SchemaBuilder::with_capacity(schema.fields().len() + 1); - schema - .fields() + let geom_slices = self + .geom_arrays .iter() - .for_each(|field| new_schema.push(field.clone())); - new_schema.push(first_geom_arr.extension_field()); - let new_schema = Arc::new(new_schema.finish()); - - // Need to add the geometry column onto the table - let batches = self - .batches - .into_iter() - .zip(self.geom_arrays) - .map(|(batch, geom_arr)| { - let mut columns = batch.columns().to_vec(); - columns.push(geom_arr.to_array_ref()); - Ok(RecordBatch::try_new(new_schema.clone(), columns)?) - }) - .collect::>>()?; - - let table = GeoTable::try_new(new_schema, batches, geometry_column_index)?; + .map(|chunk| chunk.as_ref()) + .collect::>(); + let geom_col = from_geoarrow_chunks(&geom_slices)?; + let geom_field = geom_col.extension_field(); + + table.append_column(geom_field, geom_col.array_refs())?; table.downcast(false) } } diff --git a/src/io/geozero/table/data_source.rs b/src/io/geozero/table/data_source.rs index f5957bd4..cf2d7c12 100644 --- a/src/io/geozero/table/data_source.rs +++ b/src/io/geozero/table/data_source.rs @@ -2,7 +2,7 @@ use crate::array::geometry::GeometryArray; use crate::io::geozero::scalar::process_geometry; -use crate::table::GeoTable; +use crate::table::Table; use crate::trait_::GeometryArrayAccessor; use arrow_array::{ BinaryArray, Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, @@ -13,19 +13,23 @@ use arrow_schema::{DataType, Schema}; use geozero::error::GeozeroError; use geozero::{ColumnValue, FeatureProcessor, GeomProcessor, GeozeroDatasource, PropertyProcessor}; -impl GeozeroDatasource for GeoTable { +impl GeozeroDatasource for Table { fn process(&mut self, processor: &mut P) -> Result<(), GeozeroError> { process_geotable(self, processor) } } fn process_geotable( - table: &mut GeoTable, + table: &mut Table, processor: &mut P, ) -> Result<(), GeozeroError> { let schema = table.schema(); let batches = table.batches(); - let geometry_column_index = table.geometry_column_index(); + let geometry_column_index = table.default_geometry_column_idx().map_err(|_err| { + GeozeroError::Dataset( + "Writing through geozero not supported with multiple geometries".to_string(), + ) + })?; processor.dataset_begin(None)?; diff --git a/src/io/ipc/reader.rs b/src/io/ipc/reader.rs index f616c875..11ed2e96 100644 --- a/src/io/ipc/reader.rs +++ b/src/io/ipc/reader.rs @@ -4,20 +4,20 @@ use arrow_ipc::reader::{FileReader, StreamReader}; use arrow_schema::ArrowError; use crate::error::Result; -use crate::table::GeoTable; +use crate::table::Table; /// Read into a Table from Arrow IPC (Feather v2) file. -pub fn read_ipc(reader: R) -> Result { +pub fn read_ipc(reader: R) -> Result
{ let reader = FileReader::try_new(reader, None)?; let schema = reader.schema(); let batches = reader.collect::, ArrowError>>()?; - GeoTable::from_arrow(batches, schema, None, None) + Table::try_new(schema, batches) } /// Read into a Table from Arrow IPC record batch stream. -pub fn read_ipc_stream(reader: R) -> Result { +pub fn read_ipc_stream(reader: R) -> Result
{ let reader = StreamReader::try_new(reader, None)?; let schema = reader.schema(); let batches = reader.collect::, ArrowError>>()?; - GeoTable::from_arrow(batches, schema, None, None) + Table::try_new(schema, batches) } diff --git a/src/io/ipc/writer.rs b/src/io/ipc/writer.rs index 7b39fe27..19dd657e 100644 --- a/src/io/ipc/writer.rs +++ b/src/io/ipc/writer.rs @@ -3,10 +3,10 @@ use std::io::Write; use arrow_ipc::writer::{FileWriter, StreamWriter}; use crate::error::Result; -use crate::table::GeoTable; +use crate::table::Table; -/// Write a GeoTable to an Arrow IPC (Feather v2) file -pub fn write_ipc(table: &mut GeoTable, writer: W) -> Result<()> { +/// Write a Table to an Arrow IPC (Feather v2) file +pub fn write_ipc(table: &mut Table, writer: W) -> Result<()> { let mut writer = FileWriter::try_new(writer, table.schema())?; table .batches() @@ -16,8 +16,8 @@ pub fn write_ipc(table: &mut GeoTable, writer: W) -> Result<()> { Ok(()) } -/// Write a GeoTable to an Arrow IPC stream -pub fn write_ipc_stream(table: &mut GeoTable, writer: W) -> Result<()> { +/// Write a Table to an Arrow IPC stream +pub fn write_ipc_stream(table: &mut Table, writer: W) -> Result<()> { let mut writer = StreamWriter::try_new(writer, table.schema())?; table .batches() diff --git a/src/io/parquet/reader/async.rs b/src/io/parquet/reader/async.rs index 582bccf2..57e30da3 100644 --- a/src/io/parquet/reader/async.rs +++ b/src/io/parquet/reader/async.rs @@ -5,7 +5,7 @@ use crate::io::parquet::reader::spatial_filter::{ apply_bbox_row_groups, ParquetBboxPaths, ParquetBboxStatistics, }; use crate::io::parquet::reader::GeoParquetReaderOptions; -use crate::table::GeoTable; +use crate::table::Table; use arrow_schema::SchemaRef; use futures::stream::TryStreamExt; @@ -15,11 +15,11 @@ use parquet::arrow::async_reader::{AsyncFileReader, ParquetRecordBatchStreamBuil use parquet::arrow::ProjectionMask; use serde_json::Value; -/// Asynchronously read a GeoParquet file to a GeoTable. +/// Asynchronously read a GeoParquet file to a Table. pub async fn read_geoparquet_async( reader: R, options: GeoParquetReaderOptions, -) -> Result { +) -> Result
{ let mut builder = ParquetRecordBatchStreamBuilder::new(reader) .await? .with_batch_size(options.batch_size); @@ -37,19 +37,16 @@ pub async fn read_geoparquet_async( async fn read_builder( builder: ParquetRecordBatchStreamBuilder, coord_type: &CoordType, -) -> Result { +) -> Result
{ let (arrow_schema, geometry_column_index, target_geo_data_type) = build_arrow_schema(&builder, coord_type)?; let stream = builder.build()?; let batches = stream.try_collect::<_>().await?; - GeoTable::from_arrow( - batches, - arrow_schema, - Some(geometry_column_index), - target_geo_data_type, - ) + let mut table = Table::try_new(arrow_schema, batches)?; + table.parse_geometry_to_native(geometry_column_index, target_geo_data_type)?; + Ok(table) } #[derive(Clone, Default)] @@ -244,7 +241,7 @@ impl ParquetFile { bbox: Option, bbox_paths: Option<&ParquetBboxPaths>, coord_type: &CoordType, - ) -> Result { + ) -> Result
{ let builder = self.builder(bbox, bbox_paths)?; read_builder(builder, coord_type).await } @@ -254,7 +251,7 @@ impl ParquetFile { &self, row_groups: Vec, coord_type: &CoordType, - ) -> Result { + ) -> Result
{ let builder = self .builder(None::, None)? .with_row_groups(row_groups); @@ -326,7 +323,7 @@ impl ParquetDataset { bbox: Option, bbox_paths: Option<&ParquetBboxPaths>, coord_type: &CoordType, - ) -> Result { + ) -> Result
{ let futures = self .files .iter() @@ -336,19 +333,18 @@ impl ParquetDataset { .into_iter() .collect::>>()?; - let geometry_column_index = tables[0].geometry_column_index(); let schema = tables[0].schema().clone(); let batches = tables .into_iter() .flat_map(|table| { if !table.is_empty() { - table.batches().clone() + table.batches().to_vec() } else { vec![] } }) .collect(); - GeoTable::try_new(schema, batches, geometry_column_index) + Table::try_new(schema, batches) } } diff --git a/src/io/parquet/reader/sync.rs b/src/io/parquet/reader/sync.rs index 0b44ab0e..49686975 100644 --- a/src/io/parquet/reader/sync.rs +++ b/src/io/parquet/reader/sync.rs @@ -1,16 +1,16 @@ use crate::error::Result; use crate::io::parquet::metadata::build_arrow_schema; use crate::io::parquet::GeoParquetReaderOptions; -use crate::table::GeoTable; +use crate::table::Table; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use parquet::file::reader::ChunkReader; -/// Read a GeoParquet file to a GeoTable. +/// Read a GeoParquet file to a Table. pub fn read_geoparquet( reader: R, options: GeoParquetReaderOptions, -) -> Result { +) -> Result
{ let builder = ParquetRecordBatchReaderBuilder::try_new(reader)?.with_batch_size(options.batch_size); @@ -24,12 +24,9 @@ pub fn read_geoparquet( batches.push(maybe_batch?); } - GeoTable::from_arrow( - batches, - arrow_schema, - Some(geometry_column_index), - target_geo_data_type, - ) + let mut table = Table::try_new(arrow_schema, batches)?; + table.parse_geometry_to_native(geometry_column_index, target_geo_data_type)?; + Ok(table) } #[cfg(test)] diff --git a/src/io/parquet/writer/async.rs b/src/io/parquet/writer/async.rs index 6fef3266..9a30e341 100644 --- a/src/io/parquet/writer/async.rs +++ b/src/io/parquet/writer/async.rs @@ -2,7 +2,7 @@ use crate::error::Result; use crate::io::parquet::writer::encode::encode_record_batch; use crate::io::parquet::writer::metadata::GeoParquetMetadataBuilder; use crate::io::parquet::writer::options::GeoParquetWriterOptions; -use crate::table::GeoTable; +use crate::table::Table; use arrow_array::RecordBatch; use arrow_schema::Schema; use parquet::arrow::AsyncArrowWriter; @@ -10,7 +10,7 @@ use parquet::file::metadata::KeyValue; use tokio::io::AsyncWrite; pub async fn write_geoparquet_async( - table: &mut GeoTable, + table: &mut Table, writer: W, options: &GeoParquetWriterOptions, ) -> Result<()> { diff --git a/src/io/parquet/writer/metadata.rs b/src/io/parquet/writer/metadata.rs index 0bcf0018..7623a091 100644 --- a/src/io/parquet/writer/metadata.rs +++ b/src/io/parquet/writer/metadata.rs @@ -12,7 +12,6 @@ use crate::datatypes::GeoDataType; use crate::error::{GeoArrowError, Result}; use crate::io::parquet::metadata::{GeoParquetColumnMetadata, GeoParquetMetadata}; use crate::io::parquet::writer::options::{GeoParquetWriterEncoding, GeoParquetWriterOptions}; -use crate::table::GeoTable; /// The actual encoding of the geometry in the Parquet file. /// @@ -187,35 +186,6 @@ impl GeoParquetMetadataBuilder { }) } - // TODO: now that `try_new` exists above, we can probably remove this `from_table`? - #[allow(dead_code)] - pub fn from_table(table: &GeoTable, options: &GeoParquetWriterOptions) -> Result { - let mut columns = HashMap::with_capacity(1); - - let geom_column_index = table.geometry_column_index(); - let geom_column_name = table - .schema() - .field(table.geometry_column_index()) - .name() - .clone(); - let geom_column = table.geometry()?; - let array_meta = geom_column.geometry_chunks().first().unwrap().metadata(); - let column_info = ColumnInfo::try_new( - geom_column_name, - options.encoding, - &table.geometry_data_type()?, - array_meta.as_ref().clone(), - )?; - columns.insert(geom_column_index, column_info); - - let output_schema = create_output_schema(table.schema(), &columns); - Ok(Self { - primary_column: None, - columns, - output_schema, - }) - } - #[allow(dead_code)] fn update_bounds(&mut self, bounds: &HashMap) { for (column_idx, column_bounds) in bounds.iter() { diff --git a/src/io/parquet/writer/sync.rs b/src/io/parquet/writer/sync.rs index 294d8fda..8afb2949 100644 --- a/src/io/parquet/writer/sync.rs +++ b/src/io/parquet/writer/sync.rs @@ -4,14 +4,14 @@ use crate::error::Result; use crate::io::parquet::writer::encode::encode_record_batch; use crate::io::parquet::writer::metadata::GeoParquetMetadataBuilder; use crate::io::parquet::writer::options::GeoParquetWriterOptions; -use crate::table::GeoTable; +use crate::table::Table; use arrow_array::RecordBatch; use arrow_schema::Schema; use parquet::arrow::ArrowWriter; use parquet::file::metadata::KeyValue; pub fn write_geoparquet( - table: &mut GeoTable, + table: &mut Table, writer: W, options: &GeoParquetWriterOptions, ) -> Result<()> { diff --git a/src/io/postgis/reader.rs b/src/io/postgis/reader.rs index e157da60..e452a28d 100644 --- a/src/io/postgis/reader.rs +++ b/src/io/postgis/reader.rs @@ -13,7 +13,7 @@ use std::sync::Arc; use crate::error::{GeoArrowError, Result}; use crate::io::geozero::array::MixedGeometryStreamBuilder; use crate::io::geozero::table::{GeoTableBuilder, GeoTableBuilderOptions}; -use crate::table::GeoTable; +use crate::table::Table; use crate::trait_::GeometryArrayBuilder; /// A wrapper for an EWKB-encoded postgis geometry @@ -169,7 +169,7 @@ impl GeoTableBuilder { pub async fn read_postgis<'c, E: Executor<'c, Database = Postgres>>( executor: E, sql: &str, -) -> Result> { +) -> Result> { let query = sqlx::query::(sql); let mut result_stream = query.fetch(executor); let mut table_builder: Option>> = None; diff --git a/src/table/mod.rs b/src/table/mod.rs index 24a02687..bfcdcf42 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,15 +1,16 @@ //! Abstractions for Arrow tables. Useful for dataset IO where data will have geometries and //! attributes. +use std::ops::Deref; use std::sync::Arc; -use arrow_array::{ArrayRef, RecordBatch}; -use arrow_schema::{FieldRef, SchemaBuilder, SchemaRef}; +use arrow_array::{Array, ArrayRef, RecordBatch}; +use arrow_schema::{ArrowError, FieldRef, Schema, SchemaBuilder, SchemaRef}; -use crate::algorithm::native::Downcast; +use crate::algorithm::native::{Cast, Downcast}; use crate::array::*; +use crate::chunked_array::ChunkedArray; use crate::chunked_array::{from_arrow_chunks, from_geoarrow_chunks, ChunkedGeometryArrayTrait}; -use crate::chunked_array::{ChunkedArray, ChunkedGeometryArray}; use crate::datatypes::GeoDataType; use crate::error::{GeoArrowError, Result}; use crate::io::wkb::from_wkb; @@ -28,25 +29,34 @@ static GEOARROW_EXTENSION_NAMES: Set<&'static str> = phf_set! { "ogc.wkb", }; +/// An Arrow table that MAY contain one or more geospatial columns. +/// +/// This Table object is designed to be interoperable with non-geospatial Arrow libraries, and thus +/// does not _require_ a geometry column. #[derive(Debug, PartialEq, Clone)] -pub struct GeoTable { +pub struct Table { schema: SchemaRef, batches: Vec, - geometry_column_index: usize, } -impl GeoTable { - pub fn try_new( - schema: SchemaRef, - batches: Vec, - geometry_column_index: usize, - ) -> Result { - // TODO: validate - Ok(Self { - schema, - batches, - geometry_column_index, - }) +impl Table { + pub fn try_new(schema: SchemaRef, batches: Vec) -> Result { + for batch in batches.iter() { + // Don't check schema metadata in comparisons. + // TODO: I have some issues in the Parquet reader where the batches are missing the + // schema metadata. + if batch.schema().fields() != schema.fields() { + return Err(GeoArrowError::General(format!( + "Schema is not consistent across batches. Expected {}, got {}. With expected metadata: {:?}, got {:?}", + schema, + batch.schema(), + schema.metadata(), + batch.schema().metadata() + ))); + } + } + + Ok(Self { schema, batches }) } pub fn from_arrow_and_geometry( @@ -69,12 +79,98 @@ impl GeoTable { new_batches.push(RecordBatch::try_new(new_schema.clone(), columns)?); } - let geometry_column_index = new_schema.fields().len() - 1; - Self::try_new(new_schema, new_batches, geometry_column_index) + Self::try_new(new_schema, new_batches) + } + + /// Cast the geometry at `index` to a different data type + pub fn cast_geometry(&mut self, index: usize, to_type: &GeoDataType) -> Result<()> { + let orig_field = self.schema().field(index); + + let array_slices = self + .batches() + .iter() + .map(|batch| batch.column(index).as_ref()) + .collect::>(); + let chunked_geometry = from_arrow_chunks(array_slices.as_slice(), orig_field)?; + let casted_geometry = chunked_geometry.as_ref().cast(to_type)?; + let casted_arrays = casted_geometry.array_refs(); + let casted_field = to_type.to_field(orig_field.name(), orig_field.is_nullable()); + + self.set_column(index, casted_field.into(), casted_arrays)?; + + Ok(()) + } + + /// Parse the geometry at `index` to a GeoArrow-native type + /// + /// Use [Self::cast_geometry] if you know the target data type + pub fn parse_geometry_to_native( + &mut self, + index: usize, + target_geo_data_type: Option, + ) -> Result<()> { + let orig_field = self.schema().field(index); + + let array_slices = self + .batches() + .iter() + .map(|batch| batch.column(index).as_ref()) + .collect::>(); + let chunked_geometry = from_arrow_chunks(array_slices.as_slice(), orig_field)?; + + let target_geo_data_type = + target_geo_data_type.unwrap_or(GeoDataType::LargeMixed(Default::default())); + + // Parse WKB + let new_geometry = match chunked_geometry.data_type() { + GeoDataType::WKB => { + let parsed_chunks = chunked_geometry + .as_ref() + .as_wkb() + .chunks() + .iter() + .map(|chunk| from_wkb(chunk, target_geo_data_type, true)) + .collect::>>()?; + let parsed_chunks_refs = parsed_chunks + .iter() + .map(|chunk| chunk.as_ref()) + .collect::>(); + from_geoarrow_chunks(parsed_chunks_refs.as_slice())? + .as_ref() + .downcast(true) + } + GeoDataType::LargeWKB => { + let parsed_chunks = chunked_geometry + .as_ref() + .as_large_wkb() + .chunks() + .iter() + .map(|chunk| from_wkb(chunk, target_geo_data_type, true)) + .collect::>>()?; + let parsed_chunks_refs = parsed_chunks + .iter() + .map(|chunk| chunk.as_ref()) + .collect::>(); + from_geoarrow_chunks(parsed_chunks_refs.as_slice())? + .as_ref() + .downcast(true) + } + _ => chunked_geometry, + }; + + let new_field = new_geometry + .data_type() + .to_field(orig_field.name(), orig_field.is_nullable()); + let new_arrays = new_geometry.array_refs(); + + self.set_column(index, new_field.into(), new_arrays)?; + + Ok(()) } // Note: This function is relatively complex because we want to parse any WKB columns to // geoarrow-native arrays + #[deprecated] pub fn from_arrow( batches: Vec, schema: SchemaRef, @@ -82,9 +178,7 @@ impl GeoTable { target_geo_data_type: Option, ) -> Result { if batches.is_empty() { - // TODO: Better handling of empty tables - return Self::try_new(schema, batches, geometry_column_index.unwrap()); - // return Err(GeoArrowError::General("empty input".to_string())); + return Self::try_new(schema, batches); } let num_batches = batches.len(); @@ -174,7 +268,6 @@ impl GeoTable { new_schema.push(chunked_geometry_array.extension_field()); let new_schema = Arc::new(new_schema.finish()); - let new_geometry_column_index = new_schema.fields().len() - 1; let mut new_record_batches = Vec::with_capacity(num_batches); for (mut new_batch, geom_chunk) in new_batches @@ -185,7 +278,7 @@ impl GeoTable { new_record_batches.push(RecordBatch::try_new(new_schema.clone(), new_batch).unwrap()); } - GeoTable::try_new(new_schema, new_record_batches, new_geometry_column_index) + Table::try_new(new_schema, new_record_batches) } pub fn len(&self) -> usize { @@ -196,24 +289,83 @@ impl GeoTable { self.len() == 0 } - pub fn into_inner(self) -> (SchemaRef, Vec, usize) { - (self.schema, self.batches, self.geometry_column_index) + pub fn into_inner(self) -> (SchemaRef, Vec) { + (self.schema, self.batches) } pub fn schema(&self) -> &SchemaRef { &self.schema } - pub fn batches(&self) -> &Vec { + pub fn batches(&self) -> &[RecordBatch] { &self.batches } - pub fn geometry_column_index(&self) -> usize { - self.geometry_column_index + /// Find the indices of all geometry columns in this table. + /// + /// This may be an empty Vec if the table contains no geometry columns, or a vec with more than + /// one element if the table contains multiple tagged geometry columns. + pub fn geometry_column_indices(&self) -> Vec { + let mut geom_indices = vec![]; + for (field_idx, field) in self.schema().fields().iter().enumerate() { + let meta = field.metadata(); + if let Some(ext_name) = meta.get("ARROW:extension:name") { + if GEOARROW_EXTENSION_NAMES.contains(ext_name.as_str()) { + geom_indices.push(field_idx); + } + } + } + geom_indices } - pub fn geometry_data_type(&self) -> Result { - Ok(*self.geometry()?.data_type()) + pub fn default_geometry_column_idx(&self) -> Result { + let geom_col_indices = self.geometry_column_indices(); + if geom_col_indices.len() != 1 { + Err(GeoArrowError::General( + "Cannot use default geometry column when multiple geometry columns exist in table" + .to_string(), + )) + } else { + Ok(geom_col_indices[0]) + } + } + + /// Access the geometry chunked array at the provided column index. + pub fn geometry_column( + &self, + index: Option, + ) -> Result> { + let index = if let Some(index) = index { + index + } else { + let geom_indices = self.geometry_column_indices(); + if geom_indices.len() == 1 { + geom_indices[0] + } else { + return Err(GeoArrowError::General( + "`index` must be provided when multiple geometry columns exist.".to_string(), + )); + } + }; + + let field = self.schema.field(index); + let array_refs = self + .batches + .iter() + .map(|batch| batch.column(index).as_ref()) + .collect::>(); + from_arrow_chunks(array_refs.as_slice(), field) + } + + /// Access all geometry chunked arrays from the table. + /// + /// This may return an empty `Vec` if there are no geometry columns in the table, or may return + /// more than one element if there are multiple geometry columns. + pub fn geometry_columns(&self) -> Result>> { + self.geometry_column_indices() + .into_iter() + .map(|index| self.geometry_column(Some(index))) + .collect() } /// The number of columns in this table. @@ -221,6 +373,37 @@ impl GeoTable { self.schema.fields().len() } + /// Replace the column at index `i` with the given field and arrays. + pub fn set_column( + &mut self, + i: usize, + field: FieldRef, + column: Vec>, + ) -> Result<()> { + let mut fields = self.schema().fields().deref().to_vec(); + fields[i] = field; + let schema = Arc::new(Schema::new_with_metadata( + fields, + self.schema().metadata().clone(), + )); + + let batches = self + .batches + .iter() + .zip(column) + .map(|(batch, array)| { + let mut arrays = batch.columns().to_vec(); + arrays[i] = array; + RecordBatch::try_new(schema.clone(), arrays) + }) + .collect::, ArrowError>>()?; + + self.schema = schema; + self.batches = batches; + + Ok(()) + } + pub(crate) fn remove_column(&mut self, i: usize) -> ChunkedArray { // NOTE: remove_column drops schema metadata as of // https://github.com/apache/arrow-rs/issues/5327 @@ -237,18 +420,13 @@ impl GeoTable { ChunkedArray::new(removed_chunks) } - #[allow(dead_code)] - pub(crate) fn append_column( - &mut self, - field: FieldRef, - column: ChunkedArray, - ) -> Result { - assert_eq!(self.batches().len(), column.chunks().len()); + pub fn append_column(&mut self, field: FieldRef, column: Vec>) -> Result { + assert_eq!(self.batches().len(), column.len()); let new_batches = self .batches .iter_mut() - .zip(column.chunks) + .zip(column) .map(|(batch, array)| { let mut schema_builder = SchemaBuilder::from(batch.schema().as_ref().clone()); schema_builder.push(field.clone()); @@ -271,145 +449,4 @@ impl GeoTable { Ok(self.schema.fields().len() - 1) } - - /// Access the geometry column of the table - pub fn geometry(&self) -> Result> { - let field = self.schema.field(self.geometry_column_index); - let array_refs = self - .batches - .iter() - .map(|batch| batch.column(self.geometry_column_index)) - .collect::>(); - let geo_data_type = GeoDataType::try_from(field)?; - match geo_data_type { - GeoDataType::Point(_) => { - let chunks: Result> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::LineString(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::LargeLineString(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::Polygon(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::LargePolygon(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::MultiPoint(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::LargeMultiPoint(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::MultiLineString(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::LargeMultiLineString(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::MultiPolygon(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::LargeMultiPolygon(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::Mixed(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::LargeMixed(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::GeometryCollection(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::LargeGeometryCollection(_) => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::WKB => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::LargeWKB => { - let chunks: Result>> = array_refs - .into_iter() - .map(|arr| arr.as_ref().try_into()) - .collect(); - Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - GeoDataType::Rect => { - // tryfrom not implemented for RectArray - todo!() - // let chunks: Result> = array_refs - // .into_iter() - // .map(|arr| arr.as_ref().try_into()) - // .collect(); - // Ok(Arc::new(ChunkedGeometryArray::new(chunks?))) - } - } - } } diff --git a/src/test/point.rs b/src/test/point.rs index 1ede971e..916a8a5b 100644 --- a/src/test/point.rs +++ b/src/test/point.rs @@ -5,7 +5,7 @@ use arrow_schema::{DataType, Field, Schema}; use geo::{point, Point}; use crate::array::PointArray; -use crate::table::GeoTable; +use crate::table::Table; use crate::test::properties; use crate::GeometryArrayTrait; @@ -31,7 +31,7 @@ pub(crate) fn point_array() -> PointArray { vec![p0(), p1(), p2()].as_slice().into() } -pub(crate) fn table() -> GeoTable { +pub(crate) fn table() -> Table { let point_array = point_array(); let u8_array = properties::u8_array(); let string_array = properties::string_array(); @@ -53,5 +53,5 @@ pub(crate) fn table() -> GeoTable { ) .unwrap(); - GeoTable::try_new(schema, vec![batch], 2).unwrap() + Table::try_new(schema, vec![batch]).unwrap() }