From a0563412d1e9d4785209b46084dce6e893f26ff8 Mon Sep 17 00:00:00 2001 From: garrettladley Date: Tue, 20 Feb 2024 00:57:42 -0500 Subject: [PATCH 1/4] scrapper mvp done | TODO: testing, CI, README, flag in main.go that pipes in SAC Super Club UUID --- scraper/.gitignore | 1 + scraper/clubs/Cargo.lock | 1472 +++++++++++++++++++++ scraper/clubs/Cargo.toml | 24 + scraper/clubs/src/cli.rs | 17 + scraper/clubs/src/domain/category_tag.rs | 165 +++ scraper/clubs/src/domain/club.rs | 64 + scraper/clubs/src/domain/mod.rs | 2 + scraper/clubs/src/dumper/mod.rs | 1 + scraper/clubs/src/dumper/sql.rs | 36 + scraper/clubs/src/lib.rs | 4 + scraper/clubs/src/main.rs | 32 + scraper/clubs/src/scraper/mod.rs | 3 + scraper/clubs/src/scraper/scraped_club.rs | 58 + 13 files changed, 1879 insertions(+) create mode 100644 scraper/.gitignore create mode 100644 scraper/clubs/Cargo.lock create mode 100644 scraper/clubs/Cargo.toml create mode 100644 scraper/clubs/src/cli.rs create mode 100644 scraper/clubs/src/domain/category_tag.rs create mode 100644 scraper/clubs/src/domain/club.rs create mode 100644 scraper/clubs/src/domain/mod.rs create mode 100644 scraper/clubs/src/dumper/mod.rs create mode 100644 scraper/clubs/src/dumper/sql.rs create mode 100644 scraper/clubs/src/lib.rs create mode 100644 scraper/clubs/src/main.rs create mode 100644 scraper/clubs/src/scraper/mod.rs create mode 100644 scraper/clubs/src/scraper/scraped_club.rs diff --git a/scraper/.gitignore b/scraper/.gitignore new file mode 100644 index 000000000..2f7896d1d --- /dev/null +++ b/scraper/.gitignore @@ -0,0 +1 @@ +target/ diff --git a/scraper/clubs/Cargo.lock b/scraper/clubs/Cargo.lock new file mode 100644 index 000000000..2e130b5b8 --- /dev/null +++ b/scraper/clubs/Cargo.lock @@ -0,0 +1,1472 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b09b5178381e0874812a9b157f7fe84982617e48f71f4e3235482775e5b540" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "backtrace" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + +[[package]] +name = "bumpalo" +version = "3.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d32a994c2b3ca201d9b263612a374263f05e7adde37c4707f693dcd375076d1f" + +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + +[[package]] +name = "cc" +version = "1.0.83" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets 0.52.0", +] + +[[package]] +name = "clap" +version = "4.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "encoding_rs" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", +] + +[[package]] +name = "getrandom" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "h2" +version = "0.3.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" +dependencies = [ 
+ "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd5256b483761cd23699d0da46cc6fd2ee3be420bbe6d020ae4a091e70b7e9fd" + +[[package]] +name = "http" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + 
"itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "ipnet" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "js-sys" +version = "0.3.68" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + +[[package]] +name = "lipsum" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c5e9ef2d2ad6fe67a59ace27c203c8d3a71d195532ee82e3bbe0d5f9a9ca541" +dependencies = [ + "rand", + "rand_chacha", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "num-traits" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "openssl" +version = "0.10.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +dependencies = [ + "bitflags 2.4.2", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae94056a791d0e1217d18b6cbdccb02c61e3054fc69893607f4067e3bb0b1fd1" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] 
+ +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "regex" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "reqwest" +version = "0.11.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile", + 
"serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "system-configuration", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustix" +version = "0.38.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +dependencies = [ + "bitflags 2.4.2", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64", +] + +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + +[[package]] +name = "ryu" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" + +[[package]] +name = "sac_scraper" +version = "0.1.0" +dependencies = [ + "chrono", + "clap", + "lipsum", + "once_cell", + "rand", + "reqwest", + "serde", + "serde_json", + "strum", + "strum_macros", + "tokio", + "url", + "uuid", + "voca_rs", +] + +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "security-framework" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "socket2" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "stfu8" 
+version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51f1e89f093f99e7432c491c382b88a6860a5adbe6bf02574bf0a08efff1978" + +[[package]] +name = "strsim" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" + +[[package]] +name = "strum" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f" + +[[package]] +name = "strum_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.10.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.36.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "pin-project-lite", + "socket2", + "tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "uuid" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "voca_rs" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e44efbf25e32768d5ecd22244feacc3d3b3eca72d318f5ef0a4764c2c158e18" +dependencies = [ + "regex", + "stfu8", + "unicode-segmentation", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"877b9c3f61ceea0e56331985743b13f3d25c406a7098d45180fb5f09bc19ed97" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" + +[[package]] +name = "web-sys" +version = "0.3.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" 
+version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] diff --git a/scraper/clubs/Cargo.toml b/scraper/clubs/Cargo.toml new file mode 100644 index 000000000..93415c04c --- /dev/null +++ b/scraper/clubs/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "sac_club_scraper" +version = "0.1.0" +edition = "2021" +authors = ["Garrett Ladley"] + + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.4", features = ["derive"] } +strum = "0.26" +strum_macros = "0.26" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +url = "2.5.0" +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } +reqwest = "0.11.24" +voca_rs = "1.15.2" +rand = "0.8.5" +once_cell = "1.8.0" +lipsum = "0.9.0" +chrono = "0.4.19" +uuid = "1.7.0" diff --git a/scraper/clubs/src/cli.rs b/scraper/clubs/src/cli.rs new file mode 100644 index 000000000..7e3b15193 --- /dev/null +++ b/scraper/clubs/src/cli.rs @@ -0,0 +1,17 @@ +use std::path::PathBuf; + +use clap::Parser; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub struct Args { + /// Top N results to parse + #[clap(short, long, default_value = "2")] + pub top_n: usize, + /// Output file + #[clap(short, long, default_value = "mock.sql")] + pub output: PathBuf, + /// Parent club UUID + #[clap(short, long, default_value = "00000000-0000-0000-0000-000000000000")] + pub parent: uuid::Uuid, +} diff --git a/scraper/clubs/src/domain/category_tag.rs b/scraper/clubs/src/domain/category_tag.rs new file mode 100644 index 000000000..1954211d3 --- /dev/null +++ b/scraper/clubs/src/domain/category_tag.rs @@ -0,0 +1,165 @@ +use once_cell::sync::Lazy; +use strum::IntoEnumIterator; +use strum_macros::Display; +use strum_macros::EnumIter; + +#[derive(Debug)] +pub struct Tag { + pub category: Category, + pub name: String, +} + +#[derive(Debug)] +pub enum 
Category { + PreProfessional, + CulturalAndIdentity, + ArtsAndCreativity, + SportsAndRecreation, + ScienceAndTechnology, + CommunityServiceAndAdvocacy, + MediaAndCommunication, +} + +fn tags_for_category() -> Vec { + T::iter() + .map(|item| Tag { + category: T::category(), + name: item.to_string(), + }) + .collect() +} + +pub static TAGS: Lazy> = Lazy::new(|| { + let mut tags = Vec::new(); + + tags.extend(tags_for_category::()); + tags.extend(tags_for_category::()); + tags.extend(tags_for_category::()); + tags.extend(tags_for_category::()); + tags.extend(tags_for_category::()); + tags.extend(tags_for_category::()); + tags.extend(tags_for_category::()); + + tags +}); + +trait TagCategory { + fn category() -> Category; +} + +#[derive(EnumIter, Display)] +pub enum PreProfessional { + Premed, + Prelaw, + Other, +} + +impl TagCategory for PreProfessional { + fn category() -> Category { + Category::PreProfessional + } +} + +#[derive(EnumIter, Display)] +pub enum CulturalAndIdentity { + Judaism, + Christianity, + Hinduism, + Islam, + LatinAmerica, + AfricanAmerican, + AsianAmerican, + LGBTQ, + Other, +} + +impl TagCategory for CulturalAndIdentity { + fn category() -> Category { + Category::CulturalAndIdentity + } +} + +#[derive(EnumIter, Display)] +pub enum ArtsAndCreativity { + PerformingArts, + VisualArts, + CreativeWriting, + Music, + Other, +} + +impl TagCategory for ArtsAndCreativity { + fn category() -> Category { + Category::ArtsAndCreativity + } +} + +#[derive(EnumIter, Display)] +pub enum SportsAndRecreation { + Soccer, + Hiking, + Climbing, + Lacrosse, + Other, +} + +impl TagCategory for SportsAndRecreation { + fn category() -> Category { + Category::SportsAndRecreation + } +} + +#[derive(EnumIter, Display)] +pub enum ScienceAndTechnology { + Mathematics, + Physics, + Biology, + Chemistry, + EnvironmentalScience, + Geology, + Neuroscience, + Psychology, + SoftwareEngineering, + ArtificialIntelligence, + DataScience, + MechanicalEngineering, + 
ElectricalEngineering, + IndustrialEngineering, + Other, +} + +impl TagCategory for ScienceAndTechnology { + fn category() -> Category { + Category::ScienceAndTechnology + } +} + +#[derive(EnumIter, Display)] +pub enum CommunityServiceAndAdvocacy { + Volunteerism, + EnvironmentalAdvocacy, + HumanRights, + CommunityOutreach, + Other, +} + +impl TagCategory for CommunityServiceAndAdvocacy { + fn category() -> Category { + Category::CommunityServiceAndAdvocacy + } +} + +#[derive(EnumIter, Display)] +pub enum MediaAndCommunication { + Journalism, + Broadcasting, + Film, + PublicRelations, + Other, +} + +impl TagCategory for MediaAndCommunication { + fn category() -> Category { + Category::MediaAndCommunication + } +} diff --git a/scraper/clubs/src/domain/club.rs b/scraper/clubs/src/domain/club.rs new file mode 100644 index 000000000..f5a9ccb9f --- /dev/null +++ b/scraper/clubs/src/domain/club.rs @@ -0,0 +1,64 @@ +use rand::thread_rng; +use strum::IntoEnumIterator; +use strum_macros::{Display, EnumIter}; + +use rand::{seq::IteratorRandom, Rng}; + +use voca_rs::Voca; + +use crate::scraper::ScrapedClub; + +use crate::domain::category_tag::{Tag, TAGS}; + +#[derive(Debug, PartialEq, EnumIter, Display)] +pub enum RecruitmentCycle { + #[strum(serialize = "fall")] + Fall, + #[strum(serialize = "spring")] + Spring, + #[strum(serialize = "fallSpring")] + FallSpring, + #[strum(serialize = "always")] + Always, +} + +#[derive(Debug, PartialEq, EnumIter, Display)] +pub enum RecruitmentType { + #[strum(serialize = "unrestricted")] + Unrestricted, + #[strum(serialize = "application")] + Tryout, + #[strum(serialize = "application")] + Application, +} + +#[derive(Debug)] +pub struct Club<'a> { + pub name: &'a String, + pub preview: &'a String, + pub description: String, + pub num_members: usize, + pub is_recruiting: bool, + pub recruitment_cycle: RecruitmentCycle, + pub recruitment_type: RecruitmentType, + pub tags: Vec<&'a Tag>, +} + +impl<'a> From<&'a ScrapedClub> for Club<'a> { + fn 
from(scraped: &'a ScrapedClub) -> Self { + let mut rng = thread_rng(); + + let num_tags = rng.gen_range(1..8); + + Club { + name: &scraped.name, + preview: &scraped.preview, + description: scraped.description._strip_tags().replace(" ", " "), + num_members: rng.gen_range(1..1024), + is_recruiting: rng.gen_bool(0.5), + recruitment_cycle: RecruitmentCycle::iter().choose(&mut rng).unwrap(), + recruitment_type: RecruitmentType::iter().choose(&mut rng).unwrap(), + tags: TAGS.iter().choose_multiple(&mut rng, num_tags), + } + } +} diff --git a/scraper/clubs/src/domain/mod.rs b/scraper/clubs/src/domain/mod.rs new file mode 100644 index 000000000..30ad97579 --- /dev/null +++ b/scraper/clubs/src/domain/mod.rs @@ -0,0 +1,2 @@ +pub mod category_tag; +pub mod club; diff --git a/scraper/clubs/src/dumper/mod.rs b/scraper/clubs/src/dumper/mod.rs new file mode 100644 index 000000000..2752f636b --- /dev/null +++ b/scraper/clubs/src/dumper/mod.rs @@ -0,0 +1 @@ +pub mod sql; diff --git a/scraper/clubs/src/dumper/sql.rs b/scraper/clubs/src/dumper/sql.rs new file mode 100644 index 000000000..d32a0fb1f --- /dev/null +++ b/scraper/clubs/src/dumper/sql.rs @@ -0,0 +1,36 @@ +use chrono::Local; +use std::{error::Error, fs::File, io::Write, path::PathBuf}; + +use crate::domain::club::Club; + +pub fn dump(clubs: Vec, path: PathBuf, parent: uuid::Uuid) -> Result<(), Box> { + let mut file = File::create(path)?; + + writeln!(file, "-- AUTOGENERATED MOCK DATA, DO NOT MODIFY")?; + writeln!( + file, + "-- GENERATED AT {}", + Local::now().format("%Y-%m-%d %H:%M:%S") + )?; + + writeln!(file, "BEGIN;\n")?; + + for club in clubs { + writeln!( + file, + r#"INSERT INTO "clubs" ("name", "preview", "description", "num_members", "is_recruiting", "recruitment_cycle", "recruitment_type", "parent", ) VALUES ('{}', '{}', '{}', {}, {}, '{}', '{}', {});"#, + club.name.replace('\'', "''"), + club.preview.replace('\'', "''"), + club.description.replace('\'', "''"), + club.num_members, + club.is_recruiting, + 
club.recruitment_cycle, + club.recruitment_type, + parent, + )?; + } + + writeln!(file, "\nCOMMIT;")?; + + Ok(()) +} diff --git a/scraper/clubs/src/lib.rs b/scraper/clubs/src/lib.rs new file mode 100644 index 000000000..8a9b6d544 --- /dev/null +++ b/scraper/clubs/src/lib.rs @@ -0,0 +1,4 @@ +pub mod cli; +pub mod domain; +pub mod dumper; +pub mod scraper; diff --git a/scraper/clubs/src/main.rs b/scraper/clubs/src/main.rs new file mode 100644 index 000000000..15e5e0507 --- /dev/null +++ b/scraper/clubs/src/main.rs @@ -0,0 +1,32 @@ +use std::process::exit; + +use clap::Parser; +use sac_scraper::{cli::Args, domain::club::Club, dumper::sql::dump}; + +use reqwest::Error; +use sac_scraper::scraper::ClubsResponse; + +#[tokio::main] +async fn main() -> Result<(), Error> { + let args = Args::parse(); + + let response = reqwest::get(format!("https://neu.campuslabs.com/engage/api/discovery/search/organizations?orderBy[0]=UpperName%20asc&top={}&skip=0", args.top_n)).await?; + + if !response.status().is_success() { + println!("Request failed with status: {}", response.status()); + exit(1); + } + + let body = response.text().await?; + + let response: ClubsResponse = serde_json::from_str(&body).expect("Failed to deserialize"); + + dump( + response.scraped_clubs.iter().map(Club::from).collect(), + args.output, + args.parent, + ) + .expect("Failed to dump"); + + Ok(()) +} diff --git a/scraper/clubs/src/scraper/mod.rs b/scraper/clubs/src/scraper/mod.rs new file mode 100644 index 000000000..63dd30715 --- /dev/null +++ b/scraper/clubs/src/scraper/mod.rs @@ -0,0 +1,3 @@ +pub mod scraped_club; + +pub use scraped_club::{ClubsResponse, ScrapedClub}; diff --git a/scraper/clubs/src/scraper/scraped_club.rs b/scraper/clubs/src/scraper/scraped_club.rs new file mode 100644 index 000000000..a9e9eccce --- /dev/null +++ b/scraper/clubs/src/scraper/scraped_club.rs @@ -0,0 +1,58 @@ +use std::fmt; + +use rand::{thread_rng, Rng}; +use serde::{ + de::{self, Visitor}, + Deserializer, +}; + +use 
lipsum::lipsum; + +#[derive(Debug, serde::Deserialize)] +pub struct ClubsResponse { + #[serde(rename = "value")] + pub scraped_clubs: Vec, +} + +#[derive(Debug, serde::Deserialize)] +pub struct ScrapedClub { + #[serde(rename = "Name", deserialize_with = "deserialize_with_lipsum")] + pub name: String, + #[serde(rename = "Summary", deserialize_with = "deserialize_with_lipsum")] + pub preview: String, + #[serde(rename = "Description", deserialize_with = "deserialize_with_lipsum")] + pub description: String, +} + +fn deserialize_with_lipsum<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + struct StringOrLipsum; + + impl<'de> Visitor<'de> for StringOrLipsum { + type Value = String; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string or null for a lipsum replacement") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + Ok(value.to_owned()) + } + + fn visit_unit(self) -> Result + where + E: de::Error, + { + let mut rng = thread_rng(); + + Ok(lipsum(rng.gen_range(16..128))) + } + } + + deserializer.deserialize_any(StringOrLipsum) +} From 1101ab8282fe2e3bffc94fa2d758037730fcc8b9 Mon Sep 17 00:00:00 2001 From: garrettladley Date: Tue, 20 Feb 2024 01:01:20 -0500 Subject: [PATCH 2/4] bump default num_clubs | append if file exists --- scraper/clubs/src/cli.rs | 2 +- scraper/clubs/src/dumper/sql.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scraper/clubs/src/cli.rs b/scraper/clubs/src/cli.rs index 7e3b15193..bb9af8885 100644 --- a/scraper/clubs/src/cli.rs +++ b/scraper/clubs/src/cli.rs @@ -6,7 +6,7 @@ use clap::Parser; #[command(author, version, about, long_about = None)] pub struct Args { /// Top N results to parse - #[clap(short, long, default_value = "2")] + #[clap(short, long, default_value = "1024")] pub top_n: usize, /// Output file #[clap(short, long, default_value = "mock.sql")] diff --git a/scraper/clubs/src/dumper/sql.rs 
b/scraper/clubs/src/dumper/sql.rs index d32a0fb1f..e7534f227 100644 --- a/scraper/clubs/src/dumper/sql.rs +++ b/scraper/clubs/src/dumper/sql.rs @@ -4,7 +4,7 @@ use std::{error::Error, fs::File, io::Write, path::PathBuf}; use crate::domain::club::Club; pub fn dump(clubs: Vec, path: PathBuf, parent: uuid::Uuid) -> Result<(), Box> { - let mut file = File::create(path)?; + let mut file = OpenOptions::new().create(true).append(true).open(&path)?; writeln!(file, "-- AUTOGENERATED MOCK DATA, DO NOT MODIFY")?; writeln!( From 86b6afac9199f660d251b6faf4ccf7be4b6b9cbb Mon Sep 17 00:00:00 2001 From: garrettladley Date: Tue, 20 Feb 2024 18:30:19 -0500 Subject: [PATCH 3/4] final version (?) --- scraper/clubs/Cargo.lock | 5 +- scraper/clubs/Cargo.toml | 2 +- scraper/clubs/README.md | 19 +++ scraper/clubs/src/domain/category.rs | 69 ++++++++++ scraper/clubs/src/domain/category_tag.rs | 165 ----------------------- scraper/clubs/src/domain/club.rs | 16 +-- scraper/clubs/src/domain/mod.rs | 6 +- scraper/clubs/src/domain/tag.rs | 114 ++++++++++++++++ scraper/clubs/src/dumper/category.rs | 14 ++ scraper/clubs/src/dumper/club.rs | 22 +++ scraper/clubs/src/dumper/dump.rs | 38 ++++++ scraper/clubs/src/dumper/mod.rs | 5 +- scraper/clubs/src/dumper/sql.rs | 36 ----- scraper/clubs/src/dumper/tag.rs | 15 +++ scraper/clubs/src/main.rs | 28 ++-- 15 files changed, 332 insertions(+), 222 deletions(-) create mode 100644 scraper/clubs/README.md create mode 100644 scraper/clubs/src/domain/category.rs delete mode 100644 scraper/clubs/src/domain/category_tag.rs create mode 100644 scraper/clubs/src/domain/tag.rs create mode 100644 scraper/clubs/src/dumper/category.rs create mode 100644 scraper/clubs/src/dumper/club.rs create mode 100644 scraper/clubs/src/dumper/dump.rs delete mode 100644 scraper/clubs/src/dumper/sql.rs create mode 100644 scraper/clubs/src/dumper/tag.rs diff --git a/scraper/clubs/Cargo.lock b/scraper/clubs/Cargo.lock index 2e130b5b8..1e857ee9d 100644 --- a/scraper/clubs/Cargo.lock +++ 
b/scraper/clubs/Cargo.lock @@ -870,7 +870,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] -name = "sac_scraper" +name = "sac_club_scraper" version = "0.1.0" dependencies = [ "chrono", @@ -1211,6 +1211,9 @@ name = "uuid" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" +dependencies = [ + "getrandom", +] [[package]] name = "vcpkg" diff --git a/scraper/clubs/Cargo.toml b/scraper/clubs/Cargo.toml index 93415c04c..ce2e2bb1b 100644 --- a/scraper/clubs/Cargo.toml +++ b/scraper/clubs/Cargo.toml @@ -21,4 +21,4 @@ rand = "0.8.5" once_cell = "1.8.0" lipsum = "0.9.0" chrono = "0.4.19" -uuid = "1.7.0" +uuid = { version = "1.7.0", features = ["v4"] } diff --git a/scraper/clubs/README.md b/scraper/clubs/README.md new file mode 100644 index 000000000..ee9c66b32 --- /dev/null +++ b/scraper/clubs/README.md @@ -0,0 +1,19 @@ +# SAC Club Scraper + +A Rust CLI that scrapes Clubs from to be used as mock data for natural language search. + +> [!NOTE] +> It is assumed you have Rust installed on your machine. If not, you can install it [here](https://www.rust-lang.org/tools/install). 
+ +## Usage + +```console +Usage: sac_club_scraper [OPTIONS] + +Options: + -t, --top-n Top N results to parse [default: 1024] + -o, --output Output file [default: mock.sql] + -p, --parent Parent club UUID [default: 00000000-0000-0000-0000-000000000000] + -h, --help Print help + -V, --version Print version +``` diff --git a/scraper/clubs/src/domain/category.rs b/scraper/clubs/src/domain/category.rs new file mode 100644 index 000000000..702f021ff --- /dev/null +++ b/scraper/clubs/src/domain/category.rs @@ -0,0 +1,69 @@ +use once_cell::sync::Lazy; +use strum::IntoEnumIterator; +use strum_macros::{Display, EnumIter}; + +use crate::domain::tag::{ + ArtsAndCreativityTag, CommunityServiceAndAdvocacyTag, CulturalAndIdentityTag, + MediaAndCommunicationTag, PreProfessionalTag, ScienceAndTechnologyTag, SportsAndRecreationTag, + Tag, +}; + +#[derive(Debug, EnumIter, Display)] +pub enum CategoryExample { + PreProfessional(PreProfessionalTag), + CulturalAndIdentity(CulturalAndIdentityTag), + ArtsAndCreativity(ArtsAndCreativityTag), + SportsAndRecreation(SportsAndRecreationTag), + ScienceAndTechnology(ScienceAndTechnologyTag), + CommunityServiceAndAdvocacy(CommunityServiceAndAdvocacyTag), + MediaAndCommunication(MediaAndCommunicationTag), +} + +#[derive(Debug)] +pub struct Category { + pub id: uuid::Uuid, + pub category: CategoryExample, + pub tags: Vec, +} + +macro_rules! match_category { + ($expr:expr, $id:expr, $( $variant:ident => $type:ty ),*) => {{ + fn create_tags_for_category( + category_id: uuid::Uuid, + ) -> Vec { + T::iter() + .map(|tag| Tag::new(&tag.to_string(), category_id)) + .collect() + } + + match $expr { + $( + CategoryExample::$variant(_) => create_tags_for_category::<$type>($id), + )* + } + }}; +} + +pub static CATEGORIES: Lazy> = Lazy::new(|| { + CategoryExample::iter() + .map(|category_example| { + let id = uuid::Uuid::new_v4(); + + let tags = match_category! 
{ category_example, id, + PreProfessional => PreProfessionalTag, + CulturalAndIdentity => CulturalAndIdentityTag, + ArtsAndCreativity => ArtsAndCreativityTag, + SportsAndRecreation => SportsAndRecreationTag, + ScienceAndTechnology => ScienceAndTechnologyTag, + CommunityServiceAndAdvocacy => CommunityServiceAndAdvocacyTag, + MediaAndCommunication => MediaAndCommunicationTag + }; + + Category { + id, + category: category_example, + tags, + } + }) + .collect() +}); diff --git a/scraper/clubs/src/domain/category_tag.rs b/scraper/clubs/src/domain/category_tag.rs deleted file mode 100644 index 1954211d3..000000000 --- a/scraper/clubs/src/domain/category_tag.rs +++ /dev/null @@ -1,165 +0,0 @@ -use once_cell::sync::Lazy; -use strum::IntoEnumIterator; -use strum_macros::Display; -use strum_macros::EnumIter; - -#[derive(Debug)] -pub struct Tag { - pub category: Category, - pub name: String, -} - -#[derive(Debug)] -pub enum Category { - PreProfessional, - CulturalAndIdentity, - ArtsAndCreativity, - SportsAndRecreation, - ScienceAndTechnology, - CommunityServiceAndAdvocacy, - MediaAndCommunication, -} - -fn tags_for_category() -> Vec { - T::iter() - .map(|item| Tag { - category: T::category(), - name: item.to_string(), - }) - .collect() -} - -pub static TAGS: Lazy> = Lazy::new(|| { - let mut tags = Vec::new(); - - tags.extend(tags_for_category::()); - tags.extend(tags_for_category::()); - tags.extend(tags_for_category::()); - tags.extend(tags_for_category::()); - tags.extend(tags_for_category::()); - tags.extend(tags_for_category::()); - tags.extend(tags_for_category::()); - - tags -}); - -trait TagCategory { - fn category() -> Category; -} - -#[derive(EnumIter, Display)] -pub enum PreProfessional { - Premed, - Prelaw, - Other, -} - -impl TagCategory for PreProfessional { - fn category() -> Category { - Category::PreProfessional - } -} - -#[derive(EnumIter, Display)] -pub enum CulturalAndIdentity { - Judaism, - Christianity, - Hinduism, - Islam, - LatinAmerica, - 
AfricanAmerican, - AsianAmerican, - LGBTQ, - Other, -} - -impl TagCategory for CulturalAndIdentity { - fn category() -> Category { - Category::CulturalAndIdentity - } -} - -#[derive(EnumIter, Display)] -pub enum ArtsAndCreativity { - PerformingArts, - VisualArts, - CreativeWriting, - Music, - Other, -} - -impl TagCategory for ArtsAndCreativity { - fn category() -> Category { - Category::ArtsAndCreativity - } -} - -#[derive(EnumIter, Display)] -pub enum SportsAndRecreation { - Soccer, - Hiking, - Climbing, - Lacrosse, - Other, -} - -impl TagCategory for SportsAndRecreation { - fn category() -> Category { - Category::SportsAndRecreation - } -} - -#[derive(EnumIter, Display)] -pub enum ScienceAndTechnology { - Mathematics, - Physics, - Biology, - Chemistry, - EnvironmentalScience, - Geology, - Neuroscience, - Psychology, - SoftwareEngineering, - ArtificialIntelligence, - DataScience, - MechanicalEngineering, - ElectricalEngineering, - IndustrialEngineering, - Other, -} - -impl TagCategory for ScienceAndTechnology { - fn category() -> Category { - Category::ScienceAndTechnology - } -} - -#[derive(EnumIter, Display)] -pub enum CommunityServiceAndAdvocacy { - Volunteerism, - EnvironmentalAdvocacy, - HumanRights, - CommunityOutreach, - Other, -} - -impl TagCategory for CommunityServiceAndAdvocacy { - fn category() -> Category { - Category::CommunityServiceAndAdvocacy - } -} - -#[derive(EnumIter, Display)] -pub enum MediaAndCommunication { - Journalism, - Broadcasting, - Film, - PublicRelations, - Other, -} - -impl TagCategory for MediaAndCommunication { - fn category() -> Category { - Category::MediaAndCommunication - } -} diff --git a/scraper/clubs/src/domain/club.rs b/scraper/clubs/src/domain/club.rs index f5a9ccb9f..c215751d9 100644 --- a/scraper/clubs/src/domain/club.rs +++ b/scraper/clubs/src/domain/club.rs @@ -8,7 +8,7 @@ use voca_rs::Voca; use crate::scraper::ScrapedClub; -use crate::domain::category_tag::{Tag, TAGS}; +use crate::domain::tag::{Tag, TAGS}; 
#[derive(Debug, PartialEq, EnumIter, Display)] pub enum RecruitmentCycle { @@ -33,26 +33,26 @@ pub enum RecruitmentType { } #[derive(Debug)] -pub struct Club<'a> { - pub name: &'a String, - pub preview: &'a String, +pub struct Club { + pub name: String, + pub preview: String, pub description: String, pub num_members: usize, pub is_recruiting: bool, pub recruitment_cycle: RecruitmentCycle, pub recruitment_type: RecruitmentType, - pub tags: Vec<&'a Tag>, + pub tags: Vec<&'static &'static Tag>, } -impl<'a> From<&'a ScrapedClub> for Club<'a> { +impl<'a> From<&'a ScrapedClub> for Club { fn from(scraped: &'a ScrapedClub) -> Self { let mut rng = thread_rng(); let num_tags = rng.gen_range(1..8); Club { - name: &scraped.name, - preview: &scraped.preview, + name: scraped.name.clone().replace("(Tentative) ", ""), + preview: scraped.preview.clone(), description: scraped.description._strip_tags().replace(" ", " "), num_members: rng.gen_range(1..1024), is_recruiting: rng.gen_bool(0.5), diff --git a/scraper/clubs/src/domain/mod.rs b/scraper/clubs/src/domain/mod.rs index 30ad97579..c46eabc10 100644 --- a/scraper/clubs/src/domain/mod.rs +++ b/scraper/clubs/src/domain/mod.rs @@ -1,2 +1,6 @@ -pub mod category_tag; +pub mod category; pub mod club; +pub mod tag; + +pub use category::Category; +pub use tag::Tag; diff --git a/scraper/clubs/src/domain/tag.rs b/scraper/clubs/src/domain/tag.rs new file mode 100644 index 000000000..a01d082a2 --- /dev/null +++ b/scraper/clubs/src/domain/tag.rs @@ -0,0 +1,114 @@ +use once_cell::sync::Lazy; +use strum_macros::Display; +use strum_macros::EnumIter; + +use crate::domain::category::CATEGORIES; + +#[derive(Debug)] +pub struct Tag { + pub id: uuid::Uuid, + pub name: String, + pub category_id: uuid::Uuid, +} + +impl Tag { + pub fn new(name: &str, category_id: uuid::Uuid) -> Self { + Self { + id: uuid::Uuid::new_v4(), + name: name.to_string(), + category_id, + } + } +} + +#[derive(Debug, Default, EnumIter, Display)] +pub enum PreProfessionalTag { + 
Premed, + Prelaw, + #[default] + Other, +} + +#[derive(Debug, Default, EnumIter, Display)] +pub enum CulturalAndIdentityTag { + Judaism, + Christianity, + Hinduism, + Islam, + LatinAmerica, + AfricanAmerican, + AsianAmerican, + LGBTQ, + #[default] + Other, +} + +#[derive(Debug, Default, EnumIter, Display)] +pub enum ArtsAndCreativityTag { + PerformingArts, + VisualArts, + CreativeWriting, + Music, + #[default] + Other, +} + +#[derive(Debug, Default, EnumIter, Display)] +pub enum SportsAndRecreationTag { + Soccer, + Hiking, + Climbing, + Lacrosse, + #[default] + Other, +} + +#[derive(Debug, Default, EnumIter, Display)] +pub enum ScienceAndTechnologyTag { + Mathematics, + Physics, + Biology, + Chemistry, + EnvironmentalScience, + Geology, + Neuroscience, + Psychology, + SoftwareEngineering, + ArtificialIntelligence, + DataScience, + MechanicalEngineering, + ElectricalEngineering, + IndustrialEngineering, + #[default] + Other, +} + +#[derive(Debug, Default, EnumIter, Display)] +pub enum CommunityServiceAndAdvocacyTag { + Volunteerism, + EnvironmentalAdvocacy, + HumanRights, + CommunityOutreach, + #[default] + Other, +} + +#[derive(Debug, Default, EnumIter, Display)] +pub enum MediaAndCommunicationTag { + Journalism, + Broadcasting, + Film, + PublicRelations, + #[default] + Other, +} + +pub static TAGS: Lazy> = Lazy::new(|| { + let mut tags = Vec::new(); + + CATEGORIES + .iter() + .for_each(|category| category.tags.iter().for_each(|tag| tags.push(tag))); + + tags +}); diff --git a/scraper/clubs/src/dumper/category.rs b/scraper/clubs/src/dumper/category.rs new file mode 100644 index 000000000..ed4f1c45e --- /dev/null +++ b/scraper/clubs/src/dumper/category.rs @@ -0,0 +1,14 @@ +use std::{error::Error, fs::File, io::Write}; + +use crate::domain::Category; + +pub fn dump(categories: &Vec, file: &mut File) -> Result<(), Box> { + for category in categories { + writeln!( + file, + r#"INSERT INTO "categories" ("id", "name") VALUES ('{}', '{}');"#, + category.id, 
category.category + )?; + } + Ok(()) +} diff --git a/scraper/clubs/src/dumper/club.rs b/scraper/clubs/src/dumper/club.rs new file mode 100644 index 000000000..1cb9fd63e --- /dev/null +++ b/scraper/clubs/src/dumper/club.rs @@ -0,0 +1,22 @@ +use std::{error::Error, fs::File, io::Write}; + +use crate::domain::club::Club; + +pub fn dump(clubs: Vec, file: &mut File, parent: uuid::Uuid) -> Result<(), Box> { + for club in clubs { + writeln!( + file, + r#"INSERT INTO "clubs" ("name", "preview", "description", "num_members", "is_recruiting", "recruitment_cycle", "recruitment_type", "parent") VALUES ('{}', '{}', '{}', {}, {}, '{}', '{}', '{}');"#, + club.name.replace('\'', "''"), + club.preview.replace('\'', "''"), + club.description.replace('\'', "''"), + club.num_members, + club.is_recruiting, + club.recruitment_cycle, + club.recruitment_type, + parent, + )?; + } + + Ok(()) +} diff --git a/scraper/clubs/src/dumper/dump.rs b/scraper/clubs/src/dumper/dump.rs new file mode 100644 index 000000000..43a709855 --- /dev/null +++ b/scraper/clubs/src/dumper/dump.rs @@ -0,0 +1,38 @@ +use std::{error::Error, fs::File, io::Write}; + +use chrono::Local; + +use crate::domain::{club::Club, Category, Tag}; + +fn autogen_commenter(file: &mut File) -> Result<(), Box> { + writeln!(file, "-- AUTOGENERATED MOCK DATA, DO NOT MODIFY")?; + writeln!( + file, + "-- GENERATED AT {}", + Local::now().format("%Y-%m-%d %H:%M:%S") + )?; + + Ok(()) +} + +pub fn dump_all( + categories: &Vec, + tags: &Vec<&'static Tag>, + clubs: Vec, + club_parent: uuid::Uuid, + file: &mut File, +) -> Result<(), Box> { + autogen_commenter(file)?; + + writeln!(file, "BEGIN;")?; + + crate::dumper::category::dump(categories, file)?; + + crate::dumper::tag::dump(tags, file)?; + + crate::dumper::club::dump(clubs, file, club_parent)?; + + writeln!(file, "COMMIT;")?; + + Ok(()) +} diff --git a/scraper/clubs/src/dumper/mod.rs b/scraper/clubs/src/dumper/mod.rs index 2752f636b..844485b68 100644 --- a/scraper/clubs/src/dumper/mod.rs 
+++ b/scraper/clubs/src/dumper/mod.rs @@ -1 +1,4 @@ -pub mod sql; +pub mod category; +pub mod club; +pub mod dump; +pub mod tag; diff --git a/scraper/clubs/src/dumper/sql.rs b/scraper/clubs/src/dumper/sql.rs deleted file mode 100644 index e7534f227..000000000 --- a/scraper/clubs/src/dumper/sql.rs +++ /dev/null @@ -1,36 +0,0 @@ -use chrono::Local; -use std::{error::Error, fs::File, io::Write, path::PathBuf}; - -use crate::domain::club::Club; - -pub fn dump(clubs: Vec, path: PathBuf, parent: uuid::Uuid) -> Result<(), Box> { - let mut file = OpenOptions::new().create(true).append(true).open(&path)?; - - writeln!(file, "-- AUTOGENERATED MOCK DATA, DO NOT MODIFY")?; - writeln!( - file, - "-- GENERATED AT {}", - Local::now().format("%Y-%m-%d %H:%M:%S") - )?; - - writeln!(file, "BEGIN;\n")?; - - for club in clubs { - writeln!( - file, - r#"INSERT INTO "clubs" ("name", "preview", "description", "num_members", "is_recruiting", "recruitment_cycle", "recruitment_type", "parent", ) VALUES ('{}', '{}', '{}', {}, {}, '{}', '{}', {});"#, - club.name.replace('\'', "''"), - club.preview.replace('\'', "''"), - club.description.replace('\'', "''"), - club.num_members, - club.is_recruiting, - club.recruitment_cycle, - club.recruitment_type, - parent, - )?; - } - - writeln!(file, "\nCOMMIT;")?; - - Ok(()) -} diff --git a/scraper/clubs/src/dumper/tag.rs b/scraper/clubs/src/dumper/tag.rs new file mode 100644 index 000000000..7393a073f --- /dev/null +++ b/scraper/clubs/src/dumper/tag.rs @@ -0,0 +1,15 @@ +use std::{error::Error, fs::File, io::Write}; + +use crate::domain::tag::Tag; + +pub fn dump(tags: &Vec<&'static Tag>, file: &mut File) -> Result<(), Box> { + for tag in tags { + writeln!( + file, + r#"INSERT INTO "tags" ("id", "name", "category_id") VALUES ('{}', '{}', '{}');"#, + tag.id, tag.name, tag.category_id + )?; + } + + Ok(()) +} diff --git a/scraper/clubs/src/main.rs b/scraper/clubs/src/main.rs index 15e5e0507..2751648a0 100644 --- a/scraper/clubs/src/main.rs +++ 
b/scraper/clubs/src/main.rs @@ -1,13 +1,17 @@ -use std::process::exit; +use std::error::Error; +use std::ops::Deref; +use std::{fs::OpenOptions, process::exit}; use clap::Parser; -use sac_scraper::{cli::Args, domain::club::Club, dumper::sql::dump}; +use sac_club_scraper::domain::category::CATEGORIES; +use sac_club_scraper::domain::tag::TAGS; +use sac_club_scraper::dumper::dump::dump_all; +use sac_club_scraper::{cli::Args, domain::club::Club}; -use reqwest::Error; -use sac_scraper::scraper::ClubsResponse; +use sac_club_scraper::scraper::ClubsResponse; #[tokio::main] -async fn main() -> Result<(), Error> { +async fn main() -> Result<(), Box> { let args = Args::parse(); let response = reqwest::get(format!("https://neu.campuslabs.com/engage/api/discovery/search/organizations?orderBy[0]=UpperName%20asc&top={}&skip=0", args.top_n)).await?; @@ -21,12 +25,18 @@ async fn main() -> Result<(), Error> { let response: ClubsResponse = serde_json::from_str(&body).expect("Failed to deserialize"); - dump( + let mut file = OpenOptions::new() + .create(true) + .append(true) + .open(args.output)?; + + dump_all( + CATEGORIES.deref(), + TAGS.deref(), response.scraped_clubs.iter().map(Club::from).collect(), - args.output, args.parent, - ) - .expect("Failed to dump"); + &mut file, + )?; Ok(()) } From efc38a4bb26369dcd0b55ea54e9996e6950ef030 Mon Sep 17 00:00:00 2001 From: garrettladley Date: Tue, 20 Feb 2024 18:39:28 -0500 Subject: [PATCH 4/4] CI --- .github/workflows/club_scraper.yml | 105 +++++++++++++++++++++++++++++ scraper/clubs/README.md | 14 +++- 2 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/club_scraper.yml diff --git a/.github/workflows/club_scraper.yml b/.github/workflows/club_scraper.yml new file mode 100644 index 000000000..e3b6877bb --- /dev/null +++ b/.github/workflows/club_scraper.yml @@ -0,0 +1,105 @@ +name: Club Scraper + +permissions: read-all + +on: + push: + paths: + - scraper/club/** + - .github/workflows/club_scraper.yml + 
pull_request: + types: [opened] + paths: + - scraper/club/** + - .github/workflows/club_scraper.yml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + MANIFEST_PATH: ./scraper/clubs/Cargo.toml + +jobs: + test: + name: Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: test + args: --manifest-path ${{ env.MANIFEST_PATH }} + fmt: + name: Format + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add rustfmt + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --manifest-path ${{ env.MANIFEST_PATH }} --all -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add clippy + - uses: actions-rs/cargo@v1 + with: + command: clippy + args: --manifest-path ${{ env.MANIFEST_PATH }} --all-targets --all-features -- -D warnings + + check: + name: Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: Swatinem/rust-cache@v1 + with: + key: ${{ runner.os }}-check + - uses: actions-rs/cargo@v1 + with: + command: check + args: --manifest-path ${{ env.MANIFEST_PATH }} + env: + RUSTFLAGS: -D warnings + + coverage: + name: Code Coverage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: cargo install cargo-tarpaulin + - uses: actions-rs/cargo@v1 + with: + command: tarpaulin + args: --manifest-path 
${{ env.MANIFEST_PATH }} --verbose --workspace diff --git a/scraper/clubs/README.md b/scraper/clubs/README.md index ee9c66b32..75ba7036e 100644 --- a/scraper/clubs/README.md +++ b/scraper/clubs/README.md @@ -1,4 +1,16 @@ -# SAC Club Scraper + + +

SAC Club Scraper

+ +
+ +
+ + + Club Scraper Workflow Status + +
A Rust CLI that scrapes clubs from Northeastern's Engage site (https://neu.campuslabs.com/engage) to be used as mock data for natural language search.