From e8f8bbe309def7284e679a9f9e209bd990edd7a3 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sat, 20 Jul 2024 18:15:22 +1000 Subject: [PATCH 01/12] Add datasets API Signed-off-by: Aisuko --- backend/pyproject.toml | 2 + backend/requirements.txt | 621 ++++++++++++++++-- backend/src/api/endpoints.py | 4 +- backend/src/api/routes/chat.py | 12 +- backend/src/api/routes/datasets.py | 49 ++ backend/src/config/settings/const.py | 1 + backend/src/repository/crud/vectors_helper.py | 15 + backend/src/repository/datasets_eng.py | 43 ++ backend/src/repository/rag/chat.py | 163 +---- backend/src/repository/vector_database.py | 18 +- 10 files changed, 714 insertions(+), 214 deletions(-) create mode 100644 backend/src/api/routes/datasets.py create mode 100644 backend/src/repository/crud/vectors_helper.py create mode 100644 backend/src/repository/datasets_eng.py diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 02bf53b..1c9c27d 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -34,6 +34,7 @@ sqlalchemy = "2.0.29" trio = "0.25.0" uvicorn = "0.29.0" openai = "1.35.7" +datasets = "2.18.0" [tool.poetry.dev-dependencies] @@ -66,6 +67,7 @@ uvicorn = "0.29.0" openai = "1.35.7" pre-commit="3.7.0" pytest="8.1.1" +datasets = "2.18.0" [build-system] diff --git a/backend/requirements.txt b/backend/requirements.txt index 366297d..6446263 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,3 +1,83 @@ +aiohttp==3.9.5 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:0605cc2c0088fcaae79f01c913a38611ad09ba68ff482402d3410bf59039bfb8 \ + --hash=sha256:0a158704edf0abcac8ac371fbb54044f3270bdbc93e254a82b6c82be1ef08f3c \ + --hash=sha256:0cbf56238f4bbf49dab8c2dc2e6b1b68502b1e88d335bea59b3f5b9f4c001475 \ + --hash=sha256:1732102949ff6087589408d76cd6dea656b93c896b011ecafff418c9661dc4ed \ + --hash=sha256:18f634d540dd099c262e9f887c8bbacc959847cfe5da7a0e2e1cf3f14dbf2daf \ + --hash=sha256:239f975589a944eeb1bad26b8b140a59a3a320067fb3cd10b75c3092405a1372 \ + --hash=sha256:2faa61a904b83142747fc6a6d7ad8fccff898c849123030f8e75d5d967fd4a81 \ + --hash=sha256:320e8618eda64e19d11bdb3bd04ccc0a816c17eaecb7e4945d01deee2a22f95f \ + --hash=sha256:38d80498e2e169bc61418ff36170e0aad0cd268da8b38a17c4cf29d254a8b3f1 \ + --hash=sha256:3916c8692dbd9d55c523374a3b8213e628424d19116ac4308e434dbf6d95bbdd \ + --hash=sha256:393c7aba2b55559ef7ab791c94b44f7482a07bf7640d17b341b79081f5e5cd1a \ + --hash=sha256:3b7b30258348082826d274504fbc7c849959f1989d86c29bc355107accec6cfb \ + --hash=sha256:3fcb4046d2904378e3aeea1df51f697b0467f2aac55d232c87ba162709478c46 \ + --hash=sha256:4109adee842b90671f1b689901b948f347325045c15f46b39797ae1bf17019de \ + --hash=sha256:4558e5012ee03d2638c681e156461d37b7a113fe13970d438d95d10173d25f78 \ + --hash=sha256:45731330e754f5811c314901cebdf19dd776a44b31927fa4b4dbecab9e457b0c \ + --hash=sha256:4715a9b778f4293b9f8ae7a0a7cef9829f02ff8d6277a39d7f40565c737d3771 \ + --hash=sha256:471f0ef53ccedec9995287f02caf0c068732f026455f07db3f01a46e49d76bbb \ + --hash=sha256:4d3ebb9e1316ec74277d19c5f482f98cc65a73ccd5430540d6d11682cd857430 \ + --hash=sha256:4ff550491f5492ab5ed3533e76b8567f4b37bd2995e780a1f46bca2024223233 \ + --hash=sha256:52c27110f3862a1afbcb2af4281fc9fdc40327fa286c4625dfee247c3ba90156 \ + --hash=sha256:55b39c8684a46e56ef8c8d24faf02de4a2b2ac60d26cee93bc595651ff545de9 \ + --hash=sha256:5a7ee16aab26e76add4afc45e8f8206c95d1d75540f1039b84a03c3b3800dd59 \ + --hash=sha256:5ca51eadbd67045396bc92a4345d1790b7301c14d1848feaac1d6a6c9289e888 \ + --hash=sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c \ + --hash=sha256:60cdbd56f4cad9f69c35eaac0fbbdf1f77b0ff9456cebd4902f3dd1cf096464c \ + --hash=sha256:6380c039ec52866c06d69b5c7aad5478b24ed11696f0e72f6b807cfb261453da \ + --hash=sha256:639d0042b7670222f33b0028de6b4e2fad6451462ce7df2af8aee37dcac55424 \ + --hash=sha256:66331d00fb28dc90aa606d9a54304af76b335ae204d1836f65797d6fe27f1ca2 \ + --hash=sha256:67c3119f5ddc7261d47163ed86d760ddf0e625cd6246b4ed852e82159617b5fb \ + --hash=sha256:694d828b5c41255e54bc2dddb51a9f5150b4eefa9886e38b52605a05d96566e8 \ + --hash=sha256:6ae79c1bc12c34082d92bf9422764f799aee4746fd7a392db46b7fd357d4a17a \ + --hash=sha256:702e2c7c187c1a498a4e2b03155d52658fdd6fda882d3d7fbb891a5cf108bb10 \ + --hash=sha256:714d4e5231fed4ba2762ed489b4aec07b2b9953cf4ee31e9871caac895a839c0 \ + --hash=sha256:7b179eea70833c8dee51ec42f3b4097bd6370892fa93f510f76762105568cf09 \ + --hash=sha256:7f64cbd44443e80094309875d4f9c71d0401e966d191c3d469cde4642bc2e031 \ + --hash=sha256:82a6a97d9771cb48ae16979c3a3a9a18b600a8505b1115cfe354dfb2054468b4 \ + --hash=sha256:84dabd95154f43a2ea80deffec9cb44d2e301e38a0c9d331cc4aa0166fe28ae3 \ + --hash=sha256:8676e8fd73141ded15ea586de0b7cda1542960a7b9ad89b2b06428e97125d4fa \ + --hash=sha256:88e311d98cc0bf45b62fc46c66753a83445f5ab20038bcc1b8a1cc05666f428a \ + --hash=sha256:8b4f72fbb66279624bfe83fd5eb6aea0022dad8eec62b71e7bf63ee1caadeafe \ + --hash=sha256:8c64a6dc3fe5db7b1b4d2b5cb84c4f677768bdc340611eca673afb7cf416ef5a \ + --hash=sha256:8cf142aa6c1a751fcb364158fd710b8a9be874b81889c2bd13aa8893197455e2 \ + --hash=sha256:8d1964eb7617907c792ca00b341b5ec3e01ae8c280825deadbbd678447b127e1 \ + --hash=sha256:93e22add827447d2e26d67c9ac0161756007f152fdc5210277d00a85f6c92323 \ + --hash=sha256:9c69e77370cce2d6df5d12b4e12bdcca60c47ba13d1cbbc8645dd005a20b738b \ + --hash=sha256:9dbc053ac75ccc63dc3a3cc547b98c7258ec35a215a92bd9f983e0aac95d3d5b \ + --hash=sha256:9e3a1ae66e3d0c17cf65c08968a5ee3180c5a95920ec2731f53343fac9bad106 \ + --hash=sha256:a6ea1a5b409a85477fd8e5ee6ad8f0e40bf2844c270955e09360418cfd09abac \ + --hash=sha256:a81b1143d42b66ffc40a441379387076243ef7b51019204fd3ec36b9f69e77d6 \ + --hash=sha256:ad7f2919d7dac062f24d6f5fe95d401597fbb015a25771f85e692d043c9d7832 \ + --hash=sha256:afc52b8d969eff14e069a710057d15ab9ac17cd4b6753042c407dcea0e40bf75 \ + --hash=sha256:b3df71da99c98534be076196791adca8819761f0bf6e08e07fd7da25127150d6 \ + --hash=sha256:c088c4d70d21f8ca5c0b8b5403fe84a7bc8e024161febdd4ef04575ef35d474d \ + --hash=sha256:c26959ca7b75ff768e2776d8055bf9582a6267e24556bb7f7bd29e677932be72 \ + --hash=sha256:c413016880e03e69d166efb5a1a95d40f83d5a3a648d16486592c49ffb76d0db \ + --hash=sha256:c6021d296318cb6f9414b48e6a439a7f5d1f665464da507e8ff640848ee2a58a \ + --hash=sha256:c671dc117c2c21a1ca10c116cfcd6e3e44da7fcde37bf83b2be485ab377b25da \ + --hash=sha256:c7a4b7a6cf5b6eb11e109a9755fd4fda7d57395f8c575e166d363b9fc3ec4678 \ + --hash=sha256:c8a02fbeca6f63cb1f0475c799679057fc9268b77075ab7cf3f1c600e81dd46b \ + --hash=sha256:cd2adf5c87ff6d8b277814a28a535b59e20bfea40a101db6b3bdca7e9926bc24 \ + --hash=sha256:d1469f228cd9ffddd396d9948b8c9cd8022b6d1bf1e40c6f25b0fb90b4f893ed \ + --hash=sha256:d153f652a687a8e95ad367a86a61e8d53d528b0530ef382ec5aaf533140ed00f \ + --hash=sha256:d5ab8e1f6bee051a4bf6195e38a5c13e5e161cb7bad83d8854524798bd9fcd6e \ + --hash=sha256:da00da442a0e31f1c69d26d224e1efd3a1ca5bcbf210978a2ca7426dfcae9f58 \ + --hash=sha256:da22dab31d7180f8c3ac7c7635f3bcd53808f374f6aa333fe0b0b9e14b01f91a \ + --hash=sha256:e0ae53e33ee7476dd3d1132f932eeb39bf6125083820049d06edcdca4381f342 \ + --hash=sha256:e7a6a8354f1b62e15d48e04350f13e726fa08b62c3d7b8401c0a1314f02e3558 \ + --hash=sha256:e9a3d838441bebcf5cf442700e3963f58b5c33f015341f9ea86dcd7d503c07e2 \ + --hash=sha256:edea7d15772ceeb29db4aff55e482d4bcfb6ae160ce144f2682de02f6d693551 \ + --hash=sha256:f22eb3a6c1080d862befa0a89c380b4dafce29dc6cd56083f630073d102eb595 \ + --hash=sha256:f26383adb94da5e7fb388d441bf09c61e5e35f455a3217bfd790c6b6bc64b2ee \ + --hash=sha256:f3c2890ca8c59ee683fd09adf32321a40fe1cf164e3387799efb2acebf090c11 \ + --hash=sha256:f64fd07515dad67f24b6ea4a66ae2876c01031de91c93075b8093f07c0a2d93d \ + --hash=sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7 \ + --hash=sha256:ff84aeb864e0fac81f676be9f4685f0527b660f1efdc40dcede3c251ef1e867f +aiosignal==1.3.1 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc \ + --hash=sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17 alembic==1.13.1 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:2edcc97bed0bd3272611ce3a98d98279e9c209e7186e43e75bbb1b2bdfdbcc43 \ --hash=sha256:4932c8558bf68f2ee92b9bbcb8218671c627064d5b08939437af6d77dc05e595 @@ -91,9 +171,9 @@ attrs==23.2.0 ; python_version >= "3.11" and python_version < "4.0" \ azure-core==1.30.2 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:a14dc210efcd608821aa472d9fb8e8d035d29b68993819147bc290a8ac224472 \ --hash=sha256:cf019c1ca832e96274ae85abd3d9f752397194d9fea3b41487290562ac8abe4a -azure-storage-blob==12.20.0 ; python_version >= "3.11" and python_version < "4.0" \ - --hash=sha256:de6b3bf3a90e9341a6bcb96a2ebe981dffff993e9045818f6549afea827a52a9 \ - --hash=sha256:eeb91256e41d4b5b9bad6a87fd0a8ade07dd58aa52344e2c8d2746e27a017d3b +azure-storage-blob==12.21.0 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:b9722725072f5b7373c0f4dd6d78fbae2bb37bffc5c3e01731ab8c750ee8dd7e \ + --hash=sha256:f9ede187dd5a0ef296b583a7c1861c6938ddd6708d6e70f4203a163c2ab42d43 bcrypt==4.0.1 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:089098effa1bc35dc055366740a067a2fc76987e8ec75349eb9484061c54f535 \ --hash=sha256:08d2947c490093a11416df18043c27abe3921558d2c03e2076ccb28a116cb6d0 \ @@ -302,6 +382,12 @@ cryptography==42.0.8 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801 \ --hash=sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a \ --hash=sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e +datasets==2.18.0 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:cdf8b8c6abf7316377ba4f49f9589a4c74556d6b481afd0abd2284f3d69185cb \ + --hash=sha256:f1bbf0e2896917a914de01cbd37075b14deea3837af87ad0d9f697388ccaeb50 +dill==0.3.8 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca \ + --hash=sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7 distro==1.9.0 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed \ --hash=sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2 @@ -317,6 +403,93 @@ environs==9.5.0 ; python_version >= "3.11" and python_version < "4.0" \ fastapi==0.110.0 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:266775f0dcc95af9d3ef39bad55cff525329a931d5fd51930aadd4f428bf7ff3 \ --hash=sha256:87a1f6fb632a218222c5984be540055346a8f5d8a68e8f6fb647b1dc9934de4b +filelock==3.15.4 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ + --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 +frozenlist==1.4.1 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7 \ + --hash=sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98 \ + --hash=sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad \ + --hash=sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5 \ + --hash=sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae \ + --hash=sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e \ + --hash=sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a \ + --hash=sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701 \ + --hash=sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d \ + --hash=sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6 \ + --hash=sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6 \ + --hash=sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106 \ + --hash=sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75 \ + --hash=sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868 \ + --hash=sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a \ + --hash=sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0 \ + --hash=sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1 \ + --hash=sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826 \ + --hash=sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec \ + --hash=sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6 \ + --hash=sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950 \ + --hash=sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19 \ + --hash=sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0 \ + --hash=sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8 \ + --hash=sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a \ + --hash=sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09 \ + --hash=sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86 \ + --hash=sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c \ + --hash=sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5 \ + --hash=sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b \ + --hash=sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b \ + --hash=sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d \ + --hash=sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0 \ + --hash=sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea \ + --hash=sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776 \ + --hash=sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a \ + --hash=sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897 \ + --hash=sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7 \ + --hash=sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09 \ + --hash=sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9 \ + --hash=sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe \ + --hash=sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd \ + --hash=sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742 \ + --hash=sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09 \ + --hash=sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0 \ + --hash=sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932 \ + --hash=sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1 \ + --hash=sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a \ + --hash=sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49 \ + --hash=sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d \ + --hash=sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7 \ + --hash=sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480 \ + --hash=sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89 \ + --hash=sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e \ + --hash=sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b \ + --hash=sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82 \ + --hash=sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb \ + --hash=sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068 \ + --hash=sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8 \ + --hash=sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b \ + --hash=sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb \ + --hash=sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2 \ + --hash=sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11 \ + --hash=sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b \ + --hash=sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc \ + --hash=sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0 \ + --hash=sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497 \ + --hash=sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17 \ + --hash=sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0 \ + --hash=sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2 \ + --hash=sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439 \ + --hash=sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5 \ + --hash=sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac \ + --hash=sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825 \ + --hash=sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887 \ + --hash=sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced \ + --hash=sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74 +fsspec==2024.2.0 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:817f969556fa5916bc682e02ca2045f96ff7f586d45110fcb76022063ad2c7d8 \ + --hash=sha256:b6ad1a679f760dda52b1168c859d01b7b80648ea6f7f7c7f5a8a91dc3f3ecb84 +fsspec[http]==2024.2.0 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:817f969556fa5916bc682e02ca2045f96ff7f586d45110fcb76022063ad2c7d8 \ + --hash=sha256:b6ad1a679f760dda52b1168c859d01b7b80648ea6f7f7c7f5a8a91dc3f3ecb84 greenlet==3.0.3 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67 \ --hash=sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6 \ @@ -440,6 +613,9 @@ httpcore==1.0.5 ; python_version >= "3.11" and python_version < "4.0" \ httpx==0.27.0 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5 \ --hash=sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5 +huggingface-hub==0.24.0 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:6c7092736b577d89d57b3cdfea026f1b0dc2234ae783fa0d59caf1bf7d52dfa7 \ + --hash=sha256:7ad92edefb93d8145c061f6df8d99df2ff85f8379ba5fac8a95aca0642afa5d7 idna==3.7 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 @@ -519,6 +695,110 @@ marshmallow==3.21.3 ; python_version >= "3.11" and python_version < "4.0" \ minio==7.2.7 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:473d5d53d79f340f3cd632054d0c82d2f93177ce1af2eac34a235bea55708d98 \ --hash=sha256:59d1f255d852fe7104018db75b3bebbd987e538690e680f7c5de835e422de837 +multidict==6.0.5 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556 \ + --hash=sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c \ + --hash=sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29 \ + --hash=sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b \ + --hash=sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8 \ + --hash=sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7 \ + --hash=sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd \ + --hash=sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40 \ + --hash=sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6 \ + --hash=sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3 \ + --hash=sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c \ + --hash=sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9 \ + --hash=sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5 \ + --hash=sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae \ + --hash=sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442 \ + --hash=sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9 \ + --hash=sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc \ + --hash=sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c \ + --hash=sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea \ + --hash=sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5 \ + --hash=sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50 \ + --hash=sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182 \ + --hash=sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453 \ + --hash=sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e \ + --hash=sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600 \ + --hash=sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733 \ + --hash=sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda \ + --hash=sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241 \ + --hash=sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461 \ + --hash=sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e \ + --hash=sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e \ + --hash=sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b \ + --hash=sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e \ + --hash=sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7 \ + --hash=sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386 \ + --hash=sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd \ + --hash=sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9 \ + --hash=sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf \ + --hash=sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee \ + --hash=sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5 \ + --hash=sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a \ + --hash=sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271 \ + --hash=sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54 \ + --hash=sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4 \ + --hash=sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496 \ + --hash=sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb \ + --hash=sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319 \ + --hash=sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3 \ + --hash=sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f \ + --hash=sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527 \ + --hash=sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed \ + --hash=sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604 \ + --hash=sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef \ + --hash=sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8 \ + --hash=sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5 \ + --hash=sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5 \ + --hash=sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626 \ + --hash=sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c \ + --hash=sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d \ + --hash=sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c \ + --hash=sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc \ + --hash=sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc \ + --hash=sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b \ + --hash=sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38 \ + --hash=sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450 \ + --hash=sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1 \ + --hash=sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f \ + --hash=sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3 \ + --hash=sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755 \ + --hash=sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226 \ + --hash=sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a \ + --hash=sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046 \ + --hash=sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf \ + --hash=sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479 \ + --hash=sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e \ + --hash=sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1 \ + --hash=sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a \ + --hash=sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83 \ + --hash=sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929 \ + --hash=sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93 \ + --hash=sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a \ + --hash=sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c \ + --hash=sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44 \ + --hash=sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89 \ + --hash=sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba \ + --hash=sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e \ + --hash=sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da \ + --hash=sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24 \ + --hash=sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423 \ + --hash=sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef +multiprocess==0.70.16 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:0dfd078c306e08d46d7a8d06fb120313d87aa43af60d66da43ffff40b44d2f41 \ + --hash=sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1 \ + --hash=sha256:37b55f71c07e2d741374998c043b9520b626a8dddc8b3129222ca4f1a06ef67a \ + --hash=sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee \ + --hash=sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3 \ + --hash=sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435 \ + --hash=sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a \ + --hash=sha256:ba8c31889abf4511c7308a8c52bb4a30b9d590e7f58523302ba00237702ca054 \ + --hash=sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02 \ + --hash=sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec \ + --hash=sha256:e7b9d0f307cd9bd50851afaac0dba2cb6c44449efff697df7c7645f7d3f2be3a \ + --hash=sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e numpy==2.0.0 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f \ --hash=sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238 \ @@ -695,43 +975,46 @@ psycopg2-binary==2.9.9 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957 \ --hash=sha256:f7fc5a5acafb7d6ccca13bfa8c90f8c51f13d8fb87d95656d3950f0158d3ce53 \ --hash=sha256:f9b5571d33660d5009a8b3c25dc1db560206e2d2f89d3df1cb32d72c0d117d52 -pyarrow==16.1.0 ; python_version >= "3.11" and python_version < "4.0" \ - --hash=sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a \ - --hash=sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2 \ - --hash=sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f \ - --hash=sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2 \ - --hash=sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315 \ - --hash=sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9 \ - --hash=sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b \ - --hash=sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55 \ - --hash=sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15 \ - --hash=sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e \ - --hash=sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f \ - --hash=sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c \ - --hash=sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a \ - --hash=sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa \ - --hash=sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a \ - --hash=sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd \ - --hash=sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628 \ - --hash=sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef \ - --hash=sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e \ - --hash=sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff \ - --hash=sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b \ - --hash=sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c \ - --hash=sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c \ - --hash=sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f \ - --hash=sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3 \ - --hash=sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6 \ - --hash=sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c \ - --hash=sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147 \ - --hash=sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5 \ - --hash=sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7 \ - --hash=sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710 \ - --hash=sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4 \ - --hash=sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed \ - --hash=sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848 \ - --hash=sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83 \ - --hash=sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444 +pyarrow-hotfix==0.6 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945 \ + --hash=sha256:dcc9ae2d220dff0083be6a9aa8e0cdee5182ad358d4931fce825c545e5c89178 +pyarrow==17.0.0 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a \ + --hash=sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca \ + --hash=sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597 \ + --hash=sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c \ + --hash=sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb \ + --hash=sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977 \ + --hash=sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3 \ + --hash=sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687 \ + --hash=sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7 \ + --hash=sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204 \ + --hash=sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28 \ + --hash=sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087 \ + --hash=sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15 \ + --hash=sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc \ + --hash=sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2 \ + --hash=sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155 \ + --hash=sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df \ + --hash=sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22 \ + --hash=sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a \ + --hash=sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b \ + --hash=sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03 \ + --hash=sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda \ + --hash=sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07 \ + --hash=sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204 \ + --hash=sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b \ + --hash=sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c \ + --hash=sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545 \ + --hash=sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655 \ + --hash=sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420 \ + --hash=sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5 \ + --hash=sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4 \ + --hash=sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8 \ + --hash=sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053 \ + --hash=sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145 \ + --hash=sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047 \ + --hash=sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8 pycparser==2.22 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc @@ -888,12 +1171,64 @@ python-slugify==8.0.4 ; python_version >= "3.11" and python_version < "4.0" \ pytz==2024.1 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812 \ --hash=sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319 +pyyaml==6.0.1 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5 \ + --hash=sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc \ + --hash=sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df \ + --hash=sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741 \ + --hash=sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206 \ + --hash=sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27 \ + --hash=sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595 \ + --hash=sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62 \ + --hash=sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98 \ + --hash=sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696 \ + --hash=sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290 \ + --hash=sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9 \ + --hash=sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d \ + --hash=sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6 \ + --hash=sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867 \ + --hash=sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47 \ + --hash=sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486 \ + --hash=sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6 \ + --hash=sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3 \ + --hash=sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007 \ + --hash=sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938 \ + --hash=sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0 \ + --hash=sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c \ + --hash=sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735 \ + --hash=sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d \ + --hash=sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28 \ + --hash=sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4 \ + --hash=sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba \ + --hash=sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8 \ + --hash=sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef \ + --hash=sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5 \ + --hash=sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd \ + --hash=sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3 \ + --hash=sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0 \ + --hash=sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515 \ + --hash=sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c \ + --hash=sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c \ + --hash=sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924 \ + --hash=sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34 \ + --hash=sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43 \ + --hash=sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859 \ + --hash=sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673 \ + --hash=sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54 \ + --hash=sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a \ + --hash=sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b \ + --hash=sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab \ + --hash=sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa \ + --hash=sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c \ + --hash=sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585 \ + --hash=sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d \ + --hash=sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f requests==2.32.3 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 -setuptools==70.3.0 ; python_version >= "3.11" and python_version < "4.0" \ - --hash=sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5 \ - --hash=sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc +setuptools==71.0.4 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:48297e5d393a62b7cb2a10b8f76c63a73af933bd809c9e0d0d6352a1a0135dd8 \ + --hash=sha256:ed2feca703be3bdbd94e6bb17365d91c6935c6b2a8d0bb09b66a2c435ba0b1a5 six==1.16.0 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 @@ -1059,3 +1394,203 @@ uvicorn==0.29.0 ; python_version >= "3.11" and python_version < "4.0" \ win32-setctime==1.1.0 ; python_version >= "3.11" and python_version < "4.0" and sys_platform == "win32" \ --hash=sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2 \ --hash=sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad +xxhash==3.4.1 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:00f2fdef6b41c9db3d2fc0e7f94cb3db86693e5c45d6de09625caad9a469635b \ + --hash=sha256:0379d6cf1ff987cd421609a264ce025e74f346e3e145dd106c0cc2e3ec3f99a9 \ + --hash=sha256:0aac5010869240e95f740de43cd6a05eae180c59edd182ad93bf12ee289484fa \ + --hash=sha256:0c786a6cd74e8765c6809892a0d45886e7c3dc54de4985b4a5eb8b630f3b8e3b \ + --hash=sha256:0e041ce5714f95251a88670c114b748bca3bf80cc72400e9f23e6d0d59cf2681 \ + --hash=sha256:10e0a619cdd1c0980e25eb04e30fe96cf8f4324758fa497080af9c21a6de573f \ + --hash=sha256:11f11357c86d83e53719c592021fd524efa9cf024dc7cb1dfb57bbbd0d8713f2 \ + --hash=sha256:1d03f1c0d16d24ea032e99f61c552cb2b77d502e545187338bea461fde253583 \ + --hash=sha256:1d0ae4c2e7698adef58710d6e7a32ff518b66b98854b1c68e70eee504ad061d8 \ + --hash=sha256:200a5a3ad9c7c0c02ed1484a1d838b63edcf92ff538770ea07456a3732c577f4 \ + --hash=sha256:2070b6d5bbef5ee031666cf21d4953c16e92c2f8a24a94b5c240f8995ba3b1d0 \ + --hash=sha256:21287bcdd299fdc3328cc0fbbdeaa46838a1c05391264e51ddb38a3f5b09611f \ + --hash=sha256:23cfd9ca09acaf07a43e5a695143d9a21bf00f5b49b15c07d5388cadf1f9ce11 \ + --hash=sha256:248d3e83d119770f96003271fe41e049dd4ae52da2feb8f832b7a20e791d2920 \ + --hash=sha256:25dc66be3db54f8a2d136f695b00cfe88018e59ccff0f3b8f545869f376a8a46 \ + --hash=sha256:2a8ba6181514681c2591840d5632fcf7356ab287d4aff1c8dea20f3c78097088 \ + --hash=sha256:2be491723405e15cc099ade1280133ccfbf6322d2ef568494fb7d07d280e7eee \ + --hash=sha256:312eba88ffe0a05e332e3a6f9788b73883752be63f8588a6dc1261a3eaaaf2b2 \ + --hash=sha256:36ad4457644c91a966f6fe137d7467636bdc51a6ce10a1d04f365c70d6a16d7e \ + --hash=sha256:3b685fab18876b14a8f94813fa2ca80cfb5ab6a85d31d5539b7cd749ce9e3624 \ + --hash=sha256:4178f78d70e88f1c4a89ff1ffe9f43147185930bb962ee3979dba15f2b1cc799 \ + --hash=sha256:419ffe34c17ae2df019a4685e8d3934d46b2e0bbe46221ab40b7e04ed9f11137 \ + --hash=sha256:41ddeae47cf2828335d8d991f2d2b03b0bdc89289dc64349d712ff8ce59d0647 \ + --hash=sha256:431625fad7ab5649368c4849d2b49a83dc711b1f20e1f7f04955aab86cd307bc \ + --hash=sha256:43984c0a92f06cac434ad181f329a1445017c33807b7ae4f033878d860a4b0f2 \ + --hash=sha256:450401f42bbd274b519d3d8dcf3c57166913381a3d2664d6609004685039f9d3 \ + --hash=sha256:4603a0f642a1e8d7f3ba5c4c25509aca6a9c1cc16f85091004a7028607ead663 \ + --hash=sha256:4c76a77dbd169450b61c06fd2d5d436189fc8ab7c1571d39265d4822da16df22 \ + --hash=sha256:4cb11d8debab1626181633d184b2372aaa09825bde709bf927704ed72765bed1 \ + --hash=sha256:543c7fcbc02bbb4840ea9915134e14dc3dc15cbd5a30873a7a5bf66039db97ec \ + --hash=sha256:562d8b8f783c6af969806aaacf95b6c7b776929ae26c0cd941d54644ea7ef51e \ + --hash=sha256:58c49083801885273e262c0f5bbeac23e520564b8357fbb18fb94ff09d3d3ea5 \ + --hash=sha256:595b252943b3552de491ff51e5bb79660f84f033977f88f6ca1605846637b7c6 \ + --hash=sha256:5bef2a7dc7b4f4beb45a1edbba9b9194c60a43a89598a87f1a0226d183764189 \ + --hash=sha256:5dab508ac39e0ab988039bc7f962c6ad021acd81fd29145962b068df4148c476 \ + --hash=sha256:6066d88c9329ab230e18998daec53d819daeee99d003955c8db6fc4971b45ca3 \ + --hash=sha256:6127813abc1477f3a83529b6bbcfeddc23162cece76fa69aee8f6a8a97720562 \ + --hash=sha256:64da57d5ed586ebb2ecdde1e997fa37c27fe32fe61a656b77fabbc58e6fbff6e \ + --hash=sha256:665a65c2a48a72068fcc4d21721510df5f51f1142541c890491afc80451636d2 \ + --hash=sha256:672b273040d5d5a6864a36287f3514efcd1d4b1b6a7480f294c4b1d1ee1b8de0 \ + --hash=sha256:696b4e18b7023527d5c50ed0626ac0520edac45a50ec7cf3fc265cd08b1f4c03 \ + --hash=sha256:6a9ff50a3cf88355ca4731682c168049af1ca222d1d2925ef7119c1a78e95b3b \ + --hash=sha256:6d3472fd4afef2a567d5f14411d94060099901cd8ce9788b22b8c6f13c606a93 \ + --hash=sha256:6d42b24d1496deb05dee5a24ed510b16de1d6c866c626c2beb11aebf3be278b9 \ + --hash=sha256:6e66df260fed01ed8ea790c2913271641c58481e807790d9fca8bfd5a3c13844 \ + --hash=sha256:6fa45e8cbfbadb40a920fe9ca40c34b393e0b067082d94006f7f64e70c7490a6 \ + --hash=sha256:719a378930504ab159f7b8e20fa2aa1896cde050011af838af7e7e3518dd82de \ + --hash=sha256:71be94265b6c6590f0018bbf73759d21a41c6bda20409782d8117e76cd0dfa8b \ + --hash=sha256:743612da4071ff9aa4d055f3f111ae5247342931dedb955268954ef7201a71ff \ + --hash=sha256:74fb5cb9406ccd7c4dd917f16630d2e5e8cbbb02fc2fca4e559b2a47a64f4940 \ + --hash=sha256:7688d7c02149a90a3d46d55b341ab7ad1b4a3f767be2357e211b4e893efbaaf6 \ + --hash=sha256:7a97322e9a7440bf3c9805cbaac090358b43f650516486746f7fa482672593df \ + --hash=sha256:8106d88da330f6535a58a8195aa463ef5281a9aa23b04af1848ff715c4398fb4 \ + --hash=sha256:8c59f3e46e7daf4c589e8e853d700ef6607afa037bfad32c390175da28127e8c \ + --hash=sha256:8cc07256eff0795e0f642df74ad096f8c5d23fe66bc138b83970b50fc7f7f6c5 \ + --hash=sha256:911035345932a153c427107397c1518f8ce456f93c618dd1c5b54ebb22e73747 \ + --hash=sha256:91dbfa55346ad3e18e738742236554531a621042e419b70ad8f3c1d9c7a16e7f \ + --hash=sha256:92693c487e39523a80474b0394645b393f0ae781d8db3474ccdcead0559ccf45 \ + --hash=sha256:93805bc3233ad89abf51772f2ed3355097a5dc74e6080de19706fc447da99cd3 \ + --hash=sha256:961d948b7b1c1b6c08484bbce3d489cdf153e4122c3dfb07c2039621243d8795 \ + --hash=sha256:9804b9eb254d4b8cc83ab5a2002128f7d631dd427aa873c8727dba7f1f0d1c2b \ + --hash=sha256:9c0f7b2d547d72c7eda7aa817acf8791f0146b12b9eba1d4432c531fb0352228 \ + --hash=sha256:9ecb6c987b62437c2f99c01e97caf8d25660bf541fe79a481d05732e5236719c \ + --hash=sha256:9f3025a0d5d8cf406a9313cd0d5789c77433ba2004b1c75439b67678e5136537 \ + --hash=sha256:9fd28a9da300e64e434cfc96567a8387d9a96e824a9be1452a1e7248b7763b78 \ + --hash=sha256:a15cbf3a9c40672523bdb6ea97ff74b443406ba0ab9bca10ceccd9546414bd84 \ + --hash=sha256:a162840cf4de8a7cd8720ff3b4417fbc10001eefdd2d21541a8226bb5556e3bb \ + --hash=sha256:a55e0506fdb09640a82ec4f44171273eeabf6f371a4ec605633adb2837b5d9d5 \ + --hash=sha256:a8b4977963926f60b0d4f830941c864bed16aa151206c01ad5c531636da5708e \ + --hash=sha256:a90356ead70d715fe64c30cd0969072de1860e56b78adf7c69d954b43e29d9fa \ + --hash=sha256:aabf37fb8fa27430d50507deeab2ee7b1bcce89910dd10657c38e71fee835594 \ + --hash=sha256:ac56eebb364e44c85e1d9e9cc5f6031d78a34f0092fea7fc80478139369a8b4a \ + --hash=sha256:b2746035f518f0410915e247877f7df43ef3372bf36cfa52cc4bc33e85242641 \ + --hash=sha256:b29728cff2c12f3d9f1d940528ee83918d803c0567866e062683f300d1d2eff3 \ + --hash=sha256:b41edaf05734092f24f48c0958b3c6cbaaa5b7e024880692078c6b1f8247e2fc \ + --hash=sha256:b526015a973bfbe81e804a586b703f163861da36d186627e27524f5427b0d520 \ + --hash=sha256:b5beb1c6a72fdc7584102f42c4d9df232ee018ddf806e8c90906547dfb43b2da \ + --hash=sha256:b736a2a2728ba45017cb67785e03125a79d246462dfa892d023b827007412c52 \ + --hash=sha256:b9097af00ebf429cc7c0e7d2fdf28384e4e2e91008130ccda8d5ae653db71e54 \ + --hash=sha256:bb11628470a6004dc71a09fe90c2f459ff03d611376c1debeec2d648f44cb693 \ + --hash=sha256:bbe750d512982ee7d831838a5dee9e9848f3fb440e4734cca3f298228cc957a6 \ + --hash=sha256:c09c49473212d9c87261d22c74370457cfff5db2ddfc7fd1e35c80c31a8c14ce \ + --hash=sha256:c44d584afdf3c4dbb3277e32321d1a7b01d6071c1992524b6543025fb8f4206f \ + --hash=sha256:c4bbba9b182697a52bc0c9f8ec0ba1acb914b4937cd4a877ad78a3b3eeabefb3 \ + --hash=sha256:c9e1b646af61f1fc7083bb7b40536be944f1ac67ef5e360bca2d73430186971a \ + --hash=sha256:ca7783b20e3e4f3f52f093538895863f21d18598f9a48211ad757680c3bd006f \ + --hash=sha256:d6322c4291c3ff174dcd104fae41500e75dad12be6f3085d119c2c8a80956c51 \ + --hash=sha256:d699b921af0dcde50ab18be76c0d832f803034d80470703700cb7df0fbec2832 \ + --hash=sha256:d77d09a1113899fad5f354a1eb4f0a9afcf58cefff51082c8ad643ff890e30cf \ + --hash=sha256:dd59ed668801c3fae282f8f4edadf6dc7784db6d18139b584b6d9677ddde1b6b \ + --hash=sha256:dfd7a6cc483e20b4ad90224aeb589e64ec0f31e5610ab9957ff4314270b2bf31 \ + --hash=sha256:e01226b6b6a1ffe4e6bd6d08cfcb3ca708b16f02eb06dd44f3c6e53285f03e4f \ + --hash=sha256:e17032f5a4fea0a074717fe33477cb5ee723a5f428de7563e75af64bfc1b1e10 \ + --hash=sha256:e867f68a8f381ea12858e6d67378c05359d3a53a888913b5f7d35fbf68939d5f \ + --hash=sha256:e9f749999ed80f3955a4af0eb18bb43993f04939350b07b8dd2f44edc98ffee9 \ + --hash=sha256:ebbb1616435b4a194ce3466d7247df23499475c7ed4eb2681a1fa42ff766aff6 \ + --hash=sha256:ef2e194262f5db16075caea7b3f7f49392242c688412f386d3c7b07c7733a70a \ + --hash=sha256:ef73a53fe90558a4096e3256752268a8bdc0322f4692ed928b6cd7ce06ad4fe3 \ + --hash=sha256:f1d7c69a1e9ca5faa75546fdd267f214f63f52f12692f9b3a2f6467c9e67d5e7 \ + --hash=sha256:f31ce76489f8601cc7b8713201ce94b4bd7b7ce90ba3353dccce7e9e1fee71fa \ + --hash=sha256:f3ff8dbd0ec97aec842476cb8ccc3e17dd288cd6ce3c8ef38bff83d6eb927817 \ + --hash=sha256:fa122124d2e3bd36581dd78c0efa5f429f5220313479fb1072858188bc2d5ff1 \ + --hash=sha256:faec30437919555b039a8bdbaba49c013043e8f76c999670aef146d33e05b3a0 \ + --hash=sha256:fc6dbd5fc3c9886a9e041848508b7fb65fd82f94cc793253990f81617b61fe49 \ + --hash=sha256:fc860d887c5cb2f524899fb8338e1bb3d5789f75fac179101920d9afddef284b \ + --hash=sha256:fd79d4087727daf4d5b8afe594b37d611ab95dc8e29fe1a7517320794837eb7d \ + --hash=sha256:fd7bddb3a5b86213cc3f2c61500c16945a1b80ecd572f3078ddbbe68f9dabdfb \ + --hash=sha256:fe0a98d990e433013f41827b62be9ab43e3cf18e08b1483fcc343bda0d691182 +yarl==1.9.4 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51 \ + --hash=sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce \ + --hash=sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559 \ + --hash=sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0 \ + --hash=sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81 \ + --hash=sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc \ + --hash=sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4 \ + --hash=sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c \ + --hash=sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130 \ + --hash=sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136 \ + --hash=sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e \ + --hash=sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec \ + --hash=sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7 \ + --hash=sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1 \ + --hash=sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455 \ + --hash=sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099 \ + --hash=sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129 \ + --hash=sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10 \ + --hash=sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142 \ + --hash=sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98 \ + --hash=sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa \ + --hash=sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7 \ + --hash=sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525 \ + --hash=sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c \ + --hash=sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9 \ + --hash=sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c \ + --hash=sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8 \ + --hash=sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b \ + --hash=sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf \ + --hash=sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23 \ + --hash=sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd \ + --hash=sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27 \ + --hash=sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f \ + --hash=sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece \ + --hash=sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434 \ + --hash=sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec \ + --hash=sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff \ + --hash=sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78 \ + --hash=sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d \ + --hash=sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863 \ + --hash=sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53 \ + --hash=sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31 \ + --hash=sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15 \ + --hash=sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5 \ + --hash=sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b \ + --hash=sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57 \ + --hash=sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3 \ + --hash=sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1 \ + --hash=sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f \ + --hash=sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad \ + --hash=sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c \ + --hash=sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7 \ + --hash=sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2 \ + --hash=sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b \ + --hash=sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2 \ + --hash=sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b \ + --hash=sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9 \ + --hash=sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be \ + --hash=sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e \ + --hash=sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984 \ + --hash=sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4 \ + --hash=sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074 \ + --hash=sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2 \ + --hash=sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392 \ + --hash=sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91 \ + --hash=sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541 \ + --hash=sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf \ + --hash=sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572 \ + --hash=sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66 \ + --hash=sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575 \ + --hash=sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14 \ + --hash=sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5 \ + --hash=sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1 \ + --hash=sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e \ + --hash=sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551 \ + --hash=sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17 \ + --hash=sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead \ + --hash=sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0 \ + --hash=sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe \ + --hash=sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234 \ + --hash=sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0 \ + --hash=sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7 \ + --hash=sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34 \ + --hash=sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42 \ + --hash=sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385 \ + --hash=sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78 \ + --hash=sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be \ + --hash=sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958 \ + --hash=sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749 \ + --hash=sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec diff --git a/backend/src/api/endpoints.py b/backend/src/api/endpoints.py index f1bcca3..395ae4f 100644 --- a/backend/src/api/endpoints.py +++ b/backend/src/api/endpoints.py @@ -16,21 +16,21 @@ import fastapi from src.api.routes.account import router as account_router -from src.api.routes.ai_model import router as ai_model_router from src.api.routes.authentication import router as auth_router from src.api.routes.chat import router as chat_router from src.api.routes.file import router as file_router from src.api.routes.train import router as train_router from src.api.routes.version import router as version_router from src.api.routes.health import router as health_router +from src.api.routes.datasets import router as datasets_router router = fastapi.APIRouter() router.include_router(router=account_router) router.include_router(router=auth_router) router.include_router(router=chat_router) -router.include_router(router=ai_model_router) router.include_router(router=train_router) router.include_router(router=file_router) router.include_router(router=version_router) router.include_router(router=health_router) +router.include_router(router=datasets_router) \ No newline at end of file diff --git a/backend/src/api/routes/chat.py b/backend/src/api/routes/chat.py index 07bc79f..43b8b25 100644 --- a/backend/src/api/routes/chat.py +++ b/backend/src/api/routes/chat.py @@ -197,9 +197,15 @@ async def chat( match session.type: case "rag": - # TODO: Implement RAG - pass - case _: + stream_func: ContentStream = rag_chat_repo.inference_with_rag( + session_id=session.id, + input_msg=chat_in_msg.message, + temperature=chat_in_msg.temperature, + top_k=chat_in_msg.top_k, + top_p=chat_in_msg.top_p, + n_predict=chat_in_msg.n_predict, + ) + case _: # default is chat robot stream_func: ContentStream = rag_chat_repo.inference( session_id=session.id, input_msg=chat_in_msg.message, diff --git a/backend/src/api/routes/datasets.py b/backend/src/api/routes/datasets.py new file mode 100644 index 0000000..cd27d9c --- /dev/null +++ b/backend/src/api/routes/datasets.py @@ -0,0 +1,49 @@ +# coding=utf-8 + +# Copyright [2024] [SkywardAI] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import fastapi +from src.models.schemas.dataset import DatasetResponse + +router = fastapi.APIRouter(prefix="/ds", tags=["datasets"]) + + +@router.get( + path="/list", + name="datasets:get-dataset-list", + response_model=DatasetResponse, + status_code=fastapi.status.HTTP_200_OK, +) +async def get_dataset_list() -> list[DatasetResponse]: + pass + + +@router.get( + path="/{name}", + name="datasets:get-dataset-by-name", + response_model=DatasetResponse, + status_code=fastapi.status.HTTP_200_OK, +) +async def get_dataset_by_name(name: str) -> DatasetResponse: + pass + + +@router.post( + path="/{name}", + name="datasets:create-dataset", + response_model=DatasetResponse, + status_code=fastapi.status.HTTP_201_CREATED, +) +async def load_dataset(name: str) -> bool: + pass diff --git a/backend/src/config/settings/const.py b/backend/src/config/settings/const.py index f78adbd..93e4a1c 100644 --- a/backend/src/config/settings/const.py +++ b/backend/src/config/settings/const.py @@ -3,6 +3,7 @@ MAX_SQL_LENGTH = 200 DEFAULT_COLLECTION = "default_collection" +# embedding dimension depending on model DEFAULT_DIM = 384 # DEFAULT MODELS diff --git a/backend/src/repository/crud/vectors_helper.py b/backend/src/repository/crud/vectors_helper.py new file mode 100644 index 0000000..30f9035 --- /dev/null +++ b/backend/src/repository/crud/vectors_helper.py @@ -0,0 +1,15 @@ +# coding=utf-8 + +# Copyright [2024] [SkywardAI] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/backend/src/repository/datasets_eng.py b/backend/src/repository/datasets_eng.py new file mode 100644 index 0000000..badd02e --- /dev/null +++ b/backend/src/repository/datasets_eng.py @@ -0,0 +1,43 @@ +# coding=utf-8 + +# Copyright [2024] [SkywardAI] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datasets import load_dataset +from src.repository.vector_database import vector_db + + +class DatasetEng: + def __init__(self): + pass + + def get_dataset_list(self): + pass + + def get_dataset_by_name(self, name: str): + pass + + @classmethod + async def load_dataset(cls, name: str) -> dict: + """ + Load dataset from the given name, must connect to the internet + + No need to consider the memory usage + """ + + ds = load_dataset(name) + ds_list = ds.to_list() + + await vector_db.create_collection(collection_name=name) + + return await vector_db.insert_list(collection_name=name, data_list=ds_list) diff --git a/backend/src/repository/rag/chat.py b/backend/src/repository/rag/chat.py index 115fb49..8b253e6 100644 --- a/backend/src/repository/rag/chat.py +++ b/backend/src/repository/rag/chat.py @@ -13,17 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import csv +from typing import Any +from collections.abc import AsyncGenerator import loguru import httpx -import re -from src.models.schemas.train import TrainFileIn -from src.config.settings.const import UPLOAD_FILE_PATH, RAG_NUM + from src.repository.rag.base import BaseRAGRepository from src.repository.inference_eng import InferenceHelper from src.utilities.httpkit.httpx_kit import httpx_kit -from typing import Any -from collections.abc import AsyncGenerator class RAGChatModelRepository(BaseRAGRepository): @@ -38,158 +35,6 @@ async def load_model(self, session_id: int, model_name: str) -> bool: return False return True - def search_context(self, query, n_results=RAG_NUM): - """ - Search the context in the vector database - """ - # TODO: Implement the search context function - pass - - async def get_response(self, session_id: int, input_msg: str, chat_repo) -> str: - # context = self.search_context(input_msg) - # TODO: Implement the inference function - pass - - async def load_csv_file(self, file_name: str, model_name: str) -> bool: - # read file named file_name and convert the content into a list of strings @Aisuko - loguru.logger.info(file_name) - loguru.logger.info(model_name) - data = [] - with open(UPLOAD_FILE_PATH + file_name, "r") as file: - # Create a CSV reader - reader = csv.reader(file) - # Iterate over each row in the CSV - for row in reader: - # Add the row to the list - data.extend(row) - loguru.logger.info(f"load_csv_file data_row:{data}") - - # TODO: https://github.com/SkywardAI/chat-backend/issues/171 - # embedding_list = ai_model.encode_string(data) - - # vector_db.insert_list(embedding_list, data) - - return True - - def load_data_set(self, param: TrainFileIn) -> bool: - loguru.logger.info(f"load_data_set param {param}") - if param.directLoad: - self.load_data_set_directly(param=param) - elif param.embedField is None or param.resField is None: - self.load_data_set_all_field(dataset_name=param.dataSet) - else: - self.load_data_set_by_field(param=param) - return True - - def load_data_set_directly(self, param: TrainFileIn) -> bool: - r""" - If the data set is already in the form of embeddings, - this function can be used to load the data set directly into the vector database. - - @param param: the instance of TrainFileIn - - @return: boolean - """ - # reader_dataset=load_dataset(param.dataSet) - # resField = param.resField if param.resField else '0' - # collection_name = self.trim_collection_name(param.dataSet) - # vector_db.create_collection(collection_name = collection_name) - # loguru.logger.info(f"load_data_set_all_field dataset_name:{param.dataSet} into collection_name:{collection_name}") - # count = 0 - # embed_field_list = [] - # res_field_list = [] - # for item in reader_dataset['train']: - # # check contail field - # # if resField not in item or embedField not in item : - # resField_val=item.get(resField, '') - # res_field_list.append(resField_val) - # embedField_val = [value for key, value in item.items() if key != resField] - # embed_field_list.append(embedField_val) - # count += 1 - # if count % LOAD_BATCH_SIZE == 0: - # vector_db.insert_list(embed_field_list, res_field_list, collection_name,start_idx = count) - # embed_field_list = [] - # res_field_list = [] - # loguru.logger.info(f"load_data_set_all_field count:{count}") - # vector_db.insert_list(embed_field_list, res_field_list, collection_name,start_idx = count) - # loguru.logger.info(f"load_data_set_all_field count:{count}") - # loguru.logger.info("Dataset loaded successfully") - # return True - pass - - def load_data_set_all_field(self, dataset_name: str) -> bool: - """ - Load the data set into the vector database - """ - - # reader_dataset=load_dataset(dataset_name) - # collection_name = self.trim_collection_name(dataset_name) - # vector_db.create_collection(collection_name = collection_name) - # loguru.logger.info(f"load_data_set_all_field dataset_name:{dataset_name} into collection_name:{collection_name}") - # count = 0 - # doc_list = [] - # for item_dict in reader_dataset['train']: - # doc_str ='' - # for key, value in item_dict.items(): - # if(isinstance(key, type(value))): - # doc_str += f" {key}:{value}" - # count += 1 - # doc_list.append(doc_str) - # if count % LOAD_BATCH_SIZE == 0: - # embedding_list = ai_model.encode_string(doc_list) - # vector_db.insert_list(embedding_list, doc_list, self.trim_collection_name(dataset_name),start_idx = count) - # loguru.logger.info(f"load_data_set_all_field count:{count}") - # doc_list = [] - # embedding_list = ai_model.encode_string(doc_list) - # vector_db.insert_list(embedding_list, doc_list, self.trim_collection_name(dataset_name),start_idx = count) - # loguru.logger.info(f"load_data_set_all_field count:{count}") - # loguru.logger.info("Dataset loaded successfully") - return True - - def load_data_set_by_field(self, param: TrainFileIn) -> bool: - """ - Load the data set into the vector database - """ - - # reader_dataset=load_dataset(param.dataSet) - # embedField=param.embedField - # resField= param.resField - # collection_name = self.trim_collection_name(param.dataSet) - # vector_db.create_collection(collection_name = collection_name) - # loguru.logger.info(f"load_data_set_all_field dataset_name:{param.dataSet} into collection_name:{collection_name}") - # count = 0 - # embed_field_list = [] - # res_field_list = [] - # for item in reader_dataset['train']: - # # check contail field - # # if resField not in item or embedField not in item : - # embedField_val=item.get(embedField, '') - # resField_val=item.get(resField, '') - # embed_field_list.append(embedField_val) - # res_field_list.append(resField_val) - # count += 1 - # if count % LOAD_BATCH_SIZE == 0: - # embedding_list = ai_model.encode_string(embed_field_list) - # vector_db.insert_list(embedding_list, res_field_list, collection_name,start_idx = count) - # embed_field_list = [] - # res_field_list = [] - # loguru.logger.info(f"load_data_set_all_field count:{count}") - # embedding_list = ai_model.encode_string(embed_field_list) - # vector_db.insert_list(embedding_list, res_field_list, collection_name,start_idx = count) - # loguru.logger.info(f"load_data_set_all_field count:{count}") - # loguru.logger.info("Dataset loaded successfully") - return True - - async def evaluate_response(self, request_msg: str, response_msg: str) -> float: - # evaluate_conbine=[request_msg, response_msg] - # score = ai_model.cross_encoder.predict(evaluate_conbine) - # return score - # TODO - pass - - def trim_collection_name(self, name: str) -> str: - return re.sub(r"\W+", "", name) - def format_prompt(self, prmpt: str, current_context: str = InferenceHelper.instruction) -> str: """ Format the input questions, can be used for saving the conversation history @@ -268,7 +113,7 @@ async def inference_with_rag( n_predict: int = 128, ) -> AsyncGenerator[Any, None]: """ - Inference using RAG model + Inference using RAG Returns: AsyncGenerator[Any, None]: response message diff --git a/backend/src/repository/vector_database.py b/backend/src/repository/vector_database.py index 013020f..cc27080 100644 --- a/backend/src/repository/vector_database.py +++ b/backend/src/repository/vector_database.py @@ -18,7 +18,7 @@ def __init__(self): err = e # loguru.logger.info(f"Exception --- {e}") # print(f"Failed to connect to Milvus: {e}") - time.sleep(10) + time.sleep(5) else: raise Exception(f"Failed to connect to Milvus after 3 attempts:{err}") @@ -42,15 +42,19 @@ def create_collection(self, collection_name=DEFAULT_COLLECTION, dimension=DEFAUL loguru.logger.info(f"Vector Databse --- Milvus: collection {collection_name} exist, dropping..") self.client.drop_collection(collection_name) - self.client.create_collection(collection_name=collection_name, dimension=dimension) + self.client.create_collection( + collection_name=collection_name, + dimension=dimension, + auto_id=True, # enable auto id + enable_dynamic_field=True, # enable dynamic field + vector_field_name="question_embedding", # map vector field name and embedding field name + consistency_level="Strong", # To enable search with latest data + ) loguru.logger.info(f"Vector Database --- Milvus: collection {collection_name} created") - def insert_list(self, embedding, data, collection_name=DEFAULT_COLLECTION, start_idx=0): + def insert_list(self, collection_name: str = DEFAULT_COLLECTION, data_list: list = []) -> dict: try: - for i, item in enumerate(embedding): - self.client.insert( - collection_name=collection_name, data={"id": i + start_idx, "vector": item, "doc": data[i]} - ) + return self.client.insert(collection_name=collection_name, data=data_list) except Exception as e: loguru.logger.info(f"Vector Databse --- Error: {e}") From c0a40a1d4e571d8f667c9fe4d88afdff8e0c04b8 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sat, 20 Jul 2024 23:12:40 +1000 Subject: [PATCH 02/12] Add tokenize Signed-off-by: Aisuko --- backend/src/api/endpoints.py | 2 +- backend/src/api/routes/chat.py | 2 +- backend/src/api/routes/datasets.py | 49 ----------- backend/src/api/routes/rag_datasets.py | 83 +++++++++++++++++++ backend/src/models/schemas/dataset.py | 29 +++++++ backend/src/repository/rag/chat.py | 19 ++++- .../{datasets_eng.py => rag_datasets_eng.py} | 11 ++- backend/src/repository/vector_database.py | 7 +- 8 files changed, 140 insertions(+), 62 deletions(-) delete mode 100644 backend/src/api/routes/datasets.py create mode 100644 backend/src/api/routes/rag_datasets.py rename backend/src/repository/{datasets_eng.py => rag_datasets_eng.py} (75%) diff --git a/backend/src/api/endpoints.py b/backend/src/api/endpoints.py index 395ae4f..dd1425e 100644 --- a/backend/src/api/endpoints.py +++ b/backend/src/api/endpoints.py @@ -22,7 +22,7 @@ from src.api.routes.train import router as train_router from src.api.routes.version import router as version_router from src.api.routes.health import router as health_router -from src.api.routes.datasets import router as datasets_router +from src.api.routes.rag_datasets import router as datasets_router router = fastapi.APIRouter() diff --git a/backend/src/api/routes/chat.py b/backend/src/api/routes/chat.py index 43b8b25..622cb5d 100644 --- a/backend/src/api/routes/chat.py +++ b/backend/src/api/routes/chat.py @@ -206,7 +206,7 @@ async def chat( n_predict=chat_in_msg.n_predict, ) case _: # default is chat robot - stream_func: ContentStream = rag_chat_repo.inference( + stream_func: ContentStream = rag_chat_repo.inference_with_rag( session_id=session.id, input_msg=chat_in_msg.message, temperature=chat_in_msg.temperature, diff --git a/backend/src/api/routes/datasets.py b/backend/src/api/routes/datasets.py deleted file mode 100644 index cd27d9c..0000000 --- a/backend/src/api/routes/datasets.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding=utf-8 - -# Copyright [2024] [SkywardAI] -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import fastapi -from src.models.schemas.dataset import DatasetResponse - -router = fastapi.APIRouter(prefix="/ds", tags=["datasets"]) - - -@router.get( - path="/list", - name="datasets:get-dataset-list", - response_model=DatasetResponse, - status_code=fastapi.status.HTTP_200_OK, -) -async def get_dataset_list() -> list[DatasetResponse]: - pass - - -@router.get( - path="/{name}", - name="datasets:get-dataset-by-name", - response_model=DatasetResponse, - status_code=fastapi.status.HTTP_200_OK, -) -async def get_dataset_by_name(name: str) -> DatasetResponse: - pass - - -@router.post( - path="/{name}", - name="datasets:create-dataset", - response_model=DatasetResponse, - status_code=fastapi.status.HTTP_201_CREATED, -) -async def load_dataset(name: str) -> bool: - pass diff --git a/backend/src/api/routes/rag_datasets.py b/backend/src/api/routes/rag_datasets.py new file mode 100644 index 0000000..16b6fd7 --- /dev/null +++ b/backend/src/api/routes/rag_datasets.py @@ -0,0 +1,83 @@ +# coding=utf-8 + +# Copyright [2024] [SkywardAI] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import fastapi +from src.models.schemas.dataset import RagDatasetCreate, RagDatasetResponse +from src.repository.rag_datasets_eng import DatasetEng + +router = fastapi.APIRouter(prefix="/ds", tags=["datasets"]) + + +@router.get( + path="/list", + name="datasets:get-dataset-list", + response_model=list[RagDatasetResponse], + status_code=fastapi.status.HTTP_200_OK, +) +async def get_dataset_list() -> list[RagDatasetResponse]: + pass + + +@router.get( + path="/{name}", + name="datasets:get-dataset-by-name", + response_model=RagDatasetResponse, + status_code=fastapi.status.HTTP_200_OK, +) +async def get_dataset_by_name(name: str) -> RagDatasetResponse: + pass + + +@router.post( + path="/load", + name="datasets:load-dataset", + response_model=RagDatasetResponse, + status_code=fastapi.status.HTTP_201_CREATED, +) +async def load_dataset( + rag_ds_create: RagDatasetCreate, +) -> RagDatasetResponse: + """ + + Loading the specific dataset into the vector db + + curl -X 'POST' \ + 'http://127.0.0.1:8000/api/ds/load' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "name": "aisuko/squad01", + "des": "string", + "ratio": 0 + }' + + Return: + { + "name": "aisuko/squad01", + "status": "Success" + } + """ + + res:dict=DatasetEng.load_dataset(rag_ds_create.name) + + if res.get('insert_count')>0: + status="Success" + else: + status="Failed" + + return RagDatasetResponse( + name=rag_ds_create.name, + status=status + ) \ No newline at end of file diff --git a/backend/src/models/schemas/dataset.py b/backend/src/models/schemas/dataset.py index fead10c..b11ec53 100644 --- a/backend/src/models/schemas/dataset.py +++ b/backend/src/models/schemas/dataset.py @@ -1,3 +1,19 @@ +# coding=utf-8 + +# Copyright [2024] [SkywardAI] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional import datetime from pydantic import Field @@ -14,3 +30,16 @@ class DatasetResponse(BaseSchemaModel): dataset_name: str = Field(..., title="DataSet Name", description="DataSet Name") created_at: datetime.datetime | None = Field(..., title="Creation time", description="Creation time") updated_at: datetime.datetime | None = Field(..., title="Update time", description="Update time") + + +class RagDatasetCreate(BaseSchemaModel): + name: str = Field(..., title="DataSet Name", description="DataSet Name") + des: str | None = Field(..., title="Details", description="Details") + ratio: Optional[float] = Field(..., title="Ratio", description="Ratio") + +class RagDatasetResponse(BaseSchemaModel): + name: str = Field(..., title="DataSet Name", description="DataSet Name") + # created_at: datetime.datetime | None = Field(..., title="Creation time", description="Creation time") + # updated_at: datetime.datetime | None = Field(..., title="Update time", description="Update time") + # ratio: Optional[float] = Field(..., title="Ratio", description="Ratio") + status: Optional[str] = Field(..., title="Status", description="Status") \ No newline at end of file diff --git a/backend/src/repository/rag/chat.py b/backend/src/repository/rag/chat.py index 8b253e6..6800a0a 100644 --- a/backend/src/repository/rag/chat.py +++ b/backend/src/repository/rag/chat.py @@ -21,6 +21,7 @@ from src.repository.rag.base import BaseRAGRepository from src.repository.inference_eng import InferenceHelper from src.utilities.httpkit.httpx_kit import httpx_kit +from src.repository.vector_database import vector_db class RAGChatModelRepository(BaseRAGRepository): @@ -119,20 +120,32 @@ async def inference_with_rag( AsyncGenerator[Any, None]: response message """ - def get_context_by_question(input_msg: str): + async def get_context_by_question(input_msg: str): """ Get the context from v-db by the question """ # tokenized_input - + async with httpx.AsyncClient() as client: + try: + res=await client.post( + InferenceHelper.tokenizer_url(), + json={"content": input_msg}, + ) + res.raise_for_status() + tokenized_input = res.json().get("tokens") + except Exception as e: + pass # search the context in the vector database + result=await vector_db.search(tokenized_input, 1, collection_name="aisuko_squad01") # combine the context with the input message context = "" return context or InferenceHelper.instruction + + current_context = await get_context_by_question(input_msg) data_with_context = { - "prompt": self.format_prompt(input_msg, get_context_by_question(input_msg)), + "prompt": self.format_prompt(input_msg, current_context), "temperature": temperature, "top_k": top_k, "top_p": top_p, diff --git a/backend/src/repository/datasets_eng.py b/backend/src/repository/rag_datasets_eng.py similarity index 75% rename from backend/src/repository/datasets_eng.py rename to backend/src/repository/rag_datasets_eng.py index badd02e..2af93d7 100644 --- a/backend/src/repository/datasets_eng.py +++ b/backend/src/repository/rag_datasets_eng.py @@ -28,7 +28,7 @@ def get_dataset_by_name(self, name: str): pass @classmethod - async def load_dataset(cls, name: str) -> dict: + def load_dataset(cls, name: str) -> dict: """ Load dataset from the given name, must connect to the internet @@ -36,8 +36,11 @@ async def load_dataset(cls, name: str) -> dict: """ ds = load_dataset(name) - ds_list = ds.to_list() + #TODO: validation isn't make sense, it should be removed + ds_list = ds.get("validation").to_list() - await vector_db.create_collection(collection_name=name) + name=name.replace("/", "_") - return await vector_db.insert_list(collection_name=name, data_list=ds_list) + vector_db.create_collection(collection_name=name) + + return vector_db.insert_list(collection_name=name, data_list=ds_list) diff --git a/backend/src/repository/vector_database.py b/backend/src/repository/vector_database.py index cc27080..b60134e 100644 --- a/backend/src/repository/vector_database.py +++ b/backend/src/repository/vector_database.py @@ -60,19 +60,18 @@ def insert_list(self, collection_name: str = DEFAULT_COLLECTION, data_list: list def search(self, data, n_results, collection_name=DEFAULT_COLLECTION): search_params = {"metric_type": "COSINE", "params": {}} - data_list = data.tolist() res = self.client.search( collection_name=collection_name, - data=data_list, + data=data, limit=n_results, search_params=search_params, - output_fields=["doc"], + output_fields=["title"], ) loguru.logger.info(f"Vector Database --- Result: {res}") sentences = [] for hits in res: for hit in hits: - sentences.append(hit.get("entity").get("doc")) + sentences.append(hit.get("entity").get("title")) return sentences def create_index(self, index_name, index_params, collection_name=DEFAULT_COLLECTION): From da02a1e22fbcc5e96f0258a4de5e77e11775bad8 Mon Sep 17 00:00:00 2001 From: micost Date: Sat, 20 Jul 2024 22:44:23 +0800 Subject: [PATCH 03/12] fix/get chats api Signed-off-by: micost --- backend/src/api/routes/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/api/routes/chat.py b/backend/src/api/routes/chat.py index 622cb5d..00b1476 100644 --- a/backend/src/api/routes/chat.py +++ b/backend/src/api/routes/chat.py @@ -313,7 +313,7 @@ async def get_chathistory( ``` """ current_user = await account_repo.read_account_by_username(username=jwt_payload.username) - if session_repo.verify_session_by_account_id(session_uuid=uuid, account_id=current_user.id) is False: + if await session_repo.verify_session_by_account_id(session_uuid=uuid, account_id=current_user.id) is False: raise http_404_exc_uuid_not_found_request(uuid=uuid) session = await session_repo.read_sessions_by_uuid(session_uuid=uuid) chats = await chat_repo.read_chat_history_by_session_id(id=session.id) From 810b04379c79acde7f3ff09399ca0d82c4474199 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sun, 21 Jul 2024 10:38:08 +1000 Subject: [PATCH 04/12] Add embedding Signed-off-by: Aisuko --- Makefile | 2 ++ backend/src/repository/inference_eng.py | 6 ++++ backend/src/repository/rag/chat.py | 29 ++++++++++--------- backend/src/repository/vector_database.py | 34 ++++++++++++++--------- docker-compose.yaml | 17 ++++++++++++ 5 files changed, 62 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index 4657f9e..10939fd 100644 --- a/Makefile +++ b/Makefile @@ -64,6 +64,7 @@ INFERENCE_ENG:=llamacpp INFERENCE_ENG_PORT:=8080 INFERENCE_ENG_VERSION:=server--b1-2321a5e NUM_CPU_CORES:=8.00 +NUM_CPU_CORES_EMBEDDING:=4.00 # Language model, default is phi3-mini-4k-instruct-q4.gguf # https://github.com/SkywardAI/llama.cpp/blob/9b2f16f8055265c67e074025350736adc1ea0666/tests/test-chat-template.cpp#L91-L92 @@ -121,6 +122,7 @@ env: @echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(FILE_NAME) @echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(FILE_NAME) @echo "NUM_CPU_CORES=$(NUM_CPU_CORES)">> $(FILE_NAME) + @echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)" >> $(FILE_NAME) @echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)">> $(FILE_NAME) @echo "ADMIN_USERNAME=$(ADMIN_USERNAME)">> $(FILE_NAME) @echo "ADMIN_EMAIL=$(ADMIN_EMAIL)">> $(FILE_NAME) diff --git a/backend/src/repository/inference_eng.py b/backend/src/repository/inference_eng.py index d7111be..012d1a3 100644 --- a/backend/src/repository/inference_eng.py +++ b/backend/src/repository/inference_eng.py @@ -60,3 +60,9 @@ def instruct_infer_url(cls) -> str: str: URL for the inference engine """ return f"http://{cls.infer_eng_url}:{cls.infer_eng_port}/completion" + + @classmethod + def instruct_embedding_url(cls) -> str: + """ + """ + return f"http://embedding_eng:8082/embedding" diff --git a/backend/src/repository/rag/chat.py b/backend/src/repository/rag/chat.py index 6800a0a..324c9af 100644 --- a/backend/src/repository/rag/chat.py +++ b/backend/src/repository/rag/chat.py @@ -126,26 +126,29 @@ async def get_context_by_question(input_msg: str): """ # tokenized_input - async with httpx.AsyncClient() as client: - try: - res=await client.post( - InferenceHelper.tokenizer_url(), - json={"content": input_msg}, - ) - res.raise_for_status() - tokenized_input = res.json().get("tokens") - except Exception as e: - pass + + try: + res=await httpx_kit.async_client.post( + InferenceHelper.instruct_embedding_url(), + headers={"Content-Type": "application/json"}, + json={"content": input_msg}, + timeout=httpx.Timeout(timeout=None) + ) + res.raise_for_status() + tokenized_input = res.json().get("embedding") + except Exception as e: + loguru.logger.error(e) # search the context in the vector database - result=await vector_db.search(tokenized_input, 1, collection_name="aisuko_squad01") + # context=vector_db.search(tokenized_input, 1, collection_name="aisuko_squad01") + context="" # combine the context with the input message - context = "" return context or InferenceHelper.instruction current_context = await get_context_by_question(input_msg) + data_with_context = { - "prompt": self.format_prompt(input_msg, current_context), + "prompt": self.format_prompt(input_msg, current_context=""), "temperature": temperature, "top_k": top_k, "top_p": top_p, diff --git a/backend/src/repository/vector_database.py b/backend/src/repository/vector_database.py index b60134e..baf0f54 100644 --- a/backend/src/repository/vector_database.py +++ b/backend/src/repository/vector_database.py @@ -60,19 +60,27 @@ def insert_list(self, collection_name: str = DEFAULT_COLLECTION, data_list: list def search(self, data, n_results, collection_name=DEFAULT_COLLECTION): search_params = {"metric_type": "COSINE", "params": {}} - res = self.client.search( - collection_name=collection_name, - data=data, - limit=n_results, - search_params=search_params, - output_fields=["title"], - ) - loguru.logger.info(f"Vector Database --- Result: {res}") - sentences = [] - for hits in res: - for hit in hits: - sentences.append(hit.get("entity").get("title")) - return sentences + try: + + res = self.client.search( + collection_name=collection_name, + data=data, + limit=n_results, + search_params=search_params, + output_fields=["title"], + ) + + loguru.logger.info(f"Vector Database --- Result: {res}") + sentences = [] + for hits in res: + for hit in hits: + sentences.append(hit.get("entity").get("title")) + return sentences + except Exception as e: + loguru.logger.error(e) + return None + + def create_index(self, index_name, index_params, collection_name=DEFAULT_COLLECTION): self.client.create_index(collection_name, index_name, index_params) diff --git a/docker-compose.yaml b/docker-compose.yaml index ec19f47..f030622 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -87,6 +87,7 @@ services: - ETCD_AUTO_COMPACTION_RETENTION=${ETCD_AUTO_COMPACTION_RETENTION} - ETCD_QUOTA_BACKEND_BYTES=${ETCD_QUOTA_BACKEND_BYTES} - NUM_CPU_CORES=${NUM_CPU_CORES} + - NUM_CPU_CORES_EMBEDDING=${NUM_CPU_CORES_EMBEDDING} volumes: - ./backend/:/app/ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models @@ -163,6 +164,22 @@ services: - 8080:8080 command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"] + embedding_eng: + container_name: embedding_eng + image: gclub/llama.cpp:${INFERENCE_ENG_VERSION} + restart: always + deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md + resources: + reservations: + cpus: "${NUM_CPU_CORES_EMBEDDING}" + volumes: + - "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models" + expose: + - 8080 + ports: + - 8082:8080 + command: ["-m", "models/${LANGUAGE_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"] + rebel: container_name: rebel image: ghcr.io/skywardai/rebel:v0.1.6 From 7cc80e31471e5b64af893e0616f2dcd7d37f08cd Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sun, 21 Jul 2024 10:57:31 +1000 Subject: [PATCH 05/12] fix wrong port Signed-off-by: Aisuko --- backend/src/repository/inference_eng.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/repository/inference_eng.py b/backend/src/repository/inference_eng.py index 012d1a3..28153de 100644 --- a/backend/src/repository/inference_eng.py +++ b/backend/src/repository/inference_eng.py @@ -65,4 +65,4 @@ def instruct_infer_url(cls) -> str: def instruct_embedding_url(cls) -> str: """ """ - return f"http://embedding_eng:8082/embedding" + return f"http://embedding_eng:8080/embedding" From aaf54a9a26c3c917740d6d4161af79f5e825c519 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sun, 21 Jul 2024 11:04:41 +1000 Subject: [PATCH 06/12] Add vector-db search Signed-off-by: Aisuko --- backend/src/repository/rag/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/repository/rag/chat.py b/backend/src/repository/rag/chat.py index 324c9af..1b5ceb2 100644 --- a/backend/src/repository/rag/chat.py +++ b/backend/src/repository/rag/chat.py @@ -139,7 +139,7 @@ async def get_context_by_question(input_msg: str): except Exception as e: loguru.logger.error(e) # search the context in the vector database - # context=vector_db.search(tokenized_input, 1, collection_name="aisuko_squad01") + context=vector_db.search(tokenized_input, 1, collection_name="aisuko_squad01") context="" # combine the context with the input message return context or InferenceHelper.instruction From a9f4e3defbc5ac6891bf43ff6dc7fa66d5ed4822 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sun, 21 Jul 2024 11:36:12 +1000 Subject: [PATCH 07/12] Add embedding model Signed-off-by: Aisuko --- Makefile | 6 ++++++ backend/Dockerfile | 2 +- docker-compose.yaml | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 10939fd..56015bc 100644 --- a/Makefile +++ b/Makefile @@ -72,6 +72,9 @@ LANGUAGE_MODEL_NAME:=Phi-3-mini-4k-instruct-q4.gguf LANGUAGE_MODEL_URL:=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true INSTRUCTION:="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the questions from human." +EMBEDDING_MODEL_NAME:=all-MiniLM-L6-v2-Q4_K_M-v2.gguf +EMBEDDING_MODEL_URL:=https://huggingface.co/aisuko/all-MiniLM-L6-v2-gguf/resolve/main/all-MiniLM-L6-v2-Q4_K_M-v2.gguf?download=true + ADMIN_USERNAME:=admin ADMIN_EMAIL:=admin@admin.com ADMIN_PASS:=admin @@ -129,6 +132,7 @@ env: @echo "ADMIN_PASS=$(ADMIN_PASS)">> $(FILE_NAME) @echo "TIMEZONE=$(TIMEZONE)">> $(FILE_NAME) @echo "INSTRUCTION"=$(INSTRUCTION)>> $(FILE_NAME) + @echo "EMBEDDING_MODEL_NAME"=$(EMBEDDING_MODEL_NAME) >> $(FILE_NAME) .PHONY: prepare @@ -197,6 +201,8 @@ ruff: .PHONY: lm lm: @mkdir -p volumes/models && [ -f volumes/models/$(LANGUAGE_MODEL_NAME) ] || wget -O volumes/models/$(LANGUAGE_MODEL_NAME) $(LANGUAGE_MODEL_URL) + @wget -O volumes/models/$(EMBEDDING_MODEL_NAME) $(EMBEDDING_MODEL_URL) + .PHONY: localinfer localinfer: lm diff --git a/backend/Dockerfile b/backend/Dockerfile index 8101687..b2b293a 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -26,7 +26,7 @@ RUN pip --disable-pip-version-check --no-cache-dir install -r requirements.txt & EXPOSE 8000 -HEALTHCHECK --interval=60s --timeout=30s --retries=5 CMD ["curl", "-f", "http://localhost:8000/api/health"] +HEALTHCHECK --interval=300s --timeout=30s --retries=5 CMD ["curl", "-f", "http://localhost:8000/api/health"] # Execute entrypoint.sh ENTRYPOINT ["./entrypoint.sh"] diff --git a/docker-compose.yaml b/docker-compose.yaml index f030622..2880ca0 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -88,6 +88,7 @@ services: - ETCD_QUOTA_BACKEND_BYTES=${ETCD_QUOTA_BACKEND_BYTES} - NUM_CPU_CORES=${NUM_CPU_CORES} - NUM_CPU_CORES_EMBEDDING=${NUM_CPU_CORES_EMBEDDING} + - EMBEDDING_MODEL_NAME=${EMBEDDING_MODEL_NAME} volumes: - ./backend/:/app/ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models @@ -178,7 +179,7 @@ services: - 8080 ports: - 8082:8080 - command: ["-m", "models/${LANGUAGE_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"] + command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"] rebel: container_name: rebel From 1e227c552f500545c913513fcb62a93a59cf1a86 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sun, 21 Jul 2024 12:14:26 +1000 Subject: [PATCH 08/12] add env to api aggregator Signed-off-by: Aisuko --- backend/src/config/settings/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/src/config/settings/base.py b/backend/src/config/settings/base.py index 5f8e47b..0efbc74 100644 --- a/backend/src/config/settings/base.py +++ b/backend/src/config/settings/base.py @@ -110,6 +110,9 @@ class BackendBaseSettings(BaseSettings): ETCD_QUOTA_BACKEND_BYTES: int = decouple.config("ETCD_QUOTA_BACKEND_BYTES", cast=int) # type: ignore NUM_CPU_CORES: float = decouple.config("NUM_CPU_CORES", cast=float) # type: ignore + EMBEDDING_MODEL_NAME: str=decouple.config("EMBEDDING_MODEL_NAME", cast=str) # type: ignore + NUM_CPU_CORES_EMBEDDING: int=decouple.config("NUM_CPU_CORES_EMBEDDING", cast=str) # type: ignore + class Config(pydantic.ConfigDict): case_sensitive: bool = True env_file: str = f"{str(ROOT_DIR)}/.env" From 40c53cf6a30949e5108d57c87e1588cc6469f15c Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sun, 21 Jul 2024 12:20:43 +1000 Subject: [PATCH 09/12] Add code comment Signed-off-by: Aisuko --- backend/src/api/routes/rag_datasets.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/backend/src/api/routes/rag_datasets.py b/backend/src/api/routes/rag_datasets.py index 16b6fd7..ae2ee7e 100644 --- a/backend/src/api/routes/rag_datasets.py +++ b/backend/src/api/routes/rag_datasets.py @@ -27,9 +27,14 @@ status_code=fastapi.status.HTTP_200_OK, ) async def get_dataset_list() -> list[RagDatasetResponse]: + """ + Waiting for implementing + + """ pass + @router.get( path="/{name}", name="datasets:get-dataset-by-name", @@ -37,6 +42,9 @@ async def get_dataset_list() -> list[RagDatasetResponse]: status_code=fastapi.status.HTTP_200_OK, ) async def get_dataset_by_name(name: str) -> RagDatasetResponse: + """ + Waiting for implementing + """ pass From 22c10c2f38de8c03d9a12ba4132a9c8bcde15d5c Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sun, 21 Jul 2024 13:01:22 +1000 Subject: [PATCH 10/12] reformat code with ruff Signed-off-by: Aisuko --- Makefile | 6 ++++++ backend/src/api/endpoints.py | 2 +- backend/src/api/routes/rag_datasets.py | 20 ++++++++------------ backend/src/config/settings/base.py | 6 ++++-- backend/src/models/schemas/dataset.py | 3 ++- backend/src/repository/inference_eng.py | 9 +++++---- backend/src/repository/rag/chat.py | 19 +++++++------------ backend/src/repository/rag_datasets_eng.py | 4 ++-- backend/src/repository/vector_database.py | 3 --- docker-compose.yaml | 6 ++++-- 10 files changed, 39 insertions(+), 39 deletions(-) diff --git a/Makefile b/Makefile index 56015bc..2f4586a 100644 --- a/Makefile +++ b/Makefile @@ -66,6 +66,10 @@ INFERENCE_ENG_VERSION:=server--b1-2321a5e NUM_CPU_CORES:=8.00 NUM_CPU_CORES_EMBEDDING:=4.00 +# Embedding engine and it uses same version with Inference Engine +EMBEDDING_ENG:=embedding_eng +EMBEDDING_ENG_PORT:=8080 + # Language model, default is phi3-mini-4k-instruct-q4.gguf # https://github.com/SkywardAI/llama.cpp/blob/9b2f16f8055265c67e074025350736adc1ea0666/tests/test-chat-template.cpp#L91-L92 LANGUAGE_MODEL_NAME:=Phi-3-mini-4k-instruct-q4.gguf @@ -124,6 +128,8 @@ env: @echo "INFERENCE_ENG=$(INFERENCE_ENG)">> $(FILE_NAME) @echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(FILE_NAME) @echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(FILE_NAME) + @echo "EMBEDDING_ENG=$(EMBEDDING_ENG)">> $(FILE_NAME) + @echo "EMBEDDING_ENG_PORT=$(EMBEDDING_ENG_PORT)">> $(FILE_NAME) @echo "NUM_CPU_CORES=$(NUM_CPU_CORES)">> $(FILE_NAME) @echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)" >> $(FILE_NAME) @echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)">> $(FILE_NAME) diff --git a/backend/src/api/endpoints.py b/backend/src/api/endpoints.py index dd1425e..8b9d4ca 100644 --- a/backend/src/api/endpoints.py +++ b/backend/src/api/endpoints.py @@ -33,4 +33,4 @@ router.include_router(router=file_router) router.include_router(router=version_router) router.include_router(router=health_router) -router.include_router(router=datasets_router) \ No newline at end of file +router.include_router(router=datasets_router) diff --git a/backend/src/api/routes/rag_datasets.py b/backend/src/api/routes/rag_datasets.py index ae2ee7e..e611194 100644 --- a/backend/src/api/routes/rag_datasets.py +++ b/backend/src/api/routes/rag_datasets.py @@ -29,12 +29,11 @@ async def get_dataset_list() -> list[RagDatasetResponse]: """ Waiting for implementing - + """ pass - @router.get( path="/{name}", name="datasets:get-dataset-by-name", @@ -77,15 +76,12 @@ async def load_dataset( "status": "Success" } """ - - res:dict=DatasetEng.load_dataset(rag_ds_create.name) - if res.get('insert_count')>0: - status="Success" + res: dict = DatasetEng.load_dataset(rag_ds_create.name) + + if res.get("insert_count") > 0: + status = "Success" else: - status="Failed" - - return RagDatasetResponse( - name=rag_ds_create.name, - status=status - ) \ No newline at end of file + status = "Failed" + + return RagDatasetResponse(name=rag_ds_create.name, status=status) diff --git a/backend/src/config/settings/base.py b/backend/src/config/settings/base.py index 0efbc74..659717d 100644 --- a/backend/src/config/settings/base.py +++ b/backend/src/config/settings/base.py @@ -96,6 +96,7 @@ class BackendBaseSettings(BaseSettings): # Configurations for language model LANGUAGE_MODEL_NAME: str = decouple.config("LANGUAGE_MODEL_NAME", cast=str) # type: ignore + EMBEDDING_MODEL_NAME: str = decouple.config("EMBEDDING_MODEL_NAME", cast=str) # type: ignore # Admin setting ADMIN_USERNAME: str = decouple.config("ADMIN_USERNAME", cast=str) # type: ignore @@ -110,8 +111,9 @@ class BackendBaseSettings(BaseSettings): ETCD_QUOTA_BACKEND_BYTES: int = decouple.config("ETCD_QUOTA_BACKEND_BYTES", cast=int) # type: ignore NUM_CPU_CORES: float = decouple.config("NUM_CPU_CORES", cast=float) # type: ignore - EMBEDDING_MODEL_NAME: str=decouple.config("EMBEDDING_MODEL_NAME", cast=str) # type: ignore - NUM_CPU_CORES_EMBEDDING: int=decouple.config("NUM_CPU_CORES_EMBEDDING", cast=str) # type: ignore + EMBEDDING_ENG: str = decouple.config("EMBEDDING_ENG", cast=str) # type: ignore + EMBEDDING_ENG_PORT: int = decouple.config("EMBEDDING_ENG_PORT", cast=int) # type: ignore + NUM_CPU_CORES_EMBEDDING: int = decouple.config("NUM_CPU_CORES_EMBEDDING", cast=str) # type: ignore class Config(pydantic.ConfigDict): case_sensitive: bool = True diff --git a/backend/src/models/schemas/dataset.py b/backend/src/models/schemas/dataset.py index b11ec53..6d8ba67 100644 --- a/backend/src/models/schemas/dataset.py +++ b/backend/src/models/schemas/dataset.py @@ -37,9 +37,10 @@ class RagDatasetCreate(BaseSchemaModel): des: str | None = Field(..., title="Details", description="Details") ratio: Optional[float] = Field(..., title="Ratio", description="Ratio") + class RagDatasetResponse(BaseSchemaModel): name: str = Field(..., title="DataSet Name", description="DataSet Name") # created_at: datetime.datetime | None = Field(..., title="Creation time", description="Creation time") # updated_at: datetime.datetime | None = Field(..., title="Update time", description="Update time") # ratio: Optional[float] = Field(..., title="Ratio", description="Ratio") - status: Optional[str] = Field(..., title="Status", description="Status") \ No newline at end of file + status: Optional[str] = Field(..., title="Status", description="Status") diff --git a/backend/src/repository/inference_eng.py b/backend/src/repository/inference_eng.py index 28153de..3be0fac 100644 --- a/backend/src/repository/inference_eng.py +++ b/backend/src/repository/inference_eng.py @@ -24,6 +24,8 @@ class InferenceHelper: infer_eng_url: pydantic.StrictStr = settings.INFERENCE_ENG infer_eng_port: pydantic.PositiveInt = settings.INFERENCE_ENG_PORT instruction: pydantic.StrictStr = settings.INSTRUCTION + embedding_url: pydantic.StrictStr = settings.EMBEDDING_ENG + embedding_port: pydantic.PositiveInt = settings.EMBEDDING_ENG_PORT def init(self) -> None: raise NotImplementedError("InferenceHelper is a singleton class. Use inference_helper instead.") @@ -60,9 +62,8 @@ def instruct_infer_url(cls) -> str: str: URL for the inference engine """ return f"http://{cls.infer_eng_url}:{cls.infer_eng_port}/completion" - + @classmethod def instruct_embedding_url(cls) -> str: - """ - """ - return f"http://embedding_eng:8080/embedding" + """ """ + return f"http://{cls.embedding_url}:{cls.embedding_port}/embedding" diff --git a/backend/src/repository/rag/chat.py b/backend/src/repository/rag/chat.py index 1b5ceb2..21ea4f0 100644 --- a/backend/src/repository/rag/chat.py +++ b/backend/src/repository/rag/chat.py @@ -125,30 +125,25 @@ async def get_context_by_question(input_msg: str): Get the context from v-db by the question """ - # tokenized_input - try: - res=await httpx_kit.async_client.post( + res = await httpx_kit.async_client.post( InferenceHelper.instruct_embedding_url(), headers={"Content-Type": "application/json"}, json={"content": input_msg}, - timeout=httpx.Timeout(timeout=None) + timeout=httpx.Timeout(timeout=None), ) res.raise_for_status() - tokenized_input = res.json().get("embedding") + embedd_input = res.json().get("embedding") except Exception as e: loguru.logger.error(e) - # search the context in the vector database - context=vector_db.search(tokenized_input, 1, collection_name="aisuko_squad01") - context="" - # combine the context with the input message + # collection name for testing + context = vector_db.search(embedd_input, 1, collection_name="aisuko_squad01") return context or InferenceHelper.instruction - - current_context = await get_context_by_question(input_msg) + current_context = await get_context_by_question(input_msg) data_with_context = { - "prompt": self.format_prompt(input_msg, current_context=""), + "prompt": self.format_prompt(input_msg, current_context=current_context), "temperature": temperature, "top_k": top_k, "top_p": top_p, diff --git a/backend/src/repository/rag_datasets_eng.py b/backend/src/repository/rag_datasets_eng.py index 2af93d7..7eb5fb7 100644 --- a/backend/src/repository/rag_datasets_eng.py +++ b/backend/src/repository/rag_datasets_eng.py @@ -36,10 +36,10 @@ def load_dataset(cls, name: str) -> dict: """ ds = load_dataset(name) - #TODO: validation isn't make sense, it should be removed + # TODO: validation isn't make sense, it should be removed ds_list = ds.get("validation").to_list() - name=name.replace("/", "_") + name = name.replace("/", "_") vector_db.create_collection(collection_name=name) diff --git a/backend/src/repository/vector_database.py b/backend/src/repository/vector_database.py index baf0f54..219dd96 100644 --- a/backend/src/repository/vector_database.py +++ b/backend/src/repository/vector_database.py @@ -61,7 +61,6 @@ def insert_list(self, collection_name: str = DEFAULT_COLLECTION, data_list: list def search(self, data, n_results, collection_name=DEFAULT_COLLECTION): search_params = {"metric_type": "COSINE", "params": {}} try: - res = self.client.search( collection_name=collection_name, data=data, @@ -80,8 +79,6 @@ def search(self, data, n_results, collection_name=DEFAULT_COLLECTION): loguru.logger.error(e) return None - - def create_index(self, index_name, index_params, collection_name=DEFAULT_COLLECTION): self.client.create_index(collection_name, index_name, index_params) diff --git a/docker-compose.yaml b/docker-compose.yaml index 2880ca0..b536bbf 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -77,6 +77,8 @@ services: - INFERENCE_ENG=${INFERENCE_ENG} - INFERENCE_ENG_PORT=${INFERENCE_ENG_PORT} - INFERENCE_ENG_VERSION=${INFERENCE_ENG_VERSION} + - EMBEDDING_ENG=${EMBEDDING_ENG} + - EMBEDDING_ENG_PORT=${EMBEDDING_ENG_PORT} - LANGUAGE_MODEL_NAME=${LANGUAGE_MODEL_NAME} - ADMIN_USERNAME=${ADMIN_USERNAME} - ADMIN_EMAIL=${ADMIN_EMAIL} @@ -150,7 +152,7 @@ services: - "minio" llamacpp: - container_name: llamacpp + container_name: ${INFERENCE_ENG} image: gclub/llama.cpp:${INFERENCE_ENG_VERSION} restart: always deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md @@ -166,7 +168,7 @@ services: command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"] embedding_eng: - container_name: embedding_eng + container_name: ${EMBEDDING_ENG} image: gclub/llama.cpp:${INFERENCE_ENG_VERSION} restart: always deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md From ab4e0ec811a3c849cc95c159872860a2ab0a8589 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sun, 21 Jul 2024 17:19:26 +1000 Subject: [PATCH 11/12] try to fix input as a list Signed-off-by: Aisuko --- backend/src/repository/rag/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/repository/rag/chat.py b/backend/src/repository/rag/chat.py index 21ea4f0..89fc3cf 100644 --- a/backend/src/repository/rag/chat.py +++ b/backend/src/repository/rag/chat.py @@ -137,7 +137,7 @@ async def get_context_by_question(input_msg: str): except Exception as e: loguru.logger.error(e) # collection name for testing - context = vector_db.search(embedd_input, 1, collection_name="aisuko_squad01") + context = vector_db.search(list(embedd_input), 1, collection_name="aisuko_squad01") return context or InferenceHelper.instruction current_context = await get_context_by_question(input_msg) From 3baf287733ffb05bc00c115d323421b46dc223f7 Mon Sep 17 00:00:00 2001 From: Aisuko Date: Sun, 21 Jul 2024 17:25:54 +1000 Subject: [PATCH 12/12] update docker-compose demo Signed-off-by: Aisuko --- docker-compose.demo.yaml | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/docker-compose.demo.yaml b/docker-compose.demo.yaml index 1d902e1..b54275e 100644 --- a/docker-compose.demo.yaml +++ b/docker-compose.demo.yaml @@ -75,6 +75,8 @@ services: - INFERENCE_ENG=${INFERENCE_ENG} - INFERENCE_ENG_PORT=${INFERENCE_ENG_PORT} - INFERENCE_ENG_VERSION=${INFERENCE_ENG_VERSION} + - EMBEDDING_ENG=${EMBEDDING_ENG} + - EMBEDDING_ENG_PORT=${EMBEDDING_ENG_PORT} - LANGUAGE_MODEL_NAME=${LANGUAGE_MODEL_NAME} - ADMIN_USERNAME=${ADMIN_USERNAME} - ADMIN_EMAIL=${ADMIN_EMAIL} @@ -85,6 +87,8 @@ services: - ETCD_AUTO_COMPACTION_RETENTION=${ETCD_AUTO_COMPACTION_RETENTION} - ETCD_QUOTA_BACKEND_BYTES=${ETCD_QUOTA_BACKEND_BYTES} - NUM_CPU_CORES=${NUM_CPU_CORES} + - NUM_CPU_CORES_EMBEDDING=${NUM_CPU_CORES_EMBEDDING} + - EMBEDDING_MODEL_NAME=${EMBEDDING_MODEL_NAME} volumes: - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models expose: @@ -159,7 +163,25 @@ services: - 8080 ports: - 8080:8080 - command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","4096"] + command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"] + + + embedding_eng: + container_name: ${EMBEDDING_ENG} + image: gclub/llama.cpp:${INFERENCE_ENG_VERSION} + restart: always + deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md + resources: + reservations: + cpus: "${NUM_CPU_CORES_EMBEDDING}" + volumes: + - "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models" + expose: + - 8080 + ports: + - 8082:8080 + command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"] + rebel: container_name: rebel