From 9d60f502cbada08f241e70196aeac4a4cdedd092 Mon Sep 17 00:00:00 2001 From: Amit Kesarwani <93291915+kesarwam@users.noreply.github.com> Date: Fri, 10 May 2024 14:21:29 -0700 Subject: [PATCH] Updated Data Lineage demo to use Python Wrapper (#190) Co-authored-by: Iddo Avneri The test that fails (R notebook) is already removed in the futrue version. --- 00_notebooks/data-lineage.ipynb | 891 +++++++++----------------------- 1 file changed, 235 insertions(+), 656 deletions(-) diff --git a/00_notebooks/data-lineage.ipynb b/00_notebooks/data-lineage.ipynb index ad24d23c2..ec93f8405 100644 --- a/00_notebooks/data-lineage.ipynb +++ b/00_notebooks/data-lineage.ipynb @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "79a78d9a-10b0-4ffa-b501-213876abe1ad", "metadata": { "tags": [], @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f6a5b298-1879-4441-a607-0e64289bf90e", "metadata": { "tags": [], @@ -106,9 +106,17 @@ "**(you shouldn't need to change anything in this section, just run it)**" ] }, + { + "cell_type": "markdown", + "id": "fb3f61b0-770c-43f3-a227-787354a1fd2f", + "metadata": {}, + "source": [ + "### lakeFS repository name" + ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "a45fbab3-5f98-46ad-8e12-9021ed2bf81a", "metadata": { "tags": [], @@ -121,6 +129,53 @@ "repo_name = \"data-lineage\"" ] }, + { + "cell_type": "markdown", + "id": "c093db8e-68d9-409f-bde7-73ee4ceca5a5", + "metadata": {}, + "source": [ + "### Versioning Information" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d78c797-911d-42f2-be06-f898f4d31c58", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "productionBranch = \"main\"\n", + "ingestionBranch1 = \"ingest1\"\n", + "ingestionBranch2 = \"ingest2\"\n", + "transformationBranch = \"transformation\"\n", + "newPath = \"partitioned_data\"\n", + "fileName = \"Employees.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "d00b4483-5365-452d-a122-d08c54b0e433", + "metadata": {}, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60cd210f-b9ce-4052-a0cd-b679d8cf43af", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import lakefs\n", + "from assets.lakefs_demo import print_commit" + ] + }, { "cell_type": "markdown", "id": "70fcf734-8b50-4c82-a42d-a16c3a007a38", @@ -128,12 +183,12 @@ "tags": [] }, "source": [ - "### Create lakeFSClient" + "### Set environment variables" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "4795b1e7-761e-4f7a-840b-982f99ff3c6a", "metadata": { "tags": [], @@ -143,17 +198,9 @@ }, "outputs": [], "source": [ - "import lakefs_client\n", - "from lakefs_client.models import *\n", - "from lakefs_client.client import LakeFSClient\n", - "\n", - "# lakeFS credentials and endpoint\n", - "configuration = lakefs_client.Configuration()\n", - "configuration.username = lakefsAccessKey\n", - "configuration.password = lakefsSecretKey\n", - "configuration.host = lakefsEndPoint\n", - "\n", - "lakefs = LakeFSClient(configuration)" + "os.environ[\"LAKECTL_SERVER_ENDPOINT_URL\"] = lakefsEndPoint\n", + "os.environ[\"LAKECTL_CREDENTIALS_ACCESS_KEY_ID\"] = lakefsAccessKey\n", + "os.environ[\"LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY\"] = lakefsSecretKey" ] }, { @@ -161,12 +208,12 @@ "id": "41c991ae-c9d1-4ca8-934e-ea3d80285037", "metadata": {}, "source": [ - "#### Verify lakeFS credentials by getting lakeFS version" + "### Verify lakeFS credentials by getting lakeFS version" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "e3bbe675-3c5e-41c7-8471-7a8a90253827", "metadata": { "tags": [], @@ -174,26 +221,15 @@ "languageId": "python" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Verifying lakeFS credentials…\n", - "…✅lakeFS credentials verified\n", - "\n", - "ℹ️lakeFS version 0.104.0\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Verifying lakeFS credentials…\")\n", "try:\n", - " v=lakefs.config.get_config()\n", + " v=lakefs.client.Client().version\n", "except:\n", " print(\"🛑 failed to get lakeFS version\")\n", "else:\n", - " print(f\"…✅lakeFS credentials verified\\n\\nℹ️lakeFS version {v['version_config']['version']}\")" + " print(f\"…✅lakeFS credentials verified\\n\\nℹ️lakeFS version {v}\")" ] }, { @@ -206,42 +242,15 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "384aa4d3-038a-4a56-b30e-c536e3911478", + "execution_count": null, + "id": "ebe86116-095c-4058-9e1e-aca78680d61d", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Repository data-lineage does not exist, so going to try and create it now.\n", - "Created new repo data-lineage using storage namespace s3://example/data-lineage\n" - ] - } - ], + "outputs": [], "source": [ - "from lakefs_client.exceptions import NotFoundException\n", - "\n", - "try:\n", - " repo=lakefs.repositories.get_repository(repo_name)\n", - " print(f\"Found existing repo {repo.id} using storage namespace {repo.storage_namespace}\")\n", - "except NotFoundException as f:\n", - " print(f\"Repository {repo_name} does not exist, so going to try and create it now.\")\n", - " try:\n", - " repo=lakefs.repositories.create_repository(repository_creation=RepositoryCreation(name=repo_name,\n", - " storage_namespace=f\"{storageNamespace}/{repo_name}\"))\n", - " print(f\"Created new repo {repo.id} using storage namespace {repo.storage_namespace}\")\n", - " except lakefs_client.ApiException as e:\n", - " print(f\"Error creating repo {repo_name}. Error is {e}\")\n", - " os._exit(00)\n", - "except lakefs_client.ApiException as e:\n", - " print(f\"Error getting repo {repo_name}: {e}\")\n", - " os._exit(00)" + "repo = lakefs.Repository(repo_name).create(storage_namespace=f\"{storageNamespace}/{repo_name}\", default_branch=productionBranch, exist_ok=True)\n", + "print(repo)" ] }, { @@ -254,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "6e7196c9-370a-441a-8ea3-02ee03484991", "metadata": { "tags": [], @@ -262,41 +271,7 @@ "languageId": "python" } }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "

SparkSession - in-memory

\n", - " \n", - "
\n", - "

SparkContext

\n", - "\n", - "

Spark UI

\n", - "\n", - "
\n", - "
Version
\n", - "
v3.3.2
\n", - "
Master
\n", - "
local[*]
\n", - "
AppName
\n", - "
lakeFS / Jupyter
\n", - "
\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from pyspark.sql import SparkSession\n", "spark = SparkSession.builder.appName(\"lakeFS / Jupyter\") \\\n", @@ -311,34 +286,6 @@ "spark" ] }, - { - "cell_type": "markdown", - "id": "c093db8e-68d9-409f-bde7-73ee4ceca5a5", - "metadata": {}, - "source": [ - "## Versioning Information" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "3b839850-5954-4631-8e7e-d0dee6d17dde", - "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } - }, - "outputs": [], - "source": [ - "productionBranch = \"main\"\n", - "ingestionBranch1 = \"ingest1\"\n", - "ingestionBranch2 = \"ingest2\"\n", - "transformationBranch = \"transformation\"\n", - "newPath = \"partitioned_data\"\n", - "fileName = \"Employees.csv\"" - ] - }, { "cell_type": "markdown", "id": "addf2911-8cb2-4cad-b9fd-ed874e21721e", @@ -365,69 +312,28 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "86cb3147-4da8-4e77-98ed-3073ee64bc02", + "execution_count": null, + "id": "80d76109-32b2-4f47-ba10-587318c3b66d", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "'565ac7c45d1207c91d85aaea8714013116747d8c8cb684aea0077b9f3a816222'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "lakefs.branches.create_branch(\n", - " repository=repo.id,\n", - " branch_creation=BranchCreation(\n", - " name=ingestionBranch1,\n", - " source=productionBranch))" + "branchIngest1 = repo.branch(ingestionBranch1).create(source_reference=productionBranch, exist_ok=True)\n", + "print(f\"{ingestionBranch1} ref:\", branchIngest1.get_commit().id)" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "d2c1d011-b1ae-444b-88da-bf79f5eb00b8", + "execution_count": null, + "id": "ca915ea4-a5a2-4af1-b24d-ca2a84ab5a93", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'checksum': '4451cd251e4801764528483315b3d2b4',\n", - " 'content_type': 'text/csv',\n", - " 'mtime': 1689579651,\n", - " 'path': 'Employees.csv',\n", - " 'path_type': 'object',\n", - " 'physical_address': 's3://example/data-lineage/data/gmd1dg22tk3c76sjud40/ciqf10q2tk3c76sjudsg',\n", - " 'size_bytes': 771}" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "import os\n", - "contentToUpload = open(f\"/data/{fileName}\", 'rb') # Only a single file per upload which must be named \\\\\\\"content\\\\\\\"\n", - "lakefs.objects.upload_object(\n", - " repository=repo.id,\n", - " branch=ingestionBranch1,\n", - " path=fileName, content=contentToUpload)" + "contentToUpload = open(f\"/data/{fileName}\", 'r').read()\n", + "branchIngest1.object(fileName).upload(data=contentToUpload, mode='wb')" ] }, { @@ -440,43 +346,18 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "87b1708e-7d0a-413c-85c9-d5266cde2e20", + "execution_count": null, + "id": "b2ca11c5-f25a-4588-b59d-a0718d33038f", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'committer': 'everything-bagel',\n", - " 'creation_date': 1689579651,\n", - " 'id': '76021a4d6f0587783abbd6fe3886c9fd641da3d0f20a8c8b77b91f69a142aaa9',\n", - " 'message': 'Ingesting employees IDs',\n", - " 'meta_range_id': '',\n", - " 'metadata': {'::lakefs::codeVersion::url[url:ui]': 'https://github.com/treeverse/lakeFS-samples/blob/668c7d000b8c603b3f30789a8c10616086ef79c1/08-data-lineage/Data%20Lineage.ipynb',\n", - " 'source': 'Employees.csv',\n", - " 'using': 'python_api'},\n", - " 'parents': ['565ac7c45d1207c91d85aaea8714013116747d8c8cb684aea0077b9f3a816222']}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "lakefs.commits.commit(\n", - " repository=repo.id,\n", - " branch=ingestionBranch1,\n", - " commit_creation=CommitCreation(\n", - " message='Ingesting employees IDs',\n", + "ref = branchIngest1.commit(message='Ingesting employees IDs',\n", " metadata={'using': 'python_api',\n", " '::lakefs::codeVersion::url[url:ui]': 'https://github.com/treeverse/lakeFS-samples/blob/668c7d000b8c603b3f30789a8c10616086ef79c1/08-data-lineage/Data%20Lineage.ipynb',\n", - " 'source': 'Employees.csv'}))" + " 'source': 'Employees.csv'})\n", + "print_commit(ref.get_commit())" ] }, { @@ -489,71 +370,30 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "75f32838-5538-4e79-8d33-e548a31b2d1c", + "execution_count": null, + "id": "99c3d390-5773-4c4b-9752-7baa65c47d64", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "'565ac7c45d1207c91d85aaea8714013116747d8c8cb684aea0077b9f3a816222'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "lakefs.branches.create_branch(\n", - " repository=repo.id,\n", - " branch_creation=BranchCreation(\n", - " name=ingestionBranch2,\n", - " source=productionBranch))" + "branchIngest2 = repo.branch(ingestionBranch2).create(source_reference=productionBranch, exist_ok=True)\n", + "print(f\"{ingestionBranch2} ref:\", branchIngest2.get_commit().id)" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "2d929967-2930-4d4c-8116-d0743572c88c", + "execution_count": null, + "id": "0f22a2d3-a2b7-4303-9564-7dca00cdbfe7", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'checksum': '4399afd66bf99ea96717d711ff1624ea',\n", - " 'content_type': 'text/csv',\n", - " 'mtime': 1689579651,\n", - " 'path': 'Salaries.csv',\n", - " 'path_type': 'object',\n", - " 'physical_address': 's3://example/data-lineage/data/gmd1dg22tk3c76sjud40/ciqf10q2tk3c76sjudv0',\n", - " 'size_bytes': 836}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "fileName = \"Salaries.csv\"\n", "\n", - "import os\n", - "contentToUpload = open(f\"/data/{fileName}\", 'rb') # Only a single file per upload which must be named \\\\\\\"content\\\\\\\"\n", - "lakefs.objects.upload_object(\n", - " repository=repo.id,\n", - " branch=ingestionBranch2,\n", - " path=fileName, content=contentToUpload)" + "contentToUpload = open(f\"/data/{fileName}\", 'r').read()\n", + "branchIngest2.object(fileName).upload(data=contentToUpload, mode='wb')" ] }, { @@ -568,43 +408,18 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "de98ea2d-02af-4e77-b12e-718331def467", + "execution_count": null, + "id": "942744f3-3b02-4f32-b38f-f7acc3cb739c", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'committer': 'everything-bagel',\n", - " 'creation_date': 1689579652,\n", - " 'id': '34338a6dfc45a53bab344513eb09a6b2d3f4b62aae0550605b50844a397ca253',\n", - " 'message': 'Ingesting Salaries',\n", - " 'meta_range_id': '',\n", - " 'metadata': {'::lakefs::codeVersion::url[url:ui]': 'https://github.com/treeverse/lakeFS-samples/blob/668c7d000b8c603b3f30789a8c10616086ef79c1/08-data-lineage/Data%20Lineage.ipynb',\n", - " 'source': '/Salaries.csv',\n", - " 'using': 'python_api'},\n", - " 'parents': ['565ac7c45d1207c91d85aaea8714013116747d8c8cb684aea0077b9f3a816222']}" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "lakefs.commits.commit(\n", - " repository=repo.id,\n", - " branch=ingestionBranch2,\n", - " commit_creation=CommitCreation(\n", - " message='Ingesting Salaries',\n", + "ref = branchIngest2.commit(message='Ingesting Salaries',\n", " metadata={'using': 'python_api',\n", " '::lakefs::codeVersion::url[url:ui]': 'https://github.com/treeverse/lakeFS-samples/blob/668c7d000b8c603b3f30789a8c10616086ef79c1/08-data-lineage/Data%20Lineage.ipynb',\n", - " 'source': '/Salaries.csv'}))" + " 'source': '/Salaries.csv'})\n", + "print_commit(ref.get_commit())" ] }, { @@ -619,95 +434,46 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "2ae2e52c-9346-4db6-86b7-444e671a92c4", + "execution_count": null, + "id": "24ca42c4-6ae1-43c5-83ac-a982ac0b4008", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "'565ac7c45d1207c91d85aaea8714013116747d8c8cb684aea0077b9f3a816222'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "lakefs.branches.create_branch(\n", - " repository=repo.id,\n", - " branch_creation=BranchCreation(\n", - " name=transformationBranch,\n", - " source=productionBranch))" + "branchTransformation = repo.branch(transformationBranch).create(source_reference=productionBranch, exist_ok=True)\n", + "print(f\"{transformationBranch} ref:\", branchTransformation.get_commit().id)" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "eaeccd29-542e-428b-90e2-e8dcc2300389", + "execution_count": null, + "id": "afe60a16-201a-42a5-8c19-538e6d718f2c", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'reference': 'ff6694ea7c30408aae2fa97cc614d52ecb0dcb2ee55cd0b624767d5b696ca4fe'}" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "lakefs.refs.merge_into_branch(\n", - " repository=repo.id,\n", - " source_ref=ingestionBranch1, \n", - " destination_branch=transformationBranch)" + "res = branchIngest1.merge_into(branchTransformation)\n", + "print(res)" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "51963bf1-bfe4-4d48-9f74-d81340b0467a", + "execution_count": null, + "id": "344dc5ee-c735-444c-8363-9f5dd9344812", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'reference': 'bdabb4a97a4c7cfc0f266ff6fa8777a327164a959c24f77d0339a9e6b76674bb'}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "lakefs.refs.merge_into_branch(\n", - " repository=repo.id,\n", - " source_ref=ingestionBranch2, \n", - " destination_branch=transformationBranch)" + "res = branchIngest2.merge_into(branchTransformation)\n", + "print(res)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "73862f53-86bc-4832-962d-e25515eac1ee", "metadata": { "tags": [], @@ -723,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "192534ef-122c-43b7-9265-1e789b59e808", "metadata": { "tags": [], @@ -731,50 +497,17 @@ "languageId": "python" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---+--------+---+------+\n", - "| id| name|age|gender|\n", - "+---+--------+---+------+\n", - "|101| John| 32| Male|\n", - "|102| Jane| 28|Female|\n", - "|103| Bob| 40| Male|\n", - "|104| Alice| 36|Female|\n", - "|105| Mark| 44| Male|\n", - "|106| Julia| 29|Female|\n", - "|107| David| 50| Male|\n", - "|108| Emily| 34|Female|\n", - "|109| Michael| 41| Male|\n", - "|110|Samantha| 31|Female|\n", - "|111| Chris| 45| Male|\n", - "|112| Megan| 27|Female|\n", - "|113| Adam| 38| Male|\n", - "|114| Olivia| 33|Female|\n", - "|115| Nick| 43| Male|\n", - "|116| Kate| 30|Female|\n", - "|117| Max| 47| Male|\n", - "|118| Chloe| 25|Female|\n", - "|119| Tom| 39| Male|\n", - "|120| Lisa| 35|Female|\n", - "+---+--------+---+------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "dataPath = f\"s3a://{repo.id}/{transformationBranch}/{employeeFile}\"\n", + "dataPath = f\"s3a://{repo_name}/{transformationBranch}/{employeeFile}\"\n", "\n", "df1 = spark.read.option(\"header\", \"true\").csv(dataPath)\n", - "df1.show()\n" + "df1.show()" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "bfc7b024-8047-405e-bc33-bb628c11ce82", "metadata": { "tags": [], @@ -782,42 +515,9 @@ "languageId": "python" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---+---------------+------+\n", - "| id| department|salary|\n", - "+---+---------------+------+\n", - "|101| Sales| 60000|\n", - "|102| Marketing| 55000|\n", - "|103| Engineering| 70000|\n", - "|104| Finance| 65000|\n", - "|105|Human Resources| 50000|\n", - "|106| Sales| 62000|\n", - "|107| Marketing| 57000|\n", - "|108| Engineering| 72000|\n", - "|109| Finance| 66000|\n", - "|110|Human Resources| 51000|\n", - "|111| Sales| 63000|\n", - "|112| Marketing| 58000|\n", - "|113| Engineering| 73000|\n", - "|114| Finance| 67000|\n", - "|115|Human Resources| 52000|\n", - "|116| Sales| 64000|\n", - "|117| Marketing| 59000|\n", - "|118| Engineering| 74000|\n", - "|119| Finance| 68000|\n", - "|120|Human Resources| 53000|\n", - "+---+---------------+------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "dataPath = f\"s3a://{repo.id}/{transformationBranch}/{SalariesFile}\"\n", + "dataPath = f\"s3a://{repo_name}/{transformationBranch}/{SalariesFile}\"\n", "\n", "df2 = spark.read.option(\"header\", \"true\").csv(dataPath)\n", "df2.show()" @@ -825,7 +525,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "5dd447d6-0285-4ccd-8c8d-ea4537d704af", "metadata": { "tags": [], @@ -833,40 +533,7 @@ "languageId": "python" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---+--------+---+------+---------------+------+\n", - "| id| name|age|gender| department|salary|\n", - "+---+--------+---+------+---------------+------+\n", - "|101| John| 32| Male| Sales| 60000|\n", - "|102| Jane| 28|Female| Marketing| 55000|\n", - "|103| Bob| 40| Male| Engineering| 70000|\n", - "|104| Alice| 36|Female| Finance| 65000|\n", - "|105| Mark| 44| Male|Human Resources| 50000|\n", - "|106| Julia| 29|Female| Sales| 62000|\n", - "|107| David| 50| Male| Marketing| 57000|\n", - "|108| Emily| 34|Female| Engineering| 72000|\n", - "|109| Michael| 41| Male| Finance| 66000|\n", - "|110|Samantha| 31|Female|Human Resources| 51000|\n", - "|111| Chris| 45| Male| Sales| 63000|\n", - "|112| Megan| 27|Female| Marketing| 58000|\n", - "|113| Adam| 38| Male| Engineering| 73000|\n", - "|114| Olivia| 33|Female| Finance| 67000|\n", - "|115| Nick| 43| Male|Human Resources| 52000|\n", - "|116| Kate| 30|Female| Sales| 64000|\n", - "|117| Max| 47| Male| Marketing| 59000|\n", - "|118| Chloe| 25|Female| Engineering| 74000|\n", - "|119| Tom| 39| Male| Finance| 68000|\n", - "|120| Lisa| 35|Female|Human Resources| 53000|\n", - "+---+--------+---+------+---------------+------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "mergedDataset = df1.join(df2,[\"id\"])\n", "mergedDataset.show()" @@ -884,7 +551,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "27dde293-232a-4ed6-8d50-10d755b462ef", "metadata": { "tags": [], @@ -894,7 +561,7 @@ }, "outputs": [], "source": [ - "newDataPath = f\"s3a://{repo.id}/{transformationBranch}/{newPath}\"\n", + "newDataPath = f\"s3a://{repo_name}/{transformationBranch}/{newPath}\"\n", "\n", "mergedDataset.write.partitionBy(\"department\").csv(newDataPath)" ] @@ -911,41 +578,17 @@ }, { "cell_type": "code", - "execution_count": 23, - "id": "4f99a266-cc77-4f46-918d-f5931fadb7a7", + "execution_count": null, + "id": "74ac2bb7-6691-4da7-b2bb-8e92c0c45c86", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'committer': 'everything-bagel',\n", - " 'creation_date': 1689579662,\n", - " 'id': 'a5553491b803755f6454b39327243f5e1c0d9cf1b8e2a0db0533798c288155d3',\n", - " 'message': 'Repartitioned by departments',\n", - " 'meta_range_id': '',\n", - " 'metadata': {'::lakefs::codeVersion::url[url:ui]': 'https://github.com/treeverse/lakeFS-samples/blob/668c7d000b8c603b3f30789a8c10616086ef79c1/08-data-lineage/Data%20Lineage.ipynb',\n", - " 'using': 'python_api'},\n", - " 'parents': ['bdabb4a97a4c7cfc0f266ff6fa8777a327164a959c24f77d0339a9e6b76674bb']}" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "lakefs.commits.commit(\n", - " repository=repo.id,\n", - " branch=transformationBranch,\n", - " commit_creation=CommitCreation(\n", - " message='Repartitioned by departments',\n", + "ref = branchTransformation.commit(message='Repartitioned by departments',\n", " metadata={'using': 'python_api',\n", - " '::lakefs::codeVersion::url[url:ui]': 'https://github.com/treeverse/lakeFS-samples/blob/668c7d000b8c603b3f30789a8c10616086ef79c1/08-data-lineage/Data%20Lineage.ipynb'}))" + " '::lakefs::codeVersion::url[url:ui]': 'https://github.com/treeverse/lakeFS-samples/blob/668c7d000b8c603b3f30789a8c10616086ef79c1/08-data-lineage/Data%20Lineage.ipynb'})\n", + "print_commit(ref.get_commit())" ] }, { @@ -960,31 +603,16 @@ }, { "cell_type": "code", - "execution_count": 24, - "id": "c17ec3ec-46a0-45f5-a811-8c4fe1776fd2", + "execution_count": null, + "id": "18709313-6f10-4b22-8b31-0931ad4cc6b2", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'reference': '1e90f483fbcd6fd8c7260cbae13badd9c429efeb61d829d536fc3ba0db8a68bd'}" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "lakefs.refs.merge_into_branch(\n", - " repository=repo.id,\n", - " source_ref=transformationBranch, \n", - " destination_branch=productionBranch)" + "branchProduction = repo.branch(productionBranch)\n", + "res = branchTransformation.merge_into(branchProduction)\n", + "print(res)" ] }, { @@ -999,65 +627,28 @@ }, { "cell_type": "code", - "execution_count": 25, - "id": "e8a75f3f-fd76-4d9c-a3ea-55260787103c", + "execution_count": null, + "id": "142af0aa-f2b4-4447-8b43-2cb723c8a3aa", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'committer': 'everything-bagel',\n", - " 'creation_date': 1689579662,\n", - " 'id': 'a5553491b803755f6454b39327243f5e1c0d9cf1b8e2a0db0533798c288155d3',\n", - " 'message': 'Repartitioned by departments',\n", - " 'meta_range_id': '9e512c33f1136faa0660db932c6a78ee79eacd661a9100481d217c2d5b8bbdd1',\n", - " 'metadata': {'::lakefs::codeVersion::url[url:ui]': 'https://github.com/treeverse/lakeFS-samples/blob/668c7d000b8c603b3f30789a8c10616086ef79c1/08-data-lineage/Data%20Lineage.ipynb',\n", - " 'using': 'python_api'},\n", - " 'parents': ['bdabb4a97a4c7cfc0f266ff6fa8777a327164a959c24f77d0339a9e6b76674bb']}]\n" - ] - } - ], + "outputs": [], "source": [ - "commits = lakefs.refs.log_commits(repository=repo.id, ref='main', amount=1, limit=True, prefixes=['partitioned_data/department=Engineering/'])\n", - "print(commits.results)" + "for log in lakefs.Reference(repository_id=repo_name, reference_id=productionBranch).log(max_amount=1, limit=True, prefixes=['partitioned_data/department=Engineering/']):\n", + " print_commit(log)" ] }, { "cell_type": "code", - "execution_count": 26, - "id": "83bdd86e-1dad-4816-bc33-4674212443fa", + "execution_count": null, + "id": "c17917ba-c0eb-4fbf-be5c-299986e38957", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'committer': 'everything-bagel',\n", - " 'creation_date': 1689579651,\n", - " 'id': '76021a4d6f0587783abbd6fe3886c9fd641da3d0f20a8c8b77b91f69a142aaa9',\n", - " 'message': 'Ingesting employees IDs',\n", - " 'meta_range_id': 'd5bfb714473c6c5c6da1f0ab2dd2df54f78ff94b4a674d88ec9c2283d0fe8e7a',\n", - " 'metadata': {'::lakefs::codeVersion::url[url:ui]': 'https://github.com/treeverse/lakeFS-samples/blob/668c7d000b8c603b3f30789a8c10616086ef79c1/08-data-lineage/Data%20Lineage.ipynb',\n", - " 'source': 'Employees.csv',\n", - " 'using': 'python_api'},\n", - " 'parents': ['565ac7c45d1207c91d85aaea8714013116747d8c8cb684aea0077b9f3a816222']}]\n" - ] - } - ], + "outputs": [], "source": [ - "commits = lakefs.refs.log_commits(repository=repo.id, ref='main', amount=1, objects=['Employees.csv'])\n", - "print(commits.results)\n" + "for log in lakefs.Reference(repository_id=repo_name, reference_id=productionBranch).log(max_amount=1, objects=['Employees.csv']):\n", + " print_commit(log)" ] }, { @@ -1078,7 +669,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "00d292ac", "metadata": { "tags": [], @@ -1086,32 +677,7 @@ "languageId": "python" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ending notebook execution\n" - ] - }, - { - "ename": "SystemExit", - "evalue": "0", - "output_type": "error", - "traceback": [ - "An exception has occurred, use %tb to see the full traceback.\n", - "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3513: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n", - " warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n" - ] - } - ], + "outputs": [], "source": [ "# The section below will only work on lakeFS cloud. \n", "# This cell will stop execution which is useful if the notebook has been \n", @@ -1139,6 +705,39 @@ "## Setup" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "828c8aaa-4854-40ac-9152-1e9bee715eac", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import lakefs_sdk\n", + "from lakefs_sdk.client import LakeFSClient\n", + "from lakefs_sdk.models import GroupCreation, UserCreation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "437167ef-f375-4cc4-bbd3-8278c865c842", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "if not 'lakefsClient' in locals():\n", + " configuration = lakefs_sdk.Configuration(\n", + " host=lakefsEndPoint,\n", + " username=lakefsAccessKey,\n", + " password=lakefsSecretKey,\n", + " )\n", + " lakefsClient = LakeFSClient(configuration)\n", + " print(\"Created lakeFS client.\")" + ] + }, { "cell_type": "markdown", "id": "0a0e50ca-2147-47d4-bd2f-7c625d5a4c3d", @@ -1150,16 +749,13 @@ { "cell_type": "code", "execution_count": null, - "id": "a290f98f-6169-470a-834d-4c22115a0877", + "id": "f9e11efd-9a88-4945-acc7-e5f295bd6df3", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, "outputs": [], "source": [ - "lakefs.auth.create_group(\n", + "EngineeringGroup = lakefsClient.auth_api.create_group(\n", " group_creation=GroupCreation(\n", " id='Engineering'))" ] @@ -1184,7 +780,7 @@ }, "outputs": [], "source": [ - "lakefs.auth.create_user(\n", + "lakefsClient.auth_api.create_user(\n", " user_creation=UserCreation(\n", " id='engineer1'))" ] @@ -1200,7 +796,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c9ef4a30-0e0e-4f1c-98d1-b3e091b7e6ca", + "id": "265b3b9f-8e96-4315-81b7-ebc7c3728718", "metadata": { "tags": [], "vscode": { @@ -1209,8 +805,8 @@ }, "outputs": [], "source": [ - "lakefs.auth.add_group_membership(\n", - " group_id='Engineering',\n", + "lakefsClient.auth_api.add_group_membership(\n", + " group_id=EngineeringGroup.id,\n", " user_id='engineer1')" ] }, @@ -1224,7 +820,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "c70df586-65d5-4e4f-8a75-a0eab849f073", "metadata": { "tags": [], @@ -1232,29 +828,9 @@ "languageId": "python" } }, - "outputs": [ - { - "ename": "NotFoundException", - "evalue": "(404)\nReason: Not Found\nHTTP response headers: HTTPHeaderDict({'Content-Type': 'application/json', 'X-Content-Type-Options': 'nosniff', 'X-Request-Id': '9686a272-3cd5-42e3-b25d-e36be8928c50', 'Date': 'Mon, 17 Jul 2023 07:41:02 GMT', 'Content-Length': '35'})\nHTTP response body: {\"message\":\"engineer1: not found\"}\n\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotFoundException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[28], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m credentials \u001b[38;5;241m=\u001b[39m \u001b[43mlakefs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_credentials\u001b[49m\u001b[43m(\u001b[49m\u001b[43muser_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mengineer1\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(credentials)\n\u001b[1;32m 3\u001b[0m engineer1AccessKey \u001b[38;5;241m=\u001b[39m credentials\u001b[38;5;241m.\u001b[39maccess_key_id\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/lakefs_client/api/auth_api.py:2377\u001b[0m, in \u001b[0;36mAuthApi.create_credentials\u001b[0;34m(self, user_id, **kwargs)\u001b[0m\n\u001b[1;32m 2374\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_host_index\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_host_index\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 2375\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124muser_id\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \\\n\u001b[1;32m 2376\u001b[0m user_id\n\u001b[0;32m-> 2377\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_credentials_endpoint\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall_with_http_info\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/lakefs_client/api_client.py:835\u001b[0m, in \u001b[0;36mEndpoint.call_with_http_info\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 831\u001b[0m header_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapi_client\u001b[38;5;241m.\u001b[39mselect_header_content_type(\n\u001b[1;32m 832\u001b[0m content_type_headers_list)\n\u001b[1;32m 833\u001b[0m params[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mheader\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mContent-Type\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m header_list\n\u001b[0;32m--> 835\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall_api\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 836\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msettings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mendpoint_path\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msettings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mhttp_method\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 837\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpath\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 838\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mquery\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 839\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mheader\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 840\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbody\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 841\u001b[0m \u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mform\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 842\u001b[0m \u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfile\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 843\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msettings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mresponse_type\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 844\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth_settings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msettings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mauth\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 845\u001b[0m \u001b[43m \u001b[49m\u001b[43masync_req\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43masync_req\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 846\u001b[0m \u001b[43m \u001b[49m\u001b[43m_check_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m_check_return_type\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 847\u001b[0m \u001b[43m \u001b[49m\u001b[43m_return_http_data_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m_return_http_data_only\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 848\u001b[0m \u001b[43m \u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m_preload_content\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 849\u001b[0m \u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m_request_timeout\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 850\u001b[0m \u001b[43m \u001b[49m\u001b[43m_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 851\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_formats\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcollection_format\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/lakefs_client/api_client.py:409\u001b[0m, in \u001b[0;36mApiClient.call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, async_req, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)\u001b[0m\n\u001b[1;32m 355\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Makes the HTTP request (synchronous) and returns deserialized data.\u001b[39;00m\n\u001b[1;32m 356\u001b[0m \n\u001b[1;32m 357\u001b[0m \u001b[38;5;124;03mTo make an async_req request, set the async_req parameter.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 406\u001b[0m \u001b[38;5;124;03m then the method will return the response directly.\u001b[39;00m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m async_req:\n\u001b[0;32m--> 409\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__call_api\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 410\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheader_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 411\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauth_settings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 413\u001b[0m \u001b[43m \u001b[49m\u001b[43m_return_http_data_only\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcollection_formats\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 414\u001b[0m \u001b[43m \u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43m_check_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool\u001b[38;5;241m.\u001b[39mapply_async(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__call_api, (resource_path,\n\u001b[1;32m 418\u001b[0m method, path_params,\n\u001b[1;32m 419\u001b[0m query_params,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 427\u001b[0m _request_timeout,\n\u001b[1;32m 428\u001b[0m _host, _check_type))\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/lakefs_client/api_client.py:203\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ApiException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 202\u001b[0m e\u001b[38;5;241m.\u001b[39mbody \u001b[38;5;241m=\u001b[39m e\u001b[38;5;241m.\u001b[39mbody\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 203\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlast_response \u001b[38;5;241m=\u001b[39m response_data\n\u001b[1;32m 207\u001b[0m return_data \u001b[38;5;241m=\u001b[39m response_data\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/lakefs_client/api_client.py:196\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)\u001b[0m\n\u001b[1;32m 192\u001b[0m url \u001b[38;5;241m=\u001b[39m _host \u001b[38;5;241m+\u001b[39m resource_path\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 195\u001b[0m \u001b[38;5;66;03m# perform request and return response\u001b[39;00m\n\u001b[0;32m--> 196\u001b[0m response_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheader_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpost_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_preload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_request_timeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ApiException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 202\u001b[0m e\u001b[38;5;241m.\u001b[39mbody \u001b[38;5;241m=\u001b[39m e\u001b[38;5;241m.\u001b[39mbody\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/lakefs_client/api_client.py:455\u001b[0m, in \u001b[0;36mApiClient.request\u001b[0;34m(self, method, url, query_params, headers, post_params, body, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrest_client\u001b[38;5;241m.\u001b[39mOPTIONS(url,\n\u001b[1;32m 448\u001b[0m query_params\u001b[38;5;241m=\u001b[39mquery_params,\n\u001b[1;32m 449\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m _request_timeout\u001b[38;5;241m=\u001b[39m_request_timeout,\n\u001b[1;32m 453\u001b[0m body\u001b[38;5;241m=\u001b[39mbody)\n\u001b[1;32m 454\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m method \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPOST\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 455\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrest_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPOST\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpost_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_preload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_request_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 461\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 462\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m method \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPUT\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrest_client\u001b[38;5;241m.\u001b[39mPUT(url,\n\u001b[1;32m 464\u001b[0m query_params\u001b[38;5;241m=\u001b[39mquery_params,\n\u001b[1;32m 465\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 468\u001b[0m _request_timeout\u001b[38;5;241m=\u001b[39m_request_timeout,\n\u001b[1;32m 469\u001b[0m body\u001b[38;5;241m=\u001b[39mbody)\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/lakefs_client/rest.py:267\u001b[0m, in \u001b[0;36mRESTClientObject.POST\u001b[0;34m(self, url, headers, query_params, post_params, body, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mPOST\u001b[39m(\u001b[38;5;28mself\u001b[39m, url, headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, query_params\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, post_params\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 266\u001b[0m body\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, _preload_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, _request_timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 267\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPOST\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 270\u001b[0m \u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpost_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[43m \u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_preload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 272\u001b[0m \u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_request_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 273\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/lakefs_client/rest.py:221\u001b[0m, in \u001b[0;36mRESTClientObject.request\u001b[0;34m(self, method, url, query_params, headers, body, post_params, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ForbiddenException(http_resp\u001b[38;5;241m=\u001b[39mr)\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m r\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m404\u001b[39m:\n\u001b[0;32m--> 221\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m NotFoundException(http_resp\u001b[38;5;241m=\u001b[39mr)\n\u001b[1;32m 223\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;241m500\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m r\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m599\u001b[39m:\n\u001b[1;32m 224\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ServiceException(http_resp\u001b[38;5;241m=\u001b[39mr)\n", - "\u001b[0;31mNotFoundException\u001b[0m: (404)\nReason: Not Found\nHTTP response headers: HTTPHeaderDict({'Content-Type': 'application/json', 'X-Content-Type-Options': 'nosniff', 'X-Request-Id': '9686a272-3cd5-42e3-b25d-e36be8928c50', 'Date': 'Mon, 17 Jul 2023 07:41:02 GMT', 'Content-Length': '35'})\nHTTP response body: {\"message\":\"engineer1: not found\"}\n\n" - ] - } - ], + "outputs": [], "source": [ - "credentials = lakefs.auth.create_credentials(user_id='engineer1')\n", + "credentials = lakefsClient.auth_api.create_credentials(user_id='engineer1')\n", "print(credentials)\n", "engineer1AccessKey = credentials.access_key_id\n", "engineer1SecretKey = credentials.secret_access_key" @@ -1263,22 +839,17 @@ { "cell_type": "code", "execution_count": null, - "id": "4e7cd7d6-db1c-4f1a-8910-f6cf9c992a51", + "id": "62b643ab-4a7a-4758-b15b-b288a4f21c9d", "metadata": { - "tags": [], - "vscode": { - "languageId": "python" - } + "tags": [] }, "outputs": [], "source": [ - "# lakeFS credentials and endpoint\n", - "configuration = lakefs_client.Configuration()\n", - "configuration.username = engineer1AccessKey\n", - "configuration.password = engineer1SecretKey\n", - "configuration.host = lakefsEndPoint\n", - "\n", - "# Creating a client for engineer1\n", + "configuration = lakefs_sdk.Configuration(\n", + " host=lakefsEndPoint,\n", + " username=engineer1AccessKey,\n", + " password=engineer1SecretKey,\n", + ")\n", "engineer1Client = LakeFSClient(configuration)\n", "print(\"Created lakeFS client for engineer1.\")" ] @@ -1294,7 +865,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9015d02d-c7bd-4705-a373-579ba1ba47df", + "id": "7d6d407c-ae0a-49c5-b04e-f00d533af4d9", "metadata": { "tags": [], "vscode": { @@ -1303,8 +874,8 @@ }, "outputs": [], "source": [ - "lakefs.auth.attach_policy_to_group(\n", - " group_id='Engineering',\n", + "lakefsClient.auth_api.attach_policy_to_group(\n", + " group_id=EngineeringGroup.id,\n", " policy_id='FSFullAccess')" ] }, @@ -1328,12 +899,20 @@ }, "outputs": [], "source": [ - "engineer1Client.objects.list_objects(\n", - " repository=repo.id,\n", + "engineer1Client.objects_api.list_objects(\n", + " repository=repo_name,\n", " ref='main',\n", " prefix='partitioned_data/department=Finance/'\n", ")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dba1cac-d61a-4e53-bc76-659f4a78d155", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {