diff --git a/dlt/sources/helpers/rest_client/client.py b/dlt/sources/helpers/rest_client/client.py index 6d04373d8d..4b589813fc 100644 --- a/dlt/sources/helpers/rest_client/client.py +++ b/dlt/sources/helpers/rest_client/client.py @@ -96,18 +96,18 @@ def __init__( def _create_request( self, - path: str, + path_or_url: str, method: HTTPMethod, params: Optional[Dict[str, Any]] = None, json: Optional[Dict[str, Any]] = None, auth: Optional[AuthBase] = None, hooks: Optional[Hooks] = None, ) -> Request: - parsed_url = urlparse(path) + parsed_url = urlparse(path_or_url) if parsed_url.scheme in ("http", "https"): - url = path + url = path_or_url else: - url = join_url(self.base_url, path) + url = join_url(self.base_url, path_or_url) return Request( method=method, @@ -140,7 +140,7 @@ def _send_request(self, request: Request, **kwargs: Any) -> Response: def request(self, path: str = "", method: HTTPMethod = "GET", **kwargs: Any) -> Response: prepared_request = self._create_request( - path=path, + path_or_url=path, method=method, params=kwargs.pop("params", None), json=kwargs.pop("json", None), @@ -171,6 +171,8 @@ def paginate( Args: path (str): Endpoint path for the request, relative to `base_url`. + Can also be a fully qualified URL; if starting with http(s) it will + be used instead of the base_url + path. method (HTTPMethodBasic): HTTP method for the request, defaults to 'get'. params (Optional[Dict[str, Any]]): URL parameters for the request. json (Optional[Dict[str, Any]]): JSON payload for the request. 
@@ -210,7 +212,7 @@ def paginate( hooks["response"] = [raise_for_status] request = self._create_request( - path=path, method=method, params=params, json=json, auth=auth, hooks=hooks + path_or_url=path, method=method, params=params, json=json, auth=auth, hooks=hooks ) if paginator: diff --git a/docs/website/docs/dlt-ecosystem/notebooks.md b/docs/website/docs/dlt-ecosystem/notebooks.md deleted file mode 100644 index 4486b81b68..0000000000 --- a/docs/website/docs/dlt-ecosystem/notebooks.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: dlt in notebooks -description: Run dlt in notebooks like Colab, Databricks or Jupyter -keywords: [notebook, jupyter] ---- -# dlt in notebooks - -## Colab -You'll need to install `dlt` like any other dependency: -```sh -!pip install dlt -``` - -You can configure secrets using **Secrets** sidebar. Just create a variable with the name `secrets.toml` and paste -the content of the **toml** file from your `.dlt` folder into it. We support `config.toml` variable as well. - -:::note -`dlt` will not reload the secrets automatically. Please restart your interpreter in Colab options when you add/change -content of the variables above. -::: - -## Streamlit -`dlt` will look for `secrets.toml` and `config.toml` in the `.dlt` folder. If `secrets.toml` are not found, it will use -`secrets.toml` from `.streamlit` folder. -If you run locally, maintain your usual `.dlt` folder. When running on streamlit cloud, paste the content of `dlt` -`secrets.toml` into the `streamlit` secrets. - diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md index 48e7d6b765..14d9ecb04b 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md @@ -361,7 +361,8 @@ The endpoint configuration defines how to query the API endpoint. 
Quick example: The fields in the endpoint configuration are: -- `path`: The path to the API endpoint. +- `path`: The path to the API endpoint. By default, this path is appended to the given `base_url`. If this is a fully qualified URL starting with `http:` or `https:`, it will be +used as-is and `base_url` will be ignored. - `method`: The HTTP method to be used. The default is `GET`. - `params`: Query parameters to be sent with each request. For example, `sort` to order the results or `since` to specify [incremental loading](#incremental-loading). This is also used to define [resource relationships](#define-resource-relationships). - `json`: The JSON payload to be sent with the request (for POST and PUT requests). diff --git a/docs/website/docs/general-usage/dataset-access/streamlit.md b/docs/website/docs/general-usage/dataset-access/streamlit.md index 32589d8e23..2d76aac660 100644 --- a/docs/website/docs/general-usage/dataset-access/streamlit.md +++ b/docs/website/docs/general-usage/dataset-access/streamlit.md @@ -31,6 +31,18 @@ dlt pipeline {pipeline_name} show Use the pipeline name you defined in your Python code with the `pipeline_name` argument. If you are unsure, you can use the `dlt pipeline --list` command to list all pipelines. +## Credentials + +`dlt` will look for `secrets.toml` and `config.toml` in the `.dlt` folder. + +If `secrets.toml` is not found, it will use +`secrets.toml` from the `.streamlit` folder. + +If you run locally, maintain your usual `.dlt` folder. + +When running on Streamlit Cloud, paste the content of `dlt` +`secrets.toml` into the `streamlit` secrets. + ## Inspecting your data You can now inspect the schema and your data. 
Use the left sidebar to switch between: diff --git a/docs/website/docs/walkthroughs/run-a-pipeline.md b/docs/website/docs/walkthroughs/run-a-pipeline.md index 49abe8675f..3c0e30ccf3 100644 --- a/docs/website/docs/walkthroughs/run-a-pipeline.md +++ b/docs/website/docs/walkthroughs/run-a-pipeline.md @@ -140,7 +140,24 @@ destination, etc. Please refer to [Running in production](../running-in-production/running.md#inspect-and-save-the-load-info-and-trace) for more details. -## 5. Detect and handle problems +## Run dlt in Notebooks + +### Colab +You'll need to install `dlt` like any other dependency: +```sh +!pip install dlt +``` + +You can configure secrets using the **Secrets** sidebar. Just create a variable with the name `secrets.toml` and paste +the content of the **toml** file from your `.dlt` folder into it. We support a `config.toml` variable as well. + +:::note +`dlt` will not reload the secrets automatically. Please restart your interpreter in Colab options when you add or change +the content of the variables above. +::: + + +## Troubleshooting What happens if something goes wrong? In most cases, the `dlt` `run` command raises exceptions. We put a lot of effort into making the exception messages easy to understand. 
Reading them is the first step diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index bff2fcc508..274f3e82b3 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -271,7 +271,6 @@ const sidebars = { 'general-usage/full-loading', ] }, - 'dlt-ecosystem/notebooks' ] }, { diff --git a/tests/sources/helpers/rest_client/test_client.py b/tests/sources/helpers/rest_client/test_client.py index 488d7ef525..36fe009b93 100644 --- a/tests/sources/helpers/rest_client/test_client.py +++ b/tests/sources/helpers/rest_client/test_client.py @@ -7,6 +7,7 @@ from requests import PreparedRequest, Request, Response from requests.auth import AuthBase from requests.exceptions import HTTPError +import requests_mock from dlt.common import logger from dlt.common.typing import TSecretStrValue @@ -512,3 +513,24 @@ def test_request_kwargs(self, mocker) -> None: "timeout": 432, "allow_redirects": False, } + + @requests_mock.Mocker(kw="mock") + def test_overwrite_path(self, mocker, **kwargs) -> None: + expected = {"foo": "bar"} + kwargs["mock"].get("https://completely.different/endpoint", json=expected) + rest_client = RESTClient( + base_url="https://api.example.com", + ) + response = rest_client.get("https://completely.different/endpoint") + assert response.json() == expected + + @requests_mock.Mocker(kw="mock") + def test_overwrite_path_ignores_different_protocol(self, mocker, **kwargs) -> None: + expected = {"foo": "bar"} + base_url = "https://api.example.com" + kwargs["mock"].get(f"{base_url}/my://protocol", json=expected) + rest_client = RESTClient( + base_url=base_url, + ) + response = rest_client.get("my://protocol") + assert response.json() == expected