From 43527531d74678a5fc455760a3e378fc0001f67b Mon Sep 17 00:00:00 2001 From: dat-a-man Date: Mon, 23 Oct 2023 18:26:15 +0530 Subject: [PATCH 1/5] Added Readers Source and Filesystem Docs --- .../verified-sources/filesystem.md | 419 ++++++++++++++++++ docs/website/sidebars.js | 1 + 2 files changed, 420 insertions(+) create mode 100644 docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md new file mode 100644 index 0000000000..db6948c0a2 --- /dev/null +++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md @@ -0,0 +1,419 @@ +--- +title: Readers Source & Filesystem +description: dlt verified source for Readers Source & Filesystem +keywords: [readers source and filesystem, filesystem, readers source] +--- + +# Readers Source & Filesystem + +:::info Need help deploying these sources, or figuring out how to run them in your data stack? + +[Join our Slack community](https://dlthub-community.slack.com/join/shared_invite/zt-1slox199h-HAE7EQoXmstkP_bTqal65g) +or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer Adrian. +::: + +This verified source easily streams files from s3, gcs, azure, or local filesystem using the reader source. Currently the following reader sources are supported: + - read_csv (with Pandas) + - read_jsonl + - read_parquet (with pyarrow)
+Additionally, it can read Excel files with a standalone transformer and copy files locally. + +Sources and resources that can be loaded using this verified source are: + +| Name | Description | +|------------|-------------------------------------------------------| +| readers | Provides chunked file reader resources | +| filesystem | Lists files in `bucket_url` using `file_glob` pattern | + + We advice that you give each resource a [specific name](https://dlthub.com/docs/general-usage/resource#duplicate-and-rename-resources) before loading with pipeline.run. This will make + sure that data goes to a table with the name you want and that each pipeline uses a [separate state for + incremental loading.](https://dlthub.com/docs/general-usage/state#read-and-write-pipeline-state-in-a-resource) + +### Standalone filesystem +Use the [standalone filesystem](https://dlthub.com/docs/general-usage/resource#declare-a-standalone-resource) resource to list files in s3, gcs, and azure buckets. This allows you to customize file +readers or manage files using fsspec. For details, see the [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/index.html). The filesystem ensures consistent +file representation across bucket types and offers methods to access and read data. You can quickly build pipelines to: + + - Extract text from PDFs + - Stream large file content directly from buckets + - Copy files locally. + +## Setup Guide + +### Grab credentials + +This source can access various bucket types, including: + + - AWS S3 + - GCS Cloud Storage + - Azure Blob Storage + +To access these, you'll need secret credentials obtained as follows: + +#### AWS S3 credentials +To get AWS keys for S3 access: + +1. Access IAM in AWS Console. +1. Select "Users", choose a user, and open "Security credentials". +1. Click "Create access key" for AWS ID and Secret Key. + +For more info, see [AWS official documentation.](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) + +#### GCS Cloud Storage credentials + +To get GCS cloud storage access: + +1. Log in to [console.cloud.google.com](http://console.cloud.google.com/). +1. Create a [service account](https://cloud.google.com/iam/docs/service-accounts-create#creating). +1. Enable "Google Analytics API"; see [Google's guide](https://support.google.com/googleapi/answer/6158841?hl=en). +1. In IAM & Admin > Service Accounts, find your account, click the three-dot menu > "Manage Keys" > "ADD KEY" > "CREATE" to get a JSON credential file. + > Grant the service account appropriate permissions for cloud storage access. + +For more info, see how to [create service account](https://support.google.com/a/answer/7378726?hl=en). + +#### Azure Blob Storage credentials + +To obtain Azure blob storage access: + +1. Go to Azure Portal (portal.azure.com). +1. Select "Storage accounts" > your storage. +1. Click "Settings" > "Access keys". +1. View account name and two keys (primary/secondary). Keep keys confidential. + +For more info, see [Azure official documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal). + + +### Initialize the verified source + +To get started with your data pipeline, follow these steps: + +1. 
Enter the following command: + + ```bash + dlt init filesystem duckdb + ``` + + [This command](../../reference/command-line-interface) will initialize + [the pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/filesystem_pipeline.py) + with Readers source & filesystem as the [source](../../general-usage/source) and + [duckdb](../destinations/duckdb.md) as the [destination](../destinations). + +1. If you'd like to use a different destination, simply replace `duckdb` with the name of your + preferred [destination](../destinations). + +1. After running this command, a new directory will be created with the necessary files and + configuration settings to get started. + +For more information, read the +[Walkthrough: Add a verified source.](../../walkthroughs/add-a-verified-source) + +### Add credentials + +1. In the `.dlt` folder, there's a file called `secrets.toml`. It's where you store sensitive + information securely, like access tokens. Keep this file safe. Here's its format for service + account authentication: + + ```toml + # For AWS S3 access: + aws_access_key_id="Please set me up!" + aws_secret_access_key="Please set me up!" + + # For Azure blob storage access: + azure_storage_account_name="Please set me up!" + azure_storage_account_key="Please set me up!" + + # For GCS storage bucket access: + client_email="Please set me up!" + private_key="Please set me up!" + project_id="Please set me up!" + ``` +2. Finally, enter credentials for your chosen destination as per the [docs](../destinations/). + +3. You can pass the bucket URL and glob pattern or use "config.toml". For local filesystems, use file:// + or skip the schema. + + ```toml + [sources.filesystem] + bucket_url="~/Documents/csv_files/" + file_glob="*" + ``` + + For remote file systems you need to add the schema, it will be used to get the protocol being used, for example: + ```toml + [sources.filesystem] + bucket_url="s3://my-bucket/csv_files/" + ``` + > Note: For Azure, use adlfs>=2023.9.0. Older versions mishandle globs. + +## Run the pipeline + +1. Before running the pipeline, ensure that you have installed all the necessary dependencies by + running the command: + + ```bash + pip install -r requirements.txt + ``` +1. Install optional modules: + + - For AWS S3: + ```bash + pip install s3fs + ``` + - For Azure blob: + ```bash + pip install adlfs + ``` + - GCS storage: No separate module needed. + +1. You're now ready to run the pipeline! To get started, run the following command: + + ```bash + python3 filesystem_pipeline.py + ``` + +1. Once the pipeline has finished running, you can verify that everything loaded correctly by using + the following command: + + ```bash + dlt pipeline show + ``` + + For example, the `pipeline_name` for the above pipeline example is `standard_filesystem`, you + may also use any custom name instead. + +For more information, read the [Walkthrough: Run a pipeline](../../walkthroughs/run-a-pipeline). + +## Sources and resources + +`dlt` works on the principle of [sources](../../general-usage/source) and +[resources](../../general-usage/resource). + +### Source `readers` + +This source provides resources that are chunked file readers. 
You can customize these readers optionally, resources provided are: + - read_csv(chunksize, **pandas_kwargs) + - read_jsonl(chunksize) + - read_parquet(chunksize) + +```python +@dlt.source(_impl_cls=ReadersSource, spec=FilesystemConfigurationResource) +def readers( + bucket_url: str = dlt.secrets.value, + credentials: Union[FileSystemCredentials, AbstractFileSystem] = dlt.secrets.value, + file_glob: Optional[str] = "*", +) -> Tuple[DltResource, ...]: +``` + + `bucket_url`: The url to the bucket.
`credentials`: The filesystem credentials, or an already authenticated fsspec `AbstractFileSystem` instance.
+ `file_glob`: Glob filter for files; defaults to non-recursive listing in the bucket.
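For orientation, here is a minimal sketch of wiring the `readers` source into a pipeline. It assumes the `filesystem` folder created by `dlt init filesystem duckdb` is importable and that the bucket path is replaced with your own:

```python
import dlt
from filesystem import readers  # package created by `dlt init filesystem duckdb`

# Build a chunked CSV reader over the bucket; chunksize is passed through to pandas.
csv_reader = readers(
    bucket_url="s3://my-bucket/csv_files/",  # or a local path such as "~/Documents/csv_files/"
    file_glob="*.csv",
).read_csv(chunksize=10000)

pipeline = dlt.pipeline(
    pipeline_name="readers_example",
    destination="duckdb",
    dataset_name="filesystem_data",
)

# Give the resource an explicit table name before loading.
load_info = pipeline.run(csv_reader.with_name("csv_rows"))
print(load_info)
```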
+ +### Resource `filesystem` + +This resource enumerates files in bucket_url using the file_glob pattern. Files are returned as FileItem with +methods to access data. Pair with transformers for further processing. + +```python +@dlt.resource( + primary_key="file_url", spec=FilesystemConfigurationResource, standalone=True +) +def filesystem( + bucket_url: str = dlt.secrets.value, + credentials: Union[FileSystemCredentials, AbstractFileSystem] = dlt.secrets.value, + file_glob: Optional[str] = "*", + files_per_page: int = DEFAULT_CHUNK_SIZE, + extract_content: bool = False, +) -> Iterator[List[FileItem]]: +``` + +`bucket_url`: URL of the bucket.
`credentials`: The filesystem credentials, or an fsspec `AbstractFileSystem` instance.
+`file_glob`: File filter in glob format. Defaults to listing all non-recursive files in bucket_url.
+`files_per_page`: Number of files processed at once (default: 100).
`extract_content`: If true, the file content is read and returned as part of the resource (default: False).
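As a hedged illustration of these parameters, the resource also accepts a pre-built fsspec filesystem in place of credentials from `secrets.toml`. The sketch below assumes `s3fs` is installed and that the bucket and key placeholders are replaced with your own:

```python
import dlt
import s3fs
from filesystem import filesystem

# An authenticated fsspec instance can be passed directly as `credentials`.
fs = s3fs.S3FileSystem(key="<aws_access_key_id>", secret="<aws_secret_access_key>")

files = filesystem(
    bucket_url="s3://my-bucket/data/",
    credentials=fs,          # AbstractFileSystem instance instead of secret values
    file_glob="**/*.jsonl",  # recursive glob
    files_per_page=50,       # smaller pages of FileItem dicts
)

pipeline = dlt.pipeline(
    pipeline_name="filesystem_listing",
    destination="duckdb",
    dataset_name="filesystem_data",
)

# Load just the file listing; pair the resource with a transformer to read contents.
print(pipeline.run(files.with_name("listing"), write_disposition="replace"))
```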
+ +## Filesystem Integration and Data Extraction Guide + +### Filesystem Usage +- The filesystem tool lists files in a chosen bucket based on a glob pattern and returns file details (as FileInfo) +in adjustable page sizes. +- The resource is designed to work with transform functions and transformers for custom extract pipelines. + +To load data into a specific table (instead of the default filesystem table), see the snippet below: +```python +@dlt.transformer(standalone=True) +def read_csv(items, chunksize: int = 15) ->: + """Reads csv file with Pandas chunk by chunk.""" + ... + +# list only the *.csv in specific folder and pass the file items to read_csv() +met_files = ( + filesystem(bucket_url="s3://my_bucket/data, file_glob="met_csv/A801/*.csv") + | read_csv() + ) +# load to met_csv table using with_name() +pipeline.run(met_files.with_name("met_csv")) +``` + +### FileItem Representation + +- All dlt sources/resources that yield files follow the FileItem contract. +- File content is typically not loaded; instead, full file info and methods to access content are available. +- Users can request an authenticated filespec AbstractFilesystem instance. + +#### FileItem Fields: +`file_url` - Complete URL of the file; also the primary key (e.g., file://).
+`file_name` - Name or relative path of the file from the bucket_url.
+`mime_type` - File's mime type; sourced from the bucket provider or inferred from its extension.
+`modification_date` - File's last modification time (format: pendulum.DateTime).
+`size_in_bytes` - File size.
+`file_content` - Content, provided upon request.
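To make the FileItem contract concrete, here is a small sketch that combines the fields above with the helper methods described under "File Manipulation" below. The `read_text` transformer and the `*.txt` glob are illustrative, not part of the verified source:

```python
import dlt
from filesystem import filesystem

@dlt.transformer(standalone=True)
def read_text(items):
    """Turn each FileItem into a row, decoding the file content on the fly."""
    for file_item in items:
        yield {
            "file_url": file_item["file_url"],          # primary key of the listing
            "file_name": file_item["file_name"],
            "modification_date": file_item["modification_date"],
            "size_in_bytes": file_item["size_in_bytes"],
            "text": file_item.read_bytes().decode("utf-8"),
        }

# List *.txt files in a local folder and pipe them through the transformer.
notes = filesystem(bucket_url="~/Documents/notes/", file_glob="*.txt") | read_text()

pipeline = dlt.pipeline(pipeline_name="text_files", destination="duckdb", dataset_name="notes_data")
print(pipeline.run(notes.with_name("notes")))
```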
+ +>📌 Note: When using a nested or recursive glob pattern, file_name will include the file's path. For instance, using the resource: + `filesystem("az://dlt-ci-test-bucket/standard_source/samples", file_glob="met_csv/A801/*.csv")` + will produce file names relative to the /standard_source/samples path, such as met_csv/A801/A881_20230920.csv. + +### File Manipulation +FileItem, backed by a dictionary implementation, offers these helper methods: + +- read_bytes(): Returns the file content as bytes. +- open(): Provides a file object when opened. +- filesystem: Gives access to an authorized AbstractFilesystem with standard fsspec methods. + +## Customization +### Create your own pipeline + +If you wish to create your own pipelines, you can leverage source and resource methods from this +verified source. + +1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: + + ```python + pipeline = dlt.pipeline( + pipeline_name="standard_filesystem_csv", # Use a custom name if desired + destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) + dataset_name="filesystem_data" # Use a custom name if desired + ) + ``` +1. To read and load CSV files: +Replace the placeholder for BUCKET_URL with the appropriate path to your bucket or local destination. + ```python + BUCKET_URL = "YOUR_BUCKET_PATH_HERE" + met_files = readers( + bucket_url=BUCKET_URL, file_glob="met_csv/A801/*.csv" + ).read_csv() + # tell dlt to merge on date + met_files.apply_hints(write_disposition="merge", merge_key="date") + # We load the data into the met_csv table + load_info = pipeline.run(met_files.with_name("met_csv")) + print(load_info) + print(pipeline.last_trace.last_normalize_info) + ``` + > The `file_glob` parameter targets all CSVs in the "met_csv/A801" directory..
+ > The `print(pipeline.last_trace.last_normalize_info)` line displays the data normalization details from the pipeline's last trace.
+ > 📌 Note: If you have a default bucket URL set in "/.dlt/config.toml", you can omit the bucket_url parameter. + + When rerun the next day, this pipeline updates both new and the previous day's records. +1. To load only new CSV files: + ```python + # This configuration will only consider new csv files + new_files = filesystem(bucket_url=BUCKET_URL, file_glob="met_csv/A801/*.csv") + # add incremental on modification time + new_files.apply_hints(incremental=dlt.sources.incremental("modification_date")) + load_info = pipeline.run((new_files | read_csv()).with_name("csv_files")) + print(load_info) + print(pipeline.last_trace.last_normalize_info) + ``` + +1. To read and load Parquet and JSONL from a bucket: + ```python + BUCKET_URL = "Please set me up!" #path of the bucket url or local destination + jsonl_reader = readers(BUCKET_URL, file_glob="**/*.jsonl").read_jsonl( + chunksize=10000 + ) + # PARQUET reading + parquet_reader = readers(BUCKET_URL, file_glob="**/*.parquet").read_parquet() + # load both folders together to specified tables + load_info = pipeline.run( + [ + jsonl_reader.with_name("jsonl_team_data"), + parquet_reader.with_name("parquet_team_data"), + ] + ) + print(load_info) + print(pipeline.last_trace.last_normalize_info) + ``` + > `file_glob`: Specifies file pattern; reads all JSONL and Parquet files across directories.
+ > `chunksize`: Set to 10,000; data read in chunks of 10,000 records each.
+ > `print(pipeline.last_trace.last_normalize_info)` displays the data normalization details from the pipeline's last trace.
+ +1. To set up a pipeline that reads from an Excel file using a standalone transformer: + ```python + # Define a standalone transformer to read data from an Excel file. + @dlt.transformer(standalone=True) + def read_excel( + items: Iterator[FileItemDict], sheet_name: str + ) -> Iterator[TDataItems]: + # Import the required pandas library. + import pandas as pd + + # Iterate through each file item. + for file_obj in items: + # Open the file object. + with file_obj.open() as file: + # Read from the Excel file and yield its content as dictionary records. + yield pd.read_excel(file, sheet_name).to_dict(orient="records") + + # Set up the pipeline to fetch a specific Excel file from a filesystem (bucket). + example_xls = filesystem( + bucket_url=TESTS_BUCKET_URL, file_glob="../directory/example.xlsx" + ) | read_excel("example_table") # Pass the data through the transformer to read the "freshman_table" sheet. + + # Execute the pipeline and load the extracted data into the "duckdb" destination. + load_info = dlt.run( + freshman_xls.with_name("example"), + destination="duckdb", + dataset_name="example_data", + ) + + # Print the loading information. + print(load_info) + ``` + > The code loads data from "example.xlsx" into the destination. + +1. To copy files locally adding a step in the filesystem resource and then load the listing to the DB: + + ```python + def copy_files_resource(local_folder: str) -> None: + """Demonstrates how to copy files locally by adding a step to filesystem resource and the to load the download listing to db""" + pipeline = dlt.pipeline( + pipeline_name="standard_filesystem_copy", + destination="duckdb", + dataset_name="standard_filesystem_data", + ) + def _copy(item: FileItemDict) -> FileItemDict: + # instantiate fsspec and copy file + dest_file = os.path.join(local_folder, item["file_name"]) + # create dest folder + os.makedirs(os.path.dirname(dest_file), exist_ok=True) + # download file + item.fsspec.download(item["file_url"], dest_file) + # return file item unchanged + return item + + # use recursive glob pattern and add file copy step + downloader = filesystem(TESTS_BUCKET_URL, file_glob="**").add_map(_copy) + + # NOTE: you do not need to load any data to execute extract, below we obtain + # a list of files in a bucket and also copy them locally + listing = list(downloader) + print(listing) + # download to table "listing" + # downloader = filesystem(TESTS_BUCKET_URL, file_glob="**").add_map(_copy) + load_info = pipeline.run( + downloader.with_name("listing"), write_disposition="replace" + ) + # pretty print the information on data that was loaded + print(load_info) + print(pipeline.last_trace.last_normalize_info) + ``` \ No newline at end of file diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index befbfc0b2d..168ebaa137 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -41,6 +41,7 @@ const sidebars = { 'dlt-ecosystem/verified-sources/asana', 'dlt-ecosystem/verified-sources/chess', 'dlt-ecosystem/verified-sources/facebook_ads', + 'dlt-ecosystem/verified-sources/filesystem', 'dlt-ecosystem/verified-sources/github', 'dlt-ecosystem/verified-sources/google_analytics', 'dlt-ecosystem/verified-sources/google_sheets', From 3325d8728e21662df13b12bbcfb9fc3c0ca9afd9 Mon Sep 17 00:00:00 2001 From: dat-a-man Date: Mon, 23 Oct 2023 18:32:46 +0530 Subject: [PATCH 2/5] Updated --- .../verified-sources/filesystem.md | 191 +++++++++++------- 1 file changed, 115 insertions(+), 76 deletions(-) diff --git 
a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md index db6948c0a2..f00f3b299c 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md @@ -1,8 +1,6 @@ ---- -title: Readers Source & Filesystem -description: dlt verified source for Readers Source & Filesystem -keywords: [readers source and filesystem, filesystem, readers source] ---- +______________________________________________________________________ + +## title: Readers Source & Filesystem description: dlt verified source for Readers Source & Filesystem keywords: \[readers source and filesystem, filesystem, readers source\] # Readers Source & Filesystem @@ -12,31 +10,40 @@ keywords: [readers source and filesystem, filesystem, readers source] or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer Adrian. ::: -This verified source easily streams files from s3, gcs, azure, or local filesystem using the reader source. Currently the following reader sources are supported: - - read_csv (with Pandas) - - read_jsonl - - read_parquet (with pyarrow)
-Additionally, it can read Excel files with a standalone transformer and copy files locally. +This verified source easily streams files from s3, gcs, azure, or local filesystem using the reader +source. Currently the following reader sources are supported: + +- read_csv (with Pandas) +- read_jsonl +- read_parquet (with pyarrow)
Additionally, it can read Excel files with a standalone + transformer and copy files locally. Sources and resources that can be loaded using this verified source are: -| Name | Description | -|------------|-------------------------------------------------------| -| readers | Provides chunked file reader resources | -| filesystem | Lists files in `bucket_url` using `file_glob` pattern | +| Name | Description | +|------------|------------------------------------------------------| +| readers | Provides chunked file reader resources | +| filesystem | Lists files in `bucket_url` using`file_glob` pattern | - We advice that you give each resource a [specific name](https://dlthub.com/docs/general-usage/resource#duplicate-and-rename-resources) before loading with pipeline.run. This will make - sure that data goes to a table with the name you want and that each pipeline uses a [separate state for - incremental loading.](https://dlthub.com/docs/general-usage/state#read-and-write-pipeline-state-in-a-resource) +We advice that you give each resource a +[specific name](https://dlthub.com/docs/general-usage/resource#duplicate-and-rename-resources) +before loading with pipeline.run. This will make sure that data goes to a table with the name you +want and that each pipeline uses a +[separate state for incremental loading.](https://dlthub.com/docs/general-usage/state#read-and-write-pipeline-state-in-a-resource) ### Standalone filesystem -Use the [standalone filesystem](https://dlthub.com/docs/general-usage/resource#declare-a-standalone-resource) resource to list files in s3, gcs, and azure buckets. This allows you to customize file -readers or manage files using fsspec. For details, see the [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/index.html). The filesystem ensures consistent -file representation across bucket types and offers methods to access and read data. You can quickly build pipelines to: - - Extract text from PDFs - - Stream large file content directly from buckets - - Copy files locally. +Use the +[standalone filesystem](https://dlthub.com/docs/general-usage/resource#declare-a-standalone-resource) +resource to list files in s3, gcs, and azure buckets. This allows you to customize file readers or +manage files using fsspec. For details, see the +[fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/index.html). The filesystem +ensures consistent file representation across bucket types and offers methods to access and read +data. You can quickly build pipelines to: + +- Extract text from PDFs +- Stream large file content directly from buckets +- Copy files locally. ## Setup Guide @@ -44,20 +51,22 @@ file representation across bucket types and offers methods to access and read da This source can access various bucket types, including: - - AWS S3 - - GCS Cloud Storage - - Azure Blob Storage +- AWS S3 +- GCS Cloud Storage +- Azure Blob Storage To access these, you'll need secret credentials obtained as follows: #### AWS S3 credentials + To get AWS keys for S3 access: 1. Access IAM in AWS Console. 1. Select "Users", choose a user, and open "Security credentials". 1. Click "Create access key" for AWS ID and Secret Key. 
-For more info, see [AWS official documentation.](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) +For more info, see +[AWS official documentation.](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) #### GCS Cloud Storage credentials @@ -65,11 +74,14 @@ To get GCS cloud storage access: 1. Log in to [console.cloud.google.com](http://console.cloud.google.com/). 1. Create a [service account](https://cloud.google.com/iam/docs/service-accounts-create#creating). -1. Enable "Google Analytics API"; see [Google's guide](https://support.google.com/googleapi/answer/6158841?hl=en). -1. In IAM & Admin > Service Accounts, find your account, click the three-dot menu > "Manage Keys" > "ADD KEY" > "CREATE" to get a JSON credential file. +1. Enable "Google Analytics API"; see + [Google's guide](https://support.google.com/googleapi/answer/6158841?hl=en). +1. In IAM & Admin > Service Accounts, find your account, click the three-dot menu > "Manage Keys" > + "ADD KEY" > "CREATE" to get a JSON credential file. > Grant the service account appropriate permissions for cloud storage access. -For more info, see how to [create service account](https://support.google.com/a/answer/7378726?hl=en). +For more info, see how to +[create service account](https://support.google.com/a/answer/7378726?hl=en). #### Azure Blob Storage credentials @@ -80,8 +92,8 @@ To obtain Azure blob storage access: 1. Click "Settings" > "Access keys". 1. View account name and two keys (primary/secondary). Keep keys confidential. -For more info, see [Azure official documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal). - +For more info, see +[Azure official documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal). ### Initialize the verified source @@ -127,10 +139,11 @@ For more information, read the private_key="Please set me up!" project_id="Please set me up!" ``` -2. Finally, enter credentials for your chosen destination as per the [docs](../destinations/). -3. You can pass the bucket URL and glob pattern or use "config.toml". For local filesystems, use file:// - or skip the schema. +1. Finally, enter credentials for your chosen destination as per the [docs](../destinations/). + +1. You can pass the bucket URL and glob pattern or use "config.toml". For local filesystems, use + file:// or skip the schema. ```toml [sources.filesystem] @@ -138,11 +151,14 @@ For more information, read the file_glob="*" ``` - For remote file systems you need to add the schema, it will be used to get the protocol being used, for example: + For remote file systems you need to add the schema, it will be used to get the protocol being + used, for example: + ```toml [sources.filesystem] bucket_url="s3://my-bucket/csv_files/" ``` + > Note: For Azure, use adlfs>=2023.9.0. Older versions mishandle globs. ## Run the pipeline @@ -153,6 +169,7 @@ For more information, read the ```bash pip install -r requirements.txt ``` + 1. Install optional modules: - For AWS S3: @@ -178,8 +195,8 @@ For more information, read the dlt pipeline show ``` - For example, the `pipeline_name` for the above pipeline example is `standard_filesystem`, you - may also use any custom name instead. + For example, the `pipeline_name` for the above pipeline example is `standard_filesystem`, you may + also use any custom name instead. 
For more information, read the [Walkthrough: Run a pipeline](../../walkthroughs/run-a-pipeline). @@ -190,10 +207,12 @@ For more information, read the [Walkthrough: Run a pipeline](../../walkthroughs/ ### Source `readers` -This source provides resources that are chunked file readers. You can customize these readers optionally, resources provided are: - - read_csv(chunksize, **pandas_kwargs) - - read_jsonl(chunksize) - - read_parquet(chunksize) +This source provides resources that are chunked file readers. You can customize these readers +optionally, resources provided are: + +- read_csv(chunksize, \*\*pandas_kwargs) +- read_jsonl(chunksize) +- read_parquet(chunksize) ```python @dlt.source(_impl_cls=ReadersSource, spec=FilesystemConfigurationResource) @@ -204,14 +223,14 @@ def readers( ) -> Tuple[DltResource, ...]: ``` - `bucket_url`: The url to the bucket.
- `credentials`: The credentials to the filesystem of fsspec `AbstractFilesystem` instance.
- `file_glob`: Glob filter for files; defaults to non-recursive listing in the bucket.
+`bucket_url`: The url to the bucket.
`credentials`: The credentials to the filesystem of fsspec +`AbstractFilesystem` instance.
`file_glob`: Glob filter for files; defaults to non-recursive +listing in the bucket.
### Resource `filesystem` -This resource enumerates files in bucket_url using the file_glob pattern. Files are returned as FileItem with -methods to access data. Pair with transformers for further processing. +This resource enumerates files in bucket_url using the file_glob pattern. Files are returned as +FileItem with methods to access data. Pair with transformers for further processing. ```python @dlt.resource( @@ -226,20 +245,23 @@ def filesystem( ) -> Iterator[List[FileItem]]: ``` -`bucket_url`: URL of the bucket.
-`credentials`: Filesystem credentials of `AbstractFilesystem` instance.
-`file_glob`: File filter in glob format. Defaults to listing all non-recursive files in bucket_url.
-`files_per_page`: Number of files processed at once (default: 100).
-`extract_content`: If true, the content of the file will be read and returned in the resource. (default: False).
+`bucket_url`: URL of the bucket.
`credentials`: Filesystem credentials of `AbstractFilesystem` +instance.
`file_glob`: File filter in glob format. Defaults to listing all non-recursive files +in bucket_url.
`files_per_page`: Number of files processed at once (default: 100).
+`extract_content`: If true, the content of the file will be read and returned in the resource. +(default: False).
## Filesystem Integration and Data Extraction Guide ### Filesystem Usage -- The filesystem tool lists files in a chosen bucket based on a glob pattern and returns file details (as FileInfo) -in adjustable page sizes. -- The resource is designed to work with transform functions and transformers for custom extract pipelines. + +- The filesystem tool lists files in a chosen bucket based on a glob pattern and returns file + details (as FileInfo) in adjustable page sizes. +- The resource is designed to work with transform functions and transformers for custom extract + pipelines. To load data into a specific table (instead of the default filesystem table), see the snippet below: + ```python @dlt.transformer(standalone=True) def read_csv(items, chunksize: int = 15) ->: @@ -258,22 +280,26 @@ pipeline.run(met_files.with_name("met_csv")) ### FileItem Representation - All dlt sources/resources that yield files follow the FileItem contract. -- File content is typically not loaded; instead, full file info and methods to access content are available. +- File content is typically not loaded; instead, full file info and methods to access content are + available. - Users can request an authenticated filespec AbstractFilesystem instance. #### FileItem Fields: -`file_url` - Complete URL of the file; also the primary key (e.g., file://).
-`file_name` - Name or relative path of the file from the bucket_url.
-`mime_type` - File's mime type; sourced from the bucket provider or inferred from its extension.
-`modification_date` - File's last modification time (format: pendulum.DateTime).
-`size_in_bytes` - File size.
-`file_content` - Content, provided upon request.
->📌 Note: When using a nested or recursive glob pattern, file_name will include the file's path. For instance, using the resource: - `filesystem("az://dlt-ci-test-bucket/standard_source/samples", file_glob="met_csv/A801/*.csv")` - will produce file names relative to the /standard_source/samples path, such as met_csv/A801/A881_20230920.csv. +`file_url` - Complete URL of the file; also the primary key (e.g., file://).
`file_name` - Name +or relative path of the file from the bucket_url.
`mime_type` - File's mime type; sourced from +the bucket provider or inferred from its extension.
`modification_date` - File's last +modification time (format: pendulum.DateTime).
`size_in_bytes` - File size.
`file_content` - +Content, provided upon request.
+ +> 📌 Note: When using a nested or recursive glob pattern, file_name will include the file's path. For +> instance, using the resource: +> `filesystem("az://dlt-ci-test-bucket/standard_source/samples", file_glob="met_csv/A801/*.csv")` +> will produce file names relative to the /standard_source/samples path, such as +> met_csv/A801/A881_20230920.csv. ### File Manipulation + FileItem, backed by a dictionary implementation, offers these helper methods: - read_bytes(): Returns the file content as bytes. @@ -281,6 +307,7 @@ FileItem, backed by a dictionary implementation, offers these helper methods: - filesystem: Gives access to an authorized AbstractFilesystem with standard fsspec methods. ## Customization + ### Create your own pipeline If you wish to create your own pipelines, you can leverage source and resource methods from this @@ -295,8 +322,10 @@ verified source. dataset_name="filesystem_data" # Use a custom name if desired ) ``` -1. To read and load CSV files: -Replace the placeholder for BUCKET_URL with the appropriate path to your bucket or local destination. + +1. To read and load CSV files: Replace the placeholder for BUCKET_URL with the appropriate path to + your bucket or local destination. + ```python BUCKET_URL = "YOUR_BUCKET_PATH_HERE" met_files = readers( @@ -309,12 +338,16 @@ Replace the placeholder for BUCKET_URL with the appropriate path to your bucket print(load_info) print(pipeline.last_trace.last_normalize_info) ``` - > The `file_glob` parameter targets all CSVs in the "met_csv/A801" directory..
- > The `print(pipeline.last_trace.last_normalize_info)` line displays the data normalization details from the pipeline's last trace.
- > 📌 Note: If you have a default bucket URL set in "/.dlt/config.toml", you can omit the bucket_url parameter. - When rerun the next day, this pipeline updates both new and the previous day's records. + > The `file_glob` parameter targets all CSVs in the "met_csv/A801" directory..
The + > `print(pipeline.last_trace.last_normalize_info)` line displays the data normalization details + > from the pipeline's last trace.
📌 Note: If you have a default bucket URL set in + > "/.dlt/config.toml", you can omit the bucket_url parameter. + + When rerun the next day, this pipeline updates both new and the previous day's records. + 1. To load only new CSV files: + ```python # This configuration will only consider new csv files new_files = filesystem(bucket_url=BUCKET_URL, file_glob="met_csv/A801/*.csv") @@ -326,6 +359,7 @@ Replace the placeholder for BUCKET_URL with the appropriate path to your bucket ``` 1. To read and load Parquet and JSONL from a bucket: + ```python BUCKET_URL = "Please set me up!" #path of the bucket url or local destination jsonl_reader = readers(BUCKET_URL, file_glob="**/*.jsonl").read_jsonl( @@ -343,11 +377,14 @@ Replace the placeholder for BUCKET_URL with the appropriate path to your bucket print(load_info) print(pipeline.last_trace.last_normalize_info) ``` - > `file_glob`: Specifies file pattern; reads all JSONL and Parquet files across directories.
- > `chunksize`: Set to 10,000; data read in chunks of 10,000 records each.
- > `print(pipeline.last_trace.last_normalize_info)` displays the data normalization details from the pipeline's last trace.
+ + > `file_glob`: Specifies file pattern; reads all JSONL and Parquet files across directories.
+ > `chunksize`: Set to 10,000; data read in chunks of 10,000 records each.
+ > `print(pipeline.last_trace.last_normalize_info)` displays the data normalization details from + > the pipeline's last trace.
1. To set up a pipeline that reads from an Excel file using a standalone transformer: + ```python # Define a standalone transformer to read data from an Excel file. @dlt.transformer(standalone=True) @@ -379,9 +416,11 @@ Replace the placeholder for BUCKET_URL with the appropriate path to your bucket # Print the loading information. print(load_info) ``` + > The code loads data from "example.xlsx" into the destination. -1. To copy files locally adding a step in the filesystem resource and then load the listing to the DB: +1. To copy files locally adding a step in the filesystem resource and then load the listing to the + DB: ```python def copy_files_resource(local_folder: str) -> None: @@ -416,4 +455,4 @@ Replace the placeholder for BUCKET_URL with the appropriate path to your bucket # pretty print the information on data that was loaded print(load_info) print(pipeline.last_trace.last_normalize_info) - ``` \ No newline at end of file + ``` From e063166a224303ade98b41df74bfd10d070efa13 Mon Sep 17 00:00:00 2001 From: dat-a-man Date: Mon, 23 Oct 2023 19:00:54 +0530 Subject: [PATCH 3/5] Updated --- .../verified-sources/filesystem.md | 59 ++++++++++++------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md index f00f3b299c..55774cf063 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md @@ -15,8 +15,9 @@ source. Currently the following reader sources are supported: - read_csv (with Pandas) - read_jsonl -- read_parquet (with pyarrow)
Additionally, it can read Excel files with a standalone - transformer and copy files locally. +- read_parquet (with pyarrow) + +Additionally, it can read Excel files with a standalone transformer and copy files locally. Sources and resources that can be loaded using this verified source are: @@ -210,9 +211,9 @@ For more information, read the [Walkthrough: Run a pipeline](../../walkthroughs/ This source provides resources that are chunked file readers. You can customize these readers optionally, resources provided are: -- read_csv(chunksize, \*\*pandas_kwargs) -- read_jsonl(chunksize) -- read_parquet(chunksize) +- read_csv +- read_jsonl +- read_parquet ```python @dlt.source(_impl_cls=ReadersSource, spec=FilesystemConfigurationResource) @@ -223,9 +224,14 @@ def readers( ) -> Tuple[DltResource, ...]: ``` -`bucket_url`: The url to the bucket.
`credentials`: The filesystem credentials, or an fsspec `AbstractFileSystem` instance.
+`bucket_url`: The url to the bucket. + +`credentials`: The credentials to the filesystem of fsspec + +`AbstractFilesystem` instance. + +`file_glob`: Glob filter for files; defaults to non-recursive +listing in the bucket. ### Resource `filesystem` @@ -245,11 +251,17 @@ def filesystem( ) -> Iterator[List[FileItem]]: ``` -`bucket_url`: URL of the bucket.
`credentials`: Filesystem credentials of `AbstractFilesystem` -instance.
`file_glob`: File filter in glob format. Defaults to listing all non-recursive files -in bucket_url.
`files_per_page`: Number of files processed at once (default: 100).
+`bucket_url`: URL of the bucket. + +`credentials`: Filesystem credentials of `AbstractFilesystem` instance. + +`file_glob`: File filter in glob format. Defaults to listing all non-recursive files +in bucket_url. + +`files_per_page`: Number of files processed at once (default: 100). + `extract_content`: If true, the content of the file will be read and returned in the resource. -(default: False).
+(default: False). ## Filesystem Integration and Data Extraction Guide @@ -286,11 +298,17 @@ pipeline.run(met_files.with_name("met_csv")) #### FileItem Fields: -`file_url` - Complete URL of the file; also the primary key (e.g., file://).
`file_name` - Name -or relative path of the file from the bucket_url.
`mime_type` - File's mime type; sourced from -the bucket provider or inferred from its extension.
`modification_date` - File's last -modification time (format: pendulum.DateTime).
`size_in_bytes` - File size.
`file_content` - -Content, provided upon request.
+`file_url` - Complete URL of the file; also the primary key (e.g., file://). + +`file_name` - Name or relative path of the file from the bucket_url. + +`mime_type` - File's mime type; sourced from the bucket provider or inferred from its extension. + +`modification_date` - File's last modification time (format: pendulum.DateTime). + +`size_in_bytes` - File size. + +`file_content` - Content, provided upon request. > 📌 Note: When using a nested or recursive glob pattern, file_name will include the file's path. For > instance, using the resource: @@ -339,9 +357,10 @@ verified source. print(pipeline.last_trace.last_normalize_info) ``` - > The `file_glob` parameter targets all CSVs in the "met_csv/A801" directory..
The - > `print(pipeline.last_trace.last_normalize_info)` line displays the data normalization details - > from the pipeline's last trace.
📌 Note: If you have a default bucket URL set in + > The `file_glob` parameter targets all CSVs in the "met_csv/A801" directory.
+ > The`print(pipeline.last_trace.last_normalize_info)` line displays the data normalization details + > from the pipeline's last trace.
+ >📌 Note: If you have a default bucket URL set in > "/.dlt/config.toml", you can omit the bucket_url parameter. When rerun the next day, this pipeline updates both new and the previous day's records. From b23e73b1c6552adb42893186a1098d9813b472a2 Mon Sep 17 00:00:00 2001 From: dat-a-man Date: Mon, 23 Oct 2023 19:20:29 +0530 Subject: [PATCH 4/5] Updated --- .../verified-sources/filesystem.md | 39 +++++++++---------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md index 55774cf063..0b9ce790a9 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md @@ -357,13 +357,11 @@ verified source. print(pipeline.last_trace.last_normalize_info) ``` - > The `file_glob` parameter targets all CSVs in the "met_csv/A801" directory.
- > The`print(pipeline.last_trace.last_normalize_info)` line displays the data normalization details - > from the pipeline's last trace.
- >📌 Note: If you have a default bucket URL set in - > "/.dlt/config.toml", you can omit the bucket_url parameter. - - When rerun the next day, this pipeline updates both new and the previous day's records. + > The `file_glob` parameter targets all CSVs in the "met_csv/A801" directory. + > + > The `print(pipeline.last_trace.last_normalize_info)` line displays the data normalization details from the pipeline's last trace. + > + > 📌 **Note:** If you have a default bucket URL set in "/.dlt/config.toml", you can omit the `bucket_url` parameter. 1. To load only new CSV files: @@ -378,29 +376,28 @@ verified source. ``` 1. To read and load Parquet and JSONL from a bucket: - ```python BUCKET_URL = "Please set me up!" #path of the bucket url or local destination jsonl_reader = readers(BUCKET_URL, file_glob="**/*.jsonl").read_jsonl( chunksize=10000 ) - # PARQUET reading - parquet_reader = readers(BUCKET_URL, file_glob="**/*.parquet").read_parquet() - # load both folders together to specified tables - load_info = pipeline.run( + # PARQUET reading + parquet_reader = readers(BUCKET_URL, file_glob="**/*.parquet").read_parquet() + # load both folders together to specified tables + load_info = pipeline.run( [ jsonl_reader.with_name("jsonl_team_data"), parquet_reader.with_name("parquet_team_data"), - ] - ) - print(load_info) - print(pipeline.last_trace.last_normalize_info) + ] + ) + print(load_info) + print(pipeline.last_trace.last_normalize_info) ``` - - > `file_glob`: Specifies file pattern; reads all JSONL and Parquet files across directories.
- > `chunksize`: Set to 10,000; data read in chunks of 10,000 records each.
- > `print(pipeline.last_trace.last_normalize_info)` displays the data normalization details from - > the pipeline's last trace.
+ > The `file_glob`: Specifies file pattern; reads all JSONL and Parquet files across directories. + > + > The `chunksize`: Set to 10,000; data read in chunks of 10,000 records each. + > + >`print(pipeline.last_trace.last_normalize_info)`: Displays the data normalization details from the pipeline's last trace. 1. To set up a pipeline that reads from an Excel file using a standalone transformer: From a4c8f5b726106856ccf7c0426e60eaeee48d0ffa Mon Sep 17 00:00:00 2001 From: dat-a-man Date: Tue, 24 Oct 2023 09:23:27 +0530 Subject: [PATCH 5/5] Updated --- ...stem.md => readers_source_&_filesystem.md} | 76 +++++++++++-------- docs/website/sidebars.js | 2 +- 2 files changed, 47 insertions(+), 31 deletions(-) rename docs/website/docs/dlt-ecosystem/verified-sources/{filesystem.md => readers_source_&_filesystem.md} (87%) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md b/docs/website/docs/dlt-ecosystem/verified-sources/readers_source_&_filesystem.md similarity index 87% rename from docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md rename to docs/website/docs/dlt-ecosystem/verified-sources/readers_source_&_filesystem.md index 0b9ce790a9..e63ec9f1de 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/readers_source_&_filesystem.md @@ -1,7 +1,8 @@ -______________________________________________________________________ - -## title: Readers Source & Filesystem description: dlt verified source for Readers Source & Filesystem keywords: \[readers source and filesystem, filesystem, readers source\] - +--- +title: Readers Source & Filesystem +description: dlt verified source for Readers Source & Filesystem keywords: +keywords: [readers source and filesystem, filesystem, readers source\] +--- # Readers Source & Filesystem :::info Need help deploying these sources, or figuring out how to run them in your data stack? @@ -11,7 +12,7 @@ or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support ::: This verified source easily streams files from s3, gcs, azure, or local filesystem using the reader -source. Currently the following reader sources are supported: +source. Currently supports the following: - read_csv (with Pandas) - read_jsonl @@ -28,7 +29,7 @@ Sources and resources that can be loaded using this verified source are: We advice that you give each resource a [specific name](https://dlthub.com/docs/general-usage/resource#duplicate-and-rename-resources) -before loading with pipeline.run. This will make sure that data goes to a table with the name you +before loading with `pipeline.run`. This will make sure that data goes to a table with the name you want and that each pipeline uses a [separate state for incremental loading.](https://dlthub.com/docs/general-usage/state#read-and-write-pipeline-state-in-a-resource) @@ -37,9 +38,8 @@ want and that each pipeline uses a Use the [standalone filesystem](https://dlthub.com/docs/general-usage/resource#declare-a-standalone-resource) resource to list files in s3, gcs, and azure buckets. This allows you to customize file readers or -manage files using fsspec. For details, see the -[fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/index.html). The filesystem -ensures consistent file representation across bucket types and offers methods to access and read +manage files using [fsspec](https://filesystem-spec.readthedocs.io/en/latest/index.html). 
+The filesystem ensures consistent file representation across bucket types and offers methods to access and read data. You can quickly build pipelines to: - Extract text from PDFs @@ -56,9 +56,9 @@ This source can access various bucket types, including: - GCS Cloud Storage - Azure Blob Storage -To access these, you'll need secret credentials obtained as follows: +To access these, you'll need secret credentials: -#### AWS S3 credentials +#### AWS S3 credentials To get AWS keys for S3 access: @@ -69,7 +69,7 @@ To get AWS keys for S3 access: For more info, see [AWS official documentation.](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) -#### GCS Cloud Storage credentials +#### GCS Cloud Storage credentials To get GCS cloud storage access: @@ -84,7 +84,7 @@ To get GCS cloud storage access: For more info, see how to [create service account](https://support.google.com/a/answer/7378726?hl=en). -#### Azure Blob Storage credentials +#### Azure Blob Storage credentials To obtain Azure blob storage access: @@ -131,14 +131,16 @@ For more information, read the aws_access_key_id="Please set me up!" aws_secret_access_key="Please set me up!" - # For Azure blob storage access: - azure_storage_account_name="Please set me up!" - azure_storage_account_key="Please set me up!" - # For GCS storage bucket access: client_email="Please set me up!" private_key="Please set me up!" project_id="Please set me up!" + + # For Azure blob storage access: + azure_storage_account_name="Please set me up!" + azure_storage_account_key="Please set me up!" + + ``` 1. Finally, enter credentials for your chosen destination as per the [docs](../destinations/). @@ -208,8 +210,7 @@ For more information, read the [Walkthrough: Run a pipeline](../../walkthroughs/ ### Source `readers` -This source provides resources that are chunked file readers. You can customize these readers -optionally, resources provided are: +This source offers chunked file readers as resources, which can be optionally customized. Provided resources include: - read_csv - read_jsonl @@ -235,8 +236,7 @@ listing in the bucket. ### Resource `filesystem` -This resource enumerates files in bucket_url using the file_glob pattern. Files are returned as -FileItem with methods to access data. Pair with transformers for further processing. +This resource lists files in bucket_url based on the file_glob pattern, returning them as "FileItem" with data access methods. These can be paired with transformers for enhanced processing. ```python @dlt.resource( @@ -267,10 +267,9 @@ in bucket_url. ### Filesystem Usage -- The filesystem tool lists files in a chosen bucket based on a glob pattern and returns file - details (as FileInfo) in adjustable page sizes. -- The resource is designed to work with transform functions and transformers for custom extract - pipelines. +- The filesystem tool enumerates files in a selected bucket using a glob pattern, returning details as FileInfo in customizable page sizes. + +- This resource integrates with transform functions and transformers for customised extraction pipelines. To load data into a specific table (instead of the default filesystem table), see the snippet below: @@ -314,7 +313,7 @@ pipeline.run(met_files.with_name("met_csv")) > instance, using the resource: > `filesystem("az://dlt-ci-test-bucket/standard_source/samples", file_glob="met_csv/A801/*.csv")` > will produce file names relative to the /standard_source/samples path, such as -> met_csv/A801/A881_20230920.csv. +> "met_csv/A801/A881_20230920.csv". 
### File Manipulation @@ -347,12 +346,12 @@ verified source. ```python BUCKET_URL = "YOUR_BUCKET_PATH_HERE" met_files = readers( - bucket_url=BUCKET_URL, file_glob="met_csv/A801/*.csv" + bucket_url=BUCKET_URL, file_glob="directory/*.csv" ).read_csv() # tell dlt to merge on date met_files.apply_hints(write_disposition="merge", merge_key="date") # We load the data into the met_csv table - load_info = pipeline.run(met_files.with_name("met_csv")) + load_info = pipeline.run(met_files.with_name("table_name")) print(load_info) print(pipeline.last_trace.last_normalize_info) ``` @@ -363,11 +362,11 @@ verified source. > > 📌 **Note:** If you have a default bucket URL set in "/.dlt/config.toml", you can omit the `bucket_url` parameter. -1. To load only new CSV files: +1. To load only new CSV files, [incremental loading](https://dlthub.com/docs/general-usage/incremental-loading): ```python # This configuration will only consider new csv files - new_files = filesystem(bucket_url=BUCKET_URL, file_glob="met_csv/A801/*.csv") + new_files = filesystem(bucket_url=BUCKET_URL, file_glob="directory/*.csv") # add incremental on modification time new_files.apply_hints(incremental=dlt.sources.incremental("modification_date")) load_info = pipeline.run((new_files | read_csv()).with_name("csv_files")) @@ -472,3 +471,20 @@ verified source. print(load_info) print(pipeline.last_trace.last_normalize_info) ``` + +1. Cleanup after loading: + + You can get fsspec client from filesystem resource after it was extracted i.e. in order to delete processed files etc. The filesystem module contains a convenient method "fsspec_from_resource" that can be used as follows: + + ```python + from filesystem import filesystem, fsspec_from_resource + # get filesystem source + gs_resource = filesystem("gs://ci-test-bucket/") + # extract files + pipeline.run(gs_resource | read_csv) + # get fs client + fs_client = fsspec_from_resource(gs_resource) + # do any operation + fs_client.ls("ci-test-bucket/standard_source/samples") + ``` + diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index 168ebaa137..6ab64ff651 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -41,7 +41,6 @@ const sidebars = { 'dlt-ecosystem/verified-sources/asana', 'dlt-ecosystem/verified-sources/chess', 'dlt-ecosystem/verified-sources/facebook_ads', - 'dlt-ecosystem/verified-sources/filesystem', 'dlt-ecosystem/verified-sources/github', 'dlt-ecosystem/verified-sources/google_analytics', 'dlt-ecosystem/verified-sources/google_sheets', @@ -53,6 +52,7 @@ const sidebars = { 'dlt-ecosystem/verified-sources/mux', 'dlt-ecosystem/verified-sources/notion', 'dlt-ecosystem/verified-sources/pipedrive', + 'dlt-ecosystem/verified-sources/readers_source_&_filesystem', 'dlt-ecosystem/verified-sources/salesforce', 'dlt-ecosystem/verified-sources/shopify', 'dlt-ecosystem/verified-sources/sql_database',