prepare tests for examples and run them

dlt-hub · Mar 25, 2024 · 4f5fe0b · 4f5fe0b
1 parent 78ae021
commit 4f5fe0b
Show file tree

Hide file tree

Showing 5 changed files with 81 additions and 26 deletions.
diff --git a/Makefile b/Makefile
@@ -69,6 +69,7 @@ lint-and-test-snippets:
 lint-and-test-examples:
 	poetry run mypy --config-file mypy.ini docs/examples
 	poetry run flake8 --max-line-length=200 docs/examples
+	cd docs/tools && poetry run python prepare_examples_tests.py
 	cd docs/examples && poetry run pytest
 
 

diff --git a/docs/examples/chess_production/.dlt/config.toml b/docs/examples/chess_production/.dlt/config.toml
@@ -0,0 +1 @@
+chess_url="https://api.chess.com/pub/"
diff --git a/docs/examples/chess_production/chess_production.py b/docs/examples/chess_production/chess_production.py
@@ -35,7 +35,6 @@
 from dlt.common.runtime.slack import send_slack_message
 
 
-
 @dlt.source
 def chess(
     chess_url: str = dlt.config.value,

diff --git a/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py b/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py
@@ -57,28 +57,29 @@ def pdf_to_text(file_item, separate_pages: bool = False):
         yield page_item
 
 
-pipeline = dlt.pipeline(pipeline_name="pdf_to_text", destination="weaviate")
-
-# this constructs a simple pipeline that: (1) reads files from "invoices" folder (2) filters only those ending with ".pdf"
-# (3) sends them to pdf_to_text transformer with pipe (|) operator
-pdf_pipeline = list_files("assets/invoices").add_filter(
-    lambda item: item["file_name"].endswith(".pdf")
-) | pdf_to_text(separate_pages=True)
-
-# set the name of the destination table to receive pages
-# NOTE: Weaviate, dlt's tables are mapped to classes
-pdf_pipeline.table_name = "InvoiceText"
-
-# use weaviate_adapter to tell destination to vectorize "text" column
-load_info = pipeline.run(weaviate_adapter(pdf_pipeline, vectorize="text"))
-row_counts = pipeline.last_trace.last_normalize_info
-print(row_counts)
-print("------")
-print(load_info)
-
-import weaviate
-
-client = weaviate.Client("http://localhost:8080")
-# get text of all the invoices in InvoiceText class we just created above
-print(client.query.get("InvoiceText", ["text", "file_name", "mtime", "page_id"]).do())
-assert_load_info(load_info)
+if __name__ == "__main__":
+    pipeline = dlt.pipeline(pipeline_name="pdf_to_text", destination="weaviate")
+
+    # this constructs a simple pipeline that: (1) reads files from "invoices" folder (2) filters only those ending with ".pdf"
+    # (3) sends them to pdf_to_text transformer with pipe (|) operator
+    pdf_pipeline = list_files("assets/invoices").add_filter(
+        lambda item: item["file_name"].endswith(".pdf")
+    ) | pdf_to_text(separate_pages=True)
+
+    # set the name of the destination table to receive pages
+    # NOTE: in Weaviate, dlt's tables are mapped to classes
+    pdf_pipeline.table_name = "InvoiceText"
+
+    # use weaviate_adapter to tell destination to vectorize "text" column
+    load_info = pipeline.run(weaviate_adapter(pdf_pipeline, vectorize="text"))
+    row_counts = pipeline.last_trace.last_normalize_info
+    print(row_counts)
+    print("------")
+    print(load_info)
+
+    import weaviate
+
+    client = weaviate.Client("http://localhost:8080")
+    # get text of all the invoices in InvoiceText class we just created above
+    print(client.query.get("InvoiceText", ["text", "file_name", "mtime", "page_id"]).do())
+    assert_load_info(load_info)
diff --git a/docs/tools/prepare_examples_tests.py b/docs/tools/prepare_examples_tests.py
@@ -0,0 +1,53 @@
+"""
+Creates the pytest files for our examples tests. These files will not be committed.
+"""
+import os
+import dlt.cli.echo as fmt
+
+EXAMPLES_DIR = "../examples"
+
+# settings
+SKIP_FOLDERS = ["archive", ".dlt", "__pycache__"]
+
+# the entry point for the script
+MAIN_CLAUSE = 'if __name__ == "__main__":'
+
+# header prepended to every generated test file (imports the skip marker put on each test)
+TEST_HEADER = """
+from tests.utils import skipifgithubfork
+
+"""
+
+
+if __name__ == "__main__":
+    count = 0  # number of example folders processed so far
+    for example in next(os.walk(EXAMPLES_DIR))[1]:  # immediate subfolders only
+        if example in SKIP_FOLDERS:
+            continue
+        count += 1
+
+        example_file = f"{EXAMPLES_DIR}/{example}/{example}.py"  # script named after its folder
+        test_example_file = f"{EXAMPLES_DIR}/{example}/test_{example}.py"  # generated output
+
+        with open(example_file, "r", encoding="utf-8") as f:
+            lines = f.read().split("\n")
+
+        processed_lines = TEST_HEADER.split("\n")  # generated file starts with the header
+        main_clause_found = False
+
+        for line in lines:
+            if line.startswith(MAIN_CLAUSE):  # entry point becomes the test function
+                main_clause_found = True
+                processed_lines.append("@skipifgithubfork")
+                processed_lines.append(f"def test_{example}():")
+            else:
+                processed_lines.append(line)  # everything else is copied verbatim
+
+        if not main_clause_found:
+            fmt.error(f"No main clause defined for example {example}")
+            raise SystemExit(1)  # builtin; `exit()` is injected by `site` and may be absent
+
+        with open(test_example_file, "w", encoding="utf-8") as f:
+            f.write("\n".join(processed_lines))
+
+    fmt.note(f"Prepared {count} examples for testing.")