From d2eb614121907bbbb6cf80b0a1fff6ae08773162 Mon Sep 17 00:00:00 2001 From: Sean Chatman <136349053+seanchatmangpt@users.noreply.github.com> Date: Sat, 23 Mar 2024 19:34:56 -0700 Subject: [PATCH] Added workflow DSL --- pyproject.toml | 2 +- src/dspygen/api.py | 8 +- .../experiments/control_flow/__init__.py | 0 .../control_flow/control_flow_workflow.yaml | 45 +++ .../control_flow_workflow_output_new.yaml | 61 ++++ .../control_flow/dsl_control_flow_models.py | 132 +++++++ .../control_flow/workflow_executor.py | 46 +++ .../experiments/module_docstrings/__init__.py | 0 .../generate_docstring_exec.py | 30 ++ .../react_code_gen/data_gherkin_pipeline.yaml | 60 ++++ .../generate_react_code_from_csv.py | 3 +- .../react_code_gen/hello-world.tsx | 191 ++++++++++ .../retrieve_and_generate_pipeline.py | 30 ++ src/dspygen/modules/gen_module.py | 51 +++ src/dspygen/modules/gen_pydantic_class.py | 327 ++++++++++++++++++ .../gen_pydantic_instance.py | 0 src/dspygen/modules/gen_python_primitive.py | 148 ++++++++ src/dspygen/rm/code_retriever.py | 92 +++++ src/dspygen/rm/web_retriever.py | 20 ++ .../{command_cmd.py => cmd_cmd.py} | 0 src/dspygen/subcommands/lm_cmd.py | 6 +- .../subcommands/{dsl_cmd.py => pln_cmd.py} | 3 +- src/dspygen/subcommands/rm_cmd.py | 45 +++ src/dspygen/subcommands/wkf_cmd.py | 192 ++++++++++ src/dspygen/utils/file_tools.py | 4 + src/dspygen/utils/pydantic_tools.py | 2 +- test_workflow.http | 7 + .../data_hello_world_pipeline.yaml | 0 tests/{dsl => pipeline}/gherkin_pipeline.yaml | 0 .../test_dsl_data_retriever.py | 0 .../test_dsl_pipeline_execute.py | 0 tests/{dsl => pipeline}/test_prd_generator.py | 0 tests/workflow/test_control_flow.py | 7 + 33 files changed, 1499 insertions(+), 13 deletions(-) create mode 100644 src/dspygen/experiments/control_flow/__init__.py create mode 100644 src/dspygen/experiments/control_flow/control_flow_workflow.yaml create mode 100644 src/dspygen/experiments/control_flow/control_flow_workflow_output_new.yaml create mode 100644 
src/dspygen/experiments/control_flow/dsl_control_flow_models.py create mode 100644 src/dspygen/experiments/control_flow/workflow_executor.py create mode 100644 src/dspygen/experiments/module_docstrings/__init__.py create mode 100644 src/dspygen/experiments/module_docstrings/generate_docstring_exec.py create mode 100644 src/dspygen/experiments/react_code_gen/data_gherkin_pipeline.yaml create mode 100644 src/dspygen/experiments/react_code_gen/hello-world.tsx create mode 100644 src/dspygen/experiments/react_code_gen/retrieve_and_generate_pipeline.py create mode 100644 src/dspygen/modules/gen_module.py create mode 100644 src/dspygen/modules/gen_pydantic_class.py rename src/dspygen/{experiments => modules}/gen_pydantic_instance.py (100%) create mode 100644 src/dspygen/modules/gen_python_primitive.py create mode 100644 src/dspygen/rm/code_retriever.py create mode 100644 src/dspygen/rm/web_retriever.py rename src/dspygen/subcommands/{command_cmd.py => cmd_cmd.py} (100%) rename src/dspygen/subcommands/{dsl_cmd.py => pln_cmd.py} (98%) create mode 100644 src/dspygen/subcommands/rm_cmd.py create mode 100644 src/dspygen/subcommands/wkf_cmd.py create mode 100644 test_workflow.http rename tests/{dsl => pipeline}/data_hello_world_pipeline.yaml (100%) rename tests/{dsl => pipeline}/gherkin_pipeline.yaml (100%) rename tests/{dsl => pipeline}/test_dsl_data_retriever.py (100%) rename tests/{dsl => pipeline}/test_dsl_pipeline_execute.py (100%) rename tests/{dsl => pipeline}/test_prd_generator.py (100%) create mode 100644 tests/workflow/test_control_flow.py diff --git a/pyproject.toml b/pyproject.toml index 812bd00..55e8c0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] # https://python-poetry.org/docs/pyproject/ name = "dspygen" -version = "2024.3.22" +version = "2024.3.23" description = "A Ruby on Rails style framework for the DSPy (Demonstrate, Search, Predict) project for Language Models like GPT, BERT, and 
LLama." authors = ["Sean Chatman "] readme = "README.md" diff --git a/src/dspygen/api.py b/src/dspygen/api.py index d436d57..47776fa 100644 --- a/src/dspygen/api.py +++ b/src/dspygen/api.py @@ -1,8 +1,5 @@ """dspygen REST API.""" -import importlib import logging -import os -from fastapi.middleware.cors import CORSMiddleware # Import CORS middleware import coloredlogs @@ -19,11 +16,11 @@ import os from dspygen.dsl.dsl_pipeline_executor import router as pipeline_router - - +from dspygen.experiments.control_flow.workflow_executor import router as workflow_router app.include_router(pipeline_router) +app.include_router(workflow_router) def load_module_routers(app: FastAPI): @@ -66,4 +63,3 @@ def ping_pong(): allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], # Adjust as per your requirements allow_headers=["*"], # Adjust this to your specific headers if needed ) - diff --git a/src/dspygen/experiments/control_flow/__init__.py b/src/dspygen/experiments/control_flow/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/dspygen/experiments/control_flow/control_flow_workflow.yaml b/src/dspygen/experiments/control_flow/control_flow_workflow.yaml new file mode 100644 index 0000000..da164fc --- /dev/null +++ b/src/dspygen/experiments/control_flow/control_flow_workflow.yaml @@ -0,0 +1,45 @@ +name: SalesDataAnalysisWorkflow +triggers: manual +jobs: + - name: SalesDataProcessing + runner: python + steps: + - name: InitializeSalesData + code: | + sales_data = [ + {'id': 1, 'amount': 150, 'product': 'Laptop', 'region': 'North'}, + {'id': 2, 'amount': 90, 'product': 'Tablet', 'region': 'West'}, + {'id': 3, 'amount': 200, 'product': 'Laptop', 'region': 'East'}, + {'id': 4, 'amount': 30, 'product': 'Mouse', 'region': 'South'}, + {'id': 5, 'amount': 120, 'product': 'Keyboard', 'region': 'North'} + ] + print('Sales data initialized.') + + - name: FilterHighValueSales + code: | + global high_value_sales + high_value_sales = [sale for sale in sales_data if 
sale['amount'] > 100] + print(f'High value sales filtered: {high_value_sales}') + if_: + expr: "len(sales_data) > 0" + + - name: PrintHighValueSalesDetails + code: | + for sale in high_value_sales: + print(f"Sale ID: {sale['id']}, Amount: ${sale['amount']}, Product: {sale['product']}, Region: {sale['region']}") + loop: + over: "high_value_sales" + var: "sale" + + - name: CalculateSummaryStatistics + code: | + total_sales_amount = sum(sale['amount'] for sale in high_value_sales) + average_sales_amount = total_sales_amount / len(high_value_sales) + print(f'Total sales amount from high value sales: ${total_sales_amount}') + print(f'Average sales amount from high value sales: ${average_sales_amount}') + if_: + expr: "len(high_value_sales) > 0" + + - name: Summary + code: | + print('Workflow execution completed. High value sales analysis done.') diff --git a/src/dspygen/experiments/control_flow/control_flow_workflow_output_new.yaml b/src/dspygen/experiments/control_flow/control_flow_workflow_output_new.yaml new file mode 100644 index 0000000..a4fdd1f --- /dev/null +++ b/src/dspygen/experiments/control_flow/control_flow_workflow_output_new.yaml @@ -0,0 +1,61 @@ +jobs: +- depends: null + env: null + name: SalesDataProcessing + runner: python + steps: + - args: null + code: "sales_data = [\n {'id': 1, 'amount': 150, 'product': 'Laptop', 'region': 'North'},\n {'id': 2, 'amount': 90, 'product': 'Tablet', 'region': 'West'},\n {'id': 3, 'amount': 200, 'product': 'Laptop', 'region': 'East'},\n {'id': 4, 'amount': 30, 'product': 'Mouse', 'region': 'South'},\n {'id': 5, 'amount': 120, 'product': 'Keyboard', 'region': 'North'}\n]\nprint('Sales data initialized.')\n" + cond: null + env: null + loop: null + name: InitializeSalesData + use: null + - args: null + code: 'global high_value_sales + + high_value_sales = [sale for sale in sales_data if sale[''amount''] > 100] + + print(f''High value sales filtered: {high_value_sales}'') + + ' + cond: null + env: null + loop: null + name: 
FilterHighValueSales + use: null + - args: null + code: "for sale in high_value_sales:\n print(f\"Sale ID: {sale['id']}, Amount: ${sale['amount']}, Product: {sale['product']}, Region: {sale['region']}\")\n" + cond: null + env: null + loop: + over: high_value_sales + var: sale + name: PrintHighValueSalesDetails + use: null + - args: null + code: 'total_sales_amount = sum(sale[''amount''] for sale in high_value_sales) + + average_sales_amount = total_sales_amount / len(high_value_sales) + + print(f''Total sales amount from high value sales: ${total_sales_amount}'') + + print(f''Average sales amount from high value sales: ${average_sales_amount}'') + + ' + cond: null + env: null + loop: null + name: CalculateSummaryStatistics + use: null + - args: null + code: 'print(''Workflow execution completed. High value sales analysis done.'') + + ' + cond: null + env: null + loop: null + name: Summary + use: null +name: SalesDataAnalysisWorkflow +triggers: manual diff --git a/src/dspygen/experiments/control_flow/dsl_control_flow_models.py b/src/dspygen/experiments/control_flow/dsl_control_flow_models.py new file mode 100644 index 0000000..cd91bdb --- /dev/null +++ b/src/dspygen/experiments/control_flow/dsl_control_flow_models.py @@ -0,0 +1,132 @@ +import json +from typing import List, Union, Dict, Any, Optional +from pydantic import BaseModel, validator, Field + +from dspygen.utils.yaml_tools import YAMLMixin + + +class Condition(BaseModel): + expr: str = Field(..., description="Expression to evaluate the condition.") + + +class Loop(BaseModel): + over: str = Field(..., description="Iterable expression.") + var: str = Field(..., description="Variable name for current item.") + + +class Action(BaseModel): + name: str + use: Optional[str] = None + args: Optional[Dict[str, Any]] = None + code: Optional[str] = None + env: Optional[Dict[str, str]] = None + cond: Optional[Condition] = Field(None) + loop: Optional[Loop] = None + + +class Job(BaseModel): + name: str + depends: 
Optional[List[str]] = None + runner: str + steps: List[Action] + env: Optional[Dict[str, str]] = None + + +class Workflow(BaseModel, YAMLMixin): + name: str + triggers: Union[str, List[str]] + jobs: List[Job] + + +def evaluate_condition(condition: str, context: Dict[str, Any]) -> bool: + """ + Evaluates a condition expression against the given context. + """ + try: + return eval(condition, {}, context) + except Exception as e: + print(f"Error evaluating condition '{condition}': {e}") + return False + + +def execute_action(action: Action, context: Dict[str, Any]) -> Dict[str, Any]: + """ + Executes a single action based on its type (use or code) and updates the context. + """ + new_context = context.copy() + if action.cond and not evaluate_condition(action.cond.expr, context): + print(f"Skipping action {action.name} due to condition") + return new_context + + if action.use: + print(f"Executing module {action.use} with args {action.args}") + elif action.code: + # Prepare an isolated yet shared execution environment + local_context = {} + global_context = context + exec(action.code, global_context, local_context) + + # Merge local changes back into the global context + context.update(local_context) + + return context + + +def execute_loop(loop: Loop, actions: List[Action], context: Dict[str, Any]) -> Dict[str, Any]: + """ + Iterates over a loop, executing contained actions for each item. + """ + items = eval(loop.over, {}, context) + for item in items: + loop_context = context.copy() + loop_context[loop.var] = item + for action in actions: + # Ensure loop_context is updated with each action's changes + loop_context = execute_action(action, loop_context) + return loop_context + + +def execute_job(job: Job, global_context: Dict[str, Any]) -> Dict[str, Any]: + """ + Executes all actions in a job, respecting conditions and loops. 
+ """ + job_context = {**global_context, **(job.env or {})} + for action in job.steps: + if action.loop: + job_context = execute_loop(action.loop, [action], job_context) + else: + job_context = execute_action(action, job_context) + return job_context + + +def execute_workflow(workflow: Workflow, init_ctx: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """ + Orchestrates the execution of all jobs in a workflow. + """ + global_context = init_ctx + for job in workflow.jobs: + # In a real scenario, respect job.depends for execution order + global_context = execute_job(job, global_context) + print("Workflow execution completed.") + return global_context + + +def serialize_context(context): + serialized_context = {} + for key, value in context.items(): + try: + json.dumps(value) # Test if value is serializable + serialized_context[key] = value + except (TypeError, ValueError): + serialized_context[key] = str(value) # Convert non-serializable types to string + return serialized_context + + +def main(): + wf = Workflow.from_yaml("control_flow_workflow.yaml") + execute_workflow(wf) + wf.to_yaml("control_flow_workflow_output_new.yaml") + + +if __name__ == '__main__': + main() diff --git a/src/dspygen/experiments/control_flow/workflow_executor.py b/src/dspygen/experiments/control_flow/workflow_executor.py new file mode 100644 index 0000000..95dae0a --- /dev/null +++ b/src/dspygen/experiments/control_flow/workflow_executor.py @@ -0,0 +1,46 @@ +import json +import os +import tempfile +from typing import Optional + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from dspygen.experiments.control_flow.dsl_control_flow_models import execute_workflow, Workflow, serialize_context + +router = APIRouter() + + +class WorkflowRequest(BaseModel): + yaml_content: str + init_ctx: Optional[dict] = None + + +@router.post("/execute_workflow/") +async def run_workflow(request: WorkflowRequest): + try: + # Create a temporary file to hold the YAML content + 
with tempfile.NamedTemporaryFile(delete=False, mode='w+', suffix='.yaml') as tmp: + tmp.write(request.yaml_content) + tmp_path = tmp.name + + wf = Workflow.from_yaml(tmp_path) + + context = execute_workflow(wf, request.init_ctx) + + # Optionally, clean up the temporary file after execution + os.remove(tmp_path) + + # Convert the context to a dictionary making sure it is JSON serializable + # context = {k: v for k, v in context.items() if isinstance(v, (str, int, float, list, dict, bool, type(None)))} + + serializable_context = serialize_context(context) + + del serializable_context["__builtins__"] + + return serializable_context + except Exception as e: + # Ensure the temporary file is removed even if an error occurs + if 'tmp_path' in locals(): + os.remove(tmp_path) + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/src/dspygen/experiments/module_docstrings/__init__.py b/src/dspygen/experiments/module_docstrings/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/dspygen/experiments/module_docstrings/generate_docstring_exec.py b/src/dspygen/experiments/module_docstrings/generate_docstring_exec.py new file mode 100644 index 0000000..2bf14c2 --- /dev/null +++ b/src/dspygen/experiments/module_docstrings/generate_docstring_exec.py @@ -0,0 +1,30 @@ +import os + +import inflection +from slugify import slugify + +from dspygen.dsl.dsl_pipeline_executor import execute_pipeline + + +def feature_code_generation(): + + context = execute_pipeline(f'{os.getcwd()}/data_gherkin_pipeline.yaml', + init_ctx={"file_path": f"{os.getcwd()}/features.csv"}) + + file_name = "hello-world" # slugify(f"{inflection.underscore(result['FeatureDescription'])}") + + with open(f"{file_name}.tsx", 'w') as f: + code = context.react_code + # remove trailing ``` if present + if code.endswith("```"): + code = code[:-3] + f.write(context.react_code) + print(f"React JSX code written to {file_name}") + + +def main(): + 
feature_code_generation() + + +if __name__ == '__main__': + main() diff --git a/src/dspygen/experiments/react_code_gen/data_gherkin_pipeline.yaml b/src/dspygen/experiments/react_code_gen/data_gherkin_pipeline.yaml new file mode 100644 index 0000000..0bc39c5 --- /dev/null +++ b/src/dspygen/experiments/react_code_gen/data_gherkin_pipeline.yaml @@ -0,0 +1,60 @@ +# gherkin_pipeline.yaml +lm_models: + - label: "default" + name: "OpenAI" + args: + max_tokens: 3000 + + - label: "smart" + name: "OpenAI" + args: + model: "gpt-4" + max_tokens: 6000 + +rm_models: + - label: "data_retriever" + name: "DataRetriever" + +signatures: + - name: "GenerateGherkinSignature" + docstring: "Generates a comprehensive gherkin from structured data." + inputs: + - name: "data" + desc: "Structured data to be included in the gherkin." + outputs: + - name: "gherkin" + desc: "The final gherkin generated from the structured data." + + - name: "GenerateReactCodeSignature" + docstring: "Generates Typescript React code to enable the functionalities described in the provided Gherkin scenarios, with an emphasis on code quality." + inputs: + - name: "gherkin" + desc: "The Gherkin scenarios." + outputs: + - name: "react_code" + desc: "The generated React functionl component code with hooks. Write only one ```tsx block in the response." 
+ prefix: "```tsx\n" + +lm_modules: + - name: "GherkinGeneratorModule" + signature: "GenerateGherkinSignature" + predictor: "ChainOfThought" + + - name: "ReactCodeGeneratorModule" + signature: "GenerateReactCodeSignature" + predictor: "Predictor" + + +steps: + - module: "FeatureDataModule" + rm_model: "data_retriever" + + - module: "GherkinGeneratorModule" + lm_model: "default" + args: + data: "{{ data }}" + + - module: "ReactCodeGeneratorModule" + lm_model: "default" + args: + gherkin: "{{ gherkin }}" \ No newline at end of file diff --git a/src/dspygen/experiments/react_code_gen/generate_react_code_from_csv.py b/src/dspygen/experiments/react_code_gen/generate_react_code_from_csv.py index b2224b3..058eab0 100644 --- a/src/dspygen/experiments/react_code_gen/generate_react_code_from_csv.py +++ b/src/dspygen/experiments/react_code_gen/generate_react_code_from_csv.py @@ -7,7 +7,8 @@ def feature_code_generation(): - context = execute_pipeline(f'{os.getcwd()}/feature_data_pipeline.yaml', init_ctx={"file_path": f"{os.getcwd()}/features.csv"}) + context = execute_pipeline(f'{os.getcwd()}/feature_data_pipeline.yaml', + init_ctx={"file_path": f"{os.getcwd()}/features.csv"}) for result in context.data: print(result) diff --git a/src/dspygen/experiments/react_code_gen/hello-world.tsx b/src/dspygen/experiments/react_code_gen/hello-world.tsx new file mode 100644 index 0000000..8f3b740 --- /dev/null +++ b/src/dspygen/experiments/react_code_gen/hello-world.tsx @@ -0,0 +1,191 @@ +import React, { useState, useEffect } from 'react'; + +interface Document { + name: string; + url: string; +} + +interface Signer { + name: string; + email: string; +} + +interface Signature { + name: string; + image: string; +} + +interface DocumentSigningProps { + document: Document; + signer: Signer; + signature: Signature; +} + +const DocumentSigning: React.FC = ({ + document, + signer, + signature, +}) => { + const [uploadedDocument, setUploadedDocument] = useState( + null + ); + const [signingLink, 
setSigningLink] = useState(null); + const [signatureImage, setSignatureImage] = useState(null); + const [signaturePlacement, setSignaturePlacement] = useState(false); + const [confirmationEmail, setConfirmationEmail] = useState(false); + const [signatureValidated, setSignatureValidated] = useState(false); + const [signedDocument, setSignedDocument] = useState(null); + const [customInstructions, setCustomInstructions] = useState( + null + ); + const [linkExpired, setLinkExpired] = useState(false); + + // Function to handle document upload + const handleDocumentUpload = (file: File) => { + // Code to upload document to server and set uploadedDocument state + setUploadedDocument({ + name: file.name, + url: 'https://example.com/document.pdf', + }); + }; + + // Function to generate unique signing link + const generateSigningLink = () => { + // Code to generate unique link and set signingLink state + setSigningLink('https://example.com/signing-link'); + }; + + // Function to handle sending link to signer's email + const sendLinkToSigner = () => { + // Code to send email to signer with signing link + setConfirmationEmail(true); + }; + + // Function to handle signature capture + const handleSignatureCapture = (image: string) => { + // Code to capture signature and set signatureImage state + setSignatureImage(image); + }; + + // Function to handle document preview + const handleDocumentPreview = () => { + // Code to display signed document in preview mode + setSignedDocument({ + name: document.name, + url: 'https://example.com/signed-document.pdf', + }); + }; + + // Function to save signature for future use + const saveSignature = () => { + // Code to save signature and set signature state + setSignatureImage('https://example.com/signature.png'); + }; + + // Function to handle signature placement confirmation + const confirmSignaturePlacement = () => { + // Code to confirm signature placement and set signaturePlacement state + setSignaturePlacement(true); + }; + + // 
Function to send confirmation email to sender + const sendConfirmationEmail = () => { + // Code to send email to sender confirming document has been signed + setConfirmationEmail(true); + }; + + // Function to validate signature + const validateSignature = () => { + // Code to validate signature and set signatureValidated state + setSignatureValidated(true); + }; + + // Function to handle document download + const handleDocumentDownload = () => { + // Code to download signed document to device + setSignedDocument({ + name: document.name, + url: 'https://example.com/signed-document.pdf', + }); + }; + + // Function to handle custom signing instructions + const handleCustomInstructions = (instructions: string) => { + // Code to set custom instructions state + setCustomInstructions(instructions); + }; + + // Function to handle link expiration + const handleLinkExpiration = () => { + // Code to set linkExpired state + setLinkExpired(true); + }; + + // UseEffect hook to handle mobile responsive design + useEffect(() => { + // Code to check if user is accessing page on a mobile device and adjust layout accordingly + }, []); + + // UseEffect hook to handle API integration + useEffect(() => { + // Code to integrate document signing API into application + }, []); + + // UseEffect hook to handle drag-and-drop document upload + useEffect(() => { + // Code to handle drag-and-drop functionality + }, []); + + return ( +
+ {/* Document Upload */} + handleDocumentUpload(e.target.files[0])} /> + + {/* Generate Unique Signing Link */} + + + {/* Email Link to Signer */} + + + + {/* Signature Capture */} + + + {/* Document Preview */} + + + {/* Save Signature */} + + + {/* Confirm Signature Placement */} + + + {/* Email Confirmation to Sender */} + + + {/* Signature Validation */} + + + {/* Document Download */} + + + {/* Mobile Responsive Design */} + {/* Code to handle mobile responsive design */} + + {/* API for Document Management */} + {/* Code to integrate document signing API into application */} + + {/* Drag-and-Drop Document Upload */} + {/* Code to handle drag-and-drop functionality */} + + {/* Custom Signing Instructions */} + handleCustomInstructions(e.target.value)} /> + + {/* Link Expiration */} + {/* Code to handle link expiration */} +
+ ); +}; + +export default DocumentSigning; +``` \ No newline at end of file diff --git a/src/dspygen/experiments/react_code_gen/retrieve_and_generate_pipeline.py b/src/dspygen/experiments/react_code_gen/retrieve_and_generate_pipeline.py new file mode 100644 index 0000000..2bf14c2 --- /dev/null +++ b/src/dspygen/experiments/react_code_gen/retrieve_and_generate_pipeline.py @@ -0,0 +1,30 @@ +import os + +import inflection +from slugify import slugify + +from dspygen.dsl.dsl_pipeline_executor import execute_pipeline + + +def feature_code_generation(): + + context = execute_pipeline(f'{os.getcwd()}/data_gherkin_pipeline.yaml', + init_ctx={"file_path": f"{os.getcwd()}/features.csv"}) + + file_name = "hello-world" # slugify(f"{inflection.underscore(result['FeatureDescription'])}") + + with open(f"{file_name}.tsx", 'w') as f: + code = context.react_code + # remove trailing ``` if present + if code.endswith("```"): + code = code[:-3] + f.write(context.react_code) + print(f"React JSX code written to {file_name}") + + +def main(): + feature_code_generation() + + +if __name__ == '__main__': + main() diff --git a/src/dspygen/modules/gen_module.py b/src/dspygen/modules/gen_module.py new file mode 100644 index 0000000..46ed8e6 --- /dev/null +++ b/src/dspygen/modules/gen_module.py @@ -0,0 +1,51 @@ +import logging # Import the logging module +from typing import Optional + +from dspy import ChainOfThought, Module, OpenAI, settings + +logger = logging.getLogger(__name__) # Create a logger instance +logger.setLevel( + logging.ERROR +) # Set the logger's level to ERROR or the appropriate level + + +class GenModule(Module): + def __init__(self, output_key, input_keys: Optional[list[str]] = None, lm=None): + if input_keys is None: + self.input_keys = ["prompt"] + else: + self.input_keys = input_keys + + super().__init__() + + self.output_key = output_key + + # Define the generation and correction queries based on generation_type + self.signature = ", ".join(self.input_keys) + f" -> 
{self.output_key}" + self.correction_signature = ( + ", ".join(self.input_keys) + f", error -> {self.output_key}" + ) + + # DSPy modules for generation and correction + self.generate = ChainOfThought(self.signature) + self.correct_generate = ChainOfThought(self.correction_signature) + + def forward(self, **kwargs): + # Generate the output using provided inputs + gen_result = self.generate(**kwargs) + output = gen_result.get(self.output_key) + + # Try validating the output + try: + return self.validate_output(output) + except (AssertionError, ValueError, TypeError) as error: + logger.error(error) + logger.error(output) + # Correction attempt + corrected_result = self.correct_generate(**kwargs, error=str(error)) + corrected_output = corrected_result.get(self.output_key) + return self.validate_output(corrected_output) + + def validate_output(self, output): + # Implement validation logic or override in subclass + raise NotImplementedError("Validation logic should be implemented in subclass") diff --git a/src/dspygen/modules/gen_pydantic_class.py b/src/dspygen/modules/gen_pydantic_class.py new file mode 100644 index 0000000..76f4362 --- /dev/null +++ b/src/dspygen/modules/gen_pydantic_class.py @@ -0,0 +1,327 @@ +import inflection +from pydantic import BaseModel, Field + +import dspy +from dspy import InputField, OutputField, Signature + +from dspygen.modules.gen_pydantic_instance import GenPydanticInstance +from dspygen.typetemp.functional import render + + +class FieldTemplateSpecificationModel(BaseModel): + field_name: str = Field( + ..., + description="The name of the field in the model. No prefixes, suffixes, or abbreviations.", + ) + field_type: str = Field( + ..., + description="The data type of the field, e.g., 'str', 'int', 'EmailStr', or 'datetime'. No dict or classes.", + ) + default_value: str | int | None = Field( + "...", + description="The default value for the field if not provided. 
", + ) + description: str = Field( + ..., + description="A detailed description of the field's purpose and usage.", + ) + constraints: str | None = Field( + None, + description="Constraints or validation rules for the field, if any. Specify as a string, e.g., 'min_length=2, max_length=50' or 'ge=0, le=120'.", + ) + + +class ConfigTemplateSpecificationModel(BaseModel): + title: str = Field( + ..., + description="The title for the BaseModel configuration.", + ) + description: str = Field( + ..., + description="A detailed description of the BaseModel configuration's purpose and usage.", + ) + allow_population_by_field_name: bool = Field( + True, + description="Whether to allow populating a model using field names.", + ) + underscore_attrs_are_private: bool = Field( + False, + description="Whether to treat underscore-prefixed attributes as private (no validation).", + ) + alias_generator: str = Field( + ..., + description="The alias generator to use for field aliasing.", + ) + + +class ValidatorTemplateSpecificationModel(BaseModel): + validator_name: str = Field( + ..., + title="Validator Name", + description="The name of the validator.", + ) + description: str = Field( + ..., + title="Description", + description="A detailed description of the validator's purpose and usage.", + ) + parameters: list[str] = Field( + [], + title="Parameters", + description="A list of parameter names accepted by the validator.", + ) + + +class PydanticClassTemplateSpecificationModel(BaseModel): + class_name: str = Field( + ..., + description="The class name of the Pydantic model.", + ) + description: str = Field( + ..., + description="A detailed description of the Pydantic model's purpose and usage.", + ) + fields: list[FieldTemplateSpecificationModel] = Field( + ..., + description="A list of field specifications for the model. Each field specifies the name, type, default value, description, and constraints. 
15 fields max.", + ) + + +class_template_str = '''from pydantic import BaseModel, Field, validator, root_validator, EmailStr, UrlStr +from typing import List, Optional +from datetime import datetime + + +class {{ model.class_name }}(BaseModel): + """{{ model.description }}""" + {% for field in model.fields %} + {{ field.field_name | underscore }}: {{ field.field_type }} = Field(default={{ field.default_value }}, title="{{ field.title }}", description="{{ field.description }}"{% if field.constraints %}, {{ field.constraints }}{% endif %}) + {% endfor %} + + {% if model.validators|length > 0 %} + {% for validator in model.validators %} + @validator('{{ validator.parameters|join("', '") }}') + def {{ validator.validator_name }}(cls, value): + # {{ validator.description }} + return value + {% endfor %} + {% endif %} + {% if model.config %} + class Config: + {% if model.config.allow_population_by_field_name %}allow_population_by_field_name = True{% endif %} + {% if model.config.underscore_attrs_are_private %}underscore_attrs_are_private = True{% endif %} + {% if model.config.alias_generator %}alias_generator = {{ model.config.alias_generator }}{% endif %} + {% endif %} +''' + + +def write_pydantic_class_to_file(class_str, filename): + with open(filename, "w") as file: + file.write(class_str) + + +class PromptToPydanticInstanceSignature(Signature): + """Converts a prompt into Pydantic model initialization kwargs.""" + + root_pydantic_model_class_name = InputField( + desc="Class name of the Pydantic model for which `kwargs` are being generated." + ) + pydantic_model_definitions = InputField( + desc="Complete Python code string containing the class definitions of the target Pydantic model and any related models." + ) + prompt = InputField( + desc="Data structure and values to be converted into `kwargs` for the Pydantic model instantiation." 
+ ) + root_model_kwargs_dict = OutputField( + prefix="kwargs_dict: dict = ", + desc="Python dictionary (as a string) representing the keyword arguments for initializing the Pydantic model. The dictionary is minimized in terms of whitespace and includes only JSON-compatible values.", + ) + + +class PromptToPydanticInstanceErrorSignature(Signature): + error = InputField( + desc="An error message indicating issues with previously generated `kwargs`, used to guide adjustments in the synthesis process." + ) + # Inheriting fields from PromptToPydanticInstanceSignature + root_pydantic_model_class_name = InputField( + desc="Class name of the Pydantic model to be corrected based on the error." + ) + pydantic_model_definitions = InputField( + desc="Python class definitions of the Pydantic model and any dependencies, provided as a string." + ) + prompt = InputField( + desc="Original natural language prompt, potentially adjusted to incorporate insights from the error message." + ) + root_model_kwargs_dict = OutputField( + prefix="kwargs_dict = ", + desc="Refined Python dictionary (as a string) for model initialization, adjusted to address the provided error message. 
Ensures minimized whitespace and JSON-compatible values.", + ) + + +# Example usage +def main(): + lm = dspy.OpenAI(max_tokens=1000) + dspy.settings.configure(lm=lm) + + model_prompt = "I need a verbose contact model named ContactModel from the friend of a friend ontology with 10 fields, each with length constraints" + + model_module = GenPydanticInstance( + root_model=PydanticClassTemplateSpecificationModel, + child_models=[FieldTemplateSpecificationModel], + ) + + model_inst = model_module.forward(model_prompt) + + # Render the Pydantic class from the specification + rendered_class_str = render(class_template_str, model=model_inst) + + # Write the rendered class to a Python file + write_pydantic_class_to_file( + rendered_class_str, f"{inflection.underscore(model_inst.class_name)}.py" + ) + + +icalendar_entities = { + "VEVENT": "This is one of the most commonly used components in iCalendar and represents an event.", + "VTODO": "Represents a to-do task or action item.", + "VJOURNAL": "Represents a journal entry or a note.", + "VFREEBUSY": "Represents information about the free or busy time of a calendar user.", + "VTIMEZONE": "Represents time zone information.", + "VAVAILABILITY": "Represents availability information for a calendar user.", + "VALARM": "Represents an alarm or reminder associated with an event or to-do.", +} + + +class GenPydanticClass(dspy.Module): + """A DSPy module that generates Pydantic class definition based on a prompt""" + + def forward(self, prompt: str, to_dir: str = "") -> str: + spec = dspy.Predict("prompt -> pydantic_class") + + + instance_module = GenPydanticInstance( + model=PydanticClassTemplateSpecificationModel, + generate_sig=PromptToPydanticInstanceSignature, + correct_generate_sig=PromptToPydanticInstanceErrorSignature, + ) + + instance = instance_module.forward(prompt) + + rendered_class_str = render(class_template_str, model=instance) + + if to_dir: + write_pydantic_class_to_file( + rendered_class_str, + 
f"{to_dir}/{inflection.underscore(instance.class_name)}.py", + ) + + return rendered_class_str + + +def generate_icalendar_models(): + for entity, description in icalendar_entities.items(): + # Define a Pydantic class dynamically for each entity + model_prompt = f"I need a model named {entity}Model that has all of the relevant fields for RFC 5545 compliance." + + model_module = GenPydanticInstance( + root_model=PydanticClassTemplateSpecificationModel, + child_models=[FieldTemplateSpecificationModel], + generate_sig=PromptToPydanticInstanceSignature, + correct_generate_sig=PromptToPydanticInstanceErrorSignature, + ) + + model_inst = model_module.forward(model_prompt) + + # Render the Pydantic class from the specification + rendered_class_str = render(class_template_str, model=model_inst) + + # Write the rendered class to a Python file + write_pydantic_class_to_file( + rendered_class_str, + f"ical/{inflection.underscore(model_inst.class_name)}.py", + ) + + print(f"{model_inst.class_name} written to {model_inst.class_name}.py") + + +from pydantic import BaseModel, Field + + +class GRDDDFLSSFramework(BaseModel): + digital_twin_integration: str = Field( + ..., + description="Represents the cumulative impact of real-time monitoring and predictive analytics on project management effectiveness. Calculus: Σ(RealTimeMonitoring(t) + PredictiveAnalytics(t)) over time t.", + ) + gp_optimization: str = Field( + ..., + description="Quantifies the continuous optimization of project management strategies over the project timeline. Calculus: ∫(AdaptationStrategies(t) * ResourceEfficiency(t)) dt from t0 to tf.", + ) + cp_compliance: str = Field( + ..., + description="Represents the multiplicative effect of adhering to quality standards and compliance measures across all project constraints. 
Calculus: ∏(QualityStandards(i) + ComplianceMeasures(i)) for each constraint i.", + ) + project_change_management: str = Field( + ..., + description="Quantifies the change in project efficiency as a result of analyzing interdependencies and optimizing interfaces over time. Calculus: Δ(ΣInterdependenciesAnalysis(i, t) + ΣInterfacesOptimization(i, t)) over all components i and time t.", + ) + digital_twin_semantic_enrichment: str = Field( + ..., + description="Indicates the use of semantic enrichment for advanced change management within digital twins. Impact: Enhances the digital twin's ability to manage change by identifying and visualizing complex interdependencies.", + ) + genetic_programming_adaptation_impact: str = Field( + ..., + description="Integral of adaptation strategies over time, highlighting the role of GP in adapting project management strategies. Calculus: ∫AdaptationStrategies(t) dt.", + ) + constraint_programming_quality_impact: str = Field( + ..., + description="Product of quality standards across constraints, underlining CP's role in ensuring project quality and compliance. Calculus: ∏QualityStandards(i).", + ) + change_management_interdependency_analysis: str = Field( + ..., + description="Change in efficiency due to interdependency analysis over time, integral to managing change within projects. Calculus: ΔΣInterdependenciesAnalysis(i, t).", + ) + change_management_interface_optimization: str = Field( + ..., + description="Change in efficiency due to interface optimization over time, crucial for effective change management in projects. Calculus: ΔΣInterfacesOptimization(i, t).", + ) + + +if __name__ == "__main__": + lm = dspy.OpenAI(max_tokens=3000) + dspy.settings.configure(lm=lm) + + prompt = """ +Develop a Full Stack application utilizing the GRDDDFLSSFramework to showcase the seamless integration of Design for Lean Six Sigma (DFLSS) methodologies within a Reactive Domain-Driven Design (RDD) environment. 
The project aims to create a secure, compliant, and operationally excellent software system by embedding DFLSS principles directly into the codebase, leveraging Python for its dynamic and expressive capabilities. + +### Project Overview + +The Full Stack application will serve as a dynamic reporting tool for analyzing and visualizing performance metrics, security vulnerabilities, and compliance adherence in real-time. It will feature a user-friendly interface for navigating through data, accompanied by a backend system that efficiently processes, stores, and retrieves information according to DFLSS principles. + +### Objectives + +- **Security Optimization**: Apply continuous security assessments and improvements to minimize vulnerabilities. +- **Compliance Assurance**: Ensure strict adherence to industry standards and regulatory requirements. +- **Operational Excellence**: Enhance system performance and reliability through DFLSS-driven continuous improvement. + +### Technical Specification + +- **Frontend**: Develop a responsive web interface using React, embedding DFLSS principles in component design and state management. +- **Backend**: Implement a Python-based server utilizing Flask, with domain models, services, and entities designed around RDD and DFLSS methodologies. +- **Database**: Integrate a PostgreSQL database, applying normalization and indexing strategies to optimize data retrieval and storage efficiency in compliance with DFLSS measures. + +### DFLSS Integration Calculus + +- **Define Phase**: Define security and compliance requirements using domain models, calculating the alignment with business objectives. + - \\( \text{Define}_{RDD} = \\sum (\text{DomainModels} + \text{SecurityAnnotations} + \text{ComplianceConstraints}) \\) +- **Measure Phase**: Instrument the system to log key performance metrics, identifying and addressing security vulnerabilities and compliance deviations. 
+ - \\( \text{Measure}_{RDD} = \\int (\text{DomainEvents} \rightarrow \text{Log}( \text{PerformanceMetrics} + \text{SecurityVulnerabilities} + \text{ComplianceAdherence})) \\,dt \\) +- **Explore Phase**: Conduct domain-driven experiments to explore security configurations and compliance scenarios for system optimization. + - \\( \text{Explore}_{RDD} = \text{DomainExperiments}( \text{SecurityConfigurations} \times \text{ComplianceScenarios +""" + + model_module = GenPydanticInstance(root_model=GRDDDFLSSFramework) + model_inst = model_module(prompt=prompt) + print(model_inst) + + # generate_icalendar_models() + # main() diff --git a/src/dspygen/experiments/gen_pydantic_instance.py b/src/dspygen/modules/gen_pydantic_instance.py similarity index 100% rename from src/dspygen/experiments/gen_pydantic_instance.py rename to src/dspygen/modules/gen_pydantic_instance.py diff --git a/src/dspygen/modules/gen_python_primitive.py b/src/dspygen/modules/gen_python_primitive.py new file mode 100644 index 0000000..26c2a6d --- /dev/null +++ b/src/dspygen/modules/gen_python_primitive.py @@ -0,0 +1,148 @@ +import ast + +from dspy import Assert + +from dspygen.modules.gen_module import GenModule +from dspygen.utils.dspy_tools import init_dspy + + +def is_primitive_type(data_type): + primitive_types = {int, float, str, bool, list, tuple, dict, set} + + return data_type in primitive_types + + +class GenPythonPrimitive(GenModule): + def __init__(self, primitive_type, lm=None): + if not is_primitive_type(primitive_type): + raise ValueError( + f"primitive type {primitive_type.__name__} must be a Python primitive type" + ) + super().__init__(f"{primitive_type.__name__}_str_for_ast_literal_eval", lm) + self.primitive_type = primitive_type + + def validate_primitive(self, output) -> bool: + try: + return isinstance(ast.literal_eval(output), self.primitive_type) + except SyntaxError: + return False + + def validate_output(self, output): + Assert( + self.validate_primitive(output), + f"You need 
to create a valid python {self.primitive_type.__name__} "
            f"primitive type for \n{self.output_key}\n"
            f"You will be penalized for not returning only a {self.primitive_type.__name__} for "
            f"{self.output_key}",
        )
        data = ast.literal_eval(output)

        # ast.literal_eval already yields a set for set literals; this re-wrap
        # is kept as a harmless safeguard for backward compatibility.
        if self.primitive_type is set:
            data = set(data)
        return data

    def __call__(self, prompt):
        # Convenience: make instances directly callable with a prompt.
        return self.forward(prompt=prompt)


class GenDict(GenPythonPrimitive):
    """Coerce an LM completion into a dict."""

    def __init__(self):
        super().__init__(primitive_type=dict)


def gen_dict(prompt):
    """One-shot helper: prompt -> dict."""
    return GenDict()(prompt)


class GenList(GenPythonPrimitive):
    """Coerce an LM completion into a list."""

    def __init__(self):
        super().__init__(primitive_type=list)


def gen_list(prompt):
    """One-shot helper: prompt -> list."""
    return GenList()(prompt)


class GenBool(GenPythonPrimitive):
    """Coerce an LM completion into a bool."""

    def __init__(self):
        super().__init__(primitive_type=bool)


def gen_bool(prompt):
    """One-shot helper: prompt -> bool."""
    return GenBool()(prompt)


class GenInt(GenPythonPrimitive):
    """Coerce an LM completion into an int."""

    def __init__(self):
        super().__init__(primitive_type=int)


def gen_int(prompt):
    """One-shot helper: prompt -> int."""
    return GenInt()(prompt)


class GenFloat(GenPythonPrimitive):
    """Coerce an LM completion into a float."""

    def __init__(self):
        super().__init__(primitive_type=float)


def gen_float(prompt):
    """One-shot helper: prompt -> float."""
    return GenFloat()(prompt)


class GenTuple(GenPythonPrimitive):
    """Coerce an LM completion into a tuple."""

    def __init__(self):
        super().__init__(primitive_type=tuple)


def gen_tuple(prompt):
    """One-shot helper: prompt -> tuple."""
    return GenTuple()(prompt)


class GenSet(GenPythonPrimitive):
    """Coerce an LM completion into a set."""

    def __init__(self):
        super().__init__(primitive_type=set)


def gen_set(prompt):
    """One-shot helper: prompt -> set."""
    return GenSet()(prompt)


class GenStr(GenPythonPrimitive):
    """Coerce an LM completion into a str."""

    def __init__(self):
        super().__init__(primitive_type=str)


def gen_str(prompt):
    """One-shot helper: prompt -> str."""
    return GenStr()(prompt)


def main():
    """Smoke-test demo; NOTE(review): the exact-list assert depends on live LM output."""
    init_dspy()

    result = gen_list(
        "Create a list of planets in our solar system sorted by largest to smallest"
    )

    assert result == [
        "Jupiter",
        "Saturn",
        "Uranus",
        "Neptune",
        "Earth",
        "Venus",
        "Mars",
        "Mercury",
    ]

    print(f"The planets of the solar system are {result}")

    for planet in result:
        print(planet)
if gen_bool(f"Is {result[0]} the largest planet in the solar system?"): + print(f"{result[0]} is the largest planet in the solar system") + + +if __name__ == "__main__": + main() diff --git a/src/dspygen/rm/code_retriever.py b/src/dspygen/rm/code_retriever.py new file mode 100644 index 0000000..229bc99 --- /dev/null +++ b/src/dspygen/rm/code_retriever.py @@ -0,0 +1,92 @@ +import dspy +from pathlib import Path +from fnmatch import fnmatch + + +class CodeRetriever(dspy.Retrieve): + def __init__(self, path, gitignore=None): + super().__init__() + self.path = Path(path) + self.gitignore = Path(gitignore) if gitignore else self.path / ".gitignore" + self.gitignore_patterns = self.parse_gitignore(self.gitignore) + self.gitignore_patterns.add(".git") + + def parse_gitignore(self, gitignore_path): + if not gitignore_path.exists(): + return set() + + with gitignore_path.open("r", encoding="utf-8") as file: + patterns = set( + line.strip() for line in file if line.strip() and not line.startswith("#") + ) + return patterns + + def forward(self, query=None): + content = [] + for file_path in self.path.rglob("*"): + if ( + file_path.is_file() + and not self.is_ignored(file_path) + and (not query or self.is_filtered(file_path, query)) + and not self.is_binary(file_path) + ): + try: + with file_path.open("r", encoding="utf-8") as f: + file_content = f.read() + except UnicodeDecodeError: + continue + + file_info = self.extract_file_info(file_path) + content.append(file_info + file_content + "\n```\n\n") + + return dspy.Prediction(passages=content) + + def is_ignored(self, file_path): + relative_path = file_path.relative_to(self.path) + return any( + self.match_gitignore_pattern(relative_path, pattern) + for pattern in self.gitignore_patterns + ) + + def is_filtered(self, file_path, query): + return fnmatch(file_path.name, query) + + def is_binary(self, file_path): + try: + with open(file_path, "rb") as file: + return b"\x00" in file.read(1024) + except IOError: + return False + + 
def extract_file_info(self, file_path): + file_extension = file_path.suffix.lstrip('.') + file_info = f"## File: {file_path}\n\n```{file_extension}\n" + return file_info + + def match_gitignore_pattern(self, relative_path, pattern): + if pattern.startswith("/"): + if fnmatch(str(relative_path), pattern[1:]) or fnmatch(str(relative_path.parent), pattern[1:]): + return True + else: + if any(fnmatch(str(path), pattern) for path in [relative_path, *relative_path.parents]): + return True + return False + + +def main(): + path = "/Users/candacechatman/dev/dspygen/src/dspygen/" + gitignore = "/Users/candacechatman/dev/dspygen/.gitignore" # Optional + + code_retriever = CodeRetriever(path, gitignore) + result = code_retriever.forward("*pipeline.yaml") + # for file_content in result.passages: + # print(file_content) # Here, you can instead write to a Markdown file or process further. + + # If I want one file containing all the code snippets + with open("code_snippets.md", "w") as f: + for file_content in result.passages: + f.write(file_content) + + +if __name__ == '__main__': + main() diff --git a/src/dspygen/rm/web_retriever.py b/src/dspygen/rm/web_retriever.py new file mode 100644 index 0000000..46ce1e7 --- /dev/null +++ b/src/dspygen/rm/web_retriever.py @@ -0,0 +1,20 @@ +import dspy + + +class WebRetriever(dspy.Retrieve): + def __init__(self, source, **kwargs): + super().__init__() + + self.source = source + + def forward(self, query, **kwargs): + return None + + +def main(): + rm = WebRetriever(source="