diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e0cc78f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,27 @@ +**/__pycache__ +**/.venv +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/bin +**/charts +**/docker-compose* +**/compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +LICENSE +README.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5a050c7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,154 @@ +### TODO: This was taken from a random repo. Update it to be more relevant to this project. +# debug +output/ +.dev/ +.vscode/ +..vscode/ +.git/ + +# package files +config.json +default_config.json + +# General +*.ipynb +.pytype +.DS_Store +.vscode +.idea +mypy_report +docs/build +docs/source/_build +tools/*.txt +playground/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ +.nox/ +*.pstats + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +__pycache__/ +*.pyc +*.pyo +*.pyd +*.pyc +*.so +*.egg-info/ +dist/ +build/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site +/docs-offline +/mkdocs-nav-online.yml +/mkdocs-nav-offline.yml + +# mypy +.mypy_cache/ + +# Snapshot testing report output directory +tests/snapshot_tests/output + +# Sandbox folder - convenient place for us to develop small test apps without leaving the repo +sandbox/ + +# Cache of screenshots used in the docs +.screenshot_cache + +# Used by mkdocs-material social plugin +.cache diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..58c63fb --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Quaternion Media + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all 
+copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index c5f0c2c..9cb53db 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,122 @@ -# CodeCartographer -Dev project for mapping code +# Codecarto: + +Development tool for mapping source code. + +Create graphs. + +Plot the graphs. + +Create JSON object of the graph. + +--- + +## Installation + +### From pypi: + +``` +python -m venv venv + +.\venv\Scripts\activate + +pip install codecarto +``` + +### From Git [dev use]: + +clone repo + +open terminal + +navigate to repo + +``` +python -m venv venv + +.\venv\Scripts\activate + +pip install -e . +``` + +--- + +## Usage + +### Help Information + +Check this first to see all usage information. + +``` +codecarto help +``` + +### Check output dir: + +To show the current output directory + +``` +codecarto output +``` + +### Change output: + +-s | --set : options can be used to set the output directory + +If directory does not exist, will ask if you'd like to make it + +``` +codecarto output -s DIR_PATH +``` + +### Demo: + +Parse the package source code + +``` +codecarto demo +``` + +### Passed file: + +Can pass a file or running script of source code in. + +``` +codecarto FILE_PATH +``` + +--- + +## Testing + +Can test the package using nox commands. + +### All Tests + +``` +nox +``` + +### Session Tests + +Test the use of package as an imported library. + +``` +nox -s unit_test +``` + +Test the package CLI commands. 
+ +``` +nox -s test_dir +nox -s test_help +nox -s test_output +nox -s test_palette +nox -s test_palette_import +nox -s test_palette_export +nox -s test_palette_reset +nox -s test_palette_types +nox -s test_palette_new +nox -s test_demo +nox -s test_empty +nox -s test_file +``` diff --git a/docker-compose.debug.yml b/docker-compose.debug.yml new file mode 100644 index 0000000..bb7b60b --- /dev/null +++ b/docker-compose.debug.yml @@ -0,0 +1,17 @@ +# version: '3.4' + +# services: +# codecartographer: +# image: codecartographer +# build: +# context: . +# dockerfile: ./Dockerfile +# command: +# [ +# 'sh', +# '-c', +# "pip install debugpy -t /tmp && python /tmp/debugpy --wait-for-client --listen 0.0.0.0:5678 -m uvicorn src.codecarto\__init__:app --host 0.0.0.0 --port 2000", +# ] +# ports: +# - 2000:2000 +# - 5678:5678 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..cc354db --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,37 @@ +version: '3' + +services: + web: + build: + context: . + dockerfile: ./src/codecarto/containers/web/Dockerfile + ports: + - '2000:2000' + networks: + - external_network + - internal_network + volumes: + - ./src/codecarto/containers/web/src:/app/src + + processor: + build: + context: . + dockerfile: ./src/codecarto/containers/processor/Dockerfile + networks: + - internal_network + + database: + image: mongo:latest + ports: + - '27017:27017' + environment: + MONGO_INITDB_ROOT_USERNAME: root + MONGO_INITDB_ROOT_PASSWORD: examplepassword + networks: + - internal_network + +networks: + external_network: + driver: bridge + internal_network: + driver: bridge diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..bc48852 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,59 @@ +[tool.poetry] +name = "codecarto" +version = "0.2.0" +homepage = "https://github.com/QuaternionMedia/codecarto" +description = "A tool used to analyze and graph source code." 
+authors = ['"Quaternion Media" '] +license = "MIT" +readme = "README.md" +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", +] +include = [ + { path = "docs/examples", format = "sdist" }, + { path = "tests", format = "sdist" }, +] + +[tool.poetry.scripts] +codecarto = "codecarto.cli.cli:run" + +[tool.poetry.dependencies] +python = "^3.8.1" +networkx = "^3.1" +numpy = "^1.24.2" +matplotlib = "^3.7.1" +scipy = "^1.7.3" +importlib-metadata = "^5.2.0" +click = {version = ">=8.1.2", optional = true} +fastapi = "^0.70.0" +python-multipart = ">=0.0.6" +mpld3 = "^0.5.5" +trogon = "^0.4.0" + +# these lines can make cli and library separate +# pip install codecarto will enable library usage +# pip install codecarto[cli] will enable cli usage as well +# [tool.poetry.extras] +# cli = ["click>=8.1.2"] + +[tool.poetry.group.dev.dependencies] +nox = "^2022.11.21" +pytest = "^7.1.3" +pytest-cov = "^2.12.1" +pytest-xdist = "^2.4.0" +pytest-profiling = "^1.7.0" +black = "^23.1.0" +flake8 = "^6.0.0" +mkdocs = "^1.4.2" +mkdocstrings = {extras = ["python"], version = "^0.20.0"} +mkdocs-material = "^9.0.11" +mkdocs-exclude = "^1.0.2" +python-dotenv = "^0.19.1" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/src/codecarto/README.md b/src/codecarto/README.md new file mode 100644 index 0000000..6c7b24a --- /dev/null +++ b/src/codecarto/README.md @@ -0,0 +1,15 @@ +For the moment, we have a 'local' and 'containers' folder to separate the two versions of codecarto. +Local is intended to be run as a local package. Containers as a web service. +Local was initially intended to be a CLI version and library for use in other projects. + +At some point these will be merged and CLI will call the api in the containers. 
+The core logic will be moved to 'core' or something similar. +The library version will be the 'core' logic (and possibly the api). + +So in the end, we will have 3 ways to use codecarto: + +- Web App: api calls to core logic +- CLI: api calls to core logic +- Library: core logic (and possibly api for use in other projects) + +For now, these are separated in the folders 'containers' and 'local'. diff --git a/src/codecarto/__init__.py b/src/codecarto/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/codecarto/containers/__init__.py b/src/codecarto/containers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/codecarto/containers/processor/Dockerfile b/src/codecarto/containers/processor/Dockerfile new file mode 100644 index 0000000..c520316 --- /dev/null +++ b/src/codecarto/containers/processor/Dockerfile @@ -0,0 +1,28 @@ +FROM tiangolo/uvicorn-gunicorn:python3.11 + +EXPOSE 2020 + +# Keeps Python from generating .pyc files in the container +ENV PYTHONDONTWRITEBYTECODE=1 + +# Turns off buffering for easier container logging +ENV PYTHONUNBUFFERED=1 + +# Install pip requirements +COPY ./src/codecarto/containers/processor/requirements.txt . +RUN python -m pip install -r requirements.txt + +# Directory +WORKDIR /app +COPY ./src/codecarto/containers/processor/api /app/api +COPY ./src/codecarto/containers/processor/src /app/src +ENV PYTHONPATH=/app + +# # Creates a non-root user with an explicit UID and adds permission to access the /app folder +# # For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers +# RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app +# USER appuser + +# # During debugging, this entry point will be overridden. 
+# For more information, please refer to https://aka.ms/vscode-docker-python-debug +CMD ["gunicorn", "--bind", "0.0.0.0:2020", "-k", "uvicorn.workers.UvicornWorker", "api.main:app"] \ No newline at end of file diff --git a/src/codecarto/containers/processor/__init__.py b/src/codecarto/containers/processor/__init__.py new file mode 100644 index 0000000..2873a70 --- /dev/null +++ b/src/codecarto/containers/processor/__init__.py @@ -0,0 +1,71 @@ +# API folder holds logic for the API endpoints and routers. +# These are called through browsers or other applications. + +# These will do the MAIN functionality of the application alone. +# Things like config set up and output directories are not necessary +# since the server will have fixed output directories and configurations. + +# The main functions the API will have access to are: +# - Converting any input data to GraphData (PolyGraph) +# - Parsing source code to a GraphData object +# - Passing plot layouts and themes +# TODO: how will we save these on the server? +# Will need to have access to them when plotting +# could we somehow have layout and theme objects passed in to the plotter? +# - Plotting a GraphData object to an image +# - ? Analyzing the graph +# TODO: this part actually may be handled in TechOps) +# I can't think of anything else that the API will need to do. + + +################## Performance Metrics ########################## +# Performance Metrics: +# This includes timing how long it takes to process each request, how long it takes +# to parse each file, etc. This can help you find any performance bottlenecks in your code. +# Usage Metrics: +# This includes how often each endpoint is hit, how many files are uploaded, how large the +# files are, etc. This can help you understand how your API is being used and plan for scaling. +# Error Metrics: +# Track the number and type of errors that occur. This can help you identify the most common +# problems and prioritize fixes. 
+ + +################## Abusive Request Protection ################### +# Rate limiting: +# This is to prevent a single user from overwhelming your server by sending +# too many requests in a short period of time. You can use the slowapi library +# to apply rate limiting in FastAPI. +# File Type Checks: +# You may want to validate the file type of uploaded files. This can prevent +# users from uploading potentially malicious files. +# Error Handling: +# Providing clear and user-friendly error messages can help users understand what +# went wrong if their request fails. However, be careful not to provide too much +# detail in error messages, as this could provide useful information to an attacker. +# Logging and Monitoring: +# Keeping logs of API usage can help you understand how your API is used and identify +# potential security issues. Monitoring API usage can help you identify unusual patterns +# that may indicate a security issue. +# Authentication and Authorization: +# Depending on your use case, you might want to require users to authenticate +# (log in) before they can use your API, and limit what each user is authorized +# to do based on their role or permissions. +# Input Sanitization: +# This involves cleaning the input to prevent injection attacks. This is especially +# important if you're passing the user's input to a command line operation, or using +# it to generate SQL queries, etc. +# Timeouts: +# If the parsing of the file takes too long, you may want to abort the operation +# and return an error. This can prevent a user from unintentionally overwhelming +# your server with a very complex file. You can set a timeout for requests at the +# server level. For example, if you're using uvicorn as your ASGI server, you can +# set the timeout like this: +# uvicorn main:app --timeout 30 # 30 seconds +# It means the server will automatically stop processing any request that takes +# longer than 30 seconds. 
+# Secure Transmission: +# If your API is accessible over the internet, you should enforce HTTPS to ensure that +# data is transmitted securely. You could use a HTTPS reverse proxy, such as Nginx or +# Apache, to handle the HTTPS part. Basically, you configure your server to handle HTTPS +# and then forward the requests to your FastAPI application. Another alternative would +# be using a cloud platform like AWS or GCP, they provide options to set up HTTPS. diff --git a/src/codecarto/containers/processor/api/__init__.py b/src/codecarto/containers/processor/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/codecarto/containers/processor/api/main.py b/src/codecarto/containers/processor/api/main.py new file mode 100644 index 0000000..7556c2d --- /dev/null +++ b/src/codecarto/containers/processor/api/main.py @@ -0,0 +1,31 @@ +from fastapi import FastAPI, Request, HTTPException +from fastapi.responses import JSONResponse + +from .routers.palette_router import PaletteRoute +from .routers.plotter_router import PlotterRoute +from .routers.parser_router import ParserRoute +from .routers.polygraph_router import PolyGraphRoute + +# Debug +import logging + +logging.basicConfig(level=logging.INFO) + +# Create the app +app = FastAPI() + + +# Catch all exceptions +@app.exception_handler(HTTPException) +async def http_exception_handler(request: Request, exc: HTTPException): + return JSONResponse( + status_code=exc.status_code, + content={"message": exc.detail}, + ) + + +# Add the routers +app.include_router(PaletteRoute, prefix="/palette", tags=["palette"]) +app.include_router(PlotterRoute, prefix="/plotter", tags=["plotter"]) +app.include_router(ParserRoute, prefix="/parser", tags=["parser"]) +app.include_router(PolyGraphRoute, prefix="/polygraph", tags=["polygraph"]) diff --git a/src/codecarto/containers/processor/api/routers/__init__.py b/src/codecarto/containers/processor/api/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/src/codecarto/containers/processor/api/routers/palette_router.py b/src/codecarto/containers/processor/api/routers/palette_router.py new file mode 100644 index 0000000..5072dfd --- /dev/null +++ b/src/codecarto/containers/processor/api/routers/palette_router.py @@ -0,0 +1,127 @@ +import os +import traceback +from json import load +from fastapi import APIRouter, HTTPException +from fastapi.responses import JSONResponse + +from src.plotter.palette import Theme +from api.util import generate_return, proc_exception + +PaletteRoute: APIRouter = APIRouter() +default_palette_path: str = "src/plotter/default_palette.json" +temp_palette_path: str = "src/plotter/palette.json" + +# TODO: This is a temporary solution, +# it should be coming from src.plotter.palette.get_palette_data() +# or actually, since data is actually in the database, we'll need to load database data and pass +debug: bool = True + + +@PaletteRoute.get( + "/get_palette", +) +async def get_palette(user_id: int = -1) -> dict: + try: + # TODO: DEBUG - temporary solution + if debug == True: + file_path = temp_palette_path + # check if file exists + if not os.path.exists(file_path): + file_path = default_palette_path + + # Open the palette file + with open(file_path, "r") as f: + pal_data = load(f) + + return generate_return("success", "Proc - Success", pal_data) + except Exception as e: + proc_exception( + "get_palette", + "Could not fetch palette data", + {"user_id": user_id}, + e, + ) + + +@PaletteRoute.get("/set_palette") +async def set_palette(user_id: int = -1, new_pal_data: dict = {}) -> dict: + try: + if new_pal_data != {}: + # If user, look up user palette id + if user_id != -1: + # get_user_from_database(user_id) + pass + + # save_palette_to_database(user_id, new_pal_data) + + # TODO: DEBUG - temporary solution + if debug == True: + file_path = temp_palette_path + # check if file exists + if not os.path.exists(file_path): + # create the file from the default palette + with open(default_palette_path, 
"r") as f: + pal_data = load(f) + + with open(file_path, "w") as f: + f.write(pal_data) + + # Save the new palette data + with open(file_path, "w") as f: + f.write(new_pal_data) + pal_data = new_pal_data + + return pal_data + else: + proc_exception( + "set_palette", + "No new palette data provided", + {"user_id": user_id, "new_pal_data": new_pal_data}, + ) + except Exception as e: + proc_exception( + "set_palette", + "Could not set palette data", + {"user_id": user_id, "new_pal_data": new_pal_data}, + e, + ) + + +@PaletteRoute.get("/reset_palette") +async def update_palette(user_id: int = -1, new_type: Theme = {}) -> dict: + try: + # If user, look up user palette id + if user_id != -1: + # get_user_from_database(user_id) + pass + + # load_palette_from_database(user_id) + + # TODO: DEBUG - temporary solution + if debug == True: + file_path = temp_palette_path + # check if file exists + if not os.path.exists(file_path): + file_path = default_palette_path + + # Open the palette file + with open(file_path, "r") as f: + pal_data = load(f) + + # Check if new_type is in pal_data + if new_type in pal_data: + # Set the new type to existing type + pal_data[new_type.base] = new_type + + # Save the new palette data + with open(file_path, "w") as f: + f.write(pal_data) + + return pal_data + except Exception as e: + proc_exception( + "update_palette", + "Could not add new palette type", + {"user_id": user_id, "new_type": new_type}, + e, + ) diff --git a/src/codecarto/containers/processor/api/routers/parser_router.py b/src/codecarto/containers/processor/api/routers/parser_router.py new file mode 100644 index 0000000..f7f62f4 --- /dev/null +++ b/src/codecarto/containers/processor/api/routers/parser_router.py @@ -0,0 +1,220 @@ +import httpx +from fastapi import APIRouter, HTTPException + +from api.util import generate_return, proc_exception + +# DEBUG +import logging + +logger = logging.getLogger(__name__) + +# Create a router +ParserRoute = APIRouter() + + 
+@ParserRoute.get("/parse") +async def parse(): + pass + + +@ParserRoute.get("/handle_github_url") +async def handle_github_url(github_url: str) -> dict: + try: + client = httpx.AsyncClient() + logger.info( + f" Started Proc.handle_github_url(): github_url - {github_url}" + ) + # check that the url is a github url + if "github.com" not in github_url: + proc_exception( + "handle_github_url", + "URL is not a valid GitHub URL", + {"github_url": github_url}, + status=404, + ) + + # Extract owner and repo from the URL + # Assuming the URL is like: https://github.com/owner/repo + parts = github_url.split("/") + if len(parts) < 5 or parts[2] != "github.com": + proc_exception( + "read_github_file", + "Invalid GitHub URL format", + {"github_url": github_url}, + ) + owner, repo = parts[3], parts[4] + + # get content from url + url_content: list[dict] = await read_github_content(github_url, owner, repo) + if not url_content: + proc_exception( + "handle_github_url", + "Empty file content received from GitHub", + {"github_url": github_url}, + ) + + # url_content = await fetch_directory(github_url) + repo_contents = { + "package_owner": owner, + "package_name": repo, + "contents": {}, + } + logger.info(f" Started Proc.parse_github_content(): {owner}/{repo}") + repo_contents["contents"]: dict = await parse_github_content( + url_content, owner, repo + ) + logger.info(f" Finished Proc.parse_github_content()") + if repo_contents: + return generate_return( + "success", "Proc.handle_github_url() - Success", repo_contents + ) + else: + proc_exception( + "handle_github_url", + "Could not parse file content", + {"github_url": github_url}, + ) + except HTTPException as exc: + # Handle network errors + proc_exception( + "handle_github_url", + "An error occurred while requesting", + {"github_url": github_url}, + exc, + ) + except Exception as exc: + proc_exception( + "handle_github_url", + "Error when handling GitHub URL", + {"github_url": github_url}, + exc, + ) + finally: + await 
client.aclose() + logger.info(f" Finished Proc.handle_github_url()") + + +async def read_github_content( + url: str, owner: str, repo: str, path: str = "" +) -> list[dict]: + try: + import os + from dotenv import load_dotenv + + client = httpx.AsyncClient() + logger.info(f"Started Proc.read_github_content(): {url}") + + # Construct the API URL + api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" + load_dotenv() + git_api_key = os.getenv("GIT_API_KEY") + headers = { + "Accept": "application/vnd.github.v3+json", + # Uncomment and set your token if you have one + "Authorization": git_api_key, + } + + response = await client.get(api_url, headers=headers, follow_redirects=False) + + if response.status_code == 200: + json_data = response.json() + if not json_data: + proc_exception( + "read_github_content", + "No data returned from GitHub API for UR", + {"url": url, "api_url": api_url}, + ) + + # Remove unnecessary data from the response + # this will leave us with {name, path, size, html_url, download_url, type} + # html url is the url to view the file in the browser + # download url is the url to see just the raw file contents + for item in json_data: + item.pop("sha", None) + item.pop("url", None) + item.pop("git_url", None) + item.pop("_links", None) + + logger.info(f" json_data: {json_data}") + return json_data + else: + if response.status_code == 404: + proc_exception( + "read_github_content", + "GitHub API returned 404", + {"url": url, "api_url": api_url}, + HTTPException, + 404, + ) + else: + proc_exception( + "read_github_content", + "Error with client response", + {"url": url, "status_code": response.status_code}, + ) + except httpx.RequestError as exc: + proc_exception( + "read_github_content", + "Error while attempting to set up request url & headers", + {"url": url}, + exc, + ) + finally: + logger.info(f"Finished Proc.read_github_content(): {url}") + + +async def parse_github_content(file_content, owner, repo) -> dict: + try: + # Check 
that the file content is a list + if not file_content or not isinstance(file_content, list): + proc_exception( + "parse_github_content", + "Invalid file content format", + {"file_content": file_content}, + ) + + # Process directories + results = {} + directories: list = [item for item in file_content if item["type"] == "dir"] + files: list = [item for item in file_content if item["type"] == "file"] + + for dir in directories: + dir_content = await read_github_content("", owner, repo, dir["path"]) + parsed_dir_content = await parse_github_content(dir_content, owner, repo) + dir_name = dir["name"] + results[dir_name] = parsed_dir_content + + # Process files + top_files = [] + for file in files: + top_files.append(file) + if top_files and len(top_files) > 0: + results["files"] = top_files + + return results + except Exception as exc: + proc_exception( + "parse_github_content", + "Error when parsing GitHub content", + {"owner": owner, "repo": repo}, + exc, + ) + + +def raw_to_graph(raw_data: str, filename: str): + try: + logger.info(f" Started Proc.text_to_json()") + from src.parser.parser import Parser + + parser = Parser(None, {"raw": raw_data, "filename": filename}) + graph = parser.graph + return graph + except Exception as exc: + proc_exception( + "text_to_json", + "Error when transforming raw data to JSON", + {"raw_data": raw_data}, + exc, + ) + finally: + logger.info(f" Finished Proc.text_to_json()") diff --git a/src/codecarto/containers/processor/api/routers/plotter_router.py b/src/codecarto/containers/processor/api/routers/plotter_router.py new file mode 100644 index 0000000..7db5f54 --- /dev/null +++ b/src/codecarto/containers/processor/api/routers/plotter_router.py @@ -0,0 +1,337 @@ +from fastapi import APIRouter, Request +import networkx as nx +import matplotlib.pyplot as plt +import mpld3 +import matplotlib.lines as mlines + +from api.util import generate_return, proc_exception + +PlotterRoute: APIRouter = APIRouter() + + +@PlotterRoute.get( + "/plot", +) 
+async def plot( + request: Request, + graph_data: dict = None, + file: str = None, + url: str = None, + layout: str = "Spring", + grid: bool = False, + labels: bool = False, + ntx: bool = True, + custom: bool = True, + palette: dict = None, + debug: bool = False, +): + """Plot a graph. + + Parameters: + ----------- + request : Request + The request object. + graph_data : dict + The graph data. JSON format. + file : str + The file to parse and plot. + url : str + The url to parse and plot. + layout : str + The name of the layout to plot. + Used to plot a single layout. + grid : bool + Whether to plot all plot layouts in a grid. + labels : bool + Whether to plot the graph with labels. + ntx : bool + Whether to use the networkx layouts. + custom : bool + Whether to use the custom layouts. + palette: dict + The palette to use for plotting. + debug: bool + Whether to run long process vs short process. + + Returns: + -------- + dict + The results of the plot. {index: plot html} + """ + + # TODO: DEBUG - This is a demo file + try: + results: dict = {} + filename: str = "" + if debug: + graph = nx.Graph() + # add nodes with a type and label attribute + graph.add_nodes_from( + [ + (1, {"type": "file", "label": "1"}), + (2, {"type": "file", "label": "2"}), + (3, {"type": "file", "label": "3"}), + (4, {"type": "file", "label": "4"}), + (5, {"type": "file", "label": "5"}), + (6, {"type": "file", "label": "6"}), + (7, {"type": "file", "label": "7"}), + (8, {"type": "file", "label": "8"}), + ] + ) + graph.add_edges_from( + [ + (1, 2), + (1, 3), + (2, 3), + (2, 4), + (3, 4), + (5, 6), + (5, 7), + (6, 7), + (6, 8), + (7, 8), + ] + ) + else: + from src.parser.parser import Parser + + # Convert the graph data to a networkx graph + graph: nx.DiGraph = None + if not graph_data: # if no graph, run demo + if url: + from .polygraph_router import read_raw_data_from_url + + filename = url.split("/")[-1] + raw_data: str = await read_raw_data_from_url(url) + parser: Parser = Parser( + 
def _draw_graph(graph: nx.Graph, pos: dict, ax) -> None:
    """Draw the nodes, labels and edges of ``graph`` onto ``ax``.

    Parameters:
    -----------
    graph : nx.Graph
        The graph to draw.
    pos : dict
        Node positions as returned by ``get_node_positions``.
    ax :
        The matplotlib axes to draw on.
    """
    # nodes
    nx.drawing.draw_networkx_nodes(
        graph,
        pos,
        nodelist=graph.nodes,
        ax=ax,
    )

    # labels (only nodes carrying a "label" attribute get one)
    nx.drawing.draw_networkx_labels(
        graph,
        pos,
        labels=nx.get_node_attributes(graph, "label"),
        font_size=12,
        font_color="black",
        ax=ax,
    )

    # edges
    nx.drawing.draw_networkx_edges(
        graph,
        pos,
        edgelist=graph.edges,
        width=2,
        alpha=0.5,
        edge_color="black",
        ax=ax,
    )


def _figure_to_html(fig) -> str:
    """Render ``fig`` to a self-contained HTML snippet via mpld3."""
    # Lay out the figure we were handed rather than pyplot's global
    # "current figure", which may belong to another request.
    fig.tight_layout()
    return mpld3.fig_to_html(
        fig,
        template_type="simple",
        figid="pltfig",
        d3_url=None,
        no_extras=False,
        use_http=False,
        include_libraries=True,
    )


def single_plot(graph: nx.Graph, title: str = "Spiral", file_name: str = "Fib Demo"):
    """Plot a graph with a single layout.

    Parameters:
    -----------
    graph : nx.Graph
        The graph to plot.
    title : str
        The name of the layout to plot. It is lower-cased and suffixed with
        "_layout" to look up the layout function.
    file_name : str
        The name of the file the graph came from; only used in the plot title.

    Returns:
    --------
    str
        The plot rendered as HTML by mpld3.
    """
    fig, ax = plt.subplots(figsize=(15, 10))
    try:
        ax.set_title(f"{title} Layout for '{file_name}'")
        ax.set_axis_off()

        pos = get_node_positions(graph, f"{title.lower()}_layout")
        _draw_graph(graph, pos, ax)

        return _figure_to_html(fig)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each request leaks one figure.
        plt.close(fig)


def grid_plot(graph: nx.DiGraph = None):
    """Plot a graph once per supported layout, arranged in a square grid.

    Parameters:
    -----------
    graph : nx.DiGraph
        The graph to plot.

    Returns:
    --------
    str
        The grid of plots rendered as HTML by mpld3.
    """
    import math

    layouts: list[str] = [
        "circular",
        "spiral",
        # "spring",
        "shell",
        # "spectral",
        "sorted_Square",
    ]

    # smallest square grid that fits every layout
    num_layouts = len(layouts)
    grid_size = math.ceil(math.sqrt(num_layouts))

    fig, axs = plt.subplots(
        grid_size,
        grid_size,
        figsize=(grid_size * 15, grid_size * 10),
        squeeze=False,  # always a 2-D array, even for a 1x1 grid
    )
    try:
        fig.set_size_inches(18.5, 9.5)  # TODO: try to size in css

        # row-major flattening: index i is row i // grid_size, column i % grid_size
        axes = axs.flatten()

        for ax, layout_name in zip(axes, layouts):
            ax.set_title(f"{str(layout_name).capitalize()} Layout")
            pos = get_node_positions(graph, f"{layout_name.lower()}_layout")
            _draw_graph(graph, pos, ax)

        # hide grid cells that have no layout assigned to them
        for unused_ax in axes[num_layouts:]:
            unused_ax.set_axis_off()

        return _figure_to_html(fig)
    finally:
        # see single_plot: figures must be closed or they accumulate
        plt.close(fig)


def get_node_positions(graph: nx.Graph, layout_name: str) -> dict:
    """Gets the node positions for a given layout.

    Parameters:
    -----------
    graph (nx.Graph):
        The graph whose nodes are positioned.
    layout_name (str):
        The name of the layout, e.g. "shell_layout".

    Returns:
    --------
    positions (dict):
        The positions of nodes for layout.
    """
    import random

    from src.plotter.positions import Positions

    # NOTE(review): the two flags presumably enable the networkx and the
    # custom layouts respectively - confirm against Positions.__init__.
    position = Positions(True, True)
    layout_params = position.get_layout_params(layout_name)
    layout_kwargs = {"G": graph}

    for param in layout_params:
        if param == "seed":
            # a fresh random seed per call
            layout_kwargs["seed"] = random.randint(0, 1000)
        elif param == "nshells" and layout_name == "shell_layout":
            # One shell per parent: group nodes by their "parent" attribute,
            # nodes without one end up in the "Unknown" shell.
            grouped_nodes: dict[str, list] = {}
            for node, data in graph.nodes(data=True):
                grouped_nodes.setdefault(data.get("parent", "Unknown"), []).append(node)
            layout_kwargs["nshells"] = list(grouped_nodes.values())
        elif param == "root" and layout_name == "cluster_layout":
            # the root is the first node labelled "root", or None if absent
            layout_kwargs["root"] = next(
                (
                    node
                    for node, data in graph.nodes(data=True)
                    if data.get("label", "") == "root"
                ),
                None,
            )
        # TODO: Handle other parameters here; they are left at layout defaults

    # Compute layout positions
    return position.get_positions(layout_name, **layout_kwargs)
# ---------------------------------------------------------------------------
# src/codecarto/containers/processor/api/routers/polygraph_router.py
# ---------------------------------------------------------------------------
import httpx
from fastapi import APIRouter

from api.util import generate_return, proc_exception

# DEBUG
import logging

logger = logging.getLogger(__name__)

# Create a router
PolyGraphRoute = APIRouter()


@PolyGraphRoute.get("/get_graph_desc")
async def get_graph_desc() -> dict:
    """Return the description of the graph data structure.

    Returns:
    --------
    dict
        The standard {"status", "message", "results"} envelope built by
        generate_return, with the graph description as results.

    Raises:
    -------
    HTTPException
        Raised through proc_exception when the description cannot be built.
    """
    try:
        logger.info(" Started Proc.get_graph_desc()")
        from src.models.graph_data import get_graph_description

        graph_desc: dict = get_graph_description()

        return generate_return(
            "success",
            "Graph description successfully fetched from processor.",
            graph_desc,
        )
    except Exception as e:
        proc_exception(
            "get_graph_desc",
            "Could not fetch graph description",
            {},
            e,
        )
    finally:
        logger.info(" Finished Proc.get_graph_desc()")


@PolyGraphRoute.get("/raw_to_json")
async def raw_to_json(file_url: str) -> dict:
    """Download a Python file, parse it into a graph and return it as JSON data.

    Parameters:
    -----------
    file_url : str
        URL of the raw Python file. The last path segment is used as filename.

    Returns:
    --------
    dict
        The generate_return envelope with the graph's JSON data as results.

    Raises:
    -------
    HTTPException
        Raised through proc_exception on any failure.
    """
    try:
        logger.info(f" Started Proc.raw_to_json(): file_url - {file_url}")
        from .parser_router import raw_to_graph

        raw_data: str = await read_raw_data_from_url(file_url)
        filename = file_url.split("/")[-1]
        graph = raw_to_graph(raw_data, filename)
        json_data = graph_to_json(graph)
        return generate_return("success", "Proc - Success", json_data)
    except Exception as exc:
        proc_exception(
            "raw_to_json",
            "Error when converting raw data to JSON",
            {"file_url": file_url},
            exc,
        )
    finally:
        logger.info(" Finished Proc.raw_to_json()")


async def read_raw_data_from_url(url: str) -> str:
    """Fetch the text behind ``url``, which must point at a ``.py`` file.

    Parameters:
    -----------
    url : str
        The URL to read.

    Returns:
    --------
    str
        The response body.

    Raises:
    -------
    HTTPException
        Raised through proc_exception when the URL does not end in ".py",
        the response status is not 200, or the request itself fails.
    """
    try:
        logger.info(f" Started Proc.read_raw_data_from_url(): url - {url}")
        if not url.endswith(".py"):
            proc_exception(
                "read_raw_data_from_url",
                "URL is not a valid Python file",
                {"url": url},
            )

        # The context manager closes the client on every path. Closing it in
        # the outer ``finally`` failed with UnboundLocalError whenever the
        # check above raised before the client had been created.
        async with httpx.AsyncClient() as client:
            response = await client.get(url)

        if response.status_code == 200:
            return response.text

        proc_exception(
            "read_raw_data_from_url",
            "Could not read raw data from URL",
            {"url": url},
        )
    except Exception as exc:
        proc_exception(
            "read_raw_data_from_url",
            "Error when reading raw data from URL",
            {"url": url},
            exc,
        )
    finally:
        logger.info(" Finished Proc.read_raw_data_from_url()")


def graph_to_json(raw_graph) -> dict:
    """Convert a graph into JSON data using PolyGraph.

    Parameters:
    -----------
    raw_graph :
        The graph to convert; passed to PolyGraph.graph_to_json_data as-is.

    Returns:
    --------
    dict
        The JSON data produced by PolyGraph.

    Raises:
    -------
    HTTPException
        Raised through proc_exception when the conversion fails.
    """
    try:
        logger.info(f" Started Proc.graph_to_json(): raw_graph - {raw_graph}")
        from src.polygraph.polygraph import PolyGraph

        polygraph = PolyGraph()
        return polygraph.graph_to_json_data(graph=raw_graph)
    except Exception as exc:
        proc_exception(
            "graph_to_json",
            "Error when transforming raw data to JSON",
            {"raw_graph": raw_graph},
            exc,
        )
    finally:
        logger.info(" Finished Proc.graph_to_json()")


# ---------------------------------------------------------------------------
# src/codecarto/containers/processor/api/util.py
# ---------------------------------------------------------------------------
import logging
from typing import NoReturn, Optional

logger = logging.getLogger(__name__)


def generate_return(status: str, message: str, results) -> dict:
    """Build the standard response envelope and log it.

    Parameters:
    -----------
    status : str
        "success" or "error".
    message : str
        A friendly message.
    results :
        The actual results or the error message.

    Returns:
    --------
    dict
        {"status": ..., "message": ..., "results": ...}
    """
    logger.info(f"{message} - Results: {results}")
    return {
        "status": status,  # success or error
        "message": message,  # friendly message
        "results": results,  # the actual results or the error message
    }


def proc_exception(
    called_from: str,
    message: str,
    params: Optional[dict] = None,
    exc: Optional[Exception] = None,
    status: int = 500,
) -> NoReturn:
    """Log an error and raise it as an HTTPException. Never returns.

    Parameters:
    -----------
    called_from : str
        Name of the calling function, used in the log/detail message.
    message : str
        Description of what went wrong.
    params : dict
        The caller's parameters, included in the message for debugging.
    exc : Exception
        The original exception, if any. Its text is appended to the detail
        and its traceback is logged.
    status : int
        404 is raised as 404; every other value is raised as 500.

    Raises:
    -------
    HTTPException
        Always.
    """
    import traceback
    from fastapi import HTTPException

    # None instead of a shared mutable ``{}`` default
    params = {} if params is None else params

    # log the error and stack trace
    error_message = f"Proc.{called_from}() - status: {status} - param: {params} - message: {message}"
    logger.error(error_message)
    if exc:
        error_message = f"{error_message} - exception: {str(exc)}"
        tbk_str = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
        logger.error(tbk_str)

    # raise the exception, keeping the original one as its cause
    status_code = 404 if status == 404 else 500
    raise HTTPException(status_code=status_code, detail=error_message) from exc
b/src/codecarto/containers/processor/requirements.txt new file mode 100644 index 0000000..0c1acfe --- /dev/null +++ b/src/codecarto/containers/processor/requirements.txt @@ -0,0 +1,13 @@ +# Processor dependencies +fastapi[all] +uvicorn[standard] +gunicorn +importlib-metadata +networkx +numpy +matplotlib +mpld3 +scipy +pydantic +requests +httpx \ No newline at end of file diff --git a/src/codecarto/containers/processor/src/__init__.py b/src/codecarto/containers/processor/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/codecarto/containers/processor/src/codecarto.py b/src/codecarto/containers/processor/src/codecarto.py new file mode 100644 index 0000000..28cff5e --- /dev/null +++ b/src/codecarto/containers/processor/src/codecarto.py @@ -0,0 +1,782 @@ +################################################################### +# THIS IS A COPY OF 'LOCAL' PROCESSOR.PY +# THIS IS JUST USED TO TEST PLOTTING AND JSON CONVERSION IN THE API +################################################################### +# EVERYTHING COMMENTED OUT FOR NOW BECAUSE THERE ARE A BUNCH OF +# REQUIRED REWORKS NEEDED FOR USING DATABASE INSTEAD OF LOCAL FILES +################################################################### + +pass + +# """ CodeCarto: A Python library for code visualization. """ + +# # CW: I talk myself back and forth for the need of this module. +# # I like having the functionality pulled into one place. +# # I don't like having a large file with a bunch of functions. + + +# # This will pull all the package functionality to the top level which allows +# # for easier use of the package. +# # The actual exporting of the functions happens in the top __init__.py file. +# # Doing this here also allows for tying together multiple functions, as well +# # as validating the data passed to the functions in a single place.
+ +# # API: API will call the functions from importing this file +# # and will be the only file that needs to be imported + +# # Lib: A local clone/fork/installation of the package will use +# # this file to call the functions from importing this file + +# # CLI: Using local CLI commands will call the functions from +# # importing this file + +# # CLI-API: CLI commands can be used to call the API functions +# # which in turn will reference the functions from this file + +# import os +# import networkx as nx +# from pydantic import BaseModel +# from .config.config import Config +# from .config.directory.package_dir import ( +# CODE_CARTO_PACKAGE_VERSION, +# PROCESSOR_FILE_PATH, +# ) +# from .config.directory.directories import print_all_directories +# from .config.directory.output_dir import ( +# get_output_dir, +# reset_output_dir, +# set_output_dir, +# get_last_dated_output_dirs, +# ) +# from .models.graph_data import GraphData, get_graph_description +# from .parser.parser import Parser +# from .parser.import_source_dir import get_all_source_files +# from .plotter.palette import Palette +# from .plotter.palette_dir import PALETTE_DIRECTORY +# from .plotter.plotter import Plotter +# from .plotter.positions import Positions +# from .polygraph.polygraph import PolyGraph +# from .processor import Processor +# from .utils.utils import ( +# check_file_path, +# get_date_time_file_format, +# load_json, +# save_json, +# ) + + +# class Theme(BaseModel): +# node_type: str +# base: str +# label: str +# shape: str +# color: str +# size: str +# alpha: str + + +# class ParserHandler: +# def __init__(self): +# self.parser: Parser = Parser() + +# def parse_source_files(self, source: str | list = None) -> nx.DiGraph: +# """Parses a source file or list of source files into a networkx graph. + +# Args: +# ----- +# source (str): The source file to get the source files from, then parse. \n +# Or source (list): The list of source files to parse. 
+ +# Returns: +# -------- +# nx.DiGraph: The networkx graph. +# """ +# if source is None: +# raise ValueError("No source file or list of source files provided.") +# if isinstance(source, str): +# source = DirectoryHandler.get_source_files(source) +# parser: Parser = Parser(source) +# return parser.graph + + +# class PlotterHandler: +# def __init__(self): +# self.plotter: Plotter = Plotter() + +# def set_plotter_attrs( +# self, +# dirs: dict[str, str] = None, +# file_path: str = "", +# labels: bool = False, +# grid: bool = False, +# json: bool = False, +# show_plot: bool = False, +# single_file: bool = False, +# ntx_layouts: bool = True, +# custom_layouts: bool = True, +# ): +# """Sets the plotter attributes. + +# Parameters: +# ----------- +# dirs (dict): +# The directories to use for the plotter. +# file_path (str): +# The file path to use for the plotter. +# labels (bool): +# Whether or not to show the labels. +# grid (bool): +# Whether or not to show the grid. +# json (bool): +# Whether or not to save the json file. +# show_plot (bool): +# Whether or not to show the plot. +# single_file (bool): +# Whether or not to save the plot to a single file. +# ntx_layouts (bool): +# Whether or not to save the plot to a networkx file. +# custom_layouts (bool): +# Whether or not to save the plot to a custom file. 
+# """ +# self.plotter.dirs = dirs if dirs is not None else self.plotter.dirs +# self.plotter.file_path = ( +# file_path if file_path is not None else self.plotter.file_path +# ) +# self.plotter.labels = labels +# self.plotter.grid = grid +# self.plotter.json = json +# self.plotter.show_plot = show_plot +# self.plotter.single_file = single_file +# self.plotter.ntx_layouts = ntx_layouts +# self.plotter.custom_layouts = custom_layouts + +# def plot_graph( +# self, +# graph: GraphData, +# output_dir: str = None, +# file_name: str = None, +# specific_layout: str = "", +# grid: bool = False, +# json: bool = False, +# api: bool = False, +# ) -> dict[dict, str]: +# """Plots a graph representing code. + +# Parameters: +# ----------- +# graph (GraphData): +# The networkx graph to plot. +# output_dir (str): +# The directory to save the plot to. +# file_name (str): +# The name of the plot file. +# specific_layout (str): +# The specific layout to use for the plot. +# default (""): will plot all layouts. +# grid (bool): +# Whether or not to show the grid. +# json (bool): +# Whether or not to save the json file. +# api (bool): +# Whether or not the function is being called from the API. + +# Returns: +# -------- +# dict: +# The Json data and output directory. 
+# """ + +# # Validate the data +# if graph is None: +# raise ValueError("No graph provided.") +# if not api and output_dir is None: +# output_dir = get_output_dir() +# self.plotter.dirs["output_dir"] = output_dir +# if file_name is None: +# file_name = f"GraphPlot {get_date_time_file_format()}" + +# # Convert the GraphData object to a networkx graph +# graph = PolyGraph.graphdata_to_nx(GraphData) + +# # Set the plotter parameters +# self.plotter.grid = grid +# self.plotter.json = json + +# # Plot the graph +# self.plotter.plot(graph, specific_layout) + +# # Get the last dated folder in output folder +# last_date_folder = DirectoryHandler.get_last_dated_output_dirs() + +# # Get the json object +# json_file_path = last_date_folder + "/json/graph_data.json" +# json_data: dict = load_json(json_file_path) + +# # Return the json object and output directory of the plots +# return {"graph_data": json_data, "output_dir": self.plotter.dirs["output_dir"]} + +# def set_plot_output_dir(self, output_dir: str = None): +# """Sets the plot output directory. + +# Parameters: +# ----------- +# output_dir (str) Default = None: +# The directory to use. +# """ + +# # Validate the data +# if output_dir is None: +# raise ValueError("No output directory provided.") +# if not os.path.isdir(output_dir): +# raise ValueError("The provided output directory does not exist.") +# if not output_dir == self.plotter.dirs["output_dir"]: +# # Set the plot output directory +# self.plotter.dirs["output_dir"] = output_dir +# self.plotter.dirs["graph_code_dir"] = os.path.join(output_dir, "code") +# self.plotter.dirs["graph_json_dir"] = os.path.join(output_dir, "json") +# else: +# raise ValueError( +# "The provided output directory is already set as the plot output directory." 
+# ) + +# def reset_plot_output_dir(self): +# """Resets the plot output directory to the default output directory.""" +# from codecarto.processor import Directory as Dir + +# self.plotter.dirs = Dir.reset_output_dir(make_dir=True) + + +# class ModelHandler: +# def get_graph_description() -> dict: +# """Gets the graph description. + +# Returns: +# -------- +# dict: The graph description. +# """ +# return get_graph_description() + + +# class PolyGraphHandler: +# def __init__(self): +# """Converts an assortment of data objects to an nx.DiGraph object and vice versa.""" +# self.polygraph: PolyGraph = PolyGraph() + +# def graph_to_json_file_to_graph( +# self, graph: nx.DiGraph, json_file_path: str +# ) -> nx.DiGraph: +# """Converts a networkx graph to a json file and then back to a networkx graph from the json file.\n +# This is used to ensure that the json file is valid and can be converted back to a networkx graph. + +# Parameters: +# ----------- +# graph (networkx.classes.graph.Graph): +# The networkx graph to convert to json data. +# json_file_path (str): +# The path to save the json data to. + +# Returns: +# -------- +# networkx.classes.graph.Graph: The networkx graph generated from the saved json data. +# """ + +# # Validate the data +# if graph is None: +# raise ValueError("No graph provided.") +# if json_file_path is None: +# raise ValueError("No json file path provided.") + +# # Convert the networkx graph to a json data and save it to a json file +# json_data: dict = self.graph_to_json_data(graph) +# save_json(json_file_path, json_data) + +# # Convert the json file back to a networkx graph +# return self.json_file_to_graph(json_file_path) + +# def json_file_to_graph(self, json_file_path: str) -> nx.DiGraph: +# """Converts a json file to a networkx graph. + +# Parameters: +# ----------- +# json_file_path (str): +# The path to the json file to convert to a networkx graph. + +# Returns: +# -------- +# networkx.classes.graph.Graph: The networkx graph. 
+# """ + +# # Validate the data +# if json_file_path is None: +# raise ValueError("No json file path provided.") +# if not os.path.isfile(json_file_path): +# raise ValueError("The provided json file path does not exist.") + +# # Convert the json data to a networkx graph +# return self.json_data_to_graph(load_json(json_file_path)) + +# def graph_to_json_data(self, graph: nx.DiGraph) -> dict: +# """Converts a networkx graph to a json object. + +# Parameters: +# ----------- +# graph (networkx.classes.graph.Graph): +# The graph to convert to json. + +# Returns: +# -------- +# dict: The json object. +# """ + +# # Validate the data +# if graph is None: +# raise ValueError("No graph provided.") + +# return self.polygraph.graph_to_json_data(graph) + +# def json_data_to_graph(self, json_data: dict) -> nx.DiGraph: +# """Converts a json object to a networkx graph. + +# Parameters: +# ----------- +# json_data (dict): +# The json object to convert to a networkx graph. + +# Returns: +# -------- +# networkx.classes.graph.Graph: The networkx graph. +# """ + +# # Validate the data +# if json_data is None: +# raise ValueError("No json data provided.") + +# return self.polygraph.json_data_to_graph(json_data) + +# def source_code_to_json_data(self, source: str | list) -> dict: +# """Converts a source file or list of source files to a json serializable object. + +# Parameters: +# ----------- +# source (str): +# The source file to get the source files from, then convert to json. \n +# OR source (list): +# The list of source files to convert to json. + +# Returns: +# -------- +# dict: The json serializable object. 
+# """ +# if source is None: +# raise ValueError("No source file path provided.") +# if isinstance(source, str): +# source = DirectoryHandler.get_source_files(source) +# if isinstance(source, list): +# graph = Parser.parse_code(source) +# return self.polygraph.graph_to_json_data(graph) +# else: +# raise ValueError("'source' must be a file path or list of file paths.") + + +# class PaletteHandler: +# def __init__(self): +# self.palette = Palette() + +# def get_palette(self): +# """Get the data of the current palette. + +# Returns: +# -------- +# dict: A dictionary containing the data of the current palette. +# """ +# return self.palette.get_palette_data() + +# def set_palette(self, palette_file_path: str): +# """Sets the palette for plots. + +# Args: +# ----- +# palette_file_path (str): The path to the palette.json file to set. +# """ +# self.palette.load_palette(palette_file_path) + +# def reset_palette(self): +# """Resets the palette to the default package palette.""" +# self.palette.reset_palette() + +# def export_palette(self, path: str) -> str: +# """Gets the palette.json file. + +# Args: +# ----- +# path (str): The path to export the palette. + +# Returns: +# -------- +# str: The palette file. +# """ +# import os + +# if not os.path.exists(path): +# raise ValueError(f"Path {path} does not exist.") +# return self.palette.export_palette(path) + +# def import_palette(self, path: str): +# """Imports a palette.json file. + +# Args: +# ----- +# path (str): The path to import the palette from. +# """ +# import os + +# if not os.path.exists(path): +# raise ValueError(f"Path {path} does not exist.") +# self.palette.import_palette(path) + +# def create_new_theme(self, theme: Theme) -> dict: +# """Creates a new theme. + +# Args: +# ----- +# node_type (str): The node type to create a new theme for. +# base (str): The base color to use for the theme. +# label (str): The label color to use for the theme. +# shape (str): The shape color to use for the theme. 
+# color (str): The color to use for the theme. +# size (str): The size to use for the theme. +# alpha (str): The alpha to use for the theme. + +# Returns: +# -------- +# dict: The new theme. +# """ +# return self.palette.create_new_theme( +# theme.node_type, +# theme.base, +# theme.label, +# theme.shape, +# theme.color, +# theme.size, +# theme.alpha, +# ) + + +# class PositionHandler: +# def __init__(self, include_networkx: bool = True, include_custom: bool = True): +# self.layouts = Positions(include_networkx, include_custom) + +# def add_layout(self, name: str, function: callable, attributes: list): +# """Adds a layout to the list of available layouts. + +# Args: +# ----- +# name (str): The name of the layout to add. \n +# function (callable): The function to use for the layout. \n +# attributes (list): The attributes to use for the layout. +# """ +# self.layouts.add_layout(name, function, attributes) + +# def add_networkx_layouts(self): +# """Adds all networkx layouts to the list of available layouts.""" +# self.layouts.add_networkx_layouts() + +# def add_custom_layouts(self): +# """Adds all custom layouts to the list of available layouts.""" +# self.layouts.add_custom_layouts() + +# def get_layout_names(self) -> list: +# """Gets all layout names from the list of available layouts. + +# Returns: +# -------- +# list: The name of available layouts. +# """ +# return self.layouts.get_layout_names() + +# def get_positions(self, graph: nx.DiGraph, layout: str = "") -> dict: +# """Gets the positions of the nodes in the graph. + +# Args: +# ----- +# graph (nx.DiGraph): The networkx graph to get the positions of. \n +# layout (str): The layout to use to get the positions of the nodes. + +# Returns: +# -------- +# dict: The positions of the nodes in the graph. +# """ +# return self.layouts.get_positions(graph, layout) + +# def get_layouts(self) -> list: +# """Gets the available layouts. + +# Returns: +# -------- +# list: The available layouts. 
+# """ +# return self.layouts.get_layouts() + +# def get_layout(self, name: str) -> tuple: +# """Gets a layout from the list of available layouts. + +# Args: +# ----- +# name (str): The name of the layout to get. + +# Returns: +# -------- +# tuple: The layout. +# """ +# return self.layouts.get_layout(name) + + +# class ProcessorHandler: +# def process( +# self, +# source: str, +# api: bool = False, +# plot: bool = True, +# labels: bool = False, +# json: bool = False, +# grid: bool = False, +# show: bool = False, +# single_file: bool = False, +# output_dir: str = None, +# ) -> dict: +# """Parses the source code, creates a graph, creates a plot, creates a json file, and outputs the results. + +# Parameters: +# ----------- +# source (str): +# The source directory or source file to process. +# api (bool): +# Whether calling from api or not. +# single_file (bool): +# Whether to process a single file or the whole source file directory. +# plot (bool): +# Whether to plot the graph or not. +# labels (bool): +# Whether to show the labels or not. +# json (bool): +# Whether to save the json file or not. +# grid (bool): +# Whether to show the grid or not. +# show (bool): +# Whether to show the plot or not. (will interrupt the program until the plot is closed) +# RECOMMENDED TO KEEP '_show' FALSE. +# output_dir (str): +# The output directory to save the json file to. + +# Returns: +# -------- +# dict | None +# If called from the API dict is json object of the graph.\n +# If called locally dict is the paths to the output directory. +# 'version': +# the runtime version of the process. +# 'output_dir': +# the path to the output directory. +# 'version_dir': +# the path to the output/version directory. +# 'graph_dir': +# the path to the output/graph directory. +# 'graph_code_dir': +# the path to the output/graph/from_code directory. +# 'graph_json_dir': +# the path to the output/graph/from_json directory. +# 'json_dir': +# the path to the output/json directory. 
+# 'json_graph_file_path': +# the path to the output/json/graph.json file. +# """ +# from .processor import process + +# # Validate the source +# if not os.path.exists(source): +# raise ValueError(f"Source {source} does not exist.") +# if not os.path.isdir(source) and not os.path.isfile(source): +# raise ValueError(f"Source {source} is not a directory or file.") + +# if not api: +# return process( +# source, api, plot, labels, json, grid, show, single_file, output_dir +# ) +# else: +# return process(source=source, api=api, single_file=single_file) + + +# class DirectoryHandler: +# def __init__(self, source_files: list = None): +# self.source: list = source_files +# self.parser = None +# self.graph = None + +# def get_source_files(self, source_file: str = None) -> list: +# """Gets all source files from a source file or directory. + +# Args: +# ----- +# source_file (str): The source file or directory to get the source files from. + +# Returns: +# -------- +# list: The list of source files. +# """ +# _source_file: str = self.source[0] if source_file is None else source_file +# if _source_file is None: +# raise ValueError("No source provided.") +# return get_all_source_files(_source_file) + +# def get_output_dir(self) -> str: +# """Gets the output directory. + +# Returns: +# -------- +# str: The output directory. +# """ +# return get_output_dir() + +# def reset_output_dir(self) -> str: +# """Resets the output directory to the default package output.""" +# return reset_output_dir() + +# def set_output_dir(self, output_dir: str) -> str: +# """Sets the output directory. + +# Args: +# ----- +# output_dir (str): The output directory to set. +# """ +# return set_output_dir(output_dir) + +# def create_output_dir(self, make_dir: bool = False) -> str: +# """Setup the output directory. + +# Parameters: +# ----------- +# make_dir : bool +# Whether or not to make the output directory. If False, the output directory is set to 'RUN_TIME' filler string. 
+ +# Returns: +# -------- +# str +# The path to the output directory. +# """ +# return reset_output_dir(make_dir) + +# def get_last_dated_output_dirs() -> dict: +# """Gets the last dated output directories. + +# Returns: +# -------- +# dict: The last dated output directories. +# """ +# return get_last_dated_output_dirs() + +# def get_processor_path(self) -> str: +# """Gets the processor.py path. + +# Returns: +# -------- +# str: The processor.py path. +# """ +# return PROCESSOR_FILE_PATH + +# def get_package_version(self) -> str: +# """Gets the package version. + +# Returns: +# -------- +# str: The package version. +# """ +# return CODE_CARTO_PACKAGE_VERSION + +# def get_palette_directory(self, default: bool = False) -> str: +# """Gets the palette directory. + +# Parameters: +# ----------- +# default : bool +# Whether to get the default palette directory or not. + +# Returns: +# -------- +# str: The palette directory. +# """ +# if default: +# return PALETTE_DIRECTORY["default"] +# else: +# return PALETTE_DIRECTORY["user"] + +# def print_all_directories(self) -> dict: +# """Prints all directories.""" +# return print_all_directories() + + +# class UtilityHandler: +# def __init__(self): +# self.utility = None + +# def get_date_time_file_format(self) -> str: +# """Gets the date time file format. + +# Returns: +# -------- +# str: The date time file format. +# """ +# return get_date_time_file_format() + +# def check_file_exists(self, file: str) -> bool: +# """Checks if a file exists. + +# Args: +# ----- +# file (str): The file to check. + +# Returns: +# -------- +# bool: Whether the file exists or not. +# """ +# return check_file_path(file) + + +# class LogHandler: +# from datetime import datetime + +# def __init__(self): +# self.log_handler = None + +# def log_duration( +# self, +# path: str = None, +# start: datetime = None, +# end: datetime = None, +# duration: float = None, +# ) -> None: +# """Logs process duration. 
+ +# Parameters: +# ----------- +# path (str): +# The API router path. +# start (datetime): +# The start time of the process. +# end (datetime): +# The end time of the process. +# duration (float): +# The duration of the process. +# """ +# if path is None: +# raise ValueError("No path provided.") +# if start is None: +# raise ValueError("No start time provided.") +# if end is None: +# raise ValueError("No end time provided.") +# if duration is None: +# raise ValueError("No duration provided.") + +# # CW: Move this to a Logging sub module +# # connect to the database +# # insert error into ErrorLog table in database +# pass diff --git a/src/codecarto/containers/processor/src/models/__init__.py b/src/codecarto/containers/processor/src/models/__init__.py new file mode 100644 index 0000000..a42288e --- /dev/null +++ b/src/codecarto/containers/processor/src/models/__init__.py @@ -0,0 +1 @@ +# Models folder will hold data models for the application. \ No newline at end of file diff --git a/src/codecarto/containers/processor/src/models/graph_data.py b/src/codecarto/containers/processor/src/models/graph_data.py new file mode 100644 index 0000000..c38b4c1 --- /dev/null +++ b/src/codecarto/containers/processor/src/models/graph_data.py @@ -0,0 +1,59 @@ +from pydantic import BaseModel, Field + + +# This is a description of the Graph class accepted by the plotter. +# This way the client can send a general graph object and the server +# can convert it to a networkx graph object, the expected data. +# Doing so makes it easier to handle and validate the incoming data, +# as well as keeping the API purely HTTP/JSON without needing to +# serialize/deserialize complex objects. 
from typing import Optional


class Edge(BaseModel):
    """A single edge of the graph, referencing its endpoints by node ID."""

    # Optional so that an explicit null is accepted as well as an omitted field.
    id: Optional[int] = None
    type: str = ""
    source: Optional[int] = None
    target: Optional[int] = None


class Node(BaseModel):
    """A single node of the graph, optionally nested through ``children``."""

    id: Optional[int] = None
    type: str = ""
    label: str = ""
    base: str = ""
    # The description below promises "null if the node has no parent".
    parent: Optional[int] = None
    children: list["Node"] = []
    edges: list[Edge] = []


# Resolve the self-reference in Node.children.
Node.update_forward_refs()


class GraphData(BaseModel):
    """The graph payload: nodes and edges, each keyed by ID."""

    nodes: dict[str, Node] = Field(..., alias="nodes")
    edges: dict[str, Edge] = Field(..., alias="edges")


def get_graph_description() -> dict:
    """Returns a description of the GraphData class.

    Returns:
    --------
    dict: A description of the GraphData class.
    """
    return {
        "nodes": "Dictionary where each key is the ID of a node and the value is the node's data.",
        "edges": "Dictionary where each key is the ID of an edge and the value is the edge's data.",
        "node data": {
            "id": "The node's ID.",
            "type": "The type of the node.",
            "label": "The node's label.",
            "base": "The node's base.",
            "parent": "The ID of the node's parent, or null if the node has no parent.",
            "children": "List of the node's children. Each child is represented by its data.",
            "edges": "List of the node's edges. Each edge is represented by its data.",
        },
        "edge data": {
            "id": "The edge's ID.",
            "type": "The type of the edge.",
            "source": "The ID of the edge's source node.",
            "target": "The ID of the edge's target node.",
        },
    }
import os


def find_starting_file(source_files: list) -> str:
    """Find the starting file.

    The file named by the ``STARTING_FILE`` environment variable wins if it is
    among ``source_files``; otherwise common entry-point names are tried in
    priority order.

    Parameters:
    -----------
    source_files : list
        List of source file paths.

    Returns:
    --------
    str
        The starting file, or None if no candidate is found.
    """

    def _first_existing(file_name: str):
        """Return the first existing source file whose basename is ``file_name``."""
        for source_file in source_files:
            if os.path.basename(source_file) == file_name and os.path.exists(source_file):
                return source_file
        return None

    # Prioritize user-specified starting file
    user_specified_file = os.environ.get("STARTING_FILE")
    if user_specified_file:
        match = _first_existing(user_specified_file)
        if match is not None:
            return match

    # Heuristics to guess the starting file, most likely name first
    possible_starting_files = (
        "main.py",
        "app.py",
        "__init__.py",
        "run.py",
        "cli.py",
        "application.py",
    )
    for possible_starting_file in possible_starting_files:
        match = _first_existing(possible_starting_file)
        if match is not None:
            return match

    # No user-specified or well-known starting file among the sources
    return None


def find_top_level_directory(file_path) -> str:
    """Returns the top level directory of the starting file.

    Walks upwards from the file's directory for as long as the directory
    contains an ``__init__.py`` and returns the highest such directory. If the
    file's own directory is not a package, that directory is returned.

    Parameters
    ----------
    file_path : str
        The path to the starting file.

    Returns
    -------
    str
        The top level directory of the starting file. Never empty: the
        current working directory is reported as ``os.curdir``.
    """
    current_dir = os.path.dirname(file_path)
    last_dir_with_init = None

    while os.path.exists(os.path.join(current_dir, "__init__.py")):
        last_dir_with_init = current_dir
        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            # reached the filesystem root (or the end of a relative path)
            break
        current_dir = parent_dir

    # For a bare file name, or a relative path that climbed to the working
    # directory, both candidates are "". Callers pass the result to os.walk,
    # which yields nothing for "", so normalise to os.curdir.
    return last_dir_with_init or current_dir or os.curdir


def get_all_source_files(starting_file_path) -> list:
    """Returns a list of all Python source files in the directory of the starting file.

    Parameters
    ----------
    starting_file_path : str
        The path to the starting file.

    Returns
    -------
    list
        A list of all Python source files below the top level directory of
        the starting file, skipping tooling/environment directories.
    """
    top_level_directory = find_top_level_directory(starting_file_path)

    # names (directories or files) that are never part of the source
    filters = {
        ".dev",
        ".git",
        ".env",
        "env",
        ".venv",
        "venv",
        "__pycache__",
        ".nox",
        ".pytest_cache",
        ".benchmarks",
    }

    source_files: list = []
    for root, dirs, files in os.walk(top_level_directory):
        # prune in place so os.walk does not descend into filtered directories
        dirs[:] = [d for d in dirs if d not in filters]
        source_files.extend(
            os.path.join(root, file)
            for file in files
            if file not in filters and file.endswith(".py")
        )
    return source_files


def get_file_source_directory(file_path) -> dict:
    """Placeholder: currently does nothing and returns None.

    The intended implementation is kept below for reference.
    """
    # source_files = get_all_source_files(file_path)
    # top_level_directory = find_top_level_directory(file_path)
    # starting_file = find_starting_file(top_level_directory)

    # return {
    #     "start_file": starting_file,
    #     "top_level_directory": top_level_directory,
    #     "source_files": source_files,
    # }
    pass
visitor will add a node to the graph. They will do one or more of the following: +# 1. Add a node to the graph +# 2. Add an edge to the graph +# 3. Act as a helper node for other nodes +# 4. Visit the node's children +# 5. Do nothing +# 1. loop through source files +# 1. add file to parsed files +# 2. get a tree of each file +# 3. parse the tree, which visits each ast.node found for each item in the tree representing the file. +# 1. The first visit should be to a module for each file. Files are modules. Module parents will be the root graph. +# 2. When Module is visited, it will visit its children with 'generic_visit'. +# 3. The immediate children are typically imports, functions, or classes. +# - These would have a parent of the module. +# 4. Each child also calls the 'generic_visit' method, which will visit the child's children. +# - Their parents would be the child of the module. +# 5. Some children have children, like functions and classes, and some do not, like 'pass' and 'break'. +# 1. Typical containing children are classes, functions, collections +# 2. 
Typical non-containing children are variables and constants +# - Imports will be considered barren, they do have children, but for this they act a connecting node to other module graphs +# - ImportFroms are similar to imports, but they will be a connecting node from the current module to the from module's object +# that we're importing +# - They need to be nodes in the current module graph, but are not the imported module graph itself +# - The imported obj will point to the import | importfrom node +# - The module that is importing will point to the import | importfrom node as well +# - The import | importfrom node will point to the obj using the imported obj in the module importing the obj +# - So Graph(module).node(import|importfram) -> Graph(module).node(imported item) +# - processor.py -> Processor -> main -> parser <- (from parser.py import Parser) <- processor.py +# - processor.py -> from models import graph_data -> models module graph -> graph_data +# - processor.py -> (from parser.py import Parser) <- Parser, a node in the parser module graph +# - parser.py -> Parser -> (from parser.py import Parser) <- processor.py +# 4. once we get back to the module graph, we can mark it as complete and add it to the root graph +# 5. next file + + +class Parser(ast.NodeVisitor): + """Parse a python source file into a networkx graph.""" + + # TODO: when we eventually add import and importFrom, they need to be the id of the Module they represent + # TODO: when this gets updated, do logic of option 'uno' + def __init__(self, source_files: list = None, source_dict: dict = None): + """Initialize the parser. + + Parameters: + ----------- + source_files : set + A set of source files to parse. 
+ """ + # The graph to populate + self.source_files: list = source_files + self.graph: nx.DiGraph = nx.DiGraph() + # To track current elements + self.current_file: str = None # file + self.current_node: nx.DiGraph = None # node + self.current_type: str = None # type + # self.current_module: nx.DiGraph = None # module + # self.current_class: nx.DiGraph = None # class + # self.current_function: nx.DiGraph = None # function + self.current_parent: nx.DiGraph = None # for, while, if, etc. + # Create root and python nodes + self.root: nx.DiGraph = nx.DiGraph(name="root") + self.python: nx.DiGraph = nx.DiGraph(name="python") + self.add_start_nodes() + # Parse the source code + self.parsed_files: list = [] + self.text_to_json_filename: str = None + if source_files: + self.parse_list_of_files(source_files) + elif source_dict: + self.text_to_json_filename = source_dict["filename"] + self.parse_text(source_dict["raw"]) + + def add_start_nodes(self): + """Add root and python node to the graph.""" + # add the root node + self.graph.add_node( + id(self.root), type="Module", label="root", base="module", parent=None + ) + # add the python node + self.graph.add_node( + id(self.python), + type="Module", + label="python", + base="module", + parent=id(self.root), + ) + self.graph.add_edge(id(self.root), id(self.python)) + + def parse_list_of_files(self, source_files: list) -> nx.DiGraph: + """Parse the codes in the list. + + Parameters: + ----------- + source_files : list + A list of source files to parse. 
+ """ + test = False + if test: + # TODO: for local testing purpose + _files = ["plotter.py"] + # loop through the list of source files + for file_path in source_files: + # TODO: testing purpose: check if base name of file_path in _files + if os.path.basename(file_path) in _files: + # Check if the file has already been parsed + if file_path in self.parsed_files: + continue + # Add the file to the parsed files + self.parsed_files.append(file_path) + # Parse the code + self.current_file = file_path + self.parse_code(file_path) + else: + # check if graph only has root and python nodes + if not source_files or len(source_files) == 0: + if len(self.graph.nodes) == 2: + # remove the root and python nodes + self.graph.remove_node(id(self.root)) + self.graph.remove_node(id(self.python)) + return None + + # loop through the list of source files + for file_path in source_files: + # Check if the file has already been parsed + if file_path in self.parsed_files: + continue + # Add the file to the parsed files + self.parsed_files.append(file_path) + # Parse the code + self.current_file = file_path + self.parse_code(file_path) + + def parse_text(self, source_text: str) -> nx.DiGraph: + """Parse the code in the specified file path. + + Parameters: + ----------- + source_text : str + The source text to parse. 
+ """ + # Check params + if not source_text: + raise ValueError("Parser.parse_text: source_text is None") + if not self.text_to_json_filename: + raise ValueError("Parser.parse_text: filename is None") + + # Parse the code + tree = ast.parse(source_text, filename=self.text_to_json_filename) + # self.pretty_ast_dump(tree) + # Visit the tree + # this starts the decent through the file's code objects + if tree: + self.visit(tree) + else: + print(f"Parser.parse_text: tree is None for {self.text_to_json_filename}") + raise ValueError( + f"Parser.parse_text: tree is None for {self.text_to_json_filename}" + ) + + def parse_code(self, file_path) -> nx.DiGraph: + """Parse the code in the specified file path. + + Parameters: + ----------- + file_path : str + The path to the file to parse. + """ + # Parse the code + with open(file_path, "r") as with_file: + code = with_file.read() + tree = ast.parse(code, filename=file_path) + # self.pretty_ast_dump(tree) + # Visit the tree + # this starts the decent through the file's code objects + self.visit(tree) + + def pretty_ast_dump(self, node, indent=0): + """Pretty print the ast tree. + + Parameters: + ----------- + node : ast.AST + The ast node to print. + indent : int + The indent level. + """ + if isinstance(node, ast.AST): + node_name = node.__class__.__name__ + print(" " * indent + node_name) + + for field_name, field_value in ast.iter_fields(node): + print(" " * (indent + 1) + field_name + ":") + self.pretty_ast_dump(field_value, indent + 2) + elif isinstance(node, list): + for item in node: + self.pretty_ast_dump(item, indent) + else: + print(" " * indent + repr(node)) + + def create_new_node( + self, node_id: int, node_type: str, node_label: str, node_parent_id: int + ) -> nx.DiGraph: + """Create new node. + + Parameters: + ----------- + node_id : int + The id of the new node. + node_type : str + The type of the new node. + node_label : str + The label of the new node. + node_parent_id : int + The id of the parent new node. 
+ """ + if not node_label: + node_label = f"{node_type} (u)" + _node = self.graph.add_node( + node_id, type=node_type, label=node_label, parent=node_parent_id + ) + self.graph.add_edge(node_parent_id, node_id) + return _node + + # Deprecated + # def visit_Bytes(self, node : ast.Bytes): + # def visit_Ellipsis(self, node : ast.Ellipsis): + # def visit_ExtSlice(self, node : ast.ExtSlice): + # def visit_Index(self, node : ast.Index): + # def visit_NameConstant(self, node : ast.NameConstant): + # def visit_Num(self, node : ast.Num): + # def visit_Str(self, node : ast.Str): + # def visit_Param(self, node: ast.Param): + + # region Mode + def visit_Expression(self, node: ast.Expression): + """Visit the expression node. + + Parameters: + ----------- + node : ast.Expression + The expression node to visit. + + Notes: + ------ + In the following "xsqur = x * x", the ast.Expression node represents "x * x". \n + While ast.Name.id represents 'xsqur'. + """ + return + # Add the expression node to the graph + self.create_new_node( + node_id=id(node), + node_type="Expression", + node_label=None, + node_parent_id=id(self.current_parent), + ) + # Visit the children + self.generic_visit(node) + + def visit_FunctionType(self, node: ast.FunctionType): + """Visit the function type node. + + Parameters: + ----------- + node : ast.FunctionType + The function type node to visit. + + Notes: + ------ + In the following "def foo(x: int) -> int:", the ast.FunctionType node represents "x: int -> int". \n + While ast.Name.id represents 'foo'. + """ + return + # Add the function type node to the graph + self.create_new_node( + node_id=id(node), + node_type="FunctionType", + node_label=None, + node_parent_id=id(self.current_parent), + ) + # Visit the children + self.generic_visit(node) + + def visit_Interactive(self, node: ast.Interactive): + """Visit the interactive node. + + Parameters: + ----------- + node : ast.Interactive + The interactive node to visit. 
+ + Notes: + ------ + In the following "python -i", the ast.Interactive node represents "python -i". + """ + return + # Add the interactive node to the graph + self.create_new_node( + node_id=id(node), + node_type="Interactive", + node_label=None, + node_parent_id=id(self.current_parent), + ) + # Visit the children + self.generic_visit(node) + + def visit_Module(self, node: ast.Module): + """Visit the module node. + + Parameters: + ----------- + node : ast.Module + The module node to visit. + + Notes: + ------ + ast.Module represents the entire python file. + """ + # Add the module node to the graph + if self.current_file: + node_label = os.path.basename(self.current_file) + elif self.text_to_json_filename: + node_label = self.text_to_json_filename + else: + node_label = "default_filename.py" + + self.create_new_node( + node_id=id(node), + node_type="Module", + node_label=node_label, + node_parent_id=id(self.root), + ) + # Set the current parent to the module node + self.current_parent = node + # self.current_module = node + # Visit the children of the Module + self.generic_visit(node) + + # endregion + + # region Literals + def visit_Constant(self, node: ast.Constant): + """Visit the constant node. + + Parameters: + ----------- + node : ast.Constant + The constant node to visit. + + Notes: + ------ + In the following "x = 1", the ast.Constant node represents "1". \n + While ast.Name.id represents 'x'. + """ + return + # Add the constant node to the graph + self.graph.add_node( + id(node), + type="Constant", + label=node.value, + base="literal", + parent=id(self.current_parent), + ) + # Add an edge from the current parent to the constant node + self.graph.add_edge(id(self.current_parent), id(node)) + # Set the current parent to the constant node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_Dict(self, node: ast.Dict): + """Visit the dict node. + + Parameters: + ----------- + node : ast.Dict + The dict node to visit. 
+ + Notes: + ------ + In the following "x = {1: 2}", the ast.Dict node represents "{1: 2}". \n + While ast.Name.id represents 'x'. + """ + # I don't really want the 'Dict' node in the graph, + # but to say that it's children are of type Dict + # so set the current_type to Dict, visit children, + # then unset the current_type + + # Set the current type to Dict + self.current_type = "Dict" + # Visit the children + self.generic_visit(node) + # Unset the current type + self.current_type = None + + # # Add the dict node to the graph + # self.create_new_node( + # node_id=id(node), + # node_type="Dict", + # node_label="dict", + # node_parent_id=id(self.current_parent), + # ) + # # Set the current parent to the dict node + # self.current_parent = node + # # Visit the children + # self.generic_visit(node) + + def visit_FormattedValue(self, node: ast.FormattedValue): + """Visit the formatted value node. + + Parameters: + ----------- + node : ast.FormattedValue + The formatted value node to visit. + + Notes: + ------ + In the following "x = f'{1}'", the ast.FormattedValue node represents "'{1}'". \n + While ast.Name.id represents 'x'. + """ + return + # Add the formatted value node to the graph + self.create_new_node( + node_id=id(node), + node_type="FormattedValue", + node_label=None, + node_parent_id=id(self.current_parent), + ) + # Visit the children + self.generic_visit(node) + + def visit_JoinedStr(self, node: ast.JoinedStr): + """Visit the joined string node. + + Parameters: + ----------- + node : ast.JoinedStr + The joined string node to visit. + + Notes: + ------ + In the following "x = f'{1}'", the ast.JoinedStr node represents "f'{1}'". \n + While ast.Name.id represents 'x'. 
+ """ + return + # Add the joined string node to the graph + self.create_new_node( + node_id=id(node), + node_type="JoinedStr", + node_label=None, + node_parent_id=id(self.current_parent), + ) + # Visit the children + self.generic_visit(node) + + def visit_List(self, node: ast.List): + """Visit the list node. + + Parameters: + ----------- + node : ast.List + The list node to visit. + + Notes: + ------ + In the following "x = [1, 2]", the ast.List node represents "[1, 2]". \n + While ast.Name.id represents 'x'. + """ + # I don't really want the 'List' node in the graph, + # but to say that it's children are of type List + # so set the current_type to List, visit children, + # then unset the current_type + + # Set the current type to List + self.current_type = "List" + # Visit the children + self.generic_visit(node) + # Unset the current type + self.current_type = None + + # # Add the list node to the graph + # self.create_new_node( + # node_id=id(node), + # node_type="List", + # node_label="list", + # node_parent_id=id(self.current_parent), + # ) + # # Set the current parent to the list node + # self.current_parent = node + # # Visit the children + # self.generic_visit(node) + + def visit_Set(self, node: ast.Set): + """Visit the set node. + + Parameters: + ----------- + node : ast.Set + The set node to visit. + + Notes: + ------ + In the following "x = {1, 2}", the ast.Set node represents "{1, 2}". \n + While ast.Name.id represents 'x'. 
+ """ + # I don't really want the 'Set' node in the graph, + # but to say that it's children are of type Set + # so set the current_type to Set, visit children, + # then unset the current_type + + # Set the current type to Set + self.current_type = "Set" + # Visit the children + self.generic_visit(node) + # Unset the current type + self.current_type = None + + # # Add the set node to the graph + # self.create_new_node( + # node_id=id(node), + # node_type="Set", + # node_label="set", + # node_parent_id=id(self.current_parent), + # ) + # # Set the current parent to the set node + # self.current_parent = node + # # Visit the children + # self.generic_visit(node) + + def visit_Tuple(self, node: ast.Tuple): + """Visit the tuple node. + + Parameters: + ----------- + node : ast.Tuple + The tuple node to visit. + + Notes: + ------ + In the following "x = (1, 2)", the ast.Tuple node represents "(1, 2)". \n + While ast.Name.id represents 'x'. + """ + # I don't really want the 'Tuple' node in the graph, + # but to say that it's children are of type Tuple + # so set the current_type to Tuple, visit children, + # then unset the current_type + + # Set the current type to Tuple + self.current_type = "Tuple" + # Visit the children + self.generic_visit(node) + # Unset the current type + self.current_type = None + + # # Add the tuple node to the graph + # self.create_new_node( + # node_id=id(node), + # node_type="Tuple", + # node_label="tuple", + # node_parent_id=id(self.current_parent), + # ) + # # Set the current parent to the tuple node + # self.current_parent = node + # # Visit the children + # self.generic_visit(node) + + # endregion + + # region Variables + def visit_Name(self, node: ast.Name): + """Visit the name node. + + Parameters: + ----------- + node : ast.Name + The name node to visit. + + Notes: + ------ + In the following "x = 1", the ast.Name node represents 'x'. \n + While ast.Constant.value represents 1. 
+ """ + # Add the name node to the graph + _type: str = None + if self.current_type: + _type = self.current_type + else: + _type = "Variable" + self.create_new_node( + node_id=id(node), + node_type=_type, + node_label=node.id, + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the name node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_Store(self, node: ast.Store): + """Visit the store node. + + Parameters: + ----------- + node : ast.Store + The store node to visit. + + Notes: + ------ + The ast.Store node indicates that this is a store context (i.e., the var is being assigned a value). \n + ast.Name.id represents the name of the variable being assigned. \n + ast.Constant.value represents the value being assigned to the variable. + """ + return + # Add the store node to the graph + self.create_new_node( + node_id=id(node), + node_type="Store", + node_label=None, + node_parent_id=id(self.current_parent), + ) + # Visit the children + self.generic_visit(node) + + def visit_Starred(self, node: ast.Starred): + """Visit the starred node. + + Parameters: + ----------- + node : ast.Starred + The starred node to visit. + + Notes: + ------ + In the following "x = [*range(10)]", the ast.Starred node represents "*range(10)". \n + While ast.Name.id represents 'x'. + """ + return + # Add the starred node to the graph + self.create_new_node( + node_id=id(node), + node_type="Starred", + node_label=None, + node_parent_id=id(self.current_parent), + ) + # Visit the children + self.generic_visit(node) + + def visit_arg(self, node: ast.arg): + """Visit the arg node. + + Parameters: + ----------- + node : ast.arg + The arg node to visit. + + Notes: + ------ + In the following "def some_func(x):", the ast.arg node represents 'x'. \n + While ast.Name.id represents 'some_func'. 
+ """ + # Add the arg node to the graph + _type: str = None + if self.current_type: + _type = self.current_type + else: + _type = "Variable" + self.create_new_node( + node_id=id(node), + node_type=_type, + node_label=node.arg, + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the arg node + self.current_parent = node + + # endregion + + # region Expressions + def visit_Attribute(self, node: ast.Attribute): + """Visit the attribute node. + + Parameters: + ----------- + node : ast.Attribute + The attribute node to visit. + + Notes: + ------ + In the following "x = some_obj.some_attr" : \n + ast.Attribute.value node represents 'some_obj'. \n + ast.Attribute.attr node represents 'some_attr'. \n + While ast.Name.id represents 'x'. + """ + return + # Add the attribute node to the graph + self.create_new_node( + node_id=id(node), + node_type="Attribute", + node_label=node.attr, + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the attribute node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_BinOp(self, node: ast.BinOp): + """Visit the binop node. + + Parameters: + ----------- + node : ast.BinOp + The binop node to visit. + + Notes: + ------ + In the following "x = 1 + 2", the ast.BinOp node represents Binary Operation itself. \n + ast.BinOp.value outputs "BinOp(left=Num(n=1), op=Add(), right=Num(n=2))" : \n + ast.BinOp.left node represents '1'. \n + ast.BinOp.op node represents 'Add()'. \n + ast.BinOp.right node represents '2'. \n + While ast.Name.id represents 'x'. + """ + return + # Add the binop node to the graph + self.create_new_node( + node_id=id(node), + node_type="BinOp", + node_label="BinOp", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the binop node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_BoolOp(self, node: ast.BoolOp): + """Visit the boolop node. 
+ + Parameters: + ----------- + node : ast.BoolOp + The boolop node to visit. + + Notes: + ------ + In the following "x = True and False", the ast.BoolOp node represents Boolean Operation itself. \n + ast.BoolOp.values outputs "BoolOp(values=[NameConstant(value=True), NameConstant(value=False)])" : \n + ast.BoolOp.values[0] node represents 'True'. \n + ast.BoolOp.values[1] node represents 'False'. \n + While ast.Name.id represents 'x'. + """ + return + # Add the boolop node to the graph + self.create_new_node( + node_id=id(node), + node_type="BoolOp", + node_label="BoolOp", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the boolop node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_Call(self, node: ast.Call): + """Visit the call node. + + Parameters: + ----------- + node : ast.Call + The call node to visit. + + Notes: + ------ + In the following "x = some_func(1, 2)", the ast.Call node represents the function call itself. \n + ast.Call.func outputs "Name(id='some_func', ctx=Load())" : \n + ast.Call.func.id node represents 'some_func'. \n + ast.Call.args[0] node represents '1'. \n + ast.Call.args[1] node represents '2'. \n + While ast.Name.id represents 'x'. + """ + return + # Add the call node to the graph + self.graph.add_node( + id(node), type="Call", label="Call", parent=id(self.current_parent) + ) + # Add an edge from the current parent to the call node + self.graph.add_edge(id(self.current_parent), id(node)) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the call node + self.current_parent = node + # Visit the call's children + self.generic_visit(node) + # Set the current parent back to the call's parent + self.current_parent = old_parent + + def visit_Compare(self, node: ast.Compare): + """Visit the compare node. + + Parameters: + ----------- + node : ast.Compare + The compare node to visit. 
+ + Notes: + ------ + In the following "x = 1 < 2", the ast.Compare node represents the comparison itself. \n + ast.Compare.left outputs "Num(n=1)" : \n + ast.Compare.left node represents '1'. \n + ast.Compare.ops[0] node represents 'Lt()'. \n + ast.Compare.comparators[0] node represents '2'. \n + While ast.Name.id represents 'x'. + """ + return + # Add the compare node to the graph + self.create_new_node( + node_id=id(node), + node_type="Compare", + node_label="Compare", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the compare node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_Expr(self, node: ast.Expr): + """Visit the expression node. + + Parameters: + ----------- + node : ast.Expr + The expression node to visit. + + Notes: + ------ + In the following "x = 1", the ast.Expr node represents the expression itself. \n + ast.Expr.value outputs "Num(n=1)" : \n + ast.Expr.value node represents '1'. \n + While ast.Name.id represents 'x'.\n \n + The difference between ast.Expr and ast.Expression is that ast.Expr is a statement, while ast.Expression is an expression. \n + For example, "x = 1" is a statement, while "1" is an expression. + """ + return + # Add the expression node to the graph + self.graph.add_node( + id(node), type="Expr", label="Expr", parent=id(self.current_parent) + ) + # Add an edge from the current parent to the expression node + self.graph.add_edge(id(self.current_parent), id(node)) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the expression node + self.current_parent = node + # Visit the expression's children + self.generic_visit(node) + # Set the current parent back to the expression's parent + self.current_parent = old_parent + + def visit_IfExp(self, node: ast.IfExp): + """Visit the ifexp node. + + Parameters: + ----------- + node : ast.IfExp + The ifexp node to visit. 
+ + Notes: + ------ + In the following "x = 1 if True else 2", the ast.IfExp node represents the if expression itself. \n + ast.IfExp.body outputs "Num(n=1)" : \n + ast.IfExp.body node represents '1'. \n + ast.IfExp.test node represents 'True'. \n + ast.IfExp.orelse node represents '2'. \n + While ast.Name.id represents 'x'. + """ + # Add the ifexp node to the graph + self.create_new_node( + node_id=id(node), + node_type="IfExp", + node_label="IfExp", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the ifexp node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_NamedExpr(self, node: ast.NamedExpr): + """Visit the namedexpr node. + + Parameters: + ----------- + node : ast.NamedExpr + The namedexpr node to visit. + + Notes: + ------ + In the following "x := 1", the ast.NamedExpr node represents the named expression itself. \n + ast.NamedExpr.value outputs "Num(n=1)" : \n + ast.NamedExpr.value node represents '1'. \n + While ast.Name.id represents 'x'. + """ + return + # Add the namedexpr node to the graph + self.create_new_node( + node_id=id(node), + node_type="NamedExpr", + node_label="NamedExpr", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the namedexpr node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_UnaryOp(self, node: ast.UnaryOp): + """Visit the unaryop node. + + Parameters: + ----------- + node : ast.UnaryOp + The unaryop node to visit. + + Notes: + ------ + In the following "-x", the ast.UnaryOp node represents the unary operation itself. \n + ast.UnaryOp.operand outputs "Name(id='x')" : \n + ast.UnaryOp.operand node represents 'x'. \n + ast.UnaryOp.op node represents 'USub()'. \n + While ast.Name.id represents 'x'. 
+ """ + return + # Add the unaryop node to the graph + self.create_new_node( + node_id=id(node), + node_type="UnaryOp", + node_label="UnaryOp", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the unaryop node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + # endregion + + # region Expression - Comprehensions + def visit_DictComp(self, node: ast.DictComp): + """Visit the dictcomp node. + + Parameters: + ----------- + node : ast.DictComp + The dictcomp node to visit. + + Notes: + ------ + In the following "squares = {x: x**2 for x in range(1, 6)}", the ast.DictComp node represents the dictionary comprehension itself. \n + ast.DictComp.key outputs "Name(id='x')" : \n + ast.DictComp.key node represents 'x'. \n + ast.DictComp.value node represents an ast.BinOp node 'BinOp()'. \n + ast.DictComp.generators node represents 'comprehension()'. \n + While ast.Name.id represents 'squares'. + """ + return + # Add the dictcomp node to the graph + self.create_new_node( + node_id=id(node), + node_type="DictComp", + node_label="DictComp", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the dictcomp node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_GeneratorExp(self, node: ast.GeneratorExp): + """Visit the generatorexp node. + + Parameters: + ----------- + node : ast.GeneratorExp + The generatorexp node to visit. + + Notes: + ------ + In the following "squares = (x**2 for x in range(1, 6))", the ast.GeneratorExp node represents the generator expression itself. \n + ast.GeneratorExp.elt outputs "BinOp()" : \n + ast.GeneratorExp.elt node represents 'BinOp()'. \n + ast.GeneratorExp.generators node represents 'comprehension()'. \n + While ast.Name.id represents 'squares'. 
+ """ + return + # Add the generatorexp node to the graph + self.create_new_node( + node_id=id(node), + node_type="GeneratorExp", + node_label="GenExp", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the generatorexp node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_ListComp(self, node: ast.ListComp): + """Visit the listcomp node. + + Parameters: + ----------- + node : ast.ListComp + The listcomp node to visit. + + Notes: + ------ + In the following "squares = [x**2 for x in range(1, 6)]", the ast.ListComp node represents the list comprehension itself. \n + ast.ListComp.elt outputs "BinOp()" : \n + ast.ListComp.elt node represents a ast.BinOp node 'BinOp()'. \n + ast.ListComp.generators node represents 'comprehension()'. \n + While ast.Name.id represents 'squares'. + """ + # Add the listcomp node to the graph + self.create_new_node( + node_id=id(node), + node_type="ListComp", + node_label="ListComp", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the listcomp node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_SetComp(self, node: ast.SetComp): + """Visit the setcomp node. + + Parameters: + ----------- + node : ast.SetComp + The setcomp node to visit. + + Notes: + ------ + In the following "squares = {x**2 for x in range(1, 6)}", the ast.SetComp node represents the set comprehension itself. \n + ast.SetComp.elt outputs "BinOp()" : \n + ast.SetComp.elt node represents a ast.BinOp node 'BinOp()'. \n + ast.SetComp.generators node represents 'comprehension()'. \n + While ast.Name.id represents 'squares'. 
+ """ + # Add the setcomp node to the graph + self.create_new_node( + node_id=id(node), + node_type="SetComp", + node_label="SetComp", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the setcomp node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + # endregion + + # region Expression - Subscripting + def visit_Slice(self, node: ast.Slice): + """Visit the slice node. + + Parameters: + ----------- + node : ast.Slice + The slice node to visit. + + Notes: + ------ + In the following "x[1:2]", the ast.Slice node represents the slice itself. \n + ast.Slice.lower outputs "Constant(value=1)" : \n + ast.Slice.lower node represents 'Constant(value=1)'. \n + ast.Slice.upper node represents 'Constant(value=2)'. \n + ast.Slice.step node represents 'None'. \n + While ast.Name.id represents 'x'. + """ + return + # Add the slice node to the graph + self.create_new_node( + node_id=id(node), + node_type="Slice", + node_label="Slice", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the slice node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + def visit_Subscript(self, node: ast.Subscript): + """Visit the subscript node. + + Parameters: + ----------- + node : ast.Subscript + The subscript node to visit. + + Notes: + ------ + In the following "x[1:2]", the ast.Subscript node represents the subscript itself. \n + ast.Subscript.value outputs "Name(id='x')" : \n + ast.Subscript.value node represents an ast.Name node Name(id='x'). \n + ast.Subscript.slice node represents an ast.Slice node 'Slice()'. \n + While ast.Name.id represents 'x'. 
+ """ + return + # Add the subscript node to the graph + self.create_new_node( + node_id=id(node), + node_type="Subscript", + node_label="Subscript", + node_parent_id=id(self.current_parent), + ) + # Set the current parent to the subscript node + self.current_parent = node + # Visit the children + self.generic_visit(node) + + # endregion + + # region Statements + def visit_AnnAssign(self, node: ast.AnnAssign): + """Visit the annotated assignment node. + + Parameters: + ----------- + node : ast.AnnAssign + The annotated assignment node to visit. + + Notes: + ------ + In the following "x: int = 1", the ast.AnnAssign node represents the annotated assignment itself. \n + ast.AnnAssign.target outputs "Name(id='x')" : \n + ast.AnnAssign.target node represents an ast.Name node Name(id='x'). \n + ast.AnnAssign.annotation node represents 'Name(id='int')'. \n + ast.AnnAssign.value node represents 'Constant(value=1)'. \n + While ast.Name.id represents 'x'. + """ + self.generic_visit(node) + return + # Add the annotated assignment node to the graph + self.graph.add_node( + id(node), + type="AnnAssign", + label="AnnAssign", + parent=id(self.current_parent), + ) + # Add an edge from the current parent to the annotated assignment node + self.graph.add_edge(id(self.current_parent), id(node)) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the annotated assignment node + self.current_parent = node + # Visit the annotated assignment's children + self.generic_visit(node) + # Set the current parent back to the annotated assignment's parent + self.current_parent = old_parent + + def visit_Assert(self, node: ast.Assert): + """Visit the assert node. + + Parameters: + ----------- + node : ast.Assert + The assert node to visit. + + Notes: + ------ + In the following "assert x == 1", the ast.Assert node represents the assert itself. 
\n + ast.Assert.test outputs "Compare()" : \n + ast.Assert.test node represents a ast.Compare node 'Compare()'. \n + ast.Assert.msg node represents 'None'. \n + While ast.Name.id represents 'x'. + """ + self.generic_visit(node) + return + # Add the assert node to the graph + self.graph.add_node( + id(node), + type="Assert", + label="Assert", + parent=id(self.current_parent), + ) + # Add an edge from the current parent to the assert node + self.graph.add_edge(id(self.current_parent), id(node)) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the assert node + self.current_parent = node + # Visit the assert's children + self.generic_visit(node) + # Set the current parent back to the assert's parent + self.current_parent = old_parent + + # def infer_type(self, node): + # if isinstance(node, ast.Constant): + # # For Python 3.8+ + # return type(node.value).__name__ + # elif isinstance(node, ast.Num): + # return type(node.n).__name__ + # elif isinstance(node, ast.Str): + # return "str" + # # Add other cases if needed + # else: + # return "var" + + def visit_Assign(self, node: ast.Assign): + """Visit the assignment node. + + Parameters: + ----------- + node : ast.Assign + The assignment node to visit. + + Notes: + ------ + In the following "x = 1", the ast.Assign node represents the assignment itself. \n + ast.Assign.targets outputs "Name(id='x')" : \n + ast.Assign.targets node represents an ast.Name node Name(id='x'). \n + ast.Assign.value node represents 'Constant(value=1)'. \n + While ast.Name.id represents 'x'. 
+ """ + # Assuming the target is a single variable + # check if the target has an id attribute + self.generic_visit(node) + return + _name = "" + parent_id = id(self.current_parent) + + if hasattr(node.targets[0], "id"): + _name = node.targets[0].id + elif hasattr(node.targets[0], "attr"): + _name = node.targets[0].attr + if ( + isinstance(node.targets[0].value, ast.Name) + and node.targets[0].value.id == "self" + ): + # If the attribute is assigned to `self`, set the parent to the current class + parent_id = id(self.current_class) + elif hasattr(node.targets[0], "value"): + _name = node.targets[0].value + else: + _name = node.value + + # # Infer the type of the assigned value + # var_type = self.infer_type(node.value) + + # Now you can create a node with the label as the variable name and base as the inferred type + self.create_new_node( + node_id=id(node), + node_type="Variable", + node_label=_name, + node_parent_id=id(self.current_parent), + ) + + def visit_AugAssign(self, node: ast.AugAssign): + """Visit the augmented assignment node. + + Parameters: + ----------- + node : ast.AugAssign + The augmented assignment node to visit. + + Notes: + ------ + In the following "x += 1", the ast.AugAssign node represents the augmented assignment itself. \n + ast.AugAssign.target outputs "Name(id='x')" : \n + ast.AugAssign.target node represents an ast.Name node Name(id='x'). \n + ast.AugAssign.op node represents an ast.Add node 'Add()'. \n + ast.AugAssign.value node represents an ast.Constant node 'Constant(value=1)'. \n + While ast.Name.id represents 'x'. 
+ """ + self.generic_visit(node) + return + # Add the augmented assignment node to the graph + self.graph.add_node( + id(node), + type="AugAssign", + label="AugAssign", + parent=id(self.current_parent), + ) + # Add an edge from the current parent to the augmented assignment node + self.graph.add_edge(id(self.current_parent), id(node)) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the augmented assignment node + self.current_parent = node + # Visit the augmented assignment's children + self.generic_visit(node) + # Set the current parent back to the augmented assignment's parent + self.current_parent = old_parent + + def visit_Delete(self, node: ast.Delete): + """Visit the delete node. + + Parameters: + ----------- + node : ast.Delete + The delete node to visit. + + Notes: + ------ + In the following "del x", the ast.Delete node represents the delete itself. \n + ast.Delete.targets outputs "Name(id='x')" : \n + ast.Delete.targets node represents an ast.Name node Name(id='x'). \n + While ast.Name.id represents 'x'. + """ + # Add the delete node to the graph + self.create_new_node( + node_id=id(node), + node_type="Delete", + node_label="del", + node_parent_id=id(self.current_parent), + ) + + def visit_Pass(self, node: ast.Pass): + """Visit the pass node. + + Parameters: + ----------- + node : ast.Pass + The pass node to visit. + + Notes: + ------ + In the following "def my_func(): pass", the ast.Pass node represents the pass itself. \n + ast.Pass has no children. + """ + + def visit_Raise(self, node: ast.Raise): + """Visit the raise node. + + Parameters: + ----------- + node : ast.Raise + The raise node to visit. + + Notes: + ------ + In the following "raise Exception()", the ast.Raise node represents the raise itself. \n + ast.Raise.exc outputs "Name(id='Exception')" : \n + ast.Raise.exc node represents an ast.Name node Name(id='Exception'). \n + While ast.Name.id represents 'Exception'. 
+ """ + # Add the raise node to the graph + self.create_new_node( + node_id=id(node), + node_type="Raise", + node_label="raise", + node_parent_id=id(self.current_parent), + ) + + # endregion + + # region Statements - Imports + def visit_Import(self, node: ast.Import): + """Visit the import node. + + Parameters: + ----------- + node : ast.Import + The import node to visit. + + Notes: + ------ + In the following "import os", the ast.Import node represents the import itself. \n + ast.Import.names outputs "alias(name='os')" : \n + ast.Import.names node represents an ast.alias node alias(name='os'). \n + While ast.alias.name represents 'os'. + """ + return + # Add the import node to the graph + self.create_new_node( + node_id=id(node), + node_type="Import", + node_label="import", + node_parent_id=id(self.current_parent), + ) + + def visit_ImportFrom(self, node: ast.ImportFrom): + """Visit the import from node. + + Parameters: + ----------- + node : ast.ImportFrom + The import from node to visit. + + Notes: + ------ + In the following "from os import path", the ast.ImportFrom node represents the import from itself. \n + ast.ImportFrom.module outputs "os" : \n + ast.ImportFrom.module is the plain string 'os' (not an AST node), while ast.ImportFrom.names holds the ast.alias node alias(name='path'). \n + """ + return + # Add the import from node to the graph + self.create_new_node( + node_id=id(node), + node_type="ImportFrom", + node_label="import from", + node_parent_id=id(self.current_parent), + ) + + # endregion + + # region Control Flow + def visit_Break(self, node: ast.Break): + """Visit the break node. + + Parameters: + ----------- + node : ast.Break + The break node to visit. + + Notes: + ------ + In the following "while True: break", the ast.Break node represents the break itself. \n + ast.Break has no children. 
+ """ + return + # Add the break node to the graph + self.create_new_node( + node_id=id(node), + node_type="Break", + node_label="break", + node_parent_id=id(self.current_parent), + ) + + def visit_Continue(self, node: ast.Continue): + """Visit the continue node. + + Parameters: + ----------- + node : ast.Continue + The continue node to visit. + + Notes: + ------ + In the following "while True: continue", the ast.Continue node represents the continue itself. \n + ast.Continue has no children. + """ + return + # Add the continue node to the graph + self.create_new_node( + node_id=id(node), + node_type="Continue", + node_label="continue", + node_parent_id=id(self.current_parent), + ) + + def visit_ExceptHandler(self, node: ast.ExceptHandler): + """Visit the except handler node. + + Parameters: + ----------- + node : ast.ExceptHandler + The except handler node to visit. + + Notes: + ------ + In the following "try: except Exception as e: pass", the ast.ExceptHandler node represents the except handler itself. \n + ast.ExceptHandler.type outputs "Name(id='Exception')" : \n + ast.ExceptHandler.type node represents an ast.Name node Name(id='Exception'). \n + While ast.Name.id represents 'Exception'. + """ + # Add the except handler node to the graph + self.create_new_node( + node_id=id(node), + node_type="ExceptHandler", + node_label="except", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the except handler node + self.current_parent = node + # Visit the except handler's children + self.generic_visit(node) + # Set the current parent back to the except handler's parent + self.current_parent = old_parent + + def visit_For(self, node: ast.For): + """Visit the for node. + + Parameters: + ----------- + node : ast.For + The for node to visit. + + Notes: + ------ + In the following "for i in range(10): pass", the ast.For node represents the for itself. 
\n + ast.For.target node represents an ast.Name node Name(id='i'). \n + ast.For.iter node represents an ast.Call node Call(func=Name(id='range'), args=[Constant(value=10)], keywords=[]). + """ + # Add the for node to the graph + self.create_new_node( + node_id=id(node), + node_type="For", + node_label="for", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the for node + self.current_parent = node + # Visit the for's children + self.generic_visit(node) + # Set the current parent back to the for's parent + self.current_parent = old_parent + + def visit_If(self, node: ast.If): + """Visit the if node. + + Parameters: + ----------- + node : ast.If + The if node to visit. + + Notes: + ------ + In the following "if x==z: pass", the ast.If node represents the if itself. \n + ast.If.test node represents an ast.Compare node Compare(left=Name(id='x'), ops=[Eq()], comparators=[Name(id='z')]). + """ + # Add the if node to the graph + self.create_new_node( + node_id=id(node), + node_type="If", + node_label="if", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the if node + self.current_parent = node + # Visit the if's children + self.generic_visit(node) + # Set the current parent back to the if's parent + self.current_parent = old_parent + + def visit_Try(self, node: ast.Try): + """Visit the try node. + + Parameters: + ----------- + node : ast.Try + The try node to visit. + + Notes: + ------ + In the following "try: x += 5 except: pass else: print(worked) finally: print(done)", the ast.Try node represents the try itself. \n + ast.Try.body represents the contents of try, in this case an ast.AugAssign node. \n + ast.Try.handlers represents the except handlers, in this case an ast.ExceptHandler node. 
\n + ast.Try.orelse represents the else clause of the try, in this case an ast.Expr node. \n + ast.Try.finalbody represents the finally clause of the try, in this case an ast.Expr node. + """ + # Add the try node to the graph + self.create_new_node( + node_id=id(node), + node_type="Try", + node_label="try", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the try node + self.current_parent = node + # Visit the try's children + self.generic_visit(node) + # Set the current parent back to the try's parent + self.current_parent = old_parent + + # def visit_TryStar(self, node : ast.TryStar): + def visit_While(self, node: ast.While): + """Visit the while node. + + Parameters: + ----------- + node : ast.While + The while node to visit. + + Notes: + ------ + In the following "while x < 10: y + 5", the ast.While node represents the while itself. \n + ast.While.test node represents an ast.Compare node for 'x < 10'. \n + ast.While.body represents the contents of the while, in this case an ast.Expr node. + """ + # Add the while node to the graph + self.create_new_node( + node_id=id(node), + node_type="While", + node_label="while", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the while node + self.current_parent = node + # Visit the while's children + self.generic_visit(node) + # Set the current parent back to the while's parent + self.current_parent = old_parent + + def visit_With(self, node: ast.With): + """Visit the with node. + + Parameters: + ----------- + node : ast.With + The with node to visit. + + Notes: + ------ + In the following "with open('file.txt', 'r') as f: f.read()", the ast.With node represents the with itself. \n + ast.With.items represents the context managers, in this case an ast.withitem node. 
\n + ast.With.body represents the contents of the with, in this case an ast.Expr node. + """ + # Add the with node to the graph + self.create_new_node( + node_id=id(node), + node_type="With", + node_label="with", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the with node + self.current_parent = node + # Visit the with's children + self.generic_visit(node) + # Set the current parent back to the with's parent + self.current_parent = old_parent + + # endregion + + # region Pattern Matching + def visit_Match(self, node: ast.Match): + """Visit the match node. + + Parameters: + ----------- + node : ast.Match + The match node to visit. + + Notes: + ------ + In the following "match x: case 1: pass", the ast.Match node represents the match itself. \n + ast.Match.cases represents the cases of the match, in this case a list holding one ast.match_case node. + """ + return + # Add the match node to the graph + self.create_new_node( + node_id=id(node), + node_type="Match", + node_label="match", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the match node + self.current_parent = node + # Visit the match's children + self.generic_visit(node) + # Set the current parent back to the match's parent + self.current_parent = old_parent + + def visit_MatchAs(self, node: ast.MatchAs): + """Visit the match as node. + + Parameters: + ----------- + node : ast.MatchAs + The match as node to visit. + + Notes: + ------ + In the following "match x: case 1 as y: pass", the ast.MatchAs node represents the match as itself. \n + ast.MatchAs.pattern represents the sub-pattern being matched, in this case an ast.MatchValue node for '1'. \n + ast.MatchAs.name represents the name the matched value is bound to, in this case the string 'y'. 
+ """ + return + # Add the match as node to the graph + self.create_new_node( + node_id=id(node), + node_type="MatchAs", + node_label="match as", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the match as node + self.current_parent = node + # Visit the match as's children + self.generic_visit(node) + # Set the current parent back to the match as's parent + self.current_parent = old_parent + + def visit_MatchClass(self, node: ast.MatchClass): + """Visit the match class node. + + Parameters: + ----------- + node : ast.MatchClass + The match class node to visit. + + Notes: + ------ + In the following "match x: case A(y): pass", the ast.MatchClass node represents the match class itself. \n + ast.MatchClass.cls represents the class being matched, in this case an ast.Name node for 'A', and ast.MatchClass.patterns holds the positional sub-patterns, in this case an ast.MatchAs node for 'y'. + """ + return + # Add the match class node to the graph + self.create_new_node( + node_id=id(node), + node_type="MatchClass", + node_label="match class", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the match class node + self.current_parent = node + # Visit the match class's children + self.generic_visit(node) + # Set the current parent back to the match class's parent + self.current_parent = old_parent + + def visit_MatchMapping(self, node: ast.MatchMapping): + """Visit the match mapping node. + + Parameters: + ----------- + node : ast.MatchMapping + The match mapping node to visit. + + Notes: + ------ + In the following "match x: case {1: y}: pass", the ast.MatchMapping node represents the match mapping itself. \n + ast.MatchMapping.keys holds the key expressions, in this case an ast.Constant node for '1', and ast.MatchMapping.patterns holds the corresponding sub-patterns, in this case an ast.MatchAs node for 'y'. 
+ """ + return + # Add the match mapping node to the graph + self.create_new_node( + node_id=id(node), + node_type="MatchMapping", + node_label="match mapping", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the match mapping node + self.current_parent = node + # Visit the match mapping's children + self.generic_visit(node) + # Set the current parent back to the match mapping's parent + self.current_parent = old_parent + + def visit_MatchOr(self, node: ast.MatchOr): + """Visit the match or node. + + Parameters: + ----------- + node : ast.MatchOr + The match or node to visit. + + Notes: + ------ + In the following "match x: case 1 | 2: pass", the ast.MatchOr node represents the match or itself. \n + ast.MatchOr.patterns holds the alternative sub-patterns in source order. \n + In this case it contains two ast.MatchValue nodes, one for '1' and one for '2'. + """ + return + # Add the match or node to the graph + self.create_new_node( + node_id=id(node), + node_type="MatchOr", + node_label="match or", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the match or node + self.current_parent = node + # Visit the match or's children + self.generic_visit(node) + # Set the current parent back to the match or's parent + self.current_parent = old_parent + + def visit_MatchSequence(self, node: ast.MatchSequence): + """Visit the match sequence node. + + Parameters: + ----------- + node : ast.MatchSequence + The match sequence node to visit. + + Notes: + ------ + In the following "match x: case [1, 2]: pass", the ast.MatchSequence node represents the match sequence itself. \n + ast.MatchSequence.patterns holds the element sub-patterns, in this case two ast.MatchValue nodes for '1' and '2'. 
+ """ + return + # Add the match sequence node to the graph + self.create_new_node( + node_id=id(node), + node_type="MatchSequence", + node_label="match sequence", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the match sequence node + self.current_parent = node + # Visit the match sequence's children + self.generic_visit(node) + # Set the current parent back to the match sequence's parent + self.current_parent = old_parent + + def visit_MatchSingleton(self, node: ast.MatchSingleton): + """Visit the match singleton node. + + Parameters: + ----------- + node : ast.MatchSingleton + The match singleton node to visit. + + Notes: + ------ + In the following "match x: case None: pass", the ast.MatchSingleton node represents the match singleton itself. \n + ast.MatchSingleton.value represents the singleton compared by identity (None, True or False), in this case None. + """ + return + # Add the match singleton node to the graph + self.create_new_node( + node_id=id(node), + node_type="MatchSingleton", + node_label="match singleton", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the match singleton node + self.current_parent = node + # Visit the match singleton's children + self.generic_visit(node) + # Set the current parent back to the match singleton's parent + self.current_parent = old_parent + + def visit_MatchStar(self, node: ast.MatchStar): + """Visit the match star node. + + Parameters: + ----------- + node : ast.MatchStar + The match star node to visit. + + Notes: + ------ + In the following "match x: case [1, *y]: pass", the ast.MatchStar node represents the match star itself. \n + ast.MatchStar.name represents the name bound to the remaining sequence elements, in this case the string 'y'. 
+ """ + return + # Add the match star node to the graph + self.create_new_node( + node_id=id(node), + node_type="MatchStar", + node_label="match star", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the match star node + self.current_parent = node + # Visit the match star's children + self.generic_visit(node) + # Set the current parent back to the match star's parent + self.current_parent = old_parent + + def visit_MatchValue(self, node: ast.MatchValue): + """Visit the match value node. + + Parameters: + ----------- + node : ast.MatchValue + The match value node to visit. + + Notes: + ------ + In the following "match x: case 1: pass", the ast.MatchValue node represents the match value itself. \n + ast.MatchValue.value represents the expression compared by equality, in this case an ast.Constant node. + """ + return + # Add the match value node to the graph + self.create_new_node( + node_id=id(node), + node_type="MatchValue", + node_label="match value", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the match value node + self.current_parent = node + # Visit the match value's children + self.generic_visit(node) + # Set the current parent back to the match value's parent + self.current_parent = old_parent + + # endregion + + # region Functions and Class Definitions + def visit_ClassDef(self, node: ast.ClassDef): + """Visit the class node. + + Parameters: + ----------- + node : ast.ClassDef + The class node to visit. + + Notes: + ------ + In the following "class A(baseClass): def __init__(self, a:int=5): self.x=a*2", the ast.ClassDef node represents the class itself. \n + ast.ClassDef.name represents the name of the class, in this case 'A'. \n + ast.ClassDef.bases represents the base classes, in this case 'baseClass'. 
\n + ast.ClassDef.keywords represents the keyword arguments of the class statement (such as 'metaclass=...'), in this case an empty list. \n + ast.ClassDef.body represents the contents of the class, in this case an ast.FunctionDef node for '__init__'. + """ + # Add the class node to the graph + # current_parent should be a module node, unless it's a sub class + self.create_new_node( + node_id=id(node), + node_type="ClassDef", + node_label=node.name, + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the class node + # because now we'll be visiting the class' children + self.current_parent = node + # # Save the previous current_class + # previous_class = self.current_class + # # Set the current_class + # self.current_class = node + # Visit the class' children + self.generic_visit(node) + # Set the current parent back to the class' parent + # we've finished visiting the class' children, so go back to class' parent + self.current_parent = old_parent + # # Restore the previous current_class + # self.current_class = previous_class + + def visit_FunctionDef(self, node: ast.FunctionDef): + """Visit the function node. + + Parameters: + ----------- + node : ast.FunctionDef + The function node to visit. + + Notes: + ------ + In the following "def func(a:int=5): b = a*2 return b", the ast.FunctionDef node represents the function itself. \n + ast.FunctionDef.name represents the name of the function, in this case 'func'. \n + ast.FunctionDef.args represents the arguments, in this case an ast.arguments node containing an ast.arg node for 'a'. \n + ast.FunctionDef.body represents the contents of the function, in this case an ast.Assign node for 'b = a*2' and an ast.Return node. 
+ """ + # function_parent: nx.DiGraph + # if self.current_class: + # function_parent = self.current_class + # else: + # if self.current_module: + # function_parent = self.current_module + # else: + # function_parent = self.current_parent + # Add the function node to the graph + self.create_new_node( + node_id=id(node), + node_type="FunctionDef", + node_label=node.name, + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the function node + self.current_parent = node + # # Save the previous current_function + # previous_function = self.current_function + # # Set the current_function + # self.current_function = node + # Visit the function's children + self.generic_visit(node) + # Set the current parent back to the function's parent + self.current_parent = old_parent + # # Restore the previous current_function + # self.current_function = previous_function + + def visit_Global(self, node: ast.Global): + """Visit the global node. + + Parameters: + ----------- + node : ast.Global + The global node to visit. + + Notes: + ------ + In the following "global a b", the ast.Global node represents the global itself. \n + ast.Global doesn't have any children nodes to represent 'a' and 'b'. + """ + # Add the global node to the graph + self.create_new_node( + node_id=id(node), + node_type="Global", + node_label="global", + node_parent_id=id(self.current_parent), + ) + + def visit_Lambda(self, node: ast.Lambda): + """Visit the lambda node. + + Parameters: + ----------- + node : ast.Lambda + The lambda node to visit. + + Notes: + ------ + In the following "lambda a: a*2", the ast.Lambda node represents the lambda itself. \n + ast.Lambda.args represents the arguments, in this case an ast.arg node to represent 'a'. \n + ast.Lambda.body represents the contents of the lambda, in this case an ast.BinOp node for 'a*2'. 
+ """ + return + # Add the lambda node to the graph + self.create_new_node( + node_id=id(node), + node_type="Lambda", + node_label="lambda", + node_parent_id=id(self.current_parent), + ) + + def visit_Nonlocal(self, node: ast.Nonlocal): + """Visit the nonlocal node. + + Parameters: + ----------- + node : ast.Nonlocal + The nonlocal node to visit. + + Notes: + ------ + In the following "nonlocal a b", the ast.Nonlocal node represents the nonlocal itself. \n + ast.Nonlocal doesn't have any children nodes to represent 'a' and 'b'. + """ + return + # Add the nonlocal node to the graph + self.create_new_node( + node_id=id(node), + node_type="Nonlocal", + node_label="nonlocal", + node_parent_id=id(self.current_parent), + ) + + def visit_Return(self, node: ast.Return): + """Visit the return node. + + Parameters: + ----------- + node : ast.Return + The return node to visit. + + Notes: + ------ + In the following "return a", the ast.Return node represents the return itself. \n + ast.Return.value represents the value to return, in this case an ast.Name node for 'a'. + """ + # Add the return node to the graph + self.create_new_node( + node_id=id(node), + node_type="Return", + node_label="return", + node_parent_id=id(self.current_parent), + ) + + def visit_Yield(self, node: ast.Yield): + """Visit the yield node. + + Parameters: + ----------- + node : ast.Yield + The yield node to visit. + + Notes: + ------ + In the following "yield a", the ast.Yield node represents the yield itself. \n + ast.Yield.value represents the value to yield, in this case an ast.Name node for 'a'. + """ + return + # Add the yield node to the graph + self.create_new_node( + node_id=id(node), + node_type="Yield", + node_label="yield", + node_parent_id=id(self.current_parent), + ) + + def visit_YieldFrom(self, node: ast.YieldFrom): + """Visit the yield from node. + + Parameters: + ----------- + node : ast.YieldFrom + The yield from node to visit. 
+ + Notes: + ------ + In the following "yield from a", the ast.YieldFrom node represents the yield from itself. \n + ast.YieldFrom.value represents the value to yield from, in this case an ast.Name node for 'a'. + """ + return + # Add the yield from node to the graph + self.create_new_node( + node_id=id(node), + node_type="YieldFrom", + node_label="yield from", + node_parent_id=id(self.current_parent), + ) + + # endregion + + # region Async and Await + def visit_AsyncFor(self, node: ast.AsyncFor): + """Visit the async for node. + + Parameters: + ----------- + node : ast.AsyncFor + The async for node to visit. + + Notes: + ------ + In the following "async for a in b: a+5", the ast.AsyncFor node represents the async for itself. \n + ast.AsyncFor.target represents the target of the async for, in this case an ast.Name node for 'a'. \n + ast.AsyncFor.iter represents the iterable of the async for, in this case an ast.Name node for 'b'. \n + ast.AsyncFor.body represents the contents of the async for, in this case an ast.Expr node wrapping the ast.BinOp for 'a+5'. + """ + return + # Add the async for node to the graph + self.create_new_node( + node_id=id(node), + node_type="AsyncFor", + node_label="async for", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the async for node + self.current_parent = node + # Visit the async for's children + self.generic_visit(node) + # Set the current parent back to the async for's parent + self.current_parent = old_parent + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef): + """Visit the async function node. + + Parameters: + ----------- + node : ast.AsyncFunctionDef + The async function node to visit. + + Notes: + ------ + In the following "async def a(b): b+5", the ast.AsyncFunctionDef node represents the async function itself. \n + ast.AsyncFunctionDef.name represents the name of the async function, in this case the string 'a'. 
\n + ast.AsyncFunctionDef.args represents the arguments of the async function, in this case an ast.arguments node for 'b'. \n + ast.AsyncFunctionDef.body represents the contents of the async function, in this case an ast.BinOp node for 'b+5'. + """ + return + # Add the async function node to the graph + self.create_new_node( + node_id=id(node), + node_type="AsyncFunctionDef", + node_label=node.name, + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the async function node + self.current_parent = node + # Visit the async function's children + self.generic_visit(node) + # Set the current parent back to the async function's parent + self.current_parent = old_parent + + def visit_AsyncWith(self, node: ast.AsyncWith): + """Visit the async with node. + + Parameters: + ----------- + node : ast.AsyncWith + The async with node to visit. + + Notes: + ------ + In the following "async with a as b: b+5", the ast.AsyncWith node represents the async with itself. \n + ast.AsyncWith.items represents the items of the async with, in this case an ast.withitem node for 'a as b'. \n + ast.AsyncWith.body represents the contents of the async with, in this case an ast.BinOp node for 'b+5'. + """ + return + # Add the async with node to the graph + self.create_new_node( + node_id=id(node), + node_type="AsyncWith", + node_label="async with", + node_parent_id=id(self.current_parent), + ) + # Set the old parent to the current parent + old_parent = self.current_parent + # Set the current parent to the async with node + self.current_parent = node + # Visit the async with's children + self.generic_visit(node) + # Set the current parent back to the async with's parent + self.current_parent = old_parent + + def visit_Await(self, node: ast.Await): + """Visit the await node. + + Parameters: + ----------- + node : ast.Await + The await node to visit. 
def arc_layout(G):
    """Draw ``G`` as an arc diagram and return the node positions that were used.

    Nodes are placed on the horizontal axis (y = 0) at the x-coordinate given
    by a spring layout, and every edge is drawn as a semicircle above the axis
    connecting its two endpoints.

    Parameters
    ----------
    G : NetworkX graph
        The graph to draw.

    Returns
    -------
    pos : dict
        A dictionary of ``(x, 0.0)`` positions keyed by node.
    """
    import math  # only needed here, so imported locally

    spring_pos = nx.spring_layout(G)  # To make the initial ordering more interesting
    axis_pos = {node: (float(spring_pos[node][0]), 0.0) for node in G.nodes()}

    plt.figure(figsize=(8, 4))

    # Draw nodes on the axis (on top of the arcs)
    if axis_pos:
        plt.scatter(
            [x for x, _ in axis_pos.values()],
            [y for _, y in axis_pos.values()],
            s=100,
            c="blue",
            zorder=2,
        )

    # Draw each edge as a semicircle; flat segments on y = 0 would all overlap
    # the axis and make individual edges impossible to tell apart.
    steps = 50
    angles = [math.pi * i / steps for i in range(steps + 1)]
    for edge in G.edges():
        x_start, x_end = axis_pos[edge[0]][0], axis_pos[edge[1]][0]
        center = (x_start + x_end) / 2
        arc_radius = abs(x_end - x_start) / 2
        plt.plot(
            [center + arc_radius * math.cos(a) for a in angles],
            [arc_radius * math.sin(a) for a in angles],
            c="red",
            alpha=0.5,
            zorder=1,
        )

    # Remove y-axis
    plt.gca().axes.get_yaxis().set_visible(False)

    # Show the plot
    plt.show()

    return axis_pos
def cluster_layout(G, root, radius=1):
    """Position nodes in concentric rings around ``root``.

    Children of the root are spread over a full circle of radius ``radius``;
    each deeper level sits one ``radius`` further out and shares the angular
    slice that was assigned to its parent. Every node is positioned exactly
    once, so cycles and undirected graphs terminate. Nodes that cannot be
    reached from ``root`` are laid out as their own cluster, shifted to the
    right of everything placed so far so that clusters do not overlap.

    Parameters
    ----------
    G : graph
        Must provide ``nodes()`` and ``neighbors(node)``.
    root : node
        The node placed at the origin.
    radius : number (optional, default=1)
        Distance between consecutive rings.

    Returns
    -------
    positions : dict
        A dictionary of 2-element numpy arrays keyed by node, one per node in G.
    """

    def _layout_component(start, placed):
        """Lay out everything reachable from ``start`` around the origin, skipping nodes in ``placed``."""
        local = {start: np.array([0.0, 0.0])}
        # Each entry: (node, centre angle, angular width for its children, ring distance)
        stack = [(start, 0, 2 * np.pi, radius)]
        while stack:
            node, angle, d_angle, depth = stack.pop()
            # Only claim neighbours that have no position yet; dict.fromkeys
            # removes duplicates while keeping the neighbour order.
            children = [
                child
                for child in dict.fromkeys(G.neighbors(node))
                if child not in placed and child not in local
            ]
            num_children = len(children)
            for i, child in enumerate(children):
                child_angle = angle - d_angle / 2 + i * d_angle / num_children
                local[child] = depth * np.array(
                    [np.cos(child_angle), np.sin(child_angle)]
                )
                stack.append(
                    (child, child_angle, d_angle / num_children, depth + radius)
                )
        return local

    positions = _layout_component(root, {})

    # Anything unreachable from the root becomes the centre of its own cluster
    for node in G.nodes():
        if node in positions:
            continue
        component = _layout_component(node, positions)
        # Shift the cluster so its left edge sits 2 * radius right of all placed nodes
        right_edge = max(p[0] for p in positions.values())
        left_edge = min(p[0] for p in component.values())
        offset = np.array([right_edge - left_edge + 2 * radius, 0.0])
        for member, local_pos in component.items():
            positions[member] = local_pos + offset

    return positions
def sorted_square_layout(G: "nx.Graph"):
    """Position nodes in a square grid, ordered by their 'type' attribute.

    Parameters
    ----------
    G : NetworkX graph
        A position will be assigned to every node in G. Nodes without a
        'type' attribute are sorted as type "Unknown".

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node
    """
    import math
    import numpy as np

    # Sort nodes by 'type'; missing types fall back to "Unknown" (the same
    # fallback the plotter applies) instead of raising KeyError.
    sorted_nodes = sorted(
        G.nodes(data=True), key=lambda item: item[1].get("type", "Unknown")
    )

    # Smallest square grid that holds every node
    grid_size = math.ceil(math.sqrt(len(sorted_nodes)))

    # Grid cells are enumerated column by column: (0, 0), (0, 1), ..., (1, 0), ...
    positions = np.array([(x, y) for x in range(grid_size) for y in range(grid_size)])

    # Pair each node, in sorted order, with the next free grid cell
    pos = {node: cell for (node, _attr), cell in zip(sorted_nodes, positions)}

    return pos
"expr", + "Compare": "expr", + "Expr": "expr", + "IfExp": "expr", + "UnaryOp": "expr", + "Slice": "expr.subscript", + "Subscript": "expr.subscript", + "DictComp": "expr.comp", + "GeneratorExp": "expr.comp", + "ListComp": "expr.comp", + "SetComp": "expr.comp", + "Literals": "literals", + "Constant": "literals.constant", + "Dict": "literals.dict", + "List": "literals", + "Set": "literals", + "Tuple": "literals", + "Match": "match", + "MatchAs": "match", + "MatchClass": "match", + "MatchMap": "match", + "MatchOr": "match", + "MatchSequence": "match", + "MatchSingleton": "match", + "MatchStar": "match", + "MatchValue": "match", + "Module": "module", + "FunctionType": "module", + "Interactive": "module", + "Statements": "statements", + "AnnAssign": "statements", + "Assert": "statements", + "Assign": "statements", + "Delete": "statements", + "Pass": "statements", + "Raise": "statements", + "Import": "statements.import", + "ImportFrom": "statements.importfrom", + "Variable": "variables", + "Name": "variables" + }, + "labels": { + "unknown": "u", + "async": "@", + "async.for": "@for", + "async.with": "@wi", + "async.function": "@Fn", + "control": "c", + "control.cond": "cc", + "control.cond.loop": "ccl", + "control.break": "brk", + "control.continue": "cont", + "control.loop.for": "for", + "control.cond.if": "if", + "control.try": "try", + "control.loop.while": "wh", + "control.loop.with": "wi", + "def": "d", + "def.class": "Cl", + "def.function": "Fn", + "def.global": "gl", + "def.nonlocal": "nl", + "deprecated": "x", + "expr": "e", + "expr.subscript": "sbscpt", + "expr.comp": "comp", + "literals": "l", + "literals.constant": "const", + "literals.dict": "dict", + "match": "mat", + "module": "Mod", + "statements": "s", + "statements.import": "I", + "statements.importfrom": "IF", + "variables": "var" + }, + "alphas": { + "unknown": 0.3, + "async": 0.3, + "async.for": 0.3, + "async.with": 0.3, + "async.function": 0.3, + "control": 0.3, + "control.cond": 0.3, + 
"control.cond.loop": 0.3, + "control.break": 0.3, + "control.continue": 0.3, + "control.loop.for": 0.3, + "control.cond.if": 0.3, + "control.try": 0.3, + "control.loop.while": 0.3, + "control.loop.with": 0.3, + "def": 0.3, + "def.class": 0.3, + "def.function": 0.3, + "def.global": 0.3, + "def.nonlocal": 0.3, + "deprecated": 0.3, + "expr": 0.5, + "expr.subscript": 0.5, + "expr.comp": 0.5, + "literals": 0.3, + "literals.constant": 0.3, + "literals.dict": 0.3, + "match": 0.3, + "module": 0.5, + "statements": 0.3, + "statements.import": 0.3, + "statements.importfrom": 0.3, + "variables": 0.5 + }, + "sizes": { + "unknown": 400, + "async": 400, + "async.for": 400, + "async.with": 400, + "async.function": 800, + "control": 400, + "control.cond": 400, + "control.cond.loop": 400, + "control.break": 400, + "control.continue": 400, + "control.loop.for": 400, + "control.cond.if": 400, + "control.try": 400, + "control.loop.while": 400, + "control.loop.with": 400, + "def": 400, + "def.class": 1000, + "def.function": 800, + "def.global": 600, + "def.nonlocal": 400, + "deprecated": 400, + "expr": 400, + "expr.subscript": 400, + "expr.comp": 400, + "literals": 400, + "literals.constant": 600, + "literals.dict": 400, + "match": 400, + "module": 1000, + "statements": 400, + "statements.import": 800, + "statements.importfrom": 800, + "variables": 400 + }, + "shapes": { + "unknown": "o", + "async": "^", + "async.for": "^", + "async.with": "^", + "async.function": "^", + "control": "s", + "control.cond": "s", + "control.cond.loop": "s", + "control.break": "s", + "control.continue": "s", + "control.loop.for": "s", + "control.cond.if": "s", + "control.try": "s", + "control.loop.while": "s", + "control.loop.with": "s", + "def": "H", + "def.class": "H", + "def.function": "H", + "def.global": "H", + "def.nonlocal": "H", + "deprecated": "x", + "expr": "<", + "expr.subscript": "<", + "expr.comp": "<", + "literals": ">", + "literals.constant": ">", + "literals.dict": ">", + "match": "D", + 
"module": "s", + "statements": "d", + "statements.import": "d", + "statements.importfrom": "d", + "variables": "o" + }, + "colors": { + "unknown": "gray", + "async": "pink", + "async.for": "pink", + "async.with": "pink", + "async.function": "pink", + "control": "orange", + "control.cond": "green", + "control.cond.loop": "maroon", + "control.break": "violet", + "control.continue": "violet", + "control.loop.for": "maroon", + "control.cond.if": "green", + "control.try": "green", + "control.loop.while": "maroon", + "control.loop.with": "maroon", + "def": "purple", + "def.class": "purple", + "def.function": "purple", + "def.global": "purple", + "def.nonlocal": "purple", + "deprecated": "red", + "expr": "khaki", + "expr.subscript": "khaki", + "expr.comp": "khaki", + "literals": "gold", + "literals.constant": "gold", + "literals.dict": "gold", + "match": "salmon", + "module": "red", + "statements": "blue", + "statements.import": "lightblue", + "statements.importfrom": "darkblue", + "variables": "skyblue" + } +} diff --git a/src/codecarto/containers/processor/src/plotter/palette.py b/src/codecarto/containers/processor/src/plotter/palette.py new file mode 100644 index 0000000..7b00deb --- /dev/null +++ b/src/codecarto/containers/processor/src/plotter/palette.py @@ -0,0 +1,145 @@ +from pydantic import BaseModel + +# this is the default palette in the package +# when containerized, we'll be looking in the container's /app directory +# can see DockerFile for more info +default_palette_path: str = "src/plotter/default_palette.json" + + +class Theme(BaseModel): + node_type: str + base: str + label: str + shape: str + color: str + size: str + alpha: str + + +class Palette: + """A class to manage the plotter palette.""" + + def __init__(self, palette: dict = None): + """Initialize a palette. + + Parameters: + ----------- + palette : dict (optional) (default=None) + A dictionary containing the data of the palette to initialize. 
+ """ + if palette and isinstance(palette, dict) and palette != {}: + self.palette: dict[str, dict] = palette + else: + from json import load + + # load the default palette from package + with open(default_palette_path, "r") as f: + self.palette: dict[str, dict] = load(f) + + def create_new_theme( + self, + node_type: str, + base: str, + label: str, + shape: str, + color: str, + size: float, + alpha: float, + ) -> dict: + """Create a new theme with the specified parameters. + + Parameters: + ----------- + node_type : str + The type of node for which to create a new theme. + label : str + The label of the nodes in the new theme. + shape : str + The shape of the nodes in the new theme. + color : str + The color of the nodes in the new theme. + size : float + The size of the nodes in the new theme. + alpha : float + The alpha (transparency) value of the nodes in the new theme. + + Returns: + -------- + dict + The palette. + """ + _base: str = base + + # check if node type already exists + if node_type in self.palette["bases"].keys(): + # update existing base for the node type + _base = self.palette["bases"][node_type] + else: + # add new node type to palette + self.palette["bases"][node_type] = _base + + # save base attrs to palette file + self.palette["labels"][_base] = label + self.palette["shapes"][_base] = shape + self.palette["colors"][_base] = color + self.palette["sizes"][_base] = size + self.palette["alphas"][_base] = alpha + + # return the palette + return self.palette + + def get_node_styles(self, type: str = None) -> dict: + """Get the styles for all node types. + + Parameters: + ----------- + type : str (optional) (default=None) + If specified, only the style for the specified node type will be returned. + + Returns: + -------- + dict[node_type(str), styles(dict)] + A dictionary containing the styles for all node types, or a specfied type. 
+ """ + if type: + _base = self.palette["bases"][type] + return { + type: { + "base": _base, + "label": self.palette["labels"][_base], + "shape": self.palette["shapes"][_base], + "color": self.palette["colors"][_base], + "size": self.palette["sizes"][_base], + "alpha": self.palette["alphas"][_base], + } + } + else: + styles = {} + for node_type in self.palette["bases"].keys(): + _base = self.palette["bases"][node_type] + styles[node_type] = { + "base": _base, + "label": self.palette["labels"][_base], + "shape": self.palette["shapes"][_base], + "color": self.palette["colors"][_base], + "size": self.palette["sizes"][_base], + "alpha": self.palette["alphas"][_base], + } + return styles + + def get_palette_data(self) -> dict[str, dict]: + """Get the data of the current palette. + + Returns: + -------- + dict + A dictionary containing the data of the current palette. + """ + return { + "bases": self.palette["bases"], + "labels": self.palette["labels"], + "shapes": self.palette["shapes"], + "colors": self.palette["colors"], + "sizes": self.palette["sizes"], + "alphas": self.palette["alphas"], + } diff --git a/src/codecarto/containers/processor/src/plotter/plotter.py b/src/codecarto/containers/processor/src/plotter/plotter.py new file mode 100644 index 0000000..f33a91f --- /dev/null +++ b/src/codecarto/containers/processor/src/plotter/plotter.py @@ -0,0 +1,401 @@ +import math +import matplotlib.lines as mlines +import matplotlib.pyplot as plt +import mpld3 +import networkx as nx +import random + + +class Plotter: + def __init__( + self, + graph: nx.DiGraph = None, + labels: bool = False, + grid: bool = False, + ntx_layouts: bool = True, + custom_layouts: bool = True, + palette_dict: dict = None, + ): + """Plots a graph using matplotlib and outputs the plots to the output directory. + + Parameters: + ----------- + graph (networkx.DiGraph) Default = None: + The graph to plot. + labels (bool) Default = False: + Whether or not to show the labels. 
+ grid (bool) Default = False: + Whether or not to plot all layouts in a grid. + ntx_layouts (bool) Default = True: + Whether or not to include networkx layouts. + custom_layouts (bool) Default = True: + Whether or not to include custom layouts. + palette_dict (dict) Default = None: + The palette to use. + """ + from .palette import Palette + from .positions import Positions + + if not graph: + raise ValueError("No graph provided.") + self.graph: nx.DiGraph = graph + self.labels: bool = labels + self.grid: bool = grid + self.seed: dict[str, int] = {} # layout_name: seed + self.ntx_layouts: bool = ntx_layouts + self.custom_layouts: bool = custom_layouts + self.layouts: list[dict] = Positions( + self.ntx_layouts, self.custom_layouts + ).get_layouts() + self.palette_dict: dict = palette_dict + # if palette_dict is None uses default in Palette + self.palette: Palette = Palette(palette_dict) + self.node_styles: dict = self.palette.get_node_styles() + self.unique_node_types: set = set() + self.node_data: dict[str, list] = self.set_node_data() + + def set_plotter_attrs( + self, + graph: nx.DiGraph = None, + labels: bool = False, + grid: bool = False, + ntx_layouts: bool = True, + custom_layouts: bool = True, + palette_dict: dict = None, + ): + """Change the attributes of an already initiated Plotter object. + + Parameters: + ----------- + graph (networkx.DiGraph): + The graph to plot. + labels (bool): + Whether or not to show the labels. + grid (bool): + Whether or not to show the grid. + ntx_layouts (bool): + Whether or not to save the plot to a networkx file. + custom_layouts (bool): + Whether or not to save the plot to a custom file. + palette_dict (dict): + The palette to use. 
+ """ + from .palette import Palette + from .positions import Positions, Layout + + # Graph + if not graph and not self.graph: + raise ValueError("No graph provided.") + self.graph: nx.DiGraph = graph if graph is not None else self.graph + + # Bools + self.labels: bool = labels if labels is not None else self.labels + self.grid: bool = grid if grid is not None else self.grid + + # Layouts + self.seed: dict[str, int] = {} # layout_name: seed + self.ntx_layouts: bool = ( + ntx_layouts if ntx_layouts is not None else self.ntx_layouts + ) + self.custom_layouts: bool = ( + custom_layouts if custom_layouts is not None else self.custom_layouts + ) + self.layouts: list[dict] = Positions( + self.ntx_layouts, self.custom_layouts + ).get_layouts() + + # Palette + self.palette_dict: dict = ( + palette_dict if palette_dict is not None else self.palette_dict + ) + self.palette: Palette = Palette(palette_dict) + self.node_styles: dict = self.palette.get_node_styles() + self.unique_node_types: set = set() + self.node_data: dict[str, list] = self.set_node_data() + + def plot( + self, + graph: nx.DiGraph = None, + layout_name: str = "", + grid: bool = False, + ) -> list[str]: + """Plots a graph using matplotlib. + + Parameters: + ----------- + graph (networkx.DiGraph) Default = None: + The graph to plot. + Can use this to overwrite the Plotter object's graph attribute. + layout_name (str) Default = "": + The name of the layout to use. Will return only the specified layout. + Leave blank to return all layouts. + grid (bool) Default = False: + Whether or not to plot all layouts in a grid. Will return only the grid plot with all layouts. + Can use this to overwrite the Plotter object's grid attribute. + + Returns: + -------- + plots (list[str]): + The plots as html strings. 
+ """ + # Check graph + if graph and isinstance(graph, nx.DiGraph): + self.graph = graph + elif not self.graph or not isinstance(self.graph, nx.DiGraph): + raise ValueError("No graph provided or is not valid.") + + # Check layout + if layout_name != "": + try: + from .positions import Positions, LayoutType + + # set self.layouts to the specified layout + # will make it so we only plot the specified layout + position: Positions = Positions() + _layout: LayoutType = position.get_layout(layout_name) + self.layouts = [_layout] + except: + raise ValueError("Layout does not exist.") + + # Start the plotting + # If grid, setup the figure and axes before loop + self.grid = grid + if self.grid: + num_layouts = len(self.layouts) + grid_size = math.ceil(math.sqrt(num_layouts)) + figwh = (5, 5) + figs = (figwh[0] * grid_size, figwh[1] * grid_size) + + fig, axs = plt.subplots( + grid_size, + grid_size, + figsize=figs, + ) + fig.set_size_inches(18.5, 9.5) # TODO: try to size in css + + idx: int = 0 + ax: plt.Axes = None + plots: list[str] = [] + # loop through self.layouts list of dictionaries + for layout in self.layouts: + layout_name = layout["name"] + # Set up the ax for the figure + if self.grid: + # Figure has been created already + # Get the ax for the subplot in the figure + ax = axs[idx // grid_size, idx % grid_size] if grid_size > 1 else axs + idx += 1 + else: + # Only one plot per figure + fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(5, 5)) + ax.axis("off") + ax.set_title( + f"{str(layout_name).replace('_layout', '').capitalize()} Layout" + ) + + # Get node positions for layout + pos: dict = self.get_node_positions(layout_name) + + # Draw the plot + self.draw_plot(ax, pos) + + # If not a grid, append the plot to the list + if not self.grid: + plt.tight_layout() + plot_html: str = mpld3.fig_to_html(fig) + plots.append(plot_html) + plt.close() + + # If grid, append the one plot to the list + if self.grid: + plt.tight_layout() + plot_html = mpld3.fig_to_html( + fig, 
+ template_type="simple", + figid="figid", + d3_url=None, + no_extras=False, + use_http=False, + include_libraries=True, + ) + plots.append(plot_html) + plt.close() + + return plots + + def plotting_progress(self) -> float: + """Calculates the progress of the current plotting. + + Returns: + -------- + progress (float): + The progress of the current plotting. + """ + # TODO: could be used to return the percentage of the plotting progress + # would need to track position during plot and compare against a total + # these would need to be established before plotting + + # progress_total = self.calculate_progress_total() + pass + + def calculate_progress_total(self) -> float: + """Calculates the total number of steps needed to plot the graph. + + Returns: + -------- + total (float): + The total number of steps. + """ + # TODO: calc total based on number of layouts and number of nodes + # and the type of layouts, some layous need to loop graph a second time + # also if labels are involved, if edges are involved, legend, etc. + pass + + def get_node_positions(self, layout_name: str) -> dict: + """Gets the node positions for a given layout. + + Parameters: + ----------- + layout_name (str): + The name of the layout. + + Returns: + -------- + positions (dict): + The positions of nodes for layout. 
+ """ + from .positions import Positions + + position = Positions( + include_networkx=self.ntx_layouts, + include_custom=self.custom_layouts, + ) + seed = -1 + layout_params = position.get_layout_params(layout_name) + layout_kwargs = {"G": self.graph} + for param in layout_params: + if param == "seed": + seed = random.randint(0, 1000) + layout_kwargs["seed"] = seed + elif param == "nshells" and layout_name == "shell_layout": + # Group nodes by parent + grouped_nodes: dict[str, list] = {} + for node, data in self.graph.nodes(data=True): + parent = data.get("parent", "Unknown") + if parent not in grouped_nodes: + grouped_nodes[parent] = [] + grouped_nodes[parent].append(node) + # Create the list of lists (shells) + shells = list(grouped_nodes.values()) + layout_kwargs["nshells"] = shells + elif param == "root" and layout_name == "cluster_layout": + # get the node at the very top + root = None + for node, data in self.graph.nodes(data=True): + if data.get("label", "") == "root": + root = node + break + layout_kwargs["root"] = root + elif param != "G": + # TODO: Handle other parameters here + pass + + # Compute layout positions + pos: dict = {} + try: + pos = position.get_positions(layout_name, **layout_kwargs) + except Exception as e: + # TODO: log an error in the database to be displayed to the user + raise e + + return pos + + def set_node_data(self) -> dict: + """Sets the node data for the Plotter object.""" + # Collect the node data and unique node types + self.node_data = {node_type: [] for node_type in self.node_styles.keys()} + self.unique_node_types = set() # clear the set + + for n, a in self.graph.nodes(data=True): + node_type = a.get("type", "Unknown") # if no type, set to Unknown + if node_type and (node_type not in self.unique_node_types): + # could be duplicates in self.node_styles + self.unique_node_types.add(node_type) + if node_type not in self.node_styles.keys(): + node_type = "Unknown" + self.node_data[node_type].append(n) + + def draw_plot(self, 
ax: plt.Axes, pos: dict): + """Draws the nodes, edges, labels and legend. + + Parameters: + ----------- + ax (plt.Axes): + The axes to draw on. + pos (dict): + The positions of the nodes. + """ + # Draw nodes with different attrs + for node_type, nodes in self.node_data.items(): + nx.drawing.draw_networkx_nodes( + self.graph, + pos, + nodelist=nodes, + node_color=self.node_styles[node_type]["color"], + node_shape=self.node_styles[node_type]["shape"], + node_size=self.node_styles[node_type]["size"], + alpha=self.node_styles[node_type]["alpha"], + ax=ax, + ) + + # Draw edges and labels + nx.drawing.draw_networkx_edges(self.graph, pos, alpha=0.2, ax=ax) + if self.labels: + nx.drawing.draw_networkx_labels( + self.graph, + pos, + labels=nx.classes.get_node_attributes(self.graph, "label"), + font_size=10, + font_family="sans-serif", + ax=ax, + ) + + # Create legend + _colors: dict = {} + _shapes: dict = {} + for node_type in self.unique_node_types: + _colors[node_type] = self.node_styles[node_type]["color"] + _shapes[node_type] = self.node_styles[node_type]["shape"] + legend_elements = [ + mlines.Line2D( + [0], + [0], + color=color, + marker=shape, + linestyle="None", + markersize=10, + label=theme, + ) + for theme, color, shape in zip(_colors, _colors.values(), _shapes.values()) + ] + ax.legend(handles=legend_elements, loc="upper right", fontsize=10) + + +########## OPTIONAL ########## +# in the plot() function after creating pos, can use this to center the main and plot nodes + +# Center the main and plot nodes +# import numpy as np +# x_center = ( +# max(x for x, _ in pos.values()) + min(x for x, _ in pos.values()) +# ) / 2 +# y_center = ( +# max(y for _, y in pos.values()) + min(y for _, y in pos.values()) +# ) / 2 + +# if "main" in pos: +# print("main") +# pos["main"] = np.array([x_center, y_center + 0.2]) +# if "plot" in pos: +# print("plot") +# pos["plot"] = np.array([x_center, y_center]) diff --git a/src/codecarto/containers/processor/src/plotter/positions.py 
b/src/codecarto/containers/processor/src/plotter/positions.py
new file mode 100644
index 0000000..a76b5df
--- /dev/null
+++ b/src/codecarto/containers/processor/src/plotter/positions.py
@@ -0,0 +1,179 @@
+import networkx as nx
+from typing import Callable
+
+
+class LayoutType(dict):
+    name: str
+    func: Callable
+    params: list[str]
+
+
+class Positions:
+    def __init__(self, include_networkx: bool = True, include_custom: bool = True):
+        """Constructor for Layouts
+
+        Parameters
+        ----------
+        include_networkx : bool
+            Whether to register the networkx layouts
+        include_custom : bool
+            Whether to register the custom layouts
+        """
+        self._layouts: list[LayoutType] = []
+        if include_networkx:
+            self.add_networkx_layouts()
+        if include_custom:
+            self.add_custom_layouts()
+
+    def add_layout(self, name: str, layout: Callable, attr: list) -> None:
+        """Add a layout to the list of available layouts
+
+        Parameters
+        ----------
+        name : str
+            The name of the layout
+        layout : function
+            The layout function
+        attr : list
+            The attributes of the layout
+        """
+        self._layouts.append(
+            {
+                "name": name,
+                "func": layout,
+                "params": attr,
+            }
+        )
+
+    def add_networkx_layouts(self) -> None:
+        """Add all networkx layouts to the list of available layouts"""
+        self.add_layout(
+            "spring_layout",
+            nx.layout.spring_layout,
+            ["graph", "seed"],
+        )
+        self.add_layout("spiral_layout", nx.layout.spiral_layout, ["graph"])
+        self.add_layout("circular_layout", nx.layout.circular_layout, ["graph"])
+        self.add_layout("random_layout", nx.layout.random_layout, ["graph", "seed"])
+        self.add_layout("spectral_layout", nx.layout.spectral_layout, ["graph"])
+        self.add_layout("shell_layout", nx.layout.shell_layout, ["graph", "nshells"])
+        # self.add_layout("planar_layout", nx.layout.planar_layout, ["graph"])
+
+    def add_custom_layouts(self) -> None:
+        """Add all custom layouts to the list of available layouts"""
+        from .custom_layouts.sorted_square_layout import sorted_square_layout
+
+        self.add_layout("sorted_square_layout", sorted_square_layout, ["graph"])
+
+        # from .custom_layouts.cluster_layout import cluster_layout
+        # self.add_layout("cluster_layout", cluster_layout, ["graph", "root"])
+
+    def get_layout_names(self) -> list:
+        """Get all layout names from the list of available layouts
+
+        Returns
+        -------
+        list
+            The name of available layouts
+        """
+        return [layout["name"] for layout in self._layouts]
+
+    def get_layouts(self) -> list:
+        """Get all layouts with their attributes from the list of available layouts
+
+        Returns
+        -------
+        list[LayoutType]:
+            The layouts with their attributes
+        """
+        return self._layouts
+
+    def get_layout(self, name: str) -> LayoutType:
+        """Get a layout from the list of available layouts
+
+        Parameters
+        ----------
+        name : str
+            The name of the layout
+
+        Returns
+        -------
+        dict (LayoutType):
+            The layout with its attributes
+
+        Raises
+        ------
+        ValueError
+            If no layout is registered under `name`
+        """
+        # Check if the provided name in list (_layouts: list[LayoutType])
+        for layout in self._layouts:
+            if layout["name"] == name:
+                return layout
+        # if here then layout not found
+        raise ValueError(f"Layout {name} does not exist")
+
+    def get_layout_params(self, name: str) -> list:
+        """Get the parameters of a layout from the list of available layouts
+
+        Parameters
+        ----------
+        name : str
+            The name of the layout
+
+        Returns
+        -------
+        list
+            The parameters of the layout
+
+        Raises
+        ------
+        ValueError
+            If no layout is registered under `name`
+        """
+        # get_layout does the lookup and raises ValueError for unknown names
+        return self.get_layout(name)["params"]
+
+    def get_positions(self, name: str, seed: int = -1, **kwargs) -> dict:
+        """Compute node positions with one of the available layouts
+
+        Parameters
+        ----------
+        name : str
+            The name of the layout
+        seed : int (optional, default=-1)
+            The seed to use for the layout; -1 means no seed is passed
+        **kwargs : dict
+            The attributes of the layout. "G" is the graph that is laid out,
+            "root" is only read for the (currently disabled) cluster_layout
+
+        Returns
+        -------
+        dict
+            The positions of the layout
+
+        Raises
+        ------
+        ValueError
+            If no layout is registered under `name`
+        """
+        _graph: nx.Graph = kwargs.get("G", None)
+        # get the layout function; unknown names raise ValueError here instead
+        # of failing later while iterating over a missing parameter list
+        layout: LayoutType = self.get_layout(name)
+        layout_func: Callable = layout["func"]
+        layout_kwargs: dict = {}
+        for param in layout["params"]:
+            if param == "seed" and seed != -1:
+                # Set the seed if it is not -1
+                layout_kwargs["seed"] = seed
+            elif param == "nshells" and name == "shell_layout":
+                # Group nodes by parent, one shell per parent. This needs the
+                # graph, so it is skipped when no "G" was passed
+                if _graph is not None:
+                    grouped_nodes: dict[str, list] = {}
+                    for node, data in _graph.nodes(data=True):
+                        parent = data.get("parent", "Unknown")
+                        grouped_nodes.setdefault(parent, []).append(node)
+                    # networkx.shell_layout receives the list of shells through
+                    # its `nlist` keyword; it has no `nshells` keyword
+                    layout_kwargs["nlist"] = list(grouped_nodes.values())
+            elif param == "root" and name == "cluster_layout":
+                # Set the root node
+                layout_kwargs["root"] = kwargs["root"]
+            elif param != "G":
+                # TODO Handle other parameters here
+                pass
+        return layout_func(G=_graph, **layout_kwargs)
diff --git a/src/codecarto/containers/processor/src/polygraph/__init__.py b/src/codecarto/containers/processor/src/polygraph/__init__.py
new file mode 100644
index 0000000..aa7b332
--- /dev/null
+++ b/src/codecarto/containers/processor/src/polygraph/__init__.py
@@ -0,0 +1,2 @@
+# Polygraph folder holds the logic for the polygraph module
+# as well as logic around conversions and data manipulation.
diff --git a/src/codecarto/containers/processor/src/polygraph/polygraph.py b/src/codecarto/containers/processor/src/polygraph/polygraph.py
new file mode 100644
index 0000000..0b3eb7c
--- /dev/null
+++ b/src/codecarto/containers/processor/src/polygraph/polygraph.py
@@ -0,0 +1,207 @@
+import networkx as nx
+from ..models.graph_data import GraphData
+
+
+class PolyGraph:
+    """A class used to convert data types to a networkx graph and vice versa."""
+
+    def graph_to_json_file(self, graph: GraphData, json_path: str) -> dict:
+        """Converts a networkx graph to a JSON object and saves it to a file.
+
+        Parameters:
+        -----------
+        graph (GraphData): The graph to convert.
+        json_path (str): The path to save the JSON file to.
+
+        Returns:
+        --------
+        dict: The JSON object.
+        """
+        import json
+        import os
+
+        # Validate inputs
+        if graph is None:
+            raise ValueError("No graph provided.")
+        if json_path is None or json_path == "":
+            raise ValueError("No json_path provided.")
+
+        # Convert the graph to a JSON object and save it to a file
+        json_data = self.graph_to_json_data(graph)
+        # A bare file name has no directory part, so there is nothing to create
+        json_dir = os.path.dirname(json_path)
+        if json_dir:
+            os.makedirs(json_dir, exist_ok=True)
+        with open(json_path, "w") as f:
+            json.dump(json_data, f, indent=4)
+        return json_data
+
+    def json_file_to_graph(self, json_file: str) -> nx.DiGraph:
+        """Converts a JSON file to a networkx graph.
+
+        Parameters:
+        -----------
+        json_file (str): The path to the JSON file to load.
+
+        Returns:
+        --------
+        nx.DiGraph: The networkx graph.
+        """
+        import os
+        import json
+
+        # Validate inputs
+        if json_file is None or json_file == "":
+            raise ValueError("No json_file provided.")
+
+        # Check if file exists
+        if not os.path.exists(json_file):
+            print(f"File {json_file} not found.")
+            raise FileNotFoundError(f"File not found: {json_file}")
+
+        # Load the JSON file and convert it to a graph
+        try:
+            with open(json_file, "r") as f:
+                graph_data = json.load(f)
+        except json.JSONDecodeError as e:
+            print(f"Failed to load data from {json_file}.")
+            raise e
+
+        return self.json_data_to_graph(graph_data)
+
+    def graph_to_json_data(self, graph: GraphData) -> dict:
+        """Converts a networkx graph to a JSON object.
+
+        Parameters:
+        -----------
+        graph (GraphData): The graph to convert.
+
+        Returns:
+        --------
+        dict: The JSON object.
+        """
+        from ..plotter.palette import Palette
+
+        # Validate inputs
+        if graph is None:
+            raise ValueError("No graph provided.")
+        if not isinstance(graph, nx.DiGraph):
+            try:
+                graph: nx.DiGraph = self.graphdata_to_nx(graph)
+            except Exception as exc:
+                # Keep the original error as the cause instead of hiding it
+                raise ValueError(
+                    "'graph' must be formatted as a GraphData object."
+                ) from exc
+
+        # Create the JSON object
+        graph_data: dict[str, dict[str, dict[str, list]]] = {"nodes": {}, "edges": {}}
+
+        # Create all node objects
+        node_styles = Palette().get_node_styles()
+        for node_id, data in graph.nodes.data(True):
+            node_type = data.get("type", "Unknown")
+            if node_type not in node_styles.keys():
+                node_type = "Unknown"
+
+            node_obj = {
+                "id": node_id,
+                "type": node_type,
+                "label": data.get("label", node_id),
+                "base": data.get("base", "unknown"),
+                "parent": data.get("parent"),
+                "children": [],
+                "edges": [],
+            }
+            graph_data["nodes"][node_id] = node_obj
+
+        # Link parent and child nodes together
+        for node_id, node_obj in graph_data["nodes"].items():
+            parent_id = node_obj["parent"]
+            if parent_id and parent_id in graph_data["nodes"]:
+                graph_data["nodes"][parent_id]["children"].append(node_obj)
+
+        # Create edge objects and link them to their source nodes
+        for edge_id, (source, target) in enumerate(graph.edges()):
+            if source not in graph_data["nodes"] or target not in graph_data["nodes"]:
+                continue
+            source_node: dict[str, list] = graph_data["nodes"][source]
+            target_node: dict[str, list] = graph_data["nodes"][target]
+
+            edge_obj = {
+                "id": edge_id,
+                "type": "edge",
+                "source": source_node["id"],
+                "target": target_node["id"],
+            }
+            graph_data["edges"][edge_id] = edge_obj
+            source_node["edges"].append(edge_obj)
+
+        # # Clean out any graph_data["nodes"] that have parents
+        # for node_id, node_obj in list(graph_data["nodes"].items()):
+        #     if node_obj["parent"]:
+        #         del graph_data["nodes"][node_id]
+
+        return graph_data
+
+    def json_data_to_graph(self, json_data: dict[str, dict]) -> nx.DiGraph:
+        """Converts a JSON object to a networkx graph.
+
+        Parameters:
+        -----------
+        json_data (dict): The JSON object to convert.
+
+        Returns:
+        --------
+        networkx.classes.graph.DiGraph: The graph.
+        """
+
+        # Validate inputs
+        if json_data is None:
+            raise ValueError("No json provided.")
+
+        # Create the graph
+        graph = nx.DiGraph()
+
+        def add_node_and_children(node_id, node_obj):
+            # Recursively add children
+            graph.add_node(
+                node_id,
+                type=node_obj["type"],
+                label=node_obj["label"],
+                base=node_obj["base"],
+                parent=node_obj["parent"],
+            )
+            for child_obj in node_obj["children"]:
+                child_id = child_obj["id"]
+                add_node_and_children(child_id, child_obj)
+
+        # Add nodes and their children to the graph
+        for node_id, node_obj in json_data["nodes"].items():
+            add_node_and_children(node_id, node_obj)
+
+        # Add edges to the graph
+        for edge_id, edge_obj in json_data["edges"].items():
+            graph.add_edge(edge_obj["source"], edge_obj["target"])
+
+        return graph
+
+    @staticmethod
+    def graphdata_to_nx(graph_data: GraphData) -> nx.DiGraph:
+        """Converts a GraphData object to a networkx graph.
+
+        Declared as a static method because it takes no `self`; this makes the
+        `self.graphdata_to_nx(graph)` call in graph_to_json_data work.
+
+        Parameters:
+        -----------
+        graph_data (GraphData): The GraphData object to convert.
+
+        Returns:
+        --------
+        networkx.classes.graph.DiGraph: The graph.
+        """
+
+        # Validate inputs
+        if graph_data is None:
+            raise ValueError("No graph provided.")
+
+        # Create the graph
+        try:
+            G = nx.DiGraph()
+
+            # Add nodes to the graph
+            for node_id, node in graph_data.nodes.items():
+                G.add_node(node_id, label=node.label, type=node.type, base=node.base)
+
+            # Add edges to the graph
+            for edge_id, edge in graph_data.edges.items():
+                G.add_edge(edge.source, edge.target, id=edge_id, type=edge.type)
+
+            return G
+        except Exception as exc:
+            raise ValueError(
+                "'graph' must be formatted as a GraphData object."
+            ) from exc
diff --git a/src/codecarto/containers/processor/src/processor.py b/src/codecarto/containers/processor/src/processor.py
new file mode 100644
index 0000000..a7196cd
--- /dev/null
+++ b/src/codecarto/containers/processor/src/processor.py
@@ -0,0 +1,249 @@
+###################################################################
+# THIS IS A COPY OF 'LOCAL' PROCESSOR.PY
+# THIS IS JUST USED TO TEST PLOTTING AND JSON CONVERSION IN THE API
+###################################################################
+
+previous_output_dir: str = ""
+
+
+def get_config_data() -> dict:
+    """Placeholder for the local config loader; not implemented in this copy."""
+    raise NotImplementedError("get_config_data is not implemented in this copy.")
+
+
+def create_output_dirs() -> dict:
+    """Placeholder for the local output directory setup; not implemented in this copy."""
+    raise NotImplementedError("create_output_dirs is not implemented in this copy.")
+
+
+def process(
+    file_path: str = __file__,
+    from_api: bool = False,
+    single_file: bool = False,
+    plot: bool = True,
+    json: bool = False,
+    labels: bool = False,
+    grid: bool = False,
+    show_plot: bool = False,
+    output_dir: str = "",
+) -> dict | None:
+    """Parses the source code, creates a graph, creates a plot, creates a json file, and outputs the results.
+
+    Parameters:
+    -----------
+    file_path (str) - Default: __file__
+        The path to the file to be analyzed.
+    from_api (bool) - Default: False
+        Whether the process is being run from the API.
+    single_file (bool) - Default: False
+        Whether to analyze a single file.
+    plot (bool) - Default: True
+        Whether to plot the graph.
+    json (bool) - Default: False
+        Whether to create a json file of the graph.
+    labels (bool) - Default: False
+        Whether to label the nodes of the graph.
+    grid (bool) - Default: False
+        Whether to add a grid to the graph.
+    show_plot (bool) - Default: False
+        Whether to show the graph.
+    output_dir (str) - Default: ""
+        The path to the output directory.
+
+    Returns:
+    --------
+    dict | None
+        If called from the API dict is json object of the graph.\n
+        If called locally dict is the paths to the output directory.
+            'version':
+                the runtime version of the process.
+            'output_dir':
+                the path to the output directory.
+            'version_dir':
+                the path to the output/version directory.
+            'graph_dir':
+                the path to the output/graph directory.
+            'graph_code_dir':
+                the path to the output/graph/from_code directory.
+            'graph_json_dir':
+                the path to the output/graph/from_json directory.
+            'json_dir':
+                the path to the output/json directory.
+            'json_graph_file_path':
+                the path to the output/json/graph.json file.
+        None is returned for a local run that produced no graph.
+
+    Raises:
+    -------
+    ValueError
+        If called from the API and no graph could be created from the file.
+    """
+    from .parser.parser import Parser
+    from .parser.import_source_dir import get_all_source_files
+
+    # The config is only read and written for local runs, so the API path
+    # does not load it at all
+    config_data: dict = {}
+    config_path: str = ""
+    if not from_api:
+        config_data = get_config_data()
+        config_path = config_data["config_path"]
+
+    # TODO: Should we do a progress bar for all of these??
+    #       If we do, we'll need to calculate the
+
+    ############# SETUP OUTPUT #############
+    # Local name on purpose: the module level previous_output_dir is not modified
+    previous_output_dir: str = ""
+    if not from_api:
+        print_status(f"\nCodeCartographer:\nProcessing File:\n{file_path}", from_api)
+        # if the user provides an output directory, use it instead of the one in the config
+        # BE SURE TO CHANGE THE OUTPUT DIRECTORY IN THE CONFIG FILE BACK TO THE PREVIOUS ONE
+        if output_dir:
+            previous_output_dir = config_data["output_dir"]
+            config_data["output_dir"] = output_dir
+            save_json(config_data, config_path)
+
+    ############# PARSE THE CODE #############
+    source_files: list[str] = []
+    if single_file:
+        source_files = [file_path]
+    else:
+        source_files = get_all_source_files(file_path)
+    graph = Parser(source_files=source_files).graph
+    parse_msg: str = f"... {len(source_files)} source files parsed ...\n"
+    print_status(parse_msg, from_api)
+
+    ############# PROCESS THE GRAPH #############
+    return_data: dict = None
+    if graph and graph.number_of_nodes() > 0:
+        from .plotter.plotter import Plotter
+        from .polygraph.polygraph import PolyGraph
+
+        # TODO: until we figure out how to return a plot through API,
+        # we won't do the plotting we'll just return the json file of the graph
+        pg: PolyGraph = PolyGraph()
+        if not from_api:
+            # Create the output directory
+            paths = create_output_dirs()
+
+            # Plot the graph
+            if plot:
+                # Create the graph plotter, needs to be same
+                # Plotter for both to handle seed correctly
+                # (kept in its own name so the `plot` flag is not overwritten)
+                plotter: Plotter = Plotter()
+
+                # Set the plotter attributes
+                plotter.set_plotter_attrs(
+                    dirs=paths,
+                    file_path=file_path,
+                    labels=labels,
+                    grid=grid,
+                    json=False,
+                    show_plot=show_plot,
+                    single_file=single_file,
+                    ntx_layouts=True,
+                    custom_layouts=True,
+                )
+
+                # Plot the graph made from code
+                print_process_settings(plotter, from_api)
+                print_status("", from_api)
+                if grid:
+                    print_status("Plotting all layouts to a grid...", from_api)
+                else:
+                    print_status("Plotting all layouts in separate files...", from_api)
+                print_status("Plotting Source Code Graph...\n", from_api)
+                plotter.plot(graph)
+                print_status("Source Code Plots Saved...\n", from_api)
+
+                # Create a json file of the graph
+                print_status("Converting Graph to JSON...", from_api)
+                json_graph_file = paths["json_file_path"]
+                pg.graph_to_json_file(graph, json_graph_file)
+
+                # Create the json graph
+                if json:  # this is asking if we should convert back from json to graph
+                    print_status("Converting JSON back to Graph...", from_api)
+                    json_graph = pg.json_file_to_graph(json_graph_file)
+
+                    # Plot the graph from json file
+                    plotter.json = True
+                    if grid:
+                        print_status("Plotting all layouts to a grid...", from_api)
+                    else:
+                        print_status(
+                            "Plotting all layouts in separate files...", from_api
+                        )
+                    print_status("Plotting JSON Graph...\n", from_api)
+                    plotter.plot(json_graph)
+                    print_status("JSON Plots Saved...\n", from_api)
+            else:
+                # Create a json file of the graph
+                json_data: dict = pg.graph_to_json_data(graph)
+                json_file_path = paths["json_file_path"]
+                save_json(json_data, json_file_path)
+
+            print_status("\nFinished!\n", from_api)
+            print_status(f"Output Directory:\n{paths['output_dir']}\n", from_api)
+            return_data = paths
+        else:
+            # TODO: this is just until we can figure out how to return a plot through API
+            # this is being run through the API, don't create a bunch of stuff on server
+            # just return the graph as json data
+            return_data = pg.graph_to_json_data(graph)
+    else:
+        if not from_api:
+            print_status("No graph to plot\n", from_api)
+            return_data = None
+        else:
+            raise ValueError("Graph was not able to be created from file.")
+
+    # Change the output dir back to the previous one
+    if not from_api:  # not needed for API
+        # BE SURE TO CHANGE THE OUTPUT DIRECTORY IN THE CONFIG FILE BACK TO THE PREVIOUS ONE
+        if output_dir and previous_output_dir != "":
+            config_data["output_dir"] = previous_output_dir
+            save_json(config_data, config_path)
+
+    return return_data
+
+
+def print_status(message: str = None, from_api: bool = False):
+    """Print the status of the code cartographer.
+
+    Parameters:
+    -----------
+    message (str) - Default: None
+        The message to print.
+    from_api (bool) - Default: False
+        Whether the process is being run from the API; nothing is printed then.
+    """
+    # TODO: this function is for local use until we can figure out how to return status messages through API
+    if message and not from_api:
+        print(message)
+
+
+def print_process_settings(plotter, from_api: bool = False):
+    """Print the settings for the process."""
+    from .plotter.plotter import Plotter
+
+    plot: Plotter = plotter
+    settings_msg: str = f"Plot Settings:\n"
+    settings_msg += f"    Labels: {plot.labels}\n"
+    settings_msg += f"    Grid: {plot.grid}\n"
+    settings_msg += f"    JSON: {plot.json}\n"
+    settings_msg += f"    Show Plot: {plot.show_plot}\n"
+    settings_msg += f"    Single File: {plot.single_file}\n"
+    settings_msg += f"    NTX Layouts: {plot.ntx_layouts}\n"
+    settings_msg += f"    Custom Layouts: {plot.custom_layouts}\n"
+    print_status(settings_msg, from_api)
+
+
+def save_json(json_data: dict, json_file_path: str):
+    """Save json_data to json_file_path, creating the parent directory if needed.
+
+    Returns True if the file exists after writing, False otherwise. Errors
+    raised while serializing or writing are reported and re-raised.
+    """
+    import os
+    from json import dump
+
+    # Create the parent directory; a bare file name has no directory part
+    # and os.makedirs("") would fail
+    parent_dir = os.path.dirname(json_file_path)
+    if parent_dir:
+        os.makedirs(parent_dir, exist_ok=True)
+
+    # Save data to file
+    try:
+        with open(json_file_path, "w") as f:
+            dump(json_data, f, indent=4)
+    except (TypeError, ValueError, OSError):
+        # dump raises TypeError or ValueError for data it cannot serialize;
+        # OSError covers failures to open or write the file
+        print(f"Failed to save data to {json_file_path}.")
+        raise
+
+    # Check if file was created
+    if os.path.exists(json_file_path):
+        return True
+    else:
+        print(f"File {json_file_path} was not created.")
+        return False
diff --git a/src/codecarto/containers/web/Dockerfile b/src/codecarto/containers/web/Dockerfile
new file mode 100644
index 0000000..cddb762
--- /dev/null
+++ b/src/codecarto/containers/web/Dockerfile
@@ -0,0 +1,28 @@
+FROM tiangolo/uvicorn-gunicorn:python3.11
+
+EXPOSE 2000
+
+# Keeps Python from generating .pyc files in the container
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Turns off buffering for easier container logging
+ENV PYTHONUNBUFFERED=1
+
+# Install pip requirements
+COPY ./src/codecarto/containers/web/requirements.txt .
+RUN python -m pip install -r requirements.txt
+
+# Directory
+WORKDIR /app
+COPY ./src/codecarto/containers/web/api /app/api
+COPY ./src/codecarto/containers/web/src /app/src
+ENV PYTHONPATH=/app
+
+# # Creates a non-root user with an explicit UID and adds permission to access the /app folder
+# # For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
+# RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app
+# USER appuser
+
+# # During debugging, this entry point will be overridden.
+# For more information, please refer to https://aka.ms/vscode-docker-python-debug
+CMD ["gunicorn", "--bind", "0.0.0.0:2000", "-k", "uvicorn.workers.UvicornWorker", "api.main:app"]
\ No newline at end of file
diff --git a/src/codecarto/containers/web/__init__.py b/src/codecarto/containers/web/__init__.py
new file mode 100644
index 0000000..ad249e3
--- /dev/null
+++ b/src/codecarto/containers/web/__init__.py
@@ -0,0 +1,71 @@
+# API folder holds logic for the API endpoints and routers.
+# These are called through browsers or other applications.
+
+# These will do the MAIN functionality of the application alone.
+# Things like config set up and output directories are not necessary
+# since the server will have fixed output directories and configurations.
+
+# The main functions the API will have access to are:
+#   - Converting any input data to GraphData (PolyGraph)
+#   - Parsing source code to a GraphData object
+#   - Passing plot layouts and themes
+#       TODO: how will we save these on the server?
+#           Will need to have access to them when plotting
+#           could we somehow have layout and theme objects passed in to the plotter?
+#   - Plotting a GraphData object to an image
+#   - ? Analyzing the graph
+#       TODO: (this part actually may be handled in TechOps)
+# I can't think of anything else that the API will need to do.
+ + +################## Performance Metrics ########################## +# Performance Metrics: +# This includes timing how long it takes to process each request, how long it takes +# to parse each file, etc. This can help you find any performance bottlenecks in your code. +# Usage Metrics: +# This includes how often each endpoint is hit, how many files are uploaded, how large the +# files are, etc. This can help you understand how your API is being used and plan for scaling. +# Error Metrics: +# Track the number and type of errors that occur. This can help you identify the most common +# problems and prioritize fixes. + + +################## Abusive Request Protection ################### +# Rate limiting: +# This is to prevent a single user from overwhelming your server by sending +# too many requests in a short period of time. You can use the slowapi library +# to apply rate limiting in FastAPI. +# File Type Checks: +# You may want to validate the file type of uploaded files. This can prevent +# users from uploading potentially malicious files. +# Error Handling: +# Providing clear and user-friendly error messages can help users understand what +# went wrong if their request fails. However, be careful not to provide too much +# detail in error messages, as this could provide useful information to an attacker. +# Logging and Monitoring: +# Keeping logs of API usage can help you understand how your API is used and identify +# potential security issues. Monitoring API usage can help you identify unusual patterns +# that may indicate a security issue. +# Authentication and Authorization: +# Depending on your use case, you might want to require users to authenticate +# (log in) before they can use your API, and limit what each user is authorized +# to do based on their role or permissions. +# Input Sanitization: +# This involves cleaning the input to prevent injection attacks. 
This is especially +# important if you're passing the user's input to a command line operation, or using +# it to generate SQL queries, etc. +# Timeouts: +# If the parsing of the file takes too long, you may want to abort the operation +# and return an error. This can prevent a user from unintentionally overwhelming +# your server with a very complex file. You can set a timeout at the +# server level. For example, this container starts gunicorn with uvicorn workers, +# so the worker timeout can be set like this: +# gunicorn -k uvicorn.workers.UvicornWorker --timeout 30 api.main:app # 30 seconds +# It means gunicorn will kill and restart any worker that stays silent for +# longer than 30 seconds. (uvicorn itself has no `--timeout` option; its +# `--timeout-keep-alive` option only applies to idle keep-alive connections.) +# Secure Transmission: +# If your API is accessible over the internet, you should enforce HTTPS to ensure that +# data is transmitted securely. You could use an HTTPS reverse proxy, such as Nginx or +# Apache, to handle the HTTPS part. Basically, you configure your server to handle HTTPS +# and then forward the requests to your FastAPI application. Another alternative would +# be using a cloud platform like AWS or GCP, which provide options to set up HTTPS.
diff --git a/src/codecarto/containers/web/api/__init__.py b/src/codecarto/containers/web/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/codecarto/containers/web/api/main.py b/src/codecarto/containers/web/api/main.py
new file mode 100644
index 0000000..3e048a5
--- /dev/null
+++ b/src/codecarto/containers/web/api/main.py
@@ -0,0 +1,43 @@
+from fastapi import FastAPI, Request, HTTPException
+from fastapi.responses import JSONResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+
+from .routers.palette_router import PaletteRoute
+from .routers.plotter_router import PlotterRoute
+from .routers.parser_router import ParserRoute
+from .routers.polygraph_router import PolyGraphRoute
+
+# Debug
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+# Create the app
+app = FastAPI()
+
+# Serve the static files
+# (the same directory is also used as the Jinja2 template directory)
+app.mount("/pages", StaticFiles(directory="src/pages"), name="pages")
+pages = Jinja2Templates(directory="src/pages")
+
+
+# Handle HTTPException only (other exception types are not handled here):
+# the response keeps the exception's status code and carries its detail
+# in a JSON body of the form {"message": <detail>}
+@app.exception_handler(HTTPException)
+async def http_exception_handler(request: Request, exc: HTTPException):
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={"message": exc.detail},
+    )
+
+
+# Root page
+@app.get("/")
+async def root(request: Request):
+    return pages.TemplateResponse("/home/home.html", {"request": request})
+
+
+# Add the routers
+app.include_router(PaletteRoute, prefix="/palette", tags=["palette"])
+app.include_router(PlotterRoute, prefix="/plotter", tags=["plotter"])
+app.include_router(ParserRoute, prefix="/parser", tags=["parser"])
+app.include_router(PolyGraphRoute, prefix="/polygraph", tags=["polygraph"])
diff --git a/src/codecarto/containers/web/api/routers/__init__.py b/src/codecarto/containers/web/api/routers/__init__.py
new file mode 100644
index 0000000..706e908
--- /dev/null
+++ b/src/codecarto/containers/web/api/routers/__init__.py
@@ -0,0 +1 @@
+# API Routers
\ No newline at end of file
diff
--git a/src/codecarto/containers/web/api/routers/palette_router.py b/src/codecarto/containers/web/api/routers/palette_router.py
new file mode 100644
index 0000000..7b43b14
--- /dev/null
+++ b/src/codecarto/containers/web/api/routers/palette_router.py
@@ -0,0 +1,123 @@
+import httpx
+from fastapi import APIRouter, Request
+from fastapi.templating import Jinja2Templates
+
+from api.util import generate_return, web_exception
+
+# Create a router
+PaletteRoute: APIRouter = APIRouter()
+pages = Jinja2Templates(directory="src/pages")
+html_page = "/palette/palette.html"
+
+# Set the processor api url
+PROC_API_URL = "http://processor:2020/palette"
+PROC_API_GET_PALETTE = f"{PROC_API_URL}/get_palette"
+
+
+# Root page
+@PaletteRoute.get("/")
+async def root(request: Request):
+    return pages.TemplateResponse(html_page, {"request": request})
+
+
+@PaletteRoute.get("/get_palette")
+async def get_palette() -> dict:
+    """Fetch the palette from the processor container.
+
+    Returns:
+    --------
+    dict:
+        The JSON body returned by the processor.
+
+    Raises:
+    -------
+    HTTPException:
+        Raised by web_exception when the request fails, the processor answers
+        with an error status, or its body cannot be decoded as JSON.
+    """
+    async with httpx.AsyncClient() as client:
+        try:
+            response = await client.get(PROC_API_GET_PALETTE)
+            response.raise_for_status()
+            palette: dict = response.json()
+        except Exception as exc:
+            # Only an HTTPStatusError carries the processor's response;
+            # connection errors, timeouts and JSON decoding errors do not
+            error_message = str(exc)
+            if isinstance(exc, httpx.HTTPStatusError):
+                try:
+                    error_message = exc.response.json().get("detail", str(exc))
+                except (ValueError, AttributeError):
+                    # body is not JSON, or not a JSON object
+                    pass
+            web_exception(
+                "get_palette",
+                "Error from processor",
+                {},
+                exc,
+                proc_error=error_message,
+            )
+        # Checked outside of the try block so the HTTPException raised by
+        # web_exception is not caught and reported a second time
+        if not response.status_code == 200:
+            web_exception(
+                "get_palette",
+                "Could not fetch palette from processor",
+                {},
+            )
+        return palette
+
+
+# @PaletteRoute.get("/set_palette")
+# async def set_palette(palette_file_path: str) -> dict[str, str]:
+#     """Sets the palette to use for plots
+
+#     Parameters:
+#     -----
+#     palette_file_path (str):
+#         The path to the palette file.
+
+#     Returns:
+#     --------
+#     dict:
+#         The new palette data.
+#     """
+#     from ....processor.plotter.palette import Palette
+
+#     palette: Palette = Palette()
+#     palette.set_palette(palette_file_path)
+#     return palette.get_palette_data()
+
+
+# @PaletteRoute.get("/reset_palette")
+# async def reset_palette() -> dict[str, str]:
+#     """Resets the palette to the default.
+
+#     Returns:
+#     --------
+#     dict:
+#         The current palette data.
+#     """
+#     from ....processor.plotter.palette import Palette
+
+#     palette: Palette = Palette()
+#     palette.reset_palette()
+#     return palette.get_palette_data()
+
+
+# @PaletteRoute.get("/add_theme")
+# async def add_theme(
+#     node_type: str,
+#     base: str,
+#     label: str,
+#     shape: str,
+#     color: str,
+#     size: str,
+#     alpha: str,
+# ) -> dict[str, str]:
+#     """Creates a new theme.
+
+#     Parameters:
+#     -----
+#     node_type (str):
+#         The type of node to create a theme for.
+#     base (str):
+#         The base color of the theme.
+#     label (str):
+#         The label color of the theme.
+#     shape (str):
+#         The shape color of the theme.
+#     color (str):
+#         The color color of the theme.
+#     size (str):
+#         The size color of the theme.
+#     alpha (str):
+#         The alpha color of the theme.
+
+#     Returns:
+#     --------
+#     dict:
+#         The current palette data.
+#     """
+#     from ....processor.plotter.palette import Palette, Theme
+
+#     theme = Theme(node_type, base, label, shape, color, size, alpha)
+#     palette: Palette = Palette()
+#     palette.create_new_theme(theme)
+#     return palette.get_palette_data()
diff --git a/src/codecarto/containers/web/api/routers/parser_router.py b/src/codecarto/containers/web/api/routers/parser_router.py
new file mode 100644
index 0000000..7ffbbf2
--- /dev/null
+++ b/src/codecarto/containers/web/api/routers/parser_router.py
@@ -0,0 +1,54 @@
+import httpx
+from fastapi import APIRouter, Request
+from fastapi.templating import Jinja2Templates
+
+from api.util import generate_return, web_exception
+
+# Create a router
+ParserRoute: APIRouter = APIRouter()
+pages = Jinja2Templates(directory="src/pages")
+parse_html_page = "/parse/parse.html"
+
+# Set the processor api url
+PROC_API_URL = "http://processor:2020/parser"
+PROC_API_GITHUB_URL = f"{PROC_API_URL}/handle_github_url"
+
+
+# Root page
+@ParserRoute.get("/")
+async def root(request: Request):
+    return pages.TemplateResponse(parse_html_page, {"request": request})
+
+
+@ParserRoute.get("/handle_github_url/")
+async def handle_github_url(github_url: str) -> dict:
+    """Ask the processor container to handle a GitHub url.
+
+    Parameters:
+    -----------
+    github_url : str
+        The url that is forwarded to the processor.
+
+    Returns:
+    --------
+    dict
+        The JSON body returned by the processor.
+
+    Raises:
+    -------
+    HTTPException
+        Raised by web_exception when the request fails, the processor answers
+        with an error status, or its body cannot be decoded as JSON.
+    """
+    # Call the processor container
+
+    # TODO: call the proc api to start it, will get a job id
+    # TODO: check the database every X secs on job id for results
+    # TODO: Temp work around to see if working
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        try:
+            response = await client.get(
+                PROC_API_GITHUB_URL,
+                params={
+                    "github_url": github_url,
+                },
+            )
+            response.raise_for_status()
+            contents: dict = response.json()
+        except Exception as exc:
+            # Only an HTTPStatusError carries the processor's response;
+            # connection errors, timeouts and JSON decoding errors do not
+            error_message = str(exc)
+            if isinstance(exc, httpx.HTTPStatusError):
+                try:
+                    error_message = exc.response.json().get("detail", str(exc))
+                except (ValueError, AttributeError):
+                    # body is not JSON, or not a JSON object
+                    pass
+            web_exception(
+                "handle_github_url",
+                "Error from processor",
+                {"github_url": github_url},
+                exc,
+                proc_error=error_message,
+            )
+        # Checked outside of the try block so the HTTPException raised by
+        # web_exception is not caught and reported a second time
+        if not response.status_code == 200:
+            web_exception(
+                "handle_github_url",
+                "Could not fetch github contents from processor",
+                {"github_url": github_url},
+            )
+        return contents
diff --git a/src/codecarto/containers/web/api/routers/plotter_router.py b/src/codecarto/containers/web/api/routers/plotter_router.py
new file mode 100644
index 0000000..dc38e70
--- /dev/null
+++ b/src/codecarto/containers/web/api/routers/plotter_router.py
@@ -0,0 +1,107 @@
+import httpx
+from fastapi import APIRouter, Request
+from fastapi.templating import Jinja2Templates
+
+from api.util import generate_return, web_exception
+
+# Create a router
+PlotterRoute: APIRouter = APIRouter()
+pages = Jinja2Templates(directory="src/pages")
+html_page = "/plot/plot.html"
+
+# Set the processor api url
+PROC_API_URL = "http://processor:2020/plotter"
+PROC_API_PLOT = f"{PROC_API_URL}/plot"
+
+
+# Root page
+@PlotterRoute.get("/")
+async def root(request: Request, file_url: str = None):
+    if file_url and file_url != "":
+        return pages.TemplateResponse(
+            html_page, {"request": request, "file_url": file_url}
+        )
+    else:
+        return pages.TemplateResponse(html_page, {"request": request})
+
+
+@PlotterRoute.get("/plot")
+async def plot(
+    graph_data: dict = None,
+    file: str = None,
+    url: str = None,
+    layout: str = None,
+    grid: bool = False,
+    labels: bool = False,
+    ntx: bool = True,
+    custom: bool = True,
+    palette: dict = None,
+    debug: bool = False,
+) -> dict:
+    """Plot a graph.
+
+    Parameters:
+    -----------
+        graph_data : dict
+            The graph data. JSON format.
+        file : str
+            The file to parse and plot.
+        url : str
+            The url to parse and plot.
+        layout : str
+            The name of the layout to plot.
+            Used to plot a single layout.
+        grid : bool
+            Whether to plot all plot layouts in a grid.
+        labels : bool
+            Whether to plot the graph with labels.
+        ntx : bool
+            Whether to use the networkx layouts.
+        custom : bool
+            Whether to use the custom layouts.
+        palette: dict
+            The palette to use for plotting.
+        debug: bool
+            Whether to run long process vs short process.
+
+    Returns:
+    --------
+        dict
+            The results of the plot. {index: plot html}
+
+    Raises:
+    -------
+        HTTPException
+            Raised by web_exception when the request fails, the processor answers
+            with an error status, or its body cannot be decoded as JSON.
+    """
+    # Call the processor container
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        # NOTE(review): None and dict values are sent as query parameters as they
+        # are; confirm this is the format the processor endpoint expects
+        params: dict = {
+            "graph_data": graph_data,
+            "file": file,
+            "url": url,
+            "layout": layout,
+            "grid": grid,
+            "labels": labels,
+            "ntx": ntx,
+            "custom": custom,
+            "palette": palette,
+            "debug": debug,
+        }
+
+        try:
+            response = await client.get(PROC_API_PLOT, params=params)
+            response.raise_for_status()
+            results: dict = response.json()
+        except Exception as exc:
+            # Only an HTTPStatusError carries the processor's response;
+            # connection errors, timeouts and JSON decoding errors do not
+            error_message = str(exc)
+            if isinstance(exc, httpx.HTTPStatusError):
+                try:
+                    error_message = exc.response.json().get("detail", str(exc))
+                except (ValueError, AttributeError):
+                    # body is not JSON, or not a JSON object
+                    pass
+            web_exception(
+                "plot",
+                "Error from processor",
+                params,
+                exc,
+                proc_error=error_message,
+            )
+        # Checked outside of the try block so the HTTPException raised by
+        # web_exception is not caught and reported a second time
+        if not response.status_code == 200:
+            web_exception(
+                "plot",
+                "Could not fetch plot from processor",
+                params,
+            )
+        return results
diff --git a/src/codecarto/containers/web/api/routers/polygraph_router.py b/src/codecarto/containers/web/api/routers/polygraph_router.py
new file mode 100644
index 0000000..441ef68
--- /dev/null
+++ b/src/codecarto/containers/web/api/routers/polygraph_router.py
@@ -0,0 +1,61 @@
+import httpx
+from fastapi import APIRouter
+from fastapi.templating import Jinja2Templates
+
+from api.util import generate_return, web_exception
+
+# Create a router
+PolyGraphRoute: APIRouter = APIRouter()
+pages = Jinja2Templates(directory="src/pages")
+html_page = "/parse/parse.html"
+
+# Set the processor api url
+PROC_API_URL = "http://processor:2020/polygraph"
+PROC_API_GRAPH_DESC = f"{PROC_API_URL}/get_graph_desc"
+PROC_API_RAW_TO_JSON = f"{PROC_API_URL}/raw_to_json"
+
+
+@PolyGraphRoute.get("/get_graph_desc")
+async def get_graph_desc() -> dict:
+    """Fetch the graph description from the processor container.
+
+    Returns the JSON body of the processor's answer; failures are raised
+    as an HTTPException through web_exception.
+    """
+    async with httpx.AsyncClient() as client:
+        try:
+            response = await client.get(PROC_API_GRAPH_DESC)
+            response.raise_for_status()
+            graph_desc: dict = response.json()
+        except Exception as exc:
+            # Only an HTTPStatusError carries the processor's response;
+            # connection errors, timeouts and JSON decoding errors do not
+            error_message = str(exc)
+            if isinstance(exc, httpx.HTTPStatusError):
+                try:
+                    error_message = exc.response.json().get("detail", str(exc))
+                except (ValueError, AttributeError):
+                    # body is not JSON, or not a JSON object
+                    pass
+            web_exception(
+                "get_graph_desc",
+                "Error from processor",
+                {},
+                exc,
+                proc_error=error_message,
+            )
+        # Checked outside of the try block so the HTTPException raised by
+        # web_exception is not caught and reported a second time
+        if not response.status_code == 200:
+            web_exception(
+                "get_graph_desc",
+                "Could not fetch graph description from processor",
+            )
+        return graph_desc
+
+
+@PolyGraphRoute.get("/raw_to_json")
+async def raw_to_json(file_url: str) -> dict:
+    """Ask the processor container to convert the file at file_url to JSON.
+
+    Returns the JSON body of the processor's answer; failures are raised
+    as an HTTPException through web_exception.
+    """
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        try:
+            response = await client.get(
+                PROC_API_RAW_TO_JSON,
+                params={
+                    "file_url": file_url,
+                },
+            )
+            response.raise_for_status()
+            return response.json()
+        except Exception as exc:
+            # Only an HTTPStatusError carries the processor's response;
+            # connection errors, timeouts and JSON decoding errors do not
+            error_message = str(exc)
+            if isinstance(exc, httpx.HTTPStatusError):
+                try:
+                    error_message = exc.response.json().get("detail", str(exc))
+                except (ValueError, AttributeError):
+                    # body is not JSON, or not a JSON object
+                    pass
+            web_exception(
+                "raw_to_json",
+                "Error from processor",
+                {"file_url": file_url},
+                exc,
+                proc_error=error_message,
+            )
diff --git a/src/codecarto/containers/web/api/util.py b/src/codecarto/containers/web/api/util.py
new file mode 100644
index 0000000..2fb1be6
--- /dev/null
+++ b/src/codecarto/containers/web/api/util.py
@@ -0,0 +1,41 @@
+def generate_return(status: str, message: str, results: str):
+    """Build the standard response dictionary."""
+    return {
+        "status": status,  # success or error
+        "message": message,  # friendly message
+        "results": results,  # the actual results or the error message
+    }
+
+
+def web_exception(
+    called_from: str,
+    message: str,
+    params: dict = None,
+    exc: Exception = None,
+    status: int = 500,
+    proc_error: str = "",
+) -> dict:
+    """Log an error and raise it as an HTTPException.
+
+    This function never returns. The exception is raised with status code 404
+    when `status` is 404 and with status code 500 for every other value.
+
+    Parameters:
+    -----------
+    called_from : str
+        The name of the function that reports the error.
+    message : str
+        The error message.
+    params : dict
+        The parameters of the failed call; an empty dict is logged when omitted.
+    exc : Exception
+        The exception that caused the error; its traceback is logged.
+    status : int
+        The status to report.
+    proc_error : str
+        The error message received from the processor.
+    """
+    import traceback
+    import logging
+    from fastapi import HTTPException
+
+    # None is used as the default instead of a shared mutable {}
+    if params is None:
+        params = {}
+
+    # log the error and stack trace
+    error_message = f"Web.{called_from}() - status: {status} - param: {params} - message: {message} - proc_error: {proc_error}"
+    logger = logging.getLogger(__name__)
+    logger.error(error_message)
+    if exc:
+        error_message = f"{error_message} - exception: {str(exc)}"
+        tbk_str = traceback.format_exception(type(exc), exc, exc.__traceback__)
+        tbk_str = "".join(tbk_str)
+        logger.error(tbk_str)
+
+    # raise the exception
+    raise HTTPException(
+        status_code=404 if status == 404 else 500,
+        detail=error_message,
+    )
diff --git a/src/codecarto/containers/web/requirements.txt b/src/codecarto/containers/web/requirements.txt
new file mode 100644
index 0000000..1c35db1
--- /dev/null
+++ b/src/codecarto/containers/web/requirements.txt
@@ -0,0 +1,5 @@
+ # API/web dependencies
+fastapi[all]
+uvicorn[standard]
+gunicorn
+httpx
diff --git a/src/codecarto/containers/web/src/__init__.py b/src/codecarto/containers/web/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/codecarto/containers/web/src/pages/__init__.py b/src/codecarto/containers/web/src/pages/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/codecarto/containers/web/src/pages/base.css b/src/codecarto/containers/web/src/pages/base.css
new file mode 100644
index 0000000..09dae46
--- /dev/null
+++ b/src/codecarto/containers/web/src/pages/base.css
@@ -0,0 +1,103 @@
+/* site wide styles */
+body {
+  font-family: 'Calibri', sans-serif;
+  background-color: #2c221a;
+  color: #e1c48f;
+  font-size: 16px;
+}
+hr {
+  width: 75%;
+  border: 1px solid #e1c48f;
+  margin-left: auto;
+}
+/* selection lists */
+select {
+  height: 30px;
+  width: 200px;
+  background-color: #e5ca8f;
+  border: none;
+  border-radius: 5px;
+  margin: 5px;
+
outline: none;
+  color: rgb(24, 22, 21);
+  font-size: 16px;
+}
+select:focus,
+select:hover,
+option {
+  background-color: #e8dbad;
+}
+/* buttons: sized like the select lists above (30px x 200px) */
+button {
+  height: 30px;
+  width: 200px;
+  background-color: #d7b073;
+  border: none;
+  border-radius: 5px;
+  margin: 5px;
+  outline: none;
+  color: rgb(24, 22, 21);
+  font-size: 16px;
+}
+button:hover {
+  background-color: #e8dbad;
+}
+button:disabled {
+  background-color: #b1987e;
+}
+/* collapsibles: button.collapsible headers and div.content panels (display: none by default) */
+button.collapsible {
+  height: 40px;
+  width: 300px;
+  border: none;
+  border-radius: 0px;
+  outline: none;
+  margin: 0px;
+  padding: 10px;
+  margin-top: 0;
+  background-color: #d7b073;
+  color: rgb(24, 22, 21);
+  font-size: 18px;
+  text-align: left;
+  cursor: pointer;
+}
+button.collapsible.active,
+button.collapsible:hover {
+  background-color: #e8dbad;
+}
+div.content {
+  display: none;
+  width: 400px;
+  padding: 5px;
+  margin-bottom: -14px;
+  box-sizing: border-box;
+  border: solid 1px #e8dbad;
+  font-size: 16px;
+}
+div.content > div.content {
+  width: 300px;
+  margin-bottom: 5px;
+}
+ul {
+  list-style-type: none;
+}
+
+/* load spinners (currently disabled: the rules below are commented out) */
+/*
+.loader {
+  border: 16px solid #705e4b;
+  border-top: 16px solid #d7b073;
+  border-radius: 50%;
+  width: 80px;
+  height: 80px;
+  animation: spin 2s linear infinite;
+}
+@keyframes spin {
+  0% {
+    transform: rotate(0deg);
+  }
+  100% {
+    transform: rotate(360deg);
+  }
+}
+*/
diff --git a/src/codecarto/containers/web/src/pages/base.html b/src/codecarto/containers/web/src/pages/base.html
new file mode 100644
index 0000000..889f6dd
--- /dev/null
+++ b/src/codecarto/containers/web/src/pages/base.html
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+ CodeCartographer{% block title %}{% endblock %}
+
+
+
+
+
+
+
+ 

Code Cartographer

+ {% block content %} {% endblock %}
+
+
diff --git a/src/codecarto/containers/web/src/pages/base.js b/src/codecarto/containers/web/src/pages/base.js
new file mode 100644
index 0000000..73404ef
--- /dev/null
+++ b/src/codecarto/containers/web/src/pages/base.js
@@ -0,0 +1,66 @@
+/**
+ * Show an error message in a page element and optionally log it.
+ *
+ * @param {string} elementId - id of the element that displays the message.
+ * @param {string} message - text to display; it is assigned to innerHTML.
+ * @param {*} [consoleMessage] - if truthy, logged with console.error
+ *   ahead of `message`.
+ */
+function displayError(elementId, message, consoleMessage) {
+  const target = document.getElementById(elementId)
+  if (target) {
+    // NOTE(review): innerHTML renders any markup in `message`; use
+    // textContent instead if messages can contain untrusted text.
+    target.innerHTML = message
+  } else {
+    // A missing element must not hide the error being reported.
+    console.error(`displayError: no element with id "${elementId}"`, message)
+  }
+  if (consoleMessage) {
+    console.error(consoleMessage, message)
+  }
+}
+
+function convertListToButtons(data) {
+  // Convert