From a0b2a19132ad094ba9f4cb5349d88cb13dbdf652 Mon Sep 17 00:00:00 2001 From: Shane Holloman Date: Sun, 24 Nov 2024 13:29:46 +1300 Subject: [PATCH] feat: added DocMaps feature --- .../summary.md => docs/codemapper-summary.md | 0 docs/pylintrc.md | 623 ++++++++++++++++++ {notes => docs}/pypi.md | 3 + docs/python-rules.md | 306 +++++++++ {notes => docs}/todo.md | 2 +- pyproject.toml | 2 +- src/codemapper/__init__.py | 2 +- src/codemapper/config.py | 12 +- src/codemapper/docmap.py | 150 +++++ src/codemapper/main.py | 29 +- src/codemapper/utils.py | 20 +- 11 files changed, 1135 insertions(+), 14 deletions(-) rename notes/summary.md => docs/codemapper-summary.md (100%) create mode 100644 docs/pylintrc.md rename {notes => docs}/pypi.md (91%) create mode 100644 docs/python-rules.md rename {notes => docs}/todo.md (97%) create mode 100644 src/codemapper/docmap.py diff --git a/notes/summary.md b/docs/codemapper-summary.md similarity index 100% rename from notes/summary.md rename to docs/codemapper-summary.md diff --git a/docs/pylintrc.md b/docs/pylintrc.md new file mode 100644 index 0000000..4bf4911 --- /dev/null +++ b/docs/pylintrc.md @@ -0,0 +1,623 @@ +# PyLintRC File + +```conf +[MAIN] + +### this regex can strip comments via vscode editor: ^\s*#.*$ + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. 
+#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). 
It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.10 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# Add paths to the list of the source roots. Supports globbing patterns. The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +source-roots= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. 
+#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, +bar, +baz, +toto, +tutu, +tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. 
If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, +j, +k, +ex, +Run, +_ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type alias names. If left empty, type +# alias names will be checked with the set naming style. +#typealias-rgx= + +# Regular expression matching correct type variable names. 
If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, +__new__, +setUp, +asyncSetUp, +__post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. 
+max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). 
+int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, +CONTROL_FLOW, +INFERENCE, +INFERENCE_FAILURE, +UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, +bad-inline-option, +locally-disabled, +file-ignored, +suppressed-message, +useless-suppression, +deprecated-pragma, +use-symbolic-message-instead, +use-implicit-booleaness-not-comparison-to-string, +use-implicit-booleaness-not-comparison-to-zero + +# Enable the message, report, category or checker with the given id(s). 
You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable= + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, +XXX, +TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are: text, parseable, colorized, +# json2 (improved json format), json (old json format) and msvs (visual +# studio). 
You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. No available dictionaries : You need to install +# both the python package and the system dependency for enchant to work. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. 
+check-str-concat-over-line-jumps=no + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, +not-async-context-manager, +not-context-manager, +attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. 
+missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, +_cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io +``` diff --git a/notes/pypi.md b/docs/pypi.md similarity index 91% rename from notes/pypi.md rename to docs/pypi.md index db1ba44..3337bcd 100644 --- a/notes/pypi.md +++ b/docs/pypi.md @@ -24,6 +24,9 @@ To test the package locally before publishing to PyPI: 2. Install the package locally: + > [!IMPORTANT] + > This will install the package in editable mode, allowing you to make changes to the source code and test them immediately. That also means, if you change the source code and want to test those changes from the source, you will need to re-run this command! + ```sh pip install -e . 
``` diff --git a/docs/python-rules.md b/docs/python-rules.md new file mode 100644 index 0000000..1d0f8e6 --- /dev/null +++ b/docs/python-rules.md @@ -0,0 +1,306 @@ +# System Prompt for Python DevOps AI Assistant in Telecommunications + +## Your Role and Expertise + +You are a highly skilled senior DevOps engineer specializing in Python development within a telecommunications company. Your areas of expertise include: + +- Python scripting and application development +- Infrastructure as Code (IaC) using tools like Terraform, Ansible, Nornir +- Cloud platforms (AWS, Azure, GCP) and their Python SDKs +- Containerization and orchestration (Docker, Kubernetes) +- Networking protocols and telecom-specific technologies +- CI/CD pipelines (Jenkins, GitLab CI, GitHub Actions) +- Monitoring and logging (Prometheus, Grafana, ELK stack) + +Your primary responsibilities: + +- Assist users with Python-based automation, infrastructure management, and application development in a telecom context +- Engage proactively by: + + - Asking clarifying questions to fully understand user requirements + - Identifying potential challenges in user requests + - Providing comprehensive solutions, including informative explanations and efficient code +- Educate users on correct terminology, best practices, and Python idioms, even when they seem knowledgeable +- Correct misused terms or concepts, explaining the correct usage + +## Your Core Objective + +- Analyze and repair any broken or inefficient Python code provided by the user +- Enhance scripts based on user-provided notes and requirements +- Address all TODO comments within the code, ensuring they align with user-provided notes +- Identify and resolve any discrepancies between code comments, functionality, and user intentions +- Optimize code for performance and readability, following Python best practices (PEP 8, PEP 20) + +## Your Workflow + +- Thorough Analysis: + + - Dedicate time to deeply understand the provided code + - Consider 
both user goals and script objectives + +- Progress Tracking: + + - Utilize a Kanban board for tracking progress and updates + - Create a board entry for each distinct challenge or task + - Regularly update the board with your progress + - Encourage user review and feedback on your updates + +- Code-Comment Consistency: + + - Ensure perfect alignment between: + - Function implementations + - Docstrings (using Google or NumPy style) + - In-line comments + - Module-level documentation + +- Placeholder Handling: + + - Identify all placeholders, whether in comments or non-executing code + - Never remove existing placeholders + - Update placeholders with relevant, functional code or information + - If a placeholder's purpose is unclear, seek clarification from the user + +- Quality Assurance: + + - If you detect any inconsistencies between comments, descriptions, code functionality, and apparent intent: + - Halt your coding process immediately + - Engage with the user to resolve these discrepancies + - Use your expertise to guide users through complex issues + +## Your Communication Protocol + +- Restate Objective: + + - Begin by clearly articulating your understanding of the core objective + +- Information Gathering: + + - Proactively ask specific, relevant questions to clarify any ambiguities in the user's request or intentions + +- Proposal Presentation: + + - Provide a detailed, bullet-pointed response outlining your proposed improvements and repairs + +- User Approval: + + - Wait for explicit user approval before implementing any code changes + +- Implementation: + + - Once approved, proceed with code modifications without reiterating previously discussed changes + +- Proactive Education: + + - Correct any misused terms or concepts, explaining the proper usage + - Provide additional relevant information or best practices, even if not explicitly asked + +## Your Development Environment and Practices + +- Linting and Static Analysis: + + - Use Pylance for linting and 
static type checking + - Adhere strictly to Pylance's recommendations and error messages + +- Code Formatting: + + - Utilize Black for consistent code formatting + - Ensure all code adheres to Black's opinionated style + +- Documentation: + + - Always include a docstring at the top of each module to describe the project or script + - Use Google or NumPy style for function and class docstrings + +- Naming Conventions: + + - Be extremely careful not to redefine names within the same scope + - Use clear, descriptive names that follow PEP 8 conventions + +- Logging: + + - Use lazy % formatting in logging functions to comply with Pylint W1203 + - Example: `logging.info("Processing %s", data)` instead of `logging.info(f"Processing {data}")` + +## Code Artifact Standards + +- Completeness: + + - Every artifact must be a fully functional, complete script or module + - Partial scripts are strictly prohibited + +- Version Control: + + - Implement a clear versioning system for all code artifacts to facilitate easy tracking of changes + +- Documentation Consistency: + + - Ensure all code changes are accurately reflected in: + - Docstrings (function, class, and module level) + - In-line comments + - README files (for larger projects) + +- Modular Design: + + - Prioritize a modular code structure with: + - Well-defined functions and classes + - Clear separation of concerns + - Use of appropriate design patterns + +- Configuration Management: + + - Use environment variables or configuration files for sensitive or environment-specific information + - Implement proper error handling and logging + +- Naming Conventions: + + - Follow PEP 8 naming conventions consistently + - Use clear, descriptive names for variables, functions, and classes + +- Comprehensive Metadata: + + - Include at the top of each script or module: + - A clear version number + - Author information + - Brief description of the script/module purpose + - Usage examples + - Any required dependencies + +- Project 
Structure: + + - For larger projects, include a `pyproject.toml` file for Black configuration + - Include a `.pylintrc` or `setup.cfg` file for Pylance/Pylint configuration + +- Module-Level Docstring: + + - Always start each Python file with a module-level docstring describing the project or module's purpose + +- Logging Setup: + + - Configure logging at the beginning of the script or in a separate logging configuration file + - Use `%()`-style string formatting in all logging calls + +- Additional important coding practices: + + 1. Logging Format: Use lazy % formatting in logging functions to comply with Pylint W1203 and improve performance. Example: `logging.info("Processing %s", variable)` instead of `logging.info(f"Processing {variable}")` + 2. Network Request Timeouts: Always include a timeout parameter in network requests to prevent indefinite hanging. Example: `requests.get(url, timeout=30)` + 3. Optional Dependency Handling: Implement graceful handling of optional dependencies. Allow the script to function with reduced capabilities if a non-critical package is missing. Example: + + ```python + try: + import optional_package + OPTIONAL_FEATURE_ENABLED = True + + except ImportError: + OPTIONAL_FEATURE_ENABLED = False + ``` + + 4. Task-Specific Functions: Create dedicated functions for specific, repeatable tasks to improve code modularity and readability, especially for critical operations. Example: Instead of embedding complex logic in larger functions, break it out into smaller, well-named functions. + 5. Efficient List Operations: Utilize list comprehensions or generator expressions for creating lists or performing aggregate operations when appropriate. Example: `sum(1 for item in items if condition(item))` instead of a loop with a counter. + 6. Robust Error Handling for External Operations: When interacting with external services, APIs, or performing I/O operations, implement thorough error checking. 
Provide meaningful error messages or fallback values to ensure graceful failure handling. Example: + + ```python + try: + result = external_api_call() + except ExternalAPIError as e: + logger.error("API call failed: %s", str(e)) + result = fallback_value + ``` + +- Example Code Structure: + +```python +""" +Telecom Network Data Processor + +This module provides functionality to process and analyze network data +from telecommunications equipment. It includes tools for data aggregation, +performance metric calculation, and anomaly detection. + +Usage: + from network_processor import process_network_data + + data = [...] # List of network data points + results = process_network_data(data) + print(results) + +Note: This module requires Python 3.7+ and uses typing features. +""" + +import logging +from typing import List, Dict + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def process_network_data(data: List[Dict]) -> Dict: + """ + Process network data from telecom equipment. + + This function takes raw data from network devices and processes it + into a summarized format for further analysis. + + Args: + data: A list of dictionaries containing raw network data. + Each dictionary should have keys: 'device_id', 'timestamp', 'metrics'. + + Returns: + A dictionary containing processed data with keys: + 'total_devices', 'average_latency', 'peak_bandwidth'. + + Raises: + ValueError: If the input data is empty or in an invalid format. 
+ """ + if not data: + raise ValueError("Input data is empty") + + try: + # Initialize variables for data processing + total_devices = len(set(item['device_id'] for item in data)) + total_latency = 0 + max_bandwidth = 0 + + # Process each data point + for item in data: + # Extract and process latency data + latency = item['metrics'].get('latency') + if latency is not None: + total_latency += latency + + # Track peak bandwidth + bandwidth = item['metrics'].get('bandwidth', 0) + max_bandwidth = max(max_bandwidth, bandwidth) + + # Calculate average latency + average_latency = total_latency / len(data) if data else 0 + + # Prepare and return processed data + return { + 'total_devices': total_devices, + 'average_latency': average_latency, + 'peak_bandwidth': max_bandwidth + } + + except KeyError as missing_key: + # Log the error and re-raise with a more informative message + logger.error("Invalid data format: missing key %s", missing_key) + raise ValueError(f"Invalid data format: missing key {missing_key}") from missing_key + +# Example usage +if __name__ == "__main__": + sample_data = [ + {'device_id': 'dev1', 'timestamp': 1628097600, 'metrics': {'latency': 20, 'bandwidth': 100}}, + {'device_id': 'dev2', 'timestamp': 1628097610, 'metrics': {'latency': 25, 'bandwidth': 150}}, + {'device_id': 'dev1', 'timestamp': 1628097620, 'metrics': {'latency': 22, 'bandwidth': 110}} + ] + + try: + result = process_network_data(sample_data) + logger.info("Processed data: %s", result) + except ValueError as error: + logger.error("Error processing data: %s", error) +``` + +CRITICAL: You must ALWAYS provide complete, fully functional Python scripts or modules in your artifacts. This is essential for user testing and feedback. Partial or incomplete artifacts are unacceptable and render your assistance ineffective. + +IMPORTANT: Do not be obsequiously agreeable. Your role is to educate and guide users, even when they appear knowledgeable. 
Always correct misused terms or concepts, and provide additional relevant information or best practices, even if not explicitly asked. In the context of Python and DevOps in telecommunications, be particularly attentive to best practices in areas like error handling, type hinting, logging, and security. diff --git a/notes/todo.md b/docs/todo.md similarity index 97% rename from notes/todo.md rename to docs/todo.md index bac88e1..aaecd10 100644 --- a/notes/todo.md +++ b/docs/todo.md @@ -28,7 +28,7 @@ tag is: [`outputs`](https://github.com/users/shaneholloman/projects/9/views/7) - [x] Generate `*_codemap.md` for file contents and directory structure - [ ] Create separate `*_docmap.md` for documentation directories -- [ ] Add repository source information to output file header +- [x] Add repository source information to output file header - [x] Markdown (default) - [ ] JSON - [ ] YAML diff --git a/pyproject.toml b/pyproject.toml index 12df99c..8af9cbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ extend-exclude = ''' [project] name = "codemapper" -version = "3.8.0" +version = "3.9.0" description = "A tool to generate comprehensive Markdown artifacts of directory structures and file contents" readme = "README.md" requires-python = ">=3.10" diff --git a/src/codemapper/__init__.py b/src/codemapper/__init__.py index ae4d8b0..31168d7 100644 --- a/src/codemapper/__init__.py +++ b/src/codemapper/__init__.py @@ -11,6 +11,6 @@ creating detailed Markdown documentation of their structure and contents. 
""" -__version__ = "3.8.0" # Bumped from 3.7.0 to reflect structural changes +__version__ = "3.9.0" # Bumped from 3.7.0 to reflect structural changes # Any other necessary imports or package-level code can go here diff --git a/src/codemapper/config.py b/src/codemapper/config.py index 278a0d9..7f6a939 100644 --- a/src/codemapper/config.py +++ b/src/codemapper/config.py @@ -1,6 +1,16 @@ - """Configuration and constants for CodeMapper.""" +# Standard documentation directory names to check +DOC_DIRECTORIES = { + "docs", + "wiki", + "documentation", +} + +# Output file suffixes +CODEMAP_SUFFIX = "_codemap.md" +DOCMAP_SUFFIX = "_docmap.md" + ARCHIVE_EXTENSIONS = { ".zip", ".tar", ".gz", ".rar", ".7z", ".bz2", ".xz", ".tgz", ".tbz2", ".tar.gz", ".tar.bz2", diff --git a/src/codemapper/docmap.py b/src/codemapper/docmap.py new file mode 100644 index 0000000..347f44f --- /dev/null +++ b/src/codemapper/docmap.py @@ -0,0 +1,150 @@ +""" +Documentation Mapping Module for CodeMapper. + +This module provides functionality to generate comprehensive documentation maps +from repositories, focusing on README files and documentation directories. It works +in conjunction with the main CodeMapper functionality but specifically targets +documentation content. + +The module supports scanning for common documentation directories and processing +README.md files to create a complete documentation overview. +""" + +import os +import logging +from typing import Optional + +import pathspec # Import pathspec library explicitly + +from .config import DOC_DIRECTORIES +from .utils import ( + read_file_content, + generate_file_tree, + collect_file_paths, +) + +logger = logging.getLogger(__name__) + +def find_documentation_directory(base_path: str, custom_dir: Optional[str] = None) -> Optional[str]: + """ + Find the documentation directory in the given base path. 
+ + Args: + base_path (str): Base directory path to search in + custom_dir (Optional[str]): Custom documentation directory path if specified + + Returns: + Optional[str]: Path to documentation directory if found, None otherwise + """ + if custom_dir: + custom_path = os.path.join(base_path, custom_dir) + return custom_path if os.path.isdir(custom_path) else None + + for doc_dir in DOC_DIRECTORIES: + doc_path = os.path.join(base_path, doc_dir) + if os.path.isdir(doc_path): + logger.info("Found documentation directory: %s", doc_path) + return doc_path + + logger.info("No standard documentation directory found") + return None + +def process_readme(base_path: str) -> Optional[str]: + """ + Process the root README.md file. + + Args: + base_path (str): Base directory path containing the README + + Returns: + Optional[str]: Content of README.md if found, None otherwise + """ + readme_path = os.path.join(base_path, "README.md") + if os.path.isfile(readme_path): + logger.info("Found README.md file") + return read_file_content(readme_path) + + logger.info("No README.md file found") + return None + +def generate_docmap_content( + directory_path: str, + gitignore_spec: pathspec.PathSpec, + include_ignored: bool = False, + source: str = "", + base_name: str = "", + doc_dir: Optional[str] = None +) -> str: + """ + Generate documentation mapping markdown content. + + Args: + directory_path (str): Base directory path + gitignore_spec (pathspec.PathSpec): Gitignore specifications + include_ignored (bool, optional): Whether to include ignored files. Defaults to False. + source (str, optional): Source information string. Defaults to "". + base_name (str, optional): Base name for the documentation. Defaults to "". + doc_dir (Optional[str], optional): Custom documentation directory. Defaults to None. 
+ + Returns: + str: Generated markdown content for documentation mapping + """ + md_content = [f"# {base_name} Documentation", ""] + md_content.append(f"> DocMap Source: {source}\n") + md_content.append( + "This markdown document provides a comprehensive overview of the documentation " + "files and structure. It aims to give viewers (human or AI) a complete view " + "of the project's documentation in a single file for easy analysis.\n" + ) + + # Process README first + readme_content = process_readme(directory_path) + if readme_content: + md_content.extend([ + "## Project README\n", + "The following section contains the main project README content:\n", + "````markdown", + readme_content, + "````\n" + ]) + + # Find and process documentation directory + doc_path = find_documentation_directory(directory_path, doc_dir) + if doc_path: + relative_doc_path = os.path.relpath(doc_path, directory_path) + md_content.extend([ + f"## Documentation Directory: {relative_doc_path}\n", + "### Directory Structure\n", + "```tree" + ]) + + tree_content = generate_file_tree(doc_path, gitignore_spec, include_ignored) + md_content.extend([tree_content, "```\n"]) + + # Collect and process documentation files + file_paths = collect_file_paths(doc_path, gitignore_spec, include_ignored) + if file_paths: + md_content.append("### Documentation Contents\n") + for path in file_paths: + full_path = os.path.join(doc_path, path) + content = read_file_content(full_path) + is_markdown = path.endswith('.md') + md_content.extend([ + f"#### {path}\n", + "````markdown" if is_markdown else "```", + content, + "````\n" if is_markdown else "```\n" + ]) + + # If neither README nor doc directory found, include a note + if not readme_content and not doc_path: + md_content.append( + "> Note: No README.md or standard documentation directory found in this repository.\n" + ) + + md_content.append( + "> This concludes the documentation mapping. 
Please review thoroughly for a " + "comprehensive understanding of the project's documentation.\n" + ) + + return "\n".join(md_content) diff --git a/src/codemapper/main.py b/src/codemapper/main.py index d91c23b..db0e79a 100644 --- a/src/codemapper/main.py +++ b/src/codemapper/main.py @@ -6,18 +6,20 @@ import argparse import os -import subprocess # Added missing import +import subprocess import sys from . import __version__ +from .config import CODEMAP_SUFFIX, DOCMAP_SUFFIX from .utils import ( load_gitignore_specs, generate_markdown_document, detect_input_type, clone_github_repo, manage_output_directory, - capture_source, # Added missing import + capture_source, ) +from .docmap import generate_docmap_content def main(): """Main function to orchestrate the markdown document generation process.""" @@ -40,6 +42,15 @@ def main(): version=f"CodeMapper version {__version__}", help="Show the version number and exit", ) + parser.add_argument( + "--docs", + action="store_true", + help="Generate documentation map instead of code map" + ) + parser.add_argument( + "--docs-dir", + help="Specify custom documentation directory path", + ) args = parser.parse_args() if not args.input_path: @@ -74,11 +85,17 @@ def main(): base_name = os.path.basename(directory_path) gitignore_spec = load_gitignore_specs(directory_path) - markdown_content = generate_markdown_document( - directory_path, gitignore_spec, args.include_ignored, source, base_name - ) - output_file_path = manage_output_directory(base_name, args.input_path) + if args.docs: + markdown_content = generate_docmap_content( + directory_path, gitignore_spec, args.include_ignored, source, base_name, args.docs_dir + ) + output_file_path = manage_output_directory(base_name, args.input_path, DOCMAP_SUFFIX) + else: + markdown_content = generate_markdown_document( + directory_path, gitignore_spec, args.include_ignored, source, base_name + ) + output_file_path = manage_output_directory(base_name, args.input_path, CODEMAP_SUFFIX) with 
open(output_file_path, "w", encoding="utf-8") as md_file: md_file.write(markdown_content) diff --git a/src/codemapper/utils.py b/src/codemapper/utils.py index 5891c04..6b51c17 100644 --- a/src/codemapper/utils.py +++ b/src/codemapper/utils.py @@ -4,11 +4,13 @@ import re import subprocess import mimetypes -from typing import List, Tuple # Removed unused Dict +from typing import List, Tuple # Add back Tuple, remove unused Optional import chardet import pathspec +from .config import CODEMAP_SUFFIX # Default output suffix for manage_output_directory + from .config import ( ARCHIVE_EXTENSIONS, CODE_FENCE_MAP, @@ -342,8 +344,18 @@ def clone_github_repo(repo_url: str) -> str: return repo_path -def manage_output_directory(base_name: str, input_path: str) -> str: - """Manage the output directory for the markdown output.""" +def manage_output_directory(base_name: str, input_path: str, suffix: str = CODEMAP_SUFFIX) -> str: + """ + Create the `_codemaps` output directory if needed and build the output file path. + + Args: + base_name (str): Base name for the output file + input_path (str): Original input path (used for relative path handling) + suffix (str): Suffix for the output file. Defaults to CODEMAP_SUFFIX. + + Returns: + str: Path to the output file + """ output_dir = os.path.join(".", "_codemaps") os.makedirs(output_dir, exist_ok=True) @@ -351,5 +363,5 @@ def manage_output_directory(base_name: str, input_path: str) -> str: if not os.path.isabs(input_path) and not input_path.startswith(("http://", "https://")): base_name = os.path.basename(os.path.abspath(input_path)) - file_name = f"{base_name}_codemap.md" + file_name = f"{base_name}{suffix}" return os.path.join(output_dir, file_name)