feat: add local LLM support with llama.cpp (#8)
fynnfluegge authored Sep 13, 2023
1 parent a28ecaf commit 9032c35
Showing 16 changed files with 224 additions and 132 deletions.
56 changes: 46 additions & 10 deletions README.md
@@ -10,7 +10,11 @@

<div align="center">

Focus on writing your code, let AI write the documentation for you. With just a few keystrokes in your terminal.
Focus on writing your code, let AI write the documentation for you.

With just a few keystrokes in your terminal, using the OpenAI API or 100% local LLMs without any data leaks.

Powered by [langchain](https://github.com/langchain-ai/langchain), [llama.cpp](https://github.com/ggerganov/llama.cpp) and [treesitter](https://github.com/tree-sitter/tree-sitter).

![ezgif-4-53d6e634af](https://github.com/fynnfluegge/doc-comments.ai/assets/16321871/8f2756cb-36f9-43c6-94b1-658b89b49786)

@@ -22,15 +26,32 @@ Focus on writing your code, let AI write the documentation for you. With just a
- e.g. Javadoc, JSDoc, Docstring, Rustdoc
- ✍️ Create inline documentation comments in method bodies
- 🌳 Treesitter integration
- 💻 Local LLM support

> [!NOTE]
> Documentation will only be added to files without unstaged changes, so that nothing is overwritten.
## 🚀 Usage
- `aicomments <RELATIVE_FILE_PATH>`: Create documentation for any method in the file that doesn't have one yet.
- `aicomments <RELATIVE_FILE_PATH> --inline`: Also create documentation comments in the method body.
- `aicomments <RELATIVE_FILE_PATH> --gpt4`: Use the GPT-4 model (default is GPT-3.5).
- `aicomments <RELATIVE_FILE_PATH> --guided`: Guided mode, confirm documentation generation for each method.
Create documentation for any method in the file with the GPT-3.5 Turbo model:
```
aicomments <RELATIVE_FILE_PATH>
```
Also create documentation comments in the method body:
```
aicomments <RELATIVE_FILE_PATH> --inline
```
Use the GPT-4 model (default is GPT-3.5):
```
aicomments <RELATIVE_FILE_PATH> --gpt4
```
Guided mode, confirm documentation generation for each method:
```
aicomments <RELATIVE_FILE_PATH> --guided
```
Use a local LLM on your machine:
```
aicomments <RELATIVE_FILE_PATH> --local --model_path <RELATIVE_MODEL_PATH>
```
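For example, assuming a quantized model (e.g. in GGUF format) has already been downloaded into a local `models` folder (the file and model names below are only illustrative):
```
aicomments src/example.py --local --model_path models/llama-2-7b-chat.Q4_K_M.gguf
```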

## ⚙️ Supported Languages
- [x] Python
@@ -49,8 +70,8 @@ Focus on writing your code, let AI write the documentation for you.
- Python >= 3.9

## 🔧 Installation

Create your personal OpenAI Api key and add it as `$OPENAI_API_KEY` to your environment with:
### 1. OpenAI API usage
Create your personal OpenAI API key and add it as `$OPENAI_API_KEY` to your environment with:

```
export OPENAI_API_KEY=<YOUR_API_KEY>
@@ -62,9 +83,24 @@ Install with `pipx`:
pipx install doc-comments-ai
```

> [!NOTE]
> It is recommended to use `pipx` for installation; nonetheless, it is also possible to use `pip`.
## 🚨 Disclaimer
### 2. Local LLM usage
When using a local LLM, no API key is required. The recommended way to install is with `pip`, since `CMake` arguments need to be passed to the `llama.cpp` build for better performance, which is not possible with `pipx`.
You can also use the OpenAI API with this installation.

Your code won't be stored, but your code does leave your machine.
> [!WARNING]
> If you are sensitive about your global `pip` packages, you may consider checking out the repo and installing and running it manually with `poetry` or `conda`.
See the following instructions for passing the right `CMake` arguments for your machine: [installation-with-hardware-acceleration](https://github.com/abetlen/llama-cpp-python#installation-with-hardware-acceleration)
and install `llama-cpp-python` with your desired hardware acceleration, e.g. for Metal on a Mac run:
```
CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python
```
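Other accelerators follow the same pattern with a different `CMAKE_ARGS` value, e.g. cuBLAS on NVIDIA GPUs; consult the linked `llama-cpp-python` instructions for the exact flags that apply to your hardware:
```
CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
```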

To install `doc-comments.ai`, which will then use your previously installed `llama.cpp` build, run:
```
pip install doc-comments-ai
```
> [!IMPORTANT]
> The results when using a local LLM will be highly affected by the selected model. To get results similar to GPT-3.5/4 you need to select very large models, which require powerful hardware.
31 changes: 21 additions & 10 deletions doc_comments_ai/app.py
@@ -6,8 +6,7 @@

from doc_comments_ai import domain, llm, utils
from doc_comments_ai.llm import GptModel
from doc_comments_ai.treesitter.treesitter import (Treesitter, TreesitterNode,
get_source_from_node)
from doc_comments_ai.treesitter import Treesitter, TreesitterMethodNode


def run():
@@ -21,6 +20,16 @@ def run():

parser = argparse.ArgumentParser()
parser.add_argument("dir", nargs="?", default=os.getcwd())
parser.add_argument(
"--local",
action="store_true",
help="Uses the local version of the LLM model.",
)
parser.add_argument(
"--model_path",
type=str,
help="Path to the local model.",
)
parser.add_argument(
"--inline",
action="store_true",
@@ -53,7 +62,7 @@ def run():
if args.gpt4:
llm_wrapper = llm.LLM(model=GptModel.GPT_4)
else:
llm_wrapper = llm.LLM()
llm_wrapper = llm.LLM(local=args.local, model_path=args.model_path)

generated_doc_comments = {}

@@ -65,23 +74,25 @@ def run():
programming_language = utils.get_programming_language(file_extension)

treesitter_parser = Treesitter.create_treesitter(programming_language)
treesitterNodes: list[TreesitterNode] = treesitter_parser.parse(file_bytes)
treesitterNodes: list[TreesitterMethodNode] = treesitter_parser.parse(
file_bytes
)

for node in treesitterNodes:
method_name = utils.get_bold_text(node.name)

if args.guided:
print(f"Generate doc for {utils.get_bold_text(method_name)}? (y/n)")
if not input().lower() == "y":
continue

if node.doc_comment:
print(
f"⚠️ Method {method_name} already has a doc comment. Skipping..."
)
continue

method_source_code = get_source_from_node(node.node)
if args.guided:
print(f"Generate doc for {utils.get_bold_text(method_name)}? (y/n)")
if not input().lower() == "y":
continue

method_source_code = node.node.text.decode()

tokens = utils.count_tokens(method_source_code)
if tokens > 2048 and not args.gpt4:
34 changes: 28 additions & 6 deletions doc_comments_ai/llm.py
@@ -1,6 +1,8 @@
from enum import Enum

from langchain import LLMChain, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.llms import LlamaCpp


class GptModel(Enum):
@@ -9,14 +11,32 @@ class GptModel(Enum):


class LLM:
def __init__(self, model: GptModel = GptModel.GPT_35):
def __init__(
self,
model: GptModel = GptModel.GPT_35,
local: bool = False,
model_path: str | None = None,
):
max_tokens = 2048 if model == GptModel.GPT_35 else 4096
self.llm = ChatOpenAI(temperature=0.9, max_tokens=max_tokens, model=model.value)
if local:
if model_path is None:
raise ValueError("model_path must be set in local mode.")

self.llm = LlamaCpp(
model_path=model_path,
temperature=0.9,
max_tokens=max_tokens,
verbose=False,
)
else:
self.llm = ChatOpenAI(
temperature=0.9, max_tokens=max_tokens, model=model.value
)
self.template = (
"I have this {language} method:\n{code}\nAdd a doc comment to the method. "
"Add a detailed doc comment to the following {language} method:\n{code}\n"
"The doc comment should describe what the method does. "
"{inline_comments} "
"Return the method implementaion with the doc comment as a markdown code block. "
"{inline_comments}"
"Don't include any explanations in your response."
)
self.prompt = PromptTemplate(
@@ -31,7 +51,9 @@ def generate_doc_comment(self, language, code, inline=False):
"""

if inline:
inline_comments = "Add inline comments to the code if necessary."
inline_comments = (
"Add inline comments to the method body where it makes sense."
)
else:
inline_comments = ""

1 change: 1 addition & 0 deletions doc_comments_ai/treesitter/__init__.py
@@ -0,0 +1 @@
from doc_comments_ai.treesitter.treesitter import Treesitter, TreesitterMethodNode
9 changes: 3 additions & 6 deletions doc_comments_ai/treesitter/treesitter.py
@@ -7,7 +7,7 @@
from doc_comments_ai.treesitter.treesitter_registry import TreesitterRegistry


class TreesitterNode:
class TreesitterMethodNode:
def __init__(
self,
name: "str | bytes | None",
@@ -16,6 +16,7 @@ def __init__(
):
self.name = name
self.doc_comment = doc_comment
self.method_source_code = node.text.decode()
self.node = node


@@ -29,7 +30,7 @@ def create_treesitter(language: Language) -> "Treesitter":
return TreesitterRegistry.create_treesitter(language)

@abstractmethod
def parse(self, file_bytes: bytes) -> list[TreesitterNode]:
def parse(self, file_bytes: bytes) -> list[TreesitterMethodNode]:
self.tree = self.parser.parse(file_bytes)
pass

Expand All @@ -47,7 +48,3 @@ def _query_method_name(self, node: tree_sitter.Node):
This function returns the name of a method node
"""
pass


def get_source_from_node(node: tree_sitter.Node) -> str:
return node.text.decode()
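A minimal sketch of how the renamed `TreesitterMethodNode` is consumed after this change, assuming the registry resolves the language to the matching parser as in the language-specific classes below; the file name is only illustrative:
```python
from doc_comments_ai.constants import Language
from doc_comments_ai.treesitter import Treesitter, TreesitterMethodNode

# Read a Go source file as bytes (the file name is hypothetical)
with open("main.go", "rb") as file:
    file_bytes = file.read()

parser = Treesitter.create_treesitter(Language.GO)
nodes: list[TreesitterMethodNode] = parser.parse(file_bytes)

for node in nodes:
    # Each node carries the method name, an optional existing doc comment
    # and the decoded method source code.
    print(node.name, node.doc_comment, node.method_source_code)
```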
15 changes: 7 additions & 8 deletions doc_comments_ai/treesitter/treesitter_go.py
@@ -1,25 +1,24 @@
import tree_sitter
from doc_comments_ai.treesitter.treesitter import (
Treesitter,
TreesitterNode,
get_source_from_node,
)

from doc_comments_ai.constants import Language
from doc_comments_ai.treesitter.treesitter import Treesitter, TreesitterMethodNode
from doc_comments_ai.treesitter.treesitter_registry import TreesitterRegistry


class TreesitterGo(Treesitter):
def __init__(self):
super().__init__(Language.GO)

def parse(self, file_bytes: bytes) -> list[TreesitterNode]:
def parse(self, file_bytes: bytes) -> list[TreesitterMethodNode]:
super().parse(file_bytes)
result = []
methods = self._query_all_methods(self.tree.root_node)
for method in methods:
method_name = self._query_method_name(method["method"])
doc_comment = method["doc_comment"]
result.append(TreesitterNode(method_name, doc_comment, method["method"]))
result.append(
TreesitterMethodNode(method_name, doc_comment, method["method"])
)
return result

def _query_method_name(self, node: tree_sitter.Node):
@@ -34,7 +33,7 @@ def _query_all_methods(self, node: tree_sitter.Node):
if node.type == "function_declaration":
doc_comment_node = None
if node.prev_named_sibling and node.prev_named_sibling.type == "comment":
doc_comment_node = get_source_from_node(node.prev_named_sibling)
doc_comment_node = node.prev_named_sibling.text.decode()
methods.append({"method": node, "doc_comment": doc_comment_node})
else:
for child in node.children:
15 changes: 7 additions & 8 deletions doc_comments_ai/treesitter/treesitter_java.py
@@ -1,25 +1,24 @@
import tree_sitter
from doc_comments_ai.treesitter.treesitter import (
Treesitter,
TreesitterNode,
get_source_from_node,
)

from doc_comments_ai.constants import Language
from doc_comments_ai.treesitter.treesitter import Treesitter, TreesitterMethodNode
from doc_comments_ai.treesitter.treesitter_registry import TreesitterRegistry


class TreesitterJava(Treesitter):
def __init__(self):
super().__init__(Language.JAVA)

def parse(self, file_bytes: bytes) -> list[TreesitterNode]:
def parse(self, file_bytes: bytes) -> list[TreesitterMethodNode]:
super().parse(file_bytes)
result = []
methods = self._query_all_methods(self.tree.root_node)
for method in methods:
method_name = self._query_method_name(method["method"])
doc_comment = method["doc_comment"]
result.append(TreesitterNode(method_name, doc_comment, method["method"]))
result.append(
TreesitterMethodNode(method_name, doc_comment, method["method"])
)
return result

def _query_method_name(self, node: tree_sitter.Node):
@@ -37,7 +36,7 @@ def _query_all_methods(self, node: tree_sitter.Node):
node.prev_named_sibling
and node.prev_named_sibling.type == "block_comment"
):
doc_comment_node = get_source_from_node(node.prev_named_sibling)
doc_comment_node = node.prev_named_sibling.text.decode()
methods.append({"method": node, "doc_comment": doc_comment_node})
else:
for child in node.children:
15 changes: 7 additions & 8 deletions doc_comments_ai/treesitter/treesitter_js.py
@@ -1,25 +1,24 @@
import tree_sitter
from doc_comments_ai.treesitter.treesitter import (
Treesitter,
TreesitterNode,
get_source_from_node,
)

from doc_comments_ai.constants import Language
from doc_comments_ai.treesitter.treesitter import Treesitter, TreesitterMethodNode
from doc_comments_ai.treesitter.treesitter_registry import TreesitterRegistry


class TreesitterJavascript(Treesitter):
def __init__(self):
super().__init__(Language.JAVASCRIPT)

def parse(self, file_bytes: bytes) -> list[TreesitterNode]:
def parse(self, file_bytes: bytes) -> list[TreesitterMethodNode]:
super().parse(file_bytes)
result = []
methods = self._query_all_methods(self.tree.root_node)
for method in methods:
method_name = self._query_method_name(method["method"])
doc_comment = method["doc_comment"]
result.append(TreesitterNode(method_name, doc_comment, method["method"]))
result.append(
TreesitterMethodNode(method_name, doc_comment, method["method"])
)
return result

def _query_method_name(self, node: tree_sitter.Node):
@@ -34,7 +33,7 @@ def _query_all_methods(self, node: tree_sitter.Node):
if node.type == "function_declaration":
doc_comment_node = None
if node.prev_named_sibling and node.prev_named_sibling.type == "comment":
doc_comment_node = get_source_from_node(node.prev_named_sibling)
doc_comment_node = node.prev_named_sibling.text.decode()
methods.append({"method": node, "doc_comment": doc_comment_node})
else:
for child in node.children: