Merge pull request #377 from 1712n/release/mh-reporter-1.1.0

Release/mh reporter 1.1.0
1712n · Feb 27, 2024 · 574d372 · 574d372
2 parents eaa3978 + f895b35
commit 574d372
Show file tree

Hide file tree

Showing 10 changed files with 959 additions and 171 deletions.
diff --git a/.github/workflows/market-health-reporter.yml b/.github/workflows/market-health-reporter.yml
@@ -8,7 +8,7 @@ jobs:
     name: "Perform market analysis"
+    # Run only when the comment is on an issue without a pull_request field
+    # and its body contains the "analyze:" trigger.
     if: |
       !github.event.issue.pull_request &&
-      (contains(github.event.comment.body, 'openai:') || contains(github.event.comment.body, 'claude:'))
+      contains(github.event.comment.body, 'analyze:')
     
     steps:
       - uses: actions/checkout@v3
@@ -22,17 +22,48 @@ jobs:
       - name: Install package
         run: pipx install poetry && poetry install --no-interaction
 
-      - name: Set API key
-        run: |
-          if [[ "${{ github.event.comment.body }}" == *"openai:"* ]]; then
-            echo "LLM_API_KEY=${{ secrets.OPENAI_KEY }}" >> $GITHUB_ENV
-          elif [[ "${{ github.event.comment.body }}" == *"claude:"* ]]; then
-            echo "LLM_API_KEY=${{ secrets.LLM_API_KEY }}" >> $GITHUB_ENV
-
       - name: Run script
         run: |
+          # The comment text reaches the shell through COMMENT_BODY instead of
+          # being expanded into the script, so it is never parsed as shell code.
           poetry run market-health-reporter \
             --issue "${{ github.event.issue.number }}" \
-            --comment-body "${{ github.event.comment.body }}" \
+            --comment-body "$COMMENT_BODY" \
             --github-token "${{ secrets.GITHUB_TOKEN }}" \
-            --llm-api-key "${{ env.LLM_API_KEY }}"
+            --llm-api-key "${{ secrets.OPENAI_KEY }}" \
+            --rapid-api "${{ secrets.RAPID_API_KEY }}"
+        env:
+          COMMENT_BODY: ${{ github.event.comment.body }}
+
+      - name: Configure Git
+        run: |
+          git config --global user.email "action@github.com"
+          git config --global user.name "GitHub Action"
+
+      - name: Create new branch
+        run: |
+          git checkout -b new-branch-${{ github.run_id }}
+          echo "Creating a new branch"
+
+      - name: Add new files
+        run: |
+          git add .
+          echo "Adding new files"
+      
+      - name: Commit new files
+        run: |
+          git commit -m "Add new market analysis data and a report"
+          git status
+      
+      - name: Push changes
+        run: |
+          git push origin new-branch-${{ github.run_id }}
+          echo "Pushing changes to origin"
+          git log origin/new-branch-${{ github.run_id }} --oneline
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Create a PR from the branch with the commit
+        run: |
+          # The issue comment is untrusted input: it is read from the
+          # COMMENT_BODY environment variable rather than expanded into the
+          # script text, so quotes or $(...) in a comment cannot run commands.
+          PR_TITLE="$COMMENT_BODY"
+          PR_URL=$(gh pr create --fill --base main --head "new-branch-${{ github.run_id }}" --title "$PR_TITLE" --body "This PR adds new market analysis data and a report." --repo "${{ github.repository }}")
+          echo "PR created at URL: $PR_URL"
+          echo "PR_URL=$PR_URL" >> "$GITHUB_ENV"
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMMENT_BODY: ${{ github.event.comment.body }}
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,6 +21,8 @@ markdown2 = "^2.4.10"
 bs4 = "^0.0.1"
 anthropic = "^0.7.7"
 tenacity = "^8.2.3"
+matplotlib = "3.6.0"
+pandas = "2.0.1"
 
 [tool.poetry.scripts]
 fact-check = "tools.fact_checker:main"

diff --git a/tools/market_health_reporter.py b/tools/market_health_reporter.py
@@ -1,10 +1,22 @@
-from openai import OpenAI
-from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+import openai
+from tiktoken import encoding_for_model
 import argparse
 import json
 import os
+import requests
+import glob
 from github import Github
-from tools.claude_retriever.client import extract_between_tags
+from tools.utils import read_file, extract_between_tags
+from tools.report_graphics_tool import Visualization
+
+
+REPO_NAME = "1712n/dn-institute"
+SYSTEM_PROMPT_FILE = 'tools/market_health_reporter_doc/prompts/system_prompt.txt'
+HUMAN_PROMPT_FILE = 'tools/market_health_reporter_doc/prompts/prompt1.txt'
+ARTICLE_EXAMPLE_FILE = 'content/market-health/posts/2023-08-14-huobi/index.md'
+OUTPUT_DIR = 'content/market-health/posts/'
+DATA_DIR = 'tools/market_health_reporter_doc/data/'
+MAX_TOKENS = 125000
 
 
 def parse_cli_args():
@@ -24,104 +36,157 @@ def parse_cli_args():
     parser.add_argument(
         "--github-token", dest="github_token", help="Github token", required=True
     )
+    parser.add_argument(
+        "--rapid-api", dest="rapid_api", help="Rapid API key", required=True
+    )
     return parser.parse_args()
 
 
-def post_comment_to_issue(github_token, issue_number, repo_name, comment):
+def extract_data_from_comment(comment: str) -> tuple:
     """
-    Post a comment to a GitHub issue.
+    Extract the report parameters from an issue comment.
+
+    The comment is split on commas and must provide at least four fields:
+    "<trigger>:<pairid>, <marketvenueid>, <start>, <end>". Every field is
+    stripped of surrounding whitespace and the pair and venue identifiers are
+    lower-cased. Fields after the fourth are ignored.
+
+    Returns a (marketvenueid, pairid, start, end) tuple of strings.
+    Raises ValueError if the comment has fewer than four comma-separated
+    fields or if its first field contains no ':' separator.
     """
-    g = Github(github_token)
-    repo = g.get_repo(repo_name)
-    issue = repo.get_issue(number=issue_number)
-        # only post comment if running on Github Actions
-    if os.environ.get("GITHUB_ACTIONS") == "true":
-        issue.create_comment(comment)
+    parts = comment.split(',')
+    if len(parts) < 4:
+        raise ValueError(
+            "Expected a comment of the form "
+            "'analyze: <pairid>, <marketvenueid>, <start>, <end>'"
+        )
+    head = parts[0].split(':')
+    if len(head) < 2:
+        raise ValueError("The first field must look like 'analyze: <pairid>'")
+    marketvenueid = parts[1].strip().lower()
+    # The pair id is the text between the first and second ':' of the first field.
+    pairid = head[1].strip().lower()
+    start, end = parts[2].strip(), parts[3].strip()
+    return marketvenueid, pairid, start, end
 
 
-def main():
-    args = parse_cli_args()
-    repo_name = "1712n/dn-institute"
-
-    if "openai:" in args.comment_body:
-        with open('tools/market_health_reporter_doc/data/data1.json', 'r') as data_file:
-            data = json.load(data_file)
-
-        with open('tools/market_health_reporter_doc/openai/prompts/system_prompt.txt', 'r') as file:
-            SYSTEM_PROMPT = file.read()
-
-        with open('tools/market_health_reporter_doc/openai/prompts/prompt1.txt', 'r') as file:
-            HUMAN_PROMPT_CONTENT = file.read()
-
-        with open('content/market-health/posts/2023-08-14-huobi/index.md', 'r') as file:
-            article_example = file.read()
-
-
-        HUMAN_PROMPT_CONTENT = f"""
-        <example> %s </example>
-        {HUMAN_PROMPT_CONTENT}
-        <data> %s </data>
-        """
-
-        prompt = f"{HUMAN_PROMPT_CONTENT%(article_example, data)}"
-        print('This is a prompt: ', prompt)
-
-        client = OpenAI(api_key=args.API_key)
-
-        completion = client.chat.completions.create(
-        model="gpt-4",
-        messages=[
-            {"role": "system", "content": f"{SYSTEM_PROMPT}"},
-            {"role": "user", "content": f"{prompt}"}
-        ]
-        )
+def save_output(output: str, directory: str, marketvenueid: str, pairid: str, start: str, end: str) -> None:
+    """
+    Save the report as a markdown file in a per-request subdirectory.
+
+    The subdirectory is "<start>-<end>-<marketvenueid>-<pairid>" inside
+    `directory` and is created if missing. The first report is written to
+    index.md; when index*.md files already exist, the report goes to
+    index-<n>.md, where n is one more than the highest existing number.
+    """
+    # start/end are used verbatim here: main() rebuilds the same directory
+    # name from the raw values when it generates the report graphics.
+    output_subdir = os.path.join(directory, f"{start}-{end}-{marketvenueid}-{pairid}")
+    os.makedirs(output_subdir, exist_ok=True)
+    file_path = os.path.join(output_subdir, "index")
+
+    existing_files = glob.glob(f"{file_path}*.md")
+    if existing_files:
+        # Only names ending in "-<digits>.md" carry a number; a plain index.md
+        # yields a non-numeric suffix and is skipped.
+        suffixes = (name.split('-')[-1].split('.md')[0] for name in existing_files)
+        numbers = [int(suffix) for suffix in suffixes if suffix.isdigit()]
+        file_number = max(numbers, default=0) + 1
+        full_path = f"{file_path}-{file_number}.md"
+    else:
+        full_path = f"{file_path}.md"
+
+    with open(full_path, 'w', encoding='utf-8') as file:
+        file.write(output)
+    print(f"Output saved to: {full_path}")
 
-        output = completion.choices[0].message.content
-
-        output = extract_between_tags("article", output)
 
-        print("This is an answer: ", output)
+def save_data(data: str, directory: str, marketvenueid: str, pairid: str, start: str, end: str) -> None:
+    """
+    Write `data` (already serialized JSON text) to a file in `directory`.
+
+    The file is named "<marketvenueid>_<pairid>_<start>_<end>.json", with ':'
+    in start/end replaced by '-'. This is the same name file_exists() looks up.
+    """
+    file_name = f'{marketvenueid}_{pairid}_{start.replace(":", "-")}_{end.replace(":", "-")}.json'
+    # Join instead of concatenating so the result is the same whether or not
+    # `directory` ends with a path separator.
+    new_file_name = os.path.join(directory, file_name)
+    with open(new_file_name, 'w', encoding='utf-8') as file:
+        file.write(data)
 
-        #with open('tools/market_health_reporter_doc/openai/outputs/output1.md', 'w', encoding='utf-8') as file:
-            #file.write(output)   
 
-        post_comment_to_issue(args.github_token, int(args.issue), repo_name, output)
-
-    elif "claude:" in args.comment_body:
-        with open('tools/market_health_reporter_doc/data/data1.json', 'r') as data_file:
-            data = json.load(data_file)
+def file_exists(directory: str, marketvenueid: str, pairid: str, start: str, end: str) -> "str | None":
+    """
+    Check whether the data file for these parameters exists.
+
+    The expected name is "<marketvenueid>_<pairid>_<start>_<end>.json" in
+    `directory`, with ':' in start/end replaced by '-'.
+    Returns the path to the file if found, otherwise returns None.
+    """
+    file_name = f"{marketvenueid}_{pairid}_{start.replace(':', '-')}_{end.replace(':', '-')}.json"
+    path = os.path.join(directory, file_name)
+    # Exact lookup rather than glob: the ids are taken from an issue comment,
+    # and characters such as '*' must not match other files.
+    return path if os.path.isfile(path) else None
 
-        with open('tools/market_health_reporter_doc/claude/prompts/system_prompt.txt', 'r') as file:
-            SYSTEM_PROMPT = file.read()
 
-        with open('tools/market_health_reporter_doc/claude/prompts/prompt1.txt', 'r') as file:
-            HUMAN_PROMPT_CONTENT = file.read()
+def fetch_or_load_market_data(querystring: dict, headers: dict, url: str, directory: str, marketvenueid: str, pairid: str, start: str, end: str, timeout: float = 60) -> dict:
+    """
+    Return market data from the file saved for these parameters, if any.
+    Otherwise make an API request, save the response and return it.
+
+    `timeout` is the number of seconds passed to requests.get, so an
+    unresponsive API fails the request instead of blocking forever.
+    Raises requests.RequestException subclasses on HTTP errors or timeouts.
+    """
+    existing_file = file_exists(directory, marketvenueid, pairid, start, end)
+    if existing_file:
+        print(f"Loading data from existing file: {existing_file}")
+        with open(existing_file, 'r', encoding='utf-8') as file:
+            return json.load(file)
+
+    response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
+    response.raise_for_status()
+    data = response.json()
+    # Save the response under the name file_exists() looks for, so the same
+    # request is served from disk next time.
+    save_data(json.dumps(data), directory, marketvenueid, pairid, start, end)
+    return data
 
-        with open('content/market-health/posts/2023-08-14-huobi/index.md', 'r') as file:
-            article_example = file.read()
 
+def post_comment_to_issue(github_token, issue_number, repo_name, comment):
+    """
+    Post a comment to a GitHub issue.
+
+    Does nothing unless the GITHUB_ACTIONS environment variable is "true";
+    in that case no GitHub API call is made at all.
+    """
+    # only post comment if running on Github Actions
+    if os.environ.get("GITHUB_ACTIONS") != "true":
+        return
+    g = Github(github_token)
+    repo = g.get_repo(repo_name)
+    issue = repo.get_issue(number=issue_number)
+    issue.create_comment(comment)
 
-        HUMAN_PROMPT_CONTENT = f"""
-        <example> %s </example>
-        {HUMAN_PROMPT_CONTENT}
-        <data> %s </data>
-        """
-
-        prompt = f"{SYSTEM_PROMPT}{HUMAN_PROMPT}{HUMAN_PROMPT_CONTENT%(article_example, data)}{AI_PROMPT}"
-        print('This is a prompt: ', prompt)
 
-        completion = anthropic.completions.create(
-            model="claude-2.1",
-            max_tokens_to_sample=4000,
-            temperature=0,
-            prompt=prompt,
-        )
-
-        output = extract_between_tags("article", completion.completion)
+def create_prompt(article_example: str, data: dict, human_prompt_content: str) -> str:
+    """
+    Build the user prompt: the example article in <example> tags, then the
+    instructions, then the data serialized as JSON in <data> tags.
+    """
+    sections = (
+        f"<example> {article_example} </example>",
+        human_prompt_content,
+        f"<data> {json.dumps(data)} </data>",
+    )
+    return "\n".join(sections)
 
-        print("This is an answer: ", completion.completion)
 
-        #with open('tools/market_health_reporter_doc/claude/outputs/output1.md', 'w') as file:
-            #file.write(output)   
+def main():
+    """
+    Produce a market health report for the request in the issue comment.
+
+    Reads the prompt files, parses the comment, fetches (or loads saved)
+    market data, asks the chat model for an article, saves the article under
+    OUTPUT_DIR, runs Visualization.generate_report for the same directory and
+    posts the article to the issue.
+    """
+    args = parse_cli_args()
 
-        post_comment_to_issue(args.github_token, int(args.issue), repo_name, output)
+    system_prompt = read_file(SYSTEM_PROMPT_FILE)
+    human_prompt_content = read_file(HUMAN_PROMPT_FILE)
+    article_example = read_file(ARTICLE_EXAMPLE_FILE)
+
+    # Not inside the try below: a comment that cannot be parsed raises here.
+    marketvenueid, pairid, start, end = extract_data_from_comment(args.comment_body)
+    print(f"Marketvenueid: {marketvenueid}, Pairid: {pairid}, Start: {start}, End: {end}")
+    # start/end get a midnight time appended; granularity, order and row
+    # limit are fixed for every request.
+    querystring = {
+        "marketvenueid": marketvenueid,
+        "pairid": pairid,
+        "start": f"{start}T00:00:00",
+        "end": f"{end}T00:00:00",
+        "gran": "1h",
+        "sort": "asc",
+        "limit": "1000"
+    }
+    headers = {"X-RapidAPI-Key": args.rapid_api, "X-RapidAPI-Host": "cross-market-surveillance.p.rapidapi.com"}
+    url = "https://cross-market-surveillance.p.rapidapi.com/metrics/wt/market"
+
+    try:
+        data = fetch_or_load_market_data(querystring, headers, url, DATA_DIR, marketvenueid, pairid, start, end)
+
+        encoding = encoding_for_model("gpt-4")
+        # Informational only: counts tokens of the Python repr of the data,
+        # while the prompt below embeds the data as JSON.
+        print('num of data tokens: ', len(encoding.encode(str(data))))
+
+        prompt = create_prompt(article_example, data, human_prompt_content)
+        # Only the user prompt is counted; the system prompt is not included.
+        prompt_token_count = len(encoding.encode(prompt))
+
+        if prompt_token_count > MAX_TOKENS:
+            error_message = "Your request is too long. It's possible that the period for the data is too broad. Please narrow it down."
+            print(error_message)
+            post_comment_to_issue(args.github_token, int(args.issue), REPO_NAME, error_message)
+        else:
+            # NOTE(review): assumes the --llm-api-key option is stored as
+            # args.API_key; its definition is not visible here — confirm.
+            openai.api_key = args.API_key
+            # NOTE(review): openai.ChatCompletion looks like the pre-1.0
+            # client interface — confirm the pinned openai version provides it.
+            completion = openai.ChatCompletion.create(
+                model="gpt-4-0125-preview",
+                temperature=0.0,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": prompt}
+                ]
+            )
+            output = completion.choices[0].message.content
+            # Keep only the "article" tag content of the model reply.
+            output = extract_between_tags("article", output)
+
+            print("This is an answer: ", output)
+            save_output(output, OUTPUT_DIR, marketvenueid, pairid, start, end)
+            vis = Visualization()
+            # Same directory name that save_output() builds for the article.
+            output_subdir = os.path.join(OUTPUT_DIR, f"{start}-{end}-{marketvenueid}-{pairid}")
+            vis.generate_report(data, output_subdir)
+
+            post_comment_to_issue(args.github_token, int(args.issue), REPO_NAME, output)
+
+    # Any exception raised in the try block is only printed, not re-raised,
+    # so main() returns normally after a failure.
+    except Exception as e:
+        print(f"Error occurred: {e}")