Skip to content

Commit

Permalink
Merge pull request #377 from 1712n/release/mh-reporter-1.1.0
Browse files Browse the repository at this point in the history
Release/mh reporter 1.1.0
  • Loading branch information
marina-chibizova authored Feb 27, 2024
2 parents eaa3978 + f895b35 commit 574d372
Show file tree
Hide file tree
Showing 10 changed files with 959 additions and 171 deletions.
49 changes: 40 additions & 9 deletions .github/workflows/market-health-reporter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
name: "Perform market analysis"
# Run only for comments on issues (not on pull requests) that contain the 'analyze:' command.
if: |
  !github.event.issue.pull_request &&
  contains(github.event.comment.body, 'analyze:')
steps:
- uses: actions/checkout@v3
Expand All @@ -22,17 +22,48 @@ jobs:
- name: Install package
run: pipx install poetry && poetry install --no-interaction

- name: Set API key
run: |
if [[ "${{ github.event.comment.body }}" == *"openai:"* ]]; then
echo "LLM_API_KEY=${{ secrets.OPENAI_KEY }}" >> $GITHUB_ENV
elif [[ "${{ github.event.comment.body }}" == *"claude:"* ]]; then
echo "LLM_API_KEY=${{ secrets.LLM_API_KEY }}" >> $GITHUB_ENV
- name: Run script
run: |
poetry run market-health-reporter \
--issue "${{ github.event.issue.number }}" \
--comment-body "${{ github.event.comment.body }}" \
--github-token "${{ secrets.GITHUB_TOKEN }}" \
--llm-api-key "${{ env.LLM_API_KEY }}"
--llm-api-key "${{ secrets.OPENAI_KEY }}" \
--rapid-api "${{ secrets.RAPID_API_KEY }}"
- name: Configure Git
run: |
git config --global user.email "[email protected]"
git config --global user.name "GitHub Action"
- name: Create new branch
run: |
git checkout -b new-branch-${{ github.run_id }}
echo "Creating a new branch"
- name: Add new files
run: |
git add .
echo "Adding new files"
- name: Commit new files
run: |
git commit -m "Add new market analysis data and a report"
git status
- name: Push changes
run: |
git push origin new-branch-${{ github.run_id }}
echo "Pushing changes to origin"
git log origin/new-branch-${{ github.run_id }} --oneline
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Create a PR from the branch with the commit
run: |
PR_TITLE="${{ github.event.comment.body }}"
PR_URL=$(gh pr create --fill --base main --head new-branch-${{ github.run_id }} --title "$PR_TITLE" --body "This PR adds new market analysis data and a report." --repo ${{ github.repository }})
echo "PR created at URL: $PR_URL"
echo "PR_URL=$PR_URL" >> $GITHUB_ENV
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
627 changes: 625 additions & 2 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ markdown2 = "^2.4.10"
bs4 = "^0.0.1"
anthropic = "^0.7.7"
tenacity = "^8.2.3"
matplotlib = "3.6.0"
pandas = "2.0.1"

[tool.poetry.scripts]
fact-check = "tools.fact_checker:main"
Expand Down
233 changes: 149 additions & 84 deletions tools/market_health_reporter.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,22 @@
from openai import OpenAI
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
import openai
from tiktoken import encoding_for_model
import argparse
import json
import os
import requests
import glob
from github import Github
from tools.claude_retriever.client import extract_between_tags
from tools.utils import read_file, extract_between_tags
from tools.report_graphics_tool import Visualization


# GitHub repository (owner/name) whose issues receive the comments posted by main().
REPO_NAME = "1712n/dn-institute"
# Prompt templates read at the start of main().
SYSTEM_PROMPT_FILE = 'tools/market_health_reporter_doc/prompts/system_prompt.txt'
HUMAN_PROMPT_FILE = 'tools/market_health_reporter_doc/prompts/prompt1.txt'
# Existing article that main() embeds in the prompt as the <example> section.
ARTICLE_EXAMPLE_FILE = 'content/market-health/posts/2023-08-14-huobi/index.md'
# Root directory under which one subdirectory per generated report is created.
OUTPUT_DIR = 'content/market-health/posts/'
# Directory (note the trailing slash) where fetched market data is cached as JSON.
DATA_DIR = 'tools/market_health_reporter_doc/data/'
# Upper bound on the prompt size in tokens; main() refuses to call the model above it.
# NOTE(review): presumably chosen to stay under the model's context window - confirm.
MAX_TOKENS = 125000


def parse_cli_args():
Expand All @@ -24,104 +36,157 @@ def parse_cli_args():
parser.add_argument(
"--github-token", dest="github_token", help="Github token", required=True
)
parser.add_argument(
"--rapid-api", dest="rapid_api", help="Rapid API key", required=True
)
return parser.parse_args()


def post_comment_to_issue(github_token, issue_number, repo_name, comment):
def extract_data_from_comment(comment: str) -> tuple:
    """
    Extract the request parameters from an issue comment.

    The comment is expected to look like
    ``<command>: <pairid>, <marketvenueid>, <start>, <end>``.
    Any fields after the fourth one are ignored.

    Returns:
        A ``(marketvenueid, pairid, start, end)`` tuple. The two identifiers
        are stripped and lower-cased; the two dates are only stripped.

    Raises:
        ValueError: if the comment has fewer than four comma-separated fields,
            or its first field has no ``:`` between the command and the pair.
    """
    parts = comment.split(',')
    if len(parts) < 4:
        raise ValueError(
            "Expected '<command>: <pairid>, <marketvenueid>, <start>, <end>', "
            f"got: {comment!r}"
        )

    # The pair id follows the command prefix inside the first field.
    command_and_pair = parts[0].split(':')
    if len(command_and_pair) < 2:
        raise ValueError(f"Missing ':' before the pair id in: {comment!r}")

    pairid = command_and_pair[1].strip().lower()
    marketvenueid = parts[1].strip().lower()
    start, end = parts[2].strip(), parts[3].strip()
    return marketvenueid, pairid, start, end


def main():
args = parse_cli_args()
repo_name = "1712n/dn-institute"

if "openai:" in args.comment_body:
with open('tools/market_health_reporter_doc/data/data1.json', 'r') as data_file:
data = json.load(data_file)

with open('tools/market_health_reporter_doc/openai/prompts/system_prompt.txt', 'r') as file:
SYSTEM_PROMPT = file.read()

with open('tools/market_health_reporter_doc/openai/prompts/prompt1.txt', 'r') as file:
HUMAN_PROMPT_CONTENT = file.read()

with open('content/market-health/posts/2023-08-14-huobi/index.md', 'r') as file:
article_example = file.read()


HUMAN_PROMPT_CONTENT = f"""
<example> %s </example>
{HUMAN_PROMPT_CONTENT}
<data> %s </data>
"""

prompt = f"{HUMAN_PROMPT_CONTENT%(article_example, data)}"
print('This is a prompt: ', prompt)

client = OpenAI(api_key=args.API_key)

completion = client.chat.completions.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"{SYSTEM_PROMPT}"},
{"role": "user", "content": f"{prompt}"}
]
)
def save_output(output: str, directory: str, marketvenueid: str, pairid: str, start: str, end: str) -> None:
    """
    Save the report as a markdown file in its own subdirectory of `directory`.

    The subdirectory is named ``{start}-{end}-{marketvenueid}-{pairid}`` and is
    created if missing. The first report is written to ``index.md``; if any
    ``index*.md`` file already exists there, the report goes to ``index-N.md``,
    where N is one more than the highest numeric suffix found (1 if none).
    """
    output_subdir = os.path.join(directory, f"{start}-{end}-{marketvenueid}-{pairid}")
    os.makedirs(output_subdir, exist_ok=True)
    file_path = os.path.join(output_subdir, "index")

    # Escape the fixed part so glob metacharacters in the ids are matched literally.
    existing_files = glob.glob(f"{glob.escape(file_path)}*.md")
    if existing_files:
        numbers = []
        for existing in existing_files:
            # Inspect only the file name, never the directory part of the path.
            stem = os.path.splitext(os.path.basename(existing))[0]
            suffix = stem.rsplit('-', 1)[-1]
            if suffix.isdigit():
                numbers.append(int(suffix))
        file_number = max(numbers, default=0) + 1
        full_path = f"{file_path}-{file_number}.md"
    else:
        full_path = f"{file_path}.md"

    with open(full_path, 'w', encoding='utf-8') as file:
        file.write(output)
    print(f"Output saved to: {full_path}")

output = completion.choices[0].message.content

output = extract_between_tags("article", output)

print("This is an answer: ", output)
def save_data(data: str, directory: str, marketvenueid: str, pairid: str, start: str, end: str) -> None:
    """
    Save `data` (already serialized text) to a JSON file in `directory`.

    The file is named ``{marketvenueid}_{pairid}_{start}_{end}.json`` with any
    ``:`` in `start`/`end` replaced by ``-``. The directory is created if it
    does not exist, and it may be given with or without a trailing separator.
    """
    file_name = f'{marketvenueid}_{pairid}_{start.replace(":", "-")}_{end.replace(":", "-")}.json'
    if directory:
        os.makedirs(directory, exist_ok=True)
    # os.path.join keeps this path identical to the one file_exists() looks up.
    new_file_name = os.path.join(directory, file_name)
    with open(new_file_name, 'w', encoding='utf-8') as file:
        file.write(data)

#with open('tools/market_health_reporter_doc/openai/outputs/output1.md', 'w', encoding='utf-8') as file:
#file.write(output)

post_comment_to_issue(args.github_token, int(args.issue), repo_name, output)

elif "claude:" in args.comment_body:
with open('tools/market_health_reporter_doc/data/data1.json', 'r') as data_file:
data = json.load(data_file)
def file_exists(directory: str, marketvenueid: str, pairid: str, start: str, end: str) -> "str | None":
    """
    Check whether the data file for the given parameters exists.

    The expected name is ``{marketvenueid}_{pairid}_{start}_{end}.json`` with
    any ``:`` in `start`/`end` replaced by ``-``.

    Returns:
        The path to the file if it exists, otherwise None.
    """
    file_name = f"{marketvenueid}_{pairid}_{start.replace(':', '-')}_{end.replace(':', '-')}.json"
    # This is an exact-name lookup, so no glob matching is needed (or wanted).
    candidate = os.path.join(directory, file_name)
    return candidate if os.path.isfile(candidate) else None

with open('tools/market_health_reporter_doc/claude/prompts/system_prompt.txt', 'r') as file:
SYSTEM_PROMPT = file.read()

with open('tools/market_health_reporter_doc/claude/prompts/prompt1.txt', 'r') as file:
HUMAN_PROMPT_CONTENT = file.read()
def fetch_or_load_market_data(querystring: dict, headers: dict, url: str, directory: str, marketvenueid: str, pairid: str, start: str, end: str, timeout: float = 30.0) -> dict:
    """
    Return market data, preferring a previously saved file over the API.

    If file_exists() finds a saved file for these parameters, its JSON content
    is returned. Otherwise a GET request is sent to `url` with `headers` and
    `querystring`, the JSON response is saved via save_data() and returned.

    Args:
        timeout: seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Raises:
        requests.HTTPError: if the API responds with an error status.
        requests.Timeout: if the API does not respond within `timeout` seconds.
    """
    existing_file = file_exists(directory, marketvenueid, pairid, start, end)
    if existing_file:
        print(f"Loading data from existing file: {existing_file}")
        with open(existing_file, 'r', encoding='utf-8') as file:
            return json.load(file)

    response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    save_data(json.dumps(data), directory, marketvenueid, pairid, start, end)
    return data

with open('content/market-health/posts/2023-08-14-huobi/index.md', 'r') as file:
article_example = file.read()

def post_comment_to_issue(github_token, issue_number, repo_name, comment):
    """
    Post a comment to a GitHub issue.

    The comment is posted only when the GITHUB_ACTIONS environment variable is
    "true"; otherwise the function returns without contacting GitHub.
    """
    # Check first, so runs outside GitHub Actions make no API calls at all.
    if os.environ.get("GITHUB_ACTIONS") != "true":
        return

    g = Github(github_token)
    repo = g.get_repo(repo_name)
    issue = repo.get_issue(number=issue_number)
    issue.create_comment(comment)

HUMAN_PROMPT_CONTENT = f"""
<example> %s </example>
{HUMAN_PROMPT_CONTENT}
<data> %s </data>
"""

prompt = f"{SYSTEM_PROMPT}{HUMAN_PROMPT}{HUMAN_PROMPT_CONTENT%(article_example, data)}{AI_PROMPT}"
print('This is a prompt: ', prompt)

completion = anthropic.completions.create(
model="claude-2.1",
max_tokens_to_sample=4000,
temperature=0,
prompt=prompt,
)

output = extract_between_tags("article", completion.completion)
def create_prompt(article_example: str, data: dict, human_prompt_content: str) -> str:
    """
    Build the user prompt from three newline-separated sections: the example
    article in <example> tags, the instructions, and the JSON-encoded data in
    <data> tags.
    """
    example_section = f"<example> {article_example} </example>"
    data_section = f"<data> {json.dumps(data)} </data>"
    return "{}\n{}\n{}".format(example_section, human_prompt_content, data_section)

print("This is an answer: ", completion.completion)

#with open('tools/market_health_reporter_doc/claude/outputs/output1.md', 'w') as file:
#file.write(output)
def main():
    """
    Generate a market health report for the request in the issue comment.

    Reads the prompt files, parses the comment into venue, pair and date range,
    loads or fetches the market data, and builds the prompt. If the prompt is
    longer than MAX_TOKENS, an error message is posted to the issue. Otherwise
    the model is called, the article is saved together with the generated
    graphics, and the article is posted to the issue.
    """
    args = parse_cli_args()

    system_prompt = read_file(SYSTEM_PROMPT_FILE)
    human_prompt_content = read_file(HUMAN_PROMPT_FILE)
    article_example = read_file(ARTICLE_EXAMPLE_FILE)

    marketvenueid, pairid, start, end = extract_data_from_comment(args.comment_body)
    print(f"Marketvenueid: {marketvenueid}, Pairid: {pairid}, Start: {start}, End: {end}")
    querystring = {
        "marketvenueid": marketvenueid,
        "pairid": pairid,
        "start": f"{start}T00:00:00",
        "end": f"{end}T00:00:00",
        "gran": "1h",
        "sort": "asc",
        "limit": "1000"
    }
    headers = {"X-RapidAPI-Key": args.rapid_api, "X-RapidAPI-Host": "cross-market-surveillance.p.rapidapi.com"}
    url = "https://cross-market-surveillance.p.rapidapi.com/metrics/wt/market"

    try:
        data = fetch_or_load_market_data(querystring, headers, url, DATA_DIR, marketvenueid, pairid, start, end)

        encoding = encoding_for_model("gpt-4")
        print('num of data tokens: ', len(encoding.encode(str(data))))

        prompt = create_prompt(article_example, data, human_prompt_content)
        prompt_token_count = len(encoding.encode(prompt))

        if prompt_token_count > MAX_TOKENS:
            error_message = "Your request is too long. It's possible that the period for the data is too broad. Please narrow it down."
            print(error_message)
            post_comment_to_issue(args.github_token, int(args.issue), REPO_NAME, error_message)
        else:
            # openai>=1.0 removed the module-level openai.ChatCompletion API;
            # requests must go through a client instance instead.
            # NOTE(review): assumes parse_cli_args() stores the LLM key as
            # `API_key` (its definition is not fully visible here) - confirm.
            client = openai.OpenAI(api_key=args.API_key)
            completion = client.chat.completions.create(
                model="gpt-4-0125-preview",
                temperature=0.0,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ]
            )
            output = completion.choices[0].message.content
            output = extract_between_tags("article", output)

            print("This is an answer: ", output)
            save_output(output, OUTPUT_DIR, marketvenueid, pairid, start, end)
            vis = Visualization()
            # Must match the subdirectory that save_output() writes the article to.
            output_subdir = os.path.join(OUTPUT_DIR, f"{start}-{end}-{marketvenueid}-{pairid}")
            vis.generate_report(data, output_subdir)

            post_comment_to_issue(args.github_token, int(args.issue), REPO_NAME, output)

    except Exception as e:
        # Top-level boundary: report the failure instead of raising a traceback.
        print(f"Error occurred: {e}")
Loading

0 comments on commit 574d372

Please sign in to comment.