This repository has been archived by the owner on Oct 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
173 lines (148 loc) · 7.53 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# .main.py
import dspy
from dsp.modules.anthropic import Claude
from dsp.modules.google import Google
# from dsp.utils import format_examples
# from dspy.retrieve.chromadb_rm import ChromadbRM
from dspy.evaluate import Evaluate
# from .datasets.hotpotqa import HotPotQA
from dspy.teleprompt import BootstrapFewShotWithRandomSearch, BootstrapFinetune
from dsp.modules.lm import LM
from dsp.utils.utils import deduplicate
import huggingface_hub
from huggingface_hub import HfApi
import os
import random
import requests
from pathlib import Path
from typing import Optional, Any, List, Dict
import base64
import time
import chromadb
from chromadb.utils import embedding_functions
from functools import wraps
from dotenv import load_dotenv, set_key
from pydantic import BaseModel
import gradio as gr
from src.dataloaders.dataloader import DataProcessor, DocumentLoader
from src.torchonlongform.longform import LongFormContent, PromptToExample, PromptToRetrieval , Retriever , LongFormQA, LongFormQAWithAssertions
from src.torchonsyntheticdata.syntheticdata import SyntheticDataGenerator, SyntheticDataHandler
from src.config.config import APIKeyManager
from src.torchonrag.torchonrag import Upsert, MyRetriever
from src.torchonpublish.publish import TorchonPublisher
# from src.torchonapplicationcompiler.torchoncompiler import
from src.torchonllms.torchonrequestclients import ClaudeGenerate
from src.torchonragmaker.torchonragmaker import CreateRAG
import logging
# Default sampling / generation settings, kept as a plain module-level dict.
kwargs = dict(
    temperature=0.7,
    max_tokens=2048,
    top_p=0.9,
    top_k=40,
    n=1,
)
class Application:
    """Gradio front-end that wires the project's helpers into one UI.

    The instance owns the helper objects (retriever, API-key manager, data
    processor, synthetic-data handler, RAG creator) and exposes one handler
    method per button of the interface built in :meth:`main`.
    """

    def __init__(self):
        """Create the helper objects with empty credentials and prompts."""
        # For publishing - add these prompts to .env file, then load them accordingly !
        self.title = ""
        self.system_prompt = ""
        self.example_input = ""
        self.hf_token = ""
        self.anthropic_api_key = ""
        self.openai_api_key = ""
        # NOTE(review): the retriever is built with the still-empty key; confirm
        # whether MyRetriever needs to be rebuilt once real keys are submitted.
        self.Retriever = MyRetriever(api_key=self.anthropic_api_key)
        self.api_key_manager = APIKeyManager()
        self.data_processor = DataProcessor(
            source_file="",
            collection_name="torchon-tonic-ai",
            persist_directory="/your_files_here",
        )
        self.synthetic_data_handler = SyntheticDataHandler()
        self.CreateRAG = CreateRAG()

    def set_api_keys(self, anthropic_api_key, openai_api_key, hf_token, github_token=None):
        """Remember the submitted keys and forward them to the API-key manager.

        Args:
            anthropic_api_key: Value of the "Anthropic API Key" field.
            openai_api_key: Value of the "OpenAI API Key" field.
            hf_token: Value of the "Hugging Face Write Token" field.
            github_token: Value of the "GitHub Write Token" field. Accepted
                because the UI sends four inputs to this handler; it is not
                forwarded, since the manager is called with three keys.

        Returns:
            Whatever ``self.api_key_manager.set_api_keys`` returns.
        """
        # Keep the keys on the instance so that publish() does not keep using
        # the empty defaults assigned in __init__.
        self.anthropic_api_key = anthropic_api_key
        self.openai_api_key = openai_api_key
        self.hf_token = hf_token
        return self.api_key_manager.set_api_keys(anthropic_api_key, openai_api_key, hf_token)

    def handle_file_upload(self, uploaded_file, uploaded_folder=None, webpage_url=None, github_url=None):
        """Load the uploaded file through the data processor and store it.

        Args:
            uploaded_file: Either an object exposing the file path as ``.name``
                or the path itself as a string.
            uploaded_folder: Accepted because the UI sends it; currently unused.
            webpage_url: Accepted because the UI sends it; currently unused.
            github_url: Accepted because the UI sends it; currently unused.

        Returns:
            The value returned by
            ``self.data_processor.load_data_from_source_and_store()``.

        Raises:
            ValueError: If no file was provided.
        """
        if not uploaded_file:
            raise ValueError("No file was uploaded; only file uploads are processed.")
        # Accept both a file-like wrapper (with .name) and a bare path string.
        source_path = getattr(uploaded_file, "name", uploaded_file)
        self.data_processor.source_file = source_path
        loaded_data = self.data_processor.load_data_from_source_and_store()
        print(f"Data from {source_path} loaded and stored successfully.")
        return loaded_data

    def handle_synthetic_data(self, schema_class_name, sample_size):
        """Generate ``sample_size`` synthetic items and return them.

        Args:
            schema_class_name: Sent by the UI; not used by this handler.
            sample_size: Number of items to generate; converted with ``int``
                because the UI number field may deliver a float.

        Returns:
            The data produced by ``self.synthetic_data_handler.generate_data``.
        """
        item_count = int(sample_size)
        synthetic_data = self.synthetic_data_handler.generate_data(sample_size=item_count)
        synthetic_data_str = "\n".join(str(item) for item in synthetic_data)
        print(f"Generated {item_count} synthetic data items:\n{synthetic_data_str}")
        return synthetic_data

    @staticmethod
    def generate_content(prompt):
        """Run ``LongFormContent`` on ``prompt`` and return its ``blog`` field.

        Declared static because the function never took ``self``; this keeps
        ``Application.generate_content(prompt)`` working and also allows the
        call on an instance.
        """
        content_generator = LongFormContent()
        result = content_generator(prompt)
        return result.blog

    def handle_chatbot_interaction(self, text, model_select, top_p, temperature, repetition_penalty, max_length_tokens, max_context_length_tokens):
        """Forward one chat turn to the retriever and return its response.

        All arguments are passed through unchanged to
        ``self.Retriever.generate_response``; ``None`` is passed as its second
        positional argument.
        """
        chatbot_response = self.Retriever.generate_response(
            text,
            None,
            model_select,
            top_p,
            temperature,
            repetition_penalty,
            max_length_tokens,
            max_context_length_tokens,
        )
        return chatbot_response

    # NOTE: a generate_RAG(input_text, title, system_prompt, example_input)
    # wrapper around self.CreateRAG.create_rag was disabled in the original.

    def publish(self, title=None, system_prompt=None, example_input=None, hf_token=None):
        """Publish through ``TorchonPublisher`` and return its result.

        Each argument, when not ``None``, replaces the corresponding instance
        attribute first. The defaults keep the original no-argument call
        working while letting the "Publish" tab pass its four fields.

        Args:
            title: New title, or ``None`` to keep ``self.title``.
            system_prompt: New system prompt, or ``None`` to keep the current one.
            example_input: New example input, or ``None`` to keep the current one.
            hf_token: New Hugging Face token, or ``None`` to keep the current one.

        Returns:
            The value returned by ``TorchonPublisher.publish()``.
        """
        if title is not None:
            self.title = title
        if system_prompt is not None:
            self.system_prompt = system_prompt
        if example_input is not None:
            self.example_input = example_input
        if hf_token is not None:
            self.hf_token = hf_token
        publisher = TorchonPublisher(self.title, self.hf_token, self.anthropic_api_key)
        return publisher.publish()

    def main(self):
        """Build the Gradio Blocks UI, wire the buttons to handlers, and launch it."""
        with gr.Blocks() as demo:
            with gr.Accordion("API Keys", open=True) as api_keys_accordion:
                with gr.Row():
                    anthropic_api_key_input = gr.Textbox(label="Anthropic API Key", type="password")
                    openai_api_key_input = gr.Textbox(label="OpenAI API Key", type="password")
                    hf_token_input = gr.Textbox(type="password", label="Hugging Face Write Token")
                    github_token_input = gr.Textbox(type="password", label="GitHub Write Token")
                submit_button = gr.Button("Submit")
                confirmation_output = gr.Textbox(label="Confirmation", visible=False)
                submit_button.click(
                    fn=self.set_api_keys,
                    inputs=[anthropic_api_key_input, openai_api_key_input, hf_token_input, github_token_input],
                    outputs=confirmation_output,
                )

            with gr.Accordion("Upload Data") as upload_data_accordion:
                file_upload = gr.File(label="Upload Data Files")
                folder_upload = gr.Files(label="Upload Folder")
                webpage_input = gr.Textbox(label="Web Page URL")
                github_input = gr.Textbox(label="GitHub Repository Link")
                file_upload_button = gr.Button("Process Data")
                validation_output = gr.JSON(label="Validation Output")
                file_upload_button.click(
                    fn=self.handle_file_upload,
                    inputs=[file_upload, folder_upload, webpage_input, github_input],
                    outputs=validation_output,
                )

            with gr.Accordion("Generate Synthetic Data") as generate_data_accordion:
                schema_input = gr.Textbox(label="Schema Class Name")
                sample_size_input = gr.Number(label="Sample Size", value=100)
                synthetic_data_button = gr.Button("Generate Synthetic Data")
                synthetic_data_output = gr.Textbox()
                synthetic_data_button.click(
                    fn=self.handle_synthetic_data,
                    inputs=[schema_input, sample_size_input],
                    outputs=synthetic_data_output,
                )

            with gr.Accordion("SimpleTestingChatbot"):
                with gr.Tab("Create"):
                    chatbot_maker = gr.Chatbot(layout="panel", elem_id="chatbot-maker")
                    textbox_maker = gr.Textbox(placeholder="Your words here", autofocus=True)
                    submit_btn = gr.Button("Send")
                    # NOTE(review): this passes the CreateRAG instance itself as
                    # the handler; confirm that CreateRAG objects are callable.
                    submit_btn.click(
                        fn=self.CreateRAG,
                        inputs=[textbox_maker],
                        outputs=[chatbot_maker],
                    )
                with gr.Tab("Publish"):
                    title_input = gr.Textbox(value=self.title, label="Title")
                    system_prompt_input = gr.Textbox(value=self.system_prompt, label="System Prompt")
                    example_input = gr.Textbox(value=self.example_input, label="Example Input")
                    # Separate name so the API-keys token field is not shadowed.
                    publish_hf_token_input = gr.Textbox(label="Hugging Face Token", type="password")
                    publish_btn = gr.Button("Publish")
                    publish_btn.click(
                        fn=self.publish,
                        inputs=[title_input, system_prompt_input, example_input, publish_hf_token_input],
                        outputs=[],
                    )
        demo.launch()
# Script entry point: build the application and start its UI.
if __name__ == "__main__":
    Application().main()