-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathskill.py
126 lines (117 loc) · 5.05 KB
/
skill.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import os
import voyager.utils as U
from langchain_community.llms.llamacpp import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_community.embeddings import OllamaEmbeddings
from langchain.schema import HumanMessage, SystemMessage
from langchain_community.vectorstores import Chroma
from voyager.prompts import load_prompt
from voyager.control_primitives import load_control_primitives
class SkillManager:
    """Store, describe and retrieve reusable skill programs.

    The skill library is kept in three places that must stay in sync:

    * ``self.skills`` -- in-memory dict mapping a program name to its
      ``code`` and ``description``; mirrored to ``{ckpt_dir}/skill/skills.json``.
    * ``{ckpt_dir}/skill/code`` and ``{ckpt_dir}/skill/description`` -- one
      ``.js`` / ``.txt`` file per stored (version of a) skill.
    * ``self.vectordb`` -- a Chroma collection holding one description per
      skill, used for similarity search in :meth:`retrieve_skills`.
    """

    def __init__(
        self,
        model_name="gpt-3.5-turbo",
        temperature=0,
        retrieval_top_k=5,
        request_timout=120,
        ckpt_dir="ckpt",
        resume=False,
    ):
        """Create the checkpoint directories and open the vector store.

        Args:
            model_name: Stored unchanged as ``self.llm``.  It is later used
                through ``self.llm.invoke(messages)``, so it has to be an
                object exposing ``invoke`` (the string default does not).
            temperature: Accepted for interface compatibility; not used here.
            retrieval_top_k: Upper bound on the number of skills returned by
                :meth:`retrieve_skills`.
            request_timout: Accepted for interface compatibility (spelling
                kept for existing callers); not used here.
            ckpt_dir: Root directory of the checkpoint; skills live under
                ``{ckpt_dir}/skill``.
            resume: When true, load ``skills.json`` from the checkpoint
                instead of starting with an empty library.

        Raises:
            AssertionError: If the vector store and the loaded skills hold a
                different number of entries.
        """
        # NOTE(review): no LLM is constructed here; callers are presumably
        # expected to pass a ready LLM object as ``model_name`` -- confirm.
        self.llm = model_name
        U.f_mkdir(f"{ckpt_dir}/skill/code")
        U.f_mkdir(f"{ckpt_dir}/skill/description")
        U.f_mkdir(f"{ckpt_dir}/skill/vectordb")
        # programs for env execution
        self.control_primitives = load_control_primitives()
        if resume:
            print(f"\033[33mLoading Skill Manager from {ckpt_dir}/skill\033[0m")
            self.skills = U.load_json(f"{ckpt_dir}/skill/skills.json")
        else:
            self.skills = {}
        self.retrieval_top_k = retrieval_top_k
        self.ckpt_dir = ckpt_dir
        self.vectordb = Chroma(
            collection_name="skill_vectordb",
            embedding_function=OllamaEmbeddings(model="deepseek-coder-v2:16b"),
            persist_directory=f"{ckpt_dir}/skill/vectordb",
        )
        # A persisted collection left over from an earlier run would make
        # retrieval return names that are missing from self.skills.
        stored_count = self.vectordb._collection.count()
        assert stored_count == len(self.skills), (
            f"Skill Manager's vectordb is not synced with skills.json.\n"
            f"There are {stored_count} skills in vectordb but {len(self.skills)} skills in skills.json.\n"
            f"Did you set resume=False when initializing the manager?\n"
            f"You may need to manually delete the vectordb directory for running from scratch."
        )

    @property
    def programs(self):
        """Return all skill code followed by all control primitives.

        Every program is followed by a blank line (``"\\n\\n"``); skills come
        first, in ``self.skills`` order, then the control primitives.
        """
        sources = [entry["code"] for entry in self.skills.values()]
        sources.extend(self.control_primitives)
        return "".join(f"{source}\n\n" for source in sources)

    def add_new_skill(self, info):
        """Describe a program, then record it in memory, on disk and in the vector store.

        Args:
            info: Mapping with the keys ``"task"``, ``"program_name"`` and
                ``"program_code"``.

        Returns:
            None.  Tasks starting with "Deposit useless items into the chest
            at" are ignored without storing anything.

        Raises:
            AssertionError: If the vector store and ``self.skills`` disagree
                on the number of entries after the insert.
        """
        if info["task"].startswith("Deposit useless items into the chest at"):
            # No need to reuse the deposit skill
            return
        program_name = info["program_name"]
        program_code = info["program_code"]
        skill_description = self.generate_skill_description(program_name, program_code)
        print(
            f"\033[33mSkill Manager generated description for {program_name}:\n{skill_description}\033[0m"
        )
        if program_name in self.skills:
            print(f"\033[33mSkill {program_name} already exists. Rewriting!\033[0m")
            # Drop the stale embedding; the new one is added below under the same id.
            self.vectordb._collection.delete(ids=[program_name])
            # Older versions stay on disk: pick the first unused "V<n>" suffix.
            # The directory is listed once instead of once per candidate.
            existing_files = set(os.listdir(f"{self.ckpt_dir}/skill/code"))
            version = 2
            while f"{program_name}V{version}.js" in existing_files:
                version += 1
            dumped_program_name = f"{program_name}V{version}"
        else:
            dumped_program_name = program_name
        self.vectordb.add_texts(
            texts=[skill_description],
            ids=[program_name],
            metadatas=[{"name": program_name}],
        )
        self.skills[program_name] = {
            "code": program_code,
            "description": skill_description,
        }
        assert self.vectordb._collection.count() == len(
            self.skills
        ), "vectordb is not synced with skills.json"
        U.dump_text(
            program_code, f"{self.ckpt_dir}/skill/code/{dumped_program_name}.js"
        )
        U.dump_text(
            skill_description,
            f"{self.ckpt_dir}/skill/description/{dumped_program_name}.txt",
        )
        U.dump_json(self.skills, f"{self.ckpt_dir}/skill/skills.json")
        self.vectordb.persist()

    @staticmethod
    def _render_description(program_name, llm_response):
        """Wrap an LLM answer as a commented JavaScript function stub.

        Accepts either plain text or an object carrying the text in a
        ``content`` attribute.  Surrounding whitespace is stripped and every
        line of the answer gets its own ``//`` prefix, so a multi-line answer
        cannot spill out of the comment.
        """
        text = getattr(llm_response, "content", llm_response)
        lines = str(text).strip().splitlines() or [""]
        commented = "\n".join(f" // {line}" for line in lines)
        return f"async function {program_name}(bot) {{\n{commented}\n}}"

    def generate_skill_description(self, program_name, program_code):
        """Ask the LLM to describe a program and return it as a JS stub.

        Args:
            program_name: Name of the program's main function.
            program_code: Source code sent to the LLM.

        Returns:
            A string of the form
            ``async function <program_name>(bot) {\\n // <description>\\n}``.

        Raises:
            AttributeError: If ``self.llm`` has no ``invoke`` method (for
                example when it is still the default model-name string).
        """
        invoke = getattr(self.llm, "invoke", None)
        if invoke is None:
            raise AttributeError(
                f"SkillManager.llm must expose an 'invoke' method, got "
                f"{type(self.llm).__name__}: {self.llm!r}. "
                f"Pass an LLM object as model_name."
            )
        messages = [
            SystemMessage(content=load_prompt("skill")),
            HumanMessage(
                content=program_code
                + "\n\n"
                + f"The main function is `{program_name}`."
            ),
        ]
        return self._render_description(program_name, invoke(messages))

    def retrieve_skills(self, query):
        """Return the code of the stored skills most similar to ``query``.

        Args:
            query: Text passed to the vector store's similarity search.

        Returns:
            A list of skill source strings in the order the vector store
            returned them; at most ``self.retrieval_top_k`` entries, and an
            empty list when no skill is stored.
        """
        # Never request more results than the collection holds.
        k = min(self.vectordb._collection.count(), self.retrieval_top_k)
        if k == 0:
            return []
        print(f"\033[33mSkill Manager retrieving for {k} skills\033[0m")
        docs_and_scores = self.vectordb.similarity_search_with_score(query, k=k)
        names = [doc.metadata["name"] for doc, _ in docs_and_scores]
        print(
            f"\033[33mSkill Manager retrieved skills: "
            f"{', '.join(names)}\033[0m"
        )
        return [self.skills[name]["code"] for name in names]