deal with an issue
jingjyyao committed Jun 18, 2024
1 parent 1c5fdaf commit b32cce7
Showing 42 changed files with 563,559 additions and 443 deletions.
56 changes: 26 additions & 30 deletions Knowledge_Plugin/DOKE/call_openai.py
@@ -17,6 +17,8 @@
import queue
from concurrent.futures import ThreadPoolExecutor
import multiprocessing
import openai
from openai import OpenAI

api_keys = [
"your openai keys"
@@ -44,55 +46,49 @@ def generate_Davinci(api_key, text):

def generate_chatgpt(api_key, prompt, version):
# Initialize the OpenAI client with your API key
openai.api_key = api_key
openai.api_base = "https://api.openai.com/v1"
client = OpenAI(api_key=api_key)
text = [{'role': 'user', 'content': prompt}]
if version == "0301":
model = "gpt-3.5-turbo-0301"
else:
model = "gpt-3.5-turbo"

for i in range(MAX_RETRIES):
try:
# Call the chat model API and set a request timeout
response = openai.ChatCompletion.create(
response = client.chat.completions.create(
model=model,
messages=text,
temperature=0.0,
request_timeout=30,
max_tokens=2048,
frequency_penalty=0.0,
presence_penalty=0.0
)
content = response['choices'][0]['message']['content']
content = response.choices[0].message.content.strip()
return content
except Exception as e:
print(f"{api_key}\nError occurred: {e}. Retrying...")
time.sleep(INTERVAL)  # sleep between retries
time.sleep(INTERVAL)
print(f"Failed to get response for prompt: {prompt} after {MAX_RETRIES} retries.")
return "None"

def generate_gpt4(prompt):
available_configs = [
{"api_key": "your apikey", "url": "deployment url"},
]
message = [{"role": "user", "content": prompt}]
data = {
"messages": message,
"max_tokens": 2048,
"temperature": 0.,
'n': 1,
}

def generate_gpt4(api_key, prompt):
client = OpenAI(api_key=api_key)
text = [{'role': 'user', 'content': prompt}]
for _ in range(MAX_RETRIES):
try:
config = random.choice(available_configs)
headers = {'Content-Type': 'application/json', 'api-key': config["api_key"]}
response = requests.post(config["url"], json=data, headers=headers)
# print(response)
if (response.status_code == 200):
answer = response.json()["choices"][0]["message"]['content'].strip()
return answer
response = client.chat.completions.create(
model="gpt-4",
messages=text,
temperature=0.0,
max_tokens=2048,
frequency_penalty=0.0,
presence_penalty=0.0
)
content = response.choices[0].message.content.strip()
return content
except Exception as e:
print(f"Error occurred: {e}. Retrying...")
time.sleep(30)  # sleep between retries

print(f"{api_key}\nError occurred: {e}. Retrying...")
time.sleep(INTERVAL)

print("out of max_retry_times")
return "Error"
@@ -154,7 +150,7 @@ def worker(i, model, version):
index, prompt = prompts_queue.get()
api_key = api_keys[i % len(api_keys)]
if model == "GPT4":
result = generate_gpt4(prompt)
result = generate_gpt4(api_key, prompt)
if model == "ChatGPT":
result = generate_chatgpt(api_key, prompt, version)
elif model == "Davinci":
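A note on the client-based call in the hunk above: in openai-python >= 1.0 the legacy `request_timeout` argument is superseded by a `timeout` option that can be set on the client (or per request). A minimal, self-contained sketch of the same retry pattern under that assumption; the model name and retry constants below are illustrative, not taken from the repository:

```python
import time
from openai import OpenAI

MAX_RETRIES = 5   # illustrative; call_openai.py defines its own constants
INTERVAL = 10     # seconds to sleep between retries (illustrative)

def chat_once(api_key: str, prompt: str, model: str = "gpt-3.5-turbo") -> str:
    # Client-level timeout stands in for the removed request_timeout argument
    # (assumption: openai-python >= 1.0).
    client = OpenAI(api_key=api_key, timeout=30.0)
    messages = [{"role": "user", "content": prompt}]
    for _ in range(MAX_RETRIES):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.0,
                max_tokens=2048,
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            print(f"Error occurred: {e}. Retrying...")
            time.sleep(INTERVAL)
    return "None"
```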
6 changes: 3 additions & 3 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_empty.json
@@ -1,7 +1,7 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"topk": 200,
"max_his_len": 50,
"template": [
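The relative `*_path` entries above are presumably resolved against the DOKE/ directory from which the scripts are launched, so `../data/ml1m/...` points at `Knowledge_Plugin/data/ml1m/`. A minimal sketch of loading such a config under that assumption (`load_config` is a hypothetical helper, not part of the repository):

```python
import json
from pathlib import Path

def load_config(config_path: str, base_dir: str = ".") -> dict:
    # Hypothetical helper: read the JSON config and resolve every *_path entry
    # relative to the directory the scripts are run from (assumed to be DOKE/).
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    for key, value in config.items():
        if key.endswith("_path"):
            config[key] = str((Path(base_dir) / value).resolve())
    return config

# Example: load_config("config/ml1m/popneg_empty.json") when run from DOKE/.
```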
6 changes: 3 additions & 3 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_feature.json
@@ -1,7 +1,7 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"topk": 200,
"max_his_len": 50,
"template": [
8 changes: 4 additions & 4 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_global_I2I.json
@@ -1,8 +1,8 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"global_cf_data_path": "data/ml1m/global_CF.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"global_cf_data_path": "../data/ml1m/global_CF.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/MF_CF_candidate_pop.json",
"cf_data_path": "../data/ml1m/MF_CF_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
8 changes: 4 additions & 4 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his-can_I2I.json
@@ -1,8 +1,8 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/MF_CF_candidate_pop.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "../data/ml1m/MF_CF_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/MF_CF_candidate_pop.json",
"cf_data_path": "../data/ml1m/MF_CF_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/MF_CF_candidate_pop.json",
"cf_data_path": "../data/ml1m/MF_CF_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
8 changes: 4 additions & 4 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his-can_U2I.json
@@ -1,8 +1,8 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
2 changes: 1 addition & 1 deletion Knowledge_Plugin/DOKE/config/ml1m/popneg_his_I2I.json
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"topk": 200,
"max_his_len": 50,
"template": [
4 changes: 2 additions & 2 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his_I2I_path-I.json
@@ -2,8 +2,8 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "data/ml1m/path_text_dict-I.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "../data/ml1m/path_text_dict-I.json",
"topk": 200,
"max_his_len": 50,
"template": [
4 changes: 2 additions & 2 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his_I2I_path-II.json
@@ -2,8 +2,8 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "data/ml1m/path_text_dict-II.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "../data/ml1m/path_text_dict-II.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,8 +2,8 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "data/ml1m/path_text_dict-III.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "../data/ml1m/path_text_dict-III.json",
"topk": 200,
"max_his_len": 50,
"template": [
4 changes: 2 additions & 2 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his_I2I_path.json
@@ -2,8 +2,8 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "data/ml1m/path_text_dict.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "../data/ml1m/path_text_dict.json",
"topk": 200,
"max_his_len": 50,
"template": [
8 changes: 4 additions & 4 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his_U2I.json
@@ -1,8 +1,8 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_pop.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
1 change: 1 addition & 0 deletions Knowledge_Plugin/Knowledge_Extraction/extract_U2I.py
@@ -69,6 +69,7 @@ def prepare_U2I_dict(embedding, sequential_data, candidate_data):
candidate_scores = [(item2_id, score) for item2_id, score in enumerate(user_item_score[idx]) if item2_id in candidates]
U2I_candidate_dict[user] = sorted(candidate_scores, key=lambda x:-x[1])[:20]
return U2I_dict, U2I_candidate_dict

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='extract CF information')
parser.add_argument('--dataset', type=str, default='steam', help='dataset')
8 changes: 4 additions & 4 deletions Knowledge_Plugin/README.md
@@ -25,10 +25,10 @@ For example:
```bash
cd Knowledge_Extraction
python extract_I2I.py \
--dataset beauty \
--dataset ml1m \
--negative_type pop
python extract_U2I.py \
--dataset beauty \
--dataset ml1m \
--negative_type pop
```

@@ -46,8 +46,8 @@ python generate_prompt.py \
--config config/ml1m/popneg_his_I2I.json \
--dataset ml1m
python call_openai.py \
--prompt out/prompts/ml1m/popneg_his_I2I_path.json \
--prompt out/prompts/ml1m/popneg_his_I2I.json \
--model ChatGPT \
--dataset ml1m
bash metric.bash out/result/ml1m/ChatGPT_popneg_his_I2I_path ml1m
bash metric.bash out/result/ml1m/ChatGPT_popneg_his_I2I ml1m
```
@@ -5,6 +5,7 @@ We need to process the following data sets separately:
+ Online Retail (https://www.kaggle.com/carrie1/ecommerce-data)

# Download
Create the data directory under Knowledge_Plugin/:
```bash
mkdir data/raw_data
cd data/raw_data
@@ -22,6 +23,9 @@ unzip ml-1m.zip

Run the notebook corresponding to your dataset (a non-interactive alternative is sketched after this list):

+ data_preprocess_amazon.ipynb
+ data_preprocess_ml1m.ipynb
+ data_preprocess_onlineretail.ipynb
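
If you would rather execute the notebooks non-interactively, a sketch using nbconvert's Python API should work, assuming `jupyter` and `nbconvert` are installed (illustrative only, not part of the repository):

```python
# Illustrative sketch (assumes jupyter/nbconvert are installed); run from the notebooks' directory.
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

notebooks = [
    "data_preprocess_amazon.ipynb",
    "data_preprocess_ml1m.ipynb",
    "data_preprocess_onlineretail.ipynb",
]

for name in notebooks:
    nb = nbformat.read(name, as_version=4)         # load the notebook
    ExecutePreprocessor(timeout=None).preprocess(  # execute all cells
        nb, {"metadata": {"path": "."}}
    )
    nbformat.write(nb, name)                       # overwrite with executed output
```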

# Result
