added new outputs

Christianfoley · Dec 6, 2023 · b9c1900 · b9c1900
1 parent 1743ee2
commit b9c1900
Show file tree

Hide file tree

Showing 13 changed files with 17,715 additions and 4 deletions.
diff --git a/SongLyricsScraper/fix_schema.ipynb b/SongLyricsScraper/fix_schema.ipynb
diff --git a/SongLyricsScraper/taylor_swift_test.csv b/SongLyricsScraper/taylor_swift_test.csv
diff --git a/SongLyricsScraper/taylor_swift_training.csv b/SongLyricsScraper/taylor_swift_training.csv
diff --git a/data/prompts/conversation_style_taylor.json b/data/prompts/conversation_style_taylor.json
diff --git a/data/taylor_swift_model_ouputs/llama-2-7b-chat-ft_taylor_swift-taylor-test-outputs.json b/data/taylor_swift_model_ouputs/llama-2-7b-chat-ft_taylor_swift-taylor-test-outputs.json
diff --git a/data/taylor_swift_model_ouputs/llama-2-7b-chat-taylor-test-outputs.json b/data/taylor_swift_model_ouputs/llama-2-7b-chat-taylor-test-outputs.json
diff --git a/data/taylor_swift_model_ouputs/lyre-chat-checkpoint-100-taylor-test-outputs.json b/data/taylor_swift_model_ouputs/lyre-chat-checkpoint-100-taylor-test-outputs.json
diff --git a/data/taylor_swift_model_ouputs/lyre-chat_taylor_swift-checkpoint-10-taylor-test-outputs.json b/data/taylor_swift_model_ouputs/lyre-chat_taylor_swift-checkpoint-10-taylor-test-outputs.json
diff --git a/data/taylor_swift_model_ouputs/lyre-chat_taylor_swift-taylor-test-outputs.json b/data/taylor_swift_model_ouputs/lyre-chat_taylor_swift-taylor-test-outputs.json
diff --git a/generate_prompts/convert_to_fastchat.py b/generate_prompts/convert_to_fastchat.py
@@ -0,0 +1,22 @@
+import argparse
+import pandas as pd
+
+
+def main(args):
+    # Convert a JSON Lines file (args.data_path) into a JSON array of {'id', 'conversations'} records written to args.out_path.
+    data = pd.read_json(args.data_path, lines=True)  # one JSON object per input line; 'prompt', 'lyrics' and 'id' columns are read below
+    data['conversations'] = pd.Series(list(zip(data['prompt'], data['lyrics']))).map(lambda t: [{'from': 'human', 'value': t[0]}, {'from': 'gpt', 'value': t[1]}])
+    data[['id', 'conversations']].to_json(args.out_path, orient='records', indent=1)
+    # Each 'conversations' value is a two-turn list: the row's prompt as the 'human' turn, its lyrics as the 'gpt' turn. Nothing is returned.
+    return
+
+
+if __name__ == "__main__":  # script entry point: parse the two path options and run main()
+    parser = argparse.ArgumentParser(description='Generate Prompts')  # NOTE(review): description does not mention the conversion done by main() - possibly copied from another script; confirm
+    parser.add_argument('--data-path', type=str, help='Data path', required=True)
+    parser.add_argument('--out-path', type=str, help='Out data path', required=True)
+    # Both options are declared required=True, so argparse rejects a command line that omits either one.
+    # The dashed option names are read back as args.data_path and args.out_path, the attributes main() uses.
+    args = parser.parse_args()
+
+    main(args)
diff --git a/generate_prompts/taylor_test_set/output.jsonl b/generate_prompts/taylor_test_set/output.jsonl
diff --git a/generate_prompts/taylor_train_set/output.jsonl b/generate_prompts/taylor_train_set/output.jsonl
diff --git a/generate_prompts/test_prompts.ipynb b/generate_prompts/test_prompts.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -13,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -25,7 +25,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -797,6 +797,71 @@
     "songs.loc[431]['lyrics']"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "song = \"\"\"Yeah I know 所以我開始想到底怎麼做\n",
+    "那些謊 那些傷 不是我的錯\n",
+    "妳説得天馬行空\n",
+    "我倒在那片星空\n",
+    "看著妳要怎麼形容我們惹的禍\n",
+    "I hope that we were on the same page\n",
+    "單純的愛情 we were on the same thing\n",
+    "曾經那麼堅定\n",
+    "如果能夠回到過去I ain’t worry bout nothin\n",
+    "\n",
+    "But 妳說得我都懂\n",
+    "Yeah妳說得我都信\n",
+    "我說過我的痛\n",
+    "但是妳卻都不聽\n",
+    "I’m just sayin I’m insane\n",
+    "我掉進同個陷阱三個年又幾天了\n",
+    "\n",
+    "妳說妳相信我但你不是真的信我\n",
+    "你要我相信妳但卻都是妳在騙我\n",
+    "Goddamn 不是我沒有發現\n",
+    "我藏在心底 說不出的再見\n",
+    "\n",
+    "再說一次\n",
+    "The one last time\n",
+    "就說妳還是愛著我\n",
+    "我放在心裡and I close my eyes\n",
+    "最後一次\n",
+    "The one last time yeah\n",
+    "我會好好得記著你\n",
+    "我閉上眼睛and I close my heart\n",
+    "\n",
+    "\n",
+    "我受夠那些謊\n",
+    "我受夠那些lies\n",
+    "我受夠那些好\n",
+    "我受夠那些壞\n",
+    "我受夠這個夢\n",
+    "我受夠這個愛\n",
+    "我受夠結局會兩敗俱傷不論成敗\n",
+    "\n",
+    "\n",
+    "\n",
+    "我受夠每天重複在打一場完全不想贏的仗\n",
+    "如果 別在不懂愛的年紀愛最深\n",
+    "如果 能放下 能忘掉 能不怕\n",
+    "如果 能夠man up 但我沒有辦法 and I\n",
+    "我不想讓自己看起來像 pussy nah nah\n",
+    "不想再透過眼淚看這一切 nah nah\n",
+    "視線模糊不清 像被判無期徒刑\n",
+    "I’m too innocent give me freedom nah nah\n",
+    "\n",
+    "說到底不想承認是你佈的局\n",
+    "還是不承認我出得去\n",
+    "多想揮揮衣袖\n",
+    "不帶走一片雲彩\n",
+    "偏偏我依舊\n",
+    "忘不了妳的裙擺\"\"\""
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 59,
@@ -835,9 +900,11 @@
     }
    ],
    "source": [
+    "\n",
+    "\n",
     "resp = openai.ChatCompletion.create(\n",
     "    model = \"gpt-4-1106-preview\",\n",
-    "    messages = [{ \"role\": \"user\", \"content\": SYSTEM_PROMPT} , { \"role\": \"user\", \"content\": USER_PROMPT + f\"\\n\\n[ARTIST]: {songs.loc[431]['artist']}\\n\\n [SONG]:\\n\" + songs.loc[431]['lyrics'] + \"\\n\\n[PROMPT]:\\n\"}],\n",
+    "    messages = [{ \"role\": \"user\", \"content\": SYSTEM_PROMPT} , { \"role\": \"user\", \"content\": USER_PROMPT + f\"\\n\\n[ARTIST]: 高爾宣 OSN\\n\\n [SONG]:\\n\" + song + \"\\n\\n[PROMPT]:\\n\"}],\n",
     "    temperature = 0.7\n",
     ")\n",
     "resp"