Skip to content

Commit

Permalink
Ver 1.0: emotion control
Browse files Browse the repository at this point in the history
  • Loading branch information
AnyaCoder committed Sep 25, 2024
1 parent b453933 commit 36fd155
Show file tree
Hide file tree
Showing 10 changed files with 325 additions and 48 deletions.
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# fish-speech-gui

<img src="fish/assets/example.png" width="800" />
## Basic Setup

<img src="fish/assets/example_1_basic.png" width="800" />

## Text to Speech

<img src="fish/assets/example_1_tts.png" width="800" />

# Build from Source

Expand All @@ -12,6 +18,13 @@ pdm install
pdm run build.py
```

# Debug

```bash
conda activate pyqt
python fish/__main__.py
```

# Run

```
Expand Down
Binary file added fish/assets/example_1_basic.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added fish/assets/example_1_tts.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 5 additions & 2 deletions fish/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ class Config:
mp3_bitrate: int = 64
opus_bitrate: int = -1000

chunk_length: int = 100
max_new_tokens: int = 1024
chunk_length: int = 200
max_new_tokens: int = 0
top_p: int = 700
repetition_penalty: int = 1200
temperature: int = 700
Expand All @@ -39,6 +39,9 @@ class Config:
volume: int = 50
speed: int = 100

font_size: int = 10
font_family: str = "Microsoft YaHei UI"

# Plugins
current_plugin: str | None = None
plugins: dict[str, dict] = field(default_factory=dict)
Expand Down
109 changes: 65 additions & 44 deletions fish/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
QMessageBox,
QPushButton,
QSlider,
QTextEdit,
QTabWidget,
QVBoxLayout,
QWidget,
)
Expand All @@ -35,6 +35,7 @@
from fish.config import application_path, config, load_config, save_config
from fish.file import *
from fish.i18n import _t, language_map
from fish.input import TextEditorWidget


class MainWindow(QWidget):
Expand All @@ -49,18 +50,16 @@ def __init__(self):
self.setWindowTitle(_t("title").format(version=version))

self.main_layout = QVBoxLayout()
self.tab_widget = QTabWidget()

self.tab_widget.addTab(self.create_settings_tab1(), _t("tab.page1"))
self.tab_widget.addTab(self.create_settings_tab2(), _t("tab.page2"))

# Stick to the top
self.main_layout.setAlignment(Qt.AlignmentFlag.AlignTop)
self.main_layout.addWidget(self.tab_widget)
self.setup_action_buttons(self.main_layout)

self.setup_ui_settings()
self.setup_backend_settings()
self.setup_device_settings()
self.setup_audio_settings()
self.setup_reference_settings()
self.setup_textinput_settings()
self.setup_audioplayer_settings()

self.setup_action_buttons()
self.setLayout(self.main_layout)

# Use size hint to set a reasonable size
Expand All @@ -69,6 +68,29 @@ def __init__(self):

self.files = []

def create_settings_tab1(self):
tab1 = QWidget()
layout1 = QVBoxLayout()

self.setup_ui_settings(layout1)
self.setup_backend_settings(layout1)
self.setup_device_settings(layout1)
self.setup_audio_settings(layout1)
self.setup_reference_settings(layout1)

tab1.setLayout(layout1)
return tab1

def create_settings_tab2(self):
tab2 = QWidget()
layout2 = QVBoxLayout()

self.setup_textinput_settings(layout2)
self.setup_audioplayer_settings(layout2)

tab2.setLayout(layout2)
return tab2

def center(self):
screen = QApplication.primaryScreen()
screen_geometry = screen.availableGeometry()
Expand All @@ -77,7 +99,7 @@ def center(self):
y = (screen_geometry.height() - window_geometry.height()) // 4
self.move(x, y)

def setup_ui_settings(self):
def setup_ui_settings(self, layout: QVBoxLayout):
# we have language and backend settings in the first row
row = QHBoxLayout()
row.setAlignment(Qt.AlignmentFlag.AlignLeft)
Expand Down Expand Up @@ -115,9 +137,9 @@ def setup_ui_settings(self):
self.load_button.clicked.connect(self.load_config)
row.addWidget(self.load_button)

self.main_layout.addLayout(row)
layout.addLayout(row)

def setup_device_settings(self):
def setup_device_settings(self, layout: QVBoxLayout):
# second row: a group box for audio device settings
row = QGroupBox(_t("audio_device.name"))
row_layout = QGridLayout()
Expand Down Expand Up @@ -165,12 +187,11 @@ def setup_device_settings(self):

self.input_device_combo.setFixedWidth(300)
row_layout.addWidget(self.output_device_combo, 1, 1)

row.setMaximumHeight(100)
row.setLayout(row_layout)
layout.addWidget(row)

self.main_layout.addWidget(row)

def setup_audio_settings(self):
def setup_audio_settings(self, layout: QVBoxLayout):
# third row: a group box for audio settings
row = QGroupBox(_t("audio.name"))
row_layout = QGridLayout()
Expand All @@ -193,7 +214,8 @@ def setup_audio_settings(self):

row_layout.addWidget(QLabel(_t("audio.max_new_tokens")), 0, 3)
self.max_new_tokens_slider = QSlider(Qt.Orientation.Horizontal)
self.max_new_tokens_slider.setMinimum(1024)
self.max_new_tokens_slider.setToolTip("0 means no limit")
self.max_new_tokens_slider.setMinimum(0)
self.max_new_tokens_slider.setMaximum(4096)
self.max_new_tokens_slider.setSingleStep(128)
self.max_new_tokens_slider.setTickInterval(128)
Expand Down Expand Up @@ -270,9 +292,10 @@ def setup_audio_settings(self):
row_layout.addWidget(self.mp3_bitrate_combo, 2, 4)

row.setLayout(row_layout)
self.main_layout.addWidget(row)
row.setMaximumHeight(200)
layout.addWidget(row)

def setup_reference_settings(self):
def setup_reference_settings(self, layout: QVBoxLayout):
row = QGroupBox()
row.setTitle(_t("reference.name"))
row_layout = QGridLayout()
Expand All @@ -288,8 +311,7 @@ def setup_reference_settings(self):

self.file_list_widget = QListWidget()
# self.file_list_widget.setFixedWidth(300)
self.file_list_widget.setMinimumHeight(50)
self.file_list_widget.setMaximumHeight(100)
self.file_list_widget.setMinimumHeight(100)
self.file_list_widget.setVerticalScrollBarPolicy(
Qt.ScrollBarPolicy.ScrollBarAsNeeded
)
Expand All @@ -308,26 +330,19 @@ def setup_reference_settings(self):
row_layout.addWidget(self.upload_button, 3, 2, 1, 1)

row.setLayout(row_layout)
self.main_layout.addWidget(row)
layout.addWidget(row)

def setup_textinput_settings(self):
def setup_textinput_settings(self, layout: QVBoxLayout):
row = QGroupBox()
row.setTitle(_t("tts_input.name"))
row.setFixedHeight(150)
row_layout = QGridLayout()
self.text_edit = QTextEdit()

self.text_edit.setLineWrapMode(QTextEdit.LineWrapMode.WidgetWidth)
self.text_edit.setHorizontalScrollBarPolicy(
Qt.ScrollBarPolicy.ScrollBarAlwaysOff
)
self.text_edit.setVerticalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAsNeeded)
row_layout.addWidget(self.text_edit, 0, 0)

row_layout = QGridLayout()
self.text_editor = TextEditorWidget()
row_layout.addWidget(self.text_editor)
row.setLayout(row_layout)
self.main_layout.addWidget(row)
layout.addWidget(row)

def setup_audioplayer_settings(self):
def setup_audioplayer_settings(self, layout: QVBoxLayout):
row = QGroupBox()
row.setTitle(_t("tts_output.name"))
row_layout = QGridLayout()
Expand Down Expand Up @@ -370,7 +385,7 @@ def setup_audioplayer_settings(self):

self.speed_slider = QSlider(Qt.Orientation.Horizontal)
self.speed_slider.setRange(50, 200) # playback rate from 50% to 200%
self.speed_slider.setValue(100) # initial rate is 100%
self.speed_slider.setValue(config.speed) # initial rate comes from config.speed (default 100%)
self.speed_slider.sliderMoved.connect(self.set_speed)
row_layout.addWidget(
QLabel(_t("tts_output.speed") + " >>"),
Expand Down Expand Up @@ -400,14 +415,14 @@ def setup_audioplayer_settings(self):
self.save_audio_path.setPlaceholderText(_t("tts_output.save_audio_input"))
self.save_audio_path.setText(f"{config.save_path}")
row_layout.addWidget(self.save_audio_path, 3, 1, 1, 4)

row.setMaximumHeight(200)
row.setLayout(row_layout)

self.player.positionChanged.connect(self.update_position)
self.player.durationChanged.connect(self.update_duration)
self.main_layout.addWidget(row)
layout.addWidget(row)

def setup_backend_settings(self):
def setup_backend_settings(self, layout: QVBoxLayout):
widget = QGroupBox()
widget.setTitle(_t("backend.title"))
row = QHBoxLayout()
Expand All @@ -431,9 +446,10 @@ def setup_backend_settings(self):
row.addWidget(self.test_button)

widget.setLayout(row)
self.main_layout.addWidget(widget)
widget.setMaximumHeight(100)
layout.addWidget(widget)

def setup_action_buttons(self):
def setup_action_buttons(self, layout: QVBoxLayout):
row = QWidget()
row_layout = QHBoxLayout()
self.now_audio = QLabel(_t("action.audio").format(audio_name="(null)"))
Expand All @@ -452,8 +468,9 @@ def setup_action_buttons(self):
self.latency_label = QLabel(_t("action.latency").format(latency=0))
row_layout.addWidget(self.latency_label)

row.setMaximumHeight(100)
row.setLayout(row_layout)
self.main_layout.addWidget(row)
layout.addWidget(row)

def change_theme(self, index):
config.theme = self.theme_combo.itemData(index)
Expand Down Expand Up @@ -537,6 +554,10 @@ def save_config(self, save_to_file=True):
config.mp3_bitrate = int(self.mp3_bitrate_combo.currentText())
config.ref_id = self.ref_id_input.text()
config.save_path = self.save_audio_path.text()
config.speed = self.speed_slider.value()
config.volume = self.volume_slider.value()
config.font_size = self.text_editor.font_size_spin.value()
config.font_family = self.text_editor.font_combo.currentText()

save_config()

Expand Down Expand Up @@ -645,7 +666,7 @@ def start_conversion(self):
self.stop_button.setEnabled(True)

now = datetime.datetime.now()
text = self.text_edit.toPlainText()
text = self.text_editor.input_edit.toPlainText()

audio_name = now.strftime("%Y%m%d_%H%M%S") + "_" + text[:5]
audio_path = Path(self.save_audio_path.text()) / f"{audio_name}.mp3"
Expand Down
Loading

0 comments on commit 36fd155

Please sign in to comment.